diff -Nru mafft-7.471/core/filter.c mafft-7.475/core/filter.c --- mafft-7.471/core/filter.c 1970-01-01 00:00:00.000000000 +0000 +++ mafft-7.475/core/filter.c 2020-11-26 01:45:23.000000000 +0000 @@ -0,0 +1,164 @@ +#include "mltaln.h" + +#define DEBUG 0 + +double maxunusual; + +static double count_unusual( char *seq, char *usual ) +{ + int i; + char *pt; + int count, len; + count = 0; + pt = seq; + while( *pt ) + { + if( !strchr( usual, *pt ) ) + count++; + pt++; + } +// reporterr( "%d/%d=%f\n", count, pt-seq, ((double)count/(pt-seq)) ); + return( (double)count / (pt-seq) ); +} + + +void arguments( int argc, char *argv[] ) +{ + int c; + + maxunusual = 0.05; + inputfile = NULL; + dorp = NOTSPECIFIED; + + while( --argc > 0 && (*++argv)[0] == '-' ) + { + while ( (c = *++argv[0]) ) + { + switch( c ) + { + case 'm': + maxunusual = myatof( *++argv ); + fprintf( stderr, "maxunusual = %f\n", maxunusual ); + --argc; + goto nextoption; + case 'i': + inputfile = *++argv; +// fprintf( stderr, "inputfile = %s\n", inputfile ); + --argc; + goto nextoption; + case 'D': + dorp = 'd'; + break; + case 'P': + dorp = 'p'; + break; + default: + fprintf( stderr, "illegal option %c\n", c ); + argc = 0; + break; + } + } + nextoption: + ; + } + if( argc != 0 ) + { + fprintf( stderr, "options: Check source file !\n" ); + exit( 1 ); + } +} + + + +int main( int argc, char *argv[] ) +{ + FILE *infp; + int nlenmin; + char **name; + char **seq; + int *nlen; + int i; + char *usual; + int nout; + char *tmpseq; + + arguments( argc, argv ); + + if( inputfile ) + { + infp = fopen( inputfile, "r" ); + if( !infp ) + { + fprintf( stderr, "Cannot open %s\n", inputfile ); + exit( 1 ); + } + } + else + infp = stdin; + + +// dorp = NOTSPECIFIED; + getnumlen_casepreserve( infp, &nlenmin ); + +// fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); + + seq = AllocateCharMtx( njob, nlenmax+1 ); + name = AllocateCharMtx( njob, B+1 ); + nlen = AllocateIntVec( njob ); + tmpseq = AllocateCharVec( nlenmax+1 ); + +// readData_pointer( infp, name, nlen, seq ); + readData_pointer_casepreserve( infp, name, nlen, seq ); + fclose( infp ); + +// for( i=0; i%s\n", name[i]+1 ); + if( seq[i][nlen[i]-1] == '\n' ) seq[i][nlen[i]-1] = 0; + fprintf( origfp, "%s\n", seq[i] ); + } + fclose( origfp ); +#endif + + if( dorp == 'p' ) + usual = "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-"; + else + usual = "ATGCUatgcu-"; + nout = 0; + for( i=0; i%s\n", name[i]+1 ); + fprintf( stdout, "%s\n", seq[i] ); + nout++; + } + } + + if( nout < njob ) + { + if( dorp == 'p' ) + fprintf( stderr, "\n\nRemoved %d sequence(s) where the frequency of ambiguous amino acids > %5.3f\n\n\n", njob-nout, maxunusual ); + else + fprintf( stderr, "\n\nRemoved %d sequence(s) where the frequency of ambiguous bases > %5.3f\n\n\n", njob-nout, maxunusual ); + } + + free( nlen ); + free( tmpseq ); + FreeCharMtx( seq ); + FreeCharMtx( name ); + + return( 0 ); +} diff -Nru mafft-7.471/core/io.c mafft-7.475/core/io.c --- mafft-7.471/core/io.c 2020-07-03 04:32:51.000000000 +0000 +++ mafft-7.475/core/io.c 2020-11-26 01:45:23.000000000 +0000 @@ -2664,7 +2664,9 @@ char b[B]; fgets( b, B, fp ); - fgets( b, B, fp ); b[5] = 0; nseq0 = atoi( b ); if( nseq != nseq0 ) +// fgets( b, B, fp ); b[5] = 0; nseq0 = atoi( b ); if( nseq != nseq0 ) + fgets( b, B, fp ); nseq0 = atoi( b ); // 2020/Oct/23 + if( nseq != nseq0 ) { fprintf( stderr, "%d != %d\n", nseq, nseq0 ); ErrorExit( "hat2 is wrong." ); diff -Nru mafft-7.471/core/mafft.tmpl mafft-7.475/core/mafft.tmpl --- mafft-7.471/core/mafft.tmpl 2020-07-03 04:32:51.000000000 +0000 +++ mafft-7.475/core/mafft.tmpl 2020-11-26 01:45:23.000000000 +0000 @@ -1,7 +1,7 @@ #! /bin/bash er=0; myself=`dirname "$0"`/`basename "$0"`; export myself -version="v7.471 (2020/Jul/3)"; export version +version="v7.475 (2020/Nov/23)"; export version LANG=C; export LANG os=`uname` progname=`basename "$0"` @@ -257,6 +257,7 @@ foldalignopt=" " treealg=" -X 0.1 " sueff="1.0" +maxambiguous="1.0" scoreoutarg=" " numthreads=0 numthreadsit=-1 @@ -405,6 +406,9 @@ shift sueff="$1" treealg=" -X $1" + elif [ "$1" = "--maxambiguous" ]; then + shift + maxambiguous="$1" elif [ "$1" = "--noscore" ]; then scorecalcopt=" -Z " elif [ "$1" = "--6mermultipair" ]; then @@ -1046,7 +1050,7 @@ if [ $debug -eq 1 ]; then # trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys # trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0 15 - trap "tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15 + trap "popd > /dev/null 2>&1; tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15 else # trap "rm -rf $TMPFILE" 0 15 trap "removetmpfile" 0 15 @@ -1065,8 +1069,16 @@ cat "$1" | tr "\r" "\n" > "$TMPFILE/infile" echo "" >> "$TMPFILE/infile" - cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> "$TMPFILE/infile" cat "$addfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_addfile" + + if [ $maxambiguous != "1.0" ]; then + mv "$TMPFILE/infile" "$TMPFILE/_tofilter" + "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/infile" 2>>"$progressfile" || exit 1 + mv "$TMPFILE/_addfile" "$TMPFILE/_tofilter" + "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/_addfile" 2>>"$progressfile" || exit 1 + fi + cat "$TMPFILE/_addfile" >> "$TMPFILE/infile" + cat "$scorematrix" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_aamtx" cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_subalignmentstable" cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_guidetree" @@ -1190,7 +1202,9 @@ exit 1; fi + if [ "$addarg0" != " " ]; then + # iterate=0 # 2013/03/23 -> commented out, 2017/12 "$prefix/countlen" < "$TMPFILE/_addfile" > "$TMPFILE/addsize" 2>>"$progressfile" nadd=`awk '{print $1}' "$TMPFILE/addsize"` @@ -1305,6 +1319,13 @@ echo "npair = " $npair 1>>"$progressfile" echo "nseq = " $nseq 1>>"$progressfile" echo "nlen = " $nlen 1>>"$progressfile" + + if [ $norg -eq 0 ]; then + echo "" 1>>"$progressfile" + echo "The reference sequence was removed because of ambiguous letters?" 1>>"$progressfile" + echo "" 1>>"$progressfile" + exit 1; + fi # nagasa check! # if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct @@ -1327,6 +1348,11 @@ exit 1; fi + if [ `awk "BEGIN {print( 0.0+\"$maxambiguous\" < 0.0 || 0.0+\"$maxambiguous\" > 1.0 )}"` -gt 0 ]; then + printf "\n%s\n\n" "The argument of --maxambiguous must be between 0.0 and 1.0" 1>>"$progressfile" + exit 1; + fi + if [ $allowshift -eq 1 ]; then if [ $unalignspecified -ne 1 ]; then unalignlevel="0.8" @@ -1945,6 +1971,7 @@ fi + if [ $nadd -gt "0" ]; then if [ $fragment -eq "1" ]; then addarg="$addarg0 $nadd -g -0.01" @@ -1972,12 +1999,13 @@ bunkatsuopt=" -B " # fftnsi demo bunktasu shinai if [ "$add2ndhalfarg" != " " ]; then if [ $auto -eq 1 -o $iterate -gt 0 ]; then - echo '' 1>>"$progressfile" - echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" - echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" - echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" - echo '' 1>>"$progressfile" - exit 1 +# echo '' 1>>"$progressfile" +# echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile" +# echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile" +# echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile" +# echo '' 1>>"$progressfile" +# exit 1 + iterate=0 fi fi fi @@ -2179,6 +2207,7 @@ # echo "iterate = " $iterate 1>>"$progressfile" # echo "cycle = " $cycle 1>>"$progressfile" + if [ $anysymbol -eq 1 ]; then mv infile orig "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1 diff -Nru mafft-7.471/core/Makefile mafft-7.475/core/Makefile --- mafft-7.471/core/Makefile 2020-07-03 04:32:51.000000000 +0000 +++ mafft-7.475/core/Makefile 2020-11-26 01:45:23.000000000 +0000 @@ -57,7 +57,7 @@ PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \ multi2hat3s pairash addsingle maffttext2hex hex2maffttext \ splittbfast disttbfast tbfast nodepair mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \ - seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version \ + seq2regtable regtable2seq score getlag dndpre setcore filter replaceu restoreu setdirection makedirectionlist version \ $(DASH_CLIENT) SOS = libdisttbfast.so DLLS = libdisttbfast.dll @@ -66,6 +66,7 @@ PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl SCRIPTS = mafft mafft-homologs.rb mafft-sparsecore.rb OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o +OBJFILTER = mtxutl.o io.o filter.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o @@ -161,8 +162,9 @@ dlls : $(DLLS) $(DASH_CLIENT): dash_client.go - go build dash_client.go -# go build --ldflags '-extldflags "-static"' dash_client.go +# go build dash_client.go + env CGO_ENABLED=0 go build --ldflags '-extldflags "-static"' dash_client.go # for conda + univscript: univscript.tmpl Makefile sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript @@ -241,6 +243,9 @@ replaceu : $(OBJREPLACEU) $(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LDFLAGS) $(LIBS) +filter : $(OBJFILTER) + $(CC) -o $@ $(OBJFILTER) $(MYCFLAGS) $(LDFLAGS) $(LIBS) + restoreu : $(OBJRESTOREU) $(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LDFLAGS) $(LIBS) diff -Nru mafft-7.471/core/mltaln.h mafft-7.475/core/mltaln.h --- mafft-7.471/core/mltaln.h 2020-07-03 04:32:51.000000000 +0000 +++ mafft-7.475/core/mltaln.h 2020-11-26 01:45:23.000000000 +0000 @@ -36,7 +36,7 @@ -#define VERSION "7.471" +#define VERSION "7.475" #define SHOWVERSION reporterr( "%s (%s) Version " VERSION "\nalg=%c, model=%s, amax=%3.1f\n%d thread(s)\n\n", progName( argv[0] ), (dorp=='d')?"nuc":((nblosum==-2)?"text":"aa"), alg, modelname, specificityconsideration, nthread ) #define FFT_THRESHOLD 80 diff -Nru mafft-7.471/core/pairlocalalign.c mafft-7.475/core/pairlocalalign.c --- mafft-7.471/core/pairlocalalign.c 2020-07-03 04:32:51.000000000 +0000 +++ mafft-7.475/core/pairlocalalign.c 2020-11-26 01:45:23.000000000 +0000 @@ -2475,7 +2475,8 @@ if( ngui == 0 ) { if( alg == 'Y' || alg == 'r' ) - distancemtx = AllocateDoubleMtx( njob, nadd ); +// distancemtx = AllocateDoubleMtx( njob, nadd ); + distancemtx = AllocateDoubleMtx( njob-nadd, nadd ); // 2020/Oct/23 else distancemtx = AllocateDoubleHalfMtx( njob ); // distancemtx = AllocateDoubleMtx( njob, njob ); @@ -3051,7 +3052,13 @@ } free( distseq1 ); free( distseq2 ); - if( store_dist && ngui == 0 ) FreeDoubleHalfMtx( distancemtx, njob ); + if( store_dist && ngui == 0 ) + { + if( alg == 'Y' || alg == 'r' ) + FreeDoubleMtx( distancemtx ); // 2020/Oct/23 + else + FreeDoubleHalfMtx( distancemtx, njob ); + } free( targetmap ); free( targetmapr ); diff -Nru mafft-7.471/core/replaceu.c mafft-7.475/core/replaceu.c --- mafft-7.471/core/replaceu.c 2020-07-03 04:32:51.000000000 +0000 +++ mafft-7.475/core/replaceu.c 2020-11-26 01:45:23.000000000 +0000 @@ -104,6 +104,7 @@ nlen = AllocateIntVec( njob ); readData_pointer_casepreserve( infp, name, nlen, seq ); + fclose( infp ); // for( i=0; i Mon, 07 Dec 2020 18:22:24 +0100 + mafft (7.471-1) unstable; urgency=medium * Team upload. diff -Nru mafft-7.471/debian/control mafft-7.475/debian/control --- mafft-7.471/debian/control 2020-07-08 13:59:05.000000000 +0000 +++ mafft-7.475/debian/control 2020-12-07 17:22:24.000000000 +0000 @@ -5,7 +5,7 @@ Section: science Priority: optional Build-Depends: debhelper-compat (= 13) -Standards-Version: 4.5.0 +Standards-Version: 4.5.1 Vcs-Browser: https://salsa.debian.org/med-team/mafft Vcs-Git: https://salsa.debian.org/med-team/mafft.git Homepage: https://mafft.cbrc.jp/alignment/software/ diff -Nru mafft-7.471/readme mafft-7.475/readme --- mafft-7.471/readme 2020-07-03 04:30:03.000000000 +0000 +++ mafft-7.475/readme 2020-11-23 00:44:14.000000000 +0000 @@ -1,6 +1,6 @@ ----------------------------------------------------------------------- MAFFT: a multiple sequence alignment program - version 7.471, 2020/Jul/3 + version 7.475, 2020/Nov/23 http://mafft.cbrc.jp/alignment/software/ katoh@ifrec.osaka-u.ac.jp