Binary files /tmp/tmpnETKNI/JVllxiaiNU/andi-0.11/andi-manual.pdf and /tmp/tmpnETKNI/c85NilQ4xh/andi-0.12/andi-manual.pdf differ diff -Nru andi-0.11/configure.ac andi-0.12/configure.ac --- andi-0.11/configure.ac 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/configure.ac 2018-02-26 12:10:19.000000000 +0000 @@ -1,5 +1,5 @@ -AC_INIT([andi], [0.11]) -AM_INIT_AUTOMAKE([-Wall foreign ]) +AC_INIT([andi], [0.12]) +AM_INIT_AUTOMAKE([-Wall foreign]) AC_CONFIG_MACRO_DIR([m4]) @@ -78,8 +78,7 @@ AC_SUBST([SEED]) -if test "x${try_unit_tests}" = xyes; then - +AS_IF([test "x${try_unit_tests}" = xyes], [ have_glib=yes PKG_CHECK_MODULES([GLIB], [glib-2.0], [], [have_glib=no]) @@ -88,7 +87,7 @@ fi AX_CXX_COMPILE_STDCXX_11([],[mandatory]) -fi +]) # Check for various headers including those used by libdivsufsort. diff -Nru andi-0.11/debian/changelog andi-0.12/debian/changelog --- andi-0.11/debian/changelog 2017-08-06 00:07:55.000000000 +0000 +++ andi-0.12/debian/changelog 2018-03-07 11:12:24.000000000 +0000 @@ -1,8 +1,32 @@ -andi (0.11-1build1) artful; urgency=medium +andi (0.12-3) unstable; urgency=medium - * No-change rebuild for libgsl soname change. + * Allow for parallelisation of unit tests. Really fixes build failures. - -- Matthias Klose Sun, 06 Aug 2017 00:07:55 +0000 + -- Fabian Klötzl Wed, 07 Mar 2018 12:12:24 +0100 + +andi (0.12-2) unstable; urgency=medium + + [ Fabian Klötzl ] + * Derandomize unit tests. Fixes build failures. + + [ Andreas Tille ] + * Standards-Version: 4.1.3 + * debhelper 11 + + -- Fabian Klötzl Mon, 05 Mar 2018 15:11:27 +0100 + +andi (0.12-1) unstable; urgency=medium + + * Team upload. 
+ + [ Fabian Klötzl ] + * Remove useless dh-autoreconf build dependency + * Import new upstream release + + [ Steffen Moeller ] + * debian/upstream/metadata: added refs to registries + + -- Fabian Klötzl Wed, 09 Aug 2017 10:25:35 +0200 andi (0.11-1) unstable; urgency=medium diff -Nru andi-0.11/debian/compat andi-0.12/debian/compat --- andi-0.11/debian/compat 2017-07-13 07:23:09.000000000 +0000 +++ andi-0.12/debian/compat 2018-03-07 11:12:24.000000000 +0000 @@ -1 +1 @@ -10 +11 diff -Nru andi-0.11/debian/control andi-0.12/debian/control --- andi-0.11/debian/control 2017-07-13 07:23:09.000000000 +0000 +++ andi-0.12/debian/control 2018-03-07 11:12:24.000000000 +0000 @@ -3,13 +3,12 @@ Uploaders: Fabian Klötzl Section: science Priority: optional -Build-Depends: debhelper (>= 10), +Build-Depends: debhelper (>= 11~), libdivsufsort-dev, pkgconf, - dh-autoreconf, libgsl-dev, libglib2.0-dev -Standards-Version: 4.0.0 +Standards-Version: 4.1.3 Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/andi.git Vcs-Git: https://anonscm.debian.org/git/debian-med/andi.git Homepage: https://github.com/EvolBioInf/andi diff -Nru andi-0.11/debian/patches/0001-Make-unit-tests-reproducible.patch andi-0.12/debian/patches/0001-Make-unit-tests-reproducible.patch --- andi-0.11/debian/patches/0001-Make-unit-tests-reproducible.patch 1970-01-01 00:00:00.000000000 +0000 +++ andi-0.12/debian/patches/0001-Make-unit-tests-reproducible.patch 2018-03-07 11:12:24.000000000 +0000 @@ -0,0 +1,162 @@ +From: Fabian Klötzl +Date: Mon, 5 Mar 2018 14:43:06 +0100 +Subject: Make unit tests reproducible +Applied-Upstream: https://github.com/EvolBioInf/andi/commit/394c3b4c3dfb8fd40530fd6815f300a273a309d2 + +--- + test/low_homo.sh | 15 ++++++++++++--- + test/nan.sh | 12 ++++++++++-- + test/test_extra.sh | 15 ++++++++++++--- + test/test_join.sh | 40 ++++++++++++++++++++++++++++++++-------- + 4 files changed, 66 insertions(+), 16 deletions(-) + +diff --git a/test/low_homo.sh b/test/low_homo.sh +index 72d1693..11e889e 
100755 +--- a/test/low_homo.sh ++++ b/test/low_homo.sh +@@ -1,8 +1,17 @@ + #!/bin/bash -f + +-./test/test_fasta -l 100000 > a.fa +-./test/test_fasta -l 100000 > b.fa +-./test/test_fasta -l 100 > both.fa ++SEED=${RANDOM_SEED:-0} ++SEED2=0 ++SEED3=0 ++if test $SEED -ne 0; then ++ SEED=$((SEED + 1)) ++ SEED2=$((SEED + 2)) ++ SEED3=$((SEED + 3)) ++fi ++ ++./test/test_fasta -s $SEED -l 100000 > a.fa ++./test/test_fasta -s $SEED2 -l 100000 > b.fa ++./test/test_fasta -s $SEED3 -l 100 > both.fa + + cat both.fa a.fa | awk -vRS='>' '{if($1 == "S0")print ">"$0 > "S0.fa"}' + cat both.fa b.fa | awk -vRS='>' '{if($1 == "S1")print ">"$0 > "S1.fa"}' +diff --git a/test/nan.sh b/test/nan.sh +index e0502d0..60db0e2 100755 +--- a/test/nan.sh ++++ b/test/nan.sh +@@ -1,7 +1,15 @@ + #!/bin/bash -f + +-./test/test_fasta -l 10000 > a.fa +-./test/test_fasta -l 10000 > b.fa ++SEED=${RANDOM_SEED:-0} ++SEED2=0 ++if test $SEED -ne 0; then ++ SEED=$((SEED + 1)) ++ SEED2=$((SEED + 2)) ++fi ++ ++ ++./test/test_fasta -s $SEED -l 10000 > a.fa ++./test/test_fasta -s $SEED2 -l 10000 > b.fa + + # this is expected to trigger the nan warning + ./src/andi -j a.fa b.fa 2>&1 | grep 'nan' +diff --git a/test/test_extra.sh b/test/test_extra.sh +index dbb23ea..c1c0641 100755 +--- a/test/test_extra.sh ++++ b/test/test_extra.sh +@@ -3,17 +3,26 @@ + # Test if andi exists, and can be executed + ./src/andi --version > /dev/null || exit 1 + ++SEED=${RANDOM_SEED:-0} ++SEED2=0 ++SEED3=0 ++if test $SEED -ne 0; then ++ SEED=$((SEED + 1)) ++ SEED2=$((SEED + 2)) ++ SEED3=$((SEED + 3)) ++fi ++ + # Test andi for more than just two sequences at a time +-./test/test_fasta -l 100000 -d 0.01 -d 0.01 -d 0.01 -d 0.01 | ./src/andi > /dev/null || exit 1 ++./test/test_fasta -s $SEED -l 100000 -d 0.01 -d 0.01 -d 0.01 -d 0.01 | ./src/andi > /dev/null || exit 1 + + # Test low-memory mode +-./test/test_fasta -l 10000 > test_extra.fasta ++./test/test_fasta -s $SEED2 -l 10000 > test_extra.fasta + ./src/andi test_extra.fasta > extra.out + 
./src/andi test_extra.fasta --low-memory > extra_low_memory.out + diff extra.out extra_low_memory.out || exit 1 + + # Test file of filenames +-./test/test_fasta -l 10000 > test_extra.fasta ++./test/test_fasta -s $SEED3 -l 10000 > test_extra.fasta + echo "$PWD/test_extra.fasta" > fof.txt + ./src/andi test_extra.fasta > extra.out + ./src/andi --file-of-filenames fof.txt > fof.out +diff --git a/test/test_join.sh b/test/test_join.sh +index 60e099a..33cc2c6 100755 +--- a/test/test_join.sh ++++ b/test/test_join.sh +@@ -2,10 +2,19 @@ + + ./src/andi --help > /dev/null || exit 1 + ++SEED=${RANDOM_SEED:-0} ++SEED2=0 ++SEED3=0 ++if test $SEED -ne 0; then ++ SEED=$((SEED + 1)) ++ SEED2=$((SEED + 2)) ++ SEED3=$((SEED + 3)) ++fi ++ + # Simple join test +-./test/test_fasta -l 1000 -L 1000 -d 0.1 > p1.fasta +-./test/test_fasta -l 1000 -L 1000 -d 0.1 > p2.fasta +-./test/test_fasta -l 10000 -L 10000 -d 0.1 > p3.fasta ++./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p1.fasta ++./test/test_fasta -s $SEED2 -l 1000 -L 1000 -d 0.1 > p2.fasta ++./test/test_fasta -s $SEED3 -l 10000 -L 10000 -d 0.1 > p3.fasta + + head -qn 2 p1.fasta p2.fasta p3.fasta > S0.fasta + tail -qn 2 p1.fasta p2.fasta p3.fasta > S1.fasta +@@ -25,9 +34,16 @@ if test $RES -ne 1; then + exit 1; + fi + ++SEED=${RANDOM_SEED:-0} ++SEED2=0 ++if test $SEED -ne 0; then ++ SEED=$((SEED + 5)) ++ SEED2=$((SEED + 6)) ++fi ++ + #unbalanced number of contigs +-./test/test_fasta -l 1000 -L 1000 -d 0.1 > p2.fasta +-./test/test_fasta -l 10000 -L 10000 -d 0.1 > p3.fasta ++./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p2.fasta ++./test/test_fasta -s $SEED2 -l 10000 -L 10000 -d 0.1 > p3.fasta + + head -qn 2 p3.fasta > S0.fasta + tail -qn 2 p2.fasta p3.fasta > S1.fasta +@@ -47,11 +63,19 @@ if test $RES -ne 1; then + exit 1; + fi + ++SEED=${RANDOM_SEED:-0} ++SEED2=0 ++SEED3=0 ++if test $SEED -ne 0; then ++ SEED=$((SEED + 11)) ++ SEED2=$((SEED + 12)) ++ SEED3=$((SEED + 13)) ++fi + + #unbalanced number of contigs 2 
+-./test/test_fasta -l 1000 -L 1000 -d 0.1 > p1.fasta +-./test/test_fasta -l 1000 -L 1000 -d 0.1 > p2.fasta +-./test/test_fasta -l 10000 -L 10000 -d 0.1 > p3.fasta ++./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p1.fasta ++./test/test_fasta -s $SEED2 -l 1000 -L 1000 -d 0.1 > p2.fasta ++./test/test_fasta -s $SEED3 -l 10000 -L 10000 -d 0.1 > p3.fasta + + head -qn 2 p1.fasta p3.fasta > S0.fasta + tail -qn 2 p1.fasta p2.fasta p3.fasta > S1.fasta diff -Nru andi-0.11/debian/patches/0002-prefix-files-in-unit-tests.patch andi-0.12/debian/patches/0002-prefix-files-in-unit-tests.patch --- andi-0.11/debian/patches/0002-prefix-files-in-unit-tests.patch 1970-01-01 00:00:00.000000000 +0000 +++ andi-0.12/debian/patches/0002-prefix-files-in-unit-tests.patch 2018-03-07 11:12:24.000000000 +0000 @@ -0,0 +1,182 @@ +From: =?utf-8?q?Fabian_Kl=C3=B6tzl?= +Date: Wed, 7 Mar 2018 11:26:26 +0100 +Subject: prefix files in unit tests +Applied-Upstream: https://github.com/EvolBioInf/andi/commit/e35d1a51f71541f246919a277386817f69c94a3e + +Unit tests can be run in parallel (see debian builds). Thus they should +work on different files, otherwise bad and confusing things happen. 
+--- + test/low_homo.sh | 16 ++++++++-------- + test/nan.sh | 10 +++++----- + test/test_join.sh | 48 ++++++++++++++++++++++++------------------------ + 3 files changed, 37 insertions(+), 37 deletions(-) + +diff --git a/test/low_homo.sh b/test/low_homo.sh +index 11e889e..1f16a79 100755 +--- a/test/low_homo.sh ++++ b/test/low_homo.sh +@@ -9,21 +9,21 @@ if test $SEED -ne 0; then + SEED3=$((SEED + 3)) + fi + +-./test/test_fasta -s $SEED -l 100000 > a.fa +-./test/test_fasta -s $SEED2 -l 100000 > b.fa +-./test/test_fasta -s $SEED3 -l 100 > both.fa ++./test/test_fasta -s $SEED -l 100000 > a_low.fa ++./test/test_fasta -s $SEED2 -l 100000 > b_low.fa ++./test/test_fasta -s $SEED3 -l 100 > both_low.fa + +-cat both.fa a.fa | awk -vRS='>' '{if($1 == "S0")print ">"$0 > "S0.fa"}' +-cat both.fa b.fa | awk -vRS='>' '{if($1 == "S1")print ">"$0 > "S1.fa"}' ++cat both_low.fa a_low.fa | awk -vRS='>' '{if($1 == "S0")print ">"$0 > "S0_low.fa"}' ++cat both_low.fa b_low.fa | awk -vRS='>' '{if($1 == "S1")print ">"$0 > "S1_low.fa"}' + + # this is expected to trigger the low homology warning +-./src/andi -j S0.fa S1.fa 2>&1 | grep 'homology' ++./src/andi -j S0_low.fa S1_low.fa 2>&1 | grep 'homology' + EXIT_VAL=$? + + if [[ EXIT_VAL -ge 1 ]]; then + echo "Triggering low homology failed" >&2 +- grep '^>' a.fa b.fa both.fa ++ grep '^>' a_low.fa b_low.fa both_low.fa + fi + +-rm -f a.fa b.fa both.fa S0.fa S1.fa ++rm -f a_low.fa b_low.fa both_low.fa S0_low.fa S1_low.fa + exit $EXIT_VAL +diff --git a/test/nan.sh b/test/nan.sh +index 60db0e2..97fa167 100755 +--- a/test/nan.sh ++++ b/test/nan.sh +@@ -8,18 +8,18 @@ if test $SEED -ne 0; then + fi + + +-./test/test_fasta -s $SEED -l 10000 > a.fa +-./test/test_fasta -s $SEED2 -l 10000 > b.fa ++./test/test_fasta -s $SEED -l 10000 > a_nan.fa ++./test/test_fasta -s $SEED2 -l 10000 > b_nan.fa + + # this is expected to trigger the nan warning +-./src/andi -j a.fa b.fa 2>&1 | grep 'nan' ++./src/andi -j a_nan.fa b_nan.fa 2>&1 | grep 'nan' + EXIT_VAL=$? 
+ + + if [[ EXIT_VAL -ge 1 ]]; then + echo "Triggering nan failed" >&2 +- grep '^>' a.fa b.fa both.fa ++ grep '^>' a_nan.fa b_nan.fa + fi + +-rm -f a.fa b.fa ++rm -f a_nan.fa b_nan.fa + exit $EXIT_VAL +diff --git a/test/test_join.sh b/test/test_join.sh +index 33cc2c6..a0616bf 100755 +--- a/test/test_join.sh ++++ b/test/test_join.sh +@@ -12,17 +12,17 @@ if test $SEED -ne 0; then + fi + + # Simple join test +-./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p1.fasta +-./test/test_fasta -s $SEED2 -l 1000 -L 1000 -d 0.1 > p2.fasta +-./test/test_fasta -s $SEED3 -l 10000 -L 10000 -d 0.1 > p3.fasta ++./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p1_join.fasta ++./test/test_fasta -s $SEED2 -l 1000 -L 1000 -d 0.1 > p2_join.fasta ++./test/test_fasta -s $SEED3 -l 10000 -L 10000 -d 0.1 > p3_join.fasta + +-head -qn 2 p1.fasta p2.fasta p3.fasta > S0.fasta +-tail -qn 2 p1.fasta p2.fasta p3.fasta > S1.fasta ++head -qn 2 p1_join.fasta p2_join.fasta p3_join.fasta > S0_join.fasta ++tail -qn 2 p1_join.fasta p2_join.fasta p3_join.fasta > S1_join.fasta + +-rm p1.fasta p2.fasta p3.fasta; ++rm p1_join.fasta p2_join.fasta p3_join.fasta; + + +-RES=$(./src/andi -m RAW -t 1 -j S0.fasta S1.fasta | ++RES=$(./src/andi -m RAW -t 1 -j S0_join.fasta S1_join.fasta | + tail -n 1 | + awk '{print ($2 - 0.1)}' | + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.03}' +@@ -30,7 +30,7 @@ RES=$(./src/andi -m RAW -t 1 -j S0.fasta S1.fasta | + + if test $RES -ne 1; then + echo "The last test computed a distance deviating more than three percent from its intended value." +- echo "See S0.fasta and S1.fasta for the used sequences." ++ echo "See S0_join.fasta and S1_join.fasta for the used sequences." 
+ exit 1; + fi + +@@ -42,16 +42,16 @@ if test $SEED -ne 0; then + fi + + #unbalanced number of contigs +-./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p2.fasta +-./test/test_fasta -s $SEED2 -l 10000 -L 10000 -d 0.1 > p3.fasta ++./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p2_join.fasta ++./test/test_fasta -s $SEED2 -l 10000 -L 10000 -d 0.1 > p3_join.fasta + +-head -qn 2 p3.fasta > S0.fasta +-tail -qn 2 p2.fasta p3.fasta > S1.fasta ++head -qn 2 p3_join.fasta > S0_join.fasta ++tail -qn 2 p2_join.fasta p3_join.fasta > S1_join.fasta + +-rm p2.fasta p3.fasta; ++rm p2_join.fasta p3_join.fasta; + + +-RES=$(./src/andi -m RAW -t1 -j S0.fasta S1.fasta | ++RES=$(./src/andi -m RAW -t1 -j S0_join.fasta S1_join.fasta | + tail -n 1 | + awk '{print ($2 - 0.1)}' | + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.03}' +@@ -59,7 +59,7 @@ RES=$(./src/andi -m RAW -t1 -j S0.fasta S1.fasta | + + if test $RES -ne 1; then + echo "The last test computed a distance deviating more than three percent from its intended value." +- echo "See S0.fasta and S1.fasta for the used sequences." ++ echo "See S0_join.fasta and S1_join.fasta for the used sequences." 
+ exit 1; + fi + +@@ -73,17 +73,17 @@ if test $SEED -ne 0; then + fi + + #unbalanced number of contigs 2 +-./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p1.fasta +-./test/test_fasta -s $SEED2 -l 1000 -L 1000 -d 0.1 > p2.fasta +-./test/test_fasta -s $SEED3 -l 10000 -L 10000 -d 0.1 > p3.fasta ++./test/test_fasta -s $SEED -l 1000 -L 1000 -d 0.1 > p1_join.fasta ++./test/test_fasta -s $SEED2 -l 1000 -L 1000 -d 0.1 > p2_join.fasta ++./test/test_fasta -s $SEED3 -l 10000 -L 10000 -d 0.1 > p3_join.fasta + +-head -qn 2 p1.fasta p3.fasta > S0.fasta +-tail -qn 2 p1.fasta p2.fasta p3.fasta > S1.fasta ++head -qn 2 p1_join.fasta p3_join.fasta > S0_join.fasta ++tail -qn 2 p1_join.fasta p2_join.fasta p3_join.fasta > S1_join.fasta + +-rm p1.fasta p2.fasta p3.fasta; ++rm p1_join.fasta p2_join.fasta p3_join.fasta; + + +-RES=$(./src/andi -mRAW -t 1 -j S0.fasta S1.fasta | ++RES=$(./src/andi -mRAW -t 1 -j S0_join.fasta S1_join.fasta | + tail -n 1 | + awk '{print ($2 - 0.1)}' | + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.03}' +@@ -91,9 +91,9 @@ RES=$(./src/andi -mRAW -t 1 -j S0.fasta S1.fasta | + + if test $RES -ne 1; then + echo "The last test computed a distance deviating more than three percent from its intended value." +- echo "See S0.fasta and S1.fasta for the used sequences." ++ echo "See S0_join.fasta and S1_join.fasta for the used sequences." 
+ exit 1; + fi + + +-rm S0.fasta S1.fasta ++rm S0_join.fasta S1_join.fasta diff -Nru andi-0.11/debian/patches/series andi-0.12/debian/patches/series --- andi-0.11/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 +++ andi-0.12/debian/patches/series 2018-03-07 11:12:24.000000000 +0000 @@ -0,0 +1,2 @@ +0001-Make-unit-tests-reproducible.patch +0002-prefix-files-in-unit-tests.patch diff -Nru andi-0.11/debian/upstream/metadata andi-0.12/debian/upstream/metadata --- andi-0.11/debian/upstream/metadata 2017-07-13 07:23:09.000000000 +0000 +++ andi-0.12/debian/upstream/metadata 2018-03-07 11:12:24.000000000 +0000 @@ -10,3 +10,10 @@ PMID: 25504847 URL: http://bioinformatics.oxfordjournals.org/content/31/8/1169.abstract eprint: http://bioinformatics.oxfordjournals.org/content/31/8/1169.full.pdf+html +Registry: + - Name: bio.tools + Entry: andi + - Name: OMICtools + Entry: OMICS_09287 + - Name: SciCrunch + Entry: NA diff -Nru andi-0.11/docs/andi.1.in andi-0.12/docs/andi.1.in --- andi-0.11/docs/andi.1.in 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/docs/andi.1.in 2018-02-26 12:10:19.000000000 +0000 @@ -1,9 +1,9 @@ -.TH ANDI "1" "2017-06-30" "@VERSION@" "andi manual" +.TH ANDI "1" "2017-09-17" "@VERSION@" "andi manual" .SH NAME andi \- estimates evolutionary distance .SH SYNOPSIS .B andi -[\fI-jlv\fR] [\fI-b INT\fR] [\fI-p FLOAT\fR] [\fI-m MODEL\fR] [\fI-t INT\fR] \fIFILES\fR... +[\fIOPTIONS...\fR] \fIFILES\fR... .SH DESCRIPTION .TP \fBandi\fR estimates the evolutionary distance between closely related genomes. For this \fBandi\fR reads the input sequences from \fIFASTA\fR files and computes the pairwise anchor distance. The idea behind this is explained in a paper by Haubold et al. (2015). @@ -11,10 +11,10 @@ The output is a symmetrical distance matrix in \fIPHYLIP\fR format, with each entry representing divergence with a positive real number. 
A distance of zero means that two sequences are identical, whereas other values are estimates for the nucleotide substitution rate (Jukes-Cantor corrected). For technical reasons the comparison might fail and no estimate can be computed. In such cases \fInan\fR is printed. This either means that the input sequences were too short (<200bp) or too diverse (K>0.5) for our method to work properly. .SH OPTIONS .TP -\fB\-b\fR, \fB\-\-bootstrap\fR +\fB\-b\fR \fIINT\fR, \fB\-\-bootstrap\fR=\fIINT\fR Compute multiple distance matrices, with \fIn-1\fR bootstrapped from the first. See the paper Klötzl & Haubold (2016) for a detailed explanation. .TP -\fB--file-of-filenames\fR +\fB--file-of-filenames\fR=\fIFILE\fR Usually, \fBandi\fR is called with the filenames as commandline arguments. With this option the filenames may also be read from a file itself, with one name per line. Use a single dash (\fB'-'\fR) to read from stdin. .TP \fB\-j\fR, \fB\-\-join\fR @@ -23,13 +23,16 @@ \fB\-l\fR, \fB\-\-low-memory\fR In multithreaded mode, \fBandi\fR requires memory linear to the amount of threads. The low memory mode changes this to a constant demand independent from the used number of threads. Unfortunately, this comes at a significant runtime cost. .TP -\fB\-m\fR, \fB\-\-model\fR -Different models of nucleotide evolution are supported. By default the Jukes-Cantor correction is used. +\fB\-m\fR \fIMODEL\fR, \fB\-\-model\fR=\fIMODEL\fR +Set the nucleotide evolution model to one of 'Raw', 'JC', or 'Kimura'. By default the Jukes-Cantor correction is used. .TP -\fB\-p\fR +\fB\-p\fR \fIFLOAT\fR Significance of an anchor; default: 0.025. .TP -\fB\-t\fR, \fB\-\-threads\fR +\fB--progress\fR[=\fIWHEN\fR] +Print a progress bar. \fIWHEN\fR can be 'auto' (the default when the option is not given), 'always' (also used when \fIWHEN\fR is omitted), or 'never'. +.TP +\fB\-t\fR \fIINT\fR, \fB\-\-threads\fR=\fIINT\fR The number of threads to be used; by default, all available processors are used. 
.br Multithreading is only available if \fBandi\fR was compiled with OpenMP support. @@ -38,7 +41,7 @@ By default \fBandi\fR outputs the full names of sequences, optionally padded with spaces, if they are shorter than ten characters. Names longer than ten characters may lead to problems with downstream tools. With this switch names will be truncated. .TP \fB\-v\fR, \fB\-\-verbose\fR -Prints additional information. Apply multiple times for extra verboseness. +Prints additional information, including the amount of found homology. Apply multiple times for extra verboseness. .TP \fB\-h\fR, \fB\-\-help\fR Prints the synopsis and an explanation of available options. @@ -46,7 +49,7 @@ \fB\-\-version\fR Outputs version information and acknowledgments. .SH COPYRIGHT -Copyright \(co 2014 - 2016 Fabian Klötzl +Copyright \(co 2014 - 2017 Fabian Klötzl License GPLv3+: GNU GPL version 3 or later. .br This is free software: you are free to change and redistribute it. diff -Nru andi-0.11/docs/manual/andi-manual.tex andi-0.12/docs/manual/andi-manual.tex --- andi-0.11/docs/manual/andi-manual.tex 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/docs/manual/andi-manual.tex 2018-02-26 12:10:19.000000000 +0000 @@ -106,61 +106,62 @@ The easiest way to install \andi is via a package manager. This also handles all dependencies for you. -\noindent Debian and Ubuntu (since 16.04): +\noindent Debian and Ubuntu: \begin{lstlisting} ~ % sudo apt-get install andi \end{lstlisting} -\noindent OS X with homebrew: +\noindent macOS with homebrew: \begin{lstlisting} ~ % brew install homebrew/science/andi \end{lstlisting} -\noindent ArchLinux: +\noindent ArchLinux AUR package with aura: \begin{lstlisting} ~ % aura -A andi \end{lstlisting} -\andi is intended to run in a \algo{Unix} commandline such as \lstinline$bash$ or \lstinline$zsh$. All examples in this document are also intended for that environment. You can verify that \andi was installed correctly by executing \lstinline$andi -h$. 
This should give you a list of all available options (see Section~\ref{sec:options}). +\andi is intended to be run in a \algo{Unix} commandline such as \lstinline$bash$ or \lstinline$zsh$. All examples in this document are also intended for that environment. You can verify that \andi was installed correctly by executing \lstinline$andi -h$. This should give you a list of all available options (see Section~\ref{sec:options}). \section{Source Package} \label{sub:regular} -Download the latest \href{https://github.com/EvolBioInf/andi/releases}{release} from GitHub. Please note, that \andi requires the \algo{Gnu Scientific Library} and optionally \algo{libdivsufsort}\footnote{\url{https://github.com/y-256/libdivsufsort}} for optimal performance \cite{divsufsort}. If you wish to install \andi without \algo{libdivsufsort}, consult Section~\ref{sub:wo-divsufsort}. +To build \andi from source, download the latest \href{https://github.com/EvolBioInf/andi/releases}{release} from GitHub. Please note, that \andi requires the \algo{Gnu Scientific Library} and optionally \algo{libdivsufsort}\footnote{\url{https://github.com/y-256/libdivsufsort}} for optimal performance \cite{divsufsort}. If you wish to install \andi without \algo{libdivsufsort}, consult Section~\ref{sub:wo-divsufsort}. Once you have downloaded the package, unzip it and change into the newly created directory. \begin{lstlisting} -~ % tar -xzvf andi-0.11.tar.gz -~ % cd andi-0.11 +~ % tar -xzvf andi-0.12.tar.gz +~ % cd andi-0.12 \end{lstlisting} \noindent Now build and install \andi. \begin{lstlisting} -~/andi-0.11 % ./configure -~/andi-0.11 % make -~/andi-0.11 % sudo make install +~/andi-0.12 % ./configure +~/andi-0.12 % make +~/andi-0.12 % sudo make install \end{lstlisting} -\noindent This installs \andi for all users on your system. If you do not have root privileges, you will find a working copy of \andi in the \lstinline$src$ subdirectory. 
For the rest of this documentation, I will assume, that \andi is in your \textdollar\lstinline!PATH!. +\noindent This installs \andi for all users on your system. If you do not have root privileges, you will find a working copy of \andi in the \lstinline$src$ subdirectory. For the rest of this documentation, it is assumed, that \andi is in your \textdollar\lstinline!PATH!. Now \andi should be ready for use. Try invoking the help. \begin{lstlisting} -~/andi-0.11 % andi --help -Usage: andi [-jlv] [-b INT] [-p FLOAT] [-m MODEL] [-t INT] FILES... - FILES... can be any sequence of FASTA files. If no files are supplied, stdin is used instead. +~/andi-0.12 % Usage: andi [OPTIONS...] FILES... + FILES... can be any sequence of FASTA files. + Use '-' as file name to read from stdin. Options: - -b, --bootstrap Print additional bootstrap matrices - --file-of-filenames Read additional filenames from FILE; one per line + -b, --bootstrap=INT Print additional bootstrap matrices + --file-of-filenames=FILE Read additional filenames from FILE; one per line -j, --join Treat all sequences from one file as a single genome -l, --low-memory Use less memory at the cost of speed - -m, --model Pick an evolutionary model; default: JC - -p Significance of an anchor; default: 0.025 - -t, --threads Set the number of threads; by default, all available processors are used + -m, --model=MODEL Pick an evolutionary model of 'Raw', 'JC', 'Kimura'; default: JC + -p FLOAT Significance of an anchor; default: 0.025 + --progress=WHEN Print a progress bar 'always', 'never', or 'auto'; default: auto + -t, --threads=INT Set the number of threads; by default, all processors are used --truncate-names Truncate names to ten characters -v, --verbose Prints additional information -h, --help Display this help and exit @@ -230,9 +231,9 @@ If not enough file names are provided, \andi will try to read sequences from the standard input stream. 
This behaviour can be explicitly triggered by passing a single dash (\lstinline$-$) as a file name, which is useful in pipelines. -If \andi seems to take unusually long, or requires huge amounts of memory, then you might have forgotten the \algo{join} switch. This makes \andi compare each contig instead of each genome, resulting in many more comparisons! To make \andi output the number of genome it about to compare, use the \lstinline$--verbose$ switch. +If \andi seems to take unusually long, or requires huge amounts of memory, then you might have forgotten the \algo{join} switch. This makes \andi compare each contig instead of each genome, resulting in many more comparisons! Since version 0.12 \andi produces a progress meter on the standard error stream. \andi tries to be smart about when to show or hide the progress bar. You can manually change this behaviour using the \lstinline!--progress! option. -Starting with version 0.11 \andi supports an extra way of input. Instead of passing file names directly to \andi via the commandline arguments, the files may also be read from a file itself. Using this new \lstinline$--file-of-filenames$ can work around limitations imposed be the shell. +Starting with version 0.11 \andi supports an extra way of input. Instead of passing file names directly to \andi via the commandline arguments, the file names may also be read from a file itself. Using this new \lstinline$--file-of-filenames$ argument can work around limitations imposed by the shell. The following three snippets have the same functionality. @@ -267,7 +268,7 @@ \section{Options} \label{sec:options} -\andi takes a small number of commandline options, of which even fewer are of interest on a day-to-day basis. If \lstinline$andi -h$ displays a \lstinline$-t$ option, then \andi was compiled with multi-threading support (implemented using \algo{OpenMP}). By default \andi uses all available processors. However, to restrict the number of threads, use \lstinline$-t$. 
+\andi takes a small number of commandline options, of which even fewer are of interest on a day-to-day basis. If \lstinline$andi -h$ displays a \lstinline$-t$ option, then \andi was compiled with multi-threading support (implemented using \algo{OpenMP}). By default, \andi uses all available processors. However, to restrict the number of threads, use \lstinline$-t$. \begin{lstlisting} ~ % time andi ../test/1M.1.fasta -t 1 @@ -298,13 +299,13 @@ S2 0.1071 0.0000 \end{lstlisting} -The original \algo{phylip} only supports distance matrices with names no longer than ten characters. However, this sometimes leads to problems with long accession numbers. Starting with version 0.11 \andi print the full name of a sequence, even if it is longer than ten characters. If your downstream tools have trouble with this, use \lstinline$--truncate-names$ to reimpose the limit. +The original \algo{phylip} only supports distance matrices with names no longer than ten characters. However, this sometimes leads to problems with long accession numbers. Starting with version 0.11 \andi prints the full name of a sequence, even if it is longer than ten characters. If your downstream tools have trouble with this, use \lstinline$--truncate-names$ to reimpose the limit. Also new in version 0.11 is the \lstinline$--file-of-filenames$ option. See Section~\ref{sec:join} for details. \section{Example: \algo{eco29}} -Here follows a real-world example of how to use \algo{andi}. It makes heavy use of the commandline and tools like \algo{Phylip}. If you prefer \algo{R}, check out this excellent \href{http://holtlab.net/2015/05/08/r-code-to-infer-tree-from-andi-output/}{blog post} by Kathryn Holt. +Here follows a real-world example of how to use \algo{andi}. It makes heavy use of the commandline and tools like \algo{Phylip}. 
If you prefer \algo{R}, check out this excellent blog post by Kathryn Holt.\footnote{\url{http://holtlab.net/2015/05/08/r-code-to-infer-tree-from-andi-output/}} As a data set we use \algo{eco29}; 29 genomes of \textit{E. Coli} and \textit{Shigella}. You can download the data from here: {\small{\url{http://guanine.evolbio.mpg.de/andi/eco29.fasta.gz}}}. The genomes have an average length of 4.9~million nucleotides amounting to a total \SI{138}{\mega\byte}. @@ -414,11 +415,11 @@ \section{Output-related Warnings} -As the input sequences get more evolutionary divergent, \andi finds less anchors. With less anchors, less nucleotides are considered homologous between two sequences. If no anchors are found, comparison fails and \lstinline!nan! is printed instead. See our paper and especially Figure~2 for details. +As the input sequences get more evolutionary divergent, \andi finds less homologous anchors. With less anchors, less nucleotides are considered homologous between two sequences. If no anchors are found, comparison fails and \lstinline!nan! is printed instead. See our paper and especially Figure~2 for details. \subsection*{NaN} -No anchors were found. Your sequences are very divergent ($d>0.5$) or sprout a lot of indels that make comparison difficult. +No homologous sections were found. Your sequences are very divergent ($d>0.5$) or sprout a lot of indels that make comparison difficult. \subsection*{Little Homology} @@ -469,7 +470,7 @@ ~/andi % make check \end{lstlisting} -\noindent The unit tests are also checked each time a commit is sent to the repository. This is done via \algo{TravisCI}.\footnote{\url{https://travis-ci.org/EvolBioInf/andi}} Thus, a warning is produced, when the builds fail, or the unit tests to not run successfully. Currently, the unit tests cover more than 75\% of the code. 
This is computed via the \algo{Travis} builds and a service called \algo{Coveralls}.\footnote{\url{https://coveralls.io/r/EvolBioInf/andi}} Unfortunately, coveralls is broken at this point in time. +\noindent The unit tests are also checked each time a commit is sent to the repository. This is done via \algo{TravisCI}.\footnote{\url{https://travis-ci.org/EvolBioInf/andi}} Thus, a warning is produced, when the builds fail, or the unit tests did not run successfully. Currently, the unit tests cover more than 75\% of the code. This is computed via the \algo{Travis} builds and a service called \algo{Coveralls}.\footnote{\url{https://coveralls.io/r/EvolBioInf/andi}} \section{Known Issues} diff -Nru andi-0.11/.gitignore andi-0.12/.gitignore --- andi-0.11/.gitignore 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/.gitignore 2018-02-26 12:10:19.000000000 +0000 @@ -58,6 +58,7 @@ test_esa test_seq test_fasta +test_process *.trs # Coverage diff -Nru andi-0.11/Makefile.am andi-0.12/Makefile.am --- andi-0.11/Makefile.am 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/Makefile.am 2018-02-26 12:10:19.000000000 +0000 @@ -18,7 +18,8 @@ AM_TESTS_ENVIRONMENT= \ RANDOM_SEED='@SEED@' ; export RANDOM_SEED ; -TESTS = test/test_esa test/test_seq test/test_extra.sh test/test_random.sh test/test_join.sh +XFAIL_TESTS= +TESTS = $(XFAIL_TESTS) test/nan.sh test/low_homo.sh test/test_esa test/test_seq test/test_extra.sh test/test_random.sh test/test_join.sh test/test_process $(TESTS): src/andi diff -Nru andi-0.11/scripts/_andi andi-0.12/scripts/_andi --- andi-0.11/scripts/_andi 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/scripts/_andi 2018-02-26 12:10:19.000000000 +0000 @@ -27,6 +27,7 @@ Kimura\:Kimura\-two\-parameter ))' "($info)-p+[Significance of an anchor; default\: 0.025]:float:" + "($info)--progress=[Show progress bar]:when:(always auto never)" "($info -t --threads)"{-t+,--threads=}'[The number of threads to be used; by default, all available processors are used]:num_threads:' 
"($info)--truncate-names[Print only the first ten characters of each name]" "($info)*"{-v,--verbose}'[Prints additional information]' diff -Nru andi-0.11/src/andi.c andi-0.12/src/andi.c --- andi-0.11/src/andi.c 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/src/andi.c 2018-02-26 12:10:19.000000000 +0000 @@ -45,10 +45,9 @@ int FLAGS = 0; int THREADS = 1; long unsigned int BOOTSTRAP = 0; -double RANDOM_ANCHOR_PROP = 0.025; +double ANCHOR_P_VALUE = 0.025; gsl_rng *RNG = NULL; int MODEL = M_JC; -int EXIT_CODE = EXIT_SUCCESS; void usage(int); void version(void); @@ -66,6 +65,7 @@ {"version", no_argument, NULL, 0}, {"truncate-names", no_argument, NULL, 0}, {"file-of-filenames", required_argument, NULL, 0}, + {"progress", optional_argument, NULL, 0}, {"help", no_argument, NULL, 'h'}, {"verbose", no_argument, NULL, 'v'}, {"join", no_argument, NULL, 'j'}, @@ -79,6 +79,9 @@ // Use all available processors by default. THREADS = omp_get_num_procs(); #endif + + enum { P_AUTO, P_NEVER, P_ALWAYS } progress = P_AUTO; + struct string_vector file_names; string_vector_init(&file_names); @@ -105,6 +108,19 @@ if (strcasecmp(option_str, "file-of-filenames") == 0) { read_into_string_vector(optarg, &file_names); } + if (strcasecmp(option_str, "progress") == 0) { + if (!optarg || strcasecmp(optarg, "always") == 0) { + progress = P_ALWAYS; + } else if (strcasecmp(optarg, "auto") == 0) { + progress = P_AUTO; + } else if (strcasecmp(optarg, "never") == 0) { + progress = P_NEVER; + } else { + warnx("invalid argument to --progress '%s'. Expected " + "one of 'auto', 'always', or 'never'.", + optarg); + } + } break; } case 'h': usage(EXIT_SUCCESS); break; @@ -117,21 +133,21 @@ double prop = strtod(optarg, &end); if (errno || end == optarg || *end != '\0') { - warnx( + soft_errx( "Expected a floating point number for -p argument, but " "'%s' was given. 
Skipping argument.", optarg); break; } - if (prop < 0.0 || prop > 1.0) { - warnx("A probability should be a value between 0 and 1; " - "Ignoring -p %f argument.", - prop); + if (prop <= 0.0 || prop >= 1.0) { + soft_errx("A probability should be a value between 0 and " + "1, exlusive; Ignoring -p %f argument.", + prop); break; } - RANDOM_ANCHOR_PROP = prop; + ANCHOR_P_VALUE = prop; break; } case 'l': FLAGS |= F_LOW_MEMORY; break; @@ -172,7 +188,7 @@ long unsigned int bootstrap = strtoul(optarg, &end, 10); if (errno || end == optarg || *end != '\0' || bootstrap == 0) { - warnx( + soft_errx( "Expected a positive number for -b argument, but '%s' " "was given. Ignoring -b argument.", optarg); @@ -191,8 +207,9 @@ } else if (strcasecmp(optarg, "KIMURA") == 0) { MODEL = M_KIMURA; } else { - warnx("Ignoring argument for --model. Expected Raw, JC or " - "Kimura"); + soft_errx( + "Ignoring argument for --model. Expected Raw, JC or " + "Kimura"); } break; } @@ -217,11 +234,13 @@ size_t minfiles = FLAGS & F_JOIN ? 2 : 1; if (string_vector_size(&file_names) < minfiles) { - // not enough files passed via arguments; read from stdin. - string_vector_push_back(&file_names, "-"); - // explain to the user, why nothing is happening. - if (isatty(STDIN_FILENO)) { - warnx("Not enough file names given; expecting input via stdin."); + // not enough files passed via arguments + if (!isatty(STDIN_FILENO)) { + // read from stdin in pipe + string_vector_push_back(&file_names, "-"); + } else { + // print a helpful message on './andi' without args + usage(EXIT_FAILURE); } } @@ -248,11 +267,6 @@ n); } - if (FLAGS & F_VERBOSE) { - fprintf(stderr, "Comparing %zu sequences\n", n); - fflush(stderr); - } - RNG = gsl_rng_alloc(gsl_rng_default); if (!RNG) { err(1, "RNG allocation failed."); @@ -293,20 +307,27 @@ } if (FLAGS & F_SHORT) { - warnx("One of the given input sequences is shorter than a thousand " - "nucleotides. This may result in inaccurate distances. 
Try an " - "alignment instead."); + soft_errx( + "One of the given input sequences is shorter than a thousand " + "nucleotides. This may result in inaccurate distances. Try an " + "alignment instead."); } - // side channel - EXIT_CODE = EXIT_SUCCESS; + // determine whether to print a progress bar + if (progress == P_AUTO) { + progress = isatty(STDERR_FILENO) ? P_ALWAYS : P_NEVER; + } + if (progress == P_ALWAYS) { + FLAGS |= F_PRINT_PROGRESS; + } // compute distance matrix calculate_distances(dsa_data(&dsa), n); dsa_free(&dsa); gsl_rng_free(RNG); - return EXIT_CODE; + + return FLAGS & F_SOFT_ERROR ? EXIT_FAILURE : EXIT_SUCCESS; } /** @@ -314,22 +335,25 @@ */ void usage(int status) { const char str[] = { - "Usage: andi [-jlv] [-b INT] [-p FLOAT] [-m MODEL] [-t INT] FILES...\n" - "\tFILES... can be any sequence of FASTA files. If no files are " - "supplied, stdin is used instead.\n" + "Usage: andi [OPTIONS...] FILES...\n" + "\tFILES... can be any sequence of FASTA files.\n" + "\tUse '-' as file name to read from stdin.\n" "Options:\n" - " -b, --bootstrap Print additional bootstrap matrices\n" - " --file-of-filenames Read additional filenames from " + " -b, --bootstrap=INT Print additional bootstrap matrices\n" + " --file-of-filenames=FILE Read additional filenames from " "FILE; one per line\n" " -j, --join Treat all sequences from one file as a single " "genome\n" " -l, --low-memory Use less memory at the cost of speed\n" - " -m, --model Pick an evolutionary model; default: " + " -m, --model=MODEL Pick an evolutionary model of 'Raw', 'JC', " + "'Kimura'; default: " "JC\n" - " -p Significance of an anchor; default: 0.025\n" + " -p FLOAT Significance of an anchor; default: 0.025\n" + " --progress=WHEN Print a progress bar 'always', 'never', or " + "'auto'; default: auto\n" #ifdef _OPENMP - " -t, --threads Set the number of threads; by default, all " - "available processors are used\n" + " -t, --threads=INT Set the number of threads; by default, all " + "processors are 
used\n" #endif " --truncate-names Truncate names to ten characters\n" " -v, --verbose Prints additional information\n" diff -Nru andi-0.11/src/dist_hack.h andi-0.12/src/dist_hack.h --- andi-0.11/src/dist_hack.h 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/src/dist_hack.h 2018-02-26 12:10:19.000000000 +0000 @@ -2,9 +2,10 @@ * @brief This file is a preprocessor hack for the two functions `distMatrix` * and `distMatrixLM`. */ +// clang-format off #ifdef FAST #define NAME distMatrix -#define P_OUTER _Pragma("omp parallel for num_threads( THREADS)") +#define P_OUTER _Pragma("omp parallel for num_threads( THREADS) default(none) shared(progress_counter) firstprivate( stderr, M, sequences, n, print_progress)") #define P_INNER #else #undef NAME @@ -12,8 +13,9 @@ #undef P_INNER #define NAME distMatrixLM #define P_OUTER -#define P_INNER _Pragma("omp parallel for num_threads( THREADS)") +#define P_INNER _Pragma("omp parallel for num_threads( THREADS) default(none) shared(progress_counter) firstprivate( stderr, M, sequences, n, print_progress, i, E, subject)") #endif +// clang-format on /** @brief This function calls dist_andi for pairs of subjects and queries, and * thereby fills the distance matrix. @@ -32,6 +34,14 @@ void NAME(struct model *M, const seq_t *sequences, size_t n) { size_t i; + size_t progress_counter = 0; + int print_progress = FLAGS & F_PRINT_PROGRESS; + + if (print_progress) { + fprintf(stderr, "Comparing %zu sequences: %5.1f%% (%zu/%zu)", n, 0.0, + (size_t)0, n * n - n); + } + //#pragma P_OUTER for (i = 0; i < n; i++) { @@ -56,15 +66,31 @@ size_t ql = sequences[j].len; M(i, j) = dist_anchor(&E, sequences[j].S, ql, subject.gc); + +#pragma omp atomic update + progress_counter++; } - // TODO: Provide a nicer progress indicator. 
- if (FLAGS & F_EXTRA_VERBOSE) { + if (print_progress) { + size_t local_progress_counter; + size_t num_comparisons = n * n - n; + +#pragma omp atomic read + local_progress_counter = progress_counter; + + double progress = + 100.0 * (double)local_progress_counter / num_comparisons; + #pragma omp critical - fprintf(stderr, "Subject %s done.\n", sequences[i].name); + fprintf(stderr, "\rComparing %zu sequences: %5.1f%% (%zu/%zu)", n, + progress, local_progress_counter, num_comparisons); } esa_free(&E); seq_subject_free(&subject); } + + if (print_progress) { + fprintf(stderr, ", done.\n"); + } } diff -Nru andi-0.11/src/esa.c andi-0.12/src/esa.c --- andi-0.11/src/esa.c 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/src/esa.c 2018-02-26 12:10:19.000000000 +0000 @@ -313,7 +313,9 @@ const saidx_t *LCP = C->LCP; - typedef struct pair_s { saidx_t idx, lcp; } pair_t; + typedef struct pair_s { + saidx_t idx, lcp; + } pair_t; pair_t *stack = malloc((C->len + 1) * sizeof(*stack)); CHECK_MALLOC(stack); diff -Nru andi-0.11/src/global.h andi-0.12/src/global.h --- andi-0.11/src/global.h 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/src/global.h 2018-02-26 12:10:19.000000000 +0000 @@ -26,11 +26,11 @@ extern int THREADS; /** - * The ::RANDOM_ANCHOR_PROP represents the probability with which a found - * anchor is a random match and not homologous. Its value can be set using - * the `-p` switch. + * The ::ANCHOR_P_VALUE represents the probability that an anchor will be found, + * if the two sequences are unrelated. I.e. it is the p-value for H_0: random + * sequences. Its value can be set using the `-p` switch. */ -extern double RANDOM_ANCHOR_PROP; +extern double ANCHOR_P_VALUE; /** * The number of matrices that should be bootstrapped. @@ -50,11 +50,6 @@ enum { M_RAW, M_JC, M_KIMURA }; /** - * Global exit code. Should be non-zero on error. - */ -extern int EXIT_CODE; - -/** * This enum contains the available flags. Please note that all * available options are a power of 2. 
*/ @@ -66,7 +61,9 @@ F_NON_ACGT = 8, F_JOIN = 16, F_LOW_MEMORY = 32, - F_SHORT = 64 + F_SHORT = 64, + F_PRINT_PROGRESS = 128, + F_SOFT_ERROR = 256 }; /** @@ -79,6 +76,26 @@ if (PTR == NULL) { \ err(errno, "Out of memory"); \ } \ - } while (0); + } while (0) + +/** + * @brief This macro is used to print a warning and make the program exit with + * an failure exit code, later. + */ +#define soft_err(...) \ + do { \ + FLAGS |= F_SOFT_ERROR; \ + warn(__VA_ARGS__); \ + } while (0) + +/** + * @brief This macro is used to print a warning and make the program exit with + * an failure exit code, later. + */ +#define soft_errx(...) \ + do { \ + FLAGS |= F_SOFT_ERROR; \ + warnx(__VA_ARGS__); \ + } while (0) #endif diff -Nru andi-0.11/src/io.c andi-0.12/src/io.c --- andi-0.11/src/io.c 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/src/io.c 2018-02-26 12:10:19.000000000 +0000 @@ -103,7 +103,8 @@ void read_into_string_vector(const char *file_name, struct string_vector *sv) { FILE *file = strcmp(file_name, "-") ? fopen(file_name, "r") : stdin; if (!file) { - err(errno, "%s", file_name); + soft_err("%s", file_name); + return; } while (1) { @@ -118,7 +119,8 @@ } if (check == -1) { - err(errno, "%s", file_name); + soft_err("%s", file_name); + break; } char *nl = strchr(str, '\n'); @@ -137,7 +139,7 @@ int check = fclose(file); if (check != 0) { - err(errno, "%s", file_name); + soft_err("%s", file_name); } } @@ -198,7 +200,7 @@ strcmp(file_name, "-") ? 
open(file_name, O_RDONLY) : STDIN_FILENO; if (file_descriptor < 0) { - warn("%s", file_name); + soft_err("%s", file_name); return; } @@ -209,7 +211,7 @@ pfasta_file pf; if ((l = pfasta_parse(&pf, file_descriptor)) != 0) { - warnx("%s: %s", file_name, pfasta_strerror(&pf)); + soft_errx("%s: %s", file_name, pfasta_strerror(&pf)); goto fail; } @@ -225,7 +227,7 @@ } if (l < 0) { - warnx("%s: %s", file_name, pfasta_strerror(&pf)); + soft_errx("%s: %s", file_name, pfasta_strerror(&pf)); pfasta_seq_free(&ps); } @@ -276,23 +278,30 @@ } double dist = DD(i, j) = i == j ? 0.0 : estimate(&datum); - double coverage = model_coverage(&datum); + + if (dist > 0 && dist < 0.001) { + use_scientific = 1; + } if (isnan(dist) && warnings) { - EXIT_CODE = EXIT_FAILURE; const char str[] = { "For the two sequences '%s' and '%s' the distance " "computation failed and is reported as nan. " "Please refer to the documentation for further details."}; - warnx(str, sequences[i].name, sequences[j].name); - } else if (dist > 0 && dist < 0.001) { - use_scientific = 1; - } else if (i < j && coverage < 0.05 && warnings) { - const char str[] = { - "For the two sequences '%s' and '%s' less than 5%% " - "homology were found (%f and %f, respectively)."}; - warnx(str, sequences[i].name, sequences[j].name, - model_coverage(&D(i, j)), model_coverage(&D(j, i))); + soft_errx(str, sequences[i].name, sequences[j].name); + } + + if (!isnan(dist) && i < j && warnings) { + double coverage1 = model_coverage(&D(i, j)); + double coverage2 = model_coverage(&D(j, i)); + + if (coverage1 < 0.05 || coverage2 < 0.05) { + const char str[] = { + "For the two sequences '%s' and '%s' less than 5%% " + "homology were found (%f and %f, respectively)."}; + soft_errx(str, sequences[i].name, sequences[j].name, + coverage1, coverage2); + } } } } diff -Nru andi-0.11/src/model.c andi-0.12/src/model.c --- andi-0.11/src/model.c 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/src/model.c 2018-02-26 12:10:19.000000000 +0000 @@ -202,6 
+202,26 @@ MM->counts[TtoT] += local_counts[2]; } +/** @brief Convert a nucleotide to a 2bit representation. + * + * We want to map characters: + * A → 0 + * C → 1 + * G → 2 + * T → 3 + * The trick used below is that the three lower bits of the + * characters are unique. Thus, they can be used to compute the mapping + * above. The mapping itself is done via tricky bitwise operations. + * + * @param c - input nucleotide + * @returns 2bit representation. + */ +char nucl2bit(char c) { + c &= 6; + c ^= c >> 1; + return c >> 1; +} + /** * @brief Count the substitutions and add them to the mutation matrix. * @@ -213,7 +233,7 @@ void model_count(model *MM, const char *S, const char *Q, size_t len) { size_t local_counts[MUTCOUNTS] = {0}; - for (; len--; S++, Q++) { + for (size_t i = 0; i < len; S++, Q++, i++) { char s = *S; char q = *Q; @@ -222,24 +242,9 @@ continue; } - /* We want to map characters: - * A → 0 - * C → 1 - * G → 2 - * T → 3 - * The trick used below is that the three lower bits of the - * characters are unique. Thus, they can be used to compute the mapping - * above. The mapping itself is done via tricky bitwise operations. - */ - - unsigned char nibble_s = s & 7; - unsigned char nibble_q = q & 7; - - static const unsigned int mm1 = 0x20031000; - // Pick the correct two bits representing s and q. - unsigned char foo = (mm1 >> (4 * nibble_s)) & 0x3; - unsigned char baz = (mm1 >> (4 * nibble_q)) & 0x3; + unsigned char foo = nucl2bit(s); + unsigned char baz = nucl2bit(q); /* * The mutation matrix is symmetric. 
For convenience we define the diff -Nru andi-0.11/src/process.c andi-0.12/src/process.c --- andi-0.11/src/process.c 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/src/process.c 2018-02-26 12:10:19.000000000 +0000 @@ -21,7 +21,7 @@ #include #endif -double shuprop(size_t x, double g, size_t l); +double shustring_cum_prob(size_t x, double g, size_t l); int calculate_bootstrap(const struct model *M, const seq_t *sequences, size_t n); @@ -31,18 +31,17 @@ * Given some parameters calculate the minimum length for anchors according * to the distribution from Haubold et al. (2009). * - * @param p - The probability with which an anchor is allowed to be random. + * @param p - The probability with which an anchor will be created under a + * random model. * @param g - The the relative amount of GC in the subject. - * @param l - The length of the subject. + * @param l - The length of the subject (includes revcomp). * @returns The minimum length of an anchor. */ size_t min_anchor_length(double p, double g, size_t l) { size_t x = 1; - double prop = 0.0; - // Find smallest x with P(X > x) < p - for (; prop < 1 - p; x++) { - prop = shuprop(x, g / 2, l); + while (shustring_cum_prob(x, g / 2, l) < 1 - p) { + x++; } return x; @@ -82,18 +81,20 @@ /** * @brief Given `x` this function calculates the probability of a shustring - * with a length less than `x`. + * with a length less or equal to `x` under a random model. This means, it is + * the cumulative probability. * * Let X be the longest shortest unique substring (shustring) at any position. * Then this function computes P{X <= x} with respect to the given parameter - * set. See Haubold et al. (2009). + * set. See Haubold et al. (2009). Note that `x` includes the final mismatch. + * Thus, `x` is `match length + 1`. * * @param x - The maximum length of a shustring. * @param p - The half of the relative amount of GC in the DNA. * @param l - The length of the subject. * @returns The probability of a certain shustring length. 
*/ -double shuprop(size_t x, double p, size_t l) { +double shustring_cum_prob(size_t x, double p, size_t l) { double xx = (double)x; double ll = (double)l; size_t k; @@ -115,6 +116,106 @@ return s; } +typedef _Bool bool; +#define false 0 +#define true !false + +/** + * @brief This structure captures properties of an anchor. + */ +struct anchor { + /** The position on the subject. */ + size_t pos_S; + /** The position on the query. */ + size_t pos_Q; + /** The length of the exact match. */ + size_t length; +}; + +/** + * @brief This is a structure of assorted variables needed for anchor finding. + */ +struct context { + const esa_s *C; + const char *query; + size_t query_length; + size_t threshold; +}; + +/** + * @brief Compute the length of the longest common prefix of two strings. + * + * @param S - One string. + * @param Q - Another string. + * @param remaining - The length of one of the strings. + * @returns the length of the lcp. + */ +static inline size_t lcp(const char *S, const char *Q, size_t remaining) { + size_t length = 0; + while (length < remaining && S[length] == Q[length]) { + length++; + } + return length; +} + +/** + * @brief Check whether the last anchor can be extended by a lucky anchor. + * + * Anchors are defined to be unique and of a minimum length. The uniqueness + * requires us to search throw the suffix array for a second appearance of the + * anchor. However, if a left anchor is already unique, we could be sloppy and + * drop the uniqueness criterion for the second anchor. This way we can skip the + * lookup and just compare characters directly. However, for a lucky anchor the + * match still has to be longer than the threshold. + * + * @param ctx - Matching context of various variables. + * @param last_match - The last anchor. + * @param this_match - Input/Output variable for the current match. + * @returns true iff the current match is a lucky anchor. 
+ */ +static inline bool lucky_anchor(const struct context *ctx, + const struct anchor *last_match, + struct anchor *this_match) { + + size_t advance = this_match->pos_Q - last_match->pos_Q; + size_t gap = this_match->pos_Q - last_match->pos_Q - last_match->length; + + size_t try_pos_S = last_match->pos_S + advance; + if (try_pos_S >= (size_t)ctx->C->len || gap > ctx->threshold) { + return false; + } + + this_match->pos_S = try_pos_S; + this_match->length = + lcp(ctx->query + this_match->pos_Q, ctx->C->S + try_pos_S, + ctx->query_length - this_match->pos_Q); + + return this_match->length >= ctx->threshold; +} + +/** + * @brief Check for a new anchor. + * + * Given the current context and starting position check if the new match is an + * anchor. The latter requires uniqueness and a certain minimum length. + * + * @param ctx - Matching context of various variables. + * @param last_match - (unused) + * @param this_match - Input/Output variable for the current match. + * @returns true iff an anchor was found. + */ +static inline bool anchor(const struct context *ctx, + const struct anchor *last_match, + struct anchor *this_match) { + + lcp_inter_t inter = get_match_cached(ctx->C, ctx->query + this_match->pos_Q, + ctx->query_length - this_match->pos_Q); + + this_match->pos_S = ctx->C->SA[inter.i]; + this_match->length = inter.l <= 0 ? 0 : inter.l; + return inter.i == inter.j && this_match->length >= ctx->threshold; +} + /** * @brief Divergence estimation using the anchor technique. * @@ -135,83 +236,72 @@ double gc) { struct model ret = {.seq_len = query_length, .counts = {0}}; - lcp_inter_t inter; + struct anchor this_match = {0}; + struct anchor last_match = {0}; + bool last_was_right_anchor = false; - size_t last_pos_Q = 0; - size_t last_pos_S = 0; - size_t last_length = 0; - // This variable indicates that the last anchor was the right anchor of a - // pair. 
- size_t last_was_right_anchor = 0; - - size_t this_pos_Q = 0; - size_t this_pos_S; - size_t this_length; + size_t threshold = min_anchor_length(ANCHOR_P_VALUE, gc, C->len); - size_t num_right_anchors = 0; - - size_t threshold = min_anchor_length(RANDOM_ANCHOR_PROP, gc, C->len); + struct context ctx = {C, query, query_length, threshold}; // Iterate over the complete query. - while (this_pos_Q < query_length) { - inter = - get_match_cached(C, query + this_pos_Q, query_length - this_pos_Q); - - this_length = inter.l <= 0 ? 0 : inter.l; + while (this_match.pos_Q < query_length) { - if (inter.i == inter.j && this_length >= threshold) { + // Check for lucky anchors and fall back to normal strategy. + if (lucky_anchor(&ctx, &last_match, &this_match) || + anchor(&ctx, &last_match, &this_match)) { // We have reached a new anchor. - this_pos_S = C->SA[inter.i]; + size_t end_S = last_match.pos_S + last_match.length; + size_t end_Q = last_match.pos_Q + last_match.length; // Check if this can be a right anchor to the last one. - if (this_pos_S > last_pos_S && - this_pos_Q - last_pos_Q == this_pos_S - last_pos_S) { - num_right_anchors++; + if (this_match.pos_S > end_S && + this_match.pos_Q - end_Q == this_match.pos_S - end_S) { - // classify nucleotides in the qanchor - model_count_equal(&ret, query + last_pos_Q, last_length); + // classify nucleotides in the left qanchor + model_count_equal(&ret, query + last_match.pos_Q, + last_match.length); // Count the SNPs in between. - model_count(&ret, C->S + last_pos_S + last_length, - query + last_pos_Q + last_length, - this_pos_Q - last_pos_Q - last_length); - last_was_right_anchor = 1; + model_count(&ret, C->S + end_S, query + end_Q, + this_match.pos_Q - end_Q); + last_was_right_anchor = true; } else { if (last_was_right_anchor) { // If the last was a right anchor, but with the current one, // we cannot extend, then add its length. 
- model_count_equal(&ret, query + last_pos_Q, last_length); - } else if (last_length >= threshold * 2) { + model_count_equal(&ret, query + last_match.pos_Q, + last_match.length); + } else if (last_match.length >= threshold * 2) { // The last anchor wasn't neither a left or right anchor. // But, it was as long as an anchor pair. So still count it. - model_count_equal(&ret, query + last_pos_Q, last_length); + model_count_equal(&ret, query + last_match.pos_Q, + last_match.length); } - last_was_right_anchor = 0; + last_was_right_anchor = false; } // Cache values for later - last_pos_Q = this_pos_Q; - last_pos_S = this_pos_S; - last_length = this_length; + last_match = this_match; } // Advance - this_pos_Q += this_length + 1; + this_match.pos_Q += this_match.length + 1; } // Very special case: The sequences are identical - if (last_length >= query_length) { - model_count(&ret, C->S + last_pos_S, query, query_length); + if (last_match.length >= query_length) { + model_count_equal(&ret, query, query_length); return ret; } // We might miss a few nucleotides if the last anchor was also a right // anchor. The logic is the same as a few lines above. 
if (last_was_right_anchor) { - model_count(&ret, C->S + last_pos_S, query + last_pos_Q, last_length); - } else if (last_length >= threshold * 2) { - model_count_equal(&ret, query + last_pos_Q, last_length); + model_count_equal(&ret, query + last_match.pos_Q, last_match.length); + } else if (last_match.length >= threshold * 2) { + model_count_equal(&ret, query + last_match.pos_Q, last_match.length); } return ret; @@ -266,7 +356,7 @@ if (BOOTSTRAP) { int res = calculate_bootstrap(M, sequences, n); if (res) { - warnx("Bootstrapping failed."); + soft_errx("Bootstrapping failed."); } } diff -Nru andi-0.11/test/low_homo.sh andi-0.12/test/low_homo.sh --- andi-0.11/test/low_homo.sh 1970-01-01 00:00:00.000000000 +0000 +++ andi-0.12/test/low_homo.sh 2018-02-26 12:10:19.000000000 +0000 @@ -0,0 +1,20 @@ +#!/bin/bash -f + +./test/test_fasta -l 100000 > a.fa +./test/test_fasta -l 100000 > b.fa +./test/test_fasta -l 100 > both.fa + +cat both.fa a.fa | awk -vRS='>' '{if($1 == "S0")print ">"$0 > "S0.fa"}' +cat both.fa b.fa | awk -vRS='>' '{if($1 == "S1")print ">"$0 > "S1.fa"}' + +# this is expected to trigger the low homology warning +./src/andi -j S0.fa S1.fa 2>&1 | grep 'homology' +EXIT_VAL=$? 
+ +if [[ EXIT_VAL -ge 1 ]]; then + echo "Triggering low homology failed" >&2 + grep '^>' a.fa b.fa both.fa +fi + +rm -f a.fa b.fa both.fa S0.fa S1.fa +exit $EXIT_VAL diff -Nru andi-0.11/test/Makefile.am andi-0.12/test/Makefile.am --- andi-0.11/test/Makefile.am 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/test/Makefile.am 2018-02-26 12:10:19.000000000 +0000 @@ -1,5 +1,5 @@ -check_PROGRAMS = test_esa test_seq test_fasta -dist_noinst_DATA = test_extra.sh test_random.sh test_join.sh +check_PROGRAMS = test_esa test_seq test_fasta test_process +dist_noinst_DATA = test_extra.sh test_random.sh test_join.sh nan.sh low_homo.sh if !BUILD_WITH_LIBDIVSUFSORT PSUFSORT=$(top_builddir)/opt/psufsort/libpsufsort.a @@ -7,12 +7,19 @@ DUMMY=dummy.cxx endif -test_seq_SOURCES = test_seq.c ../src/sequence.c +test_seq_SOURCES = test_seq.c $(top_srcdir)/src/sequence.c test_seq_CPPFLAGS = -I$(top_srcdir)/src -I$(top_srcdir)/opt -DDEBUG -std=gnu99 test_seq_CFLAGS = -Wall -Wextra $(GLIB_CFLAGS) -Wno-missing-field-initializers test_seq_LDADD = $(GLIB_LIBS) $(top_builddir)/opt/libcompat.a -test_esa_SOURCES = test_esa.c ../src/esa.c ../src/sequence.c $(top_srcdir)/src/esa.h +test_process_SOURCES = test_process.c $(top_srcdir)/src/esa.c $(top_srcdir)/src/io.c $(top_srcdir)/src/model.c $(top_srcdir)/src/process.c $(top_srcdir)/src/sequence.c $(top_srcdir)/src/global.h +test_process_CPPFLAGS = $(OPENMP_CFLAGS) -I$(top_srcdir)/src -I$(top_srcdir)/opt -I$(top_srcdir)/libs -DDEBUG -std=gnu99 +test_process_CFLAGS = $(OPENMP_CFLAGS) -Wall -Wextra $(GLIB_CFLAGS) -Wno-missing-field-initializers +test_process_LDADD = $(GLIB_LIBS) $(PSUFSORT) $(top_builddir)/opt/libcompat.a $(top_builddir)/libs/libpfasta.a +test_process_CXXFLAGS = $(OPENMP_CXXFLAGS) -Wall -Wextra +nodist_EXTRA_test_process_SOURCES = $(DUMMY) + +test_esa_SOURCES = test_esa.c $(top_srcdir)/src/esa.c $(top_srcdir)/src/sequence.c $(top_srcdir)/src/esa.h test_esa_CPPFLAGS = $(OPENMP_CFLAGS) -I$(top_srcdir)/libs -I$(top_srcdir)/opt 
-I$(top_srcdir)/src -DDEBUG -std=gnu99 test_esa_CFLAGS = $(OPENMP_CFLAGS) -Wall -Wextra $(GLIB_CFLAGS) -Wno-missing-field-initializers test_esa_LDADD = $(GLIB_LIBS) $(PSUFSORT) $(top_builddir)/opt/libcompat.a diff -Nru andi-0.11/test/nan.sh andi-0.12/test/nan.sh --- andi-0.11/test/nan.sh 1970-01-01 00:00:00.000000000 +0000 +++ andi-0.12/test/nan.sh 2018-02-26 12:10:19.000000000 +0000 @@ -0,0 +1,17 @@ +#!/bin/bash -f + +./test/test_fasta -l 10000 > a.fa +./test/test_fasta -l 10000 > b.fa + +# this is expected to trigger the nan warning +./src/andi -j a.fa b.fa 2>&1 | grep 'nan' +EXIT_VAL=$? + + +if [[ EXIT_VAL -ge 1 ]]; then + echo "Triggering nan failed" >&2 + grep '^>' a.fa b.fa both.fa +fi + +rm -f a.fa b.fa +exit $EXIT_VAL diff -Nru andi-0.11/test/test_join.sh andi-0.12/test/test_join.sh --- andi-0.11/test/test_join.sh 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/test/test_join.sh 2018-02-26 12:10:19.000000000 +0000 @@ -16,11 +16,11 @@ RES=$(./src/andi -m RAW -t 1 -j S0.fasta S1.fasta | tail -n 1 | awk '{print ($2 - 0.1)}' | - awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.01}' + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.03}' ) if test $RES -ne 1; then - echo "The last test computed a distance deviating more than one percent from its intended value." + echo "The last test computed a distance deviating more than three percent from its intended value." echo "See S0.fasta and S1.fasta for the used sequences." exit 1; fi @@ -38,11 +38,11 @@ RES=$(./src/andi -m RAW -t1 -j S0.fasta S1.fasta | tail -n 1 | awk '{print ($2 - 0.1)}' | - awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.01}' + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.03}' ) if test $RES -ne 1; then - echo "The last test computed a distance deviating more than one percent from its intended value." 
+ echo "The last test computed a distance deviating more than three percent from its intended value." echo "See S0.fasta and S1.fasta for the used sequences." exit 1; fi @@ -62,11 +62,11 @@ RES=$(./src/andi -mRAW -t 1 -j S0.fasta S1.fasta | tail -n 1 | awk '{print ($2 - 0.1)}' | - awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.01}' + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) < 0.03}' ) if test $RES -ne 1; then - echo "The last test computed a distance deviating more than one percent from its intended value." + echo "The last test computed a distance deviating more than three percent from its intended value." echo "See S0.fasta and S1.fasta for the used sequences." exit 1; fi diff -Nru andi-0.11/test/test_process.c andi-0.12/test/test_process.c --- andi-0.11/test/test_process.c 1970-01-01 00:00:00.000000000 +0000 +++ andi-0.12/test/test_process.c 2018-02-26 12:10:19.000000000 +0000 @@ -0,0 +1,33 @@ +#include "global.h" +#include "process.h" +#include +#include + +int FLAGS = 0; +int THREADS = 1; +long unsigned int BOOTSTRAP = 0; +double ANCHOR_P_VALUE = 0.025; +gsl_rng *RNG = NULL; +int MODEL = M_JC; + +double shustring_cum_prob(size_t x, double g, size_t l); +size_t min_anchor_length(double p, double g, size_t l); + +void test_shustring_cum_prob() { + int len = 100000; + double gc = 0.5; + double p_value = 0.025; + + size_t threshold = min_anchor_length(p_value, gc, len); + + g_assert_cmpfloat(1 - p_value, <, shustring_cum_prob(threshold + 1, gc / 2, len)); + g_assert_cmpfloat(1 - p_value, <=, shustring_cum_prob(threshold, gc / 2, len)); + g_assert_cmpfloat(1 - p_value, >, shustring_cum_prob(threshold - 1, gc / 2, len)); +} + +int main(int argc, char *argv[]) { + g_test_init(&argc, &argv, NULL); + g_test_add_func("/process/shustring_cum_prob", test_shustring_cum_prob); + + return g_test_run(); +} diff -Nru andi-0.11/test/test_random.sh andi-0.12/test/test_random.sh --- andi-0.11/test/test_random.sh 2017-07-12 
10:32:09.000000000 +0000 +++ andi-0.12/test/test_random.sh 2018-02-26 12:10:19.000000000 +0000 @@ -35,9 +35,9 @@ ./src/andi -t 1 | tail -n 1 | awk -v dist=$dist '{print $2, dist}' | - awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) <= 0.02 && abs($1-$2) <= 0.02 * $2}') + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) <= 0.055 && abs($1-$2) <= 0.055 * $2}') if test $res -ne 1; then - echo "The last test computed a distance deviating more than two percent from its intended value." + echo "The last test computed a distance deviating more than five percent from its intended value." echo "See test_random.fasta for the used sequences." echo "./test/test_fasta -s $SEED -l $LENGTH -d $dist" head -n 1 ./test/test_random.fasta @@ -57,9 +57,9 @@ ./src/andi -m RAW -t 1 | tail -n 1 | awk -v dist=$dist '{print $2, dist}' | - awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) <= 0.02 && abs($1-$2) <= 0.02 * $2}') + awk 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($1-$2) <= 0.055 && abs($1-$2) <= 0.055 * $2}') if test $res -ne 1; then - echo "The last test computed a distance deviating more than two percent from its intended value." + echo "The last test computed a distance deviating more than five percent from its intended value." echo "See test_random.fasta for the used sequences." 
echo "./test/test_fasta -r -s $SEED -l $LENGTH -d $dist" head -n 1 ./test/test_random.fasta diff -Nru andi-0.11/.travis.yml andi-0.12/.travis.yml --- andi-0.11/.travis.yml 2017-07-12 10:32:09.000000000 +0000 +++ andi-0.12/.travis.yml 2018-02-26 12:10:19.000000000 +0000 @@ -1,7 +1,4 @@ language: cpp -os: - - linux - # - osx compiler: - gcc - clang @@ -13,9 +10,9 @@ - ubuntu-toolchain-r-test packages: - cmake - - g++-4.8 - libglib2.0-dev - libgsl0-dev + install: - export LIBDIVDIR="$HOME/libdivsufsort" - pip install --user cpp-coveralls @@ -24,30 +21,26 @@ - cd libdivsufsort-master && mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE="Release" -DCMAKE_INSTALL_PREFIX="$LIBDIVDIR" .. - make && make install - - if [ "${TRAVIS_OS_NAME}" = "osx" ]; then brew install gsl; fi - - if [ "${TRAVIS_OS_NAME}" = "osx" ]; then brew install glib; fi -before_install: - - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi -# - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 90 script: -- export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$LIBDIVDIR/lib" +- CONFIGURE_FLAGS="" - export LD_LIBRARY_PATH="$LIBDIVDIR:$LIBDIVDIR/lib" - export LIBRARY_PATH="$LIBDIVDIR:$LIBRARY_PATH" - cd $TRAVIS_BUILD_DIR - autoreconf -fvi -Im4 - export MYFLAGS="-fprofile-arcs -ftest-coverage -I$LIBDIVDIR/include" -- ./configure --enable-unit-tests LDFLAGS="-L$LIBDIVDIR/lib" CFLAGS="$MYFLAGS" CXXFLAGS="$MYFLAGS" +- if [ "${CC}" = "clang" ]; then export CONFIGURE_FLAGS="--disable-openmp"; fi +- ./configure $CONFIGURE_FLAGS --enable-unit-tests LDFLAGS="-L$LIBDIVDIR/lib" CFLAGS="$MYFLAGS" CXXFLAGS="$MYFLAGS" - make -- make check +- make check || cat ./test-suite.log || exit 1 - export MYFLAGS="-I$LIBDIVDIR/include" -- ./configure --enable-unit-tests LDFLAGS="-L$LIBDIVDIR/lib" CFLAGS="$MYFLAGS" CXXFLAGS="$MYFLAGS" -- make distcheck DISTCHECK_CONFIGURE_FLAGS="LDFLAGS=\"-L$LIBDIVDIR/lib\" CFLAGS=\"-I$LIBDIVDIR/include\" CXXFLAGS=\"-I$LIBDIVDIR/include\"" +- ./configure 
$CONFIGURE_FLAGS --enable-unit-tests LDFLAGS="-L$LIBDIVDIR/lib" CFLAGS="$MYFLAGS" CXXFLAGS="$MYFLAGS" +- make distcheck DISTCHECK_CONFIGURE_FLAGS="LDFLAGS=\"-L$LIBDIVDIR/lib\" CFLAGS=\"-I$LIBDIVDIR/include\" CXXFLAGS=\"-I$LIBDIVDIR/include\" $CONFIGURE_FLAGS" - tar xzvf andi-*.tar.gz - cd andi-* -- ./configure --enable-unit-tests --without-libdivsufsort +- ./configure $CONFIGURE_FLAGS --enable-unit-tests --without-libdivsufsort - make -- make check +- make check || cat ./test-suite.log || exit 1 - cd .. after_success: -- if [ "${TRAVIS_OS_NAME}" = "linux" -a "$CXX" = "g++-4.8" ]; then coveralls --exclude libdivsufsort-master -E '^andi-.*' --exclude libs --exclude test --gcov `which gcov-4.8` --gcov-options '\-lp'; fi +- if [ "$CXX" = "g++" ]; then coveralls --exclude libdivsufsort-master -E '^andi-.*' --exclude libs --exclude test --gcov `which gcov-4.8` --gcov-options '\-lp'; fi