diff -Nru freebayes-1.0.2/debian/bamleftalign.1 freebayes-1.1.0/debian/bamleftalign.1
--- freebayes-1.0.2/debian/bamleftalign.1	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/bamleftalign.1	1970-01-01 00:00:00.000000000 +0000
@@ -1,27 +0,0 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.4.
-.TH BAMLEFTALIGN "1" "January 2017" "bamleftalign 1.0.2" "User Commands"
-.SH NAME
-bamleftalign \- Left\-aligns and merges the insertions and deletions in all alignments
-.SH SYNOPSIS
-[BAM data stream] | \fBbamleftalign\fR [options]
-.SH DESCRIPTION
-Left\-aligns and merges the insertions and deletions in all alignments in stdin.
-Iterates until each alignment is stable through a left\-realignment step.
-.SH OPTIONS
-.TP
-\fB\-f\fR \fB\-\-fasta\-reference\fR FILE
-FASTA reference file to use for realignment (required)
-.TP
-\fB\-d\fR \fB\-\-debug\fR
-Print debugging information about realignment process
-.TP
-\fB\-s\fR \fB\-\-suppress\-output\fR
-Don't write BAM output stream (for debugging)
-.TP
-\fB\-m\fR \fB\-\-max\-iterations\fR N
-Iterate the left\-realignment no more than this many times
-.TP
-\fB\-c\fR \fB\-\-compressed\fR
-Write compressed BAM on stdout, default is uncompressed
-.SH AUTHOR
-This manpage was written by Andreas Tille for the Debian distribution and can be used for any other usage of the program.
diff -Nru freebayes-1.0.2/debian/changelog freebayes-1.1.0/debian/changelog
--- freebayes-1.0.2/debian/changelog	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/changelog	2017-09-04 14:20:02.000000000 +0000
@@ -1,3 +1,18 @@
+freebayes (1.1.0-1) unstable; urgency=low
+
+  [ Andreas Tille ]
+  * New upstream version
+  * debhelper 10
+  * Standards-Version: 4.1.0
+
+  [ Fabian Klötzl ]
+  * Use C++11
+  * Added patch for symbol name clash with htslib
+    Closes: #866646
+  * d/upstream/metadata: Add registries (Thanks to Steffen)
+
+ -- Andreas Tille  Mon, 04 Sep 2017 16:20:02 +0200
+
 freebayes (1.0.2-1) unstable; urgency=low
 
   * Initial packaging (Closes: #851306)
diff -Nru freebayes-1.0.2/debian/compat freebayes-1.1.0/debian/compat
--- freebayes-1.0.2/debian/compat	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/compat	2017-09-04 14:20:02.000000000 +0000
@@ -1 +1 @@
-9
\ No newline at end of file
+10
\ No newline at end of file
diff -Nru freebayes-1.0.2/debian/control freebayes-1.1.0/debian/control
--- freebayes-1.0.2/debian/control	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/control	2017-09-04 14:20:02.000000000 +0000
@@ -4,16 +4,20 @@
  Andreas Tille
 Section: science
 Priority: optional
-Build-Depends: debhelper (>= 9),
+Build-Depends: debhelper (>= 10),
                cmake,
                pkg-config,
+               python,
                zlib1g-dev,
                libbamtools-dev,
-               libvcflib-dev,
+               libvcflib-dev (>= 1.0.0~rc1+dfsg1-4),
                libtabixpp-dev,
+               libseqlib-dev,
                bc,
-               samtools
-Standards-Version: 3.9.8
+               samtools,
+               parallel,
+               libvcflib-tools (>= 1.0.0~rc1+dfsg1-6)
+Standards-Version: 4.1.0
 Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/freebayes.git
 Vcs-Git: https://anonscm.debian.org/git/debian-med/freebayes.git
 Homepage: https://github.com/ekg/freebayes
@@ -22,6 +26,7 @@
 Architecture: any
 Depends: ${shlibs:Depends},
          ${misc:Depends}
+Recommends: parallel
 Description: Bayesian haplotype-based polymorphism discovery and genotyping
  FreeBayes is a Bayesian genetic variant detector designed to find
  small polymorphisms, specifically SNPs (single-nucleotide
diff -Nru freebayes-1.0.2/debian/copyright freebayes-1.1.0/debian/copyright
--- freebayes-1.0.2/debian/copyright
2016-11-08 10:40:28.000000000 +0000 +++ freebayes-1.1.0/debian/copyright 2017-09-04 14:20:02.000000000 +0000 @@ -9,6 +9,14 @@ Files: debian/bash-tap/* Copyright: 2012-2016 Sam Graham License: MIT +Comment: Files obtained from + https://github.com/illusori/bash-tap + +Files: debian/test-simple-bash/* +Copyright: 2013 Ingy.Net +License: MIT +Comment: File obtained from + https://github.com/ingydotnet/test-simple-bash/tree/master/lib Files: ttmath/* Copyright: 2006-2012, Tomasz Sowa @@ -39,426 +47,6 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Files: src/fastlz.* -Copyright: 2005-2007 Ariya Hidayat -License: MIT - -Files: paper/genome_research.bst -Copyright: 1994-2007 Patrick W Daly -License: LPPL-1+ - The LaTeX Project Public License - =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - . - LPPL Version 1.3c 2008-05-04 - . - PREAMBLE - ======== - . - The LaTeX Project Public License (LPPL) is the primary license under - which the LaTeX kernel and the base LaTeX packages are distributed. - . - You may use this license for any work of which you hold the copyright - and which you wish to distribute. This license may be particularly - suitable if your work is TeX-related (such as a LaTeX package), but - it is written in such a way that you can use it even if your work is - unrelated to TeX. - . - The section `WHETHER AND HOW TO DISTRIBUTE WORKS UNDER THIS LICENSE', - below, gives instructions, examples, and recommendations for authors - who are considering distributing their works under this license. - . - This license gives conditions under which a work may be distributed - and modified, as well as conditions under which modified versions of - that work may be distributed. - . - We, the LaTeX3 Project, believe that the conditions below give you - the freedom to make and distribute modified versions of your work - that conform with whatever technical specifications you wish while - maintaining the availability, integrity, and reliability of - that work. If you do not see how to achieve your goal while - meeting these conditions, then read the document `cfgguide.tex' - and `modguide.tex' in the base LaTeX distribution for suggestions. - . - . - DEFINITIONS - =========== - . - In this license document the following terms are used: - . - `Work' - Any work being distributed under this License. - . - `Derived Work' - Any work that under any applicable law is derived from the Work. - . - `Modification' - Any procedure that produces a Derived Work under any applicable - law -- for example, the production of a file containing an - original file associated with the Work or a significant portion of - such a file, either verbatim or with modifications and/or - translated into another language. - . - `Modify' - To apply any procedure that produces a Derived Work under any - applicable law. - . - `Distribution' - Making copies of the Work available from one person to another, in - whole or in part. Distribution includes (but is not limited to) - making any electronic components of the Work accessible by - file transfer protocols such as FTP or HTTP or by shared file - systems such as Sun's Network File System (NFS). - . - `Compiled Work' - A version of the Work that has been processed into a form where it - is directly usable on a computer system. This processing may - include using installation facilities provided by the Work, - transformations of the Work, copying of components of the Work, or - other activities. 
Note that modification of any installation - facilities provided by the Work constitutes modification of the Work. - . - `Current Maintainer' - A person or persons nominated as such within the Work. If there is - no such explicit nomination then it is the `Copyright Holder' under - any applicable law. - . - `Base Interpreter' - A program or process that is normally needed for running or - interpreting a part or the whole of the Work. - . - A Base Interpreter may depend on external components but these - are not considered part of the Base Interpreter provided that each - external component clearly identifies itself whenever it is used - interactively. Unless explicitly specified when applying the - license to the Work, the only applicable Base Interpreter is a - `LaTeX-Format' or in the case of files belonging to the - `LaTeX-format' a program implementing the `TeX language'. - . - . - . - CONDITIONS ON DISTRIBUTION AND MODIFICATION - =========================================== - . - 1. Activities other than distribution and/or modification of the Work - are not covered by this license; they are outside its scope. In - particular, the act of running the Work is not restricted and no - requirements are made concerning any offers of support for the Work. - . - 2. You may distribute a complete, unmodified copy of the Work as you - received it. Distribution of only part of the Work is considered - modification of the Work, and no right to distribute such a Derived - Work may be assumed under the terms of this clause. - . - 3. You may distribute a Compiled Work that has been generated from a - complete, unmodified copy of the Work as distributed under Clause 2 - above, as long as that Compiled Work is distributed in such a way that - the recipients may install the Compiled Work on their system exactly - as it would have been installed if they generated a Compiled Work - directly from the Work. - . - 4. If you are the Current Maintainer of the Work, you may, without - restriction, modify the Work, thus creating a Derived Work. You may - also distribute the Derived Work without restriction, including - Compiled Works generated from the Derived Work. Derived Works - distributed in this manner by the Current Maintainer are considered to - be updated versions of the Work. - . - 5. If you are not the Current Maintainer of the Work, you may modify - your copy of the Work, thus creating a Derived Work based on the Work, - and compile this Derived Work, thus creating a Compiled Work based on - the Derived Work. - . - 6. If you are not the Current Maintainer of the Work, you may - distribute a Derived Work provided the following conditions are met - for every component of the Work unless that component clearly states - in the copyright notice that it is exempt from that condition. Only - the Current Maintainer is allowed to add such statements of exemption - to a component of the Work. - . - a. If a component of this Derived Work can be a direct replacement - for a component of the Work when that component is used with the - Base Interpreter, then, wherever this component of the Work - identifies itself to the user when used interactively with that - Base Interpreter, the replacement component of this Derived Work - clearly and unambiguously identifies itself as a modified version - of this component to the user when used interactively with that - Base Interpreter. - . - b. 
Every component of the Derived Work contains prominent notices - detailing the nature of the changes to that component, or a - prominent reference to another file that is distributed as part - of the Derived Work and that contains a complete and accurate log - of the changes. - . - c. No information in the Derived Work implies that any persons, - including (but not limited to) the authors of the original version - of the Work, provide any support, including (but not limited to) - the reporting and handling of errors, to recipients of the - Derived Work unless those persons have stated explicitly that - they do provide such support for the Derived Work. - . - d. You distribute at least one of the following with the Derived Work: - . - 1. A complete, unmodified copy of the Work; - if your distribution of a modified component is made by - offering access to copy the modified component from a - designated place, then offering equivalent access to copy - the Work from the same or some similar place meets this - condition, even though third parties are not compelled to - copy the Work along with the modified component; - . - 2. Information that is sufficient to obtain a complete, - unmodified copy of the Work. - . - 7. If you are not the Current Maintainer of the Work, you may - distribute a Compiled Work generated from a Derived Work, as long as - the Derived Work is distributed to all recipients of the Compiled - Work, and as long as the conditions of Clause 6, above, are met with - regard to the Derived Work. - . - 8. The conditions above are not intended to prohibit, and hence do not - apply to, the modification, by any method, of any component so that it - becomes identical to an updated version of that component of the Work as - it is distributed by the Current Maintainer under Clause 4, above. - . - 9. Distribution of the Work or any Derived Work in an alternative - format, where the Work or that Derived Work (in whole or in part) is - then produced by applying some process to that format, does not relax or - nullify any sections of this license as they pertain to the results of - applying that process. - . - 10. a. A Derived Work may be distributed under a different license - provided that license itself honors the conditions listed in - Clause 6 above, in regard to the Work, though it does not have - to honor the rest of the conditions in this license. - . - b. If a Derived Work is distributed under a different license, that - Derived Work must provide sufficient documentation as part of - itself to allow each recipient of that Derived Work to honor the - restrictions in Clause 6 above, concerning changes from the Work. - . - 11. This license places no restrictions on works that are unrelated to - the Work, nor does this license place any restrictions on aggregating - such works with the Work by any means. - . - 12. Nothing in this license is intended to, or may be used to, prevent - complete compliance by all parties with all applicable laws. - . - . - NO WARRANTY - =========== - . - There is no warranty for the Work. Except when otherwise stated in - writing, the Copyright Holder provides the Work `as is', without - warranty of any kind, either expressed or implied, including, but not - limited to, the implied warranties of merchantability and fitness for a - particular purpose. The entire risk as to the quality and performance - of the Work is with you. Should the Work prove defective, you assume - the cost of all necessary servicing, repair, or correction. - . 
- In no event unless required by applicable law or agreed to in writing - will The Copyright Holder, or any author named in the components of the - Work, or any other party who may distribute and/or modify the Work as - permitted above, be liable to you for damages, including any general, - special, incidental or consequential damages arising out of any use of - the Work or out of inability to use the Work (including, but not limited - to, loss of data, data being rendered inaccurate, or losses sustained by - anyone as a result of any failure of the Work to operate with any other - programs), even if the Copyright Holder or said author or said other - party has been advised of the possibility of such damages. - . - . - MAINTENANCE OF THE WORK - ======================= - . - The Work has the status `author-maintained' if the Copyright Holder - explicitly and prominently states near the primary copyright notice in - the Work that the Work can only be maintained by the Copyright Holder - or simply that it is `author-maintained'. - . - The Work has the status `maintained' if there is a Current Maintainer - who has indicated in the Work that they are willing to receive error - reports for the Work (for example, by supplying a valid e-mail - address). It is not required for the Current Maintainer to acknowledge - or act upon these error reports. - . - The Work changes from status `maintained' to `unmaintained' if there - is no Current Maintainer, or the person stated to be Current - Maintainer of the work cannot be reached through the indicated means - of communication for a period of six months, and there are no other - significant signs of active maintenance. - . - You can become the Current Maintainer of the Work by agreement with - any existing Current Maintainer to take over this role. - . - If the Work is unmaintained, you can become the Current Maintainer of - the Work through the following steps: - . - 1. Make a reasonable attempt to trace the Current Maintainer (and - the Copyright Holder, if the two differ) through the means of - an Internet or similar search. - . - 2. If this search is successful, then enquire whether the Work - is still maintained. - . - a. If it is being maintained, then ask the Current Maintainer - to update their communication data within one month. - . - b. If the search is unsuccessful or no action to resume active - maintenance is taken by the Current Maintainer, then announce - within the pertinent community your intention to take over - maintenance. (If the Work is a LaTeX work, this could be - done, for example, by posting to comp.text.tex.) - . - 3a. If the Current Maintainer is reachable and agrees to pass - maintenance of the Work to you, then this takes effect - immediately upon announcement. - . - b. If the Current Maintainer is not reachable and the Copyright - Holder agrees that maintenance of the Work be passed to you, - then this takes effect immediately upon announcement. - . - 4. If you make an `intention announcement' as described in 2b. above - and after three months your intention is challenged neither by - the Current Maintainer nor by the Copyright Holder nor by other - people, then you may arrange for the Work to be changed so as - to name you as the (new) Current Maintainer. - . - 5. 
If the previously unreachable Current Maintainer becomes - reachable once more within three months of a change completed - under the terms of 3b) or 4), then that Current Maintainer must - become or remain the Current Maintainer upon request provided - they then update their communication data within one month. - . - A change in the Current Maintainer does not, of itself, alter the fact - that the Work is distributed under the LPPL license. - . - If you become the Current Maintainer of the Work, you should - immediately provide, within the Work, a prominent and unambiguous - statement of your status as Current Maintainer. You should also - announce your new status to the same pertinent community as - in 2b) above. - . - . - WHETHER AND HOW TO DISTRIBUTE WORKS UNDER THIS LICENSE - ====================================================== - . - This section contains important instructions, examples, and - recommendations for authors who are considering distributing their - works under this license. These authors are addressed as `you' in - this section. - . - Choosing This License or Another License - ---------------------------------------- - . - If for any part of your work you want or need to use *distribution* - conditions that differ significantly from those in this license, then - do not refer to this license anywhere in your work but, instead, - distribute your work under a different license. You may use the text - of this license as a model for your own license, but your license - should not refer to the LPPL or otherwise give the impression that - your work is distributed under the LPPL. - . - The document `modguide.tex' in the base LaTeX distribution explains - the motivation behind the conditions of this license. It explains, - for example, why distributing LaTeX under the GNU General Public - License (GPL) was considered inappropriate. Even if your work is - unrelated to LaTeX, the discussion in `modguide.tex' may still be - relevant, and authors intending to distribute their works under any - license are encouraged to read it. - . - A Recommendation on Modification Without Distribution - ----------------------------------------------------- - . - It is wise never to modify a component of the Work, even for your own - personal use, without also meeting the above conditions for - distributing the modified component. While you might intend that such - modifications will never be distributed, often this will happen by - accident -- you may forget that you have modified that component; or - it may not occur to you when allowing others to access the modified - version that you are thus distributing it and violating the conditions - of this license in ways that could have legal implications and, worse, - cause problems for the community. It is therefore usually in your - best interest to keep your copy of the Work identical with the public - one. Many works provide ways to control the behavior of that work - without altering any of its licensed components. - . - How to Use This License - ----------------------- - . - To use this license, place in each of the components of your work both - an explicit copyright notice including your name and the year the work - was authored and/or last substantially modified. Include also a - statement that the distribution and/or modification of that - component is constrained by the conditions in this license. - . - Here is an example of such a notice and statement: - . - %% pig.dtx - %% Copyright 2005 M. Y. 
Name - % - % This work may be distributed and/or modified under the - % conditions of the LaTeX Project Public License, either version 1.3 - % of this license or (at your option) any later version. - % The latest version of this license is in - % http://www.latex-project.org/lppl.txt - % and version 1.3 or later is part of all distributions of LaTeX - % version 2005/12/01 or later. - % - % This work has the LPPL maintenance status `maintained'. - % - % The Current Maintainer of this work is M. Y. Name. - % - % This work consists of the files pig.dtx and pig.ins - % and the derived file pig.sty. - . - Given such a notice and statement in a file, the conditions - given in this license document would apply, with the `Work' referring - to the three files `pig.dtx', `pig.ins', and `pig.sty' (the last being - generated from `pig.dtx' using `pig.ins'), the `Base Interpreter' - referring to any `LaTeX-Format', and both `Copyright Holder' and - `Current Maintainer' referring to the person `M. Y. Name'. - . - If you do not want the Maintenance section of LPPL to apply to your - Work, change `maintained' above into `author-maintained'. - However, we recommend that you use `maintained', as the Maintenance - section was added in order to ensure that your Work remains useful to - the community even when you can no longer maintain and support it - yourself. - . - Derived Works That Are Not Replacements - --------------------------------------- - . - Several clauses of the LPPL specify means to provide reliability and - stability for the user community. They therefore concern themselves - with the case that a Derived Work is intended to be used as a - (compatible or incompatible) replacement of the original Work. If - this is not the case (e.g., if a few lines of code are reused for a - completely different task), then clauses 6b and 6d shall not apply. - . - . - Important Recommendations - ------------------------- - . - Defining What Constitutes the Work - . - The LPPL requires that distributions of the Work contain all the - files of the Work. It is therefore important that you provide a - way for the licensee to determine which files constitute the Work. - This could, for example, be achieved by explicitly listing all the - files of the Work near the copyright notice of each file or by - using a line such as: - . - % This work consists of all files listed in manifest.txt. - . - in that place. In the absence of an unequivocal list it might be - impossible for the licensee to determine what is considered by you - to comprise the Work and, in such a case, the licensee would be - entitled to make reasonable conjectures as to which files comprise - the Work. -Comment: The file is LPPL version 1 or later. Since the license at the - spceified location is now 1.3c we take "or later" option. - Files: debian/* Copyright: 2014 Brad Chapman License: MIT diff -Nru freebayes-1.0.2/debian/createmanpages freebayes-1.1.0/debian/createmanpages --- freebayes-1.0.2/debian/createmanpages 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/createmanpages 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,28 @@ +#!/bin/sh +MANDIR=debian +mkdir -p $MANDIR + +VERSION=`dpkg-parsechangelog | awk '/^Version:/ {print $2}' | sed -e 's/^[0-9]*://' -e 's/-.*//' -e 's/[+~]dfsg$//'` + +AUTHOR=".SH AUTHOR\nThis manpage was written by $DEBFULLNAME for the Debian distribution and +can be used for any other usage of the program. 
+" + +progname=bamleftalign +help2man --no-info --no-discard-stderr \ + --name='' \ + --version-string="$VERSION" ${progname} > $MANDIR/${progname}.1 +echo $AUTHOR >> $MANDIR/${progname}.1 + +progname=freebayes +help2man --no-info --no-discard-stderr --help-option=" " \ + --name='' \ + --version-string="$VERSION" ${progname} > $MANDIR/${progname}.1 +echo $AUTHOR >> $MANDIR/${progname}.1 + +cat <[OUTPUT] -.SH DESCRIPTION -FreeBayes is a Bayesian genetic variant detector designed to find -small polymorphisms, specifically SNPs (single-nucleotide -polymorphisms), indels (insertions and deletions), MNPs -(multi-nucleotide polymorphisms), and complex events (composite -insertion and substitution events) smaller than the length of a -short-read sequencing alignment. -.SS Overview: -To call variants from aligned short\-read sequencing data, supply BAM files and -a reference. FreeBayes will provide VCF output on standard out describing SNPs, -indels, and complex variants in samples in the input alignments. -.PP -By default, FreeBayes will consider variants supported by at least 2 -observations in a single sample (\fB\-C\fR) and also by at least 20% of the reads from -a single sample (\fB\-F\fR). These settings are suitable to low to high depth -sequencing in haploid and diploid samples, but users working with polyploid or -pooled samples may wish to adjust them depending on the characteristics of -their sequencing data. -.IP -FreeBayes is capable of calling variant haplotypes shorter than a read length -where multiple polymorphisms segregate on the same read. The maximum distance -between polymorphisms phased in this way is determined by the -\fB\-\-max\-complex\-gap\fR, which defaults to 3bp. In practice, this can comfortably be -set to half the read length. -.IP -Ploidy may be set to any level (\fB\-p\fR), but by default all samples are assumed to -be diploid. FreeBayes can model per\-sample and per\-region variation in -copy\-number (\fB\-A\fR) using a copy\-number variation map. -.IP -FreeBayes can act as a frequency\-based pooled caller and describe variants -and haplotypes in terms of observation frequency rather than called genotypes. -To do so, use \fB\-\-pooled\-continuous\fR and set input filters to a suitable level. -Allele observation counts will be described by AO and RO fields in the VCF output. 
-.SH Examples -.TP -# call variants assuming a diploid sample -freebayes \fB\-f\fR ref.fa aln.bam >var.vcf -.TP -# call variants assuming a diploid sample, providing gVCF output -freebayes \fB\-f\fR ref.fa \fB\-\-gvcf\fR aln.bam >var.gvcf -.TP -# require at least 5 supporting observations to consider a variant -freebayes \fB\-f\fR ref.fa \fB\-C\fR 5 aln.bam >var.vcf -.TP -# use a different ploidy -freebayes \fB\-f\fR ref.fa \fB\-p\fR 4 aln.bam >var.vcf -.TP -# assume a pooled sample with a known number of genome copies -freebayes \fB\-f\fR ref.fa \fB\-p\fR 20 \fB\-\-pooled\-discrete\fR aln.bam >var.vcf -.TP -# generate frequency\-based calls for all variants passing input thresholds -freebayes \fB\-f\fR ref.fa \fB\-F\fR 0.01 \fB\-C\fR 1 \fB\-\-pooled\-continuous\fR aln.bam >var.vcf -.TP -# use an input VCF (bgzipped + tabix indexed) to force calls at particular alleles -freebayes \fB\-f\fR ref.fa \-@ in.vcf.gz aln.bam >var.vcf -.TP -# generate long haplotype calls over known variants -freebayes \fB\-f\fR ref.fa \fB\-\-haplotype\-basis\-alleles\fR in.vcf.gz \fB\-\-haplotype\-length\fR 50 aln.bam -.TP -# naive variant calling: simply annotate observation counts of SNPs and indels -freebayes \fB\-f\fR ref.fa \fB\-\-haplotype\-length\fR 0 \fB\-\-min\-alternate\-count\fR 1 \fB\-\-min\-alternate\-fraction\fR 0 \fB\-\-pooled\-continuous\fR \fB\-\-report\-monomorphic\fR >var.vcf -.SH OPTIONS -.TP -\fB\-h\fR \fB\-\-help\fR -Prints this help dialog. -.TP -\fB\-\-version\fR -Prints the release number and the git commit id. -.SS input -.TP -\fB\-b\fR \fB\-\-bam\fR FILE -Add FILE to the set of BAM files to be analyzed. -.HP -\fB\-L\fR \fB\-\-bam\-list\fR FILE -.IP -A file containing a list of BAM files to be analyzed. -.TP -\fB\-c\fR \fB\-\-stdin\fR -Read BAM input on stdin. -.HP -\fB\-f\fR \fB\-\-fasta\-reference\fR FILE -.IP -Use FILE as the reference sequence for analysis. -An index file (FILE.fai) will be created if none exists. -If neither \fB\-\-targets\fR nor \fB\-\-region\fR are specified, FreeBayes -will analyze every position in this reference. -.HP -\fB\-t\fR \fB\-\-targets\fR FILE -.IP -Limit analysis to targets listed in the BED\-format FILE. -.HP -\fB\-r\fR \fB\-\-region\fR :\- -.IP -Limit analysis to the specified region, 0\-base coordinates, -end_position not included (same as BED format). -Either '\-' or '..' maybe used as a separator. -.HP -\fB\-s\fR \fB\-\-samples\fR FILE -.IP -Limit analysis to samples listed (one per line) in the FILE. -By default FreeBayes will analyze all samples in its input -BAM files. -.HP -\fB\-\-populations\fR FILE -.IP -Each line of FILE should list a sample and a population which -it is part of. The population\-based bayesian inference model -will then be partitioned on the basis of the populations. -.HP -\fB\-A\fR \fB\-\-cnv\-map\fR FILE -.IP -Read a copy number map from the BED file FILE, which has -the format: -.IP -reference sequence, start, end, sample name, copy number -.IP -\&... for each region in each sample which does not have the -default copy number as set by \fB\-\-ploidy\fR. -.SS output -.TP -\fB\-v\fR \fB\-\-vcf\fR FILE -Output VCF\-format results to FILE. (default: stdout) -.HP -\fB\-\-gvcf\fR -.IP -Write gVCF output, which indicates coverage in uncalled regions. -.HP -\fB\-\-gvcf\-chunk\fR NUM -.IP -When writing gVCF output emit a record for every NUM bases. -.HP -\-@ \fB\-\-variant\-input\fR VCF -.IP -Use variants reported in VCF file as input to the algorithm. 
-Variants in this file will included in the output even if -there is not enough support in the data to pass input filters. -.HP -\fB\-l\fR \fB\-\-only\-use\-input\-alleles\fR -.IP -Only provide variant calls and genotype likelihoods for sites -and alleles which are provided in the VCF input, and provide -output in the VCF for all input alleles, not just those which -have support in the data. -.HP -\fB\-\-haplotype\-basis\-alleles\fR VCF -.IP -When specified, only variant alleles provided in this input -VCF will be used for the construction of complex or haplotype -alleles. -.HP -\fB\-\-report\-all\-haplotype\-alleles\fR -.IP -At sites where genotypes are made over haplotype alleles, -provide information about all alleles in output, not only -those which are called. -.HP -\fB\-\-report\-monomorphic\fR -.IP -Report even loci which appear to be monomorphic, and report all -considered alleles, even those which are not in called genotypes. -Loci which do not have any potential alternates have '.' for ALT. -.TP -\fB\-P\fR \fB\-\-pvar\fR N -Report sites if the probability that there is a polymorphism -at the site is greater than N. default: 0.0. Note that postfiltering is generally recommended over the use of this parameter. -.SS population model -.TP -\fB\-T\fR \fB\-\-theta\fR N -The expected mutation rate or pairwise nucleotide diversity -among the population under analysis. This serves as the -single parameter to the Ewens Sampling Formula prior model -default: 0.001 -.TP -\fB\-p\fR \fB\-\-ploidy\fR N -Sets the default ploidy for the analysis to N. default: 2 -.HP -\fB\-J\fR \fB\-\-pooled\-discrete\fR -.IP -Assume that samples result from pooled sequencing. -Model pooled samples using discrete genotypes across pools. -When using this flag, set \fB\-\-ploidy\fR to the number of -alleles in each sample or use the \fB\-\-cnv\-map\fR to define -per\-sample ploidy. -.HP -\fB\-K\fR \fB\-\-pooled\-continuous\fR -.IP -Output all alleles which pass input filters, regardles of -genotyping outcome or model. -.SS reference allele -.HP -\fB\-Z\fR \fB\-\-use\-reference\-allele\fR -.IP -This flag includes the reference allele in the analysis as -if it is another sample from the same population. -.HP -\fB\-\-reference\-quality\fR MQ,BQ -.IP -Assign mapping quality of MQ to the reference allele at each -site and base quality of BQ. default: 100,60 -.SS allele scope -.TP -\fB\-I\fR \fB\-\-no\-snps\fR -Ignore SNP alleles. -.TP -\fB\-i\fR \fB\-\-no\-indels\fR -Ignore insertion and deletion alleles. -.TP -\fB\-X\fR \fB\-\-no\-mnps\fR -Ignore multi\-nuceotide polymorphisms, MNPs. -.HP -\fB\-u\fR \fB\-\-no\-complex\fR Ignore complex events (composites of other classes). -.HP -\fB\-n\fR \fB\-\-use\-best\-n\-alleles\fR N -.IP -Evaluate only the best N SNP alleles, ranked by sum of -supporting quality scores. (Set to 0 to use all; default: all) -.HP -\fB\-E\fR \fB\-\-max\-complex\-gap\fR N -.HP -\fB\-\-haplotype\-length\fR N -.IP -Allow haplotype calls with contiguous embedded matches of up -to this length. (default: 3) -.HP -\fB\-\-min\-repeat\-size\fR N -.IP -When assembling observations across repeats, require the total repeat -length at least this many bp. (default: 5) -.HP -\fB\-\-min\-repeat\-entropy\fR N -.IP -To detect interrupted repeats, build across sequence until it has -entropy > N bits per bp. (default: 0, off) -.HP -\fB\-\-no\-partial\-observations\fR -.IP -Exclude observations which do not fully span the dynamically\-determined -detection window. 
(default, use all observations, dividing partial -support across matching haplotypes when generating haplotypes.) -.SS indel realignment -.HP -\fB\-O\fR \fB\-\-dont\-left\-align\-indels\fR -.IP -Turn off left\-alignment of indels, which is enabled by default. -.SS input filters -.HP -\fB\-4\fR \fB\-\-use\-duplicate\-reads\fR -.IP -Include duplicate\-marked alignments in the analysis. -default: exclude duplicates marked as such in alignments -.HP -\fB\-m\fR \fB\-\-min\-mapping\-quality\fR Q -.IP -Exclude alignments from analysis if they have a mapping -quality less than Q. default: 1 -.HP -\fB\-q\fR \fB\-\-min\-base\-quality\fR Q -.IP -Exclude alleles from analysis if their supporting base -quality is less than Q. default: 0 -.HP -\fB\-R\fR \fB\-\-min\-supporting\-allele\-qsum\fR Q -.IP -Consider any allele in which the sum of qualities of supporting -observations is at least Q. default: 0 -.HP -\fB\-Y\fR \fB\-\-min\-supporting\-mapping\-qsum\fR Q -.IP -Consider any allele in which and the sum of mapping qualities of -supporting reads is at least Q. default: 0 -.HP -\fB\-Q\fR \fB\-\-mismatch\-base\-quality\-threshold\fR Q -.IP -Count mismatches toward \fB\-\-read\-mismatch\-limit\fR if the base -quality of the mismatch is >= Q. default: 10 -.HP -\fB\-U\fR \fB\-\-read\-mismatch\-limit\fR N -.IP -Exclude reads with more than N mismatches where each mismatch -has base quality >= mismatch\-base\-quality\-threshold. -default: ~unbounded -.HP -\fB\-z\fR \fB\-\-read\-max\-mismatch\-fraction\fR N -.IP -Exclude reads with more than N [0,1] fraction of mismatches where -each mismatch has base quality >= mismatch\-base\-quality\-threshold -default: 1.0 -.HP -\-$ \fB\-\-read\-snp\-limit\fR N -.IP -Exclude reads with more than N base mismatches, ignoring gaps -with quality >= mismatch\-base\-quality\-threshold. -default: ~unbounded -.HP -\fB\-e\fR \fB\-\-read\-indel\-limit\fR N -.IP -Exclude reads with more than N separate gaps. -default: ~unbounded -.TP -\fB\-0\fR \fB\-\-standard\-filters\fR -Use stringent input base and mapping quality filters -.IP -Equivalent to \fB\-m\fR 30 \fB\-q\fR 20 \fB\-R\fR 0 \fB\-S\fR 0 -.HP -\fB\-F\fR \fB\-\-min\-alternate\-fraction\fR N -.IP -Require at least this fraction of observations supporting -an alternate allele within a single individual in the -in order to evaluate the position. default: 0.2 -.HP -\fB\-C\fR \fB\-\-min\-alternate\-count\fR N -.IP -Require at least this count of observations supporting -an alternate allele within a single individual in order -to evaluate the position. default: 2 -.HP -\fB\-3\fR \fB\-\-min\-alternate\-qsum\fR N -.IP -Require at least this sum of quality of observations supporting -an alternate allele within a single individual in order -to evaluate the position. default: 0 -.HP -\fB\-G\fR \fB\-\-min\-alternate\-total\fR N -.IP -Require at least this count of observations supporting -an alternate allele within the total population in order -to use the allele in analysis. default: 1 -.HP -\fB\-\-min\-coverage\fR N -.IP -Require at least this coverage to process a site. default: 0 -.HP -\fB\-\-max\-coverage\fR N -.IP -Do not process sites with greater than this coverage. default: no limit -.SS population priors -.HP -\fB\-k\fR \fB\-\-no\-population\-priors\fR -.IP -Equivalent to \fB\-\-pooled\-discrete\fR \fB\-\-hwe\-priors\-off\fR and removal of -Ewens Sampling Formula component of priors. 
-.SS mappability priors -.HP -\fB\-w\fR \fB\-\-hwe\-priors\-off\fR -.IP -Disable estimation of the probability of the combination -arising under HWE given the allele frequency as estimated -by observation frequency. -.HP -\fB\-V\fR \fB\-\-binomial\-obs\-priors\-off\fR -.IP -Disable incorporation of prior expectations about observations. -Uses read placement probability, strand balance probability, -and read position (5'\-3') probability. -.HP -\fB\-a\fR \fB\-\-allele\-balance\-priors\-off\fR -.IP -Disable use of aggregate probability of observation balance between alleles -as a component of the priors. -.SS genotype likelihoods -.HP -\fB\-\-observation\-bias\fR FILE -.IP -Read length\-dependent allele observation biases from FILE. -The format is [length] [alignment efficiency relative to reference] -where the efficiency is 1 if there is no relative observation bias. -.HP -\fB\-\-base\-quality\-cap\fR Q -.IP -Limit estimated observation quality by capping base quality at Q. -.HP -\fB\-\-prob\-contamination\fR F -.TP -An estimate of contamination to use for all samples. -default: 10e\-9 -.TP -\fB\-\-legacy\-gls\fR -Use legacy (polybayes equivalent) genotype likelihood calculations -.HP -\fB\-\-contamination\-estimates\fR FILE -.IP -A file containing per\-sample estimates of contamination, such as -those generated by VerifyBamID. The format should be: -.IP -sample p(read=R|genotype=AR) p(read=A|genotype=AA) -.IP -Sample '*' can be used to set default contamination estimates. -.SS algorithmic features -.HP -\fB\-\-report\-genotype\-likelihood\-max\fR -.IP -Report genotypes using the maximum\-likelihood estimate provided -from genotype likelihoods. -.HP -\fB\-B\fR \fB\-\-genotyping\-max\-iterations\fR N -.IP -Iterate no more than N times during genotyping step. default: 1000. -.HP -\fB\-\-genotyping\-max\-banddepth\fR N -.IP -Integrate no deeper than the Nth best genotype by likelihood when -genotyping. default: 6. -.HP -\fB\-W\fR \fB\-\-posterior\-integration\-limits\fR N,M -.IP -Integrate all genotype combinations in our posterior space -which include no more than N samples with their Mth best -data likelihood. default: 1,3. -.HP -\fB\-N\fR \fB\-\-exclude\-unobserved\-genotypes\fR -.IP -Skip sample genotypings for which the sample has no supporting reads. -.HP -\fB\-S\fR \fB\-\-genotype\-variant\-threshold\fR N -.IP -Limit posterior integration to samples where the second\-best -genotype likelihood is no more than log(N) from the highest -genotype likelihood for the sample. default: ~unbounded -.HP -\fB\-j\fR \fB\-\-use\-mapping\-quality\fR -.IP -Use mapping quality of alleles when calculating data likelihoods. -.HP -\fB\-H\fR \fB\-\-harmonic\-indel\-quality\fR -.IP -Use a weighted sum of base qualities around an indel, scaled by the -distance from the indel. By default use a minimum BQ in flanking sequence. -.HP -\fB\-D\fR \fB\-\-read\-dependence\-factor\fR N -.IP -Incorporate non\-independence of reads by scaling successive -observations by this factor during data likelihood -calculations. default: 0.9 -.HP -\fB\-=\fR \fB\-\-genotype\-qualities\fR -.IP -Calculate the marginal probability of genotypes and report as GQ in -each sample field in the VCF output. -.SS debugging -.TP -\fB\-d\fR \fB\-\-debug\fR -Print debugging output. 
-.TP
-\fB\-dd\fR
-Print more verbose debugging output (requires "make DEBUG")
-.SH SEE ALSO
-"Haplotype\-based variant detection from short\-read sequencing"
-arXiv:1207.3907 (http://arxiv.org/abs/1207.3907)
-.SH AUTHOR
-Erik Garrison , Marth Lab, Boston College, 2010\-2014, Gabor Marth
-.pp
-This manpage was written by Andreas Tille for the Debian distribution and can be used for any other usage of the program.
diff -Nru freebayes-1.0.2/debian/manpages freebayes-1.1.0/debian/manpages
--- freebayes-1.0.2/debian/manpages	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/manpages	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-debian/*.1
diff -Nru freebayes-1.0.2/debian/patches/fix_json_name_clash.patch freebayes-1.1.0/debian/patches/fix_json_name_clash.patch
--- freebayes-1.0.2/debian/patches/fix_json_name_clash.patch	1970-01-01 00:00:00.000000000 +0000
+++ freebayes-1.1.0/debian/patches/fix_json_name_clash.patch	2017-09-04 14:20:02.000000000 +0000
@@ -0,0 +1,85 @@
+Author: Fabian Klötzl
+Date: Mon, 4 Sep 2017 12:27:38 +0200
+Acked-by: Andreas Tille
+Description: Fix a "json" name clash
+ htslib exports an enum value called `json`. That clashes with the
+ definition of a friend function. For simplicity we resolve this by
+ renaming the local function and ask htslib to prefix their symbols in
+ the future.
+
+---
+ src/Allele.cpp | 10 +++++-----
+ src/Allele.h   | 10 +++++-----
+ src/Sample.cpp |  2 +-
+ 3 files changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/src/Allele.cpp b/src/Allele.cpp
+index 2c58c79..8645704 100644
+--- a/src/Allele.cpp
++++ b/src/Allele.cpp
+@@ -332,19 +332,19 @@ string stringForAlleles(vector<Allele*> &alleles) {
+     return out.str();
+ }
+
+-string json(vector<Allele*> &alleles) {
++string to_json(vector<Allele*> &alleles) {
+     stringstream out;
+     vector<Allele*>::iterator a = alleles.begin();
+-    out << "[" << (*a)->json(); ++a;
++    out << "[" << (*a)->to_json(); ++a;
+     for (; a != alleles.end(); ++a)
+-        out << "," << (*a)->json();
++        out << "," << (*a)->to_json();
+     out << "]";
+     return out.str();
+ }
+
+-string json(Allele*& allele) { return allele->json(); }
++string to_json(Allele*& allele) { return allele->to_json(); }
+
+-string Allele::json(void) {
++string Allele::to_json(void) {
+     stringstream out;
+     if (!genotypeAllele) {
+         out << "{\"id\":\"" << readID << "\""
+diff --git a/src/Allele.h b/src/Allele.h
+index 3f0125f..ff41c10 100644
+--- a/src/Allele.h
++++ b/src/Allele.h
+@@ -96,10 +96,10 @@ class Allele {
+     friend ostream &operator<<(ostream &out, Allele &a);
+     friend ostream &operator<<(ostream &out, Allele* &a);
+
+-    friend string json(vector<Allele*> &alleles, long int &position);
+-    friend string json(vector<Allele*> &alleles);
+-    friend string json(Allele &allele, long int &position);
+-    friend string json(Allele* &allele);
++    friend string to_json(vector<Allele*> &alleles, long int &position);
++    friend string to_json(vector<Allele*> &alleles);
++    friend string to_json(Allele &allele, long int &position);
++    friend string to_json(Allele* &allele);
+
+ public:
+
+@@ -262,7 +262,7 @@ public:
+     // this is used to update cached data in the allele prior to presenting the allele for analysis
+     // for the current base, just use allele.currentBase
+
+-    string json(void);
++    string to_json(void);
+     unsigned int getLengthOnReference(void);
+     int referenceLengthFromCigar(void);
+
+diff --git a/src/Sample.cpp b/src/Sample.cpp
+index 06207cd..a6e3886 100644
+--- a/src/Sample.cpp
++++ b/src/Sample.cpp
+@@ -264,7 +264,7 @@ string Sample::json(void) {
+         vector<Allele*>& alleles = g->second;
+         for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
+             if (!first) { out << ","; } else { first = false; }
+-            out << (*a)->json();
++            out << (*a)->to_json();
+         }
+     }
+     out << "]";
diff -Nru freebayes-1.0.2/debian/patches/fix_test.patch freebayes-1.1.0/debian/patches/fix_test.patch
--- freebayes-1.0.2/debian/patches/fix_test.patch	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/patches/fix_test.patch	2017-09-04 14:20:02.000000000 +0000
@@ -13,3 +13,11 @@
 	prove -v t
 
 $(freebayes):
+--- a/scripts/freebayes-parallel
++++ b/scripts/freebayes-parallel
+@@ -37,4 +37,4 @@ command=("freebayes" "$@")
+ # iterate over regions using gnu parallel to dispatch jobs
+ cat "$regionsfile" | parallel -k -j "$ncpus" "${command[@]}" --region {}
+ ) | ../vcflib/scripts/vcffirstheader \
+-    | ../vcflib/bin/vcfstreamsort -w 1000 | vcfuniq # remove duplicates at region edges
++    | vcfstreamsort -w 1000 | vcfuniq # remove duplicates at region edges
diff -Nru freebayes-1.0.2/debian/patches/series freebayes-1.1.0/debian/patches/series
--- freebayes-1.0.2/debian/patches/series	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/patches/series	2017-09-04 14:20:02.000000000 +0000
@@ -1,3 +1,8 @@
 use_debian_packaged_bamtools.patch
 use_debian_packaged_vcflib.patch
+use_debian_packaged_seqlib.patch
+use_debian_packaged_libjsoncpp.patch
 fix_test.patch
+vcffirstheader.patch
+use_cpp11.patch
+fix_json_name_clash.patch
diff -Nru freebayes-1.0.2/debian/patches/use_cpp11.patch freebayes-1.1.0/debian/patches/use_cpp11.patch
--- freebayes-1.0.2/debian/patches/use_cpp11.patch	1970-01-01 00:00:00.000000000 +0000
+++ freebayes-1.1.0/debian/patches/use_cpp11.patch	2017-09-04 14:20:02.000000000 +0000
@@ -0,0 +1,21 @@
+From: Fabian Klötzl
+Date: Mon, 4 Sep 2017 12:05:00 +0200
+Description: Compile as C++11
+
+---
+ src/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/Makefile b/src/Makefile
+index 15a6da6..7b3b35a 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -9,7 +9,7 @@ CXX=g++
+ C=gcc
+
+ # Compiler flags
+-CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g
++CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g -std=gnu++11
+ #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2
+
+ LIBS = -lbamtools -ltabixpp -lz -lm -lpthread `pkg-config --libs libvcflib` `pkg-config --libs htslib` `pkg-config --libs libseqlib` `pkg-config --libs jsoncpp`
diff -Nru freebayes-1.0.2/debian/patches/use_debian_packaged_bamtools.patch freebayes-1.1.0/debian/patches/use_debian_packaged_bamtools.patch
--- freebayes-1.0.2/debian/patches/use_debian_packaged_bamtools.patch	2016-11-08 10:40:28.000000000 +0000
+++ freebayes-1.1.0/debian/patches/use_debian_packaged_bamtools.patch	2017-09-04 14:20:02.000000000 +0000
@@ -4,83 +4,83 @@ --- a/src/Makefile +++ b/src/Makefile -@@ -12,14 +12,13 @@ C=gcc CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2 -BAMTOOLS_ROOT=../bamtools + SEQLIB_ROOT=../SeqLib VCFLIB_ROOT=../vcflib + TABIX_ROOT=$(VCFLIB_ROOT)/tabixpp + HTSLIB_ROOT=$(TABIX_ROOT)/htslib --LIBS = -lz -lm -lpthread -INCLUDE = -I../ttmath -I$(BAMTOOLS_ROOT)/src/ -I$(VCFLIB_ROOT)/src/ -I$(TABIX_ROOT)/
-I$(VCFLIB_ROOT)/smithwaterman/ -I$(VCFLIB_ROOT)/multichoose/ -I$(VCFLIB_ROOT)/filevercmp/ -I$(HTSLIB_ROOT) -I$(SEQLIB_ROOT) -I$(SEQLIB_ROOT)/htslib ++LIBS = -lbamtools -ltabixpp -lz -lm -lpthread ++INCLUDE = -I../ttmath -I/usr/include/bamtools -I$(VCFLIB_ROOT)/src/ -I$(TABIX_ROOT)/ -I$(VCFLIB_ROOT)/smithwaterman/ -I$(VCFLIB_ROOT)/multichoose/ -I$(VCFLIB_ROOT)/filevercmp/ -I$(HTSLIB_ROOT) -I$(SEQLIB_ROOT) -I$(SEQLIB_ROOT)/htslib all: autoversion ../bin/freebayes ../bin/bamleftalign -@@ -34,10 +33,6 @@ gprof: +@@ -37,9 +36,6 @@ gprof: .PHONY: all static debug profiling gprof -# builds bamtools static lib, and copies into root -$(BAMTOOLS_ROOT)/lib/libbamtools.a: - cd $(BAMTOOLS_ROOT) && mkdir -p build && cd build && cmake .. && $(MAKE) -- + $(HTSLIB_ROOT)/libhts.a: + cd $(HTSLIB_ROOT) && make - OBJECTS=BedReader.o \ - CNV.o \ -@@ -70,8 +65,7 @@ OBJECTS=BedReader.o \ - ../vcflib/smithwaterman/LeftAlign.o \ +@@ -78,7 +74,6 @@ OBJECTS=BedReader.o \ ../vcflib/smithwaterman/Repeats.o \ ../vcflib/smithwaterman/IndelAllele.o \ -- Variant.o \ -- $(BAMTOOLS_ROOT)/lib/libbamtools.a -+ Variant.o - - HEADERS=multichoose.h version_git.h - -@@ -86,10 +80,10 @@ alleles ../bin/alleles: alleles.o $(OBJE + Variant.o \ +- $(BAMTOOLS_ROOT)/lib/libbamtools.a \ + $(SEQLIB_ROOT)/src/libseqlib.a \ + $(SEQLIB_ROOT)/bwa/libbwa.a \ + $(SEQLIB_ROOT)/fermi-lite/libfml.a \ +@@ -97,10 +92,10 @@ alleles ../bin/alleles: alleles.o $(OBJE dummy ../bin/dummy: dummy.o $(OBJECTS) $(HEADERS) $(CXX) $(CFLAGS) $(INCLUDE) dummy.o $(OBJECTS) -o ../bin/dummy $(LIBS) --bamleftalign ../bin/bamleftalign: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o -- $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o $(BAMTOOLS_ROOT)/lib/libbamtools.a -o ../bin/bamleftalign $(LIBS) -+bamleftalign ../bin/bamleftalign: bamleftalign.o Fasta.o -+ $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o -o ../bin/bamleftalign -lbamtools +-bamleftalign ../bin/bamleftalign: $(BAMTOOLS_ROOT)/lib/libbamtools.a $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o +- $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o Utility.o LeftAlign.o IndelAllele.o split.o $(BAMTOOLS_ROOT)/lib/libbamtools.a $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a -o ../bin/bamleftalign $(LIBS) ++bamleftalign ../bin/bamleftalign: $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o ++ $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o Utility.o LeftAlign.o IndelAllele.o split.o $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a -o ../bin/bamleftalign $(LIBS) --bamfiltertech ../bin/bamfiltertech: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamfiltertech.o $(OBJECTS) $(HEADERS) -+bamfiltertech ../bin/bamfiltertech: bamfiltertech.o $(OBJECTS) $(HEADERS) +-bamfiltertech ../bin/bamfiltertech: $(BAMTOOLS_ROOT)/lib/libbamtools.a $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamfiltertech.o $(OBJECTS) $(HEADERS) ++bamfiltertech ../bin/bamfiltertech: $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamfiltertech.o $(OBJECTS) $(HEADERS) $(CXX) $(CFLAGS) $(INCLUDE) bamfiltertech.o $(OBJECTS) -o ../bin/bamfiltertech $(LIBS) -@@ -104,7 +98,7 @@ alleles.o: alleles.cpp AlleleParser.o Al +@@ -115,7 +110,7 @@ alleles.o: alleles.cpp AlleleParser.o Al dummy.o: dummy.cpp AlleleParser.o Allele.o $(CXX) $(CFLAGS) $(INCLUDE) -c 
dummy.cpp --freebayes.o: freebayes.cpp TryCatch.h $(BAMTOOLS_ROOT)/lib/libbamtools.a -+freebayes.o: freebayes.cpp TryCatch.h +-freebayes.o: freebayes.cpp TryCatch.h $(HTSLIB_ROOT)/libhts.a $(BAMTOOLS_ROOT)/lib/libbamtools.a ++freebayes.o: freebayes.cpp TryCatch.h $(HTSLIB_ROOT)/libhts.a $(CXX) $(CFLAGS) $(INCLUDE) -c freebayes.cpp fastlz.o: fastlz.c fastlz.h -@@ -125,7 +119,7 @@ Genotype.o: Genotype.cpp Genotype.h Alle +@@ -136,7 +131,7 @@ Genotype.o: Genotype.cpp Genotype.h Alle Ewens.o: Ewens.cpp Ewens.h $(CXX) $(CFLAGS) $(INCLUDE) -c Ewens.cpp --AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(BAMTOOLS_ROOT)/lib/libbamtools.a -+AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h +-AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(BAMTOOLS_ROOT)/lib/libbamtools.a $(HTSLIB_ROOT)/libhts.a ++AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(HTSLIB_ROOT)/libhts.a $(CXX) $(CFLAGS) $(INCLUDE) -c AlleleParser.cpp Utility.o: Utility.cpp Utility.h Sum.h Product.h -@@ -173,7 +167,7 @@ bamleftalign.o: bamleftalign.cpp LeftAli +@@ -184,7 +179,7 @@ bamleftalign.o: bamleftalign.cpp LeftAli bamfiltertech.o: bamfiltertech.cpp $(CXX) $(CFLAGS) $(INCLUDE) -c bamfiltertech.cpp --LeftAlign.o: LeftAlign.h LeftAlign.cpp $(BAMTOOLS_ROOT)/lib/libbamtools.a -+LeftAlign.o: LeftAlign.h LeftAlign.cpp +-LeftAlign.o: LeftAlign.h LeftAlign.cpp $(BAMTOOLS_ROOT)/lib/libbamtools.a $(HTSLIB_ROOT)/libhts.a ++LeftAlign.o: LeftAlign.h LeftAlign.cpp $(HTSLIB_ROOT)/libhts.a $(CXX) $(CFLAGS) $(INCLUDE) -c LeftAlign.cpp IndelAllele.o: IndelAllele.cpp IndelAllele.h -@@ -263,6 +257,5 @@ autoversion: +@@ -275,6 +270,5 @@ autoversion: clean: rm -rf *.o *.cgh *~ freebayes alleles ../bin/freebayes ../bin/alleles ../vcflib/*.o ../vcflib/tabixpp/*.{o,a} diff -Nru freebayes-1.0.2/debian/patches/use_debian_packaged_libjsoncpp.patch freebayes-1.1.0/debian/patches/use_debian_packaged_libjsoncpp.patch --- freebayes-1.0.2/debian/patches/use_debian_packaged_libjsoncpp.patch 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/patches/use_debian_packaged_libjsoncpp.patch 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,17 @@ +Author: Andreas Tille +Last-Update: Wed, 28 May 2014 21:23:38 +0200 +Description: Use Debian packaged libjsoncpp + +--- a/src/Makefile ++++ b/src/Makefile +@@ -12,8 +12,8 @@ C=gcc + CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g + #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2 + +-LIBS = -lbamtools -ltabixpp -lz -lm -lpthread `pkg-config --libs libvcflib` `pkg-config --libs htslib` `pkg-config --libs libseqlib` +-INCLUDE = -I../ttmath -I/usr/include/bamtools `pkg-config --cflags libvcflib` `pkg-config --cflags libseqlib` `pkg-config --cflags htslib` ++LIBS = -lbamtools -ltabixpp -lz -lm -lpthread `pkg-config --libs libvcflib` `pkg-config --libs htslib` `pkg-config --libs libseqlib` `pkg-config --libs jsoncpp` ++INCLUDE = -I../ttmath -I/usr/include/bamtools `pkg-config --cflags libvcflib` `pkg-config --cflags libseqlib` `pkg-config --cflags htslib` `pkg-config --cflags jsoncpp` + + all: autoversion ../bin/freebayes ../bin/bamleftalign + diff -Nru freebayes-1.0.2/debian/patches/use_debian_packaged_seqlib.patch freebayes-1.1.0/debian/patches/use_debian_packaged_seqlib.patch --- freebayes-1.0.2/debian/patches/use_debian_packaged_seqlib.patch 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/patches/use_debian_packaged_seqlib.patch 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,87 @@ 
+Author: Andreas Tille +Last-Update: Wed, 28 May 2014 21:23:38 +0200 +Description: Use Debian packaged htslib and seqlib + seqlib carries another copy of htslib - we replace both + +--- a/src/Makefile ++++ b/src/Makefile +@@ -12,10 +12,8 @@ C=gcc + CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g + #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2 + +-SEQLIB_ROOT=../SeqLib +- +-LIBS = -lbamtools -ltabixpp -lz -lm -lpthread `pkg-config --libs libvcflib` +-INCLUDE = -I../ttmath -I/usr/include/bamtools `pkg-config --cflags libvcflib` -I$(SEQLIB_ROOT) -I$(SEQLIB_ROOT)/htslib ++LIBS = -lbamtools -ltabixpp -lz -lm -lpthread `pkg-config --libs libvcflib` `pkg-config --libs htslib` `pkg-config --libs libseqlib` ++INCLUDE = -I../ttmath -I/usr/include/bamtools `pkg-config --cflags libvcflib` `pkg-config --cflags libseqlib` `pkg-config --cflags htslib` + + all: autoversion ../bin/freebayes ../bin/bamleftalign + +@@ -33,12 +31,6 @@ gprof: + + .PHONY: all static debug profiling gprof + +-$(HTSLIB_ROOT)/libhts.a: +- cd $(HTSLIB_ROOT) && make +- +-$(SEQLIB_ROOT)/src/libseqlib.a: +- cd $(SEQLIB_ROOT) && ./configure && make +- + OBJECTS=BedReader.o \ + CNV.o \ + fastlz.o \ +@@ -62,11 +54,7 @@ OBJECTS=BedReader.o \ + Bias.o \ + Contamination.o \ + NonCall.o \ +- SegfaultHandler.o \ +- $(SEQLIB_ROOT)/src/libseqlib.a \ +- $(SEQLIB_ROOT)/bwa/libbwa.a \ +- $(SEQLIB_ROOT)/fermi-lite/libfml.a \ +- $(SEQLIB_ROOT)/htslib/libhts.a ++ SegfaultHandler.o + + HEADERS=multichoose.h version_git.h + +@@ -81,10 +69,10 @@ alleles ../bin/alleles: alleles.o $(OBJE + dummy ../bin/dummy: dummy.o $(OBJECTS) $(HEADERS) + $(CXX) $(CFLAGS) $(INCLUDE) dummy.o $(OBJECTS) -o ../bin/dummy $(LIBS) + +-bamleftalign ../bin/bamleftalign: $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o +- $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o Utility.o LeftAlign.o IndelAllele.o split.o $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a -o ../bin/bamleftalign $(LIBS) ++bamleftalign ../bin/bamleftalign: bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o ++ $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o Utility.o LeftAlign.o IndelAllele.o split.o -o ../bin/bamleftalign $(LIBS) + +-bamfiltertech ../bin/bamfiltertech: $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamfiltertech.o $(OBJECTS) $(HEADERS) ++bamfiltertech ../bin/bamfiltertech: bamfiltertech.o $(OBJECTS) $(HEADERS) + $(CXX) $(CFLAGS) $(INCLUDE) bamfiltertech.o $(OBJECTS) -o ../bin/bamfiltertech $(LIBS) + + +@@ -99,7 +87,7 @@ alleles.o: alleles.cpp AlleleParser.o Al + dummy.o: dummy.cpp AlleleParser.o Allele.o + $(CXX) $(CFLAGS) $(INCLUDE) -c dummy.cpp + +-freebayes.o: freebayes.cpp TryCatch.h $(HTSLIB_ROOT)/libhts.a ++freebayes.o: freebayes.cpp TryCatch.h + $(CXX) $(CFLAGS) $(INCLUDE) -c freebayes.cpp + + fastlz.o: fastlz.c fastlz.h +@@ -120,7 +108,7 @@ Genotype.o: Genotype.cpp Genotype.h Alle + Ewens.o: Ewens.cpp Ewens.h + $(CXX) $(CFLAGS) $(INCLUDE) -c Ewens.cpp + +-AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(HTSLIB_ROOT)/libhts.a ++AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h + $(CXX) $(CFLAGS) $(INCLUDE) -c AlleleParser.cpp + + Utility.o: Utility.cpp Utility.h Sum.h Product.h +@@ -168,7 +156,7 @@ bamleftalign.o: bamleftalign.cpp LeftAli + bamfiltertech.o: bamfiltertech.cpp + $(CXX) $(CFLAGS) $(INCLUDE) -c bamfiltertech.cpp + +-LeftAlign.o: LeftAlign.h LeftAlign.cpp $(HTSLIB_ROOT)/libhts.a ++LeftAlign.o: 
LeftAlign.h LeftAlign.cpp + $(CXX) $(CFLAGS) $(INCLUDE) -c LeftAlign.cpp + + IndelAllele.o: IndelAllele.cpp IndelAllele.h diff -Nru freebayes-1.0.2/debian/patches/use_debian_packaged_vcflib.patch freebayes-1.1.0/debian/patches/use_debian_packaged_vcflib.patch --- freebayes-1.0.2/debian/patches/use_debian_packaged_vcflib.patch 2016-11-08 10:40:28.000000000 +0000 +++ freebayes-1.1.0/debian/patches/use_debian_packaged_vcflib.patch 2017-09-04 14:20:02.000000000 +0000 @@ -4,45 +4,46 @@ --- a/src/Makefile +++ b/src/Makefile -@@ -12,10 +12,8 @@ C=gcc - CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g +@@ -13,12 +13,9 @@ CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -g #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2 + SEQLIB_ROOT=../SeqLib -VCFLIB_ROOT=../vcflib -- --LIBS = -L./ -L$(VCFLIB_ROOT)/tabixpp/ -lbamtools -ltabixpp -lz -lm --INCLUDE = -I/usr/include/bamtools -I../ttmath -I$(VCFLIB_ROOT)/src -I$(VCFLIB_ROOT)/ -+LIBS = -lbamtools -ltabixpp -lz -lm -lvcflib `pkg-config --libs libsmithwaterman` -+INCLUDE = -I/usr/include/bamtools -I../ttmath -I/usr/include/vcflib -I/usr/include/intervaltree `pkg-config --cflags libsmithwaterman` -ldisorder +-TABIX_ROOT=$(VCFLIB_ROOT)/tabixpp +-HTSLIB_ROOT=$(TABIX_ROOT)/htslib + +-LIBS = -lbamtools -ltabixpp -lz -lm -lpthread +-INCLUDE = -I../ttmath -I/usr/include/bamtools -I$(VCFLIB_ROOT)/src/ -I$(TABIX_ROOT)/ -I$(VCFLIB_ROOT)/smithwaterman/ -I$(VCFLIB_ROOT)/multichoose/ -I$(VCFLIB_ROOT)/filevercmp/ -I$(HTSLIB_ROOT) -I$(SEQLIB_ROOT) -I$(SEQLIB_ROOT)/htslib ++LIBS = -lbamtools -ltabixpp -lz -lm -lpthread `pkg-config --libs libvcflib` ++INCLUDE = -I../ttmath -I/usr/include/bamtools `pkg-config --cflags libvcflib` -I$(SEQLIB_ROOT) -I$(SEQLIB_ROOT)/htslib all: autoversion ../bin/freebayes ../bin/bamleftalign -@@ -57,15 +55,7 @@ OBJECTS=BedReader.o \ - Bias.o \ +@@ -66,14 +63,6 @@ OBJECTS=BedReader.o \ Contamination.o \ NonCall.o \ -- SegfaultHandler.o \ + SegfaultHandler.o \ - ../vcflib/tabixpp/tabix.o \ -- ../vcflib/tabixpp/bgzf.o \ +- ../vcflib/tabixpp/htslib/bgzf.o \ - ../vcflib/smithwaterman/SmithWatermanGotoh.o \ -- ../vcflib/smithwaterman/disorder.c \ +- ../vcflib/smithwaterman/disorder.cpp \ - ../vcflib/smithwaterman/LeftAlign.o \ - ../vcflib/smithwaterman/Repeats.o \ - ../vcflib/smithwaterman/IndelAllele.o \ -- Variant.o -+ SegfaultHandler.o - - HEADERS=multichoose.h version_git.h - -@@ -173,17 +163,6 @@ LeftAlign.o: LeftAlign.h LeftAlign.cpp +- Variant.o \ + $(SEQLIB_ROOT)/src/libseqlib.a \ + $(SEQLIB_ROOT)/bwa/libbwa.a \ + $(SEQLIB_ROOT)/fermi-lite/libfml.a \ +@@ -185,18 +174,6 @@ LeftAlign.o: LeftAlign.h LeftAlign.cpp $ IndelAllele.o: IndelAllele.cpp IndelAllele.h $(CXX) $(CFLAGS) $(INCLUDE) -c IndelAllele.cpp -Variant.o: $(VCFLIB_ROOT)/src/Variant.h $(VCFLIB_ROOT)/src/Variant.cpp - $(CXX) $(CFLAGS) $(INCLUDE) -c $(VCFLIB_ROOT)/src/Variant.cpp - --../vcflib/tabixpp/tabix.o: ../vcflib/tabixpp/tabix.hpp ../vcflib/tabixpp/tabix.cpp --../vcflib/tabixpp/bgzf.o: ../vcflib/tabixpp/bgzf.c ../vcflib/tabixpp/bgzf.h +-../vcflib/tabixpp/tabix.o: +- cd $(TABIX_ROOT)/ && make +-../vcflib/tabixpp/htslib/bgzf.o: ../vcflib/tabixpp/htslib/bgzf.c ../vcflib/tabixpp/htslib/htslib/bgzf.h - cd ../vcflib/tabixpp && $(MAKE) - -../vcflib/smithwaterman/SmithWatermanGotoh.o: ../vcflib/smithwaterman/SmithWatermanGotoh.h ../vcflib/smithwaterman/SmithWatermanGotoh.cpp @@ -52,11 +53,13 @@ VERSION_FILE=./version_git.h RELEASED_VERSION_FILE=./version_release.txt -@@ -257,5 +236,4 @@ autoversion: +@@ -269,6 +246,5 @@ autoversion: + clean: - rm -rf *.o *.cgh *~ freebayes alleles 
../bin/freebayes ../bin/alleles ../vcflib/*.o ../vcflib/tabixpp/*.{o,a} +- rm -rf *.o *.cgh *~ freebayes alleles ../bin/freebayes ../bin/alleles ../vcflib/*.o ../vcflib/tabixpp/*.{o,a} - cd ../vcflib/smithwaterman && make clean ++ rm -rf *.o *.cgh *~ freebayes alleles ../bin/freebayes ../bin/alleles --- a/Makefile +++ b/Makefile @@ -66,227 +69,3 @@ cd src && $(MAKE) log: src/version_git.h ---- a/src/AlleleParser.cpp -+++ b/src/AlleleParser.cpp -@@ -485,7 +485,7 @@ void AlleleParser::setupVCFInput(void) { - // variant input for analysis and targeting - if (!parameters.variantPriorsFile.empty()) { - variantCallInputFile.open(parameters.variantPriorsFile); -- currentVariant = new vcf::Variant(variantCallInputFile); -+ currentVariant = new vcflib::Variant(variantCallInputFile); - usingVariantInputAlleles = true; - - // get sample names from VCF input file -@@ -1108,7 +1108,7 @@ void AlleleParser::updateHaplotypeBasisA - pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW + 1)) { - //cerr << "the vcf line " << haplotypeVariantInputFile.line << endl; - // get the variants in the target region -- vcf::Variant var(haplotypeVariantInputFile); -+ vcflib::Variant var(haplotypeVariantInputFile); - while (haplotypeVariantInputFile.getNextVariant(var)) { - //cerr << "input variant: " << var << endl; - -@@ -1122,9 +1122,9 @@ void AlleleParser::updateHaplotypeBasisA - } - */ - -- map > variants = var.parsedAlternates(); -- for (map >::iterator a = variants.begin(); a != variants.end(); ++a) { -- for (vector::iterator v = a->second.begin(); v != a->second.end(); ++v) { -+ map > variants = var.parsedAlternates(); -+ for (map >::iterator a = variants.begin(); a != variants.end(); ++a) { -+ for (vector::iterator v = a->second.begin(); v != a->second.end(); ++v) { - //cerr << v->ref << "/" << v->alt << endl; - if (v->ref != v->alt) { - //cerr << "basis allele " << v->position << " " << v->ref << "/" << v->alt << endl; -@@ -2107,7 +2107,7 @@ void AlleleParser::getInputVariantsInReg - if (!usingVariantInputAlleles) return; - - // get the variants in the target region -- vcf::Variant var(variantCallInputFile); -+ vcflib::Variant var(variantCallInputFile); - if (!seq.empty()) { - variantCallInputFile.setRegion(seq, start, end); - } -@@ -2117,10 +2117,10 @@ void AlleleParser::getInputVariantsInReg - long int pos = currentVariant->position - 1; - // get alternate alleles - bool includePreviousBaseForIndels = true; -- map > variantAlleles = currentVariant->parsedAlternates(); -+ map > variantAlleles = currentVariant->parsedAlternates(); - // TODO this would be a nice option: why does it not work? 
-- //map > variantAlleles = currentVariant->flatAlternates(); -- vector< vector > orderedVariantAlleles; -+ //map > variantAlleles = currentVariant->flatAlternates(); -+ vector< vector > orderedVariantAlleles; - for (vector::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) { - orderedVariantAlleles.push_back(variantAlleles[*a]); - } -@@ -2128,14 +2128,14 @@ void AlleleParser::getInputVariantsInReg - vector genotypeAlleles; - set alternatePositions; - -- for (vector< vector >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) { -+ for (vector< vector >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) { - -- vector& altAllele = *g; -+ vector& altAllele = *g; - - vector alleles; - -- for (vector::iterator v = altAllele.begin(); v != altAllele.end(); ++v) { -- vcf::VariantAllele& variant = *v; -+ for (vector::iterator v = altAllele.begin(); v != altAllele.end(); ++v) { -+ vcflib::VariantAllele& variant = *v; - long int allelePos = variant.position - 1; - AlleleType type; - string alleleSequence = variant.alt; -@@ -2240,7 +2240,7 @@ void AlleleParser::updateInputVariants(l - if (gotRegion) { - - // get the variants in the target region -- vcf::Variant var(variantCallInputFile); -+ vcflib::Variant var(variantCallInputFile); - bool ok; - while (ok = variantCallInputFile.getNextVariant(*currentVariant)) { - -@@ -2248,10 +2248,10 @@ void AlleleParser::updateInputVariants(l - long int pos = currentVariant->position - 1; - // get alternate alleles - bool includePreviousBaseForIndels = true; -- map > variantAlleles = currentVariant->parsedAlternates(); -+ map > variantAlleles = currentVariant->parsedAlternates(); - // TODO this would be a nice option: why does it not work? 
-- //map > variantAlleles = currentVariant->flatAlternates(); -- vector< vector > orderedVariantAlleles; -+ //map > variantAlleles = currentVariant->flatAlternates(); -+ vector< vector > orderedVariantAlleles; - for (vector::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) { - orderedVariantAlleles.push_back(variantAlleles[*a]); - } -@@ -2259,14 +2259,14 @@ void AlleleParser::updateInputVariants(l - vector genotypeAlleles; - set alternatePositions; - -- for (vector< vector >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) { -+ for (vector< vector >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) { - -- vector& altAllele = *g; -+ vector& altAllele = *g; - - vector alleles; - -- for (vector::iterator v = altAllele.begin(); v != altAllele.end(); ++v) { -- vcf::VariantAllele& variant = *v; -+ for (vector::iterator v = altAllele.begin(); v != altAllele.end(); ++v) { -+ vcflib::VariantAllele& variant = *v; - long int allelePos = variant.position - 1; - AlleleType type; - string alleleSequence = variant.alt; ---- a/src/AlleleParser.h -+++ b/src/AlleleParser.h -@@ -163,9 +163,9 @@ public: - BedReader bedReader; - - // VCF -- vcf::VariantCallFile variantCallFile; -- vcf::VariantCallFile variantCallInputFile; // input variant alleles, to target analysis -- vcf::VariantCallFile haplotypeVariantInputFile; // input alleles which will be used to construct haplotype alleles -+ vcflib::VariantCallFile variantCallFile; -+ vcflib::VariantCallFile variantCallInputFile; // input variant alleles, to target analysis -+ vcflib::VariantCallFile haplotypeVariantInputFile; // input alleles which will be used to construct haplotype alleles - - // input haplotype alleles - // -@@ -349,7 +349,7 @@ private: - - int currentRefID; - BamAlignment currentAlignment; -- vcf::Variant* currentVariant; -+ vcflib::Variant* currentVariant; - - }; - ---- a/src/ResultData.cpp -+++ b/src/ResultData.cpp -@@ -5,8 +5,8 @@ using namespace std; - - - --vcf::Variant& Results::vcf( -- vcf::Variant& var, // variant to update -+vcflib::Variant& Results::vcf( -+ vcflib::Variant& var, // variant to update - BigFloat pHom, - long double bestComboOddsRatio, - //long double alleleSamplingProb, -@@ -630,8 +630,8 @@ vcf::Variant& Results::vcf( - } - - --vcf::Variant& Results::gvcf( -- vcf::Variant& var, -+vcflib::Variant& Results::gvcf( -+ vcflib::Variant& var, - NonCalls& nonCalls, - AlleleParser* parser) { - ---- a/src/ResultData.h -+++ b/src/ResultData.h -@@ -41,8 +41,8 @@ public: - } - } - -- vcf::Variant& vcf( -- vcf::Variant& var, // variant to update -+ vcflib::Variant& vcf( -+ vcflib::Variant& var, // variant to update - BigFloat pHom, - long double bestComboOddsRatio, - //long double alleleSamplingProb, -@@ -61,8 +61,8 @@ public: - vector& sequencingTechnologies, - AlleleParser* parser); - -- vcf::Variant& gvcf( -- vcf::Variant& var, -+ vcflib::Variant& gvcf( -+ vcflib::Variant& var, - NonCalls& noncalls, - AlleleParser* parser); - }; ---- a/src/freebayes.cpp -+++ b/src/freebayes.cpp -@@ -144,7 +144,7 @@ int main (int argc, char *argv[]) { - || (parameters.gVCFchunk && - nonCalls.lastPos().second - nonCalls.firstPos().second - > parameters.gVCFchunk))) { -- vcf::Variant var(parser->variantCallFile); -+ vcflib::Variant var(parser->variantCallFile); - out << results.gvcf(var, nonCalls, parser) << endl; - nonCalls.clear(); - } -@@ -658,12 +658,12 @@ int main (int argc, char *argv[]) { - - // write the last gVCF record(s) - if 
(parameters.gVCFout && !nonCalls.empty()) { -- vcf::Variant var(parser->variantCallFile); -+ vcflib::Variant var(parser->variantCallFile); - out << results.gvcf(var, nonCalls, parser) << endl; - nonCalls.clear(); - } - -- vcf::Variant var(parser->variantCallFile); -+ vcflib::Variant var(parser->variantCallFile); - - out << results.vcf( - var, -@@ -696,7 +696,7 @@ int main (int argc, char *argv[]) { - // write the last gVCF record - if (parameters.gVCFout && !nonCalls.empty()) { - Results results; -- vcf::Variant var(parser->variantCallFile); -+ vcflib::Variant var(parser->variantCallFile); - out << results.gvcf(var, nonCalls, parser) << endl; - nonCalls.clear(); - } diff -Nru freebayes-1.0.2/debian/patches/vcffirstheader.patch freebayes-1.1.0/debian/patches/vcffirstheader.patch --- freebayes-1.0.2/debian/patches/vcffirstheader.patch 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/patches/vcffirstheader.patch 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,29 @@ +Author: Andreas Tille +Last-Update: Fri, 10 Feb 2017 09:09:35 +0100 +Description: This script was not part of the vcflib release + tarball but is available in Git + https://github.com/vcflib/vcflib/blob/master/scripts/vcffirstheader + Since it is used in the test suite of freebayes 1.1, it is installed + here as a quilt patch. Once a vcflib release includes the + scripts directory, the patch can be dropped and replaced by a corresponding + symlink inside rules or by setting PATH accordingly + +--- /dev/null ++++ b/vcflib/scripts/vcffirstheader +@@ -0,0 +1,16 @@ ++#!/usr/bin/env python ++ ++import sys ++ ++header=True ++for line in sys.stdin: ++ if line.startswith('##'): ++ if header: ++ print line.strip() ++ continue ++ elif line.startswith('#'): ++ if header: ++ print line.strip() ++ header=False ++ continue ++ print line.strip() diff -Nru freebayes-1.0.2/debian/README.source freebayes-1.1.0/debian/README.source --- freebayes-1.0.2/debian/README.source 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/README.source 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,11 @@ +To run the test suite some bash testing frameworks were used +by upstream as Git submodules.
For the Git packaging these +were downloaded separately + +Files: debian/bash-tap/* + Obtained from + https://github.com/illusori/bash-tap + +Files: debian/test-simple-bash/* + Obtained from + https://github.com/ingydotnet/test-simple-bash/tree/master/lib diff -Nru freebayes-1.0.2/debian/rules freebayes-1.1.0/debian/rules --- freebayes-1.0.2/debian/rules 2016-11-08 10:40:28.000000000 +0000 +++ freebayes-1.1.0/debian/rules 2017-09-04 14:20:02.000000000 +0000 @@ -9,5 +9,9 @@ override_dh_auto_test: mkdir -p $(CURDIR)/test/bash-tap/ for bt in $(CURDIR)/debian/bash-tap/* ; do ln -s $${bt} $(CURDIR)/test/bash-tap/`basename $${bt}` ; done - export PATH=/usr/lib/vcflib/binaries/:$(PATH) dh_auto_test && echo "Tests were running successfully" + ln -s $(CURDIR)/debian/test-simple-bash/lib test/test-simple-bash + chmod +x vcflib/scripts/vcffirstheader + # export PATH=/usr/lib/vcflib/binaries/:$(PATH) dh_auto_test + echo "Tests should be run" && PATH=/usr/lib/vcflib/binaries/:$(PATH) dh_auto_test && echo "Tests were running successfully" rm -rf $(CURDIR)/test/bash-tap + rm test/test-simple-bash/lib diff -Nru freebayes-1.0.2/debian/test-simple-bash/lib/test-simple.bash freebayes-1.1.0/debian/test-simple-bash/lib/test-simple.bash --- freebayes-1.0.2/debian/test-simple-bash/lib/test-simple.bash 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/test-simple-bash/lib/test-simple.bash 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,110 @@ +# test-simple.bash - Simple TAP test framework for Bash +# +# Copyright (c) 2013 Ingy döt Net + +TestSimple_VERSION='0.0.1' + +TestSimple.init() { + TestSimple_plan=0 + TestSimple_run=0 + TestSimple_failed=0 + TestSimple_usage='Usage: source test-simple.bash tests ' + + if [ $# -gt 0 ]; then + [[ $# -eq 2 ]] && [[ "$1" == 'tests' ]] || + TestSimple.die "$TestSimple_usage" + [[ "$2" =~ ^-?[0-9]+$ ]] || + TestSimple.die 'Plan must be a number' + [[ $2 -gt 0 ]] || + TestSimple.die 'Plan must greater then 0' + TestSimple_plan=$2 + printf "1..%d\n" $TestSimple_plan + fi + + trap TestSimple.END EXIT +} + +ok() { + local args=("$@") + local last=$((${#args[@]} - 1)) + local label='' + local ending_re='^]]?$' + let TestSimple_run=TestSimple_run+1 + ( + set +e + local rc= + if [[ $last -gt 0 ]] && [[ ! "${args[$last]}" =~ $ending_re ]]; then + label="${args[$last]}" + unset args[$last] + fi + if [[ ${#args[@]} -eq 1 ]] && [[ "${args[0]}" =~ ^[0-9]+$ ]]; then + rc=${args[0]} + elif [ ${args[0]} == '[[' ]; then + # XXX Currently need eval to support [[. Is there another way? + # Is [[ overkill? So many questons! + eval "${args[@]}" &> /dev/null + rc=$? + else + "${args[@]}" &> /dev/null + rc=$? + fi + if [ $rc -eq 0 ]; then + if [ -n "$label" ]; then + echo "ok $TestSimple_run - $label" + else + echo "ok $TestSimple_run" + fi + else + let TestSimple_failed=TestSimple_failed+1 + if [ -n "$label" ]; then + echo "not ok $TestSimple_run - $label" + TestSimple.failure "$label" + else + echo "not ok $TestSimple_run" + TestSimple.failure "$label" + fi + fi + return $rc + ) +} + +TestSimple_CALL_STACK_LEVEL=1 +TestSimple.failure() { + local c=( $(caller $TestSimple_CALL_STACK_LEVEL) ) + local file=${c[2]} + local line=${c[0]} + local label="$1" + label=${label:+"'$label'\n# at $file line $line."} + label=${label:-"at $file line $line."} + echo -e "# Failed test $label" >&2 +} + +TestSimple.END() { + for v in plan run failed; do eval local $v=\$TestSimple_$v; done + if [ $plan -eq 0 ]; then + if [ $run -gt 0 ]; then + echo "# Tests were run but no plan was declared." 
>&2 + fi + else + if [ $run -eq 0 ]; then + echo "# No tests run!" >&2 + elif [ $run -ne $plan ]; then + local msg="# Looks like you planned $plan tests but ran $run." + [ $plan -eq 1 ] && msg=${msg/tests/test} + echo "$msg" >&2 + fi + fi + local exit_code=0 + if [ $TestSimple_failed -gt 0 ]; then + exit_code=$TestSimple_failed + [ $exit_code -gt 254 ] && exit_code=254 + local msg="# Looks like you failed $failed tests of $run run." + [ $TestSimple_failed -eq 1 ] && msg=${msg/tests/test} + echo "$msg" >&2 + fi + exit $exit_code +} + +TestSimple.die() { echo "$@" >&2; trap EXIT; exit 255; } + +[[ "${BASH_SOURCE[0]}" != "${0}" ]] && TestSimple.init "$@" diff -Nru freebayes-1.0.2/debian/test-simple-bash/test/basics.t freebayes-1.1.0/debian/test-simple-bash/test/basics.t --- freebayes-1.0.2/debian/test-simple-bash/test/basics.t 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/test-simple-bash/test/basics.t 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,26 @@ +#!/bin/bash + +PATH=lib:$PATH +source test-simple.bash tests 14 + +ok 0 '0 is true' +ok $((6 * 7 -42)) 'Math result is 0' +ok true 'true is ok' +ok $(false || true; echo $?) 'Expression expansion' + +ls &> /dev/null +ok $? '$? is success' +ls --qqq &> /dev/null +ok $((! $?)) 'Negate $? failure' + +fruit=apple + +ok [ $fruit = apple ] '[ … ] testing works' +ok [ "0" == "0" -a 1 -eq 1 ] '[ … -a … ] (AND) testing works' +ok [ ${fruit/a/A} = Apple ] 'Substitution expansion works' +ok [ "${fruit}s" = 'app''les' ] 'Quote removal works' +ok [[ $fruit = apple ]] '[[ … ]] works' +ok [[ $fruit == apple ]] '== works' +ok [[ $((6 * 7)) -eq 42 ]] '-eq works with math expression' +ok $(ls | grep lib &> /dev/null; echo $?) \ + 'Testing a grep command works' diff -Nru freebayes-1.0.2/debian/test-simple-bash/test/doc.t freebayes-1.1.0/debian/test-simple-bash/test/doc.t --- freebayes-1.0.2/debian/test-simple-bash/test/doc.t 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/test-simple-bash/test/doc.t 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,15 @@ +#!/bin/bash + +question() { echo yes; } + +PATH=lib:$PATH +source test-simple.bash tests 5 + +ok 0 '0 is true (other numbers are false)' + +answer=$(question "...?") +ok [ $answer == yes ] 'The answer is yes!' +ok [[ $answer =~ ^y ]] 'The answer begins with y' + +ok true 'true is ok' +ok '! false' '! false is true' diff -Nru freebayes-1.0.2/debian/test-simple-bash/test/no-label.t freebayes-1.1.0/debian/test-simple-bash/test/no-label.t --- freebayes-1.0.2/debian/test-simple-bash/test/no-label.t 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/debian/test-simple-bash/test/no-label.t 2017-09-04 14:20:02.000000000 +0000 @@ -0,0 +1,25 @@ +#!/bin/bash + +PATH=lib:$PATH +source test-simple.bash tests 14 + +ok 0 +ok $((6 * 7 -42)) +ok true +ok $(false || true; echo $?) + +ls &> /dev/null +ok $? +ls --qqq &> /dev/null +ok $((! $?)) + +fruit=apple + +ok [ $fruit = apple ] +ok [ "0" == "0" -a 1 -eq 1 ] +ok [ ${fruit/a/A} = Apple ] +ok [ "${fruit}s" = 'app''les' ] +ok [[ $fruit = apple ]] +ok [[ $fruit == apple ]] +ok [[ $((6 * 7)) -eq 42 ]] +ok $(ls | grep lib &> /dev/null; echo $?) 
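The override_dh_auto_test target in debian/rules above wires the vendored TAP helpers into the upstream test tree before the suite runs. A minimal shell sketch of the same wiring, assuming an unpacked freebayes source tree with the debian/ directory present; the final `make test` call merely stands in for whatever dh_auto_test actually invokes in this package:

    # recreate the symlink layout set up by override_dh_auto_test
    mkdir -p test/bash-tap
    for bt in debian/bash-tap/*; do
        # bash-tap helpers expected by the upstream test scripts
        ln -s "$(pwd)/$bt" "test/bash-tap/$(basename "$bt")"
    done
    # test-simple-bash TAP framework shown above
    ln -s "$(pwd)/debian/test-simple-bash/lib" test/test-simple-bash
    # vcffirstheader is shipped via the quilt patch above, so mark it executable
    chmod +x vcflib/scripts/vcffirstheader
    # put the packaged vcflib tools first on PATH, then run the tests
    PATH=/usr/lib/vcflib/binaries/:$PATH make test
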
diff -Nru freebayes-1.0.2/debian/upstream/metadata freebayes-1.1.0/debian/upstream/metadata --- freebayes-1.0.2/debian/upstream/metadata 2016-11-08 10:40:28.000000000 +0000 +++ freebayes-1.1.0/debian/upstream/metadata 2017-09-04 14:20:02.000000000 +0000 @@ -6,3 +6,8 @@ DOI: arXiv:1207.3907 URL: http://arxiv.org/abs/1207.3907 eprint: http://arxiv.org/pdf/1207.3907v2.pdf +Registry: + - Name: OMICtools + Entry: OMICS_00059 + - Name: RRID + Entry: SCR_010761 diff -Nru freebayes-1.0.2/.gitmodules freebayes-1.1.0/.gitmodules --- freebayes-1.0.2/.gitmodules 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/.gitmodules 2016-11-03 11:25:39.000000000 +0000 @@ -1,6 +1,3 @@ -[submodule "vcflib"] - path = vcflib - url = https://github.com/ekg/vcflib.git [submodule "bamtools"] path = bamtools url = https://github.com/ekg/bamtools.git @@ -13,3 +10,9 @@ [submodule "bash-tap"] path = test/bash-tap url = https://github.com/illusori/bash-tap.git +[submodule "vcflib"] + path = vcflib + url = https://github.com/vcflib/vcflib.git +[submodule "SeqLib"] + path = SeqLib + url = https://github.com/walaj/SeqLib.git diff -Nru freebayes-1.0.2/python/allelebayes.py freebayes-1.1.0/python/allelebayes.py --- freebayes-1.0.2/python/allelebayes.py 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/python/allelebayes.py 2016-11-03 11:25:39.000000000 +0000 @@ -1,3 +1,5 @@ +#!/usr/bin/env python + # calculates data likelihoods for sets of alleles import multiset diff -Nru freebayes-1.0.2/python/multiset.py freebayes-1.1.0/python/multiset.py --- freebayes-1.0.2/python/multiset.py 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/python/multiset.py 2016-11-03 11:25:39.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- """ multiset.py -- non-recursive n multichoose k and diff -Nru freebayes-1.0.2/README.md freebayes-1.1.0/README.md --- freebayes-1.0.2/README.md 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/README.md 2016-11-03 11:25:39.000000000 +0000 @@ -58,7 +58,7 @@ it is run without arguments or with the `--help` option. For example, you should see something like this: - version: v0.9.10-3-g47a713e-dirty + version: v0.9.10-3-g47a713e This provides both a point release number and a git commit id, which will ensure precise reproducibility of results. @@ -74,12 +74,6 @@ Note the use of --recursive. This is required in order to download all nested git submodules for external repositories. 
-After you've already done the above to clone the most recent development -version, if you wish to compile a specific version of FreeBayes from, you -can then do something like the following: - - git checkout v0.9.20 && git submodule update --recursive - ### Resolving proxy issues with git Depending on your local network configuration, you may have problems obtaining diff -Nru freebayes-1.0.2/scripts/bgziptabix freebayes-1.1.0/scripts/bgziptabix --- freebayes-1.0.2/scripts/bgziptabix 1970-01-01 00:00:00.000000000 +0000 +++ freebayes-1.1.0/scripts/bgziptabix 2016-11-03 11:25:39.000000000 +0000 @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +if [ $# -ne 1 ]; +then +echo usage: $0 '[file name]' +echo runs bgzip on the input and tabix indexes the result +echo "== bgzip >[file] && tabix -fp vcf [file]" +exit +fi + +file=$1 + +bgzip >$file && tabix -fp vcf $file diff -Nru freebayes-1.0.2/scripts/fasta_generate_regions.py freebayes-1.1.0/scripts/fasta_generate_regions.py --- freebayes-1.0.2/scripts/fasta_generate_regions.py 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/scripts/fasta_generate_regions.py 2016-11-03 11:25:39.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import sys diff -Nru freebayes-1.0.2/scripts/freebayes-parallel freebayes-1.1.0/scripts/freebayes-parallel --- freebayes-1.0.2/scripts/freebayes-parallel 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/scripts/freebayes-parallel 2016-11-03 11:25:39.000000000 +0000 @@ -36,5 +36,5 @@ #$command | head -100 | grep "^#" # generate header # iterate over regions using gnu parallel to dispatch jobs cat "$regionsfile" | parallel -k -j "$ncpus" "${command[@]}" --region {} -) | vcffirstheader \ - | vcfstreamsort -w 1000 | vcfuniq # remove duplicates at region edges +) | ../vcflib/scripts/vcffirstheader \ + | ../vcflib/bin/vcfstreamsort -w 1000 | vcfuniq # remove duplicates at region edges diff -Nru freebayes-1.0.2/scripts/sam_add_rg.pl freebayes-1.1.0/scripts/sam_add_rg.pl --- freebayes-1.0.2/scripts/sam_add_rg.pl 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/scripts/sam_add_rg.pl 2016-11-03 11:25:39.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # diff -Nru freebayes-1.0.2/src/Allele.h freebayes-1.1.0/src/Allele.h --- freebayes-1.0.2/src/Allele.h 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/Allele.h 2016-11-03 11:25:39.000000000 +0000 @@ -13,10 +13,13 @@ #include #include "Utility.h" #include "convert.h" -#include "api/BamAlignment.h" + +//#ifdef HAVE_BAMTOOLS +//#include "api/BamAlignment.h" +//using namespace BamTools; +//#endif using namespace std; -using namespace BamTools; class Allele; diff -Nru freebayes-1.0.2/src/AlleleParser.cpp freebayes-1.1.0/src/AlleleParser.cpp --- freebayes-1.0.2/src/AlleleParser.cpp 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/AlleleParser.cpp 2016-11-03 11:25:39.000000000 +0000 @@ -27,21 +27,32 @@ using namespace std; +namespace { // anonymous namespace + +// Convert a std::string into an integer, ignoring any commas. 
+int stringToInt(string str) { + str.erase(remove(str.begin(), str.end(), ','), str.end()); + return atoi(str.c_str()); +} + +} // anonymous namespace + // open BAM input file void AlleleParser::openBams(void) { // report differently if we have one or many bam files if (parameters.bams.size() == 1) { - DEBUG("Opening BAM fomat alignment input file: " << parameters.bams.front() << " ..."); + DEBUG("Opening BAM format alignment input file: " << parameters.bams.front() << " ..."); } else if (parameters.bams.size() > 1) { - DEBUG("Opening " << parameters.bams.size() << " BAM fomat alignment input files"); - for (vector::const_iterator b = parameters.bams.begin(); + DEBUG("Opening " << parameters.bams.size() << " BAM format alignment input files"); + for (vector::const_iterator b = parameters.bams.begin(); b != parameters.bams.end(); ++b) { DEBUG2(*b); } } - + +#ifdef HAVE_BAMTOOLS if (parameters.useStdin) { if (!bamMultiReader.Open(parameters.bams)) { ERROR("Could not read BAM data from stdin"); @@ -74,13 +85,65 @@ } } +#else + if (parameters.useStdin) { + if (!bamMultiReader.Open("-")) { + ERROR("Could not read BAM data from stdin"); + exit(1); + } + } else { + for (std::vector::const_iterator i = parameters.bams.begin(); + i != parameters.bams.end(); ++i) + if (!bamMultiReader.Open(*i)) { + ERROR("Could not open input BAM file: " + *i); + exit(1); + } + else { + /*if (!bamMultiReader.LocateIndexes()) { + ERROR("Opened BAM reader without index file, jumping is disabled."); + cerr << bamMultiReader.GetErrorString() << endl; + if (!targets.empty()) { + ERROR("Targets specified but no BAM index file provided."); + ERROR("FreeBayes cannot jump through targets in BAM files without BAM index files, exiting."); + ERROR("Please generate a BAM index file eithe, e.g.:"); + ERROR(" \% bamtools index -in "); + ERROR(" \% samtools index "); + exit(1); + } + }*/ + } + /*if (!bamMultiReader.SetExplicitMergeOrder(bamMultiReader.MergeByCoordinate)) { + ERROR("could not set sort order to coordinate"); + cerr << bamMultiReader.GetErrorString() << endl; + exit(1); + }*/ + } +#endif + + // from PR 319 below +#ifdef HAVE_BAMTOOLS + if (!parameters.useStdin) { + BamReader reader; + for (vector::const_iterator b = parameters.bams.begin(); + b != parameters.bams.end(); ++b) { + reader.Open(*b); + string bamHeader = reader.GetHeaderText(); + vector headerLines = split(bamHeader, '\n'); + bamHeaderLines.insert(bamHeaderLines.end(), headerLines.begin(), headerLines.end()); + reader.Close(); + } + } else { + bamHeaderLines = split(bamMultiReader.GetHeaderText(), '\n'); + } +#else // retrieve header information - bamHeader = bamMultiReader.GetHeaderText(); + string bamHeader = bamMultiReader.GETHEADERTEXT; bamHeaderLines = split(bamHeader, '\n'); - DEBUG(" done"); +#endif + DEBUG(" done"); } void AlleleParser::openOutputFile(void) { @@ -131,6 +194,26 @@ cerr << "no sequencing technology specified in @RG tag (no PL: in @RG tag) " << endl << headerLine << endl; } } else { + map::iterator s = readGroupToTechnology.find(readGroupID); + if (s != readGroupToTechnology.end()) { + if (s->second != tech) { + ERROR("multiple technologies (PL) map to the same read group (RG)" << endl + << endl + << "technologies " << tech << " and " << s->second << " map to " << readGroupID << endl + << endl + << "As freebayes operates on a virtually merged stream of its input files," << endl + << "it will not be possible to determine what technology an alignment belongs to" << endl + << "at runtime." 
<< endl + << endl + << "To resolve the issue, ensure that RG ids are unique to one technology" << endl + << "across all the input files to freebayes." << endl + << endl + << "See bamaddrg (https://github.com/ekg/bamaddrg) for a method which can" << endl + << "add RG tags to alignments." << endl); + exit(1); + } + // if it's the same technology and RG combo, no worries + } readGroupToTechnology[readGroupID] = tech; technologies[tech] = true; } @@ -257,11 +340,11 @@ map::iterator s = readGroupToSampleNames.find(readGroupID); if (s != readGroupToSampleNames.end()) { if (s->second != name) { - ERROR("ERROR: multiple samples (SM) map to the same read group (RG)" << endl + ERROR("multiple samples (SM) map to the same read group (RG)" << endl << endl << "samples " << name << " and " << s->second << " map to " << readGroupID << endl << endl - << "As freebayes operates on a virtually merged stream of its input files," << endl + << "As freebayes operates on a virtually merged stream of its input files," << endl << "it will not be possible to determine what sample an alignment belongs to" << endl << "at runtime." << endl << endl @@ -323,7 +406,7 @@ //-------------------------------------------------------------------------------- << "Warning: No sample file given, and no @RG tags found in BAM header." << endl << "All alignments from all input files will be assumed to come from the same" << endl - << "individual. To group alignments by sample, you must add read groups and sample" << endl + << "individual. To group alignments by sample, you must add read groups and sample" << endl << "names to your alignments. You can do this using ./scripts/sam_add_rg.pl in the" << endl << "freebayes source tree, or by specifying read groups and sample names when you" << endl << "prepare your sequencing data for alignment." 
<< endl @@ -340,10 +423,16 @@ stringstream headerss; headerss - << "##fileformat=VCFv4.1" << endl + << "##fileformat=VCFv4.2" << endl << "##fileDate=" << dateStr() << endl << "##source=freeBayes " << VERSION_GIT << endl - << "##reference=" << reference.filename << endl + << "##reference=" << reference.filename << endl; + + for (REFVEC::const_iterator it = referenceSequences.begin(); + it != referenceSequences.end(); ++it) + headerss << "##contig=REFNAME << ",length=" << it->REFLEN << ">" << endl; + + headerss << "##phasing=none" << endl << "##commandline=\"" << parameters.commandline << "\"" << endl << "##INFO=" << endl @@ -356,8 +445,8 @@ << "##INFO=" << endl // observation counts - << "##INFO=" << endl - << "##INFO=" << endl + << "##INFO=" << endl + << "##INFO=" << endl << "##INFO=" << endl << "##INFO=" << endl @@ -435,7 +524,7 @@ << "##INFO=" << endl << "##INFO=" << endl << "##INFO=" << endl - << "##INFO=" << endl + << "##INFO=" << endl << "##INFO=" << endl; // sequencing technology tags, which vary according to input data @@ -448,18 +537,23 @@ headerss << "##INFO=" << endl; } + string gqType = "Float"; + if (parameters.strictVCF) + gqType = "Integer"; + // format fields for genotypes headerss << "##FORMAT=" << endl - << "##FORMAT=" << endl + << "##FORMAT=" << endl // this can be regenerated with RA, AA, QR, QA << "##FORMAT=" << endl //<< "##FORMAT=" << endl << "##FORMAT=" << endl + << "##FORMAT=" << endl << "##FORMAT=" << endl << "##FORMAT=" << endl << "##FORMAT=" << endl << "##FORMAT=" << endl - << "##FORMAT=" << endl + << "##FORMAT=" << endl //<< "##FORMAT=" << endl //<< "##FORMAT=" << endl //<< "##FORMAT=" << endl @@ -485,7 +579,7 @@ // variant input for analysis and targeting if (!parameters.variantPriorsFile.empty()) { variantCallInputFile.open(parameters.variantPriorsFile); - currentVariant = new vcf::Variant(variantCallInputFile); + currentVariant = new vcflib::Variant(variantCallInputFile); usingVariantInputAlleles = true; // get sample names from VCF input file @@ -517,15 +611,14 @@ //-------------------------------------------------------------------------- // store the names of all the reference sequences in the BAM file - referenceSequences = bamMultiReader.GetReferenceData(); + referenceSequences = bamMultiReader.GETREFDATA; int i = 0; - for (RefVector::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) { - referenceIDToName[i] = r->RefName; + for (REFVEC::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) { + referenceIDToName[i] = r->REFNAME; ++i; } - DEBUG("Number of ref seqs: " << bamMultiReader.GetReferenceCount()); - + DEBUG("Number of ref seqs: " << bamMultiReader.GETREFNUM); } @@ -548,58 +641,58 @@ return next.first != -1; } -bool AlleleParser::loadNextPositionWithAlignmentOrInputVariant(BamAlignment& alignment) { +bool AlleleParser::loadNextPositionWithAlignmentOrInputVariant(BAMALIGN& alignment) { + pair next = nextInputVariantPosition(); if (next.first != -1) { int varRefID = next.first; - //cerr << varRefID << " " << alignment.RefID << " " << next.second << " " << alignment.Position << endl; - if (!hasMoreAlignments || varRefID < alignment.RefID || varRefID == alignment.RefID && next.second < alignment.Position) { - return loadNextPositionWithInputVariant(); + if (!hasMoreAlignments || varRefID < alignment.REFID || (varRefID == alignment.REFID && next.second < alignment.POSITION)) { + return loadNextPositionWithInputVariant(); } else { - loadReferenceSequence(alignment); + loadReferenceSequence(alignment); } } 
else { - loadReferenceSequence(alignment); + loadReferenceSequence(alignment); } return true; } bool AlleleParser::loadNextPositionWithInputVariant(void) { - pair next = nextInputVariantPosition(); - if (next.first != -1) { - //cerr << "Next is " << next.first << ":" << next.second << endl; - loadReferenceSequence(referenceIDToName[next.first]); - currentPosition = next.second; - rightmostHaplotypeBasisAllelePosition = currentPosition; - return true; - } else { - return false; - } + pair next = nextInputVariantPosition(); + if (next.first != -1) { + //cerr << "Next is " << next.first << ":" << next.second << endl; + loadReferenceSequence(referenceIDToName[next.first]); + currentPosition = next.second; + rightmostHaplotypeBasisAllelePosition = currentPosition; + return true; + } else { + return false; + } } // alignment-based method for loading the first bit of our reference sequence -void AlleleParser::loadReferenceSequence(BamAlignment& alignment) { - loadReferenceSequence(referenceIDToName[alignment.RefID]); - currentPosition = alignment.Position; +void AlleleParser::loadReferenceSequence(BAMALIGN& alignment) { + loadReferenceSequence(referenceIDToName[alignment.REFID]); + currentPosition = alignment.POSITION; } void AlleleParser::loadReferenceSequence(string& seqname) { if (currentSequenceName != seqname) { currentSequenceName = seqname; currentSequenceStart = 0; - currentRefID = bamMultiReader.GetReferenceID(currentSequenceName); - currentSequence = uppercase(reference.getSequence(currentSequenceName)); - int i = 0; // check the first few characters and verify they are not garbage - for (string::iterator citr = currentSequence.begin(); - i < 100 && citr != currentSequence.end(); ++citr, ++i) { - char c = *citr; - if (c != 'A' && c != 'T' && c != 'G' && c != 'C' && c != 'N') { - ERROR("Found non-DNA character " << c << " at position " << i << " in " << seqname << endl - << ". Is your reference compressed or corrupted? " - << "freebayes requires an uncompressed reference sequence."); - exit(1); - } + currentRefID = bamMultiReader.GETREFID(currentSequenceName); + currentSequence = uppercase(reference.getRawSequence(currentSequenceName)); + // check the first few characters and verify they are not garbage + string validBases = "ACGTURYKMSWBDHVN-"; + size_t found = currentSequence.substr(0, 100).find_first_not_of(validBases); + if (found != string::npos) { + ERROR("Found non-DNA character " << currentSequence.at(found) + << " at position " << found << " in " << seqname << endl + << "Is your reference compressed or corrupted? 
" + << "freebayes requires an uncompressed reference sequence."); + exit(1); } + currentSequence = reference.getSequence(currentSequenceName); } } @@ -653,18 +746,18 @@ size_t foundRangeSep = region.find(sep, foundFirstColon); if (foundRangeSep == string::npos) { sep = "-"; - foundRangeSep = region.find("-", foundFirstColon); + foundRangeSep = region.find(sep, foundFirstColon); } if (foundRangeSep == string::npos) { - startPos = atoi(region.substr(foundFirstColon + 1).c_str()); + startPos = stringToInt(region.substr(foundFirstColon + 1)); // differ from bamtools in this regard, in that we process only // the specified position if a range isn't given stopPos = startPos + 1; } else { - startPos = atoi(region.substr(foundFirstColon + 1, foundRangeSep - foundFirstColon).c_str()); + startPos = stringToInt(region.substr(foundFirstColon + 1, foundRangeSep - foundFirstColon).c_str()); // if we have range sep specified, but no second number, read to the end of sequence if (foundRangeSep + sep.size() != region.size()) { - stopPos = atoi(region.substr(foundRangeSep + sep.size()).c_str()); // end-exclusive, bed-format + stopPos = stringToInt(region.substr(foundRangeSep + sep.size()).c_str()); // end-exclusive, bed-format } else { stopPos = -1; } @@ -712,12 +805,12 @@ // otherwise, if we weren't given a region string or targets file, analyze // all reference sequences from BAM file DEBUG2("no targets specified, using all targets from BAM files"); - RefVector::iterator refIter = referenceSequences.begin(); - RefVector::iterator refEnd = referenceSequences.end(); + REFVEC::iterator refIter = referenceSequences.begin(); + REFVEC::iterator refEnd = referenceSequences.end(); for( ; refIter != refEnd; ++refIter) { - RefData refData = *refIter; - string refName = refData.RefName; - BedTarget bd(refName, 0, refData.RefLength); // 0-based inclusive internally + REFDATA refData = *refIter; + string refName = refData.REFNAME; + BedTarget bd(refName, 0, refData.REFLEN); // 0-based inclusive internally DEBUG2("will process reference sequence " << refName << ":" << bd.left << ".." << bd.right + 1); targets.push_back(bd); } @@ -740,11 +833,11 @@ // in the sampleCNV map. note that the reference "sample" is named after // the current reference sequence. 
if (!parameters.diploidReference) { - for (RefVector::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) { - sampleCNV.setPloidy(referenceSampleName, r->RefName, 0, r->RefLength, 1); + for (REFVEC::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) { + sampleCNV.setPloidy(referenceSampleName, r->REFNAME, 0, r->REFLEN, 1); } } - + } int AlleleParser::currentSamplePloidy(string const& sample) { @@ -851,8 +944,8 @@ } // position of alignment relative to current sequence -int AlleleParser::currentSequencePosition(const BamAlignment& alignment) { - return alignment.Position - currentSequenceStart; +int AlleleParser::currentSequencePosition(const BAMALIGN& alignment) { + return alignment.POSITION - currentSequenceStart; } // relative current position within the cached currentSequence @@ -900,14 +993,14 @@ } } -void capBaseQuality(BamAlignment& alignment, int baseQualityCap) { - string& rQual = alignment.Qualities; - char qualcap = qualityInt2Char(baseQualityCap); - for (string::iterator c = rQual.begin(); c != rQual.end(); ++c) { - if (qualityChar2ShortInt(*c) > baseQualityCap) { - *c = qualcap; - } +void capBaseQuality(BAMALIGN& alignment, int baseQualityCap) { + string rQual = alignment.QUALITIES; + char qualcap = qualityInt2Char(baseQualityCap); + for (string::iterator c = rQual.begin(); c != rQual.end(); ++c) { + if (qualityChar2ShortInt(*c) > baseQualityCap) { + *c = qualcap; } + } } void RegisteredAlignment::addAllele(Allele newAllele, bool mergeComplex, int maxComplexGap, bool boundIndels) { @@ -946,7 +1039,7 @@ Allele& lastAllele = alleles.back(); if (isEmptyAllele(newAllele) || - newAllele.isReference() && newAllele.referenceLength == 0) { + (newAllele.isReference() && newAllele.referenceLength == 0)) { // do nothing } else if (newAllele.isReference() && isUnflankedIndel(lastAllele)) { // add flanking base to indel, ensuring haplotype length of 2 for all indels @@ -1074,7 +1167,7 @@ } else { AlleleType atype = ALLELE_COMPLEX; if (lastAllele.isSNP() || lastAllele.isMNP()) { - if (lastCigar.back().second == "X" && newAllele.isSNP() || newAllele.isMNP()) { + if ((lastCigar.back().second == "X" && newAllele.isSNP()) || newAllele.isMNP()) { atype = ALLELE_MNP; } } @@ -1108,10 +1201,10 @@ pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW + 1)) { //cerr << "the vcf line " << haplotypeVariantInputFile.line << endl; // get the variants in the target region - vcf::Variant var(haplotypeVariantInputFile); + vcflib::Variant var(haplotypeVariantInputFile); while (haplotypeVariantInputFile.getNextVariant(var)) { //cerr << "input variant: " << var << endl; - + // the following stanza is for parsed // alternates. 
instead use whole haplotype calls, as // alternates can be parsed prior to providing the @@ -1122,9 +1215,9 @@ } */ - map > variants = var.parsedAlternates(); - for (map >::iterator a = variants.begin(); a != variants.end(); ++a) { - for (vector::iterator v = a->second.begin(); v != a->second.end(); ++v) { + map > variants = var.parsedAlternates(); + for (map >::iterator a = variants.begin(); a != variants.end(); ++a) { + for (vector::iterator v = a->second.begin(); v != a->second.end(); ++v) { //cerr << v->ref << "/" << v->alt << endl; if (v->ref != v->alt) { //cerr << "basis allele " << v->position << " " << v->ref << "/" << v->alt << endl; @@ -1176,13 +1269,12 @@ int basesRight, string& readSequence, string& sampleName, - BamAlignment& alignment, + BAMALIGN& alignment, string& sequencingTech, long double qual, string& qualstr ) { - string cigar; int reflen = length; @@ -1220,7 +1312,7 @@ // NB, if we are using haplotype basis alleles the algorithm forces // alleles that aren't in the haplotype basis set into the reference space if (type != ALLELE_REFERENCE - && type != ALLELE_NULL + && type != ALLELE_NULL && !allowedHaplotypeBasisAllele(pos + 1, refSequence, readSequence)) { @@ -1284,7 +1376,7 @@ while (minEntropy > 0 && // ignore if turned off repeatRightBoundary - currentSequenceStart < currentSequence.size() && //guard entropy(currentSequence.substr(start, repeatRightBoundary - pos)) < minEntropy) { - //cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, "; + //cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, "; //cerr << "increasing rought boundary to "; ++repeatRightBoundary; //cerr << repeatRightBoundary << endl; @@ -1299,6 +1391,8 @@ } } + string qnamer = alignment.QNAME; + return Allele(type, currentSequenceName, pos, @@ -1310,58 +1404,58 @@ basesRight, readSequence, sampleName, - alignment.Name, + qnamer, ra.readgroup, sequencingTech, - !alignment.IsReverseStrand(), + !alignment.ISREVERSESTRAND, max(qual, (long double) 0), // ensure qual is at least 0 qualstr, - alignment.MapQuality, - alignment.IsPaired(), - alignment.IsMateMapped(), - alignment.IsProperPair(), + alignment.MAPPINGQUALITY, + alignment.ISPAIRED, + alignment.ISMATEMAPPED, + alignment.ISPROPERPAIR, cigar, &ra.alleles, - alignment.Position, - alignment.GetEndPosition()); + alignment.POSITION, + alignment.ENDPOSITION); } -RegisteredAlignment& AlleleParser::registerAlignment(BamAlignment& alignment, RegisteredAlignment& ra, string& sampleName, string& sequencingTech) { +RegisteredAlignment& AlleleParser::registerAlignment(BAMALIGN& alignment, RegisteredAlignment& ra, string& sampleName, string& sequencingTech) { - string rDna = alignment.QueryBases; - string rQual = alignment.Qualities; + string rDna = alignment.QUERYBASES; + string rQual = alignment.QUALITIES; int rp = 0; // read position, 0-based relative to read int csp = currentSequencePosition(alignment); // current sequence position, 0-based relative to currentSequence - int sp = alignment.Position; // sequence position - + int sp = alignment.POSITION; // sequence position if (usingHaplotypeBasisAlleles) { - updateHaplotypeBasisAlleles(sp, alignment.AlignedBases.size()); + updateHaplotypeBasisAlleles(sp, alignment.ALIGNEDBASES); } #ifdef VERBOSE_DEBUG if (parameters.debug2) { DEBUG2("registering alignment " << rp << " " << csp << " " << sp << endl << - "alignment readName " << alignment.Name << endl << - "alignment isPaired " << 
alignment.IsPaired() << endl << - "alignment isMateMapped " << alignment.IsMateMapped() << endl << - "alignment isProperPair " << alignment.IsProperPair() << endl << - "alignment mapQual " << alignment.MapQuality << endl << - "alignment sampleID " << sampleName << endl << - "alignment position " << alignment.Position << endl << - "alignment length " << alignment.Length << endl << - "alignment AlignedBases.size() " << alignment.AlignedBases.size() << endl << - "alignment GetEndPosition() " << alignment.GetEndPosition() << endl << - "alignment end position " << alignment.Position + alignment.AlignedBases.size()); + "alignment readName " << alignment.QNAME << endl << + "alignment isPaired " << alignment.ISPAIRED << endl << + "alignment isMateMapped " << alignment.ISMATEMAPPED << endl << + "alignment isProperPair " << alignment.ISPROPERPAIR << endl << + "alignment mapQual " << alignment.MAPPINGQUALITY << endl << + "alignment sampleID " << sampleName << endl << + "alignment position " << alignment.POSITION << endl << + "alignment length " << alignment.ALIGNMENTLENGTH << endl << + "alignment AlignedBases.size() " << alignment.ALIGNEDBASES << endl << + "alignment GetEndPosition() " << alignment.ENDPOSITION << endl << + "alignment end position " << alignment.POSITION + alignment.ALIGNEDBASES); stringstream cigarss; int alignedLength = 0; - for (vector::const_iterator c = alignment.CigarData.begin(); c != alignment.CigarData.end(); ++c) { - cigarss << c->Type << c->Length; - if (c->Type == 'D') - alignedLength += c->Length; - if (c->Type == 'M') - alignedLength += c->Length; + CIGAR cig = alignment.GETCIGAR; + for (CIGAR::const_iterator c = cig.begin(); c != cig.end(); ++c) { + cigarss << c->CIGTYPE << c->CIGLEN; + if (c->CIGTYPE == 'D') + alignedLength += c->CIGLEN; + if (c->CIGTYPE == 'M') + alignedLength += c->CIGLEN; } DEBUG2("alignment cigar " << cigarss.str()); @@ -1369,9 +1463,9 @@ DEBUG2("current sequence pointer: " << csp); DEBUG2("read: " << rDna); - DEBUG2("aligned bases: " << alignment.AlignedBases); - DEBUG2("qualities: " << alignment.Qualities); - DEBUG2("reference seq: " << currentSequence.substr(csp, alignment.AlignedBases.size())); + DEBUG2("aligned bases: " << alignment.QUERYBASES); + DEBUG2("qualities: " << alignment.QUALITIES); + DEBUG2("reference seq: " << currentSequence.substr(csp, alignment.ALIGNEDBASES)); } #endif @@ -1381,7 +1475,7 @@ * * Also, we don't store the entire undelying sequence; just the subsequence * that matches our current target region. - * + * * As we step through a match sequence, we look for mismatches. 
When we * see one we set a positional flag indicating the location, and we emit a * 'Reference' allele that stretches from the the base after the last @@ -1394,14 +1488,24 @@ * */ - vector indelMask (alignment.AlignedBases.size(), false); + /* std::cerr << "********" << std::endl + << alignment.QueryBases << std::endl + << alignment.AlignedBases << std::endl; + vector::const_iterator cigarIter2 = alignment.CigarData.begin(); + vector::const_iterator cigarEnd2 = alignment.CigarData.end(); + for (; cigarIter2 != cigarEnd2; ++cigarIter2) + std::cerr << cigarIter2->Length << cigarIter2->Type; + std::cerr << std::endl; + */ + vector indelMask (alignment.ALIGNEDBASES, false); - vector::const_iterator cigarIter = alignment.CigarData.begin(); - vector::const_iterator cigarEnd = alignment.CigarData.end(); + CIGAR cigar = alignment.GETCIGAR; + CIGAR::const_iterator cigarIter = cigar.begin(); + CIGAR::const_iterator cigarEnd = cigar.end(); for ( ; cigarIter != cigarEnd; ++cigarIter ) { - int l = cigarIter->Length; - char t = cigarIter->Type; - DEBUG2("cigar item: " << t << l); + int l = cigarIter->CIGLEN; + char t = cigarIter->CIGTYPE; + DEBUG2("cigar item: " << t << l); if (t == 'M' || t == 'X' || t == '=') { // match or mismatch int firstMatch = csp; // track the first match after a mismatch, for recording 'reference' alleles @@ -1421,10 +1525,11 @@ b = rDna.at(rp); } catch (std::out_of_range outOfRange) { cerr << "Exception: Cannot read past the end of the alignment's sequence." << endl - << alignment.Name << endl + << alignment.QNAME << endl << currentSequenceName << ":" << (long unsigned int) currentPosition + 1 << endl - << alignment.AlignedBases << endl - << currentSequence.substr(csp, alignment.AlignedBases.size()) << endl; + //<< alignment.AlignedBases << endl + << currentSequence.substr(csp, alignment.ALIGNEDBASES) << endl; + cerr << " RP " << rp << " " << rDna <<" len " << rDna.length() << std::endl; abort(); } @@ -1436,13 +1541,13 @@ try { sb = currentSequence.at(csp); } catch (std::out_of_range outOfRange) { - cerr << "Exception: Unable to read reference sequence base past end of current cached sequence." << endl + cerr << "Exception: Unable to read reference sequence base past end of current cached sequence." 
<< endl << currentSequenceName << ":" << (long unsigned int) currentPosition + 1 << endl - << alignment.Position << "-" << alignment.GetEndPosition() << endl - << "alignment: " << alignment.AlignedBases << endl - << "currentSequence: " << currentSequence << endl - << "currentSequence matching: " << currentSequence.substr(csp, alignment.AlignedBases.size()) << endl; - //abort(); + << alignment.POSITION << "-" << alignment.ENDPOSITION << endl + //<< "alignment: " << alignment.AlignedBases << endl + << "currentSequence: " << currentSequence << endl + << "currentSequence matching: " << currentSequence.substr(csp, alignment.ALIGNEDBASES) << endl; + //abort(); break; } @@ -1460,12 +1565,12 @@ sp - length, length, rp, // bases left (for first base in ref allele) - alignment.QueryBases.size() - rp, // bases right (for first base in ref allele) + alignment.SEQLEN - rp, // bases right (for first base in ref allele) readSequence, sampleName, alignment, sequencingTech, - alignment.MapQuality, // reference allele quality == mapquality + alignment.MAPPINGQUALITY, // reference allele quality == mapquality qualstr), parameters.allowComplex, parameters.maxComplexGap); } @@ -1503,7 +1608,7 @@ sp - length + j, 1, rp - length - j, // bases left - alignment.QueryBases.size() - rp + j, // bases right + alignment.SEQLEN - rp + j, // bases right rs, sampleName, alignment, @@ -1511,7 +1616,7 @@ lqual, qualp), parameters.allowComplex, parameters.maxComplexGap); - + } else { ra.addAllele( makeAllele(ra, @@ -1519,7 +1624,7 @@ sp - length + j, 1, rp - length - j, // bases left - alignment.QueryBases.size() - rp + j, // bases right + alignment.SEQLEN - rp + j, // bases right rs, sampleName, alignment, @@ -1553,7 +1658,7 @@ sp - length + j, 1, rp - length - j, // bases left - alignment.QueryBases.size() - rp + j, // bases right + alignment.SEQLEN - rp + j, // bases right rs, sampleName, alignment, @@ -1561,7 +1666,7 @@ lqual, qualp), parameters.allowComplex, parameters.maxComplexGap); - + } else { ra.addAllele( makeAllele(ra, @@ -1569,7 +1674,7 @@ sp - length + j, 1, rp - length - j, // bases left - alignment.QueryBases.size() - rp + j, // bases right + alignment.SEQLEN - rp + j, // bases right rs, sampleName, alignment, @@ -1592,12 +1697,12 @@ sp - length, length, rp, // bases left (for first base in ref allele) - alignment.QueryBases.size() - rp, // bases right (for first base in ref allele) + alignment.SEQLEN - rp, // bases right (for first base in ref allele) readSequence, sampleName, alignment, sequencingTech, - alignment.MapQuality, // ... hmm + alignment.MAPPINGQUALITY, // ... 
hmm qualstr), parameters.allowComplex, parameters.maxComplexGap); } @@ -1655,7 +1760,7 @@ if (qual >= parameters.BQL2) { //ra.mismatches += l; for (int i=0; i= parameters.BQL2) { //ra.mismatches += l; - indelMask[sp - alignment.Position] = true; + indelMask[sp - alignment.POSITION] = true; } string readseq = rDna.substr(rp, l); @@ -1747,7 +1853,7 @@ sp, l, rp - l, // bases left (for first base in ref allele) - alignment.QueryBases.size() - rp, // bases right (for first base in ref allele) + alignment.SEQLEN - rp, // bases right (for first base in ref allele) readseq, sampleName, alignment, @@ -1766,7 +1872,7 @@ // nothing to do, soft clip is beyond the beginning of the reference } else { string qualstr = rQual.substr(rp, l); - string readseq = alignment.QueryBases.substr(rp, l); + string readseq = alignment.QUERYBASES.substr(rp, l); // skip these bases in the read ra.addAllele( makeAllele(ra, @@ -1774,12 +1880,12 @@ sp - l, l, rp - l, // bases left - alignment.QueryBases.size() - rp, // bases right + alignment.SEQLEN - rp, // bases right readseq, sampleName, alignment, sequencingTech, - alignment.MapQuality, + alignment.MAPPINGQUALITY, qualstr), parameters.allowComplex, parameters.maxComplexGap); } @@ -1906,8 +2012,8 @@ bool gettingPartials) { DEBUG2("updating alignment queue"); - DEBUG2("currentPosition = " << position - << "; currentSequenceStart = " << currentSequenceStart + DEBUG2("currentPosition = " << position + << "; currentSequenceStart = " << currentSequenceStart << "; currentSequence end = " << currentSequence.size() + currentSequenceStart); // make sure we have sequence for the *first* alignment @@ -1916,25 +2022,30 @@ // push to the front until we get to an alignment that doesn't overlap our // current position or we reach the end of available alignments // filter input reads; only allow mapped reads with a certain quality - DEBUG2("currentAlignment.Position == " << currentAlignment.Position - << ", currentAlignment.AlignedBases.size() == " << currentAlignment.AlignedBases.size() + DEBUG2("currentAlignment.Position == " << currentAlignment.POSITION + << ", currentAlignment.AlignedBases.size() == " << currentAlignment.ALIGNEDBASES << ", currentPosition == " << position << ", currentSequenceStart == " << currentSequenceStart << " .. 
+ currentSequence.size() == " << currentSequenceStart + currentSequence.size() ); if (hasMoreAlignments - && currentAlignment.Position <= position - && currentAlignment.RefID == currentRefID) { + && currentAlignment.POSITION <= position + && currentAlignment.REFID == currentRefID) { do { DEBUG2("top of alignment parsing loop"); - DEBUG("alignment: " << currentAlignment.Name); + DEBUG("alignment: " << currentAlignment.QNAME); // get read group, and map back to a sample name string readGroup; +#ifdef HAVE_BAMTOOLS if (!currentAlignment.GetTag("RG", readGroup)) { +#else + readGroup = currentAlignment.GetZTag("RG"); + if (readGroup.empty()) { +#endif if (!oneSampleAnalysis) { ERROR("Couldn't find read group id (@RG tag) for BAM Alignment " << - currentAlignment.Name << " at position " << position + currentAlignment.QNAME << " at position " << position << " in sequence " << currentSequence << " EXITING!"); exit(1); } else { @@ -1943,7 +2054,7 @@ } else { if (oneSampleAnalysis) { ERROR("No read groups specified in BAM header, but alignment " << - currentAlignment.Name << " at position " << position + currentAlignment.QNAME << " at position " << position << " in sequence " << currentSequence << " has a read group."); exit(1); } @@ -1956,33 +2067,37 @@ } // skip this alignment if we are not using duplicate reads (we remove them by default) - if (currentAlignment.IsDuplicate() && !parameters.useDuplicateReads) { - //DEBUG("skipping alignment " << currentAlignment.Name << " because it is a duplicate read"); + if (currentAlignment.ISDUPLICATE && !parameters.useDuplicateReads) { + DEBUG("skipping alignment " << currentAlignment.QNAME << " because it is a duplicate read"); continue; } // skip unmapped alignments, as they cannot be used in the algorithm - if (!currentAlignment.IsMapped()) { - //DEBUG("skipping alignment " << currentAlignment.Name << " because it is not mapped"); + if (!currentAlignment.ISMAPPED) { + DEBUG("skipping alignment " << currentAlignment.QNAME << " because it is not mapped"); continue; } // skip alignments which have no aligned bases - if (currentAlignment.AlignedBases.size() == 0) { - //DEBUG("skipping alignment " << currentAlignment.Name << " because it has no aligned bases"); + if (currentAlignment.ALIGNEDBASES == 0) { + DEBUG("skipping alignment " << currentAlignment.QNAME << " because it has no aligned bases"); continue; } // skip alignments which are non-primary +#ifdef HAVE_BAMTOOLS if (!currentAlignment.IsPrimaryAlignment()) { +#else + if (currentAlignment.SecondaryFlag()) { +#endif //DEBUG("skipping alignment " << currentAlignment.Name << " because it is not marked primary"); continue; } - if (!gettingPartials && currentAlignment.GetEndPosition() < position) { - cerr << currentAlignment.Name << " at " << currentSequenceName << ":" << currentAlignment.Position << " is out of order!" - << " expected after " << position << endl; - continue; + if (!gettingPartials && currentAlignment.ENDPOSITION < position) { + cerr << currentAlignment.QNAME << " at " << currentSequenceName << ":" << currentAlignment.POSITION << " is out of order!" 
+ << " expected after " << position << endl; + continue; } // otherwise, get the sample name and register the alignment to generate a sequence of alleles @@ -1990,12 +2105,12 @@ // such as mismatches // initially skip reads with low mapping quality (what happens if MapQuality is not in the file) - if (currentAlignment.MapQuality >= parameters.MQL0) { + if (currentAlignment.MAPPINGQUALITY >= parameters.MQL0) { // extend our cached reference sequence to allow processing of this alignment //extendReferenceSequence(currentAlignment); // left realign indels if (parameters.leftAlignIndels) { - int length = currentAlignment.GetEndPosition() - currentAlignment.Position + 1; + int length = currentAlignment.ENDPOSITION - currentAlignment.POSITION + 1; stablyLeftAlign(currentAlignment, currentSequence.substr(currentSequencePosition(currentAlignment), length)); } @@ -2012,7 +2127,8 @@ } // decomposes alignment into a set of alleles // here we get the deque of alignments ending at this alignment's end position - deque& rq = registeredAlignments[currentAlignment.GetEndPosition()]; + deque& rq = registeredAlignments[currentAlignment.ENDPOSITION]; + // and insert the registered alignment into that deque rq.push_front(RegisteredAlignment(currentAlignment, parameters)); RegisteredAlignment& ra = rq.front(); @@ -2020,7 +2136,7 @@ // backtracking if we have too many mismatches // or if there are no recorded alleles if (ra.alleles.empty() - || ((float) ra.mismatches / (float) currentAlignment.QueryBases.size()) > parameters.readMaxMismatchFraction + || ((float) ra.mismatches / (float) currentAlignment.SEQLEN) > parameters.readMaxMismatchFraction || ra.mismatches > parameters.RMU || ra.snpCount > parameters.readSnpLimit || ra.indelCount > parameters.readIndelLimit) { @@ -2031,10 +2147,10 @@ newAlleles.push_back(&*allele); } } - } - } while ((hasMoreAlignments = bamMultiReader.GetNextAlignment(currentAlignment)) - && currentAlignment.Position <= position - && currentAlignment.RefID == currentRefID); + } + } while ((hasMoreAlignments = GETNEXT(bamMultiReader, currentAlignment)) + && currentAlignment.POSITION <= position + && currentAlignment.REFID == currentRefID); } DEBUG2("... finished pushing new alignments"); @@ -2086,7 +2202,8 @@ return make_pair(currentRefID, ic->first); } else { // find next chrom with input alleles - map > >::iterator nc = inputVariantAlleles.upper_bound(currentRefID); + map > >::iterator nc + = inputVariantAlleles.upper_bound(currentRefID); if (nc != inputVariantAlleles.end()) { return make_pair(nc->first, nc->second.begin()->first); } else { @@ -2107,35 +2224,39 @@ if (!usingVariantInputAlleles) return; // get the variants in the target region - vcf::Variant var(variantCallInputFile); + vcflib::Variant var(variantCallInputFile); if (!seq.empty()) { variantCallInputFile.setRegion(seq, start, end); } bool ok; - while (ok = variantCallInputFile.getNextVariant(*currentVariant)) { + while ((ok = variantCallInputFile.getNextVariant(*currentVariant))) { long int pos = currentVariant->position - 1; // get alternate alleles bool includePreviousBaseForIndels = true; - map > variantAlleles = currentVariant->parsedAlternates(); + map > variantAlleles = currentVariant->parsedAlternates(); // TODO this would be a nice option: why does it not work? 
- //map > variantAlleles = currentVariant->flatAlternates(); - vector< vector > orderedVariantAlleles; - for (vector::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) { + //map > variantAlleles = currentVariant->flatAlternates(); + vector< vector > orderedVariantAlleles; + for (vector::iterator a = currentVariant->alt.begin(); + a != currentVariant->alt.end(); ++a) { orderedVariantAlleles.push_back(variantAlleles[*a]); } vector genotypeAlleles; set alternatePositions; - for (vector< vector >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) { + for (vector< vector >::iterator + g = orderedVariantAlleles.begin(); + g != orderedVariantAlleles.end(); ++g) { - vector& altAllele = *g; + vector& altAllele = *g; vector alleles; - for (vector::iterator v = altAllele.begin(); v != altAllele.end(); ++v) { - vcf::VariantAllele& variant = *v; + for (vector::iterator v = altAllele.begin(); + v != altAllele.end(); ++v) { + vcflib::VariantAllele& variant = *v; long int allelePos = variant.position - 1; AlleleType type; string alleleSequence = variant.alt; @@ -2172,9 +2293,9 @@ allelePos -= 1; reflen = len + 2; alleleSequence = - uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos, 1)) + reference.getSubSequence(currentVariant->sequenceName, allelePos, 1) + alleleSequence - + uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos+1+len, 1)); + + reference.getSubSequence(currentVariant->sequenceName, allelePos+1+len, 1); cigar = "1M" + convert(len) + "D" + "1M"; } else { // we always include the flanking bases for these elsewhere, so here too in order to be consistent and trigger use @@ -2182,9 +2303,9 @@ // add previous base and post base to match format typically used for calling allelePos -= 1; alleleSequence = - uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos, 1)) + reference.getSubSequence(currentVariant->sequenceName, allelePos, 1) + alleleSequence - + uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos+1, 1)); + + reference.getSubSequence(currentVariant->sequenceName, allelePos+1, 1); len = variant.alt.size() - var.ref.size(); cigar = "1M" + convert(len) + "I" + "1M"; reflen = 2; @@ -2199,7 +2320,7 @@ genotypeAlleles.push_back(allele); if (allele.type != ALLELE_REFERENCE) { - inputVariantAlleles[bamMultiReader.GetReferenceID(currentVariant->sequenceName)][allele.position].push_back(allele); + inputVariantAlleles[bamMultiReader.GETREFID(currentVariant->sequenceName)][allele.position].push_back(allele); alternatePositions.insert(allele.position); } } @@ -2240,18 +2361,18 @@ if (gotRegion) { // get the variants in the target region - vcf::Variant var(variantCallInputFile); + vcflib::Variant var(variantCallInputFile); bool ok; - while (ok = variantCallInputFile.getNextVariant(*currentVariant)) { + while ((ok = variantCallInputFile.getNextVariant(*currentVariant))) { DEBUG("getting input alleles from input VCF at position " << currentVariant->sequenceName << ":" << currentVariant->position); long int pos = currentVariant->position - 1; // get alternate alleles bool includePreviousBaseForIndels = true; - map > variantAlleles = currentVariant->parsedAlternates(); + map > variantAlleles = currentVariant->parsedAlternates(); // TODO this would be a nice option: why does it not work? 
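// Illustrative sketch, not part of the patch: the padded form of the indel
// alleles built above from the input VCF.  Each event is wrapped in one
// matching reference base on each side and the CIGAR spells that padding out
// ("1M ... 1M").  makePaddedIndel() and its arguments are hypothetical names;
// the patch builds these strings inline with reference.getSubSequence() and
// convert().
#include <string>

struct PaddedIndel { std::string seq; std::string cigar; int reflen; };

// prevBase: reference base immediately before the event
// nextBase: reference base immediately after the event
// altSeq:   unpadded alternate sequence from parsedAlternates() ("" for a pure deletion)
// len:      event length in bp
static PaddedIndel makePaddedIndel(char prevBase, char nextBase,
                                   const std::string& altSeq,
                                   int len, bool isInsertion) {
    PaddedIndel p;
    p.seq = std::string(1, prevBase) + altSeq + std::string(1, nextBase);
    if (isInsertion) {
        p.cigar = "1M" + std::to_string(len) + "I" + "1M";
        p.reflen = 2;           // only the two padding bases exist in the reference
    } else {
        p.cigar = "1M" + std::to_string(len) + "D" + "1M";
        p.reflen = len + 2;     // the deleted bases plus the two padding bases
    }
    return p;
}
// e.g. a 2 bp deletion flanked by A and G -> seq "AG", cigar "1M2D1M", reflen 4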
- //map > variantAlleles = currentVariant->flatAlternates(); - vector< vector > orderedVariantAlleles; + //map > variantAlleles = currentVariant->flatAlternates(); + vector< vector > orderedVariantAlleles; for (vector::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) { orderedVariantAlleles.push_back(variantAlleles[*a]); } @@ -2259,14 +2380,14 @@ vector genotypeAlleles; set alternatePositions; - for (vector< vector >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) { + for (vector< vector >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) { - vector& altAllele = *g; + vector& altAllele = *g; vector alleles; - for (vector::iterator v = altAllele.begin(); v != altAllele.end(); ++v) { - vcf::VariantAllele& variant = *v; + for (vector::iterator v = altAllele.begin(); v != altAllele.end(); ++v) { + vcflib::VariantAllele& variant = *v; long int allelePos = variant.position - 1; AlleleType type; string alleleSequence = variant.alt; @@ -2303,9 +2424,9 @@ allelePos -= 1; reflen = len + 2; alleleSequence = - uppercase(reference.getSubSequence(currentSequenceName, allelePos, 1)) + reference.getSubSequence(currentSequenceName, allelePos, 1) + alleleSequence - + uppercase(reference.getSubSequence(currentSequenceName, allelePos+1+len, 1)); + + reference.getSubSequence(currentSequenceName, allelePos+1+len, 1); cigar = "1M" + convert(len) + "D" + "1M"; } else { // we always include the flanking bases for these elsewhere, so here too in order to be consistent and trigger use @@ -2313,9 +2434,9 @@ // add previous base and post base to match format typically used for calling allelePos -= 1; alleleSequence = - uppercase(reference.getSubSequence(currentSequenceName, allelePos, 1)) + reference.getSubSequence(currentSequenceName, allelePos, 1) + alleleSequence - + uppercase(reference.getSubSequence(currentSequenceName, allelePos+1, 1)); + + reference.getSubSequence(currentSequenceName, allelePos+1, 1); len = variant.alt.size() - var.ref.size(); cigar = "1M" + convert(len) + "I" + "1M"; reflen = 2; @@ -2329,7 +2450,7 @@ genotypeAlleles.push_back(allele); if (allele.type != ALLELE_REFERENCE) { - inputVariantAlleles[bamMultiReader.GetReferenceID(allele.referenceName)][allele.position].push_back(allele); + inputVariantAlleles[bamMultiReader.GETREFID(allele.referenceName)][allele.position].push_back(allele); alternatePositions.insert(allele.position); } @@ -2534,7 +2655,7 @@ if (!loadTarget(++currentTarget)) { continue; } - if (ok = getFirstAlignment()) { + if ((ok = getFirstAlignment())) { break; } } @@ -2592,11 +2713,18 @@ currentPosition = currentTarget->left; rightmostHaplotypeBasisAllelePosition = currentTarget->left; +#ifdef HAVE_BAMTOOLS if (!bamMultiReader.SetRegion(currentRefID, currentTarget->left, currentRefID, currentTarget->right + 1)) { // bamtools expects 0-based, half-open ERROR("Could not SetRegion to " << currentTarget->seq << ":" << currentTarget->left << ".." << currentTarget->right + 1); cerr << bamMultiReader.GetErrorString() << endl; return false; } +#else + if (!bamMultiReader.SetRegion(SeqLib::GenomicRegion(currentRefID, currentTarget->left, currentTarget->right + 1))) { // bamtools expects 0-based, half-open + ERROR("Could not SetRegion to " << currentTarget->seq << ":" << currentTarget->left << ".." 
<< currentTarget->right + 1); + return false; + } +#endif if (variantCallInputFile.is_open()) { stringstream r; @@ -2609,7 +2737,7 @@ currentTarget->right + 1); //return false; } else { - DEBUG("set region of variant input file to " << + DEBUG("set region of variant input file to " << currentTarget->seq << ":" << currentTarget->left << ".." << currentTarget->right + 1); } @@ -2627,15 +2755,15 @@ bool AlleleParser::getFirstAlignment(void) { bool hasAlignments = true; - if (!bamMultiReader.GetNextAlignment(currentAlignment)) { - hasAlignments = false; + if (!GETNEXT(bamMultiReader, currentAlignment)) { + hasAlignments = false; } else { - while (!currentAlignment.IsMapped()) { - if (!bamMultiReader.GetNextAlignment(currentAlignment)) { - hasAlignments = false; - break; - } - } + while (!currentAlignment.ISMAPPED) { + if (!GETNEXT(bamMultiReader, currentAlignment)) { + hasAlignments = false; + break; + } + } } if (hasAlignments) { @@ -2718,9 +2846,9 @@ if (parameters.useStdin || targets.empty()) { // here we loop over unaligned reads at the beginning of a target // we need to get to a mapped read to figure out where we are - while (hasMoreAlignments && !currentAlignment.IsMapped()) { - hasMoreAlignments = bamMultiReader.GetNextAlignment(currentAlignment); - } + while (hasMoreAlignments && !currentAlignment.ISMAPPED) { + hasMoreAlignments = GETNEXT(bamMultiReader, currentAlignment); + } // determine if we have more alignments or not if (!hasMoreAlignments) { if (hasMoreInputVariants()) { @@ -2739,11 +2867,13 @@ } } else { // step the position - ++currentPosition; + if (!first_pos) { + ++currentPosition; + } // if the current position of this alignment is outside of the reference sequence length // we need to switch references if (currentPosition >= reference.sequenceLength(currentSequenceName) - || registeredAlignments.empty() && currentRefID != currentAlignment.RefID) { + || (registeredAlignments.empty() && currentRefID != currentAlignment.REFID)) { DEBUG("at end of sequence"); clearRegisteredAlignments(); loadNextPositionWithAlignmentOrInputVariant(currentAlignment); @@ -2752,7 +2882,9 @@ } } else { // or if it's not we should step to the next position - ++currentPosition; + if (!first_pos) { + ++currentPosition; + } // if we've run off the right edge of a target, jump if (currentPosition > currentTarget->right) { // time to move to a new target @@ -2835,8 +2967,7 @@ return false; } - while (bamMultiReader.GetNextAlignment(currentAlignment)) { - } + while (GETNEXT(bamMultiReader, currentAlignment)) { } return true; } @@ -2880,7 +3011,7 @@ vector newAlleles; int haplotypeEnd = haplotypeStart + haplotypeLength; - + //if (containedAlleleTypes == ALLELE_REFERENCE) { // return false; //} @@ -3086,8 +3217,8 @@ deque& ras = registeredAlignments[i]; for (deque::iterator r = ras.begin(); r != ras.end(); ++r) { RegisteredAlignment& ra = *r; - if (ra.start > currentPosition && ra.start < currentPosition + haplotypeLength - || ra.end > currentPosition && ra.end < currentPosition + haplotypeLength) { + if ((ra.start > currentPosition && ra.start < currentPosition + haplotypeLength) + || (ra.end > currentPosition && ra.end < currentPosition + haplotypeLength)) { Allele* aptr; bool allowPartials = true; ra.fitHaplotype(currentPosition, haplotypeLength, aptr, allowPartials); @@ -3138,7 +3269,7 @@ // for each non-reference allele within the haplotype length of this // position, adjust the length and reference sequences of the adjacent - // alleles + // alleles DEBUG("fitting haplotype block " << 
currentPosition << " to " << currentPosition + haplotypeLength << ", " << haplotypeLength << "bp"); lastHaplotypeLength = haplotypeLength; @@ -3224,7 +3355,7 @@ */ Allele refAllele = genotypeAllele(ALLELE_REFERENCE, - uppercase(reference.getSubSequence(currentSequenceName, currentPosition, haplotypeLength)), + reference.getSubSequence(currentSequenceName, currentPosition, haplotypeLength), haplotypeLength, convert(haplotypeLength)+"M", haplotypeLength, @@ -3346,7 +3477,7 @@ (*p)->setQuality(); (*p)->update(haplotypeLength); } - + // now add in partial observations collected from partially-overlapping reads if (!partialHaplotypeObservations.empty()) { samples.assignPartialSupport(alleles, @@ -3397,7 +3528,7 @@ } -bool AlleleParser::getCompleteObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& haplotypeObservations) { +void AlleleParser::getCompleteObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& haplotypeObservations) { for (map >::iterator ras = registeredAlignments.begin(); ras != registeredAlignments.end(); ++ras) { deque& rq = ras->second; for (deque::iterator rai = rq.begin(); rai != rq.end(); ++rai) { @@ -3439,7 +3570,7 @@ // process the next length bp of alignments, so as to get allele observations partially overlapping our calling window -bool AlleleParser::getPartialObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& partials) { +void AlleleParser::getPartialObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& partials) { //cerr << "getting partial observations of haplotype from " << currentPosition << " to " << currentPosition + haplotypeLength << endl; vector newAlleles; @@ -3458,8 +3589,8 @@ deque& ras = registeredAlignments[i]; for (deque::iterator r = ras.begin(); r != ras.end(); ++r) { RegisteredAlignment& ra = *r; - if (ra.start > currentPosition && ra.start < currentPosition + haplotypeLength - || ra.end > currentPosition && ra.end < currentPosition + haplotypeLength) { + if ((ra.start > currentPosition && ra.start < currentPosition + haplotypeLength) + || (ra.end > currentPosition && ra.end < currentPosition + haplotypeLength)) { Allele* aptr; bool allowPartials = true; ra.fitHaplotype(currentPosition, haplotypeLength, aptr, allowPartials); @@ -3534,9 +3665,9 @@ if (allowedAlleleTypes & allele.type && ((haplotypeLength > 1 && ((allele.type == ALLELE_REFERENCE - && allele.position <= currentPosition + && allele.position <= currentPosition && allele.position + allele.referenceLength >= currentPosition + haplotypeLength) - || + || (allele.position == currentPosition && allele.referenceLength == haplotypeLength) || @@ -3549,7 +3680,7 @@ ((allele.type == ALLELE_REFERENCE && allele.position <= currentPosition && allele.position + allele.referenceLength > currentPosition) - || + || (allele.position == currentPosition))) ) ) { allele.update(haplotypeLength); @@ -3623,10 +3754,10 @@ string sequencingTech = "reference"; string baseQstr = ""; //baseQstr += qualityInt2Char(baseQ); - Allele* allele = new Allele(ALLELE_REFERENCE, + Allele* allele = new Allele(ALLELE_REFERENCE, currentSequenceName, currentPosition, - ¤tPosition, + ¤tPosition, ¤tReferenceBase, 1, currentPosition + 1, @@ -3699,7 +3830,7 @@ if (haplotypeLength == 1) { altseq = currentReferenceBase; } else { - altseq = uppercase(reference.getSubSequence(currentSequenceName, currentPosition, haplotypeLength)); + altseq = reference.getSubSequence(currentSequenceName, currentPosition, haplotypeLength); } } 
unfilteredAlleles.push_back(make_pair(genotypeAllele(allele.type, @@ -3715,7 +3846,7 @@ map filteredAlleles; - DEBUG("filtering genotype alleles which are not supported by at least " << parameters.minAltCount + DEBUG("filtering genotype alleles which are not supported by at least " << parameters.minAltCount << " observations comprising at least " << parameters.minAltFraction << " of the observations in a single individual"); for (vector >::iterator p = unfilteredAlleles.begin(); p != unfilteredAlleles.end(); ++p) { @@ -3725,7 +3856,7 @@ DEBUG("genotype allele: " << genotypeAllele << " qsum " << qSum); for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) { - Sample& sample = s->second; + Sample& sample = s->second; int alleleCount = 0; int qsum = 0; Sample::iterator c = sample.find(genotypeAllele.currentBase); @@ -3739,10 +3870,10 @@ } int observationCount = sample.observationCount(); if (qsum >= parameters.minAltQSum - && alleleCount >= parameters.minAltCount + && alleleCount >= parameters.minAltCount && ((float) alleleCount / (float) observationCount) >= parameters.minAltFraction) { - DEBUG(genotypeAllele << " has support of " << alleleCount - << " in individual " << s->first << " (" << observationCount << " obs)" << " and fraction " + DEBUG(genotypeAllele << " has support of " << alleleCount + << " in individual " << s->first << " (" << observationCount << " obs)" << " and fraction " << (float) alleleCount / (float) observationCount); filteredAlleles[genotypeAllele] = qSum; break; @@ -3915,7 +4046,7 @@ j += i; ++rightsteps; } - // if we went left and right a non-zero number of times, + // if we went left and right a non-zero number of times, if (leftsteps + rightsteps > 1) { counts[seq] = leftsteps + rightsteps; } diff -Nru freebayes-1.0.2/src/AlleleParser.h freebayes-1.1.0/src/AlleleParser.h --- freebayes-1.0.2/src/AlleleParser.h 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/AlleleParser.h 2016-11-03 11:25:39.000000000 +0000 @@ -15,7 +15,7 @@ #include #include "split.h" #include "join.h" -#include "api/BamReader.h" + #include "BedReader.h" #include "Parameters.h" #include "Utility.h" @@ -23,7 +23,7 @@ #include "Sample.h" #include "Fasta.h" #include "TryCatch.h" -#include "api/BamMultiReader.h" + #include "Genotype.h" #include "CNV.h" #include "Result.h" @@ -39,7 +39,6 @@ #define CACHED_BASIS_HAPLOTYPE_WINDOW 1000 using namespace std; -using namespace BamTools; // a structure holding information about our parameters @@ -60,19 +59,19 @@ int alleleTypes; Parameters parameters; - RegisteredAlignment(BamAlignment& alignment, Parameters parameters) + RegisteredAlignment(BAMALIGN& alignment, Parameters parameters) //: alignment(alignment) - : start(alignment.Position) - , end(alignment.GetEndPosition()) - , refid(alignment.RefID) - , name(alignment.Name) + : start(alignment.POSITION) + , end(alignment.ENDPOSITION) + , refid(alignment.REFID) + , name(alignment.QNAME) , mismatches(0) , snpCount(0) , indelCount(0) , alleleTypes(0) , parameters(parameters) { - alignment.GetTag("RG", readgroup); + FILLREADGROUP(readgroup, alignment); } void addAllele(Allele allele, bool mergeComplex = true, @@ -88,11 +87,11 @@ AlleleFilter(long unsigned int s, long unsigned int e) : start(s), end(e) {} // true of the allele is outside of our window - bool operator()(Allele& a) { + bool operator()(Allele& a) { return !(start >= a.position && end < a.position + a.length); } - bool operator()(Allele*& a) { + bool operator()(Allele*& a) { return !(start >= a->position && end < a->position 
+ a->length); } @@ -121,17 +120,16 @@ bool operator<(const AllelicPrimitive& a, const AllelicPrimitive& b); -void capBaseQuality(BamAlignment& alignment, int baseQualityCap); - +void capBaseQuality(BAMALIGN& alignment, int baseQualityCap); class AlleleParser { public: Parameters parameters; // holds operational parameters passed at program invocation - + AlleleParser(int argc, char** argv); - ~AlleleParser(void); + ~AlleleParser(void); vector sampleList; // list of sample names, indexed by sample id vector sampleListFromBam; // sample names drawn from BAM file @@ -149,7 +147,7 @@ vector referenceSequenceNames; map referenceIDToName; string referenceSampleName; - + // target regions vector targets; // returns true if we are within a target @@ -157,18 +155,18 @@ bool inTarget(void); // bamreader - BamMultiReader bamMultiReader; + BAMREADER bamMultiReader; // bed reader BedReader bedReader; // VCF - vcf::VariantCallFile variantCallFile; - vcf::VariantCallFile variantCallInputFile; // input variant alleles, to target analysis - vcf::VariantCallFile haplotypeVariantInputFile; // input alleles which will be used to construct haplotype alleles + vcflib::VariantCallFile variantCallFile; + vcflib::VariantCallFile variantCallInputFile; // input variant alleles, to target analysis + vcflib::VariantCallFile haplotypeVariantInputFile; // input alleles which will be used to construct haplotype alleles // input haplotype alleles - // + // // as calling progresses, a window of haplotype basis alleles from the flanking sequence // map from starting position to length->alle map > haplotypeBasisAlleles; // this is in the current reference sequence @@ -187,7 +185,7 @@ int basesRight, string& readSequence, string& sampleName, - BamAlignment& alignment, + BAMALIGN& alignment, string& sequencingTech, long double qual, string& qualstr); @@ -205,7 +203,7 @@ map > > inputAlleleCounts; // drawn from input VCF Sample* nullSample; - bool loadNextPositionWithAlignmentOrInputVariant(BamAlignment& currentAlignment); + bool loadNextPositionWithAlignmentOrInputVariant(BAMALIGN& currentAlignment); bool loadNextPositionWithInputVariant(void); bool hasMoreInputVariants(void); @@ -215,15 +213,14 @@ void getInputAlleleCounts(vector& genotypeAlleles, map& inputAFs); // reference names indexed by id - vector referenceSequences; + REFVEC referenceSequences; // ^^ vector of objects containing: //RefName; //!< Name of reference sequence //RefLength; //!< Length of reference sequence //RefHasAlignments; //!< True if BAM file contains alignments mapped to reference sequence - string bamHeader; vector bamHeaderLines; - + void openBams(void); void openOutputFile(void); void getSampleNames(void); @@ -235,7 +232,7 @@ vector currentPloidies(Samples& samples); void loadBamReferenceSequenceNames(void); void loadFastaReference(void); - void loadReferenceSequence(BamAlignment& alignment); + void loadReferenceSequence(BAMALIGN& alignment); void loadReferenceSequence(string& seqname); string referenceSubstr(long int position, unsigned int length); void loadTargets(void); @@ -243,7 +240,7 @@ bool getFirstVariant(void); void loadTargetsFromBams(void); void initializeOutputFiles(void); - RegisteredAlignment& registerAlignment(BamAlignment& alignment, RegisteredAlignment& ra, string& sampleName, string& sequencingTech); + RegisteredAlignment& registerAlignment(BAMALIGN& alignment, RegisteredAlignment& ra, string& sampleName, string& sequencingTech); void clearRegisteredAlignments(void); void updateAlignmentQueue(long int position, vector& 
newAlleles, bool gettingPartials = false); void updateInputVariants(long int pos, int referenceLength); @@ -264,12 +261,12 @@ bool loadTarget(BedTarget*); bool toFirstTargetPosition(void); bool toNextPosition(void); - bool getCompleteObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& haplotypeObservations); - bool getPartialObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& partials); + void getCompleteObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& haplotypeObservations); + void getPartialObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector& partials); bool dummyProcessNextTarget(void); bool toNextTarget(void); void setPosition(long unsigned int); - int currentSequencePosition(const BamAlignment& alignment); + int currentSequencePosition(const BAMALIGN& alignment); int currentSequencePosition(); void unsetAllProcessedFlags(void); bool getNextAlleles(Samples& allelesBySample, int allowedAlleleTypes); @@ -348,8 +345,8 @@ int basesAfterCurrentTarget; // ........................................ after ................... int currentRefID; - BamAlignment currentAlignment; - vcf::Variant* currentVariant; + BAMALIGN currentAlignment; + vcflib::Variant* currentVariant; }; diff -Nru freebayes-1.0.2/src/bamleftalign.cpp freebayes-1.1.0/src/bamleftalign.cpp --- freebayes-1.0.2/src/bamleftalign.cpp 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/bamleftalign.cpp 2016-11-03 11:25:39.000000000 +0000 @@ -11,10 +11,7 @@ #include #include "Fasta.h" -#include "api/BamAlignment.h" -#include "api/BamReader.h" -#include "api/BamWriter.h" -//#include "IndelAllele.h" + #include "LeftAlign.h" #ifdef VERBOSE_DEBUG @@ -25,7 +22,6 @@ #endif using namespace std; -using namespace BamTools; void printUsage(char** argv) { cerr << "usage: [BAM data stream] | " << argv[0] << " [options]" << endl @@ -123,12 +119,15 @@ exit(1); } - BamReader reader; - if (!reader.Open("stdin")) { + + BAMSINGLEREADER reader; + if (!reader.Open(STDIN)) { cerr << "could not open stdin for reading" << endl; exit(1); } +#ifdef HAVE_BAMTOOLS + BamWriter writer; if (isuncompressed) { @@ -139,42 +138,57 @@ cerr << "could not open stdout for writing" << endl; exit(1); } +#else + + SeqLib::BamWriter writer(isuncompressed ? SeqLib::SAM : SeqLib::BAM); + SeqLib::BamHeader hdr = reader.Header(); + if (hdr.isEmpty()) { + cerr << "could not open header for input" << endl; + exit(1); + } + writer.SetHeader(hdr); + + if (!suppress_output && !writer.Open("-")) { + cerr << "could not open stdout for writing" << endl; + exit(1); + } +#endif // store the names of all the reference sequences in the BAM file map referenceIDToName; - vector referenceSequences = reader.GetReferenceData(); + REFVEC referenceSequences = reader.GETREFDATA; int i = 0; - for (RefVector::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) { - referenceIDToName[i] = r->RefName; + for (REFVEC::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) { + referenceIDToName[i] = r->REFNAME; ++i; } - BamAlignment alignment; - - while (reader.GetNextAlignment(alignment)) { + BAMALIGN alignment; + while (GETNEXT(reader, alignment)) { + DEBUG("--------------------------- read --------------------------" << endl); - DEBUG("| " << referenceIDToName[alignment.RefID] << ":" << alignment.Position << endl); - DEBUG("| " << alignment.Name << ":" << alignment.GetEndPosition() << endl); - DEBUG("| " << alignment.Name << ":" << (alignment.IsMapped() ? 
" mapped" : " unmapped") << endl); - DEBUG("| " << alignment.Name << ":" << " cigar data size: " << alignment.CigarData.size() << endl); + DEBUG("| " << referenceIDToName[alignment.REFID] << ":" << alignment.POSITION << endl); + DEBUG("| " << alignment.QNAME << ":" << alignment.ENDPOSITION << endl); + DEBUG("| " << alignment.QNAME << ":" << (alignment.ISMAPPED ? " mapped" : " unmapped") << endl); + DEBUG("| " << alignment.QNAME << ":" << " cigar data size: " << alignment.GETCIGAR.size() << endl); DEBUG("--------------------------- realigned --------------------------" << endl); // skip unmapped alignments, as they cannot be left-realigned without CIGAR data - if (alignment.IsMapped()) { + if (alignment.ISMAPPED) { - int endpos = alignment.GetEndPosition(); - int length = endpos - alignment.Position + 1; - if (alignment.Position >= 0 && length > 0) { + int endpos = alignment.ENDPOSITION; + int length = endpos - alignment.POSITION + 1; + if (alignment.POSITION >= 0 && length > 0) { if (!stablyLeftAlign(alignment, reference.getSubSequence( - referenceIDToName[alignment.RefID], - alignment.Position, + referenceIDToName[alignment.REFID], + alignment.POSITION, length), maxiterations, debug)) { - cerr << "unstable realignment of " << alignment.Name - << " at " << referenceIDToName[alignment.RefID] << ":" << alignment.Position << endl - << alignment.AlignedBases << endl; + cerr << "unstable realignment of " << alignment.QNAME + << " at " << referenceIDToName[alignment.REFID] << ":" << alignment.POSITION << endl + << alignment.QUERYBASES << endl; } } @@ -184,7 +198,7 @@ DEBUG(endl); if (!suppress_output) - writer.SaveAlignment(alignment); + WRITEALIGNMENT(writer, alignment); } @@ -193,5 +207,4 @@ writer.Close(); return 0; - } diff -Nru freebayes-1.0.2/src/Fasta.cpp freebayes-1.1.0/src/Fasta.cpp --- freebayes-1.0.2/src/Fasta.cpp 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/Fasta.cpp 2016-11-03 11:25:39.000000000 +0000 @@ -245,7 +245,17 @@ delete index; } -string FastaReference::getSequence(string seqname) { +string removeIupacBases(string& str) { + const string validBases = "ATGCN"; + size_t found = str.find_first_not_of(validBases); + while (found != string::npos) { + str[found] = 'N'; + found = str.find_first_not_of(validBases, found + 1); + } + return str; +} + +string FastaReference::getRawSequence(string seqname) { FastaIndexEntry entry = index->entry(seqname); int newlines_in_sequence = entry.length / entry.line_blen; int seqlen = newlines_in_sequence + entry.length; @@ -263,6 +273,11 @@ return s; } +string FastaReference::getSequence(string seqname) { + string u = uppercase(getRawSequence(seqname)); + return removeIupacBases(u); +} + // TODO cleanup; odd function. 
use a map string FastaReference::sequenceNameStartingWith(string seqnameStart) { try { @@ -273,7 +288,7 @@ } } -string FastaReference::getSubSequence(string seqname, int start, int length) { +string FastaReference::getRawSubSequence(string seqname, int start, int length) { FastaIndexEntry entry = index->entry(seqname); length = min(length, entry.length - start); if (start < 0 || length < 1) { @@ -301,6 +316,11 @@ return s; } +string FastaReference::getSubSequence(string seqname, int start, int length) { + string u = uppercase(getRawSubSequence(seqname, start, length)); + return removeIupacBases(u); +} + long unsigned int FastaReference::sequenceLength(string seqname) { FastaIndexEntry entry = index->entry(seqname); return entry.length; diff -Nru freebayes-1.0.2/src/Fasta.h freebayes-1.1.0/src/Fasta.h --- freebayes-1.0.2/src/Fasta.h 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/Fasta.h 2016-11-03 11:25:39.000000000 +0000 @@ -17,6 +17,7 @@ #include #include #include "LargeFileSupport.h" +#include "Utility.h" #include #include "split.h" #include @@ -62,9 +63,11 @@ FILE* file; FastaIndex* index; vector findSequencesStartingWith(string seqnameStart); + string getRawSequence(string seqname); string getSequence(string seqname); // potentially useful for performance, investigate // void getSequence(string seqname, string& sequence); + string getRawSubSequence(string seqname, int start, int length); string getSubSequence(string seqname, int start, int length); string sequenceNameStartingWith(string seqnameStart); long unsigned int sequenceLength(string seqname); diff -Nru freebayes-1.0.2/src/freebayes.cpp freebayes-1.1.0/src/freebayes.cpp --- freebayes-1.0.2/src/freebayes.cpp 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/freebayes.cpp 2016-11-03 11:25:39.000000000 +0000 @@ -2,7 +2,7 @@ // freebayes // // A bayesian genetic variant detector. 
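// Illustrative sketch, not part of the patch: what the new getSequence() /
// getSubSequence() wrappers in Fasta.cpp do to raw FASTA text before any other
// code sees it.  After uppercasing, every character outside A, T, G, C, N
// (soft-masked leftovers or IUPAC ambiguity codes such as R and Y) becomes N.
// cleanReferenceChunk() is a hypothetical stand-in combining uppercase() and
// removeIupacBases() for demonstration only.
#include <cctype>
#include <iostream>
#include <string>

static std::string cleanReferenceChunk(std::string s) {
    for (char& c : s) c = (char)std::toupper((unsigned char)c);      // uppercase()
    const std::string valid = "ATGCN";
    for (std::string::size_type i = s.find_first_not_of(valid);      // removeIupacBases()
         i != std::string::npos;
         i = s.find_first_not_of(valid, i + 1)) {
        s[i] = 'N';
    }
    return s;
}

int main() {
    std::cout << cleanReferenceChunk("acgtRYacgt") << std::endl;     // prints ACGTNNACGT
}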
-// +// // standard includes //#include @@ -44,7 +44,7 @@ // local helper debugging macros to improve code readability #define DEBUG(msg) \ - if (parameters.debug) { cerr << msg << endl; } + if (parameters.debug) { cerr << msg << endl; } // lower-priority messages #ifdef VERBOSE_DEBUG @@ -59,7 +59,7 @@ cerr << msg << endl; -using namespace std; +using namespace std; // todo // generalize the main function to take the parameters as input @@ -120,7 +120,7 @@ if (parameters.output == "vcf") { out << parser->variantCallFile.header << endl; } - + if (0 < parameters.maxCoverage) { srand(13); } @@ -144,7 +144,7 @@ || (parameters.gVCFchunk && nonCalls.lastPos().second - nonCalls.firstPos().second > parameters.gVCFchunk))) { - vcf::Variant var(parser->variantCallFile); + vcflib::Variant var(parser->variantCallFile); out << results.gvcf(var, nonCalls, parser) << endl; nonCalls.clear(); } @@ -177,13 +177,12 @@ for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) { string sampleName = s->first; Sample& sample = s->second; - // get the coverage for this sample + // get the coverage for this sample int sampleCoverage = 0; for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) { sampleCoverage += sg->second.size(); } if (sampleCoverage <= parameters.maxCoverage) { - skip = true; continue; } @@ -365,7 +364,7 @@ int estimatedMinorAllelesAtLocus = max(1, (int) ceil((double) numCopiesOfLocus * estimatedMinorFrequency)); //cerr << "estimated minor frequency " << estimatedMinorFrequency << endl; //cerr << "estimated minor count " << estimatedMinorAllelesAtLocus << endl; - + map > > sampleDataLikelihoodsByPopulation; map > > variantSampleDataLikelihoodsByPopulation; @@ -654,16 +653,16 @@ // output - if (!alts.empty() && (1 - pHom.ToDouble()) >= parameters.PVL || parameters.PVL == 0) { + if ((!alts.empty() && (1 - pHom.ToDouble()) >= parameters.PVL) || parameters.PVL == 0){ // write the last gVCF record(s) if (parameters.gVCFout && !nonCalls.empty()) { - vcf::Variant var(parser->variantCallFile); + vcflib::Variant var(parser->variantCallFile); out << results.gvcf(var, nonCalls, parser) << endl; nonCalls.clear(); } - vcf::Variant var(parser->variantCallFile); + vcflib::Variant var(parser->variantCallFile); out << results.vcf( var, @@ -696,7 +695,7 @@ // write the last gVCF record if (parameters.gVCFout && !nonCalls.empty()) { Results results; - vcf::Variant var(parser->variantCallFile); + vcflib::Variant var(parser->variantCallFile); out << results.gvcf(var, nonCalls, parser) << endl; nonCalls.clear(); } diff -Nru freebayes-1.0.2/src/Genotype.cpp freebayes-1.1.0/src/Genotype.cpp --- freebayes-1.0.2/src/Genotype.cpp 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/Genotype.cpp 2016-11-03 11:25:39.000000000 +0000 @@ -1303,7 +1303,7 @@ } } if (allSameAndHomozygous) { - allelesWithHomozygousCombos[genotype->front().allele] == true; + allelesWithHomozygousCombos[genotype->front().allele] = true; } } diff -Nru freebayes-1.0.2/src/LeftAlign.cpp freebayes-1.1.0/src/LeftAlign.cpp --- freebayes-1.0.2/src/LeftAlign.cpp 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/LeftAlign.cpp 2016-11-03 11:25:39.000000000 +0000 @@ -22,12 +22,13 @@ // // In practice, we must call this function until the alignment is stabilized. 
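// Illustrative sketch, not part of the patch: why the one-character Genotype.cpp
// fix above ("==" changed to "=") matters.  With ==, std::map::operator[] still
// inserts a default-constructed false entry and the comparison result is
// discarded (compilers only emit an unused-value warning), so the
// homozygous-combo flag was never actually set; with = it is.
#include <iostream>
#include <map>
#include <string>

int main() {
    std::map<std::string, bool> flags;

    flags["old"] == true;   // pre-patch pattern: inserts false, compares, discards the result
    flags["new"] = true;    // patched pattern: records the flag

    std::cout << std::boolalpha
              << "old: " << flags["old"] << "\n"   // false
              << "new: " << flags["new"] << "\n";  // true
}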
// -bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) { +bool leftAlign(BAMALIGN& alignment, string& referenceSequence, bool debug) { + string alignmentAlignedBases = alignment.QUERYBASES; + const string alignmentSequence = alignment.QUERYBASES; int arsOffset = 0; // pointer to insertion point in aligned reference sequence string alignedReferenceSequence = referenceSequence; int aabOffset = 0; - string alignmentAlignedBases = alignment.QueryBases; // store information about the indels vector indels; @@ -39,12 +40,13 @@ string softEnd; stringstream cigar_before, cigar_after; - for (vector::const_iterator c = alignment.CigarData.begin(); - c != alignment.CigarData.end(); ++c) { - unsigned int l = c->Length; - char t = c->Type; + CIGAR cigar = alignment.GETCIGAR; + for (CIGAR::const_iterator c = cigar.begin(); + c != cigar.end(); ++c) { + unsigned int l = c->CIGLEN; + char t = c->CIGTYPE; cigar_before << l << t; - if (t == 'M') { // match or mismatch + if (t == 'M' || t == 'X' || t == '=') { // match or mismatch sp += l; rp += l; } else if (t == 'D') { // deletion @@ -58,7 +60,7 @@ aabOffset += l; sp += l; // update reference sequence position } else if (t == 'I') { // insertion - indels.push_back(FBIndelAllele(true, l, sp, rp, alignment.QueryBases.substr(rp, l), false)); + indels.push_back(FBIndelAllele(true, l, sp, rp, alignmentSequence.substr(rp, l), false)); alignedReferenceSequence.insert(sp + softBegin.size() + arsOffset, string(l, '-')); arsOffset += l; rp += l; @@ -116,7 +118,7 @@ if (debug) { if (steppos >= 0 && readsteppos >= 0) { cerr << referenceSequence.substr(steppos, indel.length) << endl; - cerr << alignment.QueryBases.substr(readsteppos, indel.length) << endl; + cerr << alignmentSequence.substr(readsteppos, indel.length) << endl; cerr << indel.sequence << endl; } } @@ -124,7 +126,8 @@ while (steppos >= 0 && readsteppos >= 0 && !indel.splice && indel.sequence == referenceSequence.substr(steppos, indel.length) - && indel.sequence == alignment.QueryBases.substr(readsteppos, indel.length) + && indel.sequence == alignmentSequence.substr(readsteppos, indel.length) + //&& indel.sequence == alignment.QueryBases.substr(readsteppos, indel.length) && (id == indels.begin() || (previous->insertion && steppos >= previous->position) || (!previous->insertion && steppos >= previous->position + previous->length))) { @@ -156,8 +159,10 @@ steppos = indel.position - 1; readsteppos = indel.readPosition - 1; while (steppos >= 0 && readsteppos >= 0 - && alignment.QueryBases.at(readsteppos) == referenceSequence.at(steppos) - && alignment.QueryBases.at(readsteppos) == indel.sequence.at(indel.sequence.size() - 1) + //&& alignment.QueryBases.at(readsteppos) == referenceSequence.at(steppos) + //&& alignment.QueryBases.at(readsteppos) == indel.sequence.at(indel.sequence.size() - 1) + && alignmentSequence.at(readsteppos) == referenceSequence.at(steppos) + && alignmentSequence.at(readsteppos) == indel.sequence.at(indel.sequence.size() - 1) && (id == indels.begin() || (previous->insertion && indel.position - 1 >= previous->position) || (!previous->insertion && indel.position - 1 >= previous->position + previous->length))) { @@ -196,7 +201,8 @@ ))) { if (previous->homopolymer()) { string seq = referenceSequence.substr(prev_end_ref, indel.position - prev_end_ref); - string readseq = alignment.QueryBases.substr(prev_end_read, indel.position - prev_end_ref); + //string readseq = alignment.QueryBases.substr(prev_end_read, indel.position - prev_end_ref); + string readseq = 
alignmentSequence.substr(prev_end_read, indel.position - prev_end_ref); LEFTALIGN_DEBUG("seq: " << seq << endl << "readseq: " << readseq << endl); if (previous->sequence.at(0) == seq.at(0) && FBhomopolymer(seq) @@ -237,23 +243,23 @@ // // and simultaneously reconstruct the cigar - vector newCigar; + CIGAR newCigar; if (!softBegin.empty()) { - newCigar.push_back(CigarOp('S', softBegin.size())); + newCigar.ADDCIGAR(CIGOP('S', softBegin.size())); } vector::iterator id = indels.begin(); FBIndelAllele last = *id++; if (last.position > 0) { - newCigar.push_back(CigarOp('M', last.position)); + newCigar.ADDCIGAR(CIGOP('M', last.position)); } if (last.insertion) { - newCigar.push_back(CigarOp('I', last.length)); + newCigar.ADDCIGAR(CIGOP('I', last.length)); } else if (last.splice) { - newCigar.push_back(CigarOp('N', last.length)); + newCigar.ADDCIGAR(CIGOP('N', last.length)); } else { - newCigar.push_back(CigarOp('D', last.length)); + newCigar.ADDCIGAR(CIGOP('D', last.length)); } int lastend = last.insertion ? last.position : (last.position + last.length); LEFTALIGN_DEBUG(last << ","); @@ -262,20 +268,25 @@ FBIndelAllele& indel = *id; LEFTALIGN_DEBUG(indel << ","); if (indel.position < lastend) { - cerr << "impossibility?: indel realigned left of another indel" << endl << alignment.Name - << " " << alignment.Position << endl << alignment.QueryBases << endl; + cerr << "impossibility?: indel realigned left of another indel" << endl << alignment.QNAME + << " " << alignment.POSITION << endl << alignment.QUERYBASES << endl; exit(1); + } else if (indel.position == lastend && indel.insertion == last.insertion) { - CigarOp& op = newCigar.back(); + CIGOP& op = newCigar.back(); +#ifdef HAVE_BAMTOOLS op.Length += indel.length; +#else + op = SeqLib::CigarField(op.Type(), op.Length() + indel.length); +#endif } else if (indel.position >= lastend) { // also catches differential indels, but with the same position - newCigar.push_back(CigarOp('M', indel.position - lastend)); + newCigar.ADDCIGAR(CIGOP('M', indel.position - lastend)); if (indel.insertion) { - newCigar.push_back(CigarOp('I', indel.length)); + newCigar.ADDCIGAR(CIGOP('I', indel.length)); } else if (indel.splice) { - newCigar.push_back(CigarOp('N', indel.length)); + newCigar.ADDCIGAR(CIGOP('N', indel.length)); } else { // deletion - newCigar.push_back(CigarOp('D', indel.length)); + newCigar.ADDCIGAR(CIGOP('D', indel.length)); } } @@ -284,34 +295,40 @@ } if (lastend < alignedLength) { - newCigar.push_back(CigarOp('M', alignedLength - lastend)); + newCigar.ADDCIGAR(CIGOP('M', alignedLength - lastend)); } if (!softEnd.empty()) { - newCigar.push_back(CigarOp('S', softEnd.size())); + newCigar.ADDCIGAR(CIGOP('S', softEnd.size())); } LEFTALIGN_DEBUG(endl); #ifdef VERBOSE_DEBUG if (debug) { - for (vector::const_iterator c = alignment.CigarData.begin(); - c != alignment.CigarData.end(); ++c) { - unsigned int l = c->Length; - char t = c->Type; + CIGAR cigar = alignment.GETCIGAR; + for (CIGAR::const_iterator c = cigar.begin(); + c != cigar.end(); ++c) { + unsigned int l = c->CIGLEN; + char t = c->CIGTYPE; cerr << l << t; } cerr << endl; } #endif +#ifdef HAVE_BAMTOOLS alignment.CigarData = newCigar; +#else + alignment.SetCigar(newCigar); +#endif - for (vector::const_iterator c = alignment.CigarData.begin(); - c != alignment.CigarData.end(); ++c) { - unsigned int l = c->Length; - char t = c->Type; - cigar_after << l << t; + cigar = alignment.GETCIGAR; + for (CIGAR::const_iterator c = cigar.begin(); + c != cigar.end(); ++c) { + unsigned int l = c->CIGLEN; + char t = 
c->CIGTYPE; + cigar_after << l << t; } LEFTALIGN_DEBUG(cigar_after.str() << endl); @@ -324,18 +341,22 @@ } -int countMismatches(BamAlignment& alignment, string referenceSequence) { +int countMismatches(BAMALIGN& alignment, string referenceSequence) { + const string alignmentSequence = alignment.QUERYBASES; int mismatches = 0; int sp = 0; int rp = 0; - for (vector::const_iterator c = alignment.CigarData.begin(); - c != alignment.CigarData.end(); ++c) { - unsigned int l = c->Length; - char t = c->Type; - if (t == 'M') { // match or mismatch + CIGAR cigar = alignment.GETCIGAR; + for (CIGAR::const_iterator c = cigar.begin(); + c != cigar.end(); ++c) { + unsigned int l = c->CIGLEN; + char t = c->CIGTYPE; + + if (t == 'M' || t == 'X' || t == '=') { // match or mismatch for (int i = 0; i < l; ++i) { - if (alignment.QueryBases.at(rp) != referenceSequence.at(sp)) + //if (alignment.QueryBases.at(rp) != referenceSequence.at(sp)) + if (alignmentSequence.at(rp) != referenceSequence.at(sp)) ++mismatches; ++sp; ++rp; @@ -360,15 +381,13 @@ // realignment. Returns true on realignment success or non-realignment. // Returns false if we exceed the maximum number of realignment iterations. // -bool stablyLeftAlign(BamAlignment& alignment, string referenceSequence, int maxiterations, bool debug) { + bool stablyLeftAlign(BAMALIGN& alignment, string referenceSequence, int maxiterations, bool debug) { #ifdef VERBOSE_DEBUG int mismatchesBefore = countMismatches(alignment, referenceSequence); #endif if (!leftAlign(alignment, referenceSequence, debug)) { - - LEFTALIGN_DEBUG("did not realign" << endl); return true; } else { @@ -381,7 +400,7 @@ int mismatchesAfter = countMismatches(alignment, referenceSequence); if (mismatchesBefore != mismatchesAfter) { - cerr << alignment.Name << endl; + cerr << alignment.QNAME << endl; cerr << "ERROR: found " << mismatchesBefore << " mismatches before, but " << mismatchesAfter << " after left realignment!" 
<< endl; exit(1); } diff -Nru freebayes-1.0.2/src/LeftAlign.h freebayes-1.1.0/src/LeftAlign.h --- freebayes-1.0.2/src/LeftAlign.h 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/LeftAlign.h 2016-11-03 11:25:39.000000000 +0000 @@ -14,9 +14,96 @@ #include #include "Fasta.h" + +#ifdef HAVE_BAMTOOLS #include "api/BamAlignment.h" #include "api/BamReader.h" +#include "api/BamMultiReader.h" #include "api/BamWriter.h" +using namespace BamTools; +#define GETNEXT(reader, alignment) reader.GetNextAlignment(alignment) +#define GETERRORSTRING(reader) reader.GetErrorString() +#define ALIGNMENTLENGTH Length +#define ISMAPPED IsMapped() +#define REFID RefID +#define POSITION Position +#define REFNAME RefName +#define REFLEN RefLength +#define REFVEC RefVector +#define REFDATA RefData +#define BAMALIGN BamAlignment +#define QUALITIES Qualities +#define QUERYBASES QueryBases +#define ALIGNEDBASES AlignedBases.size() +#define QNAME Name +#define GETREFDATA GetReferenceData() +#define GETREFNUM GetReferenceCount() +#define GETREFID(name) GetReferenceID(name) +#define ENDPOSITION GetEndPosition() +#define SEQLEN QueryBases.size() +#define MAPPINGQUALITY MapQuality +#define CIGAR std::vector +#define GETCIGAR CigarData +#define ISDUPLICATE IsDuplicate() +#define ISREVERSESTRAND IsReverseStrand() +#define ISPAIRED IsPaired() +#define ISMATEMAPPED IsMateMapped() +#define ISPROPERPAIR IsProperPair() +#define CIGLEN Length +#define CIGTYPE Type +#define BAMREADER BamMultiReader +#define BAMSINGLEREADER BamReader +#define FILLREADGROUP(rg, align) (align).GetTag("RG", (rg)) +#define ADDCIGAR push_back +#define CIGOP CigarOp +#define GETHEADERTEXT GetHeaderText() +#define STDIN "stdin" +#define WRITEALIGNMENT(writer, alignment) writer.SaveAlignment(alignment) +#else + +#define GETNEXT(reader, alignment) reader.GetNextRecord(alignment) +#define MAPPINGQUALITY MapQuality() +#define ALIGNMENTLENGTH Length() +#define ISMAPPED MappedFlag() +#define ISPAIRED PairedFlag() +#define ISMATEMAPPED MateMappedFlag() +#define ISPROPERPAIR ProperPair() +#define ISREVERSESTRAND ReverseFlag() +#define SEQLEN Length() +#define BAMALIGN SeqLib::BamRecord +#define REFID ChrID() +#define POSITION Position() +#define REFVEC std::vector +#define REFDATA SeqLib::HeaderSequence +#define REFNAME Name +#define REFLEN Length +#define QUALITIES Qualities() +#define QUERYBASES Sequence() +#define ALIGNEDBASES NumAlignedBases() +#define QNAME Qname() +#define GETREFDATA Header().GetHeaderSequenceVector() +#define GETREFNUM Header().NumSequences() +#define ENDPOSITION PositionEnd() +#define CIGAR SeqLib::Cigar +#define BAMREADER SeqLib::BamReader +#define BAMSINGLEREADER SeqLib::BamReader +#define GETCIGAR GetCigar() +#define GETREFID(name) Header().Name2ID(name) +#define ISDUPLICATE DuplicateFlag() +#define CIGLEN Length() +#define CIGTYPE Type() +#define ADDCIGAR add +#define CIGOP SeqLib::CigarField +#define FILLREADGROUP(rg, align) (rg) = (align).GetZTag("RG") +#define GETHEADERTEXT HeaderConcat() +#include "SeqLib/BamReader.h" +#include "SeqLib/BamWriter.h" +#define STDIN "-" +#define WRITEALIGNMENT(writer, alignment) writer.WriteRecord(alignment) +#endif + + + #include "IndelAllele.h" @@ -28,10 +115,9 @@ #endif using namespace std; -using namespace BamTools; -bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug = false); -bool stablyLeftAlign(BamAlignment& alignment, string referenceSequence, int maxiterations = 20, bool debug = false); -int countMismatches(BamAlignment& alignment, string 
referenceSequence); +bool leftAlign(BAMALIGN& alignment, string& referenceSequence, bool debug = false); +bool stablyLeftAlign(BAMALIGN& alignment, string referenceSequence, int maxiterations = 20, bool debug = false); +int countMismatches(BAMALIGN& alignment, string referenceSequence); #endif diff -Nru freebayes-1.0.2/src/Makefile freebayes-1.1.0/src/Makefile --- freebayes-1.0.2/src/Makefile 2015-12-24 19:40:06.000000000 +0000 +++ freebayes-1.1.0/src/Makefile 2016-11-03 11:25:39.000000000 +0000 @@ -13,10 +13,13 @@ #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2 BAMTOOLS_ROOT=../bamtools +SEQLIB_ROOT=../SeqLib VCFLIB_ROOT=../vcflib +TABIX_ROOT=$(VCFLIB_ROOT)/tabixpp +HTSLIB_ROOT=$(TABIX_ROOT)/htslib -LIBS = -L./ -L$(VCFLIB_ROOT)/tabixpp/ -L$(BAMTOOLS_ROOT)/lib -ltabix -lz -lm -INCLUDE = -I$(BAMTOOLS_ROOT)/src -I../ttmath -I$(VCFLIB_ROOT)/src -I$(VCFLIB_ROOT)/ +LIBS = -lz -lm -lpthread +INCLUDE = -I../ttmath -I$(BAMTOOLS_ROOT)/src/ -I$(VCFLIB_ROOT)/src/ -I$(TABIX_ROOT)/ -I$(VCFLIB_ROOT)/smithwaterman/ -I$(VCFLIB_ROOT)/multichoose/ -I$(VCFLIB_ROOT)/filevercmp/ -I$(HTSLIB_ROOT) -I$(SEQLIB_ROOT) -I$(SEQLIB_ROOT)/htslib all: autoversion ../bin/freebayes ../bin/bamleftalign @@ -37,7 +40,11 @@ # builds bamtools static lib, and copies into root $(BAMTOOLS_ROOT)/lib/libbamtools.a: cd $(BAMTOOLS_ROOT) && mkdir -p build && cd build && cmake .. && $(MAKE) +$(HTSLIB_ROOT)/libhts.a: + cd $(HTSLIB_ROOT) && make +$(SEQLIB_ROOT)/src/libseqlib.a: + cd $(SEQLIB_ROOT) && ./configure && make OBJECTS=BedReader.o \ CNV.o \ @@ -64,14 +71,18 @@ NonCall.o \ SegfaultHandler.o \ ../vcflib/tabixpp/tabix.o \ - ../vcflib/tabixpp/bgzf.o \ + ../vcflib/tabixpp/htslib/bgzf.o \ ../vcflib/smithwaterman/SmithWatermanGotoh.o \ - ../vcflib/smithwaterman/disorder.c \ + ../vcflib/smithwaterman/disorder.cpp \ ../vcflib/smithwaterman/LeftAlign.o \ ../vcflib/smithwaterman/Repeats.o \ ../vcflib/smithwaterman/IndelAllele.o \ Variant.o \ - $(BAMTOOLS_ROOT)/lib/libbamtools.a + $(BAMTOOLS_ROOT)/lib/libbamtools.a \ + $(SEQLIB_ROOT)/src/libseqlib.a \ + $(SEQLIB_ROOT)/bwa/libbwa.a \ + $(SEQLIB_ROOT)/fermi-lite/libfml.a \ + $(SEQLIB_ROOT)/htslib/libhts.a HEADERS=multichoose.h version_git.h @@ -86,16 +97,16 @@ dummy ../bin/dummy: dummy.o $(OBJECTS) $(HEADERS) $(CXX) $(CFLAGS) $(INCLUDE) dummy.o $(OBJECTS) -o ../bin/dummy $(LIBS) -bamleftalign ../bin/bamleftalign: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o - $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o $(BAMTOOLS_ROOT)/lib/libbamtools.a -o ../bin/bamleftalign $(LIBS) +bamleftalign ../bin/bamleftalign: $(BAMTOOLS_ROOT)/lib/libbamtools.a $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o + $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o Utility.o LeftAlign.o IndelAllele.o split.o $(BAMTOOLS_ROOT)/lib/libbamtools.a $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a -o ../bin/bamleftalign $(LIBS) -bamfiltertech ../bin/bamfiltertech: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamfiltertech.o $(OBJECTS) $(HEADERS) +bamfiltertech ../bin/bamfiltertech: $(BAMTOOLS_ROOT)/lib/libbamtools.a $(SEQLIB_ROOT)/src/libseqlib.a $(SEQLIB_ROOT)/htslib/libhts.a bamfiltertech.o $(OBJECTS) $(HEADERS) $(CXX) $(CFLAGS) $(INCLUDE) bamfiltertech.o $(OBJECTS) -o ../bin/bamfiltertech $(LIBS) # objects -Fasta.o: Fasta.cpp +Fasta.o: Fasta.cpp Utility.o $(CXX) $(CFLAGS) $(INCLUDE) -c Fasta.cpp alleles.o: alleles.cpp AlleleParser.o 
Allele.o
@@ -104,7 +115,7 @@
 dummy.o: dummy.cpp AlleleParser.o Allele.o
 	$(CXX) $(CFLAGS) $(INCLUDE) -c dummy.cpp
 
-freebayes.o: freebayes.cpp TryCatch.h $(BAMTOOLS_ROOT)/lib/libbamtools.a
+freebayes.o: freebayes.cpp TryCatch.h $(HTSLIB_ROOT)/libhts.a $(BAMTOOLS_ROOT)/lib/libbamtools.a
 	$(CXX) $(CFLAGS) $(INCLUDE) -c freebayes.cpp
 
 fastlz.o: fastlz.c fastlz.h
@@ -125,7 +136,7 @@
 Ewens.o: Ewens.cpp Ewens.h
 	$(CXX) $(CFLAGS) $(INCLUDE) -c Ewens.cpp
 
-AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(BAMTOOLS_ROOT)/lib/libbamtools.a
+AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(BAMTOOLS_ROOT)/lib/libbamtools.a $(HTSLIB_ROOT)/libhts.a
 	$(CXX) $(CFLAGS) $(INCLUDE) -c AlleleParser.cpp
 
 Utility.o: Utility.cpp Utility.h Sum.h Product.h
@@ -173,7 +184,7 @@
 bamfiltertech.o: bamfiltertech.cpp
 	$(CXX) $(CFLAGS) $(INCLUDE) -c bamfiltertech.cpp
 
-LeftAlign.o: LeftAlign.h LeftAlign.cpp $(BAMTOOLS_ROOT)/lib/libbamtools.a
+LeftAlign.o: LeftAlign.h LeftAlign.cpp $(BAMTOOLS_ROOT)/lib/libbamtools.a $(HTSLIB_ROOT)/libhts.a
 	$(CXX) $(CFLAGS) $(INCLUDE) -c LeftAlign.cpp
 
 IndelAllele.o: IndelAllele.cpp IndelAllele.h
@@ -182,8 +193,9 @@
 Variant.o: $(VCFLIB_ROOT)/src/Variant.h $(VCFLIB_ROOT)/src/Variant.cpp
 	$(CXX) $(CFLAGS) $(INCLUDE) -c $(VCFLIB_ROOT)/src/Variant.cpp
 
-../vcflib/tabixpp/tabix.o: ../vcflib/tabixpp/tabix.hpp ../vcflib/tabixpp/tabix.cpp
-../vcflib/tabixpp/bgzf.o: ../vcflib/tabixpp/bgzf.c ../vcflib/tabixpp/bgzf.h
+../vcflib/tabixpp/tabix.o:
+	cd $(TABIX_ROOT)/ && make
+../vcflib/tabixpp/htslib/bgzf.o: ../vcflib/tabixpp/htslib/bgzf.c ../vcflib/tabixpp/htslib/htslib/bgzf.h
 	cd ../vcflib/tabixpp && $(MAKE)
 
 ../vcflib/smithwaterman/SmithWatermanGotoh.o: ../vcflib/smithwaterman/SmithWatermanGotoh.h ../vcflib/smithwaterman/SmithWatermanGotoh.cpp
diff -Nru freebayes-1.0.2/src/NonCall.cpp freebayes-1.1.0/src/NonCall.cpp
--- freebayes-1.0.2/src/NonCall.cpp 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/src/NonCall.cpp 2016-11-03 11:25:39.000000000 +0000
@@ -4,8 +4,8 @@
     NonCall aggregate;
     bool first = true;
     for (NonCalls::const_iterator nc = this->begin(); nc != this->end(); ++nc) {
-        for (map >::const_iterator p = nc->second.begin();
-             p != nc->second.end(); ++p) {
+        for (map >::const_iterator p
+                 = nc->second.begin(); p != nc->second.end(); ++p) {
             for (map::const_iterator s = p->second.begin(); s != p->second.end(); ++s) {
                 const NonCall& nonCall = s->second;
@@ -13,12 +13,14 @@
                 aggregate.altCount += nonCall.altCount;
                 aggregate.reflnQ += nonCall.reflnQ;
                 aggregate.altlnQ += nonCall.altlnQ;
+                aggregate.nCount += 1;
                 if (first) {
-                    aggregate.minDepth = nonCall.refCount + nonCall.altCount;
-                    first = false;
+                    aggregate.minDepth = nonCall.refCount + nonCall.altCount;
+                    first = false;
                 } else {
-                    aggregate.minDepth = min(aggregate.minDepth, nonCall.refCount + nonCall.altCount);
-                }
+                    aggregate.minDepth = min(aggregate.minDepth,
+                                             nonCall.refCount + nonCall.altCount);
+                }
             }
         }
     }
@@ -39,6 +41,7 @@
             aggregate.altCount += nonCall.altCount;
             aggregate.reflnQ += nonCall.reflnQ;
             aggregate.altlnQ += nonCall.altlnQ;
+            aggregate.nCount += 1;
             if (!seen.count(name)) {
                 aggregate.minDepth = nonCall.refCount + nonCall.altCount;
                 seen.insert(name);
diff -Nru freebayes-1.0.2/src/NonCall.h freebayes-1.1.0/src/NonCall.h
--- freebayes-1.0.2/src/NonCall.h 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/src/NonCall.h 2016-11-03 11:25:39.000000000 +0000
@@ -20,6 +20,7 @@
         , altCount(0)
         , altlnQ(0)
         , minDepth(0)
+        , nCount(0)
     { }
     NonCall(int rc, long double rq, int ac, long double aq,
             int mdp)
@@ -32,6 +33,7 @@
     int refCount;
     int altCount;
     int minDepth;
+    int nCount ;
     long double reflnQ;
     long double altlnQ;
 };
diff -Nru freebayes-1.0.2/src/Parameters.cpp freebayes-1.1.0/src/Parameters.cpp
--- freebayes-1.0.2/src/Parameters.cpp 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/src/Parameters.cpp 2016-11-03 11:25:39.000000000 +0000
@@ -161,6 +161,8 @@
        << " -P --pvar N Report sites if the probability that there is a polymorphism" << endl
        << " at the site is greater than N. default: 0.0. Note that post-" << endl
        << " filtering is generally recommended over the use of this parameter." << endl
+       << " --strict-vcf" << endl
+       << " Generate strict VCF format (FORMAT/GQ will be an int)" << endl
        << endl
        << "population model:" << endl
        << endl
@@ -398,6 +400,7 @@
     allowMNPs = true; // -X --no-mnps
     allowSNPs = true; // -I --no-snps
     allowComplex = true;
+    strictVCF = false;
     maxComplexGap = 3;
     //maxHaplotypeLength = 100;
     minRepeatSize = 5;
@@ -514,6 +517,7 @@
         {"indel-exclusion-window", required_argument, 0, 'x'},
         {"theta", required_argument, 0, 'T'},
         {"pvar", required_argument, 0, 'P'},
+        {"strict-vcf", no_argument, 0, '/'},
         {"read-dependence-factor", required_argument, 0, 'D'},
         {"binomial-obs-priors-off", no_argument, 0, 'V'},
         {"allele-balance-priors-off", no_argument, 0, 'a'},
@@ -703,6 +707,10 @@
             }
             break;
 
+        case '/':
+            strictVCF = true;
+            break;
+
         case 'u':
             allowComplex = false;
             break;
diff -Nru freebayes-1.0.2/src/Parameters.h freebayes-1.1.0/src/Parameters.h
--- freebayes-1.0.2/src/Parameters.h 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/src/Parameters.h 2016-11-03 11:25:39.000000000 +0000
@@ -60,6 +60,7 @@
     bool leftAlignIndels; // -O --left-align-indels
     bool allowMNPs; // -X --allow-mnps
     bool allowComplex; // -X --allow-complex
+    bool strictVCF;
     int maxComplexGap;
     //int maxHaplotypeLength;
     int minRepeatSize;
diff -Nru freebayes-1.0.2/src/ResultData.cpp freebayes-1.1.0/src/ResultData.cpp
--- freebayes-1.0.2/src/ResultData.cpp 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/src/ResultData.cpp 2016-11-03 11:25:39.000000000 +0000
@@ -5,8 +5,8 @@
 
 
 
-vcf::Variant& Results::vcf(
-    vcf::Variant& var, // variant to update
+vcflib::Variant& Results::vcf(
+    vcflib::Variant& var, // variant to update
     BigFloat pHom,
     long double bestComboOddsRatio,
     //long double alleleSamplingProb,
@@ -53,7 +53,7 @@
     // get the required size of the reference sequence
     // strip identical bases from start and/or end of alleles
 
-    // if bases have been stripped from the beginning, 
+    // if bases have been stripped from the beginning,
 
     // set up VCF record-wide variables
 
@@ -76,6 +76,7 @@
     if (parameters.calculateMarginals)
         var.format.push_back("GQ"); // XXX
     var.format.push_back("DP");
+    var.format.push_back("AD");
     var.format.push_back("RO");
     var.format.push_back("QR");
     var.format.push_back("AO");
@@ -296,7 +297,7 @@
     long double altReadMismatchRate = (altObsCount == 0 ? 0 : altReadMismatchSum / altObsCount);
     long double altReadSNPRate = (altObsCount == 0 ? 0 : altReadSNPSum / altObsCount);
     long double altReadIndelRate = (altObsCount == 0 ? 0 : altReadIndelSum / altObsCount);
-    
+
     //var.info["XAM"].push_back(convert(altReadMismatchRate));
     //var.info["XAS"].push_back(convert(altReadSNPRate));
     //var.info["XAI"].push_back(convert(altReadIndelRate));
@@ -420,7 +421,7 @@
     // tally partial observations to get a mean coverage per bp of reference
     int haplotypeLength = refbase.size();
     int basesInObservations = 0;
-    
+
     for (map >::iterator g = alleleGroups.begin(); g != alleleGroups.end(); ++g) {
         for (vector::iterator a = g->second.begin(); a != g->second.end(); ++a) {
             basesInObservations += (*a)->alternateSequence.size();
@@ -430,7 +431,7 @@
     for (map >::iterator p = partialObservationSupport.begin(); p != partialObservationSupport.end(); ++p) {
         basesInObservations += p->first->alternateSequence.size();
     }
-    
+
     double depthPerBase = (double) basesInObservations / (double) haplotypeLength;
 
     var.info["DPB"].push_back(convert(depthPerBase));
@@ -510,10 +511,15 @@
         sampleOutput["GT"].push_back(genotype->relativeGenotype(refbase, altAlleles));
 
         if (parameters.calculateMarginals) {
-            sampleOutput["GQ"].push_back(convert(nan2zero(big2phred((BigFloat)1 - big_exp(sampleLikelihoods.front().marginal)))));
+            double val = nan2zero(big2phred((BigFloat)1 - big_exp(sampleLikelihoods.front().marginal)));
+            if (parameters.strictVCF)
+                sampleOutput["GQ"].push_back(convert(int(round(val))));
+            else
+                sampleOutput["GQ"].push_back(convert(val));
         }
 
         sampleOutput["DP"].push_back(convert(sample.observationCount()));
+        sampleOutput["AD"].push_back(convert(sample.observationCount(refbase)));
         sampleOutput["RO"].push_back(convert(sample.observationCount(refbase)));
         sampleOutput["QR"].push_back(convert(sample.qualSum(refbase)));
 
@@ -521,6 +527,7 @@
             Allele& altAllele = *aa;
             string altbase = altAllele.base();
             sampleOutput["AO"].push_back(convert(sample.observationCount(altbase)));
+            sampleOutput["AD"].push_back(convert(sample.observationCount(altbase)));
             sampleOutput["QA"].push_back(convert(sample.qualSum(altbase)));
         }
 
@@ -630,8 +637,8 @@
 }
 
 
-vcf::Variant& Results::gvcf(
-    vcf::Variant& var,
+vcflib::Variant& Results::gvcf(
+    vcflib::Variant& var,
     NonCalls& nonCalls,
     AlleleParser* parser) {
 
@@ -652,7 +659,7 @@
     assert(numSites > 0);
 
     // set up site call
-    var.ref = parser->currentReferenceBaseString();
+    var.ref = parser->referenceSubstr(startPos, 1);
     var.alt.push_back("<*>");
     var.sequenceName = parser->currentSequenceName;
     var.position = startPos + 1; // output text field is one-based
@@ -664,19 +671,28 @@
     var.format.clear();
     var.format.push_back("GQ");
     var.format.push_back("DP");
-    var.format.push_back("MIN");
+    var.format.push_back("MIN_DP");
     var.format.push_back("QR");
     var.format.push_back("QA");
-    
+
     NonCall total = nonCalls.aggregateAll();
+
+    /* This resets min depth to zero if nonCalls is less than numSites. */
+
+    int minDepth = total.minDepth;
+
+    if(numSites != total.nCount){
+        minDepth = 0;
+    }
+
     var.info["DP"].push_back(convert((total.refCount+total.altCount) / numSites));
-    var.info["MIN"].push_back(convert(total.minDepth));
+    var.info["MIN_DP"].push_back(convert(minDepth));
 
     // The text END field is one-based, inclusive. We proudly conflate this
     // with our zero-based, exclusive endPos.
     var.info["END"].push_back(convert(endPos));
 
     // genotype quality is 1- p(polymorphic)
-    
+
     map perSample;
     nonCalls.aggregatePerSample(perSample);
@@ -688,8 +704,18 @@
         map >& sampleOutput = var.samples[sampleName];
         long double qual = nc.reflnQ - nc.altlnQ;
         sampleOutput["GQ"].push_back(convert(ln2phred(qual)));
+
+
+        /* This resets min depth to zero if nonCalls is less than numSites. */
+
+        int minDepth = nc.minDepth;
+
+        if(numSites != nc.nCount){
+            minDepth = 0;
+        }
+
         sampleOutput["DP"].push_back(convert((nc.refCount+nc.altCount) / numSites));
-        sampleOutput["MIN"].push_back(convert(nc.minDepth));
+        sampleOutput["MIN_DP"].push_back(convert(minDepth));
         sampleOutput["QR"].push_back(convert(ln2phred(nc.reflnQ)));
         sampleOutput["QA"].push_back(convert(ln2phred(nc.altlnQ)));
     }
diff -Nru freebayes-1.0.2/src/ResultData.h freebayes-1.1.0/src/ResultData.h
--- freebayes-1.0.2/src/ResultData.h 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/src/ResultData.h 2016-11-03 11:25:39.000000000 +0000
@@ -41,8 +41,8 @@
         }
     }
 
-    vcf::Variant& vcf(
-        vcf::Variant& var, // variant to update
+    vcflib::Variant& vcf(
+        vcflib::Variant& var, // variant to update
         BigFloat pHom,
         long double bestComboOddsRatio,
         //long double alleleSamplingProb,
@@ -61,8 +61,8 @@
         vector& sequencingTechnologies,
         AlleleParser* parser);
 
-    vcf::Variant& gvcf(
-        vcf::Variant& var,
+    vcflib::Variant& gvcf(
+        vcflib::Variant& var,
         NonCalls& noncalls,
         AlleleParser* parser);
 };
diff -Nru freebayes-1.0.2/test/Makefile freebayes-1.1.0/test/Makefile
--- freebayes-1.0.2/test/Makefile 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/test/Makefile 2016-11-03 11:25:39.000000000 +0000
@@ -12,4 +12,4 @@
 	cd .. && $(MAKE)
 
 $(vcfuniq):
-	cd ../vcflib && $(MAKE)
+	cd ../vcflib && make clean && make
diff -Nru freebayes-1.0.2/test/region-and-target-handling.t freebayes-1.1.0/test/region-and-target-handling.t
--- freebayes-1.0.2/test/region-and-target-handling.t 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/test/region-and-target-handling.t 1970-01-01 00:00:00.000000000 +0000
@@ -1,104 +0,0 @@
-#!/bin/bash
-# vi: set ft=sh :
-test=$(dirname $0)
-root=$(dirname $0)/..
-source $test/test-simple-bash/lib/test-simple.bash \
-    tests 8
-
-PATH=$root/bin:$PATH
-
-ref=$test/`basename $0`.ref
-alt=$test/`basename $0`.alt
-bam=$test/`basename $0`.bam
-bed=$test/`basename $0`.bed
-vcf=$test/`basename $0`.vcf
-
-trap 'rm -f $ref* $alt $bam* $bed $vcf' EXIT
-
-# 01234567 start
-# ATCGGCTA
-# 12345678 end
-
-cat >$ref <ref
-ATCGGCTA
-REF
-samtools faidx $ref
-
-cat >$alt <alt
-GTTAGGTT
-ALT
-
-samtools view -b - >$bam <$bed <$vcf <
-#CHROM POS ID REF ALT QUAL FILTER INFO
-ref 1 . A G 1234 PASS NAME=first base
-ref 2 . T . 1234 PASS NAME=second base
-ref 3 . C T 1234 PASS NAME=third base
-ref 4 . G A 1234 PASS NAME=fourth base
-ref 5 . G . 1234 PASS NAME=fifth base
-ref 6 . C G 1234 PASS NAME=sixth base
-ref 7 . T . 1234 PASS NAME=seventh base
-ref 8 . A T 1234 PASS NAME=eigth base
-VCF
-
-PS4='\n+ '
-
-function run_freebayes() {
-    ($root/bin/freebayes "$@" \
-        --haplotype-length 0 --min-alternate-count 1 \
-        --min-alternate-fraction 0 --pooled-continuous --report-monomorphic \
-        --ploidy 1 \
-        -f $ref $bam \
-        | grep -vE '^#' | cut -f1-5)
-}
-
-if [[ -n $TEST_DEBUG ]]; then
-    cat $ref >&2
-    cat $bed >&2
-    cat $vcf >&2
-    vcfannotate --bed $bed --key MATCH $vcf >&2
-    vcfintersect --bed $bed $vcf >&2
-    bedtools intersect -a $vcf -b $bed >&2
-fi
-
-[[ -z $(run_freebayes --region ref:4-5 --region ref:6-7) ]]; ok $? 'ref:4-5 ref:6-7 are empty'
-[[ -z $(run_freebayes --region ref:4 --region ref:6) ]]; ok $? 'ref:4 ref:6 are empty'
-
-expected=`cat <&1) =~ "Target region coordinates (ref 1 19) outside of reference sequence bounds (ref 8)" ]]
-    ok $? 'region outside of bounds error'
diff -Nru freebayes-1.0.2/test/t/00_region_and_target_handling.t freebayes-1.1.0/test/t/00_region_and_target_handling.t
--- freebayes-1.0.2/test/t/00_region_and_target_handling.t 1970-01-01 00:00:00.000000000 +0000
+++ freebayes-1.1.0/test/t/00_region_and_target_handling.t 2016-11-03 11:25:39.000000000 +0000
@@ -0,0 +1,145 @@
+#!/usr/bin/env bash
+# vi: set ft=sh :
+
+test=$(dirname $0)/..
+root=$(dirname $0)/../..
+
+source $test/test-simple-bash/lib/test-simple.bash \
+    tests 11
+
+PATH=$root/bin:$PATH
+
+ref=$test/$(basename $0).ref
+alt=$test/$(basename $0).alt
+bam=$test/$(basename $0).bam
+bed=$test/$(basename $0).bed
+vcf=$test/$(basename $0).vcf
+
+trap 'rm -f $ref* $alt $bam* $bed $vcf' EXIT
+
+# 01234567890 start
+# ATCGGCTAAAA ref
+# GTTAGGTTAAC alt
+# 12345678901 end
+
+cat >$ref <ref
+ATCGGCTAAAA
+REF
+samtools faidx $ref
+
+cat >$alt <alt
+GTTAGGTTAAC
+ALT
+
+samtools view -S -b - >$bam <$bed <$vcf <
+#CHROM POS ID REF ALT QUAL FILTER INFO
+ref 1 . A G 1234 PASS NAME=first base
+ref 2 . T . 1234 PASS NAME=second base
+ref 3 . C T 1234 PASS NAME=third base
+ref 4 . G A 1234 PASS NAME=fourth base
+ref 5 . G . 1234 PASS NAME=fifth base
+ref 6 . C G 1234 PASS NAME=sixth base
+ref 7 . T . 1234 PASS NAME=seventh base
+ref 8 . A T 1234 PASS NAME=eighth base
+ref 9 . A . 1234 PASS NAME=ninth base
+ref 10 . A . 1234 PASS NAME=tenth base
+ref 11 . A C 1234 PASS NAME=eleventh base
+VCF
+
+PS4='\n+ '
+
+function run_freebayes() {
+    ($root/bin/freebayes "$@" \
+        --haplotype-length 0 --min-alternate-count 1 \
+        --min-alternate-fraction 0 --pooled-continuous --report-monomorphic \
+        --ploidy 1 \
+        -f $ref $bam \
+        | grep -vE "^#" | cut -f1-5)
+}
+
+if [[ -n $TEST_DEBUG ]]; then
+    cat $ref >&2
+    cat $bed >&2
+    cat $vcf >&2
+    vcfannotate --bed $bed --key MATCH $vcf >&2
+    vcfintersect --bed $bed $vcf >&2
+    bedtools intersect -a $vcf -b $bed >&2
+fi
+
+
+output=$(run_freebayes --region ref:4-5 --region ref:6-7)
+ok [ -z "$output" ] "ref:4-5 ref:6-7 are empty" || echo "$output"
+
+output=$(run_freebayes --region ref:4 --region ref:6)
+ok [ -z "$output" ] "ref:4 ref:6 are empty" || echo "$output"
+
+expected=$(cat <&1)
+ok [ "$output" == "$expected" ] "region outside of bounds error" || echo "$output"
diff -Nru freebayes-1.0.2/test/t/01_call_variants.t freebayes-1.1.0/test/t/01_call_variants.t
--- freebayes-1.0.2/test/t/01_call_variants.t 2015-12-24 19:40:06.000000000 +0000
+++ freebayes-1.1.0/test/t/01_call_variants.t 2016-11-03 11:25:39.000000000 +0000
@@ -62,7 +62,7 @@
 EOF
 
 is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam -t targets.bed | grep -v "^#" | wc -l) $by_region "a targets bed file can be used with the same effect as running by region"
-#rm targets.bed
+rm targets.bed
 
 is $(samtools view -u tiny/NA12878.chr22.tiny.bam | freebayes -f tiny/q.fa --stdin | grep -v "^#" | wc -l) \
@@ -76,27 +76,27 @@
 # ensure that positions at which no variants exist get put in the out vcf
 is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes puts required variants in output"
 
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" | wc -l) 3 "freebayes limits calls to input variants correctly"
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes limits calls to input variants correctly"
 
 is $(freebayes -f tiny/q.fa -@ tiny/q.vcf.gz -l tiny/1read.bam | grep -v "^#" | wc -l) 20 "freebayes reports all input variants even when there is no input data"
 
 # check variant input with region specified
 is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 2 "freebayes handles region and variant input"
 
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" | wc -l) 2 "freebayes limits to variant input correctly when region is given"
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t1000$)" | wc -l) 2 "freebayes limits to variant input correctly when region is given"
 
 # check variant input when reading from stdin
 is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes handles variant input and reading from stdin"
 
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 3 "freebayes limits to variant input when reading from stdin"
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes limits to variant input when reading from stdin"
 
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 2 "freebayes handles region, stdin, and variant input"
+is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t1000$)" | wc -l) 2 "freebayes handles region, stdin, and variant input"
 
 gzip -c tiny/q.fa >tiny/q.fa.gz
 cp tiny/q.fa.fai tiny/q.fa.gz.fai
 freebayes -f tiny/q.fa.gz -@ tiny/q_spiked.vcf.gz -r q:1-10000 -l - < tiny/NA12878.chr22.tiny.bam >/dev/null 2>/dev/null
 is $? 1 "freebayes bails out when given a gzipped or corrupted reference"
-rm tiny/q.fa.gz.*
+rm tiny/q.fa.gz*
 
 is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) $(freebayes-parallel tiny/q.regions 2 -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) "running in parallel makes no difference"
diff -Nru freebayes-1.0.2/test/t/02_multi_bam.t freebayes-1.1.0/test/t/02_multi_bam.t
--- freebayes-1.0.2/test/t/02_multi_bam.t 1970-01-01 00:00:00.000000000 +0000
+++ freebayes-1.1.0/test/t/02_multi_bam.t 2016-11-03 11:25:39.000000000 +0000
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+
+BASH_TAP_ROOT=bash-tap
+source ./bash-tap/bash-tap-bootstrap
+
+PATH=../bin:$PATH # for freebayes
+
+plan tests 7
+
+ref=$(basename $0).ref
+
+trap 'rm -f ${ref}* $(basename $0)*.bam*' EXIT
+
+cat >${ref} <ref
+AATCGGCTA
+REF
+samtools faidx ${ref}
+
+function run_freebayes() {
+    freebayes "$@" \
+        --haplotype-length 0 --min-alternate-count 1 \
+        --min-alternate-fraction 0 --pooled-continuous --report-monomorphic \
+        --ploidy 1 \
+        -f $ref $bam \
+        2>&1 \
+        | grep -vE "^#" | cut -f1-5
+}
+
+function make_bam() {
+    local id=$1 && shift
+    local sm=$1 && shift
+    local pl=$1 && shift
+    local suffix=${1:-} && shift
+    local first_snp=${1:-G} && shift
+
+    local bam="$(basename $0).${id}.${sm}.${pl}${suffix}.bam"
+    samtools view -S -b - >${bam} <${ref} <ref
+${bases}
+REF
+    samtools faidx ${ref}
+}
+
+samtools view -S -b - >${bam} <&1 \
+    | grep -vE "^#" | cut -f1-5
+}
+
+
+make_ref "AATCGGCTAZ"
+expected='ERROR\(freebayes\): Found non-DNA character Z at position 9'
+like "$(run_freebayes)" "${expected}" "freebayes rejects invalid reference"
+
+expected=$(cat <