Binary files /tmp/tmpUT00p8/oECFLFUvmt/r-bioc-dada2-1.16.0+dfsg/build/vignette.rds and /tmp/tmpUT00p8/NapwzYeCID/r-bioc-dada2-1.18.0+dfsg/build/vignette.rds differ
diff -Nru r-bioc-dada2-1.16.0+dfsg/debian/changelog r-bioc-dada2-1.18.0+dfsg/debian/changelog
--- r-bioc-dada2-1.16.0+dfsg/debian/changelog	2020-09-30 13:51:38.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/debian/changelog	2020-11-03 12:19:13.000000000 +0000
@@ -1,3 +1,10 @@
+r-bioc-dada2 (1.18.0+dfsg-1) unstable; urgency=medium
+
+  * Team upload.
+  * d/copyright: pval.c was removed from source
+
+ -- Andreas Tille <tille@debian.org>  Tue, 03 Nov 2020 13:19:13 +0100
+
 r-bioc-dada2 (1.16.0+dfsg-2) unstable; urgency=medium
 
   * Team upload.
diff -Nru r-bioc-dada2-1.16.0+dfsg/debian/control r-bioc-dada2-1.18.0+dfsg/debian/control
--- r-bioc-dada2-1.16.0+dfsg/debian/control	2020-09-30 13:51:38.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/debian/control	2020-11-03 12:19:13.000000000 +0000
@@ -12,7 +12,7 @@
                r-cran-ggplot2 (>= 2.1.0),
                r-cran-reshape2 (>= 1.4.1),
                r-bioc-shortread (>= 1.32.0),
-               r-cran-rcppparallel (>= 5.0.2+dfsg-3),
+               r-cran-rcppparallel,
                r-bioc-iranges (>= 2.6.0),
                r-bioc-xvector (>= 0.16.0),
                r-bioc-biocgenerics (>= 0.22.0)
@@ -28,8 +28,7 @@
          ${shlibs:Depends},
          ${misc:Depends}
 Recommends: ${R:Recommends}
-Suggests: ${R:Suggests},
-          libjs-jquery
+Suggests: ${R:Suggests}
 Description: sample inference from amplicon sequencing data
  The dada2 package contributes to software workflows to interpret
  sequencing data from microbiota - the relative abundance of
diff -Nru r-bioc-dada2-1.16.0+dfsg/debian/copyright r-bioc-dada2-1.18.0+dfsg/debian/copyright
--- r-bioc-dada2-1.16.0+dfsg/debian/copyright	2020-09-30 13:51:38.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/debian/copyright	2020-11-03 12:19:13.000000000 +0000
@@ -11,15 +11,6 @@
                 Susan Holmes
 License: LGPL-3
 
-Files: src/pval.c
-Copyright: 2019 Benjamin Callahan <benjamin.j.callahan@gmail.com>
-           2005-6 Morten Welinder <terra@gnome.org>
-           2005-10 The R Foundation
-           2006-2015 The R Core Team
-           1998 Ross Ihaka
-           2000 The R Core Team
-License: GPL-2.0+
-
 Files: debian/*
 Copyright: 2019 Steffen Moeller <moeller@debian.org>
 License: LGPL-3
diff -Nru r-bioc-dada2-1.16.0+dfsg/DESCRIPTION r-bioc-dada2-1.18.0+dfsg/DESCRIPTION
--- r-bioc-dada2-1.16.0+dfsg/DESCRIPTION	2020-04-28 02:15:51.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/DESCRIPTION	2020-10-27 23:34:15.000000000 +0000
@@ -9,8 +9,8 @@
     removing substitution and chimera errors. Taxonomic classification is available
     via a native implementation of the RDP naive Bayesian classifier, and species-level
     assignment to 16S rRNA gene fragments by exact matching.
-Version: 1.16.0
-Date: 2020-04-07
+Version: 1.18.0
+Date: 2020-08-07
 Maintainer: Benjamin Callahan <benjamin.j.callahan@gmail.com>
 Author: Benjamin Callahan <benjamin.j.callahan@gmail.com>, Paul McMurdie, Susan Holmes
 License: LGPL-3
@@ -33,11 +33,11 @@
         'dada.R' 'errorModels.R' 'filter.R' 'misc.R' 'multiSample.R'
         'paired.R' 'plot-methods.R' 'sequenceIO.R' 'show-methods.R'
         'taxonomy.R'
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.0
 git_url: https://git.bioconductor.org/packages/dada2
-git_branch: RELEASE_3_11
-git_last_commit: 8bedfb4
-git_last_commit_date: 2020-04-27
-Date/Publication: 2020-04-27
+git_branch: RELEASE_3_12
+git_last_commit: a20a676
+git_last_commit_date: 2020-10-27
+Date/Publication: 2020-10-27
 NeedsCompilation: yes
-Packaged: 2020-04-28 02:15:51 UTC; biocbuild
+Packaged: 2020-10-27 23:34:15 UTC; biocbuild
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/addSpecies.Rd r-bioc-dada2-1.18.0+dfsg/man/addSpecies.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/addSpecies.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/addSpecies.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,14 @@
 \alias{addSpecies}
 \title{Add species-level annotation to a taxonomic table.}
 \usage{
-addSpecies(taxtab, refFasta, allowMultiple = FALSE, tryRC = FALSE,
-  n = 2000, verbose = FALSE)
+addSpecies(
+  taxtab,
+  refFasta,
+  allowMultiple = FALSE,
+  tryRC = FALSE,
+  n = 2000,
+  verbose = FALSE
+)
 }
 \arguments{
 \item{taxtab}{(Required). A taxonomic table, the output of \code{\link{assignTaxonomy}}.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/assignSpecies.Rd r-bioc-dada2-1.18.0+dfsg/man/assignSpecies.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/assignSpecies.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/assignSpecies.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,14 @@
 \alias{assignSpecies}
 \title{Taxonomic assignment to the species level by exact matching.}
 \usage{
-assignSpecies(seqs, refFasta, allowMultiple = FALSE, tryRC = FALSE,
-  n = 2000, verbose = FALSE)
+assignSpecies(
+  seqs,
+  refFasta,
+  allowMultiple = FALSE,
+  tryRC = FALSE,
+  n = 2000,
+  verbose = FALSE
+)
 }
 \arguments{
 \item{seqs}{(Required). A character vector of the sequences to be assigned, or an object 
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/assignTaxonomy.Rd r-bioc-dada2-1.18.0+dfsg/man/assignTaxonomy.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/assignTaxonomy.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/assignTaxonomy.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,10 +4,16 @@
 \alias{assignTaxonomy}
 \title{Classifies sequences against reference training dataset.}
 \usage{
-assignTaxonomy(seqs, refFasta, minBoot = 50, tryRC = FALSE,
-  outputBootstraps = FALSE, taxLevels = c("Kingdom", "Phylum", "Class",
-  "Order", "Family", "Genus", "Species"), multithread = FALSE,
-  verbose = FALSE)
+assignTaxonomy(
+  seqs,
+  refFasta,
+  minBoot = 50,
+  tryRC = FALSE,
+  outputBootstraps = FALSE,
+  taxLevels = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
+  multithread = FALSE,
+  verbose = FALSE
+)
 }
 \arguments{
 \item{seqs}{(Required). A character vector of the sequences to be assigned, or an object 
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/c-dada-method.Rd r-bioc-dada2-1.18.0+dfsg/man/c-dada-method.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/c-dada-method.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/c-dada-method.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/show-methods.R
-\docType{methods}
 \name{c,dada-method}
 \alias{c,dada-method}
 \title{Change concatenation of dada-class objects to list construction.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/c-derep-method.Rd r-bioc-dada2-1.18.0+dfsg/man/c-derep-method.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/c-derep-method.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/c-derep-method.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/show-methods.R
-\docType{methods}
 \name{c,derep-method}
 \alias{c,derep-method}
 \title{Change concatenation of derep-class objects to list construction.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/collapseNoMismatch.Rd r-bioc-dada2-1.18.0+dfsg/man/collapseNoMismatch.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/collapseNoMismatch.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/collapseNoMismatch.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,15 @@
 \alias{collapseNoMismatch}
 \title{Combine together sequences that are identical up to shifts and/or length.}
 \usage{
-collapseNoMismatch(seqtab, minOverlap = 20, orderBy = "abundance",
-  identicalOnly = FALSE, vec = TRUE, band = -1, verbose = FALSE)
+collapseNoMismatch(
+  seqtab,
+  minOverlap = 20,
+  orderBy = "abundance",
+  identicalOnly = FALSE,
+  vec = TRUE,
+  band = -1,
+  verbose = FALSE
+)
 }
 \arguments{
 \item{seqtab}{(Required). A sample by sequence matrix, the return of \code{\link{makeSequenceTable}}.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/dada.Rd r-bioc-dada2-1.18.0+dfsg/man/dada.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/dada.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/dada.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,9 +4,17 @@
 \alias{dada}
 \title{High resolution sample inference from amplicon data.}
 \usage{
-dada(derep, err, errorEstimationFunction = loessErrfun,
-  selfConsist = FALSE, pool = FALSE, priors = character(0),
-  multithread = FALSE, verbose = TRUE, ...)
+dada(
+  derep,
+  err,
+  errorEstimationFunction = loessErrfun,
+  selfConsist = FALSE,
+  pool = FALSE,
+  priors = character(0),
+  multithread = FALSE,
+  verbose = TRUE,
+  ...
+)
 }
 \arguments{
 \item{derep}{(Required). \code{character} or \code{\link{derep-class}}.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/derepFastq.Rd r-bioc-dada2-1.18.0+dfsg/man/derepFastq.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/derepFastq.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/derepFastq.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -42,7 +42,7 @@
 # Test that chunk-size, `n`, does not affect the result.
 testFastq = system.file("extdata", "sam1F.fastq.gz", package="dada2")
 derep1 = derepFastq(testFastq, verbose = TRUE)
-derep1.35 = derepFastq(testFastq, 35, TRUE)
+derep1.35 = derepFastq(testFastq, n = 35, verbose = TRUE)
 all.equal(getUniques(derep1), getUniques(derep1.35)[names(getUniques(derep1))])
 
 }
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/errBalancedF.Rd r-bioc-dada2-1.18.0+dfsg/man/errBalancedF.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/errBalancedF.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/errBalancedF.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -3,9 +3,11 @@
 \name{errBalancedF}
 \alias{errBalancedF}
 \title{An empirical error matrix.}
-\format{A numerical matrix with 16 rows and 41 columns.
+\format{
+A numerical matrix with 16 rows and 41 columns.
  Rows correspond to the 16 transition (eg. A2A, A2C, ...)
- Columns correspond to consensus quality scores 0 to 40.}
+ Columns correspond to consensus quality scores 0 to 40.
+}
 \description{
 A dataset containing the error matrix estimated by DADA2 from the forward reads of the
 Illumina Miseq 2x250 sequenced Balanced mock community (see manuscript).
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/errBalancedR.Rd r-bioc-dada2-1.18.0+dfsg/man/errBalancedR.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/errBalancedR.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/errBalancedR.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -3,9 +3,11 @@
 \name{errBalancedR}
 \alias{errBalancedR}
 \title{An empirical error matrix.}
-\format{A numerical matrix with 16 rows and 41 columns.
+\format{
+A numerical matrix with 16 rows and 41 columns.
  Rows correspond to the 16 transition (eg. A2A, A2C, ...)
- Columns correspond to consensus quality scores 0 to 40.}
+ Columns correspond to consensus quality scores 0 to 40.
+}
 \description{
 A dataset containing the error matrix estimated by DADA2 from the reverse reads of the
 Illumina Miseq 2x250 sequenced Balanced mock community (see manuscript).
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/fastqFilter.Rd r-bioc-dada2-1.18.0+dfsg/man/fastqFilter.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/fastqFilter.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/fastqFilter.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,11 +4,28 @@
 \alias{fastqFilter}
 \title{Filter and trim a fastq file.}
 \usage{
-fastqFilter(fn, fout, truncQ = 2, truncLen = 0, maxLen = Inf,
-  minLen = 20, trimLeft = 0, trimRight = 0, maxN = 0, minQ = 0,
-  maxEE = Inf, rm.phix = TRUE, rm.lowcomplex = 0,
-  orient.fwd = NULL, n = 1e+06, OMP = TRUE, qualityType = "Auto",
-  compress = TRUE, verbose = FALSE, ...)
+fastqFilter(
+  fn,
+  fout,
+  truncQ = 2,
+  truncLen = 0,
+  maxLen = Inf,
+  minLen = 20,
+  trimLeft = 0,
+  trimRight = 0,
+  maxN = 0,
+  minQ = 0,
+  maxEE = Inf,
+  rm.phix = TRUE,
+  rm.lowcomplex = 0,
+  orient.fwd = NULL,
+  n = 1e+06,
+  OMP = TRUE,
+  qualityType = "Auto",
+  compress = TRUE,
+  verbose = FALSE,
+  ...
+)
 }
 \arguments{
 \item{fn}{(Required). The path to the input fastq file.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/fastqPairedFilter.Rd r-bioc-dada2-1.18.0+dfsg/man/fastqPairedFilter.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/fastqPairedFilter.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/fastqPairedFilter.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,13 +4,31 @@
 \alias{fastqPairedFilter}
 \title{Filters and trims paired forward and reverse fastq files.}
 \usage{
-fastqPairedFilter(fn, fout, maxN = c(0, 0), truncQ = c(2, 2),
-  truncLen = c(0, 0), maxLen = c(Inf, Inf), minLen = c(20, 20),
-  trimLeft = c(0, 0), trimRight = c(0, 0), minQ = c(0, 0),
-  maxEE = c(Inf, Inf), rm.phix = c(TRUE, TRUE), rm.lowcomplex = c(0,
-  0), matchIDs = FALSE, orient.fwd = NULL, id.sep = "\\\\s",
-  id.field = NULL, n = 1e+06, OMP = TRUE, qualityType = "Auto",
-  compress = TRUE, verbose = FALSE, ...)
+fastqPairedFilter(
+  fn,
+  fout,
+  maxN = c(0, 0),
+  truncQ = c(2, 2),
+  truncLen = c(0, 0),
+  maxLen = c(Inf, Inf),
+  minLen = c(20, 20),
+  trimLeft = c(0, 0),
+  trimRight = c(0, 0),
+  minQ = c(0, 0),
+  maxEE = c(Inf, Inf),
+  rm.phix = c(TRUE, TRUE),
+  rm.lowcomplex = c(0, 0),
+  matchIDs = FALSE,
+  orient.fwd = NULL,
+  id.sep = "\\\\s",
+  id.field = NULL,
+  n = 1e+06,
+  OMP = TRUE,
+  qualityType = "Auto",
+  compress = TRUE,
+  verbose = FALSE,
+  ...
+)
 }
 \arguments{
 \item{fn}{(Required). A \code{character(2)} naming the paths to the (forward,reverse) fastq files.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/filterAndTrim.Rd r-bioc-dada2-1.18.0+dfsg/man/filterAndTrim.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/filterAndTrim.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/filterAndTrim.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,13 +4,33 @@
 \alias{filterAndTrim}
 \title{Filter and trim fastq file(s).}
 \usage{
-filterAndTrim(fwd, filt, rev = NULL, filt.rev = NULL,
-  compress = TRUE, truncQ = 2, truncLen = 0, trimLeft = 0,
-  trimRight = 0, maxLen = Inf, minLen = 20, maxN = 0, minQ = 0,
-  maxEE = Inf, rm.phix = TRUE, rm.lowcomplex = 0,
-  orient.fwd = NULL, matchIDs = FALSE, id.sep = "\\\\s",
-  id.field = NULL, multithread = FALSE, n = 1e+05, OMP = TRUE,
-  qualityType = "Auto", verbose = FALSE)
+filterAndTrim(
+  fwd,
+  filt,
+  rev = NULL,
+  filt.rev = NULL,
+  compress = TRUE,
+  truncQ = 2,
+  truncLen = 0,
+  trimLeft = 0,
+  trimRight = 0,
+  maxLen = Inf,
+  minLen = 20,
+  maxN = 0,
+  minQ = 0,
+  maxEE = Inf,
+  rm.phix = TRUE,
+  rm.lowcomplex = 0,
+  orient.fwd = NULL,
+  matchIDs = FALSE,
+  id.sep = "\\\\s",
+  id.field = NULL,
+  multithread = FALSE,
+  n = 1e+05,
+  OMP = TRUE,
+  qualityType = "Auto",
+  verbose = FALSE
+)
 }
 \arguments{
 \item{fwd}{(Required). \code{character}.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/isBimeraDenovo.Rd r-bioc-dada2-1.18.0+dfsg/man/isBimeraDenovo.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/isBimeraDenovo.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/isBimeraDenovo.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,10 +4,16 @@
 \alias{isBimeraDenovo}
 \title{Identify bimeras from collections of unique sequences.}
 \usage{
-isBimeraDenovo(unqs, minFoldParentOverAbundance = 2,
-  minParentAbundance = 8, allowOneOff = FALSE,
-  minOneOffParentDistance = 4, maxShift = 16, multithread = FALSE,
-  verbose = FALSE)
+isBimeraDenovo(
+  unqs,
+  minFoldParentOverAbundance = 2,
+  minParentAbundance = 8,
+  allowOneOff = FALSE,
+  minOneOffParentDistance = 4,
+  maxShift = 16,
+  multithread = FALSE,
+  verbose = FALSE
+)
 }
 \arguments{
 \item{unqs}{(Required). A \code{\link{uniques-vector}} or any object that can be coerced
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/isBimeraDenovoTable.Rd r-bioc-dada2-1.18.0+dfsg/man/isBimeraDenovoTable.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/isBimeraDenovoTable.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/isBimeraDenovoTable.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,11 +4,18 @@
 \alias{isBimeraDenovoTable}
 \title{Identify bimeras in a sequence table.}
 \usage{
-isBimeraDenovoTable(seqtab, minSampleFraction = 0.9,
-  ignoreNNegatives = 1, minFoldParentOverAbundance = 1.5,
-  minParentAbundance = 2, allowOneOff = FALSE,
-  minOneOffParentDistance = 4, maxShift = 16, multithread = FALSE,
-  verbose = FALSE)
+isBimeraDenovoTable(
+  seqtab,
+  minSampleFraction = 0.9,
+  ignoreNNegatives = 1,
+  minFoldParentOverAbundance = 1.5,
+  minParentAbundance = 2,
+  allowOneOff = FALSE,
+  minOneOffParentDistance = 4,
+  maxShift = 16,
+  multithread = FALSE,
+  verbose = FALSE
+)
 }
 \arguments{
 \item{seqtab}{(Required). A sequence table. That is, an integer matrix with colnames
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/isBimera.Rd r-bioc-dada2-1.18.0+dfsg/man/isBimera.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/isBimera.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/isBimera.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,13 @@
 \alias{isBimera}
 \title{Determine if input sequence is a bimera of putative parent sequences.}
 \usage{
-isBimera(sq, parents, allowOneOff = FALSE, minOneOffParentDistance = 4,
-  maxShift = 16)
+isBimera(
+  sq,
+  parents,
+  allowOneOff = FALSE,
+  minOneOffParentDistance = 4,
+  maxShift = 16
+)
 }
 \arguments{
 \item{sq}{(Required). A \code{character(1)}.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/isPhiX.Rd r-bioc-dada2-1.18.0+dfsg/man/isPhiX.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/isPhiX.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/isPhiX.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,7 @@
 \alias{isPhiX}
 \title{Determine if input sequence(s) match the phiX genome.}
 \usage{
-isPhiX(seqs, wordSize = 16, minMatches = 2, nonOverlapping = TRUE,
-  ...)
+isPhiX(seqs, wordSize = 16, minMatches = 2, nonOverlapping = TRUE, ...)
 }
 \arguments{
 \item{seqs}{(Required). A \code{character} vector of A/C/G/T sequences.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/isShiftDenovo.Rd r-bioc-dada2-1.18.0+dfsg/man/isShiftDenovo.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/isShiftDenovo.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/isShiftDenovo.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -5,8 +5,7 @@
 \title{Identify sequences that are identical to a more abundant sequence up to an
 overall shift.}
 \usage{
-isShiftDenovo(unqs, minOverlap = 20, flagSubseqs = FALSE,
-  verbose = FALSE)
+isShiftDenovo(unqs, minOverlap = 20, flagSubseqs = FALSE, verbose = FALSE)
 }
 \arguments{
 \item{unqs}{(Required). A \code{\link{uniques-vector}} or any object that can be coerced
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/learnErrors.Rd r-bioc-dada2-1.18.0+dfsg/man/learnErrors.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/learnErrors.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/learnErrors.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,10 +4,19 @@
 \alias{learnErrors}
 \title{Learns the error rates from an input list, or vector, of file names or a list of \code{\link{derep-class}} objects.}
 \usage{
-learnErrors(fls, nbases = 1e+08, nreads = NULL,
-  errorEstimationFunction = loessErrfun, multithread = FALSE,
-  randomize = FALSE, MAX_CONSIST = 10, OMEGA_C = 0,
-  qualityType = "Auto", verbose = FALSE, ...)
+learnErrors(
+  fls,
+  nbases = 1e+08,
+  nreads = NULL,
+  errorEstimationFunction = loessErrfun,
+  multithread = FALSE,
+  randomize = FALSE,
+  MAX_CONSIST = 10,
+  OMEGA_C = 0,
+  qualityType = "Auto",
+  verbose = FALSE,
+  ...
+)
 }
 \arguments{
 \item{fls}{(Required). \code{character}.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/makeTaxonomyFasta_SilvaNR.Rd r-bioc-dada2-1.18.0+dfsg/man/makeTaxonomyFasta_SilvaNR.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/makeTaxonomyFasta_SilvaNR.Rd	1970-01-01 00:00:00.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/makeTaxonomyFasta_SilvaNR.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/taxonomy.R
+\name{makeTaxonomyFasta_SilvaNR}
+\alias{makeTaxonomyFasta_SilvaNR}
+\title{This function creates the dada2 assignTaxonomy training fasta for the official Silva NR99
+release files. If `include.species`=TRUE, a 7th taxonomic level (species) will be added based on the
+Genus species binomial in the Silva taxonomy string, if present and valid.}
+\usage{
+makeTaxonomyFasta_SilvaNR(
+  fin,
+  ftax,
+  fout,
+  include.species = FALSE,
+  compress = TRUE
+)
+}
+\description{
+## Silva release v138
+path <- "~/tax/Silva/v138"
+dada2:::makeTaxonomyFasta_SilvaNR(file.path(path, "SILVA_138_SSURef_NR99_tax_silva.fasta.gz"), 
+    file.path(path, "tax_slv_ssu_138.txt"), 
+    "~/Desktop/silva_nr99_v138_train_set.fa.gz")
+dada2:::makeTaxonomyFasta_SilvaNR(file.path(path, "SILVA_138_SSURef_NR99_tax_silva.fasta.gz"), 
+    file.path(path, "tax_slv_ssu_138.txt"), 
+    include.species=TRUE, "~/Desktop/silva_nr99_v138_wSpecies_train_set.fa.gz")
+}
+\keyword{internal}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/makeTaxonomyFasta_Silva.Rd r-bioc-dada2-1.18.0+dfsg/man/makeTaxonomyFasta_Silva.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/makeTaxonomyFasta_Silva.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/makeTaxonomyFasta_Silva.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -2,19 +2,21 @@
 % Please edit documentation in R/taxonomy.R
 \name{makeTaxonomyFasta_Silva}
 \alias{makeTaxonomyFasta_Silva}
-\title{This function creates the dada2 assignTaxonomy training fasta for the Silva .align file
-generated by the Mothur project.}
+\title{DEPRECATED in favor of `makeTaxonomyFasta_SilvaNR``}
 \usage{
 makeTaxonomyFasta_Silva(fin, ftax, fout, compress = TRUE)
 }
 \description{
+This function creates the dada2 assignTaxonomy training fasta for the Silva .align file
+generated by the Mothur project.
+}
+\details{
 ## Silva release v128
 path <- "~/Desktop/Silva/Silva.nr_v128"
 dada2:::makeTaxonomyFasta_Silva(file.path(path, "silva.nr_v128.align"), 
     file.path(path, "silva.nr_v128.tax"), 
     "~/tax/silva_nr_v128_train_set.fa.gz")
-}
-\details{
+
 ## Silva release v132
 path <- "~/Desktop/Silva/Silva.nr_v132"
 dada2:::makeTaxonomyFasta_Silva(file.path(path, "silva.nr_v132.align"), 
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/mergePairs.Rd r-bioc-dada2-1.18.0+dfsg/man/mergePairs.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/mergePairs.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/mergePairs.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,10 +4,20 @@
 \alias{mergePairs}
 \title{Merge denoised forward and reverse reads.}
 \usage{
-mergePairs(dadaF, derepF, dadaR, derepR, minOverlap = 12,
-  maxMismatch = 0, returnRejects = FALSE,
-  propagateCol = character(0), justConcatenate = FALSE,
-  trimOverhang = FALSE, verbose = FALSE, ...)
+mergePairs(
+  dadaF,
+  derepF,
+  dadaR,
+  derepR,
+  minOverlap = 12,
+  maxMismatch = 0,
+  returnRejects = FALSE,
+  propagateCol = character(0),
+  justConcatenate = FALSE,
+  trimOverhang = FALSE,
+  verbose = FALSE,
+  ...
+)
 }
 \arguments{
 \item{dadaF}{(Required). A \code{\link{dada-class}} object, or a list of such objects.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/mergeSequenceTables.Rd r-bioc-dada2-1.18.0+dfsg/man/mergeSequenceTables.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/mergeSequenceTables.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/mergeSequenceTables.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,15 @@
 \alias{mergeSequenceTables}
 \title{Merge two or more sample-by-sequence observation matrices.}
 \usage{
-mergeSequenceTables(table1 = NULL, table2 = NULL, ..., tables = NULL,
-  repeats = "error", orderBy = "abundance", tryRC = FALSE)
+mergeSequenceTables(
+  table1 = NULL,
+  table2 = NULL,
+  ...,
+  tables = NULL,
+  repeats = "error",
+  orderBy = "abundance",
+  tryRC = FALSE
+)
 }
 \arguments{
 \item{table1}{(Optional, default=NULL). Named integer matrix. Rownames correspond to samples
@@ -47,7 +54,7 @@
 \examples{
 
 \dontrun{
-  mergetab <- mergeSequenceTables(seqtab1, seqtab2, seqtab3) # unnamed arguments are assumed to be individual sequence tables
+  mergetab <- mergeSequenceTables(seqtab1, seqtab2, seqtab3) # unnamed arguments assumed to be sequence tables
   input_tables <- list(seqtab1, seqtab2, seqtab3)
   mergetab <- mergeSequenceTables(tables=input_tables) # list of sequence tables
   files <- c(file1, file2, file3)
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/names-set-dada-ANY-method.Rd r-bioc-dada2-1.18.0+dfsg/man/names-set-dada-ANY-method.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/names-set-dada-ANY-method.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/names-set-dada-ANY-method.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/show-methods.R
-\docType{methods}
 \name{names<-,dada,ANY-method}
 \alias{names<-,dada,ANY-method}
 \title{Deactivate renaming of dada-class objects.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/names-set-derep-ANY-method.Rd r-bioc-dada2-1.18.0+dfsg/man/names-set-derep-ANY-method.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/names-set-derep-ANY-method.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/names-set-derep-ANY-method.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/show-methods.R
-\docType{methods}
 \name{names<-,derep,ANY-method}
 \alias{names<-,derep,ANY-method}
 \title{Deactivate renaming of derep-class objects.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/nwalign.Rd r-bioc-dada2-1.18.0+dfsg/man/nwalign.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/nwalign.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/nwalign.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,9 +4,17 @@
 \alias{nwalign}
 \title{Needleman-Wunsch alignment.}
 \usage{
-nwalign(s1, s2, match = getDadaOpt("MATCH"),
-  mismatch = getDadaOpt("MISMATCH"), gap = getDadaOpt("GAP_PENALTY"),
-  homo_gap = NULL, band = -1, endsfree = TRUE, vec = FALSE)
+nwalign(
+  s1,
+  s2,
+  match = getDadaOpt("MATCH"),
+  mismatch = getDadaOpt("MISMATCH"),
+  gap = getDadaOpt("GAP_PENALTY"),
+  homo_gap = NULL,
+  band = -1,
+  endsfree = TRUE,
+  vec = FALSE
+)
 }
 \arguments{
 \item{s1}{(Required). \code{character(1)}. The first sequence to align. A/C/G/T only.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/plotComplexity.Rd r-bioc-dada2-1.18.0+dfsg/man/plotComplexity.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/plotComplexity.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/plotComplexity.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,16 @@
 \alias{plotComplexity}
 \title{Plot sequence complexity profile of a fastq file.}
 \usage{
-plotComplexity(fl, kmerSize = 2, window = NULL, by = 5, n = 1e+05,
-  bins = 100, aggregate = FALSE, ...)
+plotComplexity(
+  fl,
+  kmerSize = 2,
+  window = NULL,
+  by = 5,
+  n = 1e+05,
+  bins = 100,
+  aggregate = FALSE,
+  ...
+)
 }
 \arguments{
 \item{fl}{(Required). \code{character}.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/plotErrors.Rd r-bioc-dada2-1.18.0+dfsg/man/plotErrors.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/plotErrors.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/plotErrors.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,9 +4,15 @@
 \alias{plotErrors}
 \title{Plot observed and estimated error rates.}
 \usage{
-plotErrors(dq, nti = c("A", "C", "G", "T"), ntj = c("A", "C", "G",
-  "T"), obs = TRUE, err_out = TRUE, err_in = FALSE,
-  nominalQ = FALSE)
+plotErrors(
+  dq,
+  nti = c("A", "C", "G", "T"),
+  ntj = c("A", "C", "G", "T"),
+  obs = TRUE,
+  err_out = TRUE,
+  err_in = FALSE,
+  nominalQ = FALSE
+)
 }
 \arguments{
 \item{dq}{(Required). An object from which error rates can be extracted. Valid inputs are
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/qtables2.Rd r-bioc-dada2-1.18.0+dfsg/man/qtables2.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/qtables2.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/qtables2.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,7 +4,7 @@
 \alias{qtables2}
 \title{Internal tables function}
 \usage{
-qtables2(x, qeff = FALSE)
+qtables2(x, qeff = FALSE, handle.zerolen = TRUE)
 }
 \arguments{
 \item{x}{ShortReadQ.
@@ -12,6 +12,9 @@
 
 \item{qeff}{\code{logical(1)}.
 Calculate average quality by first transforming to expected error rate.}
+
+\item{handle.zerolen}{\code{logical(1)}.
+Default TRUE. If TRUE, gracefully excludes zero-length sequences.}
 }
 \value{
 List.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/removePrimers.Rd r-bioc-dada2-1.18.0+dfsg/man/removePrimers.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/removePrimers.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/removePrimers.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,9 +4,19 @@
 \alias{removePrimers}
 \title{Removes primers and orients reads in a consistent direction.}
 \usage{
-removePrimers(fn, fout, primer.fwd, primer.rev = NULL,
-  max.mismatch = 2, allow.indels = FALSE, trim.fwd = TRUE,
-  trim.rev = TRUE, orient = TRUE, compress = TRUE, verbose = FALSE)
+removePrimers(
+  fn,
+  fout,
+  primer.fwd,
+  primer.rev = NULL,
+  max.mismatch = 2,
+  allow.indels = FALSE,
+  trim.fwd = TRUE,
+  trim.rev = TRUE,
+  orient = TRUE,
+  compress = TRUE,
+  verbose = FALSE
+)
 }
 \arguments{
 \item{fn}{(Required). \code{character}.
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/show-methods.Rd r-bioc-dada2-1.18.0+dfsg/man/show-methods.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/show-methods.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/show-methods.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/show-methods.R
-\docType{methods}
 \name{show,derep-method}
 \alias{show,derep-method}
 \alias{show,dada-method}
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/tperr1.Rd r-bioc-dada2-1.18.0+dfsg/man/tperr1.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/tperr1.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/tperr1.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -3,9 +3,11 @@
 \name{tperr1}
 \alias{tperr1}
 \title{An empirical error matrix.}
-\format{A numerical matrix with 16 rows and 41 columns.
+\format{
+A numerical matrix with 16 rows and 41 columns.
  Rows correspond to the 16 transition (eg. A2A, A2C, ...)
- Columns correspond to consensus quality scores 0 to 40.}
+ Columns correspond to consensus quality scores 0 to 40.
+}
 \description{
 A dataset containing the error matrix estimated by fitting a piecewise linear model to
 the errors observed in the mock community featured in Schirmer 2015 (metaID 35).
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/uniquesToFasta.Rd r-bioc-dada2-1.18.0+dfsg/man/uniquesToFasta.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/uniquesToFasta.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/uniquesToFasta.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -4,8 +4,7 @@
 \alias{uniquesToFasta}
 \title{Write a uniques vector to a FASTA file}
 \usage{
-uniquesToFasta(unqs, fout, ids = NULL, mode = "w", width = 20000,
-  ...)
+uniquesToFasta(unqs, fout, ids = NULL, mode = "w", width = 20000, ...)
 }
 \arguments{
 \item{unqs}{(Required).
diff -Nru r-bioc-dada2-1.16.0+dfsg/man/writeFasta.Rd r-bioc-dada2-1.18.0+dfsg/man/writeFasta.Rd
--- r-bioc-dada2-1.16.0+dfsg/man/writeFasta.Rd	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/man/writeFasta.Rd	2020-10-27 17:56:28.000000000 +0000
@@ -1,13 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/sequenceIO.R
-\docType{methods}
 \name{writeFasta,character-method}
 \alias{writeFasta,character-method}
 \title{Writes a named character vector of DNA sequences to a fasta file.
 Values are the sequences, and names are used for the id lines.}
 \usage{
-\S4method{writeFasta}{character}(object, file, mode = "w",
-  width = 20000L, ...)
+\S4method{writeFasta}{character}(object, file, mode = "w", width = 20000L, ...)
 }
 \arguments{
 \item{object}{(Required). A named \code{character} vector.}
diff -Nru r-bioc-dada2-1.16.0+dfsg/NAMESPACE r-bioc-dada2-1.18.0+dfsg/NAMESPACE
--- r-bioc-dada2-1.16.0+dfsg/NAMESPACE	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/NAMESPACE	2020-10-27 17:56:28.000000000 +0000
@@ -46,6 +46,7 @@
 importFrom(Biostrings,DNAStringSet)
 importFrom(Biostrings,PDict)
 importFrom(Biostrings,end)
+importFrom(Biostrings,matchPattern)
 importFrom(Biostrings,oligonucleotideFrequency)
 importFrom(Biostrings,quality)
 importFrom(Biostrings,readDNAStringSet)
diff -Nru r-bioc-dada2-1.16.0+dfsg/NEWS r-bioc-dada2-1.18.0+dfsg/NEWS
--- r-bioc-dada2-1.16.0+dfsg/NEWS	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/NEWS	2020-10-27 17:56:28.000000000 +0000
@@ -1,3 +1,29 @@
+CHANGES IN VERSION 1.16.0
+-----------------------
+
+SIGNIFICANT USER-VISIBLE CHANGES
+
+    o The `dada-class$clustering` data.frame includes a new column `$birth_from` which encodes the cluster (ASV) from which the associated cluster (ASV) was divided (or "born").
+
+    o Short sequences are now given `NA` assignments at all levels, rather than stopping the `assignTaxonomy` function entirely.
+    
+BUG FIXES
+
+    o All uses of the `class(x) == "y"` idiom have been replaced by `is(x, "y")` to conform to current R best practices and Bioconductor build requirements.
+
+    o A rare error in how the `dada-class$clustering$birth_pval` was calculated has been fixed.
+
+    o Read lengths are now visualized correctly by `plotQualityProfile(..., aggregate=TRUE)`.
+
+
+CHANGES IN VERSION 1.14.1
+-----------------------
+
+BUG FIXES
+
+    o A segfault bug in `assignTaxonomy` has been fixed. This segfault occurred when multithreading was enabled and multiple exact matches to the query sequence existed in the reference database being queried against.
+
+
 CHANGES IN VERSION 1.14.0
 -----------------------
 
@@ -5,7 +31,7 @@
 
     o Directories containing fastq files (possibly compressed) can now be provided to core dada2 functions instead of a character vector of the fastq filenames. This functionality is supported by `filterAndTrim`, `learnErrors`, `dada`, `mergePairs` and `derepFastq`. Note, this feature requires fastqs in the provided directory to have standard file extensions: `.fastq`, `.fastq.gz` or `.fastq.bz2`.
 
-    o The new `DETECT_SINGLETONS` option removes the removes the “conditional” in the calculation of probabilties used in the core dada algorithm, which effectively discounts the first read of any novel sequence. In practice, setting `DETECT_SINGLETONS = TRUE` allows singletons to be detected (of course) and also increases sensitivity to other low abundance sequences slightly, i.e. those present in just 2/3/4 reads. Note, we do not generally recommend this option as it will result in a large increase in false positives in typical datasets. Instead we recommend `pool = "pseudo"` or `pool=TRUE` for typical datasets to increase sensitivity to rare sequences with less impact on specificity.
+    o The new `DETECT_SINGLETONS` option removes the “conditional” in the calculation of probabilties used in the core dada algorithm, which effectively discounts the first read of any novel sequence. In practice, setting `DETECT_SINGLETONS = TRUE` allows singletons to be detected (of course) and also increases sensitivity to other low abundance sequences slightly, i.e. those present in just 2/3/4 reads. Note, we do not generally recommend this option as it will result in a large increase in false positives in typical datasets. Instead we recommend `pool = "pseudo"` or `pool=TRUE` for typical datasets to increase sensitivity to rare sequences with less impact on specificity.
 
 SIGNIFICANT USER-VISIBLE CHANGES
 
diff -Nru r-bioc-dada2-1.16.0+dfsg/R/filter.R r-bioc-dada2-1.18.0+dfsg/R/filter.R
--- r-bioc-dada2-1.16.0+dfsg/R/filter.R	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/R/filter.R	2020-10-27 17:56:28.000000000 +0000
@@ -87,6 +87,7 @@
   if(length(fn) != length(fout)) stop("Every input file must have a corresponding output file.")
   if(allow.indels) message("Primer matching with indels allowed is currently significantly (~4x) slower.")
   require.fwd <- TRUE; require.rev <- TRUE ###
+  first.multi.msg <- TRUE
   odirs <- unique(dirname(fout))
   for(odir in odirs) {
     if(!dir.exists(odir)) { 
@@ -113,9 +114,11 @@
   colnames(rval) <- c("reads.in", "reads.out")
   rownames(rval) <- basename(fn)
   for(i in seq_along(fn)) {
-    # Read in file
+    # Read in file and init filtering stats
     fq <- readFastq(fn[[i]])
     inseqs <- length(fq)
+    outseqs <- 0
+    rval[i,c("reads.in", "reads.out")] <- c(inseqs, outseqs)
     # Match patterns
     if(allow.indels) { # Use slower matchPattern because it supports indels
       match.fwd <- lapply(sread(fq), function(x) matchPattern(primer.fwd, x, max.mismatch=max.mismatch, with.indels=allow.indels, fixed=fixed.fwd))
@@ -151,10 +154,13 @@
     if(has.rev) hits.rev <- sapply(match.rev, length)
     if(!require.fwd) stop("Currently, only require.fwd=TRUE is supported.")
     if(has.rev && !require.rev) stop("Currently, only require.rev=TRUE is supported when a reverse primer sequence is provided.")
-    if(require.fwd && sum(hits.fwd) == 0) stop("No sequences matched the provided forward primer sequence.")
-    if(has.rev && require.rev && sum(hits.rev) == 0) stop("Reverse primer sequences were provided, but no sequences matched the provided reverse primer sequence.")
+    if(require.fwd && sum(hits.fwd) == 0) { filt.print(inseqs, outseqs); next }
+    if(has.rev && require.rev && sum(hits.rev) == 0) { filt.print(inseqs, outseqs); next }
     if(any(hits.fwd>1) || (has.rev && any(hits.rev>1))) {
-      if(verbose) message("Multiple matches to the primer(s) in some sequences. Using the longest possible match.")
+      if(verbose && first.multi.msg) {
+        message("Multiple matches to the primer(s) in some sequences. Using the longest possible match.")
+        first.multi.msg <- FALSE
+      }
       match.fwd[hits.fwd>1] <- sapply(match.fwd[hits.fwd>1], `[`, 1)
       if(has.rev) match.rev[hits.rev>1] <- sapply(match.rev[hits.rev>1], function(x) rev(x)[1])
     }
@@ -162,7 +168,10 @@
       hits.fwd.rc <- sapply(match.fwd.rc, length)
       if(has.rev) hits.rev.rc <- sapply(match.rev.rc, length)
       if(any(hits.fwd.rc>1) || (has.rev && any(hits.rev.rc>1))) {
-        if(verbose) message("Multiple matches to the primer(s) in some reverse-complement sequences. Using the longest possible match.")
+        if(verbose && first.multi.msg) {
+          message("Multiple matches to the primer(s) in some sequences. Using the longest possible match.")
+          first.multi.msg <- FALSE
+        }
         match.fwd.rc[hits.fwd.rc>1] <- sapply(match.fwd.rc[hits.fwd.rc>1], `[`, 1)
         if(has.rev) match.rev.rc[hits.rev.rc>1] <- sapply(match.rev.rc[hits.rev.rc>1], function(x) rev(x)[1])
       }
@@ -213,11 +222,7 @@
     writeFastq(fq, fout[[i]], "w", compress=compress)
     outseqs <- length(fq)
     rval[i,c("reads.in", "reads.out")] <- c(inseqs, outseqs)
-    if(verbose) {
-      outperc <- round(outseqs * 100 / inseqs, 1)
-      outperc <- paste(" (", outperc, "%)", sep="")
-      message("Read in ", inseqs, ", output ", outseqs, outperc, " filtered sequences.", sep="")
-    }
+    if(verbose) filt.print(inseqs, outseqs)
   }
   if(all(rval[,"reads.out"]==0)) {
     warning("No reads passed the primer detection.")
@@ -227,6 +232,12 @@
   return(invisible(rval))
 }  
 
+filt.print <- function(inseqs, outseqs) {
+  outperc <- round(outseqs * 100 / inseqs, 1)
+  outperc <- paste(" (", outperc, "%)", sep="")
+  message("Read in ", inseqs, ", output ", outseqs, outperc, " filtered sequences.", sep="")
+}
+
 #' Filter and trim fastq file(s).
 #' 
 #' Filters and trims an input fastq file(s) (can be compressed)
diff -Nru r-bioc-dada2-1.16.0+dfsg/R/paired.R r-bioc-dada2-1.18.0+dfsg/R/paired.R
--- r-bioc-dada2-1.16.0+dfsg/R/paired.R	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/R/paired.R	2020-10-27 17:56:28.000000000 +0000
@@ -113,8 +113,9 @@
     mapR <- getDerep(derepR[[i]])$map
     if(!(is.integer(mapF) && is.integer(mapR))) stop("Incorrect format of $map in derep-class arguments.")
 #    if(any(is.na(rF)) || any(is.na(rR))) stop("Non-corresponding maps and dada-outputs.")
-    if(!(length(mapF) == length(mapR) && max(mapF) == length(dadaF[[i]]$map) &&
-         max(mapR) == length(dadaR[[i]]$map))) {
+    if(!(length(mapF) == length(mapR) && 
+         max(mapF, na.rm=TRUE) == length(dadaF[[i]]$map) &&
+         max(mapR, na.rm=TRUE) == length(dadaR[[i]]$map))) {
       stop("Non-corresponding derep-class and dada-class objects.")
     }
     rF <- dadaF[[i]]$map[mapF]
diff -Nru r-bioc-dada2-1.16.0+dfsg/R/plot-methods.R r-bioc-dada2-1.18.0+dfsg/R/plot-methods.R
--- r-bioc-dada2-1.16.0+dfsg/R/plot-methods.R	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/R/plot-methods.R	2020-10-27 17:56:28.000000000 +0000
@@ -179,7 +179,7 @@
       FIRST <- FALSE
     } else { plotdf <- rbind(plotdf, cbind(df, file=basename(f))) }
     statdf <- rbind(statdf, data.frame(Cycle=as.integer(rownames(means)), Mean=means, 
-                                       Q25=as.vector(q25s), Q50=as.vector(q50s), Q75=as.vector(q75s), Cum=10*as.vector(cums)/rc, file=basename(f)))
+                                       Q25=as.vector(q25s), Q50=as.vector(q50s), Q75=as.vector(q75s), Cum=10*as.vector(cums)/min(rc, n), file=basename(f)))
     anndf <- rbind(anndf, data.frame(minScore=min(df$Score), label=basename(f), rclabel=rclabel, rc=rc, file=basename(f)))
   }
   anndf$minScore <- min(anndf$minScore)
diff -Nru r-bioc-dada2-1.16.0+dfsg/R/RcppExports.R r-bioc-dada2-1.18.0+dfsg/R/RcppExports.R
--- r-bioc-dada2-1.16.0+dfsg/R/RcppExports.R	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/R/RcppExports.R	2020-10-27 17:56:28.000000000 +0000
@@ -61,10 +61,6 @@
     .Call('_dada2_C_nwvec', PACKAGE = 'dada2', s1, s2, match, mismatch, gap_p, band, endsfree)
 }
 
-C_assign_taxonomy <- function(seqs, rcs, refs, ref_to_genus, genusmat, try_rc, verbose) {
-    .Call('_dada2_C_assign_taxonomy', PACKAGE = 'dada2', seqs, rcs, refs, ref_to_genus, genusmat, try_rc, verbose)
-}
-
 C_assign_taxonomy2 <- function(seqs, rcs, refs, ref_to_genus, genusmat, try_rc, verbose) {
     .Call('_dada2_C_assign_taxonomy2', PACKAGE = 'dada2', seqs, rcs, refs, ref_to_genus, genusmat, try_rc, verbose)
 }
diff -Nru r-bioc-dada2-1.16.0+dfsg/R/sequenceIO.R r-bioc-dada2-1.18.0+dfsg/R/sequenceIO.R
--- r-bioc-dada2-1.16.0+dfsg/R/sequenceIO.R	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/R/sequenceIO.R	2020-10-27 17:56:28.000000000 +0000
@@ -39,10 +39,10 @@
 #' # Test that chunk-size, `n`, does not affect the result.
 #' testFastq = system.file("extdata", "sam1F.fastq.gz", package="dada2")
 #' derep1 = derepFastq(testFastq, verbose = TRUE)
-#' derep1.35 = derepFastq(testFastq, 35, TRUE)
+#' derep1.35 = derepFastq(testFastq, n = 35, verbose = TRUE)
 #' all.equal(getUniques(derep1), getUniques(derep1.35)[names(getUniques(derep1))])
 #' 
-derepFastq <- function(fls, n = 1e6, verbose = FALSE, qualityType = "Auto"){
+derepFastq <- function(fls, n = 1e6, verbose = FALSE, qualityType = "Auto") {
   if(!is.character(fls)) { stop("File paths must be provided in character format.") }
   if(length(fls)==1 && dir.exists(fls)) { fls <- parseFastqDirectory(fls) }
   if(!all(file.exists(fls))) { stop("Not all provided files exist.") }
@@ -56,7 +56,7 @@
     f <- FastqStreamer(fl, n = n)
     suppressWarnings(fq <- yield(f, qualityType = qualityType))
     
-    out <- qtables2(fq, FALSE) ###ITS
+    out <- qtables2(fq) ###ITS
     
     derepCounts <- out$uniques
     derepQuals <- out$cum_quals
@@ -68,7 +68,7 @@
       if(verbose){
         message(".", appendLF = FALSE)
       }
-      out <- qtables2(fq, FALSE)
+      out <- qtables2(fq)
       # Augment quality matrices with NAs as needed to match ncol
       if(ncol(out$cum_quals) > ncol(derepQuals)) {
         derepQuals <- cbind(derepQuals, matrix(NA, nrow=nrow(derepQuals), ncol=(ncol(out$cum_quals)-ncol(derepQuals))))
@@ -134,6 +134,9 @@
 #' @param qeff \code{logical(1)}.
 #'  Calculate average quality by first transforming to expected error rate.
 #' 
+#' @param handle.zerolen \code{logical(1)}.
+#' Default TRUE. If TRUE, gracefully excludes zero-length sequences.
+#' 
 #' @return List.
 #'  Matches format of derep-class object.
 #' 
@@ -144,7 +147,15 @@
 #' 
 #' @keywords internal
 #' 
-qtables2 <- function(x, qeff = FALSE) {
+qtables2 <- function(x, qeff = FALSE, handle.zerolen=TRUE) {
+  nread <- length(x)
+  npos <- sum(width(x) > 0)
+  if(npos == 0) { stop("Only zero-length sequences detected during dereplication.") }
+  if(handle.zerolen && npos < nread) { # Some zero length sequences in input
+    warning("Zero-length sequences detected during dereplication. They will be ignored.")
+    is.pos <- width(x) > 0
+    x <- x[is.pos]
+  }
   # ranks are lexical rank
   srt <- srsort(x) # map from rank to sequence/quality/id
   rnk <- srrank(x) # map from read_i to rank (integer vec of ranks, ties take top rank)
@@ -158,6 +169,11 @@
   rnk2unqi <- rep(seq(length(uniques)), tab[tab>0]) # map from rank to uniques index
   map <- rnk2unqi[rnk] # map from read index to unique index
   
+  if(handle.zerolen && npos < nread) { # Fix mapping to include NAs for the zero-length input reads
+    foo <- map
+    map <- rep(as.integer(NA), nread)
+    map[is.pos] <- foo
+  }
   # do matrices
   qmat <- as(quality(srt), "matrix") # map from read_i to quality
   if(qeff) qmat <- 10^(-qmat/10)  # Convert to nominal error probability first
diff -Nru r-bioc-dada2-1.16.0+dfsg/R/taxonomy.R r-bioc-dada2-1.18.0+dfsg/R/taxonomy.R
--- r-bioc-dada2-1.16.0+dfsg/R/taxonomy.R	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/R/taxonomy.R	2020-10-27 17:56:28.000000000 +0000
@@ -465,6 +465,136 @@
              width=20000L, compress=compress)
 }
 
+#' This function creates the dada2 assignTaxonomy training fasta for the official Silva NR99
+#' release files. If `include.species`=TRUE, a 7th taxonomic level (species) will be added based on the
+#' Genus species binomial in the Silva taxonomy string, if present and valid.
+#' 
+#' ## Silva release v138
+#' path <- "~/tax/Silva/v138"
+#' dada2:::makeTaxonomyFasta_SilvaNR(file.path(path, "SILVA_138_SSURef_NR99_tax_silva.fasta.gz"), 
+#'     file.path(path, "tax_slv_ssu_138.txt"), 
+#'     "~/Desktop/silva_nr99_v138_train_set.fa.gz")
+#' dada2:::makeTaxonomyFasta_SilvaNR(file.path(path, "SILVA_138_SSURef_NR99_tax_silva.fasta.gz"), 
+#'     file.path(path, "tax_slv_ssu_138.txt"), 
+#'     include.species=TRUE, "~/Desktop/silva_nr99_v138_wSpecies_train_set.fa.gz")
+#' 
+#' @importFrom ShortRead readFasta
+#' @importFrom ShortRead writeFasta
+#' @importFrom ShortRead sread
+#' @importFrom Biostrings BStringSet
+#' @importFrom utils read.table
+#' @keywords internal
+makeTaxonomyFasta_SilvaNR <- function(fin, ftax, fout, include.species=FALSE, compress=TRUE) {
+  xset <- DNAStringSet(readRNAStringSet(fin, format="fasta"))
+  # taxl: The taxonmic strings or (l)ines associated with each entry. Named by the sequence ID/accession.
+  taxl <- names(xset)
+  names(taxl) <- sapply(strsplit(names(xset), "\\s"), `[`, 1)
+  if(any(duplicated(names(taxl)))) stop("Duplicated sequence IDs detected.")
+  names(xset) <- names(taxl)
+  taxl <- gsub("^[A-Za-z0-9.]+\\s", "", taxl)
+  # Fix Silva- or release-specific errors
+  taxl <- gsub(";YM;", ";", taxl) # YM bacterial suborder included in 138 Release in error (confirmed by Silva)
+  ##  taxl <- gsub(";Rahnella1", ";Rahnella", taxl) # Rahnella1 genus seems like an error, shares same species w/ Rahnella, 
+  ##  But! also in official tax file. Maybe check with Silva on this one.
+  # taxa: A list of the ordered taxonomic levels corresponding to each reference sequence. Named by the sequence ID/accession.
+  taxa <- strsplit(taxl, ";")
+  # Read in the defined Silva taxonomic levels, e.g. Bacteria;Desulfobacterota;Desulfobulbia;Desulfobulbales;Desulfurivibrionaceae;
+  silva.taxa <- read.table(ftax, sep="\t", col.names=c("Taxon", "V2", "Level", "V4", "V5"), stringsAsFactors=FALSE)
+  silva.taxa <- silva.taxa[,c("Taxon", "Level")]
+  # Subset down to Bacteria and Archaea
+  kingdom <- sapply(strsplit(taxl, ";"), `[`, 1)
+  taxl.ba <- taxl[kingdom %in% c("Bacteria", "Archaea")]
+  taxa.ba <- taxa[names(taxl.ba)]
+  # Create 6-column matrix with Silva taxonomic assignment for each sequence at each level from Kingdom to Genus (NA if no assignment)
+  taxa.ba.mat <- matrix(sapply(taxa.ba, function(flds) {
+    c(flds[1], flds[2], flds[3], flds[4], flds[5], flds[6])
+  }), ncol=6, byrow=TRUE)
+  rownames(taxa.ba.mat) <- names(taxl.ba)
+  # Create 6-column matrix with full Silva taxonomic string at each level for each sequence, from Kingdom to Genus
+  # Strings will include NA levels if no assignment at that level, e.g. Bacteria;Firmicutes;NA;NA
+  taxa.ba.mat.string <- matrix("UNDEF", nrow=nrow(taxa.ba.mat), ncol=ncol(taxa.ba.mat))
+  rownames(taxa.ba.mat.string) <- names(taxl.ba)
+  taxa.ba.mat.string[,1] <- paste0(taxa.ba.mat[,1],";")
+  for(col in seq(2,6)) {
+    taxa.ba.mat.string[,col] <- paste0(taxa.ba.mat.string[,col-1], taxa.ba.mat[,col],";")
+  }
+  if(any(taxa.ba.mat.string == "UNDEF")) stop("Taxon string matrix was not fully initialized.")
+  # Define the set of valid taxonomic assignment by their appearance in the list of valid Silva taxonomic levels
+  taxa.ba.mat.is_valid <- matrix(taxa.ba.mat.string %in% silva.taxa$Taxon, ncol=6)
+  # Update taxa.ba.mat matrix by replacing invalid entries with NAs
+  taxa.ba.mat[!taxa.ba.mat.is_valid] <- NA
+  # Also replace "uncultured" taxonomic ranks with NAs (note, uncultured only shows up as the terminal "assigned" rank)
+  taxa.ba.mat[taxa.ba.mat %in% c("Uncultured", "uncultured")] <- NA
+  
+  ######### ADD SPECIES PART HERE ##############
+  if(include.species) {
+    # Add the 7th column, which will be the species column
+    taxa.ba.mat <- cbind(taxa.ba.mat, 
+                         matrix(sapply(taxa.ba, `[`, 7), ncol=1, byrow=TRUE))
+    # Get validated genus from the matrix
+    genus <- taxa.ba.mat[,6]
+    genus <- gsub("Candidatus ", "", genus)
+    genus <- gsub("\\[", "", genus)
+    genus <- gsub("\\]", "", genus)
+    # Get the "binomial" string from the 7th field in the Silva taxonomic annotation
+    # The "binomial" field is not curated like the other Silva taxonomic levels, and can have varying info
+    # We assume that the first two words are the Genus species binomial, when there is a valid one in the field
+    # NOTE: the binomial is actually not always in the 7th field, so this isn't strictly correct.
+    # the binomial is in the "last" field, which may be <7 when not all the levels down to genus are assigned.
+    # But we are throwing away everything that doesn't match the genus anyway, so that case
+    # doesn't need to be handled correctly here.
+    binom <- taxa.ba.mat[,7]
+    binom <- gsub("Candidatus ", "", binom)
+    binom <- gsub("\\[", "", binom)
+    binom <- gsub("\\]", "", binom)
+    # Pull out the first two fields, and turn binom into a two column matrix (Genus, species)
+    binom <- cbind(sapply(strsplit(binom, "\\s"), `[`, 1),
+                   sapply(strsplit(binom, "\\s"), `[`, 2))
+    # Identify binomials that match the curated genus
+    gen.match <- mapply(dada2:::matchGenera, genus, binom[,1], split.glyph="-")
+    # Identify some other types of invalid species names
+    is.NA <- apply(binom, 1, function(x) any(is.na(x)))
+    is.sp <- grepl("sp\\.", binom[,2]) # "sp." is not a valid species name, just a generic
+    is.endo <- binom[,1] %in% "endosymbiont" | binom[,2] %in% "endosymbiont"
+    is.uncult <- grepl("[Uu]ncultured", binom[,1]) | grepl("[Uu]ncultured", binom[,2])
+    is.unident <- grepl("[Uu]nidentified", binom[,1]) | grepl("[Uu]nidentified", binom[,2])
+    # Define the "valid" species, and set invalid species to NA in the taxonomic matrix
+    valid.spec <- gen.match & !is.NA & !is.sp & !is.endo & !is.uncult & !is.unident
+    binom[!valid.spec,2] <- NA
+    taxa.ba.mat[,7] <- binom[,2]
+  }
+  # Organize a small number of Eukaryota sequences for outgroup purposes, keeping only the Eukaryota Kingdom taxonomic assignment
+  set.seed(100); N_EUK <- 100
+  euk.keep <- sample(names(taxl)[kingdom %in% "Eukaryota"], N_EUK)
+  taxa.euk.mat <- matrix("", nrow=N_EUK, ncol=ncol(taxa.ba.mat))
+  rownames(taxa.euk.mat) <- euk.keep
+  taxa.euk.mat[,1] <- "Eukaryota"
+  taxa.euk.mat[,2:ncol(taxa.euk.mat)] <- NA
+  
+  # Now need to make the final training fasta in DADA2 format.
+  taxa.mat.final <- rbind(taxa.ba.mat, taxa.euk.mat)
+  taxa.string.final <- apply(taxa.mat.final, 1, function(x) {
+    tst <- paste(x, collapse=";")
+    tst <- paste0(tst, ";")
+    tst <- gsub("NA;", "", tst)
+    tst
+  })
+  
+  if(any(is.na(names(taxa.string.final)))) stop("NA names in the final set of taxon strings.")
+  if(!all(names(taxa.string.final) %in% names(xset))) stop("Some names of the final set of taxon strings don't match sequence names.")
+  xset.out <- xset[names(taxa.string.final)]
+  
+  ## Add some verbose output describing what happened.
+  cat(length(xset.out), "reference sequences were output.\n")
+  print(table(taxa.mat.final[,1], useNA="ifany"))
+  if(include.species) cat(sum(!is.na(taxa.mat.final[,7])), "entries include species names.\n")
+  
+  writeFasta(ShortRead(unname(xset.out), BStringSet(taxa.string.final)), fout,
+             width=20000L, compress=compress)
+}
+
+#' DEPRECATED in favor of `makeTaxonomyFasta_SilvaNR``
+#' 
 #' This function creates the dada2 assignTaxonomy training fasta for the Silva .align file
 #' generated by the Mothur project.
 #' 
diff -Nru r-bioc-dada2-1.16.0+dfsg/src/RcppExports.cpp r-bioc-dada2-1.18.0+dfsg/src/RcppExports.cpp
--- r-bioc-dada2-1.16.0+dfsg/src/RcppExports.cpp	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/src/RcppExports.cpp	2020-10-27 17:56:28.000000000 +0000
@@ -234,23 +234,6 @@
     return rcpp_result_gen;
 END_RCPP
 }
-// C_assign_taxonomy
-Rcpp::List C_assign_taxonomy(std::vector<std::string> seqs, std::vector<std::string> rcs, std::vector<std::string> refs, std::vector<int> ref_to_genus, Rcpp::IntegerMatrix genusmat, bool try_rc, bool verbose);
-RcppExport SEXP _dada2_C_assign_taxonomy(SEXP seqsSEXP, SEXP rcsSEXP, SEXP refsSEXP, SEXP ref_to_genusSEXP, SEXP genusmatSEXP, SEXP try_rcSEXP, SEXP verboseSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< std::vector<std::string> >::type seqs(seqsSEXP);
-    Rcpp::traits::input_parameter< std::vector<std::string> >::type rcs(rcsSEXP);
-    Rcpp::traits::input_parameter< std::vector<std::string> >::type refs(refsSEXP);
-    Rcpp::traits::input_parameter< std::vector<int> >::type ref_to_genus(ref_to_genusSEXP);
-    Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type genusmat(genusmatSEXP);
-    Rcpp::traits::input_parameter< bool >::type try_rc(try_rcSEXP);
-    Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP);
-    rcpp_result_gen = Rcpp::wrap(C_assign_taxonomy(seqs, rcs, refs, ref_to_genus, genusmat, try_rc, verbose));
-    return rcpp_result_gen;
-END_RCPP
-}
 // C_assign_taxonomy2
 Rcpp::List C_assign_taxonomy2(std::vector<std::string> seqs, std::vector<std::string> rcs, std::vector<std::string> refs, std::vector<int> ref_to_genus, Rcpp::IntegerMatrix genusmat, bool try_rc, bool verbose);
 RcppExport SEXP _dada2_C_assign_taxonomy2(SEXP seqsSEXP, SEXP rcsSEXP, SEXP refsSEXP, SEXP ref_to_genusSEXP, SEXP genusmatSEXP, SEXP try_rcSEXP, SEXP verboseSEXP) {
@@ -298,7 +281,6 @@
     {"_dada2_C_matchRef", (DL_FUNC) &_dada2_C_matchRef, 4},
     {"_dada2_C_matrixEE", (DL_FUNC) &_dada2_C_matrixEE, 1},
     {"_dada2_C_nwvec", (DL_FUNC) &_dada2_C_nwvec, 7},
-    {"_dada2_C_assign_taxonomy", (DL_FUNC) &_dada2_C_assign_taxonomy, 7},
     {"_dada2_C_assign_taxonomy2", (DL_FUNC) &_dada2_C_assign_taxonomy2, 7},
     {"_dada2_RcppExport_registerCCallable", (DL_FUNC) &_dada2_RcppExport_registerCCallable, 0},
     {NULL, NULL, 0}
diff -Nru r-bioc-dada2-1.16.0+dfsg/src/Rmain.cpp r-bioc-dada2-1.18.0+dfsg/src/Rmain.cpp
--- r-bioc-dada2-1.16.0+dfsg/src/Rmain.cpp	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/src/Rmain.cpp	2020-10-27 17:56:28.000000000 +0000
@@ -192,7 +192,7 @@
     // initialize with source and destination
     FinalSubsParallel(B *b, Sub **subs, Sub **birth_subs,
                     int match, int mismatch, int gap, int homo_gap,
-                    bool use_kmers, int band_size, bool vectorized_alignment, int SSE, bool gapless) 
+                    int band_size, bool use_kmers, bool vectorized_alignment, int SSE, bool gapless) 
       : b(b), subs(subs), birth_subs(birth_subs), match(match), mismatch(mismatch), gap(gap), homo_gap(homo_gap), 
         band_size(band_size), use_kmers(use_kmers), vectorized_alignment(vectorized_alignment), SSE(SSE), gapless(gapless) {}
     
@@ -203,7 +203,7 @@
       for(std::size_t i=begin;i<end;i++) {
         for(unsigned int r=0;r<b->bi[i]->nraw;r++) {
           raw = b->bi[i]->raw[r];
-          subs[raw->index] = sub_new(b->bi[i]->center, raw, match, mismatch, gap, homo_gap, use_kmers, 1.0, band_size, vectorized_alignment, SSE, gapless);
+          subs[raw->index] = sub_new(b->bi[i]->center, raw, match, mismatch, gap, homo_gap, false, 1.0, band_size, vectorized_alignment, SSE, gapless);
         }
         // Make birth sub for that cluster
         if(i==0) { birth_subs[i] = NULL; }
@@ -222,7 +222,7 @@
       // Make subs for members of that cluster
       for(r=0;r<bb->bi[i]->nraw;r++) {
         raw = bb->bi[i]->raw[r];
-        subs[raw->index] = sub_new(bb->bi[i]->center, raw, match, mismatch, gap, homo_gap, use_kmers, 1.0, band_size, vectorized_alignment, SSE, gapless);
+        subs[raw->index] = sub_new(bb->bi[i]->center, raw, match, mismatch, gap, homo_gap, false, 1.0, band_size, vectorized_alignment, SSE, gapless);
       }
       // Make birth sub for that cluster
       if(i==0) { birth_subs[i] = NULL; }
diff -Nru r-bioc-dada2-1.16.0+dfsg/src/taxonomy.cpp r-bioc-dada2-1.18.0+dfsg/src/taxonomy.cpp
--- r-bioc-dada2-1.16.0+dfsg/src/taxonomy.cpp	2020-04-27 20:45:33.000000000 +0000
+++ r-bioc-dada2-1.18.0+dfsg/src/taxonomy.cpp	2020-10-27 17:56:28.000000000 +0000
@@ -2,6 +2,8 @@
 #include <Rcpp.h>
 #include <RcppParallel.h>
 #include <random>
+#include <algorithm>
+#define NBOOT 100
   
 using namespace Rcpp;
 
@@ -29,6 +31,7 @@
   return(kmer);
 }
 
+// Sets to 1 (TRUE) the value of kvec corresponding to each valid kmer index in the provided sequence
 void tax_kvec(const char *seq, unsigned int k, unsigned char *kvec) {
   unsigned int i;
   unsigned int len = strlen(seq);
@@ -36,6 +39,7 @@
   int kmer = 0;
   size_t n_kmers = (1 << (2*k));  // 4^k kmers
   for(i=0;i<n_kmers;i++) { kvec[i] = 0; }
+  ///!    memset(kvec, 0, n_kmers);   ///! Seems slower at first glance, but could use better head-to-head. No major change anyway.
 
   for(i=0; i<klen; i++) {
     kmer = tax_kmer(&seq[i], k);
@@ -47,6 +51,7 @@
   }
 }
 
+// Writes all valid (>=0) kmer indices in the provided sequence to karray. Returns number written.
 unsigned int tax_karray(const char *seq, unsigned int k, int *karray) {
   unsigned int i, j;
   int kmer;
@@ -61,41 +66,33 @@
       j++;
     }
   }
+  std::sort(karray, karray+j);
   return(j);
 }
 
-int get_best_genus(int *karray, double *out_logp, unsigned int arraylen, unsigned int n_kmers, unsigned int *genus_kmers, unsigned int ngenus, double *kmer_prior, double *genus_num_plus1) {
+int get_best_genus(int *karray, float *out_logp, unsigned int arraylen, unsigned int n_kmers, unsigned int ngenus, float *lgk_probability) {
   unsigned int pos;
-  unsigned int *genus_kv;
+  float *lgk_v;
   int kmer, g, max_g = -1;
-  unsigned int log_step = 50; ///! Need log10(ngenus+1) * log_step < 300 (max double ~ 10^308)
-  double p, logp, max_logp = 1.0; // Init value to be replaced on first iteration
+  float logp, max_logp = -FLT_MAX; // Init value to be replaced on first iteration
   double rv; // Dummy random variable
   unsigned int nmax=0; // Number of times the current max logp has been seen
   std::random_device rd;  //Will be used to obtain a seed for the random number engine
   std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd()
   std::uniform_real_distribution<> cunif(0.0, 1.0);
-    
+  
   for(g=0;g<ngenus;g++) {
-    genus_kv = &genus_kmers[g*n_kmers];
+    lgk_v = &lgk_probability[g*n_kmers];
     logp = 0.0;
-    p = 1.0;
-    
-    // Take the product of the numerators
-    // Convert to log to avoid double overflow
+
+    // Take the product of the probabilitys -> sum of logs
+    // This is the rate limiting step of the entire assignTaxonomy (on query sets of non-trival size)
     for(pos=0;pos<arraylen;pos++) {
       kmer = karray[pos];
-      if(kmer < 0) { Rcpp::stop("Sequences to be classifed must be ACGT only."); }
-      p *= (genus_kv[kmer] + kmer_prior[kmer]);
-      if((pos+1) % log_step == 0) {
-        logp += log(p);
-        p = 1.0;
-      }
+      logp += lgk_v[kmer];
+      if(logp < max_logp) { break; }
     }
-    logp += log(p);
-    // Subtract the product of the denominators
-    logp = logp - (arraylen * log(genus_num_plus1[g]));
-    
+
     if(max_logp > 0 || logp>max_logp) { // Store if new max
       max_logp = logp;
       max_g = g;
@@ -103,7 +100,6 @@
     } else if (max_logp == logp) { // With uniform prob, store if equal to current max
       nmax++;
       rv = (double) cunif(gen);
-      ///!      rv = (double) Rcpp::runif(1)[0];
       if(rv < 1.0/nmax) {
         max_g = g;
       }
@@ -113,172 +109,13 @@
   return max_g;
 }
 
-//------------------------------------------------------------------
-// Assigns taxonomy to sequence based on provided ref seqs and corresponding taxonomies.
-// Single-threaded only.
-// DEPRECATED DEPREECATED DEPRECATED DEPRECATED DEPREECATED DEPRECATED DEPRECATED
-//
-// [[Rcpp::export]]
-Rcpp::List C_assign_taxonomy(std::vector<std::string> seqs, std::vector<std::string> rcs, std::vector<std::string> refs, std::vector<int> ref_to_genus, Rcpp::IntegerMatrix genusmat, bool try_rc, bool verbose) {
-  size_t i, j, g;
-  int kmer;
-  unsigned int k=8;
-  size_t n_kmers = (1 << (2*k));
-  size_t nseq = seqs.size();
-  if(nseq == 0) Rcpp::stop("No seqs provided to classify.");
-  size_t nref = refs.size();
-  if(nref != ref_to_genus.size()) Rcpp::stop("Length mismatch between number of references and map to genus.");
-  size_t ngenus = genusmat.nrow();
-
-  // Rprintf("Validated and 0-index ref_to_genus map.\n");
-  for(i=0;i<ref_to_genus.size();i++) {
-    ref_to_genus[i] = ref_to_genus[i]-1; // -> 0-index
-    if(ref_to_genus[i]<0 || ref_to_genus[i] >= ngenus) {
-      Rcpp::stop("Invalid map from references to genus.");
-    }
-  }
-  
-  // Rprintf("Count seqs in each genus (M_g).\n");
-  double *genus_num_plus1 = (double *) calloc(ngenus, sizeof(double));
-  if(genus_num_plus1 == NULL) Rcpp::stop("Memory allocation failed.");  
-  for(i=0;i<nref;i++) {
-    genus_num_plus1[ref_to_genus[i]]++;
-  }
-  for(g=0;g<ngenus;g++) {
-    genus_num_plus1[g]++;
-  }
-  
-  unsigned int *genus_kmers = (unsigned int *) calloc((ngenus * n_kmers), sizeof(unsigned int));
-  if(genus_kmers == NULL) Rcpp::stop("Memory allocation failed.");
-  unsigned int *genus_kv;
-  double *kmer_prior = (double *) calloc(n_kmers, sizeof(double));
-  if(kmer_prior == NULL) Rcpp::stop("Memory allocation failed.");
-  
-  unsigned char *ref_kv = (unsigned char *) malloc(n_kmers * sizeof(unsigned char));
-  if(ref_kv == NULL) Rcpp::stop("Memory allocation failed.");
-  for(i=0;i<nref;i++) {
-    // Calculate kmer-vector of this reference sequences
-    tax_kvec(refs[i].c_str(), k, ref_kv);
-    // Assign the kmer-counts to the appropriate "genus" and kmer-prior
-    g = ref_to_genus[i];
-    genus_kv = &genus_kmers[g*n_kmers];
-    for(kmer=0;kmer<n_kmers;kmer++) {
-      if(ref_kv[kmer]) { 
-        genus_kv[kmer]++;
-        kmer_prior[kmer]++;
-      }
-    }
-  }
-  
-  // Correct word priors
-  for(kmer=0;kmer<n_kmers;kmer++) {
-    kmer_prior[kmer] = (kmer_prior[kmer] + 0.5)/(1.0 + nref);
-  }
-  if(verbose) { Rprintf("Finished processing reference fasta."); }
-  
-  // Rprintf("Get size of the kmer arrays for the sequences to be classified.\n");
-  unsigned int max_arraylen = 0;
-  unsigned int seqlen;
-  for(i=0;i<nseq;i++) {
-    seqlen = seqs[i].size();
-///    if(seqlen < 50) Rcpp::stop("Sequences must be at least 50 nts to classify.");
-    if((seqlen-k+1) > max_arraylen) { max_arraylen = seqlen-k+1; }
-  }
-
-  // Rprintf("Allocate kmer array to be used by the seqs.");
-  int *karray = (int *) malloc(max_arraylen * sizeof(int));
-  if(karray == NULL) Rcpp::stop("Memory allocation failed.");
-  int *karray_rc = (int *) malloc(max_arraylen * sizeof(int));
-  if(karray_rc == NULL) Rcpp::stop("Memory allocation failed.");
-  
-  Rcpp::IntegerVector rval(nseq);
-  Rcpp::NumericVector unifs;
-  Rcpp::IntegerMatrix rboot(nseq, genusmat.ncol());
-  Rcpp::IntegerMatrix rboot_tax(nseq, 100);
-  
-  int max_g, max_g_rc, boot_g;
-  unsigned int boot, booti, boot_match, arraylen, arraylen_rc;
-  double logp, logp_rc;
-  
-  // Rprintf("Allocate bootstrap array to be used by the seqs.\n");
-  int *bootarray = (int *) malloc((max_arraylen/8) * sizeof(int));
-  if(bootarray == NULL) Rcpp::stop("Memory allocation failed.");
-  
-  // Rprintf("Classify the sequences.\n");
-  bool first_warning = true;
-  for(j=0;j<nseq;j++) {
-    seqlen = seqs[j].size();
-    if(seqlen < 50) { // No assignment made for very short seqeunces
-      if(first_warning) {
-        Rcpp::warning("Some sequences were less than 50nts and were not taxonomically classified."); 
-        first_warning=false;
-      }
-      // Now enter NA assignments and 0 bootstrap confidences for this sequence
-      rval(j) = NA_INTEGER;
-      for(i=0;i<genusmat.ncol();i++) {
-        rboot(j, i) = 0;
-      }
-      for(boot=0;boot<100;boot++) {
-        rboot_tax(j, boot) = NA_INTEGER;
-      }
-    } else {
-      arraylen = tax_karray(seqs[j].c_str(), k, karray);
-      // Find best hit
-      max_g = get_best_genus(karray, &logp, arraylen, n_kmers, genus_kmers, ngenus, kmer_prior, genus_num_plus1);
-      if(try_rc) { // see if rev-comp is a better match to refs
-        arraylen_rc = tax_karray(rcs[j].c_str(), k, karray_rc);
-        if(arraylen != arraylen_rc) { 
-          Rcpp::stop("Discrepancy between forward and RC arraylen."); }
-        max_g_rc = get_best_genus(karray_rc, &logp_rc, arraylen_rc, n_kmers, genus_kmers, ngenus, kmer_prior, genus_num_plus1);
-        if(logp_rc > logp) { // rev-comp is better, replace with it
-          max_g = max_g_rc;
-          memcpy(karray, karray_rc, arraylen * sizeof(int));
-        }
-      }
-      
-      rval(j) = max_g+1; // 1-index for return
-  
-      // Generate random indices to be used for subsampling
-      unifs = Rcpp::runif(100*(arraylen/8));
-      booti = 0;
-      boot_match = 0;
-      for(boot=0;boot<100;boot++) {
-        for(i=0;i<(arraylen/8);i++,booti++) {
-          bootarray[i] = karray[(int) (arraylen*unifs[booti])];
-        }
-        boot_g = get_best_genus(bootarray, &logp, (arraylen/8), n_kmers, genus_kmers, ngenus, kmer_prior, genus_num_plus1);
-        rboot_tax(j,boot) = boot_g+1; // 1-index for return
-        for(i=0;i<(genusmat.ncol());i++) {
-          if(genusmat(boot_g,i) == genusmat(max_g,i)) {
-            rboot(j,i)++;
-          } else {
-            break;
-          }
-        }
-        if(boot_g == max_g) { boot_match++; }
-      }
-    }
-    Rcpp::checkUserInterrupt();
-  }
-  
-  free(genus_num_plus1);
-  free(genus_kmers);
-  free(kmer_prior);
-  free(ref_kv);
-  free(karray);
-  free(karray_rc);
-  
-  return(Rcpp::List::create(_["tax"]=rval, _["boot"]=rboot, _["boot_tax"]=rboot_tax));
-}
 
 struct AssignParallel : public RcppParallel::Worker
 {
   // source data
   std::vector<std::string> seqs;
   std::vector<std::string> rcs;
-  double *genus_num_plus1;
-  unsigned int *genus_kmers;
-  double *kmer_prior;
+  float *lgk_probability;
   int *C_genusmat;
   double *C_unifs;
   int *C_rboot;
@@ -295,10 +132,10 @@
   bool try_rc;
   
   // initialize with source and destination
-  AssignParallel(std::vector<std::string> seqs, std::vector<std::string> rcs, double *genus_num_plus1, unsigned int *genus_kmers,
-                 double *kmer_prior, int *C_genusmat, double *C_unifs, int *C_rboot, int *C_rboot_tax, int *C_rval, 
+  AssignParallel(std::vector<std::string> seqs, std::vector<std::string> rcs, float *lgk_probability,
+                 int *C_genusmat, double *C_unifs, int *C_rboot, int *C_rboot_tax, int *C_rval, 
                  unsigned int k, size_t n_kmers, size_t ngenus, size_t nlevel, unsigned int max_arraylen, bool try_rc)
-    : seqs(seqs), rcs(rcs), genus_num_plus1(genus_num_plus1), genus_kmers(genus_kmers), kmer_prior(kmer_prior), 
+    : seqs(seqs), rcs(rcs), lgk_probability(lgk_probability), 
       C_genusmat(C_genusmat), C_unifs(C_unifs), C_rboot(C_rboot), C_rboot_tax(C_rboot_tax), C_rval(C_rval), 
       k(k), n_kmers(n_kmers), ngenus(ngenus), nlevel(nlevel), max_arraylen(max_arraylen), try_rc(try_rc) {}
 
@@ -311,7 +148,7 @@
     int karray_rc[9999];
     int bootarray[9999/8];
     double *unifs;
-    double logp, logp_rc;
+    float logp, logp_rc;
 
     for(std::size_t j=begin;j<end;j++) {
       seqlen = seqs[j].size();
@@ -321,18 +158,18 @@
         for(i=0;i<nlevel;i++) {
           C_rboot[j*nlevel+i] = 0;
         }
-        for(boot=0;boot<100;boot++) {
-          C_rboot_tax[j*100 + boot] = NA_INTEGER;
+        for(boot=0;boot<NBOOT;boot++) {
+          C_rboot_tax[j*NBOOT + boot] = NA_INTEGER;
         }
       } else {
         arraylen = tax_karray(seqs[j].c_str(), k, karray);
   
         // Find best hit
-        max_g = get_best_genus(karray, &logp, arraylen, n_kmers, genus_kmers, ngenus, kmer_prior, genus_num_plus1);
+        max_g = get_best_genus(karray, &logp, arraylen, n_kmers, ngenus, lgk_probability);
         if(try_rc) { // see if rev-comp is a better match to refs
           arraylen_rc = tax_karray(rcs[j].c_str(), k, karray_rc);
           if(arraylen != arraylen_rc) { Rcpp::stop("Discrepancy between forward and RC arraylen."); }
-          max_g_rc = get_best_genus(karray_rc, &logp_rc, arraylen_rc, n_kmers, genus_kmers, ngenus, kmer_prior, genus_num_plus1);
+          max_g_rc = get_best_genus(karray_rc, &logp_rc, arraylen_rc, n_kmers, ngenus, lgk_probability);
           if(logp_rc > logp) { // rev-comp is better, replace with it
             max_g = max_g_rc;
             memcpy(karray, karray_rc, arraylen * sizeof(int));
@@ -344,12 +181,12 @@
         unifs = &C_unifs[j*max_arraylen];
         booti = 0;
         boot_match = 0;
-        for(boot=0;boot<100;boot++) {
+        for(boot=0;boot<NBOOT;boot++) {
           for(i=0;i<(arraylen/8);i++,booti++) {
             bootarray[i] = karray[(int) (arraylen*unifs[booti])];
           }
-          boot_g = get_best_genus(bootarray, &logp, (arraylen/8), n_kmers, genus_kmers, ngenus, kmer_prior, genus_num_plus1);
-          C_rboot_tax[j*100+boot] = boot_g+1; // 1-index for return
+          boot_g = get_best_genus(bootarray, &logp, (arraylen/8), n_kmers, ngenus, lgk_probability);
+          C_rboot_tax[j*NBOOT+boot] = boot_g+1; // 1-index for return
           for(i=0;i<nlevel;i++) {
             if(C_genusmat[boot_g*nlevel+i] == C_genusmat[max_g*nlevel+i]) {
               C_rboot[j*nlevel+i]++;
@@ -357,7 +194,7 @@
               break;
             }
           }
-        } // for(boot=0;boot<100;boot++)
+        } // for(boot=0;boot<NBOOT;boot++)
       }
     } // for(std::size_t j=begin;j<end;j++)
   }
@@ -388,7 +225,7 @@
   }
   
   // Rprintf("Count seqs in each genus (M_g).\n");
-  double *genus_num_plus1 = (double *) calloc(ngenus, sizeof(double)); //E
+  float *genus_num_plus1 = (float *) calloc(ngenus, sizeof(float)); //E
   if(genus_num_plus1 == NULL) Rcpp::stop("Memory allocation failed.");  
   for(i=0;i<nref;i++) {
     genus_num_plus1[ref_to_genus[i]]++;
@@ -397,23 +234,24 @@
     genus_num_plus1[g]++;
   }
   
-  unsigned int *genus_kmers = (unsigned int *) calloc((ngenus * n_kmers), sizeof(unsigned int)); //E
-  if(genus_kmers == NULL) Rcpp::stop("Memory allocation failed.");
-  unsigned int *genus_kv;
-  double *kmer_prior = (double *) calloc(n_kmers, sizeof(double)); //E
+  float *kmer_prior = (float *) calloc(n_kmers, sizeof(float)); //E
   if(kmer_prior == NULL) Rcpp::stop("Memory allocation failed.");
+  float *lgk_v;
+  float *lgk_probability = (float *) calloc((ngenus * n_kmers), sizeof(float)); //E
+  if(lgk_probability == NULL) Rcpp::stop("Memory allocation failed.");
   
   unsigned char *ref_kv = (unsigned char *) malloc(n_kmers * sizeof(unsigned char)); //E
   if(ref_kv == NULL) Rcpp::stop("Memory allocation failed.");
+  
   for(i=0;i<nref;i++) {
     // Calculate kmer-vector of this reference sequences
     tax_kvec(refs[i].c_str(), k, ref_kv);
     // Assign the kmer-counts to the appropriate "genus" and kmer-prior
     g = ref_to_genus[i];
-    genus_kv = &genus_kmers[g*n_kmers];
+    lgk_v = &lgk_probability[g*n_kmers];
     for(kmer=0;kmer<n_kmers;kmer++) {
       if(ref_kv[kmer]) { 
-        genus_kv[kmer]++;
+        lgk_v[kmer]++;
         kmer_prior[kmer]++;
       }
     }
@@ -423,6 +261,15 @@
   for(kmer=0;kmer<n_kmers;kmer++) {
     kmer_prior[kmer] = (kmer_prior[kmer] + 0.5)/(1.0 + nref);
   }
+  
+  ///! Create log genus-kmer probability
+  for(g=0;g<ngenus;g++) {
+    lgk_v = &lgk_probability[g*n_kmers];
+    for(kmer=0;kmer<n_kmers;kmer++) {
+      lgk_v[kmer] = logf((lgk_v[kmer] + kmer_prior[kmer])/genus_num_plus1[g]);
+    }
+  }
+  
   if(verbose) { Rprintf("Finished processing reference fasta."); }
   
   // Rprintf("Get size of the kmer arrays for the sequences to be classified.\n");
@@ -430,19 +277,12 @@
   unsigned int seqlen;
   for(i=0;i<nseq;i++) {
     seqlen = seqs[i].size();
-///    if(seqlen < 50) {
-///      free(genus_num_plus1);
-///      free(genus_kmers);
-///      free(kmer_prior);
-///      free(ref_kv);
-///      Rcpp::stop("Sequences must be at least 50 nts to classify.");
-///    }
     if((seqlen-k+1) > max_arraylen) { max_arraylen = seqlen-k+1; }
   }
   
   // Rprintf("Generate random numbers for bootstrapping.");
   Rcpp::NumericVector unifs;
-  unifs = Rcpp::runif(nseq*100*(max_arraylen/8));
+  unifs = Rcpp::runif(nseq*NBOOT*(max_arraylen/8));
   double *C_unifs = (double *) malloc(unifs.size() * sizeof(double)); //E
   for(i=0;i<unifs.size();i++) { C_unifs[i] = unifs(i); }
   
@@ -451,8 +291,8 @@
   int *C_rval = (int *) malloc(nseq * sizeof(int)); //E
   Rcpp::IntegerMatrix rboot(nseq, nlevel);
   int *C_rboot = (int *) calloc(nseq * nlevel, sizeof(int)); //E
-  Rcpp::IntegerMatrix rboot_tax(nseq, 100);
-  int *C_rboot_tax = (int *) malloc(nseq * 100 * sizeof(int)); //E
+  Rcpp::IntegerMatrix rboot_tax(nseq, NBOOT);
+  int *C_rboot_tax = (int *) malloc(nseq * NBOOT * sizeof(int)); //E
   int *C_genusmat = (int *) malloc(ngenus * nlevel * sizeof(int)); //E
   if(C_rval == NULL || C_rboot == NULL || C_rboot_tax == NULL || C_genusmat == NULL) Rcpp::stop("Memory allocation failed.");
   for(i=0;i<ngenus;i++) {
@@ -461,7 +301,7 @@
     }
   }
   
-  AssignParallel assignParallel(seqs, rcs, genus_num_plus1, genus_kmers, kmer_prior, C_genusmat, C_unifs, C_rboot, C_rboot_tax, C_rval, k, n_kmers, ngenus, nlevel, max_arraylen, try_rc);
+  AssignParallel assignParallel(seqs, rcs, lgk_probability, C_genusmat, C_unifs, C_rboot, C_rboot_tax, C_rval, k, n_kmers, ngenus, nlevel, max_arraylen, try_rc);
   int INTERRUPT_BLOCK_SIZE=128;
   for(i=0;i<nseq;i+=INTERRUPT_BLOCK_SIZE) {
     j = i+INTERRUPT_BLOCK_SIZE;
@@ -480,8 +320,8 @@
     }
   }
   for(i=0;i<nseq;i++) {
-    for(j=0;j<100;j++) {
-      rboot_tax(i,j) = C_rboot_tax[i*100 + j];
+    for(j=0;j<NBOOT;j++) {
+      rboot_tax(i,j) = C_rboot_tax[i*NBOOT + j];
     }
   }
   
@@ -491,9 +331,9 @@
   free(C_rval);
   free(C_genusmat);
   free(genus_num_plus1);
-  free(genus_kmers);
   free(kmer_prior);
   free(ref_kv);
+  free(lgk_probability);
 
   return(Rcpp::List::create(_["tax"]=rval, _["boot"]=rboot, _["boot_tax"]=rboot_tax));
 }