diff -Nru pbbam-1.7.0+dfsg/CHANGELOG.md pbbam-2.0.0+dfsg/CHANGELOG.md --- pbbam-1.7.0+dfsg/CHANGELOG.md 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/CHANGELOG.md 2022-01-13 18:20:29.000000000 +0000 @@ -5,6 +5,21 @@ ## Active +## [2.0.0] - TBD + +### Changed + - C++20 is now a *hard* minimum. + +## [1.8.1] - 2021-11-15 + +### Added + - SupplementalResources dataset element. + +## [1.8.0] - 2021-11-10 + +### Fixed + - Incorrect hashing scheme to generate IDs for barcoded read groups + ## [1.7.0] - 2021-09-21 ### Added diff -Nru pbbam-1.7.0+dfsg/.clang-format pbbam-2.0.0+dfsg/.clang-format --- pbbam-1.7.0+dfsg/.clang-format 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/.clang-format 2022-01-13 18:20:29.000000000 +0000 @@ -7,6 +7,7 @@ ColumnLimit: 100 IndentWidth: 4 PointerAlignment: Left +DerivePointerAlignment: false TabWidth: 4 ReflowComments: false # protect ASCII art in comments diff -Nru pbbam-1.7.0+dfsg/debian/changelog pbbam-2.0.0+dfsg/debian/changelog --- pbbam-1.7.0+dfsg/debian/changelog 2022-03-07 20:50:09.000000000 +0000 +++ pbbam-2.0.0+dfsg/debian/changelog 2022-03-14 19:25:09.000000000 +0000 @@ -1,8 +1,16 @@ -pbbam (1.7.0+dfsg-2ubuntu1) jammy; urgency=medium +pbbam (2.0.0+dfsg-2) unstable; urgency=medium - * Use std=c++17 to fix a build failure with std::optional + * Source-only upload - -- Gianfranco Costamagna Mon, 07 Mar 2022 21:50:09 +0100 + -- Andreas Tille Mon, 14 Mar 2022 20:25:09 +0100 + +pbbam (2.0.0+dfsg-1) unstable; urgency=medium + + * New upstream version + Closes: #1006776 + * Bump SONAME to 2.0.0 + + -- Andreas Tille Mon, 07 Mar 2022 15:26:06 +0100 pbbam (1.7.0+dfsg-2) unstable; urgency=medium diff -Nru pbbam-1.7.0+dfsg/debian/control pbbam-2.0.0+dfsg/debian/control --- pbbam-1.7.0+dfsg/debian/control 2022-02-13 09:03:13.000000000 +0000 +++ pbbam-2.0.0+dfsg/debian/control 2022-03-14 19:25:09.000000000 +0000 @@ -28,7 +28,7 @@ Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, - libpbbam1.7.0 (= ${binary:Version}) + libpbbam2.0.0 (= ${binary:Version}) Recommends: samtools Description: processing Pacific Biosciences binary alignment/map files The BAM format is a binary, compressed, record-oriented container format @@ -43,7 +43,7 @@ This package provides command-line utilities for working with PacBio BAM files. -Package: libpbbam1.7.0 +Package: libpbbam2.0.0 Architecture: any Multi-Arch: same Section: libs @@ -66,7 +66,7 @@ Architecture: any Multi-Arch: same Section: libdevel -Depends: libpbbam1.7.0 (= ${binary:Version}), +Depends: libpbbam2.0.0 (= ${binary:Version}), libhts-dev, libssl-dev, ${misc:Depends} diff -Nru pbbam-1.7.0+dfsg/debian/patches/boost_optional.patch pbbam-2.0.0+dfsg/debian/patches/boost_optional.patch --- pbbam-1.7.0+dfsg/debian/patches/boost_optional.patch 2022-02-13 09:03:13.000000000 +0000 +++ pbbam-2.0.0+dfsg/debian/patches/boost_optional.patch 2022-03-14 19:25:09.000000000 +0000 @@ -8,41 +8,13 @@ Last-Update: 2020-11-12 --- This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/tests/src/test_ReadGroupInfo.cpp -+++ b/tests/src/test_ReadGroupInfo.cpp -@@ -275,14 +275,14 @@ TEST(BAM_ReadGroupInfo, returns_no_barco - EXPECT_EQ("00082ba1", rg.BaseId()); - - const auto barcodes = rg.Barcodes(); -- EXPECT_EQ(boost::none, barcodes); -+ //EXPECT_EQ(boost::none, barcodes); - EXPECT_EQ(boost::none, rg.BarcodeForward()); - EXPECT_EQ(boost::none, rg.BarcodeReverse()); - } - { // no '/' found - const ReadGroupInfo rg{"00082ba1.0--1"}; - const auto barcodes = rg.Barcodes(); -- EXPECT_EQ(boost::none, barcodes); -+ //EXPECT_EQ(boost::none, barcodes); - EXPECT_EQ(boost::none, rg.BarcodeForward()); - EXPECT_EQ(boost::none, rg.BarcodeReverse()); - } -@@ -292,7 +292,7 @@ TEST(BAM_ReadGroupInfo, returns_no_barco - { - const ReadGroupInfo rg{""}; - const auto barcodes = rg.Barcodes(); -- EXPECT_EQ(boost::none, barcodes); -+ //EXPECT_EQ(boost::none, barcodes); - EXPECT_EQ(boost::none, rg.BarcodeForward()); - EXPECT_EQ(boost::none, rg.BarcodeReverse()); - } --- a/include/pbbam/ReadGroupInfo.h +++ b/include/pbbam/ReadGroupInfo.h @@ -10,6 +10,7 @@ - #include - #include + #include + #include +#include - #include - - #include + #include + #include + #include diff -Nru pbbam-1.7.0+dfsg/debian/patches/series pbbam-2.0.0+dfsg/debian/patches/series --- pbbam-1.7.0+dfsg/debian/patches/series 2022-03-07 20:49:38.000000000 +0000 +++ pbbam-2.0.0+dfsg/debian/patches/series 2022-03-14 19:25:09.000000000 +0000 @@ -1,6 +1,4 @@ -# debug_tests.patch use_debian_packaged_python3-cram.patch python3.patch results_with_latest_samtools.patch -boost_optional.patch -stdc++17.patch +# boost_optional.patch diff -Nru pbbam-1.7.0+dfsg/debian/patches/stdc++17.patch pbbam-2.0.0+dfsg/debian/patches/stdc++17.patch --- pbbam-1.7.0+dfsg/debian/patches/stdc++17.patch 2022-03-07 20:50:08.000000000 +0000 +++ pbbam-2.0.0+dfsg/debian/patches/stdc++17.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -Description: Use std=c++17 to fix a build failure with std::optional -Author: Gianfranco Costamagna -Last-Update: 2022-03-07 - ---- pbbam-1.7.0+dfsg.orig/meson.build -+++ pbbam-1.7.0+dfsg/meson.build -@@ -5,7 +5,7 @@ project( - default_options : [ - 'buildtype=release', - 'warning_level=3', -- 'cpp_std=c++14', -+ 'cpp_std=c++17', - 'b_ndebug=if-release'], - license : 'BSD-3', - meson_version : '>= 0.52.0') diff -Nru pbbam-1.7.0+dfsg/.gitignore pbbam-2.0.0+dfsg/.gitignore --- pbbam-1.7.0+dfsg/.gitignore 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/.gitignore 2022-01-13 18:20:29.000000000 +0000 @@ -1,5 +1,6 @@ .DS_Store /build* +.cache # cram *.pyc diff -Nru pbbam-1.7.0+dfsg/include/pbbam/AlignmentPrinter.h pbbam-2.0.0+dfsg/include/pbbam/AlignmentPrinter.h --- pbbam-1.7.0+dfsg/include/pbbam/AlignmentPrinter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/AlignmentPrinter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include +#include +#include #include -#include -#include +#include +#include namespace PacBio { namespace BAM { @@ -50,7 +50,7 @@ /// information /// std::string Print(const BamRecord& record, - const Data::Orientation orientation = Data::Orientation::GENOMIC); + Data::Orientation orientation = Data::Orientation::GENOMIC); /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BaiIndexCache.h pbbam-2.0.0+dfsg/include/pbbam/BaiIndexCache.h --- pbbam-1.7.0+dfsg/include/pbbam/BaiIndexCache.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BaiIndexCache.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include -#include +#include #include -#include +#include +#include +#include namespace PacBio { namespace BAM { @@ -26,6 +26,8 @@ explicit BaiIndexCacheData(const BamFile& bamFile); explicit BaiIndexCacheData(const std::string& bamFilename); + BaiIndexCacheData(BaiIndexCacheData&&) noexcept; + BaiIndexCacheData& operator=(BaiIndexCacheData&&) noexcept; ~BaiIndexCacheData(); /// \note This is very much an internal method and should not be considered @@ -34,8 +36,7 @@ /// /// \note Does not own the returned pointer; caller is responsible. /// - hts_itr_t* IteratorForInterval(const int32_t refId, const Data::Position start, - const Data::Position stop) const; + hts_itr_t* IteratorForInterval(int32_t refId, Data::Position start, Data::Position stop) const; private: struct BaiIndexCacheDataPrivate; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BaiIndexedBamReader.h pbbam-2.0.0+dfsg/include/pbbam/BaiIndexedBamReader.h --- pbbam-1.7.0+dfsg/include/pbbam/BaiIndexedBamReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BaiIndexedBamReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,15 @@ #include -#include - -#include - #include #include #include #include +#include + +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamFile.h pbbam-2.0.0+dfsg/include/pbbam/BamFile.h --- pbbam-1.7.0+dfsg/include/pbbam/BamFile.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamFile.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include -#include +#include #include #include -#include +#include namespace PacBio { namespace BAM { @@ -146,13 +146,13 @@ int ReferenceId(const std::string& name) const; /// \return name of reference matching \p id, empty string if not found - std::string ReferenceName(const int id) const; + std::string ReferenceName(int id) const; /// \returns length of requested reference \p name. 0 if not found uint32_t ReferenceLength(const std::string& name) const; /// \returns length of requested reference \p id. 0 if not found - uint32_t ReferenceLength(const int id) const; + uint32_t ReferenceLength(int id) const; /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamFileMerger.h pbbam-2.0.0+dfsg/include/pbbam/BamFileMerger.h --- pbbam-1.7.0+dfsg/include/pbbam/BamFileMerger.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamFileMerger.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamHeader.h pbbam-2.0.0+dfsg/include/pbbam/BamHeader.h --- pbbam-1.7.0+dfsg/include/pbbam/BamHeader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamHeader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,16 +3,16 @@ #include -#include -#include +#include +#include +#include #include #include #include -#include -#include -#include +#include +#include namespace PacBio { namespace BAM { @@ -197,13 +197,13 @@ /// /// \sa SequenceInfo::Length, BamHeader::SequenceId /// - std::string SequenceLength(const int32_t id) const; + std::string SequenceLength(int32_t id) const; /// \returns the name of the sequence (\@SQ:SN) at index \p id /// /// \sa SequenceInfo::Name, BamHeader::SequenceId /// - std::string SequenceName(const int32_t id) const; + std::string SequenceName(int32_t id) const; /// \returns vector of sequence names (\@SQ:SN) stored in this header /// @@ -216,7 +216,7 @@ /// \throws std::out_of_range if \p is an invalid or unknown index /// \sa BamHeader::SequenceId /// - SequenceInfo Sequence(const int32_t id) const; + SequenceInfo Sequence(int32_t id) const; /// \returns SequenceInfo for the sequence matching \p name SequenceInfo Sequence(const std::string& name) const; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamReader.h pbbam-2.0.0+dfsg/include/pbbam/BamReader.h --- pbbam-1.7.0+dfsg/include/pbbam/BamReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,18 +3,18 @@ #include -#include - -#include -#include - -#include - #include #include #include #include +#include + +#include +#include + +#include + namespace PacBio { namespace BAM { @@ -47,7 +47,9 @@ /// explicit BamReader(BamFile bamFile); - virtual ~BamReader(); + BamReader(BamReader&&) noexcept; + BamReader& operator=(BamReader&&) noexcept; + ~BamReader() override; /// \} @@ -83,7 +85,7 @@ /// \throws std::runtime_error if failed to read from file (e.g. possible /// truncated or corrupted file). /// - bool GetNext(BamRecord& record); + bool GetNext(BamRecord& record) override; /// \brief Seeks to virtual offset in %BAM. /// diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamRecord.h pbbam-2.0.0+dfsg/include/pbbam/BamRecord.h --- pbbam-1.7.0+dfsg/include/pbbam/BamRecord.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamRecord.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,17 +3,6 @@ #include -#include -#include - -#include -#include -#include -#include - -#include -#include - #include #include #include @@ -31,6 +20,17 @@ #include #include +#include +#include + +#include +#include +#include +#include + +#include +#include + namespace PacBio { namespace BAM { @@ -484,7 +484,7 @@ /// /// \returns sequence string /// - std::string Sequence(const Data::Orientation orientation = Data::Orientation::NATIVE, + std::string Sequence(Data::Orientation orientation = Data::Orientation::NATIVE, bool aligned = false, bool exciseSoftClips = false) const; /// \brief Fetches this record's SubstitutionTag values ("st" tag). @@ -872,21 +872,21 @@ /// \param[in] holeNumber /// \returns reference to this record /// - BamRecord& HoleNumber(const int32_t holeNumber); + BamRecord& HoleNumber(int32_t holeNumber); /// \brief Sets this record's local context flags /// /// \param[in] flags /// \returns reference to this record /// - BamRecord& LocalContextFlags(const Data::LocalContextFlags flags); + BamRecord& LocalContextFlags(Data::LocalContextFlags flags); /// \brief Sets this record's "number of complete passes of the insert". /// /// \param[in] numPasses /// \returns reference to this record /// - BamRecord& NumPasses(const int32_t numPasses); + BamRecord& NumPasses(int32_t numPasses); /// \brief Sets this record's query end position. /// @@ -895,14 +895,14 @@ /// \param[in] pos /// \returns reference to this record /// - BamRecord& QueryEnd(const Data::Position pos); + BamRecord& QueryEnd(Data::Position pos); /// \brief Sets this record's query end frame number /// /// \param[in] frame number /// \returns reference to this record /// - BamRecord& QueryEndFrameNumber(const int32_t frameNumber); + BamRecord& QueryEndFrameNumber(int32_t frameNumber); /// \brief Sets this record's query start position. /// @@ -911,14 +911,14 @@ /// \param[in] pos /// \returns reference to this record /// - BamRecord& QueryStart(const Data::Position pos); + BamRecord& QueryStart(Data::Position pos); /// \brief Sets this record's query start frame number /// /// \param[in] frame number /// \returns reference to this record /// - BamRecord& QueryStartFrameNumber(const int32_t frameNumber); + BamRecord& QueryStartFrameNumber(int32_t frameNumber); /// \brief Sets this record's expected read accuracy [0, 1000] /// @@ -948,28 +948,28 @@ /// \param[in] type /// \returns reference to this record /// - BamRecord& ScrapRegionType(const VirtualRegionType type); + BamRecord& ScrapRegionType(VirtualRegionType type); /// \brief Sets this scrap record's ScrapRegionType /// /// \param[in] type character equivalent of VirtualRegionType /// \returns reference to this record /// - BamRecord& ScrapRegionType(const char type); + BamRecord& ScrapRegionType(char type); /// \brief Sets this scrap record's ScrapZmwType /// /// \param[in] type /// \returns reference to this record /// - BamRecord& ScrapZmwType(const ZmwType type); + BamRecord& ScrapZmwType(ZmwType type); /// \brief Sets this scrap record's ScrapZmwType /// /// \param[in] type character equivalent of ZmwType /// \returns reference to this record /// - BamRecord& ScrapZmwType(const char type); + BamRecord& ScrapZmwType(char type); /// \brief Sets this record's average signal-to-noise in each of A, C, G, /// and T @@ -997,7 +997,7 @@ /// \param[in] quality Phred-scaled confidence call /// \returns reference to this record /// - BamRecord& BarcodeQuality(const uint8_t quality); + BamRecord& BarcodeQuality(uint8_t quality); /// \} @@ -1087,7 +1087,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& ForwardIPD(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& ForwardIPD(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's forward pulse width values ("fp" tag). /// @@ -1096,7 +1096,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& ForwardPulseWidth(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& ForwardPulseWidth(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's IPD values ("ip" tag). /// @@ -1107,8 +1107,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - PBBAM_DEPRECATED_FRAMES BamRecord& IPD(const Data::Frames& frames, - const FrameEncodingType encoding); + PBBAM_DEPRECATED_FRAMES BamRecord& IPD(const Data::Frames& frames, FrameEncodingType encoding); /// \brief Sets this record's IPD values ("ip" tag). /// @@ -1117,7 +1116,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& IPD(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& IPD(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's Pkmean values ("pm" tag). /// @@ -1185,7 +1184,7 @@ /// \returns reference to this record /// PBBAM_DEPRECATED_FRAMES BamRecord& PreBaseFrames(const Data::Frames& frames, - const FrameEncodingType encoding); + FrameEncodingType encoding); /// \brief Sets this record's PreBaseFrames aka IPD values ("ip" tag). /// @@ -1194,7 +1193,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& PreBaseFrames(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& PreBaseFrames(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's PrePulseFrames values ("pd" tag). /// @@ -1206,7 +1205,7 @@ /// \returns reference to this record /// PBBAM_DEPRECATED_FRAMES BamRecord& PrePulseFrames(const Data::Frames& frames, - const FrameEncodingType encoding); + FrameEncodingType encoding); /// \brief Sets this record's PrePulseFrames values ("pd" tag). /// @@ -1215,7 +1214,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& PrePulseFrames(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& PrePulseFrames(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's PulseCall values ("pc" tag). /// @@ -1234,7 +1233,7 @@ /// \returns reference to this record /// PBBAM_DEPRECATED_FRAMES BamRecord& PulseCallWidth(const Data::Frames& frames, - const FrameEncodingType encoding); + FrameEncodingType encoding); /// \brief Sets this record's PulseCallWidth values ("px" tag). /// @@ -1243,7 +1242,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& PulseCallWidth(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& PulseCallWidth(const Data::Frames& frames, Data::FrameCodec encoding); /// /// \\brief Sets this record's PulseExclusionReason values ("pe" tag). @@ -1269,7 +1268,7 @@ /// \returns reference to this record /// PBBAM_DEPRECATED_FRAMES BamRecord& PulseWidth(const Data::Frames& frames, - const FrameEncodingType encoding); + FrameEncodingType encoding); /// \brief Sets this record's PulseWidth values ("pw" tag). /// @@ -1278,7 +1277,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& PulseWidth(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& PulseWidth(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's reverse IPD values ("ri" tag). /// @@ -1287,7 +1286,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& ReverseIPD(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& ReverseIPD(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's reverse pulse width values ("rp" tag). /// @@ -1296,7 +1295,7 @@ /// 16-bit lossless) /// \returns reference to this record /// - BamRecord& ReversePulseWidth(const Data::Frames& frames, const Data::FrameCodec encoding); + BamRecord& ReversePulseWidth(const Data::Frames& frames, Data::FrameCodec encoding); /// \brief Sets this record's StartFrame values ("sf" tag). /// @@ -1371,32 +1370,51 @@ /// \{ /// Creates a copied record from input, with clipping applied - static BamRecord Clipped(const BamRecord& input, const ClipType clipType, - const Data::Position start, const Data::Position end, - const bool exciseFlankingInserts = false); + static BamRecord Clipped(const BamRecord& input, ClipType clipType, Data::Position start, + Data::Position end, bool exciseFlankingInserts = false); /// Creates a copied record from input, with mapping applied - static BamRecord Mapped(const BamRecord& input, const int32_t referenceId, - const Data::Position refStart, const Data::Strand strand, - const Data::Cigar& cigar, const uint8_t mappingQuality); + static BamRecord Mapped(const BamRecord& input, int32_t referenceId, Data::Position refStart, + Data::Strand strand, const Data::Cigar& cigar, uint8_t mappingQuality); + + /// Splits the (5mC) basemods `Mm` and `Ml` tags + struct SplitBasemods + { + std::vector LeadingSeparatingC; + std::vector LeadingQuals; + + std::vector RetainedSeparatingC; + std::vector RetainedQuals; + + std::vector TrailingSeparatingC; + std::vector TrailingQuals; + + int32_t PrefixLostBases{0}; + + static std::vector SplitBasemodsString(const std::string& str); + + static std::string SeparatingCToString(const std::vector& vec); + }; + static SplitBasemods ClipBasemodsTag(const std::string& seq, + const std::string& oldBasemodsString, + const std::vector& basemodsQVs, size_t clipFrom, + size_t clipLength); /// Applies clipping to this record - BamRecord& Clip(const ClipType clipType, const Data::Position start, const Data::Position end, - const bool exciseFlankingInserts = false); + BamRecord& Clip(ClipType clipType, Data::Position start, Data::Position end, + bool exciseFlankingInserts = false); /// Creates a copied record from this one, with clipping applied - BamRecord Clipped(const ClipType clipType, const Data::Position start, const Data::Position end, - const bool exciseFlankingInserts = false) const; + BamRecord Clipped(ClipType clipType, Data::Position start, Data::Position end, + bool exciseFlankingInserts = false) const; /// Applies mapping to this record - BamRecord& Map(const int32_t referenceId, const Data::Position refStart, - const Data::Strand strand, const Data::Cigar& cigar, - const uint8_t mappingQuality); + BamRecord& Map(int32_t referenceId, Data::Position refStart, Data::Strand strand, + const Data::Cigar& cigar, uint8_t mappingQuality); /// Creates a copied record from this one, with mapping applied - BamRecord Mapped(const int32_t referenceId, const Data::Position refStart, - const Data::Strand strand, const Data::Cigar& cigar, - const uint8_t mappingQuality) const; + BamRecord Mapped(int32_t referenceId, Data::Position refStart, Data::Strand strand, + const Data::Cigar& cigar, uint8_t mappingQuality) const; /// \} /// @@ -1429,69 +1447,68 @@ public: /// clips the PacBio tags to a specified length - void ClipTags(const size_t clipPos, const size_t clipLength); + void ClipTags(size_t clipPos, size_t clipLength); private: ///\internal /// clipping methods - void ClipFields(const size_t clipPos, const size_t clipLength); + void ClipFields(size_t clipPos, size_t clipLength); - BamRecord& ClipToQuery(const Data::Position start, const Data::Position end); - BamRecord& ClipToReference(const Data::Position start, const Data::Position end, - const bool exciseFlankingInserts); + BamRecord& ClipToQuery(Data::Position start, Data::Position end); + BamRecord& ClipToReference(Data::Position start, Data::Position end, + bool exciseFlankingInserts); private: ///\internal /// raw tag data fetching // sequence tags - std::string FetchBasesRaw(const BamRecordTag tag) const; - std::string FetchBases(const BamRecordTag tag, - const Orientation orientation = Orientation::NATIVE, - const bool aligned = false, const bool exciseSoftClips = false, - const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; + std::string FetchBasesRaw(BamRecordTag tag) const; + std::string FetchBases(BamRecordTag tag, Orientation orientation = Orientation::NATIVE, + bool aligned = false, bool exciseSoftClips = false, + PulseBehavior pulseBehavior = PulseBehavior::ALL) const; // frame tags - Data::Frames FetchFramesRaw(const BamRecordTag tag) const; - Data::Frames FetchFrames(const BamRecordTag tag, - const Data::Orientation orientation = Data::Orientation::NATIVE, - const bool aligned = false, const bool exciseSoftClips = false, - const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; + Data::Frames FetchFramesRaw(BamRecordTag tag) const; + Data::Frames FetchFrames(BamRecordTag tag, + Data::Orientation orientation = Data::Orientation::NATIVE, + bool aligned = false, bool exciseSoftClips = false, + PulseBehavior pulseBehavior = PulseBehavior::ALL) const; // pulse tags - std::vector FetchPhotonsRaw(const BamRecordTag tag) const; - std::vector FetchPhotons(const BamRecordTag tag, - const Data::Orientation orientation = Data::Orientation::NATIVE, - const bool aligned = false, const bool exciseSoftClips = false, - const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; + std::vector FetchPhotonsRaw(BamRecordTag tag) const; + std::vector FetchPhotons(BamRecordTag tag, + Data::Orientation orientation = Data::Orientation::NATIVE, + bool aligned = false, bool exciseSoftClips = false, + PulseBehavior pulseBehavior = PulseBehavior::ALL) const; // QV tags - Data::QualityValues FetchQualitiesRaw(const BamRecordTag tag) const; - Data::QualityValues FetchQualities( - const BamRecordTag tag, const Data::Orientation orientation = Data::Orientation::NATIVE, - const bool aligned = false, const bool exciseSoftClips = false, - const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; + Data::QualityValues FetchQualitiesRaw(BamRecordTag tag) const; + Data::QualityValues FetchQualities(BamRecordTag tag, + Data::Orientation orientation = Data::Orientation::NATIVE, + bool aligned = false, bool exciseSoftClips = false, + PulseBehavior pulseBehavior = PulseBehavior::ALL) const; // UInt tags (e.g. start frame) // // TODO (DB): clean this up w.r.t FetchUInt8s // - std::vector FetchUInt32sRaw(const BamRecordTag tag) const; - std::vector FetchUInt32s( - const BamRecordTag tag, const Data::Orientation orientation = Data::Orientation::NATIVE, - const bool aligned = false, const bool exciseSoftClips = false, - const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; + std::vector FetchUInt32sRaw(BamRecordTag tag) const; + std::vector FetchUInt32s(BamRecordTag tag, + Data::Orientation orientation = Data::Orientation::NATIVE, + bool aligned = false, bool exciseSoftClips = false, + PulseBehavior pulseBehavior = PulseBehavior::ALL) const; // UInt tags (e.g. pulse exclusion) // // ODO (DB): clean this up w.r.t FetchUInt32s // - std::vector FetchUInt8sRaw(const BamRecordTag tag) const; - std::vector FetchUInt8s( - const BamRecordTag tag, const Data::Orientation orientation = Data::Orientation::NATIVE, - const bool aligned = false, const bool exciseSoftClips = false, - const PulseBehavior pulseBehavior = PulseBehavior::ALL) const; + std::vector FetchUInt8sRaw(BamRecordTag tag) const; + std::vector FetchUInt8s(BamRecordTag tag, + Data::Orientation orientation = Data::Orientation::NATIVE, + bool aligned = false, bool exciseSoftClips = false, + PulseBehavior pulseBehavior = PulseBehavior::ALL) const; private: ///\internal diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamRecordImpl.h pbbam-2.0.0+dfsg/include/pbbam/BamRecordImpl.h --- pbbam-1.7.0+dfsg/include/pbbam/BamRecordImpl.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamRecordImpl.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,16 +3,6 @@ #include -#include -#include - -#include -#include -#include -#include - -#include - #include #include #include @@ -20,6 +10,16 @@ #include #include +#include + +#include +#include +#include +#include + +#include +#include + namespace PacBio { namespace BAM { @@ -317,7 +317,7 @@ /// /// \returns reference to this record. /// - BamRecordImpl& SetSequenceAndQualities(const char* sequence, const size_t sequenceLength, + BamRecordImpl& SetSequenceAndQualities(const char* sequence, size_t sequenceLength, const char* qualities = nullptr); /// \brief Sets the record's DNA sequence and quality values. @@ -344,7 +344,7 @@ /// const size_t sequenceLength, const char* qualities) /// BamRecordImpl& SetPreencodedSequenceAndQualities(const char* encodedSequence, - const size_t rawSequenceLength, + size_t rawSequenceLength, const char* qualities = nullptr); /// \} @@ -387,7 +387,7 @@ /// to be added /// \returns true if tag was successfully added. /// - bool AddTag(const BamRecordTag tag, const Tag& value); + bool AddTag(BamRecordTag tag, const Tag& value); /// \brief Adds a new tag to this record, with an optional modifier. /// @@ -407,7 +407,7 @@ /// /// \returns true if tag was successfully added. /// - bool AddTag(const std::string& tagName, const Tag& value, const TagModifier additionalModifier); + bool AddTag(const std::string& tagName, const Tag& value, TagModifier additionalModifier); /// \brief Adds a new tag to this record, with an optional modifier. /// @@ -421,7 +421,7 @@ /// /// \returns true if tag was successfully added. /// - bool AddTag(const BamRecordTag tag, const Tag& value, const TagModifier additionalModifier); + bool AddTag(BamRecordTag tag, const Tag& value, TagModifier additionalModifier); /// \brief Edits an existing tag on this record. /// @@ -452,7 +452,7 @@ /// /// \returns true if tag was successfully edited. /// - bool EditTag(const BamRecordTag tag, const Tag& newValue); + bool EditTag(BamRecordTag tag, const Tag& newValue); /// \brief Edits an existing tag on this record. /// @@ -473,8 +473,7 @@ /// /// \returns true if tag was successfully edited. /// - bool EditTag(const std::string& tagName, const Tag& value, - const TagModifier additionalModifier); + bool EditTag(const std::string& tagName, const Tag& value, TagModifier additionalModifier); /// \brief Edits an existing tag on this record. /// @@ -488,7 +487,7 @@ /// /// \returns true if tag was successfully edited. /// - bool EditTag(const BamRecordTag tag, const Tag& value, const TagModifier additionalModifier); + bool EditTag(BamRecordTag tag, const Tag& value, TagModifier additionalModifier); /// \returns true if a tag with this name is present in this record. bool HasTag(const std::string& tagName) const; @@ -497,7 +496,7 @@ /// /// This is an overloaded method. /// - bool HasTag(const BamRecordTag tag) const; + bool HasTag(BamRecordTag tag) const; /// \brief Removes an existing tag from this record. /// @@ -519,7 +518,7 @@ /// previously unknown) /// \sa HasTag /// - bool RemoveTag(const BamRecordTag tag); + bool RemoveTag(BamRecordTag tag); /// \brief Fetches a tag from this record. /// @@ -539,7 +538,7 @@ /// \returns Tag object for the requested name. If name is unknown, a /// default constructed Tag is returned (Tag::IsNull() is true). /// - Tag TagValue(const BamRecordTag tag) const; + Tag TagValue(BamRecordTag tag) const; // change above to Tag(); @@ -568,8 +567,7 @@ // (lazy update on request) // internal tag helper methods - bool AddTagImpl(const std::string& tagName, const Tag& value, - const TagModifier additionalModifier); + bool AddTagImpl(const std::string& tagName, const Tag& value, TagModifier additionalModifier); bool RemoveTagImpl(const std::string& tagName); int TagOffset(const std::string& tagName) const; @@ -578,8 +576,7 @@ void SetCigarData(const Data::Cigar& cigar); // core seq/qual logic shared by the public API - BamRecordImpl& SetSequenceAndQualitiesInternal(const char* sequence, - const size_t sequenceLength, + BamRecordImpl& SetSequenceAndQualitiesInternal(const char* sequence, size_t sequenceLength, const char* qualities, bool isPreencoded); private: diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamRecordTag.h pbbam-2.0.0+dfsg/include/pbbam/BamRecordTag.h --- pbbam-1.7.0+dfsg/include/pbbam/BamRecordTag.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamRecordTag.h 2022-01-13 18:20:29.000000000 +0000 @@ -48,6 +48,8 @@ START_FRAME, SUBSTITUTION_QV, SUBSTITUTION_TAG, + BASEMOD_LOCI, + BASEMOD_QV, // // not tags per se, but faking these here to simplify data fetching diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamRecordView.h pbbam-2.0.0+dfsg/include/pbbam/BamRecordView.h --- pbbam-1.7.0+dfsg/include/pbbam/BamRecordView.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamRecordView.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { @@ -38,9 +38,8 @@ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be /// removed from query ends /// - BamRecordView(const BamRecord& record, const Data::Orientation orientation, const bool aligned, - const bool exciseSoftClips, - const PulseBehavior pulseBehavior = PulseBehavior::ALL); + BamRecordView(const BamRecord& record, Data::Orientation orientation, bool aligned, + bool exciseSoftClips, PulseBehavior pulseBehavior = PulseBehavior::ALL); public: /// \returns BamRecord::AltLabelQV with this view's parameters applied diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamTagCodec.h pbbam-2.0.0+dfsg/include/pbbam/BamTagCodec.h --- pbbam-1.7.0+dfsg/include/pbbam/BamTagCodec.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamTagCodec.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include +#include #include -#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BamWriter.h pbbam-2.0.0+dfsg/include/pbbam/BamWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/BamWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BamWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,18 +3,18 @@ #include -#include -#include - -#include - -#include - #include #include #include #include +#include + +#include + +#include +#include + namespace PacBio { namespace BAM { @@ -136,10 +136,10 @@ /// writing or if an error occurred while writing the header /// BamWriter(const std::string& filename, const BamHeader& header, - const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression, - const size_t numThreads = 4, - const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON, - const bool useTempFile = true); + BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression, + size_t numThreads = 4, + BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON, + bool useTempFile = true); /// /// \brief Opens a %BAM file for writing & writes the header information. diff -Nru pbbam-1.7.0+dfsg/include/pbbam/bed/BedReader.h pbbam-2.0.0+dfsg/include/pbbam/bed/BedReader.h --- pbbam-1.7.0+dfsg/include/pbbam/bed/BedReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/bed/BedReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include +#include +#include + #include #include #include -#include -#include - namespace PacBio { namespace BED { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/bed/BedWriter.h pbbam-2.0.0+dfsg/include/pbbam/bed/BedWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/bed/BedWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/bed/BedWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BED { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BgzipFastaWriter.h pbbam-2.0.0+dfsg/include/pbbam/BgzipFastaWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/BgzipFastaWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BgzipFastaWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/BgzipFastqWriter.h pbbam-2.0.0+dfsg/include/pbbam/BgzipFastqWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/BgzipFastqWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/BgzipFastqWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/Cigar.h pbbam-2.0.0+dfsg/include/pbbam/Cigar.h --- pbbam-1.7.0+dfsg/include/pbbam/Cigar.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/Cigar.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/CollectionMetadata.h pbbam-2.0.0+dfsg/include/pbbam/CollectionMetadata.h --- pbbam-1.7.0+dfsg/include/pbbam/CollectionMetadata.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/CollectionMetadata.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,12 @@ #include +#include + #include +#include #include -#include - -#include - namespace PacBio { namespace BAM { @@ -183,7 +182,7 @@ }; private: - mutable boost::optional cache_ = boost::none; + mutable std::optional cache_; }; class PPAConfig : public internal::DataSetElement diff -Nru pbbam-1.7.0+dfsg/include/pbbam/Compare.h pbbam-2.0.0+dfsg/include/pbbam/Compare.h --- pbbam-1.7.0+dfsg/include/pbbam/Compare.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/Compare.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,15 @@ #include -#include -#include -#include +#include #include #include #include -#include +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/CompositeBamReader.h pbbam-2.0.0+dfsg/include/pbbam/CompositeBamReader.h --- pbbam-1.7.0+dfsg/include/pbbam/CompositeBamReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/CompositeBamReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,6 @@ #include -#include -#include -#include -#include -#include -#include - #include #include #include @@ -20,6 +13,13 @@ #include #include +#include +#include +#include +#include +#include +#include + namespace PacBio { namespace BAM { @@ -90,9 +90,12 @@ public: SortedCompositeBamReader(const DataSet& dataset); SortedCompositeBamReader(std::vector bamFiles); - virtual ~SortedCompositeBamReader(); - bool GetNext(BamRecord& record); + SortedCompositeBamReader(SortedCompositeBamReader&&) noexcept; + SortedCompositeBamReader& operator=(SortedCompositeBamReader&&) noexcept; + ~SortedCompositeBamReader() override; + + bool GetNext(BamRecord& record) override; protected: std::vector bamFiles_; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/CompositeFastaReader.h pbbam-2.0.0+dfsg/include/pbbam/CompositeFastaReader.h --- pbbam-1.7.0+dfsg/include/pbbam/CompositeFastaReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/CompositeFastaReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,15 @@ #include +#include +#include +#include + #include #include #include #include -#include -#include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/DataSet.h pbbam-2.0.0+dfsg/include/pbbam/DataSet.h --- pbbam-1.7.0+dfsg/include/pbbam/DataSet.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/DataSet.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,6 +3,11 @@ #include +#include +#include +#include +#include + #include #include #include @@ -10,11 +15,6 @@ #include #include -#include -#include -#include -#include - namespace PacBio { namespace BAM { @@ -89,7 +89,7 @@ /// \param[in] type dataset type /// \throws std::runtime_error if \p type is unknown /// - DataSet(const DataSet::TypeEnum type); + DataSet(DataSet::TypeEnum type); /// \brief Constructs a DataSet from a %BAM file. /// @@ -325,6 +325,13 @@ /// const BAM::SubDataSets& SubDataSets() const; + /// \brief Fetches the dataset's SupplementalResources element. + /// + /// \returns const reference to child element + /// \throws std::runtime_error if element does not exist + /// + const BAM::SupplementalResources& SupplementalResources() const; + /// \} public: @@ -686,7 +693,7 @@ /// \param[in] type new dataset type /// \returns reference to this dataset object /// - DataSet& Type(const BAM::DataSet::TypeEnum type); + DataSet& Type(BAM::DataSet::TypeEnum type); /// \} @@ -734,6 +741,14 @@ /// BAM::SubDataSets& SubDataSets(); + /// \brief Fetches the dataset's SupplementalResources element. + /// + /// This element will be created if it does not yet exist. + /// + /// \returns non-const reference to child element + /// + BAM::SupplementalResources& SupplementalResources(); + /// \} public: @@ -785,6 +800,15 @@ /// DataSet& SubDataSets(const BAM::SubDataSets& subdatasets); + /// \brief Sets this dataset's SupplementalResources element. + /// + /// This element will be created if it does not yet exist. + /// + /// \param[in] resources new value for the element + /// \returns reference to this dataset object + /// + DataSet& SupplementalResources(const BAM::SupplementalResources& resources); + /// \} public: diff -Nru pbbam-1.7.0+dfsg/include/pbbam/DataSetTypes.h pbbam-2.0.0+dfsg/include/pbbam/DataSetTypes.h --- pbbam-1.7.0+dfsg/include/pbbam/DataSetTypes.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/DataSetTypes.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include -#include - #include #include #include #include +#include +#include + namespace PacBio { namespace BAM { @@ -673,6 +673,45 @@ Provenance& ParentTool(const BAM::ParentTool& tool); }; +/// \brief The SupplementalResources class represents an %SupplementalResources element +/// in DataSetXML. +/// +/// The SupplementalResources element is essentially just a list of ExternalResource +/// elements. +/// +class PBBAM_EXPORT SupplementalResources : public internal::DataSetElement +{ +public: + /// \brief Creates an empty resource list. + SupplementalResources(); + SupplementalResources(const internal::FromInputXml& fromInputXml); + + /// \brief Merges \p other resource list with this one. + SupplementalResources& operator+=(const SupplementalResources& other); + +public: + /// \brief Adds an ExternalResource to this list. + void Add(const ExternalResource& ext); + + /// \brief Removes an ExternalResource from this list. + void Remove(const ExternalResource& ext); + +public: + using value_type = ExternalResource; + using iterator_type = internal::DataSetElementIterator; + using const_iterator_type = internal::DataSetElementConstIterator; + + const value_type& operator[](size_t index) const; + value_type& operator[](size_t index); + + iterator_type begin(); + const_iterator_type begin() const; + const_iterator_type cbegin() const; + iterator_type end(); + const_iterator_type end() const; + const_iterator_type cend() const; +}; + /// \brief The DataSetMetadata class represents the %DataSetMetadata child /// element in DataSetXML. /// @@ -918,6 +957,13 @@ /// const BAM::SubDataSets& SubDataSets() const; + /// \brief Fetches the dataset's SupplementalResources element. + /// + /// \returns const reference to child element + /// \throws std::runtime_error if element does not exist + /// + const BAM::SupplementalResources& SupplementalResources() const; + public: /// \brief Access this dataset's namespace info. /// @@ -958,6 +1004,14 @@ /// BAM::SubDataSets& SubDataSets(); + /// \brief Fetches the dataset's SupplementalResources element. + /// + /// This element will be created if it does not yet exist. + /// + /// \returns non-const reference to child element + /// + BAM::SupplementalResources& SupplementalResources(); + public: /// \brief Sets this dataset's ExternalResources element. /// @@ -995,6 +1049,15 @@ /// DataSetBase& SubDataSets(const BAM::SubDataSets& subdatasets); + /// \brief Sets this dataset's SupplementalResources element. + /// + /// This element will be created if it does not yet exist. + /// + /// \param[in] resources new value for the element + /// \returns reference to this dataset object + /// + DataSetBase& SupplementalResources(const BAM::SupplementalResources& resources); + public: /// \brief Access this dataset's namespace info. /// @@ -1233,6 +1296,7 @@ PROPERTIES, PROVENANCE, SEQUENCING_KIT_PLATE, + SUPPLEMENTAL_RESOURCES, TEMPLATE_PREP_KIT, GENERIC_DATASET, diff -Nru pbbam-1.7.0+dfsg/include/pbbam/EntireFileQuery.h pbbam-2.0.0+dfsg/include/pbbam/EntireFileQuery.h --- pbbam-1.7.0+dfsg/include/pbbam/EntireFileQuery.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/EntireFileQuery.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { @@ -38,6 +38,9 @@ /// files. /// EntireFileQuery(const DataSet& dataset); + + EntireFileQuery(EntireFileQuery&&) noexcept; + EntireFileQuery& operator=(EntireFileQuery&&) noexcept; ~EntireFileQuery() override; public: diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FaiIndex.h pbbam-2.0.0+dfsg/include/pbbam/FaiIndex.h --- pbbam-1.7.0+dfsg/include/pbbam/FaiIndex.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FaiIndex.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include - #include #include #include #include +#include + namespace PacBio { namespace BAM { @@ -63,7 +63,7 @@ /// /// \returns FAI entry at \p row /// - const FaiEntry& Entry(const uint32_t row) const; + const FaiEntry& Entry(uint32_t row) const; /// /// \returns true if sequence name found in index diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FastaCache.h pbbam-2.0.0+dfsg/include/pbbam/FastaCache.h --- pbbam-1.7.0+dfsg/include/pbbam/FastaCache.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FastaCache.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,6 +3,8 @@ #include +#include + #include #include #include @@ -10,8 +12,6 @@ #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FastaReader.h pbbam-2.0.0+dfsg/include/pbbam/FastaReader.h --- pbbam-1.7.0+dfsg/include/pbbam/FastaReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FastaReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include +#include +#include + #include #include #include -#include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FastaSequenceQuery.h pbbam-2.0.0+dfsg/include/pbbam/FastaSequenceQuery.h --- pbbam-1.7.0+dfsg/include/pbbam/FastaSequenceQuery.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FastaSequenceQuery.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { @@ -23,6 +23,9 @@ /// \{ FastaSequenceQuery(const BAM::DataSet& dataset); + + FastaSequenceQuery(FastaSequenceQuery&&) noexcept; + FastaSequenceQuery& operator=(FastaSequenceQuery&&) noexcept; ~FastaSequenceQuery() override; /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FastaWriter.h pbbam-2.0.0+dfsg/include/pbbam/FastaWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/FastaWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FastaWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FastqReader.h pbbam-2.0.0+dfsg/include/pbbam/FastqReader.h --- pbbam-1.7.0+dfsg/include/pbbam/FastqReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FastqReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include +#include +#include + #include #include #include -#include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FastqSequence.h pbbam-2.0.0+dfsg/include/pbbam/FastqSequence.h --- pbbam-1.7.0+dfsg/include/pbbam/FastqSequence.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FastqSequence.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FastqWriter.h pbbam-2.0.0+dfsg/include/pbbam/FastqWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/FastqWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FastqWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/FormatUtils.h pbbam-2.0.0+dfsg/include/pbbam/FormatUtils.h --- pbbam-1.7.0+dfsg/include/pbbam/FormatUtils.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/FormatUtils.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/GenomicIntervalQuery.h pbbam-2.0.0+dfsg/include/pbbam/GenomicIntervalQuery.h --- pbbam-1.7.0+dfsg/include/pbbam/GenomicIntervalQuery.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/GenomicIntervalQuery.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,15 @@ #include -#include - -#include - #include #include #include #include +#include + +#include + namespace PacBio { namespace BAM { @@ -56,6 +56,8 @@ GenomicIntervalQuery(const Data::GenomicInterval& interval, const DataSet& dataset, const BaiIndexCache& cache); + GenomicIntervalQuery(GenomicIntervalQuery&&) noexcept; + GenomicIntervalQuery& operator=(GenomicIntervalQuery&&) noexcept; ~GenomicIntervalQuery() override; public: diff -Nru pbbam-1.7.0+dfsg/include/pbbam/IFastaWriter.h pbbam-2.0.0+dfsg/include/pbbam/IFastaWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/IFastaWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/IFastaWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/IFastqWriter.h pbbam-2.0.0+dfsg/include/pbbam/IFastqWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/IFastqWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/IFastqWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/IndexedBamWriter.h pbbam-2.0.0+dfsg/include/pbbam/IndexedBamWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/IndexedBamWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/IndexedBamWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { @@ -80,11 +80,10 @@ /// IndexedBamWriter( const std::string& outputFilename, const BamHeader& header, - const BamWriter::CompressionLevel bamCompressionLevel = BamWriter::DefaultCompression, - const size_t numBamThreads = 4, - const PbiBuilder::CompressionLevel pbiCompressionLevel = PbiBuilder::DefaultCompression, - const size_t numPbiThreads = 4, const size_t numGziThreads = 4, - const size_t tempFileBufferSize = 0x10000); + BamWriter::CompressionLevel bamCompressionLevel = BamWriter::DefaultCompression, + size_t numBamThreads = 4, + PbiBuilder::CompressionLevel pbiCompressionLevel = PbiBuilder::DefaultCompression, + size_t numPbiThreads = 4, size_t numGziThreads = 4, size_t tempFileBufferSize = 0x10000); /// \brief IndexedBamWRiter /// diff -Nru pbbam-1.7.0+dfsg/include/pbbam/IndexedFastaReader.h pbbam-2.0.0+dfsg/include/pbbam/IndexedFastaReader.h --- pbbam-1.7.0+dfsg/include/pbbam/IndexedFastaReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/IndexedFastaReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,18 +3,18 @@ #include -#include +#include +#include +#include + +#include #include #include #include #include -#include - -#include -#include -#include +#include namespace PacBio { namespace BAM { @@ -95,10 +95,9 @@ /// /// \throws std::runtime_error on failure to fetch sequence /// - std::string ReferenceSubsequence( - const BamRecord& bamRecord, - const Data::Orientation orientation = Data::Orientation::GENOMIC, const bool gapped = false, - const bool exciseSoftClips = false) const; + std::string ReferenceSubsequence(const BamRecord& bamRecord, + Data::Orientation orientation = Data::Orientation::GENOMIC, + bool gapped = false, bool exciseSoftClips = false) const; /// \} @@ -110,7 +109,7 @@ bool HasSequence(const std::string& name) const; /// \returns the names of the sequence at a specific index in the FASTA file - std::string Name(const size_t idx) const; + std::string Name(size_t idx) const; /// \returns the names of all sequences stored in the FASTA file std::vector Names() const; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/IndexedFastqReader.h pbbam-2.0.0+dfsg/include/pbbam/IndexedFastqReader.h --- pbbam-1.7.0+dfsg/include/pbbam/IndexedFastqReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/IndexedFastqReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,7 +3,14 @@ #include -#include +#include +#include +#include +#include +#include +#include + +#include #include #include @@ -11,14 +18,7 @@ #include #include -#include - -#include -#include -#include -#include -#include -#include +#include namespace PacBio { namespace BAM { @@ -93,9 +93,8 @@ /// \throws std::runtime_error on failure to fetch data /// std::pair ReferenceSubsequence( - const BamRecord& bamRecord, - const Data::Orientation orientation = Data::Orientation::GENOMIC, const bool gapped = false, - const bool exciseSoftClips = false); + const BamRecord& bamRecord, Data::Orientation orientation = Data::Orientation::GENOMIC, + bool gapped = false, bool exciseSoftClips = false); /// \} @@ -107,7 +106,7 @@ bool HasSequence(const std::string& name) const; /// \returns the names of the sequence at a specific index in the FASTQ file - std::string Name(const size_t idx) const; + std::string Name(size_t idx) const; /// \returns the names of all sequences stored in the FASTQ file std::vector Names() const; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/Compare.inl pbbam-2.0.0+dfsg/include/pbbam/internal/Compare.inl --- pbbam-1.7.0+dfsg/include/pbbam/internal/Compare.inl 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/Compare.inl 2022-01-13 18:20:29.000000000 +0000 @@ -1,3 +1,8 @@ +#ifndef PBBAM_COMPARE_INL +#define PBBAM_COMPARE_INL + +#include + #include namespace PacBio { @@ -35,3 +40,5 @@ } // namespace BAM } // namespace PacBio + +#endif // PBBAM_COMPARE_INL diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/CompositeBamReader.inl pbbam-2.0.0+dfsg/include/pbbam/internal/CompositeBamReader.inl --- pbbam-1.7.0+dfsg/include/pbbam/internal/CompositeBamReader.inl 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/CompositeBamReader.inl 2022-01-13 18:20:29.000000000 +0000 @@ -1,3 +1,8 @@ +#ifndef PBBAM_COMPOSITEREADER_INL +#define PBBAM_COMPOSITEREADER_INL + +#include + #include #include @@ -59,9 +64,15 @@ } template -SortedCompositeBamReader::~SortedCompositeBamReader() -{ -} +SortedCompositeBamReader::SortedCompositeBamReader( + SortedCompositeBamReader&&) noexcept = default; + +template +SortedCompositeBamReader& SortedCompositeBamReader::operator=( + SortedCompositeBamReader&&) noexcept = default; + +template +SortedCompositeBamReader::~SortedCompositeBamReader() = default; template bool SortedCompositeBamReader::GetNext(BamRecord& record) @@ -172,3 +183,5 @@ } // namespace BAM } // namespace PacBio + +#endif // PBBAM_COMPOSITEREADER_INL diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/DataSetElement.h pbbam-2.0.0+dfsg/include/pbbam/internal/DataSetElement.h --- pbbam-1.7.0+dfsg/include/pbbam/internal/DataSetElement.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/DataSetElement.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,17 +3,16 @@ #include +#include + #include #include #include #include #include +#include #include -#include - -#include - namespace PacBio { namespace BAM { namespace internal { @@ -37,8 +36,8 @@ bool operator!=(const XmlName& other) const noexcept; public: - const boost::string_ref LocalName() const; - const boost::string_ref Prefix() const; + const std::string_view LocalName() const; + const std::string_view Prefix() const; const std::string& QualifiedName() const; bool Verbatim() const; @@ -78,8 +77,8 @@ std::vector>& Children(); bool HasChild(const std::string& label) const; - const boost::string_ref LocalNameLabel() const; - const boost::string_ref PrefixLabel() const; + const std::string_view LocalNameLabel() const; + const std::string_view PrefixLabel() const; const std::string& QualifiedNameLabel() const; bool IsVerbatimLabel() const; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/DataSetElement.inl pbbam-2.0.0+dfsg/include/pbbam/internal/DataSetElement.inl --- pbbam-1.7.0+dfsg/include/pbbam/internal/DataSetElement.inl 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/DataSetElement.inl 2022-01-13 18:20:29.000000000 +0000 @@ -1,12 +1,17 @@ -#include +#ifndef PBBAM_DATASETELEMENT_INL +#define PBBAM_DATASETELEMENT_INL -#include +#include + +#include #include #include #include #include +#include + namespace PacBio { namespace BAM { namespace internal { @@ -205,9 +210,9 @@ return -1; } -inline const boost::string_ref DataSetElement::LocalNameLabel() const { return label_.LocalName(); } +inline const std::string_view DataSetElement::LocalNameLabel() const { return label_.LocalName(); } -inline const boost::string_ref DataSetElement::PrefixLabel() const { return label_.Prefix(); } +inline const std::string_view DataSetElement::PrefixLabel() const { return label_.Prefix(); } inline const std::string& DataSetElement::QualifiedNameLabel() const { @@ -412,14 +417,14 @@ inline bool XmlName::operator!=(const XmlName& other) const noexcept { return !(*this == other); } -inline const boost::string_ref XmlName::LocalName() const +inline const std::string_view XmlName::LocalName() const { - return boost::string_ref(qualifiedName_.data() + localNameOffset_, localNameSize_); + return {qualifiedName_.data() + localNameOffset_, localNameSize_}; } -inline const boost::string_ref XmlName::Prefix() const +inline const std::string_view XmlName::Prefix() const { - return boost::string_ref(qualifiedName_.data(), prefixSize_); + return {qualifiedName_.data(), prefixSize_}; } inline const std::string& XmlName::QualifiedName() const { return qualifiedName_; } @@ -429,3 +434,5 @@ } // namespace internal } // namespace BAM } // namespace PacBio + +#endif // PBBAM_DATASETELEMENT_INL diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/PbiBasicTypes.inl pbbam-2.0.0+dfsg/include/pbbam/internal/PbiBasicTypes.inl --- pbbam-1.7.0+dfsg/include/pbbam/internal/PbiBasicTypes.inl 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/PbiBasicTypes.inl 2022-01-13 18:20:29.000000000 +0000 @@ -1,3 +1,8 @@ +#ifndef PBBAM_PBIBASICTYPES_INL +#define PBBAM_PBIBASICTYPES_INL + +#include + #include #include @@ -23,3 +28,5 @@ } // namespace BAM } // namespace PacBio + +#endif // PBBAM_PBIBASICTYPES_INL diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/PbiFilter.inl pbbam-2.0.0+dfsg/include/pbbam/internal/PbiFilter.inl --- pbbam-1.7.0+dfsg/include/pbbam/internal/PbiFilter.inl 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/PbiFilter.inl 2022-01-13 18:20:29.000000000 +0000 @@ -1,6 +1,9 @@ -#include +#ifndef PBBAM_PBIFILTER_INL +#define PBBAM_PBIFILTER_INL -#include +#include + +#include #include #include @@ -8,6 +11,8 @@ #include #include +#include + namespace PacBio { namespace BAM { namespace internal { @@ -49,7 +54,7 @@ { virtual ~WrapperInterface() = default; virtual WrapperInterface* Clone() const = 0; - virtual bool Accepts(const PbiRawData& idx, const size_t row) const = 0; + virtual bool Accepts(const PbiRawData& idx, size_t row) const = 0; }; template @@ -58,7 +63,7 @@ WrapperImpl(T x); WrapperImpl(const WrapperImpl& other); WrapperInterface* Clone() const override; - bool Accepts(const PbiRawData& idx, const size_t row) const override; + bool Accepts(const PbiRawData& idx, size_t row) const override; T data_; }; @@ -233,3 +238,5 @@ } // namespace BAM } // namespace PacBio + +#endif // PBBAM_PBIFILTER_INL diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/PbiFilterTypes.inl pbbam-2.0.0+dfsg/include/pbbam/internal/PbiFilterTypes.inl --- pbbam-1.7.0+dfsg/include/pbbam/internal/PbiFilterTypes.inl 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/PbiFilterTypes.inl 2022-01-13 18:20:29.000000000 +0000 @@ -1,10 +1,15 @@ +#ifndef PBBAM_PBIFILTERTYPES_INL +#define PBBAM_PBIFILTERTYPES_INL + +#include + #include -#include +#include #include -#include +#include namespace PacBio { namespace BAM { @@ -38,10 +43,10 @@ template bool FilterBase::CompareHelper(const T& lhs) const { - if (multiValue_ == boost::none) { - return CompareSingleHelper(lhs); - } else { + if (multiValue_) { return CompareMultiHelper(lhs); + } else { + return CompareSingleHelper(lhs); } } @@ -53,7 +58,7 @@ // whitelist - return true on any hit if (cmp_ == Compare::CONTAINS) { - for (const auto& x : multiValue_.get()) { + for (const auto& x : *multiValue_) { if (x == lhs) { return true; } @@ -62,7 +67,7 @@ } // blacklist - return false on any hit else { - for (const auto& x : multiValue_.get()) { + for (const auto& x : *multiValue_) { if (x == lhs) { return false; } @@ -523,3 +528,5 @@ } // namespace BAM } // namespace PacBio + +#endif // PBBAM_PBIFILTERTYPES_INL diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/QueryBase.h pbbam-2.0.0+dfsg/include/pbbam/internal/QueryBase.h --- pbbam-1.7.0+dfsg/include/pbbam/internal/QueryBase.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/QueryBase.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include -#include -#include - #include #include #include +#include +#include +#include + namespace PacBio { namespace BAM { namespace internal { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/internal/QueryBase.inl pbbam-2.0.0+dfsg/include/pbbam/internal/QueryBase.inl --- pbbam-1.7.0+dfsg/include/pbbam/internal/QueryBase.inl 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/internal/QueryBase.inl 2022-01-13 18:20:29.000000000 +0000 @@ -1,3 +1,6 @@ +#ifndef PBBAM_QUERYBASE_INL +#define PBBAM_QUERYBASE_INL + #include #include @@ -153,3 +156,5 @@ } // namespace internal } // namespace BAM } // namespace PacBio + +#endif // PBBAM_QUERYBASE_INL diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiBasicTypes.h pbbam-2.0.0+dfsg/include/pbbam/PbiBasicTypes.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiBasicTypes.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiBasicTypes.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include -#include +#include #include #include #include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiBuilder.h pbbam-2.0.0+dfsg/include/pbbam/PbiBuilder.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiBuilder.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiBuilder.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include -#include -#include - #include #include +#include +#include + namespace PacBio { namespace BAM { @@ -71,8 +71,8 @@ /// \throws std::runtime_error if PBI file cannot be opened for writing /// PbiBuilder(const std::string& pbiFilename, - const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression, - const size_t numThreads = 4); + PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression, + size_t numThreads = 4); /// \brief Initializes builder to write data to \p pbiFilename. /// @@ -92,9 +92,9 @@ /// /// \throws std::runtime_error if PBI file cannot be opened for writing /// - PbiBuilder(const std::string& pbiFilename, const size_t numReferenceSequences, - const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression, - const size_t numThreads = 4); + PbiBuilder(const std::string& pbiFilename, size_t numReferenceSequences, + PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression, + size_t numThreads = 4); /// \brief Initializes builder to write data to \p pbiFilename. /// @@ -116,10 +116,13 @@ /// /// \throws std::runtime_error if PBI file cannot be opened for writing /// - PbiBuilder(const std::string& pbiFilename, const size_t numReferenceSequences, - const bool isCoordinateSorted, - const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression, - const size_t numThreads = 4); + PbiBuilder(const std::string& pbiFilename, size_t numReferenceSequences, + bool isCoordinateSorted, + PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression, + size_t numThreads = 4); + + PbiBuilder(PbiBuilder&&) noexcept; + PbiBuilder& operator=(PbiBuilder&&) noexcept; /// \brief Destroys builder, writing its data out to PBI file. /// @@ -151,7 +154,7 @@ /// \param[in] record input BamRecord to pull index data from /// \param[in] vOffset \b virtual offset into %BAM file where record begins /// - void AddRecord(const BamRecord& record, const int64_t vOffset); + void AddRecord(const BamRecord& record, int64_t vOffset); /// \brief Writes data out to PBI file & closes builder. /// diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiFile.h pbbam-2.0.0+dfsg/include/pbbam/PbiFile.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiFile.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiFile.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include -#include -#include +#include #include -#include +#include +#include namespace PacBio { namespace BAM { @@ -96,10 +96,9 @@ /// /// \throws std::runtime_error if index file could not be created /// - static void CreateFrom( - const BamFile& bamFile, - const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression, - const size_t numThreads = 4); + static void CreateFrom(const BamFile& bamFile, PbiBuilder::CompressionLevel compressionLevel = + PbiBuilder::DefaultCompression, + size_t numThreads = 4); }; } // namespace BAM diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiFilter.h pbbam-2.0.0+dfsg/include/pbbam/PbiFilter.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiFilter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiFilter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,17 +3,17 @@ #include -#include +#include +#include +#include + +#include #include #include #include -#include - -#include -#include -#include +#include namespace PacBio { namespace BAM { @@ -149,7 +149,7 @@ /// If INTERSECT, a record must match all child filters. If /// UNION, a record must match any child filter. /// - PbiFilter(const CompositionType type = INTERSECT); + PbiFilter(CompositionType type = INTERSECT); /// \brief Creates a composite filter (of INTERSECT type) with an initial /// child filter. @@ -226,7 +226,7 @@ /// \returns true if record at \p row passes this filter criteria, /// including children (if any) /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiFilterQuery.h pbbam-2.0.0+dfsg/include/pbbam/PbiFilterQuery.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiFilterQuery.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiFilterQuery.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { @@ -50,6 +50,8 @@ PbiFilterQuery(const PbiFilter& filter, const DataSet& dataset, const PbiIndexCache& cache); + PbiFilterQuery(PbiFilterQuery&&) noexcept; + PbiFilterQuery& operator=(PbiFilterQuery&&) noexcept; ~PbiFilterQuery() override; /// \brief Main iteration point for record access. diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiFilterTypes.h pbbam-2.0.0+dfsg/include/pbbam/PbiFilterTypes.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiFilterTypes.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiFilterTypes.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,20 +3,19 @@ #include -#include -#include +#include +#include +#include #include +#include #include #include #include #include -#include - -#include -#include -#include +#include +#include namespace PacBio { namespace BAM { @@ -32,12 +31,12 @@ { public: T value_; - boost::optional> multiValue_; + std::optional> multiValue_; Compare::Type cmp_; protected: - FilterBase(T value, const Compare::Type cmp); - FilterBase(std::vector values, const Compare::Type cmp = Compare::CONTAINS); + FilterBase(T value, Compare::Type cmp); + FilterBase(std::vector values, Compare::Type cmp = Compare::CONTAINS); bool CompareHelper(const T& lhs) const; @@ -54,11 +53,11 @@ class BarcodeDataFilterBase : public FilterBase { public: - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; protected: - BarcodeDataFilterBase(T value, const Compare::Type cmp); - BarcodeDataFilterBase(std::vector values, const Compare::Type cmp = Compare::CONTAINS); + BarcodeDataFilterBase(T value, Compare::Type cmp); + BarcodeDataFilterBase(std::vector values, Compare::Type cmp = Compare::CONTAINS); }; /// \internal @@ -69,11 +68,11 @@ class BasicDataFilterBase : public FilterBase { public: - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; protected: - BasicDataFilterBase(T value, const Compare::Type cmp); - BasicDataFilterBase(std::vector values, const Compare::Type cmp = Compare::CONTAINS); + BasicDataFilterBase(T value, Compare::Type cmp); + BasicDataFilterBase(std::vector values, Compare::Type cmp = Compare::CONTAINS); }; /// \internal @@ -84,11 +83,11 @@ class MappedDataFilterBase : public FilterBase { public: - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; protected: - MappedDataFilterBase(T value, const Compare::Type cmp); - MappedDataFilterBase(std::vector values, const Compare::Type cmp = Compare::CONTAINS); + MappedDataFilterBase(T value, Compare::Type cmp); + MappedDataFilterBase(std::vector values, Compare::Type cmp = Compare::CONTAINS); }; } // namespace internal @@ -109,7 +108,7 @@ /// \param[in] position value to compare on /// \param[in] cmp compare type /// - PbiAlignedEndFilter(const uint32_t position, const Compare::Type cmp = Compare::EQUAL); + PbiAlignedEndFilter(uint32_t position, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiAlignedLengthFilter class provides a PbiFilter-compatible @@ -127,13 +126,13 @@ /// \param[in] length value to compare on /// \param[in] cmp compare type /// - PbiAlignedLengthFilter(const uint32_t length, const Compare::Type cmp = Compare::EQUAL); + PbiAlignedLengthFilter(uint32_t length, Compare::Type cmp = Compare::EQUAL); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; }; /// \brief The PbiAlignedStartFilter class provides a PbiFilter-compatible @@ -152,7 +151,7 @@ /// \param[in] position value to compare on /// \param[in] cmp compare type /// - PbiAlignedStartFilter(const uint32_t position, const Compare::Type cmp = Compare::EQUAL); + PbiAlignedStartFilter(uint32_t position, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiAlignedStrandFilter class provides a PbiFilter-compatible @@ -171,7 +170,7 @@ /// \param[in] strand strand value to compare on /// \param[in] cmp compare type /// - PbiAlignedStrandFilter(const Strand strand, const Compare::Type cmp = Compare::EQUAL); + PbiAlignedStrandFilter(Strand strand, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiBarcodeFilter class provides a PbiFilter-compatible filter on @@ -191,7 +190,7 @@ /// \param[in] barcode barcode ID to compare on /// \param[in] cmp compare type /// - PbiBarcodeFilter(const int16_t barcode, const Compare::Type cmp = Compare::EQUAL); + PbiBarcodeFilter(int16_t barcode, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted barcode filter. /// @@ -203,13 +202,13 @@ /// \param[in] barcodes barcode IDs /// \param[in] cmp compare type /// - PbiBarcodeFilter(std::vector barcodes, const Compare::Type cmp = Compare::CONTAINS); + PbiBarcodeFilter(std::vector barcodes, Compare::Type cmp = Compare::CONTAINS); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: PbiFilter compositeFilter_; @@ -231,7 +230,7 @@ /// \param[in] bcFwdId (forward) barcode ID to compare on /// \param[in] cmp compare type /// - PbiBarcodeForwardFilter(const int16_t bcFwdId, const Compare::Type cmp = Compare::EQUAL); + PbiBarcodeForwardFilter(int16_t bcFwdId, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted forward barcode filter. /// @@ -243,8 +242,7 @@ /// \param[in] barcodes barcode IDs /// \param[in] cmp compare type /// - PbiBarcodeForwardFilter(std::vector barcodes, - const Compare::Type cmp = Compare::CONTAINS); + PbiBarcodeForwardFilter(std::vector barcodes, Compare::Type cmp = Compare::CONTAINS); }; /// \brief The PbiBarcodeQualityFilter class provides a PbiFilter-compatible @@ -263,7 +261,7 @@ /// \param[in] bcQuality barcode quality to compare on /// \param[in] cmp compare type /// - PbiBarcodeQualityFilter(const uint8_t bcQuality, const Compare::Type cmp = Compare::EQUAL); + PbiBarcodeQualityFilter(uint8_t bcQuality, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiBarcodeReverseFilter class provides a PbiFilter-compatible @@ -282,7 +280,7 @@ /// \param[in] bcRevId (reverse) barcode ID to compare on /// \param[in] cmp compare type /// - PbiBarcodeReverseFilter(const int16_t bcRevId, const Compare::Type cmp = Compare::EQUAL); + PbiBarcodeReverseFilter(int16_t bcRevId, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted reverse barcode filter. /// @@ -294,8 +292,7 @@ /// \param[in] barcodes barcode IDs /// \param[in] cmp compare type /// - PbiBarcodeReverseFilter(std::vector barcodes, - const Compare::Type cmp = Compare::CONTAINS); + PbiBarcodeReverseFilter(std::vector barcodes, Compare::Type cmp = Compare::CONTAINS); }; /// \brief The PbiBarcodesFilter class provides a PbiFilter-compatible filter on @@ -318,8 +315,7 @@ /// \param[in] barcodes barcode IDs to compare on /// \param[in] cmp compare type /// - PbiBarcodesFilter(const std::pair barcodes, - const Compare::Type cmp = Compare::EQUAL); + PbiBarcodesFilter(std::pair barcodes, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a barcodes filter from forward & reverse IDs. /// @@ -327,14 +323,13 @@ /// \param[in] bcReverse reverse barcode ID to compare on /// \param[in] cmp compare type /// - PbiBarcodesFilter(const int16_t bcForward, const int16_t bcReverse, - const Compare::Type cmp = Compare::EQUAL); + PbiBarcodesFilter(int16_t bcForward, int16_t bcReverse, Compare::Type cmp = Compare::EQUAL); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: PbiFilter compositeFilter_; @@ -355,13 +350,13 @@ /// \param[in] identity value to compare on /// \param[in] cmp compare type /// - PbiIdentityFilter(const float identity, const Compare::Type cmp = Compare::EQUAL); + PbiIdentityFilter(float identity, Compare::Type cmp = Compare::EQUAL); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; }; /// \brief The PbiLocalContextFilter class provides a PbiFilter-compatible @@ -378,8 +373,7 @@ PbiFile::BasicField::CONTEXT_FLAG> { public: - PbiLocalContextFilter(const Data::LocalContextFlags& flags, - const Compare::Type cmp = Compare::EQUAL); + PbiLocalContextFilter(const Data::LocalContextFlags& flags, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiMapQualityFilter class provides a PbiFilter-compatible filter on @@ -398,7 +392,7 @@ /// \param[in] mapQual value to compare on /// \param[in] cmp compare type /// - PbiMapQualityFilter(const uint8_t mapQual, const Compare::Type cmp = Compare::EQUAL); + PbiMapQualityFilter(uint8_t mapQual, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiMovieNameFilter class provides a PbiFilter-compatible filter @@ -418,7 +412,7 @@ /// \note There is no compare type parameter here, it is always /// Compare::EQUAL. Records will match movie name, exactly. /// - PbiMovieNameFilter(const std::string& movieName, const Compare::Type cmp = Compare::EQUAL); + PbiMovieNameFilter(const std::string& movieName, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted movie name filter. /// @@ -431,13 +425,13 @@ /// \param[in] cmp compare type /// PbiMovieNameFilter(const std::vector& movieNames, - const Compare::Type cmp = Compare::CONTAINS); + Compare::Type cmp = Compare::CONTAINS); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: std::set movieNames_; @@ -461,7 +455,7 @@ /// \param[in] numDeletions value to compare on /// \param[in] cmp compare type /// - PbiNumDeletedBasesFilter(const size_t numDeletions, const Compare::Type cmp = Compare::EQUAL); + PbiNumDeletedBasesFilter(size_t numDeletions, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiNumInsertededBasesFilter class provides a PbiFilter-compatible @@ -480,7 +474,7 @@ /// \param[in] numInsertions value to compare on /// \param[in] cmp compare type /// - PbiNumInsertedBasesFilter(const size_t numInsertions, const Compare::Type cmp = Compare::EQUAL); + PbiNumInsertedBasesFilter(size_t numInsertions, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiNumMatchesFilter class provides a PbiFilter-compatible filter @@ -498,7 +492,7 @@ /// \param[in] numMatchedBases value to compare on /// \param[in] cmp compare type /// - PbiNumMatchesFilter(const size_t numMatchedBases, const Compare::Type cmp = Compare::EQUAL); + PbiNumMatchesFilter(size_t numMatchedBases, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiNumMismatchesFilter class provides a PbiFilter-compatible @@ -517,8 +511,7 @@ /// \param[in] numMismatchedBases value to compare on /// \param[in] cmp compare type /// - PbiNumMismatchesFilter(const size_t numMismatchedBases, - const Compare::Type cmp = Compare::EQUAL); + PbiNumMismatchesFilter(size_t numMismatchedBases, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiNumSubreadsFilter class provides a PbiFilter-compatible @@ -532,16 +525,19 @@ /// \param[in] numSubreads value to compare on /// \param[in] cmp compare type /// - PbiNumSubreadsFilter(int numSubreads, const Compare::Type cmp = Compare::EQUAL); + PbiNumSubreadsFilter(int numSubreads, Compare::Type cmp = Compare::EQUAL); PbiNumSubreadsFilter(const PbiNumSubreadsFilter& other); + + PbiNumSubreadsFilter(PbiNumSubreadsFilter&&) noexcept; + PbiNumSubreadsFilter& operator=(PbiNumSubreadsFilter&&) noexcept; ~PbiNumSubreadsFilter(); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: struct PbiNumSubreadsFilterPrivate; @@ -563,7 +559,7 @@ /// \param[in] position value to compare on /// \param[in] cmp compare type /// - PbiQueryEndFilter(const int32_t position, const Compare::Type cmp = Compare::EQUAL); + PbiQueryEndFilter(int32_t position, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiQueryLengthFilter class provides a PbiFilter-compatible filter @@ -583,13 +579,13 @@ /// \param[in] length value to compare on /// \param[in] cmp compare type /// - PbiQueryLengthFilter(const int32_t length, const Compare::Type cmp = Compare::EQUAL); + PbiQueryLengthFilter(int32_t length, Compare::Type cmp = Compare::EQUAL); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; }; /// \brief The PbiQueryNameFilter class provides a PbiFilter-compatible filter @@ -609,7 +605,7 @@ /// \note There is no compare type parameter here, it is always /// Compare::EQUAL. Records will match query name, exactly. /// - PbiQueryNameFilter(const std::string& qname, const Compare::Type cmp = Compare::EQUAL); + PbiQueryNameFilter(const std::string& qname, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted query name filter. /// @@ -622,16 +618,19 @@ /// \param[in] cmp compare type /// PbiQueryNameFilter(const std::vector& queryNames, - const Compare::Type cmp = Compare::CONTAINS); + Compare::Type cmp = Compare::CONTAINS); PbiQueryNameFilter(const PbiQueryNameFilter& other); + + PbiQueryNameFilter(PbiQueryNameFilter&&) noexcept; + PbiQueryNameFilter& operator=(PbiQueryNameFilter&&) noexcept; ~PbiQueryNameFilter(); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: struct PbiQueryNameFilterPrivate; @@ -654,7 +653,7 @@ /// \param[in] position value to compare on /// \param[in] cmp compare type /// - PbiQueryStartFilter(const int32_t position, const Compare::Type cmp = Compare::EQUAL); + PbiQueryStartFilter(int32_t position, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiReadAccuracyFilter class provides a PbiFilter-compatible filter @@ -673,7 +672,7 @@ /// \param[in] accuracy value to compare on /// \param[in] cmp compare type /// - PbiReadAccuracyFilter(const Data::Accuracy accuracy, const Compare::Type cmp = Compare::EQUAL); + PbiReadAccuracyFilter(Data::Accuracy accuracy, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiReadGroupFilter class provides a PbiFilter-compatible filter @@ -695,7 +694,7 @@ /// /// \sa BamRecord::ReadGroupNumericId /// - PbiReadGroupFilter(const int32_t rgId, const Compare::Type cmp = Compare::EQUAL); + PbiReadGroupFilter(int32_t rgId, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a filter on printable read group ID value /// @@ -704,7 +703,7 @@ /// /// \sa BamRecord::ReadGroupId /// - PbiReadGroupFilter(const std::string& rgId, const Compare::Type cmp = Compare::EQUAL); + PbiReadGroupFilter(const std::string& rgId, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a filter on read group (object). /// @@ -713,7 +712,7 @@ /// /// \sa BamRecord::ReadGroup /// - PbiReadGroupFilter(const ReadGroupInfo& rg, const Compare::Type cmp = Compare::EQUAL); + PbiReadGroupFilter(const ReadGroupInfo& rg, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted filter on read group numeric IDs. /// @@ -725,8 +724,7 @@ /// \param[in] rgIds numeric read group IDs /// \param[in] cmp compare type /// - PbiReadGroupFilter(const std::vector& rgIds, - const Compare::Type cmp = Compare::CONTAINS); + PbiReadGroupFilter(const std::vector& rgIds, Compare::Type cmp = Compare::CONTAINS); /// \brief Creates a whitelisted or blacklisted filter on read group string IDs. /// @@ -739,7 +737,7 @@ /// \param[in] cmp compare type /// PbiReadGroupFilter(const std::vector& rgIds, - const Compare::Type cmp = Compare::CONTAINS); + Compare::Type cmp = Compare::CONTAINS); /// \brief Creates a whitelisted or blacklisted filter on read group objects. /// @@ -752,17 +750,18 @@ /// \param[in] cmp compare type /// PbiReadGroupFilter(const std::vector& readGroups, - const Compare::Type cmp = Compare::EQUAL); + Compare::Type cmp = Compare::CONTAINS); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: - // RGID number => barcode(s) filter - std::unordered_map>>> lookup_; + void AddReadGroups(const std::vector& readGroups); + + std::unordered_map> readGroups_; Compare::Type cmp_; }; @@ -782,7 +781,7 @@ /// \param[in] tEnd value to compare on /// \param[in] cmp compare type /// - PbiReferenceEndFilter(const uint32_t tEnd, const Compare::Type cmp = Compare::EQUAL); + PbiReferenceEndFilter(uint32_t tEnd, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiReferenceIdFilter class provides a PbiFilter-compatible @@ -801,7 +800,7 @@ /// \param[in] tId reference ID to compare on /// \param[in] cmp compare type /// - PbiReferenceIdFilter(const int32_t tId, const Compare::Type cmp = Compare::EQUAL); + PbiReferenceIdFilter(int32_t tId, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted reference ID filter. /// @@ -813,7 +812,7 @@ /// \param[in] tIds reference IDs /// \param[in] cmp compare type /// - PbiReferenceIdFilter(std::vector tIds, const Compare::Type cmp = Compare::CONTAINS); + PbiReferenceIdFilter(std::vector tIds, Compare::Type cmp = Compare::CONTAINS); }; /// \brief The PbiReferenceNameFilter class provides a PbiFilter-compatible @@ -843,20 +842,19 @@ /// \param[in] rnames reference names /// \param[in] cmp compare type /// - PbiReferenceNameFilter(std::vector rnames, - const Compare::Type cmp = Compare::CONTAINS); + PbiReferenceNameFilter(std::vector rnames, Compare::Type cmp = Compare::CONTAINS); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: mutable bool initialized_ = false; mutable PbiFilter subFilter_; std::string rname_; - boost::optional> rnameWhitelist_; + std::optional> rnameWhitelist_; Compare::Type cmp_; // marked const so we can delay setup of filter in Accepts(), once we have @@ -882,7 +880,7 @@ /// \param[in] tStart value to compare on /// \param[in] cmp compare type /// - PbiReferenceStartFilter(const uint32_t tStart, const Compare::Type cmp = Compare::EQUAL); + PbiReferenceStartFilter(uint32_t tStart, Compare::Type cmp = Compare::EQUAL); }; /// \brief The PbiZmwFilter class provides a PbiFilter-compatible filter on @@ -900,7 +898,7 @@ /// \param[in] zmw value to compare on /// \param[in] cmp compare type /// - PbiZmwFilter(const int32_t zmw, const Compare::Type cmp = Compare::EQUAL); + PbiZmwFilter(int32_t zmw, Compare::Type cmp = Compare::EQUAL); /// \brief Creates a whitelisted or blacklisted ZMW hole number filter. /// @@ -912,13 +910,13 @@ /// \param[in] zmws ZMW hole numbers /// \param[in] cmp compare type /// - PbiZmwFilter(std::vector zmws, const Compare::Type cmp = Compare::CONTAINS); + PbiZmwFilter(std::vector zmws, Compare::Type cmp = Compare::CONTAINS); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: Compare::Type cmp_; @@ -942,15 +940,15 @@ class PbiZmwModuloFilter { public: - PbiZmwModuloFilter(const uint32_t denominator, const uint32_t value, - const FilterHash hashtype = FilterHash::UNSIGNED_LONG_CAST, - const Compare::Type = Compare::EQUAL); + PbiZmwModuloFilter(uint32_t denominator, uint32_t value, + FilterHash hashtype = FilterHash::UNSIGNED_LONG_CAST, + Compare::Type = Compare::EQUAL); /// \brief Performs the actual index lookup. /// /// Most client code should not need to use this method directly. /// - bool Accepts(const PbiRawData& idx, const size_t row) const; + bool Accepts(const PbiRawData& idx, size_t row) const; private: uint32_t denominator_; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiIndexedBamReader.h pbbam-2.0.0+dfsg/include/pbbam/PbiIndexedBamReader.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiIndexedBamReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiIndexedBamReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include - #include #include #include #include +#include + namespace PacBio { namespace BAM { @@ -78,6 +78,8 @@ PbiIndexedBamReader(BamFile bamFile); PbiIndexedBamReader(BamFile bamFile, const std::shared_ptr& index); + PbiIndexedBamReader(PbiIndexedBamReader&&) noexcept; + PbiIndexedBamReader& operator=(PbiIndexedBamReader&&) noexcept; ~PbiIndexedBamReader() override; /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/PbiRawData.h pbbam-2.0.0+dfsg/include/pbbam/PbiRawData.h --- pbbam-1.7.0+dfsg/include/pbbam/PbiRawData.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/PbiRawData.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include -#include +#include #include #include #include -#include +#include +#include namespace PacBio { namespace BAM { @@ -337,7 +337,7 @@ /// \returns true if index has \b section /// \param[in] section PbiFile::Section identifier /// - bool HasSection(const PbiFile::Section section) const; + bool HasSection(PbiFile::Section section) const; /// \returns index filename ("*.pbi") /// diff -Nru pbbam-1.7.0+dfsg/include/pbbam/Position.h pbbam-2.0.0+dfsg/include/pbbam/Position.h --- pbbam-1.7.0+dfsg/include/pbbam/Position.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/Position.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/QualityValues.h pbbam-2.0.0+dfsg/include/pbbam/QualityValues.h --- pbbam-1.7.0+dfsg/include/pbbam/QualityValues.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/QualityValues.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/ReadGroupInfo.h pbbam-2.0.0+dfsg/include/pbbam/ReadGroupInfo.h --- pbbam-1.7.0+dfsg/include/pbbam/ReadGroupInfo.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/ReadGroupInfo.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,19 +3,19 @@ #include -#include -#include +#include + +#include +#include +#include #include +#include #include #include -#include - -#include -#include - -#include +#include +#include namespace PacBio { namespace BAM { @@ -91,6 +91,17 @@ SEQUELII }; +/// \brief Aggregate to simplify ReadGroupInfo constructor. +/// +struct ReadGroupInfoConfig +{ + std::string MovieName; + std::string ReadType; + std::optional Platform{}; + std::optional> Barcodes{}; + std::optional Strand{}; +}; + /// \brief The ReadGroupInfo class represents a read group entry (\@RG) in the /// SAM header. /// @@ -140,7 +151,7 @@ /// \param[in] id read group ID number /// \returns hexadecimal string representation of ID /// - static std::string IntToId(const int32_t id); + static std::string IntToId(int32_t id); /// \returns sequencing chemistry from (bindingKig, sequencingKit, /// basecallerVersion) @@ -217,6 +228,15 @@ ReadGroupInfo(std::string movieName, std::string readType, PlatformModelType platform, std::pair barcodes); + /// \brief Creates a read group info object from a ReadGroupInfoConfig + /// + /// \param[in] config aggregate that contains all information to + /// create a ReadGroupInfo + /// + /// \sa RecordType + /// + ReadGroupInfo(ReadGroupInfoConfig config); + /// \} public: @@ -297,19 +317,19 @@ /// /// \note This does **NOT** refer to any data in the description (DS) tag. /// - boost::optional> Barcodes() const; + std::optional> Barcodes() const; /// \returns forward barcode label stored in the read group ID (\@RG:ID) /// /// \note This does **NOT** refer to any data in the description (DS) tag. /// - boost::optional BarcodeForward() const; + std::optional BarcodeForward() const; /// \returns reverse barcode label stored in the read group ID (\@RG:ID) /// /// \note This does **NOT** refer to any data in the description (DS) tag. /// - boost::optional BarcodeReverse() const; + std::optional BarcodeReverse() const; //// \returns string value of \@RG:BC std::string BarcodeSequence() const; @@ -438,6 +458,9 @@ /// \returns sequencing kit part number std::string SequencingKit() const; + /// \returns CCS strand + std::optional Strand() const; + /// \} public: @@ -669,6 +692,13 @@ /// ReadGroupInfo& SequencingKit(std::string kitNumber); + /// \brief Sets the ccs strand. + /// + /// \param[in] strand new value + /// \returns reference to this object + /// + ReadGroupInfo& Strand(Data::Strand strand); + /// \} private: @@ -702,9 +732,10 @@ BarcodeModeType barcodeMode_ = BarcodeModeType::NONE; BarcodeQualityType barcodeQuality_ = BarcodeQualityType::NONE; std::map features_; + std::optional strand_; // (optional) barcode label handling - boost::optional> barcodes_ = boost::none; + std::optional> barcodes_; std::string baseId_; Data::FrameEncoder ipdEncoder_ = Data::V1FrameEncoder{}; @@ -717,6 +748,8 @@ std::string EncodeSamDescription() const; void DecodeSamDescription(const std::string& description); void DecodeBarcodeKey(const std::string& key, std::string value); + void DecodeStrand(std::string value); + std::string EncodeStrand(Data::Strand strand) const; void DecodeFrameCodecKey(const std::string& key, std::string value); }; @@ -725,10 +758,10 @@ /// \param[in] movieName sequencing movie name /// \param[in] readType string version of read type /// -/// \returns hexadecimal string read group ID +/// \returns hexadecimal string read group ID, e.g. "4c1bc9e4" /// -PBBAM_EXPORT -std::string MakeReadGroupId(const std::string& movieName, const std::string& readType); +std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, + std::optional strand = {}); /// \brief Creates a read group ID from a movie name, read type, and barcode string. /// @@ -736,12 +769,12 @@ /// \param[in] readType string version of read type /// \param[in] barcodeString string version of barcode pair ("0--0") /// -/// \returns string containing the concatenation of the hex value with barcode label "/x--y" -/// (e.g. "4c1bc9e4/0--1") +/// \returns string containing the concatenation of the hex value with barcode +/// label "/x--y", e.g. "4c1bc9e4/0--1" /// -PBBAM_EXPORT std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, - const std::string& barcodeString); + const std::string& barcodeString, + std::optional strand = {}); /// \brief Creates a read group ID from a movie name, read type, and barcode IDs /// @@ -749,12 +782,87 @@ /// \param[in] readType string version of read type /// \param[in] barcodes pair of barcode indices (0,0) /// +/// \returns string containing the concatenation of the hex value with barcode +/// label "/x--y", e.g. "4c1bc9e4/0--1" +/// +std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, + const std::pair& barcodes, + std::optional strand = {}); + +/// \brief Creates a read group ID from a read group object +/// +/// This convenience method detects whether barcode information is available and +/// returns the appropriate label. +/// +/// \param[in] readGroup ReadGroupInfo object +/// +/// \returns string containing the concatenation of the hex value, optionally with +/// barcode label "/x--y", e.g. "4c1bc9e4" or "4c1bc9e4/0--1" +/// +std::string MakeReadGroupId(const ReadGroupInfo& readGroup); + +/// \brief Creates a \b LEGACY read group ID from a movie name & read type. +/// +/// \warning The IDs generated by the "legacy" group of methods were incorrect, where +/// barcode information was included as part of the MD5 hash generated. +/// These are provided in case there is a need to reproduce the old behavior. +/// +/// \param[in] movieName sequencing movie name +/// \param[in] readType string version of read type +/// +/// \returns hexadecimal string read group ID +/// +std::string MakeLegacyReadGroupId(const std::string& movieName, const std::string& readType); + +/// \brief Creates a \b LEGACY read group ID from a movie name, read type, +/// and barcode string. +/// +/// \warning The IDs generated by the "legacy" group of methods were incorrect, where +/// barcode information was included as part of the MD5 hash generated. +/// These are provided in case there is a need to reproduce the old behavior. +/// +/// \param[in] movieName sequencing movie name +/// \param[in] readType string version of read type +/// \param[in] barcodeString string version of barcode pair ("0--0") +/// +/// \returns string containing the concatenation of the hex value with barcode label "/x--y" +/// (e.g. "4c1bc9e4/0--1") +/// +std::string MakeLegacyReadGroupId(const std::string& movieName, const std::string& readType, + const std::string& barcodeString); + +/// \brief Creates a \b LEGACY read group ID from a movie name, read type, and +/// barcode IDs. +/// +/// \warning The IDs generated by the "legacy" group of methods were incorrect, where +/// barcode information was included as part of the MD5 hash generated. +/// These are provided in case there is a need to reproduce the old behavior. +/// +/// \param[in] movieName sequencing movie name +/// \param[in] readType string version of read type +/// \param[in] barcodes pair of barcode indices (0,0) +/// /// \returns string containing the concatenation of the hex value with barcode label "/x--y" /// (e.g. "4c1bc9e4/0--1") /// -PBBAM_EXPORT -std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, - const std::pair& barcodes); +std::string MakeLegacyReadGroupId(const std::string& movieName, const std::string& readType, + const std::pair& barcodes); + +/// \brief Creates a \b LEGACY read group ID from a read group object +/// +/// This convenience method detects whether barcode information is available and +/// returns the appropriate label. +/// +/// \warning The IDs generated by the "legacy" group of methods were incorrect, where +/// barcode information was included as part of the MD5 hash generated. +/// These are provided in case there is a need to reproduce the old behavior. +/// +/// \param[in] readGroup ReadGroupInfo object +/// +/// \returns string containing the concatenation of the hex value, optionally with +/// barcode label "/x--y", e.g. "4c1bc9e4" or "4c1bc9e4/0--1" +/// +std::string MakeLegacyReadGroupId(const ReadGroupInfo& readGroup); } // namespace BAM } // namespace PacBio diff -Nru pbbam-1.7.0+dfsg/include/pbbam/RecordType.h pbbam-2.0.0+dfsg/include/pbbam/RecordType.h --- pbbam-1.7.0+dfsg/include/pbbam/RecordType.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/RecordType.h 2022-01-13 18:20:29.000000000 +0000 @@ -33,7 +33,7 @@ /// /// \param[in] type /// -bool IsCcsOrTranscript(const RecordType type); +bool IsCcsOrTranscript(RecordType type); /// /// \brief Returns string representation of RecordType @@ -42,7 +42,7 @@ /// \return std::string /// \throws std::runtime_error if type is unrecognized /// -std::string ToString(const RecordType type); +std::string ToString(RecordType type); } // namespace BAM } // namespace PacBio diff -Nru pbbam-1.7.0+dfsg/include/pbbam/RunMetadata.h pbbam-2.0.0+dfsg/include/pbbam/RunMetadata.h --- pbbam-1.7.0+dfsg/include/pbbam/RunMetadata.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/RunMetadata.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include +#include + #include #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/SamReader.h pbbam-2.0.0+dfsg/include/pbbam/SamReader.h --- pbbam-1.7.0+dfsg/include/pbbam/SamReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/SamReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { @@ -28,7 +28,9 @@ /// explicit SamReader(std::string fn); - virtual ~SamReader(); + SamReader(SamReader&&) noexcept; + SamReader& operator=(SamReader&&) noexcept; + ~SamReader() override; public: /// \returns SAM filename @@ -50,7 +52,7 @@ /// \throw std::runtime_error if failed to read from file (e.g. possible /// truncated or corrupted file). /// - bool GetNext(BamRecord& record); + bool GetNext(BamRecord& record) override; private: class SamReaderPrivate; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/SamTagCodec.h pbbam-2.0.0+dfsg/include/pbbam/SamTagCodec.h --- pbbam-1.7.0+dfsg/include/pbbam/SamTagCodec.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/SamTagCodec.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/SamWriter.h pbbam-2.0.0+dfsg/include/pbbam/SamWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/SamWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/SamWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/StringUtilities.h pbbam-2.0.0+dfsg/include/pbbam/StringUtilities.h --- pbbam-1.7.0+dfsg/include/pbbam/StringUtilities.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/StringUtilities.h 2022-01-13 18:20:29.000000000 +0000 @@ -17,7 +17,7 @@ /// /// \return joined string /// -std::string Join(const std::vector& tokens, const char delim); +std::string Join(const std::vector& tokens, char delim); /// \brief Splits a string into tokens /// @@ -26,7 +26,7 @@ /// /// \returns vector of tokens /// -std::vector Split(const std::string& line, const char delim = '\t'); +std::vector Split(const std::string& line, char delim = '\t'); /// \brief Remove all whitespace from input string (start, end, & internal) /// diff -Nru pbbam-1.7.0+dfsg/include/pbbam/TagCollection.h pbbam-2.0.0+dfsg/include/pbbam/TagCollection.h --- pbbam-1.7.0+dfsg/include/pbbam/TagCollection.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/TagCollection.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include +#include + #include #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/Tag.h pbbam-2.0.0+dfsg/include/pbbam/Tag.h --- pbbam-1.7.0+dfsg/include/pbbam/Tag.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/Tag.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include +#include #include #include #include -#include +#include namespace PacBio { namespace BAM { @@ -107,7 +107,7 @@ /// /// \throws runtime_error if \p modifier is not valid for int8_t data /// - Tag(int8_t value, const TagModifier mod); + Tag(int8_t value, TagModifier mod); /// \brief Creates a Tag from an unsigned 8-bit integer or character. /// @@ -364,7 +364,7 @@ std::string Typename() const; /// \returns true if tag data modifier \p m is set - bool HasModifier(const TagModifier m) const; + bool HasModifier(TagModifier m) const; /// \returns current tag data modifier TagModifier Modifier() const; @@ -374,7 +374,7 @@ /// \param[in] m new modifier value /// /// \returns reference to this tag - Tag& Modifier(const TagModifier m); + Tag& Modifier(TagModifier m); /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/TextFileReader.h pbbam-2.0.0+dfsg/include/pbbam/TextFileReader.h --- pbbam-1.7.0+dfsg/include/pbbam/TextFileReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/TextFileReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/Validator.h pbbam-2.0.0+dfsg/include/pbbam/Validator.h --- pbbam-1.7.0+dfsg/include/pbbam/Validator.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/Validator.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include +#include #include -#include +#include namespace PacBio { namespace BAM { @@ -44,7 +44,7 @@ /// /// \sa Validator::ValidateFileMetdata, Validator::ValidateEntireFile /// - static bool IsValid(const BamFile& file, const bool entireFile); + static bool IsValid(const BamFile& file, bool entireFile); /// \brief Checks that a %BAM header conforms to the %PacBio specification. /// @@ -86,7 +86,7 @@ /// \throws ValidationException if \p header fails validation checks /// static void Validate(const BamHeader& header, - const size_t maxErrors = std::numeric_limits::max()); + size_t maxErrors = std::numeric_limits::max()); /// \brief Checks that a %BAM read group conforms to the %PacBio /// specification. @@ -97,7 +97,7 @@ /// \throws ValidationException if \p rg fails validation checks /// static void Validate(const ReadGroupInfo& rg, - const size_t maxErrors = std::numeric_limits::max()); + size_t maxErrors = std::numeric_limits::max()); /// \brief Checks that a %BAM record conforms to the %PacBio specification. /// @@ -107,7 +107,7 @@ /// \throws ValidationException if \p record fails validation checks /// static void Validate(const BamRecord& record, - const size_t maxErrors = std::numeric_limits::max()); + size_t maxErrors = std::numeric_limits::max()); /// \brief Checks that a %BAM file's (entire) contents conform to the /// %PacBio specification. @@ -127,7 +127,7 @@ /// \throws ValidationException if \p file fails validation checks /// static void ValidateEntireFile(const BamFile& file, - const size_t maxErrors = std::numeric_limits::max()); + size_t maxErrors = std::numeric_limits::max()); /// \brief Checks that a %BAM file's metadata conforms to the /// %PacBio specification. @@ -141,7 +141,7 @@ /// \throws ValidationException if \p header fails validation checks /// static void ValidateFileMetadata(const BamFile& file, - const size_t maxErrors = std::numeric_limits::max()); + size_t maxErrors = std::numeric_limits::max()); }; } // namespace BAM diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfFile.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfFile.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfFile.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfFile.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfFormat.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfFormat.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfFormat.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfFormat.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include -#include -#include - #include #include +#include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfHeader.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfHeader.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfHeader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfHeader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,13 @@ #include +#include + #include #include #include #include -#include - -#include - namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfHeaderTypes.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfHeaderTypes.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfHeaderTypes.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfHeaderTypes.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,11 @@ #include +#include #include #include #include -#include - namespace PacBio { namespace VCF { @@ -100,8 +99,8 @@ const std::string& Number() const; const std::string& Type() const; const std::string& Description() const; - const boost::optional& Source() const; - const boost::optional& Version() const; + const std::optional& Source() const; + const std::optional& Version() const; InfoDefinition& Source(std::string s); InfoDefinition& Version(std::string v); @@ -115,8 +114,8 @@ std::string description_; // optional fields - settable after ctor - boost::optional source_; - boost::optional version_; + std::optional source_; + std::optional version_; }; } // namespace VCF diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfQuery.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfQuery.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfQuery.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfQuery.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include - #include #include #include #include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfReader.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfReader.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include -#include -#include - #include #include #include +#include +#include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfSort.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfSort.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfSort.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfSort.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfVariant.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfVariant.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfVariant.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfVariant.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,29 +3,28 @@ #include +#include +#include + +#include #include #include #include -#include - -#include -#include - namespace PacBio { namespace VCF { struct InfoField { std::string id; - boost::optional value; - boost::optional> values; + std::optional value; + std::optional> values; }; struct GenotypeData { - boost::optional value; - boost::optional> values; + std::optional value; + std::optional> values; }; struct GenotypeField @@ -84,11 +83,11 @@ bool HasInfoField(const std::string& id) const; - const boost::optional InfoValue(const std::string& id) const; - VcfVariant& InfoValue(const std::string& id, boost::optional value); + const std::optional InfoValue(const std::string& id) const; + VcfVariant& InfoValue(const std::string& id, std::optional value); - const boost::optional> InfoValues(const std::string& id) const; - VcfVariant& InfoValues(const std::string& id, boost::optional> values); + const std::optional> InfoValues(const std::string& id) const; + VcfVariant& InfoValues(const std::string& id, std::optional> values); public: // sample genotypes @@ -101,18 +100,18 @@ std::vector Genotypes() const; VcfVariant& Genotypes(std::vector genotypes); - const boost::optional& GenotypeValue(const size_t sampleIndex, - const std::string& id) const; - VcfVariant& GenotypeValue(const size_t sampleIndex, const std::string& id, - boost::optional value); - - const boost::optional>& GenotypeValues(const size_t sampleIndex, - const std::string& id) const; - VcfVariant& GenotypeValues(const size_t sampleIndex, const std::string& id, - boost::optional> values); + const std::optional& GenotypeValue(size_t sampleIndex, + const std::string& id) const; + VcfVariant& GenotypeValue(size_t sampleIndex, const std::string& id, + std::optional value); + + const std::optional>& GenotypeValues(size_t sampleIndex, + const std::string& id) const; + VcfVariant& GenotypeValues(size_t sampleIndex, const std::string& id, + std::optional> values); - bool IsSampleHeterozygous(const size_t sampleIndex) const; - bool IsSamplePhased(const size_t sampleIndex) const; + bool IsSampleHeterozygous(size_t sampleIndex) const; + bool IsSamplePhased(size_t sampleIndex) const; private: // FIXED data diff -Nru pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfWriter.h pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfWriter.h --- pbbam-1.7.0+dfsg/include/pbbam/vcf/VcfWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/vcf/VcfWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include -#include -#include - #include #include +#include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/virtual/VirtualRegion.h pbbam-2.0.0+dfsg/include/pbbam/virtual/VirtualRegion.h --- pbbam-1.7.0+dfsg/include/pbbam/virtual/VirtualRegion.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/virtual/VirtualRegion.h 2022-01-13 18:20:29.000000000 +0000 @@ -24,14 +24,12 @@ /// \brief Creates a virtual region with basic type & position info. /// - VirtualRegion(const VirtualRegionType type_, const int beginPos_, const int endPos_, - const int score_ = 0); + VirtualRegion(VirtualRegionType type, int beginPos, int endPos, int score = 0); /// \brief Creates a virtual region with type/position info, as well as context & barcode. /// - VirtualRegion(const VirtualRegionType type_, const int beginPos_, const int endPos_, - const Data::LocalContextFlags cxTag_, const int barcodeLeft_, - const int barcodeRight_, const int score_ = 0); + VirtualRegion(VirtualRegionType type, int beginPos, int endPos, Data::LocalContextFlags cxTag, + int barcodeLeft, int barcodeRight, int score = 0); VirtualRegion() = default; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/virtual/VirtualRegionTypeMap.h pbbam-2.0.0+dfsg/include/pbbam/virtual/VirtualRegionTypeMap.h --- pbbam-1.7.0+dfsg/include/pbbam/virtual/VirtualRegionTypeMap.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/virtual/VirtualRegionTypeMap.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/include/pbbam/virtual/VirtualZmwBamRecord.h pbbam-2.0.0+dfsg/include/pbbam/virtual/VirtualZmwBamRecord.h --- pbbam-1.7.0+dfsg/include/pbbam/virtual/VirtualZmwBamRecord.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/virtual/VirtualZmwBamRecord.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include - #include #include #include #include +#include + namespace PacBio { namespace BAM { @@ -39,7 +39,7 @@ /// \returns true if requested VirtualRegionType has been annotated. /// - bool HasVirtualRegionType(const VirtualRegionType regionType) const; + bool HasVirtualRegionType(VirtualRegionType regionType) const; /// \returns IPD frame data /// @@ -54,7 +54,7 @@ /// \param[in] regionType requested region type /// \returns regions that match the requested type (empty vector if none found). /// - std::vector VirtualRegionsTable(const VirtualRegionType regionType) const; + std::vector VirtualRegionsTable(VirtualRegionType regionType) const; /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/virtual/WhitelistedZmwReadStitcher.h pbbam-2.0.0+dfsg/include/pbbam/virtual/WhitelistedZmwReadStitcher.h --- pbbam-1.7.0+dfsg/include/pbbam/virtual/WhitelistedZmwReadStitcher.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/virtual/WhitelistedZmwReadStitcher.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,16 +3,16 @@ #include -#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include +#include namespace PacBio { namespace BAM { @@ -56,6 +56,8 @@ const std::string& primaryBamFilePath, const std::string& scrapsBamFilePath); + WhitelistedZmwReadStitcher(WhitelistedZmwReadStitcher&&) noexcept; + WhitelistedZmwReadStitcher& operator=(WhitelistedZmwReadStitcher&&) noexcept; ~WhitelistedZmwReadStitcher(); /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/virtual/ZmwReadStitcher.h pbbam-2.0.0+dfsg/include/pbbam/virtual/ZmwReadStitcher.h --- pbbam-1.7.0+dfsg/include/pbbam/virtual/ZmwReadStitcher.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/virtual/ZmwReadStitcher.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,15 @@ #include -#include -#include -#include - #include #include #include #include +#include +#include +#include + namespace PacBio { namespace BAM { @@ -37,6 +37,8 @@ /// maybe filtered, from DataSet input ZmwReadStitcher(const DataSet& dataset); + ZmwReadStitcher(ZmwReadStitcher&&) noexcept; + ZmwReadStitcher& operator=(ZmwReadStitcher&&) noexcept; ~ZmwReadStitcher(); /// \} diff -Nru pbbam-1.7.0+dfsg/include/pbbam/ZmwChunkedFastaReader.h pbbam-2.0.0+dfsg/include/pbbam/ZmwChunkedFastaReader.h --- pbbam-1.7.0+dfsg/include/pbbam/ZmwChunkedFastaReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/ZmwChunkedFastaReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include +#include +#include + #include #include #include -#include -#include - namespace PacBio { namespace BAM { @@ -29,7 +29,7 @@ /// Actual chunk count may be smaller than the requested number, if the input /// size is smaller. /// - ZmwChunkedFastaReader(const std::string& fn, const size_t numChunks); + ZmwChunkedFastaReader(const std::string& fn, size_t numChunks); ZmwChunkedFastaReader(ZmwChunkedFastaReader&&) noexcept; ZmwChunkedFastaReader& operator=(ZmwChunkedFastaReader&&) noexcept; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/ZmwChunkedFastqReader.h pbbam-2.0.0+dfsg/include/pbbam/ZmwChunkedFastqReader.h --- pbbam-1.7.0+dfsg/include/pbbam/ZmwChunkedFastqReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/ZmwChunkedFastqReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include +#include +#include + #include #include #include -#include -#include - namespace PacBio { namespace BAM { @@ -29,7 +29,7 @@ /// Actual chunk count may be smaller than the requested number, if the input /// size is smaller. /// - ZmwChunkedFastqReader(const std::string& fn, const size_t numChunks); + ZmwChunkedFastqReader(const std::string& fn, size_t numChunks); ZmwChunkedFastqReader(ZmwChunkedFastqReader&&) noexcept; ZmwChunkedFastqReader& operator=(ZmwChunkedFastqReader&&) noexcept; diff -Nru pbbam-1.7.0+dfsg/include/pbbam/ZmwGroupQuery.h pbbam-2.0.0+dfsg/include/pbbam/ZmwGroupQuery.h --- pbbam-1.7.0+dfsg/include/pbbam/ZmwGroupQuery.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/ZmwGroupQuery.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include - -#include - #include #include #include +#include + +#include + namespace PacBio { namespace BAM { @@ -51,8 +51,8 @@ /// \param filterMode apply/ignore any filters in XML, if present /// ZmwGroupQuery(const DataSet& dataset, - const ZmwFileIterationMode iterationMode = ZmwFileIterationMode::SEQUENTIAL, - const DataSetFilterMode filterMode = DataSetFilterMode::APPLY); + ZmwFileIterationMode iterationMode = ZmwFileIterationMode::SEQUENTIAL, + DataSetFilterMode filterMode = DataSetFilterMode::APPLY); /// /// \brief Creates a new ZmwGroupQuery, limiting record results to only those @@ -79,7 +79,10 @@ /// PBI files. /// ZmwGroupQuery(const std::vector& zmwWhitelist, const DataSet& dataset); - ~ZmwGroupQuery(); + + ZmwGroupQuery(ZmwGroupQuery&&) noexcept; + ZmwGroupQuery& operator=(ZmwGroupQuery&&) noexcept; + ~ZmwGroupQuery() override; public: /// \brief Main iteration point for record access. diff -Nru pbbam-1.7.0+dfsg/include/pbbam/ZmwQuery.h pbbam-2.0.0+dfsg/include/pbbam/ZmwQuery.h --- pbbam-1.7.0+dfsg/include/pbbam/ZmwQuery.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/ZmwQuery.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include -#include +#include #include -#include +#include namespace PacBio { namespace BAM { @@ -37,7 +37,9 @@ /// ZmwQuery(std::vector zmwWhitelist, const DataSet& dataset); - ~ZmwQuery(); + ZmwQuery(ZmwQuery&&) noexcept; + ZmwQuery& operator=(ZmwQuery&&) noexcept; + ~ZmwQuery() override; /// \brief Main iteration point for record access. /// diff -Nru pbbam-1.7.0+dfsg/include/pbbam/ZmwTypeMap.h pbbam-2.0.0+dfsg/include/pbbam/ZmwTypeMap.h --- pbbam-1.7.0+dfsg/include/pbbam/ZmwTypeMap.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/include/pbbam/ZmwTypeMap.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/meson.build pbbam-2.0.0+dfsg/meson.build --- pbbam-1.7.0+dfsg/meson.build 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/meson.build 2022-01-13 18:20:29.000000000 +0000 @@ -1,14 +1,14 @@ project( 'PacBioBAM', 'cpp', - version : '1.7.0', + version : '2.0.0', default_options : [ 'buildtype=release', 'warning_level=3', - 'cpp_std=c++14', + 'cpp_std=c++20', 'b_ndebug=if-release'], license : 'BSD-3', - meson_version : '>= 0.52.0') + meson_version : '>= 0.57.0') ############ # CXXFLAGS # @@ -51,10 +51,10 @@ pbbam_boost_dep = dependency('boost', include_type : 'system', required : true) # zlib -pbbam_zlib_dep = dependency('zlib', required : true, fallback : ['zlib', 'zlib_dep']) +pbbam_zlib_dep = dependency('zlib', include_type : 'system', required : true, fallback : ['zlib', 'zlib_dep']) # htslib -pbbam_htslib_dep = dependency('htslib', required : true, version : '>=1.4', fallback : ['htslib', 'htslib_dep']) +pbbam_htslib_dep = dependency('htslib', include_type : 'system', required : true, version : '>=1.4', fallback : ['htslib', 'htslib_dep']) # pbcopper pbbam_pbcopper_dep = dependency('pbcopper', required : true, fallback : ['pbcopper', 'pbcopper_dep']) diff -Nru pbbam-1.7.0+dfsg/src/AlignmentPrinter.cpp pbbam-2.0.0+dfsg/src/AlignmentPrinter.cpp --- pbbam-1.7.0+dfsg/src/AlignmentPrinter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/AlignmentPrinter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,16 @@ #include -#include -#include +#include +#include #include #include #include #include -#include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/BaiIndexCache.cpp pbbam-2.0.0+dfsg/src/BaiIndexCache.cpp --- pbbam-1.7.0+dfsg/src/BaiIndexCache.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BaiIndexCache.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,17 +2,16 @@ #include -#include -#include - -#include - #include #include #include - #include "ErrnoReason.h" +#include + +#include +#include + namespace PacBio { namespace BAM { @@ -40,6 +39,10 @@ } } +BaiIndexCacheData::BaiIndexCacheData(BaiIndexCacheData&&) noexcept = default; + +BaiIndexCacheData& BaiIndexCacheData::operator=(BaiIndexCacheData&&) noexcept = default; + BaiIndexCacheData::~BaiIndexCacheData() = default; hts_itr_t* BaiIndexCacheData::IteratorForInterval(const int32_t refId, const Data::Position start, diff -Nru pbbam-1.7.0+dfsg/src/BaiIndexedBamReader.cpp pbbam-2.0.0+dfsg/src/BaiIndexedBamReader.cpp --- pbbam-1.7.0+dfsg/src/BaiIndexedBamReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BaiIndexedBamReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,15 @@ #include -#include -#include +#include +#include +#include "ErrnoReason.h" #include #include -#include -#include - -#include "ErrnoReason.h" +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/BamFile.cpp pbbam-2.0.0+dfsg/src/BamFile.cpp --- pbbam-1.7.0+dfsg/src/BamFile.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamFile.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,24 +2,23 @@ #include -#include +#include +#include +#include "Autovalidate.h" +#include "ErrnoReason.h" +#include "FileUtils.h" +#include "MemoryUtils.h" -#include -#include +#include #include #include #include -#include - -#include -#include +#include +#include -#include "Autovalidate.h" -#include "ErrnoReason.h" -#include "FileUtils.h" -#include "MemoryUtils.h" +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/BamFileMerger.cpp pbbam-2.0.0+dfsg/src/BamFileMerger.cpp --- pbbam-1.7.0+dfsg/src/BamFileMerger.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamFileMerger.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,9 +2,6 @@ #include -#include -#include - #include #include #include @@ -16,6 +13,9 @@ #include #include +#include +#include + namespace PacBio { namespace BAM { namespace { diff -Nru pbbam-1.7.0+dfsg/src/BamHeader.cpp pbbam-2.0.0+dfsg/src/BamHeader.cpp --- pbbam-1.7.0+dfsg/src/BamHeader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamHeader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,23 +2,22 @@ #include -#include -#include -#include +#include +#include +#include +#include +#include "Version.h" + +#include #include #include #include #include -#include - -#include -#include -#include -#include - -#include "Version.h" +#include +#include +#include namespace PacBio { namespace BAM { @@ -233,7 +232,7 @@ { const auto id = readGroup.Id(); if (!HasReadGroup(id)) { - d_->readGroups_[ReadGroupInfo::GetBaseId(id)] = std::move(readGroup); + d_->readGroups_[id] = std::move(readGroup); } return *this; } @@ -303,7 +302,7 @@ bool BamHeader::HasReadGroup(const std::string& id) const { - return d_->readGroups_.find(ReadGroupInfo::GetBaseId(id)) != d_->readGroups_.cend(); + return d_->readGroups_.find(id) != d_->readGroups_.cend(); } bool BamHeader::HasSequence(const std::string& name) const @@ -376,7 +375,7 @@ ReadGroupInfo BamHeader::ReadGroup(const std::string& id) const { - const auto iter = d_->readGroups_.find(ReadGroupInfo::GetBaseId(id)); + const auto iter = d_->readGroups_.find(id); if (iter == d_->readGroups_.cend()) { throw std::runtime_error{"[pbbam] BAM header ERROR: read group ID not found: " + id}; } diff -Nru pbbam-1.7.0+dfsg/src/BamReader.cpp pbbam-2.0.0+dfsg/src/BamReader.cpp --- pbbam-1.7.0+dfsg/src/BamReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,27 +2,26 @@ #include -#include -#include -#include -#include - -#include -#include -#include +#include +#include +#include +#include "Autovalidate.h" +#include "MemoryUtils.h" #include #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include -#include "Autovalidate.h" -#include "MemoryUtils.h" +#include +#include +#include +#include namespace PacBio { namespace BAM { @@ -118,6 +117,10 @@ BamReader::BamReader(BamFile bamFile) : BamReader{bamFile.Filename()} {} +BamReader::BamReader(BamReader&&) noexcept = default; + +BamReader& BamReader::operator=(BamReader&&) noexcept = default; + BamReader::~BamReader() = default; BGZF* BamReader::Bgzf() const { return d_->handle_.File->fp.bgzf; } diff -Nru pbbam-1.7.0+dfsg/src/BamRecord.cpp pbbam-2.0.0+dfsg/src/BamRecord.cpp --- pbbam-1.7.0+dfsg/src/BamRecord.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamRecord.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,29 +2,34 @@ #include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include - #include #include #include - #include "BamRecordTags.h" #include "MemoryUtils.h" #include "Pulse2BaseCache.h" #include "SequenceUtils.h" +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + namespace PacBio { namespace BAM { namespace { @@ -472,7 +477,7 @@ const auto tagName = BamRecordTags::LabelFor(BamRecordTag::BARCODE_QUALITY); const auto bq = impl_.TagValue(tagName); if (bq.IsNull()) { - return 0; // ?? "missing" value for tags ?? should we consider boost::optional for these kind of guys ?? + return 0; // ?? "missing" value for tags ?? should we consider std::optional for these kind of guys ?? } return bq.ToUInt8(); } @@ -621,55 +626,92 @@ void BamRecord::ClipTags(const size_t clipFrom, const size_t clipLength) { + TagCollection tags = impl_.Tags(); + + const auto ClipQualTag = [&](const BamRecordTag tag) { + if (impl_.HasTag(tag)) { + tags[Label(tag)] = + ClipSeqQV(FetchQualities(tag, Data::Orientation::NATIVE), clipFrom, clipLength) + .Fastq(); + } + }; + ClipQualTag(BamRecordTag::DELETION_QV); + ClipQualTag(BamRecordTag::INSERTION_QV); + ClipQualTag(BamRecordTag::MERGE_QV); + ClipQualTag(BamRecordTag::SUBSTITUTION_QV); + + const auto ClipSeqTag = [&](const BamRecordTag tag) { + if (impl_.HasTag(tag)) { + tags[Label(tag)] = + ClipSeqQV(FetchBases(tag, Data::Orientation::NATIVE), clipFrom, clipLength); + } + }; + ClipSeqTag(BamRecordTag::DELETION_TAG); + ClipSeqTag(BamRecordTag::SUBSTITUTION_TAG); + + const auto ClipKineticsTag = [&](const BamRecordTag tag, const Data::FrameCodec codec, + const Data::FrameEncoder& encoder) { + if (impl_.HasTag(tag)) { + const auto frames = FetchFrames(tag).Data(); + if (frames.empty()) { + return; + } + + if (codec == Data::FrameCodec::RAW) { + tags[Label(tag)] = ClipSeqQV(frames, clipFrom, clipLength); + } else { + tags[Label(tag)] = ClipSeqQV(encoder.Encode(frames), clipFrom, clipLength); + } + } + }; + const auto ClipReverseKineticsTag = [&](const BamRecordTag tag, const Data::FrameCodec codec, + const Data::FrameEncoder& encoder) { + if (impl_.HasTag(tag)) { + const auto frames = FetchFrames(tag).Data(); + if (frames.empty()) { + return; + } + + const size_t originalClipEnd = clipFrom + clipLength; + assert(originalClipEnd <= frames.size()); + const size_t reverseClipFrom = frames.size() - originalClipEnd; + if (codec == Data::FrameCodec::RAW) { + tags[Label(tag)] = ClipSeqQV(frames, reverseClipFrom, clipLength); + } else { + tags[Label(tag)] = ClipSeqQV(encoder.Encode(frames), reverseClipFrom, clipLength); + } + } + }; const auto rg = ReadGroup(); const auto ipdCodec = rg.IpdCodec(); const auto ipdEncoder = rg.IpdFrameEncoder(); const auto pwCodec = rg.PulseWidthCodec(); const auto pwEncoder = rg.IpdFrameEncoder(); - - // update BAM tags - TagCollection tags = impl_.Tags(); - if (HasDeletionQV()) { - tags[Label(BamRecordTag::DELETION_QV)] = - ClipSeqQV(DeletionQV(Data::Orientation::NATIVE), clipFrom, clipLength).Fastq(); - } - if (HasInsertionQV()) { - tags[Label(BamRecordTag::INSERTION_QV)] = - ClipSeqQV(InsertionQV(Data::Orientation::NATIVE), clipFrom, clipLength).Fastq(); - } - if (HasMergeQV()) { - tags[Label(BamRecordTag::MERGE_QV)] = - ClipSeqQV(MergeQV(Data::Orientation::NATIVE), clipFrom, clipLength).Fastq(); - } - if (HasSubstitutionQV()) { - tags[Label(BamRecordTag::SUBSTITUTION_QV)] = - ClipSeqQV(SubstitutionQV(Data::Orientation::NATIVE), clipFrom, clipLength).Fastq(); - } - if (HasIPD()) { - const auto label = Label(BamRecordTag::IPD); - const auto ipd = IPD(Data::Orientation::NATIVE).Data(); - if (ipdCodec == Data::FrameCodec::RAW) { - tags[label] = ClipSeqQV(ipd, clipFrom, clipLength); - } else { - tags[label] = ClipSeqQV(ipdEncoder.Encode(ipd), clipFrom, clipLength); - } - } - if (HasPulseWidth()) { - const auto label = Label(BamRecordTag::PULSE_WIDTH); - const auto pw = PulseWidth(Data::Orientation::NATIVE).Data(); - if (pwCodec == Data::FrameCodec::RAW) { - tags[label] = ClipSeqQV(pw, clipFrom, clipLength); - } else { - tags[label] = ClipSeqQV(pwEncoder.Encode(pw), clipFrom, clipLength); + ClipKineticsTag(BamRecordTag::IPD, ipdCodec, ipdEncoder); + ClipKineticsTag(BamRecordTag::PULSE_WIDTH, pwCodec, pwEncoder); + ClipKineticsTag(BamRecordTag::FORWARD_IPD, ipdCodec, ipdEncoder); + ClipKineticsTag(BamRecordTag::FORWARD_PW, pwCodec, pwEncoder); + ClipReverseKineticsTag(BamRecordTag::REVERSE_IPD, ipdCodec, ipdEncoder); + ClipReverseKineticsTag(BamRecordTag::REVERSE_PW, pwCodec, pwEncoder); + + // basemods tags + if (impl_.HasTag(BamRecordTag::BASEMOD_LOCI)) { + if (!impl_.HasTag(BamRecordTag::BASEMOD_QV)) { + throw std::runtime_error{ + "[pbbam] BAM record ERROR: cannot clip 'Mm' tag without a corresponding 'Ml' tag."}; } - } - if (HasDeletionTag()) { - tags[Label(BamRecordTag::DELETION_TAG)] = - ClipSeqQV(DeletionTag(Data::Orientation::NATIVE), clipFrom, clipLength); - } - if (HasSubstitutionTag()) { - tags[Label(BamRecordTag::SUBSTITUTION_TAG)] = - ClipSeqQV(SubstitutionTag(Data::Orientation::NATIVE), clipFrom, clipLength); + + const std::string seq{Sequence(Data::Orientation::NATIVE)}; + const std::string oldBasemodsString{impl_.TagValue(BamRecordTag::BASEMOD_LOCI).ToString()}; + const std::vector basemodsQVs{ + impl_.TagValue(BamRecordTag::BASEMOD_QV).ToUInt8Array()}; + + SplitBasemods sb = + ClipBasemodsTag(seq, oldBasemodsString, basemodsQVs, clipFrom, clipLength); + + tags[Label(BamRecordTag::BASEMOD_LOCI)] = + SplitBasemods::SeparatingCToString(sb.RetainedSeparatingC); + tags[Label(BamRecordTag::BASEMOD_QV)] = std::move(sb.RetainedQuals); } // internal BAM tags @@ -679,57 +721,53 @@ CalculatePulse2BaseCache(); Pulse2BaseCache* p2bCache = p2bCache_.get(); - if (HasAltLabelQV()) { - tags[Label(BamRecordTag::ALT_LABEL_QV)] = - ClipPulse(AltLabelQV(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength) - .Fastq(); - } - if (HasLabelQV()) { - tags[Label(BamRecordTag::LABEL_QV)] = - ClipPulse(LabelQV(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength) - .Fastq(); - } - if (HasPulseMergeQV()) { - tags[Label(BamRecordTag::PULSE_MERGE_QV)] = - ClipPulse(PulseMergeQV(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength) - .Fastq(); - } - if (HasAltLabelTag()) { - tags[Label(BamRecordTag::ALT_LABEL_TAG)] = - ClipPulse(AltLabelTag(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength); - } - if (HasPulseCall()) { - tags[Label(BamRecordTag::PULSE_CALL)] = - ClipPulse(PulseCall(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength); - } - if (HasPkmean()) { - tags[Label(BamRecordTag::PKMEAN)] = EncodePhotons( - ClipPulse(Pkmean(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength)); - } - if (HasPkmid()) { - tags[Label(BamRecordTag::PKMID)] = EncodePhotons( - ClipPulse(Pkmid(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength)); - } - if (HasPkmean2()) { - tags[Label(BamRecordTag::PKMEAN_2)] = EncodePhotons( - ClipPulse(Pkmean2(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength)); - } - if (HasPkmid2()) { - tags[Label(BamRecordTag::PKMID_2)] = EncodePhotons( - ClipPulse(Pkmid2(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength)); - } - if (HasPrePulseFrames()) { - tags[Label(BamRecordTag::PRE_PULSE_FRAMES)] = ClipPulse( - PrePulseFrames(Data::Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength); - } - if (HasPulseCallWidth()) { - tags[Label(BamRecordTag::PULSE_CALL_WIDTH)] = ClipPulse( - PulseCallWidth(Data::Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength); - } - if (HasStartFrame()) { - tags[Label(BamRecordTag::START_FRAME)] = - ClipPulse(StartFrame(Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength); - } + const auto ClipPulseQualTag = [&](const BamRecordTag tag) { + if (impl_.HasTag(tag)) { + tags[Label(tag)] = ClipPulse(FetchQualities(tag, Data::Orientation::NATIVE), + p2bCache, clipFrom, clipLength) + .Fastq(); + } + }; + ClipPulseQualTag(BamRecordTag::ALT_LABEL_QV); + ClipPulseQualTag(BamRecordTag::LABEL_QV); + ClipPulseQualTag(BamRecordTag::PULSE_MERGE_QV); + + const auto ClipPulseSeqTag = [&](const BamRecordTag tag) { + if (impl_.HasTag(tag)) { + tags[Label(tag)] = ClipPulse(FetchBases(tag, Data::Orientation::NATIVE), p2bCache, + clipFrom, clipLength); + } + }; + ClipPulseSeqTag(BamRecordTag::ALT_LABEL_TAG); + ClipPulseSeqTag(BamRecordTag::PULSE_CALL); + + const auto ClipPhotonTag = [&](const BamRecordTag tag) { + if (impl_.HasTag(tag)) { + tags[Label(tag)] = EncodePhotons(ClipPulse( + FetchPhotons(tag, Data::Orientation::NATIVE), p2bCache, clipFrom, clipLength)); + } + }; + ClipPhotonTag(BamRecordTag::PKMEAN); + ClipPhotonTag(BamRecordTag::PKMEAN_2); + ClipPhotonTag(BamRecordTag::PKMID); + ClipPhotonTag(BamRecordTag::PKMID_2); + + const auto ClipPulseFrames = [&](const BamRecordTag tag) { + if (impl_.HasTag(tag)) { + tags[Label(tag)] = ClipPulse(FetchFrames(tag, Data::Orientation::NATIVE).Data(), + p2bCache, clipFrom, clipLength); + } + }; + ClipPulseFrames(BamRecordTag::PRE_PULSE_FRAMES); + ClipPulseFrames(BamRecordTag::PULSE_CALL_WIDTH); + + const auto ClipStartFrames = [&](const BamRecordTag tag) { + if (impl_.HasTag(tag)) { + tags[Label(tag)] = ClipPulse(FetchUInt32s(tag, Data::Orientation::NATIVE), p2bCache, + clipFrom, clipLength); + } + }; + ClipStartFrames(BamRecordTag::START_FRAME); } impl_.Tags(tags); @@ -747,9 +785,12 @@ ReverseComplement(sequence); Reverse(qualities); } - impl_.SetSequenceAndQualities(sequence, qualities.Fastq()); ClipTags(clipFrom, clipLength); + + // do *NOT* move this above ClipTags(), since we need the full/old sequence + // when clipping the `Mm` and `Ml` basemods tags + impl_.SetSequenceAndQualities(sequence, qualities.Fastq()); } BamRecord& BamRecord::ClipToQuery(const Data::Position start, const Data::Position end) @@ -1559,6 +1600,112 @@ return result; } +BamRecord::SplitBasemods BamRecord::ClipBasemodsTag(const std::string& seq, + const std::string& basemodsStr, + const std::vector& basemodsQVs, + const size_t clipFrom, const size_t clipLength) +{ + assert(clipFrom <= seq.size()); + const int32_t numClippedC = std::count(std::cbegin(seq), std::cbegin(seq) + clipFrom, 'C'); + assert(clipFrom + clipLength <= seq.size()); + const int32_t numRetainedC = + std::count(std::cbegin(seq) + clipFrom, std::cbegin(seq) + clipFrom + clipLength, 'C'); + + const std::vector separatingC{SplitBasemods::SplitBasemodsString(basemodsStr)}; + assert(separatingC.size() == basemodsQVs.size()); + + // prefix sum (with an off-by-one) for divide-and-conquer later + // + // input: separatingC == {3, 1, 4} + // output: prefixSum == {4, 6, 11} + // + // i.e., prefixSum[i] accounts for all Cs we have seen so far up to CpG island i, including itself + // + // TODO(dseifert): + // replace with std::inclusive_scan in C++17 + std::vector prefixSum; + prefixSum.reserve(separatingC.size()); + int32_t pSum = 0; + for (const int32_t p : separatingC) { + pSum += (p + 1); + prefixSum.emplace_back(pSum); + } + + // find the first retained CpG site + const auto startIt = + std::lower_bound(std::cbegin(prefixSum), std::cend(prefixSum), numClippedC + 1); + // find one past the last retained CpG site + const auto endIt = + std::upper_bound(std::cbegin(prefixSum), std::cend(prefixSum), numClippedC + numRetainedC); + + const auto startPos = std::distance(std::cbegin(prefixSum), startIt); + const auto endPos = std::distance(std::cbegin(prefixSum), endIt); + assert(startPos <= endPos); + + BamRecord::SplitBasemods result{ + // Leading + {std::cbegin(separatingC) + 0, std::cbegin(separatingC) + startPos}, + {std::cbegin(basemodsQVs) + 0, std::cbegin(basemodsQVs) + startPos}, + // Retained + {std::cbegin(separatingC) + startPos, std::cbegin(separatingC) + endPos}, + {std::cbegin(basemodsQVs) + startPos, std::cbegin(basemodsQVs) + endPos}, + // Trailing + {std::cbegin(separatingC) + endPos, std::cbegin(separatingC) + separatingC.size()}, + {std::cbegin(basemodsQVs) + endPos, std::cbegin(basemodsQVs) + basemodsQVs.size()}, + }; + + if (endPos - startPos) { + // we lost some intervening Cs + result.RetainedSeparatingC.front() = prefixSum[startPos] - numClippedC - 1; + result.PrefixLostBases = numClippedC - ((startPos >= 1) ? prefixSum[startPos - 1] : 0); + } + + return result; +} + +std::vector BamRecord::SplitBasemods::SplitBasemodsString(const std::string& str) +{ + assert(str.size() >= 4); + assert(boost::algorithm::starts_with(str, "C+m")); + assert(boost::algorithm::ends_with(str, ";")); + + const char* strView = str.c_str() + 3; // skip the "C+m" prefix + const int32_t strLen = str.size() - 3; + + // convert "C+m,3,1,4;" to std::vector{3, 1, 4} + std::vector result; + int32_t currentNumber = 0; + assert((strView[0] == ',') || + (strView[0] == ';')); // first character has to be either ',' or ';' + for (int32_t i = 1; i < strLen; ++i) { + // yes, this has to be an unsigned char for the EOF edge case on unsigned platforms (hi ARM!) + const unsigned char ch = strView[i]; + if (std::isdigit(ch)) { + currentNumber *= 10; + currentNumber += (ch - 48); + } else { + // have a comma or semi-colon + assert((ch == ',') || (ch == ';')); + result.emplace_back(currentNumber); + currentNumber = 0; + } + } + + return result; +} + +std::string BamRecord::SplitBasemods::SeparatingCToString(const std::vector& vec) +{ + std::ostringstream newBasemodsString; + newBasemodsString << "C+m"; + for (const auto val : vec) { + newBasemodsString << ',' << val; + } + newBasemodsString << ';'; + + return newBasemodsString.str(); +} + uint8_t BamRecord::MapQuality() const { return impl_.MapQuality(); } Data::QualityValues BamRecord::MergeQV(Data::Orientation orientation, bool aligned, diff -Nru pbbam-1.7.0+dfsg/src/BamRecordImpl.cpp pbbam-2.0.0+dfsg/src/BamRecordImpl.cpp --- pbbam-1.7.0+dfsg/src/BamRecordImpl.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamRecordImpl.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,11 +2,14 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include "BamRecordTags.h" +#include "MemoryUtils.h" + +#include + +#include #include #include @@ -14,15 +17,11 @@ #include #include -#include - -#include - -#include -#include - -#include "BamRecordTags.h" -#include "MemoryUtils.h" +#include +#include +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/BamRecordTags.cpp pbbam-2.0.0+dfsg/src/BamRecordTags.cpp --- pbbam-1.7.0+dfsg/src/BamRecordTags.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamRecordTags.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -52,6 +52,8 @@ { BamRecordTag::START_FRAME, {"sf", true} }, { BamRecordTag::SUBSTITUTION_QV, {"sq", false} }, { BamRecordTag::SUBSTITUTION_TAG, {"st", false} }, + { BamRecordTag::BASEMOD_LOCI, {"Mm", false} }, + { BamRecordTag::BASEMOD_QV, {"Ml", false} }, // faux tags { BamRecordTag::SEQ, {" ", false} }, diff -Nru pbbam-1.7.0+dfsg/src/BamRecordTags.h pbbam-2.0.0+dfsg/src/BamRecordTags.h --- pbbam-1.7.0+dfsg/src/BamRecordTags.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamRecordTags.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include +#include +#include +#include #include #include -#include -#include -#include +#include namespace PacBio { namespace BAM { @@ -19,10 +19,10 @@ { public: // tag info - static bool IsIPD(const BamRecordTag tag); - static bool IsPulse(const BamRecordTag tag); - static bool IsPW(const BamRecordTag tag); - static std::string LabelFor(const BamRecordTag tag); + static bool IsIPD(BamRecordTag tag); + static bool IsPulse(BamRecordTag tag); + static bool IsPW(BamRecordTag tag); + static std::string LabelFor(BamRecordTag tag); private: struct BamRecordTagData diff -Nru pbbam-1.7.0+dfsg/src/BamTagCodec.cpp pbbam-2.0.0+dfsg/src/BamTagCodec.cpp --- pbbam-1.7.0+dfsg/src/BamTagCodec.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamTagCodec.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,12 +2,12 @@ #include +#include + #include #include #include -#include - namespace PacBio { namespace BAM { namespace { diff -Nru pbbam-1.7.0+dfsg/src/BamWriter.cpp pbbam-2.0.0+dfsg/src/BamWriter.cpp --- pbbam-1.7.0+dfsg/src/BamWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BamWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,29 +2,28 @@ #include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - #include #include #include - #include "Autovalidate.h" #include "ErrnoReason.h" #include "FileProducer.h" #include "MemoryUtils.h" +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/bed/BedReader.cpp pbbam-2.0.0+dfsg/src/bed/BedReader.cpp --- pbbam-1.7.0+dfsg/src/bed/BedReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/bed/BedReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,17 +2,17 @@ #include -#include -#include -#include - -#include -#include - #include #include #include +#include + +#include +#include +#include +#include + namespace PacBio { namespace BED { @@ -46,7 +46,7 @@ void GetNext() { - interval_ = boost::none; + interval_.reset(); std::string line; if (reader_->GetNext(line)) { interval_ = ParseInterval(std::move(line)); @@ -75,7 +75,7 @@ } std::unique_ptr reader_; - boost::optional interval_; + std::optional interval_; }; BedReader::BedReader(const std::string& fn) diff -Nru pbbam-1.7.0+dfsg/src/bed/BedWriter.cpp pbbam-2.0.0+dfsg/src/bed/BedWriter.cpp --- pbbam-1.7.0+dfsg/src/bed/BedWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/bed/BedWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,12 +2,12 @@ #include -#include -#include - #include #include +#include +#include + namespace PacBio { namespace BED { diff -Nru pbbam-1.7.0+dfsg/src/BgzipFastaWriter.cpp pbbam-2.0.0+dfsg/src/BgzipFastaWriter.cpp --- pbbam-1.7.0+dfsg/src/BgzipFastaWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BgzipFastaWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/BgzipFastqWriter.cpp pbbam-2.0.0+dfsg/src/BgzipFastqWriter.cpp --- pbbam-1.7.0+dfsg/src/BgzipFastqWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BgzipFastqWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/BgzipWriter.cpp pbbam-2.0.0+dfsg/src/BgzipWriter.cpp --- pbbam-1.7.0+dfsg/src/BgzipWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/BgzipWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,18 +2,17 @@ #include -#include -#include -#include -#include +#include +#include "ErrnoReason.h" +#include "FileProducer.h" #include #include -#include - -#include "ErrnoReason.h" -#include "FileProducer.h" +#include +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ChemistryTable.cpp pbbam-2.0.0+dfsg/src/ChemistryTable.cpp --- pbbam-1.7.0+dfsg/src/ChemistryTable.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ChemistryTable.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,16 @@ #include "ChemistryTable.h" -#include +#include +#include "FileUtils.h" +#include "pugixml/pugixml.hpp" + +#include #include #include -#include -#include - -#include "FileUtils.h" -#include "pugixml/pugixml.hpp" +#include namespace PacBio { namespace BAM { @@ -138,6 +138,10 @@ {{"101-789-500", "101-826-100", "5.0", "S/P4-C2/5.0-8M", "TAGT-420"}}, // Sequel® II Binding Kit 2.0; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC {{"101-789-500", "101-820-300", "5.0", "S/P4-C2/5.0-8M", "TAGT-420"}}, + // Sequel® II Binding Kit 2.0; Sequel II Sequencing Plate 3.0 (1 rxn) + {{"101-789-500", "102-186-000", "5.0", "S/P4-C2/5.0-8M"}}, + // Sequel® II Binding Kit 2.0; Sequel II Sequencing Plate 3.0 (1 rxn), QC + {{"101-789-500", "102-186-100", "5.0", "S/P4-C2/5.0-8M"}}, // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0EA (4 Rxn) {{"101-820-500", "101-789-300", "5.0", "S/P4.1-C2/5.0-8M", "TAGT-419"}}, @@ -145,6 +149,10 @@ {{"101-820-500", "101-826-100", "5.0", "S/P4.1-C2/5.0-8M", "TAGT-420"}}, // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC {{"101-820-500", "101-820-300", "5.0", "S/P4.1-C2/5.0-8M", "TAGT-420"}}, + // Sequel® II Binding Kit 2.1; Sequel II Sequencing Plate 3.0 (1 rxn) + {{"101-820-500", "102-186-000", "5.0", "S/P4.1-C2/5.0-8M"}}, + // Sequel® II Binding Kit 2.1; Sequel II Sequencing Plate 3.0 (1 rxn), QC + {{"101-820-500", "102-186-100", "5.0", "S/P4.1-C2/5.0-8M"}}, // Sequel® II Binding Kit 2.2; Sequel® II Sequencing Plate 2.0 (4 rxn) {{"101-894-200", "101-826-100", "5.0", "S/P5-C2/5.0-8M", "TAGT-905"}}, @@ -152,6 +160,34 @@ {{"101-894-200", "101-789-300", "5.0", "S/P5-C2/5.0-8M", "TAGT-905"}}, // Sequel® II Binding Kit 2.2; Sequel® II Sequencing Plate 2.0 (4 rxn) - QC {{"101-894-200", "101-820-300", "5.0", "S/P5-C2/5.0-8M", "TAGT-905"}}, + // Sequel® II Binding Kit 2.2; Sequel II Sequencing Plate 3.0 (1 rxn) + {{"101-894-200", "102-186-000", "5.0", "S/P5-C2/5.0-8M"}}, + // Sequel® II Binding Kit 2.2; Sequel II Sequencing Plate 3.0 (1 rxn), QC + {{"101-894-200", "102-186-100", "5.0", "S/P5-C2/5.0-8M"}}, + // Future PN placeholder; SequencingChemistry and SoftwareVersion need to be reviewed/updated prior to integration/release + {{"101-894-200", "102-118-800", "5.0", "S/P5-C2/5.0-8M"}}, + + // Sequel® II Binding Kit 3.1; Sequel® II Sequencing Plate 2.0EA (4 Rxn) + {{"102-194-200", "101-789-300", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.1; Sequel® II Sequencing Plate 2.0 (4 rxn) + {{"102-194-200", "101-826-100", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.1; Sequel® II Sequencing Plate 2.0 (1 rxn) + {{"102-194-200", "102-186-000", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.1; Sequel® II Sequencing Plate 2.0 (1 rxn) - QC + {{"102-194-200", "102-186-100", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.1; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC + {{"102-194-200", "101-820-300", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + + // Sequel® II Binding Kit 3.2; Sequel® II Sequencing Plate 2.0EA (4 Rxn) + {{"102-194-100", "101-789-300", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.2; Sequel® II Sequencing Plate 2.0 (4 rxn) + {{"102-194-100", "101-826-100", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.2; Sequel® II Sequencing Plate 2.0 (1 rxn) + {{"102-194-100", "102-186-000", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.2; Sequel® II Sequencing Plate 2.0 (1 rxn) - QC + {{"102-194-100", "102-186-100", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, + // Sequel® II Binding Kit 3.2; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC + {{"102-194-100", "101-820-300", "5.0", "S/P5-C2/5.0-8M", "TAGT-5381"}}, }; // clang-format on diff -Nru pbbam-1.7.0+dfsg/src/CollectionMetadata.cpp pbbam-2.0.0+dfsg/src/CollectionMetadata.cpp --- pbbam-1.7.0+dfsg/src/CollectionMetadata.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/CollectionMetadata.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,25 +2,25 @@ #include +#include "DataSetUtils.h" +#include "RunMetadataParser.h" +#include "pugixml/pugixml.hpp" + +#include + #include #include #include #include -#include - -#include "DataSetUtils.h" -#include "RunMetadataParser.h" -#include "pugixml/pugixml.hpp" - namespace PacBio { namespace BAM { namespace { -boost::optional UpdateControlKitCache(const ControlKit& kit) +std::optional UpdateControlKitCache(const ControlKit& kit) { if (!kit.HasChild("CustomSequence")) { - return boost::none; + return {}; } const auto& customSeq = kit.ChildText("CustomSequence"); @@ -44,7 +44,7 @@ return ControlKit::CustomSequence{lines.at(1), lines.at(3), lines.at(5)}; } -void UpdateControlKit(const boost::optional& cache, ControlKit& kit) +void UpdateControlKit(const std::optional& cache, ControlKit& kit) { std::ostringstream seq; seq << ">left_adapter\\n" diff -Nru pbbam-1.7.0+dfsg/src/Compare.cpp pbbam-2.0.0+dfsg/src/Compare.cpp --- pbbam-1.7.0+dfsg/src/Compare.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/Compare.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,11 +2,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { namespace { diff -Nru pbbam-1.7.0+dfsg/src/CompositeBamReader.cpp pbbam-2.0.0+dfsg/src/CompositeBamReader.cpp --- pbbam-1.7.0+dfsg/src/CompositeBamReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/CompositeBamReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,11 +2,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/Config.cpp pbbam-2.0.0+dfsg/src/Config.cpp --- pbbam-1.7.0+dfsg/src/Config.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/Config.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,14 +2,15 @@ #include -#include -#include -#include +#include -#include #include -#include +#include + +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/DataSetBaseTypes.cpp pbbam-2.0.0+dfsg/src/DataSetBaseTypes.cpp --- pbbam-1.7.0+dfsg/src/DataSetBaseTypes.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/DataSetBaseTypes.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,19 +2,27 @@ #include -#include - -#include - #include - #include "DataSetUtils.h" #include "TimeUtils.h" +#include +#include +#include + +#include + namespace PacBio { namespace BAM { namespace internal { +std::string GenerateUuid() +{ + static boost::uuids::random_generator gen; + const boost::uuids::uuid uuid = gen(); + return boost::uuids::to_string(uuid); +} + // ---------------- // BaseEntityType // ---------------- diff -Nru pbbam-1.7.0+dfsg/src/DataSet.cpp pbbam-2.0.0+dfsg/src/DataSet.cpp --- pbbam-1.7.0+dfsg/src/DataSet.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/DataSet.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,8 +2,19 @@ #include +#include +#include "DataSetIO.h" +#include "DataSetUtils.h" +#include "FileUtils.h" +#include "TimeUtils.h" + +#include +#include +#include + #include #include +#include #include #include #include @@ -11,18 +22,6 @@ #include #include -#include -#include -#include -#include - -#include - -#include "DataSetIO.h" -#include "DataSetUtils.h" -#include "FileUtils.h" -#include "TimeUtils.h" - namespace PacBio { namespace BAM { namespace { @@ -316,7 +315,7 @@ for (const auto& xmlFilter : Filters()) { ++numFilters; - boost::optional contigName; + std::optional contigName; intT intersectedInterval{intInterval{0, std::numeric_limits::max()}}; @@ -538,6 +537,19 @@ return *this; } +const BAM::SupplementalResources& DataSet::SupplementalResources() const +{ + return d_->SupplementalResources(); +} + +BAM::SupplementalResources& DataSet::SupplementalResources() { return d_->SupplementalResources(); } + +DataSet& DataSet::SupplementalResources(const BAM::SupplementalResources& resources) +{ + d_->SupplementalResources(resources); + return *this; +} + const std::string& DataSet::Tags() const { return d_->Tags(); } std::string& DataSet::Tags() { return d_->Tags(); } @@ -566,7 +578,7 @@ return *this; } -std::string DataSet::TypeName() const { return d_->LocalNameLabel().to_string(); } +std::string DataSet::TypeName() const { return std::string{d_->LocalNameLabel()}; } std::string DataSet::TypeToName(const DataSet::TypeEnum& type) { diff -Nru pbbam-1.7.0+dfsg/src/DataSetIO.cpp pbbam-2.0.0+dfsg/src/DataSetIO.cpp --- pbbam-1.7.0+dfsg/src/DataSetIO.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/DataSetIO.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,8 +2,14 @@ #include "DataSetIO.h" -#include -#include +#include +#include "ErrnoReason.h" +#include "FileUtils.h" +#include "FofnReader.h" +#include "XmlReader.h" +#include "XmlWriter.h" + +#include #include #include @@ -12,15 +18,8 @@ #include #include -#include - -#include - -#include "ErrnoReason.h" -#include "FileUtils.h" -#include "FofnReader.h" -#include "XmlReader.h" -#include "XmlWriter.h" +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/DataSetIO.h pbbam-2.0.0+dfsg/src/DataSetIO.h --- pbbam-1.7.0+dfsg/src/DataSetIO.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/DataSetIO.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include +#include + #include #include #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/DataSetTypes.cpp pbbam-2.0.0+dfsg/src/DataSetTypes.cpp --- pbbam-1.7.0+dfsg/src/DataSetTypes.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/DataSetTypes.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,20 +2,19 @@ #include -#include -#include +#include +#include "DataSetIO.h" +#include "DataSetUtils.h" +#include "FileUtils.h" +#include "TimeUtils.h" #include #include #include #include -#include - -#include "DataSetIO.h" -#include "DataSetUtils.h" -#include "FileUtils.h" -#include "TimeUtils.h" +#include +#include namespace { @@ -49,6 +48,7 @@ {"Properties", ElementType::PROPERTIES}, {"Provenance", ElementType::PROVENANCE}, {"SequencingKitPlate", ElementType::SEQUENCING_KIT_PLATE}, + {"SupplementalResources", ElementType::SUPPLEMENTAL_RESOURCES}, {"TemplatePrepKit", ElementType::TEMPLATE_PREP_KIT}, {"AlignmentSet", ElementType::ALIGNMENT_SET}, @@ -316,6 +316,26 @@ return *this; } +const BAM::SupplementalResources& DataSetBase::SupplementalResources() const +{ + return Child("SupplementalResources"); +} + +BAM::SupplementalResources& DataSetBase::SupplementalResources() +{ + if (!HasChild("SupplementalResources")) { + AddChild(BAM::SupplementalResources()); + } + auto& c = Child("SupplementalResources"); + return c; +} + +DataSetBase& DataSetBase::SupplementalResources(const BAM::SupplementalResources& resources) +{ + SupplementalResources() = resources; + return *this; +} + DataSetBase* DataSetBase::DeepCopy() const { auto* copyDataset = new DataSetElement(*this); @@ -337,6 +357,9 @@ ExternalResources() += other.ExternalResources(); Filters() += other.Filters(); SubDataSets() += other; + if (HasChild("SupplementalResources") || other.HasChild("SupplementalResources")) { + SupplementalResources() += other.SupplementalResources(); + } return *this; } @@ -1218,6 +1241,96 @@ { } +// --------------------- +// SupplentalResources +// --------------------- + +SupplementalResources::SupplementalResources() + : DataSetElement("SupplementalResources", XsdType::BASE_DATA_MODEL) +{ +} + +SupplementalResources::SupplementalResources(const internal::FromInputXml& fromInputXml) + : DataSetElement("", fromInputXml, XsdType::BASE_DATA_MODEL) +{ +} + +SupplementalResources& SupplementalResources::operator+=(const SupplementalResources& other) +{ + // only keep unique resource ids + std::set myResourceIds; + for (size_t i = 0; i < NumChildren(); ++i) { + const ExternalResource& resource = this->operator[](i); + myResourceIds.insert(resource.ResourceId()); + } + + std::vector newResourceIndices; + const size_t numOtherResourceIds = other.Size(); + for (size_t i = 0; i < numOtherResourceIds; ++i) { + const std::string& resourceId = other[i].ResourceId(); + auto found = myResourceIds.find(resourceId); + if (found == myResourceIds.cend()) { + newResourceIndices.push_back(i); + } + } + + for (size_t index : newResourceIndices) { + Add(other[index]); + } + + return *this; +} + +void SupplementalResources::Add(const ExternalResource& ext) +{ + // disallow external resources w/ duplicate ResourceIds + std::set myResourceIds; + for (size_t i = 0; i < NumChildren(); ++i) { + const ExternalResource& resource = this->operator[](i); + myResourceIds.insert(resource.ResourceId()); + } + + if (myResourceIds.find(ext.ResourceId()) == myResourceIds.cend()) { + AddChild(ext); + } +} + +void SupplementalResources::Remove(const ExternalResource& ext) { RemoveChild(ext); } + +SupplementalResources::iterator_type SupplementalResources::begin() +{ + return SupplementalResources::iterator_type(this, 0); +} + +SupplementalResources::const_iterator_type SupplementalResources::begin() const { return cbegin(); } + +SupplementalResources::const_iterator_type SupplementalResources::cbegin() const +{ + return SupplementalResources::const_iterator_type(this, 0); +} + +SupplementalResources::iterator_type SupplementalResources::end() +{ + return SupplementalResources::iterator_type(this, NumChildren()); +} + +SupplementalResources::const_iterator_type SupplementalResources::end() const { return cend(); } + +SupplementalResources::const_iterator_type SupplementalResources::cend() const +{ + return SupplementalResources::const_iterator_type(this, NumChildren()); +} + +const SupplementalResources::value_type& SupplementalResources::operator[](size_t index) const +{ + return dynamic_cast(*(children_.at(index).get())); +} + +SupplementalResources::value_type& SupplementalResources::operator[](size_t index) +{ + return dynamic_cast(*(children_.at(index).get())); +} + // ------------------- // TranscriptSet // ------------------- diff -Nru pbbam-1.7.0+dfsg/src/DataSetUtils.h pbbam-2.0.0+dfsg/src/DataSetUtils.h --- pbbam-1.7.0+dfsg/src/DataSetUtils.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/DataSetUtils.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,9 +3,6 @@ #include -#include -#include - #include namespace PacBio { @@ -28,12 +25,7 @@ return empty; } -inline std::string GenerateUuid() -{ - static boost::uuids::random_generator gen; - const boost::uuids::uuid uuid = gen(); - return boost::uuids::to_string(uuid); -} +std::string GenerateUuid(); } // namespace internal } // namespace BAM diff -Nru pbbam-1.7.0+dfsg/src/EntireFileQuery.cpp pbbam-2.0.0+dfsg/src/EntireFileQuery.cpp --- pbbam-1.7.0+dfsg/src/EntireFileQuery.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/EntireFileQuery.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -10,19 +10,23 @@ class EntireFileQuery::EntireFileQueryPrivate { public: - EntireFileQueryPrivate(const DataSet &dataset) : reader_{dataset} {} + EntireFileQueryPrivate(const DataSet& dataset) : reader_{dataset} {} SequentialCompositeBamReader reader_; }; -EntireFileQuery::EntireFileQuery(const DataSet &dataset) +EntireFileQuery::EntireFileQuery(const DataSet& dataset) : internal::IQuery{}, d_(new EntireFileQueryPrivate(dataset)) { } +EntireFileQuery::EntireFileQuery(EntireFileQuery&&) noexcept = default; + +EntireFileQuery& EntireFileQuery::operator=(EntireFileQuery&&) noexcept = default; + EntireFileQuery::~EntireFileQuery() = default; -bool EntireFileQuery::GetNext(BamRecord &r) { return d_->reader_.GetNext(r); } +bool EntireFileQuery::GetNext(BamRecord& r) { return d_->reader_.GetNext(r); } } // namespace BAM } // namespace PacBio diff -Nru pbbam-1.7.0+dfsg/src/ErrnoReason.cpp pbbam-2.0.0+dfsg/src/ErrnoReason.cpp --- pbbam-1.7.0+dfsg/src/ErrnoReason.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ErrnoReason.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,11 +2,11 @@ #include "ErrnoReason.h" +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FaiIndex.cpp pbbam-2.0.0+dfsg/src/FaiIndex.cpp --- pbbam-1.7.0+dfsg/src/FaiIndex.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FaiIndex.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,6 +2,11 @@ #include +#include +#include "ErrnoReason.h" + +#include + #include #include #include @@ -11,12 +16,6 @@ #include #include -#include - -#include - -#include "ErrnoReason.h" - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FaiZmwChunker.cpp pbbam-2.0.0+dfsg/src/FaiZmwChunker.cpp --- pbbam-1.7.0+dfsg/src/FaiZmwChunker.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FaiZmwChunker.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include "FaiZmwChunker.h" -#include - #include #include #include #include +#include + namespace PacBio { namespace BAM { namespace { diff -Nru pbbam-1.7.0+dfsg/src/FaiZmwChunker.h pbbam-2.0.0+dfsg/src/FaiZmwChunker.h --- pbbam-1.7.0+dfsg/src/FaiZmwChunker.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FaiZmwChunker.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { @@ -42,7 +42,7 @@ /// Actual chunk count may be smaller than the requested number, if the input /// size is smaller. /// - FaiZmwChunker(const FaiIndex& index, const size_t numChunks); + FaiZmwChunker(const FaiIndex& index, size_t numChunks); /// /// \brief Construct a new FaiZmwChunker @@ -53,7 +53,7 @@ /// Actual chunk count may be smaller than the requested number, if the input /// size is smaller. /// - FaiZmwChunker(const std::string& filename, const size_t numChunks); + FaiZmwChunker(const std::string& filename, size_t numChunks); FaiZmwChunker(const FaiZmwChunker&); FaiZmwChunker(FaiZmwChunker&&) noexcept; diff -Nru pbbam-1.7.0+dfsg/src/FastaCache.cpp pbbam-2.0.0+dfsg/src/FastaCache.cpp --- pbbam-1.7.0+dfsg/src/FastaCache.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastaCache.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,11 +2,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FastaReader.cpp pbbam-2.0.0+dfsg/src/FastaReader.cpp --- pbbam-1.7.0+dfsg/src/FastaReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastaReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,6 +2,9 @@ #include +#include +#include "KSeqReader.h" + #include #include #include @@ -9,10 +12,6 @@ #include #include -#include - -#include "KSeqReader.h" - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FastaSequence.cpp pbbam-2.0.0+dfsg/src/FastaSequence.cpp --- pbbam-1.7.0+dfsg/src/FastaSequence.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastaSequence.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include -#include +#include #include #include #include -#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FastaSequenceQuery.cpp pbbam-2.0.0+dfsg/src/FastaSequenceQuery.cpp --- pbbam-1.7.0+dfsg/src/FastaSequenceQuery.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastaSequenceQuery.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -20,6 +20,10 @@ { } +FastaSequenceQuery::FastaSequenceQuery(FastaSequenceQuery&&) noexcept = default; + +FastaSequenceQuery& FastaSequenceQuery::operator=(FastaSequenceQuery&&) noexcept = default; + FastaSequenceQuery::~FastaSequenceQuery() = default; bool FastaSequenceQuery::GetNext(FastaSequence& seq) { return d_->reader_.GetNext(seq); } diff -Nru pbbam-1.7.0+dfsg/src/FastaWriter.cpp pbbam-2.0.0+dfsg/src/FastaWriter.cpp --- pbbam-1.7.0+dfsg/src/FastaWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastaWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,15 +2,14 @@ #include -#include -#include - #include #include #include - #include "ErrnoReason.h" +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FastqReader.cpp pbbam-2.0.0+dfsg/src/FastqReader.cpp --- pbbam-1.7.0+dfsg/src/FastqReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastqReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,14 +2,13 @@ #include +#include +#include "KSeqReader.h" + #include #include #include -#include - -#include "KSeqReader.h" - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FastqSequence.cpp pbbam-2.0.0+dfsg/src/FastqSequence.cpp --- pbbam-1.7.0+dfsg/src/FastqSequence.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastqSequence.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include -#include - #include #include #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FastqWriter.cpp pbbam-2.0.0+dfsg/src/FastqWriter.cpp --- pbbam-1.7.0+dfsg/src/FastqWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FastqWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,15 @@ #include -#include -#include - #include #include #include #include - #include "ErrnoReason.h" +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FileProducer.cpp pbbam-2.0.0+dfsg/src/FileProducer.cpp --- pbbam-1.7.0+dfsg/src/FileProducer.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FileProducer.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,10 +2,10 @@ #include "FileProducer.h" -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FileProducer.h pbbam-2.0.0+dfsg/src/FileProducer.h --- pbbam-1.7.0+dfsg/src/FileProducer.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FileProducer.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,10 +3,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FileUtils.cpp pbbam-2.0.0+dfsg/src/FileUtils.cpp --- pbbam-1.7.0+dfsg/src/FileUtils.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FileUtils.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,19 +2,19 @@ #include "FileUtils.h" -#include -#include +#include -#include -#include +#include #include #include #include -#include +#include +#include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FofnReader.h pbbam-2.0.0+dfsg/src/FofnReader.h --- pbbam-1.7.0+dfsg/src/FofnReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FofnReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include +#include + #include #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/FormatUtils.cpp pbbam-2.0.0+dfsg/src/FormatUtils.cpp --- pbbam-1.7.0+dfsg/src/FormatUtils.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/FormatUtils.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,15 +2,14 @@ #include -#include -#include -#include +#include +#include "ErrnoReason.h" #include -#include - -#include "ErrnoReason.h" +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/GenomicIntervalQuery.cpp pbbam-2.0.0+dfsg/src/GenomicIntervalQuery.cpp --- pbbam-1.7.0+dfsg/src/GenomicIntervalQuery.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/GenomicIntervalQuery.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -49,6 +49,10 @@ { } +GenomicIntervalQuery::GenomicIntervalQuery(GenomicIntervalQuery&&) noexcept = default; + +GenomicIntervalQuery& GenomicIntervalQuery::operator=(GenomicIntervalQuery&&) noexcept = default; + GenomicIntervalQuery::~GenomicIntervalQuery() = default; bool GenomicIntervalQuery::GetNext(BamRecord& r) { return d_->reader_.GetNext(r); } diff -Nru pbbam-1.7.0+dfsg/src/IndexedBamWriter.cpp pbbam-2.0.0+dfsg/src/IndexedBamWriter.cpp --- pbbam-1.7.0+dfsg/src/IndexedBamWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedBamWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,9 +2,26 @@ #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ErrnoReason.h" +#include "FileProducer.h" +#include "MemoryUtils.h" +#include "PbiBuilderBase.h" -#include +#include + +#include + +#include +#include +#include #include #include @@ -16,25 +33,9 @@ #include #include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include +#include -#include "ErrnoReason.h" -#include "FileProducer.h" -#include "MemoryUtils.h" -#include "PbiBuilderBase.h" +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/IndexedFastaReader.cpp pbbam-2.0.0+dfsg/src/IndexedFastaReader.cpp --- pbbam-1.7.0+dfsg/src/IndexedFastaReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedFastaReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,24 +2,24 @@ #include -#include - -#include -#include -#include - -#include -#include - #include #include #include #include #include - #include "ErrnoReason.h" #include "SequenceUtils.h" +#include + +#include + +#include +#include +#include + +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/IndexedFastqBgzfReader.cpp pbbam-2.0.0+dfsg/src/IndexedFastqBgzfReader.cpp --- pbbam-1.7.0+dfsg/src/IndexedFastqBgzfReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedFastqBgzfReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include "IndexedFastqBgzfReader.h" -#include +#include "ErrnoReason.h" #include #include #include -#include "ErrnoReason.h" +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/IndexedFastqBgzfReader.h pbbam-2.0.0+dfsg/src/IndexedFastqBgzfReader.h --- pbbam-1.7.0+dfsg/src/IndexedFastqBgzfReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedFastqBgzfReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,15 @@ #include +#include #include "IndexedFastqReaderImpl.h" -#include - -#include #include #include -#include +#include + +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/IndexedFastqReader.cpp pbbam-2.0.0+dfsg/src/IndexedFastqReader.cpp --- pbbam-1.7.0+dfsg/src/IndexedFastqReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedFastqReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,22 +2,21 @@ #include -#include - -#include -#include -#include - #include #include #include #include - #include "IndexedFastqBgzfReader.h" #include "IndexedFastqReaderImpl.h" #include "IndexedFastqTextReader.h" #include "SequenceUtils.h" +#include +#include +#include + +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/IndexedFastqReaderImpl.h pbbam-2.0.0+dfsg/src/IndexedFastqReaderImpl.h --- pbbam-1.7.0+dfsg/src/IndexedFastqReaderImpl.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedFastqReaderImpl.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include +#include #include #include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/IndexedFastqTextReader.cpp pbbam-2.0.0+dfsg/src/IndexedFastqTextReader.cpp --- pbbam-1.7.0+dfsg/src/IndexedFastqTextReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedFastqTextReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,16 @@ #include "IndexedFastqTextReader.h" -#include - -#include -#include +#include "ErrnoReason.h" #include #include #include -#include "ErrnoReason.h" +#include +#include + +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/IndexedFastqTextReader.h pbbam-2.0.0+dfsg/src/IndexedFastqTextReader.h --- pbbam-1.7.0+dfsg/src/IndexedFastqTextReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/IndexedFastqTextReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,15 @@ #include -#include +#include "IndexedFastqReaderImpl.h" -#include +#include #include -#include -#include "IndexedFastqReaderImpl.h" +#include + +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/KSeqReader.cpp pbbam-2.0.0+dfsg/src/KSeqReader.cpp --- pbbam-1.7.0+dfsg/src/KSeqReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/KSeqReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include "KSeqReader.h" -#include +#include "ErrnoReason.h" #include #include #include -#include "ErrnoReason.h" +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/KSeqReader.h pbbam-2.0.0+dfsg/src/KSeqReader.h --- pbbam-1.7.0+dfsg/src/KSeqReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/KSeqReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include -#include +#include #include #include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/LibraryInfo.cpp pbbam-2.0.0+dfsg/src/LibraryInfo.cpp --- pbbam-1.7.0+dfsg/src/LibraryInfo.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/LibraryInfo.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include -#include -#include +#include "LibraryGitHash.h" +#include "LibraryVersion.h" #include -#include "LibraryGitHash.h" -#include "LibraryVersion.h" +#include +#include namespace PacBio { namespace Pbbam { diff -Nru pbbam-1.7.0+dfsg/src/MD5.cpp pbbam-2.0.0+dfsg/src/MD5.cpp --- pbbam-1.7.0+dfsg/src/MD5.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/MD5.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,10 +2,10 @@ #include -#include - #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/MemoryUtils.cpp pbbam-2.0.0+dfsg/src/MemoryUtils.cpp --- pbbam-1.7.0+dfsg/src/MemoryUtils.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/MemoryUtils.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,12 +2,12 @@ #include "MemoryUtils.h" -#include -#include +#include #include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/MemoryUtils.h pbbam-2.0.0+dfsg/src/MemoryUtils.h --- pbbam-1.7.0+dfsg/src/MemoryUtils.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/MemoryUtils.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include -#include - #include #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/PbiBuilderBase.h pbbam-2.0.0+dfsg/src/PbiBuilderBase.h --- pbbam-1.7.0+dfsg/src/PbiBuilderBase.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiBuilderBase.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,17 +3,14 @@ #include "PbbamInternalConfig.h" -#include -#include -#include +#include +#include +#include "ErrnoReason.h" +#include "FileProducer.h" +#include "MemoryUtils.h" -#include -#include -#include -#include -#include -#include -#include +#include +#include #include @@ -21,15 +18,17 @@ #include #include -#include -#include - -#include -#include +#include +#include +#include +#include +#include +#include +#include -#include "ErrnoReason.h" -#include "FileProducer.h" -#include "MemoryUtils.h" +#include +#include +#include namespace PacBio { namespace BAM { @@ -155,7 +154,7 @@ explicit PbiReferenceDataBuilder(size_t numReferenceSequences); - bool AddRecord(const BamRecord& record, const int32_t rowNumber); + bool AddRecord(const BamRecord& record, int32_t rowNumber); PbiRawReferenceData Result() const; void WriteData(BGZF* bgzf); @@ -169,7 +168,7 @@ { PbiBuilderBase() = delete; explicit PbiBuilderBase(const std::string& pbiFilename, - const PbiBuilder::CompressionLevel compressionLevel, size_t numThreads, + PbiBuilder::CompressionLevel compressionLevel, size_t numThreads, size_t bufferSize); virtual ~PbiBuilderBase() noexcept; diff -Nru pbbam-1.7.0+dfsg/src/PbiBuilder.cpp pbbam-2.0.0+dfsg/src/PbiBuilder.cpp --- pbbam-1.7.0+dfsg/src/PbiBuilder.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiBuilder.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,35 +2,33 @@ #include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include - #include #include #include #include #include - #include "ErrnoReason.h" #include "MemoryUtils.h" #include "PbiBuilderBase.h" +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + namespace PacBio { namespace BAM { @@ -77,6 +75,10 @@ { } +PbiBuilder::PbiBuilder(PbiBuilder&&) noexcept = default; + +PbiBuilder& PbiBuilder::operator=(PbiBuilder&&) noexcept = default; + PbiBuilder::~PbiBuilder() noexcept = default; void PbiBuilder::AddRecord(const BamRecord& record, const int64_t vOffset) diff -Nru pbbam-1.7.0+dfsg/src/PbiFile.cpp pbbam-2.0.0+dfsg/src/PbiFile.cpp --- pbbam-1.7.0+dfsg/src/PbiFile.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiFile.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include -#include -#include - #include #include #include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/PbiFilter.cpp pbbam-2.0.0+dfsg/src/PbiFilter.cpp --- pbbam-1.7.0+dfsg/src/PbiFilter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiFilter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,8 +2,13 @@ #include -#include -#include +#include +#include +#include "FileUtils.h" + +#include +#include +#include #include #include @@ -11,14 +16,8 @@ #include #include -#include -#include -#include - -#include -#include - -#include "FileUtils.h" +#include +#include namespace PacBio { namespace BAM { @@ -269,11 +268,11 @@ std::vector rgIds; for (const auto& t : Split(value, ',')) { - rgIds.push_back(static_cast(std::stoul(t))); + rgIds.push_back(std::stoi(t)); } return PbiReadGroupFilter{rgIds, compareType}; } else { - const auto n = static_cast(std::stoul(value)); + const int32_t n = std::stoi(value); return PbiReadGroupFilter{n, compareType}; } } diff -Nru pbbam-1.7.0+dfsg/src/PbiFilterQuery.cpp pbbam-2.0.0+dfsg/src/PbiFilterQuery.cpp --- pbbam-1.7.0+dfsg/src/PbiFilterQuery.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiFilterQuery.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -40,6 +40,10 @@ { } +PbiFilterQuery::PbiFilterQuery(PbiFilterQuery&&) noexcept = default; + +PbiFilterQuery& PbiFilterQuery::operator=(PbiFilterQuery&&) noexcept = default; + PbiFilterQuery::~PbiFilterQuery() = default; bool PbiFilterQuery::GetNext(BamRecord& r) { return d_->reader_.GetNext(r); } diff -Nru pbbam-1.7.0+dfsg/src/PbiFilterTypes.cpp pbbam-2.0.0+dfsg/src/PbiFilterTypes.cpp --- pbbam-1.7.0+dfsg/src/PbiFilterTypes.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiFilterTypes.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,17 +2,18 @@ #include -#include -#include -#include +#include + +#include +#include +#include #include #include -#include -#include - -#include +#include +#include +#include namespace PacBio { namespace BAM { @@ -121,6 +122,7 @@ "Movie name filter can only compare equality or presence in whitelist/blacklist."}; } + // clang-format off for (const auto& movieName : movieNames) { candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "CCS"))); candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "TRANSCRIPT"))); @@ -130,8 +132,17 @@ candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "SCRAP"))); candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "UNKNOWN"))); candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "ZMW"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "CCS"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "TRANSCRIPT"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "POLYMERASE"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "HQREGION"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "SUBREAD"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "SCRAP"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "UNKNOWN"))); + candidateRgIds_.insert(ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "ZMW"))); movieNames_.insert(movieName); } + // clang-format on } bool PbiMovieNameFilter::Accepts(const PbiRawData& idx, const size_t row) const @@ -156,12 +167,20 @@ std::make_pair(barcodeData.bcForward_.at(i), barcodeData.bcReverse_.at(i)); for (const auto& movieName : movieNames_) { const auto tryBarcodedType = [&](const std::string& readType) { - const int32_t barcodedId = + int32_t barcodedId = ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, readType, barcodes)); if (barcodedId == rgId) { candidateRgIds_.insert(barcodedId); // found combo, save for future lookup return true; } + + barcodedId = + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, readType, barcodes)); + if (barcodedId == rgId) { + candidateRgIds_.insert(barcodedId); // found combo, save for future lookup + return true; + } + return false; }; @@ -220,7 +239,7 @@ bool Accepts(const PbiRawData& idx, const size_t row) const { // lazy-load - if (!lookup_.is_initialized()) { + if (!lookup_) { InitializeLookup(idx); } const auto holeNumber = idx.BasicData().holeNumber_[row]; @@ -257,7 +276,7 @@ int numSubreads_; Compare::Type cmp_; - mutable boost::optional> lookup_; // mutable for lazy-load + mutable std::optional> lookup_; // mutable for lazy-load }; PbiNumSubreadsFilter::PbiNumSubreadsFilter(int numSubreads, const Compare::Type cmp) @@ -270,6 +289,10 @@ { } +PbiNumSubreadsFilter::PbiNumSubreadsFilter(PbiNumSubreadsFilter&&) noexcept = default; + +PbiNumSubreadsFilter& PbiNumSubreadsFilter::operator=(PbiNumSubreadsFilter&&) noexcept = default; + PbiNumSubreadsFilter::~PbiNumSubreadsFilter() = default; bool PbiNumSubreadsFilter::Accepts(const PbiRawData& idx, const size_t row) const @@ -295,7 +318,7 @@ public: using QueryInterval = std::pair; using QueryIntervals = std::set; - using ZmwData = std::unordered_map>; + using ZmwData = std::unordered_map>; using RgIdLookup = std::unordered_map>; PbiQueryNameFilterPrivate(const std::vector& queryNames, @@ -369,11 +392,15 @@ std::vector CandidateRgIds(const std::string& movieName, const RecordType type) { if (type == RecordType::CCS) { - return {ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "CCS"))}; + return { + ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "CCS")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "CCS")), + }; } if (type == RecordType::TRANSCRIPT) { - return {ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "TRANSCRIPT"))}; + return {ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "TRANSCRIPT")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "TRANSCRIPT"))}; } // we can't know for sure from QNAME alone @@ -382,7 +409,13 @@ ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "SUBREAD")), ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "SCRAP")), ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "UNKNOWN")), - ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "ZMW"))}; + ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "ZMW")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "POLYMERASE")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "HQREGION")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "SUBREAD")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "SCRAP")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "UNKNOWN")), + ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(movieName, "ZMW"))}; } void HandleName(const std::string& queryName, const RecordType type) @@ -409,7 +442,7 @@ }(); if (IsCcsOrTranscript(type)) { - zmw->emplace(zmwId, boost::optional{}); + zmw->emplace(zmwId, std::optional{}); } else { const auto queryIntervalParts = Split(nameParts.at(2), '_'); @@ -446,8 +479,11 @@ if (rgFound == lookup_.end()) { zmw = std::make_shared(); for (const auto& rg : rgIds) { - assert(lookup_.find(rg) == lookup_.end()); - lookup_.emplace(rg, zmw); + // Extra RG hashes (fixed & legacy) have been calculated as + // candidates, but sometimes these are the same. Only store once. + if (lookup_.find(rg) == lookup_.end()) { + lookup_.emplace(rg, zmw); + } } } else { #ifndef NDEBUG @@ -482,6 +518,10 @@ { } +PbiQueryNameFilter::PbiQueryNameFilter(PbiQueryNameFilter&&) noexcept = default; + +PbiQueryNameFilter& PbiQueryNameFilter::operator=(PbiQueryNameFilter&&) noexcept = default; + PbiQueryNameFilter::~PbiQueryNameFilter() = default; bool PbiQueryNameFilter::Accepts(const PbiRawData& idx, const size_t row) const @@ -494,28 +534,10 @@ PbiReadGroupFilter::PbiReadGroupFilter(const std::vector& rgIds, const Compare::Type cmp) : cmp_{cmp} { - if (cmp_ == Compare::EQUAL) { - cmp_ = Compare::CONTAINS; - } else if (cmp_ == Compare::NOT_EQUAL) { - cmp_ = Compare::NOT_CONTAINS; - } - - if (cmp_ != Compare::CONTAINS && cmp_ != Compare::NOT_CONTAINS) { - throw std::runtime_error{ - "[pbbam] PBI filter ERROR: unsupported compare type (" + Compare::TypeToName(cmp) + - ") for this property. " - "Read group filter can only compare equality or presence in whitelist/blacklist."}; - } - - // Add RG ID & empty filter if not present. The empty filter will work for - // non-barcoded IDs that match the expected number(s). - // - for (const auto& rgId : rgIds) { - const auto found = lookup_.find(rgId); - if (found == lookup_.cend()) { - lookup_.emplace(rgId, boost::none); - } - } + std::vector readGroups{rgIds.size()}; + std::transform(rgIds.cbegin(), rgIds.cend(), readGroups.begin(), + [](const int32_t rgId) { return ReadGroupInfo{ReadGroupInfo::IntToId(rgId)}; }); + AddReadGroups(readGroups); } PbiReadGroupFilter::PbiReadGroupFilter(const int32_t rgId, const Compare::Type cmp) @@ -527,43 +549,7 @@ const Compare::Type cmp) : cmp_{cmp} { - if (cmp_ == Compare::EQUAL) { - cmp_ = Compare::CONTAINS; - } else if (cmp_ == Compare::NOT_EQUAL) { - cmp_ = Compare::NOT_CONTAINS; - } - - if (cmp_ != Compare::CONTAINS && cmp_ != Compare::NOT_CONTAINS) { - throw std::runtime_error{ - "[pbbam] PBI filter ERROR: unsupported compare type (" + Compare::TypeToName(cmp) + - ") for this property. " - "Read group filter can only compare equality or presence in whitelist/blacklist."}; - } - - for (const auto& rg : readGroups) { - // Add RG base ID with no filter if not present. The empty filter will - // work for non-barcoded IDs. We'll add to it if the base read group ID - // also has barcode labels,so that any barcode pair whitelisted for this - // read group filter will be a match. - // - const auto idNum = ReadGroupInfo::IdToInt(rg.BaseId()); - const auto found = lookup_.find(idNum); - if (found == lookup_.cend()) { - lookup_.emplace(idNum, boost::none); - } - - // Maybe add barcodes to base ID - const auto barcodes = rg.Barcodes(); - if (barcodes) { - const auto bcFor = static_cast(barcodes->first); - const auto bcRev = static_cast(barcodes->second); - auto& idBarcodes = lookup_.at(idNum); - if (!idBarcodes) { - idBarcodes = std::vector>{}; - } - idBarcodes->push_back(std::make_pair(bcFor, bcRev)); - } - } + AddReadGroups(readGroups); } PbiReadGroupFilter::PbiReadGroupFilter(const ReadGroupInfo& rg, const Compare::Type cmp) @@ -573,51 +559,98 @@ PbiReadGroupFilter::PbiReadGroupFilter(const std::vector& rgIds, const Compare::Type cmp) + : cmp_{cmp} { - std::vector readGroups; - for (const auto& rgId : rgIds) { - readGroups.push_back(rgId); - } - *this = PbiReadGroupFilter{readGroups, cmp}; + std::vector readGroups{rgIds.size()}; + std::transform(rgIds.cbegin(), rgIds.cend(), readGroups.begin(), + [](const std::string& rgId) { return ReadGroupInfo{rgId}; }); + AddReadGroups(readGroups); } -PbiReadGroupFilter::PbiReadGroupFilter(const std::string& rgId, const Compare::Type cmp) - : PbiReadGroupFilter{ReadGroupInfo{rgId}, cmp} +PbiReadGroupFilter::PbiReadGroupFilter(const std::string& rgId, + const Compare::Type cmp) //: cmp_{cmp} + : PbiReadGroupFilter{std::vector{rgId}, cmp} { } bool PbiReadGroupFilter::Accepts(const PbiRawData& idx, const size_t row) const { - const auto accepted = [this](const PbiRawData& index, const size_t i) { - // Check that read group base ID is found. - const auto rowRgId = index.BasicData().rgId_.at(i); - const auto foundAt = lookup_.find(rowRgId); - if (foundAt == lookup_.cend()) { + const auto DoFiltersMatch = [&](const int32_t rowRgId) { + const auto foundInFilterList = readGroups_.find(rowRgId); + if (foundInFilterList == readGroups_.cend()) { return false; } - // Read group's base ID is found, check for filtered barcodes. - // - // For non-barcoded read groups, the filter is empty. This is - // essentially a no-op for allowing all candidate rows. - // - const auto& barcodes = foundAt->second; - if (!barcodes) { - return true; - } + // matching ID found, check for potential barcode requirements - // Return success on first match, otherwise no match found. - for (const auto& bcPair : *barcodes) { - if (index.BarcodeData().bcForward_.at(i) == bcPair.first && - index.BarcodeData().bcReverse_.at(i) == bcPair.second) { - return true; + if (idx.HasBarcodeData()) { + const int16_t rowBcForward = idx.BarcodeData().bcForward_.at(row); + const int16_t rowBcReverse = idx.BarcodeData().bcReverse_.at(row); + + for (const auto& filterReadGroup : foundInFilterList->second) { + const auto& filterBarcodes = filterReadGroup.Barcodes(); + if (filterBarcodes) { + const int16_t filterBcForward = filterBarcodes->first; + const int16_t filterBcReverse = filterBarcodes->second; + if ((rowBcForward == filterBcForward) && (rowBcReverse == filterBcReverse)) { + // found matching barcodes + return true; + } + } } + + // no read groups in filter match this index row's barcodes + return false; + } else { + for (const auto& filterReadGroup : foundInFilterList->second) { + const auto& filterBarcodes = filterReadGroup.Barcodes(); + if (!filterBarcodes) { + // found a read group that matches ID & does not require a barcode match + return true; + } + } + + // all filter read groups require barcodes, but index does not + // contain any barcode information + return false; } - return false; - }(idx, row); + }; - assert(cmp_ == Compare::CONTAINS || cmp_ == Compare::NOT_CONTAINS); - return (cmp_ == Compare::CONTAINS ? accepted : !accepted); + const int rowRgId = idx.BasicData().rgId_.at(row); + const bool rowMatched = DoFiltersMatch(rowRgId); + const bool lookingForEquality = (cmp_ == Compare::CONTAINS) || (cmp_ == Compare::EQUAL); + return (lookingForEquality ? rowMatched : !rowMatched); +} + +void PbiReadGroupFilter::AddReadGroups(const std::vector& readGroups) +{ + if (cmp_ == Compare::EQUAL) { + cmp_ = Compare::CONTAINS; + } else if (cmp_ == Compare::NOT_EQUAL) { + cmp_ = Compare::NOT_CONTAINS; + } + + if (cmp_ != Compare::CONTAINS && cmp_ != Compare::NOT_CONTAINS) { + throw std::runtime_error{"[pbbam] PBI filter ERROR: unsupported compare type (" + + Compare::TypeToName(cmp_) + + ") for this property. Read group filter can only compare equality " + "or presence in whitelist/blacklist."}; + } + + // + // Ensure we track all potential representations of a read group's ID. + // + // NOTE: Storing the read group object more than once for equivalent IDs is + // allowed here. The matching phase does a linear walk over the read groups + // stored here to determine a match. This does not change the result. + // + for (const auto& rg : readGroups) { + const std::string rgId = rg.Id(); + readGroups_[ReadGroupInfo::IdToInt(rgId)].push_back(rg); + readGroups_[ReadGroupInfo::IdToInt(ReadGroupInfo::GetBaseId(rgId))].push_back(rg); + readGroups_[ReadGroupInfo::IdToInt(MakeReadGroupId(rg))].push_back(rg); + readGroups_[ReadGroupInfo::IdToInt(MakeLegacyReadGroupId(rg))].push_back(rg); + } } // PbiReferenceNameFilter @@ -652,7 +685,7 @@ const BamFile bamFile{bamFilename}; // single-value - if (rnameWhitelist_ == boost::none) { + if (!rnameWhitelist_) { const auto tId = bamFile.ReferenceId(rname_); subFilter_ = PbiReferenceIdFilter{tId, cmp_}; } @@ -660,7 +693,7 @@ // multi-value (whitelist/blacklist) else { std::vector ids; - for (const auto& rname : rnameWhitelist_.get()) { + for (const auto& rname : *rnameWhitelist_) { ids.push_back(bamFile.ReferenceId(rname)); } subFilter_ = PbiReferenceIdFilter{std::move(ids), cmp_}; @@ -712,7 +745,8 @@ } if (cmp_ != Compare::CONTAINS && cmp_ != Compare::NOT_CONTAINS) { throw std::runtime_error{ - "[pbbam] PBI filter ERROR: multi-valued filters (e.g. whitelists) can only check for " + "[pbbam] PBI filter ERROR: multi-valued filters (e.g. whitelists) can only check " + "for " "containment."}; } } diff -Nru pbbam-1.7.0+dfsg/src/PbiIndexedBamReader.cpp pbbam-2.0.0+dfsg/src/PbiIndexedBamReader.cpp --- pbbam-1.7.0+dfsg/src/PbiIndexedBamReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiIndexedBamReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,15 +2,15 @@ #include -#include -#include +#include "ErrnoReason.h" + +#include #include #include -#include - -#include "ErrnoReason.h" +#include +#include namespace PacBio { namespace BAM { @@ -171,6 +171,10 @@ { } +PbiIndexedBamReader::PbiIndexedBamReader(PbiIndexedBamReader&&) noexcept = default; + +PbiIndexedBamReader& PbiIndexedBamReader::operator=(PbiIndexedBamReader&&) noexcept = default; + PbiIndexedBamReader::~PbiIndexedBamReader() = default; const BamFile& PbiIndexedBamReader::File() const { return d_->file_; } diff -Nru pbbam-1.7.0+dfsg/src/PbiIndexIO.cpp pbbam-2.0.0+dfsg/src/PbiIndexIO.cpp --- pbbam-1.7.0+dfsg/src/PbiIndexIO.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiIndexIO.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,24 +2,23 @@ #include "PbiIndexIO.h" -#include - -#include -#include -#include - -#include -#include - -#include - #include #include #include #include - #include "ErrnoReason.h" +#include + +#include + +#include +#include +#include +#include + +#include + namespace PacBio { namespace BAM { namespace { @@ -115,7 +114,7 @@ aggregateData.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::BARCODE); // Some GCC configurations give false-positive warnings against using uninitialized - // boost::optional here, hence the 'old-fashioned' bool flag. + // std::optional here, hence the 'old-fashioned' bool flag. bool isSet = false; PbiFile::VersionEnum aggregateVersion = PbiFile::CurrentVersion; const auto compatibleVersion = [&](PbiFile::VersionEnum next) { diff -Nru pbbam-1.7.0+dfsg/src/PbiIndexIO.h pbbam-2.0.0+dfsg/src/PbiIndexIO.h --- pbbam-1.7.0+dfsg/src/PbiIndexIO.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiIndexIO.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,22 +3,22 @@ #include -#include -#include -#include +#include +#include +#include +#include + +#include +#include #include #include #include #include -#include -#include - -#include -#include -#include -#include +#include +#include +#include namespace PacBio { namespace BAM { @@ -32,25 +32,22 @@ static void Save(const PbiRawData& rawData, const std::string& filename); // per-component load - static void LoadBarcodeData(PbiRawBarcodeData& barcodeData, const uint32_t numReads, BGZF* fp); + static void LoadBarcodeData(PbiRawBarcodeData& barcodeData, uint32_t numReads, BGZF* fp); static void LoadHeader(PbiRawData& index, BGZF* fp); - static void LoadMappedData(PbiRawMappedData& mappedData, const uint32_t numReads, BGZF* fp); + static void LoadMappedData(PbiRawMappedData& mappedData, uint32_t numReads, BGZF* fp); static void LoadReferenceData(PbiRawReferenceData& referenceData, BGZF* fp); - static void LoadBasicData(PbiRawBasicData& basicData, const uint32_t numReads, BGZF* fp); + static void LoadBasicData(PbiRawBasicData& basicData, uint32_t numReads, BGZF* fp); // per-data-field load template - static void LoadBgzfVector(BGZF* fp, std::vector& data, const uint32_t numReads); + static void LoadBgzfVector(BGZF* fp, std::vector& data, uint32_t numReads); // per-component write - static void WriteBarcodeData(const PbiRawBarcodeData& barcodeData, const uint32_t numReads, - BGZF* fp); + static void WriteBarcodeData(const PbiRawBarcodeData& barcodeData, uint32_t numReads, BGZF* fp); static void WriteHeader(const PbiRawData& index, BGZF* fp); - static void WriteMappedData(const PbiRawMappedData& mappedData, const uint32_t numReads, - BGZF* fp); + static void WriteMappedData(const PbiRawMappedData& mappedData, uint32_t numReads, BGZF* fp); static void WriteReferenceData(const PbiRawReferenceData& referenceData, BGZF* fp); - static void WriteBasicData(const PbiRawBasicData& subreadData, const uint32_t numReads, - BGZF* fp); + static void WriteBasicData(const PbiRawBasicData& subreadData, uint32_t numReads, BGZF* fp); // per-data-field write template diff -Nru pbbam-1.7.0+dfsg/src/PbiRawData.cpp pbbam-2.0.0+dfsg/src/PbiRawData.cpp --- pbbam-1.7.0+dfsg/src/PbiRawData.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/PbiRawData.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,20 +2,19 @@ #include -#include -#include +#include +#include +#include +#include "PbiIndexIO.h" + +#include #include #include #include -#include - -#include -#include -#include - -#include "PbiIndexIO.h" +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ProgramInfo.cpp pbbam-2.0.0+dfsg/src/ProgramInfo.cpp --- pbbam-1.7.0+dfsg/src/ProgramInfo.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ProgramInfo.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,12 +2,12 @@ #include -#include -#include - #include #include +#include +#include + namespace PacBio { namespace BAM { namespace { diff -Nru pbbam-1.7.0+dfsg/src/Pulse2BaseCache.h pbbam-2.0.0+dfsg/src/Pulse2BaseCache.h --- pbbam-1.7.0+dfsg/src/Pulse2BaseCache.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/Pulse2BaseCache.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,15 @@ #include -#include -#include -#include +#include +#include #include #include -#include -#include +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ReadGroupInfo.cpp pbbam-2.0.0+dfsg/src/ReadGroupInfo.cpp --- pbbam-1.7.0+dfsg/src/ReadGroupInfo.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ReadGroupInfo.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,10 +2,13 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include "ChemistryTable.h" + +#include +#include #include #include @@ -16,14 +19,10 @@ #include #include -#include -#include - -#include -#include -#include - -#include "ChemistryTable.h" +#include +#include +#include +#include namespace PacBio { namespace BAM { @@ -80,6 +79,10 @@ static const std::string token_BM{"BarcodeMode"}; static const std::string token_BQ{"BarcodeQuality"}; +static const std::string token_ST{"STRAND"}; +static const std::string strand_FWD{"FORWARD"}; +static const std::string strand_REV{"REVERSE"}; + static const std::string codec_RAW{"Frames"}; static const std::string codec_V1{"CodecV1"}; @@ -290,31 +293,43 @@ ReadGroupInfo::ReadGroupInfo(std::string id) : readType_{"UNKNOWN"} { Id(std::move(id)); } ReadGroupInfo::ReadGroupInfo(std::string movieName, std::string readType) - : ReadGroupInfo{std::move(movieName), std::move(readType), PlatformModelType::SEQUEL} + : ReadGroupInfo{ + ReadGroupInfoConfig{std::move(movieName), std::move(readType), PlatformModelType::SEQUEL}} { } ReadGroupInfo::ReadGroupInfo(std::string movieName, std::string readType, std::pair barcodes) - : ReadGroupInfo{std::move(movieName), std::move(readType), PlatformModelType::SEQUEL, - std::move(barcodes)} + : ReadGroupInfo{ReadGroupInfoConfig{std::move(movieName), std::move(readType), + PlatformModelType::SEQUEL, std::move(barcodes)}} { } ReadGroupInfo::ReadGroupInfo(std::string movieName, std::string readType, PlatformModelType platform) - : platformModel_{std::move(platform)} + : ReadGroupInfo{ReadGroupInfoConfig{std::move(movieName), std::move(readType), platform}} { - Id(MakeReadGroupId(movieName, readType)); - movieName_ = std::move(movieName); - readType_ = std::move(readType); } ReadGroupInfo::ReadGroupInfo(std::string movieName, std::string readType, PlatformModelType platform, std::pair barcodes) - : ReadGroupInfo{MakeReadGroupId(movieName, readType), std::move(barcodes)} + : ReadGroupInfo{ReadGroupInfoConfig{std::move(movieName), std::move(readType), platform, + std::move(barcodes)}} { - platformModel_ = std::move(platform); +} + +ReadGroupInfo::ReadGroupInfo(ReadGroupInfoConfig config) + : movieName_{config.MovieName}, readType_{config.ReadType}, strand_{config.Strand} +{ + if (config.Barcodes) { + Id(MakeReadGroupId(config.MovieName, config.ReadType, *config.Barcodes, config.Strand)); + barcodes_ = std::move(config.Barcodes); + } else { + Id(MakeReadGroupId(config.MovieName, config.ReadType, config.Strand)); + } + if (config.Platform) { + platformModel_ = *config.Platform; + } } bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const noexcept @@ -396,25 +411,25 @@ return barcodeQuality_; } -boost::optional ReadGroupInfo::BarcodeForward() const +std::optional ReadGroupInfo::BarcodeForward() const { const auto barcodes = Barcodes(); if (barcodes) { return barcodes->first; } - return boost::make_optional(false, uint16_t{0}); + return {}; } -boost::optional ReadGroupInfo::BarcodeReverse() const +std::optional ReadGroupInfo::BarcodeReverse() const { const auto barcodes = Barcodes(); if (barcodes) { return barcodes->second; } - return boost::make_optional(false, uint16_t{0}); + return {}; } -boost::optional> ReadGroupInfo::Barcodes() const { return barcodes_; } +std::optional> ReadGroupInfo::Barcodes() const { return barcodes_; } std::string ReadGroupInfo::BarcodeSequence() const { @@ -523,6 +538,25 @@ } } +void ReadGroupInfo::DecodeStrand(std::string value) +{ + if (value == strand_FWD) { + strand_ = Data::Strand::FORWARD; + } else { + assert(value == strand_REV); + strand_ = Data::Strand::REVERSE; + } +} + +std::string ReadGroupInfo::EncodeStrand(Data::Strand strand) const +{ + if (strand == Data::Strand::FORWARD) { + return strand_FWD; + } + assert(strand == Data::Strand::REVERSE); + return strand_REV; +} + void ReadGroupInfo::DecodeFrameCodecKey(const std::string& key, std::string value) { const auto keyParts = Split(key, ':'); @@ -580,6 +614,10 @@ } else if (IsLikelyBarcodeKey(key)) { DecodeBarcodeKey(key, std::move(value)); + // strand + } else if (key == token_ST) { + DecodeStrand(std::move(value)); + // frame codecs } else { DecodeFrameCodecKey(key, std::move(value)); @@ -638,6 +676,10 @@ result.append(barcodeData); } + if (strand_) { + result.append(SEP + token_ST + EQ + EncodeStrand(*strand_)); + } + return result; } @@ -954,6 +996,14 @@ return *this; } +std::optional ReadGroupInfo::Strand() const { return strand_; } + +ReadGroupInfo& ReadGroupInfo::Strand(Data::Strand strand) +{ + strand_ = std::move(strand); + return *this; +} + std::string ReadGroupInfo::ToSam(const ReadGroupInfo& rg) { return rg.ToSam(); } std::string ReadGroupInfo::ToSam() const @@ -1004,25 +1054,84 @@ return out.str(); } -std::string MakeReadGroupId(const std::string& movieName, const std::string& readType) +// --------------------------------------------------------- + +std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, + const std::optional strand) +{ + std::string content{movieName + "//" + readType}; + if (strand) { + if (strand == Data::Strand::FORWARD) { + content += "//fwd"; + } else if (strand == Data::Strand::REVERSE) { + content += "//rev"; + } + } + return MD5Hash(content).substr(0, 8); +} + +std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, + const std::string& barcodeString, + const std::optional strand) { - return MD5Hash(movieName + "//" + readType).substr(0, 8); + const std::string baseId{MakeReadGroupId(movieName, readType, strand)}; + return baseId + "/" + barcodeString; } std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, - const std::string& barcodeString) + const std::pair& barcodes, + const std::optional strand) +{ + const std::string barcodeString{std::to_string(barcodes.first) + "--" + + std::to_string(barcodes.second)}; + return MakeReadGroupId(movieName, readType, barcodeString, strand); +} + +std::string MakeReadGroupId(const ReadGroupInfo& readGroup) +{ + const auto barcodes = readGroup.Barcodes(); + if (barcodes) { + const int16_t bcFor = barcodes->first; + const int16_t bcRev = barcodes->second; + return MakeReadGroupId(readGroup.MovieName(), readGroup.ReadType(), + std::make_pair(bcFor, bcRev), readGroup.Strand()); + } else { + return MakeReadGroupId(readGroup.MovieName(), readGroup.ReadType(), readGroup.Strand()); + } +} + +std::string MakeLegacyReadGroupId(const std::string& movieName, const std::string& readType) +{ + return MD5Hash(movieName + "//" + readType).substr(0, 8); +} + +std::string MakeLegacyReadGroupId(const std::string& movieName, const std::string& readType, + const std::string& barcodeString) { const std::string baseId{ MD5Hash(movieName + "//" + readType + "//" + barcodeString).substr(0, 8)}; return baseId + "/" + barcodeString; } -std::string MakeReadGroupId(const std::string& movieName, const std::string& readType, - const std::pair& barcodes) +std::string MakeLegacyReadGroupId(const std::string& movieName, const std::string& readType, + const std::pair& barcodes) { const std::string barcodeString{std::to_string(barcodes.first) + "--" + std::to_string(barcodes.second)}; - return MakeReadGroupId(movieName, readType, barcodeString); + return MakeLegacyReadGroupId(movieName, readType, barcodeString); +} + +std::string MakeLegacyReadGroupId(const ReadGroupInfo& readGroup) +{ + const auto barcodes = readGroup.Barcodes(); + if (barcodes) { + const int16_t bcFor = barcodes->first; + const int16_t bcRev = barcodes->second; + return MakeLegacyReadGroupId(readGroup.MovieName(), readGroup.ReadType(), + std::make_pair(bcFor, bcRev)); + } else { + return MakeLegacyReadGroupId(readGroup.MovieName(), readGroup.ReadType()); + } } } // namespace BAM diff -Nru pbbam-1.7.0+dfsg/src/RunMetadataParser.cpp pbbam-2.0.0+dfsg/src/RunMetadataParser.cpp --- pbbam-1.7.0+dfsg/src/RunMetadataParser.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/RunMetadataParser.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include "RunMetadataParser.h" -#include -#include -#include +#include #include -#include +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/RunMetadataParser.h pbbam-2.0.0+dfsg/src/RunMetadataParser.h --- pbbam-1.7.0+dfsg/src/RunMetadataParser.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/RunMetadataParser.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,13 @@ #include +#include +#include "pugixml/pugixml.hpp" + #include #include #include -#include - -#include "pugixml/pugixml.hpp" - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/SamReader.cpp pbbam-2.0.0+dfsg/src/SamReader.cpp --- pbbam-1.7.0+dfsg/src/SamReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/SamReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,20 +2,19 @@ #include -#include -#include -#include - -#include -#include +#include +#include "MemoryUtils.h" #include #include #include -#include +#include +#include -#include "MemoryUtils.h" +#include +#include +#include namespace PacBio { namespace BAM { @@ -70,6 +69,10 @@ { } +SamReader::SamReader(SamReader&&) noexcept = default; + +SamReader& SamReader::operator=(SamReader&&) noexcept = default; + SamReader::~SamReader() = default; const std::string& SamReader::Filename() const { return d_->filename_; } diff -Nru pbbam-1.7.0+dfsg/src/SamTagCodec.cpp pbbam-2.0.0+dfsg/src/SamTagCodec.cpp --- pbbam-1.7.0+dfsg/src/SamTagCodec.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/SamTagCodec.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,15 +2,15 @@ #include -#include +#include + +#include #include #include #include -#include - -#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/SamWriter.cpp pbbam-2.0.0+dfsg/src/SamWriter.cpp --- pbbam-1.7.0+dfsg/src/SamWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/SamWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,23 +2,22 @@ #include -#include -#include -#include -#include -#include - -#include -#include - #include #include - #include "Autovalidate.h" #include "ErrnoReason.h" #include "FileProducer.h" #include "MemoryUtils.h" +#include +#include + +#include +#include +#include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/SequenceInfo.cpp pbbam-2.0.0+dfsg/src/SequenceInfo.cpp --- pbbam-1.7.0+dfsg/src/SequenceInfo.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/SequenceInfo.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,15 +2,15 @@ #include -#include +#include +#include #include #include #include #include -#include -#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/SequenceUtils.h pbbam-2.0.0+dfsg/src/SequenceUtils.h --- pbbam-1.7.0+dfsg/src/SequenceUtils.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/SequenceUtils.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,14 +3,14 @@ #include -#include -#include +#include #include #include #include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/Tag.cpp pbbam-2.0.0+dfsg/src/Tag.cpp --- pbbam-1.7.0+dfsg/src/Tag.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/Tag.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include -#include -#include +#include #include #include -#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/TextFileReader.cpp pbbam-2.0.0+dfsg/src/TextFileReader.cpp --- pbbam-1.7.0+dfsg/src/TextFileReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/TextFileReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,15 @@ #include -#include -#include -#include +#include +#include "ErrnoReason.h" #include #include -#include - -#include "ErrnoReason.h" +#include +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/TextFileWriter.cpp pbbam-2.0.0+dfsg/src/TextFileWriter.cpp --- pbbam-1.7.0+dfsg/src/TextFileWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/TextFileWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,18 +2,17 @@ #include +#include +#include "ErrnoReason.h" +#include "FileProducer.h" + +#include + #include #include #include #include -#include - -#include - -#include "ErrnoReason.h" -#include "FileProducer.h" - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/TimeUtils.h pbbam-2.0.0+dfsg/src/TimeUtils.h --- pbbam-1.7.0+dfsg/src/TimeUtils.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/TimeUtils.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,12 +3,12 @@ #include -#include - #include #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ValidationErrors.cpp pbbam-2.0.0+dfsg/src/ValidationErrors.cpp --- pbbam-1.7.0+dfsg/src/ValidationErrors.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ValidationErrors.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,12 @@ #include -#include +#include +#include "ValidationErrors.h" #include -#include - -#include "ValidationErrors.h" +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ValidationErrors.h pbbam-2.0.0+dfsg/src/ValidationErrors.h --- pbbam-1.7.0+dfsg/src/ValidationErrors.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ValidationErrors.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,13 +3,13 @@ #include -#include - #include #include #include #include +#include + namespace PacBio { namespace BAM { @@ -31,14 +31,13 @@ static const size_t MAX = std::numeric_limits::max(); - explicit ValidationErrors(const size_t maxNumErrors = ValidationErrors::MAX); + explicit ValidationErrors(size_t maxNumErrors = ValidationErrors::MAX); void AddFileError(const std::string& fn, std::string details); void AddReadGroupError(const std::string& rg, std::string details); void AddRecordError(const std::string& name, std::string details); void AddTagLengthError(const std::string& name, const std::string& tagLabel, - const std::string& tagName, const size_t observed, - const size_t expected); + const std::string& tagName, size_t observed, size_t expected); bool IsEmpty() const; size_t MaxNumErrors() const; diff -Nru pbbam-1.7.0+dfsg/src/Validator.cpp pbbam-2.0.0+dfsg/src/Validator.cpp --- pbbam-1.7.0+dfsg/src/Validator.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/Validator.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,25 +2,24 @@ #include -#include - -#include -#include -#include -#include - -#include -#include - #include #include #include #include #include - #include "ValidationErrors.h" #include "Version.h" +#include +#include + +#include +#include +#include +#include + +#include + namespace PacBio { namespace BAM { namespace { diff -Nru pbbam-1.7.0+dfsg/src/vcf/VcfFile.cpp pbbam-2.0.0+dfsg/src/vcf/VcfFile.cpp --- pbbam-1.7.0+dfsg/src/vcf/VcfFile.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/vcf/VcfFile.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,10 +2,10 @@ #include -#include - #include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/src/vcf/VcfFormat.cpp pbbam-2.0.0+dfsg/src/vcf/VcfFormat.cpp --- pbbam-1.7.0+dfsg/src/vcf/VcfFormat.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/vcf/VcfFormat.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,8 +2,11 @@ #include -#include -#include +#include +#include +#include "VcfFormatException.h" + +#include #include #include @@ -11,12 +14,8 @@ #include #include -#include - -#include -#include - -#include "VcfFormatException.h" +#include +#include namespace PacBio { namespace VCF { @@ -113,12 +112,12 @@ << def.Number() << ',' << Tokens::type << '=' << def.Type() << ',' << Tokens::description << '=' << QuotedText(def.Description()); - if (def.Source().is_initialized() && !def.Source().get().empty()) { - text << ',' << Tokens::source << '=' << QuotedText(def.Source().get()); + if (def.Source() && !def.Source()->empty()) { + text << ',' << Tokens::source << '=' << QuotedText(*def.Source()); } - if (def.Version().is_initialized() && !def.Version().get().empty()) { - text << ',' << Tokens::version << '=' << QuotedText(def.Version().get()); + if (def.Version() && !def.Version()->empty()) { + text << ',' << Tokens::version << '=' << QuotedText(*def.Version()); } text << '>'; @@ -534,10 +533,10 @@ { std::ostringstream out; out << field.id; - if (field.value.is_initialized()) { - out << '=' << field.value.get(); - } else if (field.values.is_initialized()) { - out << '=' << BAM::Join(field.values.get(), ','); + if (field.value) { + out << '=' << *field.value; + } else if (field.values) { + out << '=' << BAM::Join(*field.values, ','); } return out.str(); } @@ -559,11 +558,11 @@ if (!firstDataEntry) { result += ':'; } - if (d.value.is_initialized()) { - result += d.value.get(); + if (d.value) { + result += *d.value; } else { - assert(d.values.is_initialized()); - result += BAM::Join(d.values.get(), ','); + assert(d.values); + result += BAM::Join(*d.values, ','); } firstDataEntry = false; } diff -Nru pbbam-1.7.0+dfsg/src/vcf/VcfHeader.cpp pbbam-2.0.0+dfsg/src/vcf/VcfHeader.cpp --- pbbam-1.7.0+dfsg/src/vcf/VcfHeader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/vcf/VcfHeader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,10 +2,10 @@ #include -#include - #include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/src/vcf/VcfHeaderTypes.cpp pbbam-2.0.0+dfsg/src/vcf/VcfHeaderTypes.cpp --- pbbam-1.7.0+dfsg/src/vcf/VcfHeaderTypes.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/vcf/VcfHeaderTypes.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,12 +2,11 @@ #include -#include - #include - #include "VcfFormatException.h" +#include + namespace PacBio { namespace VCF { @@ -169,7 +168,7 @@ const std::string& InfoDefinition::Number() const { return number_; } -const boost::optional& InfoDefinition::Source() const { return source_; } +const std::optional& InfoDefinition::Source() const { return source_; } InfoDefinition& InfoDefinition::Source(std::string s) { @@ -179,7 +178,7 @@ const std::string& InfoDefinition::Type() const { return type_; } -const boost::optional& InfoDefinition::Version() const { return version_; } +const std::optional& InfoDefinition::Version() const { return version_; } InfoDefinition& InfoDefinition::Version(std::string v) { diff -Nru pbbam-1.7.0+dfsg/src/vcf/VcfSort.cpp pbbam-2.0.0+dfsg/src/vcf/VcfSort.cpp --- pbbam-1.7.0+dfsg/src/vcf/VcfSort.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/vcf/VcfSort.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,15 +2,15 @@ #include +#include +#include + #include #include #include #include #include -#include -#include - namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/src/vcf/VcfVariant.cpp pbbam-2.0.0+dfsg/src/vcf/VcfVariant.cpp --- pbbam-1.7.0+dfsg/src/vcf/VcfVariant.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/vcf/VcfVariant.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,15 @@ #include -#include -#include - -#include - #include #include - #include "VcfFormatException.h" +#include + +#include +#include + namespace PacBio { namespace VCF { @@ -88,8 +87,8 @@ return *this; } -const boost::optional& VcfVariant::GenotypeValue(const size_t sampleIndex, - const std::string& id) const +const std::optional& VcfVariant::GenotypeValue(const size_t sampleIndex, + const std::string& id) const { const auto& genotypeField = sampleGenotypes_.at(sampleIndex); const auto genotypeDataIndex = genotypeDataLookup_.at(id); @@ -98,7 +97,7 @@ } VcfVariant& VcfVariant::GenotypeValue(const size_t sampleIndex, const std::string& id, - boost::optional value) + std::optional value) { auto& genotypeField = sampleGenotypes_.at(sampleIndex); const auto genotypeDataIndex = genotypeDataLookup_.at(id); @@ -107,7 +106,7 @@ return *this; } -const boost::optional>& VcfVariant::GenotypeValues( +const std::optional>& VcfVariant::GenotypeValues( const size_t sampleIndex, const std::string& id) const { const auto& genotypeField = sampleGenotypes_.at(sampleIndex); @@ -117,7 +116,7 @@ } VcfVariant& VcfVariant::GenotypeValues(const size_t sampleIndex, const std::string& id, - boost::optional> values) + std::optional> values) { auto& genotypeField = sampleGenotypes_.at(sampleIndex); const auto genotypeDataIndex = genotypeDataLookup_.at(id); @@ -152,24 +151,24 @@ return *this; } -const boost::optional VcfVariant::InfoValue(const std::string& id) const +const std::optional VcfVariant::InfoValue(const std::string& id) const { return infoFields_.at(infoLookup_.at(id)).value; } -VcfVariant& VcfVariant::InfoValue(const std::string& id, boost::optional value) +VcfVariant& VcfVariant::InfoValue(const std::string& id, std::optional value) { infoFields_.at(infoLookup_.at(id)).value = std::move(value); return *this; } -const boost::optional> VcfVariant::InfoValues(const std::string& id) const +const std::optional> VcfVariant::InfoValues(const std::string& id) const { return infoFields_.at(infoLookup_.at(id)).values; } VcfVariant& VcfVariant::InfoValues(const std::string& id, - boost::optional> values) + std::optional> values) { infoFields_.at(infoLookup_.at(id)).values = std::move(values); return *this; @@ -184,24 +183,24 @@ bool VcfVariant::IsSampleHeterozygous(const size_t sampleIndex) const { const auto data = GenotypeValue(sampleIndex, "GT"); - auto fields = BAM::Split(data.get(), '/'); + auto fields = BAM::Split(*data, '/'); if (fields.size() == 1) { - fields = BAM::Split(data.get(), '|'); + fields = BAM::Split(*data, '|'); } if (fields.size() == 2) { return fields.at(0) != fields.at(1); } else { - throw VcfFormatException{"malformed GT field: " + data.get()}; + throw VcfFormatException{"malformed GT field: " + *data}; } } bool VcfVariant::IsSamplePhased(const size_t sampleIndex) const { const auto data = GenotypeValue(sampleIndex, "GT"); - const auto phaseFound = data.get().find('|') != std::string::npos; + const auto phaseFound = data->find('|') != std::string::npos; if (phaseFound) { - assert(data.get().find('/') == std::string::npos); + assert(data->find('/') == std::string::npos); } return phaseFound; } diff -Nru pbbam-1.7.0+dfsg/src/vcf/VcfWriter.cpp pbbam-2.0.0+dfsg/src/vcf/VcfWriter.cpp --- pbbam-1.7.0+dfsg/src/vcf/VcfWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/vcf/VcfWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,15 +2,14 @@ #include -#include -#include - #include #include #include - #include "../FileProducer.h" +#include +#include + namespace PacBio { namespace VCF { diff -Nru pbbam-1.7.0+dfsg/src/Version.cpp pbbam-2.0.0+dfsg/src/Version.cpp --- pbbam-1.7.0+dfsg/src/Version.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/Version.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,16 @@ #include "Version.h" +#include + #include #include #include -#include - namespace PacBio { namespace BAM { -const Version Version::Current = Version(3, 0, 7); +const Version Version::Current = Version(5, 0, 0); const Version Version::Minimum = Version(3, 0, 1); // string must be ".." diff -Nru pbbam-1.7.0+dfsg/src/VirtualStitching.h pbbam-2.0.0+dfsg/src/VirtualStitching.h --- pbbam-1.7.0+dfsg/src/VirtualStitching.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/VirtualStitching.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,20 +3,19 @@ #include +#include + #include +#include #include #include -#include - -#include - namespace PacBio { namespace BAM { using StitchingSources = std::deque>; -inline boost::optional ScrapsFileId(const ExternalResource& resource) +inline std::optional ScrapsFileId(const ExternalResource& resource) { const auto& childResources = resource.ExternalResources(); for (const auto& childResource : childResources) { @@ -26,7 +25,7 @@ return childResource.ResourceId(); } } - return boost::none; + return {}; } inline StitchingSources SourcesFromDataset(const DataSet& dataset) @@ -36,8 +35,8 @@ const ExternalResources& resources = dataset.ExternalResources(); for (const ExternalResource& resource : resources) { - boost::optional primaryId; - boost::optional scrapsId; + std::optional primaryId; + std::optional scrapsId; // if resource is possible "primary" BAM, store & look for associated scraps const auto& metatype = resource.MetaType(); @@ -49,8 +48,8 @@ // if found, resolve paths & store if (primaryId && scrapsId) { - std::string primaryFn = dataset.ResolvePath(primaryId.get()); - std::string scrapsFn = dataset.ResolvePath(scrapsId.get()); + std::string primaryFn = dataset.ResolvePath(*primaryId); + std::string scrapsFn = dataset.ResolvePath(*scrapsId); sources.emplace_back(std::make_pair(primaryFn, scrapsFn)); } } diff -Nru pbbam-1.7.0+dfsg/src/VirtualZmwBamRecord.cpp pbbam-2.0.0+dfsg/src/VirtualZmwBamRecord.cpp --- pbbam-1.7.0+dfsg/src/VirtualZmwBamRecord.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/VirtualZmwBamRecord.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,17 +2,17 @@ #include -#include +#include +#include + +#include #include #include #include #include -#include - -#include -#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/VirtualZmwCompositeReader.h pbbam-2.0.0+dfsg/src/VirtualZmwCompositeReader.h --- pbbam-1.7.0+dfsg/src/VirtualZmwCompositeReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/VirtualZmwCompositeReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,17 +3,16 @@ #include -#include -#include -#include -#include - #include #include - #include "VirtualStitching.h" #include "VirtualZmwReader.h" +#include +#include +#include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/VirtualZmwReader.cpp pbbam-2.0.0+dfsg/src/VirtualZmwReader.cpp --- pbbam-1.7.0+dfsg/src/VirtualZmwReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/VirtualZmwReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,11 +2,11 @@ #include "VirtualZmwReader.h" +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/VirtualZmwReader.h pbbam-2.0.0+dfsg/src/VirtualZmwReader.h --- pbbam-1.7.0+dfsg/src/VirtualZmwReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/VirtualZmwReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,8 +3,6 @@ #include -#include - #include #include #include @@ -12,6 +10,8 @@ #include #include +#include + namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/WhitelistedZmwReadStitcher.cpp pbbam-2.0.0+dfsg/src/WhitelistedZmwReadStitcher.cpp --- pbbam-1.7.0+dfsg/src/WhitelistedZmwReadStitcher.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/WhitelistedZmwReadStitcher.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,14 +2,13 @@ #include -#include +#include +#include "VirtualZmwReader.h" #include #include -#include - -#include "VirtualZmwReader.h" +#include namespace PacBio { namespace BAM { @@ -131,6 +130,12 @@ { } +WhitelistedZmwReadStitcher::WhitelistedZmwReadStitcher(WhitelistedZmwReadStitcher&&) noexcept = + default; + +WhitelistedZmwReadStitcher& WhitelistedZmwReadStitcher::operator=( + WhitelistedZmwReadStitcher&&) noexcept = default; + WhitelistedZmwReadStitcher::~WhitelistedZmwReadStitcher() = default; bool WhitelistedZmwReadStitcher::HasNext() const { return d_->HasNext(); } diff -Nru pbbam-1.7.0+dfsg/src/XmlReader.cpp pbbam-2.0.0+dfsg/src/XmlReader.cpp --- pbbam-1.7.0+dfsg/src/XmlReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/XmlReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,7 +2,8 @@ #include "XmlReader.h" -#include +#include +#include "pugixml/pugixml.hpp" #include #include @@ -10,9 +11,7 @@ #include #include -#include - -#include "pugixml/pugixml.hpp" +#include using DataSetElement = PacBio::BAM::internal::DataSetElement; using FromInputXml = PacBio::BAM::internal::FromInputXml; @@ -123,6 +122,8 @@ return std::make_shared(fromInputXml); case XmlElementType::SEQUENCING_KIT_PLATE: return std::make_shared(fromInputXml); + case XmlElementType::SUPPLEMENTAL_RESOURCES: + return std::make_shared(fromInputXml); case XmlElementType::TEMPLATE_PREP_KIT: return std::make_shared(fromInputXml); diff -Nru pbbam-1.7.0+dfsg/src/XmlReader.h pbbam-2.0.0+dfsg/src/XmlReader.h --- pbbam-1.7.0+dfsg/src/XmlReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/XmlReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/XmlWriter.cpp pbbam-2.0.0+dfsg/src/XmlWriter.cpp --- pbbam-1.7.0+dfsg/src/XmlWriter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/XmlWriter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,17 +2,16 @@ #include "XmlWriter.h" -#include -#include +#include +#include "FileUtils.h" +#include "pugixml/pugixml.hpp" #include #include #include -#include - -#include "FileUtils.h" -#include "pugixml/pugixml.hpp" +#include +#include using DataSetElement = PacBio::BAM::internal::DataSetElement; @@ -35,7 +34,7 @@ if (node.IsVerbatimLabel()) { return node.QualifiedNameLabel(); - } else if (node.LocalNameLabel().to_string() == "Collections") { + } else if (node.LocalNameLabel() == "Collections") { return "Collections"; // otherwise, probably user-generated @@ -45,9 +44,9 @@ static const std::string colon = ":"; auto xsdType = node.Xsd(); if (xsdType == XsdType::NONE) { - xsdType = registry.XsdForElement(node.LocalNameLabel().to_string()); + xsdType = registry.XsdForElement(std::string{node.LocalNameLabel()}); } - return registry.Namespace(xsdType).Name() + colon + node.LocalNameLabel().to_string(); + return registry.Namespace(xsdType).Name() + colon + std::string{node.LocalNameLabel()}; } // otherwise, has prefix - return full name else { diff -Nru pbbam-1.7.0+dfsg/src/XmlWriter.h pbbam-2.0.0+dfsg/src/XmlWriter.h --- pbbam-1.7.0+dfsg/src/XmlWriter.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/XmlWriter.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,11 +3,11 @@ #include +#include + #include #include -#include - namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ZmwChunkedFastaReader.cpp pbbam-2.0.0+dfsg/src/ZmwChunkedFastaReader.cpp --- pbbam-1.7.0+dfsg/src/ZmwChunkedFastaReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwChunkedFastaReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,8 +2,14 @@ #include -#include -#include +#include +#include +#include "MemoryUtils.h" +#include "ZmwChunkedFastxBgzfReader.h" +#include "ZmwChunkedFastxReaderImpl.h" +#include "ZmwChunkedFastxTextReader.h" + +#include #include #include @@ -12,15 +18,8 @@ #include #include -#include - -#include -#include - -#include "MemoryUtils.h" -#include "ZmwChunkedFastxBgzfReader.h" -#include "ZmwChunkedFastxReaderImpl.h" -#include "ZmwChunkedFastxTextReader.h" +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ZmwChunkedFastqReader.cpp pbbam-2.0.0+dfsg/src/ZmwChunkedFastqReader.cpp --- pbbam-1.7.0+dfsg/src/ZmwChunkedFastqReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwChunkedFastqReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,8 +2,14 @@ #include -#include -#include +#include +#include +#include "MemoryUtils.h" +#include "ZmwChunkedFastxBgzfReader.h" +#include "ZmwChunkedFastxReaderImpl.h" +#include "ZmwChunkedFastxTextReader.h" + +#include #include #include @@ -12,15 +18,8 @@ #include #include -#include - -#include -#include - -#include "MemoryUtils.h" -#include "ZmwChunkedFastxBgzfReader.h" -#include "ZmwChunkedFastxReaderImpl.h" -#include "ZmwChunkedFastxTextReader.h" +#include +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ZmwChunkedFastxBgzfReader.cpp pbbam-2.0.0+dfsg/src/ZmwChunkedFastxBgzfReader.cpp --- pbbam-1.7.0+dfsg/src/ZmwChunkedFastxBgzfReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwChunkedFastxBgzfReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,13 +2,13 @@ #include "ZmwChunkedFastxBgzfReader.h" -#include +#include "ErrnoReason.h" #include #include #include -#include "ErrnoReason.h" +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ZmwChunkedFastxBgzfReader.h pbbam-2.0.0+dfsg/src/ZmwChunkedFastxBgzfReader.h --- pbbam-1.7.0+dfsg/src/ZmwChunkedFastxBgzfReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwChunkedFastxBgzfReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -5,11 +5,11 @@ #include "ZmwChunkedFastxReaderImpl.h" -#include +#include #include -#include +#include namespace PacBio { namespace BAM { @@ -17,7 +17,7 @@ class ZmwChunkedFastxBgzfReader final : public ZmwChunkedFastxReaderImpl { public: - ZmwChunkedFastxBgzfReader(std::string filename, const size_t numChunks); + ZmwChunkedFastxBgzfReader(std::string filename, size_t numChunks); void Seek(uint64_t pos) final; FastaSequence ReadNextFasta(bool skipName) final; diff -Nru pbbam-1.7.0+dfsg/src/ZmwChunkedFastxReaderImpl.h pbbam-2.0.0+dfsg/src/ZmwChunkedFastxReaderImpl.h --- pbbam-1.7.0+dfsg/src/ZmwChunkedFastxReaderImpl.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwChunkedFastxReaderImpl.h 2022-01-13 18:20:29.000000000 +0000 @@ -3,15 +3,14 @@ #include -#include -#include - #include #include #include - #include "FaiZmwChunker.h" +#include +#include + namespace PacBio { namespace BAM { @@ -30,7 +29,7 @@ FaiZmwChunker chunker_; protected: - ZmwChunkedFastxReaderImpl(std::string fastxFilename, const size_t numChunks); + ZmwChunkedFastxReaderImpl(std::string fastxFilename, size_t numChunks); }; } // namespace BAM diff -Nru pbbam-1.7.0+dfsg/src/ZmwChunkedFastxTextReader.cpp pbbam-2.0.0+dfsg/src/ZmwChunkedFastxTextReader.cpp --- pbbam-1.7.0+dfsg/src/ZmwChunkedFastxTextReader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwChunkedFastxTextReader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,16 +2,16 @@ #include "ZmwChunkedFastxTextReader.h" -#include - -#include -#include +#include "ErrnoReason.h" #include #include #include -#include "ErrnoReason.h" +#include +#include + +#include namespace PacBio { namespace BAM { diff -Nru pbbam-1.7.0+dfsg/src/ZmwChunkedFastxTextReader.h pbbam-2.0.0+dfsg/src/ZmwChunkedFastxTextReader.h --- pbbam-1.7.0+dfsg/src/ZmwChunkedFastxTextReader.h 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwChunkedFastxTextReader.h 2022-01-13 18:20:29.000000000 +0000 @@ -5,12 +5,13 @@ #include "ZmwChunkedFastxReaderImpl.h" -#include +#include + +#include #include -#include -#include +#include namespace PacBio { namespace BAM { @@ -18,7 +19,7 @@ class ZmwChunkedFastxTextReader final : public ZmwChunkedFastxReaderImpl { public: - ZmwChunkedFastxTextReader(std::string filename, const size_t numChunks); + ZmwChunkedFastxTextReader(std::string filename, size_t numChunks); void Seek(uint64_t pos) final; FastaSequence ReadNextFasta(bool skipName) final; diff -Nru pbbam-1.7.0+dfsg/src/ZmwGroupQuery.cpp pbbam-2.0.0+dfsg/src/ZmwGroupQuery.cpp --- pbbam-1.7.0+dfsg/src/ZmwGroupQuery.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwGroupQuery.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,17 +2,16 @@ #include -#include - -#include -#include - #include #include #include - #include "MemoryUtils.h" +#include +#include + +#include + namespace PacBio { namespace BAM { @@ -239,6 +238,10 @@ { } +ZmwGroupQuery::ZmwGroupQuery(ZmwGroupQuery&&) noexcept = default; + +ZmwGroupQuery& ZmwGroupQuery::operator=(ZmwGroupQuery&&) noexcept = default; + ZmwGroupQuery::~ZmwGroupQuery() = default; bool ZmwGroupQuery::GetNext(std::vector& records) { return d_->GetNext(records); } diff -Nru pbbam-1.7.0+dfsg/src/ZmwQuery.cpp pbbam-2.0.0+dfsg/src/ZmwQuery.cpp --- pbbam-1.7.0+dfsg/src/ZmwQuery.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwQuery.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,11 +2,11 @@ #include -#include - #include #include +#include + namespace PacBio { namespace BAM { @@ -26,6 +26,10 @@ { } +ZmwQuery::ZmwQuery(ZmwQuery&&) noexcept = default; + +ZmwQuery& ZmwQuery::operator=(ZmwQuery&&) noexcept = default; + ZmwQuery::~ZmwQuery() = default; bool ZmwQuery::GetNext(BamRecord& r) { return d_->reader_.GetNext(r); } diff -Nru pbbam-1.7.0+dfsg/src/ZmwReadStitcher.cpp pbbam-2.0.0+dfsg/src/ZmwReadStitcher.cpp --- pbbam-1.7.0+dfsg/src/ZmwReadStitcher.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/src/ZmwReadStitcher.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2,18 +2,17 @@ #include -#include -#include -#include - #include #include #include #include - #include "VirtualStitching.h" #include "VirtualZmwReader.h" +#include +#include +#include + namespace PacBio { namespace BAM { @@ -122,6 +121,10 @@ { } +ZmwReadStitcher::ZmwReadStitcher(ZmwReadStitcher&&) noexcept = default; + +ZmwReadStitcher& ZmwReadStitcher::operator=(ZmwReadStitcher&&) noexcept = default; + ZmwReadStitcher::~ZmwReadStitcher() = default; bool ZmwReadStitcher::HasNext() { return d_->HasNext(); } diff -Nru pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource1.consensusreadset.xml pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource1.consensusreadset.xml --- pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource1.consensusreadset.xml 1970-01-01 00:00:00.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource1.consensusreadset.xml 2022-01-13 18:20:29.000000000 +0000 @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + 85803 + 18 + + diff -Nru pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource2.consensusreadset.xml pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource2.consensusreadset.xml --- pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource2.consensusreadset.xml 1970-01-01 00:00:00.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource2.consensusreadset.xml 2022-01-13 18:20:29.000000000 +0000 @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + 85803 + 18 + + diff -Nru pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource_empty.consensusreadset.xml pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource_empty.consensusreadset.xml --- pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource_empty.consensusreadset.xml 1970-01-01 00:00:00.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource_empty.consensusreadset.xml 2022-01-13 18:20:29.000000000 +0000 @@ -0,0 +1,15 @@ + + + + + + + + + + + + 85803 + 18 + + diff -Nru pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource_multiple.consensusreadset.xml pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource_multiple.consensusreadset.xml --- pbbam-1.7.0+dfsg/tests/data/dataset/supplemental_resource_multiple.consensusreadset.xml 1970-01-01 00:00:00.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/data/dataset/supplemental_resource_multiple.consensusreadset.xml 2022-01-13 18:20:29.000000000 +0000 @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + 85803 + 18 + + Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_199_199.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_199_199.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_199_199.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_199_199.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_8_8.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_8_8.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_8_8.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs_barcode_8_8.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs_barcodes_mixed.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs_barcodes_mixed.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_ccs_barcodes_mixed.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_ccs_barcodes_mixed.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_199_199.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_199_199.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_199_199.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_199_199.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_8_8.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_8_8.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_8_8.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads_barcode_8_8.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads_barcodes_mixed.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads_barcodes_mixed.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/new_hash_subreads_barcodes_mixed.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/new_hash_subreads_barcodes_mixed.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_199_199.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_199_199.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_199_199.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_199_199.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_8_8.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_8_8.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_8_8.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs_barcode_8_8.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs_barcodes_mixed.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs_barcodes_mixed.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_ccs_barcodes_mixed.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_ccs_barcodes_mixed.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_199_199.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_199_199.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_199_199.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_199_199.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_8_8.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_8_8.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_8_8.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads_barcode_8_8.bam.pbi differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads_barcodes_mixed.bam and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads_barcodes_mixed.bam differ Binary files /tmp/tmp18gnfuw7/smUuwbSKr1/pbbam-1.7.0+dfsg/tests/data/read_groups/old_hash_subreads_barcodes_mixed.bam.pbi and /tmp/tmp18gnfuw7/jw0caLwPGU/pbbam-2.0.0+dfsg/tests/data/read_groups/old_hash_subreads_barcodes_mixed.bam.pbi differ diff -Nru pbbam-1.7.0+dfsg/tests/meson.build pbbam-2.0.0+dfsg/tests/meson.build --- pbbam-1.7.0+dfsg/tests/meson.build 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/meson.build 2022-01-13 18:20:29.000000000 +0000 @@ -64,18 +64,18 @@ pbbam_clang_formatter, args : [ '--all'], - workdir : meson.source_root()) + workdir : meson.project_source_root()) test( 'pbbam EOF check', pbbam_eof_check, - workdir : meson.source_root()) + workdir : meson.project_source_root()) test( 'pbbam gtest unittests', pbbam_test, args : [ - '--gtest_output=xml:' + join_paths(meson.build_root(), 'pbbam-gtest-unittests.xml')], + '--gtest_output=xml:' + join_paths(meson.project_build_root(), 'pbbam-gtest-unittests.xml')], env : [ 'ARGS=-V', 'VERBOSE=1']) @@ -84,7 +84,7 @@ 'pbbam zero-byte input check', pbbam_cram_script, args : [ - '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-zero-byte-check.xml'), + '--xunit-file=' + join_paths(meson.project_build_root(), 'pbbam-cram-zero-byte-check.xml'), '--verbose'] + files('src/cram/zero_byte_check.t'), env : [ '__ZERO_BYTE_CHECK_EXE=' + pbbam_zero_byte_check.full_path(), diff -Nru pbbam-1.7.0+dfsg/tests/src/meson.build pbbam-2.0.0+dfsg/tests/src/meson.build --- pbbam-1.7.0+dfsg/tests/src/meson.build 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/meson.build 2022-01-13 18:20:29.000000000 +0000 @@ -46,6 +46,7 @@ 'test_PbiFilterQuery.cpp', 'test_QualityValues.cpp', 'test_Pulse2BaseCache.cpp', + 'test_ReadGroupHashing.cpp', 'test_ReadGroupInfo.cpp', 'test_RunMetadata.cpp', 'test_SamIO.cpp', diff -Nru pbbam-1.7.0+dfsg/tests/src/test_BamRecordClipping.cpp pbbam-2.0.0+dfsg/tests/src/test_BamRecordClipping.cpp --- pbbam-1.7.0+dfsg/tests/src/test_BamRecordClipping.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_BamRecordClipping.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -2396,4 +2396,551 @@ } +TEST(BAM_BamRecordClipping, clips_ccs_kinetics_tags) +{ + auto MakeCcsKineticsRecord = [](const bool forwardEmpty = false, const bool reverseEmpty = false) + { + BamRecordImpl impl; + impl.SetSequenceAndQualities("AACCGTTAGC", + "!#%(+0{} : std::vector{0,10,20,30,40,50,60,70,80,90}); + tags["fp"] = (forwardEmpty ? std::vector{} : std::vector{2,12,22,32,42,52,62,72,82,92}); + tags["ri"] = (reverseEmpty ? std::vector{} : std::vector{4,14,24,34,44,54,64,74,84,94}); + tags["rp"] = (reverseEmpty ? std::vector{} : std::vector{6,16,26,36,46,56,66,76,86,96}); + impl.Tags(tags); + + const auto rg = BamRecordClippingTests::MakeReadGroup(Data::FrameCodec::V1, "movie", "CCS"); + + BamRecord b(std::move(impl)); + b.header_.AddReadGroup(rg); + b.ReadGroup(rg); + return b; + }; + + { // normal clip - 2,7 + auto bamRecord = MakeCcsKineticsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 2, 7); + EXPECT_EQ(bamRecord.Sequence(), "CCGTT"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "%(+0<"); + + const std::vector expected_fi{20,30,40,50,60}; + const std::vector expected_fp{22,32,42,52,62}; + const std::vector expected_ri{34,44,54,64,74}; + const std::vector expected_rp{36,46,56,66,76}; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } + { // normal clip - 1,4 + auto bamRecord = MakeCcsKineticsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 1, 4); + EXPECT_EQ(bamRecord.Sequence(), "ACC"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "#%("); + + const std::vector expected_fi{10,20,30}; + const std::vector expected_fp{12,22,32}; + const std::vector expected_ri{64,74,84}; + const std::vector expected_rp{66,76,86}; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } + { // effectively no clipping + auto bamRecord = MakeCcsKineticsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 0, 10); + EXPECT_EQ(bamRecord.Sequence(), "AACCGTTAGC"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "!#%(+0 expected_fi{0,10,20,30,40,50,60,70,80,90}; + const std::vector expected_fp{2,12,22,32,42,52,62,72,82,92}; + const std::vector expected_ri{4,14,24,34,44,54,64,74,84,94}; + const std::vector expected_rp{6,16,26,36,46,56,66,76,86,96}; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } + { // clip to single base + auto bamRecord = MakeCcsKineticsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 6, 7); + EXPECT_EQ(bamRecord.Sequence(), "T"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "<"); + + const std::vector expected_fi{60}; + const std::vector expected_fp{62}; + const std::vector expected_ri{34}; + const std::vector expected_rp{36}; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } + { // clip to empty + auto bamRecord = MakeCcsKineticsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 8, 8); + EXPECT_EQ(bamRecord.Sequence(), ""); + EXPECT_EQ(bamRecord.Qualities().Fastq(), ""); + + const std::vector expected_fi; + const std::vector expected_fp; + const std::vector expected_ri; + const std::vector expected_rp; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } + + // partially missing tags + { // forward missing + auto bamRecord = MakeCcsKineticsRecord(true, false); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 2, 7); + EXPECT_EQ(bamRecord.Sequence(), "CCGTT"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "%(+0<"); + + const std::vector expected_fi{}; + const std::vector expected_fp{}; + const std::vector expected_ri{34,44,54,64,74}; + const std::vector expected_rp{36,46,56,66,76}; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } + { // reverse missing + auto bamRecord = MakeCcsKineticsRecord(false, true); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 2, 7); + EXPECT_EQ(bamRecord.Sequence(), "CCGTT"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "%(+0<"); + + const std::vector expected_fi{20,30,40,50,60}; + const std::vector expected_fp{22,32,42,52,62}; + const std::vector expected_ri{}; + const std::vector expected_rp{}; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } + { // forward and reverse missing + auto bamRecord = MakeCcsKineticsRecord(true, true); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, 2, 7); + EXPECT_EQ(bamRecord.Sequence(), "CCGTT"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "%(+0<"); + + const std::vector expected_fi{}; + const std::vector expected_fp{}; + const std::vector expected_ri{}; + const std::vector expected_rp{}; + + EXPECT_EQ(expected_fi, bamRecord.ForwardIPD().DataRaw()); + EXPECT_EQ(expected_fp, bamRecord.ForwardPulseWidth().DataRaw()); + EXPECT_EQ(expected_ri, bamRecord.ReverseIPD().DataRaw()); + EXPECT_EQ(expected_rp, bamRecord.ReversePulseWidth().DataRaw()); + } +} + // clang-format on + +constexpr char defaultSequence[] = "ACTCCACGACTCGTCACACTCACGTCTCA"; +constexpr char defaultBasemods[] = "C+m,3,1,4;"; +const std::vector defaultBasemodsQVs = {18, 128, 234}; + +TEST(BAM_BamRecordClipping, clips_basemods_tags) +{ + const auto MakeCcsBasemodsRecord = []( + const std::string& str = defaultSequence, + const std::string& qual = "hNfLpfSlpk59K>22LC'x*2W=*0GWv", + const std::string& basemods = defaultBasemods, + const std::vector& basemodsQVs = defaultBasemodsQVs) { + BamRecordImpl impl; + impl.SetSequenceAndQualities(str, qual); + + // tags + TagCollection tags; + tags["Mm"] = basemods; + tags["Ml"] = basemodsQVs; + impl.Tags(tags); + + const auto rg = BamRecordClippingTests::MakeReadGroup(Data::FrameCodec::V1, "movie", "CCS"); + + BamRecord b(std::move(impl)); + b.header_.AddReadGroup(rg); + b.ReadGroup(rg); + return b; + }; + + { // empty clip, no CpG site - 1,4 + static const char seq[] = "ATTGA"; + static const char basemods[] = "C+m;"; + constexpr int32_t start = 1; + constexpr int32_t end = 4; + + auto bamRecord = MakeCcsBasemodsRecord(seq, "!#a%(", basemods, {}); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "TTG"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "#a%"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = + BamRecord::ClipBasemodsTag(seq, basemods, {}, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{})); + } + + { // lost all basemods clip (before first CpG site) - 1,5 + constexpr int32_t start = 1; + constexpr int32_t end = 5; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "CTCC"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "NfLp"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{3, 1, 4})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{18, 128, 234})); + } + + { // lost all basemods clip (between CpG sites) - 13,16 + constexpr int32_t start = 13; + constexpr int32_t end = 16; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "TCA"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), ">22"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{3, 1})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{18, 128})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{4})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{234})); + } + + { // lost all basemods clip (past last CpG site) - 25,28 + constexpr int32_t start = 25; + constexpr int32_t end = 28; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "CTC"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "0GW"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{3, 1, 4})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{18, 128, 234})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{})); + } + + { // normal clip, first CpG site, lost some leading Cs - 3,9 + constexpr int32_t start = 3; + constexpr int32_t end = 9; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "CCACGA"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "LpfSlp"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,2;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{18}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 1); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{2})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{18})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{1, 4})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{128, 234})); + } + + { // normal clip, first CpG site, lost all leading Cs - 6,10 + constexpr int32_t start = 6; + constexpr int32_t end = 10; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "CGAC"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "Slpk"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,0;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{18}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 3); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{0})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{18})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{1, 4})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{128, 234})); + } + + { // normal clip, middle CpG site - 9,18 + constexpr int32_t start = 9; + constexpr int32_t end = 18; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "CTCGTCACA"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "k59K>22LC"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,1;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{128}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{3})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{18})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{1})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{128})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{4})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{234})); + } + + { // normal clip, last CpG site - 12,27 + constexpr int32_t start = 12; + constexpr int32_t end = 27; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "GTCACACTCACGTCT"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "K>22LC'x*2W=*0G"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,4;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{234}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{3, 1})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{18, 128})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{4})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{234})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{})); + } + + { // normal clip, first two CpG sites - 4,20 + constexpr int32_t start = 4; + constexpr int32_t end = 20; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "CACGACTCGTCACACT"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "pfSlpk59K>22LC'x"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,1,1;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{18, 128}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 2); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{1, 1})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{18, 128})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{4})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{234})); + } + + { // normal clip, last two CpG sites - 10,26 + constexpr int32_t start = 10; + constexpr int32_t end = 26; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "TCGTCACACTCACGTC"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "59K>22LC'x*2W=*0"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,0,4;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{128, 234}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{3})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{18})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 1); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{0, 4})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{128, 234})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{})); + } + + { // cut into last CpG site - 1,23 + constexpr int32_t start = 1; + constexpr int32_t end = 23; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "CTCCACGACTCGTCACACTCAC"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "NfLpfSlpk59K>22LC'x*2W"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,3,1,4;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{18, 128, 234}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{3, 1, 4})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{18, 128, 234})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{})); + } + + { // no cut - 0,29 + constexpr int32_t start = 0; + constexpr int32_t end = 29; + + auto bamRecord = MakeCcsBasemodsRecord(); + bamRecord.Clip(ClipType::CLIP_TO_QUERY, start, end); + EXPECT_EQ(bamRecord.Sequence(), "ACTCCACGACTCGTCACACTCACGTCTCA"); + EXPECT_EQ(bamRecord.Qualities().Fastq(), "hNfLpfSlpk59K>22LC'x*2W=*0GWv"); + + const BamRecordImpl impl = bamRecord.Impl(); + const std::string basemodsString{impl.TagValue("Mm").ToString()}; + EXPECT_EQ(basemodsString, "C+m,3,1,4;"); + const std::vector basemodsQVs{impl.TagValue("Ml").ToUInt8Array()}; + const std::vector expectedQvs{18, 128, 234}; + EXPECT_EQ(basemodsQVs, expectedQvs); + + const auto splitBasemods = BamRecord::ClipBasemodsTag( + defaultSequence, defaultBasemods, defaultBasemodsQVs, start, end - start); + + EXPECT_EQ(splitBasemods.LeadingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.LeadingQuals, (std::vector{})); + EXPECT_EQ(splitBasemods.PrefixLostBases, 0); + + EXPECT_EQ(splitBasemods.RetainedSeparatingC, (std::vector{3, 1, 4})); + EXPECT_EQ(splitBasemods.RetainedQuals, (std::vector{18, 128, 234})); + + EXPECT_EQ(splitBasemods.TrailingSeparatingC, (std::vector{})); + EXPECT_EQ(splitBasemods.TrailingQuals, (std::vector{})); + } +} diff -Nru pbbam-1.7.0+dfsg/tests/src/test_DataSetCore.cpp pbbam-2.0.0+dfsg/tests/src/test_DataSetCore.cpp --- pbbam-1.7.0+dfsg/tests/src/test_DataSetCore.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_DataSetCore.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -29,19 +29,19 @@ TEST(BAM_DataSetCore, can_parse_xml_name_parts) { internal::XmlName name{"ns:node_name"}; - EXPECT_EQ(boost::string_ref("ns"), name.Prefix()); - EXPECT_EQ(boost::string_ref("node_name"), name.LocalName()); - EXPECT_EQ(boost::string_ref("ns:node_name"), name.QualifiedName()); + EXPECT_EQ("ns", name.Prefix()); + EXPECT_EQ("node_name", name.LocalName()); + EXPECT_EQ("ns:node_name", name.QualifiedName()); internal::XmlName bareName{"node_name"}; - EXPECT_EQ(boost::string_ref(""), bareName.Prefix()); - EXPECT_EQ(boost::string_ref("node_name"), bareName.LocalName()); - EXPECT_EQ(boost::string_ref("node_name"), bareName.QualifiedName()); + EXPECT_EQ("", bareName.Prefix()); + EXPECT_EQ("node_name", bareName.LocalName()); + EXPECT_EQ("node_name", bareName.QualifiedName()); internal::XmlName leadingColon{":node_name"}; - EXPECT_EQ(boost::string_ref(""), leadingColon.Prefix()); - EXPECT_EQ(boost::string_ref(":node_name"), leadingColon.LocalName()); - EXPECT_EQ(boost::string_ref(":node_name"), leadingColon.QualifiedName()); + EXPECT_EQ("", leadingColon.Prefix()); + EXPECT_EQ(":node_name", leadingColon.LocalName()); + EXPECT_EQ(":node_name", leadingColon.QualifiedName()); } TEST(BAM_DataSetCore, created_with_correct_defaults) @@ -564,3 +564,92 @@ EXPECT_EQ(dataset.BamFilenames().size(), 3); EXPECT_EQ(dataset.Samples(), expected); } + +TEST(BAM_DataSetCore, can_add_supplemental_resources) +{ + DataSet dataset; + EXPECT_EQ(0, dataset.SupplementalResources().Size()); + + ExternalResource resource1{"metatype", "id"}; + resource1.Name("file1"); + + ExternalResource resource2{"metatype", "id2"}; + resource2.Name("file2"); + + dataset.SupplementalResources().Add(resource1); + dataset.SupplementalResources().Add(resource2); + EXPECT_EQ(2, dataset.SupplementalResources().Size()); + + // disallow duplicates (checking on ResourceId) + const ExternalResource duplicateResource{"metatype", "id"}; + dataset.SupplementalResources().Add(duplicateResource); + EXPECT_EQ(2, dataset.SupplementalResources().Size()); + + // direct access + const SupplementalResources& resources = dataset.SupplementalResources(); + ASSERT_EQ(2, resources.Size()); + EXPECT_EQ("file1", resources[0].Name()); + EXPECT_EQ("file2", resources[1].Name()); + + // iterable + size_t i = 0; + for (auto r : resources) { + if (i == 0) { + EXPECT_EQ("file1", r.Name()); + } else { + EXPECT_EQ("file2", r.Name()); + } + ++i; + } +} + +TEST(BAM_DataSetCore, can_edit_supplemental_resources) +{ + DataSet dataset; + + ExternalResource resource{"metatype", "id"}; + resource.Name("file1"); + dataset.SupplementalResources().Add(resource); + + resource.Name("file2").ResourceId("id2"); + dataset.SupplementalResources().Add(resource); + EXPECT_EQ(2, dataset.SupplementalResources().Size()); + + // edit + dataset.SupplementalResources()[0].Name("some new name"); + EXPECT_EQ("some new name", dataset.SupplementalResources()[0].Name()); + EXPECT_EQ("file2", dataset.SupplementalResources()[1].Name()); +} + +TEST(BAM_DataSetCore, can_remove_supplemental_resources) +{ + DataSet dataset; + EXPECT_EQ(0, dataset.SupplementalResources().Size()); + + ExternalResource resource1{"metatype", "id"}; + resource1.Name("file1"); + + ExternalResource resource2{"metatype", "id2"}; + resource2.Name("file2"); + + dataset.SupplementalResources().Add(resource1); + dataset.SupplementalResources().Add(resource2); + EXPECT_EQ(2, dataset.SupplementalResources().Size()); + + // remove + dataset.SupplementalResources().Remove(resource1); + EXPECT_EQ(1, dataset.SupplementalResources().Size()); + + // direct access + const SupplementalResources& resources = dataset.SupplementalResources(); + EXPECT_EQ("file2", resources[0].Name()); + + // iterable + size_t i = 0; + for (auto r : resources) { + if (i == 0) { + EXPECT_EQ("file2", r.Name()); + } + ++i; + } +} diff -Nru pbbam-1.7.0+dfsg/tests/src/test_DataSetIO.cpp pbbam-2.0.0+dfsg/tests/src/test_DataSetIO.cpp --- pbbam-1.7.0+dfsg/tests/src/test_DataSetIO.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_DataSetIO.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -1685,3 +1685,58 @@ boost::iends_with(fn, "fsa")); } } + +TEST(BAM_DataSetIO, can_read_write_supplemental_resources) +{ + DataSet ds1{PbbamTestsConfig::Data_Dir + + "/dataset/supplemental_resource1.consensusreadset.xml"}; + const DataSet ds2{PbbamTestsConfig::Data_Dir + + "/dataset/supplemental_resource2.consensusreadset.xml"}; + + ASSERT_EQ(1, ds1.SupplementalResources().Size()); + EXPECT_EQ("report.txt", ds1.SupplementalResources()[0].ResourceId()); + ASSERT_EQ(1, ds2.SupplementalResources().Size()); + EXPECT_EQ("report2.txt", ds2.SupplementalResources()[0].ResourceId()); + + ds1 += ds2; + ASSERT_EQ(2, ds1.SupplementalResources().Size()); + EXPECT_EQ("report.txt", ds1.SupplementalResources()[0].ResourceId()); + EXPECT_EQ("report2.txt", ds1.SupplementalResources()[1].ResourceId()); + + std::ostringstream out; + ds1.SaveToStream(out); + + const DataSet ds3 = DataSet::FromXml(out.str()); + ASSERT_EQ(2, ds3.SupplementalResources().Size()); + EXPECT_EQ("report.txt", ds3.SupplementalResources()[0].ResourceId()); + EXPECT_EQ("report2.txt", ds3.SupplementalResources()[1].ResourceId()); +} + +TEST(Bam_DataSetIO, can_merge_from_various_supplemental_resource_counts) +{ + DataSet dataset{PbbamTestsConfig::Data_Dir + + "/dataset/supplemental_resource1.consensusreadset.xml"}; + EXPECT_EQ(1, dataset.SupplementalResources().Size()); + + const DataSet singleResoureDataset{PbbamTestsConfig::Data_Dir + + "/dataset/supplemental_resource2.consensusreadset.xml"}; + EXPECT_EQ(1, singleResoureDataset.SupplementalResources().Size()); + + const DataSet noResourceDataset{PbbamTestsConfig::Data_Dir + + "/dataset/supplemental_resource_empty.consensusreadset.xml"}; + EXPECT_EQ(0, noResourceDataset.SupplementalResources().Size()); + + const DataSet mutlipleResourceDataset{ + PbbamTestsConfig::Data_Dir + + "/dataset/supplemental_resource_multiple.consensusreadset.xml"}; + EXPECT_EQ(3, mutlipleResourceDataset.SupplementalResources().Size()); + + dataset += singleResoureDataset; + EXPECT_EQ(2, dataset.SupplementalResources().Size()); + + dataset += noResourceDataset; + EXPECT_EQ(2, dataset.SupplementalResources().Size()); + + dataset += mutlipleResourceDataset; + EXPECT_EQ(5, dataset.SupplementalResources().Size()); +} diff -Nru pbbam-1.7.0+dfsg/tests/src/test_PbiFilter.cpp pbbam-2.0.0+dfsg/tests/src/test_PbiFilter.cpp --- pbbam-1.7.0+dfsg/tests/src/test_PbiFilter.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_PbiFilter.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -22,6 +22,40 @@ static PbiRawData test2Bam_RawIndex() { PbiRawData index; + index.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::REFERENCE); + index.NumReads(4); + + PbiRawBasicData& subreadData = index.BasicData(); + subreadData.rgId_ = {-1197849594, -1197849594, -1197849594, -1197849594}; + subreadData.qStart_ = {2114, 2579, 4101, 5615}; + subreadData.qEnd_ = {2531, 4055, 5571, 6237}; + subreadData.holeNumber_ = {14743, 14743, 14743, 14743}; + subreadData.readQual_ = {0.901, 0.601, 0.901, 0.601}; + subreadData.ctxtFlag_ = {0, 1, 2, 3}; + subreadData.fileOffset_ = {35651584, 35655125, 35667128, 35679170}; + + PbiRawMappedData& mappedData = index.MappedData(); + mappedData.tId_ = {0, 0, 0, 0}; + mappedData.tStart_ = {9507, 8453, 8455, 9291}; + mappedData.tEnd_ = {9903, 9902, 9893, 9900}; + mappedData.aStart_ = {2130, 2581, 4102, 5619}; + mappedData.aEnd_ = {2531, 4055, 5560, 6237}; + mappedData.revStrand_ = {0, 1, 0, 1}; + mappedData.mapQV_ = {254, 254, 254, 254}; + mappedData.nM_ = {384, 1411, 1393, 598}; + mappedData.nMM_ = {0, 0, 0, 0}; + + PbiRawReferenceData& referenceData = index.ReferenceData(); + referenceData.entries_.emplace_back(0, 0, 3); + referenceData.entries_.emplace_back(1); + referenceData.entries_.emplace_back(PbiReferenceEntry::UNMAPPED_ID); + + return index; +} + +static PbiRawData test2Bam_RawBarcodedIndex() +{ + PbiRawData index; index.NumReads(4); PbiRawBasicData& subreadData = index.BasicData(); @@ -58,6 +92,7 @@ } static const PbiRawData shared_index = test2Bam_RawIndex(); +static const PbiRawData shared_barcoded_index = test2Bam_RawBarcodedIndex(); static void checkFilterRows(const PbiFilter& filter, const std::vector expectedRows) { @@ -72,6 +107,19 @@ } } +static void checkFilterBarcodedRows(const PbiFilter& filter, const std::vector expectedRows) +{ + if (expectedRows.empty()) { + for (size_t row = 0; row < shared_barcoded_index.NumReads(); ++row) { + EXPECT_FALSE(filter.Accepts(shared_barcoded_index, row)); + } + } else { + for (size_t row : expectedRows) { + EXPECT_TRUE(filter.Accepts(shared_barcoded_index, row)); + } + } +} + static void checkFilterInternals(const PbiFilter& filter, const PbiFilter::CompositionType expectedType, const size_t expectedNumChildren, @@ -412,15 +460,15 @@ { { const auto filter = PbiFilter{PbiBarcodeFilter{17}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } { const auto filter = PbiFilter{PbiBarcodeFilter{18}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } { const auto filter = PbiFilter{PbiBarcodeFilter{0}}; - PbiFilterTests::checkFilterRows(filter, std::vector{0}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{0}); } } @@ -428,20 +476,20 @@ { { const auto filter = PbiFilter{PbiBarcodeForwardFilter{17}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } { const auto filter = PbiFilter{PbiBarcodeForwardFilter{400}}; - PbiFilterTests::checkFilterRows(filter, std::vector{}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{}); } { const auto filter = PbiFilter{PbiBarcodeForwardFilter{{0, 256}}}; - PbiFilterTests::checkFilterRows(filter, std::vector{0, 2}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{0, 2}); } { //blacklist const auto filter = PbiFilter{PbiBarcodeForwardFilter{{0, 256}, Compare::NOT_CONTAINS}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } } @@ -449,11 +497,11 @@ { { const auto filter = PbiFilter{PbiBarcodeQualityFilter{80, Compare::GREATER_THAN_EQUAL}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } { const auto filter = PbiFilter{PbiBarcodeQualityFilter{40, Compare::LESS_THAN}}; - PbiFilterTests::checkFilterRows(filter, std::vector{}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{}); } } @@ -461,20 +509,20 @@ { { const auto filter = PbiFilter{PbiBarcodeReverseFilter{18}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } { const auto filter = PbiFilter{PbiBarcodeReverseFilter{400}}; - PbiFilterTests::checkFilterRows(filter, std::vector{}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{}); } { const auto filter = PbiFilter{PbiBarcodeReverseFilter{{1, 257}}}; - PbiFilterTests::checkFilterRows(filter, std::vector{0, 2}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{0, 2}); } { // blacklist const auto filter = PbiFilter{PbiBarcodeReverseFilter{{1, 257}, Compare::NOT_CONTAINS}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } } @@ -482,15 +530,15 @@ { { const auto filter = PbiFilter{PbiBarcodesFilter{17, 18}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } { const auto filter = PbiFilter{PbiBarcodesFilter{17, 19}}; - PbiFilterTests::checkFilterRows(filter, std::vector{}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{}); } { const auto filter = PbiFilter{PbiBarcodesFilter{std::make_pair(17, 18)}}; - PbiFilterTests::checkFilterRows(filter, std::vector{1, 3}); + PbiFilterTests::checkFilterBarcodedRows(filter, std::vector{1, 3}); } } @@ -995,8 +1043,8 @@ dataset.Filters().Add(filter); const auto generatedFilter = PbiFilter::FromDataSet(dataset); - PbiFilterTests::checkFilterRows(expectedFilter, expectedResults); - PbiFilterTests::checkFilterRows(generatedFilter, expectedResults); + PbiFilterTests::checkFilterBarcodedRows(expectedFilter, expectedResults); + PbiFilterTests::checkFilterBarcodedRows(generatedFilter, expectedResults); }; // single barcode @@ -1059,8 +1107,8 @@ dataset.Filters().Add(filter); const auto generatedFilter = PbiFilter::FromDataSet(dataset); - PbiFilterTests::checkFilterRows(expectedFilter, std::vector{0}); - PbiFilterTests::checkFilterRows(generatedFilter, std::vector{0}); + PbiFilterTests::checkFilterBarcodedRows(expectedFilter, std::vector{0}); + PbiFilterTests::checkFilterBarcodedRows(generatedFilter, std::vector{0}); } { // any adapters or barcodes diff -Nru pbbam-1.7.0+dfsg/tests/src/test_PbiFilterQuery.cpp pbbam-2.0.0+dfsg/tests/src/test_PbiFilterQuery.cpp --- pbbam-1.7.0+dfsg/tests/src/test_PbiFilterQuery.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_PbiFilterQuery.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -375,7 +375,7 @@ TEST(BAM_PbiFilterQuery, can_filter_read_groups_from_xml) { - const BamFile file{PbbamTestsConfig::Data_Dir + "/phi29.bam"}; + const BamFile file{PbbamTestsConfig::Data_Dir + "/transcript.subreads.bam"}; const std::string xmlHeader = R"_XML_( + ResourceId="transcript.subreads.bam"> + ResourceId="transcipt.subreads.bam.pbi"/> @@ -421,16 +421,16 @@ { // equal const std::string xmlProperty = - R"_XML_(\n)_XML_"; + R"_XML_(\n)_XML_"; const std::string xml = xmlHeader + xmlProperty + xmlFooter; const DataSet ds = DataSet::FromXml(xml); const PbiFilterQuery query{PbiFilter::FromDataSet(ds), file}; - EXPECT_EQ(120, query.NumReads()); - EXPECT_EQ(120, std::distance(query.begin(), query.end())); + EXPECT_EQ(4, query.NumReads()); + EXPECT_EQ(4, std::distance(query.begin(), query.end())); } { // not equal const std::string xmlProperty = - R"_XML_(\n)_XML_"; + R"_XML_(\n)_XML_"; const std::string xml = xmlHeader + xmlProperty + xmlFooter; const DataSet ds = DataSet::FromXml(xml); const PbiFilterQuery query{PbiFilter::FromDataSet(ds), file}; @@ -636,15 +636,17 @@ { const BamFile bamFile{PbbamTestsConfig::Data_Dir + std::string{"/barcoded_read_groups.bam"}}; - { // query read group with no barcodes - should catche all, barcoded or not + { + // query read group with no barcodes, should not be mixed in barcoded files + // barcoded BAMs are "all or nothing", per the PBI spec const PbiReadGroupFilter filter{"0d7b28fa"}; PbiFilterQuery query{filter, bamFile}; - EXPECT_EQ(5, query.NumReads()); - EXPECT_EQ(5, std::distance(query.begin(), query.end())); + EXPECT_EQ(0, query.NumReads()); + EXPECT_EQ(0, std::distance(query.begin(), query.end())); } - { // query read group with barcode label - + { + // query read group with barcode label const ReadGroupInfo rg{"0d7b28fa/0--0"}; const PbiReadGroupFilter filter{rg}; @@ -652,8 +654,8 @@ EXPECT_EQ(1, query.NumReads()); EXPECT_EQ(1, std::distance(query.begin(), query.end())); } - { // query multiple read groups with barcode label - + { + // query multiple read groups with barcode label const ReadGroupInfo rg{"0d7b28fa/0--0"}; const ReadGroupInfo rg1{"0d7b28fa/1--0"}; const PbiReadGroupFilter filter{std::vector{rg, rg1}}; diff -Nru pbbam-1.7.0+dfsg/tests/src/test_ReadGroupHashing.cpp pbbam-2.0.0+dfsg/tests/src/test_ReadGroupHashing.cpp --- pbbam-1.7.0+dfsg/tests/src/test_ReadGroupHashing.cpp 1970-01-01 00:00:00.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_ReadGroupHashing.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -0,0 +1,501 @@ +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include "PbbamTestData.h" + +using namespace PacBio::BAM; + +namespace ReadGroupHashingTests { + +// clang-format off + +// ----------------------------------------------- +// IDs +// +// movie name : m54006_200116_134114 +// +// CCS: +// old : 550216e7 +// old_barcoded (8--8) : c68e726b/8--8 +// old_barcoded (199--199) : 8d2d0124/199--199 +// new : 550216e7 +// new barcoded (8--8) : 550216e7/8--8 +// new barcoded (199--199) : 550216e7/199--199 +// +// SUBREAD: +// old : 0388f94c +// old_barcoded (8--8) : e93f69d9/8--8 +// old_barcoded (199--199) : 9a04acc8/199--199 +// new : 0388f94c +// new barcoded (8--8) : 0388f94c/8--8 +// new barcoded (199--199) : 0388f94c/199--199 +// ----------------------------------------------- + +// +// CCS read groups, using legacy RG ID hash +// +static const std::string ccs_no_barcodes_OLD_HASH_RG{ + "@RG\tID:550216e7\tPL:PACBIO\tDS:READTYPE=CCS;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\t" + "PU:m54006_200116_134114\tPM:SEQUEL" +}; +static const std::string ccs_barcode_8_8_OLD_HASH_RG{ + "@RG\tID:c68e726b/8--8\tPL:PACBIO\t" + "DS:READTYPE=CCS;BINDINGKIT=100-619-300;SEQUENCINGKIT=100-619-400;" + "BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;BarcodeHash=foo;" + "BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:8--8" +}; +static const std::string ccs_barcode_199_199_OLD_HASH_RG{ + "@RG\tID:8d2d0124/199--199\tPL:PACBIO\tDS:READTYPE=CCS;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;" + "BarcodeHash=foo;BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:199--199" +}; +// +// CCS read groups, using fixed RG ID hash +// +static const std::string ccs_no_barcodes_NEW_HASH_RG{ + "@RG\tID:550216e7\tPL:PACBIO\tDS:READTYPE=CCS;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\t" + "PU:m54006_200116_134114\tPM:SEQUEL" +}; +static const std::string ccs_barcode_8_8_NEW_HASH_RG{ + "@RG\tID:550216e7/8--8\tPL:PACBIO\tDS:READTYPE=CCS;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;" + "BarcodeHash=foo;BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:8--8" +}; +static const std::string ccs_barcode_199_199_NEW_HASH_RG{ + "@RG\tID:550216e7/199--199\tPL:PACBIO\tDS:READTYPE=CCS;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;" + "BarcodeHash=foo;BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:199--199" +}; +// +// subread read groups, using legacy RG ID hash +// +static const std::string subread_no_barcodes_OLD_HASH_RG{ + "@RG\tID:0388f94c\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\t" + "PU:m54006_200116_134114\tPM:SEQUEL" +}; +static const std::string subread_barcode_8_8_OLD_HASH_RG{ + "@RG\tID:e93f69d9/8--8\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;" + "BarcodeHash=foo;BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:8--8" +}; +static const std::string subread_barcode_199_199_OLD_HASH_RG{ + "@RG\tID:9a04acc8/199--199\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;" + "BarcodeHash=foo;BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:199--199" +}; +// +// subread read groups, using fixed RG ID hash +// +static const std::string subread_no_barcodes_NEW_HASH_RG{ + "@RG\tID:0388f94c\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100" + "\tPU:m54006_200116_134114\tPM:SEQUEL" +}; +static const std::string subread_barcode_8_8_NEW_HASH_RG{ + "@RG\tID:0388f94c/8--8\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;" + "BarcodeHash=foo;BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:8--8" +}; +static const std::string subread_barcode_199_199_NEW_HASH_RG{ + "@RG\tID:0388f94c/199--199\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;" + "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100;BarcodeFile=foo;" + "BarcodeHash=foo;BarcodeCount=2;BarcodeMode=Symmetric;BarcodeQuality=Score\t" + "PU:m54006_200116_134114\tPM:SEQUEL\tBC:199--199" +}; + +static const std::string unrelated_read_group_RG{ + "@RG\tID:ab118ebd\tPL:PACBIO\tDS:READTYPE=CCS;Ipd:CodecV1=ip;PulseWidth:CodecV1=pw;" + "BINDINGKIT=101-490-800;SEQUENCINGKIT=101-490-900;BASECALLERVERSION=5.0.0;" + "FRAMERATEHZ=100.000000\tPU:m64011_190228_190319\tPM:SEQUELII\tCM:S/P3-C1/5.0-8M" +}; + +static const std::string Dir = PbbamTestsConfig::Data_Dir + "/read_groups/"; + +static const std::string ccs_no_barcodes_OLD_HASH_FILE{Dir + "old_hash_ccs.bam"}; // 5 +static const std::string ccs_barcode_8_8_OLD_HASH_FILE{Dir + "old_hash_ccs_barcode_8_8.bam"}; // 4 +static const std::string ccs_barcode_199_199_OLD_HASH_FILE{Dir + "old_hash_ccs_barcode_199_199.bam"}; // 3 +static const std::string ccs_barcodes_mixed_OLD_HASH_FILE{Dir + "old_hash_ccs_barcodes_mixed.bam"}; // 10 + +static const std::string ccs_no_barcodes_NEW_HASH_FILE{Dir + "new_hash_ccs.bam"}; // 5 +static const std::string ccs_barcode_8_8_NEW_HASH_FILE{Dir + "new_hash_ccs_barcode_8_8.bam"}; // 4 +static const std::string ccs_barcode_199_199_NEW_HASH_FILE{Dir + "new_hash_ccs_barcode_199_199.bam"}; // 3 +static const std::string ccs_barcodes_mixed_NEW_HASH_FILE{Dir + "new_hash_ccs_barcodes_mixed.bam"}; // 10 + +static const std::string subread_no_barcodes_OLD_HASH_FILE{Dir + "old_hash_subreads.bam"}; // 3 +static const std::string subread_barcode_8_8_OLD_HASH_FILE{Dir + "old_hash_subreads_barcode_8_8.bam"}; // 2 +static const std::string subread_barcode_199_199_OLD_HASH_FILE{Dir + "old_hash_subreads_barcode_199_199.bam"}; // 1 +static const std::string subread_barcodes_mixed_OLD_HASH_FILE{Dir + "old_hash_subreads_barcodes_mixed.bam"}; // 6 + +static const std::string subread_no_barcodes_NEW_HASH_FILE{Dir + "new_hash_subreads.bam"}; // 3 +static const std::string subread_barcode_8_8_NEW_HASH_FILE{Dir + "new_hash_subreads_barcode_8_8.bam"}; // 2 +static const std::string subread_barcode_199_199_NEW_HASH_FILE{Dir + "new_hash_subreads_barcode_199_199.bam"}; // 1 +static const std::string subread_barcodes_mixed_NEW_HASH_FILE{Dir + "new_hash_subreads_barcodes_mixed.bam"}; // 6 + +// clang-format on + +void CheckReadGroupFilter(const std::map& samReadGroupsCounts, + const std::string& fn) +{ + SCOPED_TRACE(fn); + + for (const auto& samReadGroupCount : samReadGroupsCounts) { + SCOPED_TRACE(samReadGroupCount.first); + const ReadGroupInfo rg = ReadGroupInfo::FromSam(samReadGroupCount.first); + const PbiReadGroupFilter filter{rg}; + PbiFilterQuery query{filter, fn}; + EXPECT_EQ(samReadGroupCount.second, query.NumReads()); + } +} + +} // namespace ReadGroupHashingTests + +// clang-format off + +TEST(BAM_ReadGroupHashing, can_filter_old_bam_with_old_barcode_read_hash) +{ + using namespace ReadGroupHashingTests; + + { + SCOPED_TRACE("file contains barcodes: none"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 5}, + {ccs_barcode_8_8_OLD_HASH_RG, 0}, + {ccs_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_no_barcodes_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 3}, + {subread_barcode_8_8_OLD_HASH_RG, 0}, + {subread_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_no_barcodes_OLD_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 0}, + {ccs_barcode_8_8_OLD_HASH_RG, 4}, + {ccs_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_8_8_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 0}, + {subread_barcode_8_8_OLD_HASH_RG, 2}, + {subread_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_8_8_OLD_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 199--199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 0}, + {ccs_barcode_8_8_OLD_HASH_RG, 0}, + {ccs_barcode_199_199_OLD_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_199_199_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 0}, + {subread_barcode_8_8_OLD_HASH_RG, 0}, + {subread_barcode_199_199_OLD_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_199_199_OLD_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8, 199-199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 0}, + {ccs_barcode_8_8_OLD_HASH_RG, 4}, + {ccs_barcode_199_199_OLD_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcodes_mixed_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 0}, + {subread_barcode_8_8_OLD_HASH_RG, 2}, + {subread_barcode_199_199_OLD_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcodes_mixed_OLD_HASH_FILE); + } +} + +TEST(BAM_ReadGroupHashing, can_filter_old_bam_with_new_barcode_read_hash) +{ + using namespace ReadGroupHashingTests; + + { + SCOPED_TRACE("file contains barcodes: none"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 5}, + {ccs_barcode_8_8_NEW_HASH_RG, 0}, + {ccs_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_no_barcodes_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 3}, + {subread_barcode_8_8_NEW_HASH_RG, 0}, + {subread_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_no_barcodes_OLD_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 0}, + {ccs_barcode_8_8_NEW_HASH_RG, 4}, + {ccs_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_8_8_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 0}, + {subread_barcode_8_8_NEW_HASH_RG, 2}, + {subread_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_8_8_OLD_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 199--199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 0}, + {ccs_barcode_8_8_NEW_HASH_RG, 0}, + {ccs_barcode_199_199_NEW_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_199_199_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 0}, + {subread_barcode_8_8_NEW_HASH_RG, 0}, + {subread_barcode_199_199_NEW_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_199_199_OLD_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8, 199-199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 0}, + {ccs_barcode_8_8_NEW_HASH_RG, 4}, + {ccs_barcode_199_199_NEW_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcodes_mixed_OLD_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 0}, + {subread_barcode_8_8_NEW_HASH_RG, 2}, + {subread_barcode_199_199_NEW_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcodes_mixed_OLD_HASH_FILE); + } +} + +TEST(BAM_ReadGroupHashing, can_filter_new_bam_with_old_barcode_read_hash) +{ + using namespace ReadGroupHashingTests; + + { + SCOPED_TRACE("file contains barcodes: none"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 5}, + {ccs_barcode_8_8_OLD_HASH_RG, 0}, + {ccs_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_no_barcodes_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 3}, + {subread_barcode_8_8_OLD_HASH_RG, 0}, + {subread_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_no_barcodes_NEW_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 0}, + {ccs_barcode_8_8_OLD_HASH_RG, 4}, + {ccs_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_8_8_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 0}, + {subread_barcode_8_8_OLD_HASH_RG, 2}, + {subread_barcode_199_199_OLD_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_8_8_NEW_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 199--199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 0}, + {ccs_barcode_8_8_OLD_HASH_RG, 0}, + {ccs_barcode_199_199_OLD_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_199_199_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 0}, + {subread_barcode_8_8_OLD_HASH_RG, 0}, + {subread_barcode_199_199_OLD_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_199_199_NEW_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8, 199-199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_OLD_HASH_RG, 0}, + {ccs_barcode_8_8_OLD_HASH_RG, 4}, + {ccs_barcode_199_199_OLD_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcodes_mixed_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_OLD_HASH_RG, 0}, + {subread_barcode_8_8_OLD_HASH_RG, 2}, + {subread_barcode_199_199_OLD_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcodes_mixed_NEW_HASH_FILE); + } +} + +TEST(BAM_ReadGroupHashing, can_filter_new_bam_with_new_barcode_read_hash) +{ + using namespace ReadGroupHashingTests; + + { + SCOPED_TRACE("file contains barcodes: none"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 5}, + {ccs_barcode_8_8_NEW_HASH_RG, 0}, + {ccs_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_no_barcodes_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 3}, + {subread_barcode_8_8_NEW_HASH_RG, 0}, + {subread_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_no_barcodes_NEW_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 0}, + {ccs_barcode_8_8_NEW_HASH_RG, 4}, + {ccs_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_8_8_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 0}, + {subread_barcode_8_8_NEW_HASH_RG, 2}, + {subread_barcode_199_199_NEW_HASH_RG, 0}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_8_8_NEW_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 199--199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 0}, + {ccs_barcode_8_8_NEW_HASH_RG, 0}, + {ccs_barcode_199_199_NEW_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcode_199_199_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 0}, + {subread_barcode_8_8_NEW_HASH_RG, 0}, + {subread_barcode_199_199_NEW_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcode_199_199_NEW_HASH_FILE); + } + { + SCOPED_TRACE("file contains barcodes: 8--8, 199-199"); + + const std::map ccsReadGroupCounts{ + {ccs_no_barcodes_NEW_HASH_RG, 0}, + {ccs_barcode_8_8_NEW_HASH_RG, 4}, + {ccs_barcode_199_199_NEW_HASH_RG, 3}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(ccsReadGroupCounts, ccs_barcodes_mixed_NEW_HASH_FILE); + + const std::map subreadReadGroupCounts{ + {subread_no_barcodes_NEW_HASH_RG, 0}, + {subread_barcode_8_8_NEW_HASH_RG, 2}, + {subread_barcode_199_199_NEW_HASH_RG, 1}, + {unrelated_read_group_RG, 0}, + }; + CheckReadGroupFilter(subreadReadGroupCounts, subread_barcodes_mixed_NEW_HASH_FILE); + } +} + +// clang-format on diff -Nru pbbam-1.7.0+dfsg/tests/src/test_ReadGroupInfo.cpp pbbam-2.0.0+dfsg/tests/src/test_ReadGroupInfo.cpp --- pbbam-1.7.0+dfsg/tests/src/test_ReadGroupInfo.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_ReadGroupInfo.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -9,8 +9,10 @@ #include +#include #include #include +#include #include "PbbamTestData.h" @@ -249,8 +251,8 @@ ASSERT_TRUE(barcodes); EXPECT_EQ(0, barcodes->first); EXPECT_EQ(1, barcodes->second); - EXPECT_EQ(0, rg.BarcodeForward().get()); - EXPECT_EQ(1, rg.BarcodeReverse().get()); + EXPECT_EQ(0, *rg.BarcodeForward()); + EXPECT_EQ(1, *rg.BarcodeReverse()); } TEST(BAM_ReadGroupInfo, can_determine_barcodes_from_id_string_and_barcode_pair) @@ -263,8 +265,8 @@ ASSERT_TRUE(barcodes); EXPECT_EQ(0, barcodes->first); EXPECT_EQ(1, barcodes->second); - EXPECT_EQ(0, rg.BarcodeForward().get()); - EXPECT_EQ(1, rg.BarcodeReverse().get()); + EXPECT_EQ(0, *rg.BarcodeForward()); + EXPECT_EQ(1, *rg.BarcodeReverse()); } TEST(BAM_ReadGroupInfo, returns_no_barcodes_from_non_barcoded_id) @@ -275,16 +277,16 @@ EXPECT_EQ("00082ba1", rg.BaseId()); const auto barcodes = rg.Barcodes(); - EXPECT_EQ(boost::none, barcodes); - EXPECT_EQ(boost::none, rg.BarcodeForward()); - EXPECT_EQ(boost::none, rg.BarcodeReverse()); + EXPECT_FALSE(barcodes); + EXPECT_FALSE(rg.BarcodeForward()); + EXPECT_FALSE(rg.BarcodeReverse()); } { // no '/' found const ReadGroupInfo rg{"00082ba1.0--1"}; const auto barcodes = rg.Barcodes(); - EXPECT_EQ(boost::none, barcodes); - EXPECT_EQ(boost::none, rg.BarcodeForward()); - EXPECT_EQ(boost::none, rg.BarcodeReverse()); + EXPECT_FALSE(barcodes); + EXPECT_FALSE(rg.BarcodeForward()); + EXPECT_FALSE(rg.BarcodeReverse()); } } @@ -292,9 +294,9 @@ { const ReadGroupInfo rg{""}; const auto barcodes = rg.Barcodes(); - EXPECT_EQ(boost::none, barcodes); - EXPECT_EQ(boost::none, rg.BarcodeForward()); - EXPECT_EQ(boost::none, rg.BarcodeReverse()); + EXPECT_FALSE(barcodes); + EXPECT_FALSE(rg.BarcodeForward()); + EXPECT_FALSE(rg.BarcodeReverse()); } TEST(BAM_ReadGroupInfo, throws_on_malformatted_barcoded_ids) @@ -306,4 +308,68 @@ EXPECT_THROW(ReadGroupInfo{"00082ba1/A--B"}, std::runtime_error); } +TEST(BAM_ReadGroupInfo, strandness_and_new_rg_ctors) +{ + const std::string movieName{"m64012_210628_093300"}; + const std::string readType{"CCS"}; + const PlatformModelType platform{PlatformModelType::SEQUELII}; + const std::pair barcodes{2,6}; + const PacBio::Data::Strand fwd{PacBio::Data::Strand::FORWARD}; + const PacBio::Data::Strand rev{PacBio::Data::Strand::REVERSE}; + + { + const ReadGroupInfo rg{ReadGroupInfoConfig{movieName, readType}}; + const ReadGroupInfo rgPlatform{ReadGroupInfoConfig{movieName, readType, platform}}; + EXPECT_EQ(rg.Id(), rgPlatform.Id()); + EXPECT_EQ(MD5Hash(movieName + "//" + readType).substr(0, 8), rg.Id()); + } + { + const ReadGroupInfo rg{ReadGroupInfoConfig{movieName, readType, platform, barcodes}}; + EXPECT_EQ(MD5Hash(movieName + "//" + readType).substr(0, 8) + "/2--6", rg.Id()); + } + { + const ReadGroupInfo rg{ReadGroupInfoConfig{movieName, readType, platform, {}, fwd}}; + EXPECT_EQ(fwd, rg.Strand()); + EXPECT_EQ(MD5Hash(movieName + "//" + readType + "//fwd").substr(0, 8), rg.Id()); + } + { + const ReadGroupInfo rg{ReadGroupInfoConfig{movieName, readType, platform, {}, rev}}; + EXPECT_EQ(rev, rg.Strand()); + EXPECT_EQ(MD5Hash(movieName + "//" + readType + "//rev").substr(0, 8), rg.Id()); + } + { + const ReadGroupInfo rg{ReadGroupInfoConfig{movieName, readType, platform, barcodes, fwd}}; + EXPECT_EQ(fwd, rg.Strand()); + EXPECT_EQ(MD5Hash(movieName + "//" + readType + "//fwd").substr(0, 8) + "/2--6", rg.Id()); + } + { + const ReadGroupInfo rg{ReadGroupInfoConfig{movieName, readType, platform, barcodes, rev}}; + EXPECT_EQ(rev, rg.Strand()); + EXPECT_EQ(MD5Hash(movieName + "//" + readType + "//rev").substr(0, 8) + "/2--6", rg.Id()); + + } + { + const ReadGroupInfo rgOrig{ReadGroupInfoConfig{movieName, readType, platform, barcodes, fwd}}; + const ReadGroupInfo rg = ReadGroupInfo::FromSam(rgOrig.ToSam()); + EXPECT_EQ(fwd, rg.Strand()); + EXPECT_EQ(barcodes.first, rg.Barcodes()->first); + EXPECT_EQ(barcodes.second, rg.Barcodes()->second); + EXPECT_EQ(movieName, rg.MovieName()); + EXPECT_EQ(readType, rg.ReadType()); + EXPECT_EQ(platform, rg.PlatformModel()); + EXPECT_EQ(MD5Hash(movieName + "//" + readType + "//fwd").substr(0, 8) + "/2--6", rg.Id()); + } + { + const ReadGroupInfo rgOrig{ReadGroupInfoConfig{movieName, readType, platform, barcodes, rev}}; + const ReadGroupInfo rg = ReadGroupInfo::FromSam(rgOrig.ToSam()); + EXPECT_EQ(rev, rg.Strand()); + EXPECT_EQ(barcodes.first, rg.Barcodes()->first); + EXPECT_EQ(barcodes.second, rg.Barcodes()->second); + EXPECT_EQ(movieName, rg.MovieName()); + EXPECT_EQ(readType, rg.ReadType()); + EXPECT_EQ(platform, rg.PlatformModel()); + EXPECT_EQ(MD5Hash(movieName + "//" + readType + "//rev").substr(0, 8) + "/2--6", rg.Id()); + } +} + // clang-format on diff -Nru pbbam-1.7.0+dfsg/tests/src/test_VcfFormat.cpp pbbam-2.0.0+dfsg/tests/src/test_VcfFormat.cpp --- pbbam-1.7.0+dfsg/tests/src/test_VcfFormat.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_VcfFormat.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -274,8 +274,8 @@ EXPECT_EQ("0", info.Number()); EXPECT_EQ("Flag", info.Type()); EXPECT_EQ("Imprecise structural variant", info.Description()); - EXPECT_FALSE(info.Source().is_initialized()); - EXPECT_FALSE(info.Version().is_initialized()); + EXPECT_FALSE(info.Source()); + EXPECT_FALSE(info.Version()); } TEST(VCF_VcfFormat, parsing_info_definition_throws_on_malformed_info_line) @@ -411,13 +411,13 @@ const auto& sampleGenotype = genotypes.at(0); ASSERT_EQ(4, sampleGenotype.data.size()); - EXPECT_EQ("0/1", sampleGenotype.data.at(0).value.get()); - EXPECT_EQ("2", sampleGenotype.data.at(1).value.get()); - EXPECT_EQ("5", sampleGenotype.data.at(2).value.get()); + EXPECT_EQ("0/1", *sampleGenotype.data.at(0).value); + EXPECT_EQ("2", *sampleGenotype.data.at(1).value); + EXPECT_EQ("5", *sampleGenotype.data.at(2).value); const auto& acData = sampleGenotype.data.at(3); ASSERT_EQ(2, acData.values->size()); EXPECT_EQ("1", acData.values->at(0)); EXPECT_EQ("2", acData.values->at(1)); - // ASSERT_TRUE(sampleGenotype.values.is_initialized()); + // ASSERT_TRUE(sampleGenotype.values); } diff -Nru pbbam-1.7.0+dfsg/tests/src/test_VcfHeader.cpp pbbam-2.0.0+dfsg/tests/src/test_VcfHeader.cpp --- pbbam-1.7.0+dfsg/tests/src/test_VcfHeader.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_VcfHeader.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -94,14 +94,14 @@ { InfoDefinition info{"id", "num", "type", "description"}; - EXPECT_FALSE(info.Source().is_initialized()); - EXPECT_FALSE(info.Version().is_initialized()); + EXPECT_FALSE(info.Source()); + EXPECT_FALSE(info.Version()); info.Source("source"); info.Version("version"); - EXPECT_TRUE(info.Source().is_initialized()); - EXPECT_TRUE(info.Version().is_initialized()); + EXPECT_TRUE(info.Source()); + EXPECT_TRUE(info.Version()); } TEST(VCF_VcfHeader, defaults_to_current_version) diff -Nru pbbam-1.7.0+dfsg/tests/src/test_VcfVariant.cpp pbbam-2.0.0+dfsg/tests/src/test_VcfVariant.cpp --- pbbam-1.7.0+dfsg/tests/src/test_VcfVariant.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tests/src/test_VcfVariant.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -96,8 +96,8 @@ { const VcfVariant v{VcfVariantTests::BasicVariantText}; const auto& value = v.InfoValue("SVTYPE"); - EXPECT_TRUE(value.is_initialized()); - EXPECT_EQ("INS", value.get()); + EXPECT_TRUE(value); + EXPECT_EQ("INS", *value); } TEST(VCF_VcfVariant, can_add_single_value_info_field) @@ -110,14 +110,14 @@ v.AddInfoField(i); EXPECT_TRUE(v.HasInfoField("NEW")); - EXPECT_EQ("42", v.InfoValue("NEW").get()); + EXPECT_EQ("42", *v.InfoValue("NEW")); } TEST(VCF_VcfVariant, can_fetch_multi_value_info_field) { const VcfVariant v{VcfVariantTests::BasicVariantText}; const auto& values = v.InfoValues("MULTI"); - EXPECT_TRUE(values.is_initialized()); + EXPECT_TRUE(values); EXPECT_EQ(3, values->size()); EXPECT_EQ("1", values->at(0)); EXPECT_EQ("2", values->at(1)); @@ -128,14 +128,14 @@ { VcfVariant v{VcfVariantTests::BasicVariantText}; auto value = v.InfoValue("SVTYPE"); - EXPECT_TRUE(value.is_initialized()); - EXPECT_EQ("INS", value.get()); + EXPECT_TRUE(value); + EXPECT_EQ("INS", *value); v.InfoValue("SVTYPE", std::string{"FOO"}); value = v.InfoValue("SVTYPE"); - EXPECT_TRUE(value.is_initialized()); - EXPECT_EQ("FOO", value.get()); + EXPECT_TRUE(value); + EXPECT_EQ("FOO", *value); } TEST(VCF_VcfVariant, can_edit_multi_value_info_field) @@ -143,7 +143,7 @@ VcfVariant v{VcfVariantTests::BasicVariantText}; auto values = v.InfoValues("MULTI"); - EXPECT_TRUE(values.is_initialized()); + EXPECT_TRUE(values); EXPECT_EQ(3, values->size()); EXPECT_EQ("1", values->at(0)); EXPECT_EQ("2", values->at(1)); @@ -153,7 +153,7 @@ v.InfoValues("MULTI", newData); values = v.InfoValues("MULTI"); - EXPECT_TRUE(values.is_initialized()); + EXPECT_TRUE(values); EXPECT_EQ(3, values->size()); EXPECT_EQ("42", values->at(0)); EXPECT_EQ("42", values->at(1)); @@ -181,12 +181,12 @@ VcfVariant v{VcfVariantTests::BasicVariantText}; EXPECT_TRUE(v.HasInfoField("SVLEN")); - EXPECT_EQ("INS", v.InfoValue("SVTYPE").get()); + EXPECT_EQ("INS", *v.InfoValue("SVTYPE")); v.RemoveInfoField("SVLEN"); EXPECT_FALSE(v.HasInfoField("SVLEN")); - EXPECT_EQ("INS", v.InfoValue("SVTYPE").get()); + EXPECT_EQ("INS", *v.InfoValue("SVTYPE")); } TEST(VCF_VcfVariant, can_fetch_all_genotype_ids) @@ -211,15 +211,15 @@ { const VcfVariant v{VcfVariantTests::BasicVariantText}; const auto& value = v.GenotypeValue(0, "AD"); - EXPECT_TRUE(value.is_initialized()); - EXPECT_EQ("2", value.get()); + EXPECT_TRUE(value); + EXPECT_EQ("2", *value); } TEST(VCF_VcfVariant, can_fetch_multi_value_genotype_field) { const VcfVariant v{VcfVariantTests::BasicVariantText}; const auto& values = v.GenotypeValues(0, "AC"); - EXPECT_TRUE(values.is_initialized()); + EXPECT_TRUE(values); ASSERT_EQ(2, values->size()); } diff -Nru pbbam-1.7.0+dfsg/tools/ccs-kinetics-bystrandify/src/CcsKineticsBystrandifyWorkflow.cpp pbbam-2.0.0+dfsg/tools/ccs-kinetics-bystrandify/src/CcsKineticsBystrandifyWorkflow.cpp --- pbbam-1.7.0+dfsg/tools/ccs-kinetics-bystrandify/src/CcsKineticsBystrandifyWorkflow.cpp 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tools/ccs-kinetics-bystrandify/src/CcsKineticsBystrandifyWorkflow.cpp 2022-01-13 18:20:29.000000000 +0000 @@ -5,12 +5,12 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -207,8 +207,8 @@ } bool IsXml = false; - boost::optional InputDatasetFile; - boost::optional OutputDatasetFile; + std::optional InputDatasetFile; + std::optional OutputDatasetFile; std::vector Tasks; }; diff -Nru pbbam-1.7.0+dfsg/tools/meson.build pbbam-2.0.0+dfsg/tools/meson.build --- pbbam-1.7.0+dfsg/tools/meson.build 2021-09-21 19:47:37.000000000 +0000 +++ pbbam-2.0.0+dfsg/tools/meson.build 2022-01-13 18:20:29.000000000 +0000 @@ -37,7 +37,7 @@ 'bam2sam_CramTests', pbbam_cram_script, args : [ - '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-bam2sam.xml'), + '--xunit-file=' + join_paths(meson.project_build_root(), 'pbbam-cram-bam2sam.xml'), '--verbose'] + [ bam2sam_t], timeout : 1800) @@ -114,7 +114,7 @@ 'pbindexdump_CramTests', pbbam_cram_script, args : [ - '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbindexdump.xml'), + '--xunit-file=' + join_paths(meson.project_build_root(), 'pbbam-cram-pbindexdump.xml'), '--verbose'] + [ pbindexdump_json_t, pbindexdump_cpp_t], @@ -173,7 +173,7 @@ 'pbmerge_CramTests', pbbam_cram_script, args : [ - '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbmerge.xml'), + '--xunit-file=' + join_paths(meson.project_build_root(), 'pbbam-cram-pbmerge.xml'), '--verbose'] + [ pbmerge_pacbio_ordering_t, pbmerge_aligned_ordering_t, @@ -230,11 +230,11 @@ 'pbbamify_CramTests', pbbam_cram_script, args : [ - '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbbamify.xml'), + '--xunit-file=' + join_paths(meson.project_build_root(), 'pbbam-cram-pbbamify.xml'), '--verbose'] + [ pbbamify_t], env : [ - 'SAMTOOLS=' + pbbam_test_samtools.path()], + 'SAMTOOLS=' + pbbam_test_samtools.full_path()], timeout : 1800) endif @@ -276,10 +276,10 @@ 'ccs-kinetics-bystrandify_CramTests', pbbam_cram_script, args : [ - '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-ccs-kinetics-bystrandify-check.xml'), + '--xunit-file=' + join_paths(meson.project_build_root(), 'pbbam-cram-ccs-kinetics-bystrandify-check.xml'), '--verbose'] + files('../tests/src/cram/ccs-kinetics-bystrandify.t'), env : [ 'CCS_KINETICS_BYSTRANDIFY=' + pbbam_ccs_kinetics_bystrandify.full_path(), - 'SAMTOOLS=' + pbbam_test_samtools.path(), + 'SAMTOOLS=' + pbbam_test_samtools.full_path(), ]) endif