diff -Nru htsjdk-2.22.0+dfsg/debian/changelog htsjdk-2.23.0+dfsg/debian/changelog --- htsjdk-2.22.0+dfsg/debian/changelog 2020-05-29 11:08:17.000000000 +0000 +++ htsjdk-2.23.0+dfsg/debian/changelog 2020-09-22 13:14:11.000000000 +0000 @@ -1,6 +1,25 @@ +htsjdk (2.23.0+dfsg-2) unstable; urgency=medium + + * source-only upload + + -- Olivier Sallou Tue, 22 Sep 2020 13:14:11 +0000 + +htsjdk (2.23.0+dfsg-1) unstable; urgency=medium + + * New upstream version + * Standards-Version: 4.5.0 (routine-update) + * debhelper-compat 13 (routine-update) + * Remove trailing whitespace in debian/changelog (routine-update) + * Add salsa-ci file (routine-update) + * Rules-Requires-Root: no (routine-update) + * Set upstream metadata fields: Bug-Database, Bug-Submit, Repository, + Repository-Browse. + + -- Andreas Tille Thu, 10 Sep 2020 17:20:43 +0200 + htsjdk (2.22.0+dfsg-1) unstable; urgency=medium - * New upstream release + * New upstream release -- Olivier Sallou Fri, 29 May 2020 11:08:17 +0000 diff -Nru htsjdk-2.22.0+dfsg/debian/compat htsjdk-2.23.0+dfsg/debian/compat --- htsjdk-2.22.0+dfsg/debian/compat 2020-05-29 11:08:17.000000000 +0000 +++ htsjdk-2.23.0+dfsg/debian/compat 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -12 diff -Nru htsjdk-2.22.0+dfsg/debian/control htsjdk-2.23.0+dfsg/debian/control --- htsjdk-2.22.0+dfsg/debian/control 2020-05-29 11:08:17.000000000 +0000 +++ htsjdk-2.23.0+dfsg/debian/control 2020-09-22 13:14:11.000000000 +0000 @@ -10,14 +10,14 @@ javahelper, gradle-debian-helper, maven-repo-helper, - debhelper (>= 12~), + debhelper-compat (= 13), libcommons-jexl2-java, libcommons-logging-java, libjaxb-api-java, libjaxb-java, libsnappy-java, libcommons-compress-java, - libxz-java (>= 1.5), + libxz-java, libngs-java, # required for correct dependencies for -doc default-jdk-doc, @@ -26,10 +26,11 @@ junit4, libjimfs-java, scala-library -Standards-Version: 4.3.0 +Standards-Version: 4.5.0 Vcs-Browser: https://salsa.debian.org/med-team/htsjdk Vcs-Git: https://salsa.debian.org/med-team/htsjdk.git Homepage: http://samtools.github.io/htsjdk/ +Rules-Requires-Root: no Package: libhtsjdk-java Architecture: all diff -Nru htsjdk-2.22.0+dfsg/debian/libhtsjdk-java-doc.javadoc htsjdk-2.23.0+dfsg/debian/libhtsjdk-java-doc.javadoc --- htsjdk-2.22.0+dfsg/debian/libhtsjdk-java-doc.javadoc 2020-05-29 11:08:17.000000000 +0000 +++ htsjdk-2.23.0+dfsg/debian/libhtsjdk-java-doc.javadoc 2020-09-22 13:14:11.000000000 +0000 @@ -1 +1 @@ -build/docs/javadoc /usr/share/doc/libhtsjdk-java/api/ +build/docs/javadoc usr/share/doc/libhtsjdk-java-doc/api/ diff -Nru htsjdk-2.22.0+dfsg/debian/patches/92-disable-testhttp.patch htsjdk-2.23.0+dfsg/debian/patches/92-disable-testhttp.patch --- htsjdk-2.22.0+dfsg/debian/patches/92-disable-testhttp.patch 2020-05-29 11:08:17.000000000 +0000 +++ htsjdk-2.23.0+dfsg/debian/patches/92-disable-testhttp.patch 2020-09-22 13:14:11.000000000 +0000 @@ -35,31 +35,6 @@ @Test(dataProvider = "customInflaterInput", singleThreaded = true) public void testCustomInflater(final CheckedExceptionInputStreamSupplier bcisSupplier, final Supplier> expectedOutputSupplier, ---- a/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java -+++ b/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java -@@ -22,22 +22,6 @@ - Assert.assertEquals(SeekableStreamFactory.isFilePath("ftp://broadinstitute.org"), false); - } - -- @DataProvider(name="getStreamForData") -- public Object[][] getStreamForData() throws Exception { -- return new Object[][] { -- { new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam").getAbsolutePath(), -- new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam").getAbsolutePath() }, -- { new File(TEST_DATA_DIR, "cram_with_bai_index.cram").getAbsolutePath(), -- new File(TEST_DATA_DIR, "cram_with_bai_index.cram").getAbsolutePath() }, -- { new URL("file://" + new File(TEST_DATA_DIR, "cram_with_bai_index.cram").getAbsolutePath()).toExternalForm(), -- new File(TEST_DATA_DIR, "cram_with_bai_index.cram").getAbsolutePath() }, -- { new URL(TestUtil.BASE_URL_FOR_HTTP_TESTS + "index_test.bam").toExternalForm(), -- new URL(TestUtil.BASE_URL_FOR_HTTP_TESTS + "index_test.bam").toExternalForm() }, -- { new URL(TestUtil.BASE_URL_FOR_HTTP_TESTS + "index_test.bam.bai").toExternalForm(), -- new URL(TestUtil.BASE_URL_FOR_HTTP_TESTS + "index_test.bam.bai").toExternalForm() } -- }; -- } -- - @Test(dataProvider = "getStreamForData") - public void testGetStreamFor(final String path, final String expectedPath) throws IOException { - Assert.assertEquals(SeekableStreamFactory.getInstance().getStreamFor(path).getSource(), expectedPath); --- a/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java +++ b/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java @@ -230,66 +230,6 @@ diff -Nru htsjdk-2.22.0+dfsg/debian/salsa-ci.yml htsjdk-2.23.0+dfsg/debian/salsa-ci.yml --- htsjdk-2.22.0+dfsg/debian/salsa-ci.yml 1970-01-01 00:00:00.000000000 +0000 +++ htsjdk-2.23.0+dfsg/debian/salsa-ci.yml 2020-09-22 13:14:11.000000000 +0000 @@ -0,0 +1,4 @@ +--- +include: + - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml + - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml diff -Nru htsjdk-2.22.0+dfsg/debian/upstream/metadata htsjdk-2.23.0+dfsg/debian/upstream/metadata --- htsjdk-2.22.0+dfsg/debian/upstream/metadata 1970-01-01 00:00:00.000000000 +0000 +++ htsjdk-2.23.0+dfsg/debian/upstream/metadata 2020-09-22 13:14:11.000000000 +0000 @@ -0,0 +1,5 @@ +--- +Bug-Database: https://github.com/samtools/htsjdk/issues +Bug-Submit: https://github.com/samtools/htsjdk/issues/new +Repository: https://github.com/samtools/htsjdk.git +Repository-Browse: https://github.com/samtools/htsjdk diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java 2020-07-08 16:28:01.000000000 +0000 @@ -27,7 +27,13 @@ import htsjdk.samtools.util.Iso8601Date; import htsjdk.samtools.util.SamConstants; -import java.util.*; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; /** * Header information about a read group. @@ -53,13 +59,13 @@ /* Platform values for the @RG-PL tag */ public enum PlatformValue { - CAPILLARY, LS454, ILLUMINA, + BGI, CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT, - ONT, PACBIO + ONT, PACBIO, OTHER } public static final Set STANDARD_TAGS = - new HashSet(Arrays.asList(READ_GROUP_ID_TAG, SEQUENCING_CENTER_TAG, DESCRIPTION_TAG, + new HashSet<>(Arrays.asList(READ_GROUP_ID_TAG, SEQUENCING_CENTER_TAG, DESCRIPTION_TAG, DATE_RUN_PRODUCED_TAG, FLOW_ORDER_TAG, KEY_SEQUENCE_TAG, LIBRARY_TAG, PROGRAM_GROUP_TAG, PREDICTED_MEDIAN_INSERT_SIZE_TAG, PLATFORM_TAG, PLATFORM_MODEL_TAG, PLATFORM_UNIT_TAG, READ_GROUP_SAMPLE_TAG, BARCODE_TAG)); @@ -121,8 +127,11 @@ public Date getRunDate() { final String dt = getAttribute(DATE_RUN_PRODUCED_TAG); - if (dt == null) return null; - else return new Iso8601Date(dt); + if (dt == null) { + return null; + } else { + return new Iso8601Date(dt); + } } public String getFlowOrder() { return getAttribute(FLOW_ORDER_TAG); } @@ -149,7 +158,9 @@ public Integer getPredictedMedianInsertSize() { final String stringRep = getAttribute(PREDICTED_MEDIAN_INSERT_SIZE_TAG); - if (stringRep == null) return null; + if (stringRep == null) { + return null; + } return Integer.parseInt(stringRep); } public void setPredictedMedianInsertSize(final Integer predictedMedianInsertSize) { @@ -173,12 +184,18 @@ @Override public boolean equals(final Object o) { if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + if (o == null || getClass() != o.getClass()) { + return false; + } final SAMReadGroupRecord that = (SAMReadGroupRecord) o; - if (!attributesEqual(that)) return false; - if (mReadGroupId != null ? !mReadGroupId.equals(that.mReadGroupId) : that.mReadGroupId != null) return false; + if (!attributesEqual(that)) { + return false; + } + if (mReadGroupId != null ? !mReadGroupId.equals(that.mReadGroupId) : that.mReadGroupId != null) { + return false; + } return true; } diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java 2020-07-08 16:28:01.000000000 +0000 @@ -26,6 +26,7 @@ import htsjdk.samtools.util.IOUtil; import java.io.File; import java.io.IOException; +import java.net.URI; import java.net.URL; import java.nio.channels.SeekableByteChannel; import java.util.function.Function; @@ -96,7 +97,14 @@ } else if (path.startsWith("ftp:")) { return new SeekableFTPStream(new URL(path)); } else if (path.startsWith("file:")) { - return new SeekableFileStream(new File(new URL(path).getPath())); + try { + // convert to URI in order to obtain a decoded version of the path string suitable + // for use with the File constructor + final String decodedPath = new URI(path).getPath(); + return new SeekableFileStream(new File(decodedPath)); + } catch (java.net.URISyntaxException e) { + throw new IllegalArgumentException(String.format("The input string %s contains a URI scheme but is not a valid URI", path), e); + } } else if (IOUtil.hasScheme(path)) { return new SeekablePathStream(IOUtil.getPath(path), wrapper); } else { diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java 2020-07-08 16:28:01.000000000 +0000 @@ -143,8 +143,9 @@ protected void validateOffset(int offset, final byte[] array) { if (offset < 0 || offset >= array.length) { - throw new IllegalArgumentException("The requested position is not covered by this " + this.getClass().getSimpleName() + - " object."); + throw new IllegalArgumentException("The requested position is not covered by this " + this.getClass().getSimpleName() + " object. " + + "\n Offset = " + offset + " Array length = " + array.length + + "\n Record is: " + getRecord().toString()); } } } diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java 2020-07-08 16:28:01.000000000 +0000 @@ -95,24 +95,33 @@ // 1-based reference position that the current base aligns to final int refPos = alignmentBlock.getReferenceStart(); - // 0-based offset from the aligned position of the first base in the read to the aligned position - // of the current base. - final int refOffset = refPos - rec.getAlignmentStart(); - final int refOffsetEnd = refPos - rec.getAlignmentStart() + alignmentBlock.getLength(); + if (accumulator.isEmpty()) { + accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), rec.getAlignmentStart())); + } + + // The accumulator should always have LocusInfos that correspond to one consecutive segment of loci from one reference + // sequence. So + // accumulator.get(0).getPosition() + accumulator.size() == accumulator.get(accumulator.size()-1).getPosition()+1 + final int accumulatorNextPosition = accumulator.get(0).getPosition() + accumulator.size(); + if (accumulatorNextPosition != accumulator.get(accumulator.size() - 1).getPosition() + 1) { + throw new IllegalStateException("The accumulator has gotten into a funk. Cannot continue"); + } // Ensure there are AbstractLocusInfos up to and including this position - for (int j = accumulator.size(); j <= refOffsetEnd; ++j) { - accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), - rec.getAlignmentStart() + j)); + for (int locusPos = accumulatorNextPosition; locusPos <= refPos + alignmentBlock.getLength(); ++locusPos) { + accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), locusPos)); } /* Let's assume an alignment block starts in some locus. * We put two records to the accumulator. The first one has the "begin" type which corresponds to the locus * where the block starts. The second one has the "end" type which corresponds to the other locus where the block ends. */ - int refOffsetInterval = refOffset; // corresponds to the beginning of the alignment block - int refOffsetEndInterval = refOffsetEnd; + + // 0-based offset from the aligned position of the first base in the read to the aligned position + // of the current base. + int refOffsetInterval = refPos - rec.getAlignmentStart(); // corresponds to the beginning of the alignment block + int refOffsetEndInterval = refOffsetInterval + alignmentBlock.getLength();; int startShift = 0; // intersect intervals and alignment block @@ -133,7 +142,7 @@ } // if the alignment block ends out of an interval, shift the ending position final int readEnd = refPos + alignmentBlock.getLength(); - if (refPos + alignmentBlock.getLength() > intervalEnd) { + if (readEnd > intervalEnd) { refOffsetEndInterval = refOffsetEndInterval - (readEnd - intervalEnd) + 1; } } diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java 2020-07-08 16:28:01.000000000 +0000 @@ -2,39 +2,57 @@ import htsjdk.tribble.annotation.Strand; +import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; public class Gff3BaseData { private static final String ID_ATTRIBUTE_KEY = "ID"; private static final String NAME_ATTRIBUTE_KEY = "Name"; private static final String ALIAS_ATTRIBUTE_KEY = "Alias"; - final String contig; - final String source; - final String type; - final int start; - final int end; - final Strand strand; - final int phase; - final Map attributes; - final String id; - final String name; - final String alias; - - Gff3BaseData(final String contig, final String source, final String type, - final int start, final int end, final Strand strand, final int phase, - final Map attributes) { + private final String contig; + private final String source; + private final String type; + private final int start; + private final int end; + private final double score; + private final Strand strand; + private final int phase; + private final Map> attributes; + private final String id; + private final String name; + private final List aliases; + private final int hashCode; + + public Gff3BaseData(final String contig, final String source, final String type, + final int start, final int end, final Double score, final Strand strand, final int phase, + final Map> attributes) { this.contig = contig; this.source = source; this.type = type; this.start = start; this.end = end; + this.score = score; this.phase = phase; this.strand = strand; - this.attributes = Collections.unmodifiableMap(attributes); - this.id = attributes.get(ID_ATTRIBUTE_KEY); - this.name = attributes.get(NAME_ATTRIBUTE_KEY); - this.alias = attributes.get(ALIAS_ATTRIBUTE_KEY); + this.attributes = copyAttributesSafely(attributes); + this.id = Gff3Codec.extractSingleAttribute(attributes.get(ID_ATTRIBUTE_KEY)); + this.name = Gff3Codec.extractSingleAttribute(attributes.get(NAME_ATTRIBUTE_KEY)); + this.aliases = attributes.getOrDefault(ALIAS_ATTRIBUTE_KEY, Collections.emptyList()); + this.hashCode = computeHashCode(); + } + + private static Map> copyAttributesSafely(final Map> attributes) { + final Map> modifiableDeepMap = new LinkedHashMap<>(); + + for (final Map.Entry> entry : attributes.entrySet()) { + final List unmodifiableDeepList = Collections.unmodifiableList(new ArrayList<>(entry.getValue())); + modifiableDeepMap.put(entry.getKey(), unmodifiableDeepList); + } + + return Collections.unmodifiableMap(modifiableDeepMap); } @Override @@ -47,57 +65,109 @@ } final Gff3BaseData otherBaseData = (Gff3BaseData) other; - boolean ret = otherBaseData.contig.equals(contig) && - otherBaseData.source.equals(source) && - otherBaseData.type.equals(type) && - otherBaseData.start == start && - otherBaseData.end == end && - otherBaseData.phase == phase && - otherBaseData.strand.equals(strand) && - otherBaseData.attributes.equals(attributes); - if (id == null) { - ret = ret && otherBaseData.id == null; + boolean ret = otherBaseData.getContig().equals(getContig()) && + otherBaseData.getSource().equals(getSource()) && + otherBaseData.getType().equals(getType()) && + otherBaseData.getStart() == getStart() && + otherBaseData.getEnd() == getEnd() && + ((Double)otherBaseData.getScore()).equals(score) && + otherBaseData.getPhase() == getPhase() && + otherBaseData.getStrand().equals(getStrand()) && + otherBaseData.getAttributes().equals(getAttributes()); + if (getId() == null) { + ret = ret && otherBaseData.getId() == null; } else { - ret = ret && otherBaseData.id != null && otherBaseData.id.equals(id); + ret = ret && otherBaseData.getId() != null && otherBaseData.getId().equals(getId()); } - if (name == null) { - ret = ret && otherBaseData.name == null; + if (getName() == null) { + ret = ret && otherBaseData.getName() == null; } else { - ret = ret && otherBaseData.name != null && otherBaseData.name.equals(name); + ret = ret && otherBaseData.getName() != null && otherBaseData.getName().equals(getName()); } - if (alias == null) { - ret = ret && otherBaseData.alias == null; - } else { - ret = ret && otherBaseData.alias != null && otherBaseData.alias.equals(alias); - } + ret = ret && otherBaseData.getAliases().equals(getAliases()); return ret; } @Override public int hashCode() { - int hash = contig.hashCode(); - hash = 31 * hash + source.hashCode(); - hash = 31 * hash + type.hashCode(); - hash = 31 * hash + start; - hash = 31 * hash + end; - hash = 31 * hash + phase; - hash = 31 * hash + strand.hashCode(); - hash = 31 * hash + attributes.hashCode(); - if (id != null) { - hash = 31 * hash + id.hashCode(); - } + return hashCode; + } - if (name != null) { - hash = 31 * hash + name.hashCode(); + private int computeHashCode() { + int hash = getContig().hashCode(); + hash = 31 * hash + getSource().hashCode(); + hash = 31 * hash + getType().hashCode(); + hash = 31 * hash + getStart(); + hash = 31 * hash + getEnd(); + hash = 31 * hash + Double.hashCode(getScore()); + hash = 31 * hash + getPhase(); + hash = 31 * hash + getStrand().hashCode(); + hash = 31 * hash + getAttributes().hashCode(); + if (getId() != null) { + hash = 31 * hash + getId().hashCode(); } - if (alias != null) { - hash = 31 * hash + alias.hashCode(); + if (getName() != null) { + hash = 31 * hash + getName().hashCode(); } + hash = 31 * hash + aliases.hashCode(); + return hash; } + + public String getContig() { + return contig; + } + + public String getSource() { + return source; + } + + public String getType() { + return type; + } + + public int getStart() { + return start; + } + + public int getEnd() { + return end; + } + + public double getScore() { + return score; + } + + public Strand getStrand() { + return strand; + } + + public int getPhase() { + return phase; + } + + public Map> getAttributes() { + return attributes; + } + + public List getAttribute(final String key) { + return attributes.getOrDefault(key, Collections.emptyList()); + } + + public String getId() { + return id; + } + + public String getName() { + return name; + } + + public List getAliases() { + return aliases; + } } diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Codec.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Codec.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Codec.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Codec.java 2020-07-08 16:28:01.000000000 +0000 @@ -9,7 +9,6 @@ import htsjdk.tribble.AbstractFeatureCodec; import htsjdk.tribble.Feature; import htsjdk.tribble.FeatureCodecHeader; -import htsjdk.tribble.SimpleFeature; import htsjdk.tribble.TribbleException; import htsjdk.tribble.annotation.Strand; import htsjdk.tribble.index.tabix.TabixFormat; @@ -17,13 +16,13 @@ import htsjdk.tribble.util.ParsingUtils; + import java.io.*; import java.net.URLDecoder; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; import java.util.regex.Pattern; -import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; /** @@ -37,10 +36,7 @@ public class Gff3Codec extends AbstractFeatureCodec { - private static final char FIELD_DELIMITER = '\t'; - private static final char ATTRIBUTE_DELIMITER = ';'; - private static final char KEY_VALUE_SEPARATOR = '='; - private static final char VALUE_DELIMITER = ','; + private static final int NUM_FIELDS = 9; @@ -49,15 +45,12 @@ private static final int FEATURE_TYPE_INDEX = 2; private static final int START_LOCATION_INDEX = 3; private static final int END_LOCATION_INDEX = 4; + private static final int SCORE_INDEX = 5; private static final int GENOMIC_STRAND_INDEX = 6; private static final int GENOMIC_PHASE_INDEX = 7; private static final int EXTRA_FIELDS_INDEX = 8; - private static final String COMMENT_START = "#"; - private static final String DIRECTIVE_START = "##"; - - static final String PARENT_ATTRIBUTE_KEY = "Parent"; private static final String IS_CIRCULAR_ATTRIBUTE_KEY = "Is_circular"; private static final String ARTEMIS_FASTA_MARKER = ">"; @@ -67,20 +60,38 @@ private final Map> activeFeaturesWithIDs = new HashMap<>(); private final Map> activeParentIDs = new HashMap<>(); - private int currentLineNum = 0; - - private final Map sequenceRegionMap = new HashMap<>(); + private final Map sequenceRegionMap = new LinkedHashMap<>(); + private final Map commentsWithLineNumbers = new LinkedHashMap<>(); private final static Log logger = Log.getInstance(Gff3Codec.class); private boolean reachedFasta = false; + private DecodeDepth decodeDepth; + + private int currentLine = 0; + public Gff3Codec() { + this(DecodeDepth.DEEP); + } + + public Gff3Codec(final DecodeDepth decodeDepth) { super(Gff3Feature.class); + this.decodeDepth = decodeDepth; + } + + public enum DecodeDepth { + DEEP , + SHALLOW } @Override public Gff3Feature decode(final LineIterator lineIterator) throws IOException { + return decode(lineIterator, decodeDepth); + } + + private Gff3Feature decode(final LineIterator lineIterator, final DecodeDepth depth) throws IOException { + currentLine++; /* Basic strategy: Load features into deque, create maps from a features ID to it, and from a features parents' IDs to it. For each feature, link to parents using these maps. When reaching flush directive, fasta, or end of file, prepare to flush features by moving all active features to deque of features to flush, and clearing @@ -93,7 +104,6 @@ } final String line = lineIterator.next(); - currentLineNum++; if (reachedFasta) { //previously reached fasta, flush whatever is active @@ -107,37 +117,23 @@ return featuresToFlush.poll(); } - if (line.startsWith(COMMENT_START) && !line.startsWith(DIRECTIVE_START)) { + if (line.startsWith(Gff3Constants.COMMENT_START) && !line.startsWith(Gff3Constants.DIRECTIVE_START)) { + commentsWithLineNumbers.put(currentLine, line.substring(Gff3Constants.COMMENT_START.length())); return featuresToFlush.poll(); } - if (line.startsWith(DIRECTIVE_START)) { + if (line.startsWith(Gff3Constants.DIRECTIVE_START)) { parseDirective(line); return featuresToFlush.poll(); } - final List splitLine = ParsingUtils.split(line, FIELD_DELIMITER); - if (splitLine.size() != NUM_FIELDS) { - throw new TribbleException("Found an invalid number of columns in the given Gff3 file on line " - + currentLineNum + " - Given: " + splitLine.size() + " Expected: " + NUM_FIELDS + " : " + line); - } - - try { - final String contig = URLDecoder.decode(splitLine.get(CHROMOSOME_NAME_INDEX), "UTF-8"); - final String source = URLDecoder.decode(splitLine.get(ANNOTATION_SOURCE_INDEX), "UTF-8"); - final String type = URLDecoder.decode(splitLine.get(FEATURE_TYPE_INDEX), "UTF-8"); - final int start = Integer.parseInt(splitLine.get(START_LOCATION_INDEX)); - final int end = Integer.parseInt(splitLine.get(END_LOCATION_INDEX)); - final int phase = splitLine.get(GENOMIC_PHASE_INDEX).equals(".")? -1 : Integer.parseInt(splitLine.get(GENOMIC_PHASE_INDEX)); - final Strand strand = Strand.decode(splitLine.get(GENOMIC_STRAND_INDEX)); - final Map attributes = parseAttributes(splitLine.get(EXTRA_FIELDS_INDEX)); - final String parentIDAttribute = attributes.get(PARENT_ATTRIBUTE_KEY); - final List parentIDs = parentIDAttribute != null? ParsingUtils.split(parentIDAttribute, VALUE_DELIMITER) : new ArrayList<>(); - - final Gff3FeatureImpl thisFeature = new Gff3FeatureImpl(contig, source, type, start, end, strand, phase, attributes); - activeFeatures.add(thisFeature); + final Gff3FeatureImpl thisFeature = new Gff3FeatureImpl(parseLine(line, currentLine)); + activeFeatures.add(thisFeature); + if (depth == DecodeDepth.DEEP) { + //link to parents/children/co-features + final List parentIDs = thisFeature.getAttribute(Gff3Constants.PARENT_ATTRIBUTE_KEY); final String id = thisFeature.getID(); for (final String parentID : parentIDs) { @@ -171,11 +167,14 @@ child.addParent(thisFeature); } } - validateFeature(thisFeature); - return featuresToFlush.poll(); - } catch( final NumberFormatException ex ){ - throw new TribbleException("Cannot read integer value for start/end position!", ex); } + + validateFeature(thisFeature); + if (depth == DecodeDepth.SHALLOW) { + //flush all features immediatly + prepareToFlushFeatures(); + } + return featuresToFlush.poll(); } @@ -185,19 +184,71 @@ * @return map of keys to values for attributes of this feature * @throws UnsupportedEncodingException */ - static private Map parseAttributes(final String attributesString) throws UnsupportedEncodingException { - final Map attributes = new LinkedHashMap<>(); - final List splitLine = ParsingUtils.split(attributesString,ATTRIBUTE_DELIMITER); + static private Map> parseAttributes(final String attributesString) throws UnsupportedEncodingException { + if (attributesString.equals(Gff3Constants.UNDEFINED_FIELD_VALUE)) { + return Collections.emptyMap(); + } + final Map> attributes = new LinkedHashMap<>(); + final List splitLine = ParsingUtils.split(attributesString,Gff3Constants.ATTRIBUTE_DELIMITER); for(String attribute : splitLine) { - final List key_value = ParsingUtils.split(attribute,KEY_VALUE_SEPARATOR); - if (key_value.size()<2) { - continue; + final List key_value = ParsingUtils.split(attribute,Gff3Constants.KEY_VALUE_SEPARATOR); + if (key_value.size() != 2) { + throw new TribbleException("Attribute string " + attributesString + " is invalid"); } - attributes.put(URLDecoder.decode(key_value.get(0).trim(), "UTF-8"), URLDecoder.decode(key_value.get(1).trim(), "UTF-8")); + attributes.put(URLDecoder.decode(key_value.get(0).trim(), "UTF-8"), decodeAttributeValue(key_value.get(1).trim())); } return attributes; } + static private Gff3BaseData parseLine(final String line, final int currentLine) { + final List splitLine = ParsingUtils.split(line, Gff3Constants.FIELD_DELIMITER); + + if (splitLine.size() != NUM_FIELDS) { + throw new TribbleException("Found an invalid number of columns in the given Gff3 file at line + " + currentLine + " - Given: " + splitLine.size() + " Expected: " + NUM_FIELDS + " : " + line); + } + + try { + final String contig = URLDecoder.decode(splitLine.get(CHROMOSOME_NAME_INDEX), "UTF-8"); + final String source = URLDecoder.decode(splitLine.get(ANNOTATION_SOURCE_INDEX), "UTF-8"); + final String type = URLDecoder.decode(splitLine.get(FEATURE_TYPE_INDEX), "UTF-8"); + final int start = Integer.parseInt(splitLine.get(START_LOCATION_INDEX)); + final int end = Integer.parseInt(splitLine.get(END_LOCATION_INDEX)); + final double score = splitLine.get(SCORE_INDEX).equals(Gff3Constants.UNDEFINED_FIELD_VALUE) ? -1 : Double.parseDouble(splitLine.get(SCORE_INDEX)); + final int phase = splitLine.get(GENOMIC_PHASE_INDEX).equals(Gff3Constants.UNDEFINED_FIELD_VALUE) ? -1 : Integer.parseInt(splitLine.get(GENOMIC_PHASE_INDEX)); + final Strand strand = Strand.decode(splitLine.get(GENOMIC_STRAND_INDEX)); + final Map> attributes = parseAttributes(splitLine.get(EXTRA_FIELDS_INDEX)); + return new Gff3BaseData(contig, source, type, start, end, score, strand, phase, attributes); + } catch (final NumberFormatException ex ) { + throw new TribbleException("Cannot read integer value for start/end position from line " + currentLine + ". Line is: " + line, ex); + } catch (final IOException ex) { + throw new TribbleException("Cannot decode feature info from line " + currentLine + ". Line is: " + line, ex); + } + } + + /** + * Get list of sequence regions parsed by the codec. + * @return list of sequence regions + */ + public List getSequenceRegions() { + return Collections.unmodifiableList(new ArrayList<>(sequenceRegionMap.values())); + } + + /** + * Gets map from line number to comment found on that line. The text of the comment EXCLUDES the leading # which indicates a comment line. + * @return Map from line number to comment found on line + */ + public Map getCommentsWithLineNumbers() { + return Collections.unmodifiableMap(new LinkedHashMap<>(commentsWithLineNumbers)); + } + + /** + * Gets list of comments parsed by the codec. Excludes leading # which indicates a comment line. + * @return + */ + public List getCommentTexts() { + return Collections.unmodifiableList(new ArrayList<>(commentsWithLineNumbers.values())); + } + /** * If sequence region of feature's contig has been specified with sequence region directive, validates that * feature's coordinates are within the specified sequence region. TribbleException is thrown if invalid. @@ -208,7 +259,7 @@ final SequenceRegion region = sequenceRegionMap.get(feature.getContig()); if (feature.getStart() == region.getStart() && feature.getEnd() == region.getEnd()) { //landmark feature - final boolean isCircular = Boolean.parseBoolean(feature.getAttribute(IS_CIRCULAR_ATTRIBUTE_KEY)); + final boolean isCircular = Boolean.parseBoolean(extractSingleAttribute(feature.getAttribute(IS_CIRCULAR_ATTRIBUTE_KEY))); region.setCircular(isCircular); } if (region.isCircular()? !region.overlaps(feature) : !region.contains(feature)) { @@ -219,20 +270,8 @@ } @Override - public Feature decodeLoc(LineIterator lineIterator) { - final String line = lineIterator.next(); - - if (line.startsWith(COMMENT_START)) { - return null; - } - - final List splitLine = ParsingUtils.split(line,FIELD_DELIMITER); - - try { - return new SimpleFeature(splitLine.get(CHROMOSOME_NAME_INDEX), Integer.parseInt(splitLine.get(START_LOCATION_INDEX)), Integer.parseInt(splitLine.get(END_LOCATION_INDEX))); - } catch (final NumberFormatException ex ) { - throw new TribbleException("Cannot read integer value for start/end position!", ex); - } + public Feature decodeLoc(LineIterator lineIterator) throws IOException { + return decode(lineIterator, DecodeDepth.SHALLOW); } @Override @@ -256,7 +295,7 @@ if (Gff3Directive.toDirective(line) != Gff3Directive.VERSION3_DIRECTIVE) { return false; } - while (line.startsWith(COMMENT_START)) { + while (line.startsWith(Gff3Constants.COMMENT_START)) { line = br.readLine(); if ( line == null ) { return false; @@ -264,7 +303,7 @@ } // make sure line conforms to gtf spec - final List fields = ParsingUtils.split(line,FIELD_DELIMITER); + final List fields = ParsingUtils.split(line, Gff3Constants.FIELD_DELIMITER); canDecode &= fields.size() == NUM_FIELDS; @@ -300,13 +339,40 @@ return canDecode; } + static List decodeAttributeValue(final String attributeValue) { + //split on VALUE_DELIMITER, then decode + final List splitValues = ParsingUtils.split(attributeValue, Gff3Constants.VALUE_DELIMITER); + + final List decodedValues = new ArrayList<>(); + for (final String encodedValue : splitValues) { + try { + decodedValues.add(URLDecoder.decode(encodedValue.trim(), "UTF-8")); + } catch (final UnsupportedEncodingException ex) { + throw new TribbleException("Error decoding attribute " + encodedValue, ex); + } + } + + return decodedValues; + } + + static String extractSingleAttribute(final List values) { + if (values == null || values.isEmpty()) { + return null; + } + + if (values.size() != 1) { + throw new TribbleException("Attribute has multiple values when only one expected"); + } + return values.get(0); + } + @Override public FeatureCodecHeader readHeader(LineIterator lineIterator) { List header = new ArrayList<>(); while(lineIterator.hasNext()) { String line = lineIterator.peek(); - if (line.startsWith(COMMENT_START)) { + if (line.startsWith(Gff3Constants.COMMENT_START)) { header.add(line); lineIterator.next(); } else { @@ -407,29 +473,75 @@ /** * Enum for parsing directive lines. If information in directive line needs to be parsed beyond specifying directive type, decode method should be overriden */ - enum Gff3Directive { + public enum Gff3Directive { - VERSION3_DIRECTIVE("##gff-version\\s+3(?:.\\d)*(?:\\.\\d)*$"), + VERSION3_DIRECTIVE("##gff-version\\s+3(?:\\.\\d*)*$") { + @Override + protected Object decode(final String line) throws IOException { + final String[] splitLine = line.split("\\s+"); + return splitLine[1]; + } + + @Override + String encode(final Object object) { + if (object == null) { + throw new TribbleException("Cannot encode null in VERSION3_DIRECTIVE"); + } + if (!(object instanceof String)) { + throw new TribbleException("Cannot encode object of type " + object.getClass() + " in VERSION3_DIRECTIVE"); + } + + final String versionLine = "##gff-version " + (String)object; + if (!regexPattern.matcher(versionLine).matches()) { + throw new TribbleException("Version " + (String)object + " is not a valid version"); + } + + return versionLine; + } + }, SEQUENCE_REGION_DIRECTIVE("##sequence-region\\s+.+ \\d+ \\d+$") { - private int CONTIG_INDEX = 1; - private int START_INDEX = 2; - private int END_INDEX = 3; + final private int CONTIG_INDEX = 1; + final private int START_INDEX = 2; + final private int END_INDEX = 3; @Override - public Object decode(final String line) throws IOException { + protected Object decode(final String line) throws IOException { final String[] splitLine = line.split("\\s+"); final String contig = URLDecoder.decode(splitLine[CONTIG_INDEX], "UTF-8"); final int start = Integer.parseInt(splitLine[START_INDEX]); final int end = Integer.parseInt(splitLine[END_INDEX]); return new SequenceRegion(contig, start, end); } + + @Override + String encode(final Object object) { + if (object == null) { + throw new TribbleException("Cannot encode null in SEQUENCE_REGION_DIRECTIVE"); + } + if (!(object instanceof SequenceRegion)) { + throw new TribbleException("Cannot encode object of type " + object.getClass() + " in SEQUENCE_REGION_DIRECTIVE"); + } + + final SequenceRegion sequenceRegion = (SequenceRegion) object; + return "##sequence-region " + Gff3Writer.encodeString(sequenceRegion.getContig()) + " " + sequenceRegion.getStart() + " " + sequenceRegion.getEnd(); + } }, - FLUSH_DIRECTIVE("###$"), + FLUSH_DIRECTIVE("###$") { + @Override + String encode(final Object object) { + return "###"; + } + }, - FASTA_DIRECTIVE("##FASTA$"); + FASTA_DIRECTIVE("##FASTA$") { + @Override + String encode(final Object object) { + return "##FASTA"; + } + }; - private final Pattern regexPattern; + protected final Pattern regexPattern; Gff3Directive(String regex) { this.regexPattern = Pattern.compile(regex); @@ -444,9 +556,11 @@ return null; } - public Object decode(final String line) throws IOException { + protected Object decode(final String line) throws IOException { return null; } + + abstract String encode(final Object object); } } diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Constants.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Constants.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Constants.java 1970-01-01 00:00:00.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Constants.java 2020-07-08 16:28:01.000000000 +0000 @@ -0,0 +1,13 @@ +package htsjdk.tribble.gff; + +public class Gff3Constants { + public static final char FIELD_DELIMITER = '\t'; + public static final char ATTRIBUTE_DELIMITER = ';'; + public static final char KEY_VALUE_SEPARATOR = '='; + public static final char VALUE_DELIMITER = ','; + public static final String COMMENT_START = "#"; + public static final String DIRECTIVE_START = "##"; + public static final String UNDEFINED_FIELD_VALUE = "."; + public static final String PARENT_ATTRIBUTE_KEY = "Parent"; + public final static char END_OF_LINE_CHARACTER = '\n'; +} diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java 2020-07-08 16:28:01.000000000 +0000 @@ -30,12 +30,16 @@ private final Set topLevelFeatures = new HashSet<>(); public Gff3FeatureImpl(final String contig, final String source, final String type, - final int start, final int end, final Strand strand, final int phase, - final Map attributes) { - baseData = new Gff3BaseData(contig, source, type, start, end, strand, phase, attributes); + final int start, final int end, final Double score, final Strand strand, final int phase, + final Map> attributes) { + baseData = new Gff3BaseData(contig, source, type, start, end, score, strand, phase, attributes); } + public Gff3FeatureImpl(final Gff3BaseData baseData) { + this.baseData = baseData; + } + /** * Get the set of top level features from which this feature is descended * @return set of top level feature from which this feature is descended @@ -80,15 +84,15 @@ public Set getAncestors() { final List ancestors = new ArrayList<>(parents); for (final Gff3FeatureImpl parent : parents) { - ancestors.addAll(baseData.attributes.containsKey(DERIVES_FROM_ATTRIBUTE_KEY)? parent.getAncestors(baseData.attributes.get(DERIVES_FROM_ATTRIBUTE_KEY)) : parent.getAncestors()); + ancestors.addAll(getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty()? parent.getAncestors() : parent.getAncestors(new HashSet<>(baseData.getAttributes().get(DERIVES_FROM_ATTRIBUTE_KEY)))); } return new LinkedHashSet<>(ancestors); } - private Set getAncestors(final String derivingFrom) { + private Set getAncestors(final Collection derivingFrom) { final List ancestors = new ArrayList<>(); for (final Gff3FeatureImpl parent : parents) { - if (parent.getID().equals(derivingFrom) || parent.getAncestors().stream().anyMatch(f -> f.getID().equals(derivingFrom))) { + if (derivingFrom.contains(parent.getID()) || parent.getAncestors().stream().anyMatch(f -> derivingFrom.contains(f.getID()))) { ancestors.add(parent); ancestors.addAll(parent.getAncestors()); } @@ -103,7 +107,7 @@ @Override public Set getDescendents() { final List descendants = new ArrayList<>(children); - final Set idsInLineage = new HashSet<>(Collections.singleton(baseData.id)); + final Set idsInLineage = new HashSet<>(Collections.singleton(baseData.getId())); idsInLineage.addAll(children.stream().map(Gff3Feature::getID).collect(Collectors.toSet())); for(final Gff3FeatureImpl child : children) { descendants.addAll(child.getDescendents(idsInLineage)); @@ -112,8 +116,8 @@ } private Set getDescendents(final Set idsInLineage) { - final List childrenToAdd = children.stream().filter(c -> c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY) == null || - idsInLineage.contains(c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY))). + final List childrenToAdd = children.stream().filter(c -> c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty() || + !Collections.disjoint(idsInLineage, c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY))). collect(Collectors.toList()); final List descendants = new ArrayList<>(childrenToAdd); @@ -144,8 +148,8 @@ public void addParent(final Gff3FeatureImpl parent) { final Set topLevelFeaturesToAdd = new HashSet<>(parent.getTopLevelFeatures()); - if (baseData.attributes.containsKey(DERIVES_FROM_ATTRIBUTE_KEY)) { - topLevelFeaturesToAdd.removeIf(f -> !f.getID().equals(baseData.attributes.get(DERIVES_FROM_ATTRIBUTE_KEY)) && f.getDescendents().stream().noneMatch(f2 -> f2.getID()== null? false:f2.getID().equals(baseData.attributes.get(DERIVES_FROM_ATTRIBUTE_KEY)))); + if (!getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty()) { + topLevelFeaturesToAdd.removeIf(f -> !getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).contains(f.getID()) && f.getDescendents().stream().noneMatch(f2 -> f2.getID()!= null && getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).contains(f2.getID()))); } parents.add(parent); parent.addChild(this); @@ -184,7 +188,7 @@ public void addCoFeature(final Gff3FeatureImpl coFeature) { if (!parents.equals(coFeature.getParents())) { - throw new TribbleException("Co-features " + baseData.id + " do not have same parents"); + throw new TribbleException("Co-features " + baseData.getId() + " do not have same parents"); } for (final Gff3FeatureImpl feature : coFeatures) { feature.addCoFeatureShallow(coFeature); @@ -196,8 +200,8 @@ private void addCoFeatureShallow(final Gff3FeatureImpl coFeature) { coFeatures.add(coFeature); - if (!coFeature.getID().equals(baseData.id)) { - throw new TribbleException("Attempting to add co-feature with id " + coFeature.getID() + " to feature with id " + baseData.id); + if (!coFeature.getID().equals(baseData.getId())) { + throw new TribbleException("Attempting to add co-feature with id " + coFeature.getID() + " to feature with id " + baseData.getId()); } } diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Feature.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Feature.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Feature.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Feature.java 2020-07-08 16:28:01.000000000 +0000 @@ -23,45 +23,48 @@ default String getSource() { - return getBaseData().source; + return getBaseData().getSource(); } @Override default int getEnd() { - return getBaseData().end; + return getBaseData().getEnd(); } default Strand getStrand() { - return getBaseData().strand; + return getBaseData().getStrand(); } default int getPhase() { - return getBaseData().phase; + return getBaseData().getPhase(); } - default String getType() {return getBaseData().type;} + default String getType() {return getBaseData().getType();} @Override default String getContig() { - return getBaseData().contig; + return getBaseData().getContig(); } @Override default int getStart() { - return getBaseData().start; + return getBaseData().getStart(); } - default String getAttribute(final String key) { - return getBaseData().attributes.get(key); + + default List getAttribute(final String key) { + return getBaseData().getAttribute(key); } - default Map getAttributes() { return getBaseData().attributes;} + default Map> getAttributes() { return getBaseData().getAttributes();} + + default String getID() { return getBaseData().getId();} - default String getID() { return getBaseData().id;} + default String getName() { return getBaseData().getName();} - default String getName() { return getBaseData().name;} + default List getAliases() { return getBaseData().getAliases();} - default String getAlias() { return getBaseData().alias;} + default double getScore() { return getBaseData().getScore();} /** * Get BaseData object which contains all the basic information of the feature diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Writer.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Writer.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Writer.java 1970-01-01 00:00:00.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/Gff3Writer.java 2020-07-08 16:28:01.000000000 +0000 @@ -0,0 +1,175 @@ +package htsjdk.tribble.gff; + +import htsjdk.samtools.util.BlockCompressedOutputStream; +import htsjdk.samtools.util.FileExtensions; +import htsjdk.samtools.util.IOUtil; +import htsjdk.tribble.TribbleException; + +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + + +/** + * A class to write out gff3 files. Features are added using {@link #addFeature(Gff3Feature)}, directives using {@link #addDirective(Gff3Codec.Gff3Directive)}, + * and comments using {@link #addComment(String)}. Note that the version 3 directive is automatically added at creation, so should not be added separately. + */ +public class Gff3Writer implements Closeable { + + private final OutputStream out; + private final static String version = "3.1.25"; + + public Gff3Writer(final Path path) throws IOException { + if (FileExtensions.GFF3.stream().noneMatch(e -> path.toString().endsWith(e))) { + throw new TribbleException("File " + path + " does not have extension consistent with gff3"); + } + + final OutputStream outputStream = IOUtil.hasGzipFileExtension(path)? new BlockCompressedOutputStream(path.toFile()) : Files.newOutputStream(path); + out = new BufferedOutputStream(outputStream); + //start with version directive + initialize(); + } + + public Gff3Writer(final OutputStream stream) { + out = stream; + initialize(); + } + + private void initialize() { + try { + writeWithNewLine(Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE.encode(version)); + } catch (final IOException ex) { + throw new TribbleException("Error writing version directive", ex); + } + } + + private void writeWithNewLine(final String txt) throws IOException { + out.write(txt.getBytes()); + out.write(Gff3Constants.END_OF_LINE_CHARACTER); + } + + private void tryToWrite(final String string) { + try { + out.write(string.getBytes()); + } catch (final IOException ex) { + throw new TribbleException("Error writing out string " + string, ex); + } + } + + private void writeFirstEightFields(final Gff3Feature feature) throws IOException { + writeJoinedByDelimiter(Gff3Constants.FIELD_DELIMITER, this::tryToWrite, Arrays.asList( + encodeString(feature.getContig()), + encodeString(feature.getSource()), + encodeString(feature.getType()), + Integer.toString(feature.getStart()), + Integer.toString(feature.getEnd()), + feature.getScore() < 0 ? Gff3Constants.UNDEFINED_FIELD_VALUE : Double.toString(feature.getScore()), + feature.getStrand().toString(), + feature.getPhase() < 0 ? Gff3Constants.UNDEFINED_FIELD_VALUE : Integer.toString(feature.getPhase()) + ) + ); + } + + void writeAttributes(final Map> attributes) throws IOException { + if (attributes.isEmpty()) { + out.write(Gff3Constants.UNDEFINED_FIELD_VALUE.getBytes()); + } + + writeJoinedByDelimiter(Gff3Constants.ATTRIBUTE_DELIMITER, e -> writeKeyValuePair(e.getKey(), e.getValue()), attributes.entrySet()); + } + + void writeKeyValuePair(final String key, final List values) { + try { + tryToWrite(key); + out.write(Gff3Constants.KEY_VALUE_SEPARATOR); + writeJoinedByDelimiter(Gff3Constants.VALUE_DELIMITER, v -> tryToWrite(encodeString(v)), values); + } catch (final IOException ex) { + throw new TribbleException("error writing out key value pair " + key + " " + values); + } + } + + private void writeJoinedByDelimiter(final char delimiter, final Consumer consumer, final Collection fields) throws IOException { + boolean isNotFirstField = false; + for (final T field : fields) { + if (isNotFirstField) { + out.write(delimiter); + } else { + isNotFirstField = true; + } + + consumer.accept(field); + } + } + + /*** + * add a feature + * @param feature the feature to be added + * @throws IOException + */ + public void addFeature(final Gff3Feature feature) throws IOException { + writeFirstEightFields(feature); + out.write(Gff3Constants.FIELD_DELIMITER); + writeAttributes(feature.getAttributes()); + out.write(Gff3Constants.END_OF_LINE_CHARACTER); + } + + static String encodeString(final String s) { + try { + //URLEncoder.encode is hardcoded to change all spaces to +, but we want spaces left unchanged so have to do this + //+ is escaped to %2B, so no loss of information + return URLEncoder.encode(s, "UTF-8").replace("+", " "); + } catch (final UnsupportedEncodingException ex) { + throw new TribbleException("Encoding failure", ex); + } + } + + /** + * Add a directive with an object + * @param directive the directive to be added + * @param object the object to be encoded with the directive + * @throws IOException + */ + public void addDirective(final Gff3Codec.Gff3Directive directive, final Object object) throws IOException { + if (directive == Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE) { + throw new TribbleException("VERSION3_DIRECTIVE is automatically added and should not be added manually."); + } + writeWithNewLine(directive.encode(object)); + } + + /** + * Add a directive + * @param directive the directive to be added + * @throws IOException + */ + public void addDirective(final Gff3Codec.Gff3Directive directive) throws IOException { + if (directive == Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE) { + throw new TribbleException("VERSION3_DIRECTIVE is automatically added and should not be added manually."); + } + addDirective(directive, null); + } + + /** + * Add comment line + * @param comment the comment line (not including leading #) + * @throws IOException + */ + public void addComment(final String comment) throws IOException { + out.write(Gff3Constants.COMMENT_START.getBytes()); + writeWithNewLine(comment); + } + + @Override + public void close() throws IOException { + out.close(); + } +} \ No newline at end of file diff -Nru htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/SequenceRegion.java htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/SequenceRegion.java --- htsjdk-2.22.0+dfsg/src/main/java/htsjdk/tribble/gff/SequenceRegion.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/main/java/htsjdk/tribble/gff/SequenceRegion.java 2020-07-08 16:28:01.000000000 +0000 @@ -9,7 +9,8 @@ private final int start; private final int end; private final String contig; - private boolean isCircular; + private Boolean isCircular; + private int hashCode; SequenceRegion(final String contig, final int start, final int end) { this(contig, start, end, false); @@ -20,10 +21,12 @@ this.start = start; this.end = end; this.isCircular = isCircular; + hashCode = computeHashCode(); } void setCircular(final boolean isCircular) { this.isCircular = isCircular; + hashCode = computeHashCode(); } void setCircular() { @@ -41,9 +44,28 @@ public boolean isCircular(){return isCircular;} - public boolean equals(final SequenceRegion other) { - return other.start == start && other.end==end && other.contig.equals(contig) && other.isCircular == isCircular; + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (!(other instanceof SequenceRegion)) { + return false; + } + + final SequenceRegion otherSequenceRegion = (SequenceRegion) other; + return otherSequenceRegion.start == start && otherSequenceRegion.end==end && otherSequenceRegion.contig.equals(contig) && otherSequenceRegion.isCircular == isCircular; } + private int computeHashCode() { + int hash = contig.hashCode(); + hash = 31 * hash + start; + hash = 31 * hash + end; + hash = 31 * hash + isCircular.hashCode(); + return hash; + } + @Override + public int hashCode() { return hashCode;} } diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -3,10 +3,11 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.util.CloserUtil; import org.testng.annotations.Test; -import static org.testng.Assert.assertEquals; import java.io.File; +import static org.testng.Assert.assertEquals; + /** * Test the fix of a bug reported by s-andrews in which the use of an arithmetic rather than a logical right shift in BinaryCigarCodec.binaryCigarToCigarElement() * causes an overflow in the CIGAR when reading a BAM file for a read that spans a very large intron. diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/seekablestream/SeekableStreamFactoryTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -1,6 +1,7 @@ package htsjdk.samtools.seekablestream; import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.TestUtil; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -8,13 +9,16 @@ import java.io.File; import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URISyntaxException; import java.net.URL; +import java.nio.file.Paths; public class SeekableStreamFactoryTest extends HtsjdkTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); @Test - public void testIsFilePath() throws Exception { + public void testIsFilePath() { Assert.assertEquals(SeekableStreamFactory.isFilePath("x"), true); Assert.assertEquals(SeekableStreamFactory.isFilePath(""), true); Assert.assertEquals(SeekableStreamFactory.isFilePath("http://broadinstitute.org"), false); @@ -23,7 +27,7 @@ } @DataProvider(name="getStreamForData") - public Object[][] getStreamForData() throws Exception { + public Object[][] getStreamForData() throws MalformedURLException { return new Object[][] { { new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam").getAbsolutePath(), new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam").getAbsolutePath() }, @@ -43,4 +47,29 @@ Assert.assertEquals(SeekableStreamFactory.getInstance().getStreamFor(path).getSource(), expectedPath); } + @Test + public void testPathWithEmbeddedSpace() throws IOException { + final File testBam = new File(TEST_DATA_DIR, "BAMFileIndexTest/index_test.bam"); + + //create a temp dir with a space in the name and copy the test file there + final File tempDir = IOUtil.createTempDir("test spaces", ""); + Assert.assertTrue(tempDir.getAbsolutePath().contains(" ")); + tempDir.deleteOnExit(); + final File inputBam = new File(tempDir, "index_test.bam"); + inputBam.deleteOnExit(); + IOUtil.copyFile(testBam, inputBam); + + // make sure the input string we use is URL-encoded + final String inputString = Paths.get(inputBam.getAbsolutePath()).toUri().toString(); + Assert.assertFalse(inputString.contains(" ")); + Assert.assertTrue(inputString.contains("%20")); + + try (final SeekableStream seekableStream = + SeekableStreamFactory.getInstance().getStreamFor(inputString)) { + final int BYTES_TO_READ = 10; + Assert.assertEquals(seekableStream.read(new byte[BYTES_TO_READ], 0,BYTES_TO_READ), BYTES_TO_READ); + } + + } + } diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -25,10 +25,13 @@ import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; +import org.testng.Assert; import org.testng.annotations.Test; import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.util.Arrays; @@ -311,7 +314,7 @@ EdgeReadIterator iterator = new EdgeReadIterator(builder.getSamReader(), intervals); int locusPosition = 40; while (iterator.hasNext()) { - AbstractLocusInfo next = iterator.next(); + final AbstractLocusInfo next = iterator.next(); int position = next.getPosition(); assertEquals(locusPosition++, position); if (position == 40) { @@ -333,6 +336,24 @@ assertEquals(81, locusPosition); } + @Test + public void testNoGapsInLocusAccumulator() { + final SamReader reader = SamReaderFactory.make().open(new File("src/test/resources/htsjdk/samtools/util/sliver.sam")); + final EdgeReadIterator iterator = new EdgeReadIterator(reader, null); + + AbstractLocusInfo previous = null; + int counter = 0; + while (iterator.hasNext() && (previous == null || previous.getPosition() < 1_000_000)) { + counter++; + final AbstractLocusInfo next = iterator.next(); + if (previous != null) { + Assert.assertEquals(next.getPosition(), previous.getPosition() + 1); + } + previous = next; + } + Assert.assertEquals(counter, 1_000_000); + } + /** * Test for intersecting interval for read with a deletion in the middle */ diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -23,7 +23,6 @@ */ package htsjdk.samtools.util; - import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; @@ -32,18 +31,16 @@ import org.testng.annotations.Test; /** - * * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. - * */ public class EdgingRecordAndOffsetTest extends HtsjdkTest { - private final byte[] qualities = {30, 50, 50, 60, 60, 70 ,70, 70, 80, 90}; + private final byte[] qualities = {30, 50, 50, 60, 60, 70, 70, 70, 80, 90}; private final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'}; private SAMRecord record; @BeforeTest - public void setUp(){ + public void setUp() { record = new SAMRecord(new SAMFileHeader()); record.setReadName("testRecord"); record.setReadBases(bases); @@ -51,7 +48,7 @@ } @Test - public void testConstructor(){ + public void testConstructor() { EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3); Assert.assertEquals(qualities, typedRecordAndOffset.getBaseQualities()); Assert.assertEquals(bases, typedRecordAndOffset.getRecord().getReadBases()); @@ -62,7 +59,7 @@ } @Test - public void testGetSetStart(){ + public void testGetSetStart() { EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3); EdgingRecordAndOffset typedRecordAndOffsetEnd = EdgingRecordAndOffset.createEndRecord(typedRecordAndOffset); Assert.assertEquals(typedRecordAndOffset, typedRecordAndOffsetEnd.getStart()); @@ -70,7 +67,7 @@ } @Test - public void testNotEqualsTypedRecords(){ + public void testNotEqualsTypedRecords() { EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3); EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 5, 10, 3); Assert.assertNotEquals(typedRecordAndOffset.getBaseQuality(), secondEdgingRecordAndOffset.getBaseQuality()); @@ -78,14 +75,14 @@ } @Test - public void testGetOffset(){ + public void testGetOffset() { EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 5, 10, 3); Assert.assertEquals(70, secondEdgingRecordAndOffset.getBaseQuality()); Assert.assertEquals('C', secondEdgingRecordAndOffset.getReadBase()); } @Test - public void testGetQualityAtPosition(){ + public void testGetQualityAtPosition() { EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 1); Assert.assertEquals(50, secondEdgingRecordAndOffset.getBaseQuality(2)); } diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -4,6 +4,7 @@ import htsjdk.HtsjdkTest; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.TestUtils; +import htsjdk.tribble.TribbleException; import htsjdk.tribble.annotation.Strand; import htsjdk.tribble.readers.LineIterator; import org.testng.Assert; @@ -14,6 +15,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -67,6 +69,24 @@ Assert.assertEquals(countTotalFeatures, expectedTotalFeatures); } + @Test(dataProvider = "basicDecodeDataProvider") + public void basicShallowDecodeTest(final Path inputGff3, final int expectedTotalFeatures) throws IOException { + Assert.assertTrue((new Gff3Codec(Gff3Codec.DecodeDepth.SHALLOW)).canDecode(inputGff3.toAbsolutePath().toString())); + final AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(inputGff3.toAbsolutePath().toString(), null, new Gff3Codec(Gff3Codec.DecodeDepth.SHALLOW), false); + int countTotalFeatures = 0; + for (final Gff3Feature feature : reader.iterator()) { + countTotalFeatures++; + //shouldn't have any children, parents, or cofeatures + Assert.assertEquals(feature.getAncestors().size(), 0); + Assert.assertEquals(feature.getDescendents().size(), 0); + Assert.assertEquals(feature.getCoFeatures().size(), 0); + } + + Assert.assertEquals(countTotalFeatures, expectedTotalFeatures); + + + } + @DataProvider(name = "testGZippedDataProvider") Object[][] testGZippedDataProvider(){ return new Object[][] { @@ -86,7 +106,6 @@ final AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(inputGff3.toAbsolutePath().toString(), null, new Gff3Codec(), false); final AbstractFeatureReader readerGZipped = AbstractFeatureReader.getFeatureReader(inputGff3GZipped.toAbsolutePath().toString(), null, new Gff3Codec(), false); - final Set topLevelFeatures = new HashSet<>(); final Set topLevelFeaturesGZipped = new HashSet<>(); @@ -101,6 +120,34 @@ Assert.assertEquals(topLevelFeatures, topLevelFeaturesGZipped); } + @Test(dataProvider = "testGZippedDataProvider") + public void testGZippedShallow(final Path inputGff3, final Path inputGff3GZipped) throws IOException { + Assert.assertTrue((new Gff3Codec()).canDecode(inputGff3.toAbsolutePath().toString())); + Assert.assertTrue((new Gff3Codec()).canDecode(inputGff3GZipped.toAbsolutePath().toString())); + + final AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(inputGff3.toAbsolutePath().toString(), null, new Gff3Codec(Gff3Codec.DecodeDepth.SHALLOW), false); + final AbstractFeatureReader readerGZipped = AbstractFeatureReader.getFeatureReader(inputGff3GZipped.toAbsolutePath().toString(), null, new Gff3Codec(Gff3Codec.DecodeDepth.SHALLOW), false); + + final Set features = new HashSet<>(); + final Set featuresGZipped = new HashSet<>(); + + for (final Gff3Feature feature : reader.iterator()) { + features.add(feature); + Assert.assertEquals(feature.getAncestors().size(), 0); + Assert.assertEquals(feature.getDescendents().size(), 0); + Assert.assertEquals(feature.getCoFeatures().size(), 0); + } + + for (final Gff3Feature feature : readerGZipped.iterator()) { + featuresGZipped.add(feature); + Assert.assertEquals(feature.getAncestors().size(), 0); + Assert.assertEquals(feature.getDescendents().size(), 0); + Assert.assertEquals(feature.getCoFeatures().size(), 0); + } + + Assert.assertEquals(features, featuresGZipped); + } + @DataProvider(name = "sequenceRegionValidationDataProvider") Object[][] sequenceRegionValidationDataProvider() { return new Object[][] { @@ -132,7 +179,7 @@ Assert.assertEquals(feature.getSource(), "a source & also a str*)%nge source"); Assert.assertEquals(feature.getType(), "a region"); Assert.assertEquals(feature.getID(), "this is the ID of this wacky feature^&%##$%*&>,. ,."); - Assert.assertEquals(feature.getAttribute("Another key"), "Another=value"); + Assert.assertEquals(feature.getAttribute("Another key"), Arrays.asList("Another=value", "And a second, value")); } @@ -151,113 +198,113 @@ final Set canonicalGeneFeatures = new HashSet<>(); - final Gff3FeatureImpl canonicalGene_gene00001 = new Gff3FeatureImpl("ctg123", ".", "gene", 1000, 9000, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene00001", "Name", "EDEN")); + final Gff3FeatureImpl canonicalGene_gene00001 = new Gff3FeatureImpl("ctg123", ".", "gene", 1000, 9000, 1030d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene00001"), "Name", Collections.singletonList("EDEN"))); canonicalGeneFeatures.add(canonicalGene_gene00001); - final Gff3FeatureImpl canonicalGene_tfbs00001 = new Gff3FeatureImpl("ctg123", ".", "TF_binding_site", 1000, 1012, Strand.POSITIVE, -1, ImmutableMap.of("ID", "tfbs00001", "Parent", "gene00001")); + final Gff3FeatureImpl canonicalGene_tfbs00001 = new Gff3FeatureImpl("ctg123", ".", "TF_binding_site", 1000, 1012, 0.999d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("tfbs00001"), "Parent", Collections.singletonList("gene00001"))); canonicalGene_tfbs00001.addParent(canonicalGene_gene00001); canonicalGeneFeatures.add(canonicalGene_tfbs00001); - final Gff3FeatureImpl canonicalGene_mRNA00001 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1050, 9000, Strand.POSITIVE, -1, ImmutableMap.of("ID", "mRNA00001", "Name", "EDEN.1", "Parent", "gene00001")); + final Gff3FeatureImpl canonicalGene_mRNA00001 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1050, 9000, 1.37d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("mRNA00001"), "Name", Collections.singletonList("EDEN.1"), "Parent", Collections.singletonList("gene00001"))); canonicalGene_mRNA00001.addParent(canonicalGene_gene00001); canonicalGeneFeatures.add(canonicalGene_mRNA00001); - final Gff3FeatureImpl canonicalGene_mRNA00002 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1050, 9000, Strand.POSITIVE, -1, ImmutableMap.of("ID", "mRNA00002", "Name", "EDEN.2", "Parent", "gene00001")); + final Gff3FeatureImpl canonicalGene_mRNA00002 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1050, 9000, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("mRNA00002"), "Name", Collections.singletonList("EDEN.2"), "Parent", Collections.singletonList("gene00001"))); canonicalGene_mRNA00002.addParent(canonicalGene_gene00001); canonicalGeneFeatures.add(canonicalGene_mRNA00002); - final Gff3FeatureImpl canonicalGene_mRNA00003 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1300, 9000, Strand.POSITIVE, -1, ImmutableMap.of("ID", "mRNA00003", "Name", "EDEN.3", "Parent", "gene00001")); + final Gff3FeatureImpl canonicalGene_mRNA00003 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1300, 9000, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("mRNA00003"), "Name", Collections.singletonList("EDEN.3"), "Parent", Collections.singletonList("gene00001"))); canonicalGene_mRNA00003.addParent(canonicalGene_gene00001); canonicalGeneFeatures.add(canonicalGene_mRNA00003); - final Gff3FeatureImpl canonicalGene_exon00001 = new Gff3FeatureImpl("ctg123", ".", "exon", 1300, 1500, Strand.POSITIVE, -1, ImmutableMap.of("ID", "exon00001", "Parent", "mRNA00003")); + final Gff3FeatureImpl canonicalGene_exon00001 = new Gff3FeatureImpl("ctg123", ".", "exon", 1300, 1500, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("exon00001"), "Parent", Collections.singletonList("mRNA00003"))); canonicalGene_exon00001.addParent(canonicalGene_mRNA00003); canonicalGeneFeatures.add(canonicalGene_exon00001); - final Gff3FeatureImpl canonicalGene_exon00002 = new Gff3FeatureImpl("ctg123", ".", "exon", 1050, 1500, Strand.POSITIVE, -1, ImmutableMap.of("ID", "exon00002", "Parent", "mRNA00001,mRNA00002")); + final Gff3FeatureImpl canonicalGene_exon00002 = new Gff3FeatureImpl("ctg123", ".", "exon", 1050, 1500, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("exon00002"), "Parent", Arrays.asList("mRNA00001", "mRNA00002"))); canonicalGene_exon00002.addParent(canonicalGene_mRNA00001); canonicalGene_exon00002.addParent(canonicalGene_mRNA00002); canonicalGeneFeatures.add(canonicalGene_exon00002); - final Gff3FeatureImpl canonicalGene_exon00003 = new Gff3FeatureImpl("ctg123", ".", "exon", 3000, 3902, Strand.POSITIVE, -1, ImmutableMap.of("ID", "exon00003", "Parent", "mRNA00001,mRNA00003")); + final Gff3FeatureImpl canonicalGene_exon00003 = new Gff3FeatureImpl("ctg123", ".", "exon", 3000, 3902, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("exon00003"), "Parent", Arrays.asList("mRNA00001", "mRNA00003"))); canonicalGene_exon00003.addParent(canonicalGene_mRNA00001); canonicalGene_exon00003.addParent(canonicalGene_mRNA00003); canonicalGeneFeatures.add(canonicalGene_exon00003); - final Gff3FeatureImpl canonicalGene_exon00004 = new Gff3FeatureImpl("ctg123", ".", "exon", 5000, 5500, Strand.POSITIVE, -1, ImmutableMap.of("ID", "exon00004", "Parent", "mRNA00001,mRNA00002,mRNA00003")); + final Gff3FeatureImpl canonicalGene_exon00004 = new Gff3FeatureImpl("ctg123", ".", "exon", 5000, 5500, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("exon00004"), "Parent", Arrays.asList("mRNA00001", "mRNA00002", "mRNA00003"))); canonicalGene_exon00004.addParent(canonicalGene_mRNA00001); canonicalGene_exon00004.addParent(canonicalGene_mRNA00002); canonicalGene_exon00004.addParent(canonicalGene_mRNA00003); canonicalGeneFeatures.add(canonicalGene_exon00004); - final Gff3FeatureImpl canonicalGene_exon00005 = new Gff3FeatureImpl("ctg123", ".", "exon", 7000, 9000, Strand.POSITIVE, -1, ImmutableMap.of("ID", "exon00005", "Parent", "mRNA00001,mRNA00002,mRNA00003")); + final Gff3FeatureImpl canonicalGene_exon00005 = new Gff3FeatureImpl("ctg123", ".", "exon", 7000, 9000, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("exon00005"), "Parent", Arrays.asList("mRNA00001", "mRNA00002", "mRNA00003"))); canonicalGene_exon00005.addParent(canonicalGene_mRNA00001); canonicalGene_exon00005.addParent(canonicalGene_mRNA00002); canonicalGene_exon00005.addParent(canonicalGene_mRNA00003); canonicalGeneFeatures.add(canonicalGene_exon00005); - final Gff3FeatureImpl canonicalGene_cds00001_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 1201, 1500, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00001", "Parent", "mRNA00001", "Name", "edenprotein.1")); + final Gff3FeatureImpl canonicalGene_cds00001_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 1201, 1500, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00001"), "Parent", Collections.singletonList("mRNA00001"), "Name", Collections.singletonList("edenprotein.1"))); canonicalGene_cds00001_1.addParent(canonicalGene_mRNA00001); canonicalGeneFeatures.add(canonicalGene_cds00001_1); - final Gff3FeatureImpl canonicalGene_cds00001_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 3000, 3902, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00001", "Parent", "mRNA00001", "Name", "edenprotein.1")); + final Gff3FeatureImpl canonicalGene_cds00001_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 3000, 3902, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00001"), "Parent", Collections.singletonList("mRNA00001"), "Name", Collections.singletonList("edenprotein.1"))); canonicalGene_cds00001_2.addParent(canonicalGene_mRNA00001); canonicalGene_cds00001_2.addCoFeature(canonicalGene_cds00001_1); canonicalGeneFeatures.add(canonicalGene_cds00001_2); - final Gff3FeatureImpl canonicalGene_cds00001_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00001", "Parent", "mRNA00001", "Name", "edenprotein.1")); + final Gff3FeatureImpl canonicalGene_cds00001_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00001"), "Parent", Collections.singletonList("mRNA00001"), "Name", Collections.singletonList("edenprotein.1"))); canonicalGene_cds00001_3.addParent(canonicalGene_mRNA00001); canonicalGene_cds00001_3.addCoFeature(canonicalGene_cds00001_1); canonicalGene_cds00001_3.addCoFeature(canonicalGene_cds00001_2); canonicalGeneFeatures.add(canonicalGene_cds00001_3); - final Gff3FeatureImpl canonicalGene_cds00001_4 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00001", "Parent", "mRNA00001", "Name", "edenprotein.1")); + final Gff3FeatureImpl canonicalGene_cds00001_4 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00001"), "Parent", Collections.singletonList("mRNA00001"), "Name", Collections.singletonList("edenprotein.1"))); canonicalGene_cds00001_4.addParent(canonicalGene_mRNA00001); canonicalGene_cds00001_4.addCoFeature(canonicalGene_cds00001_1); canonicalGene_cds00001_4.addCoFeature(canonicalGene_cds00001_2); canonicalGene_cds00001_4.addCoFeature(canonicalGene_cds00001_3); canonicalGeneFeatures.add(canonicalGene_cds00001_4); - final Gff3FeatureImpl canonicalGene_cds00002_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 1201, 1500, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00002", "Parent", "mRNA00002", "Name", "edenprotein.2")); + final Gff3FeatureImpl canonicalGene_cds00002_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 1201, 1500, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00002"), "Parent", Collections.singletonList("mRNA00002"), "Name", Collections.singletonList("edenprotein.2"))); canonicalGene_cds00002_1.addParent(canonicalGene_mRNA00002); canonicalGeneFeatures.add(canonicalGene_cds00002_1); - final Gff3FeatureImpl canonicalGene_cds00002_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00002", "Parent", "mRNA00002", "Name", "edenprotein.2")); + final Gff3FeatureImpl canonicalGene_cds00002_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00002"), "Parent", Collections.singletonList("mRNA00002"), "Name", Collections.singletonList("edenprotein.2"))); canonicalGene_cds00002_2.addParent(canonicalGene_mRNA00002); canonicalGene_cds00002_2.addCoFeature(canonicalGene_cds00002_1); canonicalGeneFeatures.add(canonicalGene_cds00002_2); - final Gff3FeatureImpl canonicalGene_cds00002_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00002", "Parent", "mRNA00002", "Name", "edenprotein.2")); + final Gff3FeatureImpl canonicalGene_cds00002_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00002"), "Parent", Collections.singletonList("mRNA00002"), "Name", Collections.singletonList("edenprotein.2"))); canonicalGene_cds00002_3.addParent(canonicalGene_mRNA00002); canonicalGene_cds00002_3.addCoFeature(canonicalGene_cds00002_1); canonicalGene_cds00002_3.addCoFeature(canonicalGene_cds00002_2); canonicalGeneFeatures.add(canonicalGene_cds00002_3); - final Gff3FeatureImpl canonicalGene_cds00003_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 3301, 3902, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00003", "Parent", "mRNA00003", "Name", "edenprotein.3")); + final Gff3FeatureImpl canonicalGene_cds00003_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 3301, 3902, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00003"), "Parent", Collections.singletonList("mRNA00003"), "Name", Collections.singletonList("edenprotein.3"))); canonicalGene_cds00003_1.addParent(canonicalGene_mRNA00003); canonicalGeneFeatures.add(canonicalGene_cds00003_1); - final Gff3FeatureImpl canonicalGene_cds00003_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, Strand.POSITIVE, 1, ImmutableMap.of("ID", "cds00003", "Parent", "mRNA00003", "Name", "edenprotein.3")); + final Gff3FeatureImpl canonicalGene_cds00003_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, -1d, Strand.POSITIVE, 1, ImmutableMap.of("ID", Collections.singletonList("cds00003"), "Parent", Collections.singletonList("mRNA00003"), "Name", Collections.singletonList("edenprotein.3"))); canonicalGene_cds00003_2.addParent(canonicalGene_mRNA00003); canonicalGene_cds00003_2.addCoFeature(canonicalGene_cds00003_1); canonicalGeneFeatures.add(canonicalGene_cds00003_2); - final Gff3FeatureImpl canonicalGene_cds00003_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, Strand.POSITIVE, 1, ImmutableMap.of("ID", "cds00003", "Parent", "mRNA00003", "Name", "edenprotein.3")); + final Gff3FeatureImpl canonicalGene_cds00003_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, -1d, Strand.POSITIVE, 1, ImmutableMap.of("ID", Collections.singletonList("cds00003"), "Parent", Collections.singletonList("mRNA00003"), "Name", Collections.singletonList("edenprotein.3"))); canonicalGene_cds00003_3.addParent(canonicalGene_mRNA00003); canonicalGene_cds00003_3.addCoFeature(canonicalGene_cds00003_1); canonicalGene_cds00003_3.addCoFeature(canonicalGene_cds00003_2); canonicalGeneFeatures.add(canonicalGene_cds00003_3); - final Gff3FeatureImpl canonicalGene_cds00004_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 3391, 3902, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds00004", "Parent", "mRNA00003", "Name", "edenprotein.4")); + final Gff3FeatureImpl canonicalGene_cds00004_1 = new Gff3FeatureImpl("ctg123", ".", "CDS", 3391, 3902, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds00004"), "Parent", Collections.singletonList("mRNA00003"), "Name", Collections.singletonList("edenprotein.4"))); canonicalGene_cds00004_1.addParent(canonicalGene_mRNA00003); canonicalGeneFeatures.add(canonicalGene_cds00004_1); - final Gff3FeatureImpl canonicalGene_cds00004_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, Strand.POSITIVE, 1, ImmutableMap.of("ID", "cds00004", "Parent", "mRNA00003", "Name", "edenprotein.4")); + final Gff3FeatureImpl canonicalGene_cds00004_2 = new Gff3FeatureImpl("ctg123", ".", "CDS", 5000, 5500, -1d, Strand.POSITIVE, 1, ImmutableMap.of("ID", Collections.singletonList("cds00004"), "Parent", Collections.singletonList("mRNA00003"), "Name", Collections.singletonList("edenprotein.4"))); canonicalGene_cds00004_2.addParent(canonicalGene_mRNA00003); canonicalGene_cds00004_2.addCoFeature(canonicalGene_cds00004_1); canonicalGeneFeatures.add(canonicalGene_cds00004_2); - final Gff3FeatureImpl canonicalGene_cds00004_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, Strand.POSITIVE, 1, ImmutableMap.of("ID", "cds00004", "Parent", "mRNA00003", "Name", "edenprotein.4")); + final Gff3FeatureImpl canonicalGene_cds00004_3 = new Gff3FeatureImpl("ctg123", ".", "CDS", 7000, 7600, -1d, Strand.POSITIVE, 1, ImmutableMap.of("ID", Collections.singletonList("cds00004"), "Parent", Collections.singletonList("mRNA00003"), "Name", Collections.singletonList("edenprotein.4"))); canonicalGene_cds00004_3.addParent(canonicalGene_mRNA00003); canonicalGene_cds00004_3.addCoFeature(canonicalGene_cds00004_1); canonicalGene_cds00004_3.addCoFeature(canonicalGene_cds00004_2); @@ -271,40 +318,40 @@ final Set polycisctronicTranscriptFeatures = new HashSet<>(); - final Gff3FeatureImpl polycistronicTranscript_gene01 = new Gff3FeatureImpl("chrX", ".", "gene", 100, 200, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene01", "name", "resA")); - final Gff3FeatureImpl polycistronicTranscript_gene02 = new Gff3FeatureImpl("chrX", ".", "gene", 250, 350, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene02", "name", "resB")); - final Gff3FeatureImpl polycistronicTranscript_gene03 = new Gff3FeatureImpl("chrX", ".", "gene", 400, 500, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene03", "name", "resX")); - final Gff3FeatureImpl polycistronicTranscript_gene04 = new Gff3FeatureImpl("chrX", ".", "gene", 550, 650, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene04", "name", "resZ")); + final Gff3FeatureImpl polycistronicTranscript_gene01 = new Gff3FeatureImpl("chrX", ".", "gene", 100, 200, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "name", Collections.singletonList("resA"))); + final Gff3FeatureImpl polycistronicTranscript_gene02 = new Gff3FeatureImpl("chrX", ".", "gene", 250, 350, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene02"), "name", Collections.singletonList("resB"))); + final Gff3FeatureImpl polycistronicTranscript_gene03 = new Gff3FeatureImpl("chrX", ".", "gene", 400, 500, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene03"), "name", Collections.singletonList("resX"))); + final Gff3FeatureImpl polycistronicTranscript_gene04 = new Gff3FeatureImpl("chrX", ".", "gene", 550, 650, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene04"), "name", Collections.singletonList("resZ"))); polycisctronicTranscriptFeatures.add(polycistronicTranscript_gene01); polycisctronicTranscriptFeatures.add(polycistronicTranscript_gene02); polycisctronicTranscriptFeatures.add(polycistronicTranscript_gene03); polycisctronicTranscriptFeatures.add(polycistronicTranscript_gene04); - final Gff3FeatureImpl polycistronicTranscript_mRNA = new Gff3FeatureImpl("chrX", ".", "mRNA", 100, 650, Strand.POSITIVE, -1, ImmutableMap.of("ID", "tran01", "Parent", "gene01,gene02,gene03,gene04")); + final Gff3FeatureImpl polycistronicTranscript_mRNA = new Gff3FeatureImpl("chrX", ".", "mRNA", 100, 650, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("tran01"), "Parent", Arrays.asList("gene01", "gene02", "gene03", "gene04"))); polycistronicTranscript_mRNA.addParent(polycistronicTranscript_gene01); polycistronicTranscript_mRNA.addParent(polycistronicTranscript_gene02); polycistronicTranscript_mRNA.addParent(polycistronicTranscript_gene03); polycistronicTranscript_mRNA.addParent(polycistronicTranscript_gene04); polycisctronicTranscriptFeatures.add(polycistronicTranscript_mRNA); - final Gff3FeatureImpl polycistronicTranscript_exon = new Gff3FeatureImpl("chrX", ".", "exon", 100, 650, Strand.POSITIVE, -1, ImmutableMap.of("Parent", "tran01")); + final Gff3FeatureImpl polycistronicTranscript_exon = new Gff3FeatureImpl("chrX", ".", "exon", 100, 650, -1d, Strand.POSITIVE, -1, ImmutableMap.of("Parent", Collections.singletonList("tran01"))); polycistronicTranscript_exon.addParent(polycistronicTranscript_mRNA); polycisctronicTranscriptFeatures.add(polycistronicTranscript_exon); - final Gff3FeatureImpl polycistronicTranscript_CDS1 = new Gff3FeatureImpl("chrX", ".", "CDS", 100, 200, Strand.POSITIVE, 0, ImmutableMap.of("Parent", "tran01", "Derives_from", "gene01")); + final Gff3FeatureImpl polycistronicTranscript_CDS1 = new Gff3FeatureImpl("chrX", ".", "CDS", 100, 200, -1d, Strand.POSITIVE, 0, ImmutableMap.of("Parent", Collections.singletonList("tran01"), "Derives_from", Collections.singletonList("gene01"))); polycistronicTranscript_CDS1.addParent(polycistronicTranscript_mRNA); polycisctronicTranscriptFeatures.add(polycistronicTranscript_CDS1); - final Gff3FeatureImpl polycistronicTranscript_CDS2 = new Gff3FeatureImpl("chrX", ".", "CDS", 250, 350, Strand.POSITIVE, 0, ImmutableMap.of("Parent", "tran01", "Derives_from", "gene02")); + final Gff3FeatureImpl polycistronicTranscript_CDS2 = new Gff3FeatureImpl("chrX", ".", "CDS", 250, 350, -1d, Strand.POSITIVE, 0, ImmutableMap.of("Parent", Collections.singletonList("tran01"), "Derives_from", Collections.singletonList("gene02"))); polycistronicTranscript_CDS2.addParent(polycistronicTranscript_mRNA); polycisctronicTranscriptFeatures.add(polycistronicTranscript_CDS2); - final Gff3FeatureImpl polycistronicTranscript_CDS3 = new Gff3FeatureImpl("chrX", ".", "CDS", 400, 500, Strand.POSITIVE, 0, ImmutableMap.of("Parent", "tran01", "Derives_from", "gene03")); + final Gff3FeatureImpl polycistronicTranscript_CDS3 = new Gff3FeatureImpl("chrX", ".", "CDS", 400, 500, -1d, Strand.POSITIVE, 0, ImmutableMap.of("Parent", Collections.singletonList("tran01"), "Derives_from", Collections.singletonList("gene03"))); polycistronicTranscript_CDS3.addParent(polycistronicTranscript_mRNA); polycisctronicTranscriptFeatures.add(polycistronicTranscript_CDS3); - final Gff3FeatureImpl polycistronicTranscript_CDS4 = new Gff3FeatureImpl("chrX", ".", "CDS", 550, 650, Strand.POSITIVE, 0, ImmutableMap.of("Parent", "tran01", "Derives_from", "gene04")); + final Gff3FeatureImpl polycistronicTranscript_CDS4 = new Gff3FeatureImpl("chrX", ".", "CDS", 550, 650, -1d, Strand.POSITIVE, 0, ImmutableMap.of("Parent", Collections.singletonList("tran01"), "Derives_from", Collections.singletonList("gene04"))); polycistronicTranscript_CDS4.addParent(polycistronicTranscript_mRNA); polycisctronicTranscriptFeatures.add(polycistronicTranscript_CDS4); @@ -316,23 +363,23 @@ final Set programmedFrameshiftFeatures = new HashSet<>(); - final Gff3FeatureImpl programmedFrameshift_gene = new Gff3FeatureImpl("chrX", ".", "gene", 100, 200, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene01", "name", "my_gene")); + final Gff3FeatureImpl programmedFrameshift_gene = new Gff3FeatureImpl("chrX", ".", "gene", 100, 200, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "name", Collections.singletonList("my_gene"))); programmedFrameshiftFeatures.add(programmedFrameshift_gene); - final Gff3FeatureImpl programmedFrameshift_mRNA = new Gff3FeatureImpl("chrX", ".", "mRNA", 100, 200, Strand.POSITIVE, -1, ImmutableMap.of("ID", "tran01", "Parent", "gene01", "Ontology_term", "SO:1000069")); + final Gff3FeatureImpl programmedFrameshift_mRNA = new Gff3FeatureImpl("chrX", ".", "mRNA", 100, 200, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("tran01"), "Parent", Collections.singletonList("gene01"), "Ontology_term", Collections.singletonList("SO:1000069"))); programmedFrameshift_mRNA.addParent(programmedFrameshift_gene); programmedFrameshiftFeatures.add(programmedFrameshift_mRNA); - final Gff3FeatureImpl programmedFrameshift_exon = new Gff3FeatureImpl("chrX", ".", "exon", 100, 200, Strand.POSITIVE, -1, ImmutableMap.of("ID", "exon01", "Parent", "tran01")); + final Gff3FeatureImpl programmedFrameshift_exon = new Gff3FeatureImpl("chrX", ".", "exon", 100, 200, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("exon01"), "Parent", Collections.singletonList("tran01"))); programmedFrameshift_exon.addParent(programmedFrameshift_mRNA); programmedFrameshiftFeatures.add(programmedFrameshift_exon); - final Gff3FeatureImpl programmedFrameshift_CDS1_1 = new Gff3FeatureImpl("chrX", ".", "CDS", 100, 150, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds01", "Parent", "tran01")); + final Gff3FeatureImpl programmedFrameshift_CDS1_1 = new Gff3FeatureImpl("chrX", ".", "CDS", 100, 150, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"), "Parent",Collections.singletonList( "tran01"))); programmedFrameshift_CDS1_1.addParent(programmedFrameshift_mRNA); programmedFrameshiftFeatures.add(programmedFrameshift_CDS1_1); - final Gff3FeatureImpl programmedFrameshift_CDS1_2 = new Gff3FeatureImpl("chrX", ".", "CDS", 149, 200, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds01", "Parent", "tran01")); + final Gff3FeatureImpl programmedFrameshift_CDS1_2 = new Gff3FeatureImpl("chrX", ".", "CDS", 149, 200, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"), "Parent", Collections.singletonList("tran01"))); programmedFrameshift_CDS1_2.addParent(programmedFrameshift_mRNA); programmedFrameshift_CDS1_2.addCoFeature(programmedFrameshift_CDS1_1); programmedFrameshiftFeatures.add(programmedFrameshift_CDS1_2); @@ -345,17 +392,17 @@ final Set multipleGenesFeatures = new HashSet<>(); - final Gff3FeatureImpl multipleGenes_gene1 = new Gff3FeatureImpl("ctg123", ".", "gene", 1000, 1500, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene00001")); + final Gff3FeatureImpl multipleGenes_gene1 = new Gff3FeatureImpl("ctg123", ".", "gene", 1000, 1500, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene00001"))); multipleGenesFeatures.add(multipleGenes_gene1); - final Gff3FeatureImpl multipleGenes_mRNA1 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1050, 1400, Strand.POSITIVE, -1, ImmutableMap.of("Parent", "gene00001")); + final Gff3FeatureImpl multipleGenes_mRNA1 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 1050, 1400, -1d, Strand.POSITIVE, -1, ImmutableMap.of("Parent", Collections.singletonList("gene00001"))); multipleGenes_mRNA1.addParent(multipleGenes_gene1); multipleGenesFeatures.add(multipleGenes_mRNA1); - final Gff3FeatureImpl multipleGenes_gene2 = new Gff3FeatureImpl("ctg123", ".", "gene", 2000, 2500, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene00002")); + final Gff3FeatureImpl multipleGenes_gene2 = new Gff3FeatureImpl("ctg123", ".", "gene", 2000, 2500, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene00002"))); multipleGenesFeatures.add(multipleGenes_gene2); - final Gff3FeatureImpl multipleGenes_mRNA2 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 2050, 2400, Strand.POSITIVE, -1, ImmutableMap.of("Parent", "gene00002")); + final Gff3FeatureImpl multipleGenes_mRNA2 = new Gff3FeatureImpl("ctg123", ".", "mRNA", 2050, 2400, -1d, Strand.POSITIVE, -1, ImmutableMap.of("Parent", Collections.singletonList("gene00002"))); multipleGenes_mRNA2.addParent(multipleGenes_gene2); multipleGenesFeatures.add(multipleGenes_mRNA2); @@ -368,7 +415,6 @@ @Test(dataProvider = "examplesDataProvider") public void examplesTest(final String inputGff, final Set expectedFeatures) throws IOException { final AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(inputGff, null, new Gff3Codec(), false); - int observedTopLevelFeatures = 0; int observedFeatures = 0; for (final Gff3Feature feature : reader.iterator()) { @@ -378,4 +424,98 @@ Assert.assertEquals(observedFeatures, expectedFeatures.size()); } + + @DataProvider(name = "directiveDataProvider") + public Object[][] directiveDataProvider() { + return new Object[][] { + {"##gff-version 3.1.25", Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE, "3.1.25"}, + {"##gff-version 3.7", Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE, "3.7"}, + {"##gff-version 3", Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE, "3"}, + {"##gff-version 3.112.25.4.2", Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE, "3.112.25.4.2"}, + {"##gff-version 2.7", null, null}, + {"##sequence-region chr10 250 277", Gff3Codec.Gff3Directive.SEQUENCE_REGION_DIRECTIVE, new SequenceRegion("chr10", 250, 277)}, + {"###", Gff3Codec.Gff3Directive.FLUSH_DIRECTIVE, null}, + {"####", null, null}, + {"##FASTA", Gff3Codec.Gff3Directive.FASTA_DIRECTIVE, null} + }; + } + + @Test(dataProvider = "directiveDataProvider") + public void directiveTest(final String line, final Gff3Codec.Gff3Directive expectedDirectiveType, final Object expectedDecodedDirective) throws IOException { + final Gff3Codec.Gff3Directive directive = Gff3Codec.Gff3Directive.toDirective(line); + Assert.assertEquals(directive, expectedDirectiveType); + if (directive != null) { + Assert.assertEquals(directive.decode(line), expectedDecodedDirective); + if (expectedDecodedDirective != null) { + Assert.assertEquals(directive.encode(expectedDecodedDirective), line); + } + } + } + + @DataProvider(name = "directiveEncodingDataProvider") + public Object [][] directiveEncodingDataProvider() { + return new Object[][] { + {Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE, "3.1.3", "##gff-version 3.1.3"}, + {Gff3Codec.Gff3Directive.SEQUENCE_REGION_DIRECTIVE, new SequenceRegion("theContig", 101, 170), "##sequence-region theContig 101 170"}, + {Gff3Codec.Gff3Directive.FLUSH_DIRECTIVE, null, "###"}, + {Gff3Codec.Gff3Directive.FASTA_DIRECTIVE, null, "##FASTA"} + }; + } + + @Test(dataProvider = "directiveEncodingDataProvider") + public void directiveEncodingTest(final Gff3Codec.Gff3Directive directive, final Object object, final String expectedEncoding) { + final String encoding = directive.encode(object); + + Assert.assertEquals(encoding, expectedEncoding); + } + + @DataProvider(name = "version3InvalidDirectives") + public Object[][] version3InvalidDirectivesDataProvider() { + return new Object[][] { + {"3.1.a"}, + {"2"}, + {"2.1"}, + {".3.1"} + }; + } + + @Test(dataProvider = "version3InvalidDirectives", expectedExceptions = TribbleException.class) + public void version3InvalidDirectivesTest(final String v3Directive) { + Gff3Codec.Gff3Directive.VERSION3_DIRECTIVE.encode(v3Directive); + } + + @DataProvider(name = "decodeAttributeValueDataProvider") + public Object[][] decodeAttributeValueDataProvider() { + return new Object[][] { + {"value1, value2, value3", Arrays.asList("value1", "value2", "value3")}, + {"value1, value %3B with %3D special %26 encoded %2C characters, value3", Arrays.asList("value1", "value ; with = special & encoded , characters", "value3")} + }; + } + + @Test(dataProvider = "decodeAttributeValueDataProvider") + public void decodeAttributeValueTest(final String attributeValueString, final List expectedAttributeValues) { + final List attributeValues = Gff3Codec.decodeAttributeValue(attributeValueString); + + Assert.assertEquals(attributeValues, expectedAttributeValues); + } + + @DataProvider(name = "extractSingleAttributeDataProvider") + public Object[][] extractSingleAttributeDataProvider() { + return new Object[][] { + {null, null, false}, //null returns null + {Collections.emptyList(), null, false}, //empty returns null + {Collections.singletonList("single value"), "single value", false}, //single value returns single value + {Arrays.asList("value1", "value2"), null, true} //multiple values throws exception + }; + } + + @Test(dataProvider = "extractSingleAttributeDataProvider") + public void extractSingleAttributeTest(final List attributes, final String expectedSingleAttribute, final boolean expectException) { + if (expectException) { + Assert.assertThrows(() -> Gff3Codec.extractSingleAttribute(attributes)); + } else { + final String singleAttribute = Gff3Codec.extractSingleAttribute(attributes); + Assert.assertEquals(singleAttribute, expectedSingleAttribute); + } + } } \ No newline at end of file diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3FeatureTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3FeatureTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3FeatureTest.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3FeatureTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -23,42 +23,42 @@ @DataProvider(name = "equalityTestDataProvider") public Object[][] equalityTestDatProvider() { final ArrayList examples = new ArrayList<>(); - examples.add(new Object[] {new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")), - new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")), true}); - examples.add(new Object[] {new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")), - new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")), true}); + examples.add(new Object[] {new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))), + new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))), true}); + examples.add(new Object[] {new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))), + new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))), true}); //two features with same baseData, one with child (or parent) feature, one without - final Gff3FeatureImpl feature1_1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature2_1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature3_1 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature1_1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature2_1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature3_1 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature3_1.addParent(feature1_1); - final Gff3FeatureImpl feature4_1 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature4_1 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); examples.add(new Object[] {feature1_1, feature2_1, false}); examples.add(new Object[] {feature3_1, feature4_1, false}); //give both genes child feature - final Gff3FeatureImpl feature1_2 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature2_2 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature3_2 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, -0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature1_2 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature2_2 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature3_2 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, -0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature3_2.addParent(feature1_2); - final Gff3FeatureImpl feature4_2 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature4_2 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature4_2.addParent(feature2_2); examples.add(new Object[] {feature1_2, feature2_2, true}); examples.add(new Object[] {feature3_2, feature4_2, true}); //give one cds a co-feature - final Gff3FeatureImpl feature1_3 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature2_3 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature3_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature1_3 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature2_3 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature3_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature3_3.addParent(feature1_3); - final Gff3FeatureImpl feature4_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature4_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature4_3.addParent(feature2_3); - final Gff3FeatureImpl feature5_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature5_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature5_3.addParent(feature1_3); - final Gff3FeatureImpl feature6_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature6_3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature3_3.addCoFeature(feature5_3); @@ -67,15 +67,15 @@ examples.add(new Object[] {feature5_3, feature6_3, false}); //give both cds co-features - final Gff3FeatureImpl feature1_4 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature2_4 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature3_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature1_4 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature2_4 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature3_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature3_4.addParent(feature1_4); - final Gff3FeatureImpl feature4_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature4_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature4_4.addParent(feature2_4); - final Gff3FeatureImpl feature5_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature5_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature5_4.addParent(feature1_4); - final Gff3FeatureImpl feature6_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature6_4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1080, 1150, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature6_4.addParent(feature2_4); feature3_4.addCoFeature(feature5_4); @@ -99,8 +99,8 @@ @Test public void testChildren() { //test that when a feature has a parent it is added as it's parent's child - final Gff3FeatureImpl feature1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature2 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature2 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature2.addParent(feature1); Assert.assertTrue(feature1.getChildren().contains(feature2)); @@ -110,12 +110,12 @@ @Test public void testCofeatures() { //test that when a adding a cofeature it is reciprocated - final Gff3FeatureImpl region = new Gff3FeatureImpl("chr1", ".", "region", 1, 10000, Strand.NONE, -1, ImmutableMap.of("ID", "region01")); - final Gff3FeatureImpl feature1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01", "Parent", "region01")); + final Gff3FeatureImpl region = new Gff3FeatureImpl("chr1", ".", "region", 1, 10000, -1d, Strand.NONE, -1, ImmutableMap.of("ID", Collections.singletonList("region01"))); + final Gff3FeatureImpl feature1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "Parent", Collections.singletonList("region01"))); feature1.addParent(region); - final Gff3FeatureImpl feature2 = new Gff3FeatureImpl("chr1", ".", "gene", 1300, 1600, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01", "Parent", "region01")); + final Gff3FeatureImpl feature2 = new Gff3FeatureImpl("chr1", ".", "gene", 1300, 1600, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "Parent", Collections.singletonList("region01"))); feature2.addParent(region); - final Gff3FeatureImpl feature3 = new Gff3FeatureImpl("chr1", ".", "gene", 1700, 1900, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01", "Parent", "region01")); + final Gff3FeatureImpl feature3 = new Gff3FeatureImpl("chr1", ".", "gene", 1700, 1900, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "Parent", Collections.singletonList("region01"))); feature3.addParent(region); feature1.addCoFeature(feature2); @@ -128,11 +128,11 @@ @Test(expectedExceptions = TribbleException.class) public void testCofeautresDifferentParents() { - final Gff3FeatureImpl feature1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene01")); - final Gff3FeatureImpl feature2 = new Gff3FeatureImpl("chr1", ".", "gene", 1300, 1600, Strand.NEGATIVE, -1, ImmutableMap.of("ID", "gene02")); - final Gff3FeatureImpl feature3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene01")); + final Gff3FeatureImpl feature1 = new Gff3FeatureImpl("chr1", ".", "gene", 1000, 1200, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"))); + final Gff3FeatureImpl feature2 = new Gff3FeatureImpl("chr1", ".", "gene", 1300, 1600, -1d, Strand.NEGATIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene02"))); + final Gff3FeatureImpl feature3 = new Gff3FeatureImpl("chr1", ".", "CDS", 1010, 1050, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene01"))); feature3.addParent(feature1); - final Gff3FeatureImpl feature4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1310, 1350, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "cds01","Parent", "gene02")); + final Gff3FeatureImpl feature4 = new Gff3FeatureImpl("chr1", ".", "CDS", 1310, 1350, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"),"Parent", Collections.singletonList("gene02"))); feature4.addParent(feature2); //should throw exception because feature3 and feature4 have different parents so should not be co-features @@ -144,7 +144,7 @@ final int nGenerations = 10; - final Gff3FeatureImpl topLevelFeature = new Gff3FeatureImpl("chrX", ".", "type0", 1, 100, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "feature0")); + final Gff3FeatureImpl topLevelFeature = new Gff3FeatureImpl("chrX", ".", "type0", 1, 100, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("feature0"))); Gff3FeatureImpl prevFeature = topLevelFeature; final Map> ancestorsMap = new HashMap<>(); @@ -156,7 +156,7 @@ final List features = new ArrayList<>(Arrays.asList(topLevelFeature)); for (int i=1; i(ancestorsMap.get(prevFeature))); ancestorsMap.get(newFeature).add(prevFeature); @@ -179,13 +179,13 @@ public void testFlatten() { final int nGenerations = 10; - final Gff3FeatureImpl topLevelFeature = new Gff3FeatureImpl("chrX", ".", "type0", 1, 100, Strand.NEGATIVE, 0, ImmutableMap.of("ID", "feature0")); + final Gff3FeatureImpl topLevelFeature = new Gff3FeatureImpl("chrX", ".", "type0", 1, 100, -1d, Strand.NEGATIVE, 0, ImmutableMap.of("ID", Collections.singletonList("feature0"))); Gff3FeatureImpl prevFeature = topLevelFeature; final Map> flattenMap = new HashMap<>(Collections.singletonMap(topLevelFeature, new HashSet<>(Collections.singleton(topLevelFeature)))); for (int i=1; i v.add(newFeature)); flattenMap.put(newFeature, new HashSet<>(Collections.singleton(newFeature))); @@ -197,22 +197,22 @@ @Test public void testDerivesFrom() { - final Gff3FeatureImpl region01 = new Gff3FeatureImpl("chrX", ".", "gene", 65, 1000, Strand.POSITIVE, -1, ImmutableMap.of("ID", "region01")); + final Gff3FeatureImpl region01 = new Gff3FeatureImpl("chrX", ".", "gene", 65, 1000, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("region01"))); - final Gff3FeatureImpl gene01 = new Gff3FeatureImpl("chrX", ".", "gene", 1, 35, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene01", "Parent", "region01")); + final Gff3FeatureImpl gene01 = new Gff3FeatureImpl("chrX", ".", "gene", 1, 35, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "Parent", Collections.singletonList("region01"))); gene01.addParent(region01); - final Gff3FeatureImpl gene02 = new Gff3FeatureImpl("chrX", ".", "gene", 70, 100, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene02")); + final Gff3FeatureImpl gene02 = new Gff3FeatureImpl("chrX", ".", "gene", 70, 100, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene02"))); - final Gff3FeatureImpl mRNA01 = new Gff3FeatureImpl("chrX", ".", "mRNA", 1, 100, Strand.POSITIVE, -1 , ImmutableMap.of("ID", "mRNA01", "Parent", "gene01, gene02")); + final Gff3FeatureImpl mRNA01 = new Gff3FeatureImpl("chrX", ".", "mRNA", 1, 100, -1d, Strand.POSITIVE, -1 , ImmutableMap.of("ID", Collections.singletonList("mRNA01"), "Parent", Arrays.asList("gene01", "gene02"))); mRNA01.addParent(gene01); mRNA01.addParent(gene02); - final Gff3FeatureImpl cds01 = new Gff3FeatureImpl("chrX", ".", "CDS", 1, 35, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds01", "Parent", "mRNA01", "Derives_from", "gene01")); + final Gff3FeatureImpl cds01 = new Gff3FeatureImpl("chrX", ".", "CDS", 1, 35, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds01"), "Parent", Collections.singletonList("mRNA01"), "Derives_from", Collections.singletonList("gene01"))); cds01.addParent(mRNA01); - final Gff3FeatureImpl cds02 = new Gff3FeatureImpl("chrX", ".", "CDS", 70, 100, Strand.POSITIVE, 0, ImmutableMap.of("ID", "cds02", "Parent", "mRNA01", "Derives_from", "gene02")); + final Gff3FeatureImpl cds02 = new Gff3FeatureImpl("chrX", ".", "CDS", 70, 100, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("cds02"), "Parent", Collections.singletonList("mRNA01"), "Derives_from", Collections.singletonList("gene02"))); cds02.addParent(mRNA01); - final Gff3FeatureImpl codon01 = new Gff3FeatureImpl("chrX", ".", "codon", 1, 3, Strand.POSITIVE, 0, ImmutableMap.of("ID", "codon01", "Parent", "cds01")); + final Gff3FeatureImpl codon01 = new Gff3FeatureImpl("chrX", ".", "codon", 1, 3, -1d, Strand.POSITIVE, 0, ImmutableMap.of("ID", Collections.singletonList("codon01"), "Parent", Collections.singletonList("cds01"))); codon01.addParent(cds01); Assert.assertEquals(cds01.getAncestors(), ImmutableSet.of(mRNA01, gene01, region01)); @@ -232,12 +232,12 @@ @Test public void testFeatureWithUnLoadedParent() { - final Gff3FeatureImpl gene01 = new Gff3FeatureImpl("chrX", ".", "gene", 1, 35, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene01", "Parent", "region01")); - final Gff3FeatureImpl gene02 = new Gff3FeatureImpl("chrX", ".", "gene", 1, 35, Strand.POSITIVE, -1, ImmutableMap.of("ID", "gene01", "Parent", "region01")); + final Gff3FeatureImpl gene01 = new Gff3FeatureImpl("chrX", ".", "gene", 1, 35, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "Parent", Collections.singletonList("region01"))); + final Gff3FeatureImpl gene02 = new Gff3FeatureImpl("chrX", ".", "gene", 1, 35, -1d, Strand.POSITIVE, -1, ImmutableMap.of("ID", Collections.singletonList("gene01"), "Parent", Collections.singletonList("region01"))); Assert.assertEquals(gene01, gene02); - final Gff3FeatureImpl mRNA01 = new Gff3FeatureImpl("chrX", ".", "mRNA", 1, 100, Strand.POSITIVE, -1 , ImmutableMap.of("ID", "mRNA01", "Parent", "gene01, gene02")); + final Gff3FeatureImpl mRNA01 = new Gff3FeatureImpl("chrX", ".", "mRNA", 1, 100, -1d, Strand.POSITIVE, -1 , ImmutableMap.of("ID", Collections.singletonList("mRNA01"), "Parent", Arrays.asList("gene01", "gene02"))); mRNA01.addParent(gene01); Assert.assertNotEquals(gene01, gene02); diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3WriterTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3WriterTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3WriterTest.java 1970-01-01 00:00:00.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/tribble/gff/Gff3WriterTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -0,0 +1,205 @@ +package htsjdk.tribble.gff; + +import com.google.common.collect.ImmutableMap; +import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.IOUtil; +import htsjdk.tribble.AbstractFeatureReader; +import htsjdk.tribble.TestUtils; +import htsjdk.tribble.TribbleException; +import htsjdk.tribble.readers.LineIterator; +import org.testng.Assert; +import org.testng.TestException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.zip.GZIPInputStream; + +public class Gff3WriterTest extends HtsjdkTest { + private final static String DATA_DIR = TestUtils.DATA_DIR + "/gff/"; + private final Path ensembl_human_small = Paths.get(DATA_DIR + "Homo_sapiens.GRCh38.97.chromosome.1.small.gff3"); + private final Path gencode_mouse_small = Paths.get(DATA_DIR + "gencode.vM22.annotation.small.gff3"); + private final Path ncbi_woodpecker_small = Paths.get(DATA_DIR + "ref_ASM69900v1_top_level.small.gff3"); + private final Path feature_extends_past_circular_region = Paths.get(DATA_DIR + "feature_extends_past_circular_region.gff3"); + private final Path with_fasta = Paths.get(DATA_DIR + "fasta_test.gff3"); + private final Path with_fasta_artemis = Paths.get(DATA_DIR + "fasta_test_artemis.gff3"); + private final Path ordered_cofeature = Paths.get(DATA_DIR, "ordered_cofeatures.gff3"); + private final Path child_before_parent = Paths.get(DATA_DIR, "child_before_parent.gff3"); + private final Path url_encoding = Paths.get(DATA_DIR, "url_encoding.gff3"); + private final static Path[] tmpDir = new Path[] {IOUtil.getDefaultTmpDirPath()}; + private final static String version3Directive = "##gff-version 3.1.25\n"; + + @DataProvider(name = "roundTripDataProvider") + public Object[][] roundTripDataProvider() { + return new Object[][] { + {ensembl_human_small}, {gencode_mouse_small}, {ncbi_woodpecker_small}, {feature_extends_past_circular_region}, {with_fasta}, {with_fasta_artemis}, + {ordered_cofeature}, {child_before_parent}, {url_encoding} + }; + } + + @Test(dataProvider = "roundTripDataProvider") + public void testRoundTrip(final Path path) { + + final List comments1 = new ArrayList<>(); + final HashSet regions1 = new HashSet<>(); + final LinkedHashSet features1 = readFromFile(path, comments1, regions1); + + //write out to temp files (one gzipped, on not) + try { + final Path tempFile = IOUtil.newTempPath("gff3Writer", ".gff3", tmpDir); + final Path tempFileGzip = IOUtil.newTempPath("gff3Writer", ".gff3.gz", tmpDir); + + writeToFile(tempFile, comments1, regions1, features1); + writeToFile(tempFileGzip, comments1, regions1, features1); + + //read temp files back in + + Assert.assertTrue(isGZipped(tempFileGzip.toFile())); + final List comments2 = new ArrayList<>(); + final HashSet regions2 = new HashSet<>(); + final LinkedHashSet features2 = readFromFile(tempFile, comments2, regions2); + + + final List comments3 = new ArrayList<>(); + final HashSet regions3 = new HashSet<>(); + final LinkedHashSet features3 = readFromFile(path, comments3, regions3); + + Assert.assertEquals(features1, features2); + Assert.assertEquals(features1, features3); + + Assert.assertEquals(comments1, comments2); + Assert.assertEquals(comments1, comments3); + Assert.assertEquals(regions1, regions2); + Assert.assertEquals(regions1, regions3); + } catch (final IOException ex) { + throw new TribbleException("Error creating temp files", ex); + } + } + + private void writeToFile(final Path path, final List comments, final Set regions, final Set features) { + try (final Gff3Writer writer = new Gff3Writer(path)) { + for (final String comment : comments) { + writer.addComment(comment); + } + + for (final SequenceRegion region : regions) { + writer.addDirective(Gff3Codec.Gff3Directive.SEQUENCE_REGION_DIRECTIVE, region); + } + + for (final Gff3Feature feature : features) { + writer.addFeature(feature); + } + } catch (final IOException ex) { + throw new TribbleException("Error writing to file " + path, ex); + } + } + + private LinkedHashSet readFromFile(final Path path, List commentsStore, Set regionsStore) { + final Gff3Codec codec = new Gff3Codec(); + final LinkedHashSet features = new LinkedHashSet<>(); + try (final AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(path.toAbsolutePath().toString(), null, codec, false)) { + for (final Gff3Feature feature : reader.iterator()) { + features.add(feature); + } + + commentsStore.addAll(codec.getCommentTexts()); + regionsStore.addAll(codec.getSequenceRegions()); + } catch (final IOException ex) { + throw new TribbleException("Error reading gff3 file " + path); + } + + return features; + } + + @DataProvider(name = "writeKeyValuePairDataProvider") + public Object[][] writeKeyValuePairDataProvider() { + return new Object[][] { + {"key",Arrays.asList("value1", "value2", "value3"), "key=value1,value2,value3"}, + {"key",Arrays.asList("value1", "value ; with = special & encoded , characters", "value3"), "key=value1,value %3B with %3D special %26 encoded %2C characters,value3"} + }; + } + + @Test(dataProvider = "writeKeyValuePairDataProvider") + public void testWriteKeyValuePair(final String key, final List values, final String expectedOutput) { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + try(final Gff3Writer writer = new Gff3Writer(outputStream)) { + writer.writeKeyValuePair(key, values); + } catch (final IOException ex) { + throw new TestException("Error writing key value pair", ex); + } + + final byte[] expectedBytes = (version3Directive + expectedOutput).getBytes(); + + Assert.assertEquals(outputStream.toByteArray(), expectedBytes); + } + + @DataProvider(name = "writeAttributesDataProvider") + public Object[][] writeAttributesDataProvider() { + return new Object[][] { + {ImmutableMap.of("key1", Arrays.asList("value1", "value2"), "key2", Collections.singletonList("another value"), "key3", Arrays.asList("thisValue")), + "key1=value1,value2;key2=another value;key3=thisValue"}, + {ImmutableMap.of("singleKey", Arrays.asList("multipleValue1", "multipleValue2")), "singleKey=multipleValue1,multipleValue2"}, + {ImmutableMap.of("singleKey", Collections.singletonList("singleValue")), "singleKey=singleValue"}, + {Collections.emptyMap(), "."} + }; + } + + @Test(dataProvider = "writeAttributesDataProvider") + public void testWriteAttributes(final Map> attributes, final String expectedOutput) { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + try(final Gff3Writer writer = new Gff3Writer(outputStream)) { + writer.writeAttributes(attributes); + } catch (final IOException ex) { + throw new TestException("Error writing key value pair", ex); + } + + final byte[] expectedBytes = (version3Directive + expectedOutput).getBytes(); + + Assert.assertEquals(outputStream.toByteArray(), expectedBytes); + } + + @DataProvider(name = "encodeStringDataProvider") + public Object[][] encodeStringDataProvider() { + return new Object[][] { + {"%", "%25"}, + {";", "%3B"}, + {"=", "%3D"}, + {"&", "%26"}, + {",", "%2C"}, + {" ", " "}, + {"qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM ", "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM "} //these should remain unchanged + }; + } + + @Test(dataProvider = "encodeStringDataProvider") + public void testEncodeString(final String decoded, final String expectedEncoded) { + final String encoded = Gff3Writer.encodeString(decoded); + + Assert.assertEquals(encoded, expectedEncoded); + } + + private static boolean isGZipped(final File f) { + int magic = 0; + try { + RandomAccessFile raf = new RandomAccessFile(f, "r"); + magic = raf.read() & 0xff | ((raf.read() << 8) & 0xff00); + raf.close(); + } catch (Throwable e) { + e.printStackTrace(System.err); + } + return magic == GZIPInputStream.GZIP_MAGIC; + } +} \ No newline at end of file diff -Nru htsjdk-2.22.0+dfsg/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java htsjdk-2.23.0+dfsg/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java --- htsjdk-2.22.0+dfsg/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java 2020-07-08 16:28:01.000000000 +0000 @@ -3,6 +3,9 @@ import com.google.common.jimfs.Configuration; import com.google.common.jimfs.Jimfs; import htsjdk.HtsjdkTest; +import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.seekablestream.SeekableStreamFactory; +import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.TestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -12,6 +15,7 @@ import java.io.IOException; import java.nio.file.FileSystem; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -97,4 +101,29 @@ // fail if a test that should have thrown didn't Assert.assertTrue(shouldSucceed, "Test should have failed but succeeded"); } + + @Test + public void testTabixFileWithEmbeddedSpaces() throws IOException { + final File testVCF = new File(TEST_DATA_DIR, "HiSeq.10000.vcf.bgz"); + final File testTBI = new File(TEST_DATA_DIR, "HiSeq.10000.vcf.bgz.tbi"); + + // Copy the input files into a temporary directory with embedded spaces in the name. + // This test needs to include the associated .tbi file because we want to force execution + // of the tabix code path. + final File tempDir = IOUtil.createTempDir("test spaces", ""); + Assert.assertTrue(tempDir.getAbsolutePath().contains(" ")); + tempDir.deleteOnExit(); + final File inputVCF = new File(tempDir, "HiSeq.10000.vcf.bgz"); + inputVCF.deleteOnExit(); + final File inputTBI = new File(tempDir, "HiSeq.10000.vcf.bgz.tbi"); + inputTBI.deleteOnExit(); + IOUtil.copyFile(testVCF, inputVCF); + IOUtil.copyFile(testTBI, inputTBI); + + try (final VCFFileReader vcfFileReader = new VCFFileReader(inputVCF)) { + Assert.assertNotNull(vcfFileReader.getFileHeader()); + } + + } + } diff -Nru htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/samtools/util/sliver.sam htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/samtools/util/sliver.sam --- htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/samtools/util/sliver.sam 1970-01-01 00:00:00.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/samtools/util/sliver.sam 2020-07-08 16:28:01.000000000 +0000 @@ -0,0 +1,120 @@ +@HD VN:1.5 GO:query SO:coordinate +@SQ SN:chr1 LN:248956422 AS:38 M5:6aef897c3d6ff0c78aff06ac189178dd UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens +@RG ID:HJYFJ.4 SM:NA12891 LB:Pond-492101 PL:illumina PU:HJYFJCCXX160204.4.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HJYFJ.5 SM:NA12891 LB:Pond-492101 PL:illumina PU:HJYFJCCXX160204.5.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HJYFJ.6 SM:NA12891 LB:Pond-492101 PL:illumina PU:HJYFJCCXX160204.6.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HJYFJ.7 SM:NA12891 LB:Pond-492101 PL:illumina PU:HJYFJCCXX160204.7.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HJYFJ.8 SM:NA12891 LB:Pond-492101 PL:illumina PU:HJYFJCCXX160204.8.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HJYN2.1 SM:NA12891 LB:Pond-492101 PL:illumina PU:HJYN2CCXX160123.1.TATCCAGG CN:BI DT:2016-01-23T00:00:00-0500 PI:0 +@RG ID:HK35M.1 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.1.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35M.2 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.2.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35M.3 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.3.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35M.4 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.4.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35M.5 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.5.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35M.6 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.6.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35M.7 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.7.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35M.8 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35MCCXX160204.8.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35N.1 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35NCCXX160204.1.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK35N.2 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK35NCCXX160204.2.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.1 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.1.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.2 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.2.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.3 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.3.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.4 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.4.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.5 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.5.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.6 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.6.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.7 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.7.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HK3T5.8 SM:NA12891 LB:Pond-492101 PL:illumina PU:HK3T5CCXX160204.8.TATCCAGG CN:BI DT:2016-02-04T00:00:00-0500 +@RG ID:HM3G2.2 SM:NA12891 LB:Pond-492101 PL:illumina PU:HM3G2CCXX160308.2.TATCCAGG CN:BI DT:2016-03-08T00:00:00-0500 +@RG ID:HTL2K.5 SM:NA12891 LB:Pond-492101 PL:illumina PU:HTL2KCCXX160412.5.TATCCAGG-GAAGTGGA CN:BI DT:2016-04-12T00:00:00-0400 +@RG ID:HTL2K.6 SM:NA12891 LB:Pond-492101 PL:illumina PU:HTL2KCCXX160412.6.TATCCAGG-GAAGTGGA CN:BI DT:2016-04-12T00:00:00-0400 +@PG ID:bwamem VN:0.7.15-r1140 CL:bwa mem -K 100000000 -p -v 3 -t 16 -Y /cromwell_root/gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta PN:bwamem +@PG ID:MarkDuplicates VN:2.22.3-SNAPSHOT CL:MarkDuplicates CLEAR_DT=false ADD_PG_TAG_TO_READS=false INPUT=[/cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-0/HJYFJCCXX.4.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-1/attempt-2/HJYFJCCXX.5.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-2/HJYFJCCXX.6.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-3/HJYFJCCXX.7.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-4/HJYFJCCXX.8.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-5/HJYN2CCXX.1.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-6/HK35MCCXX.1.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-7/attempt-2/HK35MCCXX.2.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-8/HK35MCCXX.3.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-9/attempt-2/HK35MCCXX.4.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-10/HK35MCCXX.5.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-11/HK35MCCXX.6.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-12/HK35MCCXX.7.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-13/HK35MCCXX.8.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-14/HK35NCCXX.1.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-15/HK35NCCXX.2.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-16/HK3T5CCXX.1.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-17/HK3T5CCXX.2.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-18/attempt-2/HK3T5CCXX.3.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-19/HK3T5CCXX.4.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-20/HK3T5CCXX.5.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-21/HK3T5CCXX.6.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-22/attempt-2/HK3T5CCXX.7.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-23/HK3T5CCXX.8.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SplitRG/shard-24/SplitLargeReadGroup/49809670-d61e-402a-b77d-153e60fced2c/call-GatherMonolithicBamFile/HM3G2CCXX.2.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-25/HTL2KCCXX.5.Pond-492101.aligned.unsorted.bam, /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/ea7f25d6-e4fd-4b5a-922b-275d6a0c77e6/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/9c5cdb6a-911d-42cb-ace2-33dc123a2cac/call-SamToFastqAndBwaMemAndMba/shard-26/attempt-2/HTL2KCCXX.6.Pond-492101.aligned.unsorted.bam] OUTPUT=NA12891.aligned.unsorted.duplicates_marked.bam METRICS_FILE=NA12891.duplicate_metrics ASSUME_SORT_ORDER=queryname OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 VALIDATION_STRINGENCY=SILENT MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 SORTING_COLLECTION_SIZE_RATIO=0.25 TAG_DUPLICATE_SET_MEMBERS=false REMOVE_SEQUENCING_DUPLICATES=false TAGGING_POLICY=DontTag DUPLEX_UMI=false REMOVE_DUPLICATES=false ASSUME_SORTED=false DUPLICATE_SCORING_STRATEGY=SUM_OF_BASE_QUALITIES PROGRAM_RECORD_ID=MarkDuplicates PROGRAM_GROUP_NAME=MarkDuplicates READ_NAME_REGEX= MAX_OPTICAL_DUPLICATE_SET_SIZE=300000 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=2 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false PN:MarkDuplicates +@PG ID:GATK ApplyBQSR VN:4.1.7.0-11-gee56f27-SNAPSHOT CL:ApplyBQSR --output NA12891.aligned.duplicates_marked.recalibrated.bam --bqsr-recal-file /cromwell_root/broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/75ae53b9-3b67-4ec8-88af-2dffc4704879/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/e51137c1-0348-4b13-9a93-557fdb7073db/call-GatherBqsrReports/NA12891.recal_data.csv --use-original-qualities true --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 --intervals chr1:1+ --input gs://broad-gotc-dev-cromwell-execution/WholeGenomeGermlineSingleSample/75ae53b9-3b67-4ec8-88af-2dffc4704879/call-UnmappedBamToAlignedBam/UnmappedBamToAlignedBam/e51137c1-0348-4b13-9a93-557fdb7073db/call-SortSampleBam/NA12891.aligned.duplicate_marked.sorted.bam --reference /cromwell_root/gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta --create-output-bam-md5 true --add-output-sam-program-record true --preserve-qscores-less-than 6 --quantize-quals 0 --round-down-quantized false --emit-original-quals false --global-qscore-prior -1.0 --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-variant-index true --create-output-variant-md5 false --lenient false --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays --disable-tool-default-read-filters false PN:GATK ApplyBQSR +@PG ID:GATK PrintReads VN:4.0.11.0-13-g36f4305-SNAPSHOT CL:PrintReads --output sliver.bam --intervals chr1:781443-790000 --intervals chr1:790000-790478 --input /seq/tng/ggrant/Support/GL-744/NA12891.bam --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --lenient false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays --disable-tool-default-read-filters false PN:GATK PrintReads +HK35MCCXX160204:5:1107:6340:34799 99 chr1 787876 60 151M = 787997 272 TTTCTACTCCACTATTCTATCATGGTGCATTTTAGGTGCTAATGGGAAAATCTGCTCTATGATGGTGTCCAATAGACAATTAAAATTCAAATCTGGAGATGAGATCTGGACTGAAGATTTTTTTAAAAATTAGCCCTCATCACATGTAGTC ??????5??5?5?+??????????5+'??????????????+5?????????????5?+????5?????'?????555?+??5????+???++????'?+??55+5??5????????++?????5???5++5+'?55'5?555?+?????? MC:Z:151M RG:Z:HK35M.5 MQ:i:50 AS:i:146 +HK3T5CCXX160204:7:1212:23267:47492 147 chr1 788025 48 151M = 787983 -193 TCAGCTGTAGAAATCCTTAGCATGGCTGAAATTTTTCATATTAAGACAAGACTAGAACTTGGCCAGGTGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGTGAATCACCTGAGGTCTGGAGTTTGAGACCAGCTG ????????5?????????????????????????????????????????????????????5???????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HK3T5.7 MQ:i:60 AS:i:151 +HK35NCCXX160204:1:2218:30391:4034 147 chr1 788028 47 151M = 787976 -203 GCTGTAGAAATCCTTAGCATGGCTGAAATGTTTCATATTAAGACAAGACTAGAACTTGGCCAGGTGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGTGAATCACCTGAGGTCTGGAGTTTGAGACCAGCTGACC ?????????5???????????????????'?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????5??? MC:Z:151M RG:Z:HK35N.1 MQ:i:60 AS:i:146 +HK35MCCXX160204:2:1206:26555:70768 99 chr1 788175 48 151M = 788302 275 GACCAACATGGTGAAACACCATCTCTACTAAAAATACAAAAAATTAGCCAGGCGTGGTTGCACATGCCTGTAATCCTAGCTACTTGGGAAGCTGAGGTAGGAGAATTGCTTGAACCCAGGAGGTGGAGGTTGCAGCAAGCTGAGATCGTGC ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????5?????5?????????5??????????? MC:Z:5S117M2D29M RG:Z:HK35M.2 MQ:i:60 AS:i:151 +HTL2KCCXX160412:5:2211:10044:18854 147 chr1 788179 45 151M = 788003 -327 AGCATGGTGAAACACCATCTCGACGAAAAATACAAAAAATTAGCCAGGCGTGGTTGCACATGCCTGTAATCCTAGCTACTTGGGAAGCTGAGGGAGGAGAATTGCTTGAACCCAGGAGGTGGAGGTTGCAGCAAGCTGAGATCGTGCCATT ###############################################???????????+?????????5???????5?++5???5??5?????'??????????????5555??????????????????????????????????????? MC:Z:151M RG:Z:HTL2K.5 MQ:i:47 AS:i:134 +HK35MCCXX160204:7:2201:4350:10925 147 chr1 788265 60 151M = 787867 -549 GCTGAGGCGGGAGAATTCATTGAAACCAGGGGGTGGAGGTTGCAGCACGCTGAGATCGTGCCATTGCACTGCAGTCTGGGCAAAAAAAGCAAAACTCTGAAGAAAGAGAGAGAGAGGGAGAGAGGGAGAGAGAAAGGAAGAGACGAGGAGA ###############################??????????5+5??+%?5?????55???'5+??5+?5?????55????5?????5?5??+5???????5????????????5??5????????????????????????5+5'?+??5+ MC:Z:151M RG:Z:HK35M.7 MQ:i:60 AS:i:112 +HK3T5CCXX160204:3:2222:23226:52098 147 chr1 788330 60 89M2D62M = 787951 -532 GCACGGCAGTCGGGGCAAAAAAAGCAAAGCGCTGAGGAAAGAGAGAGAGAGGGAGAGAGGGAGAGAGAAAGGAAGAGACGATGAGAGACAGAGAAGGAGAGAGAAAGTACAAAAGAACGAATGAACGAACAAACTAGAAATCGAGCAGGAA ######################################???????????5?5?????55???????????????????5????????????????????????????????????????????55????'???????5+???????????? MC:Z:151M RG:Z:HK3T5.3 MQ:i:47 AS:i:118 CO:Z:needed +HTL2KCCXX160412:5:2105:26463:13263 83 chr1 788340 60 79M2D72M = 788051 -442 CTGGGCAAAAAAAGCAAAACTCTGAAGAAAGAGAGAGAGAGGGAGAGAGGGAGAGAGAAAGGAAGAGACGATGAGAGACAGAGAAGGAGAGAGAAAGTACAAAAGAACGAATGAACGAACAAACTAGAAATCGAGCAGGAACCTTGGAGGA ???????????5??5???+?55????????????55????????????????????????????????5????????????????????????????????????5????????????????????????????????????????????? MC:Z:151M RG:Z:HTL2K.5 MQ:i:47 AS:i:143 CO:Z:needed +HTL2KCCXX160412:6:2222:7923:31670 99 chr1 788483 47 151M = 788786 448 CCTTGGAGGACCTATTGCTTAAGGTGTGCGCCAAAGAAAGTAAGTTAGGGCAAGAGACTAAGGTATGCCAGAGACCCAGGACAAAACACAGTGCAGAGTGATGTCACAGAGCCAAATGGGAGTGCAAGCTATGGCAAGCCCTCCCAAGTAT ?????????????????????????????5????????????????????????????????????????????????????????????????????????????????????????????5???????????????????????????? MC:Z:6S145M RG:Z:HTL2K.6 MQ:i:60 AS:i:146 +HM3G2CCXX160308:2:2202:10906:14055 147 chr1 788492 47 151M = 788009 -634 ACCTATTGCTTAAGGTGTGCGCCAAAGAAAGTAAGTTAGGGCAAGAGACTAAGGTATGCCAGAGACCCAGGACAAAACACAGTGCAGAGTGATGTCACAGAGCCAAATGGGAGTGCAAGCTATGGCAAGCCCTCCCAAGTATGTGTGGGTT ??55??????????5+'+?????????5??????????????????5?????5?????????????????????????????5?????5++55??????????????5????????????5???????????5?????????????????5 MC:Z:151M RG:Z:HM3G2.2 MQ:i:48 AS:i:146 +HM3G2CCXX160308:2:2204:20344:54559 83 chr1 788632 60 151M = 788525 -258 ATGTGTGGGTTAAATGTAATTAAATTCAAAATCTCTCAACCCAAAAATTTTCTCCACAAAGGAAGTAGAGAAACAAAACAGTTCATTATTGAATAAGCATTAAACCAGAATGTGATGTGTGAATATAATGGAATGGAATGGAATGGAATGG ?????5????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:48 AS:i:151 +HTL2KCCXX160412:5:2114:28970:33533 83 chr1 788634 60 151M = 788475 -310 GTGTGGGTTAAATGTAATTAAATTCAAAATCTCTCAACCCAAAAATTTTCTCCACAAAGGAAGTAGAGAAACAAAACAGTTCATTATTGAATAAGCATTAAACCAGAATGTGATGTGTGAATATAATGGAATGGAATGGAATGGAATGGAA 5?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HTL2K.5 MQ:i:47 AS:i:151 +HM3G2CCXX160308:2:1112:23947:64316 147 chr1 788638 60 151M = 788512 -277 GGGTTAAATGTAATTAAATTCAAAATCTCTCAACCCAAAAATTTTCTCCACAAAGGAAGTAGAGAAACAAAACAGTTCATTATTGAATAAGCATTAAACCAGAATGTGATGTGTGAATATAATGGAATGGAATGGAATGGAATGGAATGGA 5???????5?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:57 AS:i:151 +HTL2KCCXX160412:5:1203:20730:58005 163 chr1 788659 60 151M = 789197 689 AAAATCTCTCAACCCAAAAATTTTCTCCACAAAGGAAGTAGAGAAACAAAACAGTTCATTATTGAATAAGCATTAAACCAGAATGTGATGTGTGAATATAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAA ??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????5+5?? MC:Z:151M RG:Z:HTL2K.5 MQ:i:59 AS:i:150 +HTL2KCCXX160412:6:2222:7923:31670 147 chr1 788786 60 6S145M = 788483 -448 AGGAGGGGAATGGAATGGAATGGAATGGAGTGGGGTGGAATGGACCCGAATGGAATGGGACGGAACGGAATGGAATGGAACGCACTCGAATGGAATGGAACGGACATGAATGGAATGGAATGGAACGGACACGAATGGAACGGAACGGAAC ################?+????55?5??5555++55??55???5+?5??????5?5??'?????????????????????5?55??5???????????5?5??????????????????????????????5????????5????5????? MC:Z:151M RG:Z:HTL2K.6 MQ:i:47 AS:i:135 +HM3G2CCXX160308:2:2210:12611:69607 145 chr1 788787 0 30S38M83S = 104086 0 GAATGGAGTAGAGTGAGATGAAATGAAATAGAATGGAATGGAATGGAACGGAGTGGAGTGGAATGGACATGAGTGGAGTTGACTGGAGTGGAACGGAGTGCAGGGGAAAGGAATGGTATAGATTGGNATTGAATGTAATGGAATGGAGTGG ???????????5?5?5??+?5????5???5?5?555?????55?????5???????????????????????????????????????5???????????+?????????????????????????#???????????????????????? SA:Z:chrUn_KI270589v1,44033,+,46S55M50S,0,5; MC:Z:15M6D73M5I31M27S RG:Z:HM3G2.2 MQ:i:4 AS:i:33 +HM3G2CCXX160308:2:2203:16427:24181 163 chr1 788788 60 147M4S = 789217 580 AATGGAATGGAATGGAATGGAGTGGAGTGGAGTGGACCGGAATGGAATGGAACGGAACGGAATGGAATGGAACGGACTCGAATGGAATGGAACGGAAAGGAATGGCATGGAATGGAACGGACTGGAATGGAACGGGAAGGAACGGAAAGGG ???????????????????????????????+5?5??+&+?+???5+5++5++5+??&&??5?????5????+5++'?5?5?55??+5????########################################################### MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:97 +HK3T5CCXX160204:4:2104:3194:19118 163 chr1 788792 60 108M43S = 788987 347 GAATGGAATGGAATGGAGTGGAGTGGAATGGACCCGAATGGAATGGAACGGAACGGAATGGAATGGAACGCAATCGAGGGGAATGGAACGGAAAGGAATGGAATGGAAGGGAAGTGACGGGAAGTGAGGGGAGGGGAACGTAATGAATGTG ????????????????????????????????????????????????????????????????????????++'+?+5'&?5+????'5?############################################################ MC:Z:3M1D148M RG:Z:HK3T5.4 MQ:i:60 AS:i:83 +HTL2KCCXX160412:6:2112:19816:65634 147 chr1 788923 60 151M = 788531 -543 AACGGAACGGAATGGACTTGAGTGGGATGGAATGGAATTGAATGGACTCTAATGGAATGGACATGAATGGAATGGAATGGACTCAATTGGAATGCAGTTGAATTGAAAGGATCCAAAAGGAATGCAATGGAATGCATTGGAATGGAATGCA ??5????55??????5+????'5??????????5?????????????5?????????????????????????????????5??????????????5???????????????5?5????????5+?????????????????????????? MC:Z:151M RG:Z:HTL2K.6 MQ:i:48 AS:i:151 +HK35MCCXX160204:4:2111:17716:35098 99 chr1 788932 60 71M80S = 788932 71 GAATGGACTTGAGTGGGATGGAATGGAATTGAATGGACTCTAATGGAATGGACATGAATGGAATGGAATGGAGATCGGAAGAGCACACGTCTGAACTCCAGTCACTATCGAGGATCACGTATGCCGTCTTCTGCTGGAAGAAATGAAGGTG ???????????????????????????????????????????????????????????????????????????5%'55??+'55????+5+?????++?+5????############################################ MC:Z:80S71M RG:Z:HK35M.4 MQ:i:60 AS:i:72 XT:i:72 +HK35MCCXX160204:3:2108:30573:38526 83 chr1 788932 60 151M = 788685 -398 GAATGGACTGGGGTGGGATGGAATGGAATTGAATGGACTCTAAGGGAATGGAAATGAATGGAATGGAATGGGCTCAATTGGAATGCAGTTGAATTGAAAGGATCCAAAAGGAATGCAATGGAATGCATTGGAATGGAATGCAATGGACTTG ################################################################??+5???'55?????55+?+?5??+????5???+????????????55??55??+???????????????????????????????+ MC:Z:115M36S RG:Z:HK35M.3 MQ:i:60 AS:i:126 +HK35MCCXX160204:4:2111:17716:35098 147 chr1 788932 60 80S71M = 788932 -71 TATATTTTTTAATGATATGGCGACTAGCGAGATCTACACTCCACTTCACACTCTTTCCCTACGCGACGCTCTTCAGATCTGAATGGAGTTGAGTGGGATGGAATGGAATTGAATGGACTCTAATGGAATGGACATGAATGGAATGGAATGG ###########################??55????+5?????5???5+5+??+?5????5?%%$5+%5+?+55+55+5'????????&?????????????????????????????5?5??????????????????????????????? MC:Z:71M80S RG:Z:HK35M.4 MQ:i:60 AS:i:66 XT:i:72 +HJYFJCCXX160204:7:2101:22110:38280 163 chr1 788933 58 151M = 789379 569 AATGGACTTGAGTGGGATGGAATGGAATTGAATGGACTCTAATGGAATGGACATGAATGGAATGGAATGGACTCAATTGGAATGCAGTTGAATTGAAAGGATCCAAAAGGAATGCAATGGAATGCATTGGAATGGAATGCAATGGACTTGA ????????????????????????????????????????????????????????????????????????????????????'?????????????????'?????????????????????5?+??????????5?'??5???'?5?? MC:Z:123M28S RG:Z:HJYFJ.7 MQ:i:53 AS:i:151 +HK35MCCXX160204:7:2109:16518:61995 163 chr1 788946 60 151M = 789009 214 GGGATGGAATGGAATTGAATGGACTCTAATGGAATGGACATGAATGGAATGGAATGGACTCAATTGGAATGCAGTTGAATTGAAAGGATCCAAAAGGAATGCAATGGAATGCATTGGAATGGAATGCAATGGACTTGAATGGGATGGAATG ???????????????????????????????????????????????????????????????5??????????5??????????????????????????????????????????????????????????55?????????5?????5 MC:Z:151M RG:Z:HK35M.7 MQ:i:60 AS:i:151 +HK3T5CCXX160204:4:2208:17208:11119 163 chr1 788946 45 151M = 789266 471 GGGATGGAATGGAATTGAATGGACTCTAATGGAATGGACATGAATGGAATGGAATGGACTCAATTGGAATGCAGTTGAATTGAAAGGATCCAAAAGGAATGCAATGGAATGCATTGGAATGGAATGCAATGGAATGGAATGGGATGGAATG ?????????????????????????????????????????????????????????????5++??????????????????????????????????????????????????5?????5?????5??????555?5?5??5??+?#### MC:Z:151M RG:Z:HK3T5.4 MQ:i:60 AS:i:141 +HTL2KCCXX160412:6:1219:25195:10152 83 chr1 789083 60 151M = 788936 -298 AATGGGATGGAATGGAATGGAATGGACTCTAATGGAACGGAATGGAATGGAACCGAATGGAATGGAATGGACTTGAATGGAATAGAATGGAATGGAATGGACTCGAAAGAAATGAAATGGAATGGATTCAAATGGAATGGAATGGACACAA ###????55????????????????55?????????????5?????????????????????????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HTL2K.6 MQ:i:60 AS:i:151 +HJYFJCCXX160204:7:1205:5203:35924 99 chr1 789086 60 151M = 789187 245 GGGATGGAATGGAATGGAATGGACTCTAATGGAACGGAATGGAATGGAACCGAATGGAATGGAATGGACTTGAATGGAATAGAATGGAATGGAATGGACTCGAAAGAAATGAAATGGAATGGATTCAAATGGAATGGAATGGACACAAATG ????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????+?????????????????????????? MC:Z:7S144M RG:Z:HK3T5.1 MQ:i:45 AS:i:151 +HM3G2CCXX160308:2:1117:12865:16990 99 chr1 789235 21 77M5I69M = 789666 503 TGGAACAGAATGGAATGGAAAGGACTCGAATAGAATACAATGGAATTTAATGGAATGGTCTCTAATGGAATGGAATGGAATAGACTAGAATGGTATGGAAGGGAATAGACTAGAGTGGAATGGAAGGCAATGGAATGGTCTCCAATTGAAT ???????????????????????????????????????????????????????????????????????????????????????????????????????+???????????5??????????????????????????????????? MC:Z:69S39M10I33M RG:Z:HM3G2.2 MQ:i:21 AS:i:59 +HK35MCCXX160204:8:2202:18984:30158 83 chr1 789237 45 151M = 788898 -490 GAACCAAATGGAATGGAATGGACTCGAATGGAATACAATGGAATTTAATGGAATGGACTCTAATGGAATGGAATGGACTAGAATGGAATAGCACGGAATAGGCTCGAATGGTATGGAATGCAATGGAATGGACTCGAATGGAATAGAACAG ??????????????????????????????????????????????????????55?'+?++??????5?????????????5???????5'+5??????&?5++??????????5+?????????????????????????????????? MC:Z:28M123S RG:Z:HK35M.8 MQ:i:45 AS:i:146 +HTL2KCCXX160412:6:1111:20740:62101 147 chr1 789237 60 151M = 789136 -252 GAACCAAATGGAAGGGAAAGGACTCGAATGGAATACAATGGAATTTAATGGAATGGACTCTAATGGAATGGAATGGACTAGAATGGAATAGAACGGAATAGGCTCGAATGGTATGGAATGCAATGGAATGGACTCGAATGGAATAGAACAG #########################???55?????????55??????????????????????????????????????????????????+??????????????????????????????????????????5???????????????? MC:Z:149M2S RG:Z:HTL2K.6 MQ:i:60 AS:i:141 +HM3G2CCXX160308:2:2213:9557:38965 147 chr1 789238 59 151M = 789076 -313 AAACAAATGGAATGGAATGGACTCGTATGGAATACAATGGAATTTAATGGAATGGACTCTAATGGAATGGCATTGACTAGAATGGAATAGAACGGAATAGGCTCGAATGGTATGGAATGCAATGGANTGGACTCGAATGGAATAGAACAGA ?55???????????????????+'5'?5++55'5+5?+5?????5????????55??55'5++????5?5+??+?????5??++5?????????+????????5??????'????????5??????#???????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:131 +HTL2KCCXX160412:6:2205:10145:11189 99 chr1 789239 9 151M = 789681 493 ACCAAATGGAATGGAATGGACTCGAATGGAATACAATGGAATTTAATGGAATGGACTCTAATGGAATGGAATGGAATAGAACGGAATAGAACGGAATAGACTCGAATGGAATGGAACTGAAAGGAATTGAATCGAATGGAATAGAACAGAA ??????????????????????55????????5????????????????????????55???????5????55??5?????+&??????555&?5?55+?&?55??5??########################################## MC:Z:91S51M9S RG:Z:HTL2K.6 MQ:i:9 AS:i:101 +HM3G2CCXX160308:2:1104:29011:27486 83 chr1 789240 60 151M = 789106 -285 CCAAATGGAATGGAATGGACTCGAATGGAATACAATGGAATTTAATGGAATGGACTCTAATGGAATGGAATGGACTAGAATGGAATAGAACGGAATAGGCTCGAATGGTATGGAATGCAATGGAATGGACTCGAATGGAATAGAACAGAAT ??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:151 +HK3T5CCXX160204:5:2207:25175:46402 83 chr1 789290 60 151M = 788937 -504 TGGACTCAAATTGAAAGGAATGGACTAGAATGGAACAGAACGGAATTGGCTCGAATGGTATGGAATGCAATGGAATGGACTCGAATGGAATAGAACAGAATAGACTAGAATGGTATGGAATGCAATGTAATGGACTCGAATGGAACGGAAT #############################?????5++5??55????+????$'+5?????????????????????5'+'+$???????5?????'?'++????????????5?????????????55?????????????????"????? MC:Z:97M54S RG:Z:HK3T5.5 MQ:i:59 AS:i:126 +HM3G2CCXX160308:2:1122:25804:7726 83 chr1 789356 60 146M5S = 789229 -273 GCAATGGAATGGACTCGAATGGAATAGAACAGGATAGACTAGTATGGTATGGAATGCAATGTAATGGACTCGAATGGAACGGAATGGAATGGACAAGAATTGAATTGAATGGACTGGAATGGAATGGAATGGAATGCAATGGAATGCACTC ?????+????+??++???????????+55+?++5'?55+55''?+5$'???5??55+????'???+??'?5???55???'???5+5???????5??+?????????5???+5?????????????55???5?????????5????+????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:59 AS:i:126 +HTL2KCCXX160412:6:1212:28929:37366 97 chr1 789385 47 117M34S = 16360682 0 CAGAATAGACTAGAATGGTATGGAATGCAATGTAATGGACTCGAATGGAACGGAATGGAATGGACAAGAATTGAATTGAATGGACTGGAATGGAATGGAATGGAATGCAATGGAATGCACTCGAACGGATTGGAATGGAATGGACTCGAAT ??????????????????????????????????????????5???????????????????????????????????????????????????????????????????????????????????????????????????????????# SA:Z:chrUn_GL000216v2,65758,-,58M93S,0,4; MC:Z:90M1I60M RG:Z:HTL2K.6 MQ:i:13 AS:i:107 +HTL2KCCXX160412:6:1217:9485:17149 129 chr1 789385 58 104M47S = 24814 0 CAGAATAGACTAGAATGGTATGGAATGCAATGTAATGGACTCGAATGGAACGGAATGGAATGGACAAGAATTGAATTGAATGGACTGGAATGGAATGGAATGGAATGGAATGGAATGCACTCGAACGGATTGGAATGGAATGGACTCGAAA ???????????????????????????????????????????????????????????????????????5????5??????????????????????????????5?????????5???55??'55??5??5????############# SA:Z:chr21,10698725,-,7S58M86S,0,2; MC:Z:25M5D126M RG:Z:HTL2K.6 MQ:i:0 AS:i:104 +HM3G2CCXX160308:2:2107:5883:16006 99 chr1 789493 60 76M5D68M7S = 789790 448 AATGGAATGTGGTGGGATGGATTCAAATGGAATGGAATGGAATTGAGTGGATTTGAATTGAATGGAATGGAATGGTATGGAATGGAATGGAATGAAATGGACTAGAATGGAATGGAATGGACTCGAATGGAATGGAATGGAATGGAATGTA ?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????## MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:133 +HK3T5CCXX160204:1:2215:26423:37875 145 chr1 789499 39 70M5D48M5I28M = 224014368 223224720 ATGTGGTGGGGTGGAATCAAATGGAATTGAATGGAATTGAGTCGATTTGAATTGAATGGAATGGAATGGTATGGAATGGAATGGAATGAAATGGACTAGAATGGAATGGAATGGACTCGAATGGAATGGAATGGAATGGAATGTACTTGAA #######################?5????5????+55???5?555555+5+5?????+??+55????+5+?5?????55?5?+???????????5???5????????????????&?$????????????????????????????????? MC:Z:151M RG:Z:HK3T5.1 MQ:i:60 AS:i:107 +HK35NCCXX160204:2:1115:12073:67937 163 chr1 789502 60 67M5D72M12S = 789575 224 TGGTGGGATGGATTCAAATGGAATGGAATGGAATTGAGTGGATTTGAATTGAATGGAATGGAATGGTATGGAATGGAATGGAATGAAATGGACTAGAATGGAATGGAATGGACTCGAATGGAATGGAAAGGAATGTACTATGAGAGTAGAG 5++???5????????+????????'????????5????55????5'?5???????????????5??5+?'5???????????????+?+'5?%????+5+55????5????######################################## MC:Z:151M RG:Z:HK35N.2 MQ:i:60 AS:i:123 +HK3T5CCXX160204:5:1112:23916:44468 147 chr1 789503 60 85M5D41M25S = 789476 -158 AGTGGGATGGAATGAAATGGAATGGAATGGAATTGAGTGGAATTGAATGGAATGGAATGGAATGGTATGGAATGGAATGGAATGGAATGGACTAGAATGGAATGGAATGGACTCGAATGGCATGGACAGGAGTGGACCCGAATGAAATGGA ################?5??????'+???'+?+???'5??+++5?55?'???????????????5+????????????????5''?????+5???????'?+????????????????????????????????????????????????? MC:Z:93M5D58M RG:Z:HK3T5.5 MQ:i:48 AS:i:89 +HM3G2CCXX160308:2:2214:7689:20366 145 chr1 789504 57 65M5D86M = 224014004 223224346 GTGGGATGGATTCAAATTGACTGGAATGGAATCGTGTGGATTTGAATTGAATGGAATGGAATGGTATGGAATGGAATGGAATGAAATGGACTAGAATGGAATGGAATGGACTCGAATGGAATGGAANGGAATGTACTTGAATTGTATGGAA ####################################??5?+?????5+????5?????????????????????????????????????????????????????????????????????????#???????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:31 AS:i:118 CO:Z:needed +HK3T5CCXX160204:4:1106:25997:53662 163 chr1 789591 18 91S31M5I20M4S = 789681 226 TAGAATGGAATTGACTCGAACGTAATGGAAATGATTGGACTAGAATGGAATGCAATGAAATGCAATGGAATGAAAGGAGATGGAAGGAAACGAAATGGACTTGAATGGAATGGAATGGACTCAGATAGAATGGAATGCAATGGAATGAATT ??????????????????????????????????????????????????????????????????????????????????????5?+?55???????55???????????????????????5++5?????????############## MC:Z:11S136M4S RG:Z:HK3T5.4 MQ:i:41 AS:i:30 +HM3G2CCXX160308:2:2118:19481:39563 83 chr1 789649 20 32M15I104M = 789482 -303 ATTGGATGGAAAGGAATTGAATGGACTCGAAAGGAAAGGAATGGAATGGAATGGAAAGGAATGGAATGGAATGGAATGGAATGGAATGGACTCGAATGGAATGGAATGGAATGAACTCCAATGGAATGGAATGGACTCGAATAGAATGGAA #############?5?5????++55+?55???+?5?5+???5????'555?5+???5????+5???5?5??5???5?+???5?5??+5????5????5??????????????5?????????????????????????????????????? MC:Z:103S48M RG:Z:HM3G2.2 MQ:i:15 AS:i:100 +HK3T5CCXX160204:7:1220:28818:11927 65 chr1 789650 0 131M20S = 1170 0 TTGTATGGAAGGGAATCAAATGAAATGGAATGGAAGGGAATGAAATGAAATGGAATGGAAGGGAATGAAATAGACTCGAATTGAATGGAATGGAATGAACTCCAATGGAATGGAAAGGACTCGAAAAGAATAGAATAAAATCTGTTGGATT ????????????????????????????????????????????????????????'5???????????????????5???+?????????????????????????????????+5'?55+5??+++??????################# MC:Z:12S80M59S RG:Z:HK3T5.7 MQ:i:0 AS:i:55 +HM3G2CCXX160308:2:2205:8998:69361 81 chr1 789650 60 42M5I104M = 224014451 223224657 TTGTATGGAACGGAATTGAATGGACTCGAAAGGAATGGAATGGAATCGAATGGAATGGAATGGAATGGAATGGAATGGACTCGAATGGAATGGAATGGAATGAACTCCAATGGAATGGAATGGACTCGAATAGAATGGAATGGAATGGAAA ######??5?5????+?+?????5+?5????5???+5????????+$???????????5???????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:56 AS:i:135 +HM3G2CCXX160308:2:1101:18304:53979 99 chr1 789651 40 87M64S = 789985 427 TGTATGGAACGGAATTGAATGGACTCGAAAGGAATGGAATGGAATGGAATGGAATGGAATGGTATGGAATGGAATGGAATGGAATGGGATCGAATGGGGAGGAATGGACTGAAGTCCACTGGAAAGGAGTGGGCTCGACTAGTGAGGAATG ?????????????5????????????????5?????????????5????????????55???5'5???+5??5????+5???%5???++?+5?+5??###################################################### MC:Z:58S93M RG:Z:HM3G2.2 MQ:i:58 AS:i:72 +HTL2KCCXX160412:6:2116:13616:52080 177 chr1 789655 0 8S42M5I50M46S = 10698427 0 GAATTGACTGGAATGGAATTGAATGGACGCGAATGGAATGGAATGTAATGGAAACGAATGGAATGGACTCGAATGGAATGGACTCGAATGGAATGGAATGGAATGGATTCGAACAGAATGGAAAAAATGGAATGGACTTGAATGGAATGGA ############?555??5??????5?5?'5????+????????5?????????????5????????+????????????????5?????????????????????????????????????????????????????????????????? SA:Z:chrUn_KI270442v1,26107,-,85S51M15S,0,4; MC:Z:110S41M RG:Z:HTL2K.6 MQ:i:14 AS:i:51 +HK35MCCXX160204:7:2102:18071:59095 163 chr1 789656 60 25M5I121M = 789957 452 GGAACGGAATTGAATGGACTCGAAAGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGACTCGAATGGAATGGAATGGAATGAACTCCAATGGAATGGAATGGACCCGAATAGAATGGAATGGAATGGAAAGGCCTC ?????5?????????????????????????????????????????????????5?????????????+????????5?5????????+??5????????+???????+?????????+?5??5++55+???????############## MC:Z:151M RG:Z:HK35M.7 MQ:i:60 AS:i:126 +HK35MCCXX160204:4:1201:25276:57196 99 chr1 789657 40 48M15I88M = 789815 300 GAACGGAATTGAATGGACTCGAAAGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGACTGGAATGGAATGGAATGGACTCGAATGGAATGGCATGGACTGGACTCCAATGGAATGGAAAGGAATCGAATAGAATGGAAAGGAATGG ????5?????????+??'5???????????????5?5??5??????????5????5??+???5?5??5+?????55??++??5??????+5??########################################################## MC:Z:3S142M6S RG:Z:HK35M.4 MQ:i:60 AS:i:85 +HTL2KCCXX160412:6:2219:30036:29648 99 chr1 789771 60 151M = 789937 317 CGAATAGAATGGAATGGAATGGAAAGGACTCGAGTGGGATGGAATGGAGTGGAATGGACTCGAATGGGATGGAATGGAATGGAATGGACTCGAATGGAATGGAACCGAAAGGAATGGAACGGAACGGAACGGAACGCAATGGAATCGACCC ????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????5?????????55??????????????# MC:Z:151M RG:Z:HTL2K.6 MQ:i:60 AS:i:151 +HK3T5CCXX160204:5:2101:31649:34693 3169 chr1 789787 9 85S41M25S = 104013 -6315 AGAGAGGACTGGAATGGAACGGACTCGAAGGGAATGGACTGGAATGGAATGGATTGAACGGAATGGACTGGAGCGGAATGGACTAGAATGGAAAGGAAACGAGTGGGATGGAATGGAGTGGAATGGCGCAAAATGGAGAAGAAAAGGAGGA ????????????????????5??55?+??'????5????5???5??????????55??&5???????&5'??5&$5????????+???5?????????????????'??5?5++'?5?5+?5????######################### SA:Z:chrUn_KI270438v1,110472,+,5S97M49S,9,3; MC:Z:5S146M RG:Z:HK3T5.5 AS:i:31 +HK3T5CCXX160204:4:2120:5132:34254 97 chr1 789788 60 119M32S = 41875411 0 AATGGAAAGGACTCGAGTGGGATGGAATGGAGTGGAATGGACTCGAATGGGATGGAATGGAATGGAATGGACTCGAATGGAATGGAACGGAAAGGAATGGAACGGAACGGAACGGAACGGAAGGGACGCGAACGGAATGGAAGGGAAGGGC ????????????????????????????????+???????????????????5?????????????????????5?????????????5????????5????################################################# MC:Z:69S82M RG:Z:HK3T5.4 MQ:i:27 AS:i:114 +HJYFJCCXX160204:4:1214:22313:71401 147 chr1 789789 60 151M = 789758 -182 ATGGAAAGGACTCGAGTGGGATGGAATGGAGTGGAATGGACTCGAATGGGATGGAATGGAATGGAATGGACTCGAATGGAATGGAACCGAAAGGAATGGAACGGAACGGAACGGAACGCAATGGAATCGACCCGAATGGAATGGAATGGAA ???????5+5+?5??5?????????????55???????????5???????????????????????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HJYFJ.4 MQ:i:60 AS:i:151 +HM3G2CCXX160308:2:1122:4188:47738 163 chr1 789790 60 151M = 790062 423 TGGAAAGGACTCGAGTGGGATGGAATGGAGTGGAATGGACTCGAATGGGGTGGAGTGGAGTGGAGTGGAGTGGAATGGGGTGGAAAGGAGAGAAATGGAACGGAGCGGAAGGGAACGGAATGGAAGCGAACGGAATGGAATGGAATGGAAG ???????????????????5????5+????5??????????????+??????5++????5??5?5????+55?+????######################################################################### MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:60 +HK3T5CCXX160204:1:2217:19015:54401 99 chr1 789802 60 151M = 790036 385 GAGTGGGATGGAATGGAGTGGAATGGACTCGAATGGGATGGAATGGAATGGAATGGACTCGAATGGAATGGAACCGAAAGGAATGGAACGGAACGGAACGGAACGCAATGGAATCGACCCGAATGGAATGGAATGGAATGGAATGGAATGG ?????????????????????????????????????????????????????????????????????????5?5?????????????5?5???????????????????5??????????????????????????????????????? MC:Z:151M RG:Z:HK3T5.1 MQ:i:60 AS:i:151 +HM3G2CCXX160308:2:1109:22820:24743 2209 chr1 789802 0 67S41M43S = 85758 0 TGGATTGGAATGGAATGGAATGGGAAGGAATGGAATTGAACAGAGTGGAGCGGAGTGGAGTGGAGTGGAGTGGGATGGAATGGAGTGGAATGGAATCGAATGGGATGGGGAGAGGAAGGGTGAGGGGAGAGTATGGACTGCACGACAGCCG ?????????????????????????????????????????????????????????????????????????+5????55?+?+5???5????+?5?##################################################### SA:Z:chrUn_KI270442v1,11157,+,5S79M67S,4,5; MC:Z:16S73M5D21M5D41M RG:Z:HM3G2.2 AS:i:36 +HM3G2CCXX160308:2:1114:23561:12666 163 chr1 789802 40 73M78S = 790389 686 GAGTGGGATGGAATGGAGTGGAATGGACTCGAATGGGATGGAATGGAATGGAATGGACTCGAATGGAATGGAATGGAATTGACTCGGATGGAATGGAATGGAATGGAGAGCGGAAGAGCGGCGTGGAGGGAAAGAGGGTGAAGTGGAGTGT ?????????????????????????????????????5?????????????????????????????????????????5+5'?+5+55+??55????5??++5????########################################### MC:Z:52S99M RG:Z:HM3G2.2 MQ:i:54 AS:i:73 XT:i:107 +HM3G2CCXX160308:2:2203:27417:52959 2145 chr1 789802 0 90S36M25S = 153291 0 GAGCAGTGTGGATTGGAAGGGAATTGGTTGGAATGGGATGGAACAGAATGTAATGGAGTTGAGTGGAGTGGAGTGGAATGGAATTTAATGGAGTGGGATGGAATGGAGTGGAATGGAGTCGAATGGACTAAAAAGGAATGTAGTGGAATGG ???????????????????????????????????????5??????????55?5?????????5??????????5??????????????5????5?????????5????????5?????+5++55??&5'????????5?55?######## SA:Z:chrY,11293824,-,44S54M53S,0,3; MC:Z:33S104M14S RG:Z:HM3G2.2 AS:i:31 +HK35MCCXX160204:2:1217:6411:29015 3219 chr1 789802 0 50S51M50S = 49601542 -127 GCATGGAAGCCGCGCGTGGGGAAAGGAAGAGGAAGGAAAAGAACCGAAGGGAGTGGGATGGAATGGAGTGGAAGGGAAGGGAATGGGATGGAATGGAATGGTAGGGAAGAGAATGGAATGGAACGAAATGGAATGGAATGGAATGGAATGG #############################################################################?+??++5?+'5+?5+?+?5+??55+?+???????5???????????5????????????5??????????5??? SA:Z:chr5,49601605,-,87S64M,0,2;chrUn_KN707862v1_decoy,9661,+,50S52M49S,0,4; MC:Z:127M24S RG:Z:HK35M.2 AS:i:31 +HK35NCCXX160204:1:1106:7060:45189 2193 chr1 789802 0 26S24M5I49M47S = 31052792 0 GGGAGAGACCGGAGTGGGAGAGGATAGAGTGGGATGGAATGGAGTGGAATCAAGCAGAGTGGAAAGGTATGGAATGGAATGGAATGGACTGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGCAATGGAATGGAATAGAGTG ################################???5+55??55'5+??+?++?'+???55+'555+5+55+5+??????5+??+?555+55????+5+??5???????5?????????????????????????????????????????? SA:Z:chr10,38899886,-,71S80M,0,2; MC:Z:151M RG:Z:HK35N.1 AS:i:32 +HK3T5CCXX160204:2:1201:5802:72227 147 chr1 789802 40 75S69M7S = 789800 -71 ACAGAGGATAGGGGGAACTCGGAGCGCTACGCAGCAAGTGACACGCAATGCCCAGGCATGGGTCGTCCGGTAATGGAGTGGGTTGGAATGGGGTGGAATGGACTTGAATGGGATGGAATGGAATGGAATGGACTTGAATGGAATAGAATGG #####################################################################################????5''&????????+'++?????????????+'?+?????????+'?????????????????5 MC:Z:71M80S RG:Z:HK3T5.2 MQ:i:51 AS:i:49 +HM3G2CCXX160308:2:2217:4046:26888 2161 chr1 789802 0 4S30M5I43M69S = 39775 0 AACAGAGTGGGATGGAATGGAGTGGAATGGAATCACACAGAATTGAATGGAGTGGAATGGAATGGAATGGAATGGAATGGAATGAAATGGAATGGAATGGAATGGAATCAACCCGAGTGAAATTGAATGGAATCGAATGGAATGGAATGGA ##############???5+55555?5?+5+5?+??????55?5?5???5??5??????5?5??5??????????????????????????????????????????????????????????????????????????????????????? SA:Z:chr4,49102543,+,107M44S,0,6; MC:Z:56S68M5I22M RG:Z:HM3G2.2 AS:i:32 +HK3T5CCXX160204:1:1219:26798:12297 83 chr1 789804 60 12S139M = 789541 -402 GGAAGGGCTGGGGTGGGATGGGATGGGGTGGAATGGAATCGAATGGGATGGGATGGAATGGAATGGACTCGAATGGAATGGAACCGAAAGGAATGGAACGGAACGGAACGGAACGCAATGGAATCGACCCGAATGGAATGGAATGGAATGG ############################??5+??5?++?%?++55??55??+?5??+????+????5+5&?+???5+????++?5?+?5??+????+?????++?????????5????5?????5????????????????5????????? MC:Z:28M5D46M77S RG:Z:HK3T5.1 MQ:i:40 AS:i:119 +HJYFJCCXX160204:7:2214:23297:36012 2163 chr1 789805 0 16S70M65S = 49092081 132 AGGGGAAGCCAGGGAATGGGATGGAATGGAGTGGAATGGAATCAAATGGGATAGAATGGATTGTAATGGAATGGAATGGAATGGAATGGAATTGAATGGAATGGAATGGAATGGAATGGAGGGGAATGGAATGGAATGGAATGGAGTGGAA ###############################????55''5??5?5??55'5+?'5???55??5?5?5'?5?5??55???????????????55???????????????????????????5?????????????????????????????? SA:Z:chr4,49092039,+,85M66S,0,3; MC:Z:61S90M RG:Z:HJYFJ.7 AS:i:35 +HK35MCCXX160204:5:1108:26707:70328 113 chr1 789805 0 35S20M5I38M53S = 29568 0 GGGGGTGGAGTGGGGGGGAAGTAAACTGGGTCGCCTGGGATGGGATGGAGTGGAAGGGAGTGGACCGGAATGGGATGGAATGGAATGGAATGGACTCGGCCCGAGTGCAGGGGAATGGAATGGAATGCCATGGAATGGAATCATCCGGAAT #################################################################################??+?+??55??5+'+%'&555???+5'????????555?5??5?5??+?????+???????????????? SA:Z:chr17,21954755,-,111S31M9S,0,0;chrUn_KN707896v1_decoy,17459,+,24M5D53M74S,0,9; MC:Z:119S32M RG:Z:HK35M.5 MQ:i:0 AS:i:32 +HJYFJCCXX160204:7:1201:27194:12490 99 chr1 789942 40 80M5D36M35S = 790137 342 GAATGGAATGGAATGCAAAGGAATCGAATGGGACAGAATGTATTGGAATGGACTCGAATGGAATGGATAGGAATGGACCCAAATGGAATGGAGTGGAATGGTCTCGAGTGGAATGGAAGGGAATGGAATGGACTCGAAGGGAACGGGATGG ??5?????????????????????????????????????????????????????????????????????5????????5???????55??5??????????????5??555????+5???5????5????555??+??55'5?+55?? MC:Z:5S25M4I74M5D43M RG:Z:HJYFJ.7 MQ:i:45 AS:i:70 +HM3G2CCXX160308:2:1217:27509:46085 2177 chr1 789942 0 11S52M88S = 49118886 0 GGAATCATCACGAATGGAATGGAACGCAATAGAATGGGATGGGATGGAATGTATTGGAATGGAAGTTAATCAACACGAATGGAATGGAATGGAAAGGAATGGAAGGAAATGGAATGGAATGGAATGGAATGCAATGAAAAGGAAGGGAAGG ????????????????????????????????????????????????????5????????????5???????5???????????????+??????????????+?+555?????????5?????????????555+?++???######## SA:Z:chr5,49657667,-,20S64M67S,0,3;chrUn_KN707862v1_decoy,4997,-,15S60M76S,0,2; MC:Z:133M18S RG:Z:HM3G2.2 AS:i:32 +HK3T5CCXX160204:2:1206:7608:64421 147 chr1 789956 60 151M = 789752 -355 GCAAGGGAATCGAAGGGGATGGAATGTATTGGAATGGACTCGAATGGCATGGACAGGAGTGGACCCGAATGAAATGGAATGGAATGGAATGGTCTCGAGTGGAATGGGATGGGATGGGATGGGATGCGATGGGATGGGATGGGATGGGATG ########??5???+++?????????++5??'+?55?+?5?????????????5????5???????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HK3T5.2 MQ:i:60 AS:i:141 +HK35MCCXX160204:6:2112:25388:7568 2161 chr1 790055 0 33S40M78S = 49103939 4575 GAGAGGAATGGGATGGGATTTAGTGGGGTGGGATGGAAAGGGATGGGATGGGATGGGATGCGATGGGATGGGAAGGAATGGAACTTAATCAACACGAATGGAACGGAATGCAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGG ################################?5????5?'+?55'??+???5???5+??+'+5?''?5??+??????+??++??????'????5??????+?5??????????????????????????????????????????????? SA:Z:chr4,49099465,+,58M93S,0,1; MC:Z:50S101M RG:Z:HK35M.6 AS:i:35 +HTL2KCCXX160412:6:1209:30289:48476 83 chr1 790055 60 28S123M = 789618 -560 AAGTGGAAGGGGATGAGGGGGTGGCGGTTGGAATGGGATGGGATGGGATGGAATGGAATGGAATGGGATGGGATGGGATGGGATGGGATGGGATTGGAATGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATAG ########################################?5??'?5????5??''5?5''555'5'55%5'??+5'55555????5???????55'+55?5??5???5'55????????????'??????5??????????????????? MC:Z:106M45S RG:Z:HTL2K.6 MQ:i:60 AS:i:78 +HM3G2CCXX160308:2:1209:19654:58919 83 chr1 790056 60 151M = 790008 -199 GGAAGGGGGTGGGATGGGGTGGGATGGGATGGGATGGGATGGGTTGGGATGGGATGGGATGGGATGGATTGTGATGGGATAGAAGGGAATGGAATGGAATGGAATGGAATGGAATGGAATAGACTCGAAAGGATTGGAATGGAATGGACTC ##############??55'????55?+55??55555??5?5?'+??++5+??5?+?55+?5????????5'++?+??5?5??5?55?55????55+???5'?5??????????????????????5????5???????????????????? MC:Z:111S40M RG:Z:HM3G2.2 MQ:i:60 AS:i:102 +HM3G2CCXX160308:2:2215:12987:67234 83 chr1 790056 60 5S146M = 789833 -369 GGAGGGGAATGGGAGGGGATGGGATGGGATGAGAGGGGATGGGAGGGGATGGGATGGGATGGGATGGGATGGATTGGAATGGAATAGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATAGACTCGAAAGGATTGGAATGGAATG ##########??'55??+55???5???55++5??????5++%'55?5555???5???5+????5???????????+?????????????????????????????5????????????????????????????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:121 +HM3G2CCXX160308:2:2101:8968:67234 113 chr1 790066 10 110M6D11M1I29M = 49104531 0 GGGATGGGATGGGATGGGATGGGAGGGGATGGGAGGGGATGGGATGGGATGGGATGGACTGGAATGGAATGGGATGGAATGGAATGGAATGGTACGGAATGGAATGGAATGGAACGAATGGTAATGGAATGGAATTGAATGGAATGGAATG ###################################5????55+5?5++???55??5?55????5?5555??????555????55??55???5555?????'??????55??5?55?5????????5????????????????????????? SA:Z:chrUn_KN707896v1_decoy,2751,+,103M48S,0,4; MC:Z:108S43M RG:Z:HM3G2.2 MQ:i:0 AS:i:65 +HTL2KCCXX160412:6:2110:6644:12718 99 chr1 790107 60 151M = 790287 331 GGATGGGATGGGATGGATTTGAATGGAATAGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATAGACTCGAAAGGATTGGAATGGAATGGACTCGAATGGAATGGAATGGACATGAATGGAACGGAATGGAATGGAATGAACTCG ?????????????????????????????????????????????????????????????????????????????????????????????????5??????????????????????????????5????5????5???5????#### MC:Z:151M RG:Z:HTL2K.6 MQ:i:60 AS:i:151 +HM3G2CCXX160308:2:2123:23815:34729 2225 chr1 790107 0 60M91S = 49103555 0 GGATAGAATGGGATGGAATGGAAGGGGATAGAATGGAATGGAATGGAATGGAATGGAATGAACCCGTGTGCAATGGAATGGAATGGAATGGAATGGAAGGGATTCAACGCGAATGGAATCAACTCGATTGCAATGGAATGGAATGGAATTG ################################?+????5??5?+???5?????5????5?????+???5?5???????'????+????5????5????+????????5'?????????????????????????????????????????? SA:Z:chr10,41859902,-,31S67M53S,0,2;chrUn_KI270442v1,91567,-,115S36M,0,1;chrUn_JTFH01000872v1_decoy,9,+,41M5I74M5I26M,12,22; MC:Z:38S104M9S RG:Z:HM3G2.2 AS:i:33 +HTL2KCCXX160412:5:2208:8400:68605 2129 chr1 790108 0 63M88S = 41874802 0 GAGGGGATGGGATGGGTTGGTAGGGAATAGAATGGAATGGAATGGAATGGAATGGAATGGAATTTAATGGAATGGAATGGAATGAACCCGTGTGCAATGGAATGGAATGGAATGGAATGGAATGTATTCAACTCGAATGGAATCAACTCGA ########################################????'???5?55+'?5??5?????????5??????5??????????5?+?????5???????????????????????????????????????????????????????? SA:Z:chr4,49137211,+,27S92M32S,0,5; MC:Z:6S103M42S RG:Z:HTL2K.5 AS:i:40 +HK35MCCXX160204:7:2119:9019:57548 81 chr1 790137 2 50S52M49S = 69931 0 GAAGGGGGGGGGTTGTAGGGGAGTGGAATGAAATGGGAGGGAAAGGAAGGGAATGGAAGGGAAGGGAATGGAATGGAATGGAATGGAATAGACTCGAAAGGAAAGGAACCGCATGGAAGCTACTATAATGGAATGGACTGGAATGGAATGG ###############################?5??5+?5??++???55+???????????+?????????????????????????????5???+???????????????5?????????????????????????????????????5?? MC:Z:32M119S RG:Z:HK35M.7 MQ:i:22 AS:i:42 +HK35NCCXX160204:1:2120:5416:32777 3161 chr1 790137 2 21M5I28M5I33M59S = 74288 0 GAATGGACTAGAATGGAATGGACATAAATGGAATGAAAAGGAATAGACTGGAAATAAATGAATTGGAATGGAATGGACTCGAATGGAATGGACTAGATTCGAATGGAATGGAAAAGAATGGAATGGATTCGAATGGAATGTGATGGAATGG ###########################?55????5+???5???+??????????????5???5?????5??????????5?????????????????5????????????????????????????????????????????????????? SA:Z:chrUn_KI270442v1,74288,-,51S100M,60,6; RG:Z:HK35N.1 AS:i:31 +HM3G2CCXX160308:2:2118:24434:49443 147 chr1 790203 53 151M = 789932 -422 AATCGAATGGAATGGAAGGGACATGAATGGAACGGAATGGAATGGAATTAACTCTAATGGAATACCATGGAATTTAACGGAATGGAATCCAATGGAATAGACCTGAATGGAATAGAATGGAAGAGACTCGAATGGAATGGAATACAATGGA #####???555??5+55?5??+?5??????5?&?+55+555?5+5555'??+++'?55?+??55++?'5???+?555&'+?5++5555++5?5+555'555++?5?55???5'????55???+5??5?5?????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:104 +HTL2KCCXX160412:5:2123:31030:28980 2193 chr1 790205 0 60S47M44S = 56760780 13141 GGAGATGGAATGGAAAGGCACGGACTGCAACCGAAATCGACACACTGGAGCCGAAACTAATCGAATGGGATGGAATGGACATGAATGGAACGGAATGGAATGGAATGGAATGGAATGGAAGGGAATCGAACGGAAGGGAATCGAAGGGAAA ####################################################################################???5+5%+?555''????????''???5??????????????????????????????????????? SA:Z:chrY,56747588,-,98S53M,0,3; MC:Z:151M RG:Z:HTL2K.5 AS:i:42 +HK35MCCXX160204:4:1203:32086:55455 163 chr1 790206 40 114M37S = 790528 473 CGAATGGAATGGAATGGACATGAATGGAACGGAATGGAATGGAATGAACGCGAATGGCATGGAAGGGAATTTAATGGAATGGACTATAATGGAATGGACTGGACTGGAATAGAAGCGAAGAGAAACGGATGGGAGGGAACGGAGGCGAGAG ????????5???????????+??????++55?????????????+'?5&$'$+?+5?'?+''?+'5??++++??5??????'?'???################################################################ MC:Z:151M RG:Z:HK35M.4 MQ:i:60 AS:i:74 +HJYFJCCXX160204:6:1109:5873:34166 65 chr1 790207 0 30S97M24S = 10693901 0 TAATGGAATGAAATTTACTCAAATGGAGTGGAATGGAATGGAATGGACATGAATGGAACAGAATGGAATGGAATGTTCTCGAATGGAACACAAAGGAATTTAATGGAATGGACTCTAATGGAATGGAAGGGAATGGAATGGAATGAACTTG ????????????????????????????5????????????????????5?????????????????????????????'5??????????????????????55+????????'?????5???????+?+?????????????+??#### SA:Z:chr10,38487562,+,116S31M4S,0,0; MC:Z:33S99M19S RG:Z:HJYFJ.6 MQ:i:7 AS:i:72 +HK35MCCXX160204:3:2217:25611:48529 163 chr1 790257 33 151M = 790402 296 GAATGGAATACAATGGAATTTAATGGAATGGACTCTAATGGAATGGACTTGAATGGAATAGAATGGAAGAGACTCGAATGGAATGGAATGCAATGGAATGGAATGGAATGGAATGTAATGGAATGGACTGGAGTGGAATAGAATGGAAGGG ???????????????????????????????????????????????????????????????????5????5??5??5????????5++5??????+?5'?+++??+5???####################################### MC:Z:151M RG:Z:HK35M.3 MQ:i:60 AS:i:114 +HM3G2CCXX160308:2:1222:14418:40425 147 chr1 790332 60 151M = 790243 -240 GAATGGAATGGAATACAATGGAATTTAATGGAATGGACTCTAATGGAATGGACTTGAATGGAATAGAATGGAAGAGACTCGAATGGAATGGAATGCAATGGAATGGACTCGAATGGAATGGAATGGAATTGACTCAGATGGAATGGAATGG ????????????5??????5???5???5?????5????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? MC:Z:151M RG:Z:HM3G2.2 MQ:i:60 AS:i:151 CO:Z:needed +HTL2KCCXX160412:5:1214:2920:10240 99 chr1 790408 27 63M5I73M10S = 790478 221 ACTCTAATGGAATGGAATGCAAGGGACTGGACTCAAATGGAATGGAATGGAATTGACTCAAATGGTAGGGAAAGGAATTGAATGGACTCGAAAGGATTGGGATGGAGTACAGGGGAATGGACTCGAATGGAATGGAATGGACGGAACTCGA +?55??????????????????????&5????????5????????????????????????????5?+??????????????5??????????????????????????5??5???5???????????????+????5???########## MC:Z:151M RG:Z:HTL2K.5 MQ:i:27 AS:i:65 CO:Z:needed +HTL2KCCXX160412:5:1214:2920:10240 147 chr1 790478 27 151M = 790408 -221 AATTGAATAGAATCGAAAGGAATGGGATGGAGTACAGTGGAATGAACTCGAATAGAATGGAAAGGAATGAACTCGAATGCAATGTTATGCAATGGAATGGACCCAAATGGAATGGAATGTAATTGACTCGAATGGAATTGAATGGAATGGA ################################????5?+??55'55???????5????''??5??????????&???????????????5??????????????????????????????????????+?????????????????????? MC:Z:63M5I73M10S RG:Z:HTL2K.5 MQ:i:27 AS:i:59 diff -Nru htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/tribble/gff/canonical_gene.gff3 htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/tribble/gff/canonical_gene.gff3 --- htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/tribble/gff/canonical_gene.gff3 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/tribble/gff/canonical_gene.gff3 2020-07-08 16:28:01.000000000 +0000 @@ -1,8 +1,8 @@ ##gff-version 3.2.1 ##sequence-region ctg123 1 1497228 -ctg123 . gene 1000 9000 . + . ID=gene00001;Name=EDEN -ctg123 . TF_binding_site 1000 1012 . + . ID=tfbs00001;Parent=gene00001 -ctg123 . mRNA 1050 9000 . + . ID=mRNA00001;Parent=gene00001;Name=EDEN.1 +ctg123 . gene 1000 9000 1.03e03 + . ID=gene00001;Name=EDEN +ctg123 . TF_binding_site 1000 1012 0.999 + . ID=tfbs00001;Parent=gene00001 +ctg123 . mRNA 1050 9000 1.37 + . ID=mRNA00001;Parent=gene00001;Name=EDEN.1 ctg123 . mRNA 1050 9000 . + . ID=mRNA00002;Parent=gene00001;Name=EDEN.2 ctg123 . mRNA 1300 9000 . + . ID=mRNA00003;Parent=gene00001;Name=EDEN.3 ctg123 . exon 1300 1500 . + . ID=exon00001;Parent=mRNA00003 diff -Nru htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/tribble/gff/feature_extends_past_circular_region.gff3 htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/tribble/gff/feature_extends_past_circular_region.gff3 --- htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/tribble/gff/feature_extends_past_circular_region.gff3 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/tribble/gff/feature_extends_past_circular_region.gff3 2020-07-08 16:28:01.000000000 +0000 @@ -1,4 +1,4 @@ ##gff-version 3 ##sequence-region 1 1 10 1 . region 1 10 . . . ID=chromosome:1; Is_circular=true -1 . biological_region 7 11 . . . \ No newline at end of file +1 . biological_region 7 11 . . . . \ No newline at end of file diff -Nru htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/tribble/gff/url_encoding.gff3 htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/tribble/gff/url_encoding.gff3 --- htsjdk-2.22.0+dfsg/src/test/resources/htsjdk/tribble/gff/url_encoding.gff3 2020-05-21 22:48:09.000000000 +0000 +++ htsjdk-2.23.0+dfsg/src/test/resources/htsjdk/tribble/gff/url_encoding.gff3 2020-07-08 16:28:01.000000000 +0000 @@ -1,3 +1,3 @@ ##gff-version 3 ##sequence-region 1 1 10 -the%20contig a%20source%20%26%20also%20a%20str*)%25nge%20source a%20region 1 10 . . . ID=this%20is%20the%20ID%20of%20this%20wacky%20feature%5E%26%25%23%23%24%25*%26%3E%2C.%20%2C.; Another%20key=Another%3Dvalue \ No newline at end of file +the contig a source %26 also a str*)%25nge source a region 1 10 . . . ID=this is the ID of this wacky feature%5E%26%25%23%23%24%25*%26%3E%2C. %2C.; Another key=Another%3Dvalue,And a second%2C value \ No newline at end of file