diff -Nru micommons-0.9/.flow micommons-1.0.3/.flow --- micommons-0.9/.flow 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/.flow 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -[branchname] -master = default -develop = develop -feature = feature/ -release = release/ -hotfix = hotfix/ -support = support/ - diff -Nru micommons-0.9/.hg_archival.txt micommons-1.0.3/.hg_archival.txt --- micommons-0.9/.hg_archival.txt 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/.hg_archival.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -repo: 256a85dfd848a123631913fbf5215ad4892ba477 -node: cf232f28ff5ae38774a32f53d8897048a5e3de99 -branch: default -tag: v0.9 diff -Nru micommons-0.9/.hgignore micommons-1.0.3/.hgignore --- micommons-0.9/.hgignore 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/.hgignore 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -target/ -nbactions.*\.xml$ -nb-configuration.xml$ -\.log$ -\.log\. -\.iml$ -.idea\/ -\.DS_Store$ diff -Nru micommons-0.9/debian/changelog micommons-1.0.3/debian/changelog --- micommons-0.9/debian/changelog 2013-08-15 12:38:34.000000000 +0000 +++ micommons-1.0.3/debian/changelog 2013-10-10 19:46:32.000000000 +0000 @@ -1,3 +1,9 @@ +micommons (1.0.3-1~raring) raring; urgency=low + + * 1.0.3 Release. + + -- Dmitriy Bolotin Thu, 10 Oct 2013 18:24:35 +0400 + micommons (0.9-2~raring) raring; urgency=low * Jar is now present in /usr/share/java diff -Nru micommons-0.9/pom.xml micommons-1.0.3/pom.xml --- micommons-0.9/pom.xml 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/pom.xml 2013-10-09 08:03:10.000000000 +0000 @@ -24,9 +24,10 @@ com.milaboratory micommons - 0.9 + 1.0.3 jar MiCommons + http://mitcr.milaboratory.com/ org.sonatype.oss @@ -112,11 +113,16 @@ - scm:hg:http://bitbucket.org/milaboratory/micommons - v0.9 - https://bitbucket.org/milaboratory/micommons/src/?at=v0.9 + scm:hg:http://bitbucket.org/milaboratory/mitcr + v1.0.3 + https://bitbucket.org/milaboratory/mitcr/src/?at=1.0.3 + + YouTrack + http://dev.milaboratory.com/ + + diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/Alphabet.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/Alphabet.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/Alphabet.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/Alphabet.java 2013-10-09 08:03:10.000000000 +0000 @@ -50,7 +50,7 @@ */ public byte codeFromSymbol(char symbol); - public SequenceBuilderFactory getSequenceBuilderFactory(); + public SequenceBuilderFactory getBuilderFactory(); /** * Id of alphabet diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/NucleotideSQPair.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/NucleotideSQPair.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/NucleotideSQPair.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/NucleotideSQPair.java 2013-10-09 08:03:10.000000000 +0000 @@ -21,12 +21,8 @@ package com.milaboratory.core.sequence; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; import com.milaboratory.core.sequence.quality.SequenceQualityPhred; -import com.milaboratory.core.sequence.util.NucleotideRCSequence; -import com.milaboratory.core.sequence.util.NucleotideSubSequence; -import com.milaboratory.core.sequence.util.SequenceQualityUtils; -import com.milaboratory.core.sequence.util.SubSequenceQuality; +import com.milaboratory.core.sequence.quality.SequenceQualityUtils; import java.io.Serializable; @@ -42,30 +38,30 @@ public final class NucleotideSQPair implements Serializable { private static final long serialVersionUID = 1L; - private NucleotideSequence sequence; - private SequenceQuality quality; + private final NucleotideSequence sequence; + private final SequenceQualityPhred quality; public NucleotideSQPair(String sequence) { - this(NucleotideSequenceImpl.fromSequence(sequence)); + this(new NucleotideSequence(sequence)); + } + + public NucleotideSQPair(NucleotideSequence sequence) { + this.sequence = sequence; + this.quality = SequenceQualityUtils.createGoodQualityObject(sequence.size()); } public NucleotideSQPair(String sequence, String quality) { - this(NucleotideSequenceImpl.fromSequence(sequence), + this(new NucleotideSequence(sequence), new SequenceQualityPhred(quality)); } - public NucleotideSQPair(NucleotideSequence sequence, SequenceQuality quality) { + public NucleotideSQPair(NucleotideSequence sequence, SequenceQualityPhred quality) { if (sequence.size() != quality.size()) throw new IllegalArgumentException(); this.sequence = sequence; this.quality = quality; } - public NucleotideSQPair(NucleotideSequence sequence) { - this.sequence = sequence; - this.quality = SequenceQualityUtils.getGoodQualityObject(sequence.size()); - } - /** * Generates a new instance of NucleotideSQPair containing sub sequence. If to < from then reverse complement * will be returned. @@ -73,27 +69,28 @@ * @param from inclusive * @param to exclusive */ - public NucleotideSQPair getSubPairCopy(int from, int to) { + //TODO test required + public NucleotideSQPair getRange(int from, int to) { if (from >= size() || from < 0 || to > size() || to < 0) throw new IndexOutOfBoundsException("\"from\" or \"to\" are out of range."); NucleotideSequence s; - SequenceQuality q; + SequenceQualityPhred q; if (to < from) { - s = new NucleotideRCSequence(new NucleotideSubSequence(sequence, to + 1, from - to)); - q = new SubSequenceQuality(quality, to + 1, from - to).reverse(); + s = sequence.getRange(to + 1, from + 1).getReverseComplement(); //TODO possible optimization point + //new NucleotideRCSequence(new NucleotideSubSequence(sequence, to + 1, from - to)); + q = quality.getRange(to + 1, from + 1).reverse(); //TODO possible optimization point + //new SubSequenceQuality(quality, to + 1, from - to).reverse(); } else { - s = new NucleotideSubSequence(sequence, from, to - from); - q = new SubSequenceQuality(quality, from, to - from); + s = sequence.getRange(from, to); + //new NucleotideSubSequence(sequence, from, to - from); + q = quality.getRange(from, to); + //new SubSequenceQuality(quality, from, to - from); } return new NucleotideSQPair(s, q); } public NucleotideSQPair getRC() { - return new NucleotideSQPair(new NucleotideRCSequence(sequence), quality.reverse()); - } - - public NucleotideSQPair getDeepCopy() { - return new NucleotideSQPair(NucleotideSequenceImpl.fromSequence(sequence), SequenceQualityUtils.copy(quality)); + return new NucleotideSQPair(sequence.getReverseComplement(), quality.reverse()); } public int size() { @@ -104,7 +101,7 @@ return sequence; } - public SequenceQuality getQuality() { + public SequenceQualityPhred getQuality() { return quality; } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/SequenceQuality.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/SequenceQuality.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/SequenceQuality.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/SequenceQuality.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,101 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequence; - - -/** - * Parent class for all sequence quality classes. - * - * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) - * @author Shugay Mikhail (mikhail.shugay@gmail.com) - */ -public interface SequenceQuality { - Class getBaseClass(); - - /** - * Get the probability of error (e.g. nucleotide substitution) at given sequence point - * - * @param coord coordinate in sequence - * @return probability of error - */ - float probabilityOfErrorAt(int coord); - - /** - * Get the log10 of probability of error (e.g. nucleotide substitution) at given sequence point - * - * @param coord coordinate in sequence - * @return log10 of probability of error - */ - float log10ProbabilityOfErrorAt(int coord); - - /** - * Get the raw sequence quality value (in binary format) at given sequence point - * - * @param coord coordinate in sequence - * @return raw sequence quality value - */ - byte rawQualityValue(int coord); - - /** - * Sets the raw sequence quality value (in binary format) at given sequence point - * - * @param coord coordinate in sequence - */ - void setRawQualityValue(int coord, byte value); - - /** - * Get the worst raw sequence quality value (in binary format) in all sequence - * - * @return worst raw sequence quality value - */ - byte minRawQuality(); - - /** - * Merges two quality arrays by choosing the best quality at each position - * - * @param quality other quality array - */ - void mergeWith(SequenceQuality quality); - - /** - * Gets the qualities of characters in reverse order - * - * @return reverse of quality array - */ - SequenceQuality reverse(); - - /** - * Gets the size of quality array - * - * @return size of quality array - */ - int size(); - - byte[] encode(byte offset); - - /** - * Default quality thresholds - */ - public static class Constants { - public static final byte goodQuality = 34; - public static final byte badQuality = 3; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/aminoacid/AminoAcidAlphabet.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/aminoacid/AminoAcidAlphabet.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/aminoacid/AminoAcidAlphabet.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/aminoacid/AminoAcidAlphabet.java 2013-10-09 08:03:10.000000000 +0000 @@ -103,7 +103,7 @@ }; @Override - public SequenceBuilderFactory getSequenceBuilderFactory() { + public SequenceBuilderFactory getBuilderFactory() { return FACTORY; } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/aminoacid/CDRAminoAcidAlphabet.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/aminoacid/CDRAminoAcidAlphabet.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/aminoacid/CDRAminoAcidAlphabet.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/aminoacid/CDRAminoAcidAlphabet.java 2013-10-09 08:03:10.000000000 +0000 @@ -107,7 +107,7 @@ }; @Override - public SequenceBuilderFactory getSequenceBuilderFactory() { + public SequenceBuilderFactory getBuilderFactory() { return FACTORY; } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/motif/SSequencingReadBindings.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/motif/SSequencingReadBindings.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/motif/SSequencingReadBindings.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/motif/SSequencingReadBindings.java 2013-10-09 08:03:10.000000000 +0000 @@ -40,7 +40,7 @@ return new LowQualityIndicator() { @Override public boolean isLowQuality(int point) { - return object.getData().getQuality().rawQualityValue(point) < minQuality; + return object.getData().getQuality().value(point) < minQuality; } }; } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearch.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearch.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearch.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearch.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,24 @@ +package com.milaboratory.core.sequence.motif.search; + +import com.milaboratory.core.sequence.motif.NucleotideMotif; + +public class NucleotideMotifSearch { + private final NucleotideMotif motif; + private final NucleotideMotifSearchOptions options; + private final int exactRegionFrom, exactRegionTo; + + public NucleotideMotifSearch(NucleotideMotif motif, NucleotideMotifSearchOptions options, int exactRegionFrom, int exactRegionTo) { + this.motif = motif; + this.options = options; + this.exactRegionFrom = exactRegionFrom; + this.exactRegionTo = exactRegionTo; + } + +// public int nextMatch(int from, NucleotideSequence sequence) { +// if (exactRegionFrom >= 0) { //If exact region exists +// for (int i = 0; i < sequence.size(); ++i) { +// +// } +// } +// } +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearchOptions.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearchOptions.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearchOptions.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/motif/search/NucleotideMotifSearchOptions.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,17 @@ +package com.milaboratory.core.sequence.motif.search; + +public class NucleotideMotifSearchOptions { + public static NucleotideMotifSearchOptions NORMAL_WITH_TRUNCATION = new NucleotideMotifSearchOptions(2, 1, 1, 2, 2); + public static NucleotideMotifSearchOptions NORMAL_WITHOUT_TRUNCATION = new NucleotideMotifSearchOptions(2, 1, 1, 2, 0); + + public final int maxMismatches, maxDeletions, maxInsertions, maxTotalErrors; + public final int maxLeftTruncation; + + public NucleotideMotifSearchOptions(int maxMismatches, int maxDeletions, int maxInsertions, int maxTotalErrors, int maxLeftTruncation) { + this.maxMismatches = maxMismatches; + this.maxDeletions = maxDeletions; + this.maxInsertions = maxInsertions; + this.maxTotalErrors = maxTotalErrors; + this.maxLeftTruncation = maxLeftTruncation; + } +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabet.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabet.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabet.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabet.java 2013-10-09 08:03:10.000000000 +0000 @@ -77,7 +77,7 @@ } @Override - public SequenceBuilderFactory getSequenceBuilderFactory() { + public SequenceBuilderFactory getBuilderFactory() { return NucleotideSequenceBuilder.FACTORY; } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabetWithN.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabetWithN.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabetWithN.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideAlphabetWithN.java 2013-10-09 08:03:10.000000000 +0000 @@ -72,7 +72,7 @@ } @Override - public SequenceBuilderFactory getSequenceBuilderFactory() { + public SequenceBuilderFactory getBuilderFactory() { throw new UnsupportedOperationException(); } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequence.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequence.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequence.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequence.java 2013-10-09 08:03:10.000000000 +0000 @@ -22,6 +22,11 @@ import com.milaboratory.core.sequence.Alphabet; import com.milaboratory.core.sequence.Sequence; +import com.milaboratory.util.Bit2Array; + +import java.io.Serializable; + +import static com.milaboratory.core.sequence.nucleotide.NucleotideAlphabet.INSTANCE; /** * Nucleotide sequence. @@ -29,9 +34,83 @@ * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) * @author Shugay Mikhail (mikhail.shugay@gmail.com) */ -public abstract class NucleotideSequence extends Sequence { +public final class NucleotideSequence extends Sequence implements Serializable { + private static final long serialVersionUID = 1L; + final Bit2Array data; + + public NucleotideSequence(String sequence) { + data = new Bit2Array(sequence.length()); + byte code; + for (int i = 0; i < sequence.length(); ++i) + data.set(i, INSTANCE.codeFromSymbol(sequence.charAt(i))); + } + + public NucleotideSequence(char[] sequence) { + data = new Bit2Array(sequence.length); + byte code; + for (int i = 0; i < sequence.length; ++i) + data.set(i, INSTANCE.codeFromSymbol(sequence[i])); + } + + + public NucleotideSequence(Bit2Array data) { + this.data = data.clone(); + } + + NucleotideSequence(Bit2Array data, boolean unsafe) { + this.data = data; + } + + @Override + public byte codeAt(int position) { + return (byte) data.get(position); + } + + @Override + public int size() { + return data.size(); + } + + public NucleotideSequence getRange(int from, int to) { + return new NucleotideSequence(data.getRange(from, to), true); + } + + /** + * Returns reverse complement of this sequence. + * + * @return reverse complement sequence + */ + public NucleotideSequence getReverseComplement() { + return new NucleotideSequence(transformToRC(data), true); + } + + public Bit2Array getInnerData() { + return data.clone(); + } + @Override public Alphabet getAlphabet() { - return NucleotideAlphabet.INSTANCE; + return INSTANCE; + } + + public static NucleotideSequence fromStorage(Bit2Array b2a) { + return new NucleotideSequence(b2a.clone(), true); + } + + public static NucleotideSequence fromSequence(byte[] sequence, int offset, int length) { + Bit2Array storage = new Bit2Array(length); + for (int i = 0; i < length; ++i) + storage.set(i, INSTANCE.codeFromSymbol((char) sequence[offset + i])); + return new NucleotideSequence(storage, true); + } + + private static Bit2Array transformToRC(Bit2Array data) { + Bit2Array newData = new Bit2Array(data.size()); + int reverseCoord; + for (int coord = 0; coord < data.size(); coord++) { + reverseCoord = data.size() - 1 - coord; + newData.set(coord, (~data.get(reverseCoord)) & 0x3); + } + return newData; } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregator.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregator.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregator.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregator.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,119 +0,0 @@ -package com.milaboratory.core.sequence.nucleotide; - -import com.milaboratory.util.Bit2Array; - -public class NucleotideSequenceAggregator { - final long[] observations; - long sequences = 0; - final int length, delta; - - public NucleotideSequenceAggregator(int length, int delta) { - this.observations = new long[(length + 2 * delta) * 4]; - this.length = length; - this.delta = delta; - } - - public long getObservations(int position, int code) { - if (code < 0 || code >= 4 || position < 0 || position >= length + 2 * delta) - throw new IllegalArgumentException(); - - return observations[position * 4 + code]; - } - - private long getScore(NucleotideSequence sequence, int d) { - long result = 0; - - for (int i = sequence.size() - 1; i >= 0; --i) - result += observations[(delta + i + d) * 4 + sequence.codeAt(i)]; - - return result; - } - - public long getSequences() { - return sequences; - } - - private void putSequence(NucleotideSequence sequence, int d) { - ++sequences; - for (int i = sequence.size() - 1; i >= 0; --i) - ++observations[(delta + i + d) * 4 + sequence.codeAt(i)]; - } - - public void putSequence(NucleotideSequence sequence) { - if (sequence.size() != length) - throw new IllegalArgumentException(); - - if (sequences == 0) { - putSequence(sequence, 0); - return; - } - - int maxD = 0; - long maxScore = getScore(sequence, 0), score; - - for (int d = 1; d <= delta; ++d) { - if ((score = getScore(sequence, d)) > maxScore) { - maxScore = score; - maxD = d; - } - if ((score = getScore(sequence, -d)) > maxScore) { - maxScore = score; - maxD = -d; - } - } - - putSequence(sequence, maxD); - } - - public NucleotideSequence getSequence(double minPercent) { - int start = 0, end = 0, lastBad = -1; - byte maxCode, code; - long maxCount, sum; - double percent; - - for (int i = 0; i < length + delta * 2; ++i) { - //maxCode = -1; - maxCount = 0; - sum = 0; - - for (code = 0; code < 4; ++code) { - sum += observations[i * 4 + code]; - if (maxCount < observations[i * 4 + code]) { - maxCount = observations[i * 4 + code]; - //maxCode = code; - } - } - - if (sum == 0) { - lastBad = i; - continue; - } - - percent = 1.0 * maxCount / sum; - - if (percent < minPercent) - lastBad = i; - else if (end - start < i - lastBad) { - end = i + 1; - start = lastBad + 1; - } - } - - Bit2Array st = new Bit2Array(end - start); - - for (int i = start; i < end; ++i) { - maxCode = -1; - maxCount = 0; - - for (code = 0; code < 4; ++code) - if (maxCount < observations[i * 4 + code]) { - maxCount = observations[i * 4 + code]; - maxCode = code; - } - - st.set(i - start, maxCode); - } - - return NucleotideSequenceImpl.fromStorage(st); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceBuilder.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceBuilder.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceBuilder.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceBuilder.java 2013-10-09 08:03:10.000000000 +0000 @@ -25,7 +25,7 @@ import com.milaboratory.util.Bit2Array; /** - * Creates {@link NucleotideSequenceImpl}. + * Creates {@link NucleotideSequence}. */ public class NucleotideSequenceBuilder implements SequenceBuilder { public static final SequenceBuilderFactory FACTORY = new SequenceBuilderFactory() { @@ -47,7 +47,7 @@ @Override public NucleotideSequence create() { - return NucleotideSequenceImpl.fromStorage(storage); + return new NucleotideSequence(storage); } @Override @@ -57,15 +57,6 @@ @Override public void copyFrom(NucleotideSequence sequence, int otherOffset, int thisOffset, int length) { - if (sequence instanceof NucleotideSequenceImpl) - storage.copyFrom(((NucleotideSequenceImpl) sequence).data, otherOffset, thisOffset, length); - else { - if (thisOffset < 0 || thisOffset + length > storage.size() || - otherOffset < 0 || otherOffset + length > sequence.size()) - throw new IndexOutOfBoundsException(); - - for (int i = 0; i < length; ++i) - storage.set(i + thisOffset, sequence.codeAt(i + otherOffset)); - } + storage.copyFrom(sequence.data, otherOffset, thisOffset, length); } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceImpl.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceImpl.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceImpl.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceImpl.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,120 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequence.nucleotide; - -import com.milaboratory.core.sequence.Sequence; -import com.milaboratory.util.Bit2Array; - -import java.io.Serializable; - -/** - * Main implementation of nucleotide sequence. - * - * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) - * @author Shugay Mikhail (mikhail.shugay@gmail.com) - */ -public class NucleotideSequenceImpl extends NucleotideSequence implements Serializable { - private static final long serialVersionUID = 1L; - - final Bit2Array data; - - protected NucleotideSequenceImpl(Bit2Array data) { - this.data = data; - } - - @Override - public byte codeAt(int position) { - return (byte) data.get(position); - } - - @Override - public Sequence clone() { - return new NucleotideSequenceImpl(data.clone()); - } - - @Override - public int size() { - return data.size(); - } - - public Bit2Array getInnerStorage() { - return data; - } - - /*@Override - public boolean equals(Object obj) { - if (obj == null) - return false; - if (!(obj instanceof NucleotideSequenceImpl)) - return false; - final NucleotideSequenceImpl other = (NucleotideSequenceImpl) obj; - if (!this.data.equals(other.data)) - return false; - return true; - } - - @Override - public int hashCode() { - int hash = 5; - hash = 83 * hash + this.data.hashCode(); - return hash; - }*/ - - public static NucleotideSequenceImpl fromStorage(Bit2Array b2a) { - return new NucleotideSequenceImpl(b2a); - } - - public static NucleotideSequenceImpl fromSequence(char[] sequence) { - Bit2Array storage = new Bit2Array(sequence.length); - byte code; - for (int i = 0; i < sequence.length; ++i) { - code = NucleotideAlphabet.INSTANCE.codeFromSymbol(sequence[i]); - if (code > 3) - throw new RuntimeException("This sequence doesn't support 'N/n' nucleotides."); - storage.set(i, code); - } - return new NucleotideSequenceImpl(storage); - } - - public static NucleotideSequenceImpl fromSequence(String sequence) { - return fromSequence(sequence.toCharArray()); - } - - public static NucleotideSequenceImpl fromSequence(NucleotideSequence sequence) { - return new NucleotideSequenceImpl(storageFromSequence(sequence)); - } - - public static Bit2Array storageFromSequence(NucleotideSequence sequence) { - if (sequence instanceof NucleotideSequenceImpl) - return ((NucleotideSequenceImpl) sequence).data.clone(); - Bit2Array storage = new Bit2Array(sequence.size()); - for (int i = 0; i < sequence.size(); ++i) - storage.set(i, sequence.codeAt(i)); - return storage; - } - - public static NucleotideSequenceImpl fromSequence(byte[] sequence, int offset, int length) { - Bit2Array storage = new Bit2Array(length); - for (int i = 0; i < length; ++i) - storage.set(i, NucleotideAlphabet.INSTANCE.codeFromSymbol((char) sequence[offset + i])); - return new NucleotideSequenceImpl(storage); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/QualityFormat.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/QualityFormat.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/QualityFormat.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/QualityFormat.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,70 @@ +/* + * MiTCR + * + * Copyright (c) 2010-2013: + * Bolotin Dmitriy + * Chudakov Dmitriy + * + * MiTCR is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ +package com.milaboratory.core.sequence.quality; + +/** + * A base class to store various sequencing quality formats. + * + * See corresponding Wikipedia page for details: link. + */ +public enum QualityFormat { + /** + * Phred quality values encoded with 33 value offset.

Allowed quality score values range is 0-50.

+ */ + Phred33((byte) 33, (byte) 0, (byte) 50, "phred33"), + /** + * Phred quality values encoded with 64 value offset.

Allowed quality score values range is 0-41.

+ */ + Phred64((byte) 64, (byte) 0, (byte) 41, "phred64"); + private byte offset, minValue, maxValue; + private String name; + + private QualityFormat(byte offset, byte minValue, byte maxVaule, String name) { + this.offset = offset; + this.minValue = minValue; + this.maxValue = maxVaule; + this.name = name; + } + + public byte getMinValue() { + return minValue; + } + + public byte getMaxValue() { + return maxValue; + } + + public byte getOffset() { + return offset; + } + + public static QualityFormat fromXML(String xml) { + for (QualityFormat format : values()) + if (format.name.equalsIgnoreCase(xml)) + return format; + return null; + } + + @Override + public String toString() { + return name; + } +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityPhred.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityPhred.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityPhred.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityPhred.java 2013-10-09 08:03:10.000000000 +0000 @@ -20,70 +20,117 @@ */ package com.milaboratory.core.sequence.quality; -import com.milaboratory.core.sequence.SequenceQuality; - import java.io.Serializable; import java.util.Arrays; /** * Implementation sequence quality. * - *

Phred sequence quality format used. It is common format for all new fastq formats (Sanger, Solexa v1.4+).

+ *

Phred sequence quality scores.

* * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) * @author Shugay Mikhail (mikhail.shugay@gmail.com) */ -public final class SequenceQualityPhred implements SequenceQuality, Serializable { +public final class SequenceQualityPhred implements Serializable { private static final long serialVersionUID = 1L; + private final byte[] data; - private byte[] data; - + /** + * Creates a phred sequence quality from a Sanger formatted quality string (33 based). + * + * @param string + */ public SequenceQualityPhred(String string) { this(string, 33); } + /** + * Creates a phred sequence quality from a string formatted with corresponding offset. + * + * @param string + */ public SequenceQualityPhred(String string, int offset) { this.data = string.getBytes(); for (int i = this.data.length - 1; i >= 0; --i) this.data[i] -= offset; } + /** + * Creates a phred sequence quality from a string formatted with corresponding offset. + * + * @param string + */ + public SequenceQualityPhred(String string, QualityFormat format) { + this(string, format.getOffset()); + } + + public byte[] getInnerData() { + return data.clone(); + } + + /** + * Creates a phred sequence quality containing only given values of quality. + * + * @param value value to fill the quality values with + * @param length size of quality string + */ public SequenceQualityPhred(byte value, int length) { data = new byte[length]; Arrays.fill(data, value); } + /** + * Creates quality object from raw quality score values. + * + * @param data raw quality score values + */ public SequenceQualityPhred(byte[] data) { - this.data = data; + this.data = data.clone(); } - @Override - public Class getBaseClass() { - return SequenceQualityPhred.class; + /** + * Constructor for factory method. + */ + private SequenceQualityPhred(byte[] data, boolean unsafe) { + this.data = data; } - @Override + /** + * Get the log10 of probability of error (e.g. nucleotide substitution) at given sequence point + * + * @param coord coordinate in sequence + * @return log10 of probability of error + */ public float log10ProbabilityOfErrorAt(int coord) { return -((float) data[coord]) / 10; } - @Override + /** + * Get probability of error (e.g. nucleotide substitution) at given sequence point + * + * @param coord coordinate in sequence + * @return probability of error + */ public float probabilityOfErrorAt(int coord) { return (float) Math.pow(10.0, -(data[coord]) / 10); } - @Override - public byte rawQualityValue(int coord) { + /** + * Get the raw sequence quality value (in binary format) at given sequence point + * + * @param coord coordinate in sequence + * @return raw sequence quality value + */ + public byte value(int coord) { return data[coord]; } - @Override - public void setRawQualityValue(int coord, byte value) { - data[coord] = value; - } - - @Override - public byte minRawQuality() { + /** + * Returns the worst sequence quality value + * + * @return worst sequence quality value + */ + public byte minValue() { byte min = Byte.MAX_VALUE; for (byte b : data) if (b < min) @@ -91,55 +138,53 @@ return min; } - public byte[] getRawData() { - return data; - } - - public void set(int index, byte value) { - data[index] = value; + /** + * Gets quality values in reverse order + * + * @return quality values in reverse order + */ + public SequenceQualityPhred reverse() { + return new SequenceQualityPhred(reverseTransformQualityStorage(data)); } - @Override - public SequenceQuality reverse() { - return new SequenceQualityPhred(reverceTransformQualityStorage(data)); - } - - public static byte[] reverceTransformQualityStorage(byte[] quality) { + /** + * Helper method. + */ + private static byte[] reverseTransformQualityStorage(byte[] quality) { byte[] newData = new byte[quality.length]; - int reverseCoord; - for (int coord = 0; coord < quality.length; coord++) { - reverseCoord = quality.length - 1 - coord; + int reverseCoord = quality.length - 1; + for (int coord = 0; coord < quality.length; ++coord, --reverseCoord) + //reverseCoord = quality.length - 1 - coord; newData[coord] = quality[reverseCoord]; - } - return newData; - } - @Override - public void mergeWith(SequenceQuality quality) { - if (quality.getBaseClass() != SequenceQualityPhred.class - || quality.size() != this.data.length) - return; - byte q; - for (int i = 0; i < data.length; ++i) - if (data[i] < (q = quality.rawQualityValue(i))) - data[i] = q; + assert reverseCoord == -1; + + return newData; } - @Override + /** + * Returns substring of current quality scores line. + * + * @param from inclusive + * @param to exclusive + * @return substring of current quality scores line + */ + public SequenceQualityPhred getRange(int from, int to) { + return new SequenceQualityPhred(Arrays.copyOfRange(data, from, to), true); + } + + /** + * Returns size of quality array + * + * @return size of quality array + */ public int size() { return data.length; } @Override public String toString() { - char[] chars = new char[data.length]; - for (int i = 0; i < data.length; ++i) - chars[i] = (char) (33 + data[i]); - return new String(chars); - } - - public static byte[] getContent(SequenceQualityPhred qualityPhred) { - return qualityPhred.data.clone(); + return encodeToString(33); } @Override @@ -154,16 +199,77 @@ return true; } - @Override - public byte[] encode(byte offset) { - byte[] copy = data.clone(); + /** + * Encodes current quality line with given offset. Common values for offset are 33 and 64. + * + * @param offset offset + * @return bytes encoded quality values + */ + public byte[] encode(int offset) { + if (offset < 0 || offset > 70) + throw new IllegalArgumentException(); + + byte[] copy = new byte[data.length]; for (int i = copy.length - 1; i >= 0; --i) - copy[i] += offset; + copy[i] += data[i] + offset; return copy; } + /** + * Encodes current quality line with given offset. Common values for offset are 33 and 64. + * + * @param offset offset + * @return encoded quality values + */ + public String encodeToString(int offset) { + return new String(encode(offset)); + } + @Override public int hashCode() { - return Arrays.hashCode(data); + return Arrays.hashCode(data) * 31 + 17; + } + + /** + * Returns a copy of inner byte array. + * + * @param quality quality + * @return values + */ + public static byte[] getContent(SequenceQualityPhred quality) { + return quality.data.clone(); + } + + public static byte[] parse(QualityFormat format, byte[] data, boolean check) { + //For performance + final byte offset = format.getOffset(), from = format.getMinValue(), to = format.getMaxValue(); + byte[] res = new byte[data.length]; + for (int i = 0; i < data.length; i++) { + res[i] = (byte) (data[i] - offset); + + if (check && + (res[i] < from || res[i] > to)) + throw new WrongQualityStringException(((char) (data[i])) + " [" + res[i] + "]"); + + //if (data[i] < qualityCodeFrom) + // if (!lowerUnSafe) + // throw new WrongQualityStringException(((char) (data[i] + qualityCodeOffset)) + " [" + data[i] + "]"); + // else + // data[i] = (byte) qualityCodeFrom; + } + return res; + } + + /** + * Factory method for the SequenceQualityPhred object. It performs all necessary range checks if required. + * + * @param format format of encoded quality values + * @param data byte with encoded quality values + * @param check determines whether range check is required + * @return quality line object + * @throws WrongQualityStringException if encoded value are out of range and checking is enabled + */ + public static SequenceQualityPhred create(QualityFormat format, byte[] data, boolean check) { + return new SequenceQualityPhred(parse(format, data, check), true); } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityUtils.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityUtils.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityUtils.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/SequenceQualityUtils.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,33 @@ +/* + * MiTCR + * + * Copyright (c) 2010-2013: + * Bolotin Dmitriy + * Chudakov Dmitriy + * + * MiTCR is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ +package com.milaboratory.core.sequence.quality; + +public final class SequenceQualityUtils { + public static final byte GOOD_QUALITY_VALUE = (byte) 34; + public static final byte BAD_QUALITY_VALUE = (byte) 2; + + private SequenceQualityUtils() { + } + + public static SequenceQualityPhred createGoodQualityObject(int length) { + return new SequenceQualityPhred(GOOD_QUALITY_VALUE, length); + } +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/WrongQualityStringException.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/WrongQualityStringException.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/quality/WrongQualityStringException.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/quality/WrongQualityStringException.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,30 @@ +/* + * MiTCR + * + * Copyright (c) 2010-2013: + * Bolotin Dmitriy + * Chudakov Dmitriy + * + * MiTCR is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ +package com.milaboratory.core.sequence.quality; + +public class WrongQualityStringException extends RuntimeException { + public WrongQualityStringException() { + } + + public WrongQualityStringException(String message) { + super(message); + } +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/tree/NucleotideSQPairSet.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/tree/NucleotideSQPairSet.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/tree/NucleotideSQPairSet.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/tree/NucleotideSQPairSet.java 2013-10-09 08:03:10.000000000 +0000 @@ -23,7 +23,6 @@ import com.milaboratory.core.sequence.NucleotideSQPair; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; import com.milaboratory.core.sequence.quality.SequenceQualityPhred; import com.milaboratory.core.sequence.util.SequencesUtils; @@ -45,7 +44,7 @@ //TODO idea: store {byte[] qual, int counter} to count aggregated sequences //TODO and not to remove good sequence if better sequenced PCR-error variant was passed to add() method private final float fFactor; - private final Map map = new HashMap<>(); + private final Map map = new HashMap<>(); /** * Main constructor. @@ -79,7 +78,7 @@ float t = fFactor * penaltyThreshold; for (int i = 0; i < pair.size() && t >= 0.0f; ++i) - t -= penalty.penalty(40, pair.getQuality().rawQualityValue(i)); + t -= penalty.penalty(40, pair.getQuality().value(i)); if (t >= 0.0f) { //Debug @@ -91,7 +90,8 @@ filtered++; //~ type cast. Only NucleotideSequenceImpl has right implementation for hashCode and equals - final NucleotideSequenceImpl sequence = NucleotideSequenceImpl.fromSequence(pair.getSequence()); + //final NucleotideSequence sequence = NucleotideSequence.fromSequence(pair.getSequence()); + final NucleotideSequence sequence = pair.getSequence(); //~~~~~~~~~~~~~ Search for exact hit ~~~~~~~~~~~~~ byte[] qualities = map.get(sequence); @@ -105,7 +105,7 @@ //Updating quality values from new sequence //Setting "max" quality. for (int i = 0; i < qualities.length; ++i) - if ((qual = pair.getQuality().rawQualityValue(i)) > qualities[i]) + if ((qual = pair.getQuality().value(i)) > qualities[i]) qualities[i] = qual; //This sequence is already in the map, so, return. (No count++ is needed) return; @@ -116,15 +116,15 @@ boolean found = false; //Iterating through whole map - Iterator> it = map.entrySet().iterator(); //Iterator here is used to be abel to remove entries - Map.Entry e; + Iterator> it = map.entrySet().iterator(); //Iterator here is used to be abel to remove entries + Map.Entry e; while (it.hasNext()) { //Getting entry e = it.next(); //Extracting quality and sequence final byte[] qualsInMap = e.getValue(); - final NucleotideSequenceImpl sequenceInMap = e.getKey(); + final NucleotideSequence sequenceInMap = e.getKey(); //Just in case... if (qualsInMap.length != sequence.size()) @@ -134,7 +134,7 @@ p = 0.0f; for (int i = 0; i < qualsInMap.length; ++i) if (sequence.codeAt(i) != sequenceInMap.codeAt(i)) { - p += penalty.penalty(qualsInMap[i], pair.getQuality().rawQualityValue(i)); + p += penalty.penalty(qualsInMap[i], pair.getQuality().value(i)); if (p > penaltyThreshold) //Not a hit, break. break; } @@ -145,7 +145,7 @@ //Who is the best? int sum = 0; for (int i = 0; i < pair.size(); ++i) { - sum += pair.getQuality().rawQualityValue(i); + sum += pair.getQuality().value(i); sum -= e.getValue()[i]; } @@ -233,9 +233,9 @@ } private static class It implements Iterator { - private final Iterator> innerIt; + private final Iterator> innerIt; - private It(Iterator> innerIt) { + private It(Iterator> innerIt) { this.innerIt = innerIt; } @@ -246,7 +246,7 @@ @Override public NucleotideSQPair next() { - Map.Entry n = innerIt.next(); + Map.Entry n = innerIt.next(); return new NucleotideSQPair(n.getKey(), new SequenceQualityPhred(n.getValue())); } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/tree/SequenceTreeMap.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/tree/SequenceTreeMap.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/tree/SequenceTreeMap.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/tree/SequenceTreeMap.java 2013-10-09 08:03:10.000000000 +0000 @@ -593,7 +593,7 @@ } public S getSequence() { - SequenceBuilder builder = alphabet.getSequenceBuilderFactory().create(pointer); + SequenceBuilder builder = alphabet.getBuilderFactory().create(pointer); for (int i = 0; i < pointer; ++i) builder.setCode(i, wrappers[i].position); return (S) builder.create(); diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMap.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMap.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMap.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMap.java 2013-10-09 08:03:10.000000000 +0000 @@ -250,14 +250,14 @@ return new Iterable>() { @Override public java.util.Iterator> iterator() { - return new EntryIterator(root, alphabet.getSequenceBuilderFactory()); + return new EntryIterator(root, alphabet.getBuilderFactory()); } }; } @Override public java.util.Iterator iterator() { - return new Iterator<>(new EntryIterator(root, alphabet.getSequenceBuilderFactory())); + return new Iterator<>(new EntryIterator(root, alphabet.getBuilderFactory())); //return new Iterator<>(root, alphabet.codesCount()); } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideRCSequence.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideRCSequence.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideRCSequence.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideRCSequence.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequence.util; - -import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; -import com.milaboratory.util.Bit2Array; - -/** - * Nucleotide reverse complement sequence. - * - * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) - * @author Shugay Mikhail (mikhail.shugay@gmail.com) - */ -public class NucleotideRCSequence extends NucleotideSequenceImpl { - private NucleotideSequence originalSequence; - - public NucleotideRCSequence(NucleotideSequence originalSequence) { - super(transformNucleotideStorage(originalSequence)); - this.originalSequence = originalSequence; - } - - private static Bit2Array transformNucleotideStorage(NucleotideSequence sequence) { - Bit2Array newData = new Bit2Array(sequence.size()); - int reverseCoord; - for (int coord = 0; coord < sequence.size(); coord++) { - reverseCoord = sequence.size() - 1 - coord; - newData.set(coord, (~sequence.codeAt(reverseCoord)) & 0x3); - } - return newData; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregator.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregator.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregator.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregator.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,120 @@ +package com.milaboratory.core.sequence.util; + +import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; +import com.milaboratory.util.Bit2Array; + +public class NucleotideSequenceAggregator { + final long[] observations; + long sequences = 0; + final int length, delta; + + public NucleotideSequenceAggregator(int length, int delta) { + this.observations = new long[(length + 2 * delta) * 4]; + this.length = length; + this.delta = delta; + } + + public long getObservations(int position, int code) { + if (code < 0 || code >= 4 || position < 0 || position >= length + 2 * delta) + throw new IllegalArgumentException(); + + return observations[position * 4 + code]; + } + + private long getScore(NucleotideSequence sequence, int d) { + long result = 0; + + for (int i = sequence.size() - 1; i >= 0; --i) + result += observations[(delta + i + d) * 4 + sequence.codeAt(i)]; + + return result; + } + + public long getSequences() { + return sequences; + } + + private void putSequence(NucleotideSequence sequence, int d) { + ++sequences; + for (int i = sequence.size() - 1; i >= 0; --i) + ++observations[(delta + i + d) * 4 + sequence.codeAt(i)]; + } + + public void putSequence(NucleotideSequence sequence) { + if (sequence.size() != length) + throw new IllegalArgumentException(); + + if (sequences == 0) { + putSequence(sequence, 0); + return; + } + + int maxD = 0; + long maxScore = getScore(sequence, 0), score; + + for (int d = 1; d <= delta; ++d) { + if ((score = getScore(sequence, d)) > maxScore) { + maxScore = score; + maxD = d; + } + if ((score = getScore(sequence, -d)) > maxScore) { + maxScore = score; + maxD = -d; + } + } + + putSequence(sequence, maxD); + } + + public NucleotideSequence getSequence(double minPercent) { + int start = 0, end = 0, lastBad = -1; + byte maxCode, code; + long maxCount, sum; + double percent; + + for (int i = 0; i < length + delta * 2; ++i) { + //maxCode = -1; + maxCount = 0; + sum = 0; + + for (code = 0; code < 4; ++code) { + sum += observations[i * 4 + code]; + if (maxCount < observations[i * 4 + code]) { + maxCount = observations[i * 4 + code]; + //maxCode = code; + } + } + + if (sum == 0) { + lastBad = i; + continue; + } + + percent = 1.0 * maxCount / sum; + + if (percent < minPercent) + lastBad = i; + else if (end - start < i - lastBad) { + end = i + 1; + start = lastBad + 1; + } + } + + Bit2Array st = new Bit2Array(end - start); + + for (int i = start; i < end; ++i) { + maxCode = -1; + maxCount = 0; + + for (code = 0; code < 4; ++code) + if (maxCount < observations[i * 4 + code]) { + maxCount = observations[i * 4 + code]; + maxCode = code; + } + + st.set(i - start, maxCode); + } + + return NucleotideSequence.fromStorage(st); + } +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceGenerator.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceGenerator.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceGenerator.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideSequenceGenerator.java 2013-10-09 08:03:10.000000000 +0000 @@ -21,7 +21,6 @@ package com.milaboratory.core.sequence.util; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; import com.milaboratory.util.Bit2Array; public class NucleotideSequenceGenerator { @@ -33,7 +32,7 @@ byte[] data = new byte[stSize]; for (int i = 0; i < count; ++i) { Bit2Array st = Bit2Array.construct(length, data.clone()); - result[i] = NucleotideSequenceImpl.fromStorage(st); + result[i] = new NucleotideSequence(st); data[data.length - 1]++; for (int j = data.length - 2; j >= 0; --j) diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideSubSequence.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideSubSequence.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/NucleotideSubSequence.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/NucleotideSubSequence.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequence.util; - -import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; - -/** - * Nucleotide sub sequence adapter. Implements all NucleotideSequence methods. - * - * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) - * @author Shugay Mikhail (mikhail.shugay@gmail.com) - */ -public class NucleotideSubSequence extends NucleotideSequence { - private final NucleotideSequence sequence; - private final int from, length; - - public NucleotideSubSequence(NucleotideSequence sequence, int from, int length) { - if (from < 0 || from >= sequence.size() || length < 0 || from + length > sequence.size()) - throw new IllegalArgumentException(); - this.sequence = sequence; - this.from = from; - this.length = length; - } - - /** - * Till the end of sequence. - * - * @param sequence - * @param from - */ - public NucleotideSubSequence(NucleotideSequence sequence, int from) { - this(sequence, from, sequence.size() - from); - } - - @Override - public byte codeAt(int position) { - if (position < 0 || position >= length) - throw new IndexOutOfBoundsException(); - return sequence.codeAt(from + position); - } - - @Override - public int size() { - return length; - } - - public int getTo() { - return from + length; - } - - public NucleotideSequence getInnerSequence() { - return sequence; - } - - public int getFrom() { - return from; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/SequenceQualityUtils.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/SequenceQualityUtils.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/SequenceQualityUtils.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/SequenceQualityUtils.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequence.util; - -import com.milaboratory.core.sequence.SequenceQuality; -import com.milaboratory.core.sequence.quality.SequenceQualityPhred; - -public final class SequenceQualityUtils { - private SequenceQualityUtils() { - } - - public static SequenceQualityPhred getGoodQualityObject(int length) { - return new SequenceQualityPhred(SequenceQuality.Constants.goodQuality, length); - } - - public static SequenceQuality copy(SequenceQuality qulity) { - if (qulity.getBaseClass() == SequenceQualityPhred.class) { - byte[] storage = new byte[qulity.size()]; - for (int i = 0; i < qulity.size(); ++i) - storage[i] = qulity.rawQualityValue(i); - return new SequenceQualityPhred(storage); - } - throw new RuntimeException("Unsupported quality type"); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/SequencesUtils.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/SequencesUtils.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/SequencesUtils.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/SequencesUtils.java 2013-10-09 08:03:10.000000000 +0000 @@ -23,8 +23,6 @@ import com.milaboratory.core.sequence.Alphabet; import com.milaboratory.core.sequence.Sequence; import com.milaboratory.core.sequence.SequenceBuilder; -import com.milaboratory.core.sequence.SequenceQuality; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; import com.milaboratory.core.sequence.quality.SequenceQualityPhred; import java.util.ArrayList; @@ -89,16 +87,17 @@ return result; } - public static byte[] extractRawQualityValues(SequenceQuality quality) { + public static byte[] extractRawQualityValues(SequenceQualityPhred quality) { if (quality instanceof SequenceQualityPhred) - return SequenceQualityPhred.getContent((SequenceQualityPhred) quality); + return SequenceQualityPhred.getContent(quality); + //TODO <---- byte[] values = new byte[quality.size()]; for (int i = 0; i < values.length; ++i) - values[i] = quality.rawQualityValue(i); + values[i] = quality.value(i); return values; } - /** + /* * A factory method of NucleotideSubSequence class. If sub-sequence coords lies out of target sequence it returns * null. * @@ -107,14 +106,14 @@ * @param length length of subsequence * @return null or NucleotideSubSequence. */ - public static NucleotideSubSequence getSubSequence(NucleotideSequence sequence, int from, int length) { - if (from < 0 || from + length > sequence.size()) - return null; - return new NucleotideSubSequence(sequence, from, length); - } + //public static NucleotideSubSequence getSubSequence(NucleotideSequence sequence, int from, int length) { + // if (from < 0 || from + length > sequence.size()) + // return null; + // return new NucleotideSubSequence(sequence, from, length); + //} - /** + /* * A factory method of NucleotideSubSequence class. If sub-sequence coords lies out of target sequence it returns * null. * @@ -122,11 +121,11 @@ * @param from from coordinate * @return null or NucleotideSubSequence. */ - public static NucleotideSubSequence getSubSequence(NucleotideSequence sequence, int from) { - if (from < 0 || from > sequence.size() - 1) - return null; - return new NucleotideSubSequence(sequence, from, sequence.size() - from); - } + //public static NucleotideSubSequence getSubSequence(NucleotideSequence sequence, int from) { + // if (from < 0 || from > sequence.size() - 1) + // return null; + // return new NucleotideSubSequence(sequence, from, sequence.size() - from); + //} public static S cat(S... sequences) { if (sequences.length == 0) @@ -139,7 +138,7 @@ for (S s : sequences) size += s.size(); - SequenceBuilder builder = sequences[0].getAlphabet().getSequenceBuilderFactory().create(size); + SequenceBuilder builder = sequences[0].getAlphabet().getBuilderFactory().create(size); int pointer = 0; for (S s : sequences) { builder.copyFrom(s, 0, pointer, s.size()); diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/SubSequenceQuality.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/SubSequenceQuality.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequence/util/SubSequenceQuality.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequence/util/SubSequenceQuality.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequence.util; - -import com.milaboratory.core.sequence.SequenceQuality; -import com.milaboratory.core.sequence.quality.SequenceQualityPhred; - -import java.util.Arrays; - -/** - * Sub sequence quality object. - * - * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) - * @author Shugay Mikhail (mikhail.shugay@gmail.com) - */ -public class SubSequenceQuality implements SequenceQuality { - private SequenceQuality quality; - private int from, length; - - public SubSequenceQuality(SequenceQuality quality, int from, int length) { - this.quality = quality; - this.from = from; - this.length = length; - } - - @Override - public byte[] encode(byte offset) { - throw new UnsupportedOperationException(); - } - - @Override - public byte rawQualityValue(int coord) { - if (coord < 0 || coord >= length) - throw new IndexOutOfBoundsException(); - return quality.rawQualityValue(coord + from); - } - - @Override - public byte minRawQuality() { - byte min = Byte.MAX_VALUE; - for (int i = 0; i < length; ++i) - if (quality.rawQualityValue(i + from) < min) - min = quality.rawQualityValue(i + from); - return min; - } - - @Override - public float probabilityOfErrorAt(int coord) { - if (coord < 0 || coord >= length) - throw new IndexOutOfBoundsException(); - return quality.probabilityOfErrorAt(coord + from); - } - - @Override - public float log10ProbabilityOfErrorAt(int coord) { - if (coord < 0 || coord >= length) - throw new IndexOutOfBoundsException(); - return quality.log10ProbabilityOfErrorAt(coord + from); - } - - public SequenceQuality getInnerSequenceQuality() { - return quality; - } - - public int getFrom() { - return from; - } - - @Override - public void mergeWith(SequenceQuality quality) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public void setRawQualityValue(int coord, byte value) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public Class getBaseClass() { - return quality.getBaseClass(); - } - - @Override - public int size() { - return length; - } - - public SequenceQuality toNativeQualityObject() { - if (quality instanceof SequenceQualityPhred) - return new SequenceQualityPhred(Arrays.copyOfRange(((SequenceQualityPhred) quality).getRawData(), from, from + length)); - throw new UnsupportedOperationException(); - } - - @Override - public SequenceQuality reverse() { - return new SubSequenceQuality(quality.reverse(), quality.size() - from - length, length); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fasta/FastaReader.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fasta/FastaReader.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fasta/FastaReader.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fasta/FastaReader.java 2013-10-09 08:03:10.000000000 +0000 @@ -22,11 +22,9 @@ import cc.redberry.pipe.blocks.AbstractOutputPortUninterruptible; import com.milaboratory.core.sequence.NucleotideSQPair; -import com.milaboratory.core.sequence.SequenceQuality; import com.milaboratory.core.sequence.nucleotide.NucleotideAlphabetWithN; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; +import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; import com.milaboratory.core.sequence.quality.SequenceQualityPhred; -import com.milaboratory.core.sequence.util.SequenceQualityUtils; import com.milaboratory.core.sequencing.io.SSequencingDataReader; import com.milaboratory.core.sequencing.read.SSequencingRead; import com.milaboratory.core.sequencing.read.SSequencingReadImpl; @@ -36,6 +34,10 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.Arrays; + +import static com.milaboratory.core.sequence.quality.SequenceQualityUtils.BAD_QUALITY_VALUE; +import static com.milaboratory.core.sequence.quality.SequenceQualityUtils.GOOD_QUALITY_VALUE; /** * Reads sequences from a FASTA file @@ -94,20 +96,22 @@ char[] chars = item.getSequence(); //No quality in FAST format, so use good string - SequenceQualityPhred quality = SequenceQualityUtils.getGoodQualityObject(chars.length);//new SequenceQualityPhred(SequenceQuality.Constants.goodQuality, chars.length); + //SequenceQualityPhred quality = SequenceQualityUtils.createGoodQualityObject(chars.length);//new SequenceQualityPhred(SequenceQuality.Constants.goodQuality, chars.length); + byte[] quality = new byte[chars.length]; + Arrays.fill(quality, GOOD_QUALITY_VALUE); Bit2Array seqData = new Bit2Array(chars.length); for (int i = 0; i < chars.length; ++i) { byte base = NucleotideAlphabetWithN.INSTANCE.codeFromSymbol(chars[i]); if (base == 4) //The letter will be "A" - quality.set(i, SequenceQuality.Constants.badQuality); + quality[i] = BAD_QUALITY_VALUE; else seqData.set(i, base); } //setPercentRed(); return new SSequencingReadImpl(item.getDescription(), - new NucleotideSQPair(NucleotideSequenceImpl.fromStorage(seqData), - quality), id); + new NucleotideSQPair(new NucleotideSequence(seqData), + new SequenceQualityPhred(quality)), id); } /*public void setPercentRed() { diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/Casava18InfoProvider.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/Casava18InfoProvider.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/Casava18InfoProvider.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/Casava18InfoProvider.java 2013-10-09 08:03:10.000000000 +0000 @@ -23,12 +23,12 @@ /** * Parses read metadata from Casava18 formatted header */ -public class Casava18InfoProvider implements IlluminaInfoProvider { +public class Casava18InfoProvider implements ReadInfoProvider { // HWUSI-EAS1814:40:1:4:9:1523:931 1:N:0:TGACCA @Override - public IlluminaReadInfo getInfo(String description) { + public ReadInfo getInfo(String description) { String[] split0 = description.split(" "); String[] split1 = split0[1].split(":"); - return new IlluminaReadInfoImpl(split0[0], (byte) (Byte.parseByte(split1[0], 10) - 1), split1[1].equals("Y")); + return new ReadInfoImpl(split0[0], (byte) (Byte.parseByte(split1[0], 10) - 1), split1[1].equals("Y")); } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProvider.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProvider.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProvider.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProvider.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq; - -public interface IlluminaInfoProvider { - IlluminaReadInfo getInfo(String description); -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProviderFactory.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProviderFactory.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProviderFactory.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaInfoProviderFactory.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq; - -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; - -public class IlluminaInfoProviderFactory { - public static IlluminaInfoProvider createProvider(QualityStringFormat format) { - switch (format) { - case Illumina18: - return new Casava18InfoProvider(); - } - throw new RuntimeException("ReadInfoProvider not implemented for this FASTQ format."); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfo.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfo.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfo.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfo.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq; - -/** - * An interface for read metadata in Illumina format - */ -public interface IlluminaReadInfo { - /** - * Order of the read in paired data - * - * @return {@code 0} for first read, {@code 1} for second read - */ - byte getReadNumber(); - - /** - * Checks if this read is a mate of another one - * - * @param other other read to compare - * @return {@code true} if reads are paired, {@code false} otherwise - */ - boolean isPairOf(IlluminaReadInfo other); - - /** - * Checks if read is filtered (i.e. should be omitted) - * - * @return {@code true} if read is filtered, {@code false} otherwise - */ - boolean isFiltered(); - - /** - * Gets the description for read - * - * @return the description for read - */ - String pDescription(); -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfoImpl.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfoImpl.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfoImpl.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/IlluminaReadInfoImpl.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq; - -/** - * An implementation of {@link IlluminaReadInfo} - */ -public class IlluminaReadInfoImpl implements IlluminaReadInfo { - private final String pairId; - private final byte readNumber; - private final boolean isFiltered; - - /** - * Creates a container holding read metadata - * - * @param pairId id of read pair - * @param readNumber number of the read - * @param filtered is the read filtered - */ - public IlluminaReadInfoImpl(String pairId, byte readNumber, boolean filtered) { - this.pairId = pairId; - this.readNumber = readNumber; - this.isFiltered = filtered; - } - - @Override - /** - * {@inheritDoc} - */ - public byte getReadNumber() { - return readNumber; - } - - @Override - /** - * {@inheritDoc} - */ - public boolean isPairOf(IlluminaReadInfo other) { - if (other.getClass() != IlluminaReadInfoImpl.class) - return false; - IlluminaReadInfoImpl impl = (IlluminaReadInfoImpl) other; - return impl.pairId.equals(this.pairId) && (1 - this.readNumber) == (impl.readNumber); - } - - @Override - /** - * {@inheritDoc} - */ - public boolean isFiltered() { - return isFiltered; - } - - @Override - /** - * {@inheritDoc} - */ - public String pDescription() { - return pairId; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/NucleotideSQPairTrimmer.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/NucleotideSQPairTrimmer.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/NucleotideSQPairTrimmer.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/NucleotideSQPairTrimmer.java 2013-10-09 08:03:10.000000000 +0000 @@ -22,8 +22,7 @@ import cc.redberry.pipe.Processor; import com.milaboratory.core.sequence.NucleotideSQPair; -import com.milaboratory.core.sequence.SequenceQuality; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; +import com.milaboratory.core.sequence.quality.SequenceQualityPhred; /** * Trims bad quality nucleotides from {@link NucleotideSQPair}s @@ -31,26 +30,21 @@ * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) */ public class NucleotideSQPairTrimmer implements Processor { - private QualityStringFormat format; + private byte thresholdValue; - /** - * Creates bad quality trimmer - * - * @param format sequencing quality format - */ - public NucleotideSQPairTrimmer(QualityStringFormat format) { - this.format = format; + public NucleotideSQPairTrimmer(byte thresholdValue) { + this.thresholdValue = thresholdValue; } @Override public NucleotideSQPair process(NucleotideSQPair input) { - SequenceQuality quality = input.getQuality(); + SequenceQualityPhred quality = input.getQuality(); int trimTo = quality.size() - 1; for (int i = quality.size() - 1; i >= 0; --i) - if (quality.rawQualityValue(i) != format.getZeroQualityValue()) + if (quality.value(i) <= thresholdValue) break; else trimTo = i; - return input.getSubPairCopy(0, trimTo).getDeepCopy(); + return input.getRange(0, trimTo); } } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqReader.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqReader.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqReader.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqReader.java 2013-10-09 08:03:10.000000000 +0000 @@ -21,8 +21,8 @@ package com.milaboratory.core.sequencing.io.fastq; import cc.redberry.pipe.blocks.AbstractOutputPortUninterruptible; +import com.milaboratory.core.sequence.quality.QualityFormat; import com.milaboratory.core.sequencing.io.PSequencingDataReader; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; import com.milaboratory.core.sequencing.read.PSequencingRead; import com.milaboratory.core.sequencing.read.PSequencingReadImpl; import com.milaboratory.core.sequencing.read.SSequencingRead; @@ -39,22 +39,27 @@ // TODO: _close() and _take() public class PFastqReader extends AbstractOutputPortUninterruptible implements PSequencingDataReader { private final BufferedReader[] readers; - private final QualityStringFormat format; - private final IlluminaInfoProvider infoProvider; + private final QualityFormat format; + private final ReadInfoProvider infoProvider; private long counter = 0; private final boolean notFilteredOnly, check; - /*public PFastqReader(String file0, String file1, QualityStringFormat format, boolean notFilteredOnly) throws FileNotFoundException { - this(file0, file1, format, CompressionType.None, notFilteredOnly); + //TODO docs.... + public PFastqReader(String file0, String file1, QualityFormat format) throws IOException { + this(file0, file1, format, CompressionType.None); } - public PFastqReader(String file0, String file1, QualityStringFormat format, CompressionType ct, boolean notFilteredOnly) throws FileNotFoundException { - this(new File(file0), new File(file1), format, ct, notFilteredOnly); + public PFastqReader(String file0, String file1, QualityFormat format, CompressionType ct) throws IOException { + this(new File(file0), new File(file1), format, ct, null, false, false); } - public PFastqReader(File file0, File file1, QualityStringFormat format, boolean notFilteredOnly) throws FileNotFoundException { - this(file0, file1, format, CompressionType.None, notFilteredOnly); - }*/ + public PFastqReader(File file0, File file1, QualityFormat format) throws IOException { + this(file0, file1, format, CompressionType.None); + } + + public PFastqReader(File file0, File file1, QualityFormat format, CompressionType ct) throws IOException { + this(file0, file1, format, ct, null, false, false); + } /** * Creates a {@link PSequencingRead} stream from two FASTQ files with paired-end read data @@ -67,8 +72,10 @@ * @param notFilteredOnly outputs only reads that are not marked by 'filtered' flag in their header * @throws IOException in case there is problem with reading from files */ - public PFastqReader(File file0, File file1, QualityStringFormat format, CompressionType ct, boolean check, boolean notFilteredOnly) throws IOException { - this(new FileInputStream(file0), new FileInputStream(file1), format, ct, check, notFilteredOnly); + public PFastqReader(File file0, File file1, QualityFormat format, CompressionType ct, + ReadInfoProvider infoProvider, + boolean check, boolean notFilteredOnly) throws IOException { + this(new FileInputStream(file0), new FileInputStream(file1), format, ct, infoProvider, check, notFilteredOnly); } /** @@ -82,17 +89,20 @@ * @param notFilteredOnly outputs only reads that are not marked by 'filtered' flag in their header * @throws IOException in case there is problem with reading from files */ - public PFastqReader(InputStream stream0, InputStream stream1, QualityStringFormat format, CompressionType ct, boolean check, boolean notFilteredOnly) throws IOException { + public PFastqReader(InputStream stream0, InputStream stream1, QualityFormat format, CompressionType ct, + ReadInfoProvider infoProvider, + boolean check, boolean notFilteredOnly) throws IOException { this.readers = new BufferedReader[]{ new BufferedReader(new InputStreamReader(ct.createInputStream(stream0))), new BufferedReader(new InputStreamReader(ct.createInputStream(stream1))) }; this.format = format; + + if (infoProvider == null && (check || notFilteredOnly)) + throw new IllegalArgumentException("Read info provider is required for filtering checking and reads pair errors detection."); + + this.infoProvider = infoProvider; this.check = check; - if (check || notFilteredOnly) - infoProvider = IlluminaInfoProviderFactory.createProvider(format); - else - infoProvider = null; this.notFilteredOnly = notFilteredOnly; } @@ -124,8 +134,8 @@ if (check || notFilteredOnly) { //Creating info - IlluminaReadInfo info0 = infoProvider.getInfo(read0.getDescription()); - IlluminaReadInfo info1 = infoProvider.getInfo(read1.getDescription()); + ReadInfo info0 = infoProvider.getInfo(read0.getDescription()); + ReadInfo info1 = infoProvider.getInfo(read1.getDescription()); if (check) { //Correct reads number and pairing @@ -178,13 +188,13 @@ }*/ //TODO move to some util class - /*public static OutputPort fromFolderSingleRun(String folder, QualityStringFormat + /*public static OutputPort fromFolderSingleRun(String folder, QualityFormat format, CompressionType ct, boolean notFiltered, boolean trim) { return fromFolderSingleRun(new File(folder), format, ct, notFiltered, trim); } - public static OutputPort fromFolderSingleRun(File folder, QualityStringFormat + public static OutputPort fromFolderSingleRun(File folder, QualityFormat format, CompressionType ct, boolean notFiltered, boolean trim) { Pattern pattern = Pattern.compile("R(\\d)_(\\d{3})\\.fastq(.*)"); diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqWriter.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqWriter.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqWriter.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/PFastqWriter.java 2013-10-09 08:03:10.000000000 +0000 @@ -21,7 +21,7 @@ package com.milaboratory.core.sequencing.io.fastq; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; +import com.milaboratory.core.sequence.quality.QualityFormat; import com.milaboratory.core.sequencing.read.PSequencingRead; import com.milaboratory.util.CompressionType; @@ -40,7 +40,7 @@ * @param fileR2 file to store second read * @param format sequencing quality format */ - public PFastqWriter(String fileR1, String fileR2, QualityStringFormat format) throws IOException { + public PFastqWriter(String fileR1, String fileR2, QualityFormat format) throws IOException { this.writer0 = new SFastqWriter(fileR1, format); this.writer1 = new SFastqWriter(fileR2, format); } @@ -53,7 +53,7 @@ * @param format sequencing quality format * @param ct compression type to use */ - public PFastqWriter(String fileR1, String fileR2, QualityStringFormat format, CompressionType ct) throws IOException { + public PFastqWriter(String fileR1, String fileR2, QualityFormat format, CompressionType ct) throws IOException { this.writer0 = new SFastqWriter(fileR1, format, ct); this.writer1 = new SFastqWriter(fileR2, format, ct); } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/PSequencingReadsTrimmer.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/PSequencingReadsTrimmer.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/PSequencingReadsTrimmer.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/PSequencingReadsTrimmer.java 2013-10-09 08:03:10.000000000 +0000 @@ -21,7 +21,7 @@ package com.milaboratory.core.sequencing.io.fastq; import cc.redberry.pipe.Processor; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; +import com.milaboratory.core.sequence.quality.QualityFormat; import com.milaboratory.core.sequencing.read.PSequencingRead; import com.milaboratory.core.sequencing.read.PSequencingReadImpl; import com.milaboratory.core.sequencing.read.SSequencingRead; @@ -39,8 +39,8 @@ * * @param format sequencing quality format */ - public PSequencingReadsTrimmer(QualityStringFormat format) { - this.trimmer = new NucleotideSQPairTrimmer(format); + public PSequencingReadsTrimmer(QualityFormat format) { + this.trimmer = new NucleotideSQPairTrimmer((byte) 2); } @Override diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatChecker.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatChecker.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatChecker.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatChecker.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,62 @@ +package com.milaboratory.core.sequencing.io.fastq; + +import com.milaboratory.core.sequence.quality.QualityFormat; + +import java.io.BufferedReader; +import java.io.IOException; + +import static com.milaboratory.core.sequence.quality.QualityFormat.Phred33; +import static com.milaboratory.core.sequence.quality.QualityFormat.Phred64; + +/** + * This class contains methods to infer quality string format from FASTQ data. + */ +public final class QualityFormatChecker { + public static QualityFormat guessFormat(BufferedReader reader, int maxBytes) throws IOException { + String line; + int k, chr; + boolean signal33, signal64; + int maxReadSize = 0, currentReadSize; + do { + currentReadSize = 0; + + for (k = 0; k < 3; ++k) + if ((line = reader.readLine()) != null) + currentReadSize += line.length() + 2; + else + return null; + + line = reader.readLine(); + if (line == null) + return null; + currentReadSize += line.length() + 2; + + signal33 = false; + signal64 = false; + + for (k = line.length() - 1; k >= 0; --k) { + chr = (int) line.charAt(k); + signal33 |= (chr - 64) < Phred64.getMinValue(); + signal64 |= (chr - 33) > Phred33.getMaxValue(); + } + + //The file has bad format. + //If any of formats is applicable file contains out of range values in any way. + if (signal33 && signal64) + return null; + + if (signal33) + return Phred33; + if (signal64) + return Phred64; + + maxBytes -= currentReadSize; + + if (maxReadSize < currentReadSize) + maxReadSize = currentReadSize; + + } while (maxBytes - maxReadSize > 0); //Weak condition if file contains reads with different read lengths + + return null; + } +} \ No newline at end of file diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatGuesser.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatGuesser.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatGuesser.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/QualityFormatGuesser.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,63 +0,0 @@ -package com.milaboratory.core.sequencing.io.fastq; - -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; - -import java.io.BufferedReader; -import java.io.IOException; - -/** - * This class contains methods to infer quality string format from FASTQ data. - */ -public class QualityFormatGuesser { - /** - * According to Wikipedia maximal quality is 41 for Illumina 1.8+ format 2 is added just in case. - */ - private static final int MAX_QUALITY = 43; - private static final int MIN_QUALITY = -5; - - public static QualityStringFormat guessFormat(BufferedReader reader, int maxBytes) throws IOException { - String line; - int k, chr; - boolean signal33, signal64; - int maxReadSize = 0, readSize; - do { - readSize = 0; - - for (k = 0; k < 3; ++k) - if ((line = reader.readLine()) != null) - readSize += line.length() + 2; - else - return null; - - line = reader.readLine(); - if (line == null) - return null; - readSize += line.length() + 2; - - signal33 = false; - signal64 = false; - - for (k = line.length() - 1; k >= 0; --k) { - chr = (int) line.charAt(k); - signal33 |= (chr - 64) < MIN_QUALITY; - signal64 |= (chr - 33) > MAX_QUALITY; - } - - if (signal33 && signal64) - return null; - - if (signal33) - return QualityStringFormat.Illumina18u; - if (signal64) - return QualityStringFormat.Illumina15; - - maxBytes -= readSize; - - if (maxReadSize < readSize) - maxReadSize = readSize; - - } while (maxBytes - maxReadSize > 0); - - return null; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfo.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfo.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfo.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfo.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,55 @@ +/* + * MiTCR + * + * Copyright (c) 2010-2013: + * Bolotin Dmitriy + * Chudakov Dmitriy + * + * MiTCR is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ +package com.milaboratory.core.sequencing.io.fastq; + +/** + * An interface for read metadata in Illumina format + */ +public interface ReadInfo { + /** + * Order of the read in paired data + * + * @return {@code 0} for first read, {@code 1} for second read + */ + byte getReadNumber(); + + /** + * Checks if this read is a mate of another one + * + * @param other other read to compare + * @return {@code true} if reads are paired, {@code false} otherwise + */ + boolean isPairOf(ReadInfo other); + + /** + * Checks if read is filtered (i.e. should be omitted) + * + * @return {@code true} if read is filtered, {@code false} otherwise + */ + boolean isFiltered(); + + /** + * Gets the description for read + * + * @return the description for read + */ + String pDescription(); +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoImpl.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoImpl.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoImpl.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoImpl.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,78 @@ +/* + * MiTCR + * + * Copyright (c) 2010-2013: + * Bolotin Dmitriy + * Chudakov Dmitriy + * + * MiTCR is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ +package com.milaboratory.core.sequencing.io.fastq; + +/** + * An implementation of {@link ReadInfo} + */ +public class ReadInfoImpl implements ReadInfo { + private final String pairId; + private final byte readNumber; + private final boolean isFiltered; + + /** + * Creates a container holding read metadata + * + * @param pairId id of read pair + * @param readNumber number of the read + * @param filtered is the read filtered + */ + public ReadInfoImpl(String pairId, byte readNumber, boolean filtered) { + this.pairId = pairId; + this.readNumber = readNumber; + this.isFiltered = filtered; + } + + @Override + /** + * {@inheritDoc} + */ + public byte getReadNumber() { + return readNumber; + } + + @Override + /** + * {@inheritDoc} + */ + public boolean isPairOf(ReadInfo other) { + if (other.getClass() != ReadInfoImpl.class) + return false; + ReadInfoImpl impl = (ReadInfoImpl) other; + return impl.pairId.equals(this.pairId) && (1 - this.readNumber) == (impl.readNumber); + } + + @Override + /** + * {@inheritDoc} + */ + public boolean isFiltered() { + return isFiltered; + } + + @Override + /** + * {@inheritDoc} + */ + public String pDescription() { + return pairId; + } +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoProvider.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoProvider.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoProvider.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/ReadInfoProvider.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,25 @@ +/* + * MiTCR + * + * Copyright (c) 2010-2013: + * Bolotin Dmitriy + * Chudakov Dmitriy + * + * MiTCR is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ +package com.milaboratory.core.sequencing.io.fastq; + +public interface ReadInfoProvider { + ReadInfo getInfo(String description); +} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqReader.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqReader.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqReader.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqReader.java 2013-10-09 08:03:10.000000000 +0000 @@ -22,12 +22,12 @@ import cc.redberry.pipe.blocks.AbstractOutputPortUninterruptible; import com.milaboratory.core.sequence.NucleotideSQPair; -import com.milaboratory.core.sequence.SequenceQuality; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; +import com.milaboratory.core.sequence.quality.QualityFormat; +import com.milaboratory.core.sequence.quality.SequenceQualityPhred; +import com.milaboratory.core.sequence.quality.SequenceQualityUtils; +import com.milaboratory.core.sequence.quality.WrongQualityStringException; import com.milaboratory.core.sequencing.io.SSequencingDataReader; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; -import com.milaboratory.core.sequencing.io.fastq.quality.WrongQualityStringException; import com.milaboratory.core.sequencing.read.SSequencingRead; import com.milaboratory.core.sequencing.read.SSequencingReadImpl; import com.milaboratory.util.CanReportProgress; @@ -35,7 +35,6 @@ import com.milaboratory.util.CountingInputStream; import java.io.*; -import java.util.concurrent.atomic.AtomicLong; /** * FASTQ files reader, using BufferedReader.readLine() method. @@ -45,111 +44,75 @@ */ // TODO: _close() and _take() public class SFastqReader extends AbstractOutputPortUninterruptible implements SSequencingDataReader, CanReportProgress { + private static final int BUFFER_SIZE = 32768; + //Main Input private final BufferedReader reader; - //To determine red percent - //private final FileChannel channel; - //private volatile int percentRed = 0; - private final AtomicLong dotCorrections = new AtomicLong(); + //The total size of input stream if known. private final long totalSize; + //Reads read private long counter = 0; - //FASTQ format + //Bytes read counting IS private final CountingInputStream countingStream; - private final QualityStringFormat format; - private final IlluminaInfoProvider infoProvider; + //FASTQ format + private final QualityFormat format; + private final ReadInfoProvider infoProvider; //private final boolean notFilteredOnly; /** * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data * - * @param stream stream with reads - * @param format read quality encoding format - * @param ct type of compression (NONE, GZIP, etc) - * @param notFilteredOnly outputs only reads that are not marked by 'filtered' flag in their header + * @param file file with reads * @throws IOException in case there is problem with reading from files */ - public SFastqReader(InputStream stream, QualityStringFormat format, CompressionType ct, boolean notFilteredOnly) throws IOException { - this(stream, format, ct, notFilteredOnly, false); + public SFastqReader(String file) throws IOException { + this(new FileInputStream(file), null, file.endsWith(".gz") ? CompressionType.GZIP : CompressionType.None, + true, null, false); } /** * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data * - * @param stream stream with reads - * @param format read quality encoding format, if {@code guessQualityFormat} is true this value is used - * as default format - * @param ct type of compression (NONE, GZIP, etc) - * @param notFilteredOnly outputs only reads that are not marked by 'filtered' flag in their header - * @param guessQualityFormat if true Reader will try to guess quality string format, if guess fails {@code format} - * will be used as quality string format + * @param file file with reads + * @param ct type of compression (NONE, GZIP, etc) * @throws IOException in case there is problem with reading from files */ - public SFastqReader(InputStream stream, QualityStringFormat format, CompressionType ct, boolean notFilteredOnly, - boolean guessQualityFormat) throws IOException { - //Check for null - if (stream == null) - throw new NullPointerException(); - - if (stream instanceof FileInputStream) - totalSize = ((FileInputStream) stream).getChannel().size(); - else - totalSize = 0L; - - if (notFilteredOnly) - infoProvider = IlluminaInfoProviderFactory.createProvider(format); - else - infoProvider = null; - - //Initialization - InputStream is = this.countingStream = new CountingInputStream(stream); - - //Wrapping stream if UnCompression needed - is = ct.createInputStream(is); - - //Creating main reder - this.reader = new BufferedReader(new InputStreamReader(is)); - - if (guessQualityFormat) { - reader.mark(8192); - QualityStringFormat f = QualityFormatGuesser.guessFormat(reader, 5500); - if (f != null) - format = f; - - reader.reset(); - } - - if (format == null) - if (guessQualityFormat) - throw new RuntimeException("Format guess failed..."); - else - throw new NullPointerException(); - - this.format = format; + public SFastqReader(String file, CompressionType ct) throws IOException { + this(new FileInputStream(file), null, ct, true, null, false); } /** * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data * - * @param stream stream with reads + * @param file file with reads * @param format read quality encoding format * @param ct type of compression (NONE, GZIP, etc) * @throws IOException in case there is problem with reading from files */ - public SFastqReader(InputStream stream, QualityStringFormat format, CompressionType ct) throws IOException { - this(stream, format, ct, false); + public SFastqReader(String file, QualityFormat format, CompressionType ct) throws IOException { + this(new FileInputStream(file), format, ct, false, null, false); + } + + /** + * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data + * + * @param file file with reads + * @throws IOException in case there is problem with reading from files + */ + public SFastqReader(File file) throws IOException { + this(new FileInputStream(file), null, file.getName().endsWith(".gz") ? CompressionType.GZIP : CompressionType.None, + true, null, false); } /** * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data * - * @param file file with reads - * @param format read quality encoding format - * @param ct type of compression (NONE, GZIP, etc) - * @param notFilteredOnly outputs only reads that are not marked by 'filtered' flag in their header + * @param file file with reads + * @param ct type of compression (NONE, GZIP, etc) * @throws IOException in case there is problem with reading from files */ - public SFastqReader(File file, QualityStringFormat format, CompressionType ct, boolean notFilteredOnly) throws IOException { - this(new FileInputStream(file), format, ct, notFilteredOnly); + public SFastqReader(File file, CompressionType ct) throws IOException { + this(new FileInputStream(file), null, ct, true, null, false); } /** @@ -160,66 +123,91 @@ * @param ct type of compression (NONE, GZIP, etc) * @throws IOException in case there is problem with reading from files */ - public SFastqReader(File file, QualityStringFormat format, CompressionType ct) throws IOException { - this(new FileInputStream(file), format, ct, false); + public SFastqReader(File file, QualityFormat format, CompressionType ct) throws IOException { + this(new FileInputStream(file), format, ct, false, null, false); } - - /*public SFastqReader(File file, QualityStringFormat format, CompressionType ct) throws FileNotFoundException { - //Check for null - if (file == null || format == null) - throw new NullPointerException(); - - //Initialization - InputStream is = new FileInputStream(file); - //this.channel = ((FileInputStream) is).getChannel(); - this.format = format; - - //Wrapping stream if UnCompression needed - try { - is = ct.createInputStream(is); - } catch (IOException e) { - throw new RuntimeException(e); - } - - //Creating main reder - this.reader = new BufferedReader(new InputStreamReader(is)); - }*/ - - //Additional constructors - /** - * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data + * Creates a {@link SSequencingRead} stream from a FASTQ stream with single-end reads data * - * @param fileName name of file containing reads - * @param format read quality encoding format + * @param stream stream with reads + * @param ct type of compression (NONE, GZIP, etc) * @throws IOException in case there is problem with reading from files */ - public SFastqReader(String fileName, QualityStringFormat format) throws IOException { - this(fileName, format, CompressionType.None); + public SFastqReader(InputStream stream, CompressionType ct) throws IOException { + this(stream, null, ct, true, null, false); } /** * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data * - * @param fileName name of file containing reads - * @param format read quality encoding format - * @param ct type of compression (NONE, GZIP, etc) + * @param stream stream with reads + * @param format read quality encoding format + * @param ct type of compression (NONE, GZIP, etc) * @throws IOException in case there is problem with reading from files */ - public SFastqReader(String fileName, QualityStringFormat format, CompressionType ct) throws IOException { - this(new File(fileName), format, ct); + public SFastqReader(InputStream stream, QualityFormat format, CompressionType ct) throws IOException { + this(stream, format, ct, false, null, false); } + //Root constructor + /** * Creates a {@link SSequencingRead} stream from a FASTQ files with single-end read data * - * @param file file with reads - * @param format read quality encoding format - * @throws IOException in case there is problem with reading from files + * @param stream stream with reads + * @param format read quality encoding format, if {@code guessQualityFormat} is true this value is used + * as a default format + * @param ct type of compression (NONE, GZIP, etc) + * @param guessQualityFormat if true reader will try to guess quality string format, if guess fails {@code format} + * will be used as a default quality string format, if {@code format==null} exception will + * be thrown + * @param infoProvider read info provider + * @param notFilteredOnly outputs only reads that are not marked by 'filtered' flag in their header + * @throws IOException */ - public SFastqReader(File file, QualityStringFormat format) throws IOException { - this(file, format, CompressionType.None); + public SFastqReader(InputStream stream, QualityFormat format, CompressionType ct, + boolean guessQualityFormat, ReadInfoProvider infoProvider, boolean notFilteredOnly) throws IOException { + //Check for null + if (stream == null) + throw new NullPointerException(); + + if (stream instanceof FileInputStream) + totalSize = ((FileInputStream) stream).getChannel().size(); + else + totalSize = -1L; + + this.infoProvider = infoProvider; + if (infoProvider == null && notFilteredOnly) + throw new IllegalArgumentException("Read info provider should be provided for filtering."); + + //Initialization + InputStream is = this.countingStream = new CountingInputStream(stream); + + //Wrapping stream if un-compression needed + is = ct.createInputStream(is); + + //Creating main reder + this.reader = new BufferedReader(new InputStreamReader(is), BUFFER_SIZE); + + //Guessing quality format + if (guessQualityFormat) { + reader.mark(BUFFER_SIZE); + QualityFormat f = QualityFormatChecker.guessFormat(reader, BUFFER_SIZE - 3072); //Buffer minus ~ one read. + + if (f != null) + format = f; + + reader.reset(); + } + + if (format == null) + if (guessQualityFormat) + throw new RuntimeException("Format guess failed."); + else + throw new NullPointerException(); + + this.format = format; } @Override @@ -253,7 +241,7 @@ read = parse(format, lines, id); if (infoProvider != null) { - IlluminaReadInfo info = infoProvider.getInfo(read.getDescription()); + ReadInfo info = infoProvider.getInfo(read.getDescription()); if (info.isFiltered()) continue; @@ -263,7 +251,7 @@ } } - public static SSequencingRead parse(QualityStringFormat format, String[] lines, long id) { + public static SSequencingRead parse(QualityFormat format, String[] lines, long id) { String descriptionLine = lines[0]; //Parsing: @@ -286,37 +274,33 @@ throw new RuntimeException("Wrong file format"); //Creating quality - SequenceQuality quality; - try { - quality = format.getQualityFactory().create(qualityString.getBytes()); - } catch (WrongQualityStringException ex) { - throw new RuntimeException("Error while parsing quality", ex); - } - //Dot correction //if (sequenceLine.contains(".") // || sequenceLine.contains("n") // || sequenceLine.contains("N")) { + byte[] qualityValues; + try { + qualityValues = SequenceQualityPhred.parse(format, qualityString.getBytes(), true); + } catch (WrongQualityStringException ex) { + throw new RuntimeException("Error while parsing quality", ex); + } + char[] seqChars = sequenceLine.toCharArray(); for (int i = 0; i < seqChars.length; ++i) if (seqChars[i] == '.' || seqChars[i] == 'n' || seqChars[i] == 'N') { //Substituting '.'/'n'/'N' with A seqChars[i] = 'A'; //and setting bad quality to this nucleotide - quality.setRawQualityValue(i, format.getZeroQualityValue()); - //increment corresponding counter - //dotCorrections.incrementAndGet(); + qualityValues[i] = SequenceQualityUtils.BAD_QUALITY_VALUE; } - // //Creating new sequence line - // sequenceLine = new String(seqChars); - //} + SequenceQualityPhred quality = new SequenceQualityPhred(qualityValues); //Parsing sequence NucleotideSequence sequence; try { - sequence = NucleotideSequenceImpl.fromSequence(seqChars); + sequence = new NucleotideSequence(seqChars); } catch (RuntimeException re) { throw new RuntimeException("Error while parsing sequence.", re); } @@ -325,13 +309,10 @@ if (sequence.size() != quality.size()) throw new RuntimeException("Wrong file format. Different sequence and quality sizes."); - //Refreshing redPrecent field for status update - //refreshRedPercent(); - return new SSequencingReadImpl(descriptionLine, new NucleotideSQPair(sequence, quality), id); } - public QualityStringFormat getQualityStringFormat() { + public QualityFormat getQualityFormat() { return format; } @@ -351,6 +332,8 @@ @Override public double getProgress() { + if (totalSize == -1L) + return Double.NaN; return ((double) countingStream.getBytesRead()) / totalSize; } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqWriter.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqWriter.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqWriter.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/SFastqWriter.java 2013-10-09 08:03:10.000000000 +0000 @@ -20,7 +20,7 @@ */ package com.milaboratory.core.sequencing.io.fastq; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; +import com.milaboratory.core.sequence.quality.QualityFormat; import com.milaboratory.core.sequencing.read.SSequencingRead; import com.milaboratory.util.CompressionType; @@ -37,7 +37,7 @@ private static final byte[] PLUS_DELIMITER = "\n+\n".getBytes(); //private FileOutputStream foStream; private final BufferedOutputStream bufferedOutputStream; - private final QualityStringFormat format; + private final QualityFormat format; /** * Creates file writer in FASTQ format for single-end reads @@ -45,7 +45,7 @@ * @param file file to store reads * @param format sequencing quality format */ - public SFastqWriter(String file, QualityStringFormat format) throws IOException { + public SFastqWriter(String file, QualityFormat format) throws IOException { this(new File(file), format, CompressionType.None); } @@ -56,7 +56,7 @@ * @param format sequencing quality format * @param ct compression type to use */ - public SFastqWriter(String file, QualityStringFormat format, CompressionType ct) throws IOException { + public SFastqWriter(String file, QualityFormat format, CompressionType ct) throws IOException { this(new File(file), format, ct); } @@ -66,7 +66,7 @@ * @param file file to store reads * @param format sequencing quality format */ - public SFastqWriter(File file, QualityStringFormat format) throws IOException { + public SFastqWriter(File file, QualityFormat format) throws IOException { this(new FileOutputStream(file), format, CompressionType.None); } @@ -77,7 +77,7 @@ * @param format sequencing quality format * @param ct compression type to use */ - public SFastqWriter(File file, QualityStringFormat format, CompressionType ct) throws IOException { + public SFastqWriter(File file, QualityFormat format, CompressionType ct) throws IOException { this(new FileOutputStream(file), format, ct); } @@ -87,7 +87,7 @@ * @param outputStream stream for formatted output * @param format sequencing quality format */ - public SFastqWriter(OutputStream outputStream, QualityStringFormat format) throws IOException { + public SFastqWriter(OutputStream outputStream, QualityFormat format) throws IOException { this(outputStream, format, CompressionType.None); } @@ -98,7 +98,7 @@ * @param format sequencing quality format * @param ct compression type to use */ - public SFastqWriter(OutputStream outputStream, QualityStringFormat format, CompressionType ct) throws IOException { + public SFastqWriter(OutputStream outputStream, QualityFormat format, CompressionType ct) throws IOException { this.format = format; bufferedOutputStream = new BufferedOutputStream(ct.createOutputStream(outputStream), 65536); } @@ -118,13 +118,13 @@ /* //Not optimized byte quality; for (int i = 0; i < read.getData().size(); ++i) { - quality = read.getData().getQuality().rawQualityValue(i); - if (!format.isUnSafe() && quality < format.getQualityCodeFrom() || quality > format.getQualityCodeTo()) + quality = read.getData().getQuality().value(i); + if (!format.isUnSafe() && quality < format.getMinValue() || quality > format.getMinValue()) throw new RuntimeException("Incompatible format and quality value"); - bufferedOutputStream.write(quality + format.getQualityCodeOffset()); + bufferedOutputStream.write(quality + format.getOffset()); }*/ - bufferedOutputStream.write(read.getData().getQuality().encode(format.getQualityCodeOffset())); + bufferedOutputStream.write(read.getData().getQuality().encode(format.getOffset())); bufferedOutputStream.write('\n'); } diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/SSequencingReadTrimmer.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/SSequencingReadTrimmer.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/SSequencingReadTrimmer.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/SSequencingReadTrimmer.java 2013-10-09 08:03:10.000000000 +0000 @@ -22,7 +22,7 @@ package com.milaboratory.core.sequencing.io.fastq; import cc.redberry.pipe.Processor; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; +import com.milaboratory.core.sequence.quality.QualityFormat; import com.milaboratory.core.sequencing.read.SSequencingRead; import com.milaboratory.core.sequencing.read.SSequencingReadImpl; @@ -39,8 +39,8 @@ * * @param format sequencing quality format */ - public SSequencingReadTrimmer(QualityStringFormat format) { - this.trimmer = new NucleotideSQPairTrimmer(format); + public SSequencingReadTrimmer(QualityFormat format) { + this.trimmer = new NucleotideSQPairTrimmer((byte) 2); } @Override diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/AbstractSequenceQualityFactory.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/AbstractSequenceQualityFactory.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/AbstractSequenceQualityFactory.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/AbstractSequenceQualityFactory.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq.quality; - -abstract class AbstractSequenceQualityFactory implements SequenceQualityFactory { - protected int qualityCodeOffset; - protected int qualityCodeFrom; - protected int qualityCodeTo; - protected boolean lowerUnSafe; - - public boolean isLowerUnSafe() { - return lowerUnSafe; - } - - public void setLowerUnSafe(boolean lowerUnSafe) { - this.lowerUnSafe = lowerUnSafe; - } - - public int getQualityCodeFrom() { - return qualityCodeFrom; - } - - public void setQualityCodeFrom(int qualityCodeFrom) { - this.qualityCodeFrom = qualityCodeFrom; - } - - public int getQualityCodeOffset() { - return qualityCodeOffset; - } - - public void setQualityCodeOffset(int qualityCodeOffset) { - this.qualityCodeOffset = qualityCodeOffset; - } - - public int getQualityCodeTo() { - return qualityCodeTo; - } - - public void setQualityCodeTo(int qualityCodeTo) { - this.qualityCodeTo = qualityCodeTo; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/PhredSequenceQualityFactory.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/PhredSequenceQualityFactory.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/PhredSequenceQualityFactory.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/PhredSequenceQualityFactory.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq.quality; - -import com.milaboratory.core.sequence.SequenceQuality; -import com.milaboratory.core.sequence.quality.SequenceQualityPhred; - -class PhredSequenceQualityFactory extends AbstractSequenceQualityFactory { - public SequenceQuality create(byte[] data) { - for (int i = 0; i < data.length; i++) { - data[i] -= qualityCodeOffset; - if (data[i] > qualityCodeTo) - throw new WrongQualityStringException(((char) (data[i] + qualityCodeOffset)) + " [" + data[i] + "]"); - if (data[i] < qualityCodeFrom) - if (!lowerUnSafe) - throw new WrongQualityStringException(((char) (data[i] + qualityCodeOffset)) + " [" + data[i] + "]"); - else - data[i] = (byte) qualityCodeFrom; - } - return new SequenceQualityPhred(data); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/QualityStringFormat.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/QualityStringFormat.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/QualityStringFormat.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/QualityStringFormat.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,107 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq.quality; - -/** - * A base class to store various sequencing quality formats - */ -public enum QualityStringFormat { - Sanger((byte) 33, (byte) 0, (byte) 40, false, new PhredSequenceQualityFactory(), "Sanger", "sanger"), - //Solexa((byte) 64, (byte) -5, (byte) 40, false, new SolexaSequenceQualityFactory(), "Illumina 1.2 or less", "illumina12"), - Illumina13((byte) 64, (byte) 0, (byte) 40, false, new PhredSequenceQualityFactory(), "Illumina 1.3+", "illumina13"), - Illumina15((byte) 64, (byte) 2, (byte) 40, true, new PhredSequenceQualityFactory(), "Illumina 1.5+", "illumina15"), - Illumina18((byte) 33, (byte) 2, (byte) 45, true, new PhredSequenceQualityFactory(), "Illumina [Casava 1.8]", "illumina18"), - Illumina18u((byte) 33, (byte) 2, (byte) 45, true, new PhredSequenceQualityFactory(), "Illumina Unsafe [Casava 1.8]", "illumina18u", true); - private byte qualityCodeOffset; - private byte qualityCodeFrom; - private byte qualityCodeTo; - private byte zeroQualityValue; - private boolean zeroFlanked, isUnSafe; - private SequenceQualityFactory qualityFactory; - private String name, xmlRepresentation; - - private QualityStringFormat(byte qualityCodeOffset, byte qualityCodeFrom, byte qualityCodeTo, boolean zeroFlanked, AbstractSequenceQualityFactory qualityFactory, String name, - String xmlRepresentation) { - this.qualityCodeOffset = qualityCodeOffset; - this.qualityCodeFrom = qualityCodeFrom; - this.qualityCodeTo = qualityCodeTo; - this.zeroQualityValue = qualityCodeFrom; - this.zeroFlanked = zeroFlanked; - this.xmlRepresentation = xmlRepresentation; - qualityFactory.setQualityCodeFrom(qualityCodeFrom); - qualityFactory.setQualityCodeOffset(qualityCodeOffset); - qualityFactory.setQualityCodeTo(qualityCodeTo); - this.qualityFactory = qualityFactory; - this.name = name; - this.isUnSafe = true; - } - - private QualityStringFormat(byte qualityCodeOffset, byte qualityCodeFrom, byte qualityCodeTo, boolean zeroFlanked, AbstractSequenceQualityFactory qualityFactory, String name, - String xmlRepresentation, boolean unSafe) { - this(qualityCodeOffset, qualityCodeFrom, qualityCodeTo, zeroFlanked, qualityFactory, name, xmlRepresentation); - qualityFactory.setLowerUnSafe(unSafe); - this.isUnSafe = unSafe; - } - - public byte getQualityCodeFrom() { - return qualityCodeFrom; - } - - public byte getQualityCodeOffset() { - return qualityCodeOffset; - } - - public byte getQualityCodeTo() { - return qualityCodeTo; - } - - public SequenceQualityFactory getQualityFactory() { - return qualityFactory; - } - - public boolean isZeroFlanked() { - return zeroFlanked; - } - - public byte getZeroQualityValue() { - return zeroQualityValue; - } - - public String getXmlRepresentation() { - return xmlRepresentation; - } - - public boolean isUnSafe() { - return isUnSafe; - } - - public static QualityStringFormat fromXML(String xml) { - for (QualityStringFormat format : values()) - if (format.xmlRepresentation.equals(xml)) - return format; - return null; - } - - @Override - public String toString() { - return name; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/SequenceQualityFactory.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/SequenceQualityFactory.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/SequenceQualityFactory.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/SequenceQualityFactory.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq.quality; - -import com.milaboratory.core.sequence.SequenceQuality; - -/** - * Sequence quality factory from byte array. - * - * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) - */ -public interface SequenceQualityFactory { - SequenceQuality create(byte[] data); -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/WrongQualityStringException.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/WrongQualityStringException.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/WrongQualityStringException.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/fastq/quality/WrongQualityStringException.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.io.fastq.quality; - -public class WrongQualityStringException extends RuntimeException { - public WrongQualityStringException() { - } - - public WrongQualityStringException(String message) { - super(message); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFClipper.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFClipper.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFClipper.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFClipper.java 2013-10-09 08:03:10.000000000 +0000 @@ -40,7 +40,7 @@ int[] flowgramIndexes = new int[to - from]; System.arraycopy(input.getSequenceFlowgramMappping(), from, flowgramIndexes, 0, to - from); - NucleotideSQPair clippedSQPair = input.getData().getSubPairCopy(from, to).getDeepCopy(); + NucleotideSQPair clippedSQPair = input.getData().getRange(from, to); //last element in this array. (input.getData().size() == input.getSequenceFlowgramMappping().length) int fFrom = 0, fTo = input.getSequenceFlowgramMappping()[input.getData().size() - 1]; if (from > 0) diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFHeader.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFHeader.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFHeader.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFHeader.java 2013-10-09 08:03:10.000000000 +0000 @@ -21,7 +21,6 @@ package com.milaboratory.core.sequencing.io.sff; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; /** * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) @@ -48,7 +47,7 @@ this.numberOfFlows = numberOfFlows; this.flowChars = flowChars; this.keySequence = keySequence; - this.flowsSequence = NucleotideSequenceImpl.fromSequence(flowChars); + this.flowsSequence = new NucleotideSequence(flowChars); } public char[] getFlowChars() { diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFReader.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFReader.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFReader.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/io/sff/SFFReader.java 2013-10-09 08:03:10.000000000 +0000 @@ -24,7 +24,6 @@ import com.milaboratory.core.sequence.NucleotideSQPair; import com.milaboratory.core.sequence.nucleotide.NucleotideAlphabet; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; import com.milaboratory.core.sequence.quality.SequenceQualityPhred; import com.milaboratory.core.sequencing.io.SSequencingDataReaderWithFlowgram; import com.milaboratory.core.sequencing.read.SSequencingReadWithFlowgram; @@ -167,7 +166,7 @@ for (int i = 0; i < numberOfBases; ++i) sequenceData.set(i, NucleotideAlphabet.INSTANCE.codeFromSymbol((char) (buff[2 * header.getNumberOfFlows() + numberOfBases + i]))); byte[] qualityData = new byte[numberOfBases]; - NucleotideSequence sequence = NucleotideSequenceImpl.fromStorage(sequenceData); + NucleotideSequence sequence = new NucleotideSequence(sequenceData); System.arraycopy(buff, 2 * header.getNumberOfFlows() + 2 * numberOfBases, qualityData, 0, numberOfBases); NucleotideSQPair sqData = new NucleotideSQPair(sequence, new SequenceQualityPhred(qualityData)); int[] flowgramIndexes = new int[numberOfBases]; diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotif.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotif.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotif.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotif.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,171 +0,0 @@ -package com.milaboratory.core.sequencing.motif; - -import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; - -import java.util.BitSet; - -/** - * Implementation of simplest nucleotide sequence motif. - * - * @author Bolotin Dmitriy - */ -public final class NucleotideMotif { - private final BitSet data; - private final int size; - - /** - * Inner private constructor for reverseComplement motif - * - * @param data - */ - private NucleotideMotif(BitSet data, int size) { - this.data = data; - this.size = size; - } - - /** - * Creates motif from it's String representation. - * - * @param motif - */ - public NucleotideMotif(String motif) { - this.data = new BitSet(motif.length() << 2); - for (int i = 0; i < motif.length(); ++i) { - byte[] codes = NucleotideWildcards.getCodes(motif.charAt(i)); - if (codes == null) - throw new RuntimeException("Illegal nucleotide/wildcard: \"" + motif.charAt(i) + "\"."); - for (byte b : codes) - data.set(b + (i << 2)); - } - this.size = motif.length(); - } - - /** - * Size of motif. - * - * @return - */ - public final int size() { - return size; - } - - /** - * Return true if motif matches sub-sequence of nucleotide sequence from (from) inclusive to (from + size()) - * exclusive. - * - * @param sequence target sequence - * @param from offset to start matching - * @return - */ - public final boolean matches(NucleotideSequence sequence, int from) { - if (from < 0 || from >= sequence.size()) - throw new IllegalArgumentException(); - if (sequence.size() < from + size()) - return false; - for (int i = 0; i < size; ++i) - if (!data.get(sequence.codeAt(from + i) + (i << 2))) - return false; - return true; - } - - /** - * Return number of mismatches between motif and sub-sequence of nucleotide - * sequence from (from) inclusive to (from + size()) exclusive. - * - * @param sequence target sequence - * @param from offset to start matching - * @return - */ - /*public final int mismatchCount(NucleotideSequence sequence, int from) { - if (from < 0 || from >= sequence.size()) - throw new IllegalArgumentException(); - if (sequence.size() < from + size()) - return -1; - for (int i = 0; i < size; ++i) - if (!data.get(sequence.codeAt(from + i) + (i << 2))) - return false; - return true; - }*/ - - - /** - * Return reverse-complement motif. - */ - public final NucleotideMotif reverseComplement() { - BitSet newStore = new BitSet(data.size()); - for (int i = 0; i < data.size(); ++i) - if (data.get(data.size() - 1 - i)) - newStore.set(i); - return new NucleotideMotif(newStore, size); - } - - /** - * Try to find this motif in sub-sequence. - * - * @param sequence target sequence - * @param from left subsequence border; inclusive - * @param to right subsequence border; exclusive - * @return coordinate of first match (coordinate of first nucleotide in the match) or -1 if no matches found - */ - public final int findMatch(NucleotideSequence sequence, int from, int to) { - if (from < 0 || from >= sequence.size() || to < 0 - || to > sequence.size() - || from > to) - throw new IllegalArgumentException(); - to -= size; - for (int i = from; i < to; ++i) - if (matches(sequence, i)) - return i; - return -1; - } - - /** - * Try to find this motif in sequence. - * - * @param sequence target sequence - * @return coordinate of first match (coordinate of first nucleotide in the match) or -1 if no matches found - */ - public final int findMatch(NucleotideSequence sequence) { - return findMatch(sequence, 0, sequence.size()); - } - - /** - * Direct motif querying method. Returns true if nucleotide with specified code is allowed on the specified - * position. - * - * @param position position in the motif - * @param code nucleotide code (1..3) - * @return - */ - public final boolean get(int position, byte code) { - return data.get(code + (position << 2)); - } - - public String toString() { - char[] chars = new char[size]; - int offset; - for (int i = 0; i < size; ++i) { - offset = i << 2; - chars[i] = NucleotideWildcards.getSymbol(data.get(offset), data.get(offset + 1), - data.get(offset + 2), data.get(offset + 3)); - } - return new String(chars); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - NucleotideMotif that = (NucleotideMotif) o; - - if (!data.equals(that.data)) return false; - - return true; - } - - @Override - public int hashCode() { - return data.hashCode(); - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearch.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearch.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearch.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearch.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -package com.milaboratory.core.sequencing.motif; - -import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; - -public class NucleotideMotifSearch { - private final NucleotideMotif motif; - private final NucleotideMotifSearchOptions options; - private final int exactRegionFrom, exactRegionTo; - - public NucleotideMotifSearch(NucleotideMotif motif, NucleotideMotifSearchOptions options, int exactRegionFrom, int exactRegionTo) { - this.motif = motif; - this.options = options; - this.exactRegionFrom = exactRegionFrom; - this.exactRegionTo = exactRegionTo; - } - -// public int nextMatch(int from, NucleotideSequence sequence) { -// if (exactRegionFrom >= 0) { //If exact region exists -// for (int i = 0; i < sequence.size(); ++i) { -// -// } -// } -// } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearchOptions.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearchOptions.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearchOptions.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideMotifSearchOptions.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -package com.milaboratory.core.sequencing.motif; - -public class NucleotideMotifSearchOptions { - public static NucleotideMotifSearchOptions NORMAL_WITH_TRUNCATION = new NucleotideMotifSearchOptions(2, 1, 1, 2, 2); - public static NucleotideMotifSearchOptions NORMAL_WITHOUT_TRUNCATION = new NucleotideMotifSearchOptions(2, 1, 1, 2, 0); - - public final int maxMismatches, maxDeletions, maxInsertions, maxTotalErrors; - public final int maxLeftTruncation; - - public NucleotideMotifSearchOptions(int maxMismatches, int maxDeletions, int maxInsertions, int maxTotalErrors, int maxLeftTruncation) { - this.maxMismatches = maxMismatches; - this.maxDeletions = maxDeletions; - this.maxInsertions = maxInsertions; - this.maxTotalErrors = maxTotalErrors; - this.maxLeftTruncation = maxLeftTruncation; - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideWildcards.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideWildcards.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideWildcards.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/motif/NucleotideWildcards.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,125 +0,0 @@ -package com.milaboratory.core.sequencing.motif; - -import com.milaboratory.core.sequence.nucleotide.NucleotideAlphabet; - -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Map; - -import static com.milaboratory.core.sequence.nucleotide.NucleotideAlphabet.*; - -/** - * Implementation of standard IUPAC notations.

See http://users.ox.ac.uk/~linc1775/blueprint.htm. - * - * @author Bolotin Dmitriy - */ -public class NucleotideWildcards { - private static final char[] CHARS; - private static final Wildcard[] WILDCARDS; - private static final Map pCodeToWildcard = new HashMap<>(); - - static { - WILDCARDS = new Wildcard[]{ - //No nucleotide - new Wildcard('-'), - //Exact nucleotides - new Wildcard('A', 'A'), - new Wildcard('G', 'G'), - new Wildcard('C', 'C'), - new Wildcard('T', 'T'), - //Two-letter wildcard - new Wildcard('R', 'A', 'G'), - new Wildcard('Y', 'C', 'T'), - new Wildcard('S', 'G', 'C'), - new Wildcard('W', 'A', 'T'), - new Wildcard('K', 'G', 'T'), - new Wildcard('M', 'A', 'C'), - //Three-letter wildcards - new Wildcard('B', 'C', 'G', 'T'), - new Wildcard('D', 'A', 'G', 'T'), - new Wildcard('H', 'A', 'C', 'T'), - new Wildcard('V', 'A', 'C', 'G'), - //Any nucleotide - new Wildcard('N', 'A', 'C', 'G', 'T') - }; - - //Sorting to make binary search possible - Arrays.sort(WILDCARDS, new Comparator() { - @Override - public int compare(Wildcard o1, Wildcard o2) { - return Character.compare(o1.character, o2.character); - } - }); - - //Creating projections - CHARS = new char[WILDCARDS.length]; - for (int i = 0; i < CHARS.length; ++i) { - CHARS[i] = WILDCARDS[i].character; - pCodeToWildcard.put(WILDCARDS[i].pCode, WILDCARDS[i]); - } - } - - /** - * Returns sorted byte array with codes corresponding to specified symbol.

- *

- * Allowed symbols: -, A, T, G, C, R, Y, S, W, K, M, B, D, H, V, N

- *

- * IUPAC standard notion used. For more information see http://users.ox.ac.uk/~linc1775/blueprint.htm. - * - * @param symbol - * @return - */ - public static byte[] getCodes(char symbol) { - char upper = Character.toUpperCase(symbol); - int position = Arrays.binarySearch(CHARS, upper); - if (position < 0) - return null; - return WILDCARDS[position].to; - } - - /** - * Returns wildcard symbol for specified nucleotides.

- *

- * IUPAC standard notion used. For more information see http://users.ox.ac.uk/~linc1775/blueprint.htm. - * - * @param a allow A - * @param g allow G - * @param c allow C - * @param t allow T - * @return - */ - public static char getSymbol(boolean a, boolean g, boolean c, boolean t) { - byte pCode = 0; - - if (a) - pCode |= 1 << A; - if (g) - pCode |= 1 << G; - if (c) - pCode |= 1 << C; - if (t) - pCode |= 1 << T; - - Wildcard wc = pCodeToWildcard.get(pCode); - if (wc == null) - throw new RuntimeException("Impossible exception. (Assertion fail)"); - return wc.character; - } - - private static class Wildcard { - public final char character; - public final byte[] to; - public final byte pCode; - - public Wildcard(char character, char... to) { - this.character = character; - this.to = new byte[to.length]; - byte pCode = 0; - for (int i = 0; i < to.length; ++i) - pCode |= 1 << (this.to[i] = NucleotideAlphabet.INSTANCE.codeFromSymbol(to[i])); - Arrays.sort(this.to); - this.pCode = pCode; - } - } -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/read/PDataSerRecord.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/read/PDataSerRecord.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/read/PDataSerRecord.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/read/PDataSerRecord.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -/* - * MiTCR - * - * Copyright (c) 2010-2013: - * Bolotin Dmitriy - * Chudakov Dmitriy - * - * MiTCR is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -package com.milaboratory.core.sequencing.read; - -import com.milaboratory.core.sequence.NucleotideSQPair; - -/** - * Interface for paired sequencing read. - * - * @author Bolotin Dmitriy (bolotin.dmitriy@gmail.com) - * @author Shugay Mikhail (mikhail.shugay@gmail.com) - */ -public interface PDataSerRecord extends SequencingRead { - /** - * Retrieves one read form paired read - * - * @param idInPair 0 or 1 - * @return single read - */ - SSequencingRead getSRecord(int idInPair); - - /** - * Retrieves data form paired read - * - * @param idInPair 0 or 1 - * @return single read - */ - NucleotideSQPair getData(int idInPair); - -} diff -Nru micommons-0.9/src/main/java/com/milaboratory/core/sequencing/read/PSequencingRead.java micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/read/PSequencingRead.java --- micommons-0.9/src/main/java/com/milaboratory/core/sequencing/read/PSequencingRead.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/core/sequencing/read/PSequencingRead.java 2013-10-09 08:03:10.000000000 +0000 @@ -44,5 +44,4 @@ * @return single read */ NucleotideSQPair getData(int idInPair); - } diff -Nru micommons-0.9/src/main/java/com/milaboratory/util/Bit2Array.java micommons-1.0.3/src/main/java/com/milaboratory/util/Bit2Array.java --- micommons-0.9/src/main/java/com/milaboratory/util/Bit2Array.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/main/java/com/milaboratory/util/Bit2Array.java 2013-10-09 08:03:10.000000000 +0000 @@ -90,6 +90,19 @@ return true; } + //TODO optimize + public Bit2Array getRange(int from, int to) { + if (from < 0 || (from >= size && size != 0) + || to < from || to > size) + throw new IndexOutOfBoundsException(); + + Bit2Array ret = new Bit2Array(to - from); + int i = 0; + for (int j = from; j < to; ++j, ++i) + ret.set(i, get(j)); + return ret; + } + /*public static Bit2Array wrap(byte[] data, int size) { return new Bit2Array(size, data); }*/ diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifSearchAdvancedTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifSearchAdvancedTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifSearchAdvancedTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifSearchAdvancedTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -24,7 +24,7 @@ package com.milaboratory.core.sequence.motif; import com.milaboratory.core.sequence.SequencingErrorType; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; +import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -43,14 +43,14 @@ @Test public void mmExactTest01() { - int result = search.search(NucleotideSequenceImpl.fromSequence(s0), + int result = search.search(new NucleotideSequence(s0), LowQualityIndicator.Utils.wrap(q0)); assertEquals(result, 3); } @Test public void mmComparisonTest01() { - int result = search.search(NucleotideSequenceImpl.fromSequence(s0), + int result = search.search(new NucleotideSequence(s0), LowQualityIndicator.Utils.wrap(q0), 23, s0.length()); assertEquals(result, 26); @@ -64,7 +64,7 @@ @Test public void mmComparisonTest02() { - int result = search.search(NucleotideSequenceImpl.fromSequence(s1), + int result = search.search(new NucleotideSequence(s1), LowQualityIndicator.Utils.wrap(q1)); assertEquals(result, 52); } @@ -77,7 +77,7 @@ @Test public void mmComparisonTest03() { - int result = search.search(NucleotideSequenceImpl.fromSequence(s2), + int result = search.search(new NucleotideSequence(s2), LowQualityIndicator.Utils.wrap(q2)); assertEquals(result, 3); } @@ -91,7 +91,7 @@ @Test public void mmFalseTest01() { //Focused on exact match rejection - int result = search.search(NucleotideSequenceImpl.fromSequence(sf0), + int result = search.search(new NucleotideSequence(sf0), LowQualityIndicator.Utils.wrap(qf0), 0, 25); assertEquals(result, -1); @@ -99,7 +99,7 @@ @Test public void mmFalseTest02() { - int result = search.search(NucleotideSequenceImpl.fromSequence(sf0), + int result = search.search(new NucleotideSequence(sf0), LowQualityIndicator.Utils.wrap(qf0)); assertEquals(result, -1); } diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/motif/NucleotideMotifTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -20,7 +20,7 @@ */ package com.milaboratory.core.sequence.motif; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; +import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; import org.junit.Test; import static org.junit.Assert.*; @@ -32,24 +32,24 @@ @Test public void testMatches01() { NucleotideMotif nm = new NucleotideMotif("ATCG"); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("ATCG"), 0)); - assertFalse(nm.matches(NucleotideSequenceImpl.fromSequence("ACCG"), 0)); + assertTrue(nm.matches(new NucleotideSequence("ATCG"), 0)); + assertFalse(nm.matches(new NucleotideSequence("ACCG"), 0)); } @Test public void testMatches02() { NucleotideMotif nm = new NucleotideMotif("GAGGAGACGGTGACCRKGGT"); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("GAGGAGACGGTGACCGGGGT"), 0)); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("GAGGAGACGGTGACCGTGGT"), 0)); - assertFalse(nm.matches(NucleotideSequenceImpl.fromSequence("GAGGAGACGGTGACCGCGGT"), 0)); + assertTrue(nm.matches(new NucleotideSequence("GAGGAGACGGTGACCGGGGT"), 0)); + assertTrue(nm.matches(new NucleotideSequence("GAGGAGACGGTGACCGTGGT"), 0)); + assertFalse(nm.matches(new NucleotideSequence("GAGGAGACGGTGACCGCGGT"), 0)); } @Test public void testMatches03() { NucleotideMotif nm = new NucleotideMotif("GAGGAGACGGTGACCRKGGT"); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("GACAGAGGAGACGGTGACCGGGGTAA"), 4)); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("CAGATGAGGAGACGGTGACCGTGGTGG"), 5)); - assertFalse(nm.matches(NucleotideSequenceImpl.fromSequence("CCAGAGGAGACGGTGACCGCGGTG"), 3)); + assertTrue(nm.matches(new NucleotideSequence("GACAGAGGAGACGGTGACCGGGGTAA"), 4)); + assertTrue(nm.matches(new NucleotideSequence("CAGATGAGGAGACGGTGACCGTGGTGG"), 5)); + assertFalse(nm.matches(new NucleotideSequence("CCAGAGGAGACGGTGACCGCGGTG"), 3)); } @Test @@ -61,14 +61,15 @@ @Test public void rcTest() { NucleotideMotif nm = new NucleotideMotif("ATGC"); + System.out.println(nm.reverseComplement()); assertTrue(nm.reverseComplement().equals(new NucleotideMotif("GCAT"))); } @Test public void testFindMatch01() { NucleotideMotif nm = new NucleotideMotif("GAGGAGACGGTGACCRKGGT"); - assertEquals(nm.findMatch(NucleotideSequenceImpl.fromSequence("GACAGAGGAGACGGTGACCGGGGTAA")), 4); - assertEquals(nm.findMatch(NucleotideSequenceImpl.fromSequence("CAGATGAGGAGACGGTGACCGTGGTGG")), 5); - assertEquals(nm.findMatch(NucleotideSequenceImpl.fromSequence("CCAGAGGAGACGGTGACCGCGGTG")), -1); + assertEquals(nm.findMatch(new NucleotideSequence("GACAGAGGAGACGGTGACCGGGGTAA")), 4); + assertEquals(nm.findMatch(new NucleotideSequence("CAGATGAGGAGACGGTGACCGTGGTGG")), 5); + assertEquals(nm.findMatch(new NucleotideSequence("CCAGAGGAGACGGTGACCGCGGTG")), -1); } } diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregatorTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregatorTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregatorTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequenceAggregatorTest.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -package com.milaboratory.core.sequence.nucleotide; - -import org.junit.Test; - -import static com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl.fromSequence; - -public class NucleotideSequenceAggregatorTest { - @Test - public void test1() throws Exception { - NucleotideSequenceAggregator aggr = new NucleotideSequenceAggregator(8, 2); - aggr.putSequence(fromSequence("attacaca")); - aggr.putSequence(fromSequence("tattacac")); - aggr.putSequence(fromSequence("tattacaa")); - - System.out.println(aggr.getSequence(.5)); - } -} diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequencesTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequencesTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequencesTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/nucleotide/NucleotideSequencesTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -20,22 +20,21 @@ */ package com.milaboratory.core.sequence.nucleotide; -import com.milaboratory.core.sequence.util.NucleotideSubSequence; import org.junit.Assert; import org.junit.Test; public class NucleotideSequencesTest { @Test public void test1() { - NucleotideSequence sequence = NucleotideSequenceImpl.fromSequence("ATTAGACATAGACA"); + NucleotideSequence sequence = new NucleotideSequence("ATTAGACATAGACA"); Assert.assertEquals(sequence.toString(), "ATTAGACATAGACA"); - NucleotideSequence subSequence = new NucleotideSubSequence(sequence, 0, sequence.size()); + NucleotideSequence subSequence = sequence.getRange(0, sequence.size()); Assert.assertEquals(subSequence.toString(), "ATTAGACATAGACA"); Assert.assertEquals(subSequence.hashCode(), sequence.hashCode()); Assert.assertEquals(subSequence, sequence); - NucleotideSequence sequence1 = NucleotideSequenceImpl.fromSequence("AGACATAGACA"); - NucleotideSequence subSequence1 = new NucleotideSubSequence(sequence, 3); + NucleotideSequence sequence1 = new NucleotideSequence("AGACATAGACA"); + NucleotideSequence subSequence1 = sequence.getRange(3, sequence.size()); Assert.assertEquals(subSequence1.hashCode(), sequence1.hashCode()); Assert.assertEquals(subSequence1, sequence1); diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/nucleotide/ReverseComplementTransformerTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/nucleotide/ReverseComplementTransformerTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/nucleotide/ReverseComplementTransformerTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/nucleotide/ReverseComplementTransformerTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -20,7 +20,6 @@ */ package com.milaboratory.core.sequence.nucleotide; -import com.milaboratory.core.sequence.util.NucleotideRCSequence; import org.junit.Test; import static org.hamcrest.CoreMatchers.not; @@ -30,9 +29,9 @@ public class ReverseComplementTransformerTest { @Test public void test1() { - NucleotideSequence ns = NucleotideSequenceImpl.fromSequence("atagagaattagataaggcagatacgatcgacgtgtactactagcta"); - NucleotideSequence rc = new NucleotideRCSequence(ns); - NucleotideSequence rcrc = new NucleotideRCSequence(rc); + NucleotideSequence ns = new NucleotideSequence("atagagaattagataaggcagatacgatcgacgtgtactactagcta"); + NucleotideSequence rc = ns.getReverseComplement(); + NucleotideSequence rcrc = rc.getReverseComplement(); assertEquals(rcrc, ns); assertEquals(rcrc.hashCode(), ns.hashCode()); assertThat(rc, not(ns)); diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/tree/SequenceTreeMapTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/tree/SequenceTreeMapTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/tree/SequenceTreeMapTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/tree/SequenceTreeMapTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -8,7 +8,6 @@ import com.milaboratory.core.sequence.aminoacid.CDRAminoAcidSequence; import com.milaboratory.core.sequence.nucleotide.NucleotideAlphabet; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; import com.milaboratory.util.Bit2Array; import org.apache.commons.math.random.RandomGenerator; import org.apache.commons.math.random.Well19937a; @@ -16,7 +15,6 @@ import java.util.*; -import static com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl.fromSequence; import static com.milaboratory.core.sequence.util.SequencesUtils.cat; import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.*; @@ -45,16 +43,16 @@ public void testExact1() throws Exception { SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); - assertNull(map.put(fromSequence("attagaca"), 1)); - assertEquals((Integer) 1, map.put(fromSequence("attagaca"), 2)); + assertNull(map.put(new NucleotideSequence("attagaca"), 1)); + assertEquals((Integer) 1, map.put(new NucleotideSequence("attagaca"), 2)); - assertNull(map.put(fromSequence("attacaca"), 3)); + assertNull(map.put(new NucleotideSequence("attacaca"), 3)); - assertEquals((Integer) 3, map.get(fromSequence("attacaca"))); + assertEquals((Integer) 3, map.get(new NucleotideSequence("attacaca"))); Set sequences = new HashSet<>(); - sequences.add(fromSequence("attacaca")); - sequences.add(fromSequence("attagaca")); + sequences.add(new NucleotideSequence("attacaca")); + sequences.add(new NucleotideSequence("attagaca")); Set ints = new HashSet<>(); ints.add(2); @@ -78,17 +76,17 @@ Set sequences = new HashSet<>(); Set ints = new HashSet<>(); - assertNull(map.put(fromSequence("attacacaattaattacacacacaattacaca"), 3)); - sequences.add(fromSequence("attacacaattaattacacacacaattacaca")); + assertNull(map.put(new NucleotideSequence("attacacaattaattacacacacaattacaca"), 3)); + sequences.add(new NucleotideSequence("attacacaattaattacacacacaattacaca")); ints.add(3); - assertNull(map.put(fromSequence("attacacaattacacaattacgacacttacaca"), 4)); - sequences.add(fromSequence("attacacaattacacaattacgacacttacaca")); + assertNull(map.put(new NucleotideSequence("attacacaattacacaattacgacacttacaca"), 4)); + sequences.add(new NucleotideSequence("attacacaattacacaattacgacacttacaca")); ints.add(4); - assertNull(map.put(fromSequence("atattattacacaacacatacattacacaaca"), 5)); - sequences.add(fromSequence("atattattacacaacacatacattacacaaca")); + assertNull(map.put(new NucleotideSequence("atattattacacaacacatacattacacaaca"), 5)); + sequences.add(new NucleotideSequence("atattattacacaacacatacattacacaaca")); ints.add(5); - assertNull(map.put(fromSequence("attacacaattacacaattacacaattacacaattacacaattacaca"), 19)); - sequences.add(fromSequence("attacacaattacacaattacacaattacacaattacacaattacaca")); + assertNull(map.put(new NucleotideSequence("attacacaattacacaattacacaattacacaattacacaattacaca"), 19)); + sequences.add(new NucleotideSequence("attacacaattacacaattacacaattacacaattacacaattacaca")); ints.add(19); SequenceTreeMap.Node node; @@ -110,27 +108,27 @@ public void testBranchingEnumerator1() { SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); - assertNull(map.put(fromSequence("attagaca"), 1)); // 1 mm - assertNull(map.put(fromSequence("attacaca"), 2)); // match - assertNull(map.put(fromSequence("ataacaca"), 3)); // 1 mm - assertNull(map.put(fromSequence("attcgtca"), 4)); // many mm - assertNull(map.put(fromSequence("atttacaca"), 5)); // 1 insertion in stretch - assertNull(map.put(fromSequence("atacaca"), 6)); // 1 deletion in the "t" stretch - assertNull(map.put(fromSequence("attacacta"), 7)); // 1 insertion - assertNull(map.put(fromSequence("attcaca"), 8)); // 1 deletion - assertNull(map.put(fromSequence("attacac"), 9)); // 1 deletion in the end - assertNull(map.put(fromSequence("ttacaca"), 10)); // 1 deletion in the beginning - assertNull(map.put(fromSequence("tattacaca"), 11)); // 1 insertion in the beginning - assertNull(map.put(fromSequence("attacacat"), 12)); // 1 insertion in the ent - assertNull(map.put(fromSequence("attacact"), 13)); // 1 mm end - assertNull(map.put(fromSequence("tttacaca"), 14)); // 1 mm begin + assertNull(map.put(new NucleotideSequence("attagaca"), 1)); // 1 mm + assertNull(map.put(new NucleotideSequence("attacaca"), 2)); // match + assertNull(map.put(new NucleotideSequence("ataacaca"), 3)); // 1 mm + assertNull(map.put(new NucleotideSequence("attcgtca"), 4)); // many mm + assertNull(map.put(new NucleotideSequence("atttacaca"), 5)); // 1 insertion in stretch + assertNull(map.put(new NucleotideSequence("atacaca"), 6)); // 1 deletion in the "t" stretch + assertNull(map.put(new NucleotideSequence("attacacta"), 7)); // 1 insertion + assertNull(map.put(new NucleotideSequence("attcaca"), 8)); // 1 deletion + assertNull(map.put(new NucleotideSequence("attacac"), 9)); // 1 deletion in the end + assertNull(map.put(new NucleotideSequence("ttacaca"), 10)); // 1 deletion in the beginning + assertNull(map.put(new NucleotideSequence("tattacaca"), 11)); // 1 insertion in the beginning + assertNull(map.put(new NucleotideSequence("attacacat"), 12)); // 1 insertion in the ent + assertNull(map.put(new NucleotideSequence("attacact"), 13)); // 1 mm end + assertNull(map.put(new NucleotideSequence("tttacaca"), 14)); // 1 mm begin HashSet[] asserts = new HashSet[3]; asserts[0] = new HashSet<>(Arrays.asList(1, 3, 13, 14)); asserts[1] = new HashSet<>(Arrays.asList(6, 8, 9, 10)); asserts[2] = new HashSet<>(Arrays.asList(5, 7, 11, 12)); - NucleotideSequence reference = fromSequence("attacaca"); + NucleotideSequence reference = new NucleotideSequence("attacaca"); for (byte mode = 0; mode < 3; ++mode) { @@ -162,22 +160,22 @@ public void testNIterator() throws Exception { SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); - assertNull(map.put(fromSequence("attagaca"), 1)); // 1 mm - assertNull(map.put(fromSequence("attacaca"), 2)); // match - assertNull(map.put(fromSequence("ataacaca"), 3)); // 1 mm - assertNull(map.put(fromSequence("attcgtca"), 4)); // many mm - assertNull(map.put(fromSequence("atttacaca"), 5)); // 1 insertion in stretch - assertNull(map.put(fromSequence("atacaca"), 6)); // 1 deletion in the "t" stretch - assertNull(map.put(fromSequence("attacacta"), 7)); // 1 insertion - assertNull(map.put(fromSequence("attcaca"), 8)); // 1 deletion - assertNull(map.put(fromSequence("attacac"), 9)); // 1 deletion in the end - assertNull(map.put(fromSequence("ttacaca"), 10)); // 1 deletion in the beginning - assertNull(map.put(fromSequence("tattacaca"), 11)); // 1 insertion in the beginning - assertNull(map.put(fromSequence("attacacat"), 12)); // 1 insertion in the ent - assertNull(map.put(fromSequence("attacact"), 13)); // 1 mm end - assertNull(map.put(fromSequence("tttacaca"), 14)); // 1 mm begin + assertNull(map.put(new NucleotideSequence("attagaca"), 1)); // 1 mm + assertNull(map.put(new NucleotideSequence("attacaca"), 2)); // match + assertNull(map.put(new NucleotideSequence("ataacaca"), 3)); // 1 mm + assertNull(map.put(new NucleotideSequence("attcgtca"), 4)); // many mm + assertNull(map.put(new NucleotideSequence("atttacaca"), 5)); // 1 insertion in stretch + assertNull(map.put(new NucleotideSequence("atacaca"), 6)); // 1 deletion in the "t" stretch + assertNull(map.put(new NucleotideSequence("attacacta"), 7)); // 1 insertion + assertNull(map.put(new NucleotideSequence("attcaca"), 8)); // 1 deletion + assertNull(map.put(new NucleotideSequence("attacac"), 9)); // 1 deletion in the end + assertNull(map.put(new NucleotideSequence("ttacaca"), 10)); // 1 deletion in the beginning + assertNull(map.put(new NucleotideSequence("tattacaca"), 11)); // 1 insertion in the beginning + assertNull(map.put(new NucleotideSequence("attacacat"), 12)); // 1 insertion in the ent + assertNull(map.put(new NucleotideSequence("attacact"), 13)); // 1 mm end + assertNull(map.put(new NucleotideSequence("tttacaca"), 14)); // 1 mm begin - NucleotideSequence reference = fromSequence("attacaca"); + NucleotideSequence reference = new NucleotideSequence("attacaca"); SequenceTreeMap.Node node; @@ -216,8 +214,8 @@ @Test public void testEdge1() throws Exception { - NucleotideSequence sequence1 = fromSequence("CTG"), - sequence2 = fromSequence("C"); + NucleotideSequence sequence1 = new NucleotideSequence("CTG"), + sequence2 = new NucleotideSequence("C"); SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); @@ -234,8 +232,8 @@ @Test public void testEdge2() throws Exception { - NucleotideSequence sequence1 = fromSequence("CTG"), - sequence2 = fromSequence("CGT"); + NucleotideSequence sequence1 = new NucleotideSequence("CTG"), + sequence2 = new NucleotideSequence("CGT"); SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); @@ -252,8 +250,8 @@ @Test public void testEdge3() throws Exception { - NucleotideSequence sequence1 = fromSequence("C"), - sequence2 = fromSequence("CTG"); + NucleotideSequence sequence1 = new NucleotideSequence("C"), + sequence2 = new NucleotideSequence("CTG"); SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); @@ -275,9 +273,9 @@ @Test public void testGuideDel() throws Exception { SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); - map.put(fromSequence("attacacaattaattacacacacaattacaca"), 3); + map.put(new NucleotideSequence("attacacaattaattacacacacaattacaca"), 3); - Sequence sequence = fromSequence("attacacaattaatttacacacacaattacaca"); + Sequence sequence = new NucleotideSequence("attacacaattaatttacacacacaattacaca"); SequenceTreeMap.NeighborhoodIterator neighborhoodIterator = map.getNeighborhoodIterator(sequence, 0.2, @@ -307,10 +305,10 @@ @Test public void testGuideMM() throws Exception { SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); - map.put(fromSequence("attacacaattaattacacacacaattacaca"), 3); - //map.put(fromSequence("attacacaattaatttacacacacaattacaca"), 4); + map.put(new NucleotideSequence("attacacaattaattacacacacaattacaca"), 3); + //map.put(new NucleotideSequence("attacacaattaatttacacacacaattacaca"), 4); - Sequence sequence = fromSequence("attacacaattaataacacacacaattacaca"); + Sequence sequence = new NucleotideSequence("attacacaattaataacacacacaattacaca"); SequenceTreeMap.NeighborhoodIterator neighborhoodIterator = map.getNeighborhoodIterator(sequence, 0.2, @@ -340,10 +338,10 @@ @Test public void testGuideIns() throws Exception { SequenceTreeMap map = new SequenceTreeMap<>(NucleotideAlphabet.INSTANCE); - map.put(fromSequence("attacacaattaattacacacacaattacaca"), 3); - //map.put(fromSequence("attacacaattaatttacacacacaattacaca"), 4); + map.put(new NucleotideSequence("attacacaattaattacacacacaattacaca"), 3); + //map.put(new NucleotideSequence("attacacaattaatttacacacacaattacaca"), 4); - Sequence sequence = fromSequence("attacacaattaatacacacacaattacaca"); + Sequence sequence = new NucleotideSequence("attacacaattaatacacacacaattacaca"); SequenceTreeMap.NeighborhoodIterator neighborhoodIterator = map.getNeighborhoodIterator(sequence, 0.2, @@ -439,7 +437,7 @@ * Utility functions and their tests */ private Sequence getRandomSequence(Alphabet alphabet, int length) { - SequenceBuilder builder = alphabet.getSequenceBuilderFactory().create(length); + SequenceBuilder builder = alphabet.getBuilderFactory().create(length); for (int i = 0; i < length; ++i) builder.setCode(i, (byte) random.nextInt(alphabet.codesCount())); return builder.create(); @@ -450,7 +448,7 @@ int position, i; switch (type) { case -1: //Copy - builder = sequence.getAlphabet().getSequenceBuilderFactory().create(sequence.size()); + builder = sequence.getAlphabet().getBuilderFactory().create(sequence.size()); for (i = 0; i < builder.size(); ++i) builder.setCode(i, sequence.codeAt(i)); return builder.create(); @@ -458,7 +456,7 @@ case 0: //Mismatch if (sequence.getAlphabet() == NucleotideAlphabet.INSTANCE) return introduceNucleotideMismatch((NucleotideSequence) sequence); - builder = sequence.getAlphabet().getSequenceBuilderFactory().create(sequence.size()); + builder = sequence.getAlphabet().getBuilderFactory().create(sequence.size()); for (i = 0; i < builder.size(); ++i) builder.setCode(i, sequence.codeAt(i)); position = random.nextInt(sequence.size()); @@ -467,7 +465,7 @@ return builder.create(); case 1: //Deletion - builder = sequence.getAlphabet().getSequenceBuilderFactory().create(sequence.size() - 1); + builder = sequence.getAlphabet().getBuilderFactory().create(sequence.size() - 1); position = random.nextInt(sequence.size()); for (i = 0; i < position; ++i) builder.setCode(i, sequence.codeAt(i)); @@ -477,7 +475,7 @@ return builder.create(); case 2: //Insertion - builder = sequence.getAlphabet().getSequenceBuilderFactory().create(sequence.size() + 1); + builder = sequence.getAlphabet().getBuilderFactory().create(sequence.size() + 1); position = random.nextInt(sequence.size() + 1); for (i = 0; i < position; ++i) builder.setCode(i, sequence.codeAt(i)); @@ -492,10 +490,10 @@ } private NucleotideSequence introduceNucleotideMismatch(NucleotideSequence sequence) { - final Bit2Array storage = NucleotideSequenceImpl.storageFromSequence(sequence); + final Bit2Array storage = sequence.getInnerData(); int position = random.nextInt(storage.size()); storage.set(position, 0x3 & (storage.get(position) + 1 + random.nextInt(3))); - return NucleotideSequenceImpl.fromStorage(storage); + return new NucleotideSequence(storage); } final static Alphabet[] alphabets = {NucleotideAlphabet.INSTANCE, AminoAcidAlphabet.INSTANCE, CDRAminoAcidAlphabet.INSTANCE}; @@ -789,24 +787,24 @@ //public void testPosition() throws Exception { // NSTa map = new NSTa(); // - // //assertNull(map.put(fromSequence("attagaca"))); // 1 mm - // //assertNull(map.put(fromSequence("attacaca"))); // match - // //assertNull(map.put(fromSequence("ataacaca"))); // 1 mm - // //assertNull(map.put(fromSequence("attcgtca"))); // many mm - // //assertNull(map.put(fromSequence("atttacaca"))); // 1 insertion in stretch - // //assertNull(map.put(fromSequence("atacaca"))); // 1 deletion in the "t" stretch - // //assertNull(map.put(fromSequence("attacacga"))); // 1 insertion - // //assertNull(map.put(fromSequence("attcaca"))); // 1 deletion - // //assertNull(map.put(fromSequence("attacac"))); // 1 deletion in the end - // //assertNull(map.put(fromSequence("ttacaca"))); // 1 deletion in the beginning - // //assertNull(map.put(fromSequence("tattacaca"))); // 1 insertion in the beginning - // //assertNull(map.put(fromSequence("attacacat"))); // 1 insertion in the ent - // assertNull(map.put(fromSequence("attacagt"))); // 1 mm end - // //assertNull(map.put(fromSequence("attacaca"))); // 1 mm end - // //assertNull(map.put(fromSequence("tttacaca"))); // 1 mm begin - // //assertNull(map.put(fromSequence("cttagaca"))); // 2 mm begin + // //assertNull(map.put(new NucleotideSequence("attagaca"))); // 1 mm + // //assertNull(map.put(new NucleotideSequence("attacaca"))); // match + // //assertNull(map.put(new NucleotideSequence("ataacaca"))); // 1 mm + // //assertNull(map.put(new NucleotideSequence("attcgtca"))); // many mm + // //assertNull(map.put(new NucleotideSequence("atttacaca"))); // 1 insertion in stretch + // //assertNull(map.put(new NucleotideSequence("atacaca"))); // 1 deletion in the "t" stretch + // //assertNull(map.put(new NucleotideSequence("attacacga"))); // 1 insertion + // //assertNull(map.put(new NucleotideSequence("attcaca"))); // 1 deletion + // //assertNull(map.put(new NucleotideSequence("attacac"))); // 1 deletion in the end + // //assertNull(map.put(new NucleotideSequence("ttacaca"))); // 1 deletion in the beginning + // //assertNull(map.put(new NucleotideSequence("tattacaca"))); // 1 insertion in the beginning + // //assertNull(map.put(new NucleotideSequence("attacacat"))); // 1 insertion in the ent + // assertNull(map.put(new NucleotideSequence("attacagt"))); // 1 mm end + // //assertNull(map.put(new NucleotideSequence("attacaca"))); // 1 mm end + // //assertNull(map.put(new NucleotideSequence("tttacaca"))); // 1 mm begin + // //assertNull(map.put(new NucleotideSequence("cttagaca"))); // 2 mm begin // - // NucleotideSequence reference = fromSequence("attacaga"); + // NucleotideSequence reference = new NucleotideSequence("attacaga"); // // SequenceTreeMap.NeighborhoodIterator ni = map.getNeighborhoodIterator(reference, 2, 1, 1, 2); // diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMapTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMapTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMapTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/tree/TSSequenceTreeMapTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -22,7 +22,6 @@ import com.milaboratory.core.sequence.nucleotide.NucleotideAlphabet; import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; import com.milaboratory.core.sequence.util.NucleotideSequenceGenerator; import com.milaboratory.util.Factory; import org.junit.Test; @@ -38,8 +37,8 @@ public class TSSequenceTreeMapTest { @Test public void simpleTest() { - NucleotideSequence sequence1 = NucleotideSequenceImpl.fromSequence("ATTAGACA"); - NucleotideSequence sequence2 = NucleotideSequenceImpl.fromSequence("ATTATACA"); + NucleotideSequence sequence1 = new NucleotideSequence("ATTAGACA"); + NucleotideSequence sequence2 = new NucleotideSequence("ATTATACA"); Integer i1 = 1; Integer i2 = 2; TSSequenceTreeMap map = new TSSequenceTreeMap<>(NucleotideAlphabet.INSTANCE); @@ -51,8 +50,8 @@ @Test public void simpleIterationTest() { - NucleotideSequence sequence1 = NucleotideSequenceImpl.fromSequence("ATTAGACA"); - NucleotideSequence sequence2 = NucleotideSequenceImpl.fromSequence("ATTATACA"); + NucleotideSequence sequence1 = new NucleotideSequence("ATTAGACA"); + NucleotideSequence sequence2 = new NucleotideSequence("ATTATACA"); Integer i1 = 1; Integer i2 = 2; TSSequenceTreeMap map = new TSSequenceTreeMap<>(NucleotideAlphabet.INSTANCE); diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregatorTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregatorTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregatorTest.java 1970-01-01 00:00:00.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/util/NucleotideSequenceAggregatorTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -0,0 +1,16 @@ +package com.milaboratory.core.sequence.util; + +import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; +import org.junit.Test; + +public class NucleotideSequenceAggregatorTest { + @Test + public void test1() throws Exception { + NucleotideSequenceAggregator aggr = new NucleotideSequenceAggregator(8, 2); + aggr.putSequence(new NucleotideSequence("attacaca")); + aggr.putSequence(new NucleotideSequence("tattacac")); + aggr.putSequence(new NucleotideSequence("tattacaa")); + + System.out.println(aggr.getSequence(.5)); + } +} diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequence/util/SequencesUtilsTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/util/SequencesUtilsTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequence/util/SequencesUtilsTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequence/util/SequencesUtilsTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -3,7 +3,6 @@ import com.milaboratory.core.sequence.nucleotide.NucleotideSequence; import org.junit.Test; -import static com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl.fromSequence; import static com.milaboratory.core.sequence.util.SequencesUtils.cat; import static org.junit.Assert.assertEquals; @@ -12,9 +11,9 @@ public void testCat() throws Exception { String seq1s = "gttacagc", seq2s = "gctatacgatgc"; - NucleotideSequence seq1 = fromSequence(seq1s), - seq2 = fromSequence(seq2s), - catAssert = fromSequence(seq1s + seq2s); + NucleotideSequence seq1 = new NucleotideSequence(seq1s), + seq2 = new NucleotideSequence(seq2s), + catAssert = new NucleotideSequence(seq1s + seq2s); assertEquals(catAssert, cat(seq1, seq2)); assertEquals(catAssert.hashCode(), cat(seq1, seq2).hashCode()); //Just in case diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequencing/io/fastq/PFastqReaderTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequencing/io/fastq/PFastqReaderTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequencing/io/fastq/PFastqReaderTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequencing/io/fastq/PFastqReaderTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -20,8 +20,8 @@ */ package com.milaboratory.core.sequencing.io.fastq; +import com.milaboratory.core.sequence.quality.QualityFormat; import com.milaboratory.core.sequencing.io.PSequencingDataReader; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; import com.milaboratory.core.sequencing.read.PSequencingRead; import com.milaboratory.util.CompressionType; import org.junit.Assert; @@ -36,7 +36,8 @@ public void test0() throws URISyntaxException, IOException { File sampleR1 = new File(ClassLoader.getSystemResource("sample_r1.fastq").toURI()); File sampleR2 = new File(ClassLoader.getSystemResource("sample_r2.fastq").toURI()); - PSequencingDataReader reader = new PFastqReader(sampleR1, sampleR2, QualityStringFormat.Illumina18, CompressionType.None, true, true); + PSequencingDataReader reader = new PFastqReader(sampleR1, sampleR2, QualityFormat.Phred33, CompressionType.None, + new Casava18InfoProvider(), true, true); PSequencingRead read; int count = 0; while ((read = reader.take()) != null) { @@ -49,7 +50,8 @@ public void test1() throws URISyntaxException, IOException { File sampleR1 = new File(ClassLoader.getSystemResource("sample_r1.fastq").toURI()); File sampleR2 = new File(ClassLoader.getSystemResource("sample_r2.fastq").toURI()); - PSequencingDataReader reader = new PFastqReader(sampleR1, sampleR2, QualityStringFormat.Illumina18, CompressionType.None, true, false); + PSequencingDataReader reader = new PFastqReader(sampleR1, sampleR2, QualityFormat.Phred33, CompressionType.None, + new Casava18InfoProvider(), true, false); PSequencingRead read; int count = 0; while ((read = reader.take()) != null) { diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequencing/io/fastq/SFastqReaderTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequencing/io/fastq/SFastqReaderTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequencing/io/fastq/SFastqReaderTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequencing/io/fastq/SFastqReaderTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -20,7 +20,7 @@ */ package com.milaboratory.core.sequencing.io.fastq; -import com.milaboratory.core.sequencing.io.fastq.quality.QualityStringFormat; +import com.milaboratory.core.sequence.quality.QualityFormat; import com.milaboratory.core.sequencing.read.SSequencingRead; import com.milaboratory.util.CompressionType; import org.junit.Assert; @@ -36,8 +36,8 @@ public void test1() throws URISyntaxException, IOException { File sample = new File(ClassLoader.getSystemResource("sample_r1.fastq").toURI()); File sampleGz = new File(ClassLoader.getSystemResource("sample_r1.fastq.gz").toURI()); - SFastqReader reader = new SFastqReader(sample, QualityStringFormat.Illumina18, CompressionType.None, false); - SFastqReader readerGz = new SFastqReader(sampleGz, QualityStringFormat.Illumina18, CompressionType.GZIP, false); + SFastqReader reader = new SFastqReader(sample, QualityFormat.Phred33, CompressionType.None); + SFastqReader readerGz = new SFastqReader(sampleGz, QualityFormat.Phred33, CompressionType.GZIP); SSequencingRead read; while ((read = reader.take()) != null) Assert.assertEquals(read.getData().getSequence(), readerGz.take().getData().getSequence()); @@ -51,14 +51,14 @@ @Test public void testGuess1() throws Exception { String[] files = {"solexa1.fastq.gz", "solexa2.fastq.gz", "solexa3.fastq.gz", "sample_r1.fastq.gz", "sample_r2.fastq.gz"}; - QualityStringFormat formats[] = {QualityStringFormat.Illumina15, QualityStringFormat.Illumina15, QualityStringFormat.Illumina15, - QualityStringFormat.Illumina18u, QualityStringFormat.Illumina18u}; + QualityFormat formats[] = {QualityFormat.Phred64, QualityFormat.Phred64, QualityFormat.Phred64, + QualityFormat.Phred33, QualityFormat.Phred33}; int reads[] = {10, 10, 10, 10, 10}; for (int i = 0; i < files.length; ++i) { InputStream stream = ClassLoader.getSystemResource(files[i]).openStream(); - SFastqReader reader = new SFastqReader(stream, null, CompressionType.GZIP, false, true); - Assert.assertEquals(formats[i], reader.getQualityStringFormat()); + SFastqReader reader = new SFastqReader(stream, CompressionType.GZIP); + Assert.assertEquals(formats[i], reader.getQualityFormat()); int n = 0; while (reader.take() != null) ++n; diff -Nru micommons-0.9/src/test/java/com/milaboratory/core/sequencing/motif/NucleotideMotifTest.java micommons-1.0.3/src/test/java/com/milaboratory/core/sequencing/motif/NucleotideMotifTest.java --- micommons-0.9/src/test/java/com/milaboratory/core/sequencing/motif/NucleotideMotifTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/core/sequencing/motif/NucleotideMotifTest.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,56 +0,0 @@ -package com.milaboratory.core.sequencing.motif; - -import com.milaboratory.core.sequence.nucleotide.NucleotideSequenceImpl; -import org.junit.Ignore; -import org.junit.Test; - -import static org.junit.Assert.*; - -/** - * @author Bolotin Dmitriy - */ -public class NucleotideMotifTest { - @Test - public void testMatches01() { - NucleotideMotif nm = new NucleotideMotif("ATCG"); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("ATCG"), 0)); - assertFalse(nm.matches(NucleotideSequenceImpl.fromSequence("ACCG"), 0)); - } - - @Test - public void testMatches02() { - NucleotideMotif nm = new NucleotideMotif("GAGGAGACGGTGACCRKGGT"); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("GAGGAGACGGTGACCGGGGT"), 0)); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("GAGGAGACGGTGACCGTGGT"), 0)); - assertFalse(nm.matches(NucleotideSequenceImpl.fromSequence("GAGGAGACGGTGACCGCGGT"), 0)); - } - - @Test - public void testMatches03() { - NucleotideMotif nm = new NucleotideMotif("GAGGAGACGGTGACCRKGGT"); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("GACAGAGGAGACGGTGACCGGGGTAA"), 4)); - assertTrue(nm.matches(NucleotideSequenceImpl.fromSequence("CAGATGAGGAGACGGTGACCGTGGTGG"), 5)); - assertFalse(nm.matches(NucleotideSequenceImpl.fromSequence("CCAGAGGAGACGGTGACCGCGGTG"), 3)); - } - - @Test - public void testToString01() { - NucleotideMotif nm = new NucleotideMotif("GAGGAGACGGTGACCRKGGT"); - assertEquals(nm.toString(), "GAGGAGACGGTGACCRKGGT"); - } - - @Test - @Ignore - public void rcTest() { - NucleotideMotif nm = new NucleotideMotif("ATGC"); - assertTrue(nm.reverseComplement().equals(new NucleotideMotif("GCAT"))); - } - - @Test - public void testFindMatch01() { - NucleotideMotif nm = new NucleotideMotif("GAGGAGACGGTGACCRKGGT"); - assertEquals(nm.findMatch(NucleotideSequenceImpl.fromSequence("GACAGAGGAGACGGTGACCGGGGTAA")), 4); - assertEquals(nm.findMatch(NucleotideSequenceImpl.fromSequence("CAGATGAGGAGACGGTGACCGTGGTGG")), 5); - assertEquals(nm.findMatch(NucleotideSequenceImpl.fromSequence("CCAGAGGAGACGGTGACCGCGGTG")), -1); - } -} diff -Nru micommons-0.9/src/test/java/com/milaboratory/util/Bit2ArrayTest.java micommons-1.0.3/src/test/java/com/milaboratory/util/Bit2ArrayTest.java --- micommons-0.9/src/test/java/com/milaboratory/util/Bit2ArrayTest.java 2013-08-12 11:21:58.000000000 +0000 +++ micommons-1.0.3/src/test/java/com/milaboratory/util/Bit2ArrayTest.java 2013-10-09 08:03:10.000000000 +0000 @@ -30,6 +30,7 @@ import java.util.Random; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * @author dmitriybolotin @@ -46,6 +47,7 @@ values[i] = r.nextInt(4); ba.set(i, values[i]); } + assertTrue(ba.equals(ba.getRange(0, ba.size()))); //Testing for (int i = 0; i < length; ++i) assertEquals(values[i], ba.get(i)); @@ -65,6 +67,7 @@ values[i] = r.nextInt(4); ba.set(i, values[i]); } + assertTrue(ba.equals(ba.getRange(0, ba.size()))); //Testing for (int i = 0; i < length; ++i) assertEquals(values[i], ba.get(i));