diff -Nru rsem-1.3.0+dfsg/convert-sam-for-rsem rsem-1.3.1+dfsg/convert-sam-for-rsem --- rsem-1.3.0+dfsg/convert-sam-for-rsem 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/convert-sam-for-rsem 2018-06-27 18:52:38.000000000 +0000 @@ -56,11 +56,7 @@ =head1 NAME -convert-sam-for-rsem - -=head1 PURPOSE - -Make a RSEM compatible BAM file. +convert-sam-for-rsem - Make a RSEM compatible BAM file. =head1 SYNOPSIS diff -Nru rsem-1.3.0+dfsg/debian/changelog rsem-1.3.1+dfsg/debian/changelog --- rsem-1.3.0+dfsg/debian/changelog 2018-05-25 11:56:22.000000000 +0000 +++ rsem-1.3.1+dfsg/debian/changelog 2018-09-26 19:41:44.000000000 +0000 @@ -1,3 +1,12 @@ +rsem (1.3.1+dfsg-1) unstable; urgency=medium + + * New upstream version + * Standards-Version: 4.2.1 + * Fix Perl interpreter path + * Remove unneeded get-orig-source target + + -- Andreas Tille Wed, 26 Sep 2018 21:41:44 +0200 + rsem (1.3.0+dfsg-2) unstable; urgency=medium [ Katerina Kalou ] diff -Nru rsem-1.3.0+dfsg/debian/control rsem-1.3.1+dfsg/debian/control --- rsem-1.3.0+dfsg/debian/control 2018-05-25 11:56:22.000000000 +0000 +++ rsem-1.3.1+dfsg/debian/control 2018-09-26 19:41:44.000000000 +0000 @@ -15,7 +15,7 @@ # bowtie is a run-time dependency available on only a few systems, not actually # needed for building but it prevents the creation of uninstallable packages bowtie | bowtie2 -Standards-Version: 4.1.4 +Standards-Version: 4.2.1 Vcs-Browser: https://salsa.debian.org/med-team/rsem Vcs-Git: https://salsa.debian.org/med-team/rsem.git Homepage: http://deweylab.biostat.wisc.edu/rsem/ diff -Nru rsem-1.3.0+dfsg/debian/install rsem-1.3.1+dfsg/debian/install --- rsem-1.3.0+dfsg/debian/install 2018-05-25 11:56:22.000000000 +0000 +++ rsem-1.3.1+dfsg/debian/install 2018-09-26 19:41:44.000000000 +0000 @@ -1,3 +1,4 @@ # For whatever reason the following scripts are not installed by the upstream install target rsem-control-fdr usr/bin rsem-generate-ngvector usr/bin +rsem_perl_utils.pm usr/share/perl5 diff -Nru 
rsem-1.3.0+dfsg/debian/rules rsem-1.3.1+dfsg/debian/rules --- rsem-1.3.0+dfsg/debian/rules 2018-05-25 11:56:22.000000000 +0000 +++ rsem-1.3.1+dfsg/debian/rules 2018-09-26 19:41:44.000000000 +0000 @@ -26,9 +26,10 @@ debian/rsem-simulate-reads.1 override_dh_auto_install: - #find . -maxdepth 1 -type f -perm /u+x | xargs -n 1 -I{} dh_install {} /usr/bin/ - dh_install rsem_perl_utils.pm /usr/share/perl5 dh_auto_install -- BOOST=/usr/include prefix="/usr" -get-orig-source: - uscan --verbose --force-download --repack --compression xz +override_dh_install: + dh_install + for pl in `grep -Rl '#!/usr/bin/env[[:space:]]\+perl' debian/*/usr/*` ; do \ + sed -i '1s?^#!/usr/bin/env[[:space:]]\+perl?#!/usr/bin/perl?' $${pl} ; \ + done diff -Nru rsem-1.3.0+dfsg/Gibbs.cpp rsem-1.3.1+dfsg/Gibbs.cpp --- rsem-1.3.0+dfsg/Gibbs.cpp 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/Gibbs.cpp 2018-06-27 18:52:38.000000000 +0000 @@ -432,7 +432,7 @@ printf("- One isoform's prior per line\n"); printf("- Priors must be in the same order as in the .ti file\n"); printf("- Priors for those to-be-omitted isoforms must be included as well\n"); - printf("- Comments can be added after prior seperated by space(s)\n"); + printf("- Comments can be added after prior separated by space(s)\n"); exit(-1); } diff -Nru rsem-1.3.0+dfsg/pRSEM/File.py rsem-1.3.1+dfsg/pRSEM/File.py --- rsem-1.3.0+dfsg/pRSEM/File.py 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/pRSEM/File.py 2018-06-27 18:52:38.000000000 +0000 @@ -10,7 +10,7 @@ self.fullname = None ## file's full name, include dir, base, and all ext self.is_gz = None ## if file is gzipped self.dirname = None ## directory name - self.basename = None ## base name sans all extension seperated by dot + self.basename = None ## base name sans all extension separated by dot self.filename_sans_ext = None ## no path, no last extension sep by dot diff -Nru rsem-1.3.0+dfsg/README.md rsem-1.3.1+dfsg/README.md --- rsem-1.3.0+dfsg/README.md 2016-10-02 
14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/README.md 2018-06-27 18:52:38.000000000 +0000 @@ -12,6 +12,7 @@ * [Compilation & Installation](#compilation) * [Usage](#usage) * [Build RSEM references using RefSeq, Ensembl, or GENCODE annotations](#built) + * [Build RSEM references for untypical organisms](#untypical) * [Example](#example-main) * [Simulation](#simulation) * [Generate Transcript-to-Gene-Map from Trinity Output](#gen_trinity) @@ -216,6 +217,20 @@ Similar to Ensembl annotation, if you want to use GFF3 files (not recommended), add option `--gff3-RNA-patterns transcript`. +#### Build RSEM references for untypical organisms + +For untypical organisms, such as viruses, you may only have a GFF3 file that contains only genes and no transcripts. You need to turn on `--gff3-genes-as-transcripts` so that RSEM will treat each gene as a unique transcript. + +Here is an example command: + +``` +rsem-prepare-reference --gff3 virus.gff \ + --gff3-genes-as-transcripts \ + --bowtie \ + virus.genome.fa \ + ref/virus +``` + ### II. Calculating Expression Values To calculate expression values, you should run the diff -Nru rsem-1.3.0+dfsg/rsem-calculate-expression rsem-1.3.1+dfsg/rsem-calculate-expression --- rsem-1.3.0+dfsg/rsem-calculate-expression 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem-calculate-expression 2018-06-27 18:52:38.000000000 +0000 @@ -774,11 +774,7 @@ =head1 NAME -rsem-calculate-expression - -=head1 PURPOSE - -Estimate gene and isoform expression from RNA-Seq data. +rsem-calculate-expression - Estimate gene and isoform expression from RNA-Seq data. =head1 SYNOPSIS @@ -1125,7 +1121,7 @@ =back -Parameters for all the above models are learned from a training set. For detailed explainations, please see prior-enhanced RSEM's paper. (Default: 'pk') +Parameters for all the above models are learned from a training set. For detailed explanations, please see prior-enhanced RSEM's paper. 
(Default: 'pk') =back diff -Nru rsem-1.3.0+dfsg/rsem-control-fdr rsem-1.3.1+dfsg/rsem-control-fdr --- rsem-1.3.0+dfsg/rsem-control-fdr 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem-control-fdr 2018-06-27 18:52:38.000000000 +0000 @@ -62,11 +62,7 @@ =head1 NAME -rsem-control-fdr - -=head1 PURPOSE - -Filter EBSeq output for statistical significance. +rsem-control-fdr - Filter EBSeq output for statistical significance. =head1 SYNOPSIS diff -Nru rsem-1.3.0+dfsg/rsem-generate-ngvector rsem-1.3.1+dfsg/rsem-generate-ngvector --- rsem-1.3.0+dfsg/rsem-generate-ngvector 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem-generate-ngvector 2018-06-27 18:52:38.000000000 +0000 @@ -33,11 +33,7 @@ =head1 NAME -rsem-generate-ngvector - -=head1 PURPOSE - -Create Ng vector for EBSeq based only on transcript sequences. +rsem-generate-ngvector - Create Ng vector for EBSeq based only on transcript sequences. =head1 SYNOPSIS diff -Nru rsem-1.3.0+dfsg/rsem-gff3-to-gtf rsem-1.3.1+dfsg/rsem-gff3-to-gtf --- rsem-1.3.0+dfsg/rsem-gff3-to-gtf 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem-gff3-to-gtf 2018-06-27 18:52:38.000000000 +0000 @@ -160,6 +160,13 @@ self.index += 1 return interval + def __next__(self): + if self.index == len(self.results): + raise StopIteration + interval = self.results[self.index] + self.index += 1 + return interval + def getTranscript(tid, feature): assert tid != None @@ -217,6 +224,7 @@ parser = HelpOnErrorParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter, description = "Convert GFF3 files to GTF files.") parser.add_argument("input_GFF3_file", help = "Input GFF3 file.") parser.add_argument("output_GTF_file", help = "Output GTF file.") +parser.add_argument("--make-genes-as-transcripts", help = "GFF3 file does not contain transcripts, make each gene as a transcript.", action = "store_true") parser.add_argument("--RNA-patterns", help = "Types of RNAs to be extracted, e.g. 
mRNA,rRNA", metavar = "") parser.add_argument("--extract-sequences", help = "If GFF3 file contains reference sequences, extract them to the specified file", metavar = "") args = parser.parse_args() @@ -274,11 +282,16 @@ my_assert(gid not in gid2gname, "Gene {0} appears multiple times! Last occurrence is at line {1}:\n{2}".format(gid, feature.line_no, feature.line)) gid2gname[gid] = feature.getAttribute("Name") - elif feature.feature_type == "transcript": + + if args.make_genes_as_transcripts: + feature.feature_type = feature.original_type = "transcript" + feature.attribute_dict["Parent"] = [feature.attribute_dict["ID"]] + + if feature.feature_type == "transcript": transcript = getTranscript(feature.getAttribute("ID", True), feature) transcript.setTranscript(feature) - else: - assert feature.feature_type == "exon" + + if feature.feature_type == "exon": for parent in feature.getAttribute("Parent", True): transcript = getTranscript(parent, feature) transcript.addExon(feature) diff -Nru rsem-1.3.0+dfsg/rsem_perl_utils.pm rsem-1.3.1+dfsg/rsem_perl_utils.pm --- rsem-1.3.0+dfsg/rsem_perl_utils.pm 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem_perl_utils.pm 2018-06-27 18:52:38.000000000 +0000 @@ -9,7 +9,7 @@ our @EXPORT = qw(runCommand); our @EXPORT_OK = qw(runCommand collectResults showVersionInfo getSAMTOOLS hasPolyA); -my $version = "RSEM v1.2.31"; # Update version info here +my $version = "RSEM v1.3.1"; # Update version info here my $samtools = "samtools-1.3"; # If update to another version of SAMtools, need to change this # command, {err_msg} diff -Nru rsem-1.3.0+dfsg/rsem-plot-transcript-wiggles rsem-1.3.1+dfsg/rsem-plot-transcript-wiggles --- rsem-1.3.0+dfsg/rsem-plot-transcript-wiggles 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem-plot-transcript-wiggles 2018-06-27 18:52:38.000000000 +0000 @@ -81,11 +81,7 @@ =head1 NAME -rsem-plot-transcript-wiggles - -=head1 PURPOSE - -Generate PDF wiggle plots from transcript or gene ids 
+rsem-plot-transcript-wiggles - Generate PDF wiggle plots from transcript or gene ids =head1 SYNOPSIS @@ -149,7 +145,7 @@ =item B -This is a pdf file containing all plots generated. If a list of transcript ids is provided, each page display at most 6 plots in 3 rows and 2 columns. If gene ids are provided, each page display a gene. The gene's id is showed at the top and all its transcripts' wiggle plots are showed in this page. The arrangment of plots is determined automatically. For each transcript wiggle plot, the transcript id is displayed as title. x-axis is position in the transcript and y-axis is read depth. If allele-specific expression is calculated, the basin unit becomes an allele-specific transcript and transcript ids and gene ids can be used to group allele-specific transcripts. +This is a pdf file containing all plots generated. If a list of transcript ids is provided, each page displays at most 6 plots in 3 rows and 2 columns. If gene ids are provided, each page displays a gene. The gene's id is shown at the top and all its transcripts' wiggle plots are shown on this page. The arrangement of plots is determined automatically. For each transcript wiggle plot, the transcript id is displayed as the title. The x-axis is position in the transcript and the y-axis is read depth. If allele-specific expression is calculated, the basic unit becomes an allele-specific transcript and transcript ids and gene ids can be used to group allele-specific transcripts. 
=item B diff -Nru rsem-1.3.0+dfsg/rsem-prepare-reference rsem-1.3.1+dfsg/rsem-prepare-reference --- rsem-1.3.0+dfsg/rsem-prepare-reference 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem-prepare-reference 2018-06-27 18:52:38.000000000 +0000 @@ -18,6 +18,7 @@ my $gtfF = ""; my $gff3F = ""; my $gff3_RNA_patterns = ""; +my $gff3_genes_as_transcripts = 0; my $gtf_sources = "None"; my $mappingF = ""; my $polyAChoice = 1; # 0, --polyA, add polyA tails for all isoforms; 1, default, no polyA tails; 2, --no-polyA-subset @@ -43,6 +44,7 @@ GetOptions("gtf=s" => \$gtfF, "gff3=s" => \$gff3F, "gff3-RNA-patterns=s" => \$gff3_RNA_patterns, + "gff3-genes-as-transcripts" => \$gff3_genes_as_transcripts, "trusted-sources=s" => \$gtf_sources, "transcript-to-gene-map=s" => \$mappingF, "allele-to-gene-map=s" => \$alleleMappingF, @@ -116,6 +118,9 @@ if ($gff3_RNA_patterns ne "") { $command .= " --RNA-patterns $gff3_RNA_patterns"; } + if ($gff3_genes_as_transcripts) { + $command .= " --make-genes-as-transcripts"; + } $command .= " $gff3F $gtfF"; &runCommand($command) } @@ -156,6 +161,7 @@ if ($bowtie2) { $command = $bowtie2_path."bowtie2-build -f"; + if ($star_nthreads > 1) { $command .= " --threads $star_nthreads"; } if ($quiet) { $command .= " -q"; } $command .= " $ARGV[1].idx.fa $ARGV[1]"; @@ -219,11 +225,7 @@ =head1 NAME -rsem-prepare-reference - -=head1 PURPOSE - -Prepare transcript references for RSEM and optionally build BOWTIE/BOWTIE2/STAR indices. +rsem-prepare-reference - Prepare transcript references for RSEM and optionally build BOWTIE/BOWTIE2/STAR indices. =head1 SYNOPSIS @@ -263,6 +265,10 @@ is a comma-separated list of transcript categories, e.g. "mRNA,rRNA". Only transcripts that match the will be extracted. (Default: "mRNA") +=item B<--gff3-genes-as-transcripts> + +This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes. 
RSEM will treat each gene as a unique transcript when it converts the GFF3 file into GTF format. + =item B<--trusted-sources> is a comma-separated list of trusted sources, e.g. "ENSEMBL,HAVANA". Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted. (Default: off) @@ -329,7 +335,7 @@ =item B<--star-path> -The path to STAR's executable. (Default: the path to STAR executable is assumed to be in user's PATH environment varaible) +The path to STAR's executable. (Default: the path to STAR executable is assumed to be in user's PATH environment variable) =item B<--star-sjdboverhang> diff -Nru rsem-1.3.0+dfsg/rsem-run-ebseq rsem-1.3.1+dfsg/rsem-run-ebseq --- rsem-1.3.0+dfsg/rsem-run-ebseq 2016-10-02 14:34:20.000000000 +0000 +++ rsem-1.3.1+dfsg/rsem-run-ebseq 2018-06-27 18:52:38.000000000 +0000 @@ -38,11 +38,7 @@ =head1 NAME -rsem-run-ebseq - -=head1 PURPOSE - -Wrapper for EBSeq to perform differential expression analysis. +rsem-run-ebseq - Wrapper for EBSeq to perform differential expression analysis. =head1 SYNOPSIS