diff -Nru gffread-0.11.7/debian/changelog gffread-0.11.8/debian/changelog --- gffread-0.11.7/debian/changelog 2020-02-14 13:22:24.000000000 +0000 +++ gffread-0.11.8/debian/changelog 2020-04-03 09:43:09.000000000 +0000 @@ -1,3 +1,14 @@ +gffread (0.11.8-1) unstable; urgency=medium + + * Adjust the autopkgtests to exclude the spades dependency on !amd64 + * New upstream version + * Add salsa-ci file (routine-update) + * Rules-Requires-Root: no (routine-update) + * Set upstream metadata fields: Bug-Database, Bug-Submit, Repository, + Repository-Browse. + + -- Michael R. Crusoe Fri, 03 Apr 2020 11:43:09 +0200 + gffread (0.11.7-2) unstable; urgency=medium [ Steffen Moeller ] diff -Nru gffread-0.11.7/debian/control gffread-0.11.8/debian/control --- gffread-0.11.7/debian/control 2020-02-14 13:22:24.000000000 +0000 +++ gffread-0.11.8/debian/control 2020-04-03 09:14:01.000000000 +0000 @@ -5,11 +5,12 @@ Section: science Priority: optional Build-Depends: debhelper-compat (= 12), - libgclib-dev (>= 0.11.4) + libgclib-dev Standards-Version: 4.5.0 Vcs-Browser: https://salsa.debian.org/med-team/gffread Vcs-Git: https://salsa.debian.org/med-team/gffread.git Homepage: https://ccb.jhu.edu/software/stringtie/gff.shtml +Rules-Requires-Root: no Package: gffread Architecture: any diff -Nru gffread-0.11.7/debian/salsa-ci.yml gffread-0.11.8/debian/salsa-ci.yml --- gffread-0.11.7/debian/salsa-ci.yml 1970-01-01 00:00:00.000000000 +0000 +++ gffread-0.11.8/debian/salsa-ci.yml 2020-04-03 09:13:54.000000000 +0000 @@ -0,0 +1,4 @@ +--- +include: + - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml + - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml diff -Nru gffread-0.11.7/debian/tests/control gffread-0.11.8/debian/tests/control --- gffread-0.11.7/debian/tests/control 2019-09-25 10:52:44.000000000 +0000 +++ gffread-0.11.8/debian/tests/control 2020-03-04 10:15:07.000000000 +0000 @@ -1,2 +1,2 @@ Tests: run-tests -Depends: gffread, bash, augustus-doc, bedtools-test, cnvkit, emboss-data, emboss-test, gbrowse, genometools-common, gff2aplot, gff2ps, htslib-test, libbio-graphics-perl, optimir, proteinortho, python3-gffutils, python3-pybedtools, r-bioc-genomicfeatures, r-bioc-gviz, r-bioc-rhtslib, r-bioc-rtracklayer, seqan-apps, spades, trinityrnaseq-examples +Depends: gffread, bash, augustus-doc, bedtools-test, cnvkit, emboss-data, emboss-test, gbrowse, genometools-common, gff2aplot, gff2ps, htslib-test, libbio-graphics-perl, optimir, proteinortho, python3-gffutils, python3-pybedtools, r-bioc-genomicfeatures, r-bioc-gviz, r-bioc-rhtslib, r-bioc-rtracklayer, seqan-apps, spades [amd64], trinityrnaseq-examples, dpkg-dev diff -Nru gffread-0.11.7/debian/tests/run-tests gffread-0.11.8/debian/tests/run-tests --- gffread-0.11.7/debian/tests/run-tests 2020-02-13 16:09:42.000000000 +0000 +++ gffread-0.11.8/debian/tests/run-tests 2020-03-04 10:14:58.000000000 +0000 @@ -10,7 +10,11 @@ # generated via # apt-file search --package-only --regexp '\.gff.?$' | grep -v -E "bedops|cct|ugene|gbrowse-data" -PACKAGES="augustus-doc, bedtools-test, cnvkit, emboss-data, emboss-test, gbrowse, genometools-common, gff2aplot, gff2ps, htslib-test, libbio-graphics-perl, optimir, proteinortho, python3-gffutils, python3-pybedtools, r-bioc-genomicfeatures, r-bioc-gviz, r-bioc-rhtslib, r-bioc-rtracklayer, seqan-apps, spades, trinityrnaseq-examples" +PACKAGES="augustus-doc, bedtools-test, cnvkit, emboss-data, emboss-test, gbrowse, genometools-common, gff2aplot, gff2ps, htslib-test, libbio-graphics-perl, optimir, proteinortho, python3-gffutils, python3-pybedtools, r-bioc-genomicfeatures, r-bioc-gviz, r-bioc-rhtslib, r-bioc-rtracklayer, seqan-apps, trinityrnaseq-examples" + +if [ "$(dpkg-architecture -qDEB_BUILD_ARCH)" = "amd64" ] ; then + PACKAGES+=", spades" +fi IFS=", " read -r -a pkgs <<< ${PACKAGES} diff -Nru gffread-0.11.7/debian/upstream/metadata gffread-0.11.8/debian/upstream/metadata --- gffread-0.11.7/debian/upstream/metadata 2020-02-13 16:09:41.000000000 +0000 +++ gffread-0.11.8/debian/upstream/metadata 2020-04-03 09:14:00.000000000 +0000 @@ -1,3 +1,5 @@ +Bug-Database: https://github.com/gpertea/gffread/issues +Bug-Submit: https://github.com/gpertea/gffread/issues/new Registry: - Name: OMICtools Entry: OMICS_28050 @@ -5,4 +7,5 @@ Entry: gffread - Name: bio.tools Entry: gffread -Repository: https://github.com/gpertea/gffread +Repository: https://github.com/gpertea/gffread.git +Repository-Browse: https://github.com/gpertea/gffread diff -Nru gffread-0.11.7/gffread.cpp gffread-0.11.8/gffread.cpp --- gffread-0.11.7/gffread.cpp 2020-01-23 21:42:45.000000000 +0000 +++ gffread-0.11.8/gffread.cpp 2020-04-02 20:13:41.000000000 +0000 @@ -4,7 +4,7 @@ #define __STDC_FORMAT_MACROS #include -#define VERSION "0.11.7" +#define VERSION "0.11.8" #define USAGE "gffread v" VERSION ". Usage:\n\ gffread [-g | ][-s ] \n\ @@ -105,9 +105,8 @@ both upstream and downstream of the transcript boundaries\n\ -x write a fasta file with spliced CDS for each GFF transcript\n\ -y write a protein fasta file with the translation of CDS for each record\n\ - -W for -w and -x options, write in the FASTA defline the exon\n\ + -W for -w and -x options, write in the FASTA defline all the exon\n\ coordinates projected onto the spliced sequence;\n\ - for -y option, write transcript attributes in the FASTA defline\n\ -S for -y option, use '*' instead of '.' as stop codon translation\n\ -L Ensembl GTF to GFF3 conversion (implies -F; should be used with -m)\n\ -m is a name mapping table for converting reference \n\ @@ -128,6 +127,9 @@ pseudo-attributes (prefixed by @) are recognized:\n\ @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, \n\ @cds, @covlen, @cdslen\n\ + If any of -w/-y/-x FASTA output files are enabled, the same fields\n\ + (excluding @id) are appended to the definition line of corresponding\n\ + FASTA records\n\ -v,-E expose (warn about) duplicate transcript IDs and other potential\n\ problems with the given GFF/GTF records\n\ " @@ -161,7 +163,8 @@ enum ETableFieldType { ctfGFF_Attr=0, // attribute name as is ctfGFF_ID, //ID or @id or transcript_id - ctfGFF_geneID, //geneID or @gene_id + ctfGFF_geneID, //geneID or @gene_id or @geneid + ctfGFF_geneName, //geneName or @gene_name or @genename ctfGFF_Parent, //Parent or @parent ctfGFF_chr, //@chr ctfGFF_feature, //@feature @@ -296,6 +299,7 @@ specialFields.Add("chr", new ETableFieldType(ctfGFF_chr)); specialFields.Add("id", new ETableFieldType(ctfGFF_ID)); specialFields.Add("geneid", new ETableFieldType(ctfGFF_geneID)); + specialFields.Add("genename", new ETableFieldType(ctfGFF_geneName)); specialFields.Add("parent", new ETableFieldType(ctfGFF_Parent)); specialFields.Add("feature", new ETableFieldType(ctfGFF_feature)); specialFields.Add("start", new ETableFieldType(ctfGFF_start)); @@ -312,6 +316,7 @@ while (s.nextToken(w)) { if (w[0]=='@') { w=w.substr(1); + w.lower(); ETableFieldType* v=specialFields.Find(w.chars()); if (v!=NULL) { CTableField tcol(*v); @@ -330,6 +335,11 @@ tableCols.Add(tcol); continue; } + if (w=="geneID" || w=="gene_id") { + CTableField tcol(ctfGFF_geneID); + tableCols.Add(tcol); + continue; + } if (w=="Parent") { CTableField tcol(ctfGFF_Parent); tableCols.Add(tcol); @@ -431,6 +441,88 @@ return realadj; } +void printTableData(FILE* f, GffObj& g, bool inFasta=false) { + //using attribute list in tableCols + char* av=NULL; + for(int i=0;i0 || inFasta) { + if (!inFasta || tableCols[i].type!=ctfGFF_ID) + fprintf(f,"\t"); + } + switch(tableCols[i].type) { + case ctfGFF_Attr: + av=g.getAttr(tableCols[i].name.chars()); + fprintf(f,"%s",av!=NULL? av : "."); + break; + case ctfGFF_chr: + fprintf(f,"%s",g.getGSeqName()); + break; + case ctfGFF_ID: + if (!inFasta) + fprintf(f,"%s",g.getID()); + break; + case ctfGFF_geneID: + fprintf(f,"%s",g.getGeneID()!=NULL ? g.getGeneID() : "."); + break; + case ctfGFF_geneName: + fprintf(f,"%s",g.getGeneName()!=NULL ? g.getGeneName() : "."); + break; + case ctfGFF_Parent: + fprintf(f,"%s",g.parent!=NULL ? g.parent->getID() : "."); + break; + case ctfGFF_feature: + fprintf(f,"%s",g.getFeatureName()); + break; + case ctfGFF_start: + fprintf(f,"%d",g.start); + break; + case ctfGFF_end: + fprintf(f,"%d",g.end); + break; + case ctfGFF_strand: + fprintf(f,"%c",g.strand); + break; + case ctfGFF_numexons: + fprintf(f,"%d",g.exons.Count()); + break; + case ctfGFF_exons: + if (g.exons.Count()>0) { + for (int x=0;x0) fprintf(f,","); + fprintf(f,"%d-%d",g.exons[x]->start, g.exons[x]->end); + } + } else fprintf(f,"."); + break; + case ctfGFF_cds: + if (g.hasCDS()) { + GVec cds; + g.getCDSegs(cds); + for (int x=0;x0) fprintf(f,","); + fprintf(f,"%d-%d",cds[x].start, cds[x].end); + } + } + else fprintf(f,"."); + break; + case ctfGFF_covlen: + fprintf(f, "%d", g.covlen); + break; + case ctfGFF_cdslen: + if (g.hasCDS()) { + GVec cds; + g.getCDSegs(cds); + int clen=0; + for (int x=0;xCount();i++) { - defline.append(" "); - defline.append(gffrec.getAttrName(i)); - defline.append("="); - char* s=gffrec.getAttrValue(i); - if (s[0]=='"') defline.append(s); - else defline.appendQuoted(s, '{', true); - } - } if (aalen>0) { if (cdsaa[aalen-1]=='.' || cdsaa[aalen-1]=='\0') --aalen; //avoid printing the stop codon - printFasta(f_y, defline, cdsaa, aalen, StarStop); + fprintf(f_y, ">%s", gffrec.getID()); + if (fmtTable) printTableData(f_y, gffrec, true); + else fprintf(f_y, "\n"); + printFasta(f_y, NULL, cdsaa, aalen, StarStop); } } if (f_x!=NULL) { //CDS only - GStr defline(gffrec.getID()); + GStr defline(gffrec.getID(), 94); if (writeExonSegs) { defline.append(" loc:"); defline.append(gffrec.getGSeqName()); @@ -685,19 +768,11 @@ defline.append("-"); defline+=(int)seglst[i].end; } - } - if (gffrec.attrs!=NULL) { - //append all attributes found for each transcript - for (int i=0;iCount();i++) { - defline.append(" "); - defline.append(gffrec.getAttrName(i)); - defline.append("="); - char* s=gffrec.getAttrValue(i); - if (s[0]=='"') defline.append(s); - else defline.appendQuoted(s, '{', true); - } - } - printFasta(f_x, defline, cdsnt, seqlen); + } + fprintf(f_x, ">%s", defline.chars()); + if (fmtTable) printTableData(f_x, gffrec, true); + else fprintf(f_x, "\n"); + printFasta(f_x, NULL, cdsnt, seqlen); } GFREE(cdsnt); GFREE(cdsaa); @@ -758,18 +833,10 @@ } } - if (gffrec.attrs!=NULL) { - //append all attributes found for each transcripts - for (int i=0;iCount();i++) { - defline.append(" "); - defline.append(gffrec.getAttrName(i)); - defline.append("="); - char* s=gffrec.getAttrValue(i); - if (s[0]=='"') defline.append(s); - else defline.appendQuoted(s, '{', true); - } - } - printFasta(f_w, defline, exont, seqlen); + fprintf(f_w, ">%s", defline.chars()); + if (fmtTable) printTableData(f_w, gffrec, true); + else fprintf(f_w, "\n"); + printFasta(f_w, NULL, exont, seqlen); GFREE(exont); } } //writing f_w (spliced exons) @@ -941,84 +1008,6 @@ t.printGxf(f, exonPrinting, tracklabel, NULL, decodeChars); } - -void printGxfTab(FILE* f, GffObj& g) { - //using attribute list in tableCols - char* av=NULL; - for(int i=0;i0) fprintf(f,"\t"); - switch(tableCols[i].type) { - case ctfGFF_Attr: - av=g.getAttr(tableCols[i].name.chars()); - if (av!=NULL) fprintf(f,"%s",av); - else fprintf(f, "."); - break; - case ctfGFF_chr: - fprintf(f,"%s",g.getGSeqName()); - break; - case ctfGFF_ID: - fprintf(f,"%s",g.getID()); - break; - case ctfGFF_geneID: - fprintf(f,"%s",g.getGeneID()); - break; - case ctfGFF_Parent: - if (g.parent!=NULL) fprintf(f,"%s",g.parent->getID()); - else fprintf(f, "."); - break; - case ctfGFF_feature: - fprintf(f,"%s",g.getFeatureName()); - break; - case ctfGFF_start: - fprintf(f,"%d",g.start); - break; - case ctfGFF_end: - fprintf(f,"%d",g.end); - break; - case ctfGFF_strand: - fprintf(f,"%c",g.strand); - break; - case ctfGFF_numexons: - fprintf(f,"%d",g.exons.Count()); - break; - case ctfGFF_exons: - if (g.exons.Count()>0) { - for (int x=0;x0) fprintf(f,","); - fprintf(f,"%d-%d",g.exons[x]->start, g.exons[x]->end); - } - } else fprintf(f,"."); - break; - case ctfGFF_cds: - if (g.hasCDS()) { - GVec cds; - g.getCDSegs(cds); - for (int x=0;x0) fprintf(f,","); - fprintf(f,"%d-%d",cds[x].start, cds[x].end); - } - } - else fprintf(f,"."); - break; - case ctfGFF_covlen: - fprintf(f, "%d", g.covlen); - break; - case ctfGFF_cdslen: - if (g.hasCDS()) { - GVec cds; - g.getCDSegs(cds); - int clen=0; - for (int x=0;xgeneinfo->finalize(); //t.parent->addAttr("locus", locname.chars()); //(*out_counter)++; ? - printGxfTab(f, *t.parent); + printTableData(f, *t.parent); T_NO_PRINT(t.parent->udata); } - printGxfTab(f, *gfo); + printTableData(f, *gfo); } int main(int argc, char* argv[]) { @@ -1423,7 +1412,7 @@ if (firstGSeqHeader) { printGSeqHeader(f_out, gdata); firstGSeqHeader=false; } gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } - else printGxfTab(f_out, gfst); + else printTableData(f_out, gfst); } ++gfs_i; } @@ -1448,13 +1437,13 @@ if (pdata && pdata->geneinfo!=NULL) pdata->geneinfo->finalize(); if (fmtTable) - printGxfTab(f_out, *(t.parent)); + printTableData(f_out, *(t.parent)); else t.parent->printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); T_NO_PRINT(t.parent->udata); } if (fmtTable) - printGxfTab(f_out, t); + printTableData(f_out, t); else t.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } @@ -1472,7 +1461,7 @@ if (firstGSeqHeader) { printGSeqHeader(f_out, gdata); firstGSeqHeader=false; } gfst.printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars); } else - printGxfTab(f_out, gfst); + printTableData(f_out, gfst); } ++gfs_i; } diff -Nru gffread-0.11.7/gff_utils.cpp gffread-0.11.8/gff_utils.cpp --- gffread-0.11.7/gff_utils.cpp 2020-01-23 21:42:45.000000000 +0000 +++ gffread-0.11.8/gff_utils.cpp 2020-04-02 20:13:41.000000000 +0000 @@ -2,38 +2,12 @@ bool verbose=false; //same with GffReader::showWarnings and GffLoader::beVserbose -//bool debugState=false; -/* -void printTabFormat(FILE* f, GffObj* t) { - static char dbuf[1024]; - fprintf(f, "%s\t%s\t%c\t%d\t%d\t%d\t", t->getID(), t->getGSeqName(), t->strand, t->start, t->end, t->exons.Count()); - t->printExonList(f); - if (t->hasCDS()) fprintf(f, "\t%d:%d", t->CDstart, t->CDend); - else fprintf(f, "\t."); - - if (t->getGeneID()!=NULL) - fprintf(f, "\tgeneID=%s",t->getGeneID()); - if (t->getGeneName()!=NULL) { - GffObj::decodeHexChars(dbuf, t->getGeneName()); - fprintf(f, "\tgene_name=%s", dbuf); - } - if (t->attrs!=NULL) { - for (int i=0;iattrs->Count();i++) { - const char* attrname=t->getAttrName(i); - GffObj::decodeHexChars(dbuf, t->attrs->Get(i)->attr_val); - fprintf(f,"\t%s=%s", attrname, dbuf); - } - } - fprintf(f, "\n"); -} -*/ - -void printFasta(FILE* f, GStr& defline, char* seq, int seqlen, bool useStar) { +void printFasta(FILE* f, GStr* defline, char* seq, int seqlen, bool useStar) { if (seq==NULL) return; int len=(seqlen>0)?seqlen:strlen(seq); if (len<=0) return; - if (!defline.is_empty()) - fprintf(f, ">%s\n",defline.chars()); + if (defline!=NULL) + fprintf(f, ">%s\n",defline->chars()); int ilen=0; for (int i=0; i < len; i++, ilen++) { if (ilen == 70) { diff -Nru gffread-0.11.7/gff_utils.h gffread-0.11.8/gff_utils.h --- gffread-0.11.7/gff_utils.h 2020-01-23 21:42:45.000000000 +0000 +++ gffread-0.11.8/gff_utils.h 2020-04-02 20:13:41.000000000 +0000 @@ -627,7 +627,7 @@ }; -void printFasta(FILE* f, GStr& defline, char* seq, int seqlen=-1, bool useStar=false); +void printFasta(FILE* f, GStr* defline, char* seq, int seqlen=-1, bool useStar=false); //void printTabFormat(FILE* f, GffObj* t); diff -Nru gffread-0.11.7/LICENSE gffread-0.11.8/LICENSE --- gffread-0.11.7/LICENSE 2020-01-23 21:42:45.000000000 +0000 +++ gffread-0.11.8/LICENSE 2020-04-02 20:13:41.000000000 +0000 @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2001-2018 Geo Pertea +Copyright (c) 2001 Geo Pertea Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff -Nru gffread-0.11.7/README.md gffread-0.11.8/README.md --- gffread-0.11.7/README.md 2020-01-23 21:42:45.000000000 +0000 +++ gffread-0.11.8/README.md 2020-04-02 20:13:41.000000000 +0000 @@ -2,11 +2,10 @@ GFF/GTF parsing utility providing format conversions, region filtering, FASTA sequence extraction and more. - Use gffread -h to check the usage options. -Compiling this program from source requires my other code -library, [GCLib](../../../gclib). It can be done like this: +Compiling this program from source requires the [GCLib](../../../gclib) code +library. Building the program can be done like this: ``` cd /some/build/dir @@ -15,4 +14,5 @@ cd gffread make release ``` + This should build the **gffread** binary in the current directory.