diff -Nru stringtie-2.1.1+ds/debian/changelog stringtie-2.1.2+ds/debian/changelog
--- stringtie-2.1.1+ds/debian/changelog	2020-02-09 06:51:25.000000000 +0000
+++ stringtie-2.1.2+ds/debian/changelog	2020-04-27 13:44:50.000000000 +0000
@@ -1,3 +1,12 @@
+stringtie (2.1.2+ds-1) unstable; urgency=medium
+
+  * New upstream version
+  * Add salsa-ci file (routine-update)
+  * Rules-Requires-Root: no (routine-update)
+  * Removed "debian uupdate" from d/watch
+
+ -- Steffen Moeller <moeller@debian.org>  Mon, 27 Apr 2020 15:44:50 +0200
+
 stringtie (2.1.1+ds-2) unstable; urgency=medium
 
   * Team upload
diff -Nru stringtie-2.1.1+ds/debian/control stringtie-2.1.2+ds/debian/control
--- stringtie-2.1.1+ds/debian/control	2020-02-09 06:51:25.000000000 +0000
+++ stringtie-2.1.2+ds/debian/control	2020-04-27 13:44:50.000000000 +0000
@@ -11,6 +11,7 @@
 Vcs-Browser: https://salsa.debian.org/med-team/stringtie
 Vcs-Git: https://salsa.debian.org/med-team/stringtie.git
 Homepage: http://ccb.jhu.edu/software/stringtie/
+Rules-Requires-Root: no
 
 Package: stringtie
 Architecture: any
diff -Nru stringtie-2.1.1+ds/debian/salsa-ci.yml stringtie-2.1.2+ds/debian/salsa-ci.yml
--- stringtie-2.1.1+ds/debian/salsa-ci.yml	1970-01-01 00:00:00.000000000 +0000
+++ stringtie-2.1.2+ds/debian/salsa-ci.yml	2020-04-27 13:44:47.000000000 +0000
@@ -0,0 +1,4 @@
+---
+include:
+  - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
+  - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml
diff -Nru stringtie-2.1.1+ds/debian/watch stringtie-2.1.2+ds/debian/watch
--- stringtie-2.1.1+ds/debian/watch	2020-02-09 06:51:25.000000000 +0000
+++ stringtie-2.1.2+ds/debian/watch	2020-04-27 13:44:20.000000000 +0000
@@ -1,3 +1,3 @@
 version=4
 opts="dversionmangle=auto,oversionmangle=s/$/+ds/,repack" \
-http://ccb.jhu.edu/software/stringtie/ dl/stringtie-([\d\.]+)\.tar\.gz debian uupdate
+http://ccb.jhu.edu/software/stringtie/ dl/stringtie-([\d\.]+)\.tar\.gz
diff -Nru stringtie-2.1.1+ds/gclib/GArgs.cpp stringtie-2.1.2+ds/gclib/GArgs.cpp
--- stringtie-2.1.1+ds/gclib/GArgs.cpp	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GArgs.cpp	2020-04-21 20:54:10.000000000 +0000
@@ -172,6 +172,9 @@
                    //value is the next argument
                    if (p+1<_argc && _argv[p+1][0]!=0) {
                       p++;
+					#if defined(__APPLE__) && defined(DEBUG)
+                      dbg_dequote(_argv[p]);
+   	   	   	   	   	#endif
                       args[count].value=Gstrdup(_argv[p]);
                       }
                     else {
diff -Nru stringtie-2.1.1+ds/gclib/GBam.cpp stringtie-2.1.2+ds/gclib/GBam.cpp
--- stringtie-2.1.1+ds/gclib/GBam.cpp	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GBam.cpp	2020-04-21 20:54:10.000000000 +0000
@@ -374,6 +374,16 @@
    return 0;
   }
 
+ char GBamRecord::tag_char1(const char tag[2]) { //just the first char from Z type tags
+	uint8_t* s=bam_aux_get(this->b, tag);
+	if (s==NULL) return 0;
+ 	int type;
+ 	type = *s++;
+ 	if (s == 0) return 0;
+ 	if (type == 'A' || type == 'Z') return *(char*)s;
+ 	else return 0;
+ }
+
  int GBamRecord::tag_int(const char tag[2]) { //get the numeric value of tag
    uint8_t *s=find_tag(tag);
    if (s) return ( bam_aux2i(s) );
@@ -393,10 +403,10 @@
    }
 
  char GBamRecord::spliceStrand() { // '+', '-' from the XS tag, or 0 if no XS tag
-   char c=tag_char("XS");
+   char c=tag_char1("XS");
    if (c==0) {
     //try minimap2's "ts" tag
-    char m=tag_char("ts");
+    char m=tag_char1("ts");
     if (m=='+' || m=='-') {
        if ((this->b->core.flag & BAM_FREVERSE) != 0) c=((m=='+') ? '-' : '+');
          else c=m;
diff -Nru stringtie-2.1.1+ds/gclib/GBam.h stringtie-2.1.2+ds/gclib/GBam.h
--- stringtie-2.1.1+ds/gclib/GBam.h	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GBam.h	2020-04-21 20:54:10.000000000 +0000
@@ -194,6 +194,7 @@
  int tag_int(const char tag[2]); //return numeric value of tag (for numeric types)
  float tag_float(const char tag[2]); //return float value of tag (for float types)
  char tag_char(const char tag[2]); //return char value of tag (for type 'A')
+ char tag_char1(const char tag[2]); //return char value of tag (for type 'A')
  char spliceStrand(); // '+', '-' from the XS tag, or '.' if no XS tag
 
  char* sequence(); //user should free after use
diff -Nru stringtie-2.1.1+ds/gclib/GBase.cpp stringtie-2.1.2+ds/gclib/GBase.cpp
--- stringtie-2.1.1+ds/gclib/GBase.cpp	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GBase.cpp	2020-04-21 20:54:10.000000000 +0000
@@ -637,11 +637,56 @@
     g=h&0xF0000000;
     if(g) h^=g>>24;
     h&=0x0fffffff;
-    }
+  }
   GASSERT(h<=0x0fffffff);
   return h;
   }
 
+
+inline uint32_t fmix32 ( uint32_t h ) {
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+  return h;
+}
+
+int murmur3(const char *key) {
+  int len=strlen(key);
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = 5381; //seed;
+
+  const uint32_t c1 = 0xcc9e2d51;
+  const uint32_t c2 = 0x1b873593;
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+  for(int i = -nblocks; i; i++) {
+    uint32_t k1 = blocks[i];
+    k1 *= c1;
+    k1 = (k1 << 15) | (k1 >> (32 - 15));
+    k1 *= c2;
+    h1 ^= k1;
+    h1 = (k1 << 13) | (k1 >> (32 - 13));
+    h1 = h1*5+0xe6546b64;
+  }
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+   uint32_t k1 = 0;
+   switch(len & 3)  {
+    case 3: k1 ^= tail[2] << 16;
+    case 2: k1 ^= tail[1] << 8;
+    case 1: k1 ^= tail[0];
+            k1 *= c1; k1 = (k1 << 15) | (k1 >> (32 - 15)); k1 *= c2; h1 ^= k1;
+   };
+
+    h1 ^= len;
+    return (fmix32(h1) & 0x7FFFFFFF);
+  }
+
+
 int djb_hash(const char* cp)
 {
     int h = 5381;
diff -Nru stringtie-2.1.1+ds/gclib/GBase.h stringtie-2.1.2+ds/gclib/GBase.h
--- stringtie-2.1.1+ds/gclib/GBase.h	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GBase.h	2020-04-21 20:54:10.000000000 +0000
@@ -1,6 +1,6 @@
 #ifndef G_BASE_DEFINED
 #define G_BASE_DEFINED
-#define GCLIB_VERSION "0.11.4"
+#define GCLIB_VERSION "0.11.7"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -289,6 +289,7 @@
 //alternate hash functions:
 int fnv1a_hash(const char* cp);
 int djb_hash(const char* cp);
+int murmur3(const char *key);
 
 //---- generic base GSeg : genomic segment (interval) --
 // coordinates are considered 1-based (so 0 is invalid)
diff -Nru stringtie-2.1.1+ds/gclib/GBitVec.h stringtie-2.1.2+ds/gclib/GBitVec.h
--- stringtie-2.1.1+ds/gclib/GBitVec.h	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GBitVec.h	2020-04-21 20:54:10.000000000 +0000
@@ -153,7 +153,7 @@
 
 
   void bitSizeError() {
-    GError("Error at GBitVec: unsupported BitWord size (%d)!\n", 
+    GError("Error at GBitVec: unsupported BitWord size (%d)!\n",
         sizeof(BitWord));
     }
   /// count - Returns the number of bits which are set.
@@ -277,6 +277,9 @@
   }
 
   GBitVec &set(uint Idx) {
+#ifndef NDEBUG
+	  indexCheck(Idx, Size);
+#endif
     fBits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE);
     return *this;
   }
@@ -287,6 +290,9 @@
   }
 
   GBitVec &reset(uint Idx) {
+#ifndef NDEBUG
+	  indexCheck(Idx, Size);
+#endif
     fBits[Idx / BITWORD_SIZE] &= ~(1L << (Idx % BITWORD_SIZE));
     return *this;
   }
@@ -299,6 +305,9 @@
   }
 
   GBitVec &flip(uint Idx) {
+#ifndef NDEBUG
+	  indexCheck(Idx, Size);
+#endif
     fBits[Idx / BITWORD_SIZE] ^= 1L << (Idx % BITWORD_SIZE);
     return *this;
   }
@@ -310,7 +319,7 @@
 
   inline static void indexCheck(uint vIdx, uint vSize) {
     if (vIdx >= vSize)
-      GError("Error at GBitVec: index %d out of bounds (size %d)\n", 
+      GError("Error at GBitVec: index %d out of bounds (size %d)\n",
         (int)vIdx, vSize);
    }
 
@@ -439,7 +448,7 @@
 
     //  Then set any stray high bits of the last used word.
     uint ExtraBits = Size % BITWORD_SIZE;
-    
+
     if (ExtraBits) {
       BitWord ExtraBitMask = ~0UL << ExtraBits;
       if (value)
diff -Nru stringtie-2.1.1+ds/gclib/gff.cpp stringtie-2.1.2+ds/gclib/gff.cpp
--- stringtie-2.1.1+ds/gclib/gff.cpp	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/gff.cpp	2020-04-21 20:54:10.000000000 +0000
@@ -3201,7 +3201,6 @@
 		}
 		//-- single-exon qry overlaping multi-exon ref
 		//check full pre-mRNA case (all introns retained): code 'm'
-
 		if (m.start<=r.exons[0]->end && m.end>=r.exons[jmax]->start)
 			return 'm';
 
@@ -3272,27 +3271,32 @@
 	//check intron chain overlap (match, containment, intron retention etc.)
 	int i=1; //index of exon to the right of current qry intron
 	int j=1; //index of exon to the right of current ref intron
-	bool intron_conflict=false; //used for checking for retained introns
+	bool intron_conflict=false; //overlapping introns have at least a mismatching splice site
 	//from here on we check all qry introns against ref introns
 	bool junct_match=false; //true if at least a junction match is found
-	bool ichain_match=true; //if there is intron (sub-)chain match, to be updated by any mismatch
+	bool ichain_match=false; //if there is intron (sub-)chain match, to be updated by any mismatch
 	bool intron_ovl=false; //if any intron overlap is found
 	bool intron_retention=false; //if any ref intron is covered by a qry exon
-	int imfirst=0; //index of first intron match in query (valid>0)
-	int jmfirst=0; //index of first intron match in reference (valid>0)
-	int imlast=0;  //index of first intron match in query
-	int jmlast=0;  //index of first intron match in reference
+	//intron chain (partial) match exon-index boundaries:
+	int imfirst=0; //index of exon after first intron match in query (valid>0)
+	int jmfirst=0; //index of exon after first intron match in reference (valid>0)
+	int imlast=0;  //index of exon after last intron match in query
+	int jmlast=0;  //index of  exon after last intron match in reference
+	//--keep track of the last overlapping introns in both qry and ref:
+	//int q_last_iovl=0;
+	//int r_last_iovl=0;
+
 	//check for intron matches
 	while (i<=imax && j<=jmax) {
-		uint mstart=m.exons[i-1]->end;
+		uint mstart=m.exons[i-1]->end; //qry intron start-end
 		uint mend=m.exons[i]->start;
-		uint rstart=r.exons[j-1]->end;
+		uint rstart=r.exons[j-1]->end; //ref intron start-end
 		uint rend=r.exons[j]->start;
 		if (rend<mstart) { //qry intron starts after ref intron ends
 			if (!intron_conflict && r.exons[j]->overlap(mstart+1, mend-1))
-				intron_conflict=true;
-			if (!intron_retention && rstart>=m.exons[i-1]->start)
-				intron_retention=true;
+				intron_conflict=true; //next ref exon overlaps this qry intron
+			if (!intron_retention && rstart>=m.exons[i-1]->start && rend<=m.exons[i-1]->end)
+				intron_retention=true; //this ref intron is covered by previous qry exons[i-1]
 			if (intron_ovl) ichain_match=false;
 			j++;
 			continue;
@@ -3301,64 +3305,136 @@
 			//if qry intron overlaps the exon on the left, we have an intron conflict
 			if (!intron_conflict && r.exons[j-1]->overlap(mstart+1, mend-1))
 				intron_conflict=true;
-			if (!intron_retention && rend<=m.exons[i]->end)
+			if (!intron_retention && rstart>=m.exons[i]->start && rend<=m.exons[i]->end)
 				intron_retention=true;
 			if (intron_ovl) ichain_match=false;
 			i++;
 			continue;
 		} //no intron overlap, skipping qry intron
 		intron_ovl=true;
+		//q_last_iovl=i; //keep track of the last overlapping introns in both qry and ref
+		//r_last_iovl=j;
 		//overlapping introns, test junction matching
 		bool smatch=(mstart==rstart);
 		bool ematch=(mend==rend);
 		if (smatch || ematch) junct_match=true;
 		if (smatch && ematch) {
 			//perfect match for this intron
-			if (ichain_match) { //chain matching still possible
-			  if (jmfirst==0) jmfirst=j;
-			  if (imfirst==0) imfirst=i;
-			  imlast=i;
-			  jmlast=j;
+			if (jmfirst==0) {
+				ichain_match=true;
+				jmfirst=j;
+				imfirst=i;
+			}
+			if (ichain_match) {
+  		       imlast=i;
+			   jmlast=j;
 			}
 			i++; j++;
 			continue;
 		}
-		//intron overlapping but with at least a junction mismatch
+		//intron overlapping but not fully matching
 		intron_conflict=true;
 		ichain_match=false;
 		if (mend>rend) j++; else i++;
 	} //while checking intron overlaps
-	if (ichain_match) { //intron sub-chain match
+	/*** additional checking needed for intron retention when there is no ichain_match or overlap ?
+    if (!intron_retention && r_last_iovl<jmax) {
+	   //-- check the remaining ref introns not checked yet for retention
+       int i=q_last_iovl;
+       for (int j=r_last_iovl+1;j<=jmax && i<=imax;++j) {
+   		uint rstart=r.exons[j-1]->end; //ref intron start-end
+   		uint rend=r.exons[j]->start;
+   		if (rend<m.exons[i]->start) {
+   			i++;
+   			continue;
+   		}
+   		if (rstart>m.exons[i]->end)
+   			continue;
+   		//overlap between ref intron and m.exons[i]
+   		if (rstart>=m.exons[i]->start && rend<=m.exons[i]->end) {
+   			intron_retention=true;
+   			break;
+   		}
+       }
+    }
+    ***/
+	// --- when qry intron chain is contained within ref intron chain
+	//     qry terminal exons may poke (overhang) into ref's other introns
+	int l_iovh=0;   // overhang of q left boundary beyond the end of ref intron on the left
+	int r_iovh=0;   // same type of overhang through the ref intron on the right
+	int qry_intron_poking=0;
+	// --- when ref intron chain is contained within qry intron chain,
+	//     terminal exons of ref may poke (overhang) into qry other introns
+	int l_jovh=0;   // overhang of q left boundary beyond the end of ref intron to the left
+	int r_jovh=0;   // same type of overhang through the ref intron on the right
+	int ref_intron_poking=0;
+	if (ichain_match) { //intron (sub-)chain compatible so far (but there could still be conflicts)
 		if (imfirst==1 && imlast==imax) { // qry full intron chain match
 			if (jmfirst==1 && jmlast==jmax) {//identical intron chains
 				if (strictMatch) return (r.exons[0]->start==m.exons[0]->start &&
 						              r.exons.Last()->end && m.exons.Last()->end) ? '=' : '~';
 				else return '=';
 			}
-			// -- qry intron chain is shorter than ref intron chain --
-			int l_iovh=0;   // overhang of leftmost q exon left boundary beyond the end of ref intron to the left
-			int r_iovh=0;   // same type of overhang through the ref intron on the right
-			if (jmfirst>1 && r.exons[jmfirst-1]->start>m.start)
-				l_iovh = r.exons[jmfirst-1]->start - m.start;
-			if (jmlast<jmax && m.end > r.exons[jmlast]->end)
-				r_iovh = m.end - r.exons[jmlast]->end;
-			if (l_iovh<4 && r_iovh<4) return 'c';
-		} else if ((jmfirst==1 && jmlast==jmax)) {//ref full intron chain match
-			//check if the reference i-chain is contained in qry i-chain
-			int l_jovh=0;   // overhang of leftmost q exon left boundary beyond the end of ref intron to the left
-			int r_jovh=0;   // same type of overhang through the ref intron on the right
-			if (imfirst>1 && m.exons[imfirst-1]->start>r.start)
-				l_jovh = m.exons[imfirst-1]->start - r.start;
-			if (imlast<imax && r.end > m.exons[imlast]->end)
-				r_jovh = r.end - m.exons[imlast]->end;
-			if (l_jovh<4 && r_jovh<4) return 'k'; //reverse containment
+			// -- a partial intron chain match
+			if (jmfirst>1) {
+				//find if m.start falls within any ref intron before jmfirst
+				for (int j=jmfirst-1;j>0;--j)
+					if (m.start<r.exons[j]->start) {
+						if (m.start>r.exons[j-1]->end) { //m.start within this ref intron
+							l_iovh = r.exons[j]->start - m.start;
+							break;
+						}
+						else { intron_retention=true; ichain_match=false; }
+					}
+			}
+			if (jmlast<jmax) {
+				for (int j=jmlast;j<jmax;++j)
+					if (m.end > r.exons[j]->end) {
+						if (m.end < r.exons[j+1]->start) { //m.end within this ref intron
+							r_iovh = m.end - r.exons[j]->end;
+						    break;
+						}
+						else { intron_retention=true; ichain_match=false; }
+					}
+			}
+			if (ichain_match && l_iovh<4 && r_iovh<4) return 'c';
+			qry_intron_poking=GMAX(l_iovh, r_iovh);
+		} else if ((jmfirst==1 && jmlast==jmax)) {//ref intron chain match
+			//check if the reference j-chain is contained in qry i-chain
+			//check for ref ends poking into qry introns
+			if (imfirst>1)  {
+				for (int i=imfirst-1;i>0;--i)
+					if (m.exons[i]->start>r.start) {
+						if (r.start>m.exons[i-1]->end) {
+							l_jovh = m.exons[i]->start - r.start;
+							break;
+						}
+						else { ichain_match = false; }
+					}
+			}
+			if (imlast<imax) {
+				for (int i=imlast;i<imax;++i)
+					if (r.end > m.exons[i]->end) {
+						if (r.end < m.exons[i+1]->start)
+							 { r_jovh = r.end - m.exons[i]->end; break; }
+						else { ichain_match = false; }
+					}
+			}
+			if (ichain_match && l_jovh<4 && r_jovh<4) return 'k'; //reverse containment
+			ref_intron_poking=GMAX(l_jovh, r_jovh);
 		}
 	}
 	//'=', 'c' and 'k' were checked and assigned, check for 'm' and 'n' before falling back to 'j'
-	if (!intron_conflict && (m.start<=r.exons[0]->end && m.end>=r.exons[jmax]->start)) {
-			return 'm';
+	if (intron_retention) {
+			//ref is boundary contained with qry intron chain ? that's not required for 'm'
+		    //GMessage("r_jovh=%d, r_iovh=%d, l_jovh=%d, l_iovh=%d\n", r_jovh, r_iovh, l_jovh, l_iovh);
+		    //GMessage("m.start=%d, r.exons[0]->end=%d, m.end=%d, r.exons[jmax]->start=%d\n",
+		    //		m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
+		    //if (ref_intron_poking>0 && )
+		//we just need to have no intron poking going on
+		if (!intron_conflict && ref_intron_poking<4 && qry_intron_poking<4) return 'm';
+		else return 'n';
 	}
-	if (intron_retention) return 'n';
 	if (junct_match) return 'j';
 	//we could have 'o' or 'y' here
 	//any real exon overlaps?
diff -Nru stringtie-2.1.1+ds/gclib/GHash.hh stringtie-2.1.2+ds/gclib/GHash.hh
--- stringtie-2.1.1+ds/gclib/GHash.hh	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GHash.hh	2020-04-21 20:54:10.000000000 +0000
@@ -1,139 +1,129 @@
 /********************************************************************************
-*                  Hash table class template (char* based)                               *
-*********************************************************************************/
+ *                  Hash table class template (char* based)                               *
+ *********************************************************************************/
 
 #ifndef GHash_HH
 #define GHash_HH
 #include "GBase.h"
 
 /**
-* This class maintains a fast-access hash table of entities
-* indexed by a character string (essentially, maps strings to pointers)
-*/
+ * This class maintains a fast-access hash table of entities
+ * indexed by a character string (essentially, maps strings to pointers)
+ */
 
 //#define HASH_DBG_PRINT 1
 
 #define GSTR_HASH(s) strhash(s)
 //#define GSTR_HASH(s) djb_hash(s)
 //#define GSTR_HASH(s) fnv1a_hash(s)
+//#define GSTR_HASH(s) murmur3(s)
 
 template <class OBJ> class GHash {
- protected:
+protected:
 	struct GHashEntry {
-	     char*   key;              // Key string
-	     bool    keyalloc;         //shared key flag (to not free the key chars)
-	     int     hash;             // Hash value of key
-	     pointer data;              // Data
-	     bool    mark;             // Entry is marked
-	     };
-  GHashEntry* hash;         // Hash
-  int         fCapacity;     // table size
-  int         fCount;        // number of valid entries
-  int  fCurrentEntry;
-  char* lastkeyptr; //pointer to last key string added
-    //---------- Raw data retrieval (including empty entries
-  // Return key at position pos.
-  const char* Key(uint pos) const { return hash[pos].key; }
-  // return data OBJ* at given position
-  OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
-  // Return mark flag of entry at position pos.
-  bool Mark(uint pos) const { return hash[pos].mark; }
-  // Return position of first filled slot, or >= fCapacity
-  int First() const;
-  // Return position of last filled slot or -1
-  int Last() const;
-  // Return position of next filled slot in hash table
-  // or a value greater than or equal to fCapacity if no filled
-  // slot was found
-  int Next(int pos) const;
-  //Return position of previous filled slot in hash table
-  //or a -1 if no filled slot was found
-  int Prev(int pos) const;
+		char*   key;              // Key string
+		bool    keyalloc;         // shared key flag (to free/not the key)
+		int     hash;             // Hash value of key
+		pointer data;             // Data
+	};
+	GHashEntry* hash;          // Hash
+	int         fCapacity;     // table size
+	int         fCount;        // number of valid entries
+	int  fCurrentEntry;
+	char* lastkeyptr; //pointer to last key string added
+	//---------- Raw data retrieval (including empty entries)
+	// Return key at position pos.
+	const char* Key(uint pos) const { return hash[pos].key; }
+	// return data OBJ* at given position
+	OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
+	// Return position of first filled slot, or >= fCapacity
+	int First() const;
+	// Return position of last filled slot or -1
+	int Last() const;
+	// Return position of next filled slot in hash table
+	// or a value greater than or equal to fCapacity if no filled
+	// slot was found
+	int Next(int pos) const;
+	//Return position of previous filled slot in hash table
+	//or a -1 if no filled slot was found
+	int Prev(int pos) const;
 
 private:
-  GHash(const GHash&);
-  GHash &operator=(const GHash&);
-  GFreeProc* fFreeProc; //procedure to free item data
+	GHash(const GHash&);
+	GHash &operator=(const GHash&);
+	GFreeProc* fFreeProc; //procedure to free item data
 protected:
 public:
-  static void DefaultFreeProc(pointer item) {
-      delete (OBJ*)item;
-  }
+	static void DefaultFreeProc(pointer item) {
+		delete (OBJ*)item;
+	}
 public:
-  GHash(GFreeProc* freeProc); // constructs of an empty hash
-  GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
-  void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
-  void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
-  int Capacity() const { return fCapacity; } // table's size, including the empty slots.
-  void Resize(int m);  // Resize the table to the given size.
-  int Count() const { return fCount; }// the total number of entries in the table.
-
-  // Insert a new entry into the table given key and mark.
-  // If there is already an entry with that key, leave it unchanged
-  OBJ* Add(const char* ky, OBJ* ptr=NULL, bool mrk=false);
-
-  //same with Add, but frees the old element if it's a replacement
-  OBJ* fAdd(const char* ky, OBJ* ptr=NULL);
-
-  //same as Add, but the key pointer is stored directly, no string duplicate
-  //is made (shared-key-Add)
-  OBJ* shkAdd(const char* ky, OBJ* ptr, bool mrk=false);
-
-  // Replace data at key, if the entry's mark is less than
-  // or equal to the given mark.  If there was no existing entry,
-  // a new entry is inserted with the given mark.
-  OBJ* Replace(const char* ky, OBJ* ptr, bool mrk=false);
-  // Remove a given key and its data
-  OBJ* Remove(const char* ky);
-  // Find data OBJ* given key.
-  OBJ* Find(const char* ky, char** keyptr=NULL);
-  bool hasKey(const char* ky);
-  char* getLastKey() { return lastkeyptr; }
-  OBJ* operator[](const char* ky) { return Find(ky); }
-  void startIterate(); //iterator-like initialization
-  char* NextKey(); //returns next valid key in the table (NULL if no more)
-  OBJ* NextData(); //returns next valid hash[].data
-  OBJ* NextData(char*& nextkey); //returns next valid hash[].data
-                                //or NULL if no more
-                                //nextkey is SET to the corresponding key
-  GHashEntry* NextEntry() { //returns a pointer to a GHashEntry
-  	 int pos=fCurrentEntry;
-  	 while (pos<fCapacity && hash[pos].hash<0) pos++;
-  	 if (pos==fCapacity) {
-  	                 fCurrentEntry=fCapacity;
-  	                 return NULL;
-  	                 }
-  	              else {
-  	                 fCurrentEntry=pos+1;
-  	                 return &hash[pos];
-  	                 }
-  }
-  /// Clear all entries
-  void Clear();
-
-  /// Destructor
-  virtual ~GHash();
-  };
+	GHash(GFreeProc* freeProc); // constructs of an empty hash
+	GHash(bool doFree=true); // constructs of an empty hash (free the item objects)
+	void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
+	void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
+	int Capacity() const { return fCapacity; } // table's size, including the empty slots.
+	void Resize(int m);  // Resize the table to the given size.
+	int Count() const { return fCount; }// the total number of entries in the table.
+
+	// Insert a new entry into the table given key.
+	// If there is already an entry with that key, leave it unchanged
+	OBJ* Add(const char* ky, OBJ* ptr=NULL);
+
+	//same with Add, but frees the old element if it's a replacement
+	OBJ* fAdd(const char* ky, OBJ* ptr=NULL);
+
+	//same as Add, but the key pointer is stored directly, no string copy needed
+	//(shared-key-Add)
+	OBJ* shkAdd(const char* ky, OBJ* ptr);
+
+	// Replace data at key. If there was no existing entry,
+	// a new entry is inserted.
+	OBJ* Replace(const char* ky, OBJ* ptr);
+	// Remove a given key and its data
+	OBJ* Remove(const char* ky);
+	// Find data OBJ* given key.
+	OBJ* Find(const char* ky, char** keyptr=NULL);
+	bool hasKey(const char* ky);
+	char* getLastKey() { return lastkeyptr; }
+	OBJ* operator[](const char* ky) { return Find(ky); }
+	void startIterate(); //iterator-like initialization
+	char* NextKey(); //returns next valid key in the table (NULL if no more)
+	OBJ* NextData(); //returns next valid hash[].data
+	OBJ* NextData(char*& nextkey); //returns next valid hash[].data
+	//or NULL if no more
+	//nextkey is SET to the corresponding key
+	GHashEntry* NextEntry() { //returns a pointer to a GHashEntry
+		int pos=fCurrentEntry;
+		while (pos<fCapacity && hash[pos].hash<0) pos++;
+		if (pos==fCapacity) {
+			fCurrentEntry=fCapacity;
+			return NULL;
+		}
+		else {
+			fCurrentEntry=pos+1;
+			return &hash[pos];
+		}
+	}
+	/// Clear all entries
+	void Clear();
+
+	/// Destructor
+	virtual ~GHash();
+};
 //
 //======================== method definitions ========================
 //
 /*
   Notes:
-  - The hash algorithm should yield a fCount in the range [0...GHash::EMPTY)
-     GHash::EMPTY and GHash::UNUSED are needed for flag purposes.
   - Since the algorithm doubles the table size when exceeding MAX_LOAD,
     it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
-    otherwise, the algorithm might hip-hop between halving and doubling,
-    which would be quite expensive!!
-  - Not many people seem to know that hash tables don't have to be prime
-    numbers; in fact, a table size of 2**n and odd probe distance are very
-    easy to arrange, and this works just as well!
-  - We store the hash key, so that 99.999% of the time we can compare hash numbers;
-    only when hash numbers match do we need to compare keys.
-    Thus, with a good hash function, the fCount of calls to strcmp() should be
+    otherwise, the algorithm might flip between halving and doubling!
+  - We store the key hash value so that 99.999% of the time we can compare hash numbers;
+    only when hash numbers match we need to compare keys.
+  - Thus with a good hash function the fCount of calls to strcmp() should be
     roughly the same as the fCount of successful lookups.
-  - The hash table should NEVER get full, or stuff will loop forever!!
-*/
+ */
 
 // Initial table size (MUST be power of 2)
 #define DEF_HASH_SIZE      32
@@ -141,6 +131,7 @@
 #define MAX_LOAD           80
 // Minimum hash table load factor (%)
 #define MIN_LOAD           10
+
 // Probe Position [0..n-1]
 #define HASH1(x,n) (((unsigned int)(x)*13)%(n))
 // Probe Distance [1..n-1]
@@ -151,544 +142,462 @@
 /*******************************************************************************/
 // Construct empty hash
 template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
-  GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
-  fCurrentEntry=-1;
-  fFreeProc=freeProc;
-  lastkeyptr=NULL;
-  for (uint i=0; i<DEF_HASH_SIZE; i++)
-         hash[i].hash=-1; //this will be an indicator for 'empty' entries
-  fCapacity=DEF_HASH_SIZE;
-  fCount=0;
-  }
+	GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
+	fCurrentEntry=-1;
+	fFreeProc=freeProc;
+	lastkeyptr=NULL;
+	for (uint i=0; i<DEF_HASH_SIZE; i++)
+		hash[i].hash=-1; //this will be an indicator for 'empty' entries
+	fCapacity=DEF_HASH_SIZE;
+	fCount=0;
+}
 
 template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
-  GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
-  fCurrentEntry=-1;
-  lastkeyptr=NULL;
-  fFreeProc = (doFree)?&DefaultFreeProc : NULL;
-  for (uint i=0; i<DEF_HASH_SIZE; i++)
-         hash[i].hash=-1; //this will be an indicator for 'empty' entries
-  fCapacity=DEF_HASH_SIZE;
-  fCount=0;
-  }
+	GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
+	fCurrentEntry=-1;
+	lastkeyptr=NULL;
+	fFreeProc = (doFree)?&DefaultFreeProc : NULL;
+	for (uint i=0; i<DEF_HASH_SIZE; i++)
+		hash[i].hash=-1; //this will be an indicator for 'empty' entries
+	fCapacity=DEF_HASH_SIZE;
+	fCount=0;
+}
 
 
 // Resize table
-template <class OBJ> void GHash<OBJ>::Resize(int m){
-  int i,n,p,x,h;
-  GHashEntry *k;
-  GASSERT(fCount<=fCapacity);
-  if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
-  n=fCapacity;
-  while((n>>2)>m) n>>=1;            // Shrink until n/4 <= m
-  while((n>>1)<m) n<<=1;            // Grow until m <= n/2
-  GASSERT(m<=(n>>1));
-  GASSERT(DEF_HASH_SIZE<=n);
-  if(n!=fCapacity){
-    GASSERT(m<=n);
-    GMALLOC(k, sizeof(GHashEntry)*n);
-    for(i=0; i<n; i++) k[i].hash=-1;
-    for(i=0; i<fCapacity; i++){
-      h=hash[i].hash;
-      if(0<=h){
-        p=HASH1(h,n);
-        GASSERT(0<=p && p<n);
-        x=HASH2(h,n);
-        GASSERT(1<=x && x<n);
-        while(k[p].hash!=-1) p=(p+x)%n;
-        GASSERT(k[p].hash<0);
-        k[p]=hash[i];
-        }
-      }
-    GFREE(hash);
-    hash=k;
-    fCapacity=n;
-    }
-  }
+template <class OBJ> void GHash<OBJ>::Resize(int m) {
+	int i,n,p,x,h;
+	GHashEntry *k;
+	GASSERT(fCount<=fCapacity);
+	if(m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
+	n=fCapacity;
+	while((n>>2)>m) n>>=1;            // Shrink until n/4 <= m
+	while((n>>1)<m) n<<=1;            // Grow until m <= n/2
+	GASSERT(m<=(n>>1));
+	GASSERT(DEF_HASH_SIZE<=n);
+	if(n!=fCapacity){
+		GASSERT(m<=n);
+		GMALLOC(k, sizeof(GHashEntry)*n);
+		for(i=0; i<n; i++) k[i].hash=-1;
+		for(i=0; i<fCapacity; i++){
+			h=hash[i].hash;
+			if(h>=0){
+				p=HASH1(h,n);
+				GASSERT(0<=p && p<n);
+				x=HASH2(h,n);
+				GASSERT(1<=x && x<n);
+				while(k[p].hash!=-1) p=(p+x)%n;
+				GASSERT(k[p].hash<0);
+				k[p]=hash[i];
+			}
+		}
+		GFREE(hash);
+		hash=k;
+		fCapacity=n;
+	}
+}
 
 // add a new entry, or update it if it already exists
 
 
-template <class OBJ> OBJ* GHash<OBJ>::Add(const char* ky,
-	                      OBJ* pdata, bool mrk){
-	  int p,i,x,h,n;
-	  if(!ky) GError("GHash::insert: NULL key argument.\n");
-	  GASSERT(fCount<fCapacity);
-	  h=GSTR_HASH(ky);
-	  GASSERT(0<=h);
-	  p=HASH1(h,fCapacity);
-	  GASSERT(0<=p && p<fCapacity);
-	  x=HASH2(h,fCapacity);
-	  GASSERT(1<=x && x<fCapacity);
-	  i=-1;
-	  n=fCapacity;
-#ifdef HASH_DBG_PRINT
-	  int iterations=0;
-	  int init_p=p;
-	  int init_x=x;
-#endif
-	  while(n && hash[p].hash!=-1) {
-	    if ((i==-1)&&(hash[p].hash==-2)) i=p;
-	    if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
-	      //replace hash data for this key!
-	      lastkeyptr=hash[p].key;
-	      OBJ* r = (OBJ*) hash[p].data;
-	      hash[p].data = (void*) pdata;
-#ifdef HASH_DBG_PRINT
-	      GMessage("Add.R\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
-	    		  ky, h,init_p,init_x, iterations,  fCount, fCapacity);
-#endif
-	      //return (OBJ*)hash[p].data;
-	      return r;
-	      }
-	    p=(p+x)%fCapacity;
-	    n--;
-	    }
-	  if(i==-1) i=p;
-#ifdef HASH_DBG_PRINT
-      GMessage("Add.N\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
-    		  ky, h,init_p,init_x, iterations,  fCount, fCapacity);
-#endif
-	  GTRACE(("GHash::insert: key=\"%s\"\n",ky));
-	  //GMessage("GHash::insert: key=\"%s\"\n",ky);
-	  GASSERT(0<=i && i<fCapacity);
-	  GASSERT(hash[i].hash<0);
-	  hash[i].hash=h;
-	  hash[i].mark=mrk;
-	  hash[i].key=Gstrdup(ky);
-	  hash[i].keyalloc=true;
-	  lastkeyptr=hash[i].key;
-	  hash[i].data= (void*) pdata;
-	  fCount++;
-	  if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
-	  GASSERT(fCount<fCapacity);
-	  return pdata;
- }
+template <class OBJ> OBJ* GHash<OBJ>::Add(const char* ky, OBJ* pdata) {
+	int p,i,x,h,n;
+	if(!ky) GError("GHash::insert: NULL key argument.\n");
+	GASSERT(fCount<fCapacity);
+	h=GSTR_HASH(ky);
+	GASSERT(0<=h);
+	p=HASH1(h,fCapacity);
+	GASSERT(0<=p && p<fCapacity);
+	x=HASH2(h,fCapacity);
+	GASSERT(1<=x && x<fCapacity);
+	i=-1;
+	n=fCapacity;
+#ifdef HASH_DBG_PRINT
+	int iterations=0;
+	int init_p=p;
+	int init_x=x;
+#endif
+	while(n && hash[p].hash!=-1) {
+		if ((i==-1)&&(hash[p].hash==-2)) i=p;
+		if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
+			//replace hash data for this key!
+			lastkeyptr=hash[p].key;
+			OBJ* r = (OBJ*) hash[p].data;
+			hash[p].data = (void*) pdata;
+#ifdef HASH_DBG_PRINT
+			GMessage("Add.R\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
+					ky, h,init_p,init_x, iterations,  fCount, fCapacity);
+#endif
+			return r;
+		}
+		p=(p+x)%fCapacity;
+		n--;
+	}
+	if(i==-1) i=p;
+#ifdef HASH_DBG_PRINT
+	GMessage("Add.N\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
+			ky, h,init_p,init_x, iterations,  fCount, fCapacity);
+#endif
+	GTRACE(("GHash::insert: key=\"%s\"\n",ky));
+	//GMessage("GHash::insert: key=\"%s\"\n",ky);
+	GASSERT(0<=i && i<fCapacity);
+	GASSERT(hash[i].hash<0);
+	hash[i].hash=h;
+	hash[i].key=Gstrdup(ky);
+	hash[i].keyalloc=true;
+	lastkeyptr=hash[i].key;
+	hash[i].data= (void*) pdata;
+	fCount++;
+	if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
+	GASSERT(fCount<fCapacity);
+	return pdata;
+}
 
-/*
-  int p,i,x,h;
-  if(!ky) GError("GHash::insert: NULL key argument.\n");
-  GASSERT(fCount<fCapacity);
-  h=GSTR_HASH(ky);
-  GASSERT(0<=h);
-  p=HASH1(h,fCapacity);
-  GASSERT(0<=p && p<fCapacity);
-  x=HASH2(h,fCapacity);
-  GASSERT(1<=x && x<fCapacity);
-  if (checkReplace(ky, pdata, p, i, h, x)) {
-	  return (OBJ*)hash[p].data;
-  }
-  GTRACE(("GHash::insert: key=\"%s\"\n",ky));
-  //GMessage("GHash::insert: key=\"%s\"\n",ky);
-  GASSERT(0<=i && i<fCapacity);
-  GASSERT(hash[i].hash<0);
-  hash[i].hash=h;
-  hash[i].mark=mrk;
-  hash[i].key=Gstrdup(ky);
-  hash[i].keyalloc=true;
-  lastkeyptr=hash[i].key;
-  hash[i].data= (void*) pdata;
-  fCount++;
-  if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
-  GASSERT(fCount<fCapacity);
-  return pdata;
-  }
-*/
-template <class OBJ> OBJ* GHash<OBJ>::fAdd(const char* ky,
-                      OBJ* pdata){
-  int p,i,x,h,n;
-  if(!ky) GError("GHash::insert: NULL key argument.\n");
-  GASSERT(fCount<fCapacity);
-  h=GSTR_HASH(ky);
-  GASSERT(0<=h);
-  p=HASH1(h,fCapacity);
-  GASSERT(0<=p && p<fCapacity);
-  x=HASH2(h,fCapacity);
-  GASSERT(1<=x && x<fCapacity);
-  i=-1;
-  n=fCapacity;
-#ifdef HASH_DBG_PRINT
-  int iterations=0;
-  int init_p=p;
-  int init_x=x;
-#endif
-  while(n && hash[p].hash!=-1) {
-    if ((i==-1)&&(hash[p].hash==-2)) i=p;
-    if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
-      //replace hash data for this key!
-      lastkeyptr=hash[p].key;
-      if (FREEDATA) (*fFreeProc)(hash[p].data);
-      hash[p].data = (void*) pdata;
-#ifdef HASH_DBG_PRINT
-      GMessage("Add.R\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
-    		  ky, h,init_p,init_x, iterations,  fCount, fCapacity);
-#endif
-      return pdata;
-      }
-    p=(p+x)%fCapacity;
-#ifdef HASH_DBG_PRINT
-    ++iterations;
-#endif
-    n--;
-    }
-  if(i==-1) i=p;
-#ifdef HASH_DBG_PRINT
-  GMessage("Add.N\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
-		  ky, h,init_p,init_x, iterations,  fCount, fCapacity);
-#endif
-  GTRACE(("GHash::insert: key=\"%s\"\n",ky));
-  //GMessage("GHash::insert: key=\"%s\"\n",ky);
-  GASSERT(0<=i && i<fCapacity);
-  GASSERT(hash[i].hash<0);
-  hash[i].hash=h;
-  hash[i].mark=false;
-  hash[i].key=Gstrdup(ky);
-  hash[i].keyalloc=true;
-  lastkeyptr=hash[i].key;
-  hash[i].data= (void*) pdata;
-  fCount++;
-  if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
-  GASSERT(fCount<fCapacity);
-  return pdata;
-  }
-
-
-
-template <class OBJ> OBJ* GHash<OBJ>::shkAdd(const char* ky,
-                      OBJ* pdata,bool mrk){
-  int p,i,x,h,n;
-  if(!ky) GError("GHash::insert: NULL key argument.\n");
-  GASSERT(fCount<fCapacity);
-  h=GSTR_HASH(ky);
-  GASSERT(0<=h);
-  p=HASH1(h,fCapacity);
-  GASSERT(0<=p && p<fCapacity);
-  x=HASH2(h,fCapacity);
-  GASSERT(1<=x && x<fCapacity);
-  i=-1;
-  n=fCapacity;
-  while(n && hash[p].hash!=-1){
-    if((i==-1)&&(hash[p].hash==-2)) i=p;
-    if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
-      //replace hash data for this key!
-      lastkeyptr=hash[p].key;
-      hash[p].data = (void*) pdata;
-      return (OBJ*)hash[p].data;
-      }
-    p=(p+x)%fCapacity;
-    n--;
-    }
-  if(i==-1) i=p;
-  GTRACE(("GHash::insert: key=\"%s\"\n",ky));
-  //GMessage("GHash::insert: key=\"%s\"\n",ky);
-  GASSERT(0<=i && i<fCapacity);
-  GASSERT(hash[i].hash<0);
-  hash[i].hash=h;
-  hash[i].mark=mrk;
-  hash[i].key=(char *)ky;
-  lastkeyptr=hash[i].key;
-  hash[i].keyalloc=false;
-  hash[i].data= (void*) pdata;
-  fCount++;
-  if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
-  GASSERT(fCount<fCapacity);
-  return pdata;
-  }
+template <class OBJ> OBJ* GHash<OBJ>::fAdd(const char* ky, OBJ* pdata) {
+	int p,i,x,h,n;
+	if(!ky) GError("GHash::insert: NULL key argument.\n");
+	GASSERT(fCount<fCapacity);
+	h=GSTR_HASH(ky);
+	GASSERT(0<=h);
+	p=HASH1(h,fCapacity);
+	GASSERT(0<=p && p<fCapacity);
+	x=HASH2(h,fCapacity);
+	GASSERT(1<=x && x<fCapacity);
+	i=-1;
+	n=fCapacity;
+#ifdef HASH_DBG_PRINT
+	int iterations=0;
+	int init_p=p;
+	int init_x=x;
+#endif
+	while(n && hash[p].hash!=-1) {
+		if ((i==-1)&&(hash[p].hash==-2)) i=p;
+		if (hash[p].hash==h && strcmp(hash[p].key,ky)==0) {
+			//replace hash data for this key!
+			lastkeyptr=hash[p].key;
+			if (FREEDATA) (*fFreeProc)(hash[p].data);
+			hash[p].data = (void*) pdata;
+#ifdef HASH_DBG_PRINT
+			GMessage("Add.R\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
+					ky, h,init_p,init_x, iterations,  fCount, fCapacity);
+#endif
+			return pdata;
+		}
+		p=(p+x)%fCapacity;
+#ifdef HASH_DBG_PRINT
+		++iterations;
+#endif
+		n--;
+	}
+	if(i==-1) i=p;
+#ifdef HASH_DBG_PRINT
+	GMessage("Add.N\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
+			ky, h,init_p,init_x, iterations,  fCount, fCapacity);
+#endif
+	GTRACE(("GHash::insert: key=\"%s\"\n",ky));
+	//GMessage("GHash::insert: key=\"%s\"\n",ky);
+	GASSERT(0<=i && i<fCapacity);
+	GASSERT(hash[i].hash<0);
+	hash[i].hash=h;
+	hash[i].key=Gstrdup(ky);
+	hash[i].keyalloc=true;
+	lastkeyptr=hash[i].key;
+	hash[i].data= (void*) pdata;
+	fCount++;
+	if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
+	GASSERT(fCount<fCapacity);
+	return pdata;
+}
 
+template <class OBJ> OBJ* GHash<OBJ>::shkAdd(const char* ky, OBJ* pdata) {
+	int p,i,x,h,n;
+	if(!ky) GError("GHash::insert: NULL key argument.\n");
+	GASSERT(fCount<fCapacity);
+	h=GSTR_HASH(ky);
+	GASSERT(0<=h);
+	p=HASH1(h,fCapacity);
+	GASSERT(0<=p && p<fCapacity);
+	x=HASH2(h,fCapacity);
+	GASSERT(1<=x && x<fCapacity);
+	i=-1;
+	n=fCapacity;
+	while(n && hash[p].hash!=-1){
+		if((i==-1)&&(hash[p].hash==-2)) i=p;
+		if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
+			//replace hash data for this key!
+			lastkeyptr=hash[p].key;
+			hash[p].data = (void*) pdata;
+			return (OBJ*)hash[p].data;
+		}
+		p=(p+x)%fCapacity;
+		n--;
+	}
+	if(i==-1) i=p;
+	GTRACE(("GHash::insert: key=\"%s\"\n",ky));
+	//GMessage("GHash::insert: key=\"%s\"\n",ky);
+	GASSERT(0<=i && i<fCapacity);
+	GASSERT(hash[i].hash<0);
+	hash[i].hash=h;
+	hash[i].key=(char *)ky;
+	lastkeyptr=hash[i].key;
+	hash[i].keyalloc=false;
+	hash[i].data= (void*) pdata;
+	fCount++;
+	if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
+	GASSERT(fCount<fCapacity);
+	return pdata;
+}
 
 // Add or replace entry
-template <class OBJ>  OBJ* GHash<OBJ>::Replace(const char* ky, OBJ* pdata, bool mrk){
-  int p,i,x,h,n;
-  if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
-  GASSERT(fCount<fCapacity);
-  h=GSTR_HASH(ky);
-  GASSERT(0<=h);
-  p=HASH1(h,fCapacity);
-  GASSERT(0<=p && p<fCapacity);
-  x=HASH2(h,fCapacity);
-  GASSERT(1<=x && x<fCapacity);
-  i=-1;
-  n=fCapacity;
-  while(n && hash[p].hash!=-1){
-    if((i==-1)&&(hash[p].hash==-2)) i=p;
-    if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
-      if(hash[p].mark<=mrk){
-        GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
-        if (FREEDATA) (*fFreeProc)(hash[p].data);
-        hash[p].mark=mrk;
-        hash[p].data=pdata;
-        }
-      return hash[p].data;
-      }
-    p=(p+x)%fCapacity;
-    n--;
-    }
-  if(i==-1) i=p;
-  GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
-  GASSERT(0<=i && i<fCapacity);
-  GASSERT(hash[i].hash<0);
-  hash[i].hash=h;
-  hash[i].mark=mrk;
-  hash[i].key=Gstrdup(ky);
-  hash[i].data=pdata;
-  fCount++;
-  if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
-  GASSERT(fCount<fCapacity);
-  return pdata;
-  }
+template <class OBJ>  OBJ* GHash<OBJ>::Replace(const char* ky, OBJ* pdata){
+	int p,i,x,h,n;
+	if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
+	GASSERT(fCount<fCapacity);
+	h=GSTR_HASH(ky);
+	GASSERT(0<=h);
+	p=HASH1(h,fCapacity);
+	GASSERT(0<=p && p<fCapacity);
+	x=HASH2(h,fCapacity);
+	GASSERT(1<=x && x<fCapacity);
+	i=-1;
+	n=fCapacity;
+	while(n && hash[p].hash!=-1){
+		if((i==-1)&&(hash[p].hash==-2)) i=p;
+		if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
+			GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
+			if (FREEDATA) (*fFreeProc)(hash[p].data);
+			hash[p].data=pdata;
+			return hash[p].data;
+		}
+		p=(p+x)%fCapacity;
+		n--;
+	}
+	if(i==-1) i=p;
+	GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
+	GASSERT(0<=i && i<fCapacity);
+	GASSERT(hash[i].hash<0);
+	hash[i].hash=h;
+	hash[i].key=Gstrdup(ky);
+	hash[i].data=pdata;
+	fCount++;
+	if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
+	GASSERT(fCount<fCapacity);
+	return pdata;
+}
 
 
 // Remove entry
 template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
-  int p,x,h,n;
-  if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
-  OBJ* removed=NULL;
-  if(0<fCount){
-    h=GSTR_HASH(ky);
-    GASSERT(0<=h);
-    p=HASH1(h,fCapacity);
-    GASSERT(0<=p && p<fCapacity);
-    x=HASH2(h,fCapacity);
-    GASSERT(1<=x && x<fCapacity);
-    GASSERT(fCount<fCapacity);
-    n=fCapacity;
-    while(n && hash[p].hash!=-1){
-      if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
-        GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
-        hash[p].hash=-2;
-        hash[p].mark=false;
-        if (hash[p].keyalloc) GFREE((hash[p].key));
-        if (FREEDATA) (*fFreeProc)(hash[p].data);
-            else removed=(OBJ*)hash[p].data;
-        hash[p].key=NULL;
-        hash[p].data=NULL;
-        fCount--;
-        if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
-        GASSERT(fCount<fCapacity);
-        return removed;
-        }
-      p=(p+x)%fCapacity;
-      n--;
-      }
-    }
-  return removed;
-  }
+	int p,x,h,n;
+	if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
+	OBJ* removed=NULL;
+	if(0<fCount){
+		h=GSTR_HASH(ky);
+		GASSERT(0<=h);
+		p=HASH1(h,fCapacity);
+		GASSERT(0<=p && p<fCapacity);
+		x=HASH2(h,fCapacity);
+		GASSERT(1<=x && x<fCapacity);
+		GASSERT(fCount<fCapacity);
+		n=fCapacity;
+		while(n && hash[p].hash!=-1){
+			if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
+				GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
+				hash[p].hash=-2;
+				if (hash[p].keyalloc) GFREE((hash[p].key));
+				if (FREEDATA) (*fFreeProc)(hash[p].data);
+				else removed=(OBJ*)hash[p].data;
+				hash[p].key=NULL;
+				hash[p].data=NULL;
+				fCount--;
+				if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
+				GASSERT(fCount<fCapacity);
+				return removed;
+			}
+			p=(p+x)%fCapacity;
+			n--;
+		}
+	}
+	return removed;
+}
 
 
 // Find entry
 template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
-  int p,x,h,n;
-  if(!ky){ GError("GHash::find: NULL key argument.\n"); }
-  if(0<fCount){
-    h=GSTR_HASH(ky);
-    GASSERT(0<=h);
-    p=HASH1(h,fCapacity);
-    GASSERT(0<=p && p<fCapacity);
-    x=HASH2(h,fCapacity);
-    GASSERT(1<=x && x<fCapacity);
-    GASSERT(fCount<fCapacity);
-    n=fCapacity;
-    while(n && hash[p].hash!=-1){
-      if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
-        return true;
-        }
-      p=(p+x)%fCapacity;
-      n--;
-      }
-    }
-  return false;
+	int p,x,h,n;
+	if(!ky){ GError("GHash::find: NULL key argument.\n"); }
+	if(0<fCount){
+		h=GSTR_HASH(ky);
+		GASSERT(0<=h);
+		p=HASH1(h,fCapacity);
+		GASSERT(0<=p && p<fCapacity);
+		x=HASH2(h,fCapacity);
+		GASSERT(1<=x && x<fCapacity);
+		GASSERT(fCount<fCapacity);
+		n=fCapacity;
+		while(n && hash[p].hash!=-1){
+			if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
+				return true;
+			}
+			p=(p+x)%fCapacity;
+			n--;
+		}
+	}
+	return false;
 }
 
 
 template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
-  int p,x,h,n;
-  if(!ky){ GError("GHash::find: NULL key argument.\n"); }
-  if (fCount==0) return NULL;
-  h=GSTR_HASH(ky);
-  GASSERT(0<=h);
-  p=HASH1(h,fCapacity);
-  GASSERT(0<=p && p<fCapacity);
-  x=HASH2(h,fCapacity);
-  GASSERT(1<=x && x<fCapacity);
-  GASSERT(fCount<fCapacity);
-  n=fCapacity;
-#ifdef HASH_DBG_PRINT
-  int iterations=0;
-  int init_p=p;
-  int init_x=x;
-#endif
-  while(n && hash[p].hash!=-1){
-      if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
-        if (keyptr!=NULL) *keyptr = hash[p].key;
-#ifdef HASH_DBG_PRINT
-         GMessage("Found \t%s\t%d,%d,%d\t%d\t%d\t%d\n",
-             ky, h,init_p,init_x, iterations,  fCount, fCapacity);
-#endif
-        return (OBJ*)hash[p].data;
-        }
-      p=(p+x)%fCapacity;
-      n--;
-#ifdef HASH_DBG_PRINT
-      ++iterations;
+	int p,x,h,n;
+	if(!ky){ GError("GHash::find: NULL key argument.\n"); }
+	if (fCount==0) return NULL;
+	h=GSTR_HASH(ky);
+	GASSERT(0<=h);
+	p=HASH1(h,fCapacity);
+	GASSERT(0<=p && p<fCapacity);
+	x=HASH2(h,fCapacity);
+	GASSERT(1<=x && x<fCapacity);
+	GASSERT(fCount<fCapacity);
+	n=fCapacity;
+#ifdef HASH_DBG_PRINT
+	int iterations=0;
+	int init_p=p;
+	int init_x=x;
+#endif
+	while(n && hash[p].hash!=-1){
+		if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
+			if (keyptr!=NULL) *keyptr = hash[p].key;
+#ifdef HASH_DBG_PRINT
+			GMessage("Found \t%s\t%d,%d,%d\t%d\t%d\t%d\n",
+					ky, h,init_p,init_x, iterations,  fCount, fCapacity);
+#endif
+			return (OBJ*)hash[p].data;
+		}
+		p=(p+x)%fCapacity;
+		n--;
+#ifdef HASH_DBG_PRINT
+		++iterations;
 #endif
-  }
+	}
 #ifdef HASH_DBG_PRINT
-   GMessage("Nfound\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
-       ky, h,init_p,init_x, iterations,  fCount, fCapacity);
+	GMessage("Nfound\t%s\t%d,%d,%d\t%d\t%d\t%d\n",
+			ky, h,init_p,init_x, iterations,  fCount, fCapacity);
 #endif
-  return NULL;
- }
+	return NULL;
+}
 
 template <class OBJ> void GHash<OBJ>::startIterate() {// initialize a key iterator; call
- fCurrentEntry=0;
+	fCurrentEntry=0;
 }
 
 template <class OBJ> char* GHash<OBJ>::NextKey() {
- int pos=fCurrentEntry;
- while (pos<fCapacity && hash[pos].hash<0) pos++;
- if (pos==fCapacity) {
-                 fCurrentEntry=fCapacity;
-                 return NULL;
-                 }
-              else {
-                 fCurrentEntry=pos+1;
-                 return hash[pos].key;
-                 }
+	int pos=fCurrentEntry;
+	while (pos<fCapacity && hash[pos].hash<0) pos++;
+	if (pos==fCapacity) {
+		fCurrentEntry=fCapacity;
+		return NULL;
+	}
+	else {
+		fCurrentEntry=pos+1;
+		return hash[pos].key;
+	}
 }
 
 template <class OBJ> OBJ* GHash<OBJ>::NextData() {
- int pos=fCurrentEntry;
- while (pos<fCapacity && hash[pos].hash<0) pos++;
- if (pos==fCapacity) {
-                 fCurrentEntry=fCapacity;
-                 return NULL;
-                 }
-              else {
-                 fCurrentEntry=pos+1;
-                 return (OBJ*)hash[pos].data;
-                 }
+	int pos=fCurrentEntry;
+	while (pos<fCapacity && hash[pos].hash<0) pos++;
+	if (pos==fCapacity) {
+		fCurrentEntry=fCapacity;
+		return NULL;
+	}
+	else {
+		fCurrentEntry=pos+1;
+		return (OBJ*)hash[pos].data;
+	}
 
 }
 
 template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
- int pos=fCurrentEntry;
- while (pos<fCapacity && hash[pos].hash<0) pos++;
- if (pos==fCapacity) {
-                 fCurrentEntry=fCapacity;
-                 nextkey=NULL;
-                 return NULL;
-                 }
-              else {
-                 fCurrentEntry=pos+1;
-                 nextkey=hash[pos].key;
-                 return (OBJ*)hash[pos].data;
-                 }
+	int pos=fCurrentEntry;
+	while (pos<fCapacity && hash[pos].hash<0) pos++;
+	if (pos==fCapacity) {
+		fCurrentEntry=fCapacity;
+		nextkey=NULL;
+		return NULL;
+	}
+	else {
+		fCurrentEntry=pos+1;
+		nextkey=hash[pos].key;
+		return (OBJ*)hash[pos].data;
+	}
 
 }
 
 
 // Get first non-empty entry
 template <class OBJ> int GHash<OBJ>::First() const {
-  int pos=0;
-  while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
-  GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
-  return pos;
-  }
+	int pos=0;
+	while(pos<fCapacity){ if(0<=hash[pos].hash) break; pos++; }
+	GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
+	return pos;
+}
 
 // Get last non-empty entry
 template <class OBJ> int GHash<OBJ>::Last() const {
-  int pos=fCapacity-1;
-  while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
-  GASSERT(pos<0 || 0<=hash[pos].hash);
-  return pos;
-  }
+	int pos=fCapacity-1;
+	while(0<=pos){ if(0<=hash[pos].hash) break; pos--; }
+	GASSERT(pos<0 || 0<=hash[pos].hash);
+	return pos;
+}
 
 
 // Find next valid entry
 template <class OBJ> int GHash<OBJ>::Next(int pos) const {
-  GASSERT(0<=pos && pos<fCapacity);
-  while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
-  GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
-  return pos;
-  }
+	GASSERT(0<=pos && pos<fCapacity);
+	while(++pos <= fCapacity-1){ if(0<=hash[pos].hash) break; }
+	GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
+	return pos;
+}
 
 
 // Find previous valid entry
 template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
-  GASSERT(0<=pos && pos<fCapacity);
-  while(--pos >= 0){ if(0<=hash[pos].hash) break; }
-  GASSERT(pos<0 || 0<=hash[pos].hash);
-  return pos;
-  }
+	GASSERT(0<=pos && pos<fCapacity);
+	while(--pos >= 0){ if(0<=hash[pos].hash) break; }
+	GASSERT(pos<0 || 0<=hash[pos].hash);
+	return pos;
+}
 
 
 // Remove all
 template <class OBJ> void GHash<OBJ>::Clear(){
-  int i;
-  for(i=0; i<fCapacity; i++){
-    if(hash[i].hash>=0){
-      if (hash[i].keyalloc) GFREE((hash[i].key));
-      if (FREEDATA)
-            (*fFreeProc)(hash[i].data);
-      }
-    }
-  GFREE(hash);
-  GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
-  //reinitialize it
-  for (i=0; i<DEF_HASH_SIZE; i++)
-         hash[i].hash=-1; //this will be an indicator for 'empty' entries
-  fCapacity=DEF_HASH_SIZE;
-  fCount=0;
-  }
-
-
-// Save data
-/*
-void GHash::Save(Stream& store) const {
-  Object::save(store);
-  store << fCapacity;
-  store << fCount;
-  for(int i=0; i<fCapacity; i++){
-    store << hash[i].hash;
-    if(hash[i].hash>=0){
-      uint len=strlen(hash[i].key);
-      store << len;
-      store << hash[i].mark;
-      store.save(hash[i].key,len);
-      }
-    }
-  }
-
-
-// Load data
-void GHash::Load(Stream& store){
-  Object::load(store);
-  store >> fCapacity;
-  store >> fCount;
-  for(int i=0; i<fCapacity; i++){
-    store >> hash[i].hash;
-    if(hash[i].hash>=0){
-      uint len;
-      store >> len;
-      store >> hash[i].mark;
-      GMALLOC(hash[i].key,len+1);
-      store.load(hash[i].key,len);
-      hash[i].key[len]='\0';
-      }
-    }
-  }
-*/
+	int i;
+	for(i=0; i<fCapacity; i++){
+		if(hash[i].hash>=0){
+			if (hash[i].keyalloc) GFREE((hash[i].key));
+			if (FREEDATA)
+				(*fFreeProc)(hash[i].data);
+		}
+	}
+	GFREE(hash);
+	GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
+	//reinitialize it
+	for (i=0; i<DEF_HASH_SIZE; i++)
+		hash[i].hash=-1; //this will be an indicator for 'empty' entries
+	fCapacity=DEF_HASH_SIZE;
+	fCount=0;
+}
 
 // Destroy table
 template <class OBJ> GHash<OBJ>::~GHash(){
-  for(int i=0; i<fCapacity; i++){
-    if(hash[i].hash>=0){
-      if (hash[i].keyalloc) GFREE((hash[i].key));
-      if (FREEDATA) (*fFreeProc)(hash[i].data);
-      }
-    }
-  GFREE(hash);
-  }
+	for(int i=0; i<fCapacity; i++){
+		if(hash[i].hash>=0){
+			if (hash[i].keyalloc) GFREE((hash[i].key));
+			if (FREEDATA) (*fFreeProc)(hash[i].data);
+		}
+	}
+	GFREE(hash);
+}
 
 class GStrSet:public GHash<int> {
 protected:
diff -Nru stringtie-2.1.1+ds/gclib/GIntHash.hh stringtie-2.1.2+ds/gclib/GIntHash.hh
--- stringtie-2.1.1+ds/gclib/GIntHash.hh	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GIntHash.hh	2020-04-21 20:54:10.000000000 +0000
@@ -2,15 +2,15 @@
 #define _GHASHT_HH
 #include "GBase.h"
 //----------------------------------------------
-//  Hash table templates based on Jeff Preshing's code
+//  Int Hash table templates
 // ---------------------------------------------
 //  Maps 32-bit integers to user data
 //  Uses open addressing with linear probing.
 //  In the m_cells array, key = 0 is reserved to indicate an unused cell.
 //  Actual value for key 0 (if any) is stored in m_zeroCell.
 //  The hash table automatically doubles in size when it becomes 75% full.
-//  The hash table never shrinks in size, even after Clear(),
-//  unless you explicitly call Compact().
+//  The hash table never shrinks in size
+//  unless you explicitly call Clear() or Compact().
 //----------------------------------------------
 inline uint32_t upper_power_of_two(uint32_t v) {
     v--;
diff -Nru stringtie-2.1.1+ds/gclib/GList.hh stringtie-2.1.2+ds/gclib/GList.hh
--- stringtie-2.1.1+ds/gclib/GList.hh	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/gclib/GList.hh	2020-04-21 20:54:10.000000000 +0000
@@ -481,7 +481,7 @@
  return result;
 }
 
-//by default, it deletes the item if it has an equal in the list!
+//by default, it deletes item if it an equal is found in the list!
 //returns the existing equal (==) object if it's in the list already
 //or returns the item itself if it's unique (and adds it)
 template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
@@ -490,21 +490,20 @@
  if (Found(item, r)) {
     if (deleteIfFound && (pointer)item != (pointer)(this->fList[r])) {
        this->deallocate_item(item);
-       }
+    }
     if (fidx!=NULL) *fidx=r;
     return this->fList[r]; //found
-    }
+ }
  //not found:
  if (SORTED) {
    //Found() set result to the position where the item should be inserted:
    sortInsert(r, item);
-   }
-  else {
+ } else {
    r = this->fCount;
    if (r==this->fCapacity) GPVec<OBJ>::Grow();
    this->fList[r]=item;
    this->fCount++;
-   }
+ }
  if (fidx!=NULL) *fidx=r;
  return item;
 }
diff -Nru stringtie-2.1.1+ds/README.md stringtie-2.1.2+ds/README.md
--- stringtie-2.1.1+ds/README.md	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/README.md	2020-04-21 20:54:10.000000000 +0000
@@ -1,7 +1,9 @@
+For StringTie's manual and prepared source and binary packages, please refer to the official website: <https://ccb.jhu.edu/software/stringtie>
+
 ## Obtaining and installing StringTie
 
 Source and binary packages for this software, along with a small test data set 
-can be directly downloaded from the <a href="https://github.com/gpertea/stringtie/releases">Releases</a> page for this repository. 
+can be directly downloaded from the [Releases](https://github.com/gpertea/stringtie/releases) page for this repository. 
 StringTie is compatible with a wide range of Linux and Apple OS systems (going as far back as RedHat Enterprise Linux 5.0 and OS X 10.7). 
 The main program (StringTie) does not have any other library dependencies and in order to compile it from source it requires only 
 a C++ compiler which supports the C++ 0x standard (GCC 4.5 or newer).
diff -Nru stringtie-2.1.1+ds/rlink.cpp stringtie-2.1.2+ds/rlink.cpp
--- stringtie-2.1.1+ds/rlink.cpp	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/rlink.cpp	2020-04-21 20:54:10.000000000 +0000
@@ -46,10 +46,10 @@
 
 
 CJunction* add_junction(int start, int end, GList<CJunction>& junction, char strand) {
+
 	int oidx=-1;
 	CJunction *nj=NULL;
 
-	//CJunction* nj=junction.AddIfNew(new CJunction(start, end, strand), true, &oidx);
 	CJunction jn(start, end, strand);
 	if (junction.Found(&jn, oidx)) {
 		nj=junction.Get(oidx);
@@ -335,7 +335,12 @@
 		for (int i=0;i<brec.exons.Count();i++) {
 			readaln->len+=brec.exons[i].len();
 			if(i) {
-				CJunction* nj=add_junction(brec.exons[i-1].end, brec.exons[i].start, junction, strand); // unstranded junction might get added even though it might match a stranded one
+				if(!junction.Count()) { // always add null junction first
+					CJunction *nullj=new CJunction(0, 0, 0);
+					junction.Add(nullj);
+				}
+				//CJunction* nj=add_junction(brec.exons[i-1].end, brec.exons[i].start, junction, strand); // unstranded junction might get added even though it might match a stranded one
+				CJunction* nj=junction.AddIfNew(new CJunction(brec.exons[i-1].end, brec.exons[i].start, strand), true);
 				if (alndata.juncs.Count())
 					nj->guide_match=alndata.juncs[i-1]->guide_match;
 				if (nj) {
@@ -359,7 +364,11 @@
 		*/
 	}
 
-
+	/*fprintf(stderr,"Add read %s with strand=%d and exons:",brec.name(),strand);
+	for (int i=0;i<brec.exons.Count();i++) {
+		fprintf(stderr," %d-%d", brec.exons[i].start, brec.exons[i].end);
+	}
+	fprintf(stderr,"\n");*/
 	//fprintf(stderr,"Read %s is at n=%d with unitig_cov=%f and strand=%d\n",brec.name(),n,unitig_cov,strand);
 
 	if((int)brec.end>currentend) {
@@ -412,11 +421,8 @@
 	//if (brec.isProperlyPaired()) {  //only consider mate pairing data if mates  are properly paired
 		int pairstart=brec.mate_start();
 		if (currentstart<=pairstart) { // if pairstart is in a previous bundle I don't care about it
-
 			GStr readname(brec.name());
 			GStr id(readname); // init id with readname
-			//id+=':';id+=hi; // the HI tag is not stored in all aligners, like HISAT
-
 			if(pairstart<=readstart) { // if I've seen the pair already <- I might not have seen it yet because the pair starts at the same place
 				id+='-';id+=pairstart;
 				id+=".=";id+=hi; // (!) this suffix actually speeds up the hash by improving distribution!
@@ -755,11 +761,9 @@
 
 	if(group1->color<group2->color) {
 	   eqcol[group2->color]=group1->color;
-	   // print STDERR "in merge set eqcol[",$$group2[2],"]=",$$eqcol{$$group1[2]},"\n";
 	}
 	else if(group1->color>group2->color) {
 		eqcol[group1->color]=group2->color;
-		// print STDERR "in merge set eqcol[",$$group1[2],"]=",$$group2[2],"\n";
 		group1->color=group2->color;
 	}
 
@@ -1734,73 +1738,86 @@
 	while(nle<lend.Count() && lend[nle].predno<(int)graphnode->start) nle++;
 	while((nls<lstart.Count() && lstart[nls].predno<nodeend) || (nle<lend.Count() && lend[nle].predno<nodeend)){
 		if(nle>=lend.Count() || (nls<lstart.Count() && lstart[nls].predno<=lend[nle].predno)) { // start comes first
-			if((startcov || lstart[nls].predno>(int)(graphnode->start+longintronanchor)) &&(endcov || lstart[nls].predno<nodeend+(int)longintronanchor)) {
+			float tmpcov=0;
+			if((startcov || lstart[nls].predno>(int)(graphnode->start+longintronanchor)) &&(endcov || lstart[nls].predno<nodeend+(int)longintronanchor)) { // start and ends can not be too close to a junction
 				int startpos=lstart[nls].predno-refstart;
-				float tmpcov=(get_cov(1,startpos,startpos+CHI_THR-1,bpcov)-get_cov(2-2*s,startpos,startpos+CHI_THR-1,bpcov)-
-										get_cov(1,startpos-CHI_THR,startpos-1,bpcov)+get_cov(2-2*s,startpos-CHI_THR,startpos-1,bpcov))/(DROP*CHI_THR);
-				if(tmpcov>0) {
-					tmpcov+=trthr;
-
-					uint prevend=graphnode->end;
-					graphnode->end=lstart[nls].predno-1;
-					CGraphnode *prevnode=graphnode;
-					graphnode=create_graphnode(s,g,lstart[nls].predno,prevend,graphno,bundlenode,bundle2graph,no2gnode);
-					graphno++;
-					source->child.Add(graphnode->nodeid);  // this node is the child of source
-					graphnode->parent.Add(source->nodeid); // this node has source as parent
-					//fprintf(stderr,"trim edge 0-%d with sourceabund=%f at position=%d\n",graphnode->nodeid,sourceabundance,graphnode->start);
-					prevnode->child.Add(graphnode->nodeid); // this node is the child of previous node
+				int winstart=startpos-CHI_THR;
+				if(winstart<0) winstart=0;
+				int winend=startpos+CHI_THR-1;
+				if(winend>=bpcov->Count()) winend=bpcov->Count()-1;
+				tmpcov=(get_cov(1,startpos,winend,bpcov)-get_cov(2-2*s,startpos,winend,bpcov)-
+						get_cov(1,winstart,startpos-1,bpcov)+get_cov(2-2*s,winstart,startpos-1,bpcov))/(DROP*CHI_THR);
+			}
+			if(tmpcov<=0 && lstart[nls].cov<0) tmpcov=ERROR_PERC; // to re-estimate later in process_transfrags
+			if(tmpcov>0) {
+				tmpcov+=trthr;
+				uint prevend=graphnode->end;
+				graphnode->end=lstart[nls].predno-1;
+				CGraphnode *prevnode=graphnode;
+				graphnode=create_graphnode(s,g,lstart[nls].predno,prevend,graphno,bundlenode,bundle2graph,no2gnode);
+				graphnode->hardstart=true;
+				graphno++;
+				source->child.Add(graphnode->nodeid);  // this node is the child of source
+				graphnode->parent.Add(source->nodeid); // this node has source as parent
+				//fprintf(stderr,"trim edge 0-%d with sourceabund=%f at position=%d\n",graphnode->nodeid,sourceabundance,graphnode->start);
+				prevnode->child.Add(graphnode->nodeid); // this node is the child of previous node
 					graphnode->parent.Add(prevnode->nodeid); // this node has as parent the previous node
-					//fprintf(stderr,"trim edge %d-%d\n",prevnode->nodeid,graphnode->nodeid);
-					float tmp=graphno-1;
-					futuretr.cAdd(0.0);
-					futuretr.Add(tmp);
-					//tmp=lstart[nls].cov+trthr;
-					futuretr.Add(tmpcov);
-					//fprintf(stderr,"s=%d trim edge 0-%d with sourceabund=%f at position=%d lstart->cov=%f %f (%f %f) %f (%f %f)\n",s,graphnode->nodeid,tmpcov,
-					//		graphnode->start,lstart[nls].cov,get_cov(2*s,startpos,startpos+CHI_THR-1,bpcov),get_cov(1,startpos,startpos+CHI_THR-1,bpcov),get_cov(2-2*s,startpos,startpos+CHI_THR-1,bpcov),
-					//		get_cov(2*s,startpos-CHI_THR,startpos-1,bpcov),get_cov(1,startpos-CHI_THR,startpos-1,bpcov),get_cov(2-2*s,startpos-CHI_THR,startpos-1,bpcov));
-					tmp=prevnode->nodeid;futuretr.Add(tmp);
-					tmp=graphnode->nodeid;futuretr.Add(tmp);
-					tmp=trthr;futuretr.Add(tmp);
-					// COUNT 2 EDGES HERE
-					edgeno+=2;
-					//fprintf(stderr,"edgeno=%d\n",edgeno);
-					startcov=false;
-				}
+				//fprintf(stderr,"trim edge %d-%d\n",prevnode->nodeid,graphnode->nodeid);
+				float tmp=graphno-1;
+				futuretr.cAdd(0.0);
+				futuretr.Add(tmp);
+				//tmp=lstart[nls].cov+trthr;
+				futuretr.Add(tmpcov);
+				//fprintf(stderr,"s=%d trim edge 0-%d with sourceabund=%f at position=%d lstart->cov=%f %f (%f %f) %f (%f %f)\n",s,graphnode->nodeid,tmpcov,
+				//		graphnode->start,lstart[nls].cov,get_cov(2*s,startpos,startpos+CHI_THR-1,bpcov),get_cov(1,startpos,startpos+CHI_THR-1,bpcov),get_cov(2-2*s,startpos,startpos+CHI_THR-1,bpcov),
+				//		get_cov(2*s,startpos-CHI_THR,startpos-1,bpcov),get_cov(1,startpos-CHI_THR,startpos-1,bpcov),get_cov(2-2*s,startpos-CHI_THR,startpos-1,bpcov));
+				tmp=prevnode->nodeid;futuretr.Add(tmp);
+				tmp=graphnode->nodeid;futuretr.Add(tmp);
+				tmp=trthr;futuretr.Add(tmp);
+				// COUNT 2 EDGES HERE
+				edgeno+=2;
+				//fprintf(stderr,"edgeno=%d\n",edgeno);
+				startcov=false;
 			}
 			nls++;
 		}
 		else if(nls>=lstart.Count() || (nle<lend.Count() && lend[nle].predno<lstart[nls].predno)) { // end comes first
+			float tmpcov=0;
 			if((!startcov || lend[nle].predno>(int)(graphnode->start+longintronanchor)) &&(!endcov || lend[nle].predno<nodeend+(int)longintronanchor)) {
 				int endpos=lend[nle].predno-refstart;
-				float tmpcov=(get_cov(1,endpos-CHI_THR+1,endpos,bpcov)-get_cov(2-2*s,endpos-CHI_THR+1,endpos,bpcov)-
-						get_cov(1,endpos+1,endpos+CHI_THR,bpcov)+get_cov(2-2*s,endpos+1,endpos+CHI_THR,bpcov))/(DROP*CHI_THR);
-				if(tmpcov>0) {
-					tmpcov+=trthr;
-					float tmp=graphno-1;
-					uint prevend=graphnode->end;
-					graphnode->end=lend[nle].predno;
-					CGraphnode *prevnode=graphnode;
-					graphnode=create_graphnode(s,g,lend[nle].predno+1,prevend,graphno,bundlenode,bundle2graph,no2gnode);
-					graphno++;
-					prevnode->child.Add(graphnode->nodeid); // this node is the child of previous node
-					graphnode->parent.Add(prevnode->nodeid); // this node has as parent the previous node
-					//fprintf(stderr,"trim edge %d-%d with sinkabundance=%f at position=%d\n",prevnode->nodeid,graphnode->nodeid,sinkabundance,prevnode->end);
-					sink->parent.Add(prevnode->nodeid); // prevnode is the parent of sink
-					// remember to create transfrag as well -> I don't know the gno yet, so I can not create it here
-					futuretr.Add(tmp);
-					futuretr.cAdd(-1.0);
-					//tmp=lend[nle].cov+trthr;
-					futuretr.Add(tmpcov);
-					tmp=prevnode->nodeid;futuretr.Add(tmp);
-					tmp=graphnode->nodeid;futuretr.Add(tmp);
-					tmp=trthr;futuretr.Add(tmp);
-					// COUNT 2 EDGES HERE
-					edgeno+=2;
-					//fprintf(stderr,"edgeno=%d\n",edgeno);
-					startcov=true;
-				}
+				int winstart=endpos-CHI_THR+1;
+				if(winstart<0) winstart=0;
+				int winend=endpos+CHI_THR;
+				if(winend>=bpcov->Count()) winend=bpcov->Count()-1;
+				tmpcov=(get_cov(1,winstart,endpos,bpcov)-get_cov(2-2*s,winstart,endpos,bpcov)-
+						get_cov(1,endpos+1,winend,bpcov)+get_cov(2-2*s,endpos+1,winend,bpcov))/(DROP*CHI_THR);
+			}
+			if(tmpcov<=0 && lend[nle].cov<0) tmpcov=ERROR_PERC; // to re-estimate later in process_transfrags
+			if(tmpcov>0) {
+				tmpcov+=trthr;
+				float tmp=graphno-1;
+				uint prevend=graphnode->end;
+				graphnode->end=lend[nle].predno;
+				CGraphnode *prevnode=graphnode;
+				graphnode->hardend=true;
+				graphnode=create_graphnode(s,g,lend[nle].predno+1,prevend,graphno,bundlenode,bundle2graph,no2gnode);
+				graphno++;
+				prevnode->child.Add(graphnode->nodeid); // this node is the child of previous node
+				graphnode->parent.Add(prevnode->nodeid); // this node has as parent the previous node
+				//fprintf(stderr,"trim edge %d-%d with sinkabundance=%f at position=%d\n",prevnode->nodeid,graphnode->nodeid,sinkabundance,prevnode->end);
+				sink->parent.Add(prevnode->nodeid); // prevnode is the parent of sink
+				// remember to create transfrag as well -> I don't know the gno yet, so I can not create it here
+				futuretr.Add(tmp);
+				futuretr.cAdd(-1.0);
+				//tmp=lend[nle].cov+trthr;
+				futuretr.Add(tmpcov);
+				tmp=prevnode->nodeid;futuretr.Add(tmp);
+				tmp=graphnode->nodeid;futuretr.Add(tmp);
+				tmp=trthr;futuretr.Add(tmp);
+				// COUNT 2 EDGES HERE
+				edgeno+=2;
+				//fprintf(stderr,"edgeno=%d\n",edgeno);
+				startcov=true;
 			}
 			nle++;
 		}
@@ -2482,6 +2499,7 @@
 		int &edgeno,int &lastgpos,GArray<GEdge>& guideedge, int refend=0){
 
 	GVec<float>* bpcov = bdata->bpcov; // I might want to use a different type of data for bpcov to save memory in the case of very long bundles
+	GPVec<GPtFeature>& feature = bdata->ptfs; // these are point features (confirmed starts/stops)
 
 	CGraphnode* source=new CGraphnode(0,0,0);
 	no2gnode[s][g].Add(source);
@@ -2546,6 +2564,8 @@
 
 	//int seenjunc=0;
 
+	int f=0; // feature index
+
 	uint bundle_end=bnode[bundle->lastnodeid]->end;
 	while(bundlenode!=NULL) {
 
@@ -2562,134 +2582,190 @@
 	      nje++;
 	    }
 
-	    // see if I need to adjust the start to ignore little hanging pieces that make no sense
-	    if(!end) {
-	    	while(nje<njunctions && ejunction[nje]->strand+1!=2*s) nje++; // skip all junctions that are not on the same strand
-	    	if(nje<njunctions && ejunction[nje]->end - currentstart < junctionsupport) { // there is a junction ending soon here
-	    		float covleft=get_cov(1,currentstart-refstart,ejunction[nje]->end-1-refstart,bpcov);
-	    		float covright=get_cov(1,ejunction[nje]->end-refstart,2*ejunction[nje]->end - currentstart-1-refstart,bpcov);
-	    		if(covleft<covright*(1-ERROR_PERC)) { // adjust start here if needed
-	    			currentstart=ejunction[nje]->end;
-	    			// I have to check ending junctions here again
-	    			while(nje<njunctions && ejunction[nje]->end<=currentstart) { // read all junction ends at or before the current start -> assuming there are any (at this point, smaller junction ends should not be relevant to this bundle/currentstart
-	    				if(ejunction[nje]->end==currentstart && (ejunction[nje]->strand+1) == 2*s) { // junction ends at current start and is on the same strand and not deleted
-	    					end=1;
+    	GVec<CPred> lstart;
+    	GVec<CPred> lend;
+    	int fs=-1; // first start feature index in lstart
+    	int fe=-1; // first end feature index in lend
+	    if(longreads) {
+	    	if(feature.Count()) { // I already know the features
+	    		while(f<feature.Count() && feature[f]->coord<currentstart) f++;
+	    	}
+	    	if(endbundle-currentstart<=CHI_WIN+CHI_THR) { // I only consider the given features for this case
+	    		while(f<feature.Count() && feature[f]->coord<=endbundle) {
+	    			if(feature[f]->strand==2*s-1) {
+	    				if(feature[f]->ftype==GPFT_TSS) { // this is a start
+	    					CPred p(feature[f]->coord,-1); // start here -> a -1 indicates this is a given point feature
+	    					if(fs<0) fs=lstart.Count();
+	    					lstart.Add(p);
+	    				}
+	    				else if(feature[f]->ftype==GPFT_CPAS) { // this is an end
+	    					CPred p(feature[f]->coord,-1); // start here -> a -1 indicates this is a given point feature
+	    					if(fe<0) fe=lend.Count();
+	    					lend.Add(p);
 	    				}
-	    				nje++;
 	    			}
+	    			f++;
 	    		}
 	    	}
-	    }
-
-	    //fprintf(stderr,"create graph 1\n");
-	    CGraphnode *graphnode=create_graphnode(s,g,currentstart,endbundle,graphno,bundlenode,bundle2graph,no2gnode); // creates a $graphno graphnode  with start at bundle start, and end at bundle end
-	    graphno++;
+	    	else { // populate starts and ends for the bundlenode here if I have enough space to determine starts/ends in the bundlenode
 
-
-    	GVec<CPred> lstart;
-    	GVec<CPred> lend;
-	    if(longreads && endbundle-currentstart>CHI_WIN+CHI_THR) { // populate starts and ends for the bundlenode here if I have enough space to determine starts/ends in the bundlenode
-	    	GVec<float> diffval;
-	    	float sumstartleft=0;
-	    	float sumendleft=0;
-	    	float sumstartright=0;
-	    	float sumendright=0;
-	    	float lastcov=get_cov(2*s,longintronanchor-1,longintronanchor-1,bpcov);
-	    	for(int i=currentstart+longintronanchor;i<(int)currentstart+CHI_THR+(int)longintronanchor;i++) {
-	    		float icov=get_cov(2*s,i-refstart,i-refstart,bpcov);
-	    		float diff=icov-lastcov;
-	    		diffval.Add(diff);
-	    		if(diff>0) sumstartleft+=diff;
-	    		else sumendleft-=diff;
-	    		lastcov=icov;
-	    	}
-	    	for(int i=currentstart+CHI_THR+longintronanchor;i<(int)currentstart+CHI_WIN+(int)longintronanchor;i++) {
-	    		float icov=get_cov(2*s,i-refstart,i-refstart,bpcov);
-	    		float diff=icov-lastcov;
-	    		diffval.Add(diff);
-	    		if(diff>0) sumstartright+=diff;
-	    		else sumendright-=diff;
-	    		lastcov=icov;
-	    	}
-	    	int covend=endbundle-refstart;
-	    	if(bpcov->Count()<(int)endbundle) covend=bpcov->Count();
-	    	covend-=longintronanchor;
-	    	//fprintf(stderr,"covend=%d\n",covend);
-	    	for(int i=currentstart+CHI_WIN+longintronanchor-refstart;i<covend;i++) {
-	    		int m=(i-CHI_THR-longintronanchor)%CHI_WIN;
-	    		//fprintf(stderr,"m=%d ",i-CHI_THR+refstart);
-	    		if(diffval[m]>1/ERROR_PERC) { // starts here
-	    			if(sumstartleft<sumstartright*ERROR_PERC) { // potential start here
-	    				bool addstart=true;
-	    				int istart=refstart+i-CHI_THR;
-	    				if(lstart.Count()) {
-	    					int j=lstart.Count()-1;
-	    					if(istart-lstart[j].predno<CHI_WIN) {
-	    						addstart=false;
-		    					if(diffval[m]>lstart[j].cov) {
-		    						lstart[j].predno=istart;
-		    						lstart[j].cov=diffval[m];
-		    					}
-	    					}
-	    				}
-	    				if(addstart) {
-	    					CPred p(istart,diffval[m]);
+	    		while(f<feature.Count() && feature[f]->coord<currentstart+CHI_WIN+CHI_THR+longintronanchor) { // see if there are any features before the start
+	    			if(feature[f]->strand==2*s-1) {
+	    				if(feature[f]->ftype==GPFT_TSS) { // this is a start
+	    					CPred p(feature[f]->coord,-1); // start here -> a -1 indicates this is a given point feature
+	    					if(fs<0) fs=lstart.Count();
 	    					lstart.Add(p);
 	    				}
+	    				else if(feature[f]->ftype==GPFT_CPAS) { // this is an end
+	    					CPred p(feature[f]->coord,-1); // start here -> a -1 indicates this is a given point feature
+	    					if(fe<0) fe=lend.Count();
+	    					lend.Add(p);
+	    				}
 	    			}
+	    			f++;
 	    		}
-	    		float icov=get_cov(2*s,i,i,bpcov);
-	    		//fprintf(stderr," dfV[%d]=%.1f cov[%d]=%.1f",s,diffval[m],i+refstart,icov);
-	    		int p=(m+CHI_THR)%CHI_WIN;
-	    		float diff=icov-lastcov;
-	    		//fprintf(stderr," [%s](sL=%.1f sR=%.1f sL/sR=",s,sumstartleft,sumstartright);
-	    		//if(diffval[m]>1/ERROR_PERC) fprintf(stderr,"%.1f",sumstartleft/sumstartright);
-	    		//else fprintf(stderr,"*");
-	    		if(diffval[p]>0) sumstartleft-=diffval[p];
-	    		else sumendleft+=diffval[p];
-	    		if(diffval[m]>0) {
-	    			sumstartleft+=diffval[m];
-	    			sumstartright-=diffval[m];
+
+	    		GVec<float> diffval;
+	    		float sumstartleft=0;
+	    		float sumendleft=0;
+	    		float sumstartright=0;
+	    		float sumendright=0;
+	    		float lastcov=get_cov(2*s,longintronanchor-1,longintronanchor-1,bpcov); // this only computes signed coverage (should I include neutral coverage here too?)
+	    		for(int i=currentstart+longintronanchor;i<(int)currentstart+CHI_THR+(int)longintronanchor;i++) {
+	    			float icov=get_cov(2*s,i-refstart,i-refstart,bpcov);
+	    			float diff=icov-lastcov; // compute how many reads start at position i
+	    			diffval.Add(diff);
+	    			if(diff>0) sumstartleft+=diff;
+	    			else sumendleft-=diff;
+	    			lastcov=icov;
 	    		}
-	    		else {
-	    			sumendleft-=diffval[m];
-	    			sumendright+=diffval[m];
+	    		for(int i=currentstart+CHI_THR+longintronanchor;i<(int)currentstart+CHI_WIN+(int)longintronanchor;i++) {
+	    			float icov=get_cov(2*s,i-refstart,i-refstart,bpcov);
+	    			float diff=icov-lastcov;
+	    			diffval.Add(diff);
+	    			if(diff>0) sumstartright+=diff;
+	    			else sumendright-=diff;
+	    			lastcov=icov;
 	    		}
-	    		diffval[p]=diff;
-	    		if(diff>0) sumstartright+=diff;
-	    		else sumendright-=diff;
-	    		lastcov=icov;
-	    		//fprintf(stderr," eL=%.1f eR=%.1f eR/eL=",sumendleft,sumendright);
-	    		if(diffval[m]<-1/ERROR_PERC) { // ends here
-	    			if(sumendleft*ERROR_PERC>sumendright) {
-	    				bool addend=true;
-	    				int istart=refstart+i-CHI_THR-1;
-	    				if(lend.Count()) {
-	    					int j=lend.Count()-1;
-	    					if(istart-lend[j].predno<CHI_WIN) {
-	    						addend=false;
-	    						if(-diffval[m]>=lend[j].cov) {
-	    							lend[j].predno=istart;
-	    							lend[j].cov=-diffval[m];
+	    		int covend=endbundle-refstart;
+	    		if(bpcov->Count()<(int)endbundle) covend=bpcov->Count();
+	    		covend-=longintronanchor;
+	    		//fprintf(stderr,"covend=%d\n",covend);
+	    		for(int i=currentstart+CHI_WIN+longintronanchor-refstart;i<covend;i++) {
+
+	    			// add features at position i
+	    			if((uint)i+CHI_THR+refstart<endbundle) while(f<feature.Count() && feature[f]->coord==(uint)i+CHI_THR+refstart) { // see if there are any features before the start
+	    				if(feature[f]->strand==2*s-1) {
+	    					if(feature[f]->ftype==GPFT_TSS) { // this is a start
+	    						CPred p(feature[f]->coord,-1); // start here -> a -1 indicates this is a given point feature
+	    						lstart.Add(p);
+	    					}
+	    					else if(feature[f]->ftype==GPFT_CPAS) { // this is an end
+	    						CPred p(feature[f]->coord,-1); // start here -> a -1 indicates this is a given point feature
+	    						lend.Add(p);
+	    					}
+	    				}
+	    				f++;
+	    			}
+
+	    			int m=(i-CHI_THR-longintronanchor)%CHI_WIN;
+	    			//fprintf(stderr,"m=%d ",i-CHI_THR+refstart);
+					if(diffval[m]>1/ERROR_PERC) { // starts here
+	    				if(sumstartleft<sumstartright*ERROR_PERC) { // potential start here
+	    					bool addstart=true;
+	    					int istart=refstart+i-CHI_THR;
+	    					if(lstart.Count()) {
+	    						int j=lstart.Count()-1;
+	    						if(abs(istart-lstart[j].predno)<CHI_WIN) {
+	    							addstart=false;
+	    							if(lstart[j].cov>0 && diffval[m]>lstart[j].cov) {
+	    								lstart[j].predno=istart;
+	    								lstart[j].cov=diffval[m];
+	    							}
 	    						}
 	    					}
+	    					if(addstart) {
+	    						CPred p(istart,diffval[m]);
+	    						lstart.Add(p);
+	    					}
 	    				}
-	    				if(addend) {
-	    					CPred p(refstart+i-CHI_THR-1,-diffval[m]);
-	    					lend.Add(p);
+	    			}
+	    			float icov=get_cov(2*s,i,i,bpcov);
+	    			//fprintf(stderr," dfV[%d]=%.1f cov[%d]=%.1f",s,diffval[m],i+refstart,icov);
+	    			int p=(m+CHI_THR)%CHI_WIN;
+	    			float diff=icov-lastcov;
+	    			//fprintf(stderr," [%s](sL=%.1f sR=%.1f sL/sR=",s,sumstartleft,sumstartright);
+	    			//if(diffval[m]>1/ERROR_PERC) fprintf(stderr,"%.1f",sumstartleft/sumstartright);
+	    			//else fprintf(stderr,"*");
+	    			if(diffval[p]>0) sumstartleft-=diffval[p];
+	    			else sumendleft+=diffval[p];
+	    			if(diffval[m]>0) {
+	    				sumstartleft+=diffval[m];
+	    				sumstartright-=diffval[m];
+	    			}
+	    			else {
+	    				sumendleft-=diffval[m];
+	    				sumendright+=diffval[m];
+	    			}
+	    			diffval[p]=diff;
+	    			if(diff>0) sumstartright+=diff;
+	    			else sumendright-=diff;
+	    			lastcov=icov;
+	    			//fprintf(stderr," eL=%.1f eR=%.1f eR/eL=",sumendleft,sumendright);
+					if(diffval[m]<-1/ERROR_PERC) { // ends here
+	    				if(sumendleft*ERROR_PERC>sumendright) {
+	    					bool addend=true;
+	    					int istart=refstart+i-CHI_THR-1;
+	    					if(lend.Count()) {
+	    						int j=lend.Count()-1;
+	    						if(abs(istart-lend[j].predno)<CHI_WIN) {
+	    							addend=false;
+	    							if(lend[j].cov>0 && -diffval[m]>=lend[j].cov) {
+	    								lend[j].predno=istart;
+	    								lend[j].cov=-diffval[m];
+	    							}
+	    						}
+	    					}
+	    					if(addend) {
+	    						CPred p(refstart+i-CHI_THR-1,-diffval[m]);
+	    						lend.Add(p);
+	    					}
 	    				}
+	    				//fprintf(stderr,"%.1f)",sumendright/sumendleft);
 	    			}
-	    			//fprintf(stderr,"%.1f)",sumendright/sumendleft);
+	    			//else fprintf(stderr,"*)");
+	    			//fprintf(stderr,"\n");
 	    		}
-	    		//else fprintf(stderr,"*)");
-	    		//fprintf(stderr,"\n");
-	    	}
-	    	//for(int i=0;i<lstart.Count();i++) fprintf(stderr,"s=%d start at coord %d with cov=%f\n",s,lstart[i].predno,lstart[i].cov);
-	    	//for(int i=0;i<lend.Count();i++) fprintf(stderr,"s=%d end at coord %d with cov=%f\n",s,lend[i].predno,lend[i].cov);
+	    		//for(int i=0;i<lstart.Count();i++) fprintf(stderr,"s=%d start at coord %d with cov=%f\n",s,lstart[i].predno,lstart[i].cov);
+	    		//for(int i=0;i<lend.Count();i++) fprintf(stderr,"s=%d end at coord %d with cov=%f\n",s,lend[i].predno,lend[i].cov);
 
+	    	}
 	    }
 
+	    // see if I need to adjust the start to ignore little hanging pieces that make no sense
+	    if(!end) {
+	    	while(nje<njunctions && ejunction[nje]->strand+1!=2*s) nje++; // skip all junctions that are not on the same strand
+	    	if((nje<njunctions && ejunction[nje]->end - currentstart < junctionsupport) &&
+	    			(fs<0 || (uint)lstart[fs].predno>=ejunction[nje]->end) &&  // I do not want to miss any hard starts/ends
+	    			(fe<0 || (uint)lend[fe].predno>=ejunction[nje]->end)) { // there is a junction ending soon here
+	    		float covleft=get_cov(1,currentstart-refstart,ejunction[nje]->end-1-refstart,bpcov);
+	    		float covright=get_cov(1,ejunction[nje]->end-refstart,2*ejunction[nje]->end - currentstart-1-refstart,bpcov);
+	    		if(covleft<covright*(1-ERROR_PERC)) { // adjust start here if needed
+	    			currentstart=ejunction[nje]->end;
+	    			// I have to check ending junctions here again
+	    			while(nje<njunctions && ejunction[nje]->end<=currentstart) { // read all junction ends at or before the current start -> assuming there are any (at this point, smaller junction ends should not be relevant to this bundle/currentstart
+	    				if(ejunction[nje]->end==currentstart && (ejunction[nje]->strand+1) == 2*s) { // junction ends at current start and is on the same strand and not deleted
+	    					end=1;
+	    				}
+	    				nje++;
+	    			}
+	    		}
+	    	}
+	    }
 
+	    //fprintf(stderr,"create graph 1\n");
+	    CGraphnode *graphnode=create_graphnode(s,g,currentstart,endbundle,graphno,bundlenode,bundle2graph,no2gnode); // creates a $graphno graphnode  with start at bundle start, and end at bundle end
+	    graphno++;
 
 	    if(end) { // I might have nodes finishing here; but I have a junction finishing here for sure
 	    	GStr cs((int)currentstart);
@@ -2704,7 +2780,7 @@
 	    			//fprintf(stderr,"1 Edge %d-%d, edgeno=%d\n",node->nodeid,graphnode->nodeid,edgeno);
 	    		}
 	    	}
-	    	else { // I haven't seen nodes before that finish here => link to source
+	    	else { // I haven't seen nodes before that finish here (maybe due to error correction?) => link to source
 		    	source->child.Add(graphnode->nodeid);  // this node is the child of source
 		    	graphnode->parent.Add(source->nodeid); // this node has source as parent
 		    	// COUNT EDGE HERE
@@ -2723,7 +2799,7 @@
 
 	    bool completed=false;
 
-	    bool dropcov=false; // false(0) means start of bundle or junction end (drop in coverage); true(1) means junction start (raise in coverage)
+	    bool dropcov=false; // false(0) means start of bundle or junction end (raise in coverage); true(1) means junction start (drop in coverage)
 	    int nls=0; // index in longstart
 	    int nle=0; // index in longend
 
@@ -3395,6 +3471,7 @@
 	CTreePat *root=new CTreePat(0,gno-1); // if links from source to nodes are desired source==1 and all nodes are treated as +1
 
 	// now construct all child CTreePat's
+	//fprintf(stderr,"There are %d transfrags\n",transfrag.Count());
 	for(int t=0;t<transfrag.Count();t++)
 		if(transfrag[t]->nodes[0]){ // don't include transfrags from source -> not needed
 			CTreePat *tree=root;
@@ -3481,8 +3558,10 @@
 	for(int n=0;n<node.Count();n++) {
 		if(n) { // not the first node in pattern
 			int *pos=gpos[edge(node[n-1],node[n],gno)];
-			if(pos && pattern[*pos]) // there is an edge between node[n-1] and node[n]
+			if(pos && pattern[*pos]) { // there is an edge between node[n-1] and node[n]
+				//fprintf(stderr,"node[n-1]=%d node[n]=%d nextpat=%d\n",node[n-1],node[n],gno-1-node[n-1]+node[n]-node[n-1]-1);
 				tree=tree->nextpat[gno-1-node[n-1]+node[n]-node[n-1]-1];
+			}
 			else tree=tree->nextpat[node[n]-node[n-1]-1];
 		}
 		else tree=tree->nextpat[node[n]-1];
@@ -3528,17 +3607,20 @@
 	}
 	*/
 
-	bool is_source=(rstart>=no2gnode[node[0]]->start);
-	bool is_sink=(rend<=no2gnode[node.Last()]->end);
-
 	if(longreads) { // see if I need to adjust nodes in pattern
 		if(node.Count()>1) { // correct longreads that extend inside introns or past nodes that link back to source or sink
+
+			bool is_source=(rstart>=no2gnode[node[0]]->start); // read start could be inside of the node (node could be extended due to other reads)
+			bool is_sink=(rend<=no2gnode[node.Last()]->end); // read end could be inside of the node (node could be extended due to other reads
+			if(no2gnode[node[0]]->hardstart) is_source=false; // do not do any trimming if this is an ending node
+			if(no2gnode[node.Last()]->hardend) is_sink=false; // do not do any trimming if this is an ending node
+
 			if(is_sink) { // check last node
 				int i=node.Count()-1;
 				while(i>0 && no2gnode[node[i-1]]->end+1==no2gnode[node[i]]->start) {
 					uint dist=rend-no2gnode[node[i-1]]->end;
 					if(dist<longintronanchor) { // I can potentially eliminate node[i] from transfrag due to proximity
-						if(no2gnode[node[i-1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i-1]]->len()) { // there is a significant drop
+						if(no2gnode[node[i-1]]->hardend || no2gnode[node[i-1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i-1]]->len()) { // there is a significant drop
 							int j=no2gnode[node[i-1]]->child.Count()-1;
 							bool trim=false;
 							while(j>=0) {
@@ -3561,7 +3643,7 @@
 						}
 					}
 					else if(dist<CHI_WIN) { // check for sink connection
-						if(no2gnode[node[i-1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i-1]]->len()) { // there is a significant drop
+						if(no2gnode[node[i-1]]->hardend || no2gnode[node[i-1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i-1]]->len()) { // there is a significant drop
 							int j=no2gnode[node[i-1]]->child.Count()-1;
 							bool trim=false;
 							while(j>=0) {
@@ -3591,9 +3673,9 @@
 			if(is_source) { // check first node
 				int i=0;
 				while(i+1<node.Count() && no2gnode[node[i]]->end+1==no2gnode[node[i+1]]->start) {
-					uint dist=no2gnode[node[i+1]]->start-rend;
+					uint dist=no2gnode[node[i+1]]->start-rstart;
 					if(dist<longintronanchor) { // I can potentially eliminate node[i] from transfrag due to proximity
-						if(no2gnode[node[i+1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i+1]]->len()) { // there is a significant drop
+						if(no2gnode[node[i+1]]->hardstart || no2gnode[node[i+1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i+1]]->len()) { // there is a significant drop
 							int j=0;
 							bool trim=false;
 							while(j<no2gnode[node[i+1]]->parent.Count()) {
@@ -3616,7 +3698,7 @@
 						}
 					}
 					else if(dist<CHI_WIN) { // check for sink connection
-						if(no2gnode[node[i+1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i+1]]->len()) { // there is a significant drop
+						if(no2gnode[node[i+1]]->hardstart || no2gnode[node[i+1]]->cov*DROP*no2gnode[node[i]]->len()>no2gnode[node[i]]->cov*no2gnode[node[i+1]]->len()) { // there is a significant drop
 							int j=0;
 							bool trim=false;
 							while(j<no2gnode[node[i+1]]->parent.Count()) {
@@ -3642,13 +3724,12 @@
 					i++;
 				}
 			}
-
 		}
 	}
 	else if(!mergeMode && node.Count()==1) return(NULL); // do the one node transfrag make any difference? CHECK IF YOU NEED TO KEEP THIS ONE
 
-  CTransfrag *t=findtrf_in_treepat(gno,gpos,node,pattern,tr2no[s][g]);
-  if(!t) { // t is NULL
+	CTransfrag *t=findtrf_in_treepat(gno,gpos,node,pattern,tr2no[s][g]);
+	if(!t) { // t is NULL
 	  t=new CTransfrag(node,pattern,0);
 
 	  /*
@@ -3678,16 +3759,16 @@
 		  tree=child;
 	  }
 	  tree->tr=t;
-  }
+	}
 
-  if(is_sr) t->srabund+=abundance;
-  else  //fprintf(stderr,"Update transfrag with abundance=%f in graph[%d][%d]\n",abundance,s,g);
+	if(is_sr) t->srabund+=abundance;
+	else  //fprintf(stderr,"Update transfrag with abundance=%f in graph[%d][%d]\n",abundance,s,g);
 	  t->abundance+=abundance;
 
-  if(no2gnode[node[0]]->start<=rstart) if(!t->longstart || rstart<t->longstart) t->longstart=rstart;
-  if(no2gnode[node.Last()]->end>=rend) if(!t->longend|| rend>t->longend) t->longend=rend;
+	if(no2gnode[node[0]]->start<=rstart) if(!t->longstart || rstart<t->longstart) t->longstart=rstart; // value of longstart inside node; the other extensions could come from spliced nodes
+	if(no2gnode[node.Last()]->end>=rend) if(!t->longend|| rend>t->longend) t->longend=rend; // value of longend inside node; the other extensions could come from spliced nodes
 
-  return(t);
+	return(t);
 }
 
 void get_fragment_pattern(GList<CReadAln>& readlist,int n, int np,float readcov,GVec<int> *readgroup,GVec<int>& merge, GVec<int> *group2bundle,
@@ -3695,7 +3776,7 @@
 		GPVec<CTransfrag> **transfrag,CTreePat ***tr2no,GPVec<CGroup> &group) {
 
 	//fprintf(stderr,"get fragment for read[%d]:%d-%d-%d-%d-%f with pair[%d]\n",n,readlist[n]->start,readlist[n]->end,int(readlist[n]->strand),readlist[n]->nh,readlist[n]->read_count,np);
-	uint rstart=readlist[n]->start;
+	uint rstart=readlist[n]->start; // this only works for unpaired long reads -> I will have to take into account the pair if I want to do it for all reads
 	uint rend=readlist[n]->end;
 	if(np>-1 && readlist[np]->end>rend) rend=readlist[np]->end;
 
@@ -4414,18 +4495,35 @@
 }
 
 int compatible_long(int* t,int *len,GPVec<CTransfrag>& transfrag,GPVec<CGraphnode>& no2gnode,int gno,GIntHash<int> &gpos) {
-// return 0 if no compatibility, then 4*(rets)+rete where rets/ret3 = 1 if t[0] has extra intron, 2 if t[1] has extra intron, 3 if compatible starts/ends
+// return 0 if no compatibility, then 4*(rets)+rete where rets/rete = 1 if t[0] has extra intron, 2 if t[1] has extra intron, 3 if compatible starts/ends
 // len[0] is how much t[0] extends past t[1] if positive, otherwise how much t[1] extends past t[0] at the start of transcripts
 // len[1] positive: if t[0] has extra intron at start how much is t[1] inside of the intron -> default is 0; negative: same for t[1]
 // len[2] is how much t[0] extends past t[1] if positive, otherwise how much t[1] extends past t[0]	at the end of transcripts
 // len[3] positive: if t[0] has extra intron at end how much is t[1] inside of the intron -> default is 0; negative: same for t[1]
 
+
+	if(transfrag[t[0]]->nodes[0]!=transfrag[t[1]]->nodes[0] &&
+			no2gnode[transfrag[t[0]]->nodes[0]]->hardstart && no2gnode[transfrag[t[1]]->nodes[0]]->hardstart)
+		return 0; // both starts can't be hard
+
+	if(transfrag[t[0]]->nodes.Last()!=transfrag[t[1]]->nodes.Last() &&
+			no2gnode[transfrag[t[0]]->nodes.Last()]->hardend && no2gnode[transfrag[t[1]]->nodes.Last()]->hardend)
+		return 0; // both ends can't be hard
+
+	uint tstart[2]={no2gnode[transfrag[t[0]]->nodes[0]]->start,no2gnode[transfrag[t[1]]->nodes[0]]->start};
+	if(transfrag[t[0]]->longstart) tstart[0]=transfrag[t[0]]->longstart;
+	if(transfrag[t[1]]->longstart) tstart[1]=transfrag[t[1]]->longstart;
+	uint tend[2]={no2gnode[transfrag[t[0]]->nodes.Last()]->end,no2gnode[transfrag[t[1]]->nodes.Last()]->end};
+	if(transfrag[t[0]]->longend) tend[0]=transfrag[t[0]]->longend;
+	if(transfrag[t[1]]->longend) tend[1]=transfrag[t[1]]->longend;
+
 	int f=0; // first starting transcript
 	int s=1; // second starting transcript
-	if(transfrag[t[1]]->nodes[0]<transfrag[t[0]]->nodes[0]) { // transfrag t1 starts before trf t0
+	if(tstart[1]<tstart[0]) { // transfrag t1 starts before trf t0
 		f=1;
 		s=0;
 	}
+
 	if(transfrag[t[f]]->nodes.Last()<transfrag[t[s]]->nodes[0]) // no overlapping transcripts
 		return 0;
 
@@ -4440,24 +4538,29 @@
 		if(no2gnode[transfrag[t[f]]->nodes[i[f]]]->end+1<no2gnode[transfrag[t[f]]->nodes[i[f]+1]]->start) { // found intron in f
 			rets=1+f;
 			//fprintf(stderr,"..found intron in t[f]=%d rets=%d\n",t[f],rets);
+			if(no2gnode[transfrag[t[s]]->nodes[0]]->hardstart) return 0; // there is no compatibility here
 		}
 
-		len[0]+=no2gnode[transfrag[t[f]]->nodes[i[f]]]->len(); // this only approximates the length
+		// len[0]+=no2gnode[transfrag[t[f]]->nodes[i[f]]]->len(); // this only approximates the length
 		//  adj if I want real distances:
-		//if(i[f]) len[0]+=no2gnode[transfrag[t[f]]->nodes[i[f]]]->len();
-		//else len[0]+=no2gnode[transfrag[t[f]]->nodes[i[f]]]->end-transfrag[t[f]]->longstart+1;
-
+		if(i[f]) len[0]+=no2gnode[transfrag[t[f]]->nodes[i[f]]]->len();
+		else len[0]+=no2gnode[transfrag[t[f]]->nodes[i[f]]]->end-tstart[f]+1;
 		i[f]++;
 	}
+
 	if(transfrag[t[s]]->nodes.Last()<transfrag[t[f]]->nodes[i[f]]) return 0; // no overlap: t[s] is contained in t[f]
-	if(f) len[0]=-len[0]; // a negative value signals that t[1] extends past t[0]
+
+	// adj for real distance
+	if(!i[f]) { // transfrag[t[f]]->nodes[0]==transfrag[t[s]]->nodes[0]
+		len[0]=tstart[s]-tstart[f];
+	}
 
 	if(transfrag[t[s]]->nodes[0]<transfrag[t[f]]->nodes[i[f]]) { // i[f]>0 because f has to start first
 		if(!no2gnode[transfrag[t[s]]->nodes[0]]->childpat[transfrag[t[f]]->nodes[i[f]]]) return 0; // I can not reach node i[f] from t[s][0]
 		// to approximate:
-		//len[1]=no2gnode[transfrag[t[f]]->nodes[i[f]]]->start-no2gnode[transfrag[t[s]]->nodes[0]]->start;
-		//  adj if I want real distances:
-		len[1]=no2gnode[transfrag[t[f]]->nodes[i[f]]]->start-transfrag[t[s]]->longstart;
+		// len[1]=no2gnode[transfrag[t[f]]->nodes[i[f]]]->start-no2gnode[transfrag[t[s]]->nodes[0]]->start;
+		// adj if I want real distances:
+		len[1]=no2gnode[transfrag[t[f]]->nodes[i[f]]]->start-tstart[s];
 		while(transfrag[t[s]]->nodes[i[s]]<transfrag[t[f]]->nodes[i[f]]) {
 			i[s]++;
 			if(no2gnode[transfrag[t[s]]->nodes[i[s]-1]]->end+1<no2gnode[transfrag[t[s]]->nodes[i[s]]]->start) { // not continuous
@@ -4465,12 +4568,17 @@
 			}
 		}
 	}
+	else if(i[f]){ // no intron -> adj lentgh for real distance
+		len[0]+=tstart[s]-no2gnode[transfrag[t[s]]->nodes[0]]->start;
+	}
+
+	if(f) len[0]=-len[0]; // a negative value signals that t[1] extends past t[0]
 
 	// check END
 	int rete=3;
 	f=0;
 	s=1;
-	if(transfrag[t[0]]->nodes.Last()<transfrag[t[1]]->nodes.Last()) { // f should be the longer transcript at the end
+	if(tend[1]>tend[0]) { // f should be the longer transcript at the end
 		f=1;
 		s=0;
 	}
@@ -4487,23 +4595,25 @@
 			rete=1+f;
 			//fprintf(stderr,"..found intron in t[f]=%d rete=%d\n",t[f],rete);
 			if(rets<3 && rete!=rets) return 0;
+			if(no2gnode[transfrag[t[s]]->nodes.Last()]->hardend) return 0; // there is no compatibility here
 		}
 		// to approximate:
-		len[2]+=no2gnode[transfrag[t[f]]->nodes[j[f]]]->len();
+		//len[2]+=no2gnode[transfrag[t[f]]->nodes[j[f]]]->len();
 		// adj:
-		//if(j[f]<transfrag[t[f]]->nodes.Count()-1) len[2]+=no2gnode[transfrag[t[f]]->nodes[j[f]]]->len();
-		//else len[2]+=transfrag[t[f]]->longend-no2gnode[transfrag[t[f]]->nodes[j[f]]]->start+1;
+		if(j[f]<transfrag[t[f]]->nodes.Count()-1) len[2]+=no2gnode[transfrag[t[f]]->nodes[j[f]]]->len();
+		else len[2]+=tend[f]-no2gnode[transfrag[t[f]]->nodes[j[f]]]->start+1;
 
 		j[f]--;
 	}
 	if(transfrag[t[s]]->nodes[0]>transfrag[t[f]]->nodes[j[f]]) return 0; // no overlap (this shouldn't be the case since there is one node in common)
-	if(f) len[2]=-len[2]; // a negative value signals that t[1] extends past t[0]
+
+	if(j[f]==transfrag[t[f]]->nodes.Count()-1) len[2]+=tend[f]-tend[s];
 
 	if(transfrag[t[s]]->nodes[j[s]]>transfrag[t[f]]->nodes[j[f]]) {
 		if(!no2gnode[transfrag[t[f]]->nodes[j[f]]]->childpat[transfrag[t[s]]->nodes[j[s]]]) return 0; // I can not reach node j[s] from j[f]
 		// len[3]=no2gnode[transfrag[t[s]]->nodes.Last()]->end-no2gnode[transfrag[t[f]]->nodes[j[f]]]->end; // to approximate dist
 		//  adj if I want real distances:
-		len[3]=transfrag[t[s]]->longend-no2gnode[transfrag[t[f]]->nodes[j[f]]]->end;
+		len[3]=tend[s]-no2gnode[transfrag[t[f]]->nodes[j[f]]]->end;
 		while(transfrag[t[s]]->nodes[j[s]]>transfrag[t[f]]->nodes[j[f]]) {
 			j[s]--;
 			if(no2gnode[transfrag[t[s]]->nodes[j[s]]]->end+1<no2gnode[transfrag[t[s]]->nodes[j[s]+1]]->start) { // not continuous
@@ -4511,9 +4621,14 @@
 			}
 		}
 	}
+	else if(j[f]<transfrag[t[f]]->nodes.Count()-1) {
+		len[2]+=no2gnode[transfrag[t[s]]->nodes[0]]->end-tend[s];
+	}
 
 	if(i[f]>j[f] || i[s]>j[s]) return 0; // there should be i<=j
 
+	if(f) len[2]=-len[2]; // a negative value signals that t[1] extends past t[0]
+
 	// now if and is point at the same node in transcripts, and so do jf and js
 	while(i[f]<=j[f] && i[s]<=j[s]) {
 		if(transfrag[t[f]]->nodes[i[f]]==transfrag[t[s]]->nodes[i[s]]) { // skip equal nodes
@@ -4555,7 +4670,7 @@
 }
 
 void process_transfrags(int s, int gno,int edgeno,GPVec<CGraphnode>& no2gnode,GPVec<CTransfrag>& transfrag,CTreePat *tr2no,
-		GIntHash<int> &gpos,GVec<CGuide>& guidetrf,GList<CPrediction>& pred) {
+		GIntHash<int> &gpos,GVec<CGuide>& guidetrf,GList<CPrediction>& pred,GVec<int>& trflong) {
 
 	/*
 	{ // DEBUG ONLY
@@ -4576,9 +4691,9 @@
 	/*
 	{ // DEBUG ONLY
 		//printTime(stderr);
-		fprintf(stderr,"There are %d transfrags after clean up:\n",transfrag.Count());
+		fprintf(stderr,"\nThere are %d transfrags after clean up:\n",transfrag.Count());
 		for(int i=0;i<transfrag.Count();i++) {
-			fprintf(stderr,"transfrag[%d]:",i);
+			fprintf(stderr,"transfrag[%d] abund=%f:",i,transfrag[i]->abundance);
 			for(int j=0;j<transfrag[i]->nodes.Count();j++) fprintf(stderr," %d",transfrag[i]->nodes[j]);
 			fprintf(stderr,"\n");
 		}
@@ -4586,37 +4701,72 @@
 	*/
 
 	// add all guide patterns to the set of transfrags so that I can have a "backbone" for each guide
-	// I need this because there might be an uncompatible transfrag connecting the nodes in the guide
+	// I need this because there might be an incompatible transfrag connecting the nodes in the guide
 	for(int i=0;i<guidetrf.Count();i++) if(guidetrf[i].trf->guide){
 
-		//CTransfrag *t=findtrf_in_treepat(gno,gpos,guidetrf[i].trf->nodes,guidetrf[i].trf->pattern,tr2no); // I need to adjust first/last node
-		//if(!t) { // t is NULL
-		CTransfrag *t=new CTransfrag(guidetrf[i].trf->nodes,guidetrf[i].trf->pattern,trthr*ERROR_PERC);
+		CTransfrag *t=NULL;
+		bool add=true;
+		if(longreads) {
+			/*guidetrf[i].trf->pattern[0]=0;
+			guidetrf[i].trf->pattern[gno-1]=0;
+			int *pos=gpos[edge(0,guidetrf[i].trf->nodes[1],gno)];
+			if(pos) guidetrf[i].trf->pattern[*pos]=0;
+			pos=gpos[edge(guidetrf[i].trf->nodes[guidetrf[i].trf->nodes.Count()-2],guidetrf[i].trf->nodes.Last(),gno)];
+			if(pos) guidetrf[i].trf->pattern[*pos]=0;
+			guidetrf[i].trf->nodes.Pop();
+			guidetrf[i].trf->nodes.Shift();*/
+
+			t=findtrf_in_treepat(gno,gpos,guidetrf[i].trf->nodes,guidetrf[i].trf->pattern,tr2no); // I need to adjust first/last node
+			if(!t) { // t is NULL
+				t=new CTransfrag(guidetrf[i].trf->nodes,guidetrf[i].trf->pattern,0);
+			}
+			else add=false;
+		}
+		else {
+			t=new CTransfrag(guidetrf[i].trf->nodes,guidetrf[i].trf->pattern,trthr*ERROR_PERC);
+		}
 
 		/*
+		float abund=0;
+		if(!longreads) {
+			abund=trthr*ERROR_PERC;
+		}
+		CTransfrag *t=new CTransfrag(guidetrf[i].trf->nodes,guidetrf[i].trf->pattern,abund);
+		*/
+		/*
 		{ // DEBUG ONLY
 			fprintf(stderr,"Add guidetrf with nodes:");
 			for(int j=0;j<guidetrf[i].trf->nodes.Count();j++) fprintf(stderr," %d",guidetrf[i].trf->nodes[j]);
 			//fprintf(stderr," and pattern: ");
 			//printBitVec(guidetrf[i].trf->pattern);
 			fprintf(stderr,"\n");
+			fprintf(stderr,"Found transfrag %p with nodes:",t);
+			for(int j=0;j<t->nodes.Count();j++) fprintf(stderr," %d",t->nodes[j]);
+			//fprintf(stderr," and pattern: ");
+			//printBitVec(guidetrf[i].trf->pattern);
+			fprintf(stderr,"\n");
+
 		}
 		*/
 
 		// I might not need to do this for the normal max_flow, but for the push I am counting on having only one transcript linking back to source from the first node
-		t->pattern[0]=0;
-		t->pattern[gno-1]=0;
-		int *pos=gpos[edge(0,t->nodes[1],gno)];
-		if(pos) t->pattern[*pos]=0;
-		pos=gpos[edge(t->nodes[t->nodes.Count()-2],t->nodes.Last(),gno)];
-		if(pos) t->pattern[*pos]=0;
-		t->nodes.Pop();
-		t->nodes.Shift();
+		if(!longreads) {
+			t->pattern[0]=0;
+			t->pattern[gno-1]=0;
+			int *pos=gpos[edge(0,t->nodes[1],gno)];
+			if(pos) t->pattern[*pos]=0;
+			pos=gpos[edge(t->nodes[t->nodes.Count()-2],t->nodes.Last(),gno)];
+			if(pos) t->pattern[*pos]=0;
+			t->nodes.Pop();
+			t->nodes.Shift();
+		}
 		t->guide=true;
 		t->longstart=no2gnode[t->nodes[0]]->start;
 		t->longend=no2gnode[t->nodes.Last()]->end;
-		transfrag.Add(t);
-
+		if(longreads) t->usepath=guidetrf[i].g; // guide index
+		no2gnode[t->nodes[0]]->hardstart=1;  // I can always trust a guide's start
+		no2gnode[t->nodes.Last()]->hardend=1; // I can always trust a guide's end
+		if(add) transfrag.Add(t);
 	}
 
 	GBitVec allpat(gno+edgeno);
@@ -4625,19 +4775,32 @@
 
 	if(longreads) { // add source/sink links but only if they need to be added to explain the traversals in the graph
 		transfrag.Sort(longtrCmp); // most abundant transfrag in the graph come first, then the ones with most nodes, then the ones more complete
-		//trsort=false;
+		trsort=false;
 		int source=0;
 		int sink=gno-1;
 		GVec<int> hassource(gno,-1); // remembers transcript number that links given node to source
 		GVec<int> hassink(gno,-1); // remembers transcript number that links given node to sink
-		GBitVec keepsource(gno);
-		GBitVec keepsink(gno);
+		GBitVec keepsource(gno); // if not set then I can remove link from node to source; keeps source link if it exists otherwise otherwise
+		GBitVec keepsink(gno); // if not set then I can remove link from node to sink; keeps sink link if it exists otherwise otherwise
 		GVec<CLongTrf> keeptrf; // keeps all potential transfrags that will be kept from most abundant to least, unassembled
 		float zero=0;
 		GVec<float> addsource(gno,zero);
 		GVec<float> addsink(gno,zero);
-		int edgedist=CHI_WIN+CHI_THR;
+		int edgedist=CHI_WIN; // I need to be consistent (if I change here then I need to change in update_abundance too)
 		int ssdist=longintronanchor;
+
+		/*
+		{ // DEBUG ONLY
+			//printTime(stderr);
+			fprintf(stderr,"\nThere are %d transfrags after clean up:\n",transfrag.Count());
+			for(int i=0;i<transfrag.Count();i++) {
+				fprintf(stderr,"transfrag[%d] abund=%f:",i,transfrag[i]->abundance);
+				for(int j=0;j<transfrag[i]->nodes.Count();j++) fprintf(stderr," %d",transfrag[i]->nodes[j]);
+				fprintf(stderr,"\n");
+			}
+		}
+		*/
+
 		for(int t1=0;t1<transfrag.Count();t1++) {
 			/*fprintf(stderr,"Consider t=%d with abund=%f and nodes:",t1,transfrag[t1]->abundance);
 			for(int j=0;j<transfrag[t1]->nodes.Count();j++) {
@@ -4659,50 +4822,73 @@
 				//fprintf(stderr,"Node %d in t=%d with cov=%f has sink\n",transfrag[t1]->nodes[0],t1,transfrag[t1]->abundance);
 			}
 			else {
-				if(transfrag[t1]->longstart) keepsource[transfrag[t1]->nodes[0]]=1; //fprintf(stderr,"keep source %d\n",transfrag[t1]->nodes[0]);}
-				if(transfrag[t1]->longend) keepsink[transfrag[t1]->nodes.Last()]=1;//fprintf(stderr,"keep sink %d\n",transfrag[t1]->nodes.Last());}
+				if(eonly && !transfrag[t1]->guide) continue; // do not remember transfrags that are not guides
+				if(!keepsource[transfrag[t1]->nodes[0]]) {
+					if(transfrag[t1]->longstart) keepsource[transfrag[t1]->nodes[0]]=1; //fprintf(stderr,"keep source %d\n",transfrag[t1]->nodes[0]);}
+					else if(no2gnode[transfrag[t1]->nodes[0]]->hardstart) keepsource[transfrag[t1]->nodes[0]]=1;
+				}
+				if(!keepsink[transfrag[t1]->nodes.Last()]) {
+					if(transfrag[t1]->longend) keepsink[transfrag[t1]->nodes.Last()]=1;//fprintf(stderr,"keep sink %d\n",transfrag[t1]->nodes.Last());}
+					else if(no2gnode[transfrag[t1]->nodes.Last()]) keepsink[transfrag[t1]->nodes.Last()]=1;
+				}
 				bool included=false;
 				// a transfrag that starts at source and ends at sink can never be included in a kept transfrag, so I am safe to do next
 				for(int t2=0; t2<keeptrf.Count();t2++) {
-					int t[2]={t1,keeptrf[t2].t}; // t1 current, t2 the one I kep
+					int t[2]={t1,keeptrf[t2].t}; // t1 current, t2 the one I kept
 					int len[4]={MAX_NODE,MAX_NODE,MAX_NODE,MAX_NODE};
 					int ret=compatible_long(t,len,transfrag,no2gnode,gno,gpos);
 					//fprintf(stderr,"  ret=%d t[0]=%d t[1]=%d len[0]=%d len[1]=%d len[2]=%d len[3]=%d\n",ret,t[0],t[1],len[0],len[1],len[2],len[3]);
 					if(ret){
 						switch(ret) {
 						case 1: // t[0] includes t[1]: it extends with introns on on one or both sides -> keep unless I can eliminate a previous one
-							if(!transfrag[t[1]]->guide && transfrag[t1]->longstart && transfrag[t1]->longend) { // t[1] might be included in t[0] so I might eliminate if it doesn't pass threshold
+							if(!transfrag[t[1]]->guide && transfrag[t1]->longstart && transfrag[t1]->longend && // t[1] might be included in t[0] so I might eliminate if it doesn't pass threshold
+									(!no2gnode[transfrag[t[1]]->nodes[0]]->hardstart || transfrag[t[0]]->nodes[0] == transfrag[t[1]]->nodes[0] ) &&
+									(!no2gnode[transfrag[t[1]]->nodes.Last()]->hardend || transfrag[t[0]]->nodes.Last() == transfrag[t[1]]->nodes.Last())) {
 								//if(transfrag[t[0]]->abundance>(1-ERROR_PERC/DROP)*transfrag[t[1]]->abundance) { // t[0] is within limits of t[1]
-									//if(transfrag[t[0]]->abundance>DROP*transfrag[t[1]]->abundance) { // t[0] is within limits of t[1]
-								if(len[1]<ssdist && len[3]<ssdist) { // prefer t0 instead of
-									keeptrf[t2].t=t1;
-									keeptrf[t2].cov+=transfrag[t1]->abundance;
-									keeptrf[t2].group.Add(t1);
-									included=true; // I do not want to store transcript
-											//fprintf(stderr,"trf %d includes %d\n",t[0],t[1]);
-										}
-									//}
+								if(transfrag[t[0]]->abundance>DROP*transfrag[t[1]]->abundance) { // t[0] is within limits of t[1]
+									if(len[1]<ssdist && len[3]<ssdist) { // prefer t[0] instead of t[1]
+										keeptrf[t2].t=t1;
+										keeptrf[t2].cov+=transfrag[t1]->abundance;
+										keeptrf[t2].group.Add(t1);
+										included=true; // I do not want to store transcript
+										//fprintf(stderr,"trf %d includes %d\n",t[0],t[1]);
+									}
+								}
 							}
 							break;
 						case 2: // t[1] includes t[0]: extends with introns past ends of t[0] (t[1] possibly includes t[0]); t[1] is more abundant than t0
 							//if(transfrag[t[1]]->guide || transfrag[t[1]]->abundance>(1-ERROR_PERC/DROP)*transfrag[t[0]]->abundance) {
-							if(!transfrag[t[0]]->guide) {
+							if(!transfrag[t[0]]->guide &&
+									(!no2gnode[transfrag[t[0]]->nodes[0]]->hardstart || transfrag[t[0]]->nodes[0] == transfrag[t[1]]->nodes[0]) &&
+									(!no2gnode[transfrag[t[0]]->nodes.Last()]->hardend || transfrag[t[0]]->nodes.Last() == transfrag[t[1]]->nodes.Last())) {
 								if(len[1]<ssdist && len[3]<ssdist) {
 									keeptrf[t2].cov+=transfrag[t1]->abundance;
 									keeptrf[t2].group.Add(t1);
 									included=true;
-										//fprintf(stderr,"trf %d is intronic including %d\n",t[1],t[0]);
+									//fprintf(stderr,"trf %d is intronic including %d\n",t[1],t[0]);
 								}
 							}
 							//}
 							break;
 						case 3: // t1 and t0 are compatible --> just look for the edges; t1 goes further apart
-							if(len[0]<edgedist && len[2]<edgedist) {
-								if(transfrag[t[0]]->guide) keeptrf[t2].t=t1; // t[0] to replace t[1]
+							if(transfrag[t[0]]->nodes[0]!=transfrag[t[1]]->nodes[0] && transfrag[t[0]]->nodes.Last()!=transfrag[t[1]]->nodes.Last() &&
+									((no2gnode[transfrag[t[0]]->nodes[0]]->hardstart && !no2gnode[transfrag[t[1]]->nodes[0]]->hardstart &&
+											!no2gnode[transfrag[t[0]]->nodes.Last()]->hardend && no2gnode[transfrag[t[1]]->nodes.Last()]->hardend) ||
+											(!no2gnode[transfrag[t[0]]->nodes[0]]->hardstart && no2gnode[transfrag[t[1]]->nodes[0]]->hardstart &&
+													no2gnode[transfrag[t[0]]->nodes.Last()]->hardend && !no2gnode[transfrag[t[1]]->nodes.Last()]->hardend))) {
+								// these two transcripts both have one good start and one different --> keep them both (different option would be to add them to another transfrag that is compatible and has both hardends but then it's more complicated
+								break;
+							}
+
+							// I keep both if both are guides
+							//if((!transfrag[t[0]]->guide || !transfrag[t[1]]->guide) && abs(len[0])<edgedist && abs(len[2])<edgedist) { // close by
+							if((!transfrag[t[0]]->guide || !transfrag[t[1]]->guide) && len[0]<edgedist && len[2]<edgedist) { // close by
+								if(transfrag[t[0]]->guide || (!transfrag[t[1]]->guide && no2gnode[transfrag[t[0]]->nodes[0]]->hardstart && no2gnode[transfrag[t[0]]->nodes.Last()]->hardend))
+									keeptrf[t2].t=t1; // t[0] to replace t[1]
 								keeptrf[t2].cov+=transfrag[t1]->abundance;
 								keeptrf[t2].group.Add(t1);
 								included=true;
-									//fprintf(stderr,"trf %d %d equivalent start/ends\n",t[1],t[0]);
+								//fprintf(stderr,"trf %d %d equivalent start/ends\n",t[1],t[0]);
 							}
 							break;
 						}
@@ -4711,14 +4897,16 @@
 				}
 
 				if(!included){
-					if(transfrag[t1]->guide || (transfrag[t1]->longstart && transfrag[t1]->longend)) { // if this is not included and has correct start/end
+					if(transfrag[t1]->guide || ((transfrag[t1]->longstart || no2gnode[transfrag[t1]->nodes[0]]->hardstart)&&
+							(transfrag[t1]->longend || no2gnode[transfrag[t1]->nodes.Last()]->hardend))) { // if this is not included and has correct start/end
 						CLongTrf kt(t1,transfrag[t1]->abundance);
 						keeptrf.Add(kt);
 						keeptrf.Last().group.Add(t1);
+						//fprintf(stderr,"keep transfrag %d\n",t1);
 					}
 					else { // incomplete transcript, possibly wrong
 						transfrag[t1]->weak=1;
-						//fprintf(stderr,"Incomplete transcript\n");
+						//fprintf(stderr,"Incomplete transcript %d\n",t1);
 					}
 				}
 			}
@@ -4728,22 +4916,25 @@
 		if(s) { sign='+';}
 		//GBitVec guidesource(gno);
 		//GBitVec guidesink(gno);
-		for(int i=0;i<keeptrf.Count();i++) {
+		//for(int i=0;i<keeptrf.Count();i++) {
+		for(int i=keeptrf.Count()-1;i>=0;i--) { // I add the kept transcripts to trflong from least significant to most in order to make deletion easier
+
 			//fprintf(stderr,"Build source/sink for transfrag %d\n",keeptrf[i].t);
 			int n1=transfrag[keeptrf[i].t]->nodes[0];
 			int n2=transfrag[keeptrf[i].t]->nodes.Last();
 			//if(hassource[n1]<0 || hassink[n2]<0) fprintf(stderr,"Build source/sink for transfrag %d\n",keeptrf[i].t);
 
-			/*if(transfrag[keeptrf[i].t]->guide) {
-				addsource[n1]=keeptrf[i].cov; // give a boost to a guide
-				addsink[n2]=keeptrf[i].cov; // give a boost to a guide
-				guidesource[n1]=1;
-				guidesource[n2]=1;
-			}
-			else {*/
-			addsource[n1]=transfrag[keeptrf[i].t]->abundance; //all_eq  else makes no difference
-			addsink[n2]=transfrag[keeptrf[i].t]->abundance; //all_eq else makes no difference
-			//}
+			if(!rawreads && !transfrag[keeptrf[i].t]->guide && ((!no2gnode[n1]->hardstart && (hassource[n1]<0 || !keepsource[n1])) ||
+					(!no2gnode[n2]->hardend && (hassink[n2]<0 || !keepsink[n2])))) {
+			//if(!rawreads && (no2gnode[n1]->hardstart || (hassource[n1]>=0 && keepsource[n1])) && (no2gnode[n2]->hardend ||(hassink[n2]>=0 && keepsink[n2]))) {
+				trflong.Add(keeptrf[i].t);
+			}
+
+			/******* previous implementation here
+
+			addsource[n1]=keeptrf[i].cov;
+			addsink[n2]=keeptrf[i].cov;
+			 *******/
 
 			/*
 			if(!addsource[n1] && hassource[n1]<0) {
@@ -4764,7 +4955,7 @@
 				}
 			}*/
 
-			/* // all
+			// all
 			for(int j=0;j<keeptrf[i].group.Count();j++) {
 				if(n1==transfrag[keeptrf[i].group[j]]->nodes[0]) {
 					addsource[n1]+=transfrag[keeptrf[i].group[j]]->abundance;
@@ -4773,9 +4964,9 @@
 				if(n2==transfrag[keeptrf[i].group[j]]->nodes.Last()) {
 					addsink[n2]+=transfrag[keeptrf[i].group[j]]->abundance;
 					//fprintf(stderr,"Add sink t[%d]->cov=%f to node %d = %f\n",keeptrf[i].group[j],transfrag[keeptrf[i].group[j]]->abundance,n2,addsink[n2]);
+					}
 				}
-				}
-			}*/
+
 
 
 			if(rawreads) {
@@ -4803,37 +4994,50 @@
 				pred.Add(p);
 			}
 		}
+		for(int i=keeptrf.Count()-1;i>=0;i--) {
+			int n1=transfrag[keeptrf[i].t]->nodes[0];
+			int n2=transfrag[keeptrf[i].t]->nodes.Last();
+
+			if(transfrag[keeptrf[i].t]->guide || ((no2gnode[n1]->hardstart || (hassource[n1]>=0 && keepsource[n1])) && (no2gnode[n2]->hardend ||(hassink[n2]>=0 && keepsink[n2]))))
+				trflong.Add(keeptrf[i].t);
+
+		}
 
 		/*
 		{ // DEBUG ONLY
 			fprintf(stderr,"%d keeptrf:\n",keeptrf.Count());
 			for(int i=0;i<keeptrf.Count();i++) {
-				fprintf(stderr,"(%d) %d abund=%f",i,keeptrf[i].t,transfrag[keeptrf[i].t]->abundance);
+				fprintf(stderr,"(%d) %d abund=%f keepcov=%f",i,keeptrf[i].t,transfrag[keeptrf[i].t]->abundance,keeptrf[i].cov);
 				for(int j=0;j<transfrag[keeptrf[i].t]->nodes.Count();j++) {
 					fprintf(stderr," %d",transfrag[keeptrf[i].t]->nodes[j]);
 				}
 				fprintf(stderr,"\n");
-				int j=0;
-				while(j<transfrag[keeptrf[i].t]->nodes.Count()) {
-					fprintf(stderr,"Ad5\tStringTie\texon\t%d",no2gnode[transfrag[keeptrf[i].t]->nodes[j]]->start);
-					while(j+1<transfrag[keeptrf[i].t]->nodes.Count() && no2gnode[transfrag[keeptrf[i].t]->nodes[j]]->end+1==no2gnode[transfrag[keeptrf[i].t]->nodes[j+1]]->start)
-						j++;
-					fprintf(stderr,"\t%d\t1000\t-\t.\ttranscript_id \"STRG.%d.%.1f\n",no2gnode[transfrag[keeptrf[i].t]->nodes[j]]->end,keeptrf[i].t,transfrag[keeptrf[i].t]->abundance);
-					j++;
-				}
+
 			}
+			fprintf(stderr,"%d trflong:",trflong.Count());
+			for(int i=0;i<trflong.Count();i++)
+				fprintf(stderr," %d",trflong[i]);
+			fprintf(stderr,"\n");
 		}
 		*/
 
+
 		// add source/sink connections
 		for(int i=1;i<gno-1;i++) {
+			//fprintf(stderr,"i=%d hassource=%d addsource=%f hassink=%d addsink=%f\n",i,hassource[i],addsource[i],hassink[i],addsink[i]);
 			if(hassource[i]<0) {
-				if(addsource[i]) { // node i doesn't have source as parent
+				if(addsource[i] && no2gnode[i]->hardstart) {
+					/*float abund=trthr;
+					if(no2gnode[i]->hardstart) { // node i doesn't have source as parent but it should
+						abund=addsource[i];
+					}*/
+					// else I am not that confident this is a start
 					no2gnode[i]->parent.Insert(0,zero);
 					no2gnode[0]->child.Add(i);
 					GVec<int> nodes;
 					nodes.Add(source);
 					nodes.Add(i);
+					//CTransfrag *t=new CTransfrag(nodes,allpat,abund);
 					CTransfrag *t=new CTransfrag(nodes,allpat,addsource[i]);
 					t->pattern[source]=1;
 					t->pattern[i]=1;
@@ -4843,19 +5047,25 @@
 			}
 			else {
 				if(!keepsource[i]) { // it was a mistake to link this to source --> remove
-					//fprintf(stderr,"delete abundance of trf:");
-					//for(int j=0;j<transfrag[hassource[i]]->nodes.Count();j++) fprintf(stderr," %d",transfrag[hassource[i]]->nodes[j]);fprintf(stderr,"\n");
+					/*fprintf(stderr,"delete abundance of trf:");
+					for(int j=0;j<transfrag[hassource[i]]->nodes.Count();j++) fprintf(stderr," %d",transfrag[hassource[i]]->nodes[j]);fprintf(stderr,"\n");*/
 					transfrag[hassource[i]]->abundance=0;
 				}
-				/*else { // this one has source and it should be kept => update trf abundance to a more realistic value
-					//fprintf(stderr,"source %d:%d abd=%f addsource=%f\n",i,no2gnode[i]->start,transfrag[hassource[i]]->abundance,addsource[i]);
-					if(guidesource[i] && transfrag[hassource[i]]->abundance<addsource[i])
-						transfrag[hassource[i]]->abundance=addsource[i];
-				}*/
+				else { // this one has source and it should be kept => update trf abundance to a more realistic value
+					//fprintf(stderr,"source %d:%d abund=%f addsource=%f\n",i,no2gnode[i]->start,transfrag[hassource[i]]->abundance,addsource[i]);
+					//if(guidesource[i] && transfrag[hassource[i]]->abundance<addsource[i])
+					transfrag[hassource[i]]->abundance=addsource[i];
+				}
 			}
 
 			if(hassink[i]<0) {
-				if(addsink[i]) {
+				if(addsink[i] && no2gnode[i]->hardend) {
+					/*float abund=trthr;
+					if(no2gnode[i]->hardend) {
+						abund=addsink[i];
+					}*/
+					// else  not very confident about this end
+
 					no2gnode[i]->child.Add(sink);
 					no2gnode[sink]->parent.Add(i);
 					GVec<int> nodes;
@@ -4870,13 +5080,15 @@
 			}
 			else {
 				if(!keepsink[i]) { // it was a mistake to link this to sink --> remove
+					/*fprintf(stderr,"delete abundance of trf:");
+					for(int j=0;j<transfrag[hassink[i]]->nodes.Count();j++) fprintf(stderr," %d",transfrag[hassink[i]]->nodes[j]);fprintf(stderr,"\n");*/
 					transfrag[hassink[i]]->abundance=0;
 				}
-				/*else {
-					//fprintf(stderr,"sink %d:%d abd=%f addsource=%f\n",i,no2gnode[i]->end,transfrag[hassink[i]]->abundance,addsink[i]);
-					if(guidesink[i] && transfrag[hassink[i]]->abundance<addsink[i])
-						transfrag[hassink[i]]->abundance=addsink[i];
-				}*/
+				else {
+					//fprintf(stderr,"sink %d:%d abund=%f addsink=%f\n",i,no2gnode[i]->end,transfrag[hassink[i]]->abundance,addsink[i]);
+					//if(guidesink[i] && transfrag[hassink[i]]->abundance<addsink[i])
+					transfrag[hassink[i]]->abundance=addsink[i];
+				}
 			}
 		}
 	}
@@ -4916,7 +5128,7 @@
 	/*
 	{ // DEBUG ONLY
 		//printTime(stderr);
-		fprintf(stderr,"There are %d transfrags after sorting:\n",transfrag.Count());
+		fprintf(stderr,"\nThere are %d transfrags after sorting:\n",transfrag.Count());
 		for(int i=0;i<transfrag.Count();i++) {
 			fprintf(stderr,"transfrag[%d]:",i);
 			for(int j=0;j<transfrag[i]->nodes.Count();j++) fprintf(stderr," %d",transfrag[i]->nodes[j]);
@@ -4980,6 +5192,7 @@
 			else transfrag[t1]->real=true;
 
 		}
+		//else if(longreads) no2gnode[n1]->trf.Add(t1);
 		/*
 		else { // this transcript is included completely in node
 			no2gnode[transfrag[t1]->nodes[0]]->frag+=transfrag[t1]->abundance;
@@ -5933,7 +6146,7 @@
 	int tn=trnode.Count();
 	while(1) {
 		while(j<tn && trnode[j]<p) {
-			if(edgep && pathpattern[*edgep]) return false; // there is an edge between prevp<trnode[j] and p>trnode[j]
+			if(!prevp || (edgep && pathpattern[*edgep]) || p==gno-1) return false; // there is an edge between prevp<trnode[j] and p>trnode[j]; when prevp==0/gno-1 I might not have an edge there
 			if(!no2gnode[trnode[j]]->childpat[p]) {
 			        //fprintf(stderr,"Node p=%d cannot be reached from node=%d\n",p,trnode[j]);
 				return false; // I can not reach p from trnode[j]
@@ -6165,6 +6378,8 @@
 	/*
 	fprintf(stderr,"Children of node %d with maxpath=%d are:",i,maxpath);
 	for(int c=0;c<nchildren;c++) fprintf(stderr," %d",inode->child[c]);
+	fprintf(stderr," pathpat=");
+	printBitVec(pathpat);
 	fprintf(stderr,"\n");
 	*/
 
@@ -6181,11 +6396,16 @@
 	int tmax=-1;
 	bool exclude=false;
 
+	int nextnode=0; // nextnode on path
+	bool reach=false;
+	if(maxpath<=i) reach=true;
+
 	//int maxchild=inode->child[0];
 	//float maxchildcov=-1;
 	if(pathpat[i+1]) {
 		maxc=i+1;
 		tmax=-1;
+		reach=true;
 	}
 	else {
 		for(int c=0;c<nchildren;c++) {
@@ -6194,9 +6414,32 @@
 			if(pos && pathpat[*pos]) {
 				maxc=inode->child[c];
 				tmax=-1;
+				reach=true;
 				break;
 			}
 
+			// check if I can reach nextnode from c
+			if(maxpath>i) { // actually maxpath is >= i+2 otherwise I wouldn't get here
+				if(!nextnode) {
+					int j=i+2;
+					while(j<=maxpath) {
+						if(pathpat[j]) {
+							nextnode=j;
+							break;
+						}
+						j++;
+					}
+				}
+				if(!no2gnode[inode->child[c]]->childpat[nextnode]) {
+					continue;
+				}
+				reach=true;
+			}
+
+			/*if(no2gnode[maxpath]->hardend && pathpat[gno-1]) { // this is a trflong transcript
+				if(!pathpat[c]) continue;
+			}*/
+
 			float childcov=0;
 			int tchild=-1;
 			int endpath=maxpath;
@@ -6266,6 +6509,9 @@
 			}
 		}
 	}
+
+	if(!reach) return false;
+
 	if(maxc==-1) {
 		if(exclude && nodecov[i+1]) {
 			CGraphnode *cnode=no2gnode[i+1];
@@ -6346,6 +6592,8 @@
 	/*
 	fprintf(stderr,"Parents of node %d are:",i);
 	for(int p=0;p<nparents;p++) fprintf(stderr," %d",inode->parent[p]);
+	fprintf(stderr," pathpat=");
+	printBitVec(pathpat);
 	fprintf(stderr,"\n");
 	*/
 
@@ -6363,10 +6611,15 @@
 	int tmax=-1;
 	bool exclude=false;
 
+	int nextnode=gno; // nextnode on path
+	bool reach=false;
+	if(minpath>=i) reach=true;
+
 	// check if adjacent parent is on the path
 	if(pathpat[i-1]) {
 		maxp=i-1;
 		tmax=-1;
+		reach=true;
 	}
 	else {
 		//int maxparent=inode->parent[0];
@@ -6377,9 +6630,31 @@
 			if(pos && pathpat[*pos]) { // this is next child on the path
 				maxp=inode->parent[p];
 				tmax=-1;
+				reach=true;
 				break;
 			}
 
+			// check if I can reach nextnode from p
+			if(minpath<i) { // actually minpath is <= i-2 otherwise I wouldn't get here
+				if(nextnode==gno) {
+					int j=i-2;
+					while(j>=minpath) {
+						if(pathpat[j]) {
+							nextnode=j;
+							break;
+						}
+						j++;
+					}
+				}
+				if(!no2gnode[inode->parent[p]]->parentpat[nextnode]) {
+					continue;
+				}
+				reach=true;
+			}
+
+			/*if(no2gnode[minpath]->hardstart && pathpat[gno-1]) { // this is a trflong transcript
+				if(!pathpat[p]) continue;
+			}*/
 
 			float parentcov=0;
 			int tpar=-1;
@@ -6450,6 +6725,9 @@
 			}
 		}
 	}
+
+	if(!reach) return false;
+
 	if(maxp==-1) {
 		if(exclude && nodecov[i-1]) {
 			CGraphnode *pnode=no2gnode[i-1];
@@ -7317,6 +7595,7 @@
 	t->abundance-=val;
 	if(t->abundance<epsilon) t->abundance=0;
 	for(int j=start;j<t->nodes.Count()-1;j++) { // for all nodes but the last
+		//fprintf(stderr," %d",t->nodes[j]);
 		int i=node2path[t->nodes[j]];
 		capacity[i]+=val;
 	}
@@ -7448,7 +7727,7 @@
 		int nt=no2gnode[path[i]]->trf.Count();
 		for(int j=0;j<nt;j++) {
 			int t=no2gnode[path[i]]->trf[j];
-			if(transfrag[t]->abundance && (istranscript[t] || ((pathpat & transfrag[t]->pattern)==transfrag[t]->pattern))) {
+			if(transfrag[t]->abundance && (istranscript[t] || ((pathpat & transfrag[t]->pattern)==transfrag[t]->pattern))) { // this needs to be adjusted
 				istranscript[t]=1;
 				if(transfrag[t]->nodes[0]==path[i]) { // transfrag starts at this node
 					int n1=i;
@@ -7564,61 +7843,263 @@
 	return(flux);
 }
 
+float long_max_flow(int gno,GVec<int>& path,GBitVec& istranscript,GPVec<CTransfrag>& transfrag,GPVec<CGraphnode>& no2gnode,
+		GVec<float>& nodecapacity,GBitVec& pathpat) {
 
-// version of push_max_flow where I weight the incomplete transfrags
-float push_max_flow(int gno,GVec<int>& path,GBitVec& istranscript,GPVec<CTransfrag>& transfrag,GPVec<CGraphnode>& no2gnode,
-		GVec<float>& nodeflux,GBitVec& pathpat, GIntHash<int> &gpos, bool &full) {
-
+	float flux=0;
 	int n=path.Count();
+	GVec<float> *capacity=new GVec<float>[n]; // capacity of edges in network
+	GVec<float> *flow=new GVec<float>[n]; // flow in network
+	GVec<int> *link=new GVec<int>[n]; // for each node remembers it's neighbours
+	GVec<int> pred; // this stores the augmenting path
+	pred.Resize(n,-1);
 	GVec<int> node2path;
+	GVec<float> noderate;
 	node2path.Resize(gno,-1);
-	for(int i=0;i<n;i++) {
-		node2path[path[i]]=i;
-		nodeflux.cAdd(0.0);
-	}
-	GVec<float> capacityleft;   // how many transcripts compatible to path enter node
-	GVec<float> capacityright;  // how many transcripts compatible to path exit node
-	capacityleft.Resize(n,0);
-	capacityright.Resize(n,0);
-	GVec<float> sumleft;        // how many transcripts enter node
-	GVec<float> sumright;       // how many transcripts exit node
-	sumleft.Resize(n,0);
-	sumright.Resize(n,0);
-
-	//bool full=true;
-	//if(longreads && path.Count()>3) full=false;
 
 	/*
 	{ // DEBUG ONLY
-		//printTime(stderr);
-		fprintf(stderr,"Start push max flow algorithm for path ");
-		//printBitVec(pathpat);
+		printTime(stderr);
+		fprintf(stderr,"Start max flow algorithm for path ");
+		printBitVec(pathpat);
 		fprintf(stderr," :");
 		for(int i=0;i<n;i++) fprintf(stderr," %d:%d",i,path[i]);
 		fprintf(stderr,"\n");
-		//fprintf(stderr,"Used transcripts:");
-		//for(int i=0;i<transfrag.Count();i++) if(istranscript[i]) fprintf(stderr," %d(%f)",i,transfrag[i]->abundance);
-		//fprintf(stderr,"\n");
+		fprintf(stderr,"Used transcripts:");
+		for(int i=0;i<transfrag.Count();i++) if(istranscript[i]) fprintf(stderr," %d",i);
+		fprintf(stderr,"\n");
 	}
 	*/
 
-	// compute capacities and sums for all nodes
-	for(int i=1;i<n-1;i++) {
+	for(int i=0;i<n;i++) {
+		node2path[path[i]]=i;
+		nodecapacity.cAdd(0.0);
+		noderate.cAdd(1.0); // I set up all rates to be 1 for now
+		capacity[i].Resize(n,0);
+		flow[i].Resize(n,0);
+	}
+
+	float max_fl=0;
+
+	// establish capacities in the network
+	for(int i=0;i<n;i++) {
 		int nt=no2gnode[path[i]]->trf.Count();
+		float sumleft=0;
+		float sumright=0;
 		for(int j=0;j<nt;j++) {
 			int t=no2gnode[path[i]]->trf[j];
-			if(transfrag[t]->abundance) {
-				bool keeptr=false;
-				if(istranscript[t]) keeptr=true;
-				else if(!transfrag[t]->nodes[0]) {
-					if(transfrag[t]->nodes.Last()==path[1]) keeptr=true;
-				}
-				else if(transfrag[t]->nodes.Last()==path.Last()) {
-					if(transfrag[t]->nodes[0]==path[n-2]) keeptr=true;
+			if(transfrag[t]->nodes[0]==path[i] && transfrag[t]->abundance && (istranscript[t] || ((pathpat & transfrag[t]->pattern)==transfrag[t]->pattern))) {
+				bool keeptr=true;
+				if(i==0) max_fl=transfrag[t]->abundance; // this is the flow from source on this path
+				else {
+					int ti=1;
+					int pi=i+1;
+					int lenp=0;
+					while(ti<transfrag[t]->nodes.Count()) {
+						if(path[pi]!=transfrag[t]->nodes[ti]) { // I found a gap in transfrag => I need to check if it's not too big and can not support path
+							// node on path is coming before transfrag; otherwise I wouldn't have the match above
+							lenp+=no2gnode[path[pi]]->len();
+							if(lenp>CHI_WIN) {
+								keeptr=false;
+								break;
+							}
+						}
+						ti++;
+						pi++;
+					}
 				}
-				else if((pathpat & transfrag[t]->pattern)==transfrag[t]->pattern) {
+				if(keeptr) {
+					istranscript[t]=1;
+					int n1=i;
+					int n2=node2path[transfrag[t]->nodes.Last()];
+					//fprintf(stderr,"t=%d n1=%d n2=%d(%d) ",t,n1,n2,transfrag[t]->nodes.Last());
+					if(!capacity[n1][n2]) { // haven't seen this link before
+						link[n1].Add(n2);
+						link[n2].Add(n1);
+					}
+					capacity[n1][n2]+=transfrag[t]->abundance;
+					//fprintf(stderr,"capacity[%d][%d]=%f after adding transfrag[%d]->abundance=%f\n",n1,n2,capacity[n1][n2],t,transfrag[t]->abundance);
+				}
+			}
+			if(i>1 && i<n-2) { // rate needs to stay 1 for nodes next to source and sink
+				if(path[i]==transfrag[t]->nodes[0]) sumright+=transfrag[t]->abundance; // how many transfrags exit node
+				if(path[i]==transfrag[t]->nodes.Last()) sumleft+=transfrag[t]->abundance; // how many transfrags enter node
+			}
+		}
+		if(sumright && sumleft) {
+			noderate[i]=sumright/sumleft;
+		}
+	}
+
+	/*
+	fprintf(stderr,"Used transcripts:");
+	for(int i=0;i<transfrag.Count();i++) if(istranscript[i]) fprintf(stderr," %d(%f)",i,transfrag[i]->abundance);
+	fprintf(stderr,"\n");
+	*/
+
+	for(int i=0;i<n;i++) link[i].Sort();
+
+	GVec<float> rate;
+	rate.Resize(n,1);
+
+	while(bfs(n,capacity,flow,link,pred)) {
+		int r=0;
+		float increment=max_fl;
+		rate[r++]=1;
+		for(int u=n-1;pred[u]>=0;u=pred[u]) {
+			float adjflux=(capacity[pred[u]][u]-flow[pred[u]][u])*rate[r-1];
+			increment = increment < adjflux ? increment : adjflux; // minimum flux increment on the path
+			if(pred[pred[u]]>=0) {
+				if(pred[u]<u) {
+					if(pred[pred[u]]<pred[u]) rate[r]=rate[r-1]*noderate[pred[u]]; //no2gnode[path[pred[u]]]->rate;
+					else rate[r]=rate[r-1];
+				}
+				else {
+					if(pred[pred[u]]<pred[u]) rate[r]=rate[r-1];
+					else rate[r]=rate[r-1]/noderate[pred[u]];  //no2gnode[path[pred[u]]]->rate;
+				}
+				r++;
+			}
+		}
+		r=0;
+		for(int u=n-1;pred[u]>=0;u=pred[u]) {
+			flow[pred[u]][u]+=increment/rate[r];
+			flow[u][pred[u]]-=increment/rate[r];
+			r++;
+		}
+		flux+=increment;
+	}
+
+	/*
+	{ // DEBUG ONLY
+		fprintf(stderr,"Flow:");
+		for(int n1=0;n1<n;n1++)
+			for(int n2=n1+1;n2<n;n2++) if(flow[n1][n2]) fprintf(stderr," [%d][%d]=%f",n1,n2,flow[n1][n2]);
+		fprintf(stderr,"\n");
+	}
+	*/
+
+	// adjust transfrag abundances
+	for(int i=0;i<n;i++) {
+		int nt=no2gnode[path[i]]->trf.Count();
+		float sumout=0;
+		int pos=-1;
+		for(int j=0;j<nt;j++) {
+			int t=no2gnode[path[i]]->trf[j];
+			if(istranscript[t] && transfrag[t]->abundance) {
+				if(transfrag[t]->nodes[0]==path[i]) { // transfrag starts at this node
+					int n1=i;
+					int n2=node2path[transfrag[t]->nodes.Last()];
+					if(flow[n1][n2]>0) {
+						if(flow[n1][n2]<transfrag[t]->abundance) {
+							if(!i) sumout+=flow[n1][n2];
+							//fprintf(stderr,"Update capacity of transfrag[%d] with value %f to %f\n",t,transfrag[t]->abundance, transfrag[t]->abundance-flow[n1][n2]);
+							update_capacity(0,transfrag[t],flow[n1][n2],nodecapacity,node2path);
+							//if(path[i] && transfrag[t]->nodes.Last()!=gno-1) fragno+=flow[n1][n2];
+							flow[n1][n2]=0;
+						}
+						else {
+							if(!i) sumout+=transfrag[t]->abundance;
+							flow[n1][n2]-=transfrag[t]->abundance;
+							//if(path[i] && transfrag[t]->nodes.Last()!=gno-1) fragno+=transfrag[t]->abundance;
+							//fprintf(stderr,"Update capacity of transfrag[%d] with value=%f to 0\n",t,transfrag[t]->abundance);
+							update_capacity(0,transfrag[t],transfrag[t]->abundance,nodecapacity,node2path);
+						}
+					}
+				}
+				else if(!i && transfrag[t]->nodes.Last()==path[i]) pos=j;
+			}
+		}
+		if(!i && pos>-1) { // this is first node -> adjust entering transfrag
+			int t=no2gnode[path[i]]->trf[pos];
+			float val=sumout/noderate[i]; //no2gnode[path[i]]->rate;
+			transfrag[t]->abundance-=val;
+			if(transfrag[t]->abundance<epsilon) transfrag[t]->abundance=0;
+		}
+	}
+
+	// clean up
+	delete [] capacity;
+	delete [] flow;
+	delete [] link;
+
+	return(flux);
+}
+
+
+// version of push_max_flow where I weight the incomplete transfrags
+float push_max_flow(int gno,GVec<int>& path,GBitVec& istranscript,GPVec<CTransfrag>& transfrag,GPVec<CGraphnode>& no2gnode,
+		GVec<float>& nodeflux,GBitVec& pathpat, GIntHash<int> &gpos, bool &full) {
+
+	int n=path.Count();
+	GVec<int> node2path;
+	node2path.Resize(gno,-1);
+	for(int i=0;i<n;i++) {
+		node2path[path[i]]=i;
+		nodeflux.cAdd(0.0);
+	}
+	GVec<float> capacityleft;   // how many transcripts compatible to path enter node
+	GVec<float> capacityright;  // how many transcripts compatible to path exit node
+	capacityleft.Resize(n,0);
+	capacityright.Resize(n,0);
+	GVec<float> sumleft;        // how many transcripts enter node
+	GVec<float> sumright;       // how many transcripts exit node
+	sumleft.Resize(n,0);
+	sumright.Resize(n,0);
+
+	//bool full=true;
+	//if(longreads && path.Count()>3) full=false;
+
+	/*
+	{ // DEBUG ONLY
+		//printTime(stderr);
+		fprintf(stderr,"Start push max flow algorithm for path ");
+		//printBitVec(pathpat);
+		fprintf(stderr," :");
+		for(int i=0;i<n;i++) fprintf(stderr," %d:%d",i,path[i]);
+		fprintf(stderr,"\n");
+		//fprintf(stderr,"Used transcripts:");
+		//for(int i=0;i<transfrag.Count();i++) if(istranscript[i]) fprintf(stderr," %d(%f)",i,transfrag[i]->abundance);
+		//fprintf(stderr,"\n");
+	}
+	*/
+
+	// compute capacities and sums for all nodes
+	for(int i=1;i<n-1;i++) {
+		int nt=no2gnode[path[i]]->trf.Count();
+		for(int j=0;j<nt;j++) {
+			int t=no2gnode[path[i]]->trf[j];
+			if(transfrag[t]->abundance) {
+				bool keeptr=false;
+				if(istranscript[t]) keeptr=true;
+				else if(!transfrag[t]->nodes[0]) {
+					if(transfrag[t]->nodes.Last()==path[1]) keeptr=true;
+				}
+				else if(transfrag[t]->nodes.Last()==path.Last()) {
+					if(transfrag[t]->nodes[0]==path[n-2]) keeptr=true;
+				}
+				else if(transfrag[t]->nodes[0]==path[i] && ((pathpat & transfrag[t]->pattern)==transfrag[t]->pattern)) { // only need to check transfrag the first time I encounter it
 					keeptr=true;
-					if(!full) { // check if transcript fully supports path (full is false means I have not found any transcript to fully support path)
+
+					if(longreads) { // an extremely gapped transcript should not be considered to support path (I am doing this for longreads but it might work for paired reads too
+						int ti=1;
+						int pi=i+1;
+						int lenp=0;
+						while(ti<transfrag[t]->nodes.Count()) {
+							if(path[pi]!=transfrag[t]->nodes[ti]) { // I found a gap in transfrag => I need to check if it's not too big and can not support path
+								// node on path is coming before transfrag; otherwise I wouldn't have the match above
+								lenp+=no2gnode[path[pi]]->len();
+								if(lenp>CHI_WIN) {
+									keeptr=false;
+									break;
+								}
+							}
+							ti++;
+							pi++;
+						}
+					}
+
+					if(keeptr && !full) { // check if transcript fully supports path (full is false means I have not found any transcript to fully support path)
 						full=true;
 						int p=1;
 						if(!transfrag[t]->longstart || !transfrag[t]->longend) full=false;
@@ -8564,7 +9045,6 @@
 
 
 
-
 float weight_max_flow(int gno,GVec<int>& path,GBitVec& istranscript,GPVec<CTransfrag>& transfrag,GPVec<CGraphnode>& no2gnode,
 		GVec<float>& nodecapacity,GBitVec& pathpat) {//,float& fragno) {
 
@@ -9005,15 +9485,422 @@
 	return(np);
 }
 
+int best_trf_match(CTransfrag *t,GVec<CTransfrag>& keeptrf,GPVec<CGraphnode>& no2gnode,int gno) {
+	int mineditdist=no2gnode[gno-2]->end-no2gnode[1]->start+1;
+	int mininternaldist=mineditdist;
+	int maxintersect=0;
+	int mink=-1;
+	for(int k=0;k<keeptrf.Count();k++) if(t->nodes[0]<=keeptrf[k].nodes[keeptrf[k].nodes.Count()-2] && keeptrf[k].nodes[1]<=t->nodes.Last()){
+		int editdist=0;
+		int internaldist=0;
+		int intersect=0;
+		int i=0; // t index
+		int j=1; // k index
+
+		while(j<keeptrf[k].nodes.Count()-1 && t->nodes[i]>keeptrf[k].nodes[j]) {
+			editdist+=no2gnode[keeptrf[k].nodes[j]]->len();
+			j++;
+		}
+
+		while(i<t->nodes.Count() && t->nodes[i]<keeptrf[k].nodes.Last() && j<keeptrf[k].nodes.Count()-1) {
+			if(t->nodes[i]<keeptrf[k].nodes[j]) {
+				editdist+=no2gnode[t->nodes[i]]->len();
+				if(j>1) internaldist+=no2gnode[t->nodes[i]]->len();
+				i++;
+			}
+			else if(t->nodes[i]>keeptrf[k].nodes[j]) {
+				editdist+=no2gnode[keeptrf[k].nodes[j]]->len();
+				internaldist+=no2gnode[keeptrf[k].nodes[j]]->len();
+				j++;
+			}
+			else { // nodes are equal
+				i++;
+				j++;
+				intersect++;
+			}
+		}
+
+		if(!intersect || internaldist>CHI_THR) continue;
+
+		if(intersect>maxintersect) {
+			mink=k;
+			while(i<t->nodes.Count()) { // i>=1 because t intersects k
+				editdist+=no2gnode[t->nodes[i]]->len();
+				if(no2gnode[t->nodes[i-1]]->end+1<no2gnode[t->nodes[i]]->start) internaldist+=no2gnode[t->nodes[i]]->len();
+				i++;
+			}
+			while(j<keeptrf[k].nodes.Count()-1) {
+				editdist+=no2gnode[keeptrf[k].nodes[j]]->len();
+				j++;
+			}
+			mineditdist=editdist;
+			mininternaldist=internaldist;
+			maxintersect=intersect;
+		}
+		else if(intersect==maxintersect) {
+			if(internaldist<mininternaldist) {
+				while(i<t->nodes.Count()) { // i>=1 because t intersects k
+					editdist+=no2gnode[t->nodes[i]]->len();
+					if(no2gnode[t->nodes[i-1]]->end+1<no2gnode[t->nodes[i]]->start) internaldist+=no2gnode[t->nodes[i]]->len();
+					i++;
+				}
+				while(j<keeptrf[k].nodes.Count()-1) {
+					editdist+=no2gnode[keeptrf[k].nodes[j]]->len();
+					j++;
+				}
+				if(internaldist<mininternaldist) {
+					mink=k;
+					mineditdist=editdist;
+					mininternaldist=internaldist;
+				}
+			}
+			else if(internaldist==mininternaldist && editdist<mineditdist) {
+				while(i<t->nodes.Count()) {
+					editdist+=no2gnode[t->nodes[i]]->len();
+					i++;
+				}
+				while(j<keeptrf[k].nodes.Count()-1) {
+					editdist+=no2gnode[keeptrf[k].nodes[j]]->len();
+					j++;
+				}
+				if(editdist<mineditdist) {
+					mink=k;
+					mineditdist=editdist;
+				}
+			}
+		}
+	}
+	return(mink);
+}
+
+void get_trf_long(int gno,int edgeno, GIntHash<int> &gpos,GPVec<CGraphnode>& no2gnode,GPVec<CTransfrag>& transfrag,
+		int& geneno,int strand,GList<CPrediction>& pred,GVec<int>& trflong,BundleData *bdata) {
+
+	GPVec<GffObj>& guides = bdata->keepguides;
+
+	GVec<float> nodecov;
+	GVec<float> noderate;
+	for(int i=0;i<gno;i++) {
+		CGraphnode *inode=no2gnode[i]; // this is here only because of the DEBUG option below
+		nodecov.cAdd(0.0); // this are all transfrags that link nodes together
+		float rate=1;
+		if(i && i<gno-1) {
+			for(int j=0;j<inode->trf.Count();j++) { // for all transfrags going through node
+				int t=inode->trf[j];
+				if(transfrag[t]->nodes[0]<i) // entering transfrags:
+					nodecov[i]+=transfrag[t]->abundance;
+			}
+			if(nodecov[i]) rate=nodecov[i];
+			if(rate<=0) rate=1; // this shouldn't happen
+			//fprintf(stderr,"rate=%f\n",rate);
+			rate=inode->cov/rate;
+		}
+		noderate.Add(rate);
+		//fprintf(stderr,"Node[%d] no2gnode->cov=%f nodecov=%f noderate=%f\n",i,inode->cov,nodecov[i],noderate[i]);
+	}
+
+	GBitVec istranscript(transfrag.Count());
+
+	 GVec<int> path;
+	 GBitVec pathpat(gno+edgeno);
+	 int minp;
+	 int maxp;
+	 int maxi;
+
+	 char sign='-';
+	 if(strand) { sign='+';}
+	 int npred=pred.Count();
+
+	 GVec<CTransfrag> keeptrf;
+	 GVec<int> checktrf;
+	 for(int f=trflong.Count()-1;f>=0;f--) { // if this is a guide it should be reflected in the prediction downstream
+		 path.Clear();
+		 int t=trflong[f];
+		 pathpat=transfrag[t]->pattern;
+		 minp=transfrag[t]->nodes[0];
+		 maxp=transfrag[t]->nodes.Last();
+		 //if(no2gnode[transfrag[t]->nodes[0]]->hardstart) {
+			 int *pos=gpos[edge(0,minp,gno)];
+			 if(pos) pathpat[*pos]=1;
+			 //minp=0;
+		 //}
+		 //if(no2gnode[transfrag[t]->nodes.Last()]->hardend) {
+			 pos=gpos[edge(maxp,gno-1,gno)];
+			 if(pos) pathpat[*pos]=1;
+			 //maxp=gno-1;
+		 //}
+		 maxi=minp;
+		 path.Add(maxi);
+		 pathpat[maxi]=1;
+
+		 istranscript.reset();
+
+		 float flux=0;
+		 //float fragno=0;
+		 GVec<float> nodeflux;
+
+		 /*
+	 	 { // DEBUG ONLY
+	 	 fprintf(stderr,"\n\n***Start parse_trf with maxi=%d and transcript:",maxi);
+	 	 for(int i=0;i<transfrag[t]->nodes.Count();i++) fprintf(stderr," %d",transfrag[t]->nodes[i]);
+	 	 fprintf(stderr," pathpat=");
+	 	 printBitVec(pathpat);
+		 fprintf(stderr,"\n");
+
+#ifdef GMEMTRACE
+	 	 double vm,rsm;
+	 	 get_mem_usage(vm, rsm);
+	 	 GMessage("\t\tM(s):parse_trf memory usage: rsm=%6.1fMB vm=%6.1fMB\n",rsm/1024,vm/1024);
+#endif
+	 	 }
+	 	 */
+
+		 bool tocheck=true;
+		 if(back_to_source_fast_long(maxi,path,minp,maxp,pathpat,transfrag,no2gnode,nodecov,gno,gpos)) {
+			 path.cAdd(0);
+			 path.Reverse(); // back to source adds the nodes at the end to avoid pushing the list all the time
+
+			 if(fwd_to_sink_fast_long(maxi,path,minp,maxp,pathpat,transfrag,no2gnode,nodecov,gno,gpos)) {
+
+				 flux=long_max_flow(gno,path,istranscript,transfrag,no2gnode,nodeflux,pathpat);
+
+				 /*
+				 { // DEBUG ONLY
+					 //printTime(stderr);
+					 fprintf(stderr,"flux=%g Path:",flux);
+					 for(int i=0;i<path.Count();i++) fprintf(stderr," %d",path[i]);
+					 fprintf(stderr,"\n");
+					 fprintf(stderr,"Nodecapacities:");
+					 for(int i=0;i<path.Count();i++) fprintf(stderr," %f",nodeflux[i]);
+					 fprintf(stderr,"***\n");
+				 }
+				 */
+
+				 if(flux) { // these are not valid paths in the graph
+					 tocheck=false;
+
+					 GVec<GSeg> exons;
+					 GVec<float> exoncov;
+					 int j=1;
+					 int len=0;
+					 float cov=0;
+					 while(j<path.Count()-1) {
+						 int nodestart=no2gnode[path[j]]->start;
+						 int nodeend=no2gnode[path[j]]->end;
+						 nodecov[path[j]]-=nodeflux[j];
+						 len+=nodeend-nodestart+1;
+						 float excov=nodeflux[j]*noderate[path[j]];
+						 while(j+1<path.Count()-1 && no2gnode[path[j]]->end+1==no2gnode[path[j+1]]->start) {
+							 j++;
+							 len+=no2gnode[path[j]]->len();
+							 nodeend=no2gnode[path[j]]->end;
+							 excov+=nodeflux[j]*noderate[path[j]];
+						 }
+						 GSeg exon(nodestart,nodeend);
+						 exons.Add(exon);
+						 cov+=excov;
+						 exoncov.Add(excov);
+						 j++;
+					 }
+					 if(transfrag[t]->nodes.Count()==1) transfrag[t]->abundance=0;
+					 //fprintf(stderr,"Store prediction %d  with abundance=%f len=%d\n",pred.Count(),cov/len,len);
+					 GffObj *g=NULL;
+					 if(transfrag[t]->guide) {
+						 g=guides[int(transfrag[t]->usepath)];
+						 if (g && g->uptr) {
+							 RC_TData &td = *(RC_TData*) (g->uptr);
+							 td.in_bundle=3;
+							 //fprintf(stderr,"sg guide %s is stored\n",g->getID());
+						 }
+					 }
+					 CPrediction *p=new CPrediction(geneno, g,exons[0].start , exons.Last().end, cov, sign, len);
+					 p->exons=exons;
+					 p->exoncov=exoncov;
+					 p->mergename='.'; // I should not delete this prediction
+					 pred.Add(p);
+
+					 CTransfrag u(path,pathpat,cov/len);
+					 keeptrf.Add(u);
+				 }
+				 else if(transfrag[t]->guide) checktrf.Add(t);
+			 }
+		 }
+
+		 if(tocheck)  // try to see if you can rescue transfrag -> they are stored from more abundant to least
+			 checktrf.Add(t);
+	 }
+
+	 //keeptrf.Sort(longtrCmp); // most abundant transfrag in the graph come first, then the ones with most nodes, then the ones more complete
+
+	 for(int c=0;c<checktrf.Count();c++) if(transfrag[checktrf[c]]->guide || transfrag[checktrf[c]]->abundance>=readthr) { // only in this case it is worth considering it as a potential prediction
+		 int t=checktrf[c];
+		 int mink=best_trf_match(transfrag[t],keeptrf,no2gnode,gno);
+		 if(mink>=0) { // found good match
+			 int p=0;
+			 int i=0;
+			 int np=npred+mink;
+			 while(i<transfrag[t]->nodes.Count() && p<pred[np]->exons.Count()) {
+				 if(no2gnode[transfrag[t]->nodes[i]]->end<pred[np]->exons[p].start) i++;
+				 else if(pred[np]->exons[p].end<no2gnode[transfrag[t]->nodes[i]]->start) p++;
+				 else { // the two intersect (I can only have the full node included in exon)
+					 float addcov=transfrag[t]->abundance*noderate[transfrag[t]->nodes[i]];
+					 pred[np]->exoncov[p]+=transfrag[t]->abundance*noderate[transfrag[t]->nodes[i]];
+					 pred[np]->cov+=addcov;
+					 i++;
+				 }
+			 }
+		 }
+		 else { // store it as an independent prediction
+			 pathpat=transfrag[t]->pattern; // not used right now but maybe in the future?
+			 path.Clear();
+			 path.cAdd(0);
+			 path.Add(transfrag[t]->nodes[0]);
+			 for(int j=1;j<transfrag[t]->nodes.Count();j++) {
+				 if(transfrag[t]->nodes[j]!=1+transfrag[t]->nodes[j-1] ||
+						 no2gnode[transfrag[t]->nodes[j]]->start-1!=no2gnode[transfrag[t]->nodes[j-1]]->end) {
+					 // check if transfrag t1 is incomplete between node[n-1] and node [n]
+					 int *pos=gpos[edge(transfrag[t]->nodes[j-1],transfrag[t]->nodes[j],gno)];
+					 if(!pos || !transfrag[t]->pattern[*pos]) { // incomplete transfrag
+						 break;
+					 }
+					 if(pos) pathpat[*pos]=1;
+					 path.Add(transfrag[t]->nodes[j]);
+				 }
+				 else path.Add(transfrag[t]->nodes[j]);
+			 }
+			 if(path.Last()==transfrag[t]->nodes.Last()) { // this transfrag is complete, might be worth rescuing
+				 int sink=gno-1;
+				 path.Add(sink);
+
+				 GVec<GSeg> exons;
+				 GVec<float> exoncov;
+				 int j=1;
+				 int len=0;
+				 float cov=0;
+				 while(j<path.Count()-1) {
+					 int nodestart=no2gnode[path[j]]->start;
+					 int nodeend=no2gnode[path[j]]->end;
+					 nodecov[path[j]]-=transfrag[t]->abundance;
+					 len+=nodeend-nodestart+1;
+					 float excov=transfrag[t]->abundance*noderate[path[j]];
+					 while(j+1<path.Count()-1 && no2gnode[path[j]]->end+1==no2gnode[path[j+1]]->start) {
+						 j++;
+						 len+=no2gnode[path[j]]->len();
+						 nodeend=no2gnode[path[j]]->end;
+						 excov+=transfrag[t]->abundance*noderate[path[j]];
+					 }
+					 GSeg exon(nodestart,nodeend);
+					 exons.Add(exon);
+					 cov+=excov;
+					 exoncov.Add(excov);
+					 j++;
+				 }
+				 GffObj *g=NULL;
+				 if(transfrag[t]->guide) {
+					 g=guides[int(transfrag[t]->usepath)];
+					 if (g && g->uptr) {
+						 RC_TData &td = *(RC_TData*) (g->uptr);
+						 td.in_bundle=3;
+						 //fprintf(stderr,"sg guide %s is stored\n",g->getID());
+					 }
+				 }
+				 //fprintf(stderr,"Store prediction %d  with abundance=%f\n",pred.Count(),cov/len);
+				 CPrediction *p=new CPrediction(geneno, NULL,exons[0].start , exons.Last().end, cov, sign, len);
+				 p->exons=exons;
+				 p->exoncov=exoncov;
+				 pred.Add(p);
+
+				 CTransfrag u(path,pathpat,cov/len);
+				 keeptrf.Add(u);
+			 }
+		 }
+		 transfrag[t]->abundance=0;
+	 }
+
+	 for(int t=0;t<transfrag.Count();t++) {
+		 if(transfrag[t]->abundance>epsilon && transfrag[t]->nodes[0] && transfrag[t]->nodes.Last()!=gno-1 ) {
+			 /*fprintf(stderr,"Consider transfrag[%d]->abundance=%f with nodes:",t,transfrag[t]->abundance);
+		 	 for(int i=0;i<transfrag[t]->nodes.Count();i++) fprintf(stderr," %d",transfrag[t]->nodes[i]);
+			 fprintf(stderr,"\n");*/
+			 int mink=best_trf_match(transfrag[t],keeptrf,no2gnode,gno);
+
+			 if(mink>=0) { // mink gives the prediction's position too
+				 /*fprintf(stderr,"Added to prediction:");
+			 	 for(int i=0;i<keeptrf[mink].nodes.Count();i++) fprintf(stderr," %d",keeptrf[mink].nodes[i]);*/
+				 int p=0;
+				 int i=0;
+				 int np=npred+mink;
+				 while(i<transfrag[t]->nodes.Count() && p<pred[np]->exons.Count()) {
+					 if(no2gnode[transfrag[t]->nodes[i]]->end<pred[np]->exons[p].start) i++;
+					 else if(pred[np]->exons[p].end<no2gnode[transfrag[t]->nodes[i]]->start) p++;
+					 else { // the two intersect (I can only have the full node included in exon)
+						 float addcov=transfrag[t]->abundance*noderate[transfrag[t]->nodes[i]];
+						 pred[np]->exoncov[p]+=transfrag[t]->abundance*noderate[transfrag[t]->nodes[i]];
+						 pred[np]->cov+=addcov;
+						 i++;
+					 }
+				 }
+				 //fprintf(stderr," new abundance=%f\n",pred[np]->cov/pred[np]->tlen);
+			 }
+		 }
+	 }
+
+	 for(int p=npred;p<pred.Count();p++) {
+		 if(pred[p]->cov) {
+			 pred[p]->cov/=pred[p]->tlen;
+			 for(int i=0;i<pred[p]->exons.Count();i++)
+				 pred[p]->exoncov[i]/=pred[p]->exons[i].len();
+		 }
+		 else pred.Delete(p);
+	 }
+
+}
+
 
 void parse_trf_long(int maxi,int gno,int edgeno, GIntHash<int> &gpos,GPVec<CGraphnode>& no2gnode,GPVec<CTransfrag>& transfrag,
 		int& geneno,bool first,int strand,GList<CPrediction>& pred,GVec<float>& nodecov,
-		GBitVec& istranscript,GBitVec& removable,GBitVec& usednode,float maxcov,GBitVec& prevpath) {
+		GBitVec& istranscript,GBitVec& removable,GBitVec& usednode,float maxcov,GBitVec& prevpath,GVec<int>& trflong) {
 
 	 GVec<int> path;
-	 path.Add(maxi);
 	 GBitVec pathpat(gno+edgeno);
+	 int minp=maxi;
+	 int maxp=maxi;
+
+	 bool usetrflong=false;
+
+	 if(trflong.Count()) {
+		 /**** this version still maintains maxi in the center
+		 int i=trflong.Count()-1;
+		 int t=-1;
+		 while(i>=0) {
+			 if(transfrag[trflong[i]]->pattern[maxi]) {
+				 t=trflong[i];
+				 trflong.Delete(i);
+				 fprintf(stderr,"found transcript t=%d\n",t);
+				 break;
+			 }
+			 i--;
+		 }
+		 if(t<0) {****/
+		 int t=trflong.Pop();
+		 //}
+		 usetrflong=true;
+		 pathpat=pathpat | transfrag[t]->pattern;
+		 minp=transfrag[t]->nodes[0];
+		 maxp=transfrag[t]->nodes.Last();
+		 int *pos=gpos[edge(0,minp,gno)];
+		 if(pos) pathpat[*pos]=1;
+		 pos=gpos[edge(maxp,gno-1,gno)];
+		 if(pos) pathpat[*pos]=1;
+		 maxi=minp;
+	 }
+	 //else return;
+
+	 path.Add(maxi);
 	 pathpat[maxi]=1;
+
+
+	 bool full=false;
 	 istranscript.reset();
 
 	 float flux=0;
@@ -9035,9 +9922,6 @@
 	 }
 	*/
 
-	 int minp=maxi;
-	 int maxp=maxi;
-	 bool full=false;
 
 	 if(back_to_source_fast_long(maxi,path,minp,maxp,pathpat,transfrag,no2gnode,nodecov,gno,gpos)) {
 		 if(includesource) path.cAdd(0);
@@ -9047,14 +9931,14 @@
 
 			 flux=push_max_flow(gno,path,istranscript,transfrag,no2gnode,nodeflux,pathpat,gpos,full);
 
-			 /*
+			 	 /*
 	 			 { // DEBUG ONLY
 	 				 //printTime(stderr);
 	 				 fprintf(stderr,"flux=%g Path:",flux);
 	 				 for(int i=0;i<path.Count();i++) fprintf(stderr," %d",path[i]);
 	 				 fprintf(stderr,"***\n");
 	 			 }
-			 */
+	 			 */
 
 	 		}
 			/*else {
@@ -9082,11 +9966,7 @@
 		 }
 		 */
 
-		 if(included || cov<isofrac*maxcov) {
-			 /*
-			 if(sensitivitylevel) usednode[maxi]=1;
-			 else usednode = usednode | prevpath;
-			 */
+		 if(!usetrflong && (included || cov<isofrac*maxcov)) { // maybe not use it for long reads if
 			 //fprintf(stderr,"included\n");
 			 usednode[maxi]=1;
 			 //fprintf(stderr,"used maxi=%d\n",maxi);
@@ -9144,7 +10024,7 @@
 
 		 path.Clear();
 		 nodeflux.Clear();
-		 parse_trf_long(maxi,gno,edgeno,gpos,no2gnode,transfrag,geneno,first,strand,pred,nodecov,istranscript,removable,usednode,maxcov,prevpath);
+		 parse_trf_long(maxi,gno,edgeno,gpos,no2gnode,transfrag,geneno,first,strand,pred,nodecov,istranscript,removable,usednode,maxcov,prevpath,trflong);
 	 }
 
 }
@@ -9463,10 +10343,10 @@
 		}
 	}
 
-	if(longreads && guidetrf.Count()) { // if a guide is not matching any transcript the best do not include
+	/*if(longreads && guidetrf.Count()) { // if a guide is not matching any transcript the best do not include TODO: rewrite this!!
 		int ng=guidetrf.Count();
 		GVec<bool> covered(ng,false);
-		GVec<int> gcount;
+		GVec<int> gcount; // count number of bits set in each guide's pattern
 		for(int g=0;g<guidetrf.Count();g++) {
 			int gc=guidetrf[g].trf->pattern.count();
 			gcount.Add(gc);
@@ -9485,7 +10365,7 @@
 					maxcovscore=covscore;
 					//mingscore=gcount[g]-covscore;
 				}
-				/*else if(covscore==maxcovscore){
+				else if(covscore==maxcovscore){
 					int gcov=gcount[g]-covscore;
 					if(gcov<mingscore) {
 						mingscore=gcov;
@@ -9494,7 +10374,7 @@
 					else if(gcov==mingscore && !covered[maxg] && covered[g]) {
 						maxg=g;
 					}
-				}*/
+				}
 				//fprintf(stderr," and guidetrf=%d with gcount=%d has covscore=%d now: mingscore=%d maxg=%d\n",g,gcount[g],covscore,mingscore,maxg);
 			}
 			if(maxg>=0 && !covered[maxg]) {
@@ -9509,7 +10389,7 @@
 				guidetrf.Delete(g);
 			}
 		}
-	}
+	}*/
 
 	// print guide coverages
 	if(c_out) {
@@ -9573,7 +10453,7 @@
 
 	// **** NEXT PORTION NEEDS TO BE CHECKED!!!
 
-	guidetrf.Sort(guideCmp); //longest guide comes first
+	guidetrf.Sort(guideCmp); // guide with most bits set in pattern comes first
 	uint refstart=(uint)bdata->start;
 	int g=0;
 	while(g<guidetrf.Count()) {
@@ -9606,10 +10486,11 @@
 				pos=gpos[key];
 			}
 
-			if(includesource)
+			if(includesource && !longreads) {
 				guidetrf[g].trf->nodes.Insert(0,0); // I need to comment this if I need path not to include the source
-			guidetrf[g].trf->pattern[0]=1;
-			guidetrf[g].trf->pattern[*pos]=1;
+				guidetrf[g].trf->pattern[0]=1;
+				guidetrf[g].trf->pattern[*pos]=1;
+			}
 
 			bool sourcestart=false; // assume there is no extension to source
 			CGraphnode *inode=no2gnode[nodei];
@@ -9672,8 +10553,6 @@
 		nodei=guidetrf[g].trf->nodes.Last();
 		if(nodei != gno-1) { // sink is not already present in guide pattern
 			int sink=gno-1;
-			guidetrf[g].trf->nodes.Add(sink);
-			guidetrf[g].trf->pattern[sink]=1;
 			int *pos=gpos[edge(nodei,sink,gno)];
 			if(!pos) {
 				//fprintf(stderr,"Add sink link from position %d for guide=%d\n",no2gnode[nodei]->end,guidetrf[g].g);
@@ -9682,7 +10561,11 @@
 				lastgpos++;
 				pos=gpos[key];
 			}
-			guidetrf[g].trf->pattern[*pos]=1;
+			if(!longreads) {
+				guidetrf[g].trf->nodes.Add(sink);
+				guidetrf[g].trf->pattern[sink]=1;
+				guidetrf[g].trf->pattern[*pos]=1;
+			}
 
 			bool sinkend=false;
 			CGraphnode *inode=no2gnode[nodei];
@@ -9744,8 +10627,6 @@
 	}
 
 
-
-
 }
 
 bool is_reference_transcript(GVec<CGuide>& guidetrf,GBitVec& pattern) {
@@ -10472,14 +11353,15 @@
 }
 
 
-int find_transcripts(int gno,int edgeno, GIntHash<int> &gpos,GPVec<CGraphnode>& no2gnode,GPVec<CTransfrag>& transfrag,
-		int geneno,int strand,GVec<CGuide>& guidetrf,GList<CPrediction>& pred,GPVec<GffObj>& guides,GVec<int>& guidepred,BundleData* bdata) {
+int find_transcripts(int gno,int edgeno, GIntHash<int> &gpos,GPVec<CGraphnode>& no2gnode,GPVec<CTransfrag>& transfrag,int geneno,int strand,
+		GVec<CGuide>& guidetrf,GPVec<GffObj>& guides,GVec<int>& guidepred,BundleData* bdata,GVec<int>& trflong) {
+
+	GList<CPrediction>& pred = bdata->pred;
 
 	// process in and out coverages for each node
 	int maxi=0; // node with maximum coverage
 	GVec<float> nodecov; // node coverages
 
-
 	for(int i=0;i<gno;i++) {
 		CGraphnode *inode=no2gnode[i]; // this is here only because of the DEBUG option below
 		nodecov.cAdd(0.0);
@@ -10540,27 +11422,30 @@
 	bool first=true;
 
 	//fprintf(stderr,"guide count=%d\n",guidetrf.Count());
-
-	if(eonly || guidetrf.Count()) maxi=guides_pushmaxflow(gno,edgeno,gpos,no2gnode,transfrag,guidetrf,geneno,strand,pred,nodecov,istranscript,pathpat,first,guides,guidepred,bdata);
+	if(longreads) get_trf_long(gno,edgeno, gpos,no2gnode,transfrag,geneno,strand,pred,trflong,bdata);
+	else {
+		if(eonly || guidetrf.Count()) maxi=guides_pushmaxflow(gno,edgeno,gpos,no2gnode,transfrag,guidetrf,geneno,strand,pred,nodecov,istranscript,pathpat,first,guides,guidepred,bdata);
 
 
-	if(nodecov[maxi]>=1) { // sensitive mode only; otherwise >=readthr
+		if(nodecov[maxi]>=1) { // sensitive mode only; otherwise >=readthr
 
-		// process rest of the transfrags
-		if(!eonly && nodecov[maxi]>=1) {
-			GBitVec removable(transfrag.Count(),true);
+			// process rest of the transfrags
+			if(!eonly && nodecov[maxi]>=1) {
+				GBitVec removable(transfrag.Count(),true);
+
+				// 1:
+				// parse_trf_weight_max_flow(gno,no2gnode,transfrag,geneno,strand,pred,nodecov,pathpat);
+				// 2:
+				GBitVec usednode(gno+edgeno);
+				/*if(longreads) {
+					get_trf_long(gno,edgeno, gpos,no2gnode,transfrag,geneno,strand,pred,trflong);
+					//parse_trf_long(maxi,gno,edgeno,gpos,no2gnode,transfrag,geneno,first,strand,pred,nodecov,istranscript,removable,usednode,0,pathpat,trflong);
+				}
+				else {*/
+					parse_trf(maxi,gno,edgeno,gpos,no2gnode,transfrag,geneno,first,strand,pred,nodecov,istranscript,removable,usednode,0,pathpat);
+					//}
 
-			// 1:
-			// parse_trf_weight_max_flow(gno,no2gnode,transfrag,geneno,strand,pred,nodecov,pathpat);
-			// 2:
-			GBitVec usednode(gno+edgeno);
-			if(longreads) {
-				parse_trf_long(maxi,gno,edgeno,gpos,no2gnode,transfrag,geneno,first,strand,pred,nodecov,istranscript,removable,usednode,0,pathpat);
-			}
-			else {
-				parse_trf(maxi,gno,edgeno,gpos,no2gnode,transfrag,geneno,first,strand,pred,nodecov,istranscript,removable,usednode,0,pathpat);
 			}
-
 		}
 	}
 
@@ -10731,7 +11616,7 @@
 													  // ^ KEEP THIS IN MIND IF WE CHANGE HOW WE USE GOOD_JUNC
 
 	/*** 3. don't keep if it's below threshold ***/
-	if (jd.nreads_good<junctionthr) {
+	if (jd.nreads_good<junctionthr) { // junctionthr is always positive
 	//if (jd.leftsupport<junctionthr || jd.rightsupport<junctionthr || (jd.end-jd.start>longintron && jd.nreads_good<junctionthr)) {
 		jd.strand=0;
 		//fprintf(stderr,"nosupport: left=%f right=%f good=%f\n",jd.leftsupport,jd.rightsupport,jd.nreads_good);
@@ -10873,166 +11758,28 @@
 	GList<CJunction>& junction = bdata->junction;
 	GPVec<GffObj>& guides = bdata->keepguides;
 	GVec<float>* bpcov = bdata->bpcov; // I might want to use a different type of data for bpcov to save memory in the case of very long bundles
-	GList<CPrediction>& pred = bdata->pred;
-	// form groups on strands: all groups below are like this: 0 = negative strand; 1 = unknown strand; 2 = positive strand
-	GPVec<CGroup> group;
-	CGroup *currgroup[3]={NULL,NULL,NULL}; // current group of each type
-	CGroup *startgroup[3]={NULL,NULL,NULL}; // start group of each type
-	int color=0; // next color to assign
-	GVec<int> merge; // remembers merged groups
-	GVec<int> equalcolor; // remembers colors for the same bundle
-	GVec<int> *readgroup=new GVec<int>[readlist.Count()]; // remebers groups for each read; don't forget to delete it when no longer needed
-	GVec<int> guidepred; // for eonly keeps the prediction number associated with a guide
-	GArray<GEdge> guideedge; // 0: negative starts; 1 positive starts
-
-	//fprintf(stderr,"build_graphs with %d guides\n",guides.Count());
-
-	/*
-	GVec<CPred> starts[3];
-	GVec<CPred> ends[3];
-	int edgedist=longintronanchor;
-
-	if(bpcov->Count()>2*edgedist+CHI_WIN) {
-		GVec<float> *diffval=new GVec<float>[3];
-		float sumstartleft[3]={0,0,0};
-		float sumendleft[3]={0,0,0};
-		float sumstartright[3]={0,0,0};
-		float sumendright[3]={0,0,0};
-		float lastcov[3];
-		for(int s=0;s<3;s++)
-			lastcov[s]=get_cov(s,edgedist-1,edgedist-1,bpcov);
-		lastcov[1]-=lastcov[0]+lastcov[2];
-		if(lastcov[1]<0) lastcov[1]=0;
-		for(int i=edgedist;i<CHI_THR+edgedist;i++) {
-			float icov[3];
-			for(int s=0;s<3;s++) icov[s]=get_cov(s,i,i,bpcov);
-			icov[1]-=icov[0]+icov[2];
-			if(icov[1]<0) icov[1]=0;
-			for(int s=0;s<3;s++) {
-				float diff=icov[s]-lastcov[s];
-				diffval[s].Add(diff);
-				if(diff>0) sumstartleft[s]+=diff;
-				else sumendleft[s]-=diff;
-				lastcov[s]=icov[s];
-			}
-		}
-		for(int i=CHI_THR+edgedist;i<CHI_WIN+edgedist;i++) {
-			float icov[3];
-			for(int s=0;s<3;s++) icov[s]=get_cov(s,i,i,bpcov);
-			icov[1]-=icov[0]+icov[2];
-			if(icov[1]<0) icov[1]=0;
-			for(int s=0;s<3;s++) {
-				float diff=icov[s]-lastcov[s];
-				diffval[s].Add(diff);
-				if(diff>0) sumstartright[s]+=diff;
-				else sumendright[s]-=diff;
-				lastcov[s]=icov[s];
-			}
-		}
-
-		for(int i=CHI_WIN+edgedist;i<bpcov->Count()-edgedist;i++) {
-			float icov[3];
-			int m=(i-CHI_THR+edgedist)%CHI_WIN;
-			fprintf(stderr,"m=%d ",i-CHI_THR+refstart);
-			for(int s=0;s<3;s++) {
-				if(diffval[s][m]>1/ERROR_PERC) { // starts here
-					if(sumstartleft[s]<sumstartright[s]*ERROR_PERC) {
-						CPred p(refstart+i-CHI_THR,diffval[s][m]);
-						starts[s].Add(p);
-					}
-				}
-				icov[s]=get_cov(s,i,i,bpcov);
-				if(s!=1) { fprintf(stderr," dfV[%d]=%.1f cov[%d]=%.1f",s,diffval[s][m],i+refstart,icov[s]);}
-			}
-			icov[1]-=icov[0]+icov[2];
-			if(icov[1]<0) icov[1]=0;
-
-			int p=(m+CHI_THR)%CHI_WIN;
-			for(int s=0;s<3;s++) {
-				float diff=icov[s]-lastcov[s];
+	GList<CPrediction>& pred = bdata->pred;
+	// form groups on strands: all groups below are like this: 0 = negative strand; 1 = unknown strand; 2 = positive strand
+	GPVec<CGroup> group;
+	CGroup *currgroup[3]={NULL,NULL,NULL}; // current group of each type
+	CGroup *startgroup[3]={NULL,NULL,NULL}; // start group of each type
+	int color=0; // next color to assign
+	GVec<int> merge; // remembers merged groups
+	GVec<int> equalcolor; // remembers colors for the same bundle
+	GVec<int> *readgroup=new GVec<int>[readlist.Count()]; // remebers groups for each read; don't forget to delete it when no longer needed
+	GVec<int> guidepred; // for eonly keeps the prediction number associated with a guide
+	GArray<GEdge> guideedge; // 0: negative starts; 1 positive starts
+	/*GPVec<GPtFeature>& feature = bdata->ptfs; // these are point features (confirmed starts/stops)
 
-				if(s!=1) {fprintf(stderr," [%d](sL=%.1f sR=%.1f sL/sR=",s,sumstartleft[s],sumstartright[s]);
-				if(diffval[s][m]>1/ERROR_PERC) fprintf(stderr,"%.1f",sumstartleft[s]/sumstartright[s]);
-				else fprintf(stderr,"*");}
+	for(int i=0;i<feature.Count();i++) {
+		if(feature[i]->ftype==GPFT_TSS)
+			fprintf(stderr,"TSS at position %d on strand %d\n",feature[i]->coord,feature[i]->strand);
+		if(feature[i]->ftype==GPFT_CPAS)
+			fprintf(stderr,"CPAS at position %d on strand %d\n",feature[i]->coord,feature[i]->strand);
+	}*/
 
-				if(diffval[s][p]>0) {
-					sumstartleft[s]-=diffval[s][p];
-				}
-				else {
-					sumendleft[s]+=diffval[s][p];
-				}
-				if(diffval[s][m]>0) {
-					sumstartleft[s]+=diffval[s][m];
-					sumstartright[s]-=diffval[s][m];
-				}
-				else {
-					sumendleft[s]-=diffval[s][m];
-					sumendright[s]+=diffval[s][m];
-				}
-				diffval[s][p]=diff;
-				if(diff>0) sumstartright[s]+=diff;
-				else sumendright[s]-=diff;
-				lastcov[s]=icov[s];
-				if(s!=1) fprintf(stderr," eL=%.1f eR=%.1f eR/eL=",sumendleft[s],sumendright[s]);
-				if(diffval[s][m]<-1/ERROR_PERC) { // ends here
-					if(sumendleft[s]*ERROR_PERC>sumendright[s]) {
-						CPred p(refstart+i-CHI_THR-1,-diffval[s][m]);
-						ends[s].Add(p);
-					}
-					if(s!=1) fprintf(stderr,"%.1f)",sumendright[s]/sumendleft[s]);
-				}
-				else if(s!=1) fprintf(stderr,"*)");
-			}
-			fprintf(stderr,"\n");
-		}
+	//fprintf(stderr,"build_graphs with %d guides\n",guides.Count());
 
-		for(int s=0;s<3;s++) {
-			int i=0;
-			while(i<starts[s].Count()){
-				if(starts[s][i].cov) {
-					int j=i+1;
-					while(j<starts[s].Count() && starts[s][j].predno-starts[s][i].predno<CHI_WIN) {
-						if(starts[s][j].cov && starts[s][i].cov>starts[s][j].cov) {
-							starts[s][j].cov=0;
-						}
-						else if(starts[s][i].cov<starts[s][j].cov) {
-							starts[s][i].cov=0;
-							//fprintf(stderr,"eliminated by start %d with cov=%f",starts[s][j].predno,starts[s][j].cov);
-							break;
-						}
-						j++;
-					}
-					//fprintf(stderr,"\n");
-				}
-				i++;
-			}
-			for(int i=0;i<starts[s].Count();i++) {
-				if(starts[s][i].cov) fprintf(stderr,"s=%d start at coord %d with cov=%f leftcov=%f rightcov=%f\n",s,starts[s][i].predno,starts[s][i].cov,get_cov(s,starts[s][i].predno-CHI_WIN-refstart,starts[s][i].predno-refstart-1,bpcov),get_cov(s,starts[s][i].predno-refstart,starts[s][i].predno-refstart+CHI_WIN-1,bpcov));
-			}
-			i=0;
-			while(i<ends[s].Count()){
-				if(ends[s][i].cov) {
-					int j=i+1;
-					while(j<ends[s].Count() && ends[s][j].predno-ends[s][i].predno<CHI_WIN) {
-						if(ends[s][j].cov && ends[s][i].cov>ends[s][j].cov) {
-							ends[s][j].cov=0;
-						}
-						else if(ends[s][i].cov<ends[s][j].cov) {
-							ends[s][i].cov=0;
-							break;
-						}
-						j++;
-					}
-				}
-				i++;
-			}
-			for(int i=0;i<ends[s].Count();i++) {
-				if(ends[s][i].cov) fprintf(stderr,"s=%d end at coord %d with cov=%f\n",s,ends[s][i].predno,ends[s][i].cov);
-			}
-		}
-		delete [] diffval;
-	}
-	*/
 
 	if(guides.Count()) {
 
@@ -11192,7 +11939,7 @@
 			while(j>=0 && ejunction[j]->end==end) {
 				ejunction[j]->consright=rightcons;
 				if(!ejunction[j]->guide_match && !rightcons && ejunction[j]->nreads_good<DROP/ERROR_PERC) ejunction[j]->strand=0;
-				if(ejunction[j]->strand>0) ejunction[j]->rightsupport=rightsupport[(1+ejunction[j]->strand)/2];
+				if(ejunction[j]->strand) ejunction[j]->rightsupport=rightsupport[(1+ejunction[j]->strand)/2];
 				j--;
 			}
 			// I do not check here for possible missalignments
@@ -11244,341 +11991,116 @@
 
 		//fprintf(stderr,"In higherr!\n");
 
-		/*
-		int lastleftjunc=-1; // last left junction that is kept
-		int lastrightjunc=-1;
-		for(int i=0;i<junction.Count();i++) {
+		float tolerance=1-ERROR_PERC;
+
+		// strand based version
+		for(int i=0;i<junction.Count();i++) { // junction is sorted based on start
 
-			//fprintf(stderr,"junct:%d-%d:%d rightsupport=%f nm=%f nreads=%f\n",junction[i]->start,junction[i]->end,junction[i]->strand,junction[i]->rightsupport,junction[i]->nm,junction[i]->nreads);
+			//fprintf(stderr,"junct[%d]:%d-%d:%d lefttsupport=%f nm=%f mm=%f nreads=%f nreads_good=%f\n",i,junction[i]->start,junction[i]->end,junction[i]->strand,junction[i]->leftsupport,junction[i]->nm,junction[i]->mm,junction[i]->nreads,junction[i]->nreads_good);
 
 			if(junction[i]->strand && junction[i]->nm && !junction[i]->guide_match && junction[i]->nm>=junction[i]->nreads) { // this is a bad junction -> check if it's maximal;
-				if(junction[i]->nreads_good<1.25*junctionthr) { // threshold for bad junctions is higher; (should I also add that too short junctions not to be accepted?)
-					junction[i]->strand=0; // just delete junction if it's low count
+				if(junction[i]->nreads_good>0 && (junction[i]->nreads_good<1.25*junctionthr || !good_junc(*junction[i],refstart,bpcov))) { // threshold for bad junctions is higher; (should I also add that too short junctions not to be accepted?)
+					//junction[i]->strand=0; // just delete junction if it's low count
+					junction[i]->mm=-1;
+					//fprintf(stderr,"...delete due to being under threshold\n");
 				}
-				else { // junction passes higher threshold
-					int j=i-1;
-					//if(j>=0) fprintf(stderr,"...start at junct:%d-%d:%d leftsupport=%f dist=%d\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport,junction[i]->start-junction[j]->start);
-					while(j>=0 && junction[i]->start-junction[j]->start<juncsupport) {
-						if(junction[j]->start!=junction[i]->start && junction[j]->leftsupport>junction[i]->leftsupport) {
-							//fprintf(stderr,"...compare to junct:%d-%d:%d leftsupport=%f\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport);
-							junction[i]->strand=0; // delete junction if I have a better junction nearby on the left
+
+				int j=i-1;
+				float support=0;
+				bool searchjunc=true;
+				//if(j>=0) fprintf(stderr,"...start at junct:%d-%d:%d leftsupport=%f dist=%d\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport,junction[i]->start-junction[j]->start);
+				while(j>=0 && junction[i]->start-junction[j]->start<juncsupport) {
+					if(junction[j]->strand==junction[i]->strand) {
+						if(junction[j]->start==junction[i]->start) { // found a junction with the same start -> I have already searched it
+							if(junction[j]->nreads<0) {
+								junction[i]->nreads=junction[j]->nreads;
+								searchjunc=false;
+							}
 							break;
 						}
-						j--;
-					}
-					if(junction[i]->strand) {
-						j=i+1;
-						//if(j<junction.Count()) fprintf(stderr,"...start at junct:%d-%d:%d leftsupport=%f dist=%d\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport,junction[j]->start-junction[i]->start);
-						while(j<junction.Count() && junction[j]->start-junction[i]->start<juncsupport) {
-							//fprintf(stderr,"...compare to junct:%d-%d:%d leftsupport=%f\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport);
-							if(junction[j]->start!=junction[i]->start && junction[j]->leftsupport>junction[i]->leftsupport) { // junction is not best within window
-								junction[i]->strand=0; // delete junction if I have a better junction nearby on the right
-								break;
+						else if(junction[j]->leftsupport*tolerance>junction[i]->leftsupport) {
+							//if(junction[j]->start!=junction[i]->start && junction[j]->leftsupport*tolerance>junction[i]->leftsupport) {
+							//fprintf(stderr,"...1 compare to [%d]:%d-%d:%d leftsupport=%f\n",j,junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport);
+							//junction[i]->strand=0; // delete junction if I have a better junction nearby on the left
+							//break;
+							if(junction[j]->leftsupport>support) {
+								junction[i]->nreads=-j;
+								support=junction[j]->leftsupport;
 							}
-							j++;
 						}
 					}
-				}
-			}
-			if(junction[i]->strand) { // update closest left junctions' starts back to lastleftjunc's start
-				int k=lastleftjunc+1;
-				if(lastleftjunc>-1) {
-					uint laststart=junction[lastleftjunc]->start;
-					uint mid=laststart+(junction[i]->start-laststart)/2;
-					//while(junction[k]->start<mid && (junction[k]->start-junction[lastleftjunc]->start<junctionsupport || (longreads && junction[k]->leftsupport==1))) { // shorten exon
-					while(junction[k]->start<mid && junction[k]->start-junction[lastleftjunc]->start<juncsupport) {
-						//while(junction[k]->start<mid) { // shorten exon
-						junction[k]->start=junction[lastleftjunc]->start;  // junction is closer to last junction
-						junction[k]->nreads=lastleftjunc;
-						//fprintf(stderr,"update junction[%d]->start to junction[%d]->start=%d\n",k,lastleftjunc,junction[lastleftjunc]->start);
-						k++;
-					}
-				}
-				int j=i-1;
-				//while(j>k && (junction[i]->start-junction[j]->start<junctionsupport || (longreads && junction[j]->leftsupport==1))) { //lengthen exon
-				while(j>k && junction[i]->start-junction[j]->start<juncsupport) { //lengthen exon
-					//while(j>k) { //lengthen exon
-					junction[j]->start=junction[i]->start;  // junction is closer to last junction
-					//fprintf(stderr,"update junction[%d]->start to junction[%d]->start=%d\n",k,i,junction[i]->start);
-					junction[j]->nreads=i;
 					j--;
 				}
-
-				lastleftjunc=i;
-			}
-
-
-			//fprintf(stderr,"ejunct:%d-%d:%d rightsupport=%f nm=%f nreads=%f\n",ejunction[i]->start,ejunction[i]->end,ejunction[i]->strand,ejunction[i]->rightsupport,ejunction[i]->nm,ejunction[i]->nreads);
-
-			if(ejunction[i]->strand && ejunction[i]->nm && !ejunction[i]->guide_match && ejunction[i]->nm>=ejunction[i]->nreads) { // this is a bad junction -> check if it's maximal
-				if(ejunction[i]->nreads_good<1.25*junctionthr) { // threshold for bad junctions is higher
-					ejunction[i]->strand=0;
-				}
-				else {
-					int j=i-1;
-					//if(j>=0) fprintf(stderr,"...start at junct:%d-%d:%d rightsupport=%f dist=%d\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport,ejunction[i]->end-ejunction[j]->end);
-					while(j>=0 && ejunction[i]->end-ejunction[j]->end<juncsupport) {
-						//fprintf(stderr,"...compare to junct:%d-%d:%d rightsupport=%f\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport);
-						if(ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport>ejunction[i]->rightsupport) {
-							ejunction[i]->strand=0;
-							break;
-						}
-						j--;
-					}
-					if(ejunction[i]->strand) {
-						j=i+1;
-						//if(j<junction.Count()) fprintf(stderr,"...start at junct:%d-%d:%d rightsupport=%f dist=%d\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport,ejunction[j]->end-ejunction[i]->end);
-						while(j<junction.Count() && ejunction[j]->end-ejunction[i]->end<juncsupport) {
-							//fprintf(stderr,"...compare to junct:%d-%d:%d rightsupport=%f\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport);
-							if(ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport>ejunction[i]->rightsupport) { // junction is not best within window
-								ejunction[i]->strand=0;
-								break;
+				if(searchjunc) {
+					j=i+1;
+					//if(j<junction.Count()) fprintf(stderr,"...start at junct:%d-%d:%d leftsupport=%f dist=%d\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport,junction[j]->start-junction[i]->start);
+					while(j<junction.Count() && junction[j]->start-junction[i]->start<juncsupport) {
+						if(junction[j]->strand==junction[i]->strand && junction[j]->start!=junction[i]->start && junction[j]->leftsupport*tolerance>junction[i]->leftsupport) { // junction is not best within window
+							//if(junction[j]->start!=junction[i]->start && junction[j]->leftsupport*tolerance>junction[i]->leftsupport) { // junction is not best within window
+							//fprintf(stderr,"...2 compare to [%d]:%d-%d:%d leftsupport=%f\n",j,junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport);
+							//junction[i]->strand=0; // delete junction if I have a better junction nearby on the right
+							//break;
+							if(junction[j]->leftsupport>support) {
+								junction[i]->nreads=-j;
+								support=junction[j]->leftsupport;
 							}
-							j++;
 						}
-					}
-				}
-			}
-			if(ejunction[i]->strand) { // update closest right junctions back to lastrightjunc
-				if(lastrightjunc<0) {
-					int k=i-1;
-					//while(k>=0 && (ejunction[i]->end-ejunction[k]->end<junctionsupport || (longreads && ejunction[k]->rightsupport==1))) {
-					while(k>=0 && ejunction[i]->end-ejunction[k]->end<juncsupport) {
-						//while(k>=0) {
-						ejunction[k]->end=ejunction[i]->end;
-						//fprintf(stderr,"update ejunction[%d]->end to ejunction[%d]->end=%d\n",k,i,ejunction[i]->end);
-						ejunction[k]->nreads_good=i;
-						k--;
-					}
-				}
-				else	 {
-					uint lastend=ejunction[lastrightjunc]->end;
-					uint mid=lastend+(ejunction[i]->end-lastend)/2;
-					int k=i-1;
-					//while(ejunction[k]->end>mid && (ejunction[i]->end-ejunction[k]->end<junctionsupport || (longreads && ejunction[k]->rightsupport==1))) {
-					while(ejunction[k]->end>mid && ejunction[i]->end-ejunction[k]->end<juncsupport) {
-						//while(ejunction[k]->end>mid) {
-						ejunction[k]->end=ejunction[i]->end;  // junction is closer to i junction
-						//fprintf(stderr,"update ejunction[%d]->end to ejunction[%d]->end=%d\n",k,i,ejunction[i]->end);
-						ejunction[k]->nreads_good=i;
-						k--;
-					}
-					int j=lastrightjunc+1;
-					//while(j<k && (ejunction[j]->end-ejunction[lastrightjunc]->end<junctionsupport || (longreads && ejunction[j]->rightsupport==1))) {
-					while(j<k && ejunction[j]->end-ejunction[lastrightjunc]->end<juncsupport) {
-						//while(j<k) {
-						ejunction[j]->end=ejunction[lastrightjunc]->end;
-						//fprintf(stderr,"update ejunction[%d]->end to ejunction[%d]->end=%d\n",j,lastrightjunc,ejunction[lastrightjunc]->end);
-						ejunction[j]->nreads_good=lastrightjunc;
 						j++;
 					}
 				}
-				lastrightjunc=i;
-			}
-		}
-
-		if(lastleftjunc>-1) {
-			int i=lastleftjunc+1;
-			//while(i<junction.Count() && (junction[i]->start-junction[lastleftjunc]->start<junctionsupport || (longreads && junction[i]->leftsupport==1))) {
-			while(i<junction.Count() && junction[i]->start-junction[lastleftjunc]->start<juncsupport) {
-				//while(i<junction.Count()) {
-				junction[i]->start=junction[lastleftjunc]->start;
-				junction[i]->nreads=lastleftjunc;
-				//fprintf(stderr,"end update junction[%d]->start to junction[%d]->start=%d\n",i,lastleftjunc,junction[lastleftjunc]->start);
-				i++;
-			}
-		}
-
-		if(lastrightjunc>-1) {
-			int i=lastrightjunc+1;
-			//while(i<junction.Count() && (ejunction[i]->end-ejunction[lastrightjunc]->end<junctionsupport || (longreads && ejunction[i]->rightsupport==1))) {
-			while(i<junction.Count() && ejunction[i]->end-ejunction[lastrightjunc]->end<juncsupport) {
-				//while(i<junction.Count()) {
-				ejunction[i]->end=ejunction[lastrightjunc]->end;
-				ejunction[i]->nreads_good=lastrightjunc;
-				//fprintf(stderr,"end update ejunction[%d]->end to ejunction[%d]->end=%d\n",i,lastrightjunc,ejunction[lastrightjunc]->end);
-				i++;
 			}
-		}*/
-
-		float tolerance=1-ERROR_PERC;
 
-		// strand based version
-		int lastleftjunc[2]={-1,-1}; // last left junction that is kept
-		int lastrightjunc[2]={-1,-1};
-		for(int i=0;i<junction.Count();i++) {
+			//fprintf(stderr,"ejunct[%d]:%d-%d:%d rightsupport=%f nm=%f nreads=%f\n",i,ejunction[i]->start,ejunction[i]->end,ejunction[i]->strand,ejunction[i]->rightsupport,ejunction[i]->nm,ejunction[i]->nreads);
 
-			//fprintf(stderr,"junct:%d-%d:%d lefttsupport=%f nm=%f nreads=%f\n",junction[i]->start,junction[i]->end,junction[i]->strand,junction[i]->leftsupport,junction[i]->nm,junction[i]->nreads);
-
-			if(junction[i]->strand && junction[i]->nm && !junction[i]->guide_match && junction[i]->nm>=junction[i]->nreads) { // this is a bad junction -> check if it's maximal;
-				if(junction[i]->nreads_good<1.25*junctionthr) { // threshold for bad junctions is higher; (should I also add that too short junctions not to be accepted?)
-					junction[i]->strand=0; // just delete junction if it's low count
+			if(ejunction[i]->strand && ejunction[i]->nm && !ejunction[i]->guide_match && ejunction[i]->nm>=ejunction[i]->nreads) { // this is a bad junction -> check if it's maximal
+				if(ejunction[i]->nreads_good>0 && (ejunction[i]->nreads_good<1.25*junctionthr || !good_junc(*ejunction[i],refstart,bpcov))) { // threshold for bad junctions is higher
+					//ejunction[i]->strand=0;
+					ejunction[i]->mm=-1;
+					//fprintf(stderr,"...delete due to being under threshold\n");
 				}
-				else { // junction passes higher threshold
-					int j=i-1;
-					//if(j>=0) fprintf(stderr,"...start at junct:%d-%d:%d leftsupport=%f dist=%d\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport,junction[i]->start-junction[j]->start);
-					while(j>=0 && junction[i]->start-junction[j]->start<juncsupport) {
-						if(junction[j]->strand==junction[i]->strand && junction[j]->start!=junction[i]->start && junction[j]->leftsupport*tolerance>junction[i]->leftsupport) {
-						//if(junction[j]->start!=junction[i]->start && junction[j]->leftsupport*tolerance>junction[i]->leftsupport) {
-							//fprintf(stderr,"...compare to junct:%d-%d:%d leftsupport=%f\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport);
-							junction[i]->strand=0; // delete junction if I have a better junction nearby on the left
+				int j=i-1;
+				float support=0;
+				bool searchjunc=true;
+				//if(j>=0) fprintf(stderr,"...start at junct:%d-%d:%d rightsupport=%f dist=%d\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport,ejunction[i]->end-ejunction[j]->end);
+				while(j>=0 && ejunction[i]->end-ejunction[j]->end<juncsupport) {
+					if(ejunction[j]->strand==ejunction[i]->strand) {
+						if(ejunction[j]->end==ejunction[i]->end) {
+							if(ejunction[j]->nreads_good<0) {
+								ejunction[i]->nreads_good=ejunction[j]->nreads_good;
+								searchjunc=false;
+							}
 							break;
 						}
-						j--;
-					}
-					if(junction[i]->strand) {
-						j=i+1;
-						//if(j<junction.Count()) fprintf(stderr,"...start at junct:%d-%d:%d leftsupport=%f dist=%d\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport,junction[j]->start-junction[i]->start);
-						while(j<junction.Count() && junction[j]->start-junction[i]->start<juncsupport) {
-							if(junction[j]->strand==junction[i]->strand && junction[j]->start!=junction[i]->start && junction[j]->leftsupport*tolerance>junction[i]->leftsupport) { // junction is not best within window
-							//if(junction[j]->start!=junction[i]->start && junction[j]->leftsupport*tolerance>junction[i]->leftsupport) { // junction is not best within window
-								//fprintf(stderr,"...compare to junct:%d-%d:%d leftsupport=%f\n",junction[j]->start,junction[j]->end,junction[j]->strand,junction[j]->leftsupport);
-								junction[i]->strand=0; // delete junction if I have a better junction nearby on the right
-								break;
+						else if(ejunction[j]->rightsupport*tolerance>ejunction[i]->rightsupport) {
+							//if(ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport*tolerance>ejunction[i]->rightsupport) {
+							//fprintf(stderr,"...1 compare to [%d]:%d-%d:%d rightsupport=%f\n",j,ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport);
+							//ejunction[i]->strand=0;
+							//break;
+							if(ejunction[j]->rightsupport>support) {
+								ejunction[i]->nreads_good=-j;
+								support=ejunction[j]->rightsupport;
 							}
-							j++;
-						}
-					}
-				}
-			}
-			if(junction[i]->strand) { // update closest left junctions' starts back to lastleftjunc's start
-				int s=(1+junction[i]->strand)/2;
-				int k=lastleftjunc[s]+1;
-				if(lastleftjunc[s]>-1) {
-					uint laststart=junction[lastleftjunc[s]]->start;
-					uint mid=laststart+(junction[i]->start-laststart)/2;
-					//while(junction[k]->start<mid && (junction[k]->start-junction[lastleftjunc]->start<junctionsupport || (longreads && junction[k]->leftsupport==1))) { // shorten exon
-					while(junction[k]->start<mid && junction[k]->start-junction[lastleftjunc[s]]->start<juncsupport) {
-					//while(junction[k]->start<mid) { // shorten exon
-						if(junction[k]->strand==junction[i]->strand) {
-							junction[k]->start=junction[lastleftjunc[s]]->start;  // junction is closer to last junction
-							junction[k]->nreads=lastleftjunc[s];
-						//fprintf(stderr,"update junction[%d]->start to junction[%d]->start=%d\n",k,lastleftjunc,junction[lastleftjunc]->start);
 						}
-						k++;
-					}
-				}
-				int j=i-1;
-				//while(j>k && (junction[i]->start-junction[j]->start<junctionsupport || (longreads && junction[j]->leftsupport==1))) { //lengthen exon
-				while(j>k && junction[i]->start-junction[j]->start<juncsupport) { //lengthen exon
-				//while(j>k) { //lengthen exon
-					if(junction[j]->strand==junction[i]->strand) {
-					junction[j]->start=junction[i]->start;  // junction is closer to last junction
-					//fprintf(stderr,"update junction[%d]->start to junction[%d]->start=%d\n",k,i,junction[i]->start);
-					junction[j]->nreads=i;
 					}
 					j--;
 				}
-
-				lastleftjunc[s]=i;
-			}
-
-
-			//fprintf(stderr,"ejunct:%d-%d:%d rightsupport=%f nm=%f nreads=%f\n",ejunction[i]->start,ejunction[i]->end,ejunction[i]->strand,ejunction[i]->rightsupport,ejunction[i]->nm,ejunction[i]->nreads);
-
-			if(ejunction[i]->strand && ejunction[i]->nm && !ejunction[i]->guide_match && ejunction[i]->nm>=ejunction[i]->nreads) { // this is a bad junction -> check if it's maximal
-				if(ejunction[i]->nreads_good<1.25*junctionthr) { // threshold for bad junctions is higher
-					ejunction[i]->strand=0;
-				}
-				else {
-					int j=i-1;
-					//if(j>=0) fprintf(stderr,"...start at junct:%d-%d:%d rightsupport=%f dist=%d\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport,ejunction[i]->end-ejunction[j]->end);
-					while(j>=0 && ejunction[i]->end-ejunction[j]->end<juncsupport) {
-						if(ejunction[j]->strand==ejunction[i]->strand && ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport*tolerance>ejunction[i]->rightsupport) {
-						//if(ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport*tolerance>ejunction[i]->rightsupport) {
-							//fprintf(stderr,"...compare to junct:%d-%d:%d rightsupport=%f\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport);
-							ejunction[i]->strand=0;
-							break;
-						}
-						j--;
-					}
-					if(ejunction[i]->strand) {
-						j=i+1;
-						//if(j<junction.Count()) fprintf(stderr,"...start at junct:%d-%d:%d rightsupport=%f dist=%d\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport,ejunction[j]->end-ejunction[i]->end);
-						while(j<junction.Count() && ejunction[j]->end-ejunction[i]->end<juncsupport) {
-							if(ejunction[j]->strand==ejunction[i]->strand && ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport*tolerance>ejunction[i]->rightsupport) { // junction is not best within window
+				if(searchjunc) {
+					j=i+1;
+					//if(j<junction.Count()) fprintf(stderr,"...start at junct:%d-%d:%d rightsupport=%f dist=%d\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport,ejunction[j]->end-ejunction[i]->end);
+					while(j<junction.Count() && ejunction[j]->end-ejunction[i]->end<juncsupport) {
+						if(ejunction[j]->strand==ejunction[i]->strand && ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport*tolerance>ejunction[i]->rightsupport) { // junction is not best within window
 							//if(ejunction[j]->end!=ejunction[i]->end && ejunction[j]->rightsupport*tolerance>ejunction[i]->rightsupport) { // junction is not best within window
-								//fprintf(stderr,"...compare to junct:%d-%d:%d rightsupport=%f\n",ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport);
-								ejunction[i]->strand=0;
-								break;
+							//fprintf(stderr,"...2 compare to [%d]:%d-%d:%d rightsupport=%f\n",j,ejunction[j]->start,ejunction[j]->end,ejunction[j]->strand,ejunction[j]->rightsupport);
+							//ejunction[i]->strand=0;
+							//break;
+							if(ejunction[j]->rightsupport>support) {
+								ejunction[i]->nreads_good=-j;
+								support=ejunction[j]->rightsupport;
 							}
-							j++;
-						}
-					}
-				}
-			}
-			if(ejunction[i]->strand) { // update closest right junctions back to lastrightjunc
-				int s=(1+ejunction[i]->strand)/2;
-				if(lastrightjunc[s]<0) {
-					int k=i-1;
-					//while(k>=0 && (ejunction[i]->end-ejunction[k]->end<junctionsupport || (longreads && ejunction[k]->rightsupport==1))) {
-					while(k>=0 && ejunction[i]->end-ejunction[k]->end<juncsupport) {
-					//while(k>=0) {
-						if(ejunction[k]->strand==ejunction[i]->strand) {
-							ejunction[k]->end=ejunction[i]->end;
-							//fprintf(stderr,"update ejunction[%d]->end to ejunction[%d]->end=%d\n",k,i,ejunction[i]->end);
-							ejunction[k]->nreads_good=i;
-						}
-						k--;
-					}
-				}
-				else	 {
-					uint lastend=ejunction[lastrightjunc[s]]->end;
-					uint mid=lastend+(ejunction[i]->end-lastend)/2;
-					int k=i-1;
-					//while(ejunction[k]->end>mid && (ejunction[i]->end-ejunction[k]->end<junctionsupport || (longreads && ejunction[k]->rightsupport==1))) {
-					while(ejunction[k]->end>mid && ejunction[i]->end-ejunction[k]->end<juncsupport) {
-					//while(ejunction[k]->end>mid) {
-						if(ejunction[k]->strand==ejunction[i]->strand) {
-							ejunction[k]->end=ejunction[i]->end;  // junction is closer to i junction
-							//fprintf(stderr,"update ejunction[%d]->end to ejunction[%d]->end=%d\n",k,i,ejunction[i]->end);
-							ejunction[k]->nreads_good=i;
-						}
-						k--;
-					}
-					int j=lastrightjunc[s]+1;
-					//while(j<k && (ejunction[j]->end-ejunction[lastrightjunc]->end<junctionsupport || (longreads && ejunction[j]->rightsupport==1))) {
-					while(j<k && ejunction[j]->end-ejunction[lastrightjunc[s]]->end<juncsupport) {
-					//while(j<k) {
-						if(ejunction[j]->strand==ejunction[i]->strand) {
-							ejunction[j]->end=ejunction[lastrightjunc[s]]->end;
-							//fprintf(stderr,"update ejunction[%d]->end to ejunction[%d]->end=%d\n",j,lastrightjunc,ejunction[lastrightjunc]->end);
-							ejunction[j]->nreads_good=lastrightjunc[s];
 						}
 						j++;
 					}
 				}
-				lastrightjunc[s]=i;
-			}
-		}
-
-		for(int s=0;s<2;s++) {
-			if(lastleftjunc[s]>-1) {
-				int i=lastleftjunc[s]+1;
-				//while(i<junction.Count() && (junction[i]->start-junction[lastleftjunc]->start<junctionsupport || (longreads && junction[i]->leftsupport==1))) {
-				while(i<junction.Count() && junction[i]->start-junction[lastleftjunc[s]]->start<juncsupport) {
-					//while(i<junction.Count()) {
-					if(junction[i]->strand==junction[lastleftjunc[s]]->strand) {
-						junction[i]->start=junction[lastleftjunc[s]]->start;
-						junction[i]->nreads=lastleftjunc[s];
-						//fprintf(stderr,"end update junction[%d]->start to junction[%d]->start=%d\n",i,lastleftjunc,junction[lastleftjunc]->start);
-					}
-					i++;
-				}
-			}
-
-			if(lastrightjunc[s]>-1) {
-				int i=lastrightjunc[s]+1;
-				//while(i<junction.Count() && (ejunction[i]->end-ejunction[lastrightjunc]->end<junctionsupport || (longreads && ejunction[i]->rightsupport==1))) {
-				while(i<junction.Count() && ejunction[i]->end-ejunction[lastrightjunc[s]]->end<juncsupport) {
-					//while(i<junction.Count()) {
-					if(ejunction[i]->strand==ejunction[lastrightjunc[s]]->strand) {
-						ejunction[i]->end=ejunction[lastrightjunc[s]]->end;
-						ejunction[i]->nreads_good=lastrightjunc[s];
-						//fprintf(stderr,"end update ejunction[%d]->end to ejunction[%d]->end=%d\n",i,lastrightjunc,ejunction[lastrightjunc]->end);
-					}
-					i++;
-				}
 			}
 		}
 
@@ -11587,8 +12109,8 @@
 	/*
 	{ // DEBUG ONLY
 		for(int i=0;i<junction.Count();i++) {
-			if(junction[i]->strand)
-				fprintf(stderr,"Junction %d-%d:%d has nm=%f mm=%f nreads=%f nreads_good=%f leftsupport=%f and rightsupport=%f\n",junction[i]->start,junction[i]->end,junction[i]->strand,
+			if(junction[i]->strand && junction[i]->nreads>0 && junction[i]->nreads_good>0 && junction[i]->mm>=0)
+				fprintf(stderr,"Junction[%d] %d-%d:%d has nm=%f mm=%f nreads=%f nreads_good=%f leftsupport=%f and rightsupport=%f\n",i,junction[i]->start,junction[i]->end,junction[i]->strand,
 					junction[i]->nm,junction[i]->mm,junction[i]->nreads,
 					junction[i]->nreads_good,junction[i]->leftsupport,junction[i]->rightsupport);
 		}
@@ -11613,18 +12135,11 @@
 	GHash<bool> boundaryleft;
 	GHash<bool> boundaryright;
 
-	//GIntHash<float> starts[3];
-	//GIntHash<float> ends[3];
-
+	bool resort=false;
+	int njunc=junction.Count();
 	for (int n=0;n<readlist.Count();n++) {
 		CReadAln & rd=*(readlist[n]);
 
-		/*int s=1+readlist[n]->strand;
-		if(!starts[s][readlist[n]->start]) starts[s].Add(readlist[n]->start,readlist[n]->read_count);
-		else starts[s][readlist[n]->start]+=readlist[n]->read_count;
-		if(!ends[s][readlist[n]->end]) ends[s].Add(readlist[n]->end,readlist[n]->read_count);
-		else ends[s][readlist[n]->end]+=readlist[n]->read_count;*/
-
 		// version that does not adjust read but discards it completely
 		bool keep=true;
 		int i=0;
@@ -11634,76 +12149,142 @@
 		fprintf(stderr," %d-%d\n",rd.segs[i].start,rd.segs[i].end);
 		i=0;*/
 
-		//int leftlen=0;
+
 		while(i<rd.juncs.Count()) {
 			CJunction& jd=*(rd.juncs[i]);
-			//fprintf(stderr, " junc %d-%d:%d nreads=%f nm=%f support=%f,%f between exons:%d-%d and %d-%d\n", jd.start, jd.end, jd.strand,jd.nreads,jd.nm,jd.leftsupport,jd.rightsupport,rd.segs[i].start,rd.segs[i].end,rd.segs[i+1].start,rd.segs[i+1].end);
+			//fprintf(stderr, " read junc %d-%d:%d nreads=%f nreads_good=%f nm=%f support=%f,%f between exons:%d-%d and %d-%d\n", jd.start, jd.end, jd.strand,jd.nreads,jd.nreads_good,jd.nm,jd.leftsupport,jd.rightsupport,rd.segs[i].start,rd.segs[i].end,rd.segs[i+1].start,rd.segs[i+1].end);
+
+			bool changeleft=jd.nreads<0;
+			bool changeright=jd.nreads_good<0;
+
+			if(changeleft || changeright) jd.strand=0;
+			if(jd.mm<0) jd.strand=0;
 
 			if(!jd.strand || !good_junc(jd,refstart,bpcov)) { // found a bad junction
 				//instead of deleting read -> just delete junction and make sure that add_read_to_group adds read to multiple groups
 				//jd.nreads-=rd.read_count;
 
-				bool changeleft=false;
-				bool changeright=false;
-				// I need to adjust ends of read to match the adjusted junctions
-				if(rd.segs[i].end!=jd.start && jd.start>=rd.segs[i].start) { // if start is before adjusted junction start there is nothing I can do
-					if(jd.start<rd.segs[i].end) {
-						rd.segs[i].end=jd.start;
-						changeleft=true;
-					}
-					else if(jd.start<rd.segs[i+1].start && (rd.segs[i].len()>junctionsupport || (i && rd.juncs[i-1]->strand))) { // only extend if the exon shouldn't just get erased
-						rd.segs[i].end=jd.start;
-						changeleft=true;
-					}
-				}
-
-				if(rd.segs[i+1].start!=jd.end && jd.end<=rd.segs[i+1].end) { // if end is after end of exon there is nothing I can do
-					if(jd.end>rd.segs[i+1].start) {
-						rd.segs[i+1].start=jd.end;
-						changeright=true;
+				if(jd.mm>=0 && (changeleft || changeright)) {
+					uint newstart=rd.segs[i].end;
+					uint newend=rd.segs[i+1].start;
+					int jk=-1;
+					int ek=-1;
+					if(changeleft) {
+						jk=abs(int(jd.nreads));
+						if(junction[jk]->nreads<0) { // not a valid left support -> delete junction
+							newstart=rd.segs[i].start-1;
+						}
+						else {
+							newstart=junction[jk]->start; // junction jk is good
+							//fprintf(stderr,"junction has start from junction[jk=%d]\n",jk);
+						}
 					}
-					else if(jd.end>rd.segs[i].end && (rd.segs[i+1].len()>junctionsupport || (i+1<rd.juncs.Count() && rd.juncs[i+1]->strand))) {
-						rd.segs[i+1].start=jd.end;
-						changeright=true;
+					if(changeright) {
+						ek=abs(int(jd.nreads_good));
+						if(ejunction[ek]->nreads_good<0) {
+							newend=rd.segs[i+1].end+1;
+						}
+						else {
+							newend=ejunction[ek]->end;
+							//fprintf(stderr,"junction has end from junction[ek=%d]\n",ek);
+						}
 					}
-				}
-
-				if(changeleft) {
-					int k=(int)jd.nreads;
-					if(junction[k]->end==rd.segs[i+1].start && junction[k]->strand==rd.strand) rd.juncs.Put(i,junction.Get(k));
-					else {
-						while(k>=0 && junction[k]->start==rd.segs[i].end) k--;
-						k++;
-						while(k<junction.Count() && junction[k]->start==rd.segs[i].end) {
-							if(junction[k]->end==rd.segs[i+1].start && junction[k]->strand==rd.strand) { // found the right junction
-								rd.juncs.Put(i,junction.Get(k));
-								//fprintf(stderr,"left junction[%d]->strand=%d vs this rd.juncs[%d]->strand=%d\n",k,junction[k]->strand,i,rd.juncs[i]->strand);
-								break;
+					if(newstart>=rd.segs[i].start && newend<=rd.segs[i+1].end) { // junction inside read boundaries
+						bool searchjunc=true;
+						rd.segs[i].end=newstart;
+						rd.segs[i+1].start=newend;
+						if(jk>0) { // search junctions
+							int k=jk;
+							while(k>0 && junction[k]->start==newstart) {
+								if(rd.strand==junction[k]->strand && junction[k]->end==newend) {
+									rd.juncs.Put(i,junction[k]);
+									searchjunc=false;
+									break;
+								}
+								k--;
+							}
+							if(searchjunc) {
+								k=jk+1;
+								while(k<junction.Count() && junction[k]->start==newstart) {
+									if(rd.strand==junction[k]->strand && junction[k]->end==newend) {
+										rd.juncs.Put(i,junction[k]);
+										searchjunc=false;
+										break;
+									}
+									k++;
+								}
+							}
+							if(searchjunc) { // I did not find junction -> I need to create a new one
+								// first check if I already added such a junction
+								for(k=njunc;k<junction.Count();k++) {
+									if(rd.strand==junction[k]->strand && junction[k]->start==newstart && junction[k]->end==newend) {
+										rd.juncs.Put(i,junction[k]);
+										searchjunc=false;
+										break;
+									}
+								}
+								if(searchjunc) {
+									if(!resort) {
+										junction.setSorted(false);
+										ejunction.setSorted(false);
+										resort=true;
+									}
+									//fprintf(stderr,"Add new junction:%d-%d:%d at position %d njunc=%d\n",newstart,newend,rd.strand,junction.Count(),njunc);
+									CJunction *junc=new CJunction(newstart,newend,rd.strand);
+									junction.Add(junc);
+									ejunction.Add(junc);
+									rd.juncs.Put(i,junc);
+								}
 							}
-							k++;
 						}
-					}
-				}
-				else if(changeright) { // why do I have else here? and not just if?
-					int k=(int)jd.nreads_good;
-					if(ejunction[k]->start==rd.segs[i].end && ejunction[k]->strand==rd.strand) rd.juncs.Put(i,ejunction.Get(k));
-					else {
-						while(k>=0 && ejunction[k]->end==rd.segs[i+1].start) k--;
-						k++;
-						while(k<junction.Count() && ejunction[k]->end==rd.segs[i+1].start) {
-							if(ejunction[k]->start==rd.segs[i].end && junction[k]->strand==rd.strand) { // found the right junction
-								rd.juncs.Put(i,ejunction.Get(k));
-								//fprintf(stderr,"right ejunction[%d]->strand=%d vs this rd.juncs[%d]->strand=%d\n",k,ejunction[k]->strand,i,rd.juncs[i]->strand);
-								break;
+						else { // ek>0 => search ejunctions
+							int k=ek;
+							while(k>0 && ejunction[k]->end==newend) {
+								if(rd.strand==ejunction[k]->strand && ejunction[k]->start==newstart) {
+									rd.juncs.Put(i,ejunction[k]);
+									searchjunc=false;
+									break;
+								}
+								k--;
+							}
+							if(searchjunc) {
+								k=ek+1;
+								while(k<ejunction.Count() && ejunction[k]->end==newend) {
+									if(rd.strand==ejunction[k]->strand && ejunction[k]->start==newstart) {
+										rd.juncs.Put(i,ejunction[k]);
+										searchjunc=false;
+										break;
+									}
+									k++;
+								}
+							}
+							if(searchjunc) { // I did not find junction -> I need to create a new one
+								// first check if I already added such a junction
+								for(k=njunc;k<ejunction.Count();k++) {
+									if(rd.strand==ejunction[k]->strand && ejunction[k]->start==newstart && ejunction[k]->end==newend) {
+										rd.juncs.Put(i,ejunction[k]);
+										searchjunc=false;
+										break;
+									}
+								}
+								if(searchjunc) {
+									if(!resort) {
+										junction.setSorted(false);
+										ejunction.setSorted(false);
+										resort=true;
+									}
+									//fprintf(stderr,"Add new junction:%d-%d:%d at position %d njunc=%d\n",newstart,newend,rd.strand,junction.Count(),njunc);
+									CJunction *junc=new CJunction(newstart,newend,rd.strand);
+									junction.Add(junc);
+									ejunction.Add(junc);
+									rd.juncs.Put(i,ejunction[k]);
+								}
 							}
-							k++;
 						}
 					}
+					// fprintf(stderr, "read[%d] adjusted to junction:%d-%d\n",n,rd.segs[i].end,rd.segs[i+1].start);
 				}
 
-				//if(changeleft || changeright) fprintf(stderr, "read[%d] adjusted to junction:%d-%d\n",n,rd.segs[i].end,rd.segs[i+1].start);
-
-
 				// because read might be poorly mapped I also have to unpair it
 				if(keep) { // this is the first time I unpair read -> remove strand if pair has single exon?
 					for(int p=0;p<rd.pair_idx.Count();p++) {
@@ -11806,6 +12387,11 @@
 		//else clean_read_junctions(readlist[n]);
 	}
 
+	if(resort) {
+		junction.setSorted(true);
+		ejunction.setSorted(juncCmpEnd);
+	}
+
 
 	//fprintf(stderr,"fragno=%d fraglen=%g\n",fragno,fraglen);
 
@@ -11894,7 +12480,6 @@
 		}
 		currgroup[nextgr]->color=grcol;
 
-		//print STDERR "nextgr=$nextgr grcol=$grcol current group is at coords: ",$currgroup[$nextgr][0],"-",$currgroup[$nextgr][1],"\n";
 		//fprintf(stderr,"group %d id=%d: %u-%u col=%d from col=%d\n",nextgr,currgroup[nextgr]->grid,currgroup[nextgr]->start,currgroup[nextgr]->end,grcol,prevcol);
 
 		if(nextgr == 1) { // unknown strand group
@@ -12179,7 +12764,9 @@
 
 	int g=0;
 
-	for(int b=0;b<bundle[1].Count();b++) {
+	for(int b=0;b<bundle[1].Count();b++) { // these are neutral bundles that do not overlap any signed reads
+
+		// I need to address features here too -> TODO
 
 		bool guide_ovlp=false;
 		while(g<guides.Count() && (guides[g]->exons.Count()>1 || guides[g]->end<bnode[1][bundle[1][b]->startnode]->start)) {
@@ -12474,8 +13061,10 @@
 
     				if(guides.Count()) process_refguides(graphno[s][b],edgeno[s][b],gpos[s][b],lastgpos[s][b],no2gnode[s][b],transfrag[s][b],s,guidetrf,bdata);
 
+    				GVec<int> trflong; // non-redundant long transfrags that I can use to guide the long read assemblies
     				//process transfrags to eliminate noise, and set compatibilities, and node memberships
-    				process_transfrags(s,graphno[s][b],edgeno[s][b],no2gnode[s][b],transfrag[s][b],tr2no[s][b],gpos[s][b],guidetrf,pred);
+    				process_transfrags(s,graphno[s][b],edgeno[s][b],no2gnode[s][b],transfrag[s][b],tr2no[s][b],gpos[s][b],guidetrf,pred,trflong);
+    				//get_trf_long(graphno[s][b],edgeno[s][b], gpos[s][b],no2gnode[s][b],transfrag[s][b],geneno,s,pred,trflong);
 
     				/*
     				{ //DEBUG ONLY
@@ -12514,10 +13103,11 @@
 
     				//fprintf(stderr,"guidetrf no=%d\n",guidetrf.Count());
 
+    				//if(!longreads) {
     				// find transcripts now
     				if(!rawreads) geneno=find_transcripts(graphno[s][b],edgeno[s][b],gpos[s][b],no2gnode[s][b],transfrag[s][b],
-    						geneno,s,guidetrf,pred,guides,guidepred,bdata);
-
+    						geneno,s,guidetrf,guides,guidepred,bdata,trflong);
+    				//}
     				for(int g=0;g<guidetrf.Count();g++) delete guidetrf[g].trf;
 
 
@@ -13286,7 +13876,10 @@
 			char s=0; // unknown strand
 			if(guides[g]->strand=='+') s=1; // guide on positive strand
 			else if(guides[g]->strand=='-') s=-1; // guide on negative strand
-			for(int i=1;i<guides[g]->exons.Count();i++) add_junction((int)guides[g]->exons[i-1]->end,(int)guides[g]->exons[i]->start,gjunc,s);
+			for(int i=1;i<guides[g]->exons.Count();i++) { //add_junction((int)guides[g]->exons[i-1]->end,(int)guides[g]->exons[i]->start,gjunc,s);
+				gjunc.AddIfNew(new CJunction(guides[g]->exons[i-1]->end,guides[g]->exons[i]->start,s),true);
+				//fprintf(stderr,"Add guide junction=%d-%d:%d from reference %s\n",guides[g]->exons[i-1]->end,guides[g]->exons[i]->start,s,guides[g]->getID());
+			}
 		}
 		if(gjunc.Count()) {
 
@@ -13305,7 +13898,7 @@
 			egjunc.setSorted(juncCmpEnd);
 			int s=0;
 			int e=0;
-			for(int i=0;i<junction.Count();i++) {
+			for(int i=1;i<junction.Count();i++) {
 
 				//fprintf(stderr,"check junction:%d-%d:%d rightsupport=%f nm=%f nreads=%f\n",junction[i]->start,junction[i]->end,junction[i]->strand,junction[i]->rightsupport,junction[i]->nm,junction[i]->nreads);
 
@@ -13459,7 +14052,6 @@
 			//if(nex>1) fprintf(stderr," %d-%d",rd.segs[i].start,rd.segs[i].end);
 			if(i) {
 				//fprintf(stderr,":%d",rd.juncs[i-1]->strand);
-				CJunction *nullj=NULL;
 
 				if(modified) { // see if read uses modified junction -> correct it
 					sprintf(sbuf, "%p", rd.juncs[i-1]);
@@ -13470,16 +14062,9 @@
 							if(rd.segs[i-1].start<=jp->start) rd.segs[i-1].end=jp->start;
 							if(rd.segs[i].end>=jp->end) rd.segs[i].start=jp->end;
 
-							if(!nullj) {
-								if(!junction[0]->start && !junction[0]->end) nullj=junction[0];
-								else {
-									nullj=new CJunction(0, 0, 0);
-									junction.Add(nullj);
-								}
-							}
 							rd.juncs[i-1]->nreads-=rd.read_count;
 							//if(rd.juncs[i-1]->nreads<ERROR_PERC) rd.juncs[i-1]->strand=0; // this approach removes a perfectly valid (guide) junction which might not be the desired effect
-							rd.juncs[i-1]=nullj;
+							rd.juncs[i-1]=junction[0];
 
 						}
 						else {
@@ -13495,16 +14080,9 @@
 						if(rd.segs[i-1].start>rd.juncs[i-1]->start || rd.segs[i].end<rd.juncs[i-1]->end) {
 							if(rd.segs[i-1].end!=rd.juncs[i-1]->start && rd.segs[i-1].start<=rd.juncs[i-1]->start) rd.segs[i-1].end=rd.juncs[i-1]->start;
 							if(rd.segs[i].start!=rd.juncs[i-1]->end && rd.segs[i].end>=rd.juncs[i-1]->end) rd.segs[i].start=rd.juncs[i-1]->end;
-							if(!nullj) {
-								if(!junction[0]->start && !junction[0]->end) nullj=junction[0];
-								else {
-									nullj=new CJunction(0, 0, 0);
-									junction.Add(nullj);
-								}
-							}
 							rd.juncs[i-1]->nreads-=rd.read_count;
 							//if(rd.juncs[i-1]->nreads<ERROR_PERC) rd.juncs[i-1]->strand=0; // this approach removes a perfectly valid (guide) junction which might not be the desired effect
-							rd.juncs[i-1]=nullj;
+							rd.juncs[i-1]=junction[0];
 						}
 						else {
 							//if(rd.segs[i-1].end!=rd.juncs[i-1]->start || rd.segs[i].start!=rd.juncs[i-1]->end) fprintf(stderr," [chg rd from %d-%d to %d-%d]",rd.segs[i-1].end,rd.segs[i].start,rd.juncs[i-1]->start,rd.juncs[i-1]->end);
@@ -14314,7 +14892,9 @@
 		}
 	}
 
-	CExon ex(0,0,pred[0]->cov*pred[0]->len()); // this keeps the exon flow based on per bp coverage
+	float excov=pred[0]->cov;
+	if(!longreads) excov*=pred[0]->tlen; // it was len() before but that doesn't make any sense
+	CExon ex(0,0,excov); // this keeps the exon flow based on per bp coverage
 	//CExon ex(0,0,pred[0]->exoncov[0]*pred[0]->exons[0].len()); // this keeps the exon flow based on read coverage (elen factor)
 	//pred[0]->exoncov[0]=0;
 	maxint->node.Add(ex);
@@ -14357,7 +14937,7 @@
 			//fprintf(stderr,"pred[%d] intron[%d]:%d-%d introncov=%f exoncov=%f\n",0,j-1,pred[0]->exons[j-1].end,pred[0]->exons[j].start,introncov,exoncov);
 		  }
 		}
-		nextmaxint=add_exon_to_maxint(nextmaxint,pred[0]->exons[j].start,pred[0]->exons[j].end,0,j,pred[0]->cov*pred[0]->len(),pred,overlap); // per bp coverage
+		nextmaxint=add_exon_to_maxint(nextmaxint,pred[0]->exons[j].start,pred[0]->exons[j].end,0,j,excov,pred,overlap); // per bp coverage
 		//nextmaxint=add_exon_to_maxint(nextmaxint,pred[0]->exons[j].start,pred[0]->exons[j].end,0,j,pred[0]->exoncov[j]*pred[0]->exons[j].len(),pred,overlap); // per read coverage
 		//pred[0]->exoncov[j]=0;
 	}
@@ -14380,10 +14960,12 @@
 
 		color.Add(n);
 
+		excov=pred[n]->cov;
+		if(!longreads) excov*=pred[n]->tlen;
 		//CPred p(n,pred[n]->cov); // priority based on cov/bp
 		CPred p(n,pred[n]->tlen*pred[n]->cov); // priority based on number of bases covered
 		predord.Add(p);
-		nextmaxint=add_exon_to_maxint(nextmaxint,pred[n]->exons[0].start,pred[n]->exons[0].end,n,0,pred[n]->cov*pred[n]->len(),pred,overlap); // per bp coverage
+		nextmaxint=add_exon_to_maxint(nextmaxint,pred[n]->exons[0].start,pred[n]->exons[0].end,n,0,excov,pred,overlap); // per bp coverage
 		//nextmaxint=add_exon_to_maxint(nextmaxint,pred[n]->exons[0].start,pred[n]->exons[0].end,n,0,pred[n]->exoncov[0]*pred[n]->exons[0].len(),pred,overlap); // read coverage
 		//pred[n]->exoncov[0]=0;
 		intron.clear();
@@ -14430,7 +15012,7 @@
 					}
 				}
 			}
-			nextintv=add_exon_to_maxint(nextintv,pred[n]->exons[j].start,pred[n]->exons[j].end,n,j,pred[n]->cov*pred[n]->len(),pred,overlap); // per bp coverage
+			nextintv=add_exon_to_maxint(nextintv,pred[n]->exons[j].start,pred[n]->exons[j].end,n,j,excov,pred,overlap); // per bp coverage
 			//nextintv=add_exon_to_maxint(nextintv,pred[n]->exons[j].start,pred[n]->exons[j].end,n,j,pred[n]->exoncov[j]*pred[n]->exons[j].len(),pred,overlap); // read coverage
 			//pred[n]->exoncov[j]=0;
 		}
@@ -14493,12 +15075,12 @@
 						uint anchor=longintronanchor;
 						if(longreads && pred[n2]->cov>readthr) anchor=junctionsupport;
 						if(pred[n2]->exons[0].len()<anchor || pred[n2]->exons.Last().len()<anchor) { // VAR8: leave this one out
-							//fprintf(stderr,"falseflag: n2=%d has low first/last exon\n",n2);
+							//fprintf(stderr,"falseflag: pred[%d] n2=%d has low first/last exon\n",n2,n2);
 							pred[n2]->flag=false;
 						}
 						else if(retainedintron(pred,n1,n2,lowintron)) {
 							//if(ret>1 || pred[n2]->cov<ERROR_PERC*pred[n1]->cov) {
-							//fprintf(stderr,"falseflag: n2=%d has low intron coverage\n",n2);
+							//fprintf(stderr,"falseflag: pred[%d] n2=%d has low intron coverage\n",n2,n2);
 							pred[n2]->flag=false;
 							//}
 						}
@@ -14506,12 +15088,12 @@
 
 							//fprintf(stderr,"diff strand\n");
 							if(pred[n2]->exons.Count()==1) { // why was I restricting it to single exons?
-								//fprintf(stderr,"falseflag: ...strand elmination of n2=%d by n1=%d\n",n2,n1);
+								//fprintf(stderr,"falseflag: ...strand elmination of pred[%d] n2=%d by n1=%d\n",n2,n2,n1);
 								pred[n2]->flag=false;
 							}
 							else if(!pred[n1]->t_eq && pred[n1]->exons.Count()==1) {
 								if(pred[n1]->cov < pred[n2]->cov+singlethr) {
-									//fprintf(stderr,"flaseflag: n1=%d with 1 exon eliminated by n2=%d\n",n1,n2);
+									//fprintf(stderr,"flaseflag: pred[%d] n1=%d with 1 exon eliminated by n2=%d\n",n1,n1,n2);
 									pred[n1]->flag=false;
 									break;
 								}
@@ -14522,7 +15104,7 @@
 								    }*/
 							}
 							else if(pred[n2]->exons.Count()<=pred[n1]->exons.Count() && included_pred(pred,n1,n2,(uint)bundleData->start,bpcov)) {
-								//fprintf(stderr,"falseflag: n2=%d is included in n1=%d\n",n2,n1);
+								//fprintf(stderr,"falseflag: pred[%d] n2=%d is included in n1=%d\n",n2,n2,n1);
 								pred[n2]->flag=false;
 							}
 							else if(!longreads){
@@ -14534,36 +15116,42 @@
 	    								break;
 	    							}
 	    						}
-	    						if(lowcov) pred[n2]->flag=false;
+	    						if(lowcov) {
+	    							pred[n2]->flag=false;
+	    							//fprintf(stderr,"falseflag: pred[%d] n2=%d has lowcov\n",n2,n2);
+	    						}
 	    					}
 
 						} // end pred[n1]->strand != pred[n2]->strand
 	    				//else if(pred[n2]->cov<isofrac*pred[n1]->cov) pred[n2]->flag=false;  // I am only considering inclusions here since this might change allocations
 	    				else if(pred[n2]->exons.Count()<=pred[n1]->exons.Count() && pred[n1]->cov>pred[n2]->cov*DROP && included_pred(pred,n1,n2,(uint)bundleData->start,bpcov)) {
 	    					pred[n2]->flag=false;
-	    					//fprintf(stderr,"falseflag: ...included elmination of n2=%d by n1=%d\n",n2,n1);
+	    					//fprintf(stderr,"falseflag: ...included elmination of pred[%d] n2=%d by n1=%d\n",n2,n2,n1);
 	    				}
 	    				else if(!pred[n1]->t_eq && n1 && ((!longreads && pred[n1]->exons.Count()<=2) || pred[n1]->cov < pred[n2]->cov+singlethr) && pred[n1]->exons.Count()<pred[n2]->exons.Count() && included_pred(pred,n2,n1,(uint)bundleData->start,bpcov)) {
 	    					pred[n1]->flag=false;
-	    					//fprintf(stderr,"falseflag: ...included elmination of n1=%d by n2=%d\n",n1,n2);
+	    					//fprintf(stderr,"falseflag: ...included elmination of pred[%d] n1=%d by n2=%d\n",n1,n1,n2);
 	    					break;
 	    				}
 	    			}
 	    			else if(!pred[n1]->t_eq && n1 && ((!longreads && pred[n1]->exons.Count()<=2) || pred[n1]->cov < pred[n2]->cov+singlethr) && pred[n1]->exons.Count()<=pred[n2]->exons.Count() && included_pred(pred,n2,n1,(uint)bundleData->start,bpcov)) {
-	    				//fprintf(stderr,"falseflag: ...included elmination of n1=%d(%f) by n2=%d(%f)\n",n1,pred[n1]->cov,n2,pred[n2]->cov);
+	    				//fprintf(stderr,"falseflag: ...included elmination of pred[%d] n1=%d(%f) by n2=%d(%f)\n",n1,n1,pred[n1]->cov,n2,pred[n2]->cov);
 	    				pred[n1]->flag=false;
 	    				break;
 	    			}
-	    			else if(longreads && pred[n2]->t_eq && pred[n2]->cov < DROP && pred[n2]->exons.Count()<=pred[n1]->exons.Count() && included_pred(pred,n1,n2,(uint)bundleData->start,bpcov,false)) pred[n2]->flag=false;
+	    			else if(longreads && !pred[n2]->t_eq && pred[n2]->cov < DROP && pred[n2]->exons.Count()<=pred[n1]->exons.Count() && included_pred(pred,n1,n2,(uint)bundleData->start,bpcov,false)) {
+	    				pred[n2]->flag=false;
+	    				//fprintf(stderr,"falseflag: pred[%d] n2=%d is included in n1=%d\n",n2,n2,n1);
+	    			}
 	    		}
 	    		//else if(!pred[n2]->t_eq && pred[n2]->exons.Count()==1 && pred[n2]->cov<DROP*pred[n1]->cov && pred[n1]->start<pred[n2]->start && pred[n2]->end<pred[n1]->end) {
 	    		else if(!pred[n2]->t_eq && pred[n2]->exons.Count()==1 && ((!longreads && pred[n2]->cov<pred[n1]->cov) || pred[n2]->cov<isofrac*pred[n1]->cov) && pred[n1]->start<=pred[n2]->start && pred[n2]->end<=pred[n1]->end) {
-	    			//fprintf(stderr,"falseflag: ...single exon elmination of n2=%d by n1=%d\n",n2,n1);
+	    			//fprintf(stderr,"falseflag: ...single exon elmination of pred[%d] n2=%d by n1=%d\n",n2,n2,n1);
 	    			pred[n2]->flag=false; // delete intronic prediction if single exon and at realtively low coverage
 	    		}
 	    		else if(!longreads) {
 	    			if(!pred[n1]->t_eq && pred[n1]->exons.Count()==1 && pred[n1]->cov<pred[n2]->cov && pred[n2]->start<=pred[n1]->start && pred[n1]->end<=pred[n2]->end) {
-	    				//fprintf(stderr,"falseflag: ...single exon elmination of n1=%d by n2=%d\n",n1,n2);
+	    				//fprintf(stderr,"falseflag: ...single exon elmination of pred[%d] n1=%d by n2=%d\n",n1, n1,n2);
 	    				pred[n1]->flag=false;
 	    				break;
 	    			}
@@ -14636,126 +15224,155 @@
 	}
 	*/
 
-	// recompute coverages
-	bool adjust=true;
-	while(adjust) { // by here I eliminated inclusions and intronic single exons
-		adjust=false;
-
-		for(int n=0;n<npred;n++) if(pred[n]->flag) {
-		    /*if(!pred[n]->t_eq && !keep[n] && pred[n]->cov<lowcov) pred[n]->flag=false; // this was commented and it was almost there Pr 29.8 for 534291
-		      else*/
-			  for(int i=0;i<pred[n]->exons.Count();i++)
-					pred[n]->exoncov[i]=0;
-		}
 
+	if(longreads) { // eliminate predictions under threshold without doing the coverage adjustment
 		nextmaxint=maxint;
 		while(nextmaxint) {
 			if((int)nextmaxint->start-bundleData->start<bpcov[1].Count()) {
-				float covsum=0;
 				GVec<CPred> exord;
 				for(int i=0;i<nextmaxint->node.Count();i++) if(pred[nextmaxint->node[i].predno]->flag) {
-				        //CPred ex(i,pred[nextmaxint->node[i].predno]->cov);
 					CPred ex(i,pred[nextmaxint->node[i].predno]->tlen*pred[nextmaxint->node[i].predno]->cov);
 					exord.Add(ex); //exord only remembers the non-false predictions
-
-					//covsum+=pred[nextmaxint->node[i].predno]->cov; // priority based on cov/bp
-					//covsum+=pred[nextmaxint->node[i].predno]->tlen*pred[nextmaxint->node[i].predno]->cov; // priority based on number of bases covered
-					covsum+=nextmaxint->node[i].exoncov; // priority based on exoncov
 				}
-
-				if(covsum) {
-					float totalcov=get_cov(1,nextmaxint->start-bundleData->start,nextmaxint->end-bundleData->start,bpcov);
+				if(exord.Count()) {
 					exord.Sort(predordCmp); // sort exons from the most highest coverage to lowest
 					int i=exord[0].predno;
 					int n=nextmaxint->node[i].predno;
-					int e=nextmaxint->node[i].exonno;
-					//float exoncov=totalcov*pred[n]->tlen*pred[n]->cov/covsum; // priority based on number of bases covered
-					//float exoncov=totalcov*pred[n]->cov/covsum; // priority based on per bp cov
-					float exoncov=totalcov*nextmaxint->node[i].exoncov/covsum; // priority based on per bp cov
-					pred[n]->exoncov[e]+=exoncov;
-					float usedcov=pred[n]->cov;
-					float strandcov[2]={0,0};
-					if(longreads) {
-						if(pred[n]->strand=='+') strandcov[1]+=pred[n]->cov;
-						else if(pred[n]->strand=='-') strandcov[0]+=pred[n]->cov;
-						else {
-							strandcov[0]+=pred[n]->cov;
-							strandcov[1]+=pred[n]->cov;
-						}
-					}
-
-					//fprintf(stderr,"maxint=%d-%d has first pred[%d] usedcov=%f\n",nextmaxint->start,nextmaxint->end,n,usedcov);
+					float usedcov[2]={0,0};
+					float multicov[2]={0,0};
+					int s=0;
+					if(pred[n]->strand=='+') s=1;
+					usedcov[s]=pred[n]->cov;
+					if(pred[n]->exons.Count()>1) multicov[s]=usedcov[s];
+					//fprintf(stderr,"maxint=%d-%d has first pred[%d] with cov=%f usedcov=%f\n",nextmaxint->start,nextmaxint->end,n,pred[n]->cov,usedcov[s]);
 
 					for(int j=1;j<exord.Count();j++) {
 						i=exord[j].predno;
 						n=nextmaxint->node[i].predno;
 						bool longunder=false;
-						if(longreads) {
+						if(pred[n]->strand=='+') s=1;
+						else s=0;
+						if(!pred[n]->t_eq) {
 							float isofraclong=isofrac;
 							if(pred[n]->cov>CHI_WIN) isofraclong=isofrac*ERROR_PERC*DROP; // more abundant transcript have higher error tollerance
-							if(pred[n]->cov>CHI_THR) isofraclong=isofrac*DROP;
-							if(pred[n]->strand=='+') { if(pred[n]->cov<isofraclong*strandcov[1]) longunder=true;}
-							else if(pred[n]->strand=='-') { if(pred[n]->cov<isofraclong*strandcov[0]) longunder=true;}
-							else if(pred[n]->cov<isofraclong*strandcov[0] || pred[n]->cov<isofraclong*strandcov[1]) longunder=true;
-						}
-						if(!pred[n]->t_eq && (longunder || (!longreads && (pred[n]->cov<isofrac*usedcov ||
-								(pred[n]->cov<lowcov && pred[n]->cov<lowisofrac*usedcov) || (pred[n]->exons.Count()==1 && pred[n]->cov<usedcov) ||
-								pred[n]->cov<1 || (pred[n]->exons.Count()==2 && (pred[n]->cov<lowcov || pred[n]->cov<ERROR_PERC*usedcov)))))) {
-							adjust=true;
-							//fprintf(stderr,"flaseflag: pred[%d] has low cov=%f percentage from usedcoverage=%f\n",n,pred[n]->cov,usedcov);
+							else if(pred[n]->cov>CHI_THR) isofraclong=isofrac*DROP;
+							//else if(pred[n]->cov>1/ERROR_PERC) isofraclong=isofrac*DROP;
+							if(pred[n]->exons.Count()>1) {
+								if((!multicov[s] && pred[n]->cov<isofraclong*usedcov[s] && pred[n]->cov<DROP/ERROR_PERC) ||
+										pred[n]->cov<isofraclong*multicov[s]) longunder=true;
+							}
+							else if(pred[n]->cov<isofraclong*usedcov[s]) longunder=true;
+						}
+						if(longunder) {
+							//fprintf(stderr,"flaseflag: pred[%d] has low cov=%f percentage from usedcoverage=%f\n",n,pred[n]->cov,usedcov[s]);
 							pred[n]->flag=false;
 						}
 						else {
-							e=nextmaxint->node[i].exonno;
-							//exoncov=totalcov*pred[n]->cov/covsum; // priority based on per bp cov
-							//float exoncov=totalcov*pred[n]->tlen*pred[n]->cov/covsum; // priority based on number of bases covered
-							exoncov=totalcov*nextmaxint->node[i].exoncov/covsum; // priority based on per bp cov
-							pred[n]->exoncov[e]+=exoncov;
-							usedcov+=pred[n]->cov;
-							if(longreads) {
-								if(pred[n]->strand=='+') strandcov[1]+=pred[n]->cov;
-								else if(pred[n]->strand=='-') strandcov[0]+=pred[n]->cov;
-								else {
-									strandcov[0]+=pred[n]->cov;
-									strandcov[1]+=pred[n]->cov;
-								}
-							}
-							//fprintf(stderr,"maxint=%d-%d add pred[%d]->cov=%f to usedcov=%f\n",nextmaxint->start,nextmaxint->end,n,pred[n]->cov,usedcov);
+							usedcov[s]+=pred[n]->cov;
+							if(pred[n]->exons.Count()>1) multicov[s]+=pred[n]->cov;
+							//fprintf(stderr,"maxint=%d-%d add pred[%d]->cov=%f to usedcov=%f\n",nextmaxint->start,nextmaxint->end,n,pred[n]->cov,usedcov[s]);
 						}
 					}
-
 				}
 			}
 			nextmaxint=nextmaxint->next;
 		}
+	}
+	else { // recompute coverages -> mostly for short reads; long reads should be more reliable in the way coverages are estimated
+		bool adjust=true;
+		while(adjust) { // by here I eliminated inclusions and intronic single exons
+			adjust=false;
+
+			for(int n=0;n<npred;n++) if(pred[n]->flag) {
+				/*if(!pred[n]->t_eq && !keep[n] && pred[n]->cov<lowcov) pred[n]->flag=false; // this was commented and it was almost there Pr 29.8 for 534291
+		      	  else*/
+				for(int i=0;i<pred[n]->exons.Count();i++)
+					pred[n]->exoncov[i]=0;
+			}
 
-		//for(int p=0;p<npred;p++) {
-		//	int n=predord[p].predno;
-		for(int n=0;n<npred;n++) {
-			if(pred[n]->flag) {
+			nextmaxint=maxint;
+			while(nextmaxint) {
+				if((int)nextmaxint->start-bundleData->start<bpcov[1].Count()) {
+					float covsum=0;
+					GVec<CPred> exord;
+					for(int i=0;i<nextmaxint->node.Count();i++) if(pred[nextmaxint->node[i].predno]->flag) {
+				        //CPred ex(i,pred[nextmaxint->node[i].predno]->cov);
+						CPred ex(i,pred[nextmaxint->node[i].predno]->tlen*pred[nextmaxint->node[i].predno]->cov);
+						exord.Add(ex); //exord only remembers the non-false predictions
 
-				// compute prediction coverage first
-				pred[n]->cov=0;
-				for(int i=0;i<pred[n]->exons.Count();i++) {
-					pred[n]->cov+=pred[n]->exoncov[i];
-					pred[n]->exoncov[i]/=pred[n]->exons[i].len();
+						//covsum+=pred[nextmaxint->node[i].predno]->cov; // priority based on cov/bp
+						//covsum+=pred[nextmaxint->node[i].predno]->tlen*pred[nextmaxint->node[i].predno]->cov; // priority based on number of bases covered
+						covsum+=nextmaxint->node[i].exoncov; // priority based on exoncov
+					}
+
+					if(covsum) {
+						float totalcov=get_cov(1,nextmaxint->start-bundleData->start,nextmaxint->end-bundleData->start,bpcov);
+						exord.Sort(predordCmp); // sort exons from the most highest coverage to lowest
+						int i=exord[0].predno;
+						int n=nextmaxint->node[i].predno;
+						int e=nextmaxint->node[i].exonno;
+						//float exoncov=totalcov*pred[n]->tlen*pred[n]->cov/covsum; // priority based on number of bases covered
+						//float exoncov=totalcov*pred[n]->cov/covsum; // priority based on per bp cov
+						float exoncov=totalcov*nextmaxint->node[i].exoncov/covsum; // priority based on per bp cov
+						pred[n]->exoncov[e]+=exoncov;
+						//fprintf(stderr,"pred[%d]->exoncov[%d]=%f exoncov=%f totalcov=%f covsum=%f\n",n,e,pred[n]->exoncov[e],exoncov,totalcov,covsum);
+						float usedcov=pred[n]->cov;
+
+						//fprintf(stderr,"maxint=%d-%d has first pred[%d] with cov=%f usedcov=%f\n",nextmaxint->start,nextmaxint->end,n,pred[n]->cov,usedcov);
+
+						for(int j=1;j<exord.Count();j++) {
+							i=exord[j].predno;
+							n=nextmaxint->node[i].predno;
+							if(!pred[n]->t_eq && (pred[n]->cov<isofrac*usedcov || (pred[n]->cov<lowcov && pred[n]->cov<lowisofrac*usedcov) ||
+									(pred[n]->exons.Count()==1 && pred[n]->cov<usedcov) || pred[n]->cov<1 ||
+									(pred[n]->exons.Count()==2 && (pred[n]->cov<lowcov || pred[n]->cov<ERROR_PERC*usedcov)))) {
+								adjust=true;
+								//fprintf(stderr,"flaseflag: pred[%d] has low cov=%f percentage from usedcoverage=%f\n",n,pred[n]->cov,usedcov);
+								pred[n]->flag=false;
+							}
+							else {
+								e=nextmaxint->node[i].exonno;
+								//exoncov=totalcov*pred[n]->cov/covsum; // priority based on per bp cov
+								//float exoncov=totalcov*pred[n]->tlen*pred[n]->cov/covsum; // priority based on number of bases covered
+								exoncov=totalcov*nextmaxint->node[i].exoncov/covsum; // priority based on per bp cov
+								pred[n]->exoncov[e]+=exoncov;
+								usedcov+=pred[n]->cov;
+								//fprintf(stderr,"maxint=%d-%d add pred[%d]->cov=%f to usedcov=%f\n",nextmaxint->start,nextmaxint->end,n,pred[n]->cov,usedcov);
+							}
+						}
+					}
 				}
-				pred[n]->cov/=pred[n]->tlen;
-				if(!pred[n]->t_eq) {
-					if(pred[n]->cov<1 || (pred[n]->exons.Count()==1 && pred[n]->cov<singlethr)) {
-						//if(pred[n]->cov<readthr || (pred[n]->exons.Count()==1 && pred[n]->cov<singlethr) ) {
+				nextmaxint=nextmaxint->next;
+			}
+
+			//for(int p=0;p<npred;p++) {
+			//	int n=predord[p].predno;
+			for(int n=0;n<npred;n++) {
+				if(pred[n]->flag) {
+
+					// compute prediction coverage first
+					pred[n]->cov=0;
+					for(int i=0;i<pred[n]->exons.Count();i++) {
+						pred[n]->cov+=pred[n]->exoncov[i];
+						pred[n]->exoncov[i]/=pred[n]->exons[i].len();
+					}
+					pred[n]->cov/=pred[n]->tlen;
+					if(!pred[n]->t_eq) {
+						if(pred[n]->cov<1 || (pred[n]->exons.Count()==1 && pred[n]->cov<singlethr)) {
+							//if(pred[n]->cov<readthr || (pred[n]->exons.Count()==1 && pred[n]->cov<singlethr) ) {
+							pred[n]->flag=false;
+							//fprintf(stderr,"falseflag: pred[%d]->cov=%f eliminated due to low coverage\n",n,pred[n]->cov);
+							adjust=true;
+						}
+					}
+					else if(longreads && pred[n]->cov<ERROR_PERC) {
 						pred[n]->flag=false;
-						//fprintf(stderr,"falseflag: pred[%d]->cov=%f eliminated due to low coverage\n",n,pred[n]->cov);
-						adjust=true;
+						//fprintf(stderr,"falseflag: too low cov of pred[%d]:%f\n",n,pred[n]->cov);
 					}
+					//predord[p].cov=pred[n]->tlen*pred[n]->cov; ** use this
+					//predord[p].cov=pred[n]->cov;
+					//fprintf(stderr,"coverage of pred[%d]=%f\n",n,pred[n]->cov);
 				}
-				else if(longreads && pred[n]->cov<ERROR_PERC) {
-					pred[n]->flag=false;
-					//fprintf(stderr,"falseflag: too low cov of pred[%d]:%f\n",n,pred[n]->cov);
-				}
-				//predord[p].cov=pred[n]->tlen*pred[n]->cov; ** use this
-				//predord[p].cov=pred[n]->cov;
-				//fprintf(stderr,"coverage of pred[%d]=%f\n",n,pred[n]->cov);
 			}
 		}
 	}
@@ -15230,6 +15847,20 @@
 
 	// print transcripts including the necessary isoform fraction cleanings
 	GList<CPrediction>& pred = bundleData->pred;
+
+	/*
+	{ // DEBUG ONLY
+		fprintf(stderr,"Pred set before sorting:\n");
+		for(int i=0;i<pred.Count();i++) {
+			if(pred[i]->t_eq) fprintf(stderr,"%s ",pred[i]->t_eq->getID());
+			fprintf(stderr,"pred[%d]:%d-%d (cov=%f, readcov=%f, strand=%c):",i,pred[i]->start,pred[i]->end,pred[i]->cov,pred[i]->tlen*pred[i]->cov,pred[i]->strand);
+			for(int j=0;j<pred[i]->exons.Count();j++) fprintf(stderr," %d-%d",pred[i]->exons[j].start,pred[i]->exons[j].end);
+			fprintf(stderr,"\n");
+		}
+		fprintf(stderr,"\n");
+	}
+	*/
+
 	int npred=pred.Count();
 	pred.setSorted(predCmp);
 
@@ -15430,7 +16061,7 @@
 		for(int i=0;i<pred.Count();i++) {
 			if(pred[i]->t_eq) fprintf(stderr,"%s ",pred[i]->t_eq->getID());
 			fprintf(stderr,"pred[%d]:%d-%d (cov=%f, readcov=%f, strand=%c):",i,pred[i]->start,pred[i]->end,pred[i]->cov,pred[i]->tlen*pred[i]->cov,pred[i]->strand);
-			for(int j=0;j<pred[i]->exons.Count();j++) fprintf(stderr," %d-%d",pred[i]->exons[j].start,pred[i]->exons[j].end);
+			for(int j=0;j<pred[i]->exons.Count();j++) fprintf(stderr," %d-%d(%f)",pred[i]->exons[j].start,pred[i]->exons[j].end,pred[i]->exoncov[j]);
 			fprintf(stderr,"\n");
 		}
 		fprintf(stderr,"\n");
@@ -15608,7 +16239,7 @@
 						if(pred[n]->t_eq && pred[m]->t_eq && pred[n]->t_eq!=pred[m]->t_eq) { m++; continue;} // both are equal but represent different transcripts
 
 						if(pred[n]->exons.Count()==1) {
-							if(pred[m]->cov>ERROR_PERC*pred[n]->cov && pred[n]->cov>ERROR_PERC*pred[m]->cov) { // predictions close to each other in abundance
+							if(pred[m]->cov>ERROR_PERC*pred[n]->cov && pred[n]->cov>ERROR_PERC*pred[m]->cov) { // predictions close to each other in abundance -> otherwise I just delete the less abundant overlapping single exon gene
 
 								if(!pred[m]->t_eq && pred[n]->end>pred[m]->end) pred[m]->end=pred[n]->end;
 
@@ -15914,7 +16545,7 @@
 		for(int i=0;i<pred.Count();i++) {
     		if(pred[i]->t_eq) fprintf(stderr,"%s ",pred[i]->t_eq->getID());
     		fprintf(stderr,"pred[%d]:%d-%d (cov=%f, readcov=%f, strand=%c):",i,pred[i]->start,pred[i]->end,pred[i]->cov,pred[i]->tlen*pred[i]->cov,pred[i]->strand);
-    		for(int j=0;j<pred[i]->exons.Count();j++) fprintf(stderr," %d-%d",pred[i]->exons[j].start,pred[i]->exons[j].end);
+    		for(int j=0;j<pred[i]->exons.Count();j++) fprintf(stderr," %d-%d(%f)",pred[i]->exons[j].start,pred[i]->exons[j].end,pred[i]->exoncov[j]);
     		fprintf(stderr,"\n");
     	}
     }
diff -Nru stringtie-2.1.1+ds/rlink.h stringtie-2.1.2+ds/rlink.h
--- stringtie-2.1.1+ds/rlink.h	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/rlink.h	2020-04-21 20:54:10.000000000 +0000
@@ -49,10 +49,8 @@
 //collect all refguide transcripts for a single genomic sequence
 struct GRefData {
   GList<GffObj> rnas; //all transcripts on this genomic seq
-  //GList<GRefLocus> loci;
   int gseq_id;
   const char* gseq_name;
-   //GList<GTData> tdata; //transcript data (uptr holder for all rnas loaded here)
   GRefData(int gid=-1):rnas(false,true,false),gseq_id(gid),gseq_name(NULL) {
     gseq_id=gid;
     if (gseq_id>=0)
@@ -93,6 +91,48 @@
 			cov(_cov),bid(_bid),nextnode(_nextnode) {}
 };
 
+
+enum GPFType {
+	GPFT_NONE=0,
+	GPFT_TSS,
+	GPFT_CPAS
+}; //on 4 bits: maximum 15 types
+
+struct GPtFeature { //point feature (single coordinate)
+ GPFType ftype : 4;
+ int ref_id: 26; //index in a reftable[] with reference names, max 67,108,863
+ int strand: 2; //-1=-, 0=unstranded, +1=+
+ uint coord; //genomic coordinate for this feature
+ GPtFeature(GPFType ft=GPFT_NONE, int rid=-1, int _strand=0, uint loc=0):ftype(ft),
+		    ref_id(rid), strand(_strand), coord(loc) {}
+ bool operator<(const GPtFeature &o) { return coord<o.coord; }
+ bool operator==(const GPtFeature &o) { return coord==o.coord; }
+    //-- should really match ftype and strand too,
+    //   but we don't care, for bundle inclusion
+};
+
+
+struct GRefPtData {
+  int ref_id; //same with GPtFeature::ref_id, also in GffObj::names->gseqs
+  GList<GPtFeature> pfs; //all point feature on this genomic seq, sorted
+  GRefPtData(int gid=-1):ref_id(gid), pfs(true,false,false) { }
+
+  void add(GPtFeature* t) { //adds a fully formed GPtFeature record
+     if (ref_id!=t->ref_id || ref_id<0 || t->ref_id<0)
+           GError("Error: invalid call to GRefPtData::add() - cannot add feature with ref id %d to ref data id %d!\n",
+        		   t->ref_id, ref_id);
+     pfs.Add(t);
+  }
+  //sorting by unique ref_id
+  bool operator==(GRefPtData& d){
+    return ref_id==d.ref_id;
+  }
+  bool operator<(GRefPtData& d){
+    return (ref_id<d.ref_id);
+  }
+};
+
+
 // bundle data structure, holds all data needed for
 // infering transcripts from a bundle
 enum BundleStatus {
@@ -493,9 +533,11 @@
 	GBitVec childpat;
 	GBitVec parentpat;
 	GVec<int> trf; // transfrags that pass the node
+	bool hardstart:1; // verified/strong start
+	bool hardend:1;	// verified/strong end
 	//CGraphnode(int s=0,int e=0,unsigned int id=MAX_NODE,float nodecov=0,float cap=0,float r=0,float f=0):GSeg(s,e),nodeid(id),cov(nodecov),capacity(cap),rate(r),frag(f),child(),parent(),childpat(),parentpat(),trf(){}
 	CGraphnode(int s=0,int e=0,unsigned int id=MAX_NODE,float nodecov=0,float cap=0,float r=0):GSeg(s,e),
-			nodeid(id),cov(nodecov),capacity(cap),rate(r),child(),parent(),childpat(),parentpat(),trf(){}
+			nodeid(id),cov(nodecov),capacity(cap),rate(r),child(),parent(),childpat(),parentpat(),trf(),hardstart(false),hardend(false){}
 };
 
 // # 0: strand; 1: start; 2: end; 3: nreads; 4: nreads_good;
@@ -586,7 +628,7 @@
  int idx; //index in the main bundles array
  int start;
  int end;
- unsigned long numreads; // number of reads in bundles
+ unsigned long numreads; // number of reads in this bundle
  /*
  float wnumreads; // NEW: weighted numreads; a multi-mapped read mapped in 2 places will contribute only 0.5
  double sumreads; // sum of all reads' lengths in bundle
@@ -608,7 +650,7 @@
  GVec<float> bpcov[3];   // this needs to be changed to a more inteligent way of storing the data
  GList<CJunction> junction;
  GPVec<GffObj> keepguides;
- //GPVec<CTCov> covguides;
+ GPVec<GPtFeature> ptfs; //point features for this bundle
  GList<CPrediction> pred;
  RC_BundleData* rc_data;
  BundleData():status(BUNDLE_STATUS_CLEAR), idx(0), start(0), end(0),
@@ -616,7 +658,7 @@
 		 num_fragments(0), frag_len(0),sum_cov(0),covflags(0),
 		 refseq(), gseq(NULL), readlist(false,true), //bpcov(1024),
 		 junction(true, true, true),
-		 keepguides(false), pred(false), rc_data(NULL) {
+		 keepguides(false), ptfs(false), pred(false), rc_data(NULL) {
 	 for(int i=0;i<3;i++) 	bpcov[i].setCapacity(1024);
  }
 
@@ -655,6 +697,7 @@
 
  void Clear() {
 	keepguides.Clear();
+	ptfs.Clear();
 	pred.Clear();
 	pred.setSorted(false);
 	readlist.Clear();
diff -Nru stringtie-2.1.1+ds/stringtie.cpp stringtie-2.1.2+ds/stringtie.cpp
--- stringtie-2.1.1+ds/stringtie.cpp	2020-01-30 15:09:03.000000000 +0000
+++ stringtie-2.1.2+ds/stringtie.cpp	2020-04-21 20:54:10.000000000 +0000
@@ -11,7 +11,7 @@
 #include "proc_mem.h"
 #endif
 
-#define VERSION "2.1.1"
+#define VERSION "2.1.2"
 
 //#define DEBUGPRINT 1
 
@@ -32,8 +32,8 @@
 #define USAGE "StringTie v" VERSION " usage:\n\
  stringtie <input.bam ..> [-G <guide_gff>] [-l <label>] [-o <out_gtf>] [-p <cpus>]\n\
   [-v] [-a <min_anchor_len>] [-m <min_tlen>] [-j <min_anchor_cov>] [-f <min_iso>]\n\
-  [-C <coverage_file_name>] [-c <min_bundle_cov>] [-g <bdist>] [-u] [-L]\n\
-  [-e] [-x <seqid,..>] [-A <gene_abund.out>] [-h] {-B | -b <dir_path>} \n\
+  [-C <coverage_file_name>] [-c <min_bundle_cov>] [-g <bdist>] [-u] [-L] [-e]\n\
+  [--ptf <f_tab>] [-x <seqid,..>] [-A <gene_abund.out>] [-h] {-B | -b <dir_path>} \n\
 Assemble RNA-Seq alignments into potential transcripts.\n\
  Options:\n\
  --version : print just the version at stdout and exit\n\
@@ -41,6 +41,7 @@
  --rf assume stranded library fr-firststrand\n\
  --fr assume stranded library fr-secondstrand\n\
  -G reference annotation to use for guiding the assembly process (GTF/GFF3)\n\
+ --ptf load point-features from a given 4 column feature file <f_tab>\n\
  -o output path/file name for the assembled transcripts GTF (default: stdout)\n\
  -l name prefix for output transcripts (default: STRG)\n\
  -f minimum isoform fraction (default: 0.01)\n\
@@ -171,7 +172,8 @@
 
 GFastaDb* gfasta=NULL;
 
-GStr guidegff;
+GStr guidegff; // -G option
+GStr ptff; // --ptf option (point features)
 
 bool debugMode=false;
 bool verbose=false;
@@ -192,10 +194,9 @@
 double Cov_Sum=0;
 //bool firstPrint=true; //just for writing the GFF header before the first transcript is printed
 
-GffNames* gseqNames=NULL; //used as a dictionary for genomic sequence names and ids
-
-int refseqCount=0;
+GffNames* gseqNames=NULL; //used as a dictionary for reference sequence names and ids
 
+int refseqCount=0; // number of reference sequences found in the guides file
 
 #ifdef GMEMTRACE
  double maxMemRS=0;
@@ -242,6 +243,9 @@
 void noMoreBundles(); //sets NoMoreBundles to true
 //--
 void processOptions(GArgs& args);
+
+int loadPtFeatures(FILE* f, GArray<GRefPtData>& refpts);
+
 char* sprintTime();
 
 void processBundle(BundleData* bundle);
@@ -266,13 +270,16 @@
 
  // == Process arguments.
  GArgs args(argc, argv,
-   "debug;help;version;conservative;keeptmp;rseq=;bam;fr;rf;merge;exclude=zEihvteuLRx:n:j:s:D:G:C:S:l:m:o:a:j:c:f:p:g:P:M:Bb:A:F:T:");
+   "debug;help;version;conservative;keeptmp;rseq=;ptf=;bam;fr;rf;merge;"
+   "exclude=zEihvteuLRx:n:j:s:D:G:C:S:l:m:o:a:j:c:f:p:g:P:M:Bb:A:F:T:");
  args.printError(USAGE, true);
 
  processOptions(args);
 
  GVec<GRefData> refguides; // plain vector with transcripts for each chromosome
 
+ GArray<GRefPtData> refpts(true, true); // sorted,unique array of refseq point-features data
+
  //table indexes for Ballgown Raw Counts data (-B/-b option)
  GPVec<RC_TData> guides_RC_tdata(true); //raw count data or other info for all guide transcripts
  GPVec<RC_Feature> guides_RC_exons(true); //raw count data for all guide exons
@@ -360,6 +367,22 @@
 	 }
  }
 
+ gseqNames=GffObj::names; //might have been populated already by gff data
+ gffnames_ref(gseqNames);  //initialize the names collection if not guided
+ bool havePtFeatures=false;
+
+ // -- loading point-feature data
+ if (!ptff.is_empty()) {
+   FILE* f=fopen(ptff.chars(),"r");
+   if (f==NULL) GError("Error: could not open reference annotation file (%s)!\n",
+       ptff.chars());
+   //                transcripts_only    sort by location?
+   int numptf=loadPtFeatures(f, refpts); //adds to gseqNames->gseqs accordingly, populates refpts
+   havePtFeatures=(numptf>0);
+   fclose(f);
+ }
+
+
 #ifdef GFF_DEBUG
   for (int r=0;r<refguides.Count();++r) {
 	  GRefData& grefdata = refguides[r];
@@ -372,18 +395,16 @@
   exit(0);
 #endif
 
- // --- here we do the input processing
- gseqNames=GffObj::names; //might have been populated already by gff data
- gffnames_ref(gseqNames);  //initialize the names collection if not guided
+ // --- input processing
 
 
  GHash<int> hashread;      //read_name:pos:hit_index => readlist index
-
- GList<GffObj>* guides=NULL; //list of transcripts on a specific chromosome
-
+ GList<GffObj>* guides=NULL; //list of transcripts on a specific reference
+ GList<GPtFeature>* refptfs=NULL; //list of point-features on a specific reference
  int currentstart=0, currentend=0;
  int ng_start=0;
  int ng_end=-1;
+ int ptf_idx=0; //point-feature current index in the current (*refptfs)[]
  int ng=0;
  GStr lastref;
  bool no_ref_used=true;
@@ -534,10 +555,18 @@
 	 }
 
 	 if (new_bundle || chr_changed) {
-		 //bgeneids.Clear();
 		 hashread.Clear();
 		 if (bundle->readlist.Count()>0) { // process reads in previous bundle
 			// (readthr, junctionthr, mintranscriptlen are globals)
+			if (refptfs) { //point-features defined for this reference
+				while (ptf_idx<refptfs->Count() && (int)(refptfs->Get(ptf_idx)->coord)<currentstart)
+					ptf_idx++;
+				//TODO: what if a PtFeature is nearby, just outside the bundle?
+				while (ptf_idx<refptfs->Count() && (int)(refptfs->Get(ptf_idx)->coord)<=currentend) {
+					bundle->ptfs.Add(refptfs->Get(ptf_idx)); //keep this PtFeature
+					ptf_idx++;
+				}
+			}
 			bundle->getReady(currentstart, currentend);
 			if (gfasta!=NULL) { //genomic sequence data requested
 				GFaSeqGet* faseq=gfasta->fetch(bundle->refseq.chars());
@@ -609,6 +638,17 @@
 					 ng=guides->Count();
 				 }
 			 }
+			 if (havePtFeatures) {
+				 ptf_idx=-1;
+				 //setup refptf
+				 refptfs=NULL;
+				 GRefPtData rd(gseq_id);
+				 int ridx=refpts.IndexOf(rd);
+				 if (ridx>=0) {
+					refptfs=&(refpts[ridx].pfs);
+					ptf_idx=0;
+				 }
+			 }
 			 lastref=refseqName;
 			 lastref_id=gseq_id;
 			 currentend=0;
@@ -727,7 +767,7 @@
 
  //cleaning up
  delete brec;
- //bamreader.bclose();
+
  bamreader.stop(); //close all BAM files
 
  if (guided && no_ref_used) {
@@ -759,6 +799,13 @@
  if (mergeMode && guided )
 	 writeUnbundledGuides(refguides, f_out);
 
+
+ // clear refpts data, if loaded
+  if (refpts.Count()>0)
+	  for (int i=0;i<refpts.Count();i++) {
+		  refpts[i].pfs.setFreeItem(true);
+	  }
+
  fclose(f_out);
  if (c_out && c_out!=stdout) fclose(c_out);
 
@@ -1079,6 +1126,13 @@
 	   else GError("Error: reference annotation file (%s) not found.\n",
 	             guidegff.chars());
 	 }
+	 s=args.getOpt("ptf");
+	 if (!s.is_empty()) {
+	   ptff=s;
+	   if (fileExists(ptff.chars())<=1)
+		   GError("Error: point features data file (%s) not found.\n",
+	             ptff.chars());
+	 }
 
 	 enableNames=(args.getOpt('E')!=NULL);
 
@@ -1317,6 +1371,7 @@
 
 	}
 #endif
+
 	infer_transcripts(bundle);
 
 	if (ballgown && bundle->rc_data) {
@@ -1453,6 +1508,66 @@
 
 #endif
 
+void addPtFeature(const char* refname, GPtFeature* pf, GArray<GRefPtData>& refpts) {
+  //expects gseqNames to be set to GffObj::names and initialized/populated already!
+  //MUST be called AFTER the guides file has been loaded (if given)
+  int gseq_id=gseqNames->gseqs.addName(refname);
+  pf->ref_id=gseq_id;
+  int ridx=-1;
+  GRefPtData* rpd=NULL;
+  GRefPtData rd(gseq_id);
+  if (refpts.Count()==0) {
+	  ridx=refpts.Add(rd);
+  } else {
+	  ridx=refpts.IndexOf(rd);
+	  if (ridx<0) {
+		  ridx=refpts.Add(rd);
+	  }
+  }
+  if (ridx<0) GError("Error adding GRefPtData entry (bug!)\n");
+  rpd = & refpts.Get(ridx);
+  rpd->add(pf);
+}
+
+int loadPtFeatures(FILE* f, GArray<GRefPtData>& refpts) {
+  //expected format:
+  //<chromosome> <coordinate> <strand> <feature_type>
+  int num=0;
+  GLineReader lr(f);
+  char* line=NULL;
+  GDynArray<char*> tokens;
+  while ((line=lr.nextLine())!=NULL) {
+    strsplit(line, tokens);
+    if (tokens.Count()<4)
+    	GError("Error parsing point-feature line (not enough columns):\n%s\n",line);
+    int start;
+    if (!strToInt(tokens[1], start))
+    	GError("Error parsing point-feature line (invalid coordinate):\n%s\n",line);
+    int8_t strand=-2;
+    if (strlen(tokens[2])==1) {
+    	if (tokens[2][0]=='+')
+    		strand=1;
+    	else if (tokens[2][0]=='-')
+    		strand=-1;
+    	else if (tokens[2][0]=='.')
+    		strand=0;
+    }
+    if (strand==-2)
+    	GError("Error parsing point-feature line (invalid strand):\n%s\n",line);
+    GPFType pftype=GPFT_NONE;
+    if (strcmp(tokens[3], "TSS")==0)
+			pftype=GPFT_TSS;
+	else if (strcmp(tokens[3], "CPAS")==0)
+			pftype=GPFT_CPAS;
+    if (pftype==0)
+    	GError("Error parsing point-feature line (unrecognized type):\n%s\n",line);
+    GPtFeature* ptf=new GPtFeature(pftype, -1, strand, start);
+    addPtFeature(tokens[0], ptf, refpts);
+    num++;
+  } //while line
+  return num;
+}
+
 void writeUnbundledGenes(GHash<CGene>& geneabs, const char* refseq, FILE* gout) {
 				 //write unbundled genes from this chromosome
 	geneabs.startIterate();