diff -Nru kio-recoll-1.25.16/configure kio-recoll-1.25.17/configure --- kio-recoll-1.25.16/configure 2019-05-22 12:46:16.000000000 +0000 +++ kio-recoll-1.25.17/configure 2019-05-24 06:59:39.000000000 +0000 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for Recoll 1.25.16. +# Generated by GNU Autoconf 2.69 for Recoll 1.25.17. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -587,8 +587,8 @@ # Identity of this package. PACKAGE_NAME='Recoll' PACKAGE_TARNAME='recoll' -PACKAGE_VERSION='1.25.16' -PACKAGE_STRING='Recoll 1.25.16' +PACKAGE_VERSION='1.25.17' +PACKAGE_STRING='Recoll 1.25.17' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1410,7 +1410,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures Recoll 1.25.16 to adapt to many kinds of systems. +\`configure' configures Recoll 1.25.17 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1485,7 +1485,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Recoll 1.25.16:";; + short | recursive ) echo "Configuration of Recoll 1.25.17:";; esac cat <<\_ACEOF @@ -1651,7 +1651,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Recoll configure 1.25.16 +Recoll configure 1.25.17 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2224,7 +2224,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Recoll $as_me 1.25.16, which was +It was created by Recoll $as_me 1.25.17, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3093,7 +3093,7 @@ # Define the identity of the package. PACKAGE='recoll' - VERSION='1.25.16' + VERSION='1.25.17' # Some tools Automake needs. @@ -20041,7 +20041,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Recoll $as_me 1.25.16, which was +This file was extended by Recoll $as_me 1.25.17, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20107,7 +20107,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -Recoll config.status 1.25.16 +Recoll config.status 1.25.17 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff -Nru kio-recoll-1.25.16/debian/changelog kio-recoll-1.25.17/debian/changelog --- kio-recoll-1.25.16/debian/changelog 2019-05-22 12:52:00.000000000 +0000 +++ kio-recoll-1.25.17/debian/changelog 2019-05-24 06:58:00.000000000 +0000 @@ -1,9 +1,9 @@ -kio-recoll (1.25.16-1~ppa1~cosmic1) cosmic; urgency=low +kio-recoll (1.25.17-1~ppa1~cosmic1) cosmic; urgency=low - * New release 1.25.16 + * New release 1.25.17 * Keep kio in sync - -- Jean-Francois Dockes Wed, 22 May 2019 14:52:00 +0200 + -- Jean-Francois Dockes Fri, 24 May 2019 08:58:00 +0200 kio-recoll (1.25.12-1~ppa1~cosmic1) cosmic; urgency=low diff -Nru kio-recoll-1.25.16/rcldb/rclabsfromtext.cpp kio-recoll-1.25.17/rcldb/rclabsfromtext.cpp --- kio-recoll-1.25.16/rcldb/rclabsfromtext.cpp 2019-05-17 07:35:24.000000000 +0000 +++ kio-recoll-1.25.17/rcldb/rclabsfromtext.cpp 2019-05-23 15:50:12.000000000 +0000 @@ -136,6 +136,14 @@ // abstract will be incorrect or inexistant, but this is // better than taking forever (the default cutoff is 10E6) if (maxtermcount && termcount++ > maxtermcount) { + LOGINF("Rclabsfromtext: stopping because maxtermcount reached: "<< + maxtermcount << endl); + return false; + } + // Also limit the number of fragments (just in case safety) + if (m_fragments.size() > maxtermcount / 100) { + LOGINF("Rclabsfromtext: stopping because maxfragments reached: "<< + maxtermcount/100 << endl); return false; } // Remember recent past @@ -157,8 +165,8 @@ if (m_terms.find(dumb) != m_terms.end()) { // This word is a search term. Extend or create fragment LOGDEB2("match: [" << dumb << "] current: " << m_curfrag.first << - ", " << m_curfrag.second << " remain " << - m_remainingWords << endl); + ", " << m_curfrag.second << " remain " << + m_remainingWords << endl); double coef = m_wordcoefs[dumb]; if (!m_remainingWords) { // No current fragment. Start one @@ -172,7 +180,7 @@ m_curtermcoef = coef; } else { LOGDEB2("Extending current fragment: " << m_remainingWords << - " -> " << m_ctxwords << endl); + " -> " << m_ctxwords << endl); m_extcount++; #ifdef COMPUTE_HLZONES if (m_prevwordhit) { @@ -223,18 +231,25 @@ m_remainingWords--; m_curfrag.second = bte; if (m_remainingWords == 0) { - if (m_totalcoef < 5.0 || m_curfragcoef >= 1.0) { - // Don't push bad fragments if we have a lot already - m_fragments.push_back(MatchFragment(m_curfrag.first, - m_curfrag.second, - m_curfragcoef, + // We used to not push weak fragments if we had a lot + // already. This can cause problems if the fragments + // we drop are actually group fragments (which have + // not got their boost yet). The right cut value is + // difficult to determine, because the absolute values + // of the coefs depend on many things (index size, + // etc.) The old test was if (m_totalcoef < 5.0 || + // m_curfragcoef >= 1.0) We now just avoid creating a + // monster by testing the current fragments count at + // the top of the function + m_fragments.push_back(MatchFragment(m_curfrag.first, + m_curfrag.second, + m_curfragcoef, #ifdef COMPUTE_HLZONES - m_curhlzones, + m_curhlzones, #endif - m_curhitpos, - m_curterm - )); - } + m_curhitpos, + m_curterm + )); m_totalcoef += m_curfragcoef; m_curfragcoef = 0.0; m_curtermcoef = 0.0; @@ -252,6 +267,8 @@ // find the group matches. We process everything as NEAR (no // PHRASE specific processing). void updgroups() { + LOGDEB("TextSplitABS: stored total " << m_fragments.size() << + " fragments" << endl); vector tboffs; // Look for matches to PHRASE and NEAR term groups and finalize @@ -283,7 +300,7 @@ ); // Give a boost to fragments which contain a group match - // (phrase/near), they are dear to the user's heart. list are + // (phrase/near), they are dear to the user's heart. Lists are // sorted, so we never go back in the fragment list (can // always start the search where we previously stopped). if (m_fragments.empty()) { @@ -292,8 +309,8 @@ auto fragit = m_fragments.begin(); for (const auto& grpmatch : tboffs) { LOGDEB2("LOOKING FOR FRAGMENT: group: " << grpmatch.offs.first << - "-" << grpmatch.offs.second << " curfrag " << - fragit->start << "-" << fragit->stop << endl); + "-" << grpmatch.offs.second << " curfrag " << + fragit->start << "-" << fragit->stop << endl); while (fragit->stop < grpmatch.offs.first) { fragit++; if (fragit == m_fragments.end()) { @@ -413,7 +430,6 @@ } ); - vector vpbreaks; ndb->getPagePositions(docid, vpbreaks); diff -Nru kio-recoll-1.25.16/VERSION kio-recoll-1.25.17/VERSION --- kio-recoll-1.25.16/VERSION 2019-05-22 12:36:05.000000000 +0000 +++ kio-recoll-1.25.17/VERSION 2019-05-24 06:56:42.000000000 +0000 @@ -1 +1 @@ -1.25.16 +1.25.17