diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/changelog sunpinyin-3.0.0~rc2+ds1/debian/changelog --- sunpinyin-3.0.0~rc1+ds1/debian/changelog 2020-11-19 17:38:40.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/changelog 2021-10-02 02:53:17.000000000 +0000 @@ -1,20 +1,15 @@ -sunpinyin (3.0.0~rc1+ds1-3build3) hirsute; urgency=medium +sunpinyin (3.0.0~rc2+ds1-4) unstable; urgency=medium - * No-change rebuild to build with python3.9 as default. + * Team upload. + * New upstream release. + * Remove Liang Guo from the uploaders list. (Closes: #961611) + Thanks for all your work! + * debian/patches: Drop all backported patches. + * Refresh packaging: + + Bump Standards-Version to 4.6.0. + + Bump debhelper compat to v13. - -- Matthias Klose Thu, 19 Nov 2020 18:38:40 +0100 - -sunpinyin (3.0.0~rc1+ds1-3build2) focal; urgency=medium - - * No-change rebuild for libgcc-s1 package name change. - - -- Matthias Klose Sun, 22 Mar 2020 16:58:59 +0100 - -sunpinyin (3.0.0~rc1+ds1-3build1) focal; urgency=medium - - * No-change rebuild to build with python3.8. - - -- Matthias Klose Sat, 25 Jan 2020 04:39:58 +0000 + -- Boyuan Yang Fri, 01 Oct 2021 22:53:17 -0400 sunpinyin (3.0.0~rc1+ds1-3) unstable; urgency=medium diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/control sunpinyin-3.0.0~rc2+ds1/debian/control --- sunpinyin-3.0.0~rc1+ds1/debian/control 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/control 2021-10-02 02:52:25.000000000 +0000 @@ -3,16 +3,15 @@ Section: libs Maintainer: Debian Input Method Team Uploaders: - Liang Guo , YunQiang Su , Build-Depends: - debhelper-compat (= 12), - dh-python, + debhelper-compat (= 13), + dh-sequence-python3, libsqlite3-dev, pkg-config, python3-dev:any, scons, -Standards-Version: 4.4.1 +Standards-Version: 4.6.0 Rules-Requires-Root: no Homepage: https://github.com/sunpinyin/sunpinyin Vcs-Git: https://salsa.debian.org/debian/sunpinyin.git diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0002-sunpinyin-dictgen.mk.in-completely-abandon-now-defun.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0002-sunpinyin-dictgen.mk.in-completely-abandon-now-defun.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0002-sunpinyin-dictgen.mk.in-completely-abandon-now-defun.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0002-sunpinyin-dictgen.mk.in-completely-abandon-now-defun.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -From: "Casper Ti. Vector" -Date: Sat, 10 Sep 2016 10:27:01 +0800 -Subject: sunpinyin-dictgen.mk.in: completely abandon (now defunct) google - code. - ---- - src/sunpinyin-dictgen.mk.in | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/sunpinyin-dictgen.mk.in b/src/sunpinyin-dictgen.mk.in -index ee82c0c..e3e47fb 100644 ---- a/src/sunpinyin-dictgen.mk.in -+++ b/src/sunpinyin-dictgen.mk.in -@@ -36,14 +36,14 @@ W3M = @W3M@ - ENDIANNESS = @ENDIANNESS@ - DATA_DIR = @DATADIR@/sunpinyin - --DL_LIST = https://open-gram.googlecode.com/git/ -+DL_LIST = https://sourceforge.net/projects/open-gram/files/ - DL_HOST = http://heanet.dl.sourceforge.net - DL_ROOT = ${DL_HOST}/open-gram - DICT_PAT = 'dict\.utf8-[0-9]\+.tar.bz2' - SLM_PAT = 'lm_sc\.3gm\.arpa-[0-9]\+.tar.bz2' - --DICT_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${DICT_PAT} | sort | tail -n 1) --SLM_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${SLM_PAT} | sort | tail -n 1) -+DICT_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${DICT_PAT} | sort -u | tail -n 1) -+SLM_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${SLM_PAT} | sort -u | tail -n 1) - - all: install - diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0003-Portability-fix-for-NetBSD-iconv.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0003-Portability-fix-for-NetBSD-iconv.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0003-Portability-fix-for-NetBSD-iconv.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0003-Portability-fix-for-NetBSD-iconv.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -From: OBATA Akio -Date: Wed, 8 Feb 2017 16:22:45 +0900 -Subject: Portability fix for NetBSD iconv - ---- - src/portability.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/portability.h b/src/portability.h -index 60dd727..314128f 100644 ---- a/src/portability.h -+++ b/src/portability.h -@@ -72,7 +72,7 @@ - inline double log2(double x) { return log(x) / M_LN2; } - #endif - --#if defined(sun) // Solaris/HP-UX 's iconv is const char** -+#if defined(sun) || defined(__NetBSD__) // Solaris/HP-UX/NetBSD 's iconv is const char** - typedef const char* TIConvSrcPtr; - #else - typedef char* TIConvSrcPtr; diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0004-Make-the-.pc-output-reproducible.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0004-Make-the-.pc-output-reproducible.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0004-Make-the-.pc-output-reproducible.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0004-Make-the-.pc-output-reproducible.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -From: Chris Lamb -Date: Wed, 12 Apr 2017 19:50:27 +0100 -Subject: Make the .pc output reproducible - -Whilst working on the Reproducible Builds effort [0], we noticed -that sunpinyin could not be built reproducibly due to iterating over -the filesystem in a non-deterministic ordering - - [0] https://reproducible-builds.org/ - -Signed-off-by: Chris Lamb ---- - SConstruct | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/SConstruct b/SConstruct -index a78c7d9..133e393 100644 ---- a/SConstruct -+++ b/SConstruct -@@ -429,7 +429,7 @@ env.Substfile('sunpinyin-2.0.pc.in', SUBST_DICT={ - '@VERSION@': version, - '@CFLAGS@': reduce(lambda a, b: a + ' ' + b, - map(lambda x: '-I$${includedir}' + x[3:], -- allinc())), -+ sorted(allinc()))), - }) - - libname_default = '%ssunpinyin%s' % (env.subst('${SHLIBPREFIX}'), diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0005-sunpinyin-dictgen.mk.in-use-HTTPS-mirrors-to-avoid-M.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0005-sunpinyin-dictgen.mk.in-use-HTTPS-mirrors-to-avoid-M.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0005-sunpinyin-dictgen.mk.in-use-HTTPS-mirrors-to-avoid-M.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0005-sunpinyin-dictgen.mk.in-use-HTTPS-mirrors-to-avoid-M.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -From: "Casper Ti. Vector" -Date: Thu, 5 Oct 2017 22:36:23 +0800 -Subject: sunpinyin-dictgen.mk.in: use HTTPS mirrors to avoid MITM attacks. - ---- - src/sunpinyin-dictgen.mk.in | 35 ++++++----------------------------- - 1 file changed, 6 insertions(+), 29 deletions(-) - -diff --git a/src/sunpinyin-dictgen.mk.in b/src/sunpinyin-dictgen.mk.in -index e3e47fb..8572f1f 100644 ---- a/src/sunpinyin-dictgen.mk.in -+++ b/src/sunpinyin-dictgen.mk.in -@@ -1,34 +1,11 @@ - #!/usr/bin/@MAKE@ -f - # -*- mode: makefile; indent-tabs-mode: t -*- vim:noet:ts=4 - --# In case of problems, also try the following ${DL_HOST} values: --# (copied from Gentoo's `thirdpartymirrors' file) --# http://aarnet.dl.sourceforge.net --# http://colocrossing.dl.sourceforge.net --# http://cznic.dl.sourceforge.net --# http://dfn.dl.sourceforge.net --# http://freefr.dl.sourceforge.net --# http://garr.dl.sourceforge.net --# http://heanet.dl.sourceforge.net --# http://hivelocity.dl.sourceforge.net --# http://ignum.dl.sourceforge.net --# http://internode.dl.sourceforge.net --# http://iweb.dl.sourceforge.net --# http://jaist.dl.sourceforge.net --# http://kaz.dl.sourceforge.net --# http://kent.dl.sourceforge.net --# http://nchc.dl.sourceforge.net --# http://ncu.dl.sourceforge.net --# http://netcologne.dl.sourceforge.net --# http://optimate.dl.sourceforge.net --# http://softlayer.dl.sourceforge.net --# http://sunet.dl.sourceforge.net --# http://surfnet.dl.sourceforge.net --# http://switch.dl.sourceforge.net --# http://tcpdiag.dl.sourceforge.net --# http://ufpr.dl.sourceforge.net --# http://waia.dl.sourceforge.net --# http://waix.dl.sourceforge.net -+# In case of problems, try replacing `jaist' in ${DL_HOST} with one of these -+# (copied from ): -+# astuteinternet ayera cfhcable cytranet excellmedia freefr gigenet -+# iweb jaist kent liquidtelecom nchc netcologne netix newcontinuum -+# phoenixnap razaoinfo superb-dca2 superb-sea2 svwh ufpr versaweb - - WGET = @WGET@ - TAR = @TAR@ -@@ -37,7 +14,7 @@ ENDIANNESS = @ENDIANNESS@ - DATA_DIR = @DATADIR@/sunpinyin - - DL_LIST = https://sourceforge.net/projects/open-gram/files/ --DL_HOST = http://heanet.dl.sourceforge.net -+DL_HOST = https://jaist.dl.sourceforge.net - DL_ROOT = ${DL_HOST}/open-gram - DICT_PAT = 'dict\.utf8-[0-9]\+.tar.bz2' - SLM_PAT = 'lm_sc\.3gm\.arpa-[0-9]\+.tar.bz2' diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0006-pytrie_gen.cpp-work-around-iconv-segfault-with-musl.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0006-pytrie_gen.cpp-work-around-iconv-segfault-with-musl.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0006-pytrie_gen.cpp-work-around-iconv-segfault-with-musl.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0006-pytrie_gen.cpp-work-around-iconv-segfault-with-musl.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -From: "Casper Ti. Vector" -Date: Thu, 15 Mar 2018 22:39:45 +0800 -Subject: pytrie_gen.cpp: work around iconv() segfault with musl. - ---- - src/lexicon/pytrie_gen.cpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lexicon/pytrie_gen.cpp b/src/lexicon/pytrie_gen.cpp -index dc78a08..d3d62a2 100644 ---- a/src/lexicon/pytrie_gen.cpp -+++ b/src/lexicon/pytrie_gen.cpp -@@ -99,6 +99,8 @@ getPureGBEncoding(const char* utf8str) - static iconv_t ric_gb = iconv_open("UTF-8", "GB2312"); - static iconv_t ric_gbk = iconv_open("UTF-8", "GBK"); - -+ // FIXME -+ if (ic_gb == -1 || ic_gbk == -1 || ric_gb == -1 || ric_gbk == -1) return 3; - unsigned ret = 0; - - if (!isCorrectConverted(utf8str, ic_gb, ric_gb)) { diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0007-SConstruct-use-C-11-standard.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0007-SConstruct-use-C-11-standard.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0007-SConstruct-use-C-11-standard.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0007-SConstruct-use-C-11-standard.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -From: Kefu Chai -Date: Wed, 18 Apr 2018 19:22:10 +0800 -Subject: SConstruct: use C++11 standard - -Signed-off-by: Kefu Chai ---- - SConstruct | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/SConstruct b/SConstruct -index 133e393..459be01 100644 ---- a/SConstruct -+++ b/SConstruct -@@ -215,7 +215,7 @@ def CreateEnvironment(): - make = 'gmake' - tar = 'gtar' - libln_builder = Builder(action='cd ${TARGET.dir} && ln -s ${SOURCE.name} ${TARGET.name}') -- env = Environment(ENV=os.environ, CFLAGS=cflags, CXXFLAGS='', -+ env = Environment(ENV=os.environ, CFLAGS=cflags, CXXFLAGS='-std=c++11', - MAKE=make, WGET=wget, W3M=w3m, TAR=tar, - CPPPATH=['.'] + allinc(), - tools=['default', 'textfile']) diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0008-pytrie_gen.cpp-cast-1-to-iconv_t-before-comparison.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0008-pytrie_gen.cpp-cast-1-to-iconv_t-before-comparison.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0008-pytrie_gen.cpp-cast-1-to-iconv_t-before-comparison.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0008-pytrie_gen.cpp-cast-1-to-iconv_t-before-comparison.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ -From: Yuxuan Shui -Date: Sat, 14 Apr 2018 21:29:42 -0400 -Subject: pytrie_gen.cpp: cast "-1" to iconv_t before comparison - -this kills the errors like - - src/lexicon/pytrie_gen.cpp:103:19: error: ISO C++ forbids comparison - between pointer and integer ---- - src/lexicon/pytrie_gen.cpp | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/src/lexicon/pytrie_gen.cpp b/src/lexicon/pytrie_gen.cpp -index d3d62a2..fa9e4ab 100644 ---- a/src/lexicon/pytrie_gen.cpp -+++ b/src/lexicon/pytrie_gen.cpp -@@ -94,15 +94,15 @@ isCorrectConverted(const char* utf8, iconv_t ic, iconv_t ric) - unsigned - getPureGBEncoding(const char* utf8str) - { -- static iconv_t ic_gb = iconv_open("GB2312", "UTF-8"); -- static iconv_t ic_gbk = iconv_open("GBK", "UTF-8"); -- static iconv_t ric_gb = iconv_open("UTF-8", "GB2312"); -- static iconv_t ric_gbk = iconv_open("UTF-8", "GBK"); -- -+ static const iconv_t e = reinterpret_cast(-1); -+ static const iconv_t ic_gb = iconv_open("GB2312", "UTF-8"); -+ static const iconv_t ic_gbk = iconv_open("GBK", "UTF-8"); -+ static const iconv_t ric_gb = iconv_open("UTF-8", "GB2312"); -+ static const iconv_t ric_gbk = iconv_open("UTF-8", "GBK"); - // FIXME -- if (ic_gb == -1 || ic_gbk == -1 || ric_gb == -1 || ric_gbk == -1) return 3; -- unsigned ret = 0; -+ if (ic_gb == e || ic_gbk == e || ric_gb == e || ric_gbk == e) return 3; - -+ unsigned ret = 0; - if (!isCorrectConverted(utf8str, ic_gb, ric_gb)) { - ret = 1; // at least it is contains some GBK char - if (!isCorrectConverted(utf8str, ic_gbk, ric_gbk)) diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0009-slm-thread-be-more-consistent-by-using-the-C-variant.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0009-slm-thread-be-more-consistent-by-using-the-C-variant.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0009-slm-thread-be-more-consistent-by-using-the-C-variant.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0009-slm-thread-be-more-consistent-by-using-the-C-variant.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -From: Kefu Chai -Date: Wed, 18 Apr 2018 19:25:09 +0800 -Subject: slm/thread: be more consistent by using the C++ variant of log and - exp - -Signed-off-by: Kefu Chai ---- - src/slm/thread/slmthread.cpp | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/src/slm/thread/slmthread.cpp b/src/slm/thread/slmthread.cpp -index c6ef1d0..dc6c1b9 100644 ---- a/src/slm/thread/slmthread.cpp -+++ b/src/slm/thread/slmthread.cpp -@@ -57,7 +57,9 @@ - #include "ValueCompress.h" - - using std::log; -+using std::log2; - using std::exp; -+using std::exp2; - - class CSIMSlmWithIteration : public CSIMSlm { - public: -@@ -253,8 +255,8 @@ main(int argc, char* argv[]) - - bool usingLogPr = slm.isUseLogPr(); - -- #define EffectivePr(a) (usingLogPr ? ((a) / log(2.0)) : -log2f((a))) -- #define OriginalPr(b) (usingLogPr ? ((b) * log(2.0)) : exp2(-(b))) -+ #define EffectivePr(a) (usingLogPr ? ((a) / log(2.0f)) : -log2((a))) -+ #define OriginalPr(b) (usingLogPr ? ((b) * log(2.0f)) : exp2(-(b))) - #define EffectiveBow(a) (usingLogPr ? exp(-(a)) : (a)) - #define OriginalBow(b) (usingLogPr ? -log((b)) : (b)) - diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0010-SConstruct-Add-support-for-riscv64-architecture.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0010-SConstruct-Add-support-for-riscv64-architecture.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0010-SConstruct-Add-support-for-riscv64-architecture.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0010-SConstruct-Add-support-for-riscv64-architecture.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -From: Boyuan Yang <073plan@gmail.com> -Date: Fri, 29 Jun 2018 22:04:00 +0800 -Subject: SConstruct: Add support for riscv64 architecture - -This commit adds support for newly emerged riscv64 architecture. - -Original author: "Manuel A. Fernandez Montecelo" -Downstream report: https://bugs.debian.org/898019 - -Signed-off-by: Boyuan Yang <073plan@gmail.com> ---- - SConstruct | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/SConstruct b/SConstruct -index 459be01..92c3c55 100644 ---- a/SConstruct -+++ b/SConstruct -@@ -333,6 +333,7 @@ def AppendEndianCheck(conf): - || defined(_M_X64) || defined(__bfin__) \ - || defined(__alpha__) || defined(__ARMEL__) \ - || defined(_MIPSEL) || (defined(__sh__) && defined(__LITTLE_ENDIAN__)) \ -+ || defined(__riscv) \ - || defined(__AARCH64EL__) - # undef WORDS_BIGENDIAN - diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0011-Fix-typos-found-by-codespell.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0011-Fix-typos-found-by-codespell.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0011-Fix-typos-found-by-codespell.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0011-Fix-typos-found-by-codespell.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,477 +0,0 @@ -From: Boyuan Yang <073plan@gmail.com> -Date: Sat, 30 Jun 2018 08:57:20 +0800 -Subject: Fix typos found by codespell - ---- - man/genpyt.pod | 2 +- - man/idngram_merge.pod | 2 +- - man/slmprune.pod | 2 +- - man/slmseg.pod | 2 +- - man/slmthread.pod | 2 +- - src/ime-core/ic_history.cpp | 10 +++++----- - src/ime-core/ic_history.h | 2 +- - src/ime-core/imi_context.cpp | 4 ++-- - src/ime-core/imi_context.h | 4 ++-- - src/ime-core/imi_data.cpp | 2 +- - src/ime-core/imi_option_event.h | 2 +- - src/ime-core/imi_winHandler.cpp | 2 +- - src/ime-core/lattice_states.h | 2 +- - src/lexicon/pytrie_gen.cpp | 2 +- - src/portability.cpp | 2 +- - src/slm/ids2ngram/ids2ngram.cpp | 2 +- - src/slm/slm.cpp | 6 +++--- - src/slm/slm.h | 6 +++--- - src/slm/slmbuild/sim_slmbuilder.cpp | 18 +++++++++--------- - src/slm/slmbuild/slmbuild.cpp | 2 +- - src/slm/slmprune/slmprune.cpp | 6 +++--- - src/slm/thread/ValueCompress.cpp | 2 +- - 22 files changed, 42 insertions(+), 42 deletions(-) - -diff --git a/man/genpyt.pod b/man/genpyt.pod -index 00d549e..ca0cac9 100644 ---- a/man/genpyt.pod -+++ b/man/genpyt.pod -@@ -27,7 +27,7 @@ A default dictionary file can be found at F. - - =item I - --The output binary PINYIN lexicon file. This lexicon contains a trie presenting the key tree of PINYIN. And all of the candiate words are sorted using the unigram in I. This file can be used with sunpinyin input method engines. -+The output binary PINYIN lexicon file. This lexicon contains a trie presenting the key tree of PINYIN. And all of the candidate words are sorted using the unigram in I. This file can be used with sunpinyin input method engines. - - - =item I -diff --git a/man/idngram_merge.pod b/man/idngram_merge.pod -index 6dd6636..492f578 100644 ---- a/man/idngram_merge.pod -+++ b/man/idngram_merge.pod -@@ -25,7 +25,7 @@ Specify the N-gram of source data file. - - =item B<-o>, B<--out> I - --Specifiy the final merged idngram file. -+Specify the final merged idngram file. - - =back - -diff --git a/man/slmprune.pod b/man/slmprune.pod -index 67953b3..5ffd4bd 100644 ---- a/man/slmprune.pod -+++ b/man/slmprune.pod -@@ -20,7 +20,7 @@ Note that we do not ensure that during pruning process, exactly the - the given number of items are cut or reserved, because some items may - contains high level children, so could not be cut. - --Also it's your responsiblity to give right number of arguments based -+Also it's your responsibility to give right number of arguments based - on 'input_slm'. - - -diff --git a/man/slmseg.pod b/man/slmseg.pod -index bf4a2c7..fa87de1 100644 ---- a/man/slmseg.pod -+++ b/man/slmseg.pod -@@ -40,7 +40,7 @@ If under binary mode, print id(s) in text. - - - =item B<-m>, B<--model> I --Speficy the language model file. This file is always generated by B. -+Specify the language model file. This file is always generated by B. - - =back - -diff --git a/man/slmthread.pod b/man/slmthread.pod -index c81dc88..4b4499a 100644 ---- a/man/slmthread.pod -+++ b/man/slmthread.pod -@@ -12,7 +12,7 @@ B add back-off-state for each slm node in the I. - Also it compresses 32-bit float into 16 bit representation. These processing - speeds up the looking up. - --The I is always genreated by B. And -+The I is always generated by B. And - the I can be used to feed B as a reference to segment - Chinese text. - -diff --git a/src/ime-core/ic_history.cpp b/src/ime-core/ic_history.cpp -index ab45402..998c5ef 100644 ---- a/src/ime-core/ic_history.cpp -+++ b/src/ime-core/ic_history.cpp -@@ -74,8 +74,8 @@ CBigramHistory::memorize(uint32_t* its_wid, uint32_t* ite_wid) - { - TBigram bigram(DCWID, DCWID); - -- // First , we insert an DC word id before the context history -- // to seperated from previous stream. -+ // First, we insert a DC word id before the context history -+ // to separated from previous stream. - if (m_memory.size() == contxt_memory_size) { - TBigram hb; - hb.first = m_memory.front(); -@@ -347,14 +347,14 @@ void - CBigramHistory::incUniFreq(TUnigram& ug) - { - ++m_unifreq[ug]; -- //printf("Remebering uniFreq[%d]-->%d\n", ug, m_unifreq[ug]); -+ //printf("Remembering uniFreq[%d]-->%d\n", ug, m_unifreq[ug]); - } - - void - CBigramHistory::incBiFreq(TBigram& bg) - { - ++m_bifreq[bg]; -- //printf("Remebering biFreq[%d,%d]-->%d\n", bg.first, bg.second, m_bifreq[bg]); -+ //printf("Remembering biFreq[%d,%d]-->%d\n", bg.first, bg.second, m_bifreq[bg]); - } - - // so far, it's very expensive to erase a word from bigram pairs, need to design -@@ -416,7 +416,7 @@ CBigramHistory::initStopWords() - m_stopWords.clear(); - - m_stopWords.insert(0); //unknown world -- m_stopWords.insert(DCWID); //seperator word id used by history memory interanlly -+ m_stopWords.insert(DCWID); //separator word id used by history memory internally - } - - // -*- indent-tabs-mode: nil -*- vim:et:ts=4 -diff --git a/src/ime-core/ic_history.h b/src/ime-core/ic_history.h -index e620af3..fdf3221 100644 ---- a/src/ime-core/ic_history.h -+++ b/src/ime-core/ic_history.h -@@ -50,7 +50,7 @@ - */ - class CICHistory { - public: -- /** don't care word id, or seperator word id */ -+ /** don't care word id, or separator word id */ - static const uint32_t DCWID; - - virtual ~CICHistory(); -diff --git a/src/ime-core/imi_context.cpp b/src/ime-core/imi_context.cpp -index dcb0cdb..3b2d3c9 100644 ---- a/src/ime-core/imi_context.cpp -+++ b/src/ime-core/imi_context.cpp -@@ -519,12 +519,12 @@ CIMIContext::_transferBetween(unsigned start, unsigned end, - double ts = m_pModel->transfer(it->m_slmState, _wid, node.m_slmState); - m_pModel->historify(node.m_slmState); - -- // backward to psuedo root, so wid is probably a user word, -+ // backward to pseudo root, so wid is probably a user word, - // save the wid in idx field, so that later we could get it via - // CThreadSlm::lastWordId, to calculate p_{cache} correctly. - if (node.m_slmState.getLevel() == 0 - && m_pHistory && m_pHistory->seenBefore(wid)) -- node.m_slmState.setIdx(wid); // an psuedo unigram node state -+ node.m_slmState.setIdx(wid); // an pseudo unigram node state - - if (m_pHistory) { - unsigned history[2] = { m_pModel->lastWordId(it->m_slmState), _wid }; -diff --git a/src/ime-core/imi_context.h b/src/ime-core/imi_context.h -index cc40847..936be67 100644 ---- a/src/ime-core/imi_context.h -+++ b/src/ime-core/imi_context.h -@@ -122,7 +122,7 @@ public: - const TWCHAR *m_cwstr; - - public: -- /** Give out the constructor for convinience */ -+ /** Give out the constructor for convenience */ - CCandidate(unsigned start = 0, - unsigned end = 0, - TLexiconState* pLxst = NULL, -@@ -152,7 +152,7 @@ public: - ASCII = 0x0201, // english string - PUNC = 0x0202, // punctuation - SYMBOL = 0x0204, // other symbol -- DIGITAL = 0x0208, // not implemeted here -+ DIGITAL = 0x0208, // not implemented here - }; // TYPE - - enum BESTWORD_TYPE { -diff --git a/src/ime-core/imi_data.cpp b/src/ime-core/imi_data.cpp -index 35c1b85..1efe024 100644 ---- a/src/ime-core/imi_data.cpp -+++ b/src/ime-core/imi_data.cpp -@@ -74,7 +74,7 @@ CIMIData::loadResource(const char* lm_file_path, const char* pytrie_file_path) - clear(); - - #ifdef DEBUG -- printf("\n openning lm file %s, pytrie file %s...", -+ printf("\n opening lm file %s, pytrie file %s...", - lm_file_path, - pytrie_file_path); - #endif -diff --git a/src/ime-core/imi_option_event.h b/src/ime-core/imi_option_event.h -index d62ef2e..4e0a567 100644 ---- a/src/ime-core/imi_option_event.h -+++ b/src/ime-core/imi_option_event.h -@@ -65,7 +65,7 @@ public: - /** - * onConfigChanged will be called whenever an option is changed - * @param event presents the changed option -- * @return true if the event is consumed, and not intented to be -+ * @return true if the event is consumed, and not intended to be - * sent to another event listener, false otherwise. - */ - virtual bool onConfigChanged(const COptionEvent&) { return false; } -diff --git a/src/ime-core/imi_winHandler.cpp b/src/ime-core/imi_winHandler.cpp -index 8918a80..3adbfb4 100644 ---- a/src/ime-core/imi_winHandler.cpp -+++ b/src/ime-core/imi_winHandler.cpp -@@ -95,7 +95,7 @@ CIMIWinHandler::updateStatus(int key, int value) - case STATUS_ID_FULLSYMBOL: - printf("Full Simbol is "); break; - default: -- printf("Unknow Status id %d is ", key); -+ printf("Unknown Status id %d is ", key); - break; - } - -diff --git a/src/ime-core/lattice_states.h b/src/ime-core/lattice_states.h -index 84cc491..ad2e0b6 100644 ---- a/src/ime-core/lattice_states.h -+++ b/src/ime-core/lattice_states.h -@@ -53,7 +53,7 @@ typedef TLongExpFloat TSentenceScore; - * language model size, the state node in language model do not - * thread the back-off pointer. Now, we just use the Word Id for - * the node in the language model. Later we should abstract the -- * StateNode from language model implemetation to replace this -+ * StateNode from language model implementation to replace this - * definition. - */ - typedef CThreadSlm::TState CSlmState; -diff --git a/src/lexicon/pytrie_gen.cpp b/src/lexicon/pytrie_gen.cpp -index fa9e4ab..e53f104 100644 ---- a/src/lexicon/pytrie_gen.cpp -+++ b/src/lexicon/pytrie_gen.cpp -@@ -78,7 +78,7 @@ isCorrectConverted(const char* utf8, iconv_t ic, iconv_t ric) - size_t res = iconv(ic, &src, &srclen, &dst, &dstlen); - - if (res != size_t(-1) && srclen == 0) { -- // do revert convertion and compare them -+ // do revert conversion and compare them - src = (TIConvSrcPtr)gbstr; - srclen = strlen((char*)src) + 1; - dst = (char*)utstr; -diff --git a/src/portability.cpp b/src/portability.cpp -index 3281cb2..6b119a8 100644 ---- a/src/portability.cpp -+++ b/src/portability.cpp -@@ -138,7 +138,7 @@ MBSTOWCS(TWCHAR *pwcs, const char* s, size_t n) - - assert(ic != (iconv_t)-1); - -- // To eliminate the const char* and char* diffirence in differnt system -+ // To eliminate the const char* and char* difference in different system - TIConvSrcPtr src = (TIConvSrcPtr)s; - size_t srclen = std::strlen(s) + 1; - char* dst = (char*)pwcs; -diff --git a/src/slm/ids2ngram/ids2ngram.cpp b/src/slm/ids2ngram/ids2ngram.cpp -index 5d31a03..d0d2c36 100644 ---- a/src/slm/ids2ngram/ids2ngram.cpp -+++ b/src/slm/ids2ngram/ids2ngram.cpp -@@ -131,7 +131,7 @@ ShowUsage() - printf("\t -s swapfile # intermedia temporary file\n"); - printf( - "\t -o outputfile # result idngram file [id1, ... idN, freq]*\n"); -- printf("\t -p para_size # maxium ngram-items per para\n"); -+ printf("\t -p para_size # maximum ngram-items per para\n"); - printf("\nExample:\n"); - printf( - " Following example will use three input idstream file idsfile[1,2,3] to generate the idngram file all.id3gram. Each para (internal map size or hash size) would be 1024000, using swap file for temp result. All temp para result would final be merged to got the final result.\n"); -diff --git a/src/slm/slm.cpp b/src/slm/slm.cpp -index 3565bd1..29fee48 100644 ---- a/src/slm/slm.cpp -+++ b/src/slm/slm.cpp -@@ -173,7 +173,7 @@ find_id(NodeT* base, unsigned int h, unsigned int t, unsigned int id) - /** - * return value as the model suggested. The history state must be historified - * or the history's level should be 0. when level == 0 but idx != 0, the -- * history is a psuedo unigram state used for this model to combine another -+ * history is a pseudo unigram state used for this model to combine another - * bigram cache language model - */ - double -@@ -192,7 +192,7 @@ CThreadSlm::rawTransfer(TState history, unsigned int wid, TState& result) - } - - while (true) { -- //for psuedo cache model unigram state -+ //for pseudo cache model unigram state - TNode* pn = ((TNode*)m_Levels[lvl]) + ((lvl) ? pos : 0); - - unsigned int t = (pn + 1)->ch(); -@@ -271,7 +271,7 @@ CThreadSlm::lastWordId(TState st) - st.getIdx(); - return pn->wid(); - } -- return idx; // return the psuedo state word id -+ return idx; // return the pseudo state word id - } - } - -diff --git a/src/slm/slm.h b/src/slm/slm.h -index 3334c4f..74c8086 100644 ---- a/src/slm/slm.h -+++ b/src/slm/slm.h -@@ -54,7 +54,7 @@ - * level and use a table to map the index to a float value; - * -# Compact all float value of -log(pr) into 16384 (14 bits) - * level and use a table to map the index to a float value; -- * -# threading infomation embed into binary model file. Threading include -+ * -# threading information embed into binary model file. Threading include - * - bol(back-off-level) from current level - * - bon(back-off-node)'s index in the bol level array - * . -@@ -62,7 +62,7 @@ - * - when leaf node are arrived, it could use (bol,bon) as history for - * history node. - * - when a word could not be found in current node (cl, cn)'s children, -- * searching could be transfered to (bol, bon) directly and continue -+ * searching could be transferred to (bol, bon) directly and continue - * searching the target word - * -# Add a basic type TState in Language model, a state is pair of\n - * (level, array_idx_of_the level) -@@ -79,7 +79,7 @@ public: - - /** - * (level:idx) located a state in the language model very well -- * Please note the psuedo unigram state, with level == 0, but idx > 0 -+ * Please note the pseudo unigram state, with level == 0, but idx > 0 - * it's for used with bigram cache model - */ - union TState { -diff --git a/src/slm/slmbuild/sim_slmbuilder.cpp b/src/slm/slmbuild/sim_slmbuilder.cpp -index 825e2ea..fb079eb 100644 ---- a/src/slm/slmbuild/sim_slmbuilder.cpp -+++ b/src/slm/slmbuild/sim_slmbuilder.cpp -@@ -111,7 +111,7 @@ CSlmLinearDiscounter::discount(int freq) - } - - // n=1 for unigram, n=2 for bigram; --// level[0] is for psuedo 0 gram, ... -+// level[0] is for pseudo 0 gram, ... - void - CSlmBuilder::Create(int n) - { -@@ -126,7 +126,7 @@ CSlmBuilder::Create(int n) - level[n] = new std::vector; - ((TLeafLevel*)level[n])->reserve(1024); - -- //Add psuedo root node -+ //Add pseudo root node - ((TNodeLevel*)level[0])->push_back(TNode(0, 0, 0)); - - //Initialize the nr[n+1][SLM_MAX_R] 2-D array -@@ -277,7 +277,7 @@ CSlmBuilder::CutLeafLevel(TNodeIterator pfirst, - int idxfirst, idxchk; - TLeafIterator chchk = chfirst; - for (idxfirst = idxchk = 0; chchk != chlast; ++chchk, ++idxchk) { -- //do not cut item whoese 1. freq > thred; 2. psuedo tail -+ //do not cut item whoese 1. freq > thred; 2. pseudo tail - if ((int) chchk->freq > thred || (chchk + 1) == chlast) { - if (idxfirst < idxchk) - *chfirst = *chchk; -@@ -301,7 +301,7 @@ CSlmBuilder::CutNodeLevel(TNodeIterator pfirst, - int idxfirst, idxchk; - TNodeIterator chchk = chfirst; - for (idxfirst = idxchk = 0; chchk != chlast; ++chchk, ++idxchk) { -- //do not cut item whoese 1. freq > thred; 2. psuedo tail; 3. leading children -+ //do not cut item whoese 1. freq > thred; 2. pseudo tail; 3. leading children - TNodeIterator chnext = chchk + 1; - if ((int) chchk->freq > thred || chnext == chlast || - (chnext->child != chchk->child)) { -@@ -349,7 +349,7 @@ CSlmBuilder::Cut() - void - CSlmBuilder::AppendTails() - { -- printf("\nAppending psuedo tail node for each level..."); fflush(stdout); -+ printf("\nAppending pseudo tail node for each level..."); fflush(stdout); - for (int lvl = 0; lvl < nlevel; ++lvl) { - int child_size = 0; - if (lvl == nlevel - 1) { -@@ -360,7 +360,7 @@ CSlmBuilder::AppendTails() - TNodeLevel& v = *(TNodeLevel*)(level[lvl]); - v.push_back(TNode(0x00FFFFFF, child_size, 1)); - } -- //also make a psuedo tail node for the leaf level -+ //also make a pseudo tail node for the leaf level - ((TLeafLevel*)(level[nlevel]))->push_back(TLeaf(0, 1)); - printf("\n"); fflush(stdout); - } -@@ -374,7 +374,7 @@ DiscountOneLevel(CSlmBuilder::TNodeLevel& v, - { - CSlmBuilder::TNodeIterator it = v.begin(); - CSlmBuilder::TNodeIterator ite = v.begin() + (v.size() - 1); -- for (; it != ite; ++it) { //do not calc the psuedo tail item -+ for (; it != ite; ++it) { //do not calc the pseudo tail item - CSlmBuilder::TNodeIterator itnext = it + 1; - double root_freq = it->freq; - for (int h = it->child, t = itnext->child; h < t; ++h) { -@@ -411,8 +411,8 @@ CSlmBuilder::Discount() - DiscountOneLevel(v, ch, discounter[lvl + 1], bUseLogPr); - } - } -- printf("\n Giving psuedo root level 0 a distribution..."); -- //make the psuedo 0-gram a equal distribution -+ printf("\n Giving pseudo root level 0 a distribution..."); -+ //make the pseudo 0-gram a equal distribution - TNodeLevel& v0 = *(TNodeLevel*)(level[0]); - if (bUseLogPr) { - v0[0].pr = PR_TYPE(-log(double(1.0) / m_nWord)); -diff --git a/src/slm/slmbuild/slmbuild.cpp b/src/slm/slmbuild/slmbuild.cpp -index ac2d92f..579cf2c 100644 ---- a/src/slm/slmbuild/slmbuild.cpp -+++ b/src/slm/slmbuild/slmbuild.cpp -@@ -89,7 +89,7 @@ Options:\n\ - -w --wordcount N # Lexicon size, number of different word\n\ - -b --brk id[,id...] # set the ids which should be treat as breaker\n\ - -e --exclude id[,id...] # set the ids which should not be put into LM\n\ -- -c --cut c1[,c2...] # k-gram whose freq <= c[k] are droped\n\ -+ -c --cut c1[,c2...] # k-gram whose freq <= c[k] are dropped\n\ - -d --discount method,param # the k-th -d parm specify the discount method \n\ - for k-gram. Possible values for method/param:\n\ - GT,R,dis : GT discount for r <= R, r is the freq of a ngram.\n\ -diff --git a/src/slm/slmprune/slmprune.cpp b/src/slm/slmprune/slmprune.cpp -index 887630c..7b3c238 100644 ---- a/src/slm/slmprune/slmprune.cpp -+++ b/src/slm/slmprune/slmprune.cpp -@@ -161,7 +161,7 @@ CutLevel(CSIMSlm::TNode* pfirst, - int idxfirst, idxchk; - chIterator chchk = chfirst; - for (idxfirst = idxchk = 0; chchk != chlast; ++chchk, ++idxchk) { -- //cut item whoese pr == 1.0; and not psuedo tail -+ //cut item whoese pr == 1.0; and not pseudo tail - if (chchk->pr != ((bUseLogPr) ? 0.0 : 1.0) || (chchk + 1) == chlast) { - if (idxfirst < idxchk) *chfirst = *chchk; - while (pfirst != plast && pfirst->child <= idxchk) -@@ -188,7 +188,7 @@ CSlmPruner::PruneLevel(int lvl) - printf("\n Level %d (%d items), allocating...", lvl, sz[lvl] - 1); fflush( - stdout); - -- int n = sz[lvl] - 1; //do not count last psuedo tail -+ int n = sz[lvl] - 1; //do not count last pseudo tail - if (cut[lvl] >= n) cut[lvl] = n - 1; - TNodeInfo* pbuf = new TNodeInfo[n]; - TSIMWordId hw[16]; // it should be lvl+1, yet some compiler do not support it -@@ -446,7 +446,7 @@ ShowUsage(void) - Note that we do not ensure that during pruning process, exactly the\n\ - the given number of items are cut or reserved, because some items may \n\ - contains high level children, so could not be cut. \n\ -- Also it's your responsiblity to give right number of arguments based\n\ -+ Also it's your responsibility to give right number of arguments based\n\ - on 'input_slm'.\n\ - \nSee Also:\n\ - To get information of the back-off language model, try 'slminfo'.\n\n"); -diff --git a/src/slm/thread/ValueCompress.cpp b/src/slm/thread/ValueCompress.cpp -index c10c432..6be920a 100644 ---- a/src/slm/thread/ValueCompress.cpp -+++ b/src/slm/thread/ValueCompress.cpp -@@ -303,7 +303,7 @@ CValueCompressor::operator()(std::map& eff2val, - v2idx[eff2val[itm->first]] = itm->second; - } - --/* // Can not be maped back, because some value could not be in the eff2val maps -+/* // Can not be mapped back, because some value could not be in the eff2val maps - std::vector::iterator itt = table.begin(); - std::vector::iterator itte = table.end(); - for (; itt != itte; ++itt) diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0012-SConstruct-Add-support-for-m68k-architecture.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0012-SConstruct-Add-support-for-m68k-architecture.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0012-SConstruct-Add-support-for-m68k-architecture.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0012-SConstruct-Add-support-for-m68k-architecture.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -From: yangfl -Date: Tue, 18 Sep 2018 00:40:33 +0800 -Subject: SConstruct: Add support for m68k architecture - -This commit adds support for m68k architecture. - -Original author: John Paul Adrian Glaubitz -Downstream report: https://bugs.debian.org/905238 - -Signed-off-by: David Yang ---- - SConstruct | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/SConstruct b/SConstruct -index 92c3c55..35a3278 100644 ---- a/SConstruct -+++ b/SConstruct -@@ -321,7 +321,8 @@ def AppendEndianCheck(conf): - || defined(__ppc__) || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(_POWER) \ - || defined(__s390__) || (defined(__sh__) && defined(__BIG_ENDIAN__)) \ -- || defined(__AARCH64EB__) -+ || defined(__AARCH64EB__) \ -+ || definied(__m68k__) - # define WORDS_BIGENDIAN 1 - - #elif defined(__i386__) || defined(__i386) \ diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0013-SConstruct-Fix-typo-that-makes-build-failing.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0013-SConstruct-Fix-typo-that-makes-build-failing.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/backport/0013-SConstruct-Fix-typo-that-makes-build-failing.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/backport/0013-SConstruct-Fix-typo-that-makes-build-failing.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -From: Boyuan Yang -Date: Tue, 20 Nov 2018 14:49:21 -0500 -Subject: SConstruct: Fix typo that makes build failing. - ---- - SConstruct | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/SConstruct b/SConstruct -index 35a3278..ff52dc0 100644 ---- a/SConstruct -+++ b/SConstruct -@@ -322,7 +322,7 @@ def AppendEndianCheck(conf): - || defined(_MIPSEB) || defined(_POWER) \ - || defined(__s390__) || (defined(__sh__) && defined(__BIG_ENDIAN__)) \ - || defined(__AARCH64EB__) \ -- || definied(__m68k__) -+ || defined(__m68k__) - # define WORDS_BIGENDIAN 1 - - #elif defined(__i386__) || defined(__i386) \ diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/fix-data-dir.diff sunpinyin-3.0.0~rc2+ds1/debian/patches/fix-data-dir.diff --- sunpinyin-3.0.0~rc1+ds1/debian/patches/fix-data-dir.diff 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/fix-data-dir.diff 2021-10-02 02:51:46.000000000 +0000 @@ -7,10 +7,10 @@ 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SConstruct b/SConstruct -index 01b5cca..a78c7d9 100644 +index e063c6d..9fce9bf 100644 --- a/SConstruct +++ b/SConstruct -@@ -253,7 +253,7 @@ mandir = os.path.join(env['PREFIX'], 'share/man') +@@ -266,7 +266,7 @@ mandir = os.path.join(env['PREFIX'], 'share/man') man1dir = os.path.join(mandir, 'man1') docdir = os.path.join(env['PREFIX'], 'share/doc/sunpinyin') headersdir = os.path.join(env['PREFIX'], 'include/sunpinyin-2.0') diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/python3.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/python3.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/python3.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/python3.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,498 +0,0 @@ -Description: Use python3 -Origin: upstream, - https://github.com/sunpinyin/sunpinyin/commit/f16a9b5704702a5303a4b951c66dc068c19469a0 - https://github.com/sunpinyin/sunpinyin/commit/409345202251af6dfae26705b7c4c9ff65dfbb75 - https://github.com/sunpinyin/sunpinyin/pull/97 -Bug: https://github.com/sunpinyin/sunpinyin/issues/94 - ---- a/SConstruct -+++ b/SConstruct -@@ -227,7 +227,7 @@ - def PassVariables(envvar, env): - for (x, y) in envvar: - if x in os.environ: -- print 'Warning: you\'ve set %s in the environmental variable!' % x -+ print('Warning: you\'ve set %s in the environmental variable!' % x) - env[y] = os.environ[x] - - env = CreateEnvironment() -@@ -299,11 +299,11 @@ - - def CheckPython(context): - context.Message('Checking for Python library...') -- ret = context.TryAction('python-config --prefix')[0] -+ ret = context.TryAction('python3-config --prefix')[0] - context.Result(ret) - if ret: -- context.env.MergeFlags(['!python-config --includes', -- '!python-config --libs']) -+ context.env.MergeFlags(['!python3-config --includes', -+ '!python3-config --libs']) - return ret - - -@@ -429,9 +429,7 @@ - '@PREFIX@': env['PREFIX'], - '@LIBDIR@': env['LIBDIR'], - '@VERSION@': version, -- '@CFLAGS@': reduce(lambda a, b: a + ' ' + b, -- map(lambda x: '-I$${includedir}' + x[3:], -- sorted(allinc()))), -+ '@CFLAGS@': ' '.join(['-I$${includedir}' + x[3:] for x in sorted(allinc())]), - }) - - libname_default = '%ssunpinyin%s' % (env.subst('${SHLIBPREFIX}'), ---- a/python/imdict.py -+++ b/python/imdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - # ---- a/python/importer/import_fcitx_userdict.py -+++ b/python/importer/import_fcitx_userdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - import os, sys - import codecs ---- a/python/importer/import_fit_userdict.py -+++ b/python/importer/import_fit_userdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - import os - import sqlite3 as sqlite ---- a/python/importer/import_google_userdict.py -+++ b/python/importer/import_google_userdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - import os, sys - import codecs -@@ -23,7 +23,7 @@ - - def main (): - if len (sys.argv) != 2: -- print "Please specify the Google Pinyin exported user dict file!" -+ print("Please specify the Google Pinyin exported user dict file!") - exit (1) - - google_user_dict = load_google_user_dict(sys.argv[1]) ---- a/python/importer/import_qim_userdict.py -+++ b/python/importer/import_qim_userdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - import os, sys - import codecs -@@ -18,7 +18,7 @@ - - def main (): - if len (sys.argv) != 2: -- print "Please specify the QIM exported user dict file!" -+ print("Please specify the QIM exported user dict file!") - exit (1) - - qim_user_dict = load_qim_user_dict(sys.argv[1]) ---- a/python/importer/import_qq_userdict.py -+++ b/python/importer/import_qq_userdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - import os, sys - import codecs -@@ -21,7 +21,7 @@ - - def main (): - if len (sys.argv) != 2: -- print "Please specify the QQ PinYin exported user dict file!" -+ print("Please specify the QQ PinYin exported user dict file!") - exit (1) - - qq_user_dict = load_qq_user_dict(sys.argv[1]) ---- a/python/importer/import_sogou_celldict.py -+++ b/python/importer/import_sogou_celldict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - # thanks for the reverse engineering efforts of following projects/peoples: - # http://code.google.com/p/imewlconverter -@@ -71,7 +71,7 @@ - - def main (): - if len (sys.argv) != 2: -- print "Please specify the Sogou PinYin Cell dict file!" -+ print("Please specify the Sogou PinYin Cell dict file!") - exit (1) - - generator = get_word_from_sogou_cell_dict (sys.argv[1]) ---- a/python/importer/import_sogou_userdict.py -+++ b/python/importer/import_sogou_userdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - import os, sys - import codecs -@@ -19,7 +19,7 @@ - - def main (): - if len (sys.argv) != 2: -- print "Please specify the Sogou PinYin exported user dict file!" -+ print("Please specify the Sogou PinYin exported user dict file!") - exit (1) - - sogou_user_dict = load_sogou_user_dict(sys.argv[1]) ---- a/python/importer/import_ziguang_userdict.py -+++ b/python/importer/import_ziguang_userdict.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - import os, sys - import codecs -@@ -21,7 +21,7 @@ - - def main (): - if len (sys.argv) != 2: -- print "Please specify the ZiGuang PinYin exported user dict file!" -+ print("Please specify the ZiGuang PinYin exported user dict file!") - exit (1) - - ziguang_user_dict = load_ziguang_user_dict(sys.argv[1]) ---- a/python/importer/importer.py -+++ b/python/importer/importer.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - import os, sys - import struct - import sqlite3 as sqlite -@@ -72,15 +72,15 @@ - try: - syllables = [valid_syllables[s] for s in pystr.split("'")] - except: -- print "[%s] has un-recognized syllables, ignoring this record!" % pystr -+ print("[%s] has un-recognized syllables, ignoring this record!" % pystr) - continue - - if len (syllables) < 2 or len (syllables) > 6: -- print "[%s] is too long or too short for sunpinyin userdict" % utf8str -+ print("[%s] is too long or too short for sunpinyin userdict" % utf8str) - continue - - if utf8str in sysdict: -- #print "[%s] is already in sunpinyin's sysdict" % utf8str -+ #print("[%s] is already in sunpinyin's sysdict" % utf8str) - continue - - record = [0]*14 -@@ -102,7 +102,7 @@ - """ - try: - db.execute (sqlstring, record) -- #print "[%s] is imported into sunpinyin's userdict" % utf8str -+ #print("[%s] is imported into sunpinyin's userdict" % utf8str) - - batch_count += 1 - if batch_count == 100: -@@ -110,7 +110,7 @@ - batch_count = 0 - - except: -- #print "[%s] is already in sunpinyin's userdict" % utf8str -+ #print("[%s] is already in sunpinyin's userdict" % utf8str) - pass - - db.commit() -@@ -130,7 +130,7 @@ - f = record[8:14] - str = record[-1] - syls = [initials[i[x]] + finals[f[x]] for x in range(l)] -- print str.encode ('UTF-8'), id, "'".join(syls) -+ print(str.encode ('UTF-8'), id, "'".join(syls)) - - if __name__ == "__main__": - export_sunpinyin_user_dict () ---- a/python/mmseg.py -+++ b/python/mmseg.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - # -@@ -45,7 +45,7 @@ - from utils import read_ch_sentences - - def usage(): -- print ''' -+ print(''' - Usage: - mmseg.py -d dict_file [-f (text|bin)] [-i] [-s STOK_ID] [-a AMBI_ID] corpus_file - -@@ -65,7 +65,7 @@ - The sequence ABC will not be segmented, in binary mode, the AMBI-ID - is written out; in text mode, ABC will be output. Default - is 9. --''' -+''') - - options={'show-id': False, - 'format' : 'bin', -@@ -75,8 +75,8 @@ - def parse_options(args): - try: - opts, args = getopt.getopt(args, "hid:f:s:a:", ["help", "show-id", "dict=", "format=", "stok-id=", "ambi-id="]) -- except getopt.GetoptError, err: -- print str(err) -+ except getopt.GetoptError as err: -+ print(str(err)) - sys.exit(1) - - for opt,val in opts: -@@ -114,7 +114,7 @@ - - def process_file(file, dict): - for line in read_ch_sentences(file): -- print >> sys.stderr, line.encode('UTF-8') -+ print(line.encode('UTF-8'), file=sys.stderr) - length = len(line) - i = 0 - while (i < length): ---- a/python/pinyin_data.py -+++ b/python/pinyin_data.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - # ---- a/python/pinyin_info_gen.py -+++ b/python/pinyin_info_gen.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - # -@@ -38,60 +38,60 @@ - from pinyin_data import * - - def fmt_str_array (name, var) : -- print 'static const char *%s[] = { %s };' % (name, ', '.join ('"%s"' % s for s in var)) -+ print('static const char *%s[] = { %s };' % (name, ', '.join ('"%s"' % s for s in var))) - - def fmt_array_size (name): -- print 'static const unsigned num_%s = sizeof(%s) / sizeof(*%s);' % (name, name, name) -+ print('static const unsigned num_%s = sizeof(%s) / sizeof(*%s);' % (name, name, name)) - - def fmt_str_pair_array (name, var) : -- print 'static const char *%s[] = {' % name -+ print('static const char *%s[] = {' % name) - for s1, s2 in var: -- print ' %-7s %s' % ('"%s",' % s1, '"%s",' % s2) -- print '};' -+ print(' %-7s %s' % ('"%s",' % s1, '"%s",' % s2)) -+ print('};') - - def fmt_pair_array_size (name): -- print 'static const unsigned num_%s = sizeof(%s) / sizeof(*%s) / 2;' % (name, name, name) -+ print('static const unsigned num_%s = sizeof(%s) / sizeof(*%s) / 2;' % (name, name, name)) - - fmt_str_array ('initials', initials) - fmt_array_size ('initials') --print '' -+print('') - - fmt_str_array('finals', finals) - fmt_array_size ('finals') --print '' -+print('') - - fmt_str_array('fuzzy_finals', inner_fuzzy_finals) - fmt_array_size ('fuzzy_finals') --print '' -+print('') - - fmt_str_pair_array ('fuzzy_pairs', fuzzy_pairs) - fmt_pair_array_size ('fuzzy_pairs') --print '' -+print('') - - fmt_str_pair_array ('auto_correction_pairs', sorted(auto_correction_pairs.items())) - fmt_pair_array_size ('auto_correction_pairs') --print '' -+print('') - --print 'static const unsigned fuzzy_finals_map [] = {' -+print('static const unsigned fuzzy_finals_map [] = {') - for s in inner_fuzzy_finals: -- print ' %-7s %-7s %-7s /* %-4s -> %-4s len %d */' % ('0x%02x,' % finals.index(s), '0x%02x,' % valid_syllables[s[1:]], '%d,' % (len(s)-1,), s, s[1:], len(s)-1) --print '};\n' -+ print(' %-7s %-7s %-7s /* %-4s -> %-4s len %d */' % ('0x%02x,' % finals.index(s), '0x%02x,' % valid_syllables[s[1:]], '%d,' % (len(s)-1,), s, s[1:], len(s)-1)) -+print('};\n') - --print 'static const TPyTabEntry pinyin_table[] = {' -+print('static const TPyTabEntry pinyin_table[] = {') - for syllable, hex_syllable in sorted(valid_syllables.items()): -- print ' { %-9s %s },' % ('"%s",' % syllable, '0x%05x' % hex_syllable) --print '};\n' -+ print(' { %-9s %s },' % ('"%s",' % syllable, '0x%05x' % hex_syllable)) -+print('};\n') - --print 'static const unsigned fuzzy_pre_syllables [] = {' -+print('static const unsigned fuzzy_pre_syllables [] = {') - for s in fuzzy_pre_syllables: -- print ' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s[:-1]], "'%s'," % s[-1], '0x%05x,' % valid_syllables[s], s) --print ' 0x0,' --print '};\n' -+ print(' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s[:-1]], "'%s'," % s[-1], '0x%05x,' % valid_syllables[s], s)) -+print(' 0x0,') -+print('};\n') - --print 'static const unsigned fuzzy_pro_syllables [] = {' -+print('static const unsigned fuzzy_pro_syllables [] = {') - for s in fuzzy_pro_syllables: -- print ' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s], "'%s'," % s[0], '0x%05x,' % valid_syllables[s[1:]], s) --print ' 0x0,' --print '};\n' -+ print(' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s], "'%s'," % s[0], '0x%05x,' % valid_syllables[s[1:]], s)) -+print(' 0x0,') -+print('};\n') - - # -*- indent-tabs-mode: nil -*- vim:et:ts=4 ---- a/python/pytrie.pyx -+++ b/python/pytrie.pyx -@@ -92,7 +92,7 @@ - def get_words (self): - words = [] - cdef CPinyinTrie_TWord *p= self.pnode.getWordIdPtr () -- for i in xrange (self.pnode.m_nWordId): -+ for i in range (self.pnode.m_nWordId): - words.append (WordInfo(p[i].m_id, p[i].m_bSeen, p[i].m_cost, p[i].m_len, p[i].m_csLevel)) - return words - ---- a/python/quanpin_trie_gen.py -+++ b/python/quanpin_trie_gen.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - # ---- a/python/test.py -+++ b/python/test.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - # -*- coding: UTF-8 -*- - - from pyslm import Slm, SlmState -@@ -10,21 +10,21 @@ - return - - pr, result = slm.transfer (SlmState(0,0), 58614) -- print "pr =", pr, "\tresult = %s" % result -+ print("pr =", pr, "\tresult = %s" % result) - - pr, result = slm.transfer (result, 75956) -- print "pr =", pr, "\tresult = %s" % result -+ print("pr =", pr, "\tresult = %s" % result) - - pr, result = slm.transfer (result, 84582) -- print "pr =", pr, "\tresult = %s" % result -+ print("pr =", pr, "\tresult = %s" % result) - - his = slm.history_state_of (result) -- print "his = %s" % his -+ print("his = %s" % his) - - slm.historify (result) -- print "result = %s" % result -+ print("result = %s" % result) - -- print 'last_word_id =', slm.last_word_id (result) -+ print('last_word_id =', slm.last_word_id (result)) - - slm.free () - -@@ -36,12 +36,12 @@ - root = trie.get_root_node () - node = trie.transfer (root, 0x1000) - for w in node.get_words (): -- print w -+ print(w) - -- print trie.is_valid (node, False, 0) -- print trie[10000] -+ print(trie.is_valid (node, False, 0)) -+ print(trie[10000]) - -- print trie.get_symbol_id (u'。') -+ print(trie.get_symbol_id (u'。')) - trie.free () - - test_pyslm() ---- a/python/trie.py -+++ b/python/trie.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - # -@@ -161,7 +161,7 @@ - if progress_cb: - progress_cb () - -- for i in xrange (self.chr_encoder (max(trie.root.trans))+1): -+ for i in range (self.chr_encoder (max(trie.root.trans))+1): - if self.check[i] == -1: - self.check[i] = 0 - -@@ -272,7 +272,7 @@ - v, l = match_longest (datrie, s+'b') - assert (len(s) == l and valid_syllables[s] == v) - -- print 'test executed successfully' -+ print('test executed successfully') - - if __name__ == "__main__": - test () ---- a/python/utils.py -+++ b/python/utils.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - # -*- coding: UTF-8 -*- - - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. -@@ -202,7 +202,7 @@ - self.__realsize += 1 - - def __iter__(self): -- for i in xrange(0, self.__realsize): -+ for i in range(0, self.__realsize): - yield self.__access(i) - - def truncate(self, tsize): ---- a/src/SConscript -+++ b/src/SConscript -@@ -53,7 +53,7 @@ - }) - env.Command('sunpinyin-dictgen', 'sunpinyin-dictgen.mk', [ - Copy("$TARGET", "$SOURCE"), -- Chmod("$TARGET", 0755), -+ Chmod("$TARGET", 0o755), - ]) - - # -*- indent-tabs-mode: nil -*- vim:et:ts=4 diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/series sunpinyin-3.0.0~rc2+ds1/debian/patches/series --- sunpinyin-3.0.0~rc1+ds1/debian/patches/series 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/series 2021-10-02 02:51:46.000000000 +0000 @@ -1,15 +1 @@ fix-data-dir.diff -backport/0002-sunpinyin-dictgen.mk.in-completely-abandon-now-defun.patch -backport/0003-Portability-fix-for-NetBSD-iconv.patch -backport/0004-Make-the-.pc-output-reproducible.patch -backport/0005-sunpinyin-dictgen.mk.in-use-HTTPS-mirrors-to-avoid-M.patch -backport/0006-pytrie_gen.cpp-work-around-iconv-segfault-with-musl.patch -backport/0007-SConstruct-use-C-11-standard.patch -backport/0008-pytrie_gen.cpp-cast-1-to-iconv_t-before-comparison.patch -backport/0009-slm-thread-be-more-consistent-by-using-the-C-variant.patch -backport/0010-SConstruct-Add-support-for-riscv64-architecture.patch -backport/0011-Fix-typos-found-by-codespell.patch -backport/0012-SConstruct-Add-support-for-m68k-architecture.patch -backport/0013-SConstruct-Fix-typo-that-makes-build-failing.patch -python3.patch -use-cppflags.patch diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/patches/use-cppflags.patch sunpinyin-3.0.0~rc2+ds1/debian/patches/use-cppflags.patch --- sunpinyin-3.0.0~rc1+ds1/debian/patches/use-cppflags.patch 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/patches/use-cppflags.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -Description: Use CPPFLAGS environment variable -Forwarded: https://github.com/sunpinyin/sunpinyin/pull/99 - -commit 4c9ee1b09aa237722414a82a8357408616802811 -Author: Changwoo Ryu -Date: Wed Dec 11 11:32:16 2019 +0900 - - Use CPPFLAGS environment variable - - It's useful when providing additional build flags like -D_FORTIFY_SOURCE=2. - -diff --git a/SConstruct b/SConstruct -index 9cebc03..696b2fc 100644 ---- a/SConstruct -+++ b/SConstruct -@@ -259,6 +259,7 @@ libdir = env['LIBDIR'] - # pass through environmental variables - envvar = [('CC', 'CC'), - ('CXX', 'CXX'), -+ ('CPPFLAGS', 'CPPFLAGS'), - ('CFLAGS', 'CFLAGS'), - ('CXXFLAGS', 'CXXFLAGS'), - ('LDFLAGS', 'LINKFLAGS'), diff -Nru sunpinyin-3.0.0~rc1+ds1/debian/rules sunpinyin-3.0.0~rc2+ds1/debian/rules --- sunpinyin-3.0.0~rc1+ds1/debian/rules 2019-12-11 04:35:01.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/debian/rules 2021-10-02 02:53:04.000000000 +0000 @@ -9,7 +9,7 @@ # see FEATURE AREAS in dpkg-buildflags(1) export DEB_BUILD_MAINT_OPTIONS = hardening=+all -export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed +export DEB_LDFLAGS_MAINT_APPEND = # export buildflags DPKG_EXPORT_BUILDFLAGS=1 @@ -23,10 +23,9 @@ mmseg slmbuild slmprune slmthread tslmendian tslmpack %: - dh $@ --with python3 + dh $@ -override_dh_auto_clean: - dh_auto_clean +execute_after_dh_auto_clean: scons -c . rm -rf .sconf_temp @@ -36,6 +35,3 @@ override_dh_auto_install: $(SCONS) install --install-sandbox=debian/tmp rm -f debian/tmp/usr/bin/sunpinyin-dictgen - -override_dh_missing: - dh_missing --fail-missing diff -Nru sunpinyin-3.0.0~rc1+ds1/man/genpyt.pod sunpinyin-3.0.0~rc2+ds1/man/genpyt.pod --- sunpinyin-3.0.0~rc1+ds1/man/genpyt.pod 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/man/genpyt.pod 2021-02-28 10:16:33.000000000 +0000 @@ -27,7 +27,7 @@ =item I -The output binary PINYIN lexicon file. This lexicon contains a trie presenting the key tree of PINYIN. And all of the candiate words are sorted using the unigram in I. This file can be used with sunpinyin input method engines. +The output binary PINYIN lexicon file. This lexicon contains a trie presenting the key tree of PINYIN. And all of the candidate words are sorted using the unigram in I. This file can be used with sunpinyin input method engines. =item I diff -Nru sunpinyin-3.0.0~rc1+ds1/man/idngram_merge.pod sunpinyin-3.0.0~rc2+ds1/man/idngram_merge.pod --- sunpinyin-3.0.0~rc1+ds1/man/idngram_merge.pod 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/man/idngram_merge.pod 2021-02-28 10:16:33.000000000 +0000 @@ -25,7 +25,7 @@ =item B<-o>, B<--out> I -Specifiy the final merged idngram file. +Specify the final merged idngram file. =back diff -Nru sunpinyin-3.0.0~rc1+ds1/man/slmprune.pod sunpinyin-3.0.0~rc2+ds1/man/slmprune.pod --- sunpinyin-3.0.0~rc1+ds1/man/slmprune.pod 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/man/slmprune.pod 2021-02-28 10:16:33.000000000 +0000 @@ -20,7 +20,7 @@ the given number of items are cut or reserved, because some items may contains high level children, so could not be cut. -Also it's your responsiblity to give right number of arguments based +Also it's your responsibility to give right number of arguments based on 'input_slm'. diff -Nru sunpinyin-3.0.0~rc1+ds1/man/slmseg.pod sunpinyin-3.0.0~rc2+ds1/man/slmseg.pod --- sunpinyin-3.0.0~rc1+ds1/man/slmseg.pod 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/man/slmseg.pod 2021-02-28 10:16:33.000000000 +0000 @@ -40,7 +40,7 @@ =item B<-m>, B<--model> I -Speficy the language model file. This file is always generated by B. +Specify the language model file. This file is always generated by B. =back diff -Nru sunpinyin-3.0.0~rc1+ds1/man/slmthread.pod sunpinyin-3.0.0~rc2+ds1/man/slmthread.pod --- sunpinyin-3.0.0~rc1+ds1/man/slmthread.pod 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/man/slmthread.pod 2021-02-28 10:16:33.000000000 +0000 @@ -12,7 +12,7 @@ Also it compresses 32-bit float into 16 bit representation. These processing speeds up the looking up. -The I is always genreated by B. And +The I is always generated by B. And the I can be used to feed B as a reference to segment Chinese text. diff -Nru sunpinyin-3.0.0~rc1+ds1/python/imdict.py sunpinyin-3.0.0~rc2+ds1/python/imdict.py --- sunpinyin-3.0.0~rc1+ds1/python/imdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/imdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. # diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/importer.py sunpinyin-3.0.0~rc2+ds1/python/importer/importer.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/importer.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/importer.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os, sys import struct import sqlite3 as sqlite @@ -72,15 +72,15 @@ try: syllables = [valid_syllables[s] for s in pystr.split("'")] except: - print "[%s] has un-recognized syllables, ignoring this record!" % pystr + print("[%s] has un-recognized syllables, ignoring this record!" % pystr) continue if len (syllables) < 2 or len (syllables) > 6: - print "[%s] is too long or too short for sunpinyin userdict" % utf8str + print("[%s] is too long or too short for sunpinyin userdict" % utf8str) continue if utf8str in sysdict: - #print "[%s] is already in sunpinyin's sysdict" % utf8str + #print("[%s] is already in sunpinyin's sysdict" % utf8str) continue record = [0]*14 @@ -102,7 +102,7 @@ """ try: db.execute (sqlstring, record) - #print "[%s] is imported into sunpinyin's userdict" % utf8str + #print("[%s] is imported into sunpinyin's userdict" % utf8str) batch_count += 1 if batch_count == 100: @@ -110,7 +110,7 @@ batch_count = 0 except: - #print "[%s] is already in sunpinyin's userdict" % utf8str + #print("[%s] is already in sunpinyin's userdict" % utf8str) pass db.commit() @@ -130,7 +130,7 @@ f = record[8:14] str = record[-1] syls = [initials[i[x]] + finals[f[x]] for x in range(l)] - print str.encode ('UTF-8'), id, "'".join(syls) + print(str.encode ('UTF-8'), id, "'".join(syls)) if __name__ == "__main__": export_sunpinyin_user_dict () diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_fcitx_userdict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_fcitx_userdict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_fcitx_userdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_fcitx_userdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os, sys import codecs diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_fit_userdict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_fit_userdict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_fit_userdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_fit_userdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os import sqlite3 as sqlite diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_google_userdict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_google_userdict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_google_userdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_google_userdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os, sys import codecs @@ -23,7 +23,7 @@ def main (): if len (sys.argv) != 2: - print "Please specify the Google Pinyin exported user dict file!" + print("Please specify the Google Pinyin exported user dict file!") exit (1) google_user_dict = load_google_user_dict(sys.argv[1]) diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_qim_userdict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_qim_userdict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_qim_userdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_qim_userdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os, sys import codecs @@ -18,7 +18,7 @@ def main (): if len (sys.argv) != 2: - print "Please specify the QIM exported user dict file!" + print("Please specify the QIM exported user dict file!") exit (1) qim_user_dict = load_qim_user_dict(sys.argv[1]) diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_qq_userdict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_qq_userdict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_qq_userdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_qq_userdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os, sys import codecs @@ -21,7 +21,7 @@ def main (): if len (sys.argv) != 2: - print "Please specify the QQ PinYin exported user dict file!" + print("Please specify the QQ PinYin exported user dict file!") exit (1) qq_user_dict = load_qq_user_dict(sys.argv[1]) diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_sogou_celldict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_sogou_celldict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_sogou_celldict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_sogou_celldict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # thanks for the reverse engineering efforts of following projects/peoples: # http://code.google.com/p/imewlconverter @@ -71,7 +71,7 @@ def main (): if len (sys.argv) != 2: - print "Please specify the Sogou PinYin Cell dict file!" + print("Please specify the Sogou PinYin Cell dict file!") exit (1) generator = get_word_from_sogou_cell_dict (sys.argv[1]) diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_sogou_userdict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_sogou_userdict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_sogou_userdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_sogou_userdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os, sys import codecs @@ -19,7 +19,7 @@ def main (): if len (sys.argv) != 2: - print "Please specify the Sogou PinYin exported user dict file!" + print("Please specify the Sogou PinYin exported user dict file!") exit (1) sogou_user_dict = load_sogou_user_dict(sys.argv[1]) diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/import_ziguang_userdict.py sunpinyin-3.0.0~rc2+ds1/python/importer/import_ziguang_userdict.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/import_ziguang_userdict.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/import_ziguang_userdict.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os, sys import codecs @@ -21,7 +21,7 @@ def main (): if len (sys.argv) != 2: - print "Please specify the ZiGuang PinYin exported user dict file!" + print("Please specify the ZiGuang PinYin exported user dict file!") exit (1) ziguang_user_dict = load_ziguang_user_dict(sys.argv[1]) diff -Nru sunpinyin-3.0.0~rc1+ds1/python/importer/pinyin_data.py sunpinyin-3.0.0~rc2+ds1/python/importer/pinyin_data.py --- sunpinyin-3.0.0~rc1+ds1/python/importer/pinyin_data.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/importer/pinyin_data.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. # diff -Nru sunpinyin-3.0.0~rc1+ds1/python/mmseg.py sunpinyin-3.0.0~rc2+ds1/python/mmseg.py --- sunpinyin-3.0.0~rc1+ds1/python/mmseg.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/mmseg.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. # @@ -45,7 +45,7 @@ from utils import read_ch_sentences def usage(): - print ''' + print(''' Usage: mmseg.py -d dict_file [-f (text|bin)] [-i] [-s STOK_ID] [-a AMBI_ID] corpus_file @@ -65,7 +65,7 @@ The sequence ABC will not be segmented, in binary mode, the AMBI-ID is written out; in text mode, ABC will be output. Default is 9. -''' +''') options={'show-id': False, 'format' : 'bin', @@ -75,8 +75,8 @@ def parse_options(args): try: opts, args = getopt.getopt(args, "hid:f:s:a:", ["help", "show-id", "dict=", "format=", "stok-id=", "ambi-id="]) - except getopt.GetoptError, err: - print str(err) + except getopt.GetoptError as err: + print(str(err)) sys.exit(1) for opt,val in opts: @@ -114,7 +114,7 @@ def process_file(file, dict): for line in read_ch_sentences(file): - print >> sys.stderr, line.encode('UTF-8') + print(line.encode('UTF-8'), file=sys.stderr) length = len(line) i = 0 while (i < length): diff -Nru sunpinyin-3.0.0~rc1+ds1/python/pinyin_data.py sunpinyin-3.0.0~rc2+ds1/python/pinyin_data.py --- sunpinyin-3.0.0~rc1+ds1/python/pinyin_data.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/pinyin_data.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. # diff -Nru sunpinyin-3.0.0~rc1+ds1/python/pinyin_info_gen.py sunpinyin-3.0.0~rc2+ds1/python/pinyin_info_gen.py --- sunpinyin-3.0.0~rc1+ds1/python/pinyin_info_gen.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/pinyin_info_gen.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. # @@ -38,60 +38,60 @@ from pinyin_data import * def fmt_str_array (name, var) : - print 'static const char *%s[] = { %s };' % (name, ', '.join ('"%s"' % s for s in var)) + print('static const char *%s[] = { %s };' % (name, ', '.join ('"%s"' % s for s in var))) def fmt_array_size (name): - print 'static const unsigned num_%s = sizeof(%s) / sizeof(*%s);' % (name, name, name) + print('static const unsigned num_%s = sizeof(%s) / sizeof(*%s);' % (name, name, name)) def fmt_str_pair_array (name, var) : - print 'static const char *%s[] = {' % name + print('static const char *%s[] = {' % name) for s1, s2 in var: - print ' %-7s %s' % ('"%s",' % s1, '"%s",' % s2) - print '};' + print(' %-7s %s' % ('"%s",' % s1, '"%s",' % s2)) + print('};') def fmt_pair_array_size (name): - print 'static const unsigned num_%s = sizeof(%s) / sizeof(*%s) / 2;' % (name, name, name) + print('static const unsigned num_%s = sizeof(%s) / sizeof(*%s) / 2;' % (name, name, name)) fmt_str_array ('initials', initials) fmt_array_size ('initials') -print '' +print('') fmt_str_array('finals', finals) fmt_array_size ('finals') -print '' +print('') fmt_str_array('fuzzy_finals', inner_fuzzy_finals) fmt_array_size ('fuzzy_finals') -print '' +print('') fmt_str_pair_array ('fuzzy_pairs', fuzzy_pairs) fmt_pair_array_size ('fuzzy_pairs') -print '' +print('') fmt_str_pair_array ('auto_correction_pairs', sorted(auto_correction_pairs.items())) fmt_pair_array_size ('auto_correction_pairs') -print '' +print('') -print 'static const unsigned fuzzy_finals_map [] = {' +print('static const unsigned fuzzy_finals_map [] = {') for s in inner_fuzzy_finals: - print ' %-7s %-7s %-7s /* %-4s -> %-4s len %d */' % ('0x%02x,' % finals.index(s), '0x%02x,' % valid_syllables[s[1:]], '%d,' % (len(s)-1,), s, s[1:], len(s)-1) -print '};\n' + print(' %-7s %-7s %-7s /* %-4s -> %-4s len %d */' % ('0x%02x,' % finals.index(s), '0x%02x,' % valid_syllables[s[1:]], '%d,' % (len(s)-1,), s, s[1:], len(s)-1)) +print('};\n') -print 'static const TPyTabEntry pinyin_table[] = {' +print('static const TPyTabEntry pinyin_table[] = {') for syllable, hex_syllable in sorted(valid_syllables.items()): - print ' { %-9s %s },' % ('"%s",' % syllable, '0x%05x' % hex_syllable) -print '};\n' + print(' { %-9s %s },' % ('"%s",' % syllable, '0x%05x' % hex_syllable)) +print('};\n') -print 'static const unsigned fuzzy_pre_syllables [] = {' +print('static const unsigned fuzzy_pre_syllables [] = {') for s in fuzzy_pre_syllables: - print ' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s[:-1]], "'%s'," % s[-1], '0x%05x,' % valid_syllables[s], s) -print ' 0x0,' -print '};\n' + print(' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s[:-1]], "'%s'," % s[-1], '0x%05x,' % valid_syllables[s], s)) +print(' 0x0,') +print('};\n') -print 'static const unsigned fuzzy_pro_syllables [] = {' +print('static const unsigned fuzzy_pro_syllables [] = {') for s in fuzzy_pro_syllables: - print ' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s], "'%s'," % s[0], '0x%05x,' % valid_syllables[s[1:]], s) -print ' 0x0,' -print '};\n' + print(' %-11s %-7s %-11s /* %s */' % ('0x%05x,' % valid_syllables[s], "'%s'," % s[0], '0x%05x,' % valid_syllables[s[1:]], s)) +print(' 0x0,') +print('};\n') # -*- indent-tabs-mode: nil -*- vim:et:ts=4 diff -Nru sunpinyin-3.0.0~rc1+ds1/python/pytrie.pyx sunpinyin-3.0.0~rc2+ds1/python/pytrie.pyx --- sunpinyin-3.0.0~rc1+ds1/python/pytrie.pyx 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/pytrie.pyx 2021-02-28 10:16:33.000000000 +0000 @@ -92,7 +92,7 @@ def get_words (self): words = [] cdef CPinyinTrie_TWord *p= self.pnode.getWordIdPtr () - for i in xrange (self.pnode.m_nWordId): + for i in range (self.pnode.m_nWordId): words.append (WordInfo(p[i].m_id, p[i].m_bSeen, p[i].m_cost, p[i].m_len, p[i].m_csLevel)) return words diff -Nru sunpinyin-3.0.0~rc1+ds1/python/quanpin_trie_gen.py sunpinyin-3.0.0~rc2+ds1/python/quanpin_trie_gen.py --- sunpinyin-3.0.0~rc1+ds1/python/quanpin_trie_gen.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/quanpin_trie_gen.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. # diff -Nru sunpinyin-3.0.0~rc1+ds1/python/test.py sunpinyin-3.0.0~rc2+ds1/python/test.py --- sunpinyin-3.0.0~rc1+ds1/python/test.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/test.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: UTF-8 -*- from pyslm import Slm, SlmState @@ -10,21 +10,21 @@ return pr, result = slm.transfer (SlmState(0,0), 58614) - print "pr =", pr, "\tresult = %s" % result + print("pr =", pr, "\tresult = %s" % result) pr, result = slm.transfer (result, 75956) - print "pr =", pr, "\tresult = %s" % result + print("pr =", pr, "\tresult = %s" % result) pr, result = slm.transfer (result, 84582) - print "pr =", pr, "\tresult = %s" % result + print("pr =", pr, "\tresult = %s" % result) his = slm.history_state_of (result) - print "his = %s" % his + print("his = %s" % his) slm.historify (result) - print "result = %s" % result + print("result = %s" % result) - print 'last_word_id =', slm.last_word_id (result) + print('last_word_id =', slm.last_word_id (result)) slm.free () @@ -36,12 +36,12 @@ root = trie.get_root_node () node = trie.transfer (root, 0x1000) for w in node.get_words (): - print w + print(w) - print trie.is_valid (node, False, 0) - print trie[10000] + print(trie.is_valid (node, False, 0)) + print(trie[10000]) - print trie.get_symbol_id (u'。') + print(trie.get_symbol_id (u'。')) trie.free () test_pyslm() diff -Nru sunpinyin-3.0.0~rc1+ds1/python/trie.py sunpinyin-3.0.0~rc2+ds1/python/trie.py --- sunpinyin-3.0.0~rc1+ds1/python/trie.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/trie.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. # @@ -161,7 +161,7 @@ if progress_cb: progress_cb () - for i in xrange (self.chr_encoder (max(trie.root.trans))+1): + for i in range (self.chr_encoder (max(trie.root.trans))+1): if self.check[i] == -1: self.check[i] = 0 @@ -272,7 +272,7 @@ v, l = match_longest (datrie, s+'b') assert (len(s) == l and valid_syllables[s] == v) - print 'test executed successfully' + print('test executed successfully') if __name__ == "__main__": test () diff -Nru sunpinyin-3.0.0~rc1+ds1/python/utils.py sunpinyin-3.0.0~rc2+ds1/python/utils.py --- sunpinyin-3.0.0~rc1+ds1/python/utils.py 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/python/utils.py 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: UTF-8 -*- # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. @@ -202,7 +202,7 @@ self.__realsize += 1 def __iter__(self): - for i in xrange(0, self.__realsize): + for i in range(0, self.__realsize): yield self.__access(i) def truncate(self, tsize): diff -Nru sunpinyin-3.0.0~rc1+ds1/SConstruct sunpinyin-3.0.0~rc2+ds1/SConstruct --- sunpinyin-3.0.0~rc1+ds1/SConstruct 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/SConstruct 2021-02-28 10:16:33.000000000 +0000 @@ -1,7 +1,11 @@ +# -*- mode:python -*- + import platform import os +import shutil import sys +import SCons.Errors version = "2.0.4" abi_major = 3 @@ -179,13 +183,22 @@ AddOption('--disable-plugins', dest='enable_plugins', action='store_false', default=False, help='disable plugin mechanism at libsunpinyin layer') + +def PathIsExecutable(key, path, env): + if shutil.which(path) is None: + m = "path '{path}' for option '{key}' is not found or not in $PATH" + raise SCons.Errors.UserError(m.format(key=key, path=path)) + # save the options opts = Variables('configure.conf') opts.Add('PREFIX', default='/usr/local') opts.Add('LIBDIR', default='/usr/local/lib') opts.Add('DATADIR', default='/usr/local/share') opts.Add('ENABLE_PLUGINS', default=False) - +opts.Add(PathVariable('PYTHON', + 'python3 interpreter used to generate header files', + '/usr/bin/python3', + PathIsExecutable)) # # ==============================environment============================== @@ -215,7 +228,7 @@ make = 'gmake' tar = 'gtar' libln_builder = Builder(action='cd ${TARGET.dir} && ln -s ${SOURCE.name} ${TARGET.name}') - env = Environment(ENV=os.environ, CFLAGS=cflags, CXXFLAGS='', + env = Environment(ENV=os.environ, CFLAGS=cflags, CXXFLAGS='-std=c++11', MAKE=make, WGET=wget, W3M=w3m, TAR=tar, CPPPATH=['.'] + allinc(), tools=['default', 'textfile']) @@ -227,7 +240,7 @@ def PassVariables(envvar, env): for (x, y) in envvar: if x in os.environ: - print 'Warning: you\'ve set %s in the environmental variable!' % x + print('Warning: you\'ve set %s in the environmental variable!' % x) env[y] = os.environ[x] env = CreateEnvironment() @@ -259,6 +272,7 @@ # pass through environmental variables envvar = [('CC', 'CC'), ('CXX', 'CXX'), + ('CPPFLAGS', 'CPPFLAGS'), ('CFLAGS', 'CFLAGS'), ('CXXFLAGS', 'CXXFLAGS'), ('LDFLAGS', 'LINKFLAGS'), @@ -299,11 +313,11 @@ def CheckPython(context): context.Message('Checking for Python library...') - ret = context.TryAction('python-config --prefix')[0] + ret = context.TryAction('python3-config --prefix')[0] context.Result(ret) if ret: - context.env.MergeFlags(['!python-config --includes', - '!python-config --libs']) + context.env.MergeFlags(['!python3-config --includes', + '!python3-config --libs']) return ret @@ -321,7 +335,8 @@ || defined(__ppc__) || defined(__hpux) || defined(__hppa) \ || defined(_MIPSEB) || defined(_POWER) \ || defined(__s390__) || (defined(__sh__) && defined(__BIG_ENDIAN__)) \ - || defined(__AARCH64EB__) + || defined(__AARCH64EB__) \ + || defined(__m68k__) # define WORDS_BIGENDIAN 1 #elif defined(__i386__) || defined(__i386) \ @@ -333,6 +348,7 @@ || defined(_M_X64) || defined(__bfin__) \ || defined(__alpha__) || defined(__ARMEL__) \ || defined(_MIPSEL) || (defined(__sh__) && defined(__LITTLE_ENDIAN__)) \ + || defined(__riscv) \ || defined(__AARCH64EL__) # undef WORDS_BIGENDIAN @@ -417,9 +433,9 @@ # env.Object(slmsource) env.Command('src/pinyin/quanpin_trie.h', 'python/quanpin_trie_gen.py', - 'cd ${SOURCE.dir} && ./quanpin_trie_gen.py > ../src/pinyin/quanpin_trie.h') + '$PYTHON $SOURCE > $TARGET') env.Command('src/pinyin/pinyin_info.h', 'python/pinyin_info_gen.py', - 'cd ${SOURCE.dir} && ./pinyin_info_gen.py > ../src/pinyin/pinyin_info.h') + '$PYTHON $SOURCE > $TARGET') SConscript(['src/SConscript', 'man/SConscript', 'doc/SConscript'], exports='env') @@ -427,9 +443,7 @@ '@PREFIX@': env['PREFIX'], '@LIBDIR@': env['LIBDIR'], '@VERSION@': version, - '@CFLAGS@': reduce(lambda a, b: a + ' ' + b, - map(lambda x: '-I$${includedir}' + x[3:], - allinc())), + '@CFLAGS@': ' '.join(['-I$${includedir}' + x[3:] for x in sorted(allinc())]), }) libname_default = '%ssunpinyin%s' % (env.subst('${SHLIBPREFIX}'), diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/ic_history.cpp sunpinyin-3.0.0~rc2+ds1/src/ime-core/ic_history.cpp --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/ic_history.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/ic_history.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -74,8 +74,8 @@ { TBigram bigram(DCWID, DCWID); - // First , we insert an DC word id before the context history - // to seperated from previous stream. + // First, we insert a DC word id before the context history + // to separated from previous stream. if (m_memory.size() == contxt_memory_size) { TBigram hb; hb.first = m_memory.front(); @@ -347,14 +347,14 @@ CBigramHistory::incUniFreq(TUnigram& ug) { ++m_unifreq[ug]; - //printf("Remebering uniFreq[%d]-->%d\n", ug, m_unifreq[ug]); + //printf("Remembering uniFreq[%d]-->%d\n", ug, m_unifreq[ug]); } void CBigramHistory::incBiFreq(TBigram& bg) { ++m_bifreq[bg]; - //printf("Remebering biFreq[%d,%d]-->%d\n", bg.first, bg.second, m_bifreq[bg]); + //printf("Remembering biFreq[%d,%d]-->%d\n", bg.first, bg.second, m_bifreq[bg]); } // so far, it's very expensive to erase a word from bigram pairs, need to design @@ -416,7 +416,7 @@ m_stopWords.clear(); m_stopWords.insert(0); //unknown world - m_stopWords.insert(DCWID); //seperator word id used by history memory interanlly + m_stopWords.insert(DCWID); //separator word id used by history memory internally } // -*- indent-tabs-mode: nil -*- vim:et:ts=4 diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/ic_history.h sunpinyin-3.0.0~rc2+ds1/src/ime-core/ic_history.h --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/ic_history.h 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/ic_history.h 2021-02-28 10:16:33.000000000 +0000 @@ -50,7 +50,7 @@ */ class CICHistory { public: - /** don't care word id, or seperator word id */ + /** don't care word id, or separator word id */ static const uint32_t DCWID; virtual ~CICHistory(); diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_context.cpp sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_context.cpp --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_context.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_context.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -519,12 +519,12 @@ double ts = m_pModel->transfer(it->m_slmState, _wid, node.m_slmState); m_pModel->historify(node.m_slmState); - // backward to psuedo root, so wid is probably a user word, + // backward to pseudo root, so wid is probably a user word, // save the wid in idx field, so that later we could get it via // CThreadSlm::lastWordId, to calculate p_{cache} correctly. if (node.m_slmState.getLevel() == 0 && m_pHistory && m_pHistory->seenBefore(wid)) - node.m_slmState.setIdx(wid); // an psuedo unigram node state + node.m_slmState.setIdx(wid); // an pseudo unigram node state if (m_pHistory) { unsigned history[2] = { m_pModel->lastWordId(it->m_slmState), _wid }; diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_context.h sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_context.h --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_context.h 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_context.h 2021-02-28 10:16:33.000000000 +0000 @@ -122,7 +122,7 @@ const TWCHAR *m_cwstr; public: - /** Give out the constructor for convinience */ + /** Give out the constructor for convenience */ CCandidate(unsigned start = 0, unsigned end = 0, TLexiconState* pLxst = NULL, @@ -152,7 +152,7 @@ ASCII = 0x0201, // english string PUNC = 0x0202, // punctuation SYMBOL = 0x0204, // other symbol - DIGITAL = 0x0208, // not implemeted here + DIGITAL = 0x0208, // not implemented here }; // TYPE enum BESTWORD_TYPE { diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_data.cpp sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_data.cpp --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_data.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_data.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -74,7 +74,7 @@ clear(); #ifdef DEBUG - printf("\n openning lm file %s, pytrie file %s...", + printf("\n opening lm file %s, pytrie file %s...", lm_file_path, pytrie_file_path); #endif diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_option_event.h sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_option_event.h --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_option_event.h 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_option_event.h 2021-02-28 10:16:33.000000000 +0000 @@ -65,7 +65,7 @@ /** * onConfigChanged will be called whenever an option is changed * @param event presents the changed option - * @return true if the event is consumed, and not intented to be + * @return true if the event is consumed, and not intended to be * sent to another event listener, false otherwise. */ virtual bool onConfigChanged(const COptionEvent&) { return false; } diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_winHandler.cpp sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_winHandler.cpp --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/imi_winHandler.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/imi_winHandler.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -95,7 +95,7 @@ case STATUS_ID_FULLSYMBOL: printf("Full Simbol is "); break; default: - printf("Unknow Status id %d is ", key); + printf("Unknown Status id %d is ", key); break; } diff -Nru sunpinyin-3.0.0~rc1+ds1/src/ime-core/lattice_states.h sunpinyin-3.0.0~rc2+ds1/src/ime-core/lattice_states.h --- sunpinyin-3.0.0~rc1+ds1/src/ime-core/lattice_states.h 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/ime-core/lattice_states.h 2021-02-28 10:16:33.000000000 +0000 @@ -53,7 +53,7 @@ * language model size, the state node in language model do not * thread the back-off pointer. Now, we just use the Word Id for * the node in the language model. Later we should abstract the - * StateNode from language model implemetation to replace this + * StateNode from language model implementation to replace this * definition. */ typedef CThreadSlm::TState CSlmState; diff -Nru sunpinyin-3.0.0~rc1+ds1/src/lexicon/pytrie_gen.cpp sunpinyin-3.0.0~rc2+ds1/src/lexicon/pytrie_gen.cpp --- sunpinyin-3.0.0~rc1+ds1/src/lexicon/pytrie_gen.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/lexicon/pytrie_gen.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -78,7 +78,7 @@ size_t res = iconv(ic, &src, &srclen, &dst, &dstlen); if (res != size_t(-1) && srclen == 0) { - // do revert convertion and compare them + // do revert conversion and compare them src = (TIConvSrcPtr)gbstr; srclen = strlen((char*)src) + 1; dst = (char*)utstr; @@ -94,13 +94,15 @@ unsigned getPureGBEncoding(const char* utf8str) { - static iconv_t ic_gb = iconv_open("GB2312", "UTF-8"); - static iconv_t ic_gbk = iconv_open("GBK", "UTF-8"); - static iconv_t ric_gb = iconv_open("UTF-8", "GB2312"); - static iconv_t ric_gbk = iconv_open("UTF-8", "GBK"); + static const iconv_t e = reinterpret_cast(-1); + static const iconv_t ic_gb = iconv_open("GB2312", "UTF-8"); + static const iconv_t ic_gbk = iconv_open("GBK", "UTF-8"); + static const iconv_t ric_gb = iconv_open("UTF-8", "GB2312"); + static const iconv_t ric_gbk = iconv_open("UTF-8", "GBK"); + // FIXME + if (ic_gb == e || ic_gbk == e || ric_gb == e || ric_gbk == e) return 3; unsigned ret = 0; - if (!isCorrectConverted(utf8str, ic_gb, ric_gb)) { ret = 1; // at least it is contains some GBK char if (!isCorrectConverted(utf8str, ic_gbk, ric_gbk)) diff -Nru sunpinyin-3.0.0~rc1+ds1/src/portability.cpp sunpinyin-3.0.0~rc2+ds1/src/portability.cpp --- sunpinyin-3.0.0~rc1+ds1/src/portability.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/portability.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -138,7 +138,7 @@ assert(ic != (iconv_t)-1); - // To eliminate the const char* and char* diffirence in differnt system + // To eliminate the const char* and char* difference in different system TIConvSrcPtr src = (TIConvSrcPtr)s; size_t srclen = std::strlen(s) + 1; char* dst = (char*)pwcs; diff -Nru sunpinyin-3.0.0~rc1+ds1/src/portability.h sunpinyin-3.0.0~rc2+ds1/src/portability.h --- sunpinyin-3.0.0~rc1+ds1/src/portability.h 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/portability.h 2021-02-28 10:16:33.000000000 +0000 @@ -72,7 +72,7 @@ inline double log2(double x) { return log(x) / M_LN2; } #endif -#if defined(sun) // Solaris/HP-UX 's iconv is const char** +#if defined(sun) || defined(__NetBSD__) // Solaris/HP-UX/NetBSD 's iconv is const char** typedef const char* TIConvSrcPtr; #else typedef char* TIConvSrcPtr; diff -Nru sunpinyin-3.0.0~rc1+ds1/src/SConscript sunpinyin-3.0.0~rc2+ds1/src/SConscript --- sunpinyin-3.0.0~rc1+ds1/src/SConscript 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/SConscript 2021-02-28 10:16:33.000000000 +0000 @@ -53,7 +53,7 @@ }) env.Command('sunpinyin-dictgen', 'sunpinyin-dictgen.mk', [ Copy("$TARGET", "$SOURCE"), - Chmod("$TARGET", 0755), + Chmod("$TARGET", 0o755), ]) # -*- indent-tabs-mode: nil -*- vim:et:ts=4 diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/ids2ngram/ids2ngram.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/ids2ngram/ids2ngram.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/ids2ngram/ids2ngram.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/ids2ngram/ids2ngram.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -131,7 +131,7 @@ printf("\t -s swapfile # intermedia temporary file\n"); printf( "\t -o outputfile # result idngram file [id1, ... idN, freq]*\n"); - printf("\t -p para_size # maxium ngram-items per para\n"); + printf("\t -p para_size # maximum ngram-items per para\n"); printf("\nExample:\n"); printf( " Following example will use three input idstream file idsfile[1,2,3] to generate the idngram file all.id3gram. Each para (internal map size or hash size) would be 1024000, using swap file for temp result. All temp para result would final be merged to got the final result.\n"); @@ -178,9 +178,14 @@ FILE *swap = fopen(swapfile, "wb+"); FILE *out = fopen(output, "wb+"); if (optind >= argc) ShowUsage(); - while (optind < argc) { + for (; optind < argc; ++optind) { printf("Processing %s:", argv[optind]); fflush(stdout); FILE *fp = fopen(argv[optind], "rb"); + if (fp == NULL) { + fprintf(stderr, "Failed to open %s: %s\n", argv[optind], strerror(errno)); + printf("\n"); + continue; + } switch (N) { case 1: ProcessingRead<1>(fp, swap, para_offsets, paraMax); @@ -193,8 +198,7 @@ break; } fclose(fp); - printf("\n"); fflush(stdout); - ++optind; + printf("\n"); } printf("Merging..."); fflush(stdout); switch (N) { diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/mmseg/mmseg.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/mmseg/mmseg.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/mmseg/mmseg.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/mmseg/mmseg.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -307,10 +307,10 @@ ftell(fp), nWords, nAmbis); fflush(stderr); + fclose(fp); } else { - fprintf(stderr, "Can not Open!!!!!!!\n"); fflush(stderr); + fprintf(stderr, "Failed to open %s: %s\n", argv[i], strerror(errno)); } - fclose(fp); } } diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slmbuild/sim_slmbuilder.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/slmbuild/sim_slmbuilder.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/slmbuild/sim_slmbuilder.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slmbuild/sim_slmbuilder.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -111,7 +111,7 @@ } // n=1 for unigram, n=2 for bigram; -// level[0] is for psuedo 0 gram, ... +// level[0] is for pseudo 0 gram, ... void CSlmBuilder::Create(int n) { @@ -126,7 +126,7 @@ level[n] = new std::vector; ((TLeafLevel*)level[n])->reserve(1024); - //Add psuedo root node + //Add pseudo root node ((TNodeLevel*)level[0])->push_back(TNode(0, 0, 0)); //Initialize the nr[n+1][SLM_MAX_R] 2-D array @@ -277,7 +277,7 @@ int idxfirst, idxchk; TLeafIterator chchk = chfirst; for (idxfirst = idxchk = 0; chchk != chlast; ++chchk, ++idxchk) { - //do not cut item whoese 1. freq > thred; 2. psuedo tail + //do not cut item whoese 1. freq > thred; 2. pseudo tail if ((int) chchk->freq > thred || (chchk + 1) == chlast) { if (idxfirst < idxchk) *chfirst = *chchk; @@ -301,7 +301,7 @@ int idxfirst, idxchk; TNodeIterator chchk = chfirst; for (idxfirst = idxchk = 0; chchk != chlast; ++chchk, ++idxchk) { - //do not cut item whoese 1. freq > thred; 2. psuedo tail; 3. leading children + //do not cut item whoese 1. freq > thred; 2. pseudo tail; 3. leading children TNodeIterator chnext = chchk + 1; if ((int) chchk->freq > thred || chnext == chlast || (chnext->child != chchk->child)) { @@ -349,7 +349,7 @@ void CSlmBuilder::AppendTails() { - printf("\nAppending psuedo tail node for each level..."); fflush(stdout); + printf("\nAppending pseudo tail node for each level..."); fflush(stdout); for (int lvl = 0; lvl < nlevel; ++lvl) { int child_size = 0; if (lvl == nlevel - 1) { @@ -360,7 +360,7 @@ TNodeLevel& v = *(TNodeLevel*)(level[lvl]); v.push_back(TNode(0x00FFFFFF, child_size, 1)); } - //also make a psuedo tail node for the leaf level + //also make a pseudo tail node for the leaf level ((TLeafLevel*)(level[nlevel]))->push_back(TLeaf(0, 1)); printf("\n"); fflush(stdout); } @@ -374,7 +374,7 @@ { CSlmBuilder::TNodeIterator it = v.begin(); CSlmBuilder::TNodeIterator ite = v.begin() + (v.size() - 1); - for (; it != ite; ++it) { //do not calc the psuedo tail item + for (; it != ite; ++it) { //do not calc the pseudo tail item CSlmBuilder::TNodeIterator itnext = it + 1; double root_freq = it->freq; for (int h = it->child, t = itnext->child; h < t; ++h) { @@ -411,8 +411,8 @@ DiscountOneLevel(v, ch, discounter[lvl + 1], bUseLogPr); } } - printf("\n Giving psuedo root level 0 a distribution..."); - //make the psuedo 0-gram a equal distribution + printf("\n Giving pseudo root level 0 a distribution..."); + //make the pseudo 0-gram a equal distribution TNodeLevel& v0 = *(TNodeLevel*)(level[0]); if (bUseLogPr) { v0[0].pr = PR_TYPE(-log(double(1.0) / m_nWord)); diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slmbuild/slmbuild.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/slmbuild/slmbuild.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/slmbuild/slmbuild.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slmbuild/slmbuild.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -89,7 +89,7 @@ -w --wordcount N # Lexicon size, number of different word\n\ -b --brk id[,id...] # set the ids which should be treat as breaker\n\ -e --exclude id[,id...] # set the ids which should not be put into LM\n\ - -c --cut c1[,c2...] # k-gram whose freq <= c[k] are droped\n\ + -c --cut c1[,c2...] # k-gram whose freq <= c[k] are dropped\n\ -d --discount method,param # the k-th -d parm specify the discount method \n\ for k-gram. Possible values for method/param:\n\ GT,R,dis : GT discount for r <= R, r is the freq of a ngram.\n\ @@ -239,7 +239,13 @@ CSlmBuilder::FREQ_TYPE freq; printf("Reading and Processing raw idngram..."); fflush(stdout); - FILE *fp = fopen(inputfilename, "rb"); + + FILE* fp = fopen(inputfilename, "rb"); + if (fp == NULL) { + fprintf(stderr, "Failed to open raw idngram file %s: %s\n", inputfilename, strerror(errno)); + return EXIT_FAILURE; + } + int nItems = 0; while (fread(ngram, sizeof(TSIMWordId), N, fp) == (size_t) N && fread(&freq, sizeof(freq), 1, fp) == 1) { diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slm.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/slm.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/slm.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slm.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -173,7 +173,7 @@ /** * return value as the model suggested. The history state must be historified * or the history's level should be 0. when level == 0 but idx != 0, the - * history is a psuedo unigram state used for this model to combine another + * history is a pseudo unigram state used for this model to combine another * bigram cache language model */ double @@ -192,7 +192,7 @@ } while (true) { - //for psuedo cache model unigram state + //for pseudo cache model unigram state TNode* pn = ((TNode*)m_Levels[lvl]) + ((lvl) ? pos : 0); unsigned int t = (pn + 1)->ch(); @@ -271,7 +271,7 @@ st.getIdx(); return pn->wid(); } - return idx; // return the psuedo state word id + return idx; // return the pseudo state word id } } diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slm.h sunpinyin-3.0.0~rc2+ds1/src/slm/slm.h --- sunpinyin-3.0.0~rc1+ds1/src/slm/slm.h 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slm.h 2021-02-28 10:16:33.000000000 +0000 @@ -54,7 +54,7 @@ * level and use a table to map the index to a float value; * -# Compact all float value of -log(pr) into 16384 (14 bits) * level and use a table to map the index to a float value; - * -# threading infomation embed into binary model file. Threading include + * -# threading information embed into binary model file. Threading include * - bol(back-off-level) from current level * - bon(back-off-node)'s index in the bol level array * . @@ -62,7 +62,7 @@ * - when leaf node are arrived, it could use (bol,bon) as history for * history node. * - when a word could not be found in current node (cl, cn)'s children, - * searching could be transfered to (bol, bon) directly and continue + * searching could be transferred to (bol, bon) directly and continue * searching the target word * -# Add a basic type TState in Language model, a state is pair of\n * (level, array_idx_of_the level) @@ -79,7 +79,7 @@ /** * (level:idx) located a state in the language model very well - * Please note the psuedo unigram state, with level == 0, but idx > 0 + * Please note the pseudo unigram state, with level == 0, but idx > 0 * it's for used with bigram cache model */ union TState { diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slminfo/slminfo.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/slminfo/slminfo.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/slminfo/slminfo.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slminfo/slminfo.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -215,6 +215,10 @@ if (lexicon_filename != NULL) { plexicon = new TReverseLexicon(); FILE* f_lex = fopen(lexicon_filename, "r"); + if (f_lex == NULL) { + fprintf(stderr, "Failed to open lexicon file %s: %s\n", lexicon_filename, strerror(errno)); + exit(EXIT_FAILURE); + } while (fgets(word, 10240, f_lex) != NULL) { if (strlen(word) > 0) { char* p = word; @@ -261,13 +265,12 @@ int main(int argc, char* argv[]) { - FILE* fp = NULL; - getParameters(argc, argv); - if ((fp = fopen(argv[argc - 1], "rb+")) == NULL) { - printf("Can not open back-off language model file %s\n", argv[argc - 1]); - return 99; + FILE* fp = fopen(argv[argc - 1], "rb+"); + if (fp == NULL) { + fprintf(stderr, "Failed to open back-off language model file %s: %s\n", argv[argc - 1], strerror(errno)); + return EXIT_FAILURE; } if (!verbose) diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slmpack/slmpack.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/slmpack/slmpack.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/slmpack/slmpack.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slmpack/slmpack.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -76,6 +76,10 @@ printf("Loading lexicon..."); fflush(stdout); static char word[1024 * 10]; FILE* f_lex = fopen(filename, "r"); + if (f_lex == NULL) { + fprintf(stderr, "Failed to open lexicon file %s: %s\n", filename, strerror(errno)); + exit(EXIT_FAILURE); + } TLexicon lexicon; while (fgets(word, sizeof(word), f_lex)) { if (strlen(word) > 0) { diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slmprune/slmprune.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/slmprune/slmprune.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/slmprune/slmprune.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slmprune/slmprune.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -161,7 +161,7 @@ int idxfirst, idxchk; chIterator chchk = chfirst; for (idxfirst = idxchk = 0; chchk != chlast; ++chchk, ++idxchk) { - //cut item whoese pr == 1.0; and not psuedo tail + //cut item whoese pr == 1.0; and not pseudo tail if (chchk->pr != ((bUseLogPr) ? 0.0 : 1.0) || (chchk + 1) == chlast) { if (idxfirst < idxchk) *chfirst = *chchk; while (pfirst != plast && pfirst->child <= idxchk) @@ -188,7 +188,7 @@ printf("\n Level %d (%d items), allocating...", lvl, sz[lvl] - 1); fflush( stdout); - int n = sz[lvl] - 1; //do not count last psuedo tail + int n = sz[lvl] - 1; //do not count last pseudo tail if (cut[lvl] >= n) cut[lvl] = n - 1; TNodeInfo* pbuf = new TNodeInfo[n]; TSIMWordId hw[16]; // it should be lvl+1, yet some compiler do not support it @@ -446,7 +446,7 @@ Note that we do not ensure that during pruning process, exactly the\n\ the given number of items are cut or reserved, because some items may \n\ contains high level children, so could not be cut. \n\ - Also it's your responsiblity to give right number of arguments based\n\ + Also it's your responsibility to give right number of arguments based\n\ on 'input_slm'.\n\ \nSee Also:\n\ To get information of the back-off language model, try 'slminfo'.\n\n"); diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/slmseg/slmseg.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/slmseg/slmseg.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/slmseg/slmseg.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/slmseg/slmseg.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -469,11 +469,10 @@ fprintf(stderr, "@Offset %ld, %d words, %d ambiguious. Done!\n", ftell(fp), nWords, nAmbis); fflush(stderr); + fclose(fp); } else { - fprintf(stderr, "Can not Open!!!!!!!\n"); - fflush(stderr); + fprintf(stderr, "Failed to open %s: %s\n", argv[i], strerror(errno)); } - fclose(fp); } } diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/thread/slmthread.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/thread/slmthread.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/thread/slmthread.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/thread/slmthread.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -57,7 +57,9 @@ #include "ValueCompress.h" using std::log; +using std::log2; using std::exp; +using std::exp2; class CSIMSlmWithIteration : public CSIMSlm { public: @@ -253,8 +255,8 @@ bool usingLogPr = slm.isUseLogPr(); - #define EffectivePr(a) (usingLogPr ? ((a) / log(2.0)) : -log2f((a))) - #define OriginalPr(b) (usingLogPr ? ((b) * log(2.0)) : exp2(-(b))) + #define EffectivePr(a) (usingLogPr ? ((a) / log(2.0f)) : -log2((a))) + #define OriginalPr(b) (usingLogPr ? ((b) * log(2.0f)) : exp2(-(b))) #define EffectiveBow(a) (usingLogPr ? exp(-(a)) : (a)) #define OriginalBow(b) (usingLogPr ? -log((b)) : (b)) diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/thread/ValueCompress.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/thread/ValueCompress.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/thread/ValueCompress.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/thread/ValueCompress.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -303,7 +303,7 @@ v2idx[eff2val[itm->first]] = itm->second; } -/* // Can not be maped back, because some value could not be in the eff2val maps +/* // Can not be mapped back, because some value could not be in the eff2val maps std::vector::iterator itt = table.begin(); std::vector::iterator itte = table.end(); for (; itt != itte; ++itt) diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/tools/clean_rmrb.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/tools/clean_rmrb.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/tools/clean_rmrb.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/tools/clean_rmrb.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -79,6 +79,10 @@ unsigned char buf[10240]; for (int i = 1; i < argc; ++i) { FILE *fp = fopen(argv[i], "r"); + if (fp == NULL) { + fprintf(stderr, "Failed to open %s: %s\n", argv[i], strerror(errno)); + continue; + } while (fgets((char*)buf, sizeof(buf), fp) != NULL) { bool emptyline = processline(buf); if ((unsigned int)buf[0] == '#' || (unsigned int)buf[0] == '0') diff -Nru sunpinyin-3.0.0~rc1+ds1/src/slm/tslminfo/tslminfo.cpp sunpinyin-3.0.0~rc2+ds1/src/slm/tslminfo/tslminfo.cpp --- sunpinyin-3.0.0~rc1+ds1/src/slm/tslminfo/tslminfo.cpp 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/slm/tslminfo/tslminfo.cpp 2021-02-28 10:16:33.000000000 +0000 @@ -230,6 +230,10 @@ if (lexicon_filename != NULL) { plexicon = new TReverseLexicon(); FILE* f_lex = fopen(lexicon_filename, "r"); + if (f_lex == NULL) { + fprintf(stderr, "Failed to open lexicon file %s: %s\n", lexicon_filename, strerror(errno)); + exit(EXIT_FAILURE); + } while (fgets(word, 10240, f_lex) != NULL) { if (strlen(word) > 0) { char* p = word; diff -Nru sunpinyin-3.0.0~rc1+ds1/src/sunpinyin-dictgen.mk.in sunpinyin-3.0.0~rc2+ds1/src/sunpinyin-dictgen.mk.in --- sunpinyin-3.0.0~rc1+ds1/src/sunpinyin-dictgen.mk.in 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/src/sunpinyin-dictgen.mk.in 2021-02-28 10:16:33.000000000 +0000 @@ -1,34 +1,11 @@ #!/usr/bin/@MAKE@ -f # -*- mode: makefile; indent-tabs-mode: t -*- vim:noet:ts=4 -# In case of problems, also try the following ${DL_HOST} values: -# (copied from Gentoo's `thirdpartymirrors' file) -# http://aarnet.dl.sourceforge.net -# http://colocrossing.dl.sourceforge.net -# http://cznic.dl.sourceforge.net -# http://dfn.dl.sourceforge.net -# http://freefr.dl.sourceforge.net -# http://garr.dl.sourceforge.net -# http://heanet.dl.sourceforge.net -# http://hivelocity.dl.sourceforge.net -# http://ignum.dl.sourceforge.net -# http://internode.dl.sourceforge.net -# http://iweb.dl.sourceforge.net -# http://jaist.dl.sourceforge.net -# http://kaz.dl.sourceforge.net -# http://kent.dl.sourceforge.net -# http://nchc.dl.sourceforge.net -# http://ncu.dl.sourceforge.net -# http://netcologne.dl.sourceforge.net -# http://optimate.dl.sourceforge.net -# http://softlayer.dl.sourceforge.net -# http://sunet.dl.sourceforge.net -# http://surfnet.dl.sourceforge.net -# http://switch.dl.sourceforge.net -# http://tcpdiag.dl.sourceforge.net -# http://ufpr.dl.sourceforge.net -# http://waia.dl.sourceforge.net -# http://waix.dl.sourceforge.net +# In case of problems, try replacing `jaist' in ${DL_HOST} with one of these +# (copied from ): +# astuteinternet ayera cfhcable cytranet excellmedia freefr gigenet +# iweb jaist kent liquidtelecom nchc netcologne netix newcontinuum +# phoenixnap razaoinfo superb-dca2 superb-sea2 svwh ufpr versaweb WGET = @WGET@ TAR = @TAR@ @@ -36,14 +13,14 @@ ENDIANNESS = @ENDIANNESS@ DATA_DIR = @DATADIR@/sunpinyin -DL_LIST = https://open-gram.googlecode.com/git/ -DL_HOST = http://heanet.dl.sourceforge.net +DL_LIST = https://sourceforge.net/projects/open-gram/files/ +DL_HOST = https://jaist.dl.sourceforge.net DL_ROOT = ${DL_HOST}/open-gram DICT_PAT = 'dict\.utf8-[0-9]\+.tar.bz2' SLM_PAT = 'lm_sc\.3gm\.arpa-[0-9]\+.tar.bz2' -DICT_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${DICT_PAT} | sort | tail -n 1) -SLM_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${SLM_PAT} | sort | tail -n 1) +DICT_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${DICT_PAT} | sort -u | tail -n 1) +SLM_AR = $(shell ${W3M} ${DL_LIST} | grep -o ${SLM_PAT} | sort -u | tail -n 1) all: install diff -Nru sunpinyin-3.0.0~rc1+ds1/sunpinyin-2.0.pc.in sunpinyin-3.0.0~rc2+ds1/sunpinyin-2.0.pc.in --- sunpinyin-3.0.0~rc1+ds1/sunpinyin-2.0.pc.in 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/sunpinyin-2.0.pc.in 2021-02-28 10:16:33.000000000 +0000 @@ -6,6 +6,6 @@ Name: libsunpinyin Description: IME library based on Statistical Language Model Version: @VERSION@ -Requires: sqlite3 +Requires.private: sqlite3 Libs: -L${libdir} -lsunpinyin Cflags: @CFLAGS@ diff -Nru sunpinyin-3.0.0~rc1+ds1/.travis.yml sunpinyin-3.0.0~rc2+ds1/.travis.yml --- sunpinyin-3.0.0~rc1+ds1/.travis.yml 2018-06-29 23:40:45.000000000 +0000 +++ sunpinyin-3.0.0~rc2+ds1/.travis.yml 2021-02-28 10:16:33.000000000 +0000 @@ -1,4 +1,10 @@ language: cpp +dist: focal +addons: + apt: + packages: + - scons + - python3 script: scons branches: only: