--- tesseract-3.04.00.orig/debian/README.debian +++ tesseract-3.04.00/debian/README.debian @@ -0,0 +1,27 @@ +Upstream user documentation is at + + https://github.com/tesseract-ocr/tesseract/wiki + +And there is some programmer documentation at + + http://tesseract-ocr.github.io + +If you don't care about language training, you may stop reading now. +Normal users don't do any training, and can (for example) install +Japanese support with a single command. + + apt-get install tesseract-ocr-jpn + +For very advanced users, it is possible to run the training scripts +which have been slightly customized for Debian. Again using Japanese +as an example, here are the commands. Currently it is easier to +install all possible fonts rather than carefully figure out exactly +which ones are required. Definitely room for improvement. + + git clone https://github.com/tesseract-ocr/langdata.git + apt-get install fonts-noto-cjk fonts-japanese-mincho.ttf fonts-takao-gothic fonts-vlgothic + tesstrain.sh --lang jpn --langdata_dir langdata + + + + --- tesseract-3.04.00.orig/debian/README.source +++ tesseract-3.04.00/debian/README.source @@ -0,0 +1,68 @@ +#!/bin/bash +# +# The upstream repository for trained language data is multiple +# gigabytes in size. I think this crosses the limit for Debian source +# package size. So instead we package each language individually. This +# helper script was used last time we needed to update the packages. +# As such it is only for the package maintainer, and even then just +# as a template for future packaging work. + +echo "You should not just run this script blindly." 
+exit 1 + +WORKING=`pwd` +TESSDATA=/tessdata # Upstream git repository + +function tarball() { + mkdir -p /tmp/tesseract-ocr-$1/tessdata + N=`echo $1 | sed 's/-/_/'` + cp $TESSDATA/$N.* /tmp/tesseract-ocr-$1/tessdata + cd /tmp + tar czvf /plang/tesseract-ocr-$1-3.04.00.tar.gz tesseract-ocr-$1/tessdata + cd - +} + +function one() { + cd $WORKING + rm -rf tesseract-$1* tesseract-ocr-$1* + apt-get source tesseract-$1 + tarball $1 + cd tesseract-$1-3.02* + uupdate -v 3.04.00 ../tesseract-ocr-$1-3.04.00.tar.gz + cd - + sed -i 's/UNRELEASED/unstable/g' tesseract-$1-3.04.00/debian/changelog + sed -i 's/root /Jeff Breidenbach /g' tesseract-$1-3.04.00/debian/changelog + sed -i 's/3.9.2/3.9.6/g' tesseract-$1-3.04.00/debian/control + cp rules tesseract-$1-3.04.00/debian + cd tesseract-$1-3.04.00 + debuild -us -uc +} + +function new() { + cd $WORKING + rm -rf tesseract-$1* tesseract-ocr-$1* + export DEBEMAIL=jab@debian.org + export DEBFULLNAME='Jeff Breidenbach' + tarball $1 + tar xvzf tesseract-ocr-$1-3.04.00.tar.gz + cd tesseract-ocr-$1 + dh_make -f ../tesseract-ocr-$1-3.04.00.tar.gz -p tesseract-ocr-$1_3.04.00 -i -y + rm debian/*.ex debian/*.EX debian/README.* debian/docs + cp ../compat ../format ../rules ../copyright ../control debian + sed -i 's/ (Closes: #nnnn) //g' debian/changelog + sed -i "s/XXX/$1/g" debian/control + sed -i "s/YYY/$2/g" debian/control + debuild -us -uc +} + + +LANGS=$(apt-cache search tesseract |\ + grep ^tesseract-ocr- |\ + grep -v ocr-dev |\ + cut -f3- -d\- |\ + cut -f1 -d' ') + +for i in $LANGS; do one $i; done + +#new amh Amharic +#new aze-cyrl 'Azerbaijani \(Cyrillic\)' --- tesseract-3.04.00.orig/debian/changelog +++ tesseract-3.04.00/debian/changelog @@ -0,0 +1,304 @@ +tesseract (3.04.00-5ubuntu1) wily; urgency=medium + + * Rename library packages for g++5 ABI transition. 
+ + -- Iain Lane Tue, 04 Aug 2015 11:43:11 +0100 + +tesseract (3.04.00-5) unstable; urgency=medium + + * New metapackage language hoarders + * Bugfix to stop data corruption on stdout + + -- Jeff Breidenbach Fri, 17 Jul 2015 23:08:32 +0000 + +tesseract (3.04.00-4) unstable; urgency=medium + + * Fix file overwrite problem (closes: #792659) + + -- Jeff Breidenbach Fri, 17 Jul 2015 22:25:49 +0000 + +tesseract (3.04.00-3) unstable; urgency=medium + + * Improve training script and add a bit of documentation + + -- Jeff Breidenbach Tue, 14 Jul 2015 22:16:46 +0000 + +tesseract (3.04.00-2) unstable; urgency=medium + + * Include high level training script, tesstrain.sh + + -- Jeff Breidenbach Tue, 14 Jul 2015 00:05:52 +0000 + +tesseract (3.04.00-1) unstable; urgency=low + + * New upstream release + * Revised descriptions (closes: #763003) + * Include pkg-config file (closes: #745766) + + -- Jeff Breidenbach Fri, 10 Jul 2015 06:26:21 +0000 + +tesseract (3.03.03-1) unstable; urgency=medium + + * Remove accidental .o files in source tarball. + * Restore the missing training binaries. 
(closes: #742029) + + -- Jeff Breidenbach Fri, 28 Mar 2014 15:54:30 -0700 + +tesseract (3.03.02-3) unstable; urgency=low + + * Fix dependency mistake introduced in 3.02.02-2 + * Patch from upstream to fix PDF rendering for Arabic + + -- Jeff Breidenbach Fri, 07 Feb 2014 09:25:16 -0800 + +tesseract (3.03.02-2) unstable; urgency=low + + * improve version dependencies (closes: #737481) + * work on lintian warnings + + -- Jeff Breidenbach Wed, 05 Feb 2014 17:36:52 -0800 + +tesseract (3.03.02-1) unstable; urgency=low + + * New upstream release + + -- Jeff Breidenbach Tue, 04 Feb 2014 16:44:15 -0800 + +tesseract (3.03.01-1) unstable; urgency=low + + * New upstream release, includes critical fix to PDF rendering + * Complete leptonlib transition (see bug #735509) + * Promote from experimental to unstable + + -- Jeff Breidenbach Mon, 03 Feb 2014 11:10:20 -0800 + +tesseract (3.03.00-1) experimental; urgency=low + + * New upstream release + + -- Jeff Breidenbach Thu, 16 Jan 2014 16:21:29 -0800 + +tesseract (3.02.02-1) unstable; urgency=low + + * New upstream release + + -- Jeff Breidenbach Tue, 23 Oct 2012 23:25:05 +0000 + +tesseract (3.02.01-6) unstable; urgency=low + + * No changes. Bumping package version to possibly help with upload. + + -- Jeff Breidenbach Mon, 30 Jul 2012 16:01:21 -0700 + +tesseract (3.02.01-5) unstable; urgency=low + + * Working with Jonathan to fix mistaken extra files. + + -- Jeff Breidenbach Mon, 30 Jul 2012 11:38:04 -0700 + +tesseract (3.02.01-4.2) unstable; urgency=low + + * Non-maintainer upload + * Remove files added to source by mistake in last sponsored upload + + -- Jeff Breidenbach Mon, 30 Jul 2012 11:37:58 -0700 + +tesseract (3.02.01-4.1) unstable; urgency=medium + + * Non-maintainer upload + * Remove hardcoded dependency on libleptonica (closes: #680598) + * Bump build-dependency on libleptonica-dev to ">> 1.69-3." 
for + fixed shlibs + + -- Jonathan Nieder Thu, 19 Jul 2012 16:51:03 -0500 + +tesseract (3.02.01-4) unstable; urgency=low + + * Create dummy transition package (closes: #658478) + * Put a version on leptonica dependency (closes: #664175) + * Remove quilt + + -- Jeff Breidenbach Fri, 02 Mar 2012 01:20:33 +0000 + +tesseract (3.02.01-2) unstable; urgency=low + + * Add patch from Martin Owens to fix a segfault when tesseract + encounters a grey-scale image saved from The GIMP (LP: #793002). + Thanks, Martin! Thanks, Ubuntu! + + -- Jeff Breidenbach Thu, 16 Feb 2012 15:36:22 -0800 + +tesseract (3.02.01-1) unstable; urgency=low + + * New upstream release + * Upstream fixed a segfault (closes: #658634) + * Upstream wrote some missing manpages. + + -- Jeff Breidenbach Tue, 14 Feb 2012 18:30:21 -0800 + +tesseract (3.02-3) unstable; urgency=low + + * lintian: ancient-standards-version, quilt-build-dep-but-no-series-file + * lintian: wrong-section-according-to-package-name + * simplify dependencies and require English (closes: 658099) + + -- Jeff Breidenbach Sat, 04 Feb 2012 15:27:27 -0800 + +tesseract (3.02-2) unstable; urgency=medium + + * Deal with file moving to new package name (closes: #658476) + * Move .so symlink to the dev package (closes: #658472) + * tesseract 3.0x officially breaks ocropus 0.3.x (closes: #658095) + * Add dependency to equation "language" at request of upstream + * Note that 3.0x tesseract-ocr-dev was renamed to libtesseract-dev + * Bumping urgency to medium due to looming propagation deadlines + + -- Jeff Breidenbach Fri, 03 Feb 2012 10:10:07 -0800 + +tesseract (3.02-1) unstable; urgency=low + + * New upstream release + * 3.0x doesn't have trouble with finding files (closes: #558254) + * 3.0x now works with TIFF format (closes: #589726) + * Fix subtlety in dependency versioning (closes: #658099) + * Fix another subtlety in dependency versioning (closes: #658095) + * 3.0x deals with 16bpp TIFF (closes: #634232) + * 3.0x deals with .tiff extension 
properly (closes: #523907) + * 3.0x has better overall error handling (closes: #551190) + + -- Jeff Breidenbach Wed, 01 Feb 2012 17:26:22 -0800 + +tesseract (3.01-3) unstable; urgency=low + + * Hey we are shipping version 3.x (closes: #599045) + * Death to .la files (closes: #658102) + * Temporarily remove osd dependency (closes: #658167) + * tesseract-ocr-osd dependency now valid (closes: #658167) + * Better package names for shared libraries (closes: #658097) + * Tersify descriptions a little bit + + -- Jeff Breidenbach Tue, 31 Jan 2012 14:22:52 -0800 + +tesseract (3.01-2) unstable; urgency=low + + * Add dependency on script + orientation detection. + + -- Jeff Breidenbach Mon, 30 Jan 2012 17:08:47 -0800 + +tesseract (3.01-1) unstable; urgency=low + + * New upstream release + + -- Jeff Breidenbach Mon, 30 Jan 2012 09:12:42 -0800 + +tesseract (2.04-2.1) unstable; urgency=low + + * Non-maintainer upload. + * Bump build-dependency on quilt to >= 0.46-7~. + * Disable xterm-based debug windows (closes: #612032, LP: #607297). Thanks + to Kees Cook for the bug report. 
+ + -- Jakub Wilk Thu, 10 Feb 2011 16:35:45 +0100 + +tesseract (2.04-2) unstable; urgency=low + + * Fix FTBFS with gcc4.4 (Closes: #504885) + * Changed language dependency to tesseract-ocr-eng | tesseract-ocr-language + (Closes: #464085) + * Bumped standards to 3.8.3 (no changes needed) + * Updated debhelper build dependency to 7.0.50~ as override_dh_ targets are + used + * Added README.source + * Improved manpage (Closes: #551522) + + -- Jeffrey Ratcliffe Fri, 16 Dec 2009 17:35:24 +0100 + +tesseract (2.04-1) unstable; urgency=low + + * New upstream version (Closes: #484052) + * Added -fPIC to CFLAGS + * Removed --as-needed from LDFLAGS + * Bumped standards to 3.8.2 (no changes needed) + * Adapted java patch to fix distclean target + * Moved to dh7 + * Added watch file + * Updated copyright file according to http://dep.debian.net/deps/dep5/ + + -- Jeffrey Ratcliffe Fri, 03 Jul 2009 23:35:24 +0200 + +tesseract (2.03-3) unstable; urgency=low + + * Patch wordlist2dawg + * Bumped standards + * Fixed lintian errors in copyright + + -- Jeffrey Ratcliffe Thu, 15 Aug 2008 23:59:00 +0200 + +tesseract (2.03-2) unstable; urgency=low + + * Patch ccmain/baseapi.cpp to allow use with ocropus (Closes: #483896) + + -- Jeffrey Ratcliffe Thu, 12 Jun 2008 23:17:00 +0200 + +tesseract (2.03-1) unstable; urgency=low + + * Initial release of 2.03 (Closes: #478556) + * Switch to quilt for managing patches + * Patch java/makefile to fix install and distclean targets + * Patch ccutil/Makefile.* to fix redefine warnings (Closes: #455397) + * Patch viewer/scrollview.cpp, viewer/svmnode.cpp & viewer/svutil.cpp + to fix FTBFS with gcc 4.3 + * Corrected debian/copyright (thanks Winnie) + + -- Jeffrey Ratcliffe Tue, 22 Apr 2008 20:35:09 +0200 + +tesseract (2.01-4) unstable; urgency=low + + * + libtiff dependency (Closes: #459811) + * Updated description (Closes: #418991) + * Bumped standards + * + Uploaders: Gürkan Sengün + * + XS-DM-Upload-Allowed: yes + + -- Jeffrey Ratcliffe Tue, 08 Jan 2008 
22:10:17 +0100 + +tesseract (2.01-3) unstable; urgency=low + + * - Recommends: (Closes: #451865) + + -- Jeffrey Ratcliffe Tue, 20 Nov 2007 21:14:26 +0100 + +tesseract (2.01-2) unstable; urgency=low + + * + Replaces: tesseract-ocr-data (Closes: #451042) + + -- Jeffrey Ratcliffe Thu, 15 Nov 2007 20:16:59 +0100 + +tesseract (2.01-1) unstable; urgency=low + + * Initial release of 2.01 (Closes: #434152) + * Applied tesseract-2.01.patch1.tar.gz + * Changed packaging licence to GPLv3 + + -- Jeffrey Ratcliffe Sat, 20 Oct 2007 09:07:28 +0200 + +tesseract (1.02-3) unstable; urgency=medium + + * Applied patch of Bryan Stillwell to fix + FTBFS on 64 bit arches. (Closes: #398379) + + -- Gürkan Sengün Mon, 11 Dec 2006 11:23:00 +0100 + +tesseract (1.02-2) unstable; urgency=low + + * Applied patch to fix tessdata directory access. (Closes: #400183) + * Split the data to a data package. + + -- Gürkan Sengün Mon, 27 Nov 2006 11:11:31 +0100 + +tesseract (1.02-1) unstable; urgency=low + + * Initial release. (Closes: #390204) + + -- Gürkan Sengün Mon, 9 Oct 2006 17:15:29 +0200 + --- tesseract-3.04.00.orig/debian/compat +++ tesseract-3.04.00/debian/compat @@ -0,0 +1 @@ +7 --- tesseract-3.04.00.orig/debian/control +++ tesseract-3.04.00/debian/control @@ -0,0 +1,66 @@ +Source: tesseract +Section: graphics +Priority: optional +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Jeffrey Ratcliffe +Uploaders: Jeff Breidenbach +Build-Depends: debhelper (>= 7.0.50~), libleptonica-dev (>= 1.70~), automake, libtool, libpango1.0-dev, libcairo2-dev, libicu-dev +Standards-Version: 3.9.6 +Homepage: http://code.google.com/p/tesseract-ocr/ + +Package: tesseract-ocr-dev +Depends: libtesseract-dev, ${misc:Depends} +Architecture: all +Section: oldlibs +Description: transitional dummy package + This is a transitional dummy package. 
+ +Package: libtesseract-dev +Section: libdevel +Architecture: any +Depends: libtesseract3v5 (= ${binary:Version}), ${misc:Depends} +Replaces: tesseract-ocr-dev (<< 3.0.1-1~) +Breaks: tesseract-ocr-dev (<< 3.0.1-1~) +Description: Development files for the tesseract command line OCR tool + The Tesseract OCR engine was one of the top 3 engines in the 1995 + UNLV Accuracy test. Between 1995 and 2006 it had little work done on + it, but since then it has been improved extensively by Google and is + probably one of the most accurate open source OCR engines + available. It can read a wide variety of image formats and convert + them to text in over 40 languages. This package includes the development + files, static library, and header files. + +Package: libtesseract3v5 +Breaks: tesseract-ocr (<< 3.01~), ocropus (<< 0.4.0~) +Replaces: libtesseract3, tesseract-ocr (<< 3.01~) +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Conflicts: libtesseract3 +Description: Tesseract OCR library + The Tesseract OCR engine was one of the top 3 engines in the 1995 + UNLV Accuracy test. Between 1995 and 2006 it had little work done on + it, but since then it has been improved extensively by Google and is + probably one of the most accurate open source OCR engines + available. It can read a wide variety of image formats and convert + them to text in over 40 languages. This package includes the shared + library. + +Package: tesseract-ocr +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends}, tesseract-ocr-eng (>= 3.01~), tesseract-ocr-osd, tesseract-ocr-equ, libtesseract3v5 (>= 3.03~) +Replaces: tesseract-ocr-data +Description: Tesseract command line OCR tool + The Tesseract OCR engine was one of the top 3 engines in the 1995 + UNLV Accuracy test. Between 1995 and 2006 it had little work done on + it, but since then it has been improved extensively by Google and is + probably one of the most accurate open source OCR engines + available. 
It can read a wide variety of image formats and convert + them to text in over 40 languages. This package includes the command + line tool. + +Package: tesseract-ocr-all +Architecture: all +Depends: ${misc:Depends}, tesseract-ocr, tesseract-ocr-afr, tesseract-ocr-amh, tesseract-ocr-ara, tesseract-ocr-asm, tesseract-ocr-aze, tesseract-ocr-aze-cyrl, tesseract-ocr-bel, tesseract-ocr-ben, tesseract-ocr-bod, tesseract-ocr-bos, tesseract-ocr-bul, tesseract-ocr-cat, tesseract-ocr-ceb, tesseract-ocr-ces, tesseract-ocr-chi-sim, tesseract-ocr-chi-tra, tesseract-ocr-chr, tesseract-ocr-cym, tesseract-ocr-dan, tesseract-ocr-dan-frak, tesseract-ocr-deu, tesseract-ocr-deu-frak, tesseract-ocr-dzo, tesseract-ocr-ell, tesseract-ocr-eng, tesseract-ocr-enm, tesseract-ocr-epo, tesseract-ocr-equ, tesseract-ocr-est, tesseract-ocr-eus, tesseract-ocr-fas, tesseract-ocr-fin, tesseract-ocr-fra, tesseract-ocr-frk, tesseract-ocr-frm, tesseract-ocr-gle, tesseract-ocr-gle-uncial, tesseract-ocr-glg, tesseract-ocr-grc, tesseract-ocr-guj, tesseract-ocr-hat, tesseract-ocr-heb, tesseract-ocr-hin, tesseract-ocr-hrv, tesseract-ocr-hun, tesseract-ocr-iku, tesseract-ocr-ind, tesseract-ocr-isl, tesseract-ocr-ita, tesseract-ocr-ita-old, tesseract-ocr-jav, tesseract-ocr-jpn, tesseract-ocr-kan, tesseract-ocr-kat, tesseract-ocr-kat-old, tesseract-ocr-kaz, tesseract-ocr-khm, tesseract-ocr-kir, tesseract-ocr-kor, tesseract-ocr-kur, tesseract-ocr-lao, tesseract-ocr-lat, tesseract-ocr-lav, tesseract-ocr-lit, tesseract-ocr-mal, tesseract-ocr-mar, tesseract-ocr-mkd, tesseract-ocr-mlt, tesseract-ocr-msa, tesseract-ocr-mya, tesseract-ocr-nep, tesseract-ocr-nld, tesseract-ocr-nor, tesseract-ocr-ori, tesseract-ocr-osd, tesseract-ocr-pan, tesseract-ocr-pol, tesseract-ocr-por, tesseract-ocr-pus, tesseract-ocr-ron, tesseract-ocr-rus, tesseract-ocr-san, tesseract-ocr-sin, tesseract-ocr-slk, tesseract-ocr-slk-frak, tesseract-ocr-slv, tesseract-ocr-spa, tesseract-ocr-spa-old, tesseract-ocr-sqi, tesseract-ocr-srp, 
tesseract-ocr-srp-latn, tesseract-ocr-swa, tesseract-ocr-swe, tesseract-ocr-syr, tesseract-ocr-tam, tesseract-ocr-tel, tesseract-ocr-tgk, tesseract-ocr-tgl, tesseract-ocr-tha, tesseract-ocr-tir, tesseract-ocr-tur, tesseract-ocr-uig, tesseract-ocr-ukr, tesseract-ocr-urd, tesseract-ocr-uzb, tesseract-ocr-uzb-cyrl, tesseract-ocr-vie, tesseract-ocr-yid +Description: Tesseract OCR with all language packages + This is a metapackage for Tesseract OCR and includes all supported + languages. \ No newline at end of file --- tesseract-3.04.00.orig/debian/copyright +++ tesseract-3.04.00/debian/copyright @@ -0,0 +1,37 @@ +This package was debianized by Jeffrey Ratcliffe +on Mon, 06 Aug 2007 21:27:22 +0200. + +It was downloaded from http://code.google.com/p/tesseract-ocr/ + +Upstream Authors: +Ray Smith (lead developer) +Phil Cheatle +Simon Crouch +Dan Johnson +Mark Seaman +Sheelagh Huddleston +Chris Newton +... and several others. + +Copyright: + + Copyright 2007 Google Inc. + +License: + + Licensed under the Apache License, Version 2.0 (the "License"); you + may not use this file except in compliance with the License. You may + obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +On a Debian system the complete text of the Apache-2.0 license can be found in +/usr/share/common-licenses/Apache-2.0 + +The Debian packaging is copyright 2007--2009, +Jeffrey Ratcliffe and is licensed under the +Apache-2.0 licence. 
--- tesseract-3.04.00.orig/debian/docs +++ tesseract-3.04.00/debian/docs @@ -0,0 +1 @@ +README --- tesseract-3.04.00.orig/debian/libtesseract-dev.install +++ tesseract-3.04.00/debian/libtesseract-dev.install @@ -0,0 +1,8 @@ +usr/lib/libtesseract.so +usr/lib/*.a +usr/include/tesseract/*.h +usr/lib/pkgconfig/* +training/tesstrain.sh usr/bin/ +training/language-specific.sh usr/share/tesseract-ocr/ +training/tesstrain_utils.sh usr/share/tesseract-ocr/ + --- tesseract-3.04.00.orig/debian/libtesseract3v5.install +++ tesseract-3.04.00/debian/libtesseract3v5.install @@ -0,0 +1,4 @@ +usr/lib/*.so.* +usr/share/tessdata/configs/* usr/share/tesseract-ocr/tessdata/configs/ +usr/share/tessdata/tessconfigs/* usr/share/tesseract-ocr/tessdata/tessconfigs/ +usr/share/tessdata/pdf.ttf usr/share/tesseract-ocr/tessdata/ --- tesseract-3.04.00.orig/debian/libtesseract3v5.lintian-overrides +++ tesseract-3.04.00/debian/libtesseract3v5.lintian-overrides @@ -0,0 +1,2 @@ +# G++5 ABI transition +libtesseract3v5: package-name-doesnt-match-sonames libtesseract3 --- tesseract-3.04.00.orig/debian/rules +++ tesseract-3.04.00/debian/rules @@ -0,0 +1,29 @@ +#!/usr/bin/make -f + +# Uncomment this to turn on verbose mode. 
+export DH_VERBOSE=1 + +CFLAGS:=$(shell dpkg-buildflags --get CFLAGS) +LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS) +CFLAGS += -Wall -g -fPIC -DTESSDATA_PREFIX=/usr/share/tesseract-ocr/ + +%: + dh $@ + +override_dh_auto_build: + make + make training + +override_dh_auto_install: + $(MAKE) DESTDIR=$$(pwd)/debian/tmp prefix=/usr install + $(MAKE) DESTDIR=$$(pwd)/debian/tmp prefix=/usr training-install + +override_dh_auto_test: + +override_dh_auto_clean: + dh_auto_clean + dh_clean java/com/Makefile java/com/google/Makefile java/com/google/scrollview/Makefile java/com/google/scrollview/events/Makefile java/com/google/scrollview/ui/Makefile + +override_dh_auto_configure: + ./autogen.sh + ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --mandir=\$${prefix}/share/man --infodir=\$${prefix}/share/info CFLAGS="$(CFLAGS)" CXXFLAGS="$(CFLAGS)" LDFLAGS="-Wl,-z,defs $(LDFLAGS)" --- tesseract-3.04.00.orig/debian/tesseract-ocr.install +++ tesseract-3.04.00/debian/tesseract-ocr.install @@ -0,0 +1,2 @@ +usr/bin/* +/usr/share/man/man1/* --- tesseract-3.04.00.orig/debian/watch +++ tesseract-3.04.00/debian/watch @@ -0,0 +1,3 @@ +version=3 +https://github.com/tesseract-ocr/tesseract/releases https://github.com/tesseract-ocr/tesseract/archive/(.*)\.ta?r?\.?gz + --- tesseract-3.04.00.orig/tessdata/configs/hocr +++ tesseract-3.04.00/tessdata/configs/hocr @@ -1,2 +1,3 @@ +tessedit_create_txt 0 tessedit_create_hocr 1 tessedit_pageseg_mode 1 --- tesseract-3.04.00.orig/tessdata/configs/pdf +++ tesseract-3.04.00/tessdata/configs/pdf @@ -1,2 +1,3 @@ +tessedit_create_txt 0 tessedit_create_pdf 1 tessedit_pageseg_mode 1 --- tesseract-3.04.00.orig/tessdata/configs/unlv +++ tesseract-3.04.00/tessdata/configs/unlv @@ -1,2 +1,3 @@ +tessedit_create_txt 0 tessedit_write_unlv 1 tessedit_pageseg_mode 6 --- tesseract-3.04.00.orig/training/language-specific.sh +++ tesseract-3.04.00/training/language-specific.sh @@ -1,3 +1,4 @@ +#!/bin/bash # # Set some language specific 
variables. Works in conjunction with # tesstrain.sh @@ -281,9 +282,9 @@ "TakaoPMincho" \ "VL Gothic" \ "VL PGothic" \ - "Noto Sans Japanese Bold" \ - "Noto Sans Japanese Light" \ - ) + "Noto Sans CJK JP Bold" \ + "Noto Sans CJK JP Semi-Light" \ +) RUSSIAN_FONTS=( \ "Arial Bold" \ --- tesseract-3.04.00.orig/training/tesstrain.sh +++ tesseract-3.04.00/training/tesstrain.sh @@ -43,8 +43,10 @@ # your system is to run text2image with --list_available_fonts and the # appropriate --fonts_dir path. +export BINDIR=/usr/bin +export TESSDATA_DIR=/usr/share/tesseract-ocr/tessdata -source `dirname $0`/tesstrain_utils.sh +source /usr/share/tesseract-ocr/tesstrain_utils.sh ARGV=("$@") parse_flags @@ -55,7 +57,7 @@ mkdir -p ${TRAINING_DIR} rm -fr ${TRAINING_DIR}/* -source `dirname $0`/language-specific.sh +source /usr/share/tesseract-ocr/language-specific.sh set_lang_specific_parameters ${LANG_CODE} initialize_fontconfig