--- tesseract-3.03.03.orig/api/pdfrenderer.cpp +++ tesseract-3.03.03/api/pdfrenderer.cpp @@ -208,12 +208,17 @@ old_y = y; // Adjust font size on a per word granularity. Pay attention to - // pointsize, old_pointsize, and pdf_str. + // pointsize, old_pointsize, and pdf_str. We've found that + // in Arabic, Tesseract will happily return a pointsize of zero, + // so we make up a default number to protect ourselves. { bool bold, italic, underlined, monospace, serif, smallcaps; int font_id; res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, &smallcaps, &pointsize, &font_id); + const int kDefaultPointSize = 8; + if (pointsize <= 0) + pointsize = kDefaultPointSize; if (pointsize != old_pointsize) { char textfont[20]; snprintf(textfont, sizeof(textfont), "/f-0-0 %d Tf ", pointsize); @@ -228,7 +233,7 @@ int pdf_word_len = 0; do { const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL); - if (grapheme && grapheme[0] != 0) { + if (grapheme && grapheme[0] != '\0') { // TODO(jbreiden) Do a real UTF-16BE conversion // http://en.wikipedia.org/wiki/UTF-16#Example_UTF-16_encoding_procedure string_32 utf32; --- tesseract-3.03.03.orig/debian/changelog +++ tesseract-3.03.03/debian/changelog @@ -0,0 +1,265 @@ +tesseract (3.03.03-1) unstable; urgency=medium + + * Remove accidental .o files in source tarball. + * Restore the missing training binaries. 
(closes: #742029) + + -- Jeff Breidenbach Fri, 28 Mar 2014 15:54:30 -0700 + +tesseract (3.03.02-3) unstable; urgency=low + + * Fix dependency mistake introduced in 3.02.02-2 + * Patch from upstream to fix PDF rendering for Arabic + + -- Jeff Breidenbach Fri, 07 Feb 2014 09:25:16 -0800 + +tesseract (3.03.02-2) unstable; urgency=low + + * improve version dependencies (closes: #737481) + * work on lintian warnings + + -- Jeff Breidenbach Wed, 05 Feb 2014 17:36:52 -0800 + +tesseract (3.03.02-1) unstable; urgency=low + + * New upstream release + + -- Jeff Breidenbach Tue, 04 Feb 2014 16:44:15 -0800 + +tesseract (3.03.01-1) unstable; urgency=low + + * New upstream release, includes critical fix to PDF rendering + * Complete leptonlib transition (see bug #735509) + * Promote from experimental to unstable + + -- Jeff Breidenbach Mon, 03 Feb 2014 11:10:20 -0800 + +tesseract (3.03.00-1) experimental; urgency=low + + * New upstream release + + -- Jeff Breidenbach Thu, 16 Jan 2014 16:21:29 -0800 + +tesseract (3.02.02-1) unstable; urgency=low + + * New upstream release + + -- Jeff Breidenbach Tue, 23 Oct 2012 23:25:05 +0000 + +tesseract (3.02.01-6) unstable; urgency=low + + * No changes. Bumping package version to possibly help with upload. + + -- Jeff Breidenbach Mon, 30 Jul 2012 16:01:21 -0700 + +tesseract (3.02.01-5) unstable; urgency=low + + * Working with Jonathan to fix mistaken extra files. + + -- Jeff Breidenbach Mon, 30 Jul 2012 11:38:04 -0700 + +tesseract (3.02.01-4.2) unstable; urgency=low + + * Non-maintainer upload + * Remove files added to source by mistake in last sponsored upload + + -- Jeff Breidenbach Mon, 30 Jul 2012 11:37:58 -0700 + +tesseract (3.02.01-4.1) unstable; urgency=medium + + * Non-maintainer upload + * Remove hardcoded dependency on libleptonica (closes: #680598) + * Bump build-dependency on libleptonica-dev to ">> 1.69-3." 
for + fixed shlibs + + -- Jonathan Nieder Thu, 19 Jul 2012 16:51:03 -0500 + +tesseract (3.02.01-4) unstable; urgency=low + + * Create dummy transition package (closes: #658478) + * Put a version on leptonica dependency (closes: #664175) + * Remove quilt + + -- Jeff Breidenbach Fri, 02 Mar 2012 01:20:33 +0000 + +tesseract (3.02.01-2) unstable; urgency=low + + * Add patch from Martin Owens to fix a segfault when tesseract + encounters a grey-scale image saved from The GIMP (LP: #793002). + Thanks, Martin! Thanks, Ubuntu! + + -- Jeff Breidenbach Thu, 16 Feb 2012 15:36:22 -0800 + +tesseract (3.02.01-1) unstable; urgency=low + + * New upstream release + * Upstream fixed a segfault (closes: #658634) + * Upstream wrote some missing manpages. + + -- Jeff Breidenbach Tue, 14 Feb 2012 18:30:21 -0800 + +tesseract (3.02-3) unstable; urgency=low + + * lintian: ancient-standards-version, quilt-build-dep-but-no-series-file + * lintian: wrong-section-according-to-package-name + * simplify dependencies and require English (closes: 658099) + + -- Jeff Breidenbach Sat, 04 Feb 2012 15:27:27 -0800 + +tesseract (3.02-2) unstable; urgency=medium + + * Deal with file moving to new package name (closes: #658476) + * Move .so symlink to the dev package (closes: #658472) + * tesseract 3.0x officially breaks ocropus 0.3.x (closes: #658095) + * Add dependency to equation "language" at request of upstream + * Note that 3.0x tesseract-ocr-dev was renamed to libtesseract-dev + * Bumping urgency to medium due to looming propagation deadlines + + -- Jeff Breidenbach Fri, 03 Feb 2012 10:10:07 -0800 + +tesseract (3.02-1) unstable; urgency=low + + * New upstream release + * 3.0x doesn't have trouble with finding files (closes: #558254) + * 3.0x now works with TIFF format (closes: #589726) + * Fix subtlety in dependency versioning (closes: #658099) + * Fix another subtlety in dependency versioning (closes: #658095) + * 3.0x deals with 16bpp TIFF (closes: #634232) + * 3.0x deals with .tiff extension 
properly (closes: #523907) + * 3.0x has better overall error handling (closes: #551190) + + -- Jeff Breidenbach Wed, 01 Feb 2012 17:26:22 -0800 + +tesseract (3.01-3) unstable; urgency=low + + * Hey we are shipping version 3.x (closes: #599045) + * Death to .la files (closes: #658102) + * Temporarily remove osd dependency (closes: #658167) + * tesseract-ocr-osd dependency now valid (closes: #658167) + * Better package names for shared libraries (closes: #658097) + * Tersify descriptions a little bit + + -- Jeff Breidenbach Tue, 31 Jan 2012 14:22:52 -0800 + +tesseract (3.01-2) unstable; urgency=low + + * Add dependency on script + orientation detection. + + -- Jeff Breidenbach Mon, 30 Jan 2012 17:08:47 -0800 + +tesseract (3.01-1) unstable; urgency=low + + * New upstream release + + -- Jeff Breidenbach Mon, 30 Jan 2012 09:12:42 -0800 + +tesseract (2.04-2.1) unstable; urgency=low + + * Non-maintainer upload. + * Bump build-dependency on quilt to >= 0.46-7~. + * Disable xterm-based debug windows (closes: #612032, LP: #607297). Thanks + to Kees Cook for the bug report. 
+ + -- Jakub Wilk Thu, 10 Feb 2011 16:35:45 +0100 + +tesseract (2.04-2) unstable; urgency=low + + * Fix FTBFS with gcc4.4 (Closes: #504885) + * Changed language dependency to tesseract-ocr-eng | tesseract-ocr-language + (Closes: #464085) + * Bumped standards to 3.8.3 (no changes needed) + * Updated debhelper build dependency to 7.0.50~ as override_dh_ targets are + used + * Added README.source + * Improved manpage (Closes: #551522) + + -- Jeffrey Ratcliffe Fri, 16 Dec 2009 17:35:24 +0100 + +tesseract (2.04-1) unstable; urgency=low + + * New upstream version (Closes: #484052) + * Added -fPIC to CFLAGS + * Removed --as-needed from LDFLAGS + * Bumped standards to 3.8.2 (no changes needed) + * Adapted java patch to fix distclean target + * Moved to dh7 + * Added watch file + * Updated copyright file according to http://dep.debian.net/deps/dep5/ + + -- Jeffrey Ratcliffe Fri, 03 Jul 2009 23:35:24 +0200 + +tesseract (2.03-3) unstable; urgency=low + + * Patch wordlist2dawg + * Bumped standards + * Fixed lintian errors in copyright + + -- Jeffrey Ratcliffe Thu, 15 Aug 2008 23:59:00 +0200 + +tesseract (2.03-2) unstable; urgency=low + + * Patch ccmain/baseapi.cpp to allow use with ocropus (Closes: #483896) + + -- Jeffrey Ratcliffe Thu, 12 Jun 2008 23:17:00 +0200 + +tesseract (2.03-1) unstable; urgency=low + + * Initial release of 2.03 (Closes: #478556) + * Switch to quilt for managing patches + * Patch java/makefile to fix install and distclean targets + * Patch ccutil/Makefile.* to fix redefine warnings (Closes: #455397) + * Patch viewer/scrollview.cpp, viewer/svmnode.cpp & viewer/svutil.cpp + to fix FTBFS with gcc 4.3 + * Corrected debian/copyright (thanks Winnie) + + -- Jeffrey Ratcliffe Tue, 22 Apr 2008 20:35:09 +0200 + +tesseract (2.01-4) unstable; urgency=low + + * + libtiff dependency (Closes: #459811) + * Updated description (Closes: #418991) + * Bumped standards + * + Uploaders: Gürkan Sengün + * + XS-DM-Upload-Allowed: yes + + -- Jeffrey Ratcliffe Tue, 08 Jan 2008 
22:10:17 +0100 + +tesseract (2.01-3) unstable; urgency=low + + * - Recommends: (Closes: #451865) + + -- Jeffrey Ratcliffe Tue, 20 Nov 2007 21:14:26 +0100 + +tesseract (2.01-2) unstable; urgency=low + + * + Replaces: tesseract-ocr-data (Closes: #451042) + + -- Jeffrey Ratcliffe Thu, 15 Nov 2007 20:16:59 +0100 + +tesseract (2.01-1) unstable; urgency=low + + * Initial release of 2.01 (Closes: #434152) + * Applied tesseract-2.01.patch1.tar.gz + * Changed packaging licence to GPLv3 + + -- Jeffrey Ratcliffe Sat, 20 Oct 2007 09:07:28 +0200 + +tesseract (1.02-3) unstable; urgency=medium + + * Applied patch of Bryan Stillwell to fix + FTBFS on 64 bit arches. (Closes: #398379) + + -- Gürkan Sengün Mon, 11 Dec 2006 11:23:00 +0100 + +tesseract (1.02-2) unstable; urgency=low + + * Applied patch to fix tessdata directory access. (Closes: #400183) + * Split the data to a data package. + + -- Gürkan Sengün Mon, 27 Nov 2006 11:11:31 +0100 + +tesseract (1.02-1) unstable; urgency=low + + * Initial release. (Closes: #390204) + + -- Gürkan Sengün Mon, 9 Oct 2006 17:15:29 +0200 + --- tesseract-3.03.03.orig/debian/compat +++ tesseract-3.03.03/debian/compat @@ -0,0 +1 @@ +7 --- tesseract-3.03.03.orig/debian/control +++ tesseract-3.03.03/debian/control @@ -0,0 +1,55 @@ +Source: tesseract +Section: graphics +Priority: optional +Maintainer: Jeffrey Ratcliffe +Uploaders: Jeff Breidenbach +Build-Depends: debhelper (>= 7.0.50~), libleptonica-dev (>= 1.70~), automake, libtool, libpango1.0-dev, libcairo2-dev, libicu-dev +Standards-Version: 3.9.5 +Homepage: http://code.google.com/p/tesseract-ocr/ + +Package: tesseract-ocr-dev +Depends: libtesseract-dev, ${misc:Depends} +Architecture: all +Section: oldlibs +Description: transitional dummy package + This is a transitional dummy package. 
+ +Package: libtesseract-dev +Section: libdevel +Architecture: any +Depends: libtesseract3 (= ${binary:Version}), ${misc:Depends} +Replaces: tesseract-ocr-dev (<< 3.0.1-1~) +Breaks: tesseract-ocr-dev (<< 3.0.1-1~) +Description: Development files for the tesseract command line OCR tool + The Tesseract OCR engine was one of the top 3 engines in the 1995 + UNLV Accuracy test. Between 1995 and 2006 it had little work done on + it, but since then it has been improved extensively by Google and is + probably one of the most accurate open source OCR engines + available. It can read a wide variety of image formats and convert + them to text in over 40 languages. + +Package: libtesseract3 +Breaks: tesseract-ocr (<< 3.01~), ocropus (<< 0.4.0~) +Replaces: tesseract-ocr (<< 3.01~) +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: Command line OCR tool + The Tesseract OCR engine was one of the top 3 engines in the 1995 + UNLV Accuracy test. Between 1995 and 2006 it had little work done on + it, but since then it has been improved extensively by Google and is + probably one of the most accurate open source OCR engines + available. It can read a wide variety of image formats and convert + them to text in over 40 languages. + +Package: tesseract-ocr +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends}, tesseract-ocr-eng (>= 3.01~), tesseract-ocr-osd, tesseract-ocr-equ, libtesseract3 (>= 3.03~) +Replaces: tesseract-ocr-data +Description: Command line OCR tool + The Tesseract OCR engine was one of the top 3 engines in the 1995 + UNLV Accuracy test. Between 1995 and 2006 it had little work done on + it, but since then it has been improved extensively by Google and is + probably one of the most accurate open source OCR engines + available. It can read a wide variety of image formats and convert + them to text in over 40 languages. 
+ --- tesseract-3.03.03.orig/debian/copyright +++ tesseract-3.03.03/debian/copyright @@ -0,0 +1,37 @@ +This package was debianized by Jeffrey Ratcliffe +on Mon, 06 Aug 2007 21:27:22 +0200. + +It was downloaded from http://code.google.com/p/tesseract-ocr/ + +Upstream Authors: +Ray Smith (lead developer) +Phil Cheatle +Simon Crouch +Dan Johnson +Mark Seaman +Sheelagh Huddleston +Chris Newton +... and several others. + +Copyright: + + Copyright 2007 Google Inc. + +License: + + Licensed under the Apache License, Version 2.0 (the "License"); you + may not use this file except in compliance with the License. You may + obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +On a Debian system the complete text of the Apache-2.0 license can be found in +/usr/share/common-licenses/Apache-2.0 + +The Debian packaging is copyright 2007--2009, +Jeffrey Ratcliffe and is licensed under the +Apache-2.0 licence. 
--- tesseract-3.03.03.orig/debian/docs +++ tesseract-3.03.03/debian/docs @@ -0,0 +1 @@ +README --- tesseract-3.03.03.orig/debian/libtesseract-dev.install +++ tesseract-3.03.03/debian/libtesseract-dev.install @@ -0,0 +1,4 @@ +usr/lib/libtesseract.so +usr/lib/*.a +usr/include/tesseract/*.h + --- tesseract-3.03.03.orig/debian/libtesseract3.install +++ tesseract-3.03.03/debian/libtesseract3.install @@ -0,0 +1,5 @@ +usr/lib/*.so.* +usr/share/tessdata/configs/* usr/share/tesseract-ocr/tessdata/configs/ +usr/share/tessdata/tessconfigs/* usr/share/tesseract-ocr/tessdata/tessconfigs/ +usr/share/tessdata/pdf.ttf usr/share/tesseract-ocr/tessdata/ + --- tesseract-3.03.03.orig/debian/rules +++ tesseract-3.03.03/debian/rules @@ -0,0 +1,29 @@ +#!/usr/bin/make -f + +# Uncomment this to turn on verbose mode. +export DH_VERBOSE=1 + +CFLAGS:=$(shell dpkg-buildflags --get CFLAGS) +LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS) +CFLAGS += -Wall -g -fPIC -DTESSDATA_PREFIX=/usr/share/tesseract-ocr/ + +%: + dh $@ + +override_dh_auto_build: + make + make training + +override_dh_auto_install: + $(MAKE) DESTDIR=$$(pwd)/debian/tmp prefix=/usr install + $(MAKE) DESTDIR=$$(pwd)/debian/tmp prefix=/usr training-install + +override_dh_auto_test: + +override_dh_auto_clean: + dh_auto_clean + dh_clean java/com/Makefile java/com/google/Makefile java/com/google/scrollview/Makefile java/com/google/scrollview/events/Makefile java/com/google/scrollview/ui/Makefile + +override_dh_auto_configure: + ./autogen.sh + ./configure --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --mandir=\$${prefix}/share/man --infodir=\$${prefix}/share/info CFLAGS="$(CFLAGS)" CXXFLAGS="$(CFLAGS)" LDFLAGS="-Wl,-z,defs $(LDFLAGS)" --- tesseract-3.03.03.orig/debian/tesseract-ocr.install +++ tesseract-3.03.03/debian/tesseract-ocr.install @@ -0,0 +1,2 @@ +usr/bin/* +/usr/share/man/man1/* --- tesseract-3.03.03.orig/debian/watch +++ tesseract-3.03.03/debian/watch @@ -0,0 +1,3 @@ +version=3 
+http://code.google.com/p/tesseract-ocr/downloads/list http://tesseract-ocr.googlecode.com/files/tesseract-(.*)\.ta?r?\.?gz +