Binary files /tmp/tmp35v318xs/HWr_lHAJ0y/tesseract-5.3.4+git6348-2b07505e/.git/index and /tmp/tmp35v318xs/Qv46qHBuGg/tesseract-5.3.4+git6361-d4618678/.git/index differ diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/logs/HEAD tesseract-5.3.4+git6361-d4618678/.git/logs/HEAD --- tesseract-5.3.4+git6348-2b07505e/.git/logs/HEAD 2024-03-17 20:39:02.322504300 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/logs/HEAD 2024-04-26 18:21:25.605094200 +0000 @@ -1 +1 @@ -0000000000000000000000000000000000000000 2b07505e0e86026ae7c10767b334c337ccf06576 alex 1710707942 +0000 clone: from https://github.com/tesseract-ocr/tesseract.git +0000000000000000000000000000000000000000 d46186781285172947c80ddd2f5268753ef3764d alex 1714155685 +0000 clone: from https://github.com/tesseract-ocr/tesseract.git diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/logs/refs/heads/main tesseract-5.3.4+git6361-d4618678/.git/logs/refs/heads/main --- tesseract-5.3.4+git6348-2b07505e/.git/logs/refs/heads/main 2024-03-17 20:39:02.322504300 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/logs/refs/heads/main 2024-04-26 18:21:25.605094200 +0000 @@ -1 +1 @@ -0000000000000000000000000000000000000000 2b07505e0e86026ae7c10767b334c337ccf06576 alex 1710707942 +0000 clone: from https://github.com/tesseract-ocr/tesseract.git +0000000000000000000000000000000000000000 d46186781285172947c80ddd2f5268753ef3764d alex 1714155685 +0000 clone: from https://github.com/tesseract-ocr/tesseract.git diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/logs/refs/remotes/origin/HEAD tesseract-5.3.4+git6361-d4618678/.git/logs/refs/remotes/origin/HEAD --- tesseract-5.3.4+git6348-2b07505e/.git/logs/refs/remotes/origin/HEAD 2024-03-17 20:39:02.322504300 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/logs/refs/remotes/origin/HEAD 2024-04-26 18:21:25.605094200 +0000 @@ -1 +1 @@ -0000000000000000000000000000000000000000 2b07505e0e86026ae7c10767b334c337ccf06576 alex 1710707942 +0000 clone: from https://github.com/tesseract-ocr/tesseract.git +0000000000000000000000000000000000000000 d46186781285172947c80ddd2f5268753ef3764d alex 1714155685 +0000 clone: from https://github.com/tesseract-ocr/tesseract.git Binary files /tmp/tmp35v318xs/HWr_lHAJ0y/tesseract-5.3.4+git6348-2b07505e/.git/modules/test/index and /tmp/tmp35v318xs/Qv46qHBuGg/tesseract-5.3.4+git6361-d4618678/.git/modules/test/index differ diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/modules/test/logs/HEAD tesseract-5.3.4+git6361-d4618678/.git/modules/test/logs/HEAD --- tesseract-5.3.4+git6348-2b07505e/.git/modules/test/logs/HEAD 2024-03-17 20:39:04.392591700 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/modules/test/logs/HEAD 2024-04-26 18:21:27.655182000 +0000 @@ -1,2 +1,2 @@ -0000000000000000000000000000000000000000 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1710707944 +0000 clone: from https://github.com/tesseract-ocr/test.git -2761899921c08014cf9dbf3b63592237fb9e6ecb 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1710707944 +0000 checkout: moving from main to 2761899921c08014cf9dbf3b63592237fb9e6ecb +0000000000000000000000000000000000000000 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1714155687 +0000 clone: from https://github.com/tesseract-ocr/test.git +2761899921c08014cf9dbf3b63592237fb9e6ecb 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1714155687 +0000 checkout: moving from main to 2761899921c08014cf9dbf3b63592237fb9e6ecb diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/modules/test/logs/refs/heads/main tesseract-5.3.4+git6361-d4618678/.git/modules/test/logs/refs/heads/main --- tesseract-5.3.4+git6348-2b07505e/.git/modules/test/logs/refs/heads/main 2024-03-17 20:39:04.275920200 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/modules/test/logs/refs/heads/main 2024-04-26 18:21:27.558511000 +0000 @@ -1 +1 @@ -0000000000000000000000000000000000000000 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1710707944 +0000 clone: from https://github.com/tesseract-ocr/test.git +0000000000000000000000000000000000000000 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1714155687 +0000 clone: from https://github.com/tesseract-ocr/test.git diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/modules/test/logs/refs/remotes/origin/HEAD tesseract-5.3.4+git6361-d4618678/.git/modules/test/logs/refs/remotes/origin/HEAD --- tesseract-5.3.4+git6348-2b07505e/.git/modules/test/logs/refs/remotes/origin/HEAD 2024-03-17 20:39:04.275920200 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/modules/test/logs/refs/remotes/origin/HEAD 2024-04-26 18:21:27.558511000 +0000 @@ -1 +1 @@ -0000000000000000000000000000000000000000 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1710707944 +0000 clone: from https://github.com/tesseract-ocr/test.git +0000000000000000000000000000000000000000 2761899921c08014cf9dbf3b63592237fb9e6ecb alex 1714155687 +0000 clone: from https://github.com/tesseract-ocr/test.git Binary files /tmp/tmp35v318xs/HWr_lHAJ0y/tesseract-5.3.4+git6348-2b07505e/.git/objects/pack/pack-1b986692799df349ea252008c1687c7942b17803.idx and /tmp/tmp35v318xs/Qv46qHBuGg/tesseract-5.3.4+git6361-d4618678/.git/objects/pack/pack-1b986692799df349ea252008c1687c7942b17803.idx differ Binary files /tmp/tmp35v318xs/HWr_lHAJ0y/tesseract-5.3.4+git6348-2b07505e/.git/objects/pack/pack-1b986692799df349ea252008c1687c7942b17803.pack and /tmp/tmp35v318xs/Qv46qHBuGg/tesseract-5.3.4+git6361-d4618678/.git/objects/pack/pack-1b986692799df349ea252008c1687c7942b17803.pack differ Binary files /tmp/tmp35v318xs/HWr_lHAJ0y/tesseract-5.3.4+git6348-2b07505e/.git/objects/pack/pack-8fcd39cb456a4e273a59751c332db4e659c0537c.idx and /tmp/tmp35v318xs/Qv46qHBuGg/tesseract-5.3.4+git6361-d4618678/.git/objects/pack/pack-8fcd39cb456a4e273a59751c332db4e659c0537c.idx differ Binary files /tmp/tmp35v318xs/HWr_lHAJ0y/tesseract-5.3.4+git6348-2b07505e/.git/objects/pack/pack-8fcd39cb456a4e273a59751c332db4e659c0537c.pack and /tmp/tmp35v318xs/Qv46qHBuGg/tesseract-5.3.4+git6361-d4618678/.git/objects/pack/pack-8fcd39cb456a4e273a59751c332db4e659c0537c.pack differ diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/packed-refs tesseract-5.3.4+git6361-d4618678/.git/packed-refs --- tesseract-5.3.4+git6348-2b07505e/.git/packed-refs 2024-03-17 20:39:02.315837400 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/packed-refs 2024-04-26 18:21:25.598427300 +0000 @@ -1,2 +1,2 @@ # pack-refs with: peeled fully-peeled sorted -2b07505e0e86026ae7c10767b334c337ccf06576 refs/remotes/origin/main +d46186781285172947c80ddd2f5268753ef3764d refs/remotes/origin/main diff -Nru tesseract-5.3.4+git6348-2b07505e/.git/refs/heads/main tesseract-5.3.4+git6361-d4618678/.git/refs/heads/main --- tesseract-5.3.4+git6348-2b07505e/.git/refs/heads/main 2024-03-17 20:39:02.322504300 +0000 +++ tesseract-5.3.4+git6361-d4618678/.git/refs/heads/main 2024-04-26 18:21:25.605094200 +0000 @@ -1 +1 @@ -2b07505e0e86026ae7c10767b334c337ccf06576 +d46186781285172947c80ddd2f5268753ef3764d diff -Nru tesseract-5.3.4+git6348-2b07505e/.gitignore tesseract-5.3.4+git6361-d4618678/.gitignore --- tesseract-5.3.4+git6348-2b07505e/.gitignore 2024-03-17 20:39:02.325837900 +0000 +++ tesseract-5.3.4+git6361-d4618678/.gitignore 2024-04-26 18:21:25.605094200 +0000 @@ -83,10 +83,6 @@ *.traineddata tessdata_* -# OpenCL -tesseract_opencl_profile_devices.dat -kernel*.bin - # build dirs /build* /*.dll diff -Nru tesseract-5.3.4+git6348-2b07505e/CMakeLists.txt tesseract-5.3.4+git6361-d4618678/CMakeLists.txt --- tesseract-5.3.4+git6348-2b07505e/CMakeLists.txt 2024-03-17 20:39:02.325837900 +0000 +++ tesseract-5.3.4+git6361-d4618678/CMakeLists.txt 2024-04-26 18:21:25.605094200 +0000 @@ -88,7 +88,6 @@ option(DISABLED_LEGACY_ENGINE "Disable the legacy OCR engine" OFF) option(ENABLE_LTO "Enable link-time optimization" OFF) option(FAST_FLOAT "Enable float for LSTM" ON) -option(ENABLE_OPENCL "Enable unsupported experimental OpenCL support" OFF) option(ENABLE_NATIVE "Enable optimization for host CPU (could break HW compatibility)" OFF) # see @@ -460,18 +459,6 @@ endif(DISABLE_CURL) endif() -if(ENABLE_OPENCL) - find_package(OpenCL) - if(OpenCL_FOUND) - include_directories(${OpenCL_INCLUDE_DIRS}) - message(STATUS "OpenCL_INCLUDE_DIRS: ${OpenCL_INCLUDE_DIRS}") - message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") - set(USE_OPENCL ON) - else() - set(USE_OPENCL OFF) - endif(OpenCL_FOUND) -endif(ENABLE_OPENCL) - # ############################################################################## # # configure @@ -565,11 +552,6 @@ message(STATUS "Build training tools [BUILD_TRAINING_TOOLS]: " "${BUILD_TRAINING_TOOLS}") message(STATUS "Build tests [BUILD_TESTS]: ${BUILD_TESTS}") -if(ENABLE_OPENCL) - message( - STATUS - "Enable unsupported experimental OpenCL [ENABLE_OPENCL]: ${USE_OPENCL}") -endif(ENABLE_OPENCL) message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}") message( STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}") @@ -608,7 +590,6 @@ src/cutil/*.cpp src/dict/*.cpp src/lstm/*.cpp - src/opencl/*.cpp src/textord/*.cpp src/viewer/*.cpp src/wordrec/*.cpp) @@ -749,7 +730,6 @@ src/cutil/*.h src/dict/*.h src/lstm/*.h - src/opencl/*.h src/textord/*.h src/viewer/*.h src/wordrec/*.h) @@ -760,6 +740,7 @@ src/api/capi.cpp src/api/renderer.cpp src/api/altorenderer.cpp + src/api/pagerenderer.cpp src/api/hocrrenderer.cpp src/api/lstmboxrenderer.cpp src/api/pdfrenderer.cpp @@ -784,6 +765,7 @@ tessdata/configs/lstmbox tessdata/configs/lstmdebug tessdata/configs/makebox + tessdata/configs/page tessdata/configs/pdf tessdata/configs/quiet tessdata/configs/rebox @@ -816,7 +798,6 @@ $ $ $ - $ $ $ $ @@ -832,9 +813,6 @@ if(OpenMP_CXX_FOUND) target_link_libraries(libtesseract PUBLIC OpenMP::OpenMP_CXX) endif() -if(OpenCL_FOUND) - target_link_libraries(libtesseract PUBLIC OpenCL::OpenCL) -endif() if(LibArchive_FOUND) target_link_libraries(libtesseract PUBLIC ${LibArchive_LIBRARIES}) endif(LibArchive_FOUND) diff -Nru tesseract-5.3.4+git6348-2b07505e/Makefile.am tesseract-5.3.4+git6361-d4618678/Makefile.am --- tesseract-5.3.4+git6348-2b07505e/Makefile.am 2024-03-17 20:39:02.329171400 +0000 +++ tesseract-5.3.4+git6361-d4618678/Makefile.am 2024-04-26 18:21:25.608427800 +0000 @@ -78,7 +78,6 @@ AM_CPPFLAGS += -DTESS_EXPORTS AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden -fPIC endif -AM_CPPFLAGS += $(OPENCL_CPPFLAGS) AM_CXXFLAGS = $(OPENMP_CXXFLAGS) @@ -94,9 +93,6 @@ libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/cutil libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/dict libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/lstm -if OPENCL -libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/opencl -endif libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/textord libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/training/common libtesseract_la_CPPFLAGS += -I$(top_srcdir)/src/viewer @@ -104,7 +100,7 @@ libtesseract_la_CPPFLAGS += $(libcurl_CFLAGS) lib_LTLIBRARIES = libtesseract.la -libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS) +libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) libtesseract_la_LDFLAGS += $(libarchive_LIBS) libtesseract_la_LDFLAGS += $(libcurl_LIBS) libtesseract_la_LDFLAGS += $(TENSORFLOW_LIBS) @@ -117,6 +113,7 @@ libtesseract_la_SOURCES = src/api/baseapi.cpp libtesseract_la_SOURCES += src/api/altorenderer.cpp +libtesseract_la_SOURCES += src/api/pagerenderer.cpp libtesseract_la_SOURCES += src/api/capi.cpp libtesseract_la_SOURCES += src/api/hocrrenderer.cpp libtesseract_la_SOURCES += src/api/lstmboxrenderer.cpp @@ -127,9 +124,6 @@ libtesseract_la_LIBADD = libtesseract_ccutil.la libtesseract_la_LIBADD += libtesseract_lstm.la libtesseract_la_LIBADD += libtesseract_native.la -if OPENCL -libtesseract_la_LIBADD += libtesseract_opencl.la -endif # Rules for src/arch. @@ -561,23 +555,6 @@ libtesseract_lstm_la_SOURCES += src/lstm/tfnetwork.pb.cc endif -# Rules for src/opencl. - -if OPENCL -libtesseract_opencl_la_CPPFLAGS = $(AM_CPPFLAGS) -libtesseract_opencl_la_CPPFLAGS += $(OPENCL_CFLAGS) -libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccutil -libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccstruct -libtesseract_opencl_la_CPPFLAGS += -I$(top_srcdir)/src/ccmain - -noinst_HEADERS += src/opencl/openclwrapper.h -noinst_HEADERS += src/opencl/oclkernels.h - -noinst_LTLIBRARIES += libtesseract_opencl.la - -libtesseract_opencl_la_SOURCES = src/opencl/openclwrapper.cpp -endif - # Rules for src/textord. noinst_HEADERS += src/textord/alignedblob.h @@ -743,16 +720,12 @@ tesseract_CPPFLAGS += -I$(top_srcdir)/src/textord tesseract_CPPFLAGS += -I$(top_srcdir)/src/viewer tesseract_CPPFLAGS += -I$(top_srcdir)/src/wordrec -if OPENCL -tesseract_CPPFLAGS += -I$(top_srcdir)/src/opencl -endif tesseract_CPPFLAGS += $(AM_CPPFLAGS) if VISIBILITY tesseract_CPPFLAGS += -DTESS_IMPORTS endif -tesseract_LDFLAGS = $(OPENCL_LDFLAGS) -tesseract_LDFLAGS += $(OPENMP_CXXFLAGS) +tesseract_LDFLAGS = $(OPENMP_CXXFLAGS) tesseract_LDADD = libtesseract.la tesseract_LDADD += $(LEPTONICA_LIBS) @@ -927,7 +900,6 @@ extralib = libtesseract.la extralib += $(libarchive_LIBS) extralib += $(LEPTONICA_LIBS) -extralib += $(OPENCL_LDFLAGS) extralib += $(TENSORFLOW_LIBS) if T_WIN extralib += -lws2_32 @@ -1308,7 +1280,7 @@ apiexample_test_SOURCES = unittest/apiexample_test.cc apiexample_test_CPPFLAGS = $(unittest_CPPFLAGS) -apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS) +apiexample_test_LDFLAGS = $(LEPTONICA_LIBS) apiexample_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS) if !DISABLED_LEGACY_ENGINE @@ -1494,7 +1466,7 @@ progress_test_SOURCES = unittest/progress_test.cc progress_test_CPPFLAGS = $(unittest_CPPFLAGS) -progress_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS) +progress_test_LDFLAGS = $(LEPTONICA_LIBS) progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS) qrsequence_test_SOURCES = unittest/qrsequence_test.cc diff -Nru tesseract-5.3.4+git6348-2b07505e/README.md tesseract-5.3.4+git6361-d4618678/README.md --- tesseract-5.3.4+git6348-2b07505e/README.md 2024-03-17 20:39:02.329171400 +0000 +++ tesseract-5.3.4+git6361-d4618678/README.md 2024-04-26 18:21:25.608427800 +0000 @@ -36,7 +36,7 @@ Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF. -Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO. +Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV, ALTO and PAGE. You should note that in many cases, in order to get better OCR results, you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract. diff -Nru tesseract-5.3.4+git6348-2b07505e/cmake/Configure.cmake tesseract-5.3.4+git6361-d4618678/cmake/Configure.cmake --- tesseract-5.3.4+git6348-2b07505e/cmake/Configure.cmake 2024-03-17 20:39:02.329171400 +0000 +++ tesseract-5.3.4+git6361-d4618678/cmake/Configure.cmake 2024-04-26 18:21:25.608427800 +0000 @@ -91,8 +91,6 @@ unistd.h cairo/cairo-version.h - CL/cl.h - OpenCL/cl.h pango-1.0/pango/pango-features.h unicode/uchar.h ) @@ -121,7 +119,6 @@ #cmakedefine HAVE_NEON ${HAVE_NEON} #cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE} #cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL} -#cmakedefine USE_OPENCL ${USE_OPENCL} ") if(TESSDATA_PREFIX) diff -Nru tesseract-5.3.4+git6348-2b07505e/cmake/SourceGroups.cmake tesseract-5.3.4+git6361-d4618678/cmake/SourceGroups.cmake --- tesseract-5.3.4+git6348-2b07505e/cmake/SourceGroups.cmake 2024-03-17 20:39:02.329171400 +0000 +++ tesseract-5.3.4+git6361-d4618678/cmake/SourceGroups.cmake 2024-04-26 18:21:25.608427800 +0000 @@ -31,7 +31,6 @@ source_group("cutil" "${SSRC}/cutil/${H_CPP}") source_group("dict" "${SSRC}/dict/${H_CPP}") source_group("lstm" "${SSRC}/lstm/${H_CPP}") -source_group("opencl" "${SSRC}/opencl/${H_CPP}") source_group("textord" "${SSRC}/textord/${H_CPP}") source_group("viewer" "${SSRC}/viewer/${H_CPP}") source_group("wordrec" "${SSRC}/wordrec/${H_CPP}") diff -Nru tesseract-5.3.4+git6348-2b07505e/configure.ac tesseract-5.3.4+git6361-d4618678/configure.ac --- tesseract-5.3.4+git6348-2b07505e/configure.ac 2024-03-17 20:39:02.329171400 +0000 +++ tesseract-5.3.4+git6361-d4618678/configure.ac 2024-04-26 18:21:25.608427800 +0000 @@ -79,8 +79,6 @@ # Can be overridden with `configure --disable-silent-rules` or with `make V=1`. AM_SILENT_RULES([yes]) -OPENCL_INC="/opt/AMDAPP/include" -OPENCL_LIBS="-lOpenCL" ############################# # # Platform specific setup @@ -102,15 +100,12 @@ AM_CONDITIONAL([ADD_RT], true) ;; *darwin*) - OPENCL_LIBS="" - OPENCL_INC="" AM_CONDITIONAL([ADD_RT], false) ;; *android*|openbsd*) AM_CONDITIONAL([ADD_RT], false) ;; powerpc-*-darwin*) - OPENCL_LIBS="" ;; *) # default @@ -263,19 +258,6 @@ # Note that the first usage of AC_CHECK_HEADERS must be unconditional. AC_CHECK_HEADERS([tiffio.h], [have_tiff=true], [have_tiff=false]) -# check whether to build opencl version -AC_MSG_CHECKING([--enable-opencl argument]) -AC_ARG_ENABLE([opencl], - AS_HELP_STRING([--enable-opencl], [enable opencl build [default=no]])) -AC_MSG_RESULT([$enable_opencl]) -# check for opencl header -have_opencl=false -if test "$enable_opencl" = "yes"; then - AC_CHECK_HEADERS([CL/cl.h], [have_opencl=true], [ - AC_CHECK_HEADERS(OpenCL/cl.h, [have_opencl=true], [have_opencl=false]) - ]) -fi - # Configure arguments which allow disabling some optional libraries. AC_ARG_WITH([archive], AS_HELP_STRING([--with-archive], @@ -323,9 +305,6 @@ fi] ) -have_opencl_lib=false -OPENCL_CPPFLAGS='' -OPENCL_LDFLAGS='' case "${host_os}" in *darwin* | *-macos10*) MY_CHECK_FRAMEWORK([Accelerate]) @@ -333,36 +312,11 @@ AM_CPPFLAGS="-DHAVE_FRAMEWORK_ACCELERATE $AM_CPPFLAGS" AM_LDFLAGS="$AM_LDFLAGS -framework Accelerate" fi - MY_CHECK_FRAMEWORK([OpenCL]) - if test "$enable_opencl" = "yes"; then - if test $my_cv_framework_OpenCL = no; then - AC_MSG_ERROR([Required OpenCL library not found!]) - fi - AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS" - OPENCL_CPPFLAGS="" - OPENCL_LDFLAGS="-framework OpenCL" - fi ;; *) # default - if test "$enable_opencl" = "yes"; then - AC_CHECK_LIB([OpenCL], [clGetPlatformIDs], - [have_opencl_lib=true], [have_opencl_lib=false]) - if !($have_opencl); then - AC_MSG_ERROR([Required OpenCL headers not found!]) - fi - if !($have_opencl_lib); then - AC_MSG_ERROR([Required OpenCL library not found!]) - fi - AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS" - OPENCL_CPPFLAGS="-I${OPENCL_INC}" - OPENCL_LDFLAGS="${OPENCL_LIBS}" - fi ;; esac -AM_CONDITIONAL([OPENCL], [test "$enable_opencl" = "yes"]) -AC_SUBST([OPENCL_CPPFLAGS]) -AC_SUBST([OPENCL_LDFLAGS]) # check whether to build tesseract with -fvisibility=hidden -fvisibility-inlines-hidden # http://gcc.gnu.org/wiki/Visibility diff -Nru tesseract-5.3.4+git6348-2b07505e/debian/changelog tesseract-5.3.4+git6361-d4618678/debian/changelog --- tesseract-5.3.4+git6348-2b07505e/debian/changelog 2024-03-24 18:05:28.000000000 +0000 +++ tesseract-5.3.4+git6361-d4618678/debian/changelog 2024-04-29 20:34:40.000000000 +0000 @@ -1,17 +1,32 @@ -tesseract (5.3.4+git6348-2b07505e-1ppa1~focal1) focal; urgency=medium +tesseract (5.3.4+git6361-d4618678-1ppa1~focal1) focal; urgency=medium * Automated backport upload; no source changes. - -- Alexander Pozdnyakov Sun, 24 Mar 2024 18:05:28 +0000 + -- Alexander Pozdnyakov Mon, 29 Apr 2024 20:34:40 +0000 -tesseract (5.3.4+git6348-2b07505e-1) unstable; urgency=medium +tesseract (5.3.4+git6361-d4618678-1) unstable; urgency=medium * Compile * URL: https://github.com/tesseract-ocr/tesseract.git * Branch: main - * Commit: 2b07505e0e86026ae7c10767b334c337ccf06576 - * Date: 1710644900 + * Commit: d46186781285172947c80ddd2f5268753ef3764d + * Date: 1714110811 * git changelog: + * d4618678 - Refactor function Textord::clean_noise_from_row (#4225) + * a1837f18 - Remove the variable + * 77c99e07 - A few typo fix + * 072b5d47 - Apply the change + * 6802705c - Apply the changes + * d6f0073c - Remove unused local variables in PAGE renderer + * 549b8767 - Support training without lstmf files + * 88771ca4 - Add missing new function prototypes for PAGE renderer to + C API + * 577e8a8b - Add PAGE XML renderer / export (#4214) + * bae520ea - Facilitate vectorization for generic build (#4223) + * d5e000bc - Remove unsupported OpenCL code and related API functions + (#4220) + * 912deb39 - Fix comments for function ReCachePages + * 87a152c0 - cmake: target_compile_features instead of set_property * 2b07505e - issue-bug.yml: Add a link to Supported Operating Systems page * cc3c5070 - Update autotools.yml @@ -245,7 +260,7 @@ * b76b5be6 - Create an issue template for a feature request * ce0ed917 - Create a new issue template - -- Alexander Pozdnyakov Sun, 17 Mar 2024 20:39:16 +0000 + -- Alexander Pozdnyakov Fri, 26 Apr 2024 18:21:41 +0000 tesseract (5.3.3-1) unstable; urgency=medium diff -Nru tesseract-5.3.4+git6348-2b07505e/doc/tesseract.1.asc tesseract-5.3.4+git6361-d4618678/doc/tesseract.1.asc --- tesseract-5.3.4+git6348-2b07505e/doc/tesseract.1.asc 2024-03-17 20:39:02.332504700 +0000 +++ tesseract-5.3.4+git6361-d4618678/doc/tesseract.1.asc 2024-04-26 18:21:25.608427800 +0000 @@ -104,6 +104,10 @@ * *alto* -- Output in ALTO format ('OUTPUTBASE'`.xml`). * *hocr* -- Output in hOCR format ('OUTPUTBASE'`.hocr`). + * *page* -- Output in PAGE format ('OUTPUTBASE'`.page.xml`). + The output can be customized with the flags: + page_xml_polygon -- Create polygons instead of bounding boxes (default: true) + page_xml_level -- Create the PAGE file on 0=linelevel or 1=wordlevel (default: 0) * *pdf* -- Output PDF ('OUTPUTBASE'`.pdf`). * *tsv* -- Output TSV ('OUTPUTBASE'`.tsv`). * *txt* -- Output plain text ('OUTPUTBASE'`.txt`). diff -Nru tesseract-5.3.4+git6348-2b07505e/include/tesseract/baseapi.h tesseract-5.3.4+git6361-d4618678/include/tesseract/baseapi.h --- tesseract-5.3.4+git6348-2b07505e/include/tesseract/baseapi.h 2024-03-17 20:39:02.332504700 +0000 +++ tesseract-5.3.4+git6361-d4618678/include/tesseract/baseapi.h 2024-04-26 18:21:25.608427800 +0000 @@ -87,15 +87,6 @@ static const char *Version(); /** - * If compiled with OpenCL AND an available OpenCL - * device is deemed faster than serial code, then - * "device" is populated with the cl_device_id - * and returns sizeof(cl_device_id) - * otherwise *device=nullptr and returns 0. - */ - static size_t getOpenCLDevice(void **device); - - /** * Set the name of the input file. Needed for training and * reading a UNLV zone file, and for searchable PDF output. */ @@ -559,6 +550,18 @@ */ char *GetAltoText(int page_number); + /** + * Make an XML-formatted string with PAGE markup from the internal + * data structures. + */ + char *GetPAGEText(ETEXT_DESC *monitor, int page_number); + + /** + * Make an XML-formatted string with PAGE markup from the internal + * data structures. + */ + char *GetPAGEText(int page_number); + /** * Make a TSV-formatted string from the internal data structures. * page_number is 0-based but will appear in the output as 1-based. diff -Nru tesseract-5.3.4+git6348-2b07505e/include/tesseract/capi.h tesseract-5.3.4+git6361-d4618678/include/tesseract/capi.h --- tesseract-5.3.4+git6348-2b07505e/include/tesseract/capi.h 2024-03-17 20:39:02.332504700 +0000 +++ tesseract-5.3.4+git6361-d4618678/include/tesseract/capi.h 2024-04-26 18:21:25.608427800 +0000 @@ -156,6 +156,7 @@ TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase, BOOL font_info); TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase); +TESS_API TessResultRenderer *TessPAGERendererCreate(const char *outputbase); TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase); TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase, const char *datadir, @@ -186,8 +187,6 @@ TESS_API TessBaseAPI *TessBaseAPICreate(); TESS_API void TessBaseAPIDelete(TessBaseAPI *handle); -TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device); - TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name); TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle); @@ -324,6 +323,7 @@ TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number); TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number); +TESS_API char *TessBaseAPIGetPAGEText(TessBaseAPI *handle, int page_number); TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number); TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number); diff -Nru tesseract-5.3.4+git6348-2b07505e/include/tesseract/renderer.h tesseract-5.3.4+git6361-d4618678/include/tesseract/renderer.h --- tesseract-5.3.4+git6348-2b07505e/include/tesseract/renderer.h 2024-03-17 20:39:02.332504700 +0000 +++ tesseract-5.3.4+git6361-d4618678/include/tesseract/renderer.h 2024-04-26 18:21:25.608427800 +0000 @@ -199,6 +199,23 @@ }; /** + * Renders Tesseract output into a PAGE XML text string + */ +class TESS_API TessPAGERenderer : public TessResultRenderer { +public: + explicit TessPAGERenderer(const char *outputbase); + +protected: + bool BeginDocumentHandler() override; + bool AddImageHandler(TessBaseAPI *api) override; + bool EndDocumentHandler() override; + +private: + bool begin_document; +}; + + +/** * Renders Tesseract output into a TSV string */ class TESS_API TessTsvRenderer : public TessResultRenderer { diff -Nru tesseract-5.3.4+git6348-2b07505e/src/api/baseapi.cpp tesseract-5.3.4+git6361-d4618678/src/api/baseapi.cpp --- tesseract-5.3.4+git6348-2b07505e/src/api/baseapi.cpp 2024-03-17 20:39:02.332504700 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/api/baseapi.cpp 2024-04-26 18:21:25.611761000 +0000 @@ -41,9 +41,6 @@ #endif #include "mutableiterator.h" // for MutableIterator #include "normalis.h" // for kBlnBaselineOffset, kBlnXHeight -#if defined(USE_OPENCL) -# include "openclwrapper.h" // for OpenclDevice -#endif #include "pageres.h" // for PAGE_RES_IT, WERD_RES, PAGE_RES, CR_DE... #include "paragraphs.h" // for DetectParagraphs #include "params.h" // for BoolParam, IntParam, DoubleParam, Stri... @@ -244,27 +241,6 @@ } /** - * If compiled with OpenCL AND an available OpenCL - * device is deemed faster than serial code, then - * "device" is populated with the cl_device_id - * and returns sizeof(cl_device_id) - * otherwise *device=nullptr and returns 0. - */ -size_t TessBaseAPI::getOpenCLDevice(void **data) { -#ifdef USE_OPENCL - ds_device device = OpenclDevice::getDeviceSelection(); - if (device.type == DS_DEVICE_OPENCL_DEVICE) { - *data = new cl_device_id; - memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id)); - return sizeof(cl_device_id); - } -#endif - - *data = nullptr; - return 0; -} - -/** * Set the name of the input file. Needed only for training and * loading a UNLV zone file. */ @@ -398,10 +374,6 @@ delete tesseract_; tesseract_ = nullptr; } -#ifdef USE_OPENCL - OpenclDevice od; - od.InitEnv(); -#endif bool reset_classifier = true; if (tesseract_ == nullptr) { reset_classifier = false; diff -Nru tesseract-5.3.4+git6348-2b07505e/src/api/capi.cpp tesseract-5.3.4+git6361-d4618678/src/api/capi.cpp --- tesseract-5.3.4+git6348-2b07505e/src/api/capi.cpp 2024-03-17 20:39:02.332504700 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/api/capi.cpp 2024-04-26 18:21:25.611761000 +0000 @@ -68,6 +68,10 @@ return new tesseract::TessAltoRenderer(outputbase); } +TessResultRenderer *TessPAGERendererCreate(const char *outputbase) { + return new tesseract::TessPAGERenderer(outputbase); +} + TessResultRenderer *TessTsvRendererCreate(const char *outputbase) { return new tesseract::TessTsvRenderer(outputbase); } @@ -137,10 +141,6 @@ delete handle; } -size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI * /*handle*/, void **device) { - return TessBaseAPI::getOpenCLDevice(device); -} - void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name) { handle->SetInputName(name); } @@ -424,6 +424,10 @@ return handle->GetAltoText(page_number); } +char *TessBaseAPIGetPAGEText(TessBaseAPI *handle, int page_number) { + return handle->GetPAGEText(page_number); +} + char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number) { return handle->GetTSVText(page_number); } diff -Nru tesseract-5.3.4+git6348-2b07505e/src/api/pagerenderer.cpp tesseract-5.3.4+git6361-d4618678/src/api/pagerenderer.cpp --- tesseract-5.3.4+git6348-2b07505e/src/api/pagerenderer.cpp 1970-01-01 00:00:00.000000000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/api/pagerenderer.cpp 2024-04-26 18:21:25.611761000 +0000 @@ -0,0 +1,1153 @@ +// File: pagerenderer.cpp +// Description: PAGE XML rendering interface +// Author: Jan Kamlah + +// (C) Copyright 2021 +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "errcode.h" // for ASSERT_HOST +#ifdef _WIN32 +# include "host.h" // windows.h for MultiByteToWideChar, ... +#endif +#include "tprintf.h" // for tprintf + +#include +#include + +#include +#include +#include +#include +#include // for std::stringstream +#include + +#include +#if (LIBLEPT_MAJOR_VERSION == 1 && LIBLEPT_MINOR_VERSION >= 83) || \ + LIBLEPT_MAJOR_VERSION > 1 +# include +# include +#endif + +namespace tesseract { + +/// +/// Slope and offset between two points +/// +static void GetSlopeAndOffset(float x0, float y0, float x1, float y1, float *m, + float *b) { + float slope; + + slope = ((y1 - y0) / (x1 - x0)); + *m = slope; + *b = y0 - slope * x0; +} + +/// +/// Write coordinates in the form of a points to a stream +/// +static void AddPointsToPAGE(Pta *pts, std::stringstream &str) { + int num_pts; + + str << "\n"; +} + +/// +/// Convert bbox information to top and bottom polygon +/// +static void AddPointToWordPolygon( + const ResultIterator *res_it, PageIteratorLevel level, Pta *word_top_pts, + Pta *word_bottom_pts, tesseract::WritingDirection writing_direction) { + int left, top, right, bottom; + + res_it->BoundingBox(level, &left, &top, &right, &bottom); + + if (writing_direction != WRITING_DIRECTION_TOP_TO_BOTTOM) { + ptaAddPt(word_top_pts, left, top); + ptaAddPt(word_top_pts, right, top); + + ptaAddPt(word_bottom_pts, left, bottom); + ptaAddPt(word_bottom_pts, right, bottom); + + } else { + // Transform from ttb to ltr + ptaAddPt(word_top_pts, top, right); + ptaAddPt(word_top_pts, bottom, right); + + ptaAddPt(word_bottom_pts, top, left); + ptaAddPt(word_bottom_pts, bottom, left); + } +} + +/// +/// Transpose polygonline, destroy old and return new pts +/// +Pta *TransposePolygonline(Pta *pts) { + Pta *pts_transposed; + + pts_transposed = ptaTranspose(pts); + ptaDestroy(&pts); + return pts_transposed; +} + +/// +/// Reverse polygonline, destroy old and return new pts +/// +Pta *ReversePolygonline(Pta *pts, int type) { + Pta *pts_reversed; + + pts_reversed = ptaReverse(pts, type); + ptaDestroy(&pts); + return pts_reversed; +} + +/// +/// Destroy old and create new pts +/// +Pta *DestroyAndCreatePta(Pta *pts) { + ptaDestroy(&pts); + return ptaCreate(0); +} + +/// +/// Recalculate linepolygon +/// Create a hull for overlapping areas +/// +Pta *RecalcPolygonline(Pta *pts, bool upper) { + int num_pts, num_bin, index = 0; + int y, x0, y0, x1, y1; + float x_min, y_min, x_max, y_max; + NUMA *bin_line; + Pta *pts_recalc; + + ptaGetMinMax(pts, &x_min, &y_min, &x_max, &y_max); + num_bin = x_max - x_min; + bin_line = numaCreate(num_bin + 1); + + for (int p = 0; p <= num_bin; ++p) { + bin_line->array[p] = -1.; + } + + num_pts = ptaGetCount(pts); + + if (num_pts == 2) { + pts_recalc = ptaCopy(pts); + ptaDestroy(&pts); + return pts_recalc; + } + + do { + ptaGetIPt(pts, index, &x0, &y0); + ptaGetIPt(pts, index + 1, &x1, &y1); + for (int p = x0 - x_min; p <= x1 - x_min; ++p) { + if (!upper) { + if (bin_line->array[p] == -1. || y0 > bin_line->array[p]) { + bin_line->array[p] = y0; + } + } else { + if (bin_line->array[p] == -1. || y0 < bin_line->array[p]) { + bin_line->array[p] = y0; + } + } + } + index += 2; + } while (index < num_pts - 1); + + pts_recalc = ptaCreate(0); + + for (int p = 0; p <= num_bin; ++p) { + if (p == 0) { + y = bin_line->array[p]; + ptaAddPt(pts_recalc, x_min + p, y); + } else if (p == num_bin) { + ptaAddPt(pts_recalc, x_min + p, y); + break; + } else if (y != bin_line->array[p]) { + if (y != -1.) { + ptaAddPt(pts_recalc, x_min + p, y); + } + y = bin_line->array[p]; + if (y != -1.) { + ptaAddPt(pts_recalc, x_min + p, y); + } + } + } + + ptaDestroy(&pts); + return pts_recalc; +} + +/// +/// Create a rectangle hull around a single line +/// +Pta *PolygonToBoxCoords(Pta *pts) { + Pta *pts_box; + float x_min, y_min, x_max, y_max; + + pts_box = ptaCreate(0); + ptaGetMinMax(pts, &x_min, &y_min, &x_max, &y_max); + ptaAddPt(pts_box, x_min, y_min); + ptaAddPt(pts_box, x_max, y_min); + ptaAddPt(pts_box, x_max, y_max); + ptaAddPt(pts_box, x_min, y_max); + ptaDestroy(&pts); + return pts_box; +} + +/// +/// Create a rectangle polygon round the existing multiple lines +/// +static void UpdateBlockPoints(Pta *block_top_pts, Pta *block_bottom_pts, + Pta *line_top_pts, Pta *line_bottom_pts, int lcnt, + int last_word_in_cblock) { + int num_pts; + int x, y; + + // Create a hull around all lines + if (lcnt == 0 && last_word_in_cblock) { + ptaJoin(block_top_pts, line_top_pts, 0, -1); + ptaJoin(block_bottom_pts, line_bottom_pts, 0, -1); + } else if (lcnt == 0) { + ptaJoin(block_top_pts, line_top_pts, 0, -1); + num_pts = ptaGetCount(line_bottom_pts); + ptaGetIPt(line_bottom_pts, num_pts - 1, &x, &y); + ptaAddPt(block_top_pts, x, y); + ptaGetIPt(line_bottom_pts, 0, &x, &y); + ptaAddPt(block_bottom_pts, x, y); + } else if (last_word_in_cblock) { + ptaGetIPt(line_top_pts, 0, &x, &y); + ptaAddPt(block_bottom_pts, x, y); + ptaJoin(block_bottom_pts, line_bottom_pts, 0, -1); + num_pts = ptaGetCount(line_top_pts); + ptaGetIPt(line_top_pts, num_pts - 1, &x, &y); + ptaAddPt(block_top_pts, x, y); + } else { + ptaGetIPt(line_top_pts, 0, &x, &y); + ptaAddPt(block_bottom_pts, x, y); + ptaGetIPt(line_bottom_pts, 0, &x, &y); + ptaAddPt(block_bottom_pts, x, y); + num_pts = ptaGetCount(line_top_pts); + ptaGetIPt(line_top_pts, num_pts - 1, &x, &y); + ptaAddPt(block_top_pts, x, y); + num_pts = ptaGetCount(line_bottom_pts); + ptaGetIPt(line_bottom_pts, num_pts - 1, &x, &y); + ptaAddPt(block_top_pts, x, y); + }; +} + +/// +/// Simplify polygonlines (only expanding not shrinking) (Due to recalculation +/// currently not necessary) +/// +static void SimplifyLinePolygon(Pta *polyline, int tolerance, bool upper) { + int x0, y0, x1, y1, x2, y2, x3, y3, index = 1; + float m, b, y_min, y_max; + + while (index <= polyline->n - 2) { + ptaGetIPt(polyline, index - 1, &x0, &y0); + ptaGetIPt(polyline, index, &x1, &y1); + ptaGetIPt(polyline, index + 1, &x2, &y2); + if (index + 2 < polyline->n) { + // Delete two point indentations + ptaGetIPt(polyline, index + 2, &x3, &y3); + if (abs(x3 - x0) <= tolerance * 2) { + GetSlopeAndOffset(x0, y0, x3, y3, &m, &b); + + if (upper && (m * x1 + b) < y1 && (m * x2 + b) < y2) { + ptaRemovePt(polyline, index + 1); + ptaRemovePt(polyline, index); + continue; + } else if (!upper && (m * x1 + b) > y1 && (m * x2 + b) > y2) { + ptaRemovePt(polyline, index + 1); + ptaRemovePt(polyline, index); + continue; + } + } + } + // Delete one point indentations + if (abs(y0 - y1) <= tolerance && abs(y1 - y2) <= tolerance) { + GetSlopeAndOffset(x0, y0, x2, y2, &m, &b); + if (upper && (m * x1 + b) <= y1) { + ptaRemovePt(polyline, index); + continue; + } else if (!upper && (m * x1 + b) >= y1) { + ptaRemovePt(polyline, index); + continue; + } + } + // Delete near by points + if (x1 != x0 && abs(y1 - y0) < 4 && abs(x1 - x0) <= tolerance) { + if (upper) { + y_min = std::min(y0, y1); + GetSlopeAndOffset(x0, y_min, x2, y2, &m, &b); + if ((m * x1 + b) <= y1) { + polyline->y[index - 1] = std::min(y0, y1); + ptaRemovePt(polyline, index); + continue; + } + } else { + y_max = std::max(y0, y1); + GetSlopeAndOffset(x0, y_max, x2, y2, &m, &b); + if ((m * x1 + b) >= y1) { + polyline->y[index - 1] = y_max; + ptaRemovePt(polyline, index); + continue; + } + } + } + index++; + } +} + +/// +/// Directly write bounding box information as coordinates a stream +/// +static void AddBoxToPAGE(const ResultIterator *it, PageIteratorLevel level, + std::stringstream &page_str) { + int left, top, right, bottom; + + it->BoundingBox(level, &left, &top, &right, &bottom); + page_str << "\n"; +} + +/// +/// Join ltr and rtl polygon information +/// +static void AppendLinePolygon(Pta *pts_ltr, Pta *pts_rtl, Pta *ptss, + tesseract::WritingDirection writing_direction) { + // If writing direction is NOT right-to-left, handle the left-to-right case. + if (writing_direction != WRITING_DIRECTION_RIGHT_TO_LEFT) { + if (ptaGetCount(pts_rtl) != 0) { + ptaJoin(pts_ltr, pts_rtl, 0, -1); + DestroyAndCreatePta(pts_rtl); + } + ptaJoin(pts_ltr, ptss, 0, -1); + } else { + // For right-to-left, work with a copy of ptss initially. + PTA *ptsd = ptaCopy(ptss); + if (ptaGetCount(pts_rtl) != 0) { + ptaJoin(ptsd, pts_rtl, 0, -1); + } + ptaDestroy(&pts_rtl); + ptaCopy(ptsd); + } +} + +/// +/// Convert baseline to points and add to polygon +/// +static void AddBaselineToPTA(const ResultIterator *it, PageIteratorLevel level, + Pta *baseline_pts) { + int x1, y1, x2, y2; + + it->Baseline(level, &x1, &y1, &x2, &y2); + ptaAddPt(baseline_pts, x1, y1); + ptaAddPt(baseline_pts, x2, y2); +} + +/// +/// Directly write baseline information as baseline points a stream +/// +static void AddBaselinePtsToPAGE(Pta *baseline_pts, std::stringstream &str) { + int x, y, num_pts = baseline_pts->n; + + str << "\n"; +} + +/// +/// Sort baseline points ascending and deleting duplicates +/// +Pta *SortBaseline(Pta *baseline_pts, + tesseract::WritingDirection writing_direction) { + int num_pts, index = 0; + float x0, y0, x1, y1; + Pta *sorted_baseline_pts; + + sorted_baseline_pts = + ptaSort(baseline_pts, L_SORT_BY_X, L_SORT_INCREASING, NULL); + + do { + ptaGetPt(sorted_baseline_pts, index, &x0, &y0); + ptaGetPt(sorted_baseline_pts, index + 1, &x1, &y1); + if (x0 >= x1) { + sorted_baseline_pts->y[index] = std::min(y0, y1); + ptaRemovePt(sorted_baseline_pts, index + 1); + } else { + index++; + } + num_pts = ptaGetCount(sorted_baseline_pts); + } while (index < num_pts - 1); + + ptaDestroy(&baseline_pts); + return sorted_baseline_pts; +} + +/// +/// Clip baseline to range of the exsitings polygon and simplifies the baseline +/// linepolygon +/// +Pta *ClipAndSimplifyBaseline(Pta *bottom_pts, Pta *baseline_pts, + tesseract::WritingDirection writing_direction) { + int num_pts; + float m, b, x0, y0, x1, y1; + float x_min, y_min, x_max, y_max; + Pta *baseline_clipped_pts; + + ptaGetMinMax(bottom_pts, &x_min, &y_min, &x_max, &y_max); + num_pts = ptaGetCount(baseline_pts); + baseline_clipped_pts = ptaCreate(0); + + // Clip Baseline + for (int p = 0; p < num_pts; ++p) { + ptaGetPt(baseline_pts, p, &x0, &y0); + if (x0 < x_min) { + if (p + 1 < num_pts) { + ptaGetPt(baseline_pts, p + 1, &x1, &y1); + if (x1 < x_min) { + continue; + } else { + GetSlopeAndOffset(x0, y0, x1, y1, &m, &b); + y0 = int(x_min * m + b); + x0 = x_min; + } + } + } else if (x0 > x_max) { + if (ptaGetCount(baseline_clipped_pts) > 0 && p > 0) { + ptaGetPt(baseline_pts, p - 1, &x1, &y1); + // See comment above + GetSlopeAndOffset(x1, y1, x0, y0, &m, &b); + y0 = int(x_max * m + b); + x0 = x_max; + ptaAddPt(baseline_clipped_pts, x0, y0); + break; + } + } + ptaAddPt(baseline_clipped_pts, x0, y0); + } + if (writing_direction == WRITING_DIRECTION_TOP_TO_BOTTOM) { + SimplifyLinePolygon(baseline_clipped_pts, 3, 0); + } else { + SimplifyLinePolygon(baseline_clipped_pts, 3, 1); + } + SimplifyLinePolygon( + baseline_clipped_pts, 3, + writing_direction == WRITING_DIRECTION_TOP_TO_BOTTOM ? 0 : 1); + + // Check the number of points in baseline_clipped_pts after processing + int clipped_pts_count = ptaGetCount(baseline_clipped_pts); + + if (clipped_pts_count < 2) { + // If there's only one point in baseline_clipped_pts, duplicate it + ptaDestroy(&baseline_clipped_pts); // Clean up the created but unused Pta + baseline_clipped_pts = ptaCreate(0); + ptaAddPt(baseline_clipped_pts, x_min, y_min); + ptaAddPt(baseline_clipped_pts, x_max, y_min); + } + + return baseline_clipped_pts; +} + +/// +/// Fit the baseline points into the existings polygon +/// +Pta *FitBaselineIntoLinePolygon(Pta *bottom_pts, Pta *baseline_pts, + tesseract::WritingDirection writing_direction) { + int num_pts, num_bin, x0, y0, x1, y1; + float m, b; + float x_min, y_min, x_max, y_max; + float delta_median, delta_median_Q1, delta_median_Q3; + NUMA *bin_line, *poly_bl_delta; + Pta *baseline_recalc_pts, *baseline_clipped_pts; + + ptaGetMinMax(bottom_pts, &x_min, &y_min, &x_max, &y_max); + num_bin = x_max - x_min; + bin_line = numaCreate(num_bin + 1); + + for (int p = 0; p < num_bin + 1; ++p) { + bin_line->array[p] = -1.; + } + + num_pts = ptaGetCount(bottom_pts); + // Create a interpolated polygon with stepsize 1 + for (int index = 0; index < num_pts - 1; ++index) { + ptaGetIPt(bottom_pts, index, &x0, &y0); + ptaGetIPt(bottom_pts, index + 1, &x1, &y1); + if (x0 >= x1) { + continue; + } + if (y0 == y1) { + for (int p = x0 - x_min; p < x1 - x_min + 1; ++p) { + if (bin_line->array[p] == -1. || y0 > bin_line->array[p]) { + bin_line->array[p] = y0; + } + } + } else { + GetSlopeAndOffset(x0, y0, x1, y1, &m, &b); + for (int p = x0 - x_min; p < x1 - x_min + 1; ++p) { + if (bin_line->array[p] == -1. || + ((p + x_min) * m + b) > bin_line->array[p]) { + bin_line->array[p] = ((p + x_min) * m + b); + } + } + } + } + + num_pts = ptaGetCount(baseline_pts); + baseline_clipped_pts = ptaCreate(0); + poly_bl_delta = numaCreate(0); + + // Clip Baseline and create a set of deltas between baseline and polygon + for (int p = 0; p < num_pts; ++p) { + ptaGetIPt(baseline_pts, p, &x0, &y0); + + if (x0 < x_min) { + ptaGetIPt(baseline_pts, p + 1, &x1, &y1); + if (x1 < x_min) { + continue; + } else { + GetSlopeAndOffset(x0, y0, x1, y1, &m, &b); + y0 = int(x_min * m + b); + x0 = x_min; + } + } else if (x0 > x_max) { + if (ptaGetCount(baseline_clipped_pts) > 0) { + ptaGetIPt(baseline_pts, p - 1, &x1, &y1); + GetSlopeAndOffset(x1, y1, x0, y0, &m, &b); + y0 = int(x_max * m + b); + x0 = x_max; + int x_val = x0 - x_min; + numaAddNumber(poly_bl_delta, abs(bin_line->array[x_val] - y0)); + ptaAddPt(baseline_clipped_pts, x0, y0); + break; + } + } + int x_val = x0 - x_min; + numaAddNumber(poly_bl_delta, abs(bin_line->array[x_val] - y0)); + ptaAddPt(baseline_clipped_pts, x0, y0); + } + + ptaDestroy(&baseline_pts); + + // Calculate quartiles to find outliers + numaGetMedian(poly_bl_delta, &delta_median); + numaGetRankValue(poly_bl_delta, 0.25, NULL, 0, &delta_median_Q1); + numaGetRankValue(poly_bl_delta, 0.75, NULL, 0, &delta_median_Q3); + + // Fit baseline into the polygon + // Todo: Needs maybe some adjustments to suppress fitting to superscript + // glyphs + baseline_recalc_pts = ptaCreate(0); + num_pts = ptaGetCount(baseline_clipped_pts); + for (int p = 0; p < num_pts; ++p) { + ptaGetIPt(baseline_clipped_pts, p, &x0, &y0); + int x_val = x0 - x_min; + // Delete outliers with IQR + if (abs(y0 - bin_line->array[x_val]) > + 1.5 * delta_median_Q3 + delta_median && + p != 0 && p != num_pts - 1) { + continue; + } + if (writing_direction == WRITING_DIRECTION_TOP_TO_BOTTOM) { + if (y0 < bin_line->array[x_val]) { + ptaAddPt(baseline_recalc_pts, x0, bin_line->array[x_val]); + } else { + ptaAddPt(baseline_recalc_pts, x0, y0); + } + } else { + if (y0 > bin_line->array[x_val]) { + ptaAddPt(baseline_recalc_pts, x0, bin_line->array[x_val]); + } else { + ptaAddPt(baseline_recalc_pts, x0, y0); + } + } + } + // Return recalculated baseline if this fails return the bottom line as + // baseline + ptaDestroy(&baseline_clipped_pts); + if (ptaGetCount(baseline_recalc_pts) < 2) { + ptaDestroy(&baseline_recalc_pts); + return ptaCopy(bottom_pts); + } else { + return baseline_recalc_pts; + } +} + +/// Convert writing direction to string representation +const char *WritingDirectionToStr(int wd) { + switch (wd) { + case 0: + return "left-to-right"; + case 1: + return "right-to-left"; + case 2: + return "top-to-bottom"; + default: + return "bottom-to-top"; + } +} +/// +/// Append the PAGE XML for the beginning of the document +/// +bool TessPAGERenderer::BeginDocumentHandler() { + // Delay the XML output because we need the name of the image file. + begin_document = true; + return true; +} + +/// +/// Append the PAGE XML for the layout of the image +/// +bool TessPAGERenderer::AddImageHandler(TessBaseAPI *api) { + if (begin_document) { + AppendString( + "\n" + "\n" + "\t + if (std::regex_search(api->GetInputName(), + std::regex("^(https?|ftp|ssh):"))) { + AppendString(" externalRef=\""); + AppendString(api->GetInputName()); + AppendString("\" "); + } + + AppendString( + ">\n" + "\t\tTesseract - "); + AppendString(TESSERACT_VERSION_STR); + // If gmtime conversion is problematic maybe l_getFormattedDate can be used + // here + // char *datestr = l_getFormattedDate(); + std::time_t now = std::time(nullptr); + std::tm *now_tm = std::gmtime(&now); + char mbstr[100]; + std::strftime(mbstr, sizeof(mbstr), "%Y-%m-%dT%H:%M:%S", now_tm); + AppendString( + "\n" + "\t\t"); + AppendString(mbstr); + AppendString("\n"); + AppendString("\t\t"); + AppendString(mbstr); + AppendString( + "\n" + "\t\n"); + begin_document = false; + } + + const std::unique_ptr text(api->GetPAGEText(imagenum())); + if (text == nullptr) { + return false; + } + + AppendString(text.get()); + + return true; +} + +/// +/// Append the PAGE XML for the end of the document +/// +bool TessPAGERenderer::EndDocumentHandler() { + AppendString("\t\t\n\n"); + return true; +} + +TessPAGERenderer::TessPAGERenderer(const char *outputbase) + : TessResultRenderer(outputbase, "page.xml"), begin_document(false) {} + +/// +/// Make an XML-formatted string with PAGE markup from the internal +/// data structures. +/// +char *TessBaseAPI::GetPAGEText(int page_number) { + return GetPAGEText(nullptr, page_number); +} + +/// +/// Make an XML-formatted string with PAGE markup from the internal +/// data structures. +/// +char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { + if (tesseract_ == nullptr || + (page_res_ == nullptr && Recognize(monitor) < 0)) { + return nullptr; + } + + int rcnt = 0, lcnt = 0, wcnt = 0; + + if (input_file_.empty()) { + SetInputName(nullptr); + } + +#ifdef _WIN32 + // convert input name from ANSI encoding to utf-8 + int str16_len = + MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0); + wchar_t *uni16_str = new WCHAR[str16_len]; + str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, + str16_len); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, + 0, nullptr, nullptr); + char *utf8_str = new char[utf8_len]; + WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, + nullptr, nullptr); + input_file_ = utf8_str; + delete[] uni16_str; + delete[] utf8_str; +#endif + + // Used variables + + std::stringstream reading_order_str; + std::stringstream region_content; + std::stringstream line_content; + std::stringstream word_content; + std::stringstream line_str; + std::stringstream line_inter_str; + std::stringstream word_str; + std::stringstream page_str; + + float x1, y1, x2, y2, word_conf, line_conf, block_conf; + + tesseract::Orientation orientation_block; + tesseract::WritingDirection writing_direction_block; + tesseract::TextlineOrder textline_order_block; + + Pta *block_top_pts = ptaCreate(0); + Pta *block_bottom_pts = ptaCreate(0); + Pta *line_top_ltr_pts = ptaCreate(0); + Pta *line_bottom_ltr_pts = ptaCreate(0); + Pta *line_top_rtl_pts = ptaCreate(0); + Pta *line_bottom_rtl_pts = ptaCreate(0); + Pta *word_top_pts = ptaCreate(0); + Pta *word_bottom_pts = ptaCreate(0); + Pta *word_baseline_pts = ptaCreate(0); + Pta *line_baseline_rtl_pts = ptaCreate(0); + Pta *line_baseline_ltr_pts = ptaCreate(0); + Pta *line_baseline_pts = ptaCreate(0); + + bool POLYGONFLAG; + GetBoolVariable("page_xml_polygon", &POLYGONFLAG); + int LEVELFLAG; + GetIntVariable("page_xml_level", &LEVELFLAG); + + if (LEVELFLAG != 0 && LEVELFLAG != 1) { + tprintf( + "For now, only line level and word level are available, and the level " + "is reset to line level.\n"); + LEVELFLAG = 0; + } + + // Use "C" locale (needed for int values larger than 999). + page_str.imbue(std::locale::classic()); + reading_order_str << "\tGetInputName()); + reading_order_str << "\" " << "imageWidth=\"" << rect_width_ << "\" " + << "imageHeight=\"" << rect_height_ << "\">\n"; + std::size_t ro_id = std::hash{}(GetInputName()); + reading_order_str << "\t\t\n" + << "\t\t\t\n"; + + ResultIterator *res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } + + auto block_type = res_it->BlockType(); + + switch (block_type) { + case PT_FLOWING_IMAGE: + case PT_HEADING_IMAGE: + case PT_PULLOUT_IMAGE: { + // Handle all kinds of images. + page_str << "\t\t\n"; + page_str << "\t\t\t"; + AddBoxToPAGE(res_it, RIL_BLOCK, page_str); + page_str << "\t\t\n"; + res_it->Next(RIL_BLOCK); + continue; + } + case PT_HORZ_LINE: + case PT_VERT_LINE: + // Handle horizontal and vertical lines. + page_str << "\t\t\n"; + page_str << "\t\t\t"; + AddBoxToPAGE(res_it, RIL_BLOCK, page_str); + page_str << "\t\t\n"; + res_it->Next(RIL_BLOCK); + continue; + case PT_NOISE: + tprintf("TODO: Please report image which triggers the noise case.\n"); + ASSERT_HOST(false); + default: + break; + } + + if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + // Add Block to reading order + reading_order_str << "\t\t\t\t\n"; + + float deskew_angle; + res_it->Orientation(&orientation_block, &writing_direction_block, + &textline_order_block, &deskew_angle); + block_conf = ((res_it->Confidence(RIL_BLOCK)) / 100.); + page_str << "\t\t\n"; + page_str << "\t\t\t"; + if ((!POLYGONFLAG || + (orientation_block != 0 && orientation_block != 2)) && + LEVELFLAG == 0) { + AddBoxToPAGE(res_it, RIL_BLOCK, page_str); + } + } + + // Writing direction changes at a per-word granularity + // tesseract::WritingDirection writing_direction_before; + tesseract::WritingDirection writing_direction; + + writing_direction = writing_direction_block; + if (writing_direction_block != WRITING_DIRECTION_TOP_TO_BOTTOM) { + switch (res_it->WordDirection()) { + case DIR_LEFT_TO_RIGHT: + writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT; + break; + case DIR_RIGHT_TO_LEFT: + writing_direction = WRITING_DIRECTION_RIGHT_TO_LEFT; + break; + default: + break; + } + } + + bool ttb_flag = (writing_direction == WRITING_DIRECTION_TOP_TO_BOTTOM); + // TODO: Rework polygon handling if line is skewed (90 or 180 degress), + // for now using LinePts + bool skewed_flag = (orientation_block != 0 && orientation_block != 2); + + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + // writing_direction_before = writing_direction; + line_conf = ((res_it->Confidence(RIL_TEXTLINE)) / 100.); + line_content << HOcrEscape(res_it->GetUTF8Text(RIL_TEXTLINE)).c_str(); + line_str << "\t\t\t\n"; + // If level is linebased, get the line polygon and baseline + if (LEVELFLAG == 0 && (!POLYGONFLAG || skewed_flag)) { + AddPointToWordPolygon(res_it, RIL_TEXTLINE, line_top_ltr_pts, + line_bottom_ltr_pts, writing_direction); + AddBaselineToPTA(res_it, RIL_TEXTLINE, line_baseline_pts); + if (ttb_flag) { + line_baseline_pts = TransposePolygonline(line_baseline_pts); + } + } + } + + // Get information if word is last in line and if its last in the region + bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); + bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); + + word_conf = ((res_it->Confidence(RIL_WORD)) / 100.); + + // Create word stream if word level output is active + if (LEVELFLAG > 0) { + word_str << "\t\t\t\t\n"; + if ((!POLYGONFLAG || skewed_flag) || ttb_flag) { + AddPointToWordPolygon(res_it, RIL_WORD, word_top_pts, word_bottom_pts, + writing_direction); + } + } + + if (POLYGONFLAG && !skewed_flag && ttb_flag && LEVELFLAG == 0) { + AddPointToWordPolygon(res_it, RIL_WORD, word_top_pts, word_bottom_pts, + writing_direction); + } + + // Get the word baseline information + AddBaselineToPTA(res_it, RIL_WORD, word_baseline_pts); + + // Get the word text content and polygon + do { + const std::unique_ptr grapheme( + res_it->GetUTF8Text(RIL_SYMBOL)); + if (grapheme && grapheme[0] != 0) { + word_content << HOcrEscape(grapheme.get()).c_str(); + if (POLYGONFLAG && !skewed_flag && !ttb_flag) { + AddPointToWordPolygon(res_it, RIL_SYMBOL, word_top_pts, + word_bottom_pts, writing_direction); + } + } + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + + if (LEVELFLAG > 0 || (POLYGONFLAG && !skewed_flag)) { + // Sort wordpolygons + word_top_pts = RecalcPolygonline(word_top_pts, 1 - ttb_flag); + word_bottom_pts = RecalcPolygonline(word_bottom_pts, 0 + ttb_flag); + + // AppendLinePolygon + AppendLinePolygon(line_top_ltr_pts, line_top_rtl_pts, word_top_pts, + writing_direction); + AppendLinePolygon(line_bottom_ltr_pts, line_bottom_rtl_pts, + word_bottom_pts, writing_direction); + + // Word level polygon + word_bottom_pts = ReversePolygonline(word_bottom_pts, 1); + ptaJoin(word_top_pts, word_bottom_pts, 0, -1); + } + + // Reverse the word baseline direction for rtl + if (writing_direction == WRITING_DIRECTION_RIGHT_TO_LEFT) { + word_baseline_pts = ReversePolygonline(word_baseline_pts, 1); + } + + // Write word information to the output + if (LEVELFLAG > 0) { + word_str << "\t\t\t\t\t"; + if (ttb_flag) { + word_top_pts = TransposePolygonline(word_top_pts); + } + AddPointsToPAGE(word_top_pts, word_str); + word_str << "\t\t\t\t\t"; + AddBaselinePtsToPAGE(word_baseline_pts, word_str); + word_str << "\t\t\t\t\t\n" + << "\t\t\t\t\t\t" << word_content.str() + << "\n" + << "\t\t\t\t\t\n" + << "\t\t\t\t\n"; + } + if (LEVELFLAG > 0 || (POLYGONFLAG && !skewed_flag)) { + // Add wordbaseline to linebaseline + if (ttb_flag) { + word_baseline_pts = TransposePolygonline(word_baseline_pts); + } + ptaJoin(line_baseline_pts, word_baseline_pts, 0, -1); + } + word_baseline_pts = DestroyAndCreatePta(word_baseline_pts); + + // Reset word pts arrays + word_top_pts = DestroyAndCreatePta(word_top_pts); + word_bottom_pts = DestroyAndCreatePta(word_bottom_pts); + + // Check why this combination of words is not working as expected! + // Write the word contents to the line +#if 0 + if (!last_word_in_line && writing_direction_before != writing_direction && + writing_direction < 2 && writing_direction_before < 2 && + res_it->WordDirection()) { + if (writing_direction_before == WRITING_DIRECTION_LEFT_TO_RIGHT) { + // line_content << "‏" << word_content.str(); + } else { + // line_content << "‎" << word_content.str(); + } + } else { + // line_content << word_content.str(); + } + // Check if WordIsNeutral + if (res_it->WordDirection()) { + writing_direction_before = writing_direction; + } +#endif + word_content.str(""); + wcnt++; + + // Write line information to the output + if (last_word_in_line) { + // Combine ltr and rtl lines + if (ptaGetCount(line_top_rtl_pts) != 0) { + ptaJoin(line_top_ltr_pts, line_top_rtl_pts, 0, -1); + line_top_rtl_pts = DestroyAndCreatePta(line_top_rtl_pts); + } + if (ptaGetCount(line_bottom_rtl_pts) != 0) { + ptaJoin(line_bottom_ltr_pts, line_bottom_rtl_pts, 0, -1); + line_bottom_rtl_pts = DestroyAndCreatePta(line_bottom_rtl_pts); + } + if ((POLYGONFLAG && !skewed_flag) || LEVELFLAG > 0) { + // Recalc Polygonlines + line_top_ltr_pts = RecalcPolygonline(line_top_ltr_pts, 1 - ttb_flag); + line_bottom_ltr_pts = + RecalcPolygonline(line_bottom_ltr_pts, 0 + ttb_flag); + + // Smooth the polygonline + SimplifyLinePolygon(line_top_ltr_pts, 5, 1 - ttb_flag); + SimplifyLinePolygon(line_bottom_ltr_pts, 5, 0 + ttb_flag); + + // Fit linepolygon matching the baselinepoints + line_baseline_pts = SortBaseline(line_baseline_pts, writing_direction); + // Fitting baseline into polygon is currently deactivated + // it tends to push the baseline directly under superscritpts + // but the baseline is always inside the polygon maybe it will be useful + // for something line_baseline_pts = + // FitBaselineIntoLinePolygon(line_bottom_ltr_pts, line_baseline_pts, + // writing_direction); and it only cut it to the length and simplifies + // the linepolyon + line_baseline_pts = ClipAndSimplifyBaseline( + line_bottom_ltr_pts, line_baseline_pts, writing_direction); + + // Update polygon of the block + UpdateBlockPoints(block_top_pts, block_bottom_pts, line_top_ltr_pts, + line_bottom_ltr_pts, lcnt, last_word_in_cblock); + } + // Line level polygon + line_bottom_ltr_pts = ReversePolygonline(line_bottom_ltr_pts, 1); + ptaJoin(line_top_ltr_pts, line_bottom_ltr_pts, 0, -1); + line_bottom_ltr_pts = DestroyAndCreatePta(line_bottom_ltr_pts); + + if (LEVELFLAG > 0 && !(POLYGONFLAG && !skewed_flag)) { + line_top_ltr_pts = PolygonToBoxCoords(line_top_ltr_pts); + } + + // Write level points + line_str << "\t\t\t\t"; + if (ttb_flag) { + line_top_ltr_pts = TransposePolygonline(line_top_ltr_pts); + } + AddPointsToPAGE(line_top_ltr_pts, line_str); + line_top_ltr_pts = DestroyAndCreatePta(line_top_ltr_pts); + + // Write Baseline + line_str << "\t\t\t\t"; + if (ttb_flag) { + line_baseline_pts = TransposePolygonline(line_baseline_pts); + } + AddBaselinePtsToPAGE(line_baseline_pts, line_str); + line_baseline_pts = DestroyAndCreatePta(line_baseline_pts); + + // Add word information if word level output is active + line_str << word_str.str(); + word_str.str(""); + // Write Line TextEquiv + line_str << "\t\t\t\t\n" + << "\t\t\t\t\t" << line_content.str() << "\n" + << "\t\t\t\t\n"; + line_str << "\t\t\t\n"; + region_content << line_content.str(); + line_content.str(""); + if (!last_word_in_cblock) { + region_content << "\n\t\t\t\t\t"; + } + lcnt++; + wcnt = 0; + } else { + line_content << " "; + } + + // Write region information to the output + if (last_word_in_cblock) { + if ((POLYGONFLAG && !skewed_flag) || LEVELFLAG > 0) { + page_str << "\n"; + block_top_pts = DestroyAndCreatePta(block_top_pts); + block_bottom_pts = DestroyAndCreatePta(block_bottom_pts); + } + page_str << line_str.str(); + line_str.str(""); + page_str << "\t\t\t\n" + << "\t\t\t\t" << region_content.str() << "\n" + << "\t\t\t\n"; + page_str << "\t\t\n"; + region_content.str(""); + rcnt++; + lcnt = 0; + } + } + + // Destroy all point information + ptaDestroy(&block_top_pts); + ptaDestroy(&block_bottom_pts); + ptaDestroy(&line_top_ltr_pts); + ptaDestroy(&line_bottom_ltr_pts); + ptaDestroy(&line_top_rtl_pts); + ptaDestroy(&line_bottom_rtl_pts); + ptaDestroy(&word_top_pts); + ptaDestroy(&word_bottom_pts); + ptaDestroy(&word_baseline_pts); + ptaDestroy(&line_baseline_rtl_pts); + ptaDestroy(&line_baseline_ltr_pts); + ptaDestroy(&line_baseline_pts); + + reading_order_str << "\t\t\t\n" + << "\t\t\n"; + + reading_order_str << page_str.str(); + page_str.str(""); + const std::string &text = reading_order_str.str(); + reading_order_str.str(""); + + // Allocate memory for result to hold text.length() characters plus a null + // terminator Safely copy the string into result, ensuring no overflow strncpy + // does not necessarily null-terminate the destination, so do it manually + char *result = new char[text.length() + 1]; + strncpy(result, text.c_str(), text.length()); + result[text.length()] = '\0'; + + delete res_it; + return result; +} + +} // namespace tesseract diff -Nru tesseract-5.3.4+git6348-2b07505e/src/arch/intsimdmatrix.cpp tesseract-5.3.4+git6361-d4618678/src/arch/intsimdmatrix.cpp --- tesseract-5.3.4+git6348-2b07505e/src/arch/intsimdmatrix.cpp 2024-03-17 20:39:02.335838300 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/arch/intsimdmatrix.cpp 2024-04-26 18:21:25.611761000 +0000 @@ -80,7 +80,32 @@ int num_out = w.dim1(); int num_in = w.dim2() - 1; // Base implementation. - for (int i = 0; i < num_out; ++i) { + int i; + // Break up into chunks of four to facilitate vectorization + for (i = 0; i < (num_out / 4) * 4; i += 4) { + const int8_t *wi0 = w[i + 0]; + const int8_t *wi1 = w[i + 1]; + const int8_t *wi2 = w[i + 2]; + const int8_t *wi3 = w[i + 3]; + int total0 = 0; + int total1 = 0; + int total2 = 0; + int total3 = 0; + for (int j = 0; j < num_in; ++j) { + total0 += wi0[j] * u[j]; + total1 += wi1[j] * u[j]; + total2 += wi2[j] * u[j]; + total3 += wi3[j] * u[j]; + } + // Add in the bias and correct for integer values. + v[i + 0] = (total0 + wi0[num_in] * INT8_MAX) * scales[i + 0]; + v[i + 1] = (total1 + wi1[num_in] * INT8_MAX) * scales[i + 1]; + v[i + 2] = (total2 + wi2[num_in] * INT8_MAX) * scales[i + 2]; + v[i + 3] = (total3 + wi3[num_in] * INT8_MAX) * scales[i + 3]; + } + + // Capture the remainder mod four + for (; i < num_out; ++i) { const int8_t *wi = w[i]; int total = 0; for (int j = 0; j < num_in; ++j) { diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccmain/paragraphs.cpp tesseract-5.3.4+git6361-d4618678/src/ccmain/paragraphs.cpp --- tesseract-5.3.4+git6348-2b07505e/src/ccmain/paragraphs.cpp 2024-03-17 20:39:02.335838300 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccmain/paragraphs.cpp 2024-04-26 18:21:25.611761000 +0000 @@ -2610,7 +2610,6 @@ // Run the paragraph detection algorithm. std::vector row_owners; - std::vector the_paragraphs; if (!is_image_block) { DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(), models); } else { diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccmain/tesseractclass.cpp tesseract-5.3.4+git6361-d4618678/src/ccmain/tesseractclass.cpp --- tesseract-5.3.4+git6348-2b07505e/src/ccmain/tesseractclass.cpp 2024-03-17 20:39:02.339172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccmain/tesseractclass.cpp 2024-04-26 18:21:25.615094700 +0000 @@ -340,6 +340,9 @@ , BOOL_MEMBER(tessedit_create_txt, false, "Write .txt output file", this->params()) , BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file", this->params()) , BOOL_MEMBER(tessedit_create_alto, false, "Write .xml ALTO file", this->params()) + , BOOL_MEMBER(tessedit_create_page_xml, false, "Write .page.xml PAGE file", this->params()) + , BOOL_MEMBER(page_xml_polygon, true, "Create the PAGE file with polygons instead of box values", this->params()) + , INT_MEMBER(page_xml_level, 0, "Create the PAGE file on 0=line or 1=word level.", this->params()) , BOOL_MEMBER(tessedit_create_lstmbox, false, "Write .box file for LSTM training", this->params()) , BOOL_MEMBER(tessedit_create_tsv, false, "Write .tsv output file", this->params()) diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccmain/tesseractclass.h tesseract-5.3.4+git6361-d4618678/src/ccmain/tesseractclass.h --- tesseract-5.3.4+git6348-2b07505e/src/ccmain/tesseractclass.h 2024-03-17 20:39:02.339172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccmain/tesseractclass.h 2024-04-26 18:21:25.615094700 +0000 @@ -897,6 +897,9 @@ BOOL_VAR_H(tessedit_create_txt); BOOL_VAR_H(tessedit_create_hocr); BOOL_VAR_H(tessedit_create_alto); + BOOL_VAR_H(tessedit_create_page_xml); + BOOL_VAR_H(page_xml_polygon); + INT_VAR_H(page_xml_level); BOOL_VAR_H(tessedit_create_lstmbox); BOOL_VAR_H(tessedit_create_tsv); BOOL_VAR_H(tessedit_create_wordstrbox); diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccmain/thresholder.cpp tesseract-5.3.4+git6361-d4618678/src/ccmain/thresholder.cpp --- tesseract-5.3.4+git6348-2b07505e/src/ccmain/thresholder.cpp 2024-03-17 20:39:02.339172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccmain/thresholder.cpp 2024-04-26 18:21:25.615094700 +0000 @@ -25,10 +25,6 @@ #include "thresholder.h" #include "tprintf.h" // for tprintf -#if defined(USE_OPENCL) -# include "openclwrapper.h" // for OpenclDevice -#endif - #include #include // for api->GetIntVariable() @@ -388,19 +384,7 @@ int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_, thresholds, hi_values); - // only use opencl if compiled w/ OpenCL and selected device is opencl -#ifdef USE_OPENCL - OpenclDevice od; - if (num_channels == 4 && od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) { - od.ThresholdRectToPixOCL((unsigned char *)pixGetData(src_pix), num_channels, - pixGetWpl(src_pix) * 4, &thresholds[0], &hi_values[0], out_pix /*pix_OCL*/, - rect_height_, rect_width_, rect_top_, rect_left_); - } else { -#endif - ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); -#ifdef USE_OPENCL - } -#endif + ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); } /// Threshold the rectangle, taking everything except the src_pix diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccstruct/imagedata.cpp tesseract-5.3.4+git6361-d4618678/src/ccstruct/imagedata.cpp --- tesseract-5.3.4+git6348-2b07505e/src/ccstruct/imagedata.cpp 2024-03-17 20:39:02.339172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccstruct/imagedata.cpp 2024-04-26 18:21:25.615094700 +0000 @@ -33,7 +33,8 @@ #include // for pixDestroy, pixGetHeight, pixGetWidth, lept_... -#include // for PRId64 +#include // for PRId64 +#include // for std::ifstream namespace tesseract { @@ -534,7 +535,7 @@ } } -// Locks the pages_mutex_ and Loads as many pages can fit in max_memory_ +// Locks the pages_mutex_ and loads as many pages as will fit into max_memory_ // starting at index pages_offset_. bool DocumentData::ReCachePages() { std::lock_guard lock(pages_mutex_); @@ -546,6 +547,31 @@ delete page; } pages_.clear(); +#if !defined(TESSERACT_IMAGEDATA_AS_PIX) + auto name_size = document_name_.size(); + if (name_size > 4 && document_name_.substr(name_size - 4) == ".png") { + // PNG image given instead of LSTMF file. + std::string gt_name = document_name_.substr(0, name_size - 3) + "gt.txt"; + std::ifstream t(gt_name); + std::string line; + std::getline(t, line); + t.close(); + ImageData *image_data = ImageData::Build(document_name_.c_str(), 0, "", nullptr, 0, line.c_str(), nullptr); + Image image = pixRead(document_name_.c_str()); + image_data->SetPix(image); + pages_.push_back(image_data); + loaded_pages = 1; + pages_offset_ %= loaded_pages; + set_total_pages(loaded_pages); + set_memory_used(memory_used() + image_data->MemoryUsed()); +#if 0 + tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), + loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(), + document_name_.c_str()); +#endif + return !pages_.empty(); + } +#endif TFile fp; if (!fp.Open(document_name_.c_str(), reader_) || !fp.DeSerializeSize(&loaded_pages) || loaded_pages <= 0) { diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccstruct/imagedata.h tesseract-5.3.4+git6361-d4618678/src/ccstruct/imagedata.h --- tesseract-5.3.4+git6348-2b07505e/src/ccstruct/imagedata.h 2024-03-17 20:39:02.339172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccstruct/imagedata.h 2024-04-26 18:21:25.615094700 +0000 @@ -250,7 +250,7 @@ std::lock_guard lock(general_mutex_); memory_used_ = memory_used; } - // Locks the pages_mutex_ and Loads as many pages can fit in max_memory_ + // Locks the pages_mutex_ and loads as many pages as will fit into max_memory_ // starting at index pages_offset_. bool ReCachePages(); diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccstruct/otsuthr.cpp tesseract-5.3.4+git6361-d4618678/src/ccstruct/otsuthr.cpp --- tesseract-5.3.4+git6348-2b07505e/src/ccstruct/otsuthr.cpp 2024-03-17 20:39:02.339172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccstruct/otsuthr.cpp 2024-04-26 18:21:25.618428000 +0000 @@ -21,9 +21,6 @@ #include #include #include "helpers.h" -#if defined(USE_OPENCL) -# include "openclwrapper.h" // for OpenclDevice -#endif namespace tesseract { @@ -47,90 +44,40 @@ thresholds.resize(num_channels); hi_values.resize(num_channels); - // only use opencl if compiled w/ OpenCL and selected device is opencl -#ifdef USE_OPENCL - // all of channel 0 then all of channel 1... - std::vector histogramAllChannels(kHistogramSize * num_channels); - - // Calculate Histogram on GPU - OpenclDevice od; - if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 && - left == 0) { - od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width, - height, kHistogramSize, &histogramAllChannels[0]); - - // Calculate Threshold from Histogram on cpu - for (int ch = 0; ch < num_channels; ++ch) { - thresholds[ch] = -1; - hi_values[ch] = -1; - int *histogram = &histogramAllChannels[kHistogramSize * ch]; - int H; - int best_omega_0; - int best_t = OtsuStats(histogram, &H, &best_omega_0); - if (best_omega_0 == 0 || best_omega_0 == H) { - // This channel is empty. - continue; - } - // To be a convincing foreground we must have a small fraction of H - // or to be a convincing background we must have a large fraction of H. - // In between we assume this channel contains no thresholding information. - int hi_value = best_omega_0 < H * 0.5; - thresholds[ch] = best_t; - if (best_omega_0 > H * 0.75) { - any_good_hivalue = true; - hi_values[ch] = 0; - } else if (best_omega_0 < H * 0.25) { - any_good_hivalue = true; - hi_values[ch] = 1; - } else { - // In case all channels are like this, keep the best of the bad lot. - double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; - if (hi_dist > best_hi_dist) { - best_hi_dist = hi_dist; - best_hi_value = hi_value; - best_hi_index = ch; - } - } + for (int ch = 0; ch < num_channels; ++ch) { + thresholds[ch] = -1; + hi_values[ch] = -1; + // Compute the histogram of the image rectangle. + int histogram[kHistogramSize]; + HistogramRect(src_pix, ch, left, top, width, height, histogram); + int H; + int best_omega_0; + int best_t = OtsuStats(histogram, &H, &best_omega_0); + if (best_omega_0 == 0 || best_omega_0 == H) { + // This channel is empty. + continue; } - } else { -#endif - for (int ch = 0; ch < num_channels; ++ch) { - thresholds[ch] = -1; - hi_values[ch] = -1; - // Compute the histogram of the image rectangle. - int histogram[kHistogramSize]; - HistogramRect(src_pix, ch, left, top, width, height, histogram); - int H; - int best_omega_0; - int best_t = OtsuStats(histogram, &H, &best_omega_0); - if (best_omega_0 == 0 || best_omega_0 == H) { - // This channel is empty. - continue; - } - // To be a convincing foreground we must have a small fraction of H - // or to be a convincing background we must have a large fraction of H. - // In between we assume this channel contains no thresholding information. - int hi_value = best_omega_0 < H * 0.5; - thresholds[ch] = best_t; - if (best_omega_0 > H * 0.75) { - any_good_hivalue = true; - hi_values[ch] = 0; - } else if (best_omega_0 < H * 0.25) { - any_good_hivalue = true; - hi_values[ch] = 1; - } else { - // In case all channels are like this, keep the best of the bad lot. - double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; - if (hi_dist > best_hi_dist) { - best_hi_dist = hi_dist; - best_hi_value = hi_value; - best_hi_index = ch; - } + // To be a convincing foreground we must have a small fraction of H + // or to be a convincing background we must have a large fraction of H. + // In between we assume this channel contains no thresholding information. + int hi_value = best_omega_0 < H * 0.5; + thresholds[ch] = best_t; + if (best_omega_0 > H * 0.75) { + any_good_hivalue = true; + hi_values[ch] = 0; + } else if (best_omega_0 < H * 0.25) { + any_good_hivalue = true; + hi_values[ch] = 1; + } else { + // In case all channels are like this, keep the best of the bad lot. + double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; + if (hi_dist > best_hi_dist) { + best_hi_dist = hi_dist; + best_hi_value = hi_value; + best_hi_index = ch; } } -#ifdef USE_OPENCL } -#endif // USE_OPENCL if (!any_good_hivalue) { // Use the best of the ones that were not good enough. diff -Nru tesseract-5.3.4+git6348-2b07505e/src/ccstruct/stepblob.cpp tesseract-5.3.4+git6361-d4618678/src/ccstruct/stepblob.cpp --- tesseract-5.3.4+git6348-2b07505e/src/ccstruct/stepblob.cpp 2024-03-17 20:39:02.342505200 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/ccstruct/stepblob.cpp 2024-04-26 18:21:25.618428000 +0000 @@ -43,7 +43,7 @@ **********************************************************************/ static void position_outline( // put in place C_OUTLINE *outline, // thing to place - C_OUTLINE_LIST *destlist // desstination list + C_OUTLINE_LIST *destlist // destination list ) { C_OUTLINE_IT it = destlist; // iterator // iterator on children diff -Nru tesseract-5.3.4+git6348-2b07505e/src/opencl/oclkernels.h tesseract-5.3.4+git6361-d4618678/src/opencl/oclkernels.h --- tesseract-5.3.4+git6348-2b07505e/src/opencl/oclkernels.h 2024-03-17 20:39:02.345838800 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/opencl/oclkernels.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,926 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_OPENCL_OCLKERNELS_H_ -# define TESSERACT_OPENCL_OCLKERNELS_H_ - -# ifndef USE_EXTERNAL_KERNEL -# define KERNEL(...) # __VA_ARGS__ "\n" -// Double precision is a default of spreadsheets -// cl_khr_fp64: Khronos extension -// cl_amd_fp64: AMD extension -// use build option outside to define fp_t -///////////////////////////////////////////// -static const char *kernel_src = KERNEL( -\n #ifdef KHR_DP_EXTENSION\n -\n #pragma OPENCL EXTENSION cl_khr_fp64 - : enable\n -\n #elif AMD_DP_EXTENSION\n -\n #pragma OPENCL EXTENSION cl_amd_fp64 - : enable\n -\n #else \n -\n #endif \n - __kernel void composeRGBPixel(__global uint *tiffdata, int w, int h, int wpl, __global uint *output) { - int i = get_global_id(1); - int j = get_global_id(0); - int tiffword, rval, gval, bval; - - //Ignore the excess - if ((i >= h) || (j >= w)) - return; - - tiffword = tiffdata[i * w + j]; - rval = ((tiffword)&0xff); - gval = (((tiffword) >> 8) & 0xff); - bval = (((tiffword) >> 16) & 0xff); - output[i * wpl + j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2))); - }) - - KERNEL( -\n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword, const int wpl, const int h) { - const unsigned int row = get_global_id(1); - const unsigned int col = get_global_id(0); - const unsigned int pos = row * wpl + col; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - *(dword + pos) &= ~(*(sword + pos)); - }\n) - - KERNEL( -\n__kernel void morphoDilateHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) { - const unsigned int pos = get_global_id(0); - unsigned int prevword, nextword, currword, tempword; - unsigned int destword; - const int col = pos % wpl; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if (col == 0) - prevword = 0; - else - prevword = *(sword + pos - 1); - - if (col == (wpl - 1)) - nextword = 0; - else - nextword = *(sword + pos + 1); - - //Loop unrolled - - //1 bit to left and 1 bit to right - //Get the max value on LHS of every pixel - tempword = (prevword << (31)) | ((currword >> 1)); - destword |= tempword; - //Get max value on RHS of every pixel - tempword = (currword << 1) | (nextword >> (31)); - destword |= tempword; - - //2 bit to left and 2 bit to right - //Get the max value on LHS of every pixel - tempword = (prevword << (30)) | ((currword >> 2)); - destword |= tempword; - //Get max value on RHS of every pixel - tempword = (currword << 2) | (nextword >> (30)); - destword |= tempword; - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoDilateVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword; - unsigned int destword; - int i; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - - //2 words above - i = (row - 2) < 0 ? row : (row - 2); - tempword = *(sword + i * wpl + col); - destword |= tempword; - - //1 word above - i = (row - 1) < 0 ? row : (row - 1); - tempword = *(sword + i * wpl + col); - destword |= tempword; - - //1 word below - i = (row >= (h - 1)) ? row : (row + 1); - tempword = *(sword + i * wpl + col); - destword |= tempword; - - //2 words below - i = (row >= (h - 2)) ? row : (row + 2); - tempword = *(sword + i * wpl + col); - destword |= tempword; - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoDilateHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int parbitsxp, parbitsxn, nwords; - unsigned int destword, tempword, lastword, currword; - unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword; - int i, j, siter, eiter; - - //Ignore the execss - if (pos >= (wpl * h) || (xn < 1 && xp < 1)) - return; - - currword = *(sword + pos); - destword = currword; - - parbitsxp = xp & 31; - parbitsxn = xn & 31; - nwords = xp >> 5; - - if (parbitsxp > 0) - nwords += 1; - else - parbitsxp = 31; - - siter = (col - nwords); - eiter = (col + nwords); - - //Get prev word - if (col == 0) - firstword = 0x0; - else - firstword = *(sword + pos - 1); - - //Get next word - if (col == (wpl - 1)) - secondword = 0x0; - else - secondword = *(sword + pos + 1); - - //Last partial bits on either side - for (i = 1; i <= parbitsxp; i++) { - //Get the max value on LHS of every pixel - tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32 - i)) | ((currword >> i)); - - destword |= tempword; - - //Get max value on RHS of every pixel - tempword = (currword << i) | (secondword >> (32 - i)); - destword |= tempword; - } - - //Return if halfwidth <= 1 word - if (nwords == 1) { - if (xn == 32) { - destword |= firstword; - } - if (xp == 32) { - destword |= secondword; - } - - *(dword + pos) = destword; - return; - } - - if (siter < 0) - firstword = 0x0; - else - firstword = *(sword + row * wpl + siter); - - if (eiter >= wpl) - lastword = 0x0; - else - lastword = *(sword + row * wpl + eiter); - - for (i = 1; i < nwords; i++) { - //Gets LHS words - if ((siter + i) < 0) - secondword = 0x0; - else - secondword = *(sword + row * wpl + siter + i); - - lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn; - - firstword = secondword; - - if ((siter + i + 1) < 0) - secondword = 0x0; - else - secondword = *(sword + row * wpl + siter + i + 1); - - lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn; - - //Gets RHS words - if ((eiter - i) >= wpl) - firstword = 0x0; - else - firstword = *(sword + row * wpl + eiter - i); - - rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp); - - lastword = firstword; - if ((eiter - i - 1) >= wpl) - firstword = 0x0; - else - firstword = *(sword + row * wpl + eiter - i - 1); - - rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp); - - for (j = 1; j < 32; j++) { - //OR LHS full words - tempword = (lprevword << j) | (lnextword >> (32 - j)); - destword |= tempword; - - //OR RHS full words - tempword = (rprevword << j) | (rnextword >> (32 - j)); - destword |= tempword; - } - - destword |= lprevword; - destword |= lnextword; - destword |= rprevword; - destword |= rnextword; - - lastword = firstword; - firstword = secondword; - } - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoDilateHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char isEven) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int prevword, nextword, currword, tempword; - unsigned int destword; - int i; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if (col == 0) - prevword = 0; - else - prevword = *(sword + pos - 1); - - if (col == (wpl - 1)) - nextword = 0; - else - nextword = *(sword + pos + 1); - - for (i = 1; i <= halfwidth; i++) { - //Get the max value on LHS of every pixel - if (i == halfwidth && isEven) { - tempword = 0x0; - } else { - tempword = (prevword << (32 - i)) | ((currword >> i)); - } - - destword |= tempword; - - //Get max value on RHS of every pixel - tempword = (currword << i) | (nextword >> (32 - i)); - - destword |= tempword; - } - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoDilateVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const int yn) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword; - unsigned int destword; - int i, siter, eiter; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - - //Set start position and end position considering the boundary conditions - siter = (row - yn) < 0 ? 0 : (row - yn); - eiter = (row >= (h - yp)) ? (h - 1) : (row + yp); - - for (i = siter; i <= eiter; i++) { - tempword = *(sword + i * wpl + col); - - destword |= tempword; - } - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoErodeHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) { - const unsigned int pos = get_global_id(0); - unsigned int prevword, nextword, currword, tempword; - unsigned int destword; - const int col = pos % wpl; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if (col == 0) - prevword = 0xffffffff; - else - prevword = *(sword + pos - 1); - - if (col == (wpl - 1)) - nextword = 0xffffffff; - else - nextword = *(sword + pos + 1); - - //Loop unrolled - - //1 bit to left and 1 bit to right - //Get the min value on LHS of every pixel - tempword = (prevword << (31)) | ((currword >> 1)); - destword &= tempword; - //Get min value on RHS of every pixel - tempword = (currword << 1) | (nextword >> (31)); - destword &= tempword; - - //2 bit to left and 2 bit to right - //Get the min value on LHS of every pixel - tempword = (prevword << (30)) | ((currword >> 2)); - destword &= tempword; - //Get min value on RHS of every pixel - tempword = (currword << 2) | (nextword >> (30)); - destword &= tempword; - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoErodeVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h, const int fwmask, const int lwmask) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword; - unsigned int destword; - int i; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - - if (row < 2 || row >= (h - 2)) { - destword = 0x0; - } else { - //2 words above - //i = (row - 2) < 0 ? row : (row - 2); - i = (row - 2); - tempword = *(sword + i * wpl + col); - destword &= tempword; - - //1 word above - //i = (row - 1) < 0 ? row : (row - 1); - i = (row - 1); - tempword = *(sword + i * wpl + col); - destword &= tempword; - - //1 word below - //i = (row >= (h - 1)) ? row : (row + 1); - i = (row + 1); - tempword = *(sword + i * wpl + col); - destword &= tempword; - - //2 words below - //i = (row >= (h - 2)) ? row : (row + 2); - i = (row + 2); - tempword = *(sword + i * wpl + col); - destword &= tempword; - - if (col == 0) { - destword &= fwmask; - } - if (col == (wpl - 1)) { - destword &= lwmask; - } - } - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoErodeHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h, const char isAsymmetric, const int rwmask, const int lwmask) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int parbitsxp, parbitsxn, nwords; - unsigned int destword, tempword, lastword, currword; - unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword; - int i, j, siter, eiter; - - //Ignore the execss - if (pos >= (wpl * h) || (xn < 1 && xp < 1)) - return; - - currword = *(sword + pos); - destword = currword; - - parbitsxp = xp & 31; - parbitsxn = xn & 31; - nwords = xp >> 5; - - if (parbitsxp > 0) - nwords += 1; - else - parbitsxp = 31; - - siter = (col - nwords); - eiter = (col + nwords); - - //Get prev word - if (col == 0) - firstword = 0xffffffff; - else - firstword = *(sword + pos - 1); - - //Get next word - if (col == (wpl - 1)) - secondword = 0xffffffff; - else - secondword = *(sword + pos + 1); - - //Last partial bits on either side - for (i = 1; i <= parbitsxp; i++) { - //Get the max value on LHS of every pixel - tempword = (firstword << (32 - i)) | ((currword >> i)); - destword &= tempword; - - //Get max value on RHS of every pixel - tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i)); - - //tempword = (currword << i) | (secondword >> (32 - i)); - destword &= tempword; - } - - //Return if halfwidth <= 1 word - if (nwords == 1) { - if (xp == 32) { - destword &= firstword; - } - if (xn == 32) { - destword &= secondword; - } - - //Clear boundary pixels - if (isAsymmetric) { - if (col == 0) - destword &= rwmask; - if (col == (wpl - 1)) - destword &= lwmask; - } - - *(dword + pos) = destword; - return; - } - - if (siter < 0) - firstword = 0xffffffff; - else - firstword = *(sword + row * wpl + siter); - - if (eiter >= wpl) - lastword = 0xffffffff; - else - lastword = *(sword + row * wpl + eiter); - - for (i = 1; i < nwords; i++) { - //Gets LHS words - if ((siter + i) < 0) - secondword = 0xffffffff; - else - secondword = *(sword + row * wpl + siter + i); - - lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp); - - firstword = secondword; - - if ((siter + i + 1) < 0) - secondword = 0xffffffff; - else - secondword = *(sword + row * wpl + siter + i + 1); - - lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp); - - //Gets RHS words - if ((eiter - i) >= wpl) - firstword = 0xffffffff; - else - firstword = *(sword + row * wpl + eiter - i); - - rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn); - - lastword = firstword; - if ((eiter - i - 1) >= wpl) - firstword = 0xffffffff; - else - firstword = *(sword + row * wpl + eiter - i - 1); - - rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn); - - for (j = 0; j < 32; j++) { - //OR LHS full words - tempword = (lprevword << j) | (lnextword >> (32 - j)); - destword &= tempword; - - //OR RHS full words - tempword = (rprevword << j) | (rnextword >> (32 - j)); - destword &= tempword; - } - - destword &= lprevword; - destword &= lnextword; - destword &= rprevword; - destword &= rnextword; - - lastword = firstword; - firstword = secondword; - } - - if (isAsymmetric) { - //Clear boundary pixels - if (col < (nwords - 1)) - destword = 0x0; - else if (col == (nwords - 1)) - destword &= rwmask; - else if (col > (wpl - nwords)) - destword = 0x0; - else if (col == (wpl - nwords)) - destword &= lwmask; - } - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoErodeHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char clearBoundPixH, const int rwmask, const int lwmask, const char isEven) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int prevword, nextword, currword, tempword, destword; - int i; - - //Ignore the execss - if (pos >= (wpl * h)) - return; - - currword = *(sword + pos); - destword = currword; - - //Handle boundary conditions - if (col == 0) - prevword = 0xffffffff; - else - prevword = *(sword + pos - 1); - - if (col == (wpl - 1)) - nextword = 0xffffffff; - else - nextword = *(sword + pos + 1); - - for (i = 1; i <= halfwidth; i++) { - //Get the min value on LHS of every pixel - tempword = (prevword << (32 - i)) | ((currword >> i)); - - destword &= tempword; - - //Get min value on RHS of every pixel - if (i == halfwidth && isEven) { - tempword = 0xffffffff; - } else { - tempword = (currword << i) | (nextword >> (32 - i)); - } - - destword &= tempword; - } - - if (clearBoundPixH) { - if (col == 0) { - destword &= rwmask; - } else if (col == (wpl - 1)) { - destword &= lwmask; - } - } - - *(dword + pos) = destword; - }\n) - - KERNEL( -\n__kernel void morphoErodeVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const char clearBoundPixV, const int yn) { - const int col = get_global_id(0); - const int row = get_global_id(1); - const unsigned int pos = row * wpl + col; - unsigned int tempword, destword; - int i, siter, eiter; - - //Ignore the execss - if (row >= h || col >= wpl) - return; - - destword = *(sword + pos); - - //Set start position and end position considering the boundary conditions - siter = (row - yp) < 0 ? 0 : (row - yp); - eiter = (row >= (h - yn)) ? (h - 1) : (row + yn); - - for (i = siter; i <= eiter; i++) { - tempword = *(sword + i * wpl + col); - - destword &= tempword; - } - - //Clear boundary pixels - if (clearBoundPixV && ((row < yp) || ((h - row) <= yn))) { - destword = 0x0; - } - - *(dword + pos) = destword; - }\n) - - // HistogramRect Kernel: Accumulate - // assumes 4 channels, i.e., bytes_per_pixel = 4 - // assumes number of pixels is multiple of 8 - // data is laid out as - // ch0 ch1 ... - // bin0 bin1 bin2... bin0... - // rpt0,1,2...256 rpt0,1,2... - KERNEL( -\n #define HIST_REDUNDANCY 256\n -\n #define GROUP_SIZE 256\n -\n #define HIST_SIZE 256\n -\n #define NUM_CHANNELS 4\n -\n #define HR_UNROLL_SIZE 8 \n -\n #define HR_UNROLL_TYPE uchar8 \n - - __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannels(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) { - // declare variables - uchar8 pixels; - int threadOffset = get_global_id(0) % HIST_REDUNDANCY; - - // for each pixel/channel, accumulate in global memory - for (uint pc = get_global_id(0); pc < numPixels * NUM_CHANNELS / HR_UNROLL_SIZE; pc += get_global_size(0)) { - pixels = data[pc]; - // channel bin thread - atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s0 * HIST_REDUNDANCY + threadOffset]); // ch0 - atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s4 * HIST_REDUNDANCY + threadOffset]); // ch0 - atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s1 * HIST_REDUNDANCY + threadOffset]); // ch1 - atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s5 * HIST_REDUNDANCY + threadOffset]); // ch1 - atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s2 * HIST_REDUNDANCY + threadOffset]); // ch2 - atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s6 * HIST_REDUNDANCY + threadOffset]); // ch2 - atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s3 * HIST_REDUNDANCY + threadOffset]); // ch3 - atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s7 * HIST_REDUNDANCY + threadOffset]); // ch3 - } - }) - - KERNEL( - // NUM_CHANNELS = 1 - __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannel(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) { - // declare variables - uchar8 pixels; - int threadOffset = get_global_id(0) % HIST_REDUNDANCY; - - // for each pixel/channel, accumulate in global memory - for (uint pc = get_global_id(0); pc < numPixels / HR_UNROLL_SIZE; pc += get_global_size(0)) { - pixels = data[pc]; - // bin thread - atomic_inc(&histBuffer[pixels.s0 * HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s1 * HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s2 * HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s3 * HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s4 * HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s5 * HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s6 * HIST_REDUNDANCY + threadOffset]); - atomic_inc(&histBuffer[pixels.s7 * HIST_REDUNDANCY + threadOffset]); - } - }) - - // HistogramRect Kernel: Reduction - // only supports 4 channels - // each work group handles a single channel of a single histogram bin - KERNEL(__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannelsReduction(int n, // unused pixel redundancy - __global uint *histBuffer, __global int *histResult) { - // declare variables - int channel = get_group_id(0) / HIST_SIZE; - int bin = get_group_id(0) % HIST_SIZE; - int value = 0; - - // accumulate in register - for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) { - value += histBuffer[channel * HIST_SIZE * HIST_REDUNDANCY + bin * HIST_REDUNDANCY + i]; - } - - // reduction in local memory - __local int localHist[GROUP_SIZE]; - localHist[get_local_id(0)] = value; - barrier(CLK_LOCAL_MEM_FENCE); - for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) { - if (get_local_id(0) < stride) { - value = localHist[get_local_id(0) + stride]; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (get_local_id(0) < stride) { - localHist[get_local_id(0)] += value; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - // write reduction to final result - if (get_local_id(0) == 0) { - histResult[get_group_id(0)] = localHist[0]; - } - } // kernel_HistogramRectAllChannels - ) - - KERNEL( - // NUM_CHANNELS = 1 - __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannelReduction(int n, // unused pixel redundancy - __global uint *histBuffer, __global int *histResult) { - // declare variables - // int channel = get_group_id(0)/HIST_SIZE; - int bin = get_group_id(0) % HIST_SIZE; - int value = 0; - - // accumulate in register - for (int i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) { - value += histBuffer[bin * HIST_REDUNDANCY + i]; - } - - // reduction in local memory - __local int localHist[GROUP_SIZE]; - localHist[get_local_id(0)] = value; - barrier(CLK_LOCAL_MEM_FENCE); - for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) { - if (get_local_id(0) < stride) { - value = localHist[get_local_id(0) + stride]; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (get_local_id(0) < stride) { - localHist[get_local_id(0)] += value; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - // write reduction to final result - if (get_local_id(0) == 0) { - histResult[get_group_id(0)] = localHist[0]; - } - } // kernel_HistogramRectOneChannelReduction - ) - - // ThresholdRectToPix Kernel - // only supports 4 channels - // imageData is input image (24-bits/pixel) - // pix is output image (1-bit/pixel) - KERNEL( -\n #define CHAR_VEC_WIDTH 4 \n -\n #define PIXELS_PER_WORD 32 \n -\n #define PIXELS_PER_BURST 8 \n -\n #define BURSTS_PER_WORD (PIXELS_PER_WORD)/(PIXELS_PER_BURST) \n -\n typedef union { - uchar s[PIXELS_PER_BURST * NUM_CHANNELS]; - uchar4 v[(PIXELS_PER_BURST * NUM_CHANNELS) / CHAR_VEC_WIDTH]; - } charVec; - - __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix(__global const uchar4 *imageData, int height, int width, - int wpl, // words per line - __global int *thresholds, __global int *hi_values, __global int *pix) { - // declare variables - int pThresholds[NUM_CHANNELS]; - int pHi_Values[NUM_CHANNELS]; - for (int i = 0; i < NUM_CHANNELS; i++) { - pThresholds[i] = thresholds[i]; - pHi_Values[i] = hi_values[i]; - } - - // for each word (32 pixels) in output image - for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) { - unsigned int word = 0; // all bits start at zero - // for each burst in word - for (int b = 0; b < BURSTS_PER_WORD; b++) { - // load burst - charVec pixels; - int offset = (w / wpl) * width; - offset += (w % wpl) * PIXELS_PER_WORD; - offset += b * PIXELS_PER_BURST; - - for (int i = 0; i < PIXELS_PER_BURST; ++i) - pixels.v[i] = imageData[offset + i]; - - // for each pixel in burst - for (int p = 0; p < PIXELS_PER_BURST; p++) { - for (int c = 0; c < NUM_CHANNELS; c++) { - unsigned char pixChan = pixels.s[p * NUM_CHANNELS + c]; - if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) { - const uint kTopBit = 0x80000000; - word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31)); - } - } - } - } - pix[w] = word; - } - } - -\n #define CHAR_VEC_WIDTH 8 \n -\n #define PIXELS_PER_WORD 32 \n -\n #define PIXELS_PER_BURST 8 \n -\n #define BURSTS_PER_WORD (PIXELS_PER_WORD) / (PIXELS_PER_BURST) \n -\n typedef union { - uchar s[PIXELS_PER_BURST * 1]; - uchar8 v[(PIXELS_PER_BURST * 1) / CHAR_VEC_WIDTH]; - } charVec1; - - __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix_OneChan(__global const uchar8 *imageData, int height, int width, - int wpl, // words per line of output image - __global int *thresholds, __global int *hi_values, __global int *pix) { - // declare variables - int pThresholds[1]; - int pHi_Values[1]; - for (int i = 0; i < 1; i++) { - pThresholds[i] = thresholds[i]; - pHi_Values[i] = hi_values[i]; - } - - // for each word (32 pixels) in output image - for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) { - unsigned int word = 0; // all bits start at zero - - // for each burst in word - for (int b = 0; b < BURSTS_PER_WORD; b++) { - // load burst - charVec1 pixels; - // for each char8 in burst - pixels.v[0] = imageData[w * BURSTS_PER_WORD + b + 0]; - - // for each pixel in burst - for (int p = 0; p < PIXELS_PER_BURST; p++) { - //int littleEndianIdx = p ^ 3; - //int bigEndianIdx = p; - int idx = -\n #ifdef __ENDIAN_LITTLE__\n - p ^ - 3; - \n #else \n - p; - \n #endif \n unsigned char pixChan = pixels.s[idx]; - if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) { - const uint kTopBit = 0x80000000; - word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31)); - } - } - } - pix[w] = word; - } - }) - - ; // close char* - -# endif // USE_EXTERNAL_KERNEL -#endif // TESSERACT_OPENCL_OCLKERNELS_H_ -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff -Nru tesseract-5.3.4+git6348-2b07505e/src/opencl/openclwrapper.cpp tesseract-5.3.4+git6361-d4618678/src/opencl/openclwrapper.cpp --- tesseract-5.3.4+git6348-2b07505e/src/opencl/openclwrapper.cpp 2024-03-17 20:39:02.345838800 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/opencl/openclwrapper.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2466 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Include automatically generated configuration file -#ifdef HAVE_CONFIG_H -# include "config_auto.h" -#endif - -#ifdef USE_OPENCL - -# ifdef _WIN32 -# include -# include -# else -# include -# include -# endif -# include -# include // for clock_gettime - -# include "oclkernels.h" -# include "openclwrapper.h" - -// for micro-benchmark -# include "otsuthr.h" -# include "thresholder.h" - -// platform preprocessor commands -# if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(__CYGWIN__) || \ - defined(__MINGW32__) -# define ON_WINDOWS 1 -# define ON_APPLE 0 -# elif defined(__linux__) -# define ON_WINDOWS 0 -# define ON_APPLE 0 -# elif defined(__APPLE__) -# define ON_WINDOWS 0 -# define ON_APPLE 1 -# else -# define ON_WINDOWS 0 -# define ON_APPLE 0 -# endif - -# if ON_APPLE -# include -# endif - -# include -# include -# include // for memset, strcpy, ... -# include - -# include "errcode.h" // for ASSERT_HOST -# include "image.h" // for Image - -namespace tesseract { - -GPUEnv OpenclDevice::gpuEnv; - -bool OpenclDevice::deviceIsSelected = false; -ds_device OpenclDevice::selectedDevice; - -int OpenclDevice::isInited = 0; - -static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC; - -static const l_uint32 lmask32[] = { - 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000, 0xfe000000, 0xff000000, - 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, - 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00, - 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff}; - -static const l_uint32 rmask32[] = { - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, - 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, - 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff}; - -static cl_mem pixsCLBuffer, pixdCLBuffer, - pixdCLIntermediate; // Morph operations buffers -static cl_mem pixThBuffer; // output from thresholdtopix calculation -static cl_int clStatus; -static KernelEnv rEnv; - -# define DS_TAG_VERSION "" -# define DS_TAG_VERSION_END "" -# define DS_TAG_DEVICE "" -# define DS_TAG_DEVICE_END "" -# define DS_TAG_SCORE "" -# define DS_TAG_SCORE_END "" -# define DS_TAG_DEVICE_TYPE "" -# define DS_TAG_DEVICE_TYPE_END "" -# define DS_TAG_DEVICE_NAME "" -# define DS_TAG_DEVICE_NAME_END "" -# define DS_TAG_DEVICE_DRIVER_VERSION "" -# define DS_TAG_DEVICE_DRIVER_VERSION_END "" - -# define DS_DEVICE_NATIVE_CPU_STRING "native_cpu" - -# define DS_DEVICE_NAME_LENGTH 256 - -enum ds_evaluation_type { DS_EVALUATE_ALL, DS_EVALUATE_NEW_ONLY }; - -struct ds_profile { - std::vector devices; - unsigned int numDevices; - const char *version; -}; - -enum ds_status { - DS_SUCCESS = 0, - DS_INVALID_PROFILE = 1000, - DS_MEMORY_ERROR, - DS_INVALID_PERF_EVALUATOR_TYPE, - DS_INVALID_PERF_EVALUATOR, - DS_PERF_EVALUATOR_ERROR, - DS_FILE_ERROR, - DS_UNKNOWN_DEVICE_TYPE, - DS_PROFILE_FILE_ERROR, - DS_SCORE_SERIALIZER_ERROR, - DS_SCORE_DESERIALIZER_ERROR -}; - -// Pointer to a function that calculates the score of a device (ex: -// device->score) update the data size of score. The encoding and the format -// of the score data is implementation defined. The function should return -// DS_SUCCESS if there's no error to be reported. -typedef ds_status (*ds_perf_evaluator)(ds_device *device, void *data); - -// deallocate memory used by score -typedef ds_status (*ds_score_release)(TessDeviceScore *score); - -static ds_status releaseDSProfile(ds_profile *profile, ds_score_release sr) { - ds_status status = DS_SUCCESS; - if (profile != nullptr) { - if (sr != nullptr) { - unsigned int i; - for (i = 0; i < profile->numDevices; i++) { - free(profile->devices[i].oclDeviceName); - free(profile->devices[i].oclDriverVersion); - status = sr(profile->devices[i].score); - if (status != DS_SUCCESS) - break; - } - } - delete profile; - } - return status; -} - -static ds_status initDSProfile(ds_profile **p, const char *version) { - int numDevices; - cl_uint numPlatforms; - std::vector platforms; - std::vector devices; - ds_status status = DS_SUCCESS; - unsigned int next; - unsigned int i; - - if (p == nullptr) - return DS_INVALID_PROFILE; - - ds_profile *profile = new ds_profile; - - memset(profile, 0, sizeof(ds_profile)); - - clGetPlatformIDs(0, nullptr, &numPlatforms); - - if (numPlatforms > 0) { - platforms.resize(numPlatforms); - clGetPlatformIDs(numPlatforms, platforms.data(), nullptr); - } - - numDevices = 0; - for (i = 0; i < numPlatforms; i++) { - cl_uint num; - clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &num); - numDevices += num; - } - - if (numDevices > 0) { - devices.resize(numDevices); - } - - profile->numDevices = numDevices + 1; // +1 to numDevices to include the native CPU - profile->devices.resize(profile->numDevices); - - next = 0; - for (i = 0; i < numPlatforms; i++) { - cl_uint num; - unsigned j; - clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, &devices[0], &num); - for (j = 0; j < num; j++, next++) { - char buffer[DS_DEVICE_NAME_LENGTH]; - size_t length; - - profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE; - profile->devices[next].oclDeviceID = devices[j]; - - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME, DS_DEVICE_NAME_LENGTH, - &buffer, nullptr); - length = strlen(buffer); - profile->devices[next].oclDeviceName = (char *)malloc(length + 1); - memcpy(profile->devices[next].oclDeviceName, buffer, length + 1); - - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION, DS_DEVICE_NAME_LENGTH, - &buffer, nullptr); - length = strlen(buffer); - profile->devices[next].oclDriverVersion = (char *)malloc(length + 1); - memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1); - } - } - profile->devices[next].type = DS_DEVICE_NATIVE_CPU; - profile->version = version; - - *p = profile; - return status; -} - -static ds_status profileDevices(ds_profile *profile, const ds_evaluation_type type, - ds_perf_evaluator evaluator, void *evaluatorData, - unsigned int *numUpdates) { - ds_status status = DS_SUCCESS; - unsigned int i; - unsigned int updates = 0; - - if (profile == nullptr) { - return DS_INVALID_PROFILE; - } - if (evaluator == nullptr) { - return DS_INVALID_PERF_EVALUATOR; - } - - for (i = 0; i < profile->numDevices; i++) { - ds_status evaluatorStatus; - - switch (type) { - case DS_EVALUATE_NEW_ONLY: - if (profile->devices[i].score != nullptr) - break; - // else fall through - case DS_EVALUATE_ALL: - evaluatorStatus = evaluator(&profile->devices[i], evaluatorData); - if (evaluatorStatus != DS_SUCCESS) { - status = evaluatorStatus; - return status; - } - updates++; - break; - default: - return DS_INVALID_PERF_EVALUATOR_TYPE; - break; - }; - } - if (numUpdates) - *numUpdates = updates; - return status; -} - -static const char *findString(const char *contentStart, const char *contentEnd, - const char *string) { - size_t stringLength; - const char *currentPosition; - const char *found = nullptr; - stringLength = strlen(string); - currentPosition = contentStart; - for (currentPosition = contentStart; currentPosition < contentEnd; currentPosition++) { - if (*currentPosition == string[0]) { - if (currentPosition + stringLength < contentEnd) { - if (strncmp(currentPosition, string, stringLength) == 0) { - found = currentPosition; - break; - } - } - } - } - return found; -} - -static ds_status readProFile(const char *fileName, char **content, size_t *contentSize) { - *contentSize = 0; - *content = nullptr; - ds_status status = DS_SUCCESS; - FILE *input = fopen(fileName, "rb"); - if (input == nullptr) { - status = DS_FILE_ERROR; - } else { - fseek(input, 0L, SEEK_END); - auto pos = std::ftell(input); - rewind(input); - if (pos > 0) { - size_t size = pos; - char *binary = new char[size]; - if (fread(binary, sizeof(char), size, input) != size) { - status = DS_FILE_ERROR; - delete[] binary; - } else { - *contentSize = size; - *content = binary; - } - } - fclose(input); - } - return status; -} - -typedef ds_status (*ds_score_deserializer)(ds_device *device, const uint8_t *serializedScore, - unsigned int serializedScoreSize); - -static ds_status readProfileFromFile(ds_profile *profile, ds_score_deserializer deserializer, - const char *file) { - ds_status status = DS_SUCCESS; - char *contentStart; - size_t contentSize; - - if (profile == nullptr) - return DS_INVALID_PROFILE; - - status = readProFile(file, &contentStart, &contentSize); - if (status == DS_SUCCESS) { - const char *currentPosition; - const char *dataStart; - const char *dataEnd; - - const char *contentEnd = contentStart + contentSize; - currentPosition = contentStart; - - // parse the version string - dataStart = findString(currentPosition, contentEnd, DS_TAG_VERSION); - if (dataStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - dataStart += strlen(DS_TAG_VERSION); - - dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END); - if (dataEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - size_t versionStringLength = strlen(profile->version); - if (versionStringLength + dataStart != dataEnd || - strncmp(profile->version, dataStart, versionStringLength) != 0) { - // version mismatch - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - currentPosition = dataEnd + strlen(DS_TAG_VERSION_END); - - // parse the device information - while (1) { - unsigned int i; - - const char *deviceTypeStart; - const char *deviceTypeEnd; - ds_device_type deviceType; - - const char *deviceNameStart; - const char *deviceNameEnd; - - const char *deviceScoreStart; - const char *deviceScoreEnd; - - const char *deviceDriverStart; - const char *deviceDriverEnd; - - dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE); - if (dataStart == nullptr) { - // nothing useful remain, quit... - break; - } - dataStart += strlen(DS_TAG_DEVICE); - dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END); - if (dataEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - // parse the device type - deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE); - if (deviceTypeStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceTypeStart += strlen(DS_TAG_DEVICE_TYPE); - deviceTypeEnd = findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END); - if (deviceTypeEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - memcpy(&deviceType, deviceTypeStart, sizeof(ds_device_type)); - - // parse the device name - if (deviceType == DS_DEVICE_OPENCL_DEVICE) { - deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME); - if (deviceNameStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceNameStart += strlen(DS_TAG_DEVICE_NAME); - deviceNameEnd = findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END); - if (deviceNameEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - deviceDriverStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION); - if (deviceDriverStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceDriverStart += strlen(DS_TAG_DEVICE_DRIVER_VERSION); - deviceDriverEnd = - findString(deviceDriverStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION_END); - if (deviceDriverEnd == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - - // check if this device is on the system - for (i = 0; i < profile->numDevices; i++) { - if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) { - size_t actualDeviceNameLength; - size_t driverVersionLength; - - actualDeviceNameLength = strlen(profile->devices[i].oclDeviceName); - driverVersionLength = strlen(profile->devices[i].oclDriverVersion); - if (deviceNameStart + actualDeviceNameLength == deviceNameEnd && - deviceDriverStart + driverVersionLength == deviceDriverEnd && - strncmp(profile->devices[i].oclDeviceName, deviceNameStart, - actualDeviceNameLength) == 0 && - strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart, - driverVersionLength) == 0) { - deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); - deviceScoreStart += strlen(DS_TAG_SCORE); - deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); - status = deserializer(&profile->devices[i], (const unsigned char *)deviceScoreStart, - deviceScoreEnd - deviceScoreStart); - if (status != DS_SUCCESS) { - goto cleanup; - } - } - } - } - } else if (deviceType == DS_DEVICE_NATIVE_CPU) { - for (i = 0; i < profile->numDevices; i++) { - if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) { - deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); - if (deviceScoreStart == nullptr) { - status = DS_PROFILE_FILE_ERROR; - goto cleanup; - } - deviceScoreStart += strlen(DS_TAG_SCORE); - deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); - status = deserializer(&profile->devices[i], (const unsigned char *)deviceScoreStart, - deviceScoreEnd - deviceScoreStart); - if (status != DS_SUCCESS) { - goto cleanup; - } - } - } - } - - // skip over the current one to find the next device - currentPosition = dataEnd + strlen(DS_TAG_DEVICE_END); - } - } -cleanup: - delete[] contentStart; - return status; -} - -typedef ds_status (*ds_score_serializer)(ds_device *device, uint8_t **serializedScore, - unsigned int *serializedScoreSize); -static ds_status writeProfileToFile(ds_profile *profile, ds_score_serializer serializer, - const char *file) { - ds_status status = DS_SUCCESS; - - if (profile == nullptr) - return DS_INVALID_PROFILE; - - FILE *profileFile = fopen(file, "wb"); - if (profileFile == nullptr) { - status = DS_FILE_ERROR; - } else { - unsigned int i; - - // write version string - fwrite(DS_TAG_VERSION, sizeof(char), strlen(DS_TAG_VERSION), profileFile); - fwrite(profile->version, sizeof(char), strlen(profile->version), profileFile); - fwrite(DS_TAG_VERSION_END, sizeof(char), strlen(DS_TAG_VERSION_END), profileFile); - fwrite("\n", sizeof(char), 1, profileFile); - - for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++) { - uint8_t *serializedScore; - unsigned int serializedScoreSize; - - fwrite(DS_TAG_DEVICE, sizeof(char), strlen(DS_TAG_DEVICE), profileFile); - - fwrite(DS_TAG_DEVICE_TYPE, sizeof(char), strlen(DS_TAG_DEVICE_TYPE), profileFile); - fwrite(&profile->devices[i].type, sizeof(ds_device_type), 1, profileFile); - fwrite(DS_TAG_DEVICE_TYPE_END, sizeof(char), strlen(DS_TAG_DEVICE_TYPE_END), profileFile); - - switch (profile->devices[i].type) { - case DS_DEVICE_NATIVE_CPU: { - // There's no need to emit a device name for the native CPU device. - /* -fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), - profileFile); -fwrite(DS_DEVICE_NATIVE_CPU_STRING,sizeof(char), - strlen(DS_DEVICE_NATIVE_CPU_STRING), profileFile); -fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), - strlen(DS_TAG_DEVICE_NAME_END), profileFile); -*/ - } break; - case DS_DEVICE_OPENCL_DEVICE: { - fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), profileFile); - fwrite(profile->devices[i].oclDeviceName, sizeof(char), - strlen(profile->devices[i].oclDeviceName), profileFile); - fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), strlen(DS_TAG_DEVICE_NAME_END), profileFile); - - fwrite(DS_TAG_DEVICE_DRIVER_VERSION, sizeof(char), strlen(DS_TAG_DEVICE_DRIVER_VERSION), - profileFile); - fwrite(profile->devices[i].oclDriverVersion, sizeof(char), - strlen(profile->devices[i].oclDriverVersion), profileFile); - fwrite(DS_TAG_DEVICE_DRIVER_VERSION_END, sizeof(char), - strlen(DS_TAG_DEVICE_DRIVER_VERSION_END), profileFile); - } break; - default: - status = DS_UNKNOWN_DEVICE_TYPE; - continue; - }; - - fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile); - status = serializer(&profile->devices[i], &serializedScore, &serializedScoreSize); - if (status == DS_SUCCESS && serializedScore != nullptr && serializedScoreSize > 0) { - fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile); - delete[] serializedScore; - } - fwrite(DS_TAG_SCORE_END, sizeof(char), strlen(DS_TAG_SCORE_END), profileFile); - fwrite(DS_TAG_DEVICE_END, sizeof(char), strlen(DS_TAG_DEVICE_END), profileFile); - fwrite("\n", sizeof(char), 1, profileFile); - } - fclose(profileFile); - } - return status; -} - -// substitute invalid characters in device name with _ -static void legalizeFileName(char *fileName) { - // tprintf("fileName: %s\n", fileName); - const char *invalidChars = "/\?:*\"><| "; // space is valid but can cause headaches - // for each invalid char - for (unsigned i = 0; i < strlen(invalidChars); i++) { - char invalidStr[4]; - invalidStr[0] = invalidChars[i]; - invalidStr[1] = '\0'; - // tprintf("eliminating %s\n", invalidStr); - // char *pos = strstr(fileName, invalidStr); - // initial ./ is valid for present directory - // if (*pos == '.') pos++; - // if (*pos == '/') pos++; - for (char *pos = strstr(fileName, invalidStr); pos != nullptr; - pos = strstr(pos + 1, invalidStr)) { - // tprintf("\tfound: %s, ", pos); - pos[0] = '_'; - // tprintf("fileName: %s\n", fileName); - } - } -} - -static void populateGPUEnvFromDevice(GPUEnv *gpuInfo, cl_device_id device) { - // tprintf("[DS] populateGPUEnvFromDevice\n"); - size_t size; - gpuInfo->mnIsUserCreated = 1; - // device - gpuInfo->mpDevID = device; - gpuInfo->mpArryDevsID = new cl_device_id[1]; - gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID; - clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE, sizeof(cl_device_type), - &gpuInfo->mDevType, &size); - CHECK_OPENCL(clStatus, "populateGPUEnv::getDeviceInfo(TYPE)"); - // platform - clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), - &gpuInfo->mpPlatformID, &size); - CHECK_OPENCL(clStatus, "populateGPUEnv::getDeviceInfo(PLATFORM)"); - // context - cl_context_properties props[3]; - props[0] = CL_CONTEXT_PLATFORM; - props[1] = (cl_context_properties)gpuInfo->mpPlatformID; - props[2] = 0; - gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "populateGPUEnv::createContext"); - // queue - cl_command_queue_properties queueProperties = 0; - gpuInfo->mpCmdQueue = - clCreateCommandQueue(gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus); - CHECK_OPENCL(clStatus, "populateGPUEnv::createCommandQueue"); -} - -int OpenclDevice::LoadOpencl() { -# ifdef WIN32 - HINSTANCE HOpenclDll = nullptr; - void *OpenclDll = nullptr; - // fprintf(stderr, " LoadOpenclDllxx... \n"); - OpenclDll = static_cast(HOpenclDll); - OpenclDll = LoadLibrary("openCL.dll"); - if (!static_cast(OpenclDll)) { - fprintf(stderr, "[OD] Load opencl.dll failed!\n"); - FreeLibrary(static_cast(OpenclDll)); - return 0; - } - fprintf(stderr, "[OD] Load opencl.dll successful!\n"); -# endif - return 1; -} -int OpenclDevice::SetKernelEnv(KernelEnv *envInfo) { - envInfo->mpkContext = gpuEnv.mpContext; - envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue; - envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; - - return 1; -} - -static cl_mem allocateZeroCopyBuffer(const KernelEnv &rEnv, l_uint32 *hostbuffer, size_t nElements, - cl_mem_flags flags, cl_int *pStatus) { - cl_mem membuffer = clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(flags), - nElements * sizeof(l_uint32), hostbuffer, pStatus); - - return membuffer; -} - -static Image mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Image pixd, Image pixs, - int elements, cl_mem_flags flags, bool memcopy = false, - bool sync = true) { - if (!pixd) { - if (memcopy) { - if ((pixd = pixCreateTemplate(pixs)) == nullptr) - tprintf("pixd not made\n"); - } else { - if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), pixGetDepth(pixs))) == - nullptr) - tprintf("pixd not made\n"); - } - } - l_uint32 *pValues = - (l_uint32 *)clEnqueueMapBuffer(rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0, - elements * sizeof(l_uint32), 0, nullptr, nullptr, nullptr); - - if (memcopy) { - memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32)); - } else { - pixSetData(pixd, pValues); - } - - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, nullptr, nullptr); - - if (sync) { - clFinish(rEnv.mpkCmdQueue); - } - - return pixd; -} - -void OpenclDevice::releaseMorphCLBuffers() { - if (pixdCLIntermediate != nullptr) - clReleaseMemObject(pixdCLIntermediate); - if (pixsCLBuffer != nullptr) - clReleaseMemObject(pixsCLBuffer); - if (pixdCLBuffer != nullptr) - clReleaseMemObject(pixdCLBuffer); - if (pixThBuffer != nullptr) - clReleaseMemObject(pixThBuffer); - pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr; -} - -int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs) { - SetKernelEnv(&rEnv); - - if (pixThBuffer != nullptr) { - pixsCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus); - - // Get the output from ThresholdToPix operation - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0, - sizeof(l_uint32) * wpl * h, 0, nullptr, nullptr); - } else { - // Get data from the source image - l_uint32 *srcdata = reinterpret_cast(malloc(wpl * h * sizeof(l_uint32))); - memcpy(srcdata, pixGetData(pixs), wpl * h * sizeof(l_uint32)); - - pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl * h, CL_MEM_USE_HOST_PTR, &clStatus); - } - - pixdCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus); - - pixdCLIntermediate = - allocateZeroCopyBuffer(rEnv, nullptr, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus); - - return (int)clStatus; -} - -int OpenclDevice::InitEnv() { -// tprintf("[OD] OpenclDevice::InitEnv()\n"); -# ifdef SAL_WIN32 - while (1) { - if (1 == LoadOpencl()) - break; - } -# endif - // sets up environment, compiles programs - - InitOpenclRunEnv_DeviceSelection(0); - return 1; -} - -int OpenclDevice::ReleaseOpenclRunEnv() { - ReleaseOpenclEnv(&gpuEnv); -# ifdef SAL_WIN32 - FreeOpenclDll(); -# endif - return 1; -} - -inline int OpenclDevice::AddKernelConfig(int kCount, const char *kName) { - ASSERT_HOST(kCount > 0); - ASSERT_HOST(strlen(kName) < sizeof(gpuEnv.mArrykernelNames[kCount - 1])); - strcpy(gpuEnv.mArrykernelNames[kCount - 1], kName); - gpuEnv.mnKernelCount++; - return 0; -} - -int OpenclDevice::RegistOpenclKernel() { - if (!gpuEnv.mnIsUserCreated) - memset(&gpuEnv, 0, sizeof(gpuEnv)); - - gpuEnv.mnFileCount = 0; // argc; - gpuEnv.mnKernelCount = 0UL; - - AddKernelConfig(1, "oclAverageSub1"); - return 0; -} - -int OpenclDevice::InitOpenclRunEnv_DeviceSelection(int argc) { - if (!isInited) { - // after programs compiled, selects best device - ds_device bestDevice_DS = getDeviceSelection(); - cl_device_id bestDevice = bestDevice_DS.oclDeviceID; - // overwrite global static GPUEnv with new device - if (selectedDeviceIsOpenCL()) { - // tprintf("[DS] InitOpenclRunEnv_DS::Calling populateGPUEnvFromDevice() - // for selected device\n"); - populateGPUEnvFromDevice(&gpuEnv, bestDevice); - gpuEnv.mnFileCount = 0; // argc; - gpuEnv.mnKernelCount = 0UL; - CompileKernelFile(&gpuEnv, ""); - } else { - // tprintf("[DS] InitOpenclRunEnv_DS::Skipping populateGPUEnvFromDevice() - // b/c native cpu selected\n"); - } - isInited = 1; - } - return 0; -} - -OpenclDevice::OpenclDevice() { - // InitEnv(); -} - -OpenclDevice::~OpenclDevice() { - // ReleaseOpenclRunEnv(); -} - -int OpenclDevice::ReleaseOpenclEnv(GPUEnv *gpuInfo) { - int i = 0; - int clStatus = 0; - - if (!isInited) { - return 1; - } - - for (i = 0; i < gpuEnv.mnFileCount; i++) { - if (gpuEnv.mpArryPrograms[i]) { - clStatus = clReleaseProgram(gpuEnv.mpArryPrograms[i]); - CHECK_OPENCL(clStatus, "clReleaseProgram"); - gpuEnv.mpArryPrograms[i] = nullptr; - } - } - if (gpuEnv.mpCmdQueue) { - clReleaseCommandQueue(gpuEnv.mpCmdQueue); - gpuEnv.mpCmdQueue = nullptr; - } - if (gpuEnv.mpContext) { - clReleaseContext(gpuEnv.mpContext); - gpuEnv.mpContext = nullptr; - } - isInited = 0; - gpuInfo->mnIsUserCreated = 0; - delete[] gpuInfo->mpArryDevsID; - return 1; -} -int OpenclDevice::BinaryGenerated(const char *clFileName, FILE **fhandle) { - unsigned int i = 0; - cl_int clStatus; - int status = 0; - FILE *fd = nullptr; - char fileName[256]; - char cl_name[128]; - char deviceName[1024]; - clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, - nullptr); - CHECK_OPENCL(clStatus, "clGetDeviceInfo"); - const char *str = strstr(clFileName, ".cl"); - memcpy(cl_name, clFileName, str - clFileName); - cl_name[str - clFileName] = '\0'; - snprintf(fileName, sizeof(fileName), "%s-%s.bin", cl_name, deviceName); - legalizeFileName(fileName); - fd = fopen(fileName, "rb"); - status = (fd != nullptr) ? 1 : 0; - if (fd != nullptr) { - *fhandle = fd; - } - return status; -} -int OpenclDevice::CachedOfKernerPrg(const GPUEnv *gpuEnvCached, const char *clFileName) { - int i; - for (i = 0; i < gpuEnvCached->mnFileCount; i++) { - if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) { - if (gpuEnvCached->mpArryPrograms[i] != nullptr) { - return 1; - } - } - } - - return 0; -} -int OpenclDevice::WriteBinaryToFile(const char *fileName, const char *birary, size_t numBytes) { - FILE *output = nullptr; - output = fopen(fileName, "wb"); - if (output == nullptr) { - return 0; - } - - fwrite(birary, sizeof(char), numBytes, output); - fclose(output); - - return 1; -} - -int OpenclDevice::GeneratBinFromKernelSource(cl_program program, const char *clFileName) { - unsigned int i = 0; - cl_int clStatus; - cl_uint numDevices; - - clStatus = - clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - std::vector mpArryDevsID(numDevices); - - /* grab the handles to all of the devices in the program. */ - clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * numDevices, - &mpArryDevsID[0], nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - /* figure out the sizes of each of the binaries. */ - std::vector binarySizes(numDevices); - - clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * numDevices, - &binarySizes[0], nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - /* copy over all of the generated binaries. */ - std::vector binaries(numDevices); - - for (i = 0; i < numDevices; i++) { - if (binarySizes[i] != 0) { - binaries[i] = new char[binarySizes[i]]; - } else { - binaries[i] = nullptr; - } - } - - clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *) * numDevices, - &binaries[0], nullptr); - CHECK_OPENCL(clStatus, "clGetProgramInfo"); - - /* dump out each binary into its own separate file. */ - for (i = 0; i < numDevices; i++) { - if (binarySizes[i] != 0) { - char fileName[256]; - char cl_name[128]; - char deviceName[1024]; - clStatus = - clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr); - CHECK_OPENCL(clStatus, "clGetDeviceInfo"); - - const char *str = strstr(clFileName, ".cl"); - memcpy(cl_name, clFileName, str - clFileName); - cl_name[str - clFileName] = '\0'; - snprintf(fileName, sizeof(fileName), "%s-%s.bin", cl_name, deviceName); - legalizeFileName(fileName); - if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) { - tprintf("[OD] write binary[%s] failed\n", fileName); - return 0; - } // else - tprintf("[OD] write binary[%s] successfully\n", fileName); - } - } - - // Release all resources and memory - for (i = 0; i < numDevices; i++) { - delete[] binaries[i]; - } - - return 1; -} - -int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption) { - cl_int clStatus = 0; - const char *source; - size_t source_size[1]; - int binary_status, binaryExisted, idx; - cl_uint numDevices; - FILE *fd, *fd1; - const char *filename = "kernel.cl"; - // fprintf(stderr, "[OD] CompileKernelFile ... \n"); - if (CachedOfKernerPrg(gpuInfo, filename) == 1) { - return 1; - } - - idx = gpuInfo->mnFileCount; - - source = kernel_src; - - source_size[0] = strlen(source); - binaryExisted = 0; - binaryExisted = BinaryGenerated(filename, &fd); // don't check for binary during microbenchmark - if (binaryExisted == 1) { - clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES, sizeof(numDevices), - &numDevices, nullptr); - CHECK_OPENCL(clStatus, "clGetContextInfo"); - - std::vector mpArryDevsID(numDevices); - bool b_error = fseek(fd, 0, SEEK_END) < 0; - auto pos = std::ftell(fd); - b_error |= (pos <= 0); - size_t length = pos; - b_error |= fseek(fd, 0, SEEK_SET) < 0; - if (b_error) { - fclose(fd); - return 0; - } - - std::vector binary(length + 2); - - memset(&binary[0], 0, length + 2); - b_error |= fread(&binary[0], 1, length, fd) != length; - - fclose(fd); - fd = nullptr; - // grab the handles to all of the devices in the context. - clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, - sizeof(cl_device_id) * numDevices, &mpArryDevsID[0], nullptr); - CHECK_OPENCL(clStatus, "clGetContextInfo"); - // fprintf(stderr, "[OD] Create kernel from binary\n"); - const uint8_t *c_binary = &binary[0]; - gpuInfo->mpArryPrograms[idx] = - clCreateProgramWithBinary(gpuInfo->mpContext, numDevices, &mpArryDevsID[0], &length, - &c_binary, &binary_status, &clStatus); - CHECK_OPENCL(clStatus, "clCreateProgramWithBinary"); - } else { - // create a CL program using the kernel source - // fprintf(stderr, "[OD] Create kernel from source\n"); - gpuInfo->mpArryPrograms[idx] = - clCreateProgramWithSource(gpuInfo->mpContext, 1, &source, source_size, &clStatus); - CHECK_OPENCL(clStatus, "clCreateProgramWithSource"); - } - - if (gpuInfo->mpArryPrograms[idx] == (cl_program) nullptr) { - return 0; - } - - // char options[512]; - // create a cl program executable for all the devices specified - // tprintf("[OD] BuildProgram.\n"); - if (!gpuInfo->mnIsUserCreated) { - clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, buildOption, - nullptr, nullptr); - } else { - clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), buildOption, - nullptr, nullptr); - } - if (clStatus != CL_SUCCESS) { - tprintf("BuildProgram error!\n"); - size_t length; - if (!gpuInfo->mnIsUserCreated) { - clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], - CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); - } else { - clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, - CL_PROGRAM_BUILD_LOG, 0, nullptr, &length); - } - if (clStatus != CL_SUCCESS) { - tprintf("opencl create build log fail\n"); - return 0; - } - std::vector buildLog(length); - if (!gpuInfo->mnIsUserCreated) { - clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], - CL_PROGRAM_BUILD_LOG, length, &buildLog[0], &length); - } else { - clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, - CL_PROGRAM_BUILD_LOG, length, &buildLog[0], &length); - } - if (clStatus != CL_SUCCESS) { - tprintf("opencl program build info fail\n"); - return 0; - } - - fd1 = fopen("kernel-build.log", "w+"); - if (fd1 != nullptr) { - fwrite(&buildLog[0], sizeof(char), length, fd1); - fclose(fd1); - } - - return 0; - } - - strcpy(gpuInfo->mArryKnelSrcFile[idx], filename); - if (binaryExisted == 0) { - GeneratBinFromKernelSource(gpuInfo->mpArryPrograms[idx], filename); - } - - gpuInfo->mnFileCount += 1; - return 1; -} - -l_uint32 *OpenclDevice::pixReadFromTiffKernel(l_uint32 *tiffdata, l_int32 w, l_int32 h, l_int32 wpl, - l_uint32 *line) { - cl_int clStatus; - KernelEnv rEnv; - size_t globalThreads[2]; - size_t localThreads[2]; - int gsize; - cl_mem valuesCl; - cl_mem outputCl; - - // global and local work dimensions for Horizontal pass - gsize = (w + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - SetKernelEnv(&rEnv); - - l_uint32 *pResult = (l_uint32 *)malloc(w * h * sizeof(l_uint32)); - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "composeRGBPixel", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel"); - - // Allocate input and output OCL buffers - valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w * h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - &clStatus); - outputCl = allocateZeroCopyBuffer(rEnv, pResult, w * h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, - &clStatus); - - // Kernel arguments - clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl); - CHECK_OPENCL(clStatus, "clSetKernelArg"); - - // Kernel enqueue - clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel"); - - /* map results back from gpu */ - void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, 0, - w * h * sizeof(l_uint32), 0, nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, nullptr, nullptr); - - // Sync - clFinish(rEnv.mpkCmdQueue); - return pResult; -} - -// Morphology Dilate operation for 5x5 structuring element. Invokes the relevant -// OpenCL kernels -static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) { - size_t globalThreads[2]; - cl_mem pixtemp; - cl_int status; - int gsize; - size_t localThreads[2]; - - // Horizontal pass - gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX; - globalThreads[0] = gsize; - globalThreads[1] = GROUPSIZE_HMORY; - localThreads[0] = GROUPSIZE_HMORX; - localThreads[1] = GROUPSIZE_HMORY; - - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - // Swap source and dest buffers - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - // Vertical - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoDilateVer_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateVer_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - return status; -} - -// Morphology Erode operation for 5x5 structuring element. Invokes the relevant -// OpenCL kernels -static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h) { - size_t globalThreads[2]; - cl_mem pixtemp; - cl_int status; - int gsize; - l_uint32 fwmask, lwmask; - size_t localThreads[2]; - - lwmask = lmask32[31 - 2]; - fwmask = rmask32[31 - 2]; - - // Horizontal pass - gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX; - globalThreads[0] = gsize; - globalThreads[1] = GROUPSIZE_HMORY; - localThreads[0] = GROUPSIZE_HMORX; - localThreads[1] = GROUPSIZE_HMORY; - - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - // Swap source and dest buffers - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - // Vertical - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer_5x5", &status); - CHECK_OPENCL(status, "clCreateKernel morphoErodeVer_5x5"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(fwmask), &fwmask); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), &lwmask); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - return status; -} - -// Morphology Dilate operation. Invokes the relevant OpenCL kernels -static cl_int pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { - l_int32 xp, yp, xn, yn; - SEL *sel; - size_t globalThreads[2]; - cl_mem pixtemp; - cl_int status = 0; - int gsize; - size_t localThreads[2]; - char isEven; - - OpenclDevice::SetKernelEnv(&rEnv); - - if (hsize == 5 && vsize == 5) { - // Specific case for 5x5 - status = pixDilateCL_55(wpl, h); - return status; - } - - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - selDestroy(&sel); - // global and local work dimensions for Horizontal pass - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - if (xp > 31 || xn > 31) { - // Generic case. - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateHor"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } else if (xp > 0 || xn > 0) { - // Specific Horizontal pass kernel for half width < 32 - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_32word"); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), &isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } - - if (yp > 0 || yn > 0) { - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoDilateVer", &status); - CHECK_OPENCL(status, "clCreateKernel morphoDilateVer"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), &yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - } - - return status; -} - -// Morphology Erode operation. Invokes the relevant OpenCL kernels -static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { - l_int32 xp, yp, xn, yn; - SEL *sel; - size_t globalThreads[2]; - size_t localThreads[2]; - cl_mem pixtemp; - cl_int status = 0; - int gsize; - char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC); - l_uint32 rwmask, lwmask; - char isEven; - - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - selDestroy(&sel); - OpenclDevice::SetKernelEnv(&rEnv); - - if (hsize == 5 && vsize == 5 && isAsymmetric) { - // Specific kernel for 5x5 - status = pixErodeCL_55(wpl, h); - return status; - } - - lwmask = lmask32[31 - (xn & 31)]; - rwmask = rmask32[31 - (xp & 31)]; - - // global and local work dimensions for Horizontal pass - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - // Horizontal Pass - if (xp > 31 || xn > 31) { - // Generic case. - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor", &status); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), &isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), &rwmask); - status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), &lwmask); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } else if (xp > 0 || xn > 0) { - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_32word", &status); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), &rwmask); - status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), &lwmask); - status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), &isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - if (yp > 0 || yn > 0) { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } - - // Vertical Pass - if (yp > 0 || yn > 0) { - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer", &status); - CHECK_OPENCL(status, "clCreateKernel morphoErodeVer"); - - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), &yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - } - - return status; -} - -// Morphology Open operation. Invokes the relevant OpenCL kernels -static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { - cl_int status; - cl_mem pixtemp; - - // Erode followed by Dilate - status = pixErodeCL(hsize, vsize, wpl, h); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - status = pixDilateCL(hsize, vsize, wpl, h); - - return status; -} - -// Morphology Close operation. Invokes the relevant OpenCL kernels -static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) { - cl_int status; - cl_mem pixtemp; - - // Dilate followed by Erode - status = pixDilateCL(hsize, vsize, wpl, h); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - status = pixErodeCL(hsize, vsize, wpl, h); - - return status; -} - -// output = buffer1 & ~(buffer2) -static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2) { - cl_int status; - size_t globalThreads[2]; - int gsize; - size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y}; - - gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "pixSubtract_inplace", &status); - CHECK_OPENCL(status, "clCreateKernel pixSubtract_inplace"); - - // Enqueue a kernel run call. - status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &buffer1); - status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &buffer2); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr, globalThreads, - localThreads, 0, nullptr, nullptr); - - return status; -} - -// OpenCL implementation of Get Lines from pix function -// Note: Assumes the source and dest opencl buffer are initialized. No check -// done -void OpenclDevice::pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, - Image *pixClosed, bool getpixClosed, l_int32 close_hsize, - l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, - l_int32 line_hsize, l_int32 line_vsize) { - l_uint32 wpl, h; - cl_mem pixtemp; - - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - - // First step : Close Morph operation: Dilate followed by Erode - clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); - - // Copy the Close output to CPU buffer - if (getpixClosed) { - *pixClosed = - mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, wpl * h, CL_MAP_READ, true, false); - } - - // Store the output of close operation in an intermediate buffer - // this will be later used for pixsubtract - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, - sizeof(int) * wpl * h, 0, nullptr, nullptr); - - // Second step: Open Operation - Erode followed by Dilate - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); - - // Third step: Subtract : (Close - Open) - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; - - clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); - - // Store the output of Hollow operation in an intermediate buffer - // this will be later used - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, - sizeof(int) * wpl * h, 0, nullptr, nullptr); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - // Fourth step: Get vertical line - // pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - clStatus = pixOpenCL(1, line_vsize, wpl, h); - - // Copy the vertical line output to CPU buffer - *pix_vline = - mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h, CL_MAP_READ, true, false); - - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; - - // Fifth step: Get horizontal line - // pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - clStatus = pixOpenCL(line_hsize, 1, wpl, h); - - // Copy the horizontal line output to CPU buffer - *pix_hline = - mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h, CL_MAP_READ, true, true); - - return; -} - -/************************************************************************* - * HistogramRect - * Otsu Thresholding Operations - * histogramAllChannels is laid out as all channel 0, then all channel 1... - * only supports 1 or 4 channels (bytes_per_pixel) - ************************************************************************/ -int OpenclDevice::HistogramRectOCL(void *imageData, int bytes_per_pixel, int bytes_per_line, - int left, // always 0 - int top, // always 0 - int width, int height, int kHistogramSize, - int *histogramAllChannels) { - cl_int clStatus; - int retVal = 0; - KernelEnv histKern; - SetKernelEnv(&histKern); - KernelEnv histRedKern; - SetKernelEnv(&histRedKern); - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be - // coherent which we don't need. - // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = - clCreateBuffer(histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs; - clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, - nullptr); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size * numWorkGroups; - size_t local_work_size[] = {static_cast(block_size)}; - size_t global_work_size[] = {static_cast(numThreads)}; - size_t red_global_work_size[] = { - static_cast(block_size * kHistogramSize * bytes_per_pixel)}; - - /* map histogramAllChannels as write only */ - - cl_mem histogramBuffer = clCreateBuffer( - histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - kHistogramSize * bytes_per_pixel * sizeof(int), histogramAllChannels, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer histogramBuffer"); - - /* intermediate histogram buffer */ - int histRed = 256; - int tmpHistogramBins = kHistogramSize * bytes_per_pixel * histRed; - - cl_mem tmpHistogramBuffer = - clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE, tmpHistogramBins * sizeof(cl_uint), - nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer tmpHistogramBuffer"); - - /* atomic sync buffer */ - int *zeroBuffer = new int[1]; - zeroBuffer[0] = 0; - cl_mem atomicSyncBuffer = - clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int), - zeroBuffer, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer atomicSyncBuffer"); - delete[] zeroBuffer; - // Create kernel objects based on bytes_per_pixel - if (bytes_per_pixel == 1) { - histKern.mpkKernel = - clCreateKernel(histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannel"); - - histRedKern.mpkKernel = clCreateKernel(histRedKern.mpkProgram, - "kernel_HistogramRectOneChannelReduction", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannelReduction"); - } else { - histKern.mpkKernel = - clCreateKernel(histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectAllChannels"); - - histRedKern.mpkKernel = clCreateKernel(histRedKern.mpkProgram, - "kernel_HistogramRectAllChannelsReduction", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectAllChannelsReduction"); - } - - void *ptr; - - // Initialize tmpHistogramBuffer buffer - ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0, - tmpHistogramBins * sizeof(cl_uint), 0, nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer tmpHistogramBuffer"); - - memset(ptr, 0, tmpHistogramBins * sizeof(cl_uint)); - clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, nullptr, nullptr); - - /* set kernel 1 arguments */ - clStatus = clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); - cl_uint numPixels = width * height; - clStatus = clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels); - CHECK_OPENCL(clStatus, "clSetKernelArg numPixels"); - clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem), &tmpHistogramBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer"); - - /* set kernel 2 arguments */ - int n = numThreads / bytes_per_pixel; - clStatus = clSetKernelArg(histRedKern.mpkKernel, 0, sizeof(cl_int), &n); - CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); - clStatus = clSetKernelArg(histRedKern.mpkKernel, 1, sizeof(cl_mem), &tmpHistogramBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer"); - clStatus = clSetKernelArg(histRedKern.mpkKernel, 2, sizeof(cl_mem), &histogramBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg histogramBuffer"); - - /* launch histogram */ - clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1, nullptr, - global_work_size, local_work_size, 0, nullptr, nullptr); - CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels"); - clFinish(histKern.mpkCmdQueue); - if (clStatus != 0) { - retVal = -1; - } - /* launch histogram */ - clStatus = clEnqueueNDRangeKernel(histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, nullptr, - red_global_work_size, local_work_size, 0, nullptr, nullptr); - CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction"); - clFinish(histRedKern.mpkCmdQueue); - if (clStatus != 0) { - retVal = -1; - } - - /* map results back from gpu */ - ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, CL_MAP_READ, 0, - kHistogramSize * bytes_per_pixel * sizeof(int), 0, nullptr, nullptr, - &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); - if (clStatus != 0) { - retVal = -1; - } - clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, nullptr, nullptr); - - clReleaseMemObject(histogramBuffer); - clReleaseMemObject(imageBuffer); - return retVal; -} - -/************************************************************************* - * Threshold the rectangle, taking everything except the image buffer pointer - * from the class, using thresholds/hi_values to the output IMAGE. - * only supports 1 or 4 channels - ************************************************************************/ -int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, int bytes_per_pixel, - int bytes_per_line, int *thresholds, int *hi_values, - Image *pix, int height, int width, int top, int left) { - int retVal = 0; - /* create pix result buffer */ - *pix = pixCreate(width, height, 1); - uint32_t *pixData = pixGetData(*pix); - int wpl = pixGetWpl(*pix); - int pixSize = wpl * height * sizeof(uint32_t); // number of pixels - - cl_int clStatus; - KernelEnv rEnv; - SetKernelEnv(&rEnv); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs = 6; - clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, - nullptr); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size * numWorkGroups; - size_t local_work_size[] = {(size_t)block_size}; - size_t global_work_size[] = {(size_t)numThreads}; - - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be - // coherent which we don't need. - // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = - clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); - - /* map pix as write only */ - pixThBuffer = clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, pixSize, - pixData, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer pix"); - - /* map thresholds and hi_values */ - cl_mem thresholdsBuffer = clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), thresholds, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer"); - cl_mem hiValuesBuffer = clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), hi_values, &clStatus); - CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer"); - - /* compile kernel */ - if (bytes_per_pixel == 4) { - rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix"); - } else { - rEnv.mpkKernel = - clCreateKernel(rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", &clStatus); - CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan"); - } - - /* set kernel arguments */ - clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), &height); - CHECK_OPENCL(clStatus, "clSetKernelArg height"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), &width); - CHECK_OPENCL(clStatus, "clSetKernelArg width"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), &wpl); - CHECK_OPENCL(clStatus, "clSetKernelArg wpl"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &thresholdsBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), &hiValuesBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), &pixThBuffer); - CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer"); - - /* launch kernel & wait */ - clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1, nullptr, global_work_size, - local_work_size, 0, nullptr, nullptr); - CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix"); - clFinish(rEnv.mpkCmdQueue); - if (clStatus != 0) { - tprintf("Setting return value to -1\n"); - retVal = -1; - } - /* map results back from gpu */ - void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, - nullptr, nullptr, &clStatus); - CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, nullptr, nullptr); - - clReleaseMemObject(imageBuffer); - clReleaseMemObject(thresholdsBuffer); - clReleaseMemObject(hiValuesBuffer); - - return retVal; -} - -/****************************************************************************** - * Data Types for Device Selection - *****************************************************************************/ - -struct TessScoreEvaluationInputData { - int height; - int width; - int numChannels; - unsigned char *imageData; - Image pix; -}; - -static void populateTessScoreEvaluationInputData(TessScoreEvaluationInputData *input) { - srand(1); - // 8.5x11 inches @ 300dpi rounded to clean multiples - int height = 3328; // %256 - int width = 2560; // %512 - int numChannels = 4; - input->height = height; - input->width = width; - input->numChannels = numChannels; - unsigned char(*imageData4)[4] = (unsigned char(*)[4])malloc( - height * width * numChannels * sizeof(unsigned char)); // new unsigned char[4][height*width]; - input->imageData = (unsigned char *)&imageData4[0]; - - // zero out image - unsigned char pixelWhite[4] = {0, 0, 0, 255}; - unsigned char pixelBlack[4] = {255, 255, 255, 255}; - for (int p = 0; p < height * width; p++) { - // unsigned char tmp[4] = imageData4[0]; - imageData4[p][0] = pixelWhite[0]; - imageData4[p][1] = pixelWhite[1]; - imageData4[p][2] = pixelWhite[2]; - imageData4[p][3] = pixelWhite[3]; - } - // random lines to be eliminated - int maxLineWidth = 64; // pixels wide - int numLines = 10; - // vertical lines - for (int i = 0; i < numLines; i++) { - int lineWidth = rand() % maxLineWidth; - int vertLinePos = lineWidth + rand() % (width - 2 * lineWidth); - // tprintf("[PI] VerticalLine @ %i (w=%i)\n", vertLinePos, lineWidth); - for (int row = vertLinePos - lineWidth / 2; row < vertLinePos + lineWidth / 2; row++) { - for (int col = 0; col < height; col++) { - // imageData4[row*width+col] = pixelBlack; - imageData4[row * width + col][0] = pixelBlack[0]; - imageData4[row * width + col][1] = pixelBlack[1]; - imageData4[row * width + col][2] = pixelBlack[2]; - imageData4[row * width + col][3] = pixelBlack[3]; - } - } - } - // horizontal lines - for (int i = 0; i < numLines; i++) { - int lineWidth = rand() % maxLineWidth; - int horLinePos = lineWidth + rand() % (height - 2 * lineWidth); - // tprintf("[PI] HorizontalLine @ %i (w=%i)\n", horLinePos, lineWidth); - for (int row = 0; row < width; row++) { - for (int col = horLinePos - lineWidth / 2; col < horLinePos + lineWidth / 2; - col++) { // for (int row = vertLinePos-lineWidth/2; row < - // vertLinePos+lineWidth/2; row++) { - // tprintf("[PI] HoizLine pix @ (%3i, %3i)\n", row, col); - // imageData4[row*width+col] = pixelBlack; - imageData4[row * width + col][0] = pixelBlack[0]; - imageData4[row * width + col][1] = pixelBlack[1]; - imageData4[row * width + col][2] = pixelBlack[2]; - imageData4[row * width + col][3] = pixelBlack[3]; - } - } - } - // spots (noise, squares) - float fractionBlack = 0.1; // how much of the image should be blackened - int numSpots = (height * width) * fractionBlack / (maxLineWidth * maxLineWidth / 2 / 2); - for (int i = 0; i < numSpots; i++) { - int lineWidth = rand() % maxLineWidth; - int col = lineWidth + rand() % (width - 2 * lineWidth); - int row = lineWidth + rand() % (height - 2 * lineWidth); - // tprintf("[PI] Spot[%i/%i] @ (%3i, %3i)\n", i, numSpots, row, col ); - for (int r = row - lineWidth / 2; r < row + lineWidth / 2; r++) { - for (int c = col - lineWidth / 2; c < col + lineWidth / 2; c++) { - // tprintf("[PI] \tSpot[%i/%i] @ (%3i, %3i)\n", i, numSpots, r, c ); - // imageData4[row*width+col] = pixelBlack; - imageData4[r * width + c][0] = pixelBlack[0]; - imageData4[r * width + c][1] = pixelBlack[1]; - imageData4[r * width + c][2] = pixelBlack[2]; - imageData4[r * width + c][3] = pixelBlack[3]; - } - } - } - - input->pix = pixCreate(input->width, input->height, 8 * input->numChannels); -} - -struct TessDeviceScore { - float time; // small time means faster device - bool clError; // were there any opencl errors - bool valid; // was the correct response generated -}; - -/****************************************************************************** - * Micro Benchmarks for Device Selection - *****************************************************************************/ - -static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, - ds_device_type type) { - double time = 0; -# if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -# elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; -# else - timespec time_funct_start, time_funct_end; -# endif - // input data - l_uint32 *tiffdata = (l_uint32 *)input.imageData; // same size and random data; data doesn't - // change workload - - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - - OpenclDevice::gpuEnv = *env; - int wpl = pixGetWpl(input.pix); - OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, wpl, nullptr); -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - - } else { -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - Image pix = pixCreate(input.width, input.height, 32); - l_uint32 *pixData = pixGetData(pix); - int i, j; - int idx = 0; - for (i = 0; i < input.height; i++) { - for (j = 0; j < input.width; j++) { - l_uint32 tiffword = tiffdata[i * input.width + j]; - l_int32 rval = ((tiffword)&0xff); - l_int32 gval = (((tiffword) >> 8) & 0xff); - l_int32 bval = (((tiffword) >> 16) & 0xff); - l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8); - pixData[idx] = value; - idx++; - } - } -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - pix.destroy(); - } - - return time; -} - -static double histogramRectMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, - ds_device_type type) { - double time; -# if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -# elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; -# else - timespec time_funct_start, time_funct_end; -# endif - - const int left = 0; - const int top = 0; - int kHistogramSize = 256; - int bytes_per_line = input.width * input.numChannels; - int *histogramAllChannels = new int[kHistogramSize * input.numChannels]; - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - - OpenclDevice::gpuEnv = *env; - int retVal = OpenclDevice::HistogramRectOCL(input.imageData, input.numChannels, bytes_per_line, - left, top, input.width, input.height, - kHistogramSize, histogramAllChannels); - -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - if (retVal == 0) { - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; - } else { - time = FLT_MAX; - } -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - } else { - int *histogram = new int[kHistogramSize]; -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - for (int ch = 0; ch < input.numChannels; ++ch) { - tesseract::HistogramRect(input.pix, input.numChannels, left, top, input.width, input.height, - histogram); - } -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - delete[] histogram; - } - - // cleanup - delete[] histogramAllChannels; - return time; -} - -// Reproducing the ThresholdRectToPix native version -static void ThresholdRectToPix_Native(const unsigned char *imagedata, int bytes_per_pixel, - int bytes_per_line, const int *thresholds, - const int *hi_values, Image *pix) { - int top = 0; - int left = 0; - int width = pixGetWidth(*pix); - int height = pixGetHeight(*pix); - - *pix = pixCreate(width, height, 1); - uint32_t *pixdata = pixGetData(*pix); - int wpl = pixGetWpl(*pix); - const unsigned char *srcdata = imagedata + top * bytes_per_line + left * bytes_per_pixel; - for (int y = 0; y < height; ++y) { - const uint8_t *linedata = srcdata; - uint32_t *pixline = pixdata + y * wpl; - for (int x = 0; x < width; ++x, linedata += bytes_per_pixel) { - bool white_result = true; - for (int ch = 0; ch < bytes_per_pixel; ++ch) { - if (hi_values[ch] >= 0 && (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { - white_result = false; - break; - } - } - if (white_result) - CLEAR_DATA_BIT(pixline, x); - else - SET_DATA_BIT(pixline, x); - } - srcdata += bytes_per_line; - } -} - -static double thresholdRectToPixMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, - ds_device_type type) { - double time; -# if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -# elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; -# else - timespec time_funct_start, time_funct_end; -# endif - - // input data - unsigned char pixelHi = (unsigned char)255; - int thresholds[4] = {pixelHi, pixelHi, pixelHi, pixelHi}; - - // Pix* pix = pixCreate(width, height, 1); - int top = 0; - int left = 0; - int bytes_per_line = input.width * input.numChannels; - - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - - OpenclDevice::gpuEnv = *env; - int hi_values[4]; - int retVal = OpenclDevice::ThresholdRectToPixOCL( - input.imageData, input.numChannels, bytes_per_line, thresholds, hi_values, &input.pix, - input.height, input.width, top, left); - -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - if (retVal == 0) { - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; - } else { - time = FLT_MAX; - } - -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - } else { - tesseract::ImageThresholder thresholder; - thresholder.SetImage(input.pix); -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - int hi_values[4] = {}; - ThresholdRectToPix_Native(input.imageData, input.numChannels, bytes_per_line, thresholds, - hi_values, &input.pix); - -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - } - - return time; -} - -static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputData input, - ds_device_type type) { - double time = 0; -# if ON_WINDOWS - LARGE_INTEGER freq, time_funct_start, time_funct_end; - QueryPerformanceFrequency(&freq); -# elif ON_APPLE - mach_timebase_info_data_t info = {0, 0}; - mach_timebase_info(&info); - long long start, stop; -# else - timespec time_funct_start, time_funct_end; -# endif - - // input data - int resolution = 300; - int wpl = pixGetWpl(input.pix); - int kThinLineFraction = 20; // tess constant - int kMinLineLengthFraction = 4; // tess constant - int max_line_width = resolution / kThinLineFraction; - int min_line_length = resolution / kMinLineLengthFraction; - int closing_brick = max_line_width / 3; - - // function call - if (type == DS_DEVICE_OPENCL_DEVICE) { -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - OpenclDevice::gpuEnv = *env; - OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix); - Image pix_vline = nullptr, pix_hline = nullptr, pix_closed = nullptr; - OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed, true, - closing_brick, closing_brick, max_line_width, max_line_width, - min_line_length, min_line_length); - - OpenclDevice::releaseMorphCLBuffers(); - -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - } else { -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_start); -# elif ON_APPLE - start = mach_absolute_time(); -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_start); -# endif - - // native serial code - Image src_pix = input.pix; - Image pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); - Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); - Image pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - pix_solid.destroy(); - Image pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - Image pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - pix_hline.destroy(); - pix_vline.destroy(); - pix_hollow.destroy(); - -# if ON_WINDOWS - QueryPerformanceCounter(&time_funct_end); - time = (time_funct_end.QuadPart - time_funct_start.QuadPart) / (double)(freq.QuadPart); -# elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; -# else - clock_gettime(CLOCK_MONOTONIC, &time_funct_end); - time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + - (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; -# endif - } - - return time; -} - -/****************************************************************************** - * Device Selection - *****************************************************************************/ - -// encode score object as byte string -static ds_status serializeScore(ds_device *device, uint8_t **serializedScore, - unsigned int *serializedScoreSize) { - *serializedScoreSize = sizeof(TessDeviceScore); - *serializedScore = new uint8_t[*serializedScoreSize]; - memcpy(*serializedScore, device->score, *serializedScoreSize); - return DS_SUCCESS; -} - -// parses byte string and stores in score object -static ds_status deserializeScore(ds_device *device, const uint8_t *serializedScore, - unsigned int serializedScoreSize) { - // check that serializedScoreSize == sizeof(TessDeviceScore); - device->score = new TessDeviceScore; - memcpy(device->score, serializedScore, serializedScoreSize); - return DS_SUCCESS; -} - -static ds_status releaseScore(TessDeviceScore *score) { - delete score; - return DS_SUCCESS; -} - -// evaluate devices -static ds_status evaluateScoreForDevice(ds_device *device, void *inputData) { - // overwrite statuc gpuEnv w/ current device - // so native opencl calls can be used; they use static gpuEnv - tprintf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, - device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - GPUEnv *env = nullptr; - if (device->type == DS_DEVICE_OPENCL_DEVICE) { - env = &OpenclDevice::gpuEnv; - memset(env, 0, sizeof(*env)); - // tprintf("[DS] populating tmp GPUEnv from device\n"); - populateGPUEnvFromDevice(env, device->oclDeviceID); - env->mnFileCount = 0; // argc; - env->mnKernelCount = 0UL; - // tprintf("[DS] compiling kernels for tmp GPUEnv\n"); - OpenclDevice::CompileKernelFile(env, ""); - } - - TessScoreEvaluationInputData *input = static_cast(inputData); - - // pixReadTiff - double composeRGBPixelTime = composeRGBPixelMicroBench(env, *input, device->type); - - // HistogramRect - double histogramRectTime = histogramRectMicroBench(env, *input, device->type); - - // ThresholdRectToPix - double thresholdRectToPixTime = thresholdRectToPixMicroBench(env, *input, device->type); - - // getLineMasks - double getLineMasksMorphTime = getLineMasksMorphMicroBench(env, *input, device->type); - - // weigh times (% of cpu time) - // these weights should be the % execution time that the native cpu code took - float composeRGBPixelWeight = 1.2f; - float histogramRectWeight = 2.4f; - float thresholdRectToPixWeight = 4.5f; - float getLineMasksMorphWeight = 5.0f; - - float weightedTime = composeRGBPixelWeight * composeRGBPixelTime + - histogramRectWeight * histogramRectTime + - thresholdRectToPixWeight * thresholdRectToPixTime + - getLineMasksMorphWeight * getLineMasksMorphTime; - device->score = new TessDeviceScore; - device->score->time = weightedTime; - - tprintf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, - device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - tprintf("[DS]%25s: %f (w=%.1f)\n", "composeRGBPixel", composeRGBPixelTime, composeRGBPixelWeight); - tprintf("[DS]%25s: %f (w=%.1f)\n", "HistogramRect", histogramRectTime, histogramRectWeight); - tprintf("[DS]%25s: %f (w=%.1f)\n", "ThresholdRectToPix", thresholdRectToPixTime, - thresholdRectToPixWeight); - tprintf("[DS]%25s: %f (w=%.1f)\n", "getLineMasksMorph", getLineMasksMorphTime, - getLineMasksMorphWeight); - tprintf("[DS]%25s: %f\n", "Score", device->score->time); - return DS_SUCCESS; -} - -// initial call to select device -ds_device OpenclDevice::getDeviceSelection() { - if (!deviceIsSelected) { - // check if opencl is available at runtime - if (1 == LoadOpencl()) { - // opencl is available - // setup devices - ds_status status; - ds_profile *profile; - status = initDSProfile(&profile, "v0.1"); - // try reading scores from file - const char *fileName = "tesseract_opencl_profile_devices.dat"; - status = readProfileFromFile(profile, deserializeScore, fileName); - if (status != DS_SUCCESS) { - // need to run evaluation - tprintf("[DS] Profile file not available (%s); performing profiling.\n", fileName); - - // create input data - TessScoreEvaluationInputData input; - populateTessScoreEvaluationInputData(&input); - // perform evaluations - unsigned int numUpdates; - status = - profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, &input, &numUpdates); - // write scores to file - if (status == DS_SUCCESS) { - status = writeProfileToFile(profile, serializeScore, fileName); - if (status == DS_SUCCESS) { - tprintf("[DS] Scores written to file (%s).\n", fileName); - } else { - tprintf( - "[DS] Error saving scores to file (%s); scores not written to " - "file.\n", - fileName); - } - } else { - tprintf( - "[DS] Unable to evaluate performance; scores not written to " - "file.\n"); - } - } else { - tprintf("[DS] Profile read from file (%s).\n", fileName); - } - - // we now have device scores either from file or evaluation - // select fastest using custom Tesseract selection algorithm - float bestTime = FLT_MAX; // begin search with worst possible time - int bestDeviceIdx = -1; - for (unsigned d = 0; d < profile->numDevices; d++) { - ds_device device = profile->devices[d]; - if (device.score == nullptr) - continue; - TessDeviceScore score = *device.score; - - float time = score.time; - tprintf("[DS] Device[%u] %i:%s score is %f\n", d + 1, device.type, device.oclDeviceName, - time); - if (time < bestTime) { - bestTime = time; - bestDeviceIdx = d; - } - } - if (bestDeviceIdx >= 0) { - tprintf( - "[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, - profile->devices[bestDeviceIdx].oclDeviceName, - profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - } - // cleanup - // TODO: call destructor for profile object? - - bool overridden = false; - char *overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE"); - if (overrideDeviceStr != nullptr) { - int overrideDeviceIdx = atoi(overrideDeviceStr); - if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) { - tprintf( - "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, " - "%i)\n", - overrideDeviceStr, overrideDeviceIdx); - bestDeviceIdx = overrideDeviceIdx - 1; - overridden = true; - } else { - tprintf( - "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are " - "valid devices).\n", - overrideDeviceStr, profile->numDevices); - } - } - - if (overridden) { - tprintf( - "[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, - profile->devices[bestDeviceIdx].oclDeviceName, - profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - } - selectedDevice = profile->devices[bestDeviceIdx]; - // cleanup - releaseDSProfile(profile, releaseScore); - } else { - // opencl isn't available at runtime, select native cpu device - tprintf("[DS] OpenCL runtime not available.\n"); - selectedDevice.type = DS_DEVICE_NATIVE_CPU; - selectedDevice.oclDeviceName = "(null)"; - selectedDevice.score = nullptr; - selectedDevice.oclDeviceID = nullptr; - selectedDevice.oclDriverVersion = nullptr; - } - deviceIsSelected = true; - } - return selectedDevice; -} - -bool OpenclDevice::selectedDeviceIsOpenCL() { - ds_device device = getDeviceSelection(); - return (device.type == DS_DEVICE_OPENCL_DEVICE); -} - -} // namespace - -#endif diff -Nru tesseract-5.3.4+git6348-2b07505e/src/opencl/openclwrapper.h tesseract-5.3.4+git6361-d4618678/src/opencl/openclwrapper.h --- tesseract-5.3.4+git6348-2b07505e/src/opencl/openclwrapper.h 2024-03-17 20:39:02.345838800 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/opencl/openclwrapper.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,179 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_ -#define TESSERACT_OPENCL_OPENCLWRAPPER_H_ - -#include -#include -#include "pix.h" -#include "tprintf.h" - -// including CL/cl.h doesn't occur until USE_OPENCL defined below - -/************************************************************************** - * enable/disable use of OpenCL - **************************************************************************/ - -#ifdef USE_OPENCL - -# ifdef __APPLE__ -# include -# else -# include -# endif - -namespace tesseract { - -class Image; -struct TessDeviceScore; - -// device type -enum ds_device_type { DS_DEVICE_NATIVE_CPU = 0, DS_DEVICE_OPENCL_DEVICE }; - -struct ds_device { - ds_device_type type; - cl_device_id oclDeviceID; - char *oclDeviceName; - char *oclDriverVersion; - // a pointer to the score data, the content/format is application defined. - TessDeviceScore *score; -}; - -# ifndef strcasecmp -# define strcasecmp strcmp -# endif - -# define MAX_KERNEL_STRING_LEN 64 -# define MAX_CLFILE_NUM 50 -# define MAX_CLKERNEL_NUM 200 -# define MAX_KERNEL_NAME_LEN 64 -# define CL_QUEUE_THREAD_HANDLE_AMD 0x403E -# define GROUPSIZE_X 16 -# define GROUPSIZE_Y 16 -# define GROUPSIZE_HMORX 256 -# define GROUPSIZE_HMORY 1 - -struct KernelEnv { - cl_context mpkContext; - cl_command_queue mpkCmdQueue; - cl_program mpkProgram; - cl_kernel mpkKernel; - char mckKernelName[150]; -}; - -struct OpenCLEnv { - cl_platform_id mpOclPlatformID; - cl_context mpOclContext; - cl_device_id mpOclDevsID; - cl_command_queue mpOclCmdQueue; -}; -typedef int (*cl_kernel_function)(void **userdata, KernelEnv *kenv); - -# define CHECK_OPENCL(status, name) \ - if (status != CL_SUCCESS) { \ - tprintf("OpenCL error code is %d at when %s .\n", status, name); \ - } - -struct GPUEnv { - // share vb in all modules in hb library - cl_platform_id mpPlatformID; - cl_device_type mDevType; - cl_context mpContext; - cl_device_id *mpArryDevsID; - cl_device_id mpDevID; - cl_command_queue mpCmdQueue; - cl_kernel mpArryKernels[MAX_CLFILE_NUM]; - cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program object maps one - // kernel source file - char mArryKnelSrcFile[MAX_CLFILE_NUM][256], // the max len of kernel file name is 256 - mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1]; - cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM]; - int mnKernelCount, mnFileCount, // only one kernel file - mnIsUserCreated; // 1: created , 0:no create and needed to create by - // opencl wrapper - int mnKhrFp64Flag; - int mnAmdFp64Flag; -}; - -class OpenclDevice { -public: - static GPUEnv gpuEnv; - static int isInited; - OpenclDevice(); - ~OpenclDevice(); - static int InitEnv(); // load dll, call InitOpenclRunEnv(0) - static int InitOpenclRunEnv(int argc); // RegistOpenclKernel, double flags, compile kernels - static int InitOpenclRunEnv_DeviceSelection( - int argc); // RegistOpenclKernel, double flags, compile kernels - static int RegistOpenclKernel(); - static int ReleaseOpenclRunEnv(); - static int ReleaseOpenclEnv(GPUEnv *gpuInfo); - static int CompileKernelFile(GPUEnv *gpuInfo, const char *buildOption); - static int CachedOfKernerPrg(const GPUEnv *gpuEnvCached, const char *clFileName); - static int GeneratBinFromKernelSource(cl_program program, const char *clFileName); - static int WriteBinaryToFile(const char *fileName, const char *birary, size_t numBytes); - static int BinaryGenerated(const char *clFileName, FILE **fhandle); - // static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const - // char *buildOption ); - static l_uint32 *pixReadFromTiffKernel(l_uint32 *tiffdata, l_int32 w, l_int32 h, l_int32 wpl, - l_uint32 *line); - static int composeRGBPixelCl(int *tiffdata, int *line, int h, int w); - - /* OpenCL implementations of Morphological operations*/ - - // Initialization of OCL buffers used in Morph operations - static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs); - static void releaseMorphCLBuffers(); - - static void pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, Image *pixClosed, - bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize, - l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize, - l_int32 line_vsize); - - // int InitOpenclAttr( OpenCLEnv * env ); - // int ReleaseKernel( KernelEnv * env ); - static int SetKernelEnv(KernelEnv *envInfo); - // int CreateKernel( char * kernelname, KernelEnv * env ); - // int RunKernel( const char *kernelName, void **userdata ); - // int ConvertToString( const char *filename, char **source ); - // int CheckKernelName( KernelEnv *envInfo, const char *kernelName ); - // int RegisterKernelWrapper( const char *kernelName, cl_kernel_function - // function ); int RunKernelWrapper( cl_kernel_function function, const char * - // kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char - // *kernelName, KernelEnv *env, cl_kernel_function *function ); - - static int LoadOpencl(); -# ifdef WIN32 - // static int OpenclInite(); - static void FreeOpenclDll(); -# endif - - inline static int AddKernelConfig(int kCount, const char *kName); - - /* for binarization */ - static int HistogramRectOCL(void *imagedata, int bytes_per_pixel, int bytes_per_line, int left, - int top, int width, int height, int kHistogramSize, - int *histogramAllChannels); - - static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel, - int bytes_per_line, int *thresholds, int *hi_values, Image *pix, - int rect_height, int rect_width, int rect_top, int rect_left); - - static ds_device getDeviceSelection(); - static ds_device selectedDevice; - static bool deviceIsSelected; - static bool selectedDeviceIsOpenCL(); -}; - -} - -#endif // USE_OPENCL -#endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_ diff -Nru tesseract-5.3.4+git6348-2b07505e/src/tesseract.cpp tesseract-5.3.4+git6361-d4618678/src/tesseract.cpp --- tesseract-5.3.4+git6348-2b07505e/src/tesseract.cpp 2024-03-17 20:39:02.345838800 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/tesseract.cpp 2024-04-26 18:21:25.625095100 +0000 @@ -34,9 +34,6 @@ #include #include #include "dict.h" -#if defined(USE_OPENCL) -# include "openclwrapper.h" // for OpenclDevice -#endif #include #include "simddetect.h" #include "tesseractclass.h" // for AnyTessLang @@ -112,34 +109,6 @@ printf(" %s\n", versionStrP); lept_free(versionStrP); -#ifdef USE_OPENCL - cl_platform_id platform[4]; - cl_uint num_platforms; - - printf(" OpenCL info:\n"); - if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) { - printf(" Found %u platform(s).\n", num_platforms); - for (unsigned n = 0; n < num_platforms; n++) { - char info[256]; - if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) { - printf(" Platform %u name: %s.\n", n + 1, info); - } - if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) { - printf(" Version: %s.\n", info); - } - cl_device_id devices[2]; - cl_uint num_devices; - if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) { - printf(" Found %u device(s).\n", num_devices); - for (unsigned i = 0; i < num_devices; ++i) { - if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) { - printf(" Device %u name: %s.\n", i + 1, info); - } - } - } - } - } -#endif #if defined(HAVE_NEON) || defined(__aarch64__) if (tesseract::SIMDDetect::IsNEONAvailable()) printf(" Found NEON\n"); @@ -530,6 +499,17 @@ error = true; } } + + api.GetBoolVariable("tessedit_create_page_xml", &b); + if (b) { + auto renderer = std::make_unique(outputbase); + if (renderer->happy()) { + renderers.push_back(std::move(renderer)); + } else { + tprintf("Error, could not create PAGE output file: %s\n", strerror(errno)); + error = true; + } + } api.GetBoolVariable("tessedit_create_tsv", &b); if (b) { diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/alignedblob.cpp tesseract-5.3.4+git6361-d4618678/src/textord/alignedblob.cpp --- tesseract-5.3.4+git6348-2b07505e/src/textord/alignedblob.cpp 2024-03-17 20:39:02.345838800 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/alignedblob.cpp 2024-04-26 18:21:25.625095100 +0000 @@ -167,7 +167,7 @@ tab_win = MakeWindow(0, 50, window_name); } // For every tab in the grid, display it. - GridSearch gsearch(this); + BlobGridSearch gsearch(this); gsearch.StartFullSearch(); BLOBNBOX *bbox; while ((bbox = gsearch.NextFullSearch()) != nullptr) { @@ -409,7 +409,7 @@ xmin -= p.min_gutter; } // Setup a vertical search for an aligned blob. - GridSearch vsearch(this); + BlobGridSearch vsearch(this); if (WithinTestRegion(2, x_start, start_y)) { tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left", xmin, xmax, start_y, diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/colfind.cpp tesseract-5.3.4+git6361-d4618678/src/textord/colfind.cpp --- tesseract-5.3.4+git6348-2b07505e/src/textord/colfind.cpp 2024-03-17 20:39:02.349172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/colfind.cpp 2024-04-26 18:21:25.625095100 +0000 @@ -142,7 +142,7 @@ } // Performs initial processing on the blobs in the input_block: -// Setup the part_grid, stroke_width_, nontext_map. +// Setup the part_grid_, stroke_width_, nontext_map. // Obvious noise blobs are filtered out and used to mark the nontext_map_. // Initial stroke-width analysis is used to get local text alignment // direction, so the textline projection_ map can be setup. @@ -971,7 +971,7 @@ // Splits partitions that cross columns where they have nothing in the gap. void ColumnFinder::GridSplitPartitions() { // Iterate the ColPartitions in the grid. - GridSearch gsearch(&part_grid_); + ColPartitionGridSearch gsearch(&part_grid_); gsearch.StartFullSearch(); ColPartition *dont_repeat = nullptr; ColPartition *part; @@ -1438,7 +1438,7 @@ // like horizontal lines going before the text lines above them. ColPartition_CLIST temp_part_list; // Iterate the ColPartitions in the grid. It starts at the top - GridSearch gsearch(&part_grid_); + ColPartitionGridSearch gsearch(&part_grid_); gsearch.StartFullSearch(); int prev_grid_y = -1; ColPartition *part; diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/linefind.cpp tesseract-5.3.4+git6361-d4618678/src/textord/linefind.cpp --- tesseract-5.3.4+git6348-2b07505e/src/textord/linefind.cpp 2024-03-17 20:39:02.349172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/linefind.cpp 2024-04-26 18:21:25.628428500 +0000 @@ -27,9 +27,6 @@ #include "edgblob.h" #include "linefind.h" #include "tabvector.h" -#if defined(USE_OPENCL) -# include "openclwrapper.h" // for OpenclDevice -#endif #include @@ -469,48 +466,33 @@ } int closing_brick = max_line_width / 3; -// only use opencl if compiled w/ OpenCL and selected device is opencl -#ifdef USE_OPENCL - if (OpenclDevice::selectedDeviceIsOpenCL()) { - // OpenCL pixGetLines Operation - int clStatus = - OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), pixGetHeight(src_pix), src_pix); - bool getpixclosed = pix_music_mask != nullptr; - OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, &pix_closed, getpixclosed, - closing_brick, closing_brick, max_line_width, max_line_width, - min_line_length, min_line_length); - } else { -#endif - // Close up small holes, making it less likely that false alarms are found - // in thickened text (as it will become more solid) and also smoothing over - // some line breaks and nicks in the edges of the lines. - pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); - if (pixa_display != nullptr) { - pixaAddPix(pixa_display, pix_closed, L_CLONE); - } - // Open up with a big box to detect solid areas, which can then be - // subtracted. This is very generous and will leave in even quite wide - // lines. - Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); - if (pixa_display != nullptr) { - pixaAddPix(pixa_display, pix_solid, L_CLONE); - } - pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - - pix_solid.destroy(); - - // Now open up in both directions independently to find lines of at least - // 1 inch/kMinLineLengthFraction in length. - if (pixa_display != nullptr) { - pixaAddPix(pixa_display, pix_hollow, L_CLONE); - } - *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); + // Close up small holes, making it less likely that false alarms are found + // in thickened text (as it will become more solid) and also smoothing over + // some line breaks and nicks in the edges of the lines. + pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); + if (pixa_display != nullptr) { + pixaAddPix(pixa_display, pix_closed, L_CLONE); + } + // Open up with a big box to detect solid areas, which can then be + // subtracted. This is very generous and will leave in even quite wide + // lines. + Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); + if (pixa_display != nullptr) { + pixaAddPix(pixa_display, pix_solid, L_CLONE); + } + pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); + + pix_solid.destroy(); - pix_hollow.destroy(); -#ifdef USE_OPENCL + // Now open up in both directions independently to find lines of at least + // 1 inch/kMinLineLengthFraction in length. + if (pixa_display != nullptr) { + pixaAddPix(pixa_display, pix_hollow, L_CLONE); } -#endif + *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); + *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); + + pix_hollow.destroy(); // Lines are sufficiently rare, that it is worth checking for a zero image. bool v_empty = pix_vline->isZero(); diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/makerow.cpp tesseract-5.3.4+git6361-d4618678/src/textord/makerow.cpp --- tesseract-5.3.4+git6348-2b07505e/src/textord/makerow.cpp 2024-03-17 20:39:02.349172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/makerow.cpp 2024-04-26 18:21:25.628428500 +0000 @@ -2485,8 +2485,8 @@ row_it->forward(); test_row = row_it->data(); if (test_row->min_y() <= top && test_row->max_y() >= bottom) { - merge_top = test_row->max_y() > row->max_y() ? test_row->max_y() : row->max_y(); - merge_bottom = test_row->min_y() < row->min_y() ? test_row->min_y() : row->min_y(); + merge_top = std::max(test_row->max_y(),row->max_y()); + merge_bottom = std::min(test_row->min_y(),row->min_y()); if (merge_top - merge_bottom <= rowsize) { if (testing_blob && textord_debug_blob) { tprintf("Merging rows at (%g,%g), (%g,%g)\n", row->min_y(), row->max_y(), diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/strokewidth.h tesseract-5.3.4+git6361-d4618678/src/textord/strokewidth.h --- tesseract-5.3.4+git6348-2b07505e/src/textord/strokewidth.h 2024-03-17 20:39:02.349172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/strokewidth.h 2024-04-26 18:21:25.628428500 +0000 @@ -89,7 +89,7 @@ // Corrects the data structures for the given rotation. void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid); - // Finds leader partitions and inserts them into the give grid. + // Finds leader partitions and inserts them into the given grid. void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid); // Finds and marks noise those blobs that look like bits of vertical lines diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/tabfind.cpp tesseract-5.3.4+git6361-d4618678/src/textord/tabfind.cpp --- tesseract-5.3.4+git6348-2b07505e/src/textord/tabfind.cpp 2024-03-17 20:39:02.349172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/tabfind.cpp 2024-04-26 18:21:25.628428500 +0000 @@ -562,7 +562,7 @@ left_tab_boxes_.clear(); right_tab_boxes_.clear(); // For every bbox in the grid, determine whether it uses a tab on an edge. - GridSearch gsearch(this); + BlobGridSearch gsearch(this); gsearch.StartFullSearch(); BLOBNBOX *bbox; while ((bbox = gsearch.NextFullSearch()) != nullptr) { diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/tablefind.cpp tesseract-5.3.4+git6361-d4618678/src/textord/tablefind.cpp --- tesseract-5.3.4+git6348-2b07505e/src/textord/tablefind.cpp 2024-03-17 20:39:02.349172000 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/tablefind.cpp 2024-04-26 18:21:25.628428500 +0000 @@ -679,8 +679,7 @@ TBOX part_box = part->bounding_box(); // Start a rect search - GridSearch rectsearch( - &clean_part_grid_); + ColPartitionGridSearch rectsearch(&clean_part_grid_); rectsearch.StartRectSearch(box); ColPartition *neighbor; int min_space_above = kMaxVerticalSpacing; @@ -843,8 +842,7 @@ // 4- Partitions with leaders before/after them. void TableFinder::MarkPartitionsUsingLocalInformation() { // Iterate the ColPartitions in the grid. - GridSearch gsearch( - &clean_part_grid_); + ColPartitionGridSearch gsearch(&clean_part_grid_); gsearch.StartFullSearch(); ColPartition *part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -1190,8 +1188,7 @@ TBOX box = seg->bounding_box(); int num_table_cells = 0; int num_text_cells = 0; - GridSearch rsearch( - &clean_part_grid_); + ColPartitionGridSearch rsearch(&clean_part_grid_); rsearch.SetUniqueMode(true); rsearch.StartRectSearch(box); ColPartition *part = nullptr; @@ -1320,8 +1317,7 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { ColSegment_IT it(table_columns); // Iterate the ColPartitions in the grid. - GridSearch gsearch( - &clean_part_grid_); + ColPartitionGridSearch gsearch(&clean_part_grid_); gsearch.StartFullSearch(); ColPartition *part; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -1335,8 +1331,7 @@ // Start a search below the current cell to find bottom neighbours // Note: a full search will always process things above it first, so // this should be starting at the highest cell and working its way down. - GridSearch vsearch( - &clean_part_grid_); + ColPartitionGridSearch vsearch(&clean_part_grid_); vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom()); ColPartition *neighbor = nullptr; bool found_neighbours = false; @@ -1503,8 +1498,7 @@ // Check for ColPartitions spanning both table regions TBOX bbox = box1.bounding_union(box2); // Start a rect search on bbox - GridSearch rectsearch( - &clean_part_grid_); + ColPartitionGridSearch rectsearch(&clean_part_grid_); rectsearch.StartRectSearch(bbox); ColPartition *part = nullptr; while ((part = rectsearch.NextRectSearch()) != nullptr) { @@ -1783,8 +1777,7 @@ table_xprojection[i] = 0; } // Start a rect search on table_box - GridSearch rectsearch( - &clean_part_grid_); + ColPartitionGridSearch rectsearch(&clean_part_grid_); rectsearch.SetUniqueMode(true); rectsearch.StartRectSearch(table_box); ColPartition *part; @@ -1972,7 +1965,7 @@ ScrollView::Color table_color) { ScrollView::Color color = default_color; // Iterate the ColPartitions in the grid. - GridSearch gsearch(grid); + ColPartitionGridSearch gsearch(grid); gsearch.StartFullSearch(); ColPartition *part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -2002,7 +1995,7 @@ ColPartitionGrid *grid, ScrollView::Color color) { // Iterate the ColPartitions in the grid. - GridSearch gsearch(grid); + ColPartitionGridSearch gsearch(grid); gsearch.StartFullSearch(); ColPartition *part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -2048,7 +2041,7 @@ const WidthCallback &width_cb) { // Since we have table blocks already, remove table tags from all // colpartitions - GridSearch gsearch(grid); + ColPartitionGridSearch gsearch(grid); gsearch.StartFullSearch(); ColPartition *part = nullptr; @@ -2066,8 +2059,7 @@ while ((table = table_search.NextFullSearch()) != nullptr) { const TBOX &table_box = table->bounding_box(); // Start a rect search on table_box - GridSearch rectsearch( - grid); + ColPartitionGridSearch rectsearch(grid); rectsearch.StartRectSearch(table_box); ColPartition *part; ColPartition *table_partition = nullptr; diff -Nru tesseract-5.3.4+git6348-2b07505e/src/textord/tordmain.cpp tesseract-5.3.4+git6361-d4618678/src/textord/tordmain.cpp --- tesseract-5.3.4+git6348-2b07505e/src/textord/tordmain.cpp 2024-03-17 20:39:02.352505700 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/textord/tordmain.cpp 2024-04-26 18:21:25.628428500 +0000 @@ -533,16 +533,17 @@ } } } + // TODO: check whether `&& super_norm_count < textord_noise_sncount`should always be added here. + bool rejected = dot_count > norm_count * textord_noise_normratio && + dot_count > 2; if (textord_noise_debug) { tprintf("Row ending at (%d,%g):", blob_box.right(), row->base_line(blob_box.right())); tprintf(" R=%g, dc=%d, nc=%d, %s\n", norm_count > 0 ? static_cast(dot_count) / norm_count : 9999, dot_count, norm_count, - dot_count > norm_count * textord_noise_normratio && dot_count > 2 ? "REJECTED" - : "ACCEPTED"); + rejected? "REJECTED": "ACCEPTED"); } - return super_norm_count < textord_noise_sncount && - dot_count > norm_count * textord_noise_rowratio && dot_count > 2; + return super_norm_count < textord_noise_sncount && rejected; } /********************************************************************** diff -Nru tesseract-5.3.4+git6348-2b07505e/src/training/CMakeLists.txt tesseract-5.3.4+git6361-d4618678/src/training/CMakeLists.txt --- tesseract-5.3.4+git6348-2b07505e/src/training/CMakeLists.txt 2024-03-17 20:39:02.352505700 +0000 +++ tesseract-5.3.4+git6361-d4618678/src/training/CMakeLists.txt 2024-04-26 18:21:25.631762000 +0000 @@ -346,7 +346,7 @@ # ############################################################################ add_executable(unicharset_extractor unicharset_extractor.cpp) - set_property(TARGET unicharset_extractor PROPERTY CXX_STANDARD 17) + target_compile_features(unicharset_extractor PRIVATE cxx_std_17) target_link_libraries(unicharset_extractor unicharset_training) project_group(unicharset_extractor "Training Tools") install( diff -Nru tesseract-5.3.4+git6348-2b07505e/sw.cpp tesseract-5.3.4+git6361-d4618678/sw.cpp --- tesseract-5.3.4+git6348-2b07505e/sw.cpp 2024-03-17 20:39:02.355839300 +0000 +++ tesseract-5.3.4+git6361-d4618678/sw.cpp 2024-04-26 18:21:25.635095400 +0000 @@ -21,7 +21,6 @@ libtesseract.Public += "include"_idir; libtesseract.Protected += - "src/opencl"_id, "src/ccmain"_id, "src/api"_id, "src/dict"_id, diff -Nru tesseract-5.3.4+git6348-2b07505e/tessdata/configs/Makefile.am tesseract-5.3.4+git6361-d4618678/tessdata/configs/Makefile.am --- tesseract-5.3.4+git6348-2b07505e/tessdata/configs/Makefile.am 2024-03-17 20:39:02.355839300 +0000 +++ tesseract-5.3.4+git6361-d4618678/tessdata/configs/Makefile.am 2024-04-26 18:21:25.635095400 +0000 @@ -3,6 +3,6 @@ data_DATA += api_config kannada box.train.stderr quiet logfile digits get.images data_DATA += lstmbox wordstrbox # Configurations for OCR output. -data_DATA += alto hocr pdf tsv txt +data_DATA += alto hocr page pdf tsv txt data_DATA += linebox rebox strokewidth bigram EXTRA_DIST = $(data_DATA) diff -Nru tesseract-5.3.4+git6348-2b07505e/tessdata/configs/page tesseract-5.3.4+git6361-d4618678/tessdata/configs/page --- tesseract-5.3.4+git6348-2b07505e/tessdata/configs/page 1970-01-01 00:00:00.000000000 +0000 +++ tesseract-5.3.4+git6361-d4618678/tessdata/configs/page 2024-04-26 18:21:25.635095400 +0000 @@ -0,0 +1,3 @@ +tessedit_create_page_xml 1 +# page_xml_polygon 1 +# page_xml_level 0 diff -Nru tesseract-5.3.4+git6348-2b07505e/tesseract.pc.in tesseract-5.3.4+git6361-d4618678/tesseract.pc.in --- tesseract-5.3.4+git6348-2b07505e/tesseract.pc.in 2024-03-17 20:39:02.355839300 +0000 +++ tesseract-5.3.4+git6361-d4618678/tesseract.pc.in 2024-04-26 18:21:25.635095400 +0000 @@ -12,5 +12,5 @@ Version: @VERSION@ Requires.private: lept Libs: -L${libdir} -ltesseract @libarchive_LIBS@ @libcurl_LIBS@ @TENSORFLOW_LIBS@ -Libs.private: -lpthread @OPENCL_LDFLAGS@ +Libs.private: -lpthread Cflags: -I${includedir}