diff -Nru expat-2.2.2/Changes expat-2.2.3/Changes --- expat-2.2.2/Changes 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/Changes 2017-08-02 13:40:48.000000000 +0000 @@ -2,6 +2,46 @@ https://github.com/libexpat/libexpat/labels/help%20wanted If you can help, please get in touch. Thanks! +Release 2.2.3 Wed August 2 2017 + Security fixes: + #82 CVE-2017-11742 -- Windows: Fix DLL hijacking vulnerability + using Steve Holme's LoadLibrary wrapper for/of cURL + + Bug fixes: + #85 Fix a dangling pointer issue related to realloc + + Other changes: + Increase code coverage + #91 Linux: Allow getrandom to fail if nonblocking pool has not + yet been initialized and read /dev/urandom then, instead. + This is in line with what recent Python does. + #81 Pre-10.7/Lion macOS: Support entropy from arc4random + #86 Check that a UTF-16 encoding in an XML declaration has the + right endianness + #4 #5 #7 Recover correctly when some reallocations fail + Repair "./configure && make" for systems without any + provider of high quality entropy + and try reading /dev/urandom on those + Ensure that user-defined character encodings have converter + functions when they are needed + Fix mis-leading description of argument -c in xmlwf.1 + Rely on macro HAVE_ARC4RANDOM_BUF (rather than __CloudABI__) + for CloudABI + #100 Fix use of SIPHASH_MAIN in siphash.h + #23 Test suite: Fix memory leaks + Version info bumped from 7:4:6 to 7:5:6 + + Special thanks to: + Chanho Park + Joe Orton + Pascal Cuoq + Rhodri James + Simon McVittie + Vadim Zeitlin + Viktor Szakats + and + Core Infrastructure Initiative + Release 2.2.2 Wed July 12 2017 Security fixes: #43 Protect against compilation without any source of high diff -Nru expat-2.2.2/CMakeLists.txt expat-2.2.3/CMakeLists.txt --- expat-2.2.2/CMakeLists.txt 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/CMakeLists.txt 2017-08-02 13:40:48.000000000 +0000 @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.6) set(PACKAGE_BUGREPORT 
"expat-bugs@libexpat.org") set(PACKAGE_NAME "expat") -set(PACKAGE_VERSION "2.2.2") +set(PACKAGE_VERSION "2.2.3") set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_TARNAME "${PACKAGE_NAME}") @@ -54,6 +54,7 @@ endif(WIN32) set(expat_SRCS + lib/loadlibrary.c lib/xmlparse.c lib/xmlrole.c lib/xmltok.c @@ -76,7 +77,7 @@ add_library(expat ${_SHARED} ${expat_SRCS}) set(LIBCURRENT 7) # sync -set(LIBREVISION 4) # with +set(LIBREVISION 5) # with set(LIBAGE 6) # configure.ac! math(EXPR LIBCURRENT_MINUS_AGE "${LIBCURRENT} - ${LIBAGE}") diff -Nru expat-2.2.2/CMake.README expat-2.2.3/CMake.README --- expat-2.2.2/CMake.README 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/CMake.README 2017-08-02 13:40:48.000000000 +0000 @@ -3,25 +3,25 @@ The cmake based buildsystem for expat works on Windows (cygwin, mingw, Visual Studio) and should work on all other platform cmake supports. -Assuming ~/expat-2.2.2 is the source directory of expat, add a subdirectory +Assuming ~/expat-2.2.3 is the source directory of expat, add a subdirectory build and change into that directory: -~/expat-2.2.2$ mkdir build && cd build -~/expat-2.2.2/build$ +~/expat-2.2.3$ mkdir build && cd build +~/expat-2.2.3/build$ From that directory, call cmake first, then call make, make test and make install in the usual way: -~/expat-2.2.2/build$ cmake .. +~/expat-2.2.3/build$ cmake .. -- The C compiler identification is GNU -- The CXX compiler identification is GNU .... -- Configuring done -- Generating done --- Build files have been written to: /home/patrick/expat-2.2.2/build +-- Build files have been written to: /home/patrick/expat-2.2.3/build If you want to specify the install location for your files, append -DCMAKE_INSTALL_PREFIX=/your/install/path to the cmake call. 
-~/expat-2.2.2/build$ make && make test && make install +~/expat-2.2.3/build$ make && make test && make install Scanning dependencies of target expat [ 5%] Building C object CMakeFiles/expat.dir/lib/xmlparse.c.o [ 11%] Building C object CMakeFiles/expat.dir/lib/xmlrole.c.o diff -Nru expat-2.2.2/configure expat-2.2.3/configure --- expat-2.2.2/configure 2017-07-13 20:17:21.000000000 +0000 +++ expat-2.2.3/configure 2017-08-02 17:24:46.000000000 +0000 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for expat 2.2.2. +# Generated by GNU Autoconf 2.69 for expat 2.2.3. # # Report bugs to . # @@ -590,8 +590,8 @@ # Identity of this package. PACKAGE_NAME='expat' PACKAGE_TARNAME='expat' -PACKAGE_VERSION='2.2.2' -PACKAGE_STRING='expat 2.2.2' +PACKAGE_VERSION='2.2.3' +PACKAGE_STRING='expat 2.2.3' PACKAGE_BUGREPORT='expat-bugs@libexpat.org' PACKAGE_URL='' @@ -1293,7 +1293,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures expat 2.2.2 to adapt to many kinds of systems. +\`configure' configures expat 2.2.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1358,7 +1358,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of expat 2.2.2:";; + short | recursive ) echo "Configuration of expat 2.2.3:";; esac cat <<\_ACEOF @@ -1472,7 +1472,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -expat configure 2.2.2 +expat configure 2.2.3 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2016,7 +2016,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by expat $as_me 2.2.2, which was +It was created by expat $as_me 2.2.3, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2404,7 +2404,7 @@ LIBCURRENT=7 # sync -LIBREVISION=4 # with +LIBREVISION=5 # with LIBAGE=6 # CMakeLists.txt! ac_config_headers="$ac_config_headers expat_config.h" @@ -15962,6 +15962,39 @@ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for arc4random (BSD, macOS or libbsd)" >&5 +$as_echo_n "checking for arc4random (BSD, macOS or libbsd)... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #if defined(HAVE_LIBBSD) + # include + #else + # include + #endif + int main() { + arc4random(); + return 0; + } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + +$as_echo "#define HAVE_ARC4RANDOM 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +else + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext @@ -16269,6 +16302,9 @@ $as_echo "#define XML_DTD 1" >>confdefs.h +$as_echo "#define XML_DEV_URANDOM 1" >>confdefs.h + + # Check whether --enable-xml-context was given. if test "${enable_xml_context+set}" = set; then : enableval=$enable_xml_context; enable_xml_context=${enableval} @@ -16800,7 +16836,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by expat $as_me 2.2.2, which was +This file was extended by expat $as_me 2.2.3, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -16866,7 +16902,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -expat config.status 2.2.2 +expat config.status 2.2.3 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff -Nru expat-2.2.2/configure.ac expat-2.2.3/configure.ac --- expat-2.2.2/configure.ac 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/configure.ac 2017-08-02 13:40:48.000000000 +0000 @@ -46,7 +46,7 @@ dnl LIBCURRENT=7 # sync -LIBREVISION=4 # with +LIBREVISION=5 # with LIBAGE=6 # CMakeLists.txt! AC_CONFIG_HEADER(expat_config.h) @@ -126,6 +126,25 @@ AC_MSG_RESULT([yes]) ], [ AC_MSG_RESULT([no]) + + AC_MSG_CHECKING([for arc4random (BSD, macOS or libbsd)]) + AC_LINK_IFELSE([AC_LANG_SOURCE([ + #if defined(HAVE_LIBBSD) + # include + #else + # include + #endif + int main() { + arc4random(); + return 0; + } + ])], [ + AC_DEFINE([HAVE_ARC4RANDOM], [1], + [Define to 1 if you have the `arc4random' function.]) + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) ]) @@ -180,6 +199,8 @@ [Define to make XML Namespaces functionality available.]) AC_DEFINE([XML_DTD], 1, [Define to make parameter entity parsing functionality available.]) +AC_DEFINE([XML_DEV_URANDOM], 1, + [Define to include code reading entropy from `/dev/urandom'.]) AC_ARG_ENABLE([xml-context], AS_HELP_STRING([--enable-xml-context @<:@COUNT@:>@], diff -Nru expat-2.2.2/debian/changelog expat-2.2.3/debian/changelog --- expat-2.2.2/debian/changelog 2017-07-16 14:23:03.000000000 +0000 +++ expat-2.2.3/debian/changelog 2017-08-02 19:54:40.000000000 +0000 @@ -1,3 +1,10 @@ +expat (2.2.3-1) unstable; urgency=medium + + * New upstream release. + * Remove dh-autoreconf build dependency. + + -- Laszlo Boszormenyi (GCS) Wed, 02 Aug 2017 19:54:40 +0000 + expat (2.2.2-2) unstable; urgency=medium * Build with libbsd on Hurd as well. 
diff -Nru expat-2.2.2/debian/control expat-2.2.3/debian/control --- expat-2.2.2/debian/control 2017-07-16 14:23:03.000000000 +0000 +++ expat-2.2.3/debian/control 2017-08-02 19:54:40.000000000 +0000 @@ -3,7 +3,7 @@ Priority: optional Maintainer: Laszlo Boszormenyi (GCS) Standards-Version: 4.0.0 -Build-Depends: debhelper (>= 10), docbook-to-man, dh-autoreconf, +Build-Depends: debhelper (>= 10), docbook-to-man, libbsd-dev [kfreebsd-amd64 kfreebsd-i386 hurd-i386], gcc-multilib [i386 powerpc sparc s390] Homepage: http://expat.sourceforge.net diff -Nru expat-2.2.2/doc/xmlwf.1 expat-2.2.3/doc/xmlwf.1 --- expat-2.2.2/doc/xmlwf.1 2017-07-13 20:17:23.000000000 +0000 +++ expat-2.2.3/doc/xmlwf.1 2017-08-02 17:24:48.000000000 +0000 @@ -71,7 +71,7 @@ doesn't encounter any errors, the input file is simply copied to the output directory unchanged. This implies no namespaces (turns off \*(T<\fB\-n\fR\*(T>) and -requires \*(T<\fB\-d\fR\*(T> to specify an output file. +requires \*(T<\fB\-d\fR\*(T> to specify an output directory. .TP \*(T<\fB\-d output\-dir\fR\*(T> Specifies a directory to contain transformed diff -Nru expat-2.2.2/doc/xmlwf.xml expat-2.2.3/doc/xmlwf.xml --- expat-2.2.2/doc/xmlwf.xml 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/doc/xmlwf.xml 2017-08-02 13:40:48.000000000 +0000 @@ -146,7 +146,7 @@ doesn't encounter any errors, the input file is simply copied to the output directory unchanged. This implies no namespaces (turns off ) and - requires to specify an output file. + requires to specify an output directory. diff -Nru expat-2.2.2/expat_config.h.in expat-2.2.3/expat_config.h.in --- expat-2.2.2/expat_config.h.in 2017-07-13 20:17:22.000000000 +0000 +++ expat-2.2.3/expat_config.h.in 2017-08-02 17:24:47.000000000 +0000 @@ -3,6 +3,9 @@ /* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ #undef BYTEORDER +/* Define to 1 if you have the `arc4random' function. */ +#undef HAVE_ARC4RANDOM + /* Define to 1 if you have the `arc4random_buf' function. 
*/ #undef HAVE_ARC4RANDOM_BUF @@ -94,6 +97,9 @@ point. */ #undef XML_CONTEXT_BYTES +/* Define to include code reading entropy from `/dev/urandom'. */ +#undef XML_DEV_URANDOM + /* Define to make parameter entity parsing functionality available. */ #undef XML_DTD diff -Nru expat-2.2.2/lib/expat.h expat-2.2.3/lib/expat.h --- expat-2.2.2/lib/expat.h 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/lib/expat.h 2017-08-02 13:40:48.000000000 +0000 @@ -1048,7 +1048,7 @@ */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 2 -#define XML_MICRO_VERSION 2 +#define XML_MICRO_VERSION 3 #ifdef __cplusplus } diff -Nru expat-2.2.2/lib/loadlibrary.c expat-2.2.3/lib/loadlibrary.c --- expat-2.2.2/lib/loadlibrary.c 1970-01-01 00:00:00.000000000 +0000 +++ expat-2.2.3/lib/loadlibrary.c 2017-08-02 13:40:48.000000000 +0000 @@ -0,0 +1,141 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 2016 - 2017, Steve Holme, . + * + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF + * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH + * THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization of the + * copyright holder. + * + ***************************************************************************/ + +#if defined(_WIN32) + +#include +#include + + +HMODULE _Expat_LoadLibrary(LPCTSTR filename); + + +#if !defined(LOAD_WITH_ALTERED_SEARCH_PATH) +#define LOAD_WITH_ALTERED_SEARCH_PATH 0x00000008 +#endif + +#if !defined(LOAD_LIBRARY_SEARCH_SYSTEM32) +#define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 +#endif + +/* We use our own typedef here since some headers might lack these */ +typedef HMODULE (APIENTRY *LOADLIBRARYEX_FN)(LPCTSTR, HANDLE, DWORD); + +/* See function definitions in winbase.h */ +#ifdef UNICODE +# ifdef _WIN32_WCE +# define LOADLIBARYEX L"LoadLibraryExW" +# else +# define LOADLIBARYEX "LoadLibraryExW" +# endif +#else +# define LOADLIBARYEX "LoadLibraryExA" +#endif + + +/* + * _Expat_LoadLibrary() + * + * This is used to dynamically load DLLs using the most secure method available + * for the version of Windows that we are running on. + * + * Parameters: + * + * filename [in] - The filename or full path of the DLL to load. If only the + * filename is passed then the DLL will be loaded from the + * Windows system directory. + * + * Returns the handle of the module on success; otherwise NULL. 
+ */ +HMODULE _Expat_LoadLibrary(LPCTSTR filename) +{ + HMODULE hModule = NULL; + LOADLIBRARYEX_FN pLoadLibraryEx = NULL; + + /* Get a handle to kernel32 so we can access it's functions at runtime */ + HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32")); + if(!hKernel32) + return NULL; + + /* Attempt to find LoadLibraryEx() which is only available on Windows 2000 + and above */ + pLoadLibraryEx = (LOADLIBRARYEX_FN) GetProcAddress(hKernel32, LOADLIBARYEX); + + /* Detect if there's already a path in the filename and load the library if + there is. Note: Both back slashes and forward slashes have been supported + since the earlier days of DOS at an API level although they are not + supported by command prompt */ + if(_tcspbrk(filename, TEXT("\\/"))) { + /** !checksrc! disable BANNEDFUNC 1 **/ + hModule = pLoadLibraryEx ? + pLoadLibraryEx(filename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) : + LoadLibrary(filename); + } + /* Detect if KB2533623 is installed, as LOAD_LIBARY_SEARCH_SYSTEM32 is only + supported on Windows Vista, Windows Server 2008, Windows 7 and Windows + Server 2008 R2 with this patch or natively on Windows 8 and above */ + else if(pLoadLibraryEx && GetProcAddress(hKernel32, "AddDllDirectory")) { + /* Load the DLL from the Windows system directory */ + hModule = pLoadLibraryEx(filename, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + } + else { + /* Attempt to get the Windows system path */ + UINT systemdirlen = GetSystemDirectory(NULL, 0); + if(systemdirlen) { + /* Allocate space for the full DLL path (Room for the null terminator + is included in systemdirlen) */ + size_t filenamelen = _tcslen(filename); + TCHAR *path = malloc(sizeof(TCHAR) * (systemdirlen + 1 + filenamelen)); + if(path && GetSystemDirectory(path, systemdirlen)) { + /* Calculate the full DLL path */ + _tcscpy(path + _tcslen(path), TEXT("\\")); + _tcscpy(path + _tcslen(path), filename); + + /* Load the DLL from the Windows system directory */ + /** !checksrc! 
disable BANNEDFUNC 1 **/ + hModule = pLoadLibraryEx ? + pLoadLibraryEx(path, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) : + LoadLibrary(path); + + } + free(path); + } + } + + return hModule; +} + +#else /* defined(_WIN32) */ + +/* ISO C requires a translation unit to contain at least one declaration + [-Wempty-translation-unit] */ +typedef int _TRANSLATION_UNIT_LOAD_LIBRARY_C_NOT_EMTPY; + +#endif /* defined(_WIN32) */ diff -Nru expat-2.2.2/lib/siphash.h expat-2.2.3/lib/siphash.h --- expat-2.2.2/lib/siphash.h 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/lib/siphash.h 2017-08-02 13:40:48.000000000 +0000 @@ -11,6 +11,9 @@ * -------------------------------------------------------------------------- * HISTORY: * + * 2017-07-25 (Vadim Zeitlin) + * - Fix use of SIPHASH_MAIN macro + * * 2017-07-05 (Sebastian Pipping) * - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++ * - Add const qualifiers at two places @@ -350,7 +353,7 @@ } /* sip24_valid() */ -#if SIPHASH_MAIN +#ifdef SIPHASH_MAIN #include diff -Nru expat-2.2.2/lib/xmlparse.c expat-2.2.3/lib/xmlparse.c --- expat-2.2.2/lib/xmlparse.c 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/lib/xmlparse.c 2017-08-02 13:40:48.000000000 +0000 @@ -1,7 +1,7 @@ /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd See the file COPYING for copying permission. - cd4063469a95eab9a93001afb109e3dee122cdda4635bbec36257fc01c327348 (2.2.2+) + 101bfd65d1ff3d1511cf6671e6aae65f82cd97df6f4da137d46d510731830ad9 (2.2.3+) */ #if !defined(_GNU_SOURCE) @@ -21,6 +21,8 @@ #include /* gettimeofday() */ #include /* getpid() */ #include /* getpid() */ +#include /* O_RDONLY */ +#include #endif #define XML_BUILDING_EXPAT 1 @@ -36,22 +38,30 @@ #include "siphash.h" #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) -# include # if defined(HAVE_GETRANDOM) # include /* getrandom */ # else # include /* syscall */ # include /* SYS_getrandom */ # endif +# if ! 
defined(GRND_NONBLOCK) +# define GRND_NONBLOCK 0x0001 +# endif /* defined(GRND_NONBLOCK) */ #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ -#if defined(HAVE_ARC4RANDOM_BUF) && defined(HAVE_LIBBSD) +#if defined(HAVE_LIBBSD) \ + && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) # include #endif +#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32) +# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 +#endif #if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \ - && !defined(HAVE_ARC4RANDOM_BUF) && !defined(_WIN32) \ + && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \ + && !defined(XML_DEV_URANDOM) \ + && !defined(_WIN32) \ && !defined(XML_POOR_ENTROPY) # error \ You do not have support for any sources of high quality entropy \ @@ -60,8 +70,11 @@ Your options include: \ * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ - * BSD / macOS (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \ * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ + * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ + * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \ * Windows (RtlGenRandom): _WIN32. \ \ If insist on not using any of these, bypass this error by defining \ @@ -744,10 +757,10 @@ /* Obtain entropy on Linux 3.17+ */ static int -writeRandomBytes_getrandom(void * target, size_t count) { +writeRandomBytes_getrandom_nonblock(void * target, size_t count) { int success = 0; /* full count bytes written? */ size_t bytesWrittenTotal = 0; - const unsigned int getrandomFlags = 0; + const unsigned int getrandomFlags = GRND_NONBLOCK; do { void * const currentTarget = (void*)((char*)target + bytesWrittenTotal); @@ -765,7 +778,7 @@ if (bytesWrittenTotal >= count) success = 1; } - } while (! 
success && (errno == EINTR || errno == EAGAIN)); + } while (! success && (errno == EINTR)); return success; } @@ -773,12 +786,67 @@ #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ +#if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + +/* Extract entropy from /dev/urandom */ +static int +writeRandomBytes_dev_urandom(void * target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + + const int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + return 0; + } + + do { + void * const currentTarget = (void*)((char*)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR)); + + close(fd); + return success; +} + +#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + + +#if defined(HAVE_ARC4RANDOM) + +static void +writeRandomBytes_arc4random(void * target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + const uint32_t random32 = arc4random(); + size_t i = 0; + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } +} + +#endif /* defined(HAVE_ARC4RANDOM) */ + + #ifdef _WIN32 typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG); +HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */ /* Obtain entropy on Windows XP / Windows Server 2003 and later. - * Hint on RtlGenRandom and the following article from libsodioum. + * Hint on RtlGenRandom and the following article from libsodium. 
* * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/ @@ -786,7 +854,7 @@ static int writeRandomBytes_RtlGenRandom(void * target, size_t count) { int success = 0; /* full count bytes written? */ - const HMODULE advapi32 = LoadLibrary(TEXT("ADVAPI32.DLL")); + const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL")); if (advapi32) { const RTLGENRANDOM_FUNC RtlGenRandom @@ -805,6 +873,8 @@ #endif /* _WIN32 */ +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + static unsigned long gather_time_entropy(void) { @@ -829,6 +899,9 @@ #endif } +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + + static unsigned long ENTROPY_DEBUG(const char * label, unsigned long entropy) { const char * const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG"); @@ -846,10 +919,12 @@ { unsigned long entropy; (void)parser; -#if defined(HAVE_ARC4RANDOM_BUF) || defined(__CloudABI__) - (void)gather_time_entropy; +#if defined(HAVE_ARC4RANDOM_BUF) arc4random_buf(&entropy, sizeof(entropy)); return ENTROPY_DEBUG("arc4random_buf", entropy); +#elif defined(HAVE_ARC4RANDOM) + writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random", entropy); #else /* Try high quality providers first .. */ #ifdef _WIN32 @@ -857,10 +932,15 @@ return ENTROPY_DEBUG("RtlGenRandom", entropy); } #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) - if (writeRandomBytes_getrandom((void *)&entropy, sizeof(entropy))) { + if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("getrandom", entropy); } #endif +#if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("/dev/urandom", entropy); + } +#endif /* ! 
defined(_WIN32) && defined(XML_DEV_URANDOM) */ /* .. and self-made low quality for backup: */ /* Process ID is 0 bits entropy if attacker has local access */ @@ -1833,9 +1913,22 @@ if (errorCode == XML_ERROR_NONE) { switch (ps_parsing) { case XML_SUSPENDED: + /* It is hard to be certain, but it seems that this case + * cannot occur. This code is cleaning up a previous parse + * with no new data (since len == 0). Changing the parsing + * state requires getting to execute a handler function, and + * there doesn't seem to be an opportunity for that while in + * this circumstance. + * + * Given the uncertainty, we retain the code but exclude it + * from coverage tests. + * + * LCOV_EXCL_START + */ XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); positionPtr = bufferPtr; return XML_STATUS_SUSPENDED; + /* LCOV_EXCL_STOP */ case XML_INITIALIZED: case XML_PARSING: ps_parsing = XML_FINISHED; @@ -3024,9 +3117,17 @@ return XML_ERROR_NO_MEMORY; break; default: + /* All of the tokens produced by XmlContentTok() have their own + * explicit cases, so this default is not strictly necessary. + * However it is a useful safety net, so we retain the code and + * simply exclude it from the coverage tests. 
+ * + * LCOV_EXCL_START + */ if (defaultHandler) reportDefault(parser, enc, s, next); break; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; switch (ps_parsing) { @@ -3117,13 +3218,17 @@ #endif attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (temp == NULL) + if (temp == NULL) { + attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; + } atts = temp; #ifdef XML_ATTR_INFO temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); - if (temp2 == NULL) + if (temp2 == NULL) { + attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; + } attInfo = temp2; #endif if (n > oldAttsSize) @@ -3260,6 +3365,7 @@ int j; /* hash table index */ unsigned long version = nsAttsVersion; int nsAttsSize = (int)1 << nsAttsPower; + unsigned char oldNsAttsPower = nsAttsPower; /* size of hash table must be at least 2 * (# of prefixed attributes) */ if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ NS_ATT *temp; @@ -3269,8 +3375,11 @@ nsAttsPower = 3; nsAttsSize = (int)1 << nsAttsPower; temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); - if (!temp) + if (!temp) { + /* Restore actual size of memory in nsAtts */ + nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; + } nsAtts = temp; version = 0; /* force re-initialization of nsAtts hash table */ } @@ -3297,8 +3406,23 @@ ((XML_Char *)s)[-1] = 0; /* clear flag */ id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); - if (!id || !id->prefix) - return XML_ERROR_NO_MEMORY; + if (!id || !id->prefix) { + /* This code is walking through the appAtts array, dealing + * with (in this case) a prefixed attribute name. To be in + * the array, the attribute must have already been bound, so + * has to have passed through the hash table lookup once + * already. That implies that an entry for it already + * exists, so the lookup above will return a pointer to + * already allocated memory. 
There is no opportunaity for + * the allocator to fail, so the condition above cannot be + * fulfilled. + * + * Since it is difficult to be certain that the above + * analysis is complete, we retain the test and merely + * remove the code from coverage tests. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } b = id->prefix->binding; if (!b) return XML_ERROR_UNBOUND_PREFIX; @@ -3675,8 +3799,16 @@ } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: + /* Every token returned by XmlCdataSectionTok() has its own + * explicit case, so this default case will never be executed. + * We retain it as a safety net and exclude it from the coverage + * statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; @@ -3736,8 +3868,20 @@ eventEndPP = &eventEndPtr; } else { + /* It's not entirely clear, but it seems the following two lines + * of code cannot be executed. The only occasions on which 'enc' + * is not 'parser->m_encoding' are when this function is called + * from the internal entity processing, and IGNORE sections are an + * error in internal entities. + * + * Since it really isn't clear that this is true, we keep the code + * and just remove it from our coverage tests. + * + * LCOV_EXCL_START + */ eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } *eventPP = s; *startPtr = NULL; @@ -3770,8 +3914,16 @@ } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: + /* All of the tokens that XmlIgnoreSectionTok() returns have + * explicit cases to handle them, so this default case is never + * executed. We keep it as a safety net anyway, and remove it + * from our test coverage statistics. 
+ * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } /* not reached */ } @@ -3868,7 +4020,14 @@ reportDefault(parser, encoding, s, next); if (protocolEncodingName == NULL) { if (newEncoding) { - if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { + /* Check that the specified encoding does not conflict with what + * the parser has already deduced. Do we have the same number + * of bytes in the smallest representation of a character? If + * this is UTF-16, is it the same endianness? + */ + if (newEncoding->minBytesPerChar != encoding->minBytesPerChar + || (newEncoding->minBytesPerChar == 2 && + newEncoding != encoding)) { eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } @@ -4013,15 +4172,14 @@ result = processXmlDecl(parser, 0, start, next); if (result != XML_ERROR_NONE) return result; - switch (ps_parsing) { - case XML_SUSPENDED: - *nextPtr = next; - return XML_ERROR_NONE; - case XML_FINISHED: + /* At this point, ps_parsing cannot be XML_SUSPENDED. For that + * to happen, a parameter entity parsing handler must have + * attempted to suspend the parser, which fails and raises an + * error. The parser can be aborted, but can't be suspended. + */ + if (ps_parsing == XML_FINISHED) return XML_ERROR_ABORTED; - default: - *nextPtr = next; - } + *nextPtr = next; /* stop scanning for text declaration - we found one */ processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); @@ -4344,8 +4502,14 @@ &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); - if (!entity) - return XML_ERROR_NO_MEMORY; + if (!entity) { + /* The external subset name "#" will have already been + * inserted into the hash table at the start of the + * external entity parsing, so no allocation will happen + * and lookup() cannot fail. 
+ */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } if (useForeignDTD) entity->base = curBase; dtd->paramEntityRead = XML_FALSE; @@ -4824,8 +4988,10 @@ if (prologState.level >= groupSize) { if (groupSize) { char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); - if (temp == NULL) + if (temp == NULL) { + groupSize /= 2; return XML_ERROR_NO_MEMORY; + } groupConnector = temp; if (dtd->scaffIndex) { int *temp = (int *)REALLOC(dtd->scaffIndex, @@ -4837,8 +5003,10 @@ } else { groupConnector = (char *)MALLOC(groupSize = 32); - if (!groupConnector) + if (!groupConnector) { + groupSize = 0; return XML_ERROR_NO_MEMORY; + } } } groupConnector[prologState.level] = 0; @@ -4901,8 +5069,29 @@ : !dtd->hasParamEntityRefs)) { if (!entity) return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; + else if (!entity->is_internal) { + /* It's hard to exhaustively search the code to be sure, + * but there doesn't seem to be a way of executing the + * following line. There are two cases: + * + * If 'standalone' is false, the DTD must have no + * parameter entities or we wouldn't have passed the outer + * 'if' statement. That measn the only entity in the hash + * table is the external subset name "#" which cannot be + * given as a parameter entity name in XML syntax, so the + * lookup must have returned NULL and we don't even reach + * the test for an internal entity. + * + * If 'standalone' is true, it does not seem to be + * possible to create entities taking this code path that + * are not internal entities, so fail the test above. + * + * Because this analysis is very uncertain, the code is + * being left in place and merely removed from the + * coverage test statistics. 
+ */ + return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ + } } else if (!entity) { dtd->keepProcessing = dtd->standalone; @@ -5374,11 +5563,15 @@ && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ for (i = 0; i < n; i++) { if (!poolAppendChar(pool, buf[i])) return XML_ERROR_NO_MEMORY; @@ -5452,8 +5645,26 @@ break; } if (entity->open) { - if (enc == encoding) - eventPtr = ptr; + if (enc == encoding) { + /* It does not appear that this line can be executed. + * + * The "if (entity->open)" check catches recursive entity + * definitions. In order to be called with an open + * entity, it must have gone through this code before and + * been through the recursive call to + * appendAttributeValue() some lines below. That call + * sets the local encoding ("enc") to the parser's + * internal encoding (internal_utf8 or internal_utf16), + * which can never be the same as the principal encoding. + * It doesn't appear there is another code path that gets + * here with entity->open being TRUE. + * + * Since it is not certain that this logic is watertight, + * we keep the line and merely exclude it from coverage + * tests. + */ + eventPtr = ptr; /* LCOV_EXCL_LINE */ + } return XML_ERROR_RECURSIVE_ENTITY_REF; } if (entity->notation) { @@ -5480,9 +5691,21 @@ } break; default: + /* The only token returned by XmlAttributeValueTok() that does + * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
+ * Getting that would require an entity name to contain an + * incomplete XML character (e.g. \xE2\x82); however previous + * tokenisers will have already recognised and rejected such + * names before XmlAttributeValueTok() gets a look-in. This + * default case should be retained as a safety net, but the code + * excluded from coverage tests. + * + * LCOV_EXCL_START + */ if (enc == encoding) eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } ptr = next; } @@ -5615,12 +5838,15 @@ goto endEntityValue; } n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = entityTextPtr; - result = XML_ERROR_BAD_CHAR_REF; - goto endEntityValue; - } + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ for (i = 0; i < n; i++) { if (pool->end == pool->ptr && !poolGrow(pool)) { result = XML_ERROR_NO_MEMORY; @@ -5641,10 +5867,18 @@ result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; default: + /* This default case should be unnecessary -- all the tokens + * that XmlEntityValueTok() can return have their own explicit + * cases -- but should be retained for safety. We do however + * exclude it from the coverage statistics. 
+ * + * LCOV_EXCL_START + */ if (enc == encoding) eventPtr = entityTextPtr; result = XML_ERROR_UNEXPECTED_STATE; goto endEntityValue; + /* LCOV_EXCL_STOP */ } entityTextPtr = next; } @@ -5742,8 +5976,25 @@ eventEndPP = &eventEndPtr; } else { + /* To get here, two things must be true; the parser must be + * using a character encoding that is not the same as the + * encoding passed in, and the encoding passed in must need + * conversion to the internal format (UTF-8 unless XML_UNICODE + * is defined). The only occasions on which the encoding passed + * in is not the same as the parser's encoding are when it is + * the internal encoding (e.g. a previously defined parameter + * entity, already converted to internal format). This by + * definition doesn't need conversion, so the whole branch never + * gets executed. + * + * For safety's sake we don't delete these lines and merely + * exclude them from coverage statistics. + * + * LCOV_EXCL_START + */ eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } do { ICHAR *dataPtr = (ICHAR *)dataBuf; @@ -5912,9 +6163,30 @@ len = dtd->defaultPrefix.binding->uriLen; if (namespaceSeparator) len--; - for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) - return NULL; + for (i = 0; i < len; i++) { + if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) { + /* Because of memory caching, I don't believe this line can be + * executed. + * + * This is part of a loop copying the default prefix binding + * URI into the parser's temporary string pool. Previously, + * that URI was copied into the same string pool, with a + * terminating NUL character, as part of setContext(). When + * the pool was cleared, that leaves a block definitely big + * enough to hold the URI on the free block list of the pool. + * The URI copy in getContext() therefore cannot run out of + * memory. 
+ * + * If the pool is used between the setContext() and + * getContext() calls, the worst it can do is leave a bigger + * block on the front of the free list. Given that this is + * all somewhat inobvious and program logic can be changed, we + * don't delete the line but we do exclude it from the test + * coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } + } needSep = XML_TRUE; } @@ -5926,8 +6198,15 @@ PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); if (!prefix) break; - if (!prefix->binding) - continue; + if (!prefix->binding) { + /* This test appears to be (justifiable) paranoia. There does + * not seem to be a way of injecting a prefix without a binding + * that doesn't get errored long before this function is called. + * The test should remain for safety's sake, so we instead + * exclude the following line from the coverage statistics. + */ + continue; /* LCOV_EXCL_LINE */ + } if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) return NULL; for (s = prefix->name; *s; s++) @@ -6598,8 +6877,20 @@ static const XML_Char * poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { - if (!pool->ptr && !poolGrow(pool)) - return NULL; + if (!pool->ptr && !poolGrow(pool)) { + /* The following line is unreachable given the current usage of + * poolCopyStringN(). Currently it is called from exactly one + * place to copy the text of a simple general entity. By that + * point, the name of the entity is already stored in the pool, so + * pool->ptr cannot be NULL. + * + * If poolCopyStringN() is used elsewhere as it well might be, + * this line may well become executable again. Regardless, this + * sort of check shouldn't be removed lightly, so we just exclude + * it from the coverage statistics. 
+ */ + return NULL; /* LCOV_EXCL_LINE */ + } for (; n > 0; --n, s++) { if (!poolAppendChar(pool, *s)) return NULL; @@ -6692,8 +6983,19 @@ int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); size_t bytesToAllocate; - if (blockSize < 0) - return XML_FALSE; + // NOTE: Needs to be calculated prior to calling `realloc` + // to avoid dangling pointers: + const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; + + if (blockSize < 0) { + /* This condition traps a situation where more than + * INT_MAX/2 bytes have already been allocated. This isn't + * readily testable, since it is unlikely that an average + * machine will have that much memory, so we exclude it from the + * coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } bytesToAllocate = poolBytesToAllocateFor(blockSize); if (bytesToAllocate == 0) @@ -6705,7 +7007,7 @@ return XML_FALSE; pool->blocks = temp; pool->blocks->size = blockSize; - pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->ptr = pool->blocks->s + offsetInsideBlock; pool->start = pool->blocks->s; pool->end = pool->start + blockSize; } @@ -6714,8 +7016,18 @@ int blockSize = (int)(pool->end - pool->start); size_t bytesToAllocate; - if (blockSize < 0) - return XML_FALSE; + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX bytes have already been allocated (which is prevented + * by various pieces of program logic, not least this one, never + * mind the unlikelihood of actually having that much memory) or + * the pool control fields have been corrupted (which could + * conceivably happen in an extremely buggy user handler + * function). Either way it isn't readily testable, so we + * exclude it from the coverage statistics.
+ */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; diff -Nru expat-2.2.2/lib/xmlrole.c expat-2.2.3/lib/xmlrole.c --- expat-2.2.2/lib/xmlrole.c 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/lib/xmlrole.c 2017-08-02 13:40:48.000000000 +0000 @@ -170,7 +170,14 @@ case XML_TOK_COMMENT: return XML_ROLE_COMMENT; case XML_TOK_BOM: - return XML_ROLE_NONE; + /* This case can never arise. To reach this role function, the + * parse must have passed through prolog0 and therefore have had + * some form of input, even if only a space. At that point, a + * byte order mark is no longer a valid character (though + * technically it should be interpreted as a non-breaking space), + * so will be rejected by the tokenizing stages. + */ + return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), @@ -1285,6 +1292,26 @@ return common(state, tok); } +/* This function will only be invoked if the internal logic of the + * parser has broken down. It is used in two cases: + * + * 1: When the XML prolog has been finished. At this point the + * processor (the parser level above these role handlers) should + * switch from prologProcessor to contentProcessor and reinitialise + * the handler function. + * + * 2: When an error has been detected (via common() below). At this + * point again the processor should be switched to errorProcessor, + * which will never call a handler. + * + * The result of this is that error() can only be called if the + * processor switch failed to happen, which is an internal error and + * therefore we shouldn't be able to provoke it simply by using the + * library. It is a necessary backstop, however, so we merely exclude + * it from the coverage statistics. 
+ * + * LCOV_EXCL_START + */ static int PTRCALL error(PROLOG_STATE *UNUSED_P(state), int UNUSED_P(tok), @@ -1294,6 +1321,7 @@ { return XML_ROLE_NONE; } +/* LCOV_EXCL_STOP */ static int FASTCALL common(PROLOG_STATE *state, int tok) diff -Nru expat-2.2.2/lib/xmltok.c expat-2.2.3/lib/xmltok.c --- expat-2.2.2/lib/xmltok.c 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/lib/xmltok.c 2017-08-02 13:40:48.000000000 +0000 @@ -1019,7 +1019,11 @@ if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; if (ASCII_a <= c2 && c2 <= ASCII_z) - c2 += ASCII_A - ASCII_a; + /* The following line will never get executed. streqci() is + * only called from two places, both of which guarantee to put + * upper-case strings into s2. + */ + c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ if (c1 != c2) return 0; if (!c1) @@ -1291,7 +1295,7 @@ }; if (c < 0) - return 0; + return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ if (c < min2) { buf[0] = (char)(c | UTF8_cval1); return 1; @@ -1314,7 +1318,7 @@ buf[3] = (char)((c & 0x3f) | 0x80); return 4; } - return 0; + return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ } int FASTCALL @@ -1465,6 +1469,9 @@ else if (c < 0) { if (c < -4) return 0; + /* Multi-byte sequences need a converter function */ + if (!convert) + return 0; e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; diff -Nru expat-2.2.2/lib/xmltok_impl.c expat-2.2.3/lib/xmltok_impl.c --- expat-2.2.2/lib/xmltok_impl.c 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/lib/xmltok_impl.c 2017-08-02 13:40:48.000000000 +0000 @@ -1198,8 +1198,14 @@ const char *start; if (ptr >= end) return XML_TOK_NONE; - else if (! HAS_CHAR(enc, ptr, end)) - return XML_TOK_PARTIAL; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. 
Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { @@ -1258,8 +1264,14 @@ const char *start; if (ptr >= end) return XML_TOK_NONE; - else if (! HAS_CHAR(enc, ptr, end)) - return XML_TOK_PARTIAL; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { @@ -1614,6 +1626,14 @@ return 0; } +/* This function does not appear to be called from anywhere within the + * library code. It is used via the macro XmlSameName(), which is + * defined but never used. Since it appears in the encoding function + * table, removing it is not a thing to be undertaken lightly. For + * the moment, we simply exclude it from coverage tests. + * + * LCOV_EXCL_START + */ static int PTRCALL PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { @@ -1677,14 +1697,21 @@ } /* not reached */ } +/* LCOV_EXCL_STOP */ static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (end1 - ptr1 < MINBPC(enc)) - return 0; + if (end1 - ptr1 < MINBPC(enc)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the + * paranoia check is still valuable, however.
+ */ + return 0; /* LCOV_EXCL_LINE */ + } if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } diff -Nru expat-2.2.2/Makefile.in expat-2.2.3/Makefile.in --- expat-2.2.2/Makefile.in 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/Makefile.in 2017-08-02 13:40:48.000000000 +0000 @@ -128,7 +128,7 @@ LINK_EXE = $(LIBTOOL) $(LTFLAGS) --mode=link $(COMPILE) $(LDFLAGS) -o $@ LINK_CXX_EXE = $(LIBTOOL) $(LTFLAGS) --mode=link $(CXXCOMPILE) $(LDFLAGS) -o $@ -LIB_OBJS = lib/xmlparse.lo lib/xmltok.lo lib/xmlrole.lo +LIB_OBJS = lib/loadlibrary.lo lib/xmlparse.lo lib/xmltok.lo lib/xmlrole.lo $(LIBRARY): $(LIB_OBJS) $(LINK_LIB) $(LIB_OBJS) @@ -138,6 +138,8 @@ lib/xmlparse.lo: lib/xmlparse.c lib/expat.h lib/siphash.h lib/xmlrole.h lib/xmltok.h \ $(top_builddir)/expat_config.h lib/expat_external.h lib/internal.h +lib/loadlibrary.lo: lib/loadlibrary.c + lib/xmlrole.lo: lib/xmlrole.c lib/ascii.h lib/xmlrole.h \ $(top_builddir)/expat_config.h lib/expat_external.h lib/internal.h diff -Nru expat-2.2.2/MANIFEST expat-2.2.3/MANIFEST --- expat-2.2.2/MANIFEST 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/MANIFEST 2017-08-02 13:40:48.000000000 +0000 @@ -12,7 +12,7 @@ ConfigureChecks.cmake MANIFEST Makefile.in -README +README.md configure configure.ac expat_config.h.in @@ -45,6 +45,7 @@ lib/latin1tab.h lib/libexpat.def lib/libexpatw.def +lib/loadlibrary.c lib/nametab.h lib/siphash.h lib/utf8tab.h diff -Nru expat-2.2.2/README expat-2.2.3/README --- expat-2.2.2/README 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,139 +0,0 @@ - - Expat, Release 2.2.2 - -This is Expat, a C library for parsing XML, written by James Clark. -Expat is a stream-oriented XML parser. This means that you register -handlers with the parser before starting the parse. These handlers -are called when the parser discovers the associated structures in the -document being parsed. A start tag is an example of the kind of -structures for which you may register handlers. 
- -Windows users should use the expat_win32bin package, which includes -both precompiled libraries and executables, and source code for -developers. - -Expat is free software. You may copy, distribute, and modify it under -the terms of the License contained in the file COPYING distributed -with this package. This license is the same as the MIT/X Consortium -license. - -Versions of Expat that have an odd minor version (the middle number in -the release above), are development releases and should be considered -as beta software. Releases with even minor version numbers are -intended to be production grade software. - -If you are building Expat from a check-out from the CVS repository, -you need to run a script that generates the configure script using the -GNU autoconf and libtool tools. To do this, you need to have -autoconf 2.58 or newer. Run the script like this: - - ./buildconf.sh - -Once this has been done, follow the same instructions as for building -from a source distribution. - -To build Expat from a source distribution, you first run the -configuration shell script in the top level distribution directory: - - ./configure - -There are many options which you may provide to configure (which you -can discover by running configure with the --help option). But the -one of most interest is the one that sets the installation directory. -By default, the configure script will set things up to install -libexpat into /usr/local/lib, expat.h into /usr/local/include, and -xmlwf into /usr/local/bin. 
If, for example, you'd prefer to install -into /home/me/mystuff/lib, /home/me/mystuff/include, and -/home/me/mystuff/bin, you can tell configure about that with: - - ./configure --prefix=/home/me/mystuff - -Another interesting option is to enable 64-bit integer support for -line and column numbers and the over-all byte index: - - ./configure CPPFLAGS=-DXML_LARGE_SIZE - -However, such a modification would be a breaking change to the ABI -and is therefore not recommended for general use - e.g. as part of -a Linux distribution - but rather for builds with special requirements. - -After running the configure script, the "make" command will build -things and "make install" will install things into their proper -location. Have a look at the "Makefile" to learn about additional -"make" options. Note that you need to have write permission into -the directories into which things will be installed. - -If you are interested in building Expat to provide document -information in UTF-16 encoding rather than the default UTF-8, follow -these instructions (after having run "make distclean"): - - 1. For UTF-16 output as unsigned short (and version/error - strings as char), run: - - ./configure CPPFLAGS=-DXML_UNICODE - - For UTF-16 output as wchar_t (incl. version/error strings), - run: - - ./configure CFLAGS="-g -O2 -fshort-wchar" \ - CPPFLAGS=-DXML_UNICODE_WCHAR_T - - 2. Edit the MakeFile, changing: - - LIBRARY = libexpat.la - - to: - - LIBRARY = libexpatw.la - - (Note the additional "w" in the library name.) - - 3. Run "make buildlib" (which builds the library only). - Or, to save step 2, run "make buildlib LIBRARY=libexpatw.la". - - 4. Run "make installlib" (which installs the library only). - Or, if step 2 was omitted, run "make installlib LIBRARY=libexpatw.la". - -Using DESTDIR or INSTALL_ROOT is enabled, with INSTALL_ROOT being the default -value for DESTDIR, and the rest of the make file using only DESTDIR. 
-It works as follows: - $ make install DESTDIR=/path/to/image -overrides the in-makefile set DESTDIR, while both - $ INSTALL_ROOT=/path/to/image make install - $ make install INSTALL_ROOT=/path/to/image -use DESTDIR=$(INSTALL_ROOT), even if DESTDIR eventually is defined in the -environment, because variable-setting priority is -1) commandline -2) in-makefile -3) environment - -Note: This only applies to the Expat library itself, building UTF-16 versions -of xmlwf and the tests is currently not supported. - -Note for Solaris users: The "ar" command is usually located in -"/usr/ccs/bin", which is not in the default PATH. You will need to -add this to your path for the "make" command, and probably also switch -to GNU make (the "make" found in /usr/ccs/bin does not seem to work -properly -- apparently it does not understand .PHONY directives). If -you're using ksh or bash, use this command to build: - - PATH=/usr/ccs/bin:$PATH make - -When using Expat with a project using autoconf for configuration, you -can use the probing macro in conftools/expat.m4 to determine how to -include Expat. See the comments at the top of that file for more -information. - -A reference manual is available in the file doc/reference.html in this -distribution. - -The homepage for this project is http://www.libexpat.org/. There -are links there to connect you to the bug reports page. If you need -to report a bug when you don't have access to a browser, you may also -send a bug report by email to expat-bugs@mail.libexpat.org. - -Discussion related to the direction of future expat development takes -place on expat-discuss@mail.libexpat.org. 
Archives of this list and -other Expat-related lists may be found at: - - http://mail.libexpat.org/mailman/listinfo/ diff -Nru expat-2.2.2/README.md expat-2.2.3/README.md --- expat-2.2.2/README.md 1970-01-01 00:00:00.000000000 +0000 +++ expat-2.2.3/README.md 2017-08-02 13:40:48.000000000 +0000 @@ -0,0 +1,126 @@ +# Expat, Release 2.2.3 + +This is Expat, a C library for parsing XML, started by +[James Clark](https://en.wikipedia.org/wiki/James_Clark_(programmer)) in 1997. +Expat is a stream-oriented XML parser. This means that you register +handlers with the parser before starting the parse. These handlers +are called when the parser discovers the associated structures in the +document being parsed. A start tag is an example of the kind of +structures for which you may register handlers. + +Windows users should use the +[`expat_win32` package](https://sourceforge.net/projects/expat/files/expat_win32/), +which includes both precompiled libraries and executables, and source code for +developers. + +Expat is [free software](https://www.gnu.org/philosophy/free-sw.en.html). +You may copy, distribute, and modify it under the terms of the License +contained in the file +[`COPYING`](https://github.com/libexpat/libexpat/blob/master/expat/COPYING) +distributed with this package. +This license is the same as the MIT/X Consortium license. + +If you are building Expat from a check-out from the +[Git repository](https://github.com/libexpat/libexpat/), +you need to run a script that generates the configure script using the +GNU autoconf and libtool tools. To do this, you need to have +autoconf 2.58 or newer. Run the script like this: + +```console +./buildconf.sh +``` + +Once this has been done, follow the same instructions as for building +from a source distribution. 
+ +To build Expat from a source distribution, you first run the +configuration shell script in the top level distribution directory: + +```console +./configure +``` + +There are many options which you may provide to configure (which you +can discover by running configure with the `--help` option). But the +one of most interest is the one that sets the installation directory. +By default, the configure script will set things up to install +libexpat into `/usr/local/lib`, `expat.h` into `/usr/local/include`, and +`xmlwf` into `/usr/local/bin`. If, for example, you'd prefer to install +into `/home/me/mystuff/lib`, `/home/me/mystuff/include`, and +`/home/me/mystuff/bin`, you can tell `configure` about that with: + +```console +./configure --prefix=/home/me/mystuff +``` + +Another interesting option is to enable 64-bit integer support for +line and column numbers and the over-all byte index: + +```console +./configure CPPFLAGS=-DXML_LARGE_SIZE +``` + +However, such a modification would be a breaking change to the ABI +and is therefore not recommended for general use — e.g. as part of +a Linux distribution — but rather for builds with special requirements. + +After running the configure script, the `make` command will build +things and `make install` will install things into their proper +location. Have a look at the `Makefile` to learn about additional +`make` options. Note that you need to have write permission into +the directories into which things will be installed. + +If you are interested in building Expat to provide document +information in UTF-16 encoding rather than the default UTF-8, follow +these instructions (after having run `make distclean`): + +1. For UTF-16 output as unsigned short (and version/error strings as char), + run:
+ `./configure CPPFLAGS=-DXML_UNICODE`
+ For UTF-16 output as `wchar_t` (incl. version/error strings), run:
+ `./configure CFLAGS="-g -O2 -fshort-wchar" CPPFLAGS=-DXML_UNICODE_WCHAR_T` +
Note: The latter requires libc compiled with `-fshort-wchar`, as well. + +1. Edit `Makefile`, changing:
+ `LIBRARY = libexpat.la`
+ to:
+ `LIBRARY = libexpatw.la`
+ (Note the additional "w" in the library name.) + +1. Run `make buildlib` (which builds the library only). + Or, to save step 2, run `make buildlib LIBRARY=libexpatw.la`. + +1. Run `make installlib` (which installs the library only). + Or, if step 2 was omitted, run `make installlib LIBRARY=libexpatw.la`. + +Using `DESTDIR` or `INSTALL_ROOT` is enabled, with `INSTALL_ROOT` being the +default value for `DESTDIR`, and the rest of the make file using only +`DESTDIR`. It works as follows: + +```console +make install DESTDIR=/path/to/image +``` + +overrides the in-makefile set `DESTDIR`, while both + +```console +INSTALL_ROOT=/path/to/image make install +make install INSTALL_ROOT=/path/to/image +``` + +use `DESTDIR=$(INSTALL_ROOT)`, even if `DESTDIR` eventually is defined in the +environment, because variable-setting priority is +1. commandline +2. in-makefile +3. environment + +Note: This only applies to the Expat library itself, building UTF-16 versions +of xmlwf and the tests is currently not supported. + +When using Expat with a project using autoconf for configuration, you +can use the probing macro in `conftools/expat.m4` to determine how to +include Expat. See the comments at the top of that file for more +information. + +A reference manual is available in the file `doc/reference.html` in this +distribution. 
diff -Nru expat-2.2.2/tests/chardata.c expat-2.2.3/tests/chardata.c --- expat-2.2.2/tests/chardata.c 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/tests/chardata.c 2017-08-02 13:40:48.000000000 +0000 @@ -78,7 +78,7 @@ int CharData_CheckString(CharData *storage, const char *expected) { - char buffer[1280]; + char buffer[4096]; int len; int count; diff -Nru expat-2.2.2/tests/chardata.h expat-2.2.3/tests/chardata.h --- expat-2.2.2/tests/chardata.h 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/tests/chardata.h 2017-08-02 13:40:48.000000000 +0000 @@ -18,7 +18,7 @@ typedef struct { int count; /* # of chars, < 0 if not set */ - XML_Char data[1024]; + XML_Char data[2048]; } CharData; diff -Nru expat-2.2.2/tests/minicheck.c expat-2.2.3/tests/minicheck.c --- expat-2.2.2/tests/minicheck.c 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/tests/minicheck.c 2017-08-02 13:40:48.000000000 +0000 @@ -70,6 +70,32 @@ tc->ntests++; } +static void +tcase_free(TCase *tc) +{ + if (! tc) { + return; + } + + free(tc->tests); + free(tc); +} + +static void +suite_free(Suite *suite) +{ + if (! suite) { + return; + } + + while (suite->tests != NULL) { + TCase *next = suite->tests->next_tcase; + tcase_free(suite->tests); + suite->tests = next; + } + free(suite); +} + SRunner * srunner_create(Suite *suite) { @@ -175,6 +201,10 @@ void srunner_free(SRunner *runner) { - free(runner->suite); + if (! runner) { + return; + } + + suite_free(runner->suite); free(runner); } diff -Nru expat-2.2.2/tests/runtests.c expat-2.2.3/tests/runtests.c --- expat-2.2.2/tests/runtests.c 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/tests/runtests.c 2017-08-02 13:40:48.000000000 +0000 @@ -26,6 +26,7 @@ #include "minicheck.h" #include "memcheck.h" #include "siphash.h" +#include "ascii.h" /* for ASCII_xxx */ #ifdef XML_LARGE_SIZE #define XML_FMT_INT_MOD "ll" @@ -127,6 +128,34 @@ /* Dummy handlers for when we need to set a handler to tickle a bug, but it doesn't need to do anything. 
*/ +static unsigned long dummy_handler_flags = 0; + +#define DUMMY_START_DOCTYPE_HANDLER_FLAG (1UL << 0) +#define DUMMY_END_DOCTYPE_HANDLER_FLAG (1UL << 1) +#define DUMMY_ENTITY_DECL_HANDLER_FLAG (1UL << 2) +#define DUMMY_NOTATION_DECL_HANDLER_FLAG (1UL << 3) +#define DUMMY_ELEMENT_DECL_HANDLER_FLAG (1UL << 4) +#define DUMMY_ATTLIST_DECL_HANDLER_FLAG (1UL << 5) +#define DUMMY_COMMENT_HANDLER_FLAG (1UL << 6) +#define DUMMY_PI_HANDLER_FLAG (1UL << 7) +#define DUMMY_START_ELEMENT_HANDLER_FLAG (1UL << 8) +#define DUMMY_START_CDATA_HANDLER_FLAG (1UL << 9) +#define DUMMY_END_CDATA_HANDLER_FLAG (1UL << 10) +#define DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG (1UL << 11) +#define DUMMY_START_NS_DECL_HANDLER_FLAG (1UL << 12) +#define DUMMY_END_NS_DECL_HANDLER_FLAG (1UL << 13) +#define DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG (1UL << 14) +#define DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG (1UL << 15) +#define DUMMY_SKIP_HANDLER_FLAG (1UL << 16) +#define DUMMY_DEFAULT_HANDLER_FLAG (1UL << 17) + + +static void XMLCALL +dummy_xdecl_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(version), + const XML_Char *UNUSED_P(encoding), + int UNUSED_P(standalone)) +{} static void XMLCALL dummy_start_doctype_handler(void *UNUSED_P(userData), @@ -134,11 +163,15 @@ const XML_Char *UNUSED_P(sysid), const XML_Char *UNUSED_P(pubid), int UNUSED_P(has_internal_subset)) -{} +{ + dummy_handler_flags |= DUMMY_START_DOCTYPE_HANDLER_FLAG; +} static void XMLCALL dummy_end_doctype_handler(void *UNUSED_P(userData)) -{} +{ + dummy_handler_flags |= DUMMY_END_DOCTYPE_HANDLER_FLAG; +} static void XMLCALL dummy_entity_decl_handler(void *UNUSED_P(userData), @@ -150,7 +183,9 @@ const XML_Char *UNUSED_P(systemId), const XML_Char *UNUSED_P(publicId), const XML_Char *UNUSED_P(notationName)) -{} +{ + dummy_handler_flags |= DUMMY_ENTITY_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_notation_decl_handler(void *UNUSED_P(userData), @@ -158,13 +193,22 @@ const XML_Char *UNUSED_P(base), const XML_Char 
*UNUSED_P(systemId), const XML_Char *UNUSED_P(publicId)) -{} +{ + dummy_handler_flags |= DUMMY_NOTATION_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_element_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), - XML_Content *UNUSED_P(model)) -{} + XML_Content *model) +{ + /* The content model must be freed by the handler. Unfortunately + * we cannot pass the parser as the userData because this is used + * with other handlers that require other userData. + */ + XML_FreeContentModel(parser, model); + dummy_handler_flags |= DUMMY_ELEMENT_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_attlist_decl_handler(void *UNUSED_P(userData), @@ -173,39 +217,65 @@ const XML_Char *UNUSED_P(att_type), const XML_Char *UNUSED_P(dflt), int UNUSED_P(isrequired)) -{} +{ + dummy_handler_flags |= DUMMY_ATTLIST_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data)) -{} +{ + dummy_handler_flags |= DUMMY_COMMENT_HANDLER_FLAG; +} static void XMLCALL dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data)) -{} +{ + dummy_handler_flags |= DUMMY_PI_HANDLER_FLAG; +} static void XMLCALL dummy_start_element(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts)) +{ + dummy_handler_flags |= DUMMY_START_ELEMENT_HANDLER_FLAG; +} + +static void XMLCALL +dummy_end_element(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name)) {} static void XMLCALL dummy_start_cdata_handler(void *UNUSED_P(userData)) -{} +{ + dummy_handler_flags |= DUMMY_START_CDATA_HANDLER_FLAG; +} static void XMLCALL dummy_end_cdata_handler(void *UNUSED_P(userData)) +{ + dummy_handler_flags |= DUMMY_END_CDATA_HANDLER_FLAG; +} + +static void XMLCALL +dummy_cdata_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(s), + int UNUSED_P(len)) {} static void XMLCALL dummy_start_namespace_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(prefix), 
const XML_Char *UNUSED_P(uri)) -{} +{ + dummy_handler_flags |= DUMMY_START_NS_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_end_namespace_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(prefix)) -{} +{ + dummy_handler_flags |= DUMMY_END_NS_DECL_HANDLER_FLAG; +} /* This handler is obsolete, but while the code exists we should * ensure that dealing with the handler is covered by tests. @@ -217,8 +287,115 @@ const XML_Char *UNUSED_P(systemId), const XML_Char *UNUSED_P(publicId), const XML_Char *UNUSED_P(notationName)) +{ + dummy_handler_flags |= DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG; +} + +static void XMLCALL +dummy_default_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(s), + int UNUSED_P(len)) {} +static void XMLCALL +dummy_start_doctype_decl_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(doctypeName), + const XML_Char *UNUSED_P(sysid), + const XML_Char *UNUSED_P(pubid), + int UNUSED_P(has_internal_subset)) +{ + dummy_handler_flags |= DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG; +} + +static void XMLCALL +dummy_end_doctype_decl_handler(void *UNUSED_P(userData)) +{ + dummy_handler_flags |= DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG; +} + +static void XMLCALL +dummy_skip_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(entityName), + int UNUSED_P(is_parameter_entity)) +{ + dummy_handler_flags |= DUMMY_SKIP_HANDLER_FLAG; +} + +/* Useful external entity handler */ +typedef struct ExtOption { + const char *system_id; + const char *parse_text; +} ExtOption; + +static int XMLCALL +external_entity_optioner(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + ExtOption *options = (ExtOption *)XML_GetUserData(parser); + XML_Parser ext_parser; + + while (options->parse_text != NULL) { + if (!strcmp(systemId, options->system_id)) { + enum XML_Status rc; + ext_parser = + XML_ExternalEntityParserCreate(parser, context, NULL); + if 
(ext_parser == NULL) + return XML_STATUS_ERROR; + rc = _XML_Parse_SINGLE_BYTES(ext_parser, options->parse_text, + strlen(options->parse_text), + XML_TRUE); + XML_ParserFree(ext_parser); + return rc; + } + options++; + } + fail("No suitable option found"); + return XML_STATUS_ERROR; +} + +/* + * Parameter entity evaluation support. + */ +#define ENTITY_MATCH_FAIL (-1) +#define ENTITY_MATCH_NOT_FOUND (0) +#define ENTITY_MATCH_SUCCESS (1) +static const XML_Char *entity_name_to_match = NULL; +static const XML_Char *entity_value_to_match = NULL; +static int entity_match_flag = ENTITY_MATCH_NOT_FOUND; + +static void XMLCALL +param_entity_match_handler(void *UNUSED_P(userData), + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId), + const XML_Char *UNUSED_P(notationName)) +{ + if (!is_parameter_entity || + entity_name_to_match == NULL || + entity_value_to_match == NULL) { + return; + } + if (!strcmp(entityName, entity_name_to_match)) { + /* The cast here is safe because we control the horizontal and + * the vertical, and we therefore know our strings are never + * going to overflow an int. + */ + if (value_length != (int)strlen(entity_value_to_match) || + strncmp(value, entity_value_to_match, value_length)) { + entity_match_flag = ENTITY_MATCH_FAIL; + } else { + entity_match_flag = ENTITY_MATCH_SUCCESS; + } + } + /* Else leave the match flag alone */ +} /* * Character & encoding tests. 
@@ -313,6 +490,16 @@ } END_TEST +/* Parse whole buffer at once to exercise a different code path */ +START_TEST(test_nobom_utf16_le) +{ + char text[] = " \0<\0e\0/\0>\0"; + + if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + static void XMLCALL accumulate_characters(void *userData, const XML_Char *s, int len) { @@ -324,9 +511,15 @@ const XML_Char **atts) { CharData *storage = (CharData *)userData; - if (storage->count < 0 && atts != NULL && atts[0] != NULL) { + + /* Check there are attributes to deal with */ + if (atts == NULL) + return; + + while (storage->count < 0 && atts[0] != NULL) { /* "accumulate" the value of the first attribute we see */ CharData_AppendXMLChars(storage, atts[1], -1); + atts += 2; } } @@ -365,6 +558,40 @@ #define run_attribute_check(text, expected) \ _run_attribute_check(text, expected, __FILE__, __LINE__) +typedef struct ExtTest { + const char *parse_text; + const char *encoding; + CharData *storage; +} ExtTest; + +static void XMLCALL +ext_accumulate_characters(void *userData, const XML_Char *s, int len) +{ + ExtTest *test_data = (ExtTest *)userData; + accumulate_characters(test_data->storage, s, len); +} + +static void +_run_ext_character_check(const XML_Char *text, + ExtTest *test_data, + const XML_Char *expected, + const char *file, int line) +{ + CharData storage; + + CharData_Init(&storage); + test_data->storage = &storage; + XML_SetUserData(parser, test_data); + XML_SetCharacterDataHandler(parser, ext_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + _xml_failure(parser, file, line); + CharData_CheckXMLChars(&storage, expected); +} + +#define run_ext_character_check(text, test_data, expected) \ + _run_ext_character_check(text, test_data, expected, __FILE__, __LINE__) + /* Regression test for SF bug #491986. 
*/ START_TEST(test_danish_latin1) { @@ -522,18 +749,27 @@ START_TEST(test_utf16) { /* - some text - */ + * some {A} text + * + * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A + */ char text[] = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" "\000'\000?\000>\000\n" - "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'" - "\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/" - "\000d\000o\000c\000>"; + "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" + "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" + "<\000/\000d\000o\000c\000>"; + char expected[] = "some \357\274\241 text"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); } END_TEST @@ -560,6 +796,34 @@ } END_TEST +/* Test that an outright lie in the encoding is faulted */ +START_TEST(test_not_utf16) +{ + const char *text = + "" + "Hi"; + + /* Use a handler to provoke the appropriate code paths */ + XML_SetXmlDeclHandler(parser, dummy_xdecl_handler); + expect_failure(text, + XML_ERROR_INCORRECT_ENCODING, + "UTF-16 declared in UTF-8 not faulted"); +} +END_TEST + +/* Test that an unknown encoding is rejected */ +START_TEST(test_bad_encoding) +{ + const char *text = "Hi"; + + if (!XML_SetEncoding(parser, "unknown-encoding")) + fail("XML_SetEncoding failed"); + expect_failure(text, + XML_ERROR_UNKNOWN_ENCODING, + "Unknown encoding not faulted"); +} +END_TEST + /* Regression test for SF bug #481609, #774028. 
*/ START_TEST(test_latin1_umlauts) { @@ -574,6 +838,128 @@ run_character_check(text, utf8); XML_ParserReset(parser, NULL); run_attribute_check(text, utf8); + /* Repeat with a default handler */ + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, dummy_default_handler); + run_character_check(text, utf8); + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, dummy_default_handler); + run_attribute_check(text, utf8); +} +END_TEST + +/* Test that an element name with a 4-byte UTF-8 character is rejected */ +START_TEST(test_long_utf8_character) +{ + const char *text = + "\n" + /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ + ""; + expect_failure(text, + XML_ERROR_INVALID_TOKEN, + "4-byte UTF-8 character in element name not faulted"); +} +END_TEST + +/* Test that a long latin-1 attribute (too long to convert in one go) + * is correctly converted + */ +START_TEST(test_long_latin1_attribute) +{ + const char *text = + "\n" + "\n"; + const char *expected = + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + 
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "\xc3\xa4"; + + run_attribute_check(text, expected); +} +END_TEST + + +/* Test that a long ASCII attribute (too long to convert in one go) + * is correctly converted + */ +START_TEST(test_long_ascii_attribute) +{ + const char *text = + "\n" + "\n"; + const char *expected = + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "01234"; + + run_attribute_check(text, expected); } END_TEST @@ -757,12 +1143,64 @@ } END_TEST +/* Test cdata processing across a buffer boundary */ +START_TEST(test_really_long_encoded_lines) +{ + /* As above, except that we want to provoke an output buffer + * overflow with a non-trivial encoding. For this we need to pass + * the whole cdata in one go, not byte-by-byte. 
+ */ + void *buffer; + const char *text = + "" + "" + /* 64 chars */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + /* until we have at least 1024 characters on the line: */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + ""; + int parse_len = strlen(text); + + /* Need a cdata handler to provoke the code path we want to test */ + XML_SetCharacterDataHandler(parser, dummy_cdata_handler); + buffer = XML_GetBuffer(parser, parse_len); + if (buffer == NULL) + fail("Could not allocate parse buffer"); + memcpy(buffer, text, parse_len); + if (XML_ParseBuffer(parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + /* * Element event tests. 
*/ static void XMLCALL +start_element_event_handler(void *userData, + const XML_Char *name, + const XML_Char **UNUSED_P(atts)) +{ + CharData_AppendXMLChars((CharData *)userData, name, -1); +} + +static void XMLCALL end_element_event_handler(void *userData, const XML_Char *name) { CharData *storage = (CharData *) userData; @@ -912,6 +1350,31 @@ } END_TEST +START_TEST(test_xmldecl_invalid) +{ + expect_failure("\n", + XML_ERROR_XML_DECL, + "Failed to report invalid XML declaration"); +} +END_TEST + +START_TEST(test_xmldecl_missing_attr) +{ + expect_failure("\n\n", + XML_ERROR_XML_DECL, + "Failed to report missing XML declaration attribute"); +} +END_TEST + +START_TEST(test_xmldecl_missing_value) +{ + expect_failure("\n" + "", + XML_ERROR_XML_DECL, + "Failed to report missing attribute value"); +} +END_TEST + /* Regression test for SF bug #584832. */ static int XMLCALL UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info) @@ -974,105 +1437,210 @@ /* Regression test for SF bug #620106. */ static int XMLCALL -external_entity_loader_set_encoding(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) +external_entity_loader(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) { - /* This text says it's an unsupported encoding, but it's really - UTF-8, which we tell Expat using XML_SetEncoding(). 
- */ - const char *text = - "" - "\xC3\xA9"; + ExtTest *test_data = (ExtTest *)XML_GetUserData(parser); XML_Parser extparser; extparser = XML_ExternalEntityParserCreate(parser, context, NULL); if (extparser == NULL) fail("Could not create external entity parser."); - if (!XML_SetEncoding(extparser, "utf-8")) - fail("XML_SetEncoding() ignored for external entity"); - if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE) + if (test_data->encoding != NULL) { + if (!XML_SetEncoding(extparser, test_data->encoding)) + fail("XML_SetEncoding() ignored for external entity"); + } + if ( _XML_Parse_SINGLE_BYTES(extparser, + test_data->parse_text, + strlen(test_data->parse_text), + XML_TRUE) == XML_STATUS_ERROR) { - xml_failure(parser); - return 0; + xml_failure(extparser); + return XML_STATUS_ERROR; } - return 1; + XML_ParserFree(extparser); + return XML_STATUS_OK; } START_TEST(test_ext_entity_set_encoding) { const char *text = "\n" + " \n" "]>\n" "&en;"; + ExtTest test_data = { + /* This text says it's an unsupported encoding, but it's really + UTF-8, which we tell Expat using XML_SetEncoding(). + */ + "\xC3\xA9", + "utf-8", + NULL + }; - XML_SetExternalEntityRefHandler(parser, - external_entity_loader_set_encoding); - run_character_check(text, "\xC3\xA9"); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + run_ext_character_check(text, &test_data, "\xC3\xA9"); } END_TEST -/* Test that no error is reported for unknown entities if we don't - read an external subset. This was fixed in Expat 1.95.5. -*/ -START_TEST(test_wfc_undeclared_entity_unread_external_subset) { +/* Test external entities with no handler */ +START_TEST(test_ext_entity_no_handler) +{ const char *text = - "\n" - "&entity;"; - - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); -} -END_TEST + "\n" + "]>\n" + "&en;"; -/* Test that an error is reported for unknown entities if we don't - have an external subset. 
-*/ -START_TEST(test_wfc_undeclared_entity_no_external_subset) { - expect_failure("&entity;", - XML_ERROR_UNDEFINED_ENTITY, - "Parser did not report undefined entity w/out a DTD."); + XML_SetDefaultHandler(parser, dummy_default_handler); + run_character_check(text, ""); } END_TEST -/* Test that an error is reported for unknown entities if we don't - read an external subset, but have been declared standalone. -*/ -START_TEST(test_wfc_undeclared_entity_standalone) { +/* Test UTF-8 BOM is accepted */ +START_TEST(test_ext_entity_set_bom) +{ const char *text = - "\n" - "\n" - "&entity;"; + "\n" + "]>\n" + "&en;"; + ExtTest test_data = { + "\xEF\xBB\xBF" /* BOM */ + "" + "\xC3\xA9", + "utf-8", + NULL + }; - expect_failure(text, - XML_ERROR_UNDEFINED_ENTITY, - "Parser did not report undefined entity (standalone)."); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + run_ext_character_check(text, &test_data, "\xC3\xA9"); } END_TEST + +/* Test that bad encodings are faulted */ +typedef struct ext_faults +{ + const char *parse_text; + const char *fail_text; + const char *encoding; + enum XML_Error error; +} ExtFaults; + static int XMLCALL -external_entity_loader(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) +external_entity_faulter(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) { - char *text = (char *)XML_GetUserData(parser); - XML_Parser extparser; + XML_Parser ext_parser; + ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); - extparser = XML_ExternalEntityParserCreate(parser, context, NULL); - if (extparser == NULL) - fail("Could not create external entity parser."); - if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE) - == XML_STATUS_ERROR) { + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + 
if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (fault->encoding != NULL) { + if (!XML_SetEncoding(ext_parser, fault->encoding)) + fail("XML_SetEncoding failed"); + } + if (_XML_Parse_SINGLE_BYTES(ext_parser, + fault->parse_text, + strlen(fault->parse_text), + XML_TRUE) != XML_STATUS_ERROR) + fail(fault->fail_text); + if (XML_GetErrorCode(ext_parser) != fault->error) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; +} + +START_TEST(test_ext_entity_bad_encoding) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtFaults fault = { + "u", + "Unsupported encoding not faulted", + "unknown", + XML_ERROR_UNKNOWN_ENCODING + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &fault); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad encoding should not have been accepted"); +} +END_TEST + +/* Try handing an invalid encoding to an external entity parser */ +START_TEST(test_ext_entity_bad_encoding_2) +{ + const char *text = + "\n" + "\n" + "&entity;"; + ExtFaults fault = { + "", + "Unknown encoding not faulted", + "unknown-encoding", + XML_ERROR_UNKNOWN_ENCODING + }; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &fault); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad encoding not faulted in external entity handler"); +} +END_TEST + +/* Test that no error is reported for unknown entities if we don't + read an external subset. This was fixed in Expat 1.95.5. 
+*/ +START_TEST(test_wfc_undeclared_entity_unread_external_subset) { + const char *text = + "\n" + "&entity;"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); - return XML_STATUS_ERROR; - } - return XML_STATUS_OK; } +END_TEST + +/* Test that an error is reported for unknown entities if we don't + have an external subset. +*/ +START_TEST(test_wfc_undeclared_entity_no_external_subset) { + expect_failure("&entity;", + XML_ERROR_UNDEFINED_ENTITY, + "Parser did not report undefined entity w/out a DTD."); +} +END_TEST + +/* Test that an error is reported for unknown entities if we don't + read an external subset, but have been declared standalone. +*/ +START_TEST(test_wfc_undeclared_entity_standalone) { + const char *text = + "\n" + "\n" + "&entity;"; + + expect_failure(text, + XML_ERROR_UNDEFINED_ENTITY, + "Parser did not report undefined entity (standalone)."); +} +END_TEST /* Test that an error is reported for unknown entities if we have read an external subset, and standalone is true. 
@@ -1082,11 +1650,14 @@ "\n" "\n" "&entity;"; - char foo_text[] = - ""; + ExtTest test_data = { + "", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); + XML_SetUserData(parser, &test_data); XML_SetExternalEntityRefHandler(parser, external_entity_loader); expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, @@ -1094,6 +1665,26 @@ } END_TEST +/* Test that external entity handling is not done if the parsing flag + * is set to UNLESS_STANDALONE + */ +START_TEST(test_entity_with_external_subset_unless_standalone) { + const char *text = + "\n" + "\n" + "&entity;"; + ExtTest test_data = { "", NULL, NULL }; + + XML_SetParamEntityParsing(parser, + XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + expect_failure(text, + XML_ERROR_UNDEFINED_ENTITY, + "Parser did not report undefined entity"); +} +END_TEST + /* Test that no error is reported for unknown entities if we have read an external subset, and standalone is false. 
*/ @@ -1102,14 +1693,15 @@ "\n" "\n" "&entity;"; - char foo_text[] = - ""; + ExtTest test_data = { + "", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); XML_SetExternalEntityRefHandler(parser, external_entity_loader); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + run_ext_character_check(text, &test_data, ""); } END_TEST @@ -1126,15 +1718,24 @@ "\n" "\n" "&entity;"; - char foo_text[] = - ""; + ExtTest test_data = { + "", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); + XML_SetUserData(parser, &test_data); XML_SetExternalEntityRefHandler(parser, external_entity_loader); XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) - fail("NotStandalone handler failed to reject"); + expect_failure(text, XML_ERROR_NOT_STANDALONE, + "NotStandalone handler failed to reject"); + + /* Try again but without external entity handling */ + XML_ParserReset(parser, NULL); + XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler); + expect_failure(text, XML_ERROR_NOT_STANDALONE, + "NotStandalone handler failed to reject"); } END_TEST @@ -1151,15 +1752,21 @@ "\n" "\n" "&entity;"; - char foo_text[] = - ""; + ExtTest test_data = { + "", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); XML_SetExternalEntityRefHandler(parser, external_entity_loader); XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + run_ext_character_check(text, &test_data, ""); + + /* Repeat wtihout the external entity handler */ + XML_ParserReset(parser, NULL); + 
XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler); + run_character_check(text, ""); } END_TEST @@ -1177,13 +1784,57 @@ } END_TEST +/* Test incomplete external entities are faulted */ +START_TEST(test_ext_entity_invalid_parse) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + const ExtFaults faults[] = { + { + "<", + "Incomplete element declaration not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + "<\xe2\x82", /* First two bytes of a three-byte char */ + "Incomplete character not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { + "\xe2\x82", + "Incomplete character in CDATA not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { NULL, NULL, NULL, XML_ERROR_NONE } + }; + const ExtFaults *fault = faults; + + for (; fault->parse_text != NULL; fault++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, (void *)fault); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Parser did not report external entity error"); + XML_ParserReset(parser, NULL); + } +} +END_TEST + + /* Regression test for SF bug #483514. 
*/ START_TEST(test_dtd_default_handling) { const char *text = "\n" - "\n" + "\n" + "\n" "\n" "\n" "\n" @@ -1205,6 +1856,107 @@ } END_TEST +/* Test handling of attribute declarations */ +typedef struct AttTest { + const XML_Char *definition; + const XML_Char *element_name; + const XML_Char *attr_name; + const XML_Char *attr_type; + const XML_Char *default_value; + int is_required; +} AttTest; + +static void XMLCALL +verify_attlist_decl_handler(void *userData, + const XML_Char *element_name, + const XML_Char *attr_name, + const XML_Char *attr_type, + const XML_Char *default_value, + int is_required) +{ + AttTest *at = (AttTest *)userData; + + if (strcmp(element_name, at->element_name)) + fail("Unexpected element name in attribute declaration"); + if (strcmp(attr_name, at->attr_name)) + fail("Unexpected attribute name in attribute declaration"); + if (strcmp(attr_type, at->attr_type)) + fail("Unexpected attribute type in attribute declaration"); + if ((default_value == NULL && at->default_value != NULL) || + (default_value != NULL && at->default_value == NULL) || + (default_value != NULL && strcmp(default_value, at->default_value))) + fail("Unexpected default value in attribute declaration"); + if (is_required != at->is_required) + fail("Requirement mismatch in attribute declaration"); +} + +START_TEST(test_dtd_attr_handling) +{ + const char *prolog = + "\n"; + AttTest attr_data[] = { + { + "\n" + "]>" + "", + "doc", + "a", + "(one|two|three)", /* Extraneous spaces will be removed */ + NULL, + XML_TRUE + }, + { + "\n" + "\n" + "]>" + "", + "doc", + "a", + "NOTATION(foo)", + NULL, + XML_FALSE + }, + { + "\n" + "]>" + "", + "doc", + "a", + "NOTATION(foo)", + "bar", + XML_FALSE + }, + { + "\n" + "]>" + "", + "doc", + "a", + "CDATA", + "\xdb\xb2", + XML_FALSE + }, + { NULL, NULL, NULL, NULL, NULL, XML_FALSE } + }; + AttTest *test; + + for (test = attr_data; test->definition != NULL; test++) { + XML_SetAttlistDeclHandler(parser, verify_attlist_decl_handler); + 
XML_SetUserData(parser, test); + if (_XML_Parse_SINGLE_BYTES(parser, prolog, strlen(prolog), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + if (_XML_Parse_SINGLE_BYTES(parser, + test->definition, + strlen(test->definition), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + XML_ParserReset(parser, NULL); + } +} +END_TEST + /* See related SF bug #673791. When namespace processing is enabled, setting the namespace URI for a prefix is not allowed; this test ensures that it *is* allowed @@ -1214,7 +1966,7 @@ START_TEST(test_empty_ns_without_namespaces) { const char *text = - "\n" + "\n" " \n" ""; @@ -1232,7 +1984,7 @@ const char *text = "\n" + " xmlns:e CDATA 'http://example.org/'>\n" " ]>\n" ""; @@ -1391,6 +2143,20 @@ CharData_Init(&storage); XML_SetUserData(parser, &storage); XML_SetCharacterDataHandler(parser, accumulate_characters); + /* Add start and end handlers for coverage */ + XML_SetStartCdataSectionHandler(parser, dummy_start_cdata_handler); + XML_SetEndCdataSectionHandler(parser, dummy_end_cdata_handler); + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); + + /* Try again, this time with a default handler */ + XML_ParserReset(parser, NULL); + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetDefaultHandler(parser, dummy_default_handler); if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); @@ -1423,6 +2189,168 @@ } END_TEST +START_TEST(test_good_cdata_utf16_le) +{ + /* Test data is: + * + * + */ + const char text[] = + "<\0?\0x\0m\0l\0" + " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; + const char *expected = "hello"; + + CharData storage; 
+ CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test UTF16 conversion of a long cdata string */ + +/* 16 characters: handy macro to reduce visual clutter */ +#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" + +START_TEST(test_long_cdata_utf16) +{ + /* Test data is: + * + * + */ + const char text[] = + "\0<\0?\0x\0m\0l\0 " + "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " + "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" + /* 64 characters per line */ + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 + "\0]\0]\0>\0<\0/\0a\0>"; + const char *expected = + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 
+ "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOP"; + CharData storage; + void *buffer; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + buffer = XML_GetBuffer(parser, sizeof(text) - 1); + if (buffer == NULL) + fail("Could not allocate parse buffer"); + memcpy(buffer, text, sizeof(text) - 1); + if (XML_ParseBuffer(parser, + sizeof(text) - 1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test handling of multiple unit UTF-16 characters */ +START_TEST(test_multichar_cdata_utf16) +{ + /* Test data is: + * + * + * + * where {MINIM} is U+1d15e (a minim or half-note) + * UTF-16: 0xd834 0xdd5e + * UTF-8: 0xf0 0x9d 0x85 0x9e + * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) + * UTF-16: 0xd834 0xdd5e + * UTF-8: 0xf0 0x9d 0x85 0x9e + */ + const char text[] = + "\0<\0?\0x\0m\0l\0" + " 
\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" + "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" + "\0]\0]\0>\0<\0/\0a\0>"; + const char *expected = "\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that an element name with a UTF-16 surrogate pair is rejected */ +START_TEST(test_utf16_bad_surrogate_pair) +{ + /* Test data is: + * + * + * + * where {BADLINB} is U+10000 (the first Linear B character) + * with the UTF-16 surrogate pair in the wrong order, i.e. + * 0xdc00 0xd800 + */ + const char text[] = + "\0<\0?\0x\0m\0l\0" + " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" + "\xdc\x00\xd8\x00" + "\0]\0]\0>\0<\0/\0a\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Reversed UTF-16 surrogate pair not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) + xml_failure(parser); +} +END_TEST + + START_TEST(test_bad_cdata) { struct CaseData { @@ -1477,6 +2405,144 @@ } END_TEST +/* Test failures in UTF-16 CDATA */ +START_TEST(test_bad_cdata_utf16) +{ + struct CaseData { + size_t text_bytes; + const char *text; + enum XML_Error expected_error; + }; + + const char prolog[] = + "\0<\0?\0x\0m\0l\0" + " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>"; + struct CaseData cases[] = { + {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, + {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, + {3, "\0<\0", 
XML_ERROR_UNCLOSED_TOKEN}, + {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, + {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, + {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, + {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, + {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, + {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN}, + {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, + {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, + {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, + {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, + {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, + {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, + {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, + {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, + {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + /* Now add a four-byte UTF-16 character */ + {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", + XML_ERROR_PARTIAL_CHAR}, + {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", + XML_ERROR_PARTIAL_CHAR}, + {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", + XML_ERROR_UNCLOSED_CDATA_SECTION} + }; + size_t i; + + for (i = 0; i < sizeof(cases)/sizeof(struct CaseData); i++) { + enum XML_Status actual_status; + enum XML_Error actual_error; + + if (_XML_Parse_SINGLE_BYTES(parser, prolog, sizeof(prolog)-1, + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + actual_status = _XML_Parse_SINGLE_BYTES(parser, + cases[i].text, + cases[i].text_bytes, + XML_TRUE); + assert(actual_status == XML_STATUS_ERROR); + actual_error = XML_GetErrorCode(parser); + if (actual_error != cases[i].expected_error) { + char message[1024]; + + sprintf(message, + "Expected error %d (%s), got %d (%s) for case %lu\n", + 
cases[i].expected_error, + XML_ErrorString(cases[i].expected_error), + actual_error, + XML_ErrorString(actual_error), + (long unsigned)(i+1)); + fail(message); + } + XML_ParserReset(parser, NULL); + } +} +END_TEST + +static const char *long_cdata_text = + ""; + +/* Test stopping the parser in cdata handler */ +START_TEST(test_stop_parser_between_cdata_calls) +{ + const char *text = long_cdata_text; + + XML_SetCharacterDataHandler(parser, + clearing_aborting_character_handler); + resumable = XML_FALSE; + expect_failure(text, XML_ERROR_ABORTED, + "Parse not aborted in CDATA handler"); +} +END_TEST + +/* Test suspending the parser in cdata handler */ +START_TEST(test_suspend_parser_between_cdata_calls) +{ + const char *text = long_cdata_text; + enum XML_Status result; + + XML_SetCharacterDataHandler(parser, + clearing_aborting_character_handler); + resumable = XML_TRUE; + result = _XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE); + if (result != XML_STATUS_SUSPENDED) { + if (result == XML_STATUS_ERROR) + xml_failure(parser); + fail("Parse not suspended in CDATA handler"); + } + if (XML_GetErrorCode(parser) != XML_ERROR_NONE) + xml_failure(parser); +} +END_TEST + /* Test memory allocation functions */ START_TEST(test_memory_allocation) { @@ -1537,10 +2603,19 @@ CharData_AppendString((CharData *)userData, "c"); } +static void XMLCALL +record_skip_handler(void *userData, + const XML_Char *UNUSED_P(entityName), + int is_parameter_entity) +{ + CharData_AppendString((CharData *)userData, + is_parameter_entity ? 
"E" : "e"); +} + /* Test XML_DefaultCurrent() passes handling on correctly */ START_TEST(test_default_current) { - const char *text = "hello"; + const char *text = "hell]"; const char *entity_text = "\n" @@ -1580,6 +2655,19 @@ /* The default handler suppresses the entity */ CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDDD"); + /* Again, with a skip handler */ + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, record_default_handler); + XML_SetCharacterDataHandler(parser, record_cdata_handler); + XML_SetSkippedEntityHandler(parser, record_skip_handler); + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, entity_text, strlen(entity_text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + /* The default handler suppresses the entity */ + CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDeD"); + /* This time, allow the entity through */ XML_ParserReset(parser, NULL); XML_SetDefaultHandlerExpand(parser, record_default_handler); @@ -1628,13 +2716,109 @@ "\n"; const char *text2 = "&entity;"; - char dtd_text[] = ""; + ExtTest test_data = { + "", + NULL, + NULL + }; + + /* Check hash salt is passed through too */ + XML_SetHashSalt(parser, 0x12345678); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + /* Add a default handler to exercise more code paths */ + XML_SetDefaultHandler(parser, dummy_default_handler); + if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) + fail("Could not set foreign DTD"); + if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + + /* Ensure that trying to set the DTD after parsing has started + * is faulted, even if it's the same setting. 
+ */ + if (XML_UseForeignDTD(parser, XML_TRUE) != + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) + fail("Failed to reject late foreign DTD setting"); + /* Ditto for the hash salt */ + if (XML_SetHashSalt(parser, 0x23456789)) + fail("Failed to reject late hash salt change"); + + /* Now finish the parse */ + if (_XML_Parse_SINGLE_BYTES(parser, text2, strlen(text2), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test foreign DTD handling with a failing NotStandalone handler */ +START_TEST(test_foreign_dtd_not_standalone) +{ + const char *text = + "\n" + "&entity;"; + ExtTest test_data = { + "", + NULL, + NULL + }; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler); + if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) + fail("Could not set foreign DTD"); + expect_failure(text, XML_ERROR_NOT_STANDALONE, + "NotStandalonehandler failed to reject"); +} +END_TEST + +/* Test invalid character in a foreign DTD is faulted */ +START_TEST(test_invalid_foreign_dtd) +{ + const char *text = + "\n" + "&entity;"; + ExtFaults test_data = { + "$", + "Dollar not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_UseForeignDTD(parser, XML_TRUE); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad DTD should not have been accepted"); +} +END_TEST + +/* Test foreign DTD use with a doctype */ +START_TEST(test_foreign_dtd_with_doctype) +{ + const char *text1 = + "\n" + "]>\n"; + const char *text2 = + "&entity;"; + ExtTest test_data = { + "", + NULL, + NULL + }; /* Check hash salt is passed through too */ XML_SetHashSalt(parser, 0x12345678); 
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, dtd_text); + XML_SetUserData(parser, &test_data); XML_SetExternalEntityRefHandler(parser, external_entity_loader); + /* Add a default handler to exercise more code paths */ + XML_SetDefaultHandler(parser, dummy_default_handler); if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) fail("Could not set foreign DTD"); if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), @@ -1644,7 +2828,8 @@ /* Ensure that trying to set the DTD after parsing has started * is faulted, even if it's the same setting. */ - if (XML_UseForeignDTD(parser, XML_TRUE) == XML_ERROR_NONE) + if (XML_UseForeignDTD(parser, XML_TRUE) != + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) fail("Failed to reject late foreign DTD setting"); /* Ditto for the hash salt */ if (XML_SetHashSalt(parser, 0x23456789)) @@ -1657,6 +2842,47 @@ } END_TEST +/* Test XML_UseForeignDTD with no external subset present */ +static int XMLCALL +external_entity_null_loader(XML_Parser UNUSED_P(parser), + const XML_Char *UNUSED_P(context), + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + return XML_STATUS_OK; +} + +START_TEST(test_foreign_dtd_without_external_subset) +{ + const char *text = + "]>\n" + "&foo;"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, NULL); + XML_SetExternalEntityRefHandler(parser, external_entity_null_loader); + XML_UseForeignDTD(parser, XML_TRUE); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_empty_foreign_dtd) +{ + const char *text = + "\n" + "&entity;"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_null_loader); + XML_UseForeignDTD(parser, XML_TRUE); + expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, + 
"Undefined entity not faulted"); +} +END_TEST + /* Test XML Base is set and unset appropriately */ START_TEST(test_set_base) { @@ -1705,7 +2931,12 @@ } if (info->name == NULL) fail("Element not recognised"); - /* Note attribute count is doubled */ + /* The attribute count is twice what you might expect. It is a + * count of items in atts, an array which contains alternating + * attribute names and attribute values. For the naive user this + * is possibly a little unexpected, but it is what the + * documentation in expat.h tells us to expect. + */ count = XML_GetSpecifiedAttributeCount(parser); if (info->attr_count * 2 != count) { fail("Not got expected attribute count"); @@ -1735,6 +2966,7 @@ fail("Attribute has wrong value"); return; } + /* Remember, two entries in atts per attribute (see above) */ atts += 2; } } @@ -1839,6 +3071,23 @@ } END_TEST +/* Test that CDATA shows up correctly through a default handler */ +START_TEST(test_cdata_default) +{ + const char *text = ""; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetDefaultHandler(parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, text); +} +END_TEST + /* Test resetting a subordinate parser does exactly nothing */ static int XMLCALL external_entity_resetter(XML_Parser parser, @@ -1881,12 +3130,13 @@ fail("Parsing status not still FINISHED"); return XML_STATUS_ERROR; } + XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_subordinate_reset) { - const char *text = + const char *text = "\n" "\n" "&entity;"; @@ -1936,6 +3186,7 @@ xml_failure(ext_parser); return XML_STATUS_ERROR; } + XML_ParserFree(ext_parser); return XML_STATUS_OK; } @@ -1953,963 +3204,7021 @@ } END_TEST - -/* Test setting an explicit encoding */ -START_TEST(test_explicit_encoding) -{ - const char *text1 = "Hello "; - const char *text2 = " World"; - - /* 
First say we are UTF-8 */ - if (XML_SetEncoding(parser, "utf-8") != XML_STATUS_OK) - fail("Failed to set explicit encoding"); - if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), - XML_FALSE) == XML_STATUS_ERROR) - xml_failure(parser); - /* Try to switch encodings mid-parse */ - if (XML_SetEncoding(parser, "us-ascii") != XML_STATUS_ERROR) - fail("Allowed encoding change"); - if (_XML_Parse_SINGLE_BYTES(parser, text2, strlen(text2), - XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); - /* Try now the parse is over */ - if (XML_SetEncoding(parser, NULL) != XML_STATUS_OK) - fail("Failed to unset encoding"); -} -END_TEST - -/* Test user parameter settings */ -/* Variable holding the expected handler userData */ -static void *handler_data = NULL; -/* Count of the number of times the comment handler has been invoked */ -static int comment_count = 0; -/* Count of the number of skipped entities */ -static int skip_count = 0; -/* Count of the number of times the XML declaration handler is invoked */ -static int xdecl_count = 0; - -static void XMLCALL -xml_decl_handler(void *userData, - const XML_Char *UNUSED_P(version), - const XML_Char *UNUSED_P(encoding), - int standalone) -{ - if (userData != handler_data) - fail("User data (xml decl) not correctly set"); - if (standalone != -1) - fail("Standalone not show as not present"); - xdecl_count++; -} - +/* Test suspending a subordinate parser from an XML declaration */ +/* Increases code coverage of the tests */ static void XMLCALL -param_check_skip_handler(void *userData, - const XML_Char *UNUSED_P(entityName), - int UNUSED_P(is_parameter_entity)) +entity_suspending_xdecl_handler(void *userData, + const XML_Char *UNUSED_P(version), + const XML_Char *UNUSED_P(encoding), + int UNUSED_P(standalone)) { - if (userData != handler_data) - fail("User data (skip) not correctly set"); - skip_count++; -} + XML_Parser ext_parser = (XML_Parser)userData; -static void XMLCALL -data_check_comment_handler(void *userData, const 
XML_Char *UNUSED_P(data)) -{ - /* Check that the userData passed through is what we expect */ - if (userData != handler_data) - fail("User data (parser) not correctly set"); - /* Check that the user data in the parser is appropriate */ - if (XML_GetUserData(userData) != (void *)1) - fail("User data in parser not correctly set"); - comment_count++; + XML_StopParser(ext_parser, resumable); + XML_SetXmlDeclHandler(ext_parser, NULL); } static int XMLCALL -external_entity_param_checker(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) +external_entity_suspend_xmldecl(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) { - const char *text = - "\n" - ""; + const char *text = ""; XML_Parser ext_parser; + XML_ParsingStatus status; + enum XML_Status rc; ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); - handler_data = ext_parser; - if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), - XML_TRUE) == XML_STATUS_ERROR) { - xml_failure(parser); - return XML_STATUS_ERROR; - } - handler_data = parser; - return XML_STATUS_OK; + XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); + XML_SetUserData(ext_parser, ext_parser); + rc = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), XML_TRUE); + XML_GetParsingStatus(ext_parser, &status); + if (resumable) { + if (rc == XML_STATUS_ERROR) + xml_failure(ext_parser); + if (status.parsing != XML_SUSPENDED) + fail("Ext Parsing status not SUSPENDED"); + } else { + if (rc != XML_STATUS_ERROR) + fail("Ext parsing not aborted"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ABORTED) + xml_failure(ext_parser); + if (status.parsing != XML_FINISHED) + fail("Ext Parsing status not FINISHED"); + } + + 
XML_ParserFree(ext_parser); + return XML_STATUS_OK; } -START_TEST(test_user_parameters) +START_TEST(test_subordinate_xdecl_suspend) { const char *text = - "\n" - "\n" - "\n" - "&entity;"; - const char *epilog = - "\n" - ""; + "\n" + "]>\n" + "&entity;"; - comment_count = 0; - skip_count = 0; - xdecl_count = 0; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetXmlDeclHandler(parser, xml_decl_handler); - XML_SetExternalEntityRefHandler(parser, external_entity_param_checker); - XML_SetCommentHandler(parser, data_check_comment_handler); - XML_SetSkippedEntityHandler(parser, param_check_skip_handler); - XML_UseParserAsHandlerArg(parser); - XML_SetUserData(parser, (void *)1); - handler_data = parser; + XML_SetExternalEntityRefHandler(parser, + external_entity_suspend_xmldecl); + resumable = XML_TRUE; if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), - XML_FALSE) == XML_STATUS_ERROR) - xml_failure(parser); - if (comment_count != 2) - fail("Comment handler not invoked enough times"); - /* Ensure we can't change policy mid-parse */ - if (XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_NEVER)) - fail("Changed param entity parsing policy while parsing"); - if (_XML_Parse_SINGLE_BYTES(parser, epilog, strlen(epilog), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); - if (comment_count != 3) - fail("Comment handler not invoked enough times"); - if (skip_count != 1) - fail("Skip handler not invoked enough times"); - if (xdecl_count != 1) - fail("XML declaration handler not invoked"); } END_TEST -/* Test that an explicit external entity handler argument replaces - * the parser as the first argument. 
- */ -static int XMLCALL -external_entity_ref_param_checker(XML_Parser parser, - const XML_Char *UNUSED_P(context), - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) -{ - if ((void *)parser != handler_data) - fail("External entity ref handler parameter not correct"); - return XML_STATUS_OK; -} - -START_TEST(test_ext_entity_ref_parameter) +START_TEST(test_subordinate_xdecl_abort) { const char *text = - "\n" - "\n" + "\n" + "]>\n" "&entity;"; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(parser, - external_entity_ref_param_checker); - /* Set a handler arg that is not NULL and not parser (which is - * what NULL would cause to be passed. - */ - XML_SetExternalEntityRefHandlerArg(parser, (void *)text); - handler_data = (void *)text; - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), - XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); - - /* Now try again with unset args */ - XML_ParserReset(parser, NULL); - XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetExternalEntityRefHandler(parser, - external_entity_ref_param_checker); - XML_SetExternalEntityRefHandlerArg(parser, NULL); - handler_data = (void *)parser; + external_entity_suspend_xmldecl); + resumable = XML_FALSE; if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); } END_TEST -/* Test the parsing of an empty string */ -START_TEST(test_empty_parse) +/* Test external entity fault handling with suspension */ +static int XMLCALL +external_entity_suspending_faulter(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) { - const char *text = ""; - const char *partial = ""; - - if (XML_Parse(parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) - fail("Parsing empty string faulted"); - if (XML_Parse(parser, NULL, 0, 
XML_TRUE) != XML_STATUS_ERROR) - fail("Parsing final empty string not faulted"); - if (XML_GetErrorCode(parser) != XML_ERROR_NO_ELEMENTS) - fail("Parsing final empty string faulted for wrong reason"); + XML_Parser ext_parser; + ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); + void *buffer; + int parse_len = strlen(fault->parse_text); - /* Now try with valid text before the empty end */ - XML_ParserReset(parser, NULL); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), - XML_FALSE) == XML_STATUS_ERROR) - xml_failure(parser); - if (XML_Parse(parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) - fail("Parsing final empty string faulted"); + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); + XML_SetUserData(ext_parser, ext_parser); + resumable = XML_TRUE; + buffer = XML_GetBuffer(ext_parser, parse_len); + if (buffer == NULL) + fail("Could not allocate parse buffer"); + memcpy(buffer, fault->parse_text, parse_len); + if (XML_ParseBuffer(ext_parser, parse_len, + XML_FALSE) != XML_STATUS_SUSPENDED) + fail("XML declaration did not suspend"); + if (XML_ResumeParser(ext_parser) != XML_STATUS_OK) + xml_failure(ext_parser); + if (XML_ParseBuffer(ext_parser, 0, XML_TRUE) != XML_STATUS_ERROR) + fail(fault->fail_text); + if (XML_GetErrorCode(ext_parser) != fault->error) + xml_failure(ext_parser); - /* Now try with invalid text before the empty end */ - XML_ParserReset(parser, NULL); - if (_XML_Parse_SINGLE_BYTES(parser, partial, strlen(partial), - XML_FALSE) == XML_STATUS_ERROR) - xml_failure(parser); - if (XML_Parse(parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) - fail("Parsing final incomplete empty string not faulted"); + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; } -END_TEST -/* Test odd corners of the XML_GetBuffer interface */ -START_TEST(test_get_buffer_1) 
+START_TEST(test_ext_entity_invalid_suspended_parse) { const char *text = - "\n\n" + "]>\n" + "&en;"; + ExtFaults faults[] = { + { + "<", + "Incomplete element declaration not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + /* First two bytes of a three-byte char */ + "\xe2\x82", + "Incomplete character not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { NULL, NULL, NULL, XML_ERROR_NONE } + }; + ExtFaults *fault; - /* Now try extending it a carefully crafted amount */ - if (XML_GetBuffer(parser, 1000) == NULL) - fail("1000 buffer failed"); + for (fault = &faults[0]; fault->parse_text != NULL; fault++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_suspending_faulter); + XML_SetUserData(parser, fault); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Parser did not report external entity error"); + XML_ParserReset(parser, NULL); + } } END_TEST -/* Test more corners of the XML_GetBuffer interface */ -START_TEST(test_get_buffer_2) + + +/* Test setting an explicit encoding */ +START_TEST(test_explicit_encoding) { - const char *text = - "\n" is not one of them, so the parser should raise an - * error on encountering it. 
- */ static int XMLCALL -external_entity_param(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *systemId, - const XML_Char *UNUSED_P(publicId)) +external_entity_bad_cr_catcher(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) { - const char *text1 = - "\n" - "\n" - "\n" - "%e1;\n"; - const char *text2 = - "\n" - "\n"; + const char *text = "\r"; XML_Parser ext_parser; - if (systemId == NULL) - return XML_STATUS_OK; - ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); - - if (!strcmp(systemId, "004-1.ent")) { - if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), - XML_TRUE) != XML_STATUS_ERROR) - fail("Inner DTD with invalid tag not rejected"); - if (XML_GetErrorCode(ext_parser) != XML_ERROR_EXTERNAL_ENTITY_HANDLING) - xml_failure(ext_parser); - } - else if (!strcmp(systemId, "004-2.ent")) { - if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, strlen(text2), - XML_TRUE) != XML_STATUS_ERROR) - fail("Invalid tag in external param not rejected"); - if (XML_GetErrorCode(ext_parser) != XML_ERROR_SYNTAX) - xml_failure(ext_parser); - } else { - fail("Unknown system ID"); - } - - return XML_STATUS_ERROR; + XML_SetCharacterDataHandler(ext_parser, cr_cdata_handler); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + fail("Async entity error not caught"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ASYNC_ENTITY) + xml_failure(ext_parser); + XML_ParserFree(ext_parser); + return XML_STATUS_OK; } -START_TEST(test_invalid_tag_in_dtd) +START_TEST(test_ext_entity_trailing_cr) { const char *text = - "\n" - "\n"; + "\n" + "]>\n" + "&en;"; + int found_cr; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetExternalEntityRefHandler(parser, external_entity_param); - 
expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, - "Invalid tag IN DTD external param not rejected"); + XML_SetExternalEntityRefHandler(parser, external_entity_cr_catcher); + XML_SetUserData(parser, &found_cr); + found_cr = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); + if (found_cr == 0) + fail("No carriage return found"); + XML_ParserReset(parser, NULL); + + /* Try again with a different trailing CR */ + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_bad_cr_catcher); + XML_SetUserData(parser, &found_cr); + found_cr = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); + if (found_cr == 0) + fail("No carriage return found"); } END_TEST - -/* - * Namespaces tests. - */ - -static void -namespace_setup(void) +/* Test handling of trailing square bracket */ +static void XMLCALL +rsqb_handler(void *userData, const XML_Char *s, int len) { - parser = XML_ParserCreateNS(NULL, ' '); - if (parser == NULL) - fail("Parser not created."); + int *pfound = (int *)userData; + + if (len == 1 && *s == ']') + *pfound = 1; } -static void -namespace_teardown(void) +START_TEST(test_trailing_rsqb) { - basic_teardown(); -} + const char *text8 = "]"; + const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; + int found_rsqb; + int text8_len = strlen(text8); -/* Check that an element name and attribute name match the expected values. - The expected values are passed as an array reference of string pointers - provided as the userData argument; the first is the expected - element name, and the second is the expected attribute name. 
-*/ -static int triplet_count = 0; + XML_SetCharacterDataHandler(parser, rsqb_handler); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text8, text8_len, + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_rsqb == 0) + fail("Did not catch the right square bracket"); -static void XMLCALL -triplet_start_checker(void *userData, const XML_Char *name, - const XML_Char **atts) -{ - char **elemstr = (char **)userData; - char buffer[1024]; - if (strcmp(elemstr[0], name) != 0) { - sprintf(buffer, "unexpected start string: '%s'", name); - fail(buffer); - } - if (strcmp(elemstr[1], atts[0]) != 0) { - sprintf(buffer, "unexpected attribute string: '%s'", atts[0]); - fail(buffer); - } - triplet_count++; + /* Try again with a different encoding */ + XML_ParserReset(parser, NULL); + XML_SetCharacterDataHandler(parser, rsqb_handler); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text16, sizeof(text16)-1, + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_rsqb == 0) + fail("Did not catch the right square bracket"); + + /* And finally with a default handler */ + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, rsqb_handler); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text16, sizeof(text16)-1, + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_rsqb == 0) + fail("Did not catch the right square bracket"); } +END_TEST -/* Check that the element name passed to the end-element handler matches - the expected value. The expected value is passed as the first element - in an array of strings passed as the userData argument. 
-*/ -static void XMLCALL -triplet_end_checker(void *userData, const XML_Char *name) +/* Test trailing right square bracket in an external entity parse */ +static int XMLCALL +external_entity_rsqb_catcher(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) { - char **elemstr = (char **)userData; - if (strcmp(elemstr[0], name) != 0) { - char buffer[1024]; - sprintf(buffer, "unexpected end string: '%s'", name); - fail(buffer); - } - triplet_count++; + const char *text = "]"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetCharacterDataHandler(ext_parser, rsqb_handler); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + fail("Async entity error not caught"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ASYNC_ENTITY) + xml_failure(ext_parser); + XML_ParserFree(ext_parser); + return XML_STATUS_OK; } -START_TEST(test_return_ns_triplet) +START_TEST(test_ext_entity_trailing_rsqb) { const char *text = - ""; - const char *epilog = ""; - const char *elemstr[] = { - "http://expat.sf.net/ e foo", - "http://expat.sf.net/ a bar" - }; - XML_SetReturnNSTriplet(parser, XML_TRUE); - XML_SetUserData(parser, elemstr); - XML_SetElementHandler(parser, triplet_start_checker, - triplet_end_checker); - XML_SetNamespaceDeclHandler(parser, - dummy_start_namespace_decl_handler, - dummy_end_namespace_decl_handler); - triplet_count = 0; + "\n" + "]>\n" + "&en;"; + int found_rsqb; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_rsqb_catcher); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), - XML_FALSE) == XML_STATUS_ERROR) + XML_TRUE) != XML_STATUS_OK) 
xml_failure(parser); - if (triplet_count != 1) - fail("triplet_start_checker not invoked"); - /* Check that unsetting "return triplets" fails while still parsing */ - XML_SetReturnNSTriplet(parser, XML_FALSE); - if (_XML_Parse_SINGLE_BYTES(parser, epilog, strlen(epilog), - XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); - if (triplet_count != 2) - fail("triplet_end_checker not invoked"); + if (found_rsqb == 0) + fail("No right square bracket found"); } END_TEST -static void XMLCALL -overwrite_start_checker(void *userData, const XML_Char *name, - const XML_Char **atts) +/* Test CDATA handling in an external entity */ +static int XMLCALL +external_entity_good_cdata_ascii(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) { - CharData *storage = (CharData *) userData; - CharData_AppendString(storage, "start "); - CharData_AppendXMLChars(storage, name, -1); - while (*atts != NULL) { - CharData_AppendString(storage, "\nattribute "); - CharData_AppendXMLChars(storage, *atts, -1); - atts += 2; - } - CharData_AppendString(storage, "\n"); -} + const char *text = + "Hello, world!]]>"; + const char *expected = "Hello, world!"; + CharData storage; + XML_Parser ext_parser; -static void XMLCALL -overwrite_end_checker(void *userData, const XML_Char *name) -{ - CharData *storage = (CharData *) userData; - CharData_AppendString(storage, "end "); - CharData_AppendXMLChars(storage, name, -1); - CharData_AppendString(storage, "\n"); + CharData_Init(&storage); + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetUserData(ext_parser, &storage); + XML_SetCharacterDataHandler(ext_parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + CharData_CheckXMLChars(&storage, expected); 
+ + XML_ParserFree(ext_parser); + return XML_STATUS_OK; } -static void -run_ns_tagname_overwrite_test(const char *text, const char *result) +START_TEST(test_ext_entity_good_cdata) { - CharData storage; - CharData_Init(&storage); - XML_SetUserData(parser, &storage); - XML_SetElementHandler(parser, - overwrite_start_checker, overwrite_end_checker); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + const char *text = + "\n" + "]>\n" + "&en;"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_good_cdata_ascii); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_OK) xml_failure(parser); - CharData_CheckString(&storage, result); } +END_TEST -/* Regression test for SF bug #566334. */ -START_TEST(test_ns_tagname_overwrite) -{ +/* Test user parameter settings */ +/* Variable holding the expected handler userData */ +static void *handler_data = NULL; +/* Count of the number of times the comment handler has been invoked */ +static int comment_count = 0; +/* Count of the number of skipped entities */ +static int skip_count = 0; +/* Count of the number of times the XML declaration handler is invoked */ +static int xdecl_count = 0; + +static void XMLCALL +xml_decl_handler(void *userData, + const XML_Char *UNUSED_P(version), + const XML_Char *UNUSED_P(encoding), + int standalone) +{ + if (userData != handler_data) + fail("User data (xml decl) not correctly set"); + if (standalone != -1) + fail("Standalone not flagged as not present in XML decl"); + xdecl_count++; +} + +static void XMLCALL +param_check_skip_handler(void *userData, + const XML_Char *UNUSED_P(entityName), + int UNUSED_P(is_parameter_entity)) +{ + if (userData != handler_data) + fail("User data (skip) not correctly set"); + skip_count++; +} + +static void XMLCALL +data_check_comment_handler(void *userData, const XML_Char *UNUSED_P(data)) +{ + /* Check that the 
userData passed through is what we expect */ + if (userData != handler_data) + fail("User data (parser) not correctly set"); + /* Check that the user data in the parser is appropriate */ + if (XML_GetUserData(userData) != (void *)1) + fail("User data in parser not correctly set"); + comment_count++; +} + +static int XMLCALL +external_entity_param_checker(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = + "\n" + ""; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + handler_data = ext_parser; + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(parser); + return XML_STATUS_ERROR; + } + handler_data = parser; + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_user_parameters) +{ + const char *text = + "\n" + "\n" + "\n" + "&entity;"; + const char *epilog = + "\n" + ""; + + comment_count = 0; + skip_count = 0; + xdecl_count = 0; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetXmlDeclHandler(parser, xml_decl_handler); + XML_SetExternalEntityRefHandler(parser, external_entity_param_checker); + XML_SetCommentHandler(parser, data_check_comment_handler); + XML_SetSkippedEntityHandler(parser, param_check_skip_handler); + XML_UseParserAsHandlerArg(parser); + XML_SetUserData(parser, (void *)1); + handler_data = parser; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + if (comment_count != 2) + fail("Comment handler not invoked enough times"); + /* Ensure we can't change policy mid-parse */ + if (XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_NEVER)) + fail("Changed param entity parsing policy while parsing"); + if 
(_XML_Parse_SINGLE_BYTES(parser, epilog, strlen(epilog), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (comment_count != 3) + fail("Comment handler not invoked enough times"); + if (skip_count != 1) + fail("Skip handler not invoked enough times"); + if (xdecl_count != 1) + fail("XML declaration handler not invoked"); +} +END_TEST + +/* Test that an explicit external entity handler argument replaces + * the parser as the first argument. + * + * We do not call the first parameter to the external entity handler + * 'parser' for once, since the first time the handler is called it + * will actually be a text string. We need to be able to access the + * global 'parser' variable to create our external entity parser from, + * since there are code paths we need to ensure get executed. + */ +static int XMLCALL +external_entity_ref_param_checker(XML_Parser parameter, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = ""; + XML_Parser ext_parser; + + if ((void *)parameter != handler_data) + fail("External entity ref handler parameter not correct"); + + /* Here we use the global 'parser' variable */ + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_ref_parameter) +{ + const char *text = + "\n" + "\n" + "&entity;"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_ref_param_checker); + /* Set a handler arg that is not NULL and not parser (which is + * what NULL would cause to be passed. 
+ */ + XML_SetExternalEntityRefHandlerArg(parser, (void *)text); + handler_data = (void *)text; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + /* Now try again with unset args */ + XML_ParserReset(parser, NULL); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_ref_param_checker); + XML_SetExternalEntityRefHandlerArg(parser, NULL); + handler_data = (void *)parser; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test the parsing of an empty string */ +START_TEST(test_empty_parse) +{ + const char *text = ""; + const char *partial = ""; + + if (XML_Parse(parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) + fail("Parsing empty string faulted"); + if (XML_Parse(parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) + fail("Parsing final empty string not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_NO_ELEMENTS) + fail("Parsing final empty string faulted for wrong reason"); + + /* Now try with valid text before the empty end */ + XML_ParserReset(parser, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + if (XML_Parse(parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) + fail("Parsing final empty string faulted"); + + /* Now try with invalid text before the empty end */ + XML_ParserReset(parser, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, partial, strlen(partial), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + if (XML_Parse(parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) + fail("Parsing final incomplete empty string not faulted"); +} +END_TEST + +/* Test odd corners of the XML_GetBuffer interface */ +static enum XML_Status +get_feature(enum XML_FeatureEnum feature_id, long *presult) +{ + const XML_Feature *feature = XML_GetFeatureList(); + + if (feature == 
NULL) + return XML_STATUS_ERROR; + for (; feature->feature != XML_FEATURE_END; feature++) { + if (feature->feature == feature_id) { + *presult = feature->value; + return XML_STATUS_OK; + } + } + return XML_STATUS_ERROR; +} + +/* Having an element name longer than 1024 characters exercises some + * of the pool allocation code in the parser that otherwise does not + * get executed. The count at the end of the line is the number of + * characters (bytes) in the element name by that point.x + */ +static const char *get_buffer_test_text = + "\n" +START_TEST(test_byte_info_at_error) +{ + const char *text = PRE_ERROR_STR POST_ERROR_STR; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + fail("Syntax error not faulted"); + if (XML_GetCurrentByteCount(parser) != 0) + fail("Error byte count incorrect"); + if (XML_GetCurrentByteIndex(parser) != strlen(PRE_ERROR_STR)) + fail("Error byte index incorrect"); +} +END_TEST +#undef PRE_ERROR_STR +#undef POST_ERROR_STR + +/* Test position information in handler */ +typedef struct ByteTestData { + int start_element_len; + int cdata_len; + int total_string_len; +} ByteTestData; + +static void +byte_character_handler(void *userData, + const XML_Char *s, + int len) +{ +#ifdef XML_CONTEXT_BYTES + int offset, size; + const char *buffer; + ByteTestData *data = (ByteTestData *)userData; + + buffer = XML_GetInputContext(parser, &offset, &size); + if (buffer == NULL) + fail("Failed to get context buffer"); + if (offset != data->start_element_len) + fail("Context offset in unexpected position"); + if (len != data->cdata_len) + fail("CDATA length reported incorrectly"); + if (size != data->total_string_len) + fail("Context size is not full buffer"); + if (XML_GetCurrentByteIndex(parser) != offset) + fail("Character byte index incorrect"); + if (XML_GetCurrentByteCount(parser) != len) + fail("Character byte count incorrect"); + if (s != buffer + offset) + fail("Buffer position incorrect"); +#else + 
(void)userData; + (void)s; + (void)len; +#endif +} + +#define START_ELEMENT "" +#define CDATA_TEXT "Hello" +#define END_ELEMENT "" +START_TEST(test_byte_info_at_cdata) +{ + const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; + int offset, size; + ByteTestData data; + + /* Check initial context is empty */ + if (XML_GetInputContext(parser, &offset, &size) != NULL) + fail("Unexpected context at start of parse"); + + data.start_element_len = strlen(START_ELEMENT); + data.cdata_len = strlen(CDATA_TEXT); + data.total_string_len = strlen(text); + XML_SetCharacterDataHandler(parser, byte_character_handler); + XML_SetUserData(parser, &data); + if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); +} +END_TEST +#undef START_ELEMENT +#undef CDATA_TEXT +#undef END_ELEMENT + +/* Test predefined entities are correctly recognised */ +START_TEST(test_predefined_entities) +{ + const char *text = "<>&"'"; + const char *result = "<>&\"'"; + CharData storage; + + XML_SetDefaultHandler(parser, accumulate_characters); + /* run_character_check uses XML_SetCharacterDataHandler(), which + * unfortunately heads off a code path that we need to exercise. + */ + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + /* The default handler doesn't translate the entities */ + CharData_CheckXMLChars(&storage, text); + + /* Now try again and check the translation */ + XML_ParserReset(parser, NULL); + run_character_check(text, result); +} +END_TEST + +/* Regression test that an invalid tag in an external parameter + * reference in an external DTD is correctly faulted. + * + * Only a few specific tags are legal in DTDs ignoring comments and + * processing instructions, all of which begin with an exclamation + * mark. "" is not one of them, so the parser should raise an + * error on encountering it. 
+ */ +static int XMLCALL +external_entity_param(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text1 = + "\n" + "\n" + "\n" + "%e1;\n"; + const char *text2 = + "\n" + "\n"; + XML_Parser ext_parser; + + if (systemId == NULL) + return XML_STATUS_OK; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + + if (!strcmp(systemId, "004-1.ent")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), + XML_TRUE) != XML_STATUS_ERROR) + fail("Inner DTD with invalid tag not rejected"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_EXTERNAL_ENTITY_HANDLING) + xml_failure(ext_parser); + } + else if (!strcmp(systemId, "004-2.ent")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, strlen(text2), + XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid tag in external param not rejected"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_SYNTAX) + xml_failure(ext_parser); + } else { + fail("Unknown system ID"); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; +} + +START_TEST(test_invalid_tag_in_dtd) +{ + const char *text = + "\n" + "\n"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_param); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Invalid tag IN DTD external param not rejected"); +} +END_TEST + +/* Test entities not quite the predefined ones are not mis-recognised */ +START_TEST(test_not_predefined_entities) +{ + const char *text[] = { + "&pt;", + "&amo;", + "&quid;", + "&apod;", + NULL + }; + int i = 0; + + while (text[i] != NULL) { + expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, + "Undefined entity not rejected"); + XML_ParserReset(parser, NULL); + i++; + } +} +END_TEST + +/* Test conditional inclusion (IGNORE) */ +static int XMLCALL 
+external_entity_load_ignore(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = "]]>"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ignore_section) +{ + const char *text = + "\n" + "&entity;"; + const char *expected = + "]]>\n&entity;"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &storage); + XML_SetExternalEntityRefHandler(parser, external_entity_load_ignore); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler); + XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, dummy_start_element); + XML_SetEndElementHandler(parser, dummy_end_element); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +static int XMLCALL +external_entity_load_ignore_utf16(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char text[] = + /* ]]> */ + "<\0!\0[\0I\0G\0N\0O\0R\0E\0[\0" + "<\0!\0E\0L\0E\0M\0E\0N\0T\0 \0e\0 \0" + "(\0#\0P\0C\0D\0A\0T\0A\0)\0*\0>\0]\0]\0>\0"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external 
entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ignore_section_utf16) +{ + const char text[] = + /* */ + "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " + "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" + /* &en; */ + "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; + const XML_Char *expected = + "]]>\n&en;"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &storage); + XML_SetExternalEntityRefHandler(parser, + external_entity_load_ignore_utf16); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler); + XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, dummy_start_element); + XML_SetEndElementHandler(parser, dummy_end_element); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +static int XMLCALL +external_entity_load_ignore_utf16_be(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char text[] = + /* ]]> */ + "\0<\0!\0[\0I\0G\0N\0O\0R\0E\0[" + "\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 \0e\0 " + "\0(\0#\0P\0C\0D\0A\0T\0A\0)\0*\0>\0]\0]\0>"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + 
+START_TEST(test_ignore_section_utf16_be) +{ + const char text[] = + /* */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " + "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" + /* &en; */ + "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; + const XML_Char *expected = + "]]>\n&en;"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &storage); + XML_SetExternalEntityRefHandler(parser, + external_entity_load_ignore_utf16_be); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler); + XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, dummy_start_element); + XML_SetEndElementHandler(parser, dummy_end_element); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test mis-formatted conditional exclusion */ +START_TEST(test_bad_ignore_section) +{ + const char *text = + "\n" + "&entity;"; + ExtFaults faults[] = { + { + "", + "Invalid XML character not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }, + { + /* FIrst two bytes of a three-byte char */ + "parse_text != NULL; fault++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, fault); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Incomplete IGNORE section not failed"); + XML_ParserReset(parser, NULL); + } +} +END_TEST + +/* Test recursive parsing */ +static int XMLCALL +external_entity_valuer(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text1 = + "\n" + "\n" + "\n" + "%e1;\n"; + 
XML_Parser ext_parser; + + if (systemId == NULL) + return XML_STATUS_OK; + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (!strcmp(systemId, "004-1.ent")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + } + else if (!strcmp(systemId, "004-2.ent")) { + ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); + enum XML_Status status; + enum XML_Error error; + + status = _XML_Parse_SINGLE_BYTES(ext_parser, + fault->parse_text, + strlen(fault->parse_text), + XML_TRUE); + if (fault->error == XML_ERROR_NONE) { + if (status == XML_STATUS_ERROR) + xml_failure(ext_parser); + } else { + if (status != XML_STATUS_ERROR) + fail(fault->fail_text); + error = XML_GetErrorCode(ext_parser); + if (error != fault->error && + (fault->error != XML_ERROR_XML_DECL || + error != XML_ERROR_TEXT_DECL)) + xml_failure(ext_parser); + } + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_external_entity_values) +{ + const char *text = + "\n" + "\n"; + ExtFaults data_004_2[] = { + { + "", + NULL, + NULL, + XML_ERROR_NONE + }, + { + "", + "Invalid token not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }, + { + "'wombat", + "Unterminated string not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + "\xe2\x82", + "Partial UTF-8 character not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { + "\n", + NULL, + NULL, + XML_ERROR_NONE + }, + { + "", + "Malformed XML declaration not faulted", + NULL, + XML_ERROR_XML_DECL + }, + { + /* UTF-8 BOM */ + "\xEF\xBB\xBF", + NULL, + NULL, + XML_ERROR_NONE + }, + { + "\n$", + "Invalid token after text declaration not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }, + { + "\n'wombat", + "Unterminated string after text decl not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + "\n\xe2\x82", + "Partial UTF-8 character after text decl not 
faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { + "%e1;", + "Recursive parameter entity not faulted", + NULL, + XML_ERROR_RECURSIVE_ENTITY_REF + }, + { NULL, NULL, NULL, XML_ERROR_NONE } + }; + int i; + + for (i = 0; data_004_2[i].parse_text != NULL; i++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_valuer); + XML_SetUserData(parser, &data_004_2[i]); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + XML_ParserReset(parser, NULL); + } +} +END_TEST + +/* Test the recursive parse interacts with a not standalone handler */ +static int XMLCALL +external_entity_not_standalone(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text1 = + "\n" + "\n" + "%e1;\n"; + const char *text2 = ""; + XML_Parser ext_parser; + + if (systemId == NULL) + return XML_STATUS_OK; + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (!strcmp(systemId, "foo")) { + XML_SetNotStandaloneHandler(ext_parser, + reject_not_standalone_handler); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), + XML_TRUE) != XML_STATUS_ERROR) + fail("Expected not standalone rejection"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_NOT_STANDALONE) + xml_failure(ext_parser); + XML_SetNotStandaloneHandler(ext_parser, NULL); + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; + } + else if (!strcmp(systemId, "bar")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, strlen(text2), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_not_standalone) +{ + const char *text = + "\n" + ""; + + XML_SetParamEntityParsing(parser, 
XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_not_standalone); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Standalone rejection not caught"); +} +END_TEST + +static int XMLCALL +external_entity_value_aborter(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text1 = + "\n" + "\n" + "\n" + "%e1;\n"; + const char *text2 = + ""; + XML_Parser ext_parser; + + if (systemId == NULL) + return XML_STATUS_OK; + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (!strcmp(systemId, "004-1.ent")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + } + if (!strcmp(systemId, "004-2.ent")) { + XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); + XML_SetUserData(ext_parser, ext_parser); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, strlen(text2), + XML_TRUE) != XML_STATUS_ERROR) + fail("Aborted parse not faulted"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ABORTED) + xml_failure(ext_parser); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_value_abort) +{ + const char *text = + "\n" + "\n"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_value_aborter); + resumable = XML_FALSE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_bad_public_doctype) +{ + const char *text = + "\n" + "\n" + ""; + + /* Setting a handler provokes a particular code path */ + XML_SetDoctypeDeclHandler(parser, + dummy_start_doctype_handler, + dummy_end_doctype_handler); + expect_failure(text, XML_ERROR_PUBLICID, "Bad 
Public ID not failed"); +} +END_TEST + +/* Test based on ibm/valid/P32/ibm32v04.xml */ +START_TEST(test_attribute_enum_value) +{ + const char *text = + "\n" + "\n" + "This is a \n \n\nyellow tiger"; + ExtTest dtd_data = { + "\n" + "\n" + "", + NULL, + NULL + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetUserData(parser, &dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + /* An attribute list handler provokes a different code path */ + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + run_ext_character_check(text, &dtd_data, + "This is a \n \n\nyellow tiger"); +} +END_TEST + +/* Slightly bizarrely, the library seems to silently ignore entity + * definitions for predefined entities, even when they are wrong. The + * language of the XML 1.0 spec is somewhat unhelpful as to what ought + * to happen, so this is currently treated as acceptable. + */ +START_TEST(test_predefined_entity_redefinition) +{ + const char *text = + "\n" + "]>\n" + "'"; + run_character_check(text, "'"); +} +END_TEST + +/* Test that the parser stops processing the DTD after an unresolved + * parameter entity is encountered. 
+ */ +START_TEST(test_dtd_stop_processing) +{ + const char *text = + "\n" + "]>"; + + XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (dummy_handler_flags != 0) + fail("DTD processing still going after undefined PE"); +} +END_TEST + +/* Test public notations with no system ID */ +START_TEST(test_public_notation_no_sysid) +{ + const char *text = + "\n" + "\n" + "]>\n"; + + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) + fail("Notation declaration handler not called"); +} +END_TEST + +static void XMLCALL +record_element_start_handler(void *userData, + const XML_Char *name, + const XML_Char **UNUSED_P(atts)) +{ + CharData_AppendString((CharData *)userData, name); +} + +START_TEST(test_nested_groups) +{ + const char *text = + "\n" + "" + "]>\n" + ""; + CharData storage; + + CharData_Init(&storage); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckString(&storage, "doce"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler not fired"); +} +END_TEST + +START_TEST(test_group_choice) +{ + const char *text = + "\n" + "\n" + "\n" + "\n" + "]>\n" + "\n" + "\n" + "This is a foo\n" + "\n" + "\n"; + + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + 
xml_failure(parser); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler flag not raised"); +} +END_TEST + +static int XMLCALL +external_entity_public(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *publicId) +{ + const char *text1 = (const char *)XML_GetUserData(parser); + const char *text2 = ""; + const char *text = NULL; + XML_Parser ext_parser; + int parse_res; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + return XML_STATUS_ERROR; + if (systemId != NULL && !strcmp(systemId, "http://example.org/")) { + text = text1; + } + else if (publicId != NULL && !strcmp(publicId, "foo")) { + text = text2; + } + else + fail("Unexpected parameters to external entity parser"); + parse_res = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE); + XML_ParserFree(ext_parser); + return parse_res; +} + +START_TEST(test_standalone_parameter_entity) +{ + const char *text = + "\n" + "'>\n" + "%entity;\n" + "]>\n" + ""; + char dtd_data[] = + "\n"; + + XML_SetUserData(parser, dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_public); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test skipping of parameter entity in an external DTD */ +/* Derived from ibm/invalid/P69/ibm69i01.xml */ +START_TEST(test_skipped_parameter_entity) +{ + const char *text = + "\n" + "\n" + "]>\n" + ""; + ExtTest dtd_data = { "%pe2;", NULL, NULL }; + + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetUserData(parser, &dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetSkippedEntityHandler(parser, dummy_skip_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), 
+ XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (dummy_handler_flags != DUMMY_SKIP_HANDLER_FLAG) + fail("Skip handler not executed"); +} +END_TEST + +/* Test recursive parameter entity definition rejected in external DTD */ +START_TEST(test_recursive_external_parameter_entity) +{ + const char *text = + "\n" + "\n" + "]>\n" + ""; + ExtFaults dtd_data = { + "\n%pe2;", + "Recursive external parameter entity not faulted", + NULL, + XML_ERROR_RECURSIVE_ENTITY_REF + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Recursive external parameter not spotted"); +} +END_TEST + +/* Test undefined parameter entity in external entity handler */ +static int XMLCALL +external_entity_devaluer(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = + "\n" + "\n" + "%e1;\n"; + XML_Parser ext_parser; + int clear_handler = (intptr_t)XML_GetUserData(parser); + + if (systemId == NULL || !strcmp(systemId, "bar")) + return XML_STATUS_OK; + if (strcmp(systemId, "foo")) + fail("Unexpected system ID"); + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could note create external entity parser"); + if (clear_handler) + XML_SetExternalEntityRefHandler(ext_parser, NULL); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_undefined_ext_entity_in_external_dtd) +{ + const char *text = + "\n" + "\n"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_devaluer); + XML_SetUserData(parser, (void 
*)(intptr_t)XML_FALSE); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + /* Now repeat without the external entity ref handler invoking + * another copy of itself. + */ + XML_ParserReset(parser, NULL); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_devaluer); + XML_SetUserData(parser, (void *)(intptr_t)XML_TRUE); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + + +static void XMLCALL +aborting_xdecl_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(version), + const XML_Char *UNUSED_P(encoding), + int UNUSED_P(standalone)) +{ + XML_StopParser(parser, resumable); + XML_SetXmlDeclHandler(parser, NULL); +} + +/* Test suspending the parse on receiving an XML declaration works */ +START_TEST(test_suspend_xdecl) +{ + const char *text = long_character_data_text; + + XML_SetXmlDeclHandler(parser, aborting_xdecl_handler); + resumable = XML_TRUE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + if (XML_GetErrorCode(parser) != XML_ERROR_NONE) + xml_failure(parser); + /* Attempt to start a new parse while suspended */ + if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + fail("Attempt to parse while suspended not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED) + fail("Suspended parse not faulted with correct error"); +} +END_TEST + +/* Test aborting the parse in an epilog works */ +static void XMLCALL +selective_aborting_default_handler(void *userData, + const XML_Char *s, + int len) +{ + const char *match = (const char *)userData; + + if (match == NULL || + (strlen(match) == (unsigned)len && + !strncmp(match, s, len))) { + XML_StopParser(parser, resumable); + XML_SetDefaultHandler(parser, NULL); + } +} + 
+START_TEST(test_abort_epilog) +{ + const char *text = "\n\r\n"; + char match[] = "\r"; + + XML_SetDefaultHandler(parser, selective_aborting_default_handler); + XML_SetUserData(parser, match); + resumable = XML_FALSE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + fail("Abort not triggered"); + if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED) + xml_failure(parser); +} +END_TEST + +/* Test a different code path for abort in the epilog */ +START_TEST(test_abort_epilog_2) +{ + const char *text = "\n"; + char match[] = "\n"; + + XML_SetDefaultHandler(parser, selective_aborting_default_handler); + XML_SetUserData(parser, match); + resumable = XML_FALSE; + expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); +} +END_TEST + +/* Test suspension from the epilog */ +START_TEST(test_suspend_epilog) +{ + const char *text = "\n"; + char match[] = "\n"; + + XML_SetDefaultHandler(parser, selective_aborting_default_handler); + XML_SetUserData(parser, match); + resumable = XML_TRUE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); +} +END_TEST + +START_TEST(test_unfinished_epilog) +{ + const char *text = "<"; + + expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, + "Incomplete epilog entry not faulted"); +} +END_TEST + +START_TEST(test_partial_char_in_epilog) +{ + const char *text = "\xe2\x82"; + + /* First check that no fault is raised if the parse is not finished */ + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + /* Now check that it is faulted once we finish */ + if (XML_ParseBuffer(parser, 0, XML_TRUE) != XML_STATUS_ERROR) + fail("Partial character in epilog not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_PARTIAL_CHAR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_hash_collision) +{ + /* For full coverage of the lookup routine, we need to ensure a + * hash collision 
even though we can only tell that we have one + * through breakpoint debugging or coverage statistics. The + * following will cause a hash collision on machines with a 64-bit + * long type; others will have to experiment. The full coverage + * tests invoked from qa.sh usually provide a hash collision, but + * not always. This is an attempt to provide insurance. + */ +#define COLLIDING_HASH_SALT (unsigned long)_SIP_ULL(0xffffffffU, 0xff99fc90U) + const char * text = + "\n" + "\n" + "This is a foo\n" + "\n" + "\n" + "\n" + "This triggers the table growth and collides with b2\n" + "\n"; + + XML_SetHashSalt(parser, COLLIDING_HASH_SALT); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST +#undef COLLIDING_HASH_SALT + +/* Test resuming a parse suspended in entity substitution */ +static void XMLCALL +start_element_suspender(void *UNUSED_P(userData), + const XML_Char *name, + const XML_Char **UNUSED_P(atts)) +{ + if (!strcmp(name, "suspend")) + XML_StopParser(parser, XML_TRUE); +} + +START_TEST(test_suspend_resume_internal_entity) +{ + const char *text = + "HiHo'>\n" + "]>\n" + "&foo;\n"; + const char *expected1 = "Hi"; + const char *expected2 = "HiHo"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, start_element_suspender); + XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (XML_Parse(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + CharData_CheckXMLChars(&storage, ""); + if (XML_ResumeParser(parser) != XML_STATUS_SUSPENDED) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected1); + if (XML_ResumeParser(parser) != XML_STATUS_OK) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected2); +} +END_TEST + +/* Test syntax error is caught at parse resumption */ +START_TEST(test_resume_entity_with_syntax_error) +{ + const char *text = + "Hi'>\n" + 
"]>\n" + "&foo;\n"; + + XML_SetStartElementHandler(parser, start_element_suspender); + if (XML_Parse(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + if (XML_ResumeParser(parser) != XML_STATUS_ERROR) + fail("Syntax error in entity not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH) + xml_failure(parser); +} +END_TEST + +/* Test suspending and resuming in a parameter entity substitution */ +static void XMLCALL +element_decl_suspender(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(name), + XML_Content *model) +{ + XML_StopParser(parser, XML_TRUE); + XML_FreeContentModel(parser, model); +} + +START_TEST(test_suspend_resume_parameter_entity) +{ + const char *text = + "'>\n" + "%foo;\n" + "]>\n" + "Hello, world"; + const char *expected = "Hello, world"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetElementDeclHandler(parser, element_decl_suspender); + XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (XML_Parse(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + CharData_CheckXMLChars(&storage, ""); + if (XML_ResumeParser(parser) != XML_STATUS_OK) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test attempting to use parser after an error is faulted */ +START_TEST(test_restart_on_error) +{ + const char *text = "<$doc>"; + + if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid tag name not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) + xml_failure(parser); + if (XML_Parse(parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) + fail("Restarting invalid parse not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) + xml_failure(parser); +} +END_TEST + +/* Test that angle brackets in an attribute default value are faulted */ 
+START_TEST(test_reject_lt_in_attribute_value) +{ + const char *text = + "'>]>\n" + ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad attribute default not faulted"); +} +END_TEST + +START_TEST(test_reject_unfinished_param_in_att_value) +{ + const char *text = + "]>\n" + ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad attribute default not faulted"); +} +END_TEST + +START_TEST(test_trailing_cr_in_att_value) +{ + const char *text = ""; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Try parsing a general entity within a parameter entity in a + * standalone internal DTD. Covers a corner case in the parser. + */ +START_TEST(test_standalone_internal_entity) +{ + const char *text = + "\n" + "\n" + " '>\n" + " \n" + " %pe;\n" + "]>\n" + ""; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test that a reference to an unknown external entity is skipped */ +START_TEST(test_skipped_external_entity) +{ + const char *text = + "\n" + "\n"; + ExtTest test_data = { + "\n" + "\n", + NULL, + NULL + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test a different form of unknown external entity */ +typedef struct ext_hdlr_data { + const char *parse_text; + XML_ExternalEntityRefHandler handler; +} ExtHdlrData; + +static int XMLCALL +external_entity_oneshot_loader(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + ExtHdlrData *test_data = (ExtHdlrData 
*)XML_GetUserData(parser); + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser."); + /* Use the requested entity parser for further externals */ + XML_SetExternalEntityRefHandler(ext_parser, test_data->handler); + if ( _XML_Parse_SINGLE_BYTES(ext_parser, + test_data->parse_text, + strlen(test_data->parse_text), + XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(ext_parser); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_skipped_null_loaded_ext_entity) +{ + const char *text = + "\n" + ""; + ExtHdlrData test_data = { + "\n" + "\n" + "%pe2;\n", + external_entity_null_loader + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_skipped_unloaded_ext_entity) +{ + const char *text = + "\n" + ""; + ExtHdlrData test_data = { + "\n" + "\n" + "%pe2;\n", + NULL + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test that a parameter entity value ending with a carriage return + * has it translated internally into a newline. 
+ */ +START_TEST(test_param_entity_with_trailing_cr) +{ +#define PARAM_ENTITY_NAME "pe" +#define PARAM_ENTITY_CORE_VALUE "" + const char *text = + "\n" + ""; + ExtTest test_data = { + "\n" + "%" PARAM_ENTITY_NAME ";\n", + NULL, + NULL + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetEntityDeclHandler(parser, param_entity_match_handler); + entity_name_to_match = PARAM_ENTITY_NAME; + entity_value_to_match = PARAM_ENTITY_CORE_VALUE "\n"; + entity_match_flag = ENTITY_MATCH_NOT_FOUND; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (entity_match_flag == ENTITY_MATCH_FAIL) + fail("Parameter entity CR->NEWLINE conversion failed"); + else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) + fail("Parameter entity not parsed"); +} +#undef PARAM_ENTITY_NAME +#undef PARAM_ENTITY_CORE_VALUE +END_TEST + +START_TEST(test_invalid_character_entity) +{ + const char *text = + "\n" + "]>\n" + "&entity;"; + + expect_failure(text, XML_ERROR_BAD_CHAR_REF, + "Out of range character reference not faulted"); +} +END_TEST + +START_TEST(test_invalid_character_entity_2) +{ + const char *text = + "\n" + "]>\n" + "&entity;"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Out of range character reference not faulted"); +} +END_TEST + +START_TEST(test_invalid_character_entity_3) +{ + const char text[] = + /* \n */ + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " + "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" + /* ]>\n */ + "\0]\0>\0\n" + /* &entity; */ + "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid start of entity name not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_UNDEFINED_ENTITY) + xml_failure(parser); +} +END_TEST + 
+START_TEST(test_invalid_character_entity_4) +{ + const char *text = + "\n" /* = � */ + "]>\n" + "&entity;"; + + expect_failure(text, XML_ERROR_BAD_CHAR_REF, + "Out of range character reference not faulted"); +} +END_TEST + + +/* Test that processing instructions are picked up by a default handler */ +START_TEST(test_pi_handled_in_default) +{ + const char *text = "\n"; + const XML_Char *expected = "\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE)== XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + + +/* Test that comments are picked up by a default handler */ +START_TEST(test_comment_handled_in_default) +{ + const char *text = "\n"; + const XML_Char *expected = "\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test PIs that look almost but not quite like XML declarations */ +static void XMLCALL +accumulate_pi_characters(void *userData, + const XML_Char *target, + const XML_Char *data) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, target, -1); + CharData_AppendXMLChars(storage, ": ", 2); + CharData_AppendXMLChars(storage, data, -1); + CharData_AppendXMLChars(storage, "\n", 1); +} + +START_TEST(test_pi_yml) +{ + const char *text = ""; + const XML_Char *expected = "yml: something like data\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + 
xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_pi_xnl) +{ + const char *text = ""; + const XML_Char *expected = "xnl: nothing like data\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_pi_xmm) +{ + const char *text = ""; + const XML_Char *expected = "xmm: everything like data\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_pi) +{ + const char text[] = + /* + * where {KHO KHWAI} = U+0E04 + * and {CHO CHAN} = U+0E08 + */ + "<\0?\0\x04\x0e\x08\x0e?\0>\0" + /* */ + "<\0q\0/\0>\0"; + const XML_Char *expected = "\xe0\xb8\x84\xe0\xb8\x88: \n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_be_pi) +{ + const char text[] = + /* + * where {KHO KHWAI} = U+0E04 + * and {CHO CHAN} = U+0E08 + */ + "\0<\0?\x0e\x04\x0e\x08\0?\0>" + /* */ + "\0<\0q\0/\0>"; + const XML_Char *expected = "\xe0\xb8\x84\xe0\xb8\x88: \n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, 
sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that comments can be picked up and translated */ +static void XMLCALL +accumulate_comment(void *userData, + const XML_Char *data) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, data, -1); +} + +START_TEST(test_utf16_be_comment) +{ + const char text[] = + /* */ + "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" + /* */ + "\0<\0d\0o\0c\0/\0>"; + const XML_Char *expected = " Comment A "; + CharData storage; + + CharData_Init(&storage); + XML_SetCommentHandler(parser, accumulate_comment); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_le_comment) +{ + const char text[] = + /* */ + "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" + /* */ + "<\0d\0o\0c\0/\0>\0"; + const XML_Char *expected = " Comment B "; + CharData storage; + + CharData_Init(&storage); + XML_SetCommentHandler(parser, accumulate_comment); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that the unknown encoding handler with map entries that expect + * conversion but no conversion function is faulted + */ +static int XMLCALL +failing_converter(void *UNUSED_P(data), const char *UNUSED_P(s)) +{ + /* Always claim to have failed */ + return -1; +} + +static int XMLCALL +prefix_converter(void *UNUSED_P(data), const char *s) +{ + /* If the first byte is 0xff, raise an error */ + if (s[0] == -1) + return -1; + /* Just add the low bits of the first byte to the second */ + return (s[1] + (s[0] & 0x7f)) & 0x01ff; +} + +static int XMLCALL 
+MiscEncodingHandler(void *data, + const XML_Char *encoding, + XML_Encoding *info) +{ + int i; + int high_map = -2; /* Assume a 2-byte sequence */ + + if (!strcmp(encoding, "invalid-9") || + !strcmp(encoding, "ascii-like") || + !strcmp(encoding, "invalid-len") || + !strcmp(encoding, "invalid-a") || + !strcmp(encoding, "invalid-surrogate") || + !strcmp(encoding, "invalid-high")) + high_map = -1; + + for (i = 0; i < 128; ++i) + info->map[i] = i; + for (; i < 256; ++i) + info->map[i] = high_map; + + /* If required, put an invalid value in the ASCII entries */ + if (!strcmp(encoding, "invalid-9")) + info->map[9] = 5; + /* If required, have a top-bit set character start a 5-byte sequence */ + if (!strcmp(encoding, "invalid-len")) + info->map[0x81] = -5; + /* If required, make a top-bit set character a valid ASCII character */ + if (!strcmp(encoding, "invalid-a")) + info->map[0x82] = 'a'; + /* If required, give a top-bit set character a forbidden value, + * what would otherwise be the first of a surrogate pair. + */ + if (!strcmp(encoding, "invalid-surrogate")) + info->map[0x83] = 0xd801; + /* If required, give a top-bit set character too high a value */ + if (!strcmp(encoding, "invalid-high")) + info->map[0x84] = 0x010101; + + info->data = data; + info->release = NULL; + if (!strcmp(encoding, "failing-conv")) + info->convert = failing_converter; + else if (!strcmp(encoding, "prefix-conv")) + info->convert = prefix_converter; + else + info->convert = NULL; + return XML_STATUS_OK; +} + +START_TEST(test_missing_encoding_conversion_fn) +{ + const char *text = + "\n" + "\x81"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + /* MiscEncodingHandler sets up an encoding with every top-bit-set + * character introducing a two-byte sequence. For this, it + * requires a convert function. The above function call doesn't + * pass one through, so when MiscEncodingHandler actually gets + * called it should supply an invalid encoding. 
+ */ + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Encoding with missing convert() not faulted"); +} +END_TEST + +START_TEST(test_failing_encoding_conversion_fn) +{ + const char *text = + "\n" + "\x81"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + /* MiscEncodingHandler sets up an encoding with every top-bit-set + * character introducing a two-byte sequence. For this, it + * requires a convert function. The above function call passes + * one that insists all possible sequences are invalid anyway. + */ + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Encoding with failing convert() not faulted"); +} +END_TEST + +/* Test unknown encoding conversions */ +START_TEST(test_unknown_encoding_success) +{ + const char *text = + "\n" + /* Equivalent to Hello, world */ + "<\x81\x64\x80oc>Hello, world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + run_character_check(text, "Hello, world"); +} +END_TEST + +/* Test bad name character in unknown encoding */ +START_TEST(test_unknown_encoding_bad_name) +{ + const char *text = + "\n" + "<\xff\x64oc>Hello, world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad name start in unknown encoding not faulted"); +} +END_TEST + +/* Test bad mid-name character in unknown encoding */ +START_TEST(test_unknown_encoding_bad_name_2) +{ + const char *text = + "\n" + "Hello, world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad name in unknown encoding not faulted"); +} +END_TEST + +/* Test element name that is long enough to fill the conversion buffer + * in an unknown encoding, finishing with an encoded character. 
+ */ +START_TEST(test_unknown_encoding_long_name_1) +{ + const char *text = + "\n" + "" + "Hi" + ""; + const XML_Char *expected = "abcdefghabcdefghabcdefghijklmnop"; + CharData storage; + + CharData_Init(&storage); + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test element name that is long enough to fill the conversion buffer + * in an unknown encoding, finishing with an simple character. + */ +START_TEST(test_unknown_encoding_long_name_2) +{ + const char *text = + "\n" + "" + "Hi" + ""; + const XML_Char *expected = "abcdefghabcdefghabcdefghijklmnop"; + CharData storage; + + CharData_Init(&storage); + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_invalid_unknown_encoding) +{ + const char *text = + "\n" + "Hello world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_ascii_encoding_ok) +{ + const char *text = + "\n" + "Hello, world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + run_character_check(text, "Hello, world"); +} +END_TEST + +START_TEST(test_unknown_ascii_encoding_fail) +{ + const char *text = + "\n" + "Hello, \x80 world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character not 
faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_length) +{ + const char *text = + "\n" + "Hello, world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_topbit) +{ + const char *text = + "\n" + "Hello, world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_surrogate) +{ + const char *text = + "\n" + "Hello, \x82 world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_high) +{ + const char *text = + "\n" + "Hello, world"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_attr_value) +{ + const char *text = + "\n" + ""; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid attribute valid not faulted"); +} +END_TEST + +/* Test an external entity parser set to use latin-1 detects UTF-16 + * BOMs correctly. 
+ */ +enum ee_parse_flags { + EE_PARSE_NONE = 0x00, + EE_PARSE_FULL_BUFFER = 0x01 +}; + +typedef struct ExtTest2 { + const char *parse_text; + int parse_len; + const char *encoding; + CharData *storage; + enum ee_parse_flags flags; +} ExtTest2; + +static int XMLCALL +external_entity_loader2(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + ExtTest2 *test_data = (ExtTest2 *)XML_GetUserData(parser); + XML_Parser extparser; + + extparser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (extparser == NULL) + fail("Coulr not create external entity parser"); + if (test_data->encoding != NULL) { + if (!XML_SetEncoding(extparser, test_data->encoding)) + fail("XML_SetEncoding() ignored for external entity"); + } + if (test_data->flags & EE_PARSE_FULL_BUFFER) { + if (XML_Parse(extparser, + test_data->parse_text, + test_data->parse_len, + XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(extparser); + } + } + else if (_XML_Parse_SINGLE_BYTES(extparser, + test_data->parse_text, + test_data->parse_len, + XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(extparser); + } + + XML_ParserFree(extparser); + return XML_STATUS_OK; +} + +/* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ +static void XMLCALL +ext2_accumulate_characters(void *userData, const XML_Char *s, int len) +{ + ExtTest2 *test_data = (ExtTest2 *)userData; + accumulate_characters(test_data->storage, s, len); +} + +START_TEST(test_ext_entity_latin1_utf16le_bom) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xff\xfe\x4c\x20", + 4, + "iso-8859-1", + NULL, + EE_PARSE_NONE + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = 
"\xc3\xbf\xc3\xbeL "; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ext_entity_latin1_utf16be_bom) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xfe\xff\x20\x4c", + 4, + "iso-8859-1", + NULL, + EE_PARSE_NONE + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + + +/* Parsing the full buffer rather than a byte at a time makes a + * difference to the encoding scanning code, so repeat the above tests + * without breaking them down by byte. 
+ */ +START_TEST(test_ext_entity_latin1_utf16le_bom2) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xff\xfe\x4c\x20", + 4, + "iso-8859-1", + NULL, + EE_PARSE_FULL_BUFFER + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = "\xc3\xbf\xc3\xbeL "; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ext_entity_latin1_utf16be_bom2) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xfe\xff\x20\x4c", + 4, + "iso-8859-1", + NULL, + EE_PARSE_FULL_BUFFER + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test little-endian UTF-16 given an explicit big-endian encoding */ +START_TEST(test_ext_entity_utf16_be) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtTest2 
test_data = { + "<\0e\0/\0>\0", + 8, + "utf-16be", + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = + "\xe3\xb0\x80" /* U+3C00 */ + "\xe6\x94\x80" /* U+6A00 */ + "\xe2\xbc\x80" /* U+2F00 */ + "\xe3\xb8\x80"; /* U+3E00 */ + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test big-endian UTF-16 given an explicit little-endian encoding */ +START_TEST(test_ext_entity_utf16_le) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtTest2 test_data = { + "\0<\0e\0/\0>", + 8, + "utf-16le", + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = + "\xe3\xb0\x80" /* U+3C00 */ + "\xe6\x94\x80" /* U+6A00 */ + "\xe2\xbc\x80" /* U+2F00 */ + "\xe3\xb8\x80"; /* U+3E00 */ + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test little-endian UTF-16 given no explicit encoding. + * The existing default encoding (UTF-8) is assumed to hold without a + * BOM to contradict it, so the entity value will in fact provoke an + * error because 0x00 is not a valid XML character. We parse the + * whole buffer in one go rather than feeding it in byte by byte to + * exercise different code paths in the initial scanning routines. 
+ */ +typedef struct ExtFaults2 { + const char *parse_text; + int parse_len; + const char *fail_text; + const char *encoding; + enum XML_Error error; +} ExtFaults2; + +static int XMLCALL +external_entity_faulter2(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + ExtFaults2 *test_data = (ExtFaults2 *)XML_GetUserData(parser); + XML_Parser extparser; + + extparser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (extparser == NULL) + fail("Could not create external entity parser"); + if (test_data->encoding != NULL) { + if (!XML_SetEncoding(extparser, test_data->encoding)) + fail("XML_SetEncoding() ignored for external entity"); + } + if (XML_Parse(extparser, + test_data->parse_text, + test_data->parse_len, + XML_TRUE) != XML_STATUS_ERROR) + fail(test_data->fail_text); + if (XML_GetErrorCode(extparser) != test_data->error) + xml_failure(extparser); + + XML_ParserFree(extparser); + return XML_STATUS_ERROR; +} + +START_TEST(test_ext_entity_utf16_unknown) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtFaults2 test_data = { + "a\0b\0c\0", + 6, + "Invalid character in entity not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_faulter2); + XML_SetUserData(parser, &test_data); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Invalid character should not have been accepted"); +} +END_TEST + +/* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ +START_TEST(test_ext_entity_utf8_non_bom) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtTest2 test_data = { + "\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ + 3, + NULL, + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = "\xef\xbb\x80"; + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, 
&test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that UTF-8 in a CDATA section is correctly passed through */ +START_TEST(test_utf8_in_cdata_section) +{ + const char *text = ""; + const XML_Char *expected = "one \xc3\xa9 two"; + + run_character_check(text, expected); +} +END_TEST + +/* Test that little-endian UTF-16 in a CDATA section is handled */ +START_TEST(test_utf8_in_cdata_section_2) +{ + const char *text = ""; + const XML_Char *expected = "\xc3\xa9]\xc3\xa9two"; + + run_character_check(text, expected); +} +END_TEST + +/* Test trailing spaces in elements are accepted */ +static void XMLCALL +record_element_end_handler(void *userData, + const XML_Char *name) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, "/", 1); + CharData_AppendXMLChars(storage, name, -1); +} + +START_TEST(test_trailing_spaces_in_elements) +{ + const char *text = "Hi"; + const XML_Char *expected = "doc/doc"; + CharData storage; + + CharData_Init(&storage); + XML_SetElementHandler(parser, record_element_start_handler, + record_element_end_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_attribute) +{ + const char text[] = + /* + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; + const XML_Char *expected = "a"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == 
XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_second_attr) +{ + /* + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + const char text[] = + "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" + "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; + const XML_Char *expected = "1"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_attr_after_solidus) +{ + const char *text = ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Misplaced / not faulted"); +} +END_TEST + +static void XMLCALL +accumulate_entity_decl(void *userData, + const XML_Char *entityName, + int UNUSED_P(is_parameter_entity), + const XML_Char *value, + int value_length, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId), + const XML_Char *UNUSED_P(notationName)) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, entityName, -1); + CharData_AppendXMLChars(storage, "=", 1); + CharData_AppendXMLChars(storage, value, value_length); + CharData_AppendXMLChars(storage, "\n", 1); +} + + +START_TEST(test_utf16_pe) +{ + /* '> + * %{KHO KHWAI}{CHO CHAN}; + * ]> + * + * + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + const char text[] = + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " + "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " + "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" + "\0%\x0e\x04\x0e\x08\0;\0\n" + "\0]\0>\0\n" + "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; + const XML_Char *expected = + 
"\xe0\xb8\x84\xe0\xb8\x88=\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetEntityDeclHandler(parser, accumulate_entity_decl); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that duff attribute description keywords are rejected */ +START_TEST(test_bad_attr_desc_keyword) +{ + const char *text = + "\n" + "]>\n" + ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad keyword !IMPLIED not faulted"); +} +END_TEST + +/* Test that an invalid attribute description keyword consisting of + * UTF-16 characters with their top bytes non-zero are correctly + * faulted + */ +START_TEST(test_bad_attr_desc_keyword_utf16) +{ + /* + * ]> + * + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + const char text[] = + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" + "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " + "\0#\x0e\x04\x0e\x08\0>\0\n" + "\0]\0>\0<\0d\0/\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid UTF16 attribute keyword not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_SYNTAX) + xml_failure(parser); +} +END_TEST + +/* Test that invalid syntax in a is rejected. 
Do this + * using prefix-encoding (see above) to trigger specific code paths + */ +START_TEST(test_bad_doctype) +{ + const char *text = + "\n" + ""; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_SYNTAX, + "Invalid bytes in DOCTYPE not faulted"); +} +END_TEST + +START_TEST(test_bad_doctype_utf16) +{ + const char text[] = + /* + * + * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number + * (name character) but not a valid letter (name start character) + */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 " + "\x06\xf2" + "\0 \0]\0>\0<\0d\0o\0c\0/\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid bytes in DOCTYPE not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_SYNTAX) + xml_failure(parser); +} +END_TEST + +START_TEST(test_bad_doctype_plus) +{ + const char *text = + " ]>\n" + "<1+>&foo;"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "'+' in document name not faulted"); +} +END_TEST + +START_TEST(test_bad_doctype_star) +{ + const char *text = + " ]>\n" + "<1*>&foo;"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "'*' in document name not faulted"); +} +END_TEST + +START_TEST(test_bad_doctype_query) +{ + const char *text = + " ]>\n" + "<1?>&foo;"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "'?' 
in document name not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_bad_ignore) +{ + const char *text = + "" + "" + "&entity;"; + ExtFaults fault = { + "]]>", + "Invalid character not faulted", + "prefix-conv", + XML_ERROR_INVALID_TOKEN + }; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &fault); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad IGNORE section with unknown encoding not failed"); +} +END_TEST + +START_TEST(test_entity_in_utf16_be_attr) +{ + const char text[] = + /* */ + "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 " + "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>"; + const XML_Char *expected = "\xc3\xa4 \xc3\xa4"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_entity_in_utf16_le_attr) +{ + const char text[] = + /* */ + "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0" + "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0"; + const XML_Char *expected = "\xc3\xa4 \xc3\xa4"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_entity_public_utf16_be) +{ + const char text[] = + /* */ + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " + "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" + /* %e; */ + "\0%\0e\0;\0\n" + /* ]> */ + "\0]\0>\0\n" + 
/* &j; */ + "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; + ExtTest2 test_data = { + /* */ + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", + 34, + NULL, + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = "baz"; + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_entity_public_utf16_le) +{ + const char text[] = + /* */ + "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" + "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" + /* %e; */ + "%\0e\0;\0\n\0" + /* ]> */ + "]\0>\0\n\0" + /* &j; */ + "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; + ExtTest2 test_data = { + /* */ + "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", + 34, + NULL, + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = "baz"; + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that a doctype with neither an internal nor external subset is + * faulted + */ +START_TEST(test_short_doctype) +{ + const char *text = ""; + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "DOCTYPE without subset not rejected"); +} +END_TEST + +START_TEST(test_short_doctype_2) +{ + const char *text = 
""; + expect_failure(text, XML_ERROR_SYNTAX, + "DOCTYPE without Public ID not rejected"); +} +END_TEST + +START_TEST(test_short_doctype_3) +{ + const char *text = ""; + expect_failure(text, XML_ERROR_SYNTAX, + "DOCTYPE without System ID not rejected"); +} +END_TEST + +START_TEST(test_long_doctype) +{ + const char *text = ""; + expect_failure(text, XML_ERROR_SYNTAX, + "DOCTYPE with extra ID not rejected"); +} +END_TEST + +START_TEST(test_bad_entity) +{ + const char *text = + "\n" + "]>\n" + ""; + expect_failure(text, XML_ERROR_SYNTAX, + "ENTITY without Public ID is not rejected"); +} +END_TEST + +/* Test unquoted value is faulted */ +START_TEST(test_bad_entity_2) +{ + const char *text = + "\n" + "]>\n" + ""; + expect_failure(text, XML_ERROR_SYNTAX, + "ENTITY without Public ID is not rejected"); +} +END_TEST + +START_TEST(test_bad_entity_3) +{ + const char *text = + "\n" + "]>\n" + ""; + expect_failure(text, XML_ERROR_SYNTAX, + "Parameter ENTITY without Public ID is not rejected"); +} +END_TEST + +START_TEST(test_bad_entity_4) +{ + const char *text = + "\n" + "]>\n" + ""; + expect_failure(text, XML_ERROR_SYNTAX, + "Parameter ENTITY without Public ID is not rejected"); +} +END_TEST + +START_TEST(test_bad_notation) +{ + const char *text = + "\n" + "]>\n" + ""; + expect_failure(text, XML_ERROR_SYNTAX, + "Notation without System ID is not rejected"); +} +END_TEST + +/* + * Namespaces tests. + */ + +static void +namespace_setup(void) +{ + parser = XML_ParserCreateNS(NULL, ' '); + if (parser == NULL) + fail("Parser not created."); +} + +static void +namespace_teardown(void) +{ + basic_teardown(); +} + +/* Check that an element name and attribute name match the expected values. + The expected values are passed as an array reference of string pointers + provided as the userData argument; the first is the expected + element name, and the second is the expected attribute name. 
+*/ +static int triplet_start_flag = XML_FALSE; +static int triplet_end_flag = XML_FALSE; + +static void XMLCALL +triplet_start_checker(void *userData, const XML_Char *name, + const XML_Char **atts) +{ + char **elemstr = (char **)userData; + char buffer[1024]; + if (strcmp(elemstr[0], name) != 0) { + sprintf(buffer, "unexpected start string: '%s'", name); + fail(buffer); + } + if (strcmp(elemstr[1], atts[0]) != 0) { + sprintf(buffer, "unexpected attribute string: '%s'", atts[0]); + fail(buffer); + } + triplet_start_flag = XML_TRUE; +} + +/* Check that the element name passed to the end-element handler matches + the expected value. The expected value is passed as the first element + in an array of strings passed as the userData argument. +*/ +static void XMLCALL +triplet_end_checker(void *userData, const XML_Char *name) +{ + char **elemstr = (char **)userData; + if (strcmp(elemstr[0], name) != 0) { + char buffer[1024]; + sprintf(buffer, "unexpected end string: '%s'", name); + fail(buffer); + } + triplet_end_flag = XML_TRUE; +} + +START_TEST(test_return_ns_triplet) +{ + const char *text = + ""; + const char *epilog = ""; + const char *elemstr[] = { + "http://example.org/ e foo", + "http://example.org/ a bar" + }; + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + XML_SetElementHandler(parser, triplet_start_checker, + triplet_end_checker); + XML_SetNamespaceDeclHandler(parser, + dummy_start_namespace_decl_handler, + dummy_end_namespace_decl_handler); + triplet_start_flag = XML_FALSE; + triplet_end_flag = XML_FALSE; + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + if (!triplet_start_flag) + fail("triplet_start_checker not invoked"); + /* Check that unsetting "return triplets" fails while still parsing */ + XML_SetReturnNSTriplet(parser, XML_FALSE); + if (_XML_Parse_SINGLE_BYTES(parser, epilog, strlen(epilog), + XML_TRUE) == XML_STATUS_ERROR) + 
xml_failure(parser); + if (!triplet_end_flag) + fail("triplet_end_checker not invoked"); + if (dummy_handler_flags != (DUMMY_START_NS_DECL_HANDLER_FLAG | + DUMMY_END_NS_DECL_HANDLER_FLAG)) + fail("Namespace handlers not called"); +} +END_TEST + +static void XMLCALL +overwrite_start_checker(void *userData, const XML_Char *name, + const XML_Char **atts) +{ + CharData *storage = (CharData *) userData; + CharData_AppendString(storage, "start "); + CharData_AppendXMLChars(storage, name, -1); + while (*atts != NULL) { + CharData_AppendString(storage, "\nattribute "); + CharData_AppendXMLChars(storage, *atts, -1); + atts += 2; + } + CharData_AppendString(storage, "\n"); +} + +static void XMLCALL +overwrite_end_checker(void *userData, const XML_Char *name) +{ + CharData *storage = (CharData *) userData; + CharData_AppendString(storage, "end "); + CharData_AppendXMLChars(storage, name, -1); + CharData_AppendString(storage, "\n"); +} + +static void +run_ns_tagname_overwrite_test(const char *text, const char *result) +{ + CharData storage; + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetElementHandler(parser, + overwrite_start_checker, overwrite_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckString(&storage, result); +} + +/* Regression test for SF bug #566334. */ +START_TEST(test_ns_tagname_overwrite) +{ + const char *text = + "\n" + " \n" + " \n" + ""; + const char *result = + "start http://example.org/ e\n" + "start http://example.org/ f\n" + "attribute http://example.org/ attr\n" + "end http://example.org/ f\n" + "start http://example.org/ g\n" + "attribute http://example.org/ attr2\n" + "end http://example.org/ g\n" + "end http://example.org/ e\n"; + run_ns_tagname_overwrite_test(text, result); +} +END_TEST + +/* Regression test for SF bug #566334. 
*/ +START_TEST(test_ns_tagname_overwrite_triplet) +{ + const char *text = + "\n" + " \n" + " \n" + ""; + const char *result = + "start http://example.org/ e n\n" + "start http://example.org/ f n\n" + "attribute http://example.org/ attr n\n" + "end http://example.org/ f n\n" + "start http://example.org/ g n\n" + "attribute http://example.org/ attr2 n\n" + "end http://example.org/ g n\n" + "end http://example.org/ e n\n"; + XML_SetReturnNSTriplet(parser, XML_TRUE); + run_ns_tagname_overwrite_test(text, result); +} +END_TEST + + +/* Regression test for SF bug #620343. */ +static void XMLCALL +start_element_fail(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts)) +{ + /* We should never get here. */ + fail("should never reach start_element_fail()"); +} + +static void XMLCALL +start_ns_clearing_start_element(void *userData, + const XML_Char *UNUSED_P(prefix), + const XML_Char *UNUSED_P(uri)) +{ + XML_SetStartElementHandler((XML_Parser) userData, NULL); +} + +START_TEST(test_start_ns_clears_start_element) +{ + /* This needs to use separate start/end tags; using the empty tag + syntax doesn't cause the problematic path through Expat to be + taken. + */ + const char *text = ""; + + XML_SetStartElementHandler(parser, start_element_fail); + XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element); + XML_SetEndNamespaceDeclHandler(parser, dummy_end_namespace_decl_handler); + XML_UseParserAsHandlerArg(parser); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Regression test for SF bug #616863. 
*/ +static int XMLCALL +external_entity_handler(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser); + const char *text; + XML_Parser p2; + + if (callno == 1) + text = ("\n" + "\n" + "\n"); + else + text = ("" + ""); + + XML_SetUserData(parser, (void *) callno); + p2 = XML_ExternalEntityParserCreate(parser, context, NULL); + if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(p2); + return XML_STATUS_ERROR; + } + XML_ParserFree(p2); + return XML_STATUS_OK; +} + +START_TEST(test_default_ns_from_ext_subset_and_ext_ge) +{ + const char *text = + "\n" + "\n" + "]>\n" + "\n" + "&en;\n" + ""; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_handler); + /* We actually need to set this handler to tickle this bug. */ + XML_SetStartElementHandler(parser, dummy_start_element); + XML_SetUserData(parser, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Regression test #1 for SF bug #673791. */ +START_TEST(test_ns_prefix_with_empty_uri_1) +{ + const char *text = + "\n" + " \n" + ""; + + expect_failure(text, + XML_ERROR_UNDECLARING_PREFIX, + "Did not report re-setting namespace" + " URI with prefix to ''."); +} +END_TEST + +/* Regression test #2 for SF bug #673791. */ +START_TEST(test_ns_prefix_with_empty_uri_2) +{ + const char *text = + "\n" + ""; + + expect_failure(text, + XML_ERROR_UNDECLARING_PREFIX, + "Did not report setting namespace URI with prefix to ''."); +} +END_TEST + +/* Regression test #3 for SF bug #673791. 
*/ +START_TEST(test_ns_prefix_with_empty_uri_3) +{ + const char *text = + "\n" + " \n" + "]>\n" + ""; + + expect_failure(text, + XML_ERROR_UNDECLARING_PREFIX, + "Didn't report attr default setting NS w/ prefix to ''."); +} +END_TEST + +/* Regression test #4 for SF bug #673791. */ +START_TEST(test_ns_prefix_with_empty_uri_4) +{ + const char *text = + "\n" + " \n" + "]>\n" + ""; + /* Packaged info expected by the end element handler; + the weird structuring lets us re-use the triplet_end_checker() + function also used for another test. */ + const char *elemstr[] = { + "http://example.org/ doc prefix" + }; + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + XML_SetEndElementHandler(parser, triplet_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test with non-xmlns prefix */ +START_TEST(test_ns_unbound_prefix) +{ + const char *text = + "\n" + " \n" + "]>\n" + ""; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + fail("Unbound prefix incorrectly passed"); + if (XML_GetErrorCode(parser) != XML_ERROR_UNBOUND_PREFIX) + xml_failure(parser); +} +END_TEST + +START_TEST(test_ns_default_with_empty_uri) +{ + const char *text = + "\n" + " \n" + ""; + /* Add some handlers to exercise extra code paths */ + XML_SetStartNamespaceDeclHandler(parser, + dummy_start_namespace_decl_handler); + XML_SetEndNamespaceDeclHandler(parser, + dummy_end_namespace_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Regression test for SF bug #692964: two prefixes for one namespace. 
*/ +START_TEST(test_ns_duplicate_attrs_diff_prefixes) +{ + const char *text = + ""; + expect_failure(text, + XML_ERROR_DUPLICATE_ATTRIBUTE, + "did not report multiple attributes with same URI+name"); +} +END_TEST + +START_TEST(test_ns_duplicate_hashes) +{ + /* The hash of an attribute is calculated as the hash of its URI + * concatenated with a space followed by its name (after the + * colon). We wish to generate attributes with the same hash + * value modulo the attribute table size so that we can check that + * the attribute hash table works correctly. The attribute hash + * table size will be the smallest power of two greater than the + * number of attributes, but at least eight. There is + * unfortunately no programmatic way of getting the hash or the + * table size at user level, but the test code coverage percentage + * will drop if the hashes cease to point to the same row. + * + * The cunning plan is to have few enough attributes to have a + * reliable table size of 8, and have the single letter attribute + * names be 8 characters apart, producing a hash which will be the + * same modulo 8. + */ + const char *text = + ""; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Regression test for SF bug #695401: unbound prefix. */ +START_TEST(test_ns_unbound_prefix_on_attribute) +{ + const char *text = ""; + expect_failure(text, + XML_ERROR_UNBOUND_PREFIX, + "did not report unbound prefix on attribute"); +} +END_TEST + +/* Regression test for SF bug #695401: unbound prefix. */ +START_TEST(test_ns_unbound_prefix_on_element) +{ + const char *text = ""; + expect_failure(text, + XML_ERROR_UNBOUND_PREFIX, + "did not report unbound prefix on element"); +} +END_TEST + +/* Test that the parsing status is correctly reset by XML_ParserReset(). + * We usE test_return_ns_triplet() for our example parse to improve + * coverage of tidying up code executed. 
+ */ +START_TEST(test_ns_parser_reset) +{ + XML_ParsingStatus status; + + XML_GetParsingStatus(parser, &status); + if (status.parsing != XML_INITIALIZED) + fail("parsing status doesn't start INITIALIZED"); + test_return_ns_triplet(); + XML_GetParsingStatus(parser, &status); + if (status.parsing != XML_FINISHED) + fail("parsing status doesn't end FINISHED"); + XML_ParserReset(parser, NULL); + XML_GetParsingStatus(parser, &status); + if (status.parsing != XML_INITIALIZED) + fail("parsing status doesn't reset to INITIALIZED"); +} +END_TEST + +/* Test that long element names with namespaces are handled correctly */ +START_TEST(test_ns_long_element) +{ + const char *text = + "" + ""; + const char *elemstr[] = { + "http://example.org/" + " thisisalongenoughelementnametotriggerareallocation foo", + "http://example.org/ a bar" + }; + + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + XML_SetElementHandler(parser, + triplet_start_checker, + triplet_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test mixed population of prefixed and unprefixed attributes */ +START_TEST(test_ns_mixed_prefix_atts) +{ + const char *text = + "" + ""; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test having a long namespaced element name inside a short one. + * This exercises some internal buffer reallocation that is shared + * across elements with the same namespace URI. 
+ */ +START_TEST(test_ns_extend_uri_buffer) +{ + const char *text = + "" + " " + ""; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test that xmlns is correctly rejected as an attribute in the xmlns + * namespace, but not in other namespaces + */ +START_TEST(test_ns_reserved_attributes) +{ + const char *text1 = + ""; + const char *text2 = + ""; + expect_failure(text1, XML_ERROR_RESERVED_PREFIX_XMLNS, + "xmlns not rejected as an attribute"); + XML_ParserReset(parser, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text2, strlen(text2), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test more reserved attributes */ +START_TEST(test_ns_reserved_attributes_2) +{ + const char *text1 = + ""; + const char *text2 = + ""; + const char *text3 = + ""; + + expect_failure(text1, XML_ERROR_RESERVED_PREFIX_XML, + "xml not rejected as an attribute"); + XML_ParserReset(parser, NULL); + expect_failure(text2, XML_ERROR_RESERVED_NAMESPACE_URI, + "Use of w3.org URL not faulted"); + XML_ParserReset(parser, NULL); + expect_failure(text3, XML_ERROR_RESERVED_NAMESPACE_URI, + "Use of w3.org xmlns URL not faulted"); +} +END_TEST + +/* Test string pool handling of namespace names of 2048 characters */ +/* Exercises a particular string pool growth path */ +START_TEST(test_ns_extremely_long_prefix) +{ + const char *text = + "" + ""; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test unknown encoding handlers in namespace setup */ +START_TEST(test_ns_unknown_encoding_success) +{ + const char *text = + "\n" + "Hi"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + run_character_check(text, "Hi"); +} +END_TEST + +/* Test that too many colons are rejected */ +START_TEST(test_ns_double_colon) +{ + const char *text = + ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Double 
colon in attribute name not faulted"); +} +END_TEST + +START_TEST(test_ns_double_colon_element) +{ + const char *text = + ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Double colon in element name not faulted"); +} +END_TEST + +/* Test that non-name characters after a colon are rejected */ +START_TEST(test_ns_bad_attr_leafname) +{ + const char *text = + ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character in leafname not faulted"); +} +END_TEST + +START_TEST(test_ns_bad_element_leafname) +{ + const char *text = + ""; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character in element leafname not faulted"); +} +END_TEST + +/* Test high-byte-set UTF-16 characters are valid in a leafname */ +START_TEST(test_ns_utf16_leafname) +{ + const char text[] = + /* + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + */ + "<\0n\0:\0e\0 \0x\0m\0l\0n\0s\0:\0n\0=\0'\0U\0R\0I\0'\0 \0" + "n\0:\0\x04\x0e=\0'\0a\0'\0 \0/\0>\0"; + const XML_Char *expected = "a"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ns_utf16_element_leafname) +{ + const char text[] = + /* + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + */ + "\0<\0n\0:\x0e\x04\0 \0x\0m\0l\0n\0s\0:\0n\0=\0'\0U\0R\0I\0'\0/\0>"; + const XML_Char *expected = "URI \xe0\xb8\x84"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ns_utf16_doctype) +{ + const char text[] = + /* ]>\n + * where 
{KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0f\0o\0o\0:\x0e\x04\0 " + "\0[\0 \0<\0!\0E\0N\0T\0I\0T\0Y\0 \0b\0a\0r\0 \0'\0b\0a\0z\0'\0>\0 " + "\0]\0>\0\n" + /* &bar; */ + "\0<\0f\0o\0o\0:\x0e\x04\0 " + "\0x\0m\0l\0n\0s\0:\0f\0o\0o\0=\0'\0U\0R\0I\0'\0>" + "\0&\0b\0a\0r\0;" + "\0<\0/\0f\0o\0o\0:\x0e\x04\0>"; + const XML_Char *expected = "URI \xe0\xb8\x84"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ns_invalid_doctype) +{ + const char *text = + "\n" + "&bar;"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character in document local name not faulted"); +} +END_TEST + +START_TEST(test_ns_double_colon_doctype) +{ + const char *text = + "\n" + "&bar;"; + + expect_failure(text, XML_ERROR_SYNTAX, + "Double colon in document name not faulted"); +} +END_TEST + +/* Control variable; the number of times duff_allocator() will successfully allocate */ +#define ALLOC_ALWAYS_SUCCEED (-1) +#define REALLOC_ALWAYS_SUCCEED (-1) + +static int allocation_count = ALLOC_ALWAYS_SUCCEED; +static int reallocation_count = REALLOC_ALWAYS_SUCCEED; + +/* Crocked allocator for allocation failure tests */ +static void *duff_allocator(size_t size) +{ + if (allocation_count == 0) + return NULL; + if (allocation_count != ALLOC_ALWAYS_SUCCEED) + allocation_count--; + return malloc(size); +} + +/* Crocked reallocator for allocation failure tests */ +static void *duff_reallocator(void *ptr, size_t size) +{ + if (reallocation_count == 0) + return NULL; + if (reallocation_count != REALLOC_ALWAYS_SUCCEED) + reallocation_count--; + return realloc(ptr, size); +} + +/* Test that a failure 
to allocate the parser structure fails gracefully */ +START_TEST(test_misc_alloc_create_parser) +{ + XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; + unsigned int i; + const unsigned int max_alloc_count = 10; + + /* Something this simple shouldn't need more than 10 allocations */ + for (i = 0; i < max_alloc_count; i++) + { + allocation_count = i; + parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); + if (parser != NULL) + break; + } + if (i == 0) + fail("Parser unexpectedly ignored failing allocator"); + else if (i == max_alloc_count) + fail("Parser not created with max allocation count"); +} +END_TEST + +/* Test memory allocation failures for a parser with an encoding */ +START_TEST(test_misc_alloc_create_parser_with_encoding) +{ + XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; + unsigned int i; + const unsigned int max_alloc_count = 10; + + /* Try several levels of allocation */ + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + parser = XML_ParserCreate_MM("us-ascii", &memsuite, NULL); + if (parser != NULL) + break; + } + if (i == 0) + fail("Parser ignored failing allocator"); + else if (i == max_alloc_count) + fail("Parser not created with max allocation count"); +} +END_TEST + +/* Test that freeing a NULL parser doesn't cause an explosion. + * (Not actually tested anywhere else) + */ +START_TEST(test_misc_null_parser) +{ + XML_ParserFree(NULL); +} +END_TEST + +/* Test that XML_ErrorString rejects out-of-range codes */ +START_TEST(test_misc_error_string) +{ + if (XML_ErrorString((enum XML_Error)-1) != NULL) + fail("Negative error code not rejected"); + if (XML_ErrorString((enum XML_Error)100) != NULL) + fail("Large error code not rejected"); +} +END_TEST + +/* Test the version information is consistent */ + +/* Since we are working in XML_LChars (potentially 16-bits), we + * can't use the standard C library functions for character + * manipulation and have to roll our own. 
+ */ +static int +parse_version(const XML_LChar *version_text, + XML_Expat_Version *version_struct) +{ + while (*version_text != 0x00) { + if (*version_text >= ASCII_0 && *version_text <= ASCII_9) + break; + version_text++; + } + if (*version_text == 0x00) + return XML_FALSE; + + /* version_struct->major = strtoul(version_text, 10, &version_text) */ + version_struct->major = 0; + while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { + version_struct->major = + 10 * version_struct->major + (*version_text++ - ASCII_0); + } + if (*version_text++ != ASCII_PERIOD) + return XML_FALSE; + + /* Now for the minor version number */ + version_struct->minor = 0; + while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { + version_struct->minor = + 10 * version_struct->minor + (*version_text++ - ASCII_0); + } + if (*version_text++ != ASCII_PERIOD) + return XML_FALSE; + + /* Finally the micro version number */ + version_struct->micro = 0; + while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { + version_struct->micro = + 10 * version_struct->micro + (*version_text++ - ASCII_0); + } + if (*version_text != 0x00) + return XML_FALSE; + return XML_TRUE; +} + +static int +versions_equal(const XML_Expat_Version *first, + const XML_Expat_Version *second) +{ + return (first->major == second->major && + first->minor == second->minor && + first->micro == second->micro); +} + +START_TEST(test_misc_version) +{ + XML_Expat_Version read_version = XML_ExpatVersionInfo(); + /* Silence compiler warning with the following assignment */ + XML_Expat_Version parsed_version = { 0, 0, 0 }; + const XML_LChar *version_text = XML_ExpatVersion(); + + if (version_text == NULL) + fail("Could not obtain version text"); + if (!parse_version(version_text, &parsed_version)) + fail("Unable to parse version text"); + if (!versions_equal(&read_version, &parsed_version)) + fail("Version mismatch"); + +#if ! 
defined(XML_UNICODE) + if (strcmp(version_text, "expat_2.2.3")) /* needs bump on releases */ + fail("XML_*_VERSION in expat.h out of sync?\n"); +#endif /* ! defined(XML_UNICODE) */ +} +END_TEST + +/* Test feature information */ +START_TEST(test_misc_features) +{ + const XML_Feature *features = XML_GetFeatureList(); + + /* Prevent problems with double-freeing parsers */ + parser = NULL; + if (features == NULL) + fail("Failed to get feature information"); + /* Loop through the features checking what we can */ + while (features->feature != XML_FEATURE_END) { + switch(features->feature) { + case XML_FEATURE_SIZEOF_XML_CHAR: + if (features->value != sizeof(XML_Char)) + fail("Incorrect size of XML_Char"); + break; + case XML_FEATURE_SIZEOF_XML_LCHAR: + if (features->value != sizeof(XML_LChar)) + fail("Incorrect size of XML_LChar"); + break; + default: + break; + } + features++; + } +} +END_TEST + +/* Regression test for GitHub Issue #17: memory leak parsing attribute + * values with mixed bound and unbound namespaces. 
+ */ +START_TEST(test_misc_attribute_leak) +{ + const char *text = ""; + XML_Memory_Handling_Suite memsuite = { + tracking_malloc, + tracking_realloc, + tracking_free + }; + + parser = XML_ParserCreate_MM("UTF-8", &memsuite, "\n"); + expect_failure(text, XML_ERROR_UNBOUND_PREFIX, + "Unbound prefixes not found"); + XML_ParserFree(parser); + /* Prevent the teardown trying to double free */ + parser = NULL; + + if (!tracking_report()) + fail("Memory leak found"); +} +END_TEST + +/* Test parser created for UTF-16LE is successful */ +START_TEST(test_misc_utf16le) +{ + const char text[] = + /* Hi */ + "<\0?\0x\0m\0l\0 \0" + "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0" + "<\0q\0>\0H\0i\0<\0/\0q\0>\0"; + const XML_Char *expected = "Hi"; + CharData storage; + + parser = XML_ParserCreate("UTF-16LE"); + if (parser == NULL) + fail("Parser not created"); + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + + +static void +alloc_setup(void) +{ + XML_Memory_Handling_Suite memsuite = { + duff_allocator, + duff_reallocator, + free + }; + + /* Ensure the parser creation will go through */ + allocation_count = ALLOC_ALWAYS_SUCCEED; + reallocation_count = REALLOC_ALWAYS_SUCCEED; + parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); + if (parser == NULL) + fail("Parser not created"); +} + +static void +alloc_teardown(void) +{ + basic_teardown(); +} + + +/* Test the effects of allocation failures on xml declaration processing */ +START_TEST(test_alloc_parse_xdecl) +{ + const char *text = + "\n" + "Hello, world"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetXmlDeclHandler(parser, dummy_xdecl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, 
strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* Resetting the parser is insufficient, because some memory + * allocations are cached within the parser. Instead we use + * the teardown and setup routines to ensure that we have the + * right sort of parser back in our hands. + */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +/* As above, but with an encoding big enough to cause storing the + * version information to expand the string pool being used. + */ +static int XMLCALL +long_encoding_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(encoding), + XML_Encoding *info) +{ + int i; + + for (i = 0; i < 256; i++) + info->map[i] = i; + info->data = NULL; + info->convert = NULL; + info->release = NULL; + return XML_STATUS_OK; +} + +START_TEST(test_alloc_parse_xdecl_2) +{ + const char *text = + "" + "Hello, world"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetXmlDeclHandler(parser, dummy_xdecl_handler); + XML_SetUnknownEncodingHandler(parser, long_encoding_handler, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +/* Test the effects of allocation failures on a straightforward parse */ +START_TEST(test_alloc_parse_pi) +{ + const char *text = + "\n" + "\n" + "" + "Hello, world" + ""; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != 
XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_pi_2) +{ + const char *text = + "\n" + "" + "Hello, world" + "\n" + ""; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_pi_3) +{ + const char *text = + ""; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_comment) +{ + const char *text = + "\n" + "" + "Hi"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetCommentHandler(parser, dummy_comment_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == 
max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_comment_2) +{ + const char *text = + "\n" + "" + "Hello, world" + "" + ""; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetCommentHandler(parser, dummy_comment_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +static int XMLCALL +external_entity_duff_loader(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + XML_Parser new_parser; + unsigned int i; + const unsigned int max_alloc_count = 10; + + /* Try a few different allocation levels */ + for (i = 0; i < max_alloc_count; i++) + { + allocation_count = i; + new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (new_parser != NULL) + { + XML_ParserFree(new_parser); + break; + } + } + if (i == 0) + fail("External parser creation ignored failing allocator"); + else if (i == max_alloc_count) + fail("Extern parser not created with max allocation count"); + + /* Make sure other random allocation doesn't now fail */ + allocation_count = ALLOC_ALWAYS_SUCCEED; + + /* Make sure the failure code path is executed too */ + return XML_STATUS_ERROR; +} + +/* Test that external parser creation running out of memory is + * correctly reported. Based on the external entity test cases. 
+ */ +START_TEST(test_alloc_create_external_parser) +{ + const char *text = + "\n" + "\n" + "&entity;"; + char foo_text[] = + ""; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, foo_text); + XML_SetExternalEntityRefHandler(parser, + external_entity_duff_loader); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) { + fail("External parser allocator returned success incorrectly"); + } +} +END_TEST + +/* More external parser memory allocation testing */ +START_TEST(test_alloc_run_external_parser) +{ + const char *text = + "\n" + "\n" + "&entity;"; + char foo_text[] = + ""; + unsigned int i; + const unsigned int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + XML_SetParamEntityParsing(parser, + XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, foo_text); + XML_SetExternalEntityRefHandler(parser, + external_entity_null_loader); + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing ignored failing allocator"); + else if (i == max_alloc_count) + fail("Parsing failed with allocation count 10"); +} +END_TEST + + +static int XMLCALL +external_entity_dbl_handler(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + intptr_t callno = (intptr_t)XML_GetUserData(parser); + const char *text; + XML_Parser new_parser; + int i; + const int max_alloc_count = 20; + + if (callno == 0) { + /* First time through, check how many calls to malloc occur */ + text = ("\n" + "\n" + "\n"); + allocation_count = 10000; + new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (new_parser == NULL) { + fail("Unable to allocate first external parser"); + return XML_STATUS_ERROR; 
+ } + /* Stash the number of calls in the user data */ + XML_SetUserData(parser, (void *)(intptr_t)(10000 - allocation_count)); + } else { + text = ("" + ""); + /* Try at varying levels to exercise more code paths */ + for (i = 0; i < max_alloc_count; i++) { + allocation_count = callno + i; + new_parser = XML_ExternalEntityParserCreate(parser, + context, + NULL); + if (new_parser != NULL) + break; + } + if (i == 0) { + fail("Second external parser unexpectedly created"); + XML_ParserFree(new_parser); + return XML_STATUS_ERROR; + } + else if (i == max_alloc_count) { + fail("Second external parser not created"); + return XML_STATUS_ERROR; + } + } + + allocation_count = ALLOC_ALWAYS_SUCCEED; + if (_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(new_parser); + return XML_STATUS_ERROR; + } + XML_ParserFree(new_parser); + return XML_STATUS_OK; +} + +/* Test that running out of memory in dtdCopy is correctly reported. + * Based on test_default_ns_from_ext_subset_and_ext_ge() + */ +START_TEST(test_alloc_dtd_copy_default_atts) +{ + const char *text = + "\n" + "\n" + "]>\n" + "\n" + "&en;\n" + ""; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_dbl_handler); + XML_SetUserData(parser, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + + +static int XMLCALL +external_entity_dbl_handler_2(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + intptr_t callno = (intptr_t)XML_GetUserData(parser); + const char *text; + XML_Parser new_parser; + enum XML_Status rv; + + if (callno == 0) { + /* Try different allocation levels for whole exercise */ + text = ("\n" + "\n" + "\n"); + XML_SetUserData(parser, (void *)(intptr_t)1); + new_parser = 
XML_ExternalEntityParserCreate(parser, + context, + NULL); + if (new_parser == NULL) + return XML_STATUS_ERROR; + rv = _XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), + XML_TRUE); + } else { + /* Just run through once */ + text = ("" + ""); + new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (new_parser == NULL) + return XML_STATUS_ERROR; + rv =_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), + XML_TRUE); + } + XML_ParserFree(new_parser); + if (rv == XML_STATUS_ERROR) + return XML_STATUS_ERROR; + return XML_STATUS_OK; +} + +/* Test more external entity allocation failure paths */ +START_TEST(test_alloc_external_entity) +{ + const char *text = + "\n" + "\n" + "]>\n" + "\n" + "&en;\n" + ""; + int i; + const int alloc_test_max_repeats = 50; + + for (i = 0; i < alloc_test_max_repeats; i++) { + allocation_count = -1; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_dbl_handler_2); + XML_SetUserData(parser, NULL); + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + allocation_count = -1; + if (i == 0) + fail("External entity parsed despite duff allocator"); + if (i == alloc_test_max_repeats) + fail("External entity not parsed at max allocation count"); +} +END_TEST + +/* Test more allocation failure paths */ +static int XMLCALL +external_entity_alloc_set_encoding(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + /* As for external_entity_loader() */ + const char *text = + "" + "\xC3\xA9"; + XML_Parser ext_parser; + enum XML_Status status; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + return XML_STATUS_ERROR; + if 
(!XML_SetEncoding(ext_parser, "utf-8")) { + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; + } + status = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE); + XML_ParserFree(ext_parser); + if (status == XML_STATUS_ERROR) + return XML_STATUS_ERROR; + return XML_STATUS_OK; +} + +START_TEST(test_alloc_ext_entity_set_encoding) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + int i; + const int max_allocation_count = 30; + + for (i = 0; i < max_allocation_count; i++) { + XML_SetExternalEntityRefHandler(parser, + external_entity_alloc_set_encoding); + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + break; + allocation_count = -1; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Encoding check succeeded despite failing allocator"); + if (i == max_allocation_count) + fail("Encoding failed at max allocation count"); +} +END_TEST + +static int XMLCALL +unknown_released_encoding_handler(void *UNUSED_P(data), + const XML_Char *encoding, + XML_Encoding *info) +{ + if (!strcmp(encoding, "unsupported-encoding")) { + int i; + + for (i = 0; i < 256; i++) + info->map[i] = i; + info->data = NULL; + info->convert = NULL; + info->release = dummy_release; + return XML_STATUS_OK; + } + return XML_STATUS_ERROR; +} + +/* Test the effects of allocation failure in internal entities. 
+ * Based on test_unknown_encoding_internal_entity + */ +START_TEST(test_alloc_internal_entity) +{ + const char *text = + "\n" + "]>\n" + ""; + unsigned int i; + const unsigned int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUnknownEncodingHandler(parser, + unknown_released_encoding_handler, + NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Internal entity worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Internal entity failed at max allocation count"); +} +END_TEST + + +/* Test the robustness against allocation failure of element handling + * Based on test_dtd_default_handling(). + */ +START_TEST(test_alloc_dtd_default_handling) +{ + const char *text = + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "]>\n" + ""; + const char *expected = "\n\n\n\n\n\n\n\n\ntext in doc"; + CharData storage; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetDoctypeDeclHandler(parser, + dummy_start_doctype_handler, + dummy_end_doctype_handler); + XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + XML_SetCommentHandler(parser, dummy_comment_handler); + XML_SetCdataSectionHandler(parser, + dummy_start_cdata_handler, + dummy_end_cdata_handler); + XML_SetUnparsedEntityDeclHandler( + parser, + dummy_unparsed_entity_decl_handler); + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + 
XML_SetCharacterDataHandler(parser, accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Default DTD parsed despite allocation failures"); + if (i == max_alloc_count) + fail("Default DTD not parsed with maximum alloc count"); + CharData_CheckXMLChars(&storage, expected); + if (dummy_handler_flags != (DUMMY_START_DOCTYPE_HANDLER_FLAG | + DUMMY_END_DOCTYPE_HANDLER_FLAG | + DUMMY_ENTITY_DECL_HANDLER_FLAG | + DUMMY_NOTATION_DECL_HANDLER_FLAG | + DUMMY_ELEMENT_DECL_HANDLER_FLAG | + DUMMY_ATTLIST_DECL_HANDLER_FLAG | + DUMMY_COMMENT_HANDLER_FLAG | + DUMMY_PI_HANDLER_FLAG | + DUMMY_START_CDATA_HANDLER_FLAG | + DUMMY_END_CDATA_HANDLER_FLAG | + DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG)) + fail("Not all handlers were called"); +} +END_TEST + +/* Test robustness of XML_SetEncoding() with a failing allocator */ +START_TEST(test_alloc_explicit_encoding) +{ + int i; + const int max_alloc_count = 5; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (XML_SetEncoding(parser, "us-ascii") == XML_STATUS_OK) + break; + } + if (i == 0) + fail("Encoding set despite failing allocator"); + else if (i == max_alloc_count) + fail("Encoding not set at max allocation count"); +} +END_TEST + +/* Test robustness of XML_SetBase against a failing allocator */ +START_TEST(test_alloc_set_base) +{ + const XML_Char *new_base = "/local/file/name.xml"; + int i; + const int max_alloc_count = 5; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (XML_SetBase(parser, new_base) == XML_STATUS_OK) + break; + } + if (i == 0) + fail("Base set despite failing allocator"); + else if (i == max_alloc_count) + fail("Base not set with max allocation count"); +} +END_TEST + +/* Test buffer extension in the face of a duff reallocator */ +START_TEST(test_alloc_realloc_buffer) +{ + const char *text = 
get_buffer_test_text; + void *buffer; + int i; + const int max_realloc_count = 10; + + /* Get a smallish buffer */ + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + buffer = XML_GetBuffer(parser, 1536); + if (buffer == NULL) + fail("1.5K buffer reallocation failed"); + memcpy(buffer, text, strlen(text)); + if (XML_ParseBuffer(parser, strlen(text), + XML_FALSE) == XML_STATUS_OK) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + reallocation_count = -1; + if (i == 0) + fail("Parse succeeded with no reallocation"); + else if (i == max_realloc_count) + fail("Parse failed with max reallocation count"); +} +END_TEST + +/* Same test for external entity parsers */ +static int XMLCALL +external_entity_reallocator(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = get_buffer_test_text; + XML_Parser ext_parser; + void *buffer; + enum XML_Status status; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + + reallocation_count = (intptr_t)XML_GetUserData(parser); + buffer = XML_GetBuffer(ext_parser, 1536); + if (buffer == NULL) + fail("Buffer allocation failed"); + memcpy(buffer, text, strlen(text)); + status = XML_ParseBuffer(ext_parser, strlen(text), XML_FALSE); + reallocation_count = -1; + XML_ParserFree(ext_parser); + return (status == XML_STATUS_OK) ? 
XML_STATUS_OK : XML_STATUS_ERROR; +} + +START_TEST(test_alloc_ext_entity_realloc_buffer) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + XML_SetExternalEntityRefHandler(parser, + external_entity_reallocator); + XML_SetUserData(parser, (void *)(intptr_t)i); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Succeeded with no reallocations"); + if (i == max_realloc_count) + fail("Failed with max reallocations"); +} +END_TEST + +/* Test elements with many attributes are handled correctly */ +START_TEST(test_alloc_realloc_many_attributes) +{ + const char *text = + "\n" + "\n" + "\n" + "]>\n" + "" + ""; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite no reallocations"); + if (i == max_realloc_count) + fail("Parse failed at max reallocations"); +} +END_TEST + +/* Test handling of a public entity with failing allocator */ +START_TEST(test_alloc_public_entity_value) +{ + const char *text = + "\n" + "\n"; + char dtd_text[] = + "\n" + "\n" + "\n" + "%e1;\n"; + int i; + const int max_alloc_count = 50; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_public); + /* Provoke a particular code path */ + XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + 
XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocation"); + if (i == max_alloc_count) + fail("Parsing failed at max allocation count"); + if (dummy_handler_flags != DUMMY_ENTITY_DECL_HANDLER_FLAG) + fail("Entity declaration handler not called"); +} +END_TEST + +START_TEST(test_alloc_realloc_subst_public_entity_value) +{ + const char *text = + "\n" + "\n"; + char dtd_text[] = + "\n" + "\n" + "%ThisIsAStupidlyLongParameterNameIntendedToTriggerPoolGrowth12345" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP;"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_public); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) 
!= XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocation"); + if (i == max_realloc_count) + fail("Parsing failed at max reallocation count"); +} +END_TEST + +START_TEST(test_alloc_parse_public_doctype) +{ + const char *text = + "\n" + "\n" + ""; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetDoctypeDeclHandler(parser, + dummy_start_doctype_decl_handler, + dummy_end_doctype_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != (DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG | + DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG)) + fail("Doctype handler functions not called"); +} +END_TEST + +START_TEST(test_alloc_parse_public_doctype_long_name) +{ + const char *text = + "\n" + "\n" + ""; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetDoctypeDeclHandler(parser, + dummy_start_doctype_decl_handler, + dummy_end_doctype_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +static int XMLCALL +external_entity_alloc(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + 
const char *text = (const char *)XML_GetUserData(parser); + XML_Parser ext_parser; + int parse_res; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + return XML_STATUS_ERROR; + parse_res = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE); + XML_ParserFree(ext_parser); + return parse_res; +} + +/* Test foreign DTD handling */ +START_TEST(test_alloc_set_foreign_dtd) +{ + const char *text1 = + "\n" + "&entity;"; + char text2[] = ""; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &text2); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) + fail("Could not set foreign DTD"); + if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +/* Test based on ibm/valid/P32/ibm32v04.xml */ +START_TEST(test_alloc_attribute_enum_value) +{ const char *text = - "\n" - " \n" - " \n" - ""; - const char *result = - "start http://xml.libexpat.org/ e\n" - "start http://xml.libexpat.org/ f\n" - "attribute http://xml.libexpat.org/ attr\n" - "end http://xml.libexpat.org/ f\n" - "start http://xml.libexpat.org/ g\n" - "attribute http://xml.libexpat.org/ attr2\n" - "end http://xml.libexpat.org/ g\n" - "end http://xml.libexpat.org/ e\n"; - run_ns_tagname_overwrite_test(text, result); + "\n" + "\n" + "This is a \n \n\nyellow tiger"; + char dtd_text[] = + "\n" + "\n" + ""; + int i; + const int max_alloc_count = 30; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + 
XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + /* An attribute list handler provokes a different code path */ + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); } END_TEST -/* Regression test for SF bug #566334. */ -START_TEST(test_ns_tagname_overwrite_triplet) +/* Test attribute enums sufficient to overflow the string pool */ +START_TEST(test_alloc_realloc_attribute_enum_value) { const char *text = - "\n" - " \n" - " \n" - ""; - const char *result = - "start http://xml.libexpat.org/ e n\n" - "start http://xml.libexpat.org/ f n\n" - "attribute http://xml.libexpat.org/ attr n\n" - "end http://xml.libexpat.org/ f n\n" - "start http://xml.libexpat.org/ g n\n" - "attribute http://xml.libexpat.org/ attr2 n\n" - "end http://xml.libexpat.org/ g n\n" - "end http://xml.libexpat.org/ e n\n"; - XML_SetReturnNSTriplet(parser, XML_TRUE); - run_ns_tagname_overwrite_test(text, result); + "\n" + "\n" + "This is a yellow tiger"; + /* We wish to define a collection of attribute enums that will + * cause the string pool storing them to have to expand. This + * means more than 1024 bytes, including the parentheses and + * separator bars. 
+ */ + char dtd_text[] = + "\n" + ""; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + /* An attribute list handler provokes a different code path */ + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); } END_TEST +/* Test attribute enums in a #IMPLIED attribute forcing pool growth */ +START_TEST(test_alloc_realloc_implied_attribute) +{ + /* Forcing this particular code path is a balancing act. The + * addition of the closing parenthesis and terminal NUL must be + * what pushes the string of enums over the 1024-byte limit, + * otherwise a different code path will pick up the realloc. + */ + const char *text = + "\n" + "\n" + "]>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST -/* Regression test for SF bug #620343. 
*/ -static void XMLCALL -start_element_fail(void *UNUSED_P(userData), - const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts)) +/* Test attribute enums in a defaulted attribute forcing pool growth */ +START_TEST(test_alloc_realloc_default_attribute) { - /* We should never get here. */ - fail("should never reach start_element_fail()"); + /* Forcing this particular code path is a balancing act. The + * addition of the closing parenthesis and terminal NUL must be + * what pushes the string of enums over the 1024-byte limit, + * otherwise a different code path will pick up the realloc. + */ + const char *text = + "\n" + "\n]>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); } +END_TEST -static void XMLCALL -start_ns_clearing_start_element(void *userData, - const XML_Char *UNUSED_P(prefix), - const XML_Char *UNUSED_P(uri)) +/* Test long notation name with dodgy allocator */ +START_TEST(test_alloc_notation) { - XML_SetStartElementHandler((XML_Parser) userData, NULL); + const char *text = + "\n" + "\n" + "\n" + "]>\n"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse 
succeeded despite allocation failures"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != (DUMMY_ENTITY_DECL_HANDLER_FLAG | + DUMMY_NOTATION_DECL_HANDLER_FLAG)) + fail("Entity declaration handler not called"); } +END_TEST -START_TEST(test_start_ns_clears_start_element) +/* Test public notation with dodgy allocator */ +START_TEST(test_alloc_public_notation) { - /* This needs to use separate start/end tags; using the empty tag - syntax doesn't cause the problematic path through Expat to be - taken. - */ - const char *text = ""; + const char *text = + "\n" + "\n" + "\n" + "]>\n"; + int i; + const int max_alloc_count = 20; - XML_SetStartElementHandler(parser, start_element_fail); - XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element); - XML_SetEndNamespaceDeclHandler(parser, dummy_end_namespace_decl_handler); - XML_UseParserAsHandlerArg(parser); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite allocation failures"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) + fail("Notation handler not called"); } END_TEST -/* Regression test for SF bug #616863. 
*/ -static int XMLCALL -external_entity_handler(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) +/* Test public notation with dodgy allocator */ +START_TEST(test_alloc_system_notation) { - intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser); - const char *text; - XML_Parser p2; + const char *text = + "\n" + "\n" + "\n" + "]>\n"; + int i; + const int max_alloc_count = 20; - if (callno == 1) - text = ("\n" - "\n" - "\n"); - else - text = ("" - ""); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite allocation failures"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) + fail("Notation handler not called"); +} +END_TEST - XML_SetUserData(parser, (void *) callno); - p2 = XML_ExternalEntityParserCreate(parser, context, NULL); - if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) { - xml_failure(p2); - return 0; +START_TEST(test_alloc_nested_groups) +{ + const char *text = + "\n" + "" + "]>\n" + ""; + CharData storage; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + CharData_Init(&storage); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + 
alloc_teardown(); + alloc_setup(); + } + + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum reallocation count"); + CharData_CheckString(&storage, "doce"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler not fired"); +} +END_TEST + +START_TEST(test_alloc_realloc_nested_groups) +{ + const char *text = + "\n" + "" + "]>\n" + ""; + CharData storage; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + CharData_Init(&storage); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); + CharData_CheckString(&storage, "doce"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler not fired"); +} +END_TEST + +START_TEST(test_alloc_large_group) +{ + const char *text = + "\n" + "]>\n" + "\n" + "\n" + "\n"; + int i; + const int max_alloc_count = 50; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != 
DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler flag not raised"); +} +END_TEST + +START_TEST(test_alloc_realloc_group_choice) +{ + const char *text = + "\n" + "]>\n" + "\n" + "\n" + "This is a foo\n" + "\n" + "\n"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler flag not raised"); +} +END_TEST + +START_TEST(test_alloc_pi_in_epilog) +{ + const char *text = + "\n" + ""; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse completed despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_PI_HANDLER_FLAG) + fail("Processing instruction handler not invoked"); +} +END_TEST + +START_TEST(test_alloc_comment_in_epilog) +{ + const char *text = + "\n" + ""; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetCommentHandler(parser, dummy_comment_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + 
break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse completed despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_COMMENT_HANDLER_FLAG) + fail("Processing instruction handler not invoked"); +} +END_TEST + +START_TEST(test_alloc_realloc_long_attribute_value) +{ + const char *text = + "]>\n" + ""; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); } - XML_ParserFree(p2); - return 1; + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); } +END_TEST -START_TEST(test_default_ns_from_ext_subset_and_ext_ge) +START_TEST(test_alloc_attribute_whitespace) { - const char *text = - "\n" - "\n" - "]>\n" - "\n" - "&en;\n" - ""; + const char *text = ""; + int i; + const int max_alloc_count = 15; - XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetExternalEntityRefHandler(parser, external_entity_handler); - /* We actually need to set this handler to tickle this bug. 
*/ - XML_SetStartElementHandler(parser, dummy_start_element); - XML_SetUserData(parser, NULL); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); } END_TEST -/* Regression test #1 for SF bug #673791. */ -START_TEST(test_ns_prefix_with_empty_uri_1) +START_TEST(test_alloc_attribute_predefined_entity) { - const char *text = - "\n" - " \n" - ""; + const char *text = ""; + int i; + const int max_alloc_count = 15; - expect_failure(text, - XML_ERROR_UNDECLARING_PREFIX, - "Did not report re-setting namespace" - " URI with prefix to ''."); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); } END_TEST -/* Regression test #2 for SF bug #673791. */ -START_TEST(test_ns_prefix_with_empty_uri_2) +/* Test that a character reference at the end of a suitably long + * default value for an attribute can trigger pool growth, and recovers + * if the allocator fails on it. 
+ */ +START_TEST(test_alloc_long_attr_default_with_char_ref) { const char *text = - "\n" - ""; + "]>\n" + ""; + int i; + const int max_alloc_count = 20; - expect_failure(text, - XML_ERROR_UNDECLARING_PREFIX, - "Did not report setting namespace URI with prefix to ''."); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); } END_TEST -/* Regression test #3 for SF bug #673791. */ -START_TEST(test_ns_prefix_with_empty_uri_3) +/* Test that a long character reference substitution triggers a pool + * expansion correctly for an attribute value. + */ +START_TEST(test_alloc_long_attr_value) { const char *text = - "\n" - " \n" - "]>\n" - ""; + "]>\n" + ""; + int i; + const int max_alloc_count = 25; - expect_failure(text, - XML_ERROR_UNDECLARING_PREFIX, - "Didn't report attr default setting NS w/ prefix to ''."); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); } END_TEST -/* Regression test #4 for SF bug #673791. */ -START_TEST(test_ns_prefix_with_empty_uri_4) +/* Test that an error in a nested parameter entity substitution is + * handled correctly. It seems unlikely that the code path being + * exercised can be reached purely by carefully crafted XML, but an + * allocation error in the right place will definitely do it. 
+ */ +START_TEST(test_alloc_nested_entities) { const char *text = - "\n" - " \n" - "]>\n" - ""; - /* Packaged info expected by the end element handler; - the weird structuring lets us re-use the triplet_end_checker() - function also used for another test. */ - const char *elemstr[] = { - "http://xml.libexpat.org/ doc prefix" + "\n" + ""; + ExtFaults test_data = { + "\n" + "\n" + "%pe2;", + "Memory Fail not faulted", + NULL, + XML_ERROR_NO_MEMORY }; - XML_SetReturnNSTriplet(parser, XML_TRUE); - XML_SetUserData(parser, elemstr); - XML_SetEndElementHandler(parser, triplet_end_checker); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + + /* Causes an allocation error in a nested storeEntityValue() */ + allocation_count = 12; + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Entity allocation failure not noted"); } END_TEST -START_TEST(test_ns_default_with_empty_uri) +START_TEST(test_alloc_realloc_param_entity_newline) { const char *text = - "\n" - " \n" - ""; - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + "\n" + ""; + char dtd_text[] = + "\n'>" + "%pe;\n"; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum 
reallocation count"); } END_TEST -/* Regression test for SF bug #692964: two prefixes for one namespace. */ -START_TEST(test_ns_duplicate_attrs_diff_prefixes) +START_TEST(test_alloc_realloc_ce_extends_pe) { const char *text = - ""; - expect_failure(text, - XML_ERROR_DUPLICATE_ATTRIBUTE, - "did not report multiple attributes with same URI+name"); + "\n" + ""; + char dtd_text[] = + "\n'>" + "%pe;\n"; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); } END_TEST -/* Regression test for SF bug #695401: unbound prefix. */ -START_TEST(test_ns_unbound_prefix_on_attribute) +START_TEST(test_alloc_realloc_attributes) { - const char *text = ""; - expect_failure(text, - XML_ERROR_UNBOUND_PREFIX, - "did not report unbound prefix on attribute"); + const char *text = + "]>\n" + "wombat\n"; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); } END_TEST -/* Regression test for SF bug #695401: unbound prefix. 
*/ -START_TEST(test_ns_unbound_prefix_on_element) -{ - const char *text = ""; - expect_failure(text, - XML_ERROR_UNBOUND_PREFIX, - "did not report unbound prefix on element"); +START_TEST(test_alloc_long_doc_name) +{ + const char *text = + /* 64 characters per line */ + ""; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max reallocation count"); } END_TEST -/* Test that the parsing status is correctly reset by XML_ParserReset(). - * We usE test_return_ns_triplet() for our example parse to improve - * coverage of tidying up code executed. - */ -START_TEST(test_ns_parser_reset) +START_TEST(test_alloc_long_base) { - XML_ParsingStatus status; + const char *text = + "\n" + "]>\n" + "&e;"; + char entity_text[] = "Hello world"; + const char *base = + /* 64 characters per line */ + "LongBaseURI/that/will/overflow/an/internal/buffer/and/cause/it/t" + "o/have/to/grow/PQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/"; + int i; + const int max_alloc_count = 25; - XML_GetParsingStatus(parser, &status); - if (status.parsing != XML_INITIALIZED) - fail("parsing status doesn't start INITIALIZED"); - test_return_ns_triplet(); - XML_GetParsingStatus(parser, &status); - if (status.parsing != XML_FINISHED) - fail("parsing status doesn't end FINISHED"); - XML_ParserReset(parser, NULL); - XML_GetParsingStatus(parser, &status); - if (status.parsing != XML_INITIALIZED) - fail("parsing status doesn't reset to INITIALIZED"); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, entity_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (XML_SetBase(parser, base) == XML_STATUS_ERROR) { + XML_ParserReset(parser, NULL); + continue; + } + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST -/* Control variable; the number of times duff_allocator() will successfully allocate */ -static unsigned int allocation_count = 0; - -/* Crocked allocator for allocation failure tests */ -static void *duff_allocator(size_t size) +START_TEST(test_alloc_long_public_id) { - if (allocation_count == 0) - return NULL; - allocation_count--; - return malloc(size); + const char *text = + "\n" + "]>\n" + "&e;"; + char entity_text[] = "Hello world"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < 
max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, entity_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } +END_TEST -/* Test that a failure to allocate the parser structure fails gracefully */ -START_TEST(test_misc_alloc_create_parser) +START_TEST(test_alloc_long_entity_value) { - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; - unsigned int i; + const char *text = + "\n" + " \n" + "]>\n" + "&e2;"; + char entity_text[] = "Hello world"; + int i; + const int max_alloc_count = 40; - /* Something this simple shouldn't need more than 10 allocations */ - for (i = 0; i < 10; i++) - { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; - parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); - if (parser != NULL) + XML_SetUserData(parser, entity_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); } if (i == 0) - fail("Parser unexpectedly ignored failing allocator"); - else if (i == 10) - fail("Parser not created with allocation count 10"); + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST -/* Test memory allocation failures for a parser with an encoding */ -START_TEST(test_misc_alloc_create_parser_with_encoding) 
+START_TEST(test_alloc_long_notation) { - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; - unsigned int i; + const char *text = + "\n" + " \n" + " \n" + "]>\n" + "&e2;"; + ExtOption options[] = { + { "foo", "Entity Foo" }, + { "bar", "Entity Bar" }, + { NULL, NULL } + }; + int i; + const int max_alloc_count = 40; - /* Try several levels of allocation */ - for (i = 0; i < 10; i++) { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; - parser = XML_ParserCreate_MM("us-ascii", &memsuite, NULL); - if (parser != NULL) + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; + + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); } if (i == 0) - fail("Parser ignored failing allocator"); - else if (i == 10) - fail("Parser not created with allocation count 10"); + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + + +static void +nsalloc_setup(void) +{ + XML_Memory_Handling_Suite memsuite = { + duff_allocator, + duff_reallocator, + free + }; + XML_Char ns_sep[2] = { ' ', '\0' }; + + /* Ensure the parser creation will go through */ + allocation_count = ALLOC_ALWAYS_SUCCEED; + reallocation_count = REALLOC_ALWAYS_SUCCEED; + parser = XML_ParserCreate_MM(NULL, &memsuite, ns_sep); + if (parser == NULL) + fail("Parser not created"); +} + +static void +nsalloc_teardown(void) +{ + basic_teardown(); } -END_TEST + /* Test the effects of allocation failure in simple namespace parsing. 
* Based on test_ns_default_with_empty_uri() */ -START_TEST(test_misc_alloc_ns) +START_TEST(test_nsalloc_xmlns) { - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; const char *text = - "\n" + "\n" " \n" ""; unsigned int i; - int repeated = 0; - XML_Char ns_sep[2] = { ' ', '\0' }; + const unsigned int max_alloc_count = 30; - allocation_count = 10000; - parser = XML_ParserCreate_MM(NULL, &memsuite, ns_sep); - if (parser == NULL) { - fail("Parser not created"); - } else { - for (i = 0; i < 10; i++) { - /* Repeat some tests with the same allocation count to - * catch cached allocations not freed by XML_ParserReset() - */ - if (repeated < 2 && i == 3) { - i--; - repeated++; - } - if (repeated == 2 && i == 5) { - i = 3; - repeated++; - } - allocation_count = i; - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) - break; - XML_ParserReset(parser, NULL); - } - if (i == 0) - fail("Parsing worked despite failing allocations"); - else if (i == 10) - fail("Parsing failed even at allocation count 10"); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + /* Exercise more code paths with a default handler */ + XML_SetDefaultHandler(parser, dummy_default_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* Resetting the parser is insufficient, because some memory + * allocations are cached within the parser. Instead we use + * the teardown and setup routines to ensure that we have the + * right sort of parser back in our hands. 
+ */ + nsalloc_teardown(); + nsalloc_setup(); } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at maximum allocation count"); } END_TEST /* Test XML_ParseBuffer interface with namespace and a dicky allocator */ -START_TEST(test_misc_alloc_ns_parse_buffer) +START_TEST(test_nsalloc_parse_buffer) { - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; - XML_Char ns_sep[2] = { ' ', '\0' }; const char *text = "Hello"; void *buffer; - /* Make sure the basic parser is allocated */ - allocation_count = 10000; - parser = XML_ParserCreate_MM(NULL, &memsuite, ns_sep); - if (parser == NULL) - fail("Parser not created"); - /* Try a parse before the start of the world */ /* (Exercises new code path) */ allocation_count = 0; @@ -2919,7 +10228,7 @@ fail("Pre-init XML_ParseBuffer faulted for wrong reason"); /* Now with actual memory allocation */ - allocation_count = 10000; + allocation_count = ALLOC_ALWAYS_SUCCEED; if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) xml_failure(parser); @@ -2961,516 +10270,1375 @@ } END_TEST -/* Test that freeing a NULL parser doesn't cause an explosion. 
- * (Not actually tested anywhere else) - */ -START_TEST(test_misc_null_parser) +/* Check handling of long prefix names (pool growth) */ +START_TEST(test_nsalloc_long_prefix) { - XML_ParserFree(NULL); + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":foo xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" 
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='http://example.org/'>" + ""; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST -/* Test that XML_ErrorString rejects out-of-range codes */ -START_TEST(test_misc_error_string) -{ - if (XML_ErrorString((enum XML_Error)-1) != NULL) - fail("Negative error code not rejected"); - if (XML_ErrorString((enum XML_Error)100) != NULL) - fail("Large error code not rejected"); +/* Check handling of long uri names (pool growth) */ +START_TEST(test_nsalloc_long_uri) +{ + const char *text = + "" + ""; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST -/* Test the version information is consistent */ -START_TEST(test_misc_version) -{ - XML_Expat_Version version_struct = 
XML_ExpatVersionInfo(); - const XML_LChar *version_text = XML_ExpatVersion(); - long value; - const char *p; - char *endp; +/* Test handling of long attribute names with prefixes */ +START_TEST(test_nsalloc_long_attr) +{ + const char *text = + "" + ""; + int i; + const int max_alloc_count = 40; - if (version_text == NULL) - fail("Could not obtain version text"); - for (p = version_text; *p != '\0'; p++) - if (isdigit(*p)) + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; - if (*p == '\0') - fail("No numbers in version text"); - value = strtoul(p, &endp, 10); - if (*endp != '.') - fail("Major version conversion from text failed"); - if (value != version_struct.major) - fail("Major version mismatch"); - p = endp + 1; - value = strtoul(p, &endp, 10); - if (*endp != '.') - fail("Minor version conversion from text failed"); - if (value != version_struct.minor) - fail("Minor version mismatch"); - p = endp + 1; - value = strtoul(p, &endp, 10); - if (*endp != '\0') - fail("Micro version conversion from text failed"); - if (value != version_struct.micro) - fail("Micro version mismatch"); + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST -/* Regression test for GitHub Issue #17: memory leak parsing attribute - * values with mixed bound and unbound namespaces. 
- */ -START_TEST(test_misc_attribute_leak) -{ - const char *text = ""; - XML_Memory_Handling_Suite memsuite = { - tracking_malloc, - tracking_realloc, - tracking_free +/* Test handling of an attribute name with a long namespace prefix */ +START_TEST(test_nsalloc_long_attr_prefix) +{ + const char *text = + "" + ""; + const char *elemstr[] = { + "http://example.org/ e foo", + "http://example.org/ a " + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" }; + int i; + const int max_alloc_count = 40; - parser = XML_ParserCreate_MM("UTF-8", &memsuite, "\n"); - expect_failure(text, XML_ERROR_UNBOUND_PREFIX, - "Unbound prefixes not found"); - XML_ParserFree(parser); - /* Prevent the teardown trying to double free */ - parser = NULL; - - if (!tracking_report()) - fail("Memory leak found"); + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + 
XML_SetElementHandler(parser, + triplet_start_checker, + triplet_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST - -static void -alloc_setup(void) +/* Test attribute handling in the face of a dodgy reallocator */ +START_TEST(test_nsalloc_realloc_attributes) { - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; - - /* Ensure the parser creation will go through */ - allocation_count = 10000; - parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); - if (parser == NULL) - fail("Parser not created"); -} + const char *text = + "" + ""; + int i; + const int max_realloc_count = 10; -static void -alloc_teardown(void) -{ - basic_teardown(); + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed at max reallocation count"); } +END_TEST -static int XMLCALL -external_entity_duff_loader(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) +/* Test long element names with namespaces under a failing allocator */ +START_TEST(test_nsalloc_long_element) { - XML_Parser new_parser; - unsigned int i; + const char *text = + "" + ""; + const char *elemstr[] = { + "http://example.org/" + " thisisalongenoughelementnametotriggerareallocation foo", + "http://example.org/ a bar" + }; + int i; + const int max_alloc_count = 30; - /* 
Try a few different allocation levels */ - for (i = 0; i < 10; i++) - { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; - new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); - if (new_parser != NULL) - { - XML_ParserFree(new_parser); + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + XML_SetElementHandler(parser, + triplet_start_checker, + triplet_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; - } + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } if (i == 0) - fail("External parser creation ignored failing allocator"); - else if (i == 10) - fail("Extern parser not created with allocation count 10"); - - /* Make sure other random allocation doesn't now fail */ - allocation_count = 10000; - - /* Make sure the failure code path is executed too */ - return XML_STATUS_ERROR; + fail("Parsing worked despite failing reallocations"); + else if (i == max_alloc_count) + fail("Parsing failed at max reallocation count"); } +END_TEST -/* Test that external parser creation running out of memory is - * correctly reported. Based on the external entity test cases. +/* Test the effects of reallocation failure when reassigning a + * binding. + * + * XML_ParserReset does not free the BINDING structures used by a + * parser, but instead adds them to an internal free list to be reused + * as necessary. Likewise the URI buffers allocated for the binding + * aren't freed, but kept attached to their existing binding. If the + * new binding has a longer URI, it will need reallocation. This test + * provokes that reallocation, and tests the control path if it fails. 
*/ -START_TEST(test_alloc_create_external_parser) +START_TEST(test_nsalloc_realloc_binding_uri) { - const char *text = - "\n" - "\n" - "&entity;"; - char foo_text[] = - ""; + const char *first = + "\n" + " \n" + ""; + const char *second = + "\n" + " \n" + ""; + unsigned i; + const unsigned max_realloc_count = 10; - XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); - XML_SetExternalEntityRefHandler(parser, - external_entity_duff_loader); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) { - fail("External parser allocator returned success incorrectly"); + /* First, do a full parse that will leave bindings around */ + if (_XML_Parse_SINGLE_BYTES(parser, first, strlen(first), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + /* Now repeat with a longer URI and a duff reallocator */ + for (i = 0; i < max_realloc_count; i++) { + XML_ParserReset(parser, NULL); + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, second, strlen(second), + XML_TRUE) != XML_STATUS_ERROR) + break; } -} -END_TEST + if (i == 0) + fail("Parsing worked despite failing reallocation"); + else if (i == max_realloc_count) + fail("Parsing failed at max reallocation count"); +} +END_TEST + +/* Check handling of long prefix names (pool growth) */ +START_TEST(test_nsalloc_realloc_long_prefix) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":foo xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='http://example.org/'>" + ""; + int i; + const int max_realloc_count = 12; -static int XMLCALL -external_entity_null_loader(XML_Parser UNUSED_P(parser), - const XML_Char *UNUSED_P(context), - const XML_Char *UNUSED_P(base), - const XML_Char 
*UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) -{ - return XML_STATUS_OK; -} + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} +END_TEST + +/* Check handling of even long prefix names (different code path) */ +START_TEST(test_nsalloc_realloc_longer_prefix) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "Q:foo xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "Q='http://example.org/'>" + ""; + int i; + const int max_realloc_count = 12; -/* More external parser memory allocation testing */ -START_TEST(test_alloc_run_external_parser) -{ - const char *text = - "\n" - "\n" - "&entity;"; - char foo_text[] = - ""; - unsigned int i; + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} +END_TEST + +START_TEST(test_nsalloc_long_namespace) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":e xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='http://example.org/'>\n" + "<" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":f " + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":attr='foo'/>\n" + ""; + int i; + const int max_alloc_count = 40; - for (i = 0; i < 10; i++) { - XML_SetParamEntityParsing(parser, - XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); - XML_SetExternalEntityRefHandler(parser, - external_entity_null_loader); + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; - /* Re-use the parser */ - XML_ParserReset(parser, NULL); + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } if (i == 0) - fail("Parsing ignored failing allocator"); - else if (i == 10) - fail("Parsing failed with allocation count 10"); + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST - -static int XMLCALL -external_entity_dbl_handler(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) -{ - intptr_t callno = (intptr_t)XML_GetUserData(parser); - const char *text; - XML_Parser new_parser; - int i; - - if (callno == 0) { - /* First time through, check how many calls to malloc occur */ - text = ("\n" - "\n" - "\n"); - allocation_count = 10000; - new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); - if (new_parser == NULL) { - fail("Unable to allocate first external parser"); - return 0; - } - /* Stash the number of calls in the user data */ - 
XML_SetUserData(parser, (void *)(intptr_t)(10000 - allocation_count)); - } else { - text = ("" - ""); - /* Try at varying levels to exercise more code paths */ - for (i = 0; i < 20; i++) { - allocation_count = callno + i; - new_parser = XML_ExternalEntityParserCreate(parser, - context, - NULL); - if (new_parser != NULL) - break; - } - if (i == 0) { - fail("Second external parser unexpectedly created"); - XML_ParserFree(new_parser); - return 0; - } - else if (i == 20) { - fail("Second external parser not created"); - return 0; - } +/* Using a slightly shorter namespace name provokes allocations in + * slightly different places in the code. + */ +START_TEST(test_nsalloc_less_long_namespace) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + ":e xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + "='http://example.org/'>\n" + "<" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + ":f " + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + ":att='foo'/>\n" + ""; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST - allocation_count = 10000; - if (_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) { - xml_failure(new_parser); - return 0; +START_TEST(test_nsalloc_long_context) +{ + const char *text = + "\n" + " \n" + "]>\n" + "\n" + "&en;" + ""; + ExtOption options[] = { + { "foo", ""}, + { "bar", "" }, + { NULL, NULL } + }; + int i; + const int max_alloc_count = 70; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + 
XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } - XML_ParserFree(new_parser); - return 1; + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } +END_TEST -/* Test that running out of memory in dtdCopy is correctly reported. - * Based on test_default_ns_from_ext_subset_and_ext_ge() +/* This function is void; it will throw a fail() on error, so if it + * returns normally it must have succeeded. */ -START_TEST(test_alloc_dtd_copy_default_atts) +static void +context_realloc_test(const char *text) +{ + ExtOption options[] = { + { "foo", ""}, + { "bar", "" }, + { NULL, NULL } + }; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} + +START_TEST(test_nsalloc_realloc_long_context) { const char *text = - "\n" - "\n" + "\n" "]>\n" - "\n" - "&en;\n" + "\n" + "&en;" ""; - XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetExternalEntityRefHandler(parser, - external_entity_dbl_handler); - XML_SetUserData(parser, NULL); - if (_XML_Parse_SINGLE_BYTES(parser, 
text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + context_realloc_test(text); } END_TEST +START_TEST(test_nsalloc_realloc_long_context_2) +{ + const char *text = + "\n" + "]>\n" + "\n" + "&en;" + ""; -static int XMLCALL -external_entity_dbl_handler_2(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_3) { - intptr_t callno = (intptr_t)XML_GetUserData(parser); - const char *text; - XML_Parser new_parser; - int i; + const char *text = + "\n" + "]>\n" + "\n" + "&en;" + ""; - if (callno == 0) { - /* Try different allocation levels for whole exercise */ - text = ("\n" - "\n" - "\n"); - XML_SetUserData(parser, (void *)(intptr_t)1); - for (i = 0; i < 20; i++) { - allocation_count = i; - new_parser = XML_ExternalEntityParserCreate(parser, - context, - NULL); - if (new_parser == NULL) - continue; - if (_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), - XML_TRUE) != XML_STATUS_ERROR) - break; - XML_ParserFree(new_parser); - } + context_realloc_test(text); +} +END_TEST - /* Ensure future allocations will be well */ - allocation_count = 10000; - if (i == 0) { - fail("first external parser unexpectedly created"); - XML_ParserFree(new_parser); - return 0; - } - else if (i == 20) { - fail("first external parser not allocated with count 20"); - return 0; - } - } else { - /* Just run through once */ - text = ("" - ""); - allocation_count = 10000; - new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); - if (new_parser == NULL) { - fail("Unable to create second external parser"); - return 0; - } - if (_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), - XML_TRUE) == XML_STATUS_ERROR) { - xml_failure(new_parser); - XML_ParserFree(new_parser); - return 0; - } - } - XML_ParserFree(new_parser); - return 1; 
+START_TEST(test_nsalloc_realloc_long_context_4) +{ + const char *text = + "\n" + "]>\n" + "\n" + "&en;" + ""; + + context_realloc_test(text); } +END_TEST -/* Test more external entity allocation failure paths */ -START_TEST(test_alloc_external_entity) +START_TEST(test_nsalloc_realloc_long_context_5) { const char *text = - "\n" - "\n" + "\n" "]>\n" - "\n" - "&en;\n" + "\n" + "&en;" ""; - XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetExternalEntityRefHandler(parser, - external_entity_dbl_handler_2); - XML_SetUserData(parser, NULL); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), - XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + context_realloc_test(text); } END_TEST +START_TEST(test_nsalloc_realloc_long_context_6) +{ + const char *text = + "\n" + "]>\n" + "\n" + "&en;" + ""; -static int XMLCALL -unknown_released_encoding_handler(void *UNUSED_P(data), - const XML_Char *encoding, - XML_Encoding *info) + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_7) { - if (!strcmp(encoding, "unsupported-encoding")) { - int i; + const char *text = + "\n" + "]>\n" + "\n" + "&en;" + ""; - for (i = 0; i < 256; i++) - info->map[i] = i; - info->data = NULL; - info->convert = NULL; - info->release = dummy_release; - return XML_STATUS_OK; + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_ge_name) +{ + const char *text = + "\n" + "]>\n" + "\n" + "&" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + 
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ";" + ""; + ExtOption options[] = { + { "foo", "" }, + { "bar", "" }, + { NULL, NULL } + }; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } - return XML_STATUS_ERROR; + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); } +END_TEST -/* Test the effects of allocation failure in internal entities. - * Based on test_unknown_encoding_internal_entity +/* Test that when a namespace is passed through the context mechanism + * to an external entity parser, the parsers handle reallocation + * failures correctly. The prefix is exactly the right length to + * provoke particular uncommon code paths. 
*/ -START_TEST(test_alloc_internal_entity) +START_TEST(test_nsalloc_realloc_long_context_in_dtd) { const char *text = - "\n" - "]>\n" - ""; - unsigned int i; - int repeated = 0; + "\n" + "]>\n" + "<" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ":doc xmlns:" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + 
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "='foo/Second'>&First;"; + ExtOption options[] = { + { "foo/First", "Hello world" }, + { NULL, NULL } + }; + int i; + const int max_realloc_count = 20; - for (i = 0; i < 10; i++) { - /* Again, repeat some counts to account for caching */ - if (repeated < 2 && i == 2) { - i--; - repeated++; - } - XML_SetUnknownEncodingHandler(parser, - unknown_released_encoding_handler, - NULL); - allocation_count = i; - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; - XML_ParserReset(parser, NULL); + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } if (i == 0) - fail("Internal entity worked despite failing allocations"); - else if (i == 10) - fail("Internal entity failed at allocation count 10"); + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); } END_TEST - -/* Test the robustness against allocation failure of element handling - * Based on test_dtd_default_handling(). 
- */ -START_TEST(test_alloc_dtd_default_handling) +START_TEST(test_nsalloc_long_default_in_ext) { const char *text = "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "]>"; - const char *expected = "\n\n\n\n\n\n\n"; - CharData storage; + " \n" + " \n" + "]>\n" + "&x;"; + ExtOption options[] = { + { "foo", ""}, + { NULL, NULL } + }; int i; - int repeat = 0; + const int max_alloc_count = 50; - for (i = 0; i < 10; i++) { - /* Repeat some counts to catch cached allocations */ - if ((repeat < 4 && i == 2) || - (repeat == 4 && i == 3)) { - i--; - repeat++; - } + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; - XML_SetDefaultHandler(parser, accumulate_characters); - XML_SetDoctypeDeclHandler(parser, - dummy_start_doctype_handler, - dummy_end_doctype_handler); - XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); - XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); - XML_SetElementDeclHandler(parser, dummy_element_decl_handler); - XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); - XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); - XML_SetCommentHandler(parser, dummy_comment_handler); - XML_SetCdataSectionHandler(parser, - dummy_start_cdata_handler, - dummy_end_cdata_handler); - XML_SetUnparsedEntityDeclHandler( - parser, - dummy_unparsed_entity_decl_handler); - CharData_Init(&storage); - XML_SetUserData(parser, &storage); - XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; - XML_ParserReset(parser, NULL); - } - if (i == 0) { - fail("Default DTD parsed despite allocation failures"); - } else if (i == 10) { - fail("Default DTD not parsed with alloc count 10"); - } else { - CharData_CheckXMLChars(&storage, expected); + + /* See comment in 
test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST -/* Test robustness of XML_SetEncoding() with a failing allocator */ -START_TEST(test_alloc_explicit_encoding) -{ +START_TEST(test_nsalloc_long_systemid_in_ext) +{ + const char *text = + "\n" + "]>\n" + "&en;"; + ExtOption options[] = { + { "foo", "" }, + { + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/", + "" + }, + { NULL, NULL } + }; int i; + const int max_alloc_count = 55; - for (i = 0; i < 5; i++) { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; - if (XML_SetEncoding(parser, "us-ascii") == XML_STATUS_OK) + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + 
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; + + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } if (i == 0) - fail("Encoding set despite failing allocator"); - else if (i == 5) - fail("Encoding not set at allocation count 5"); + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); } END_TEST -/* Test robustness of XML_SetBase against a failing allocator */ -START_TEST(test_alloc_set_base) +/* Test the effects of allocation failure on parsing an element in a + * namespace. Based on test_nsalloc_long_context. + */ +START_TEST(test_nsalloc_prefixed_element) { - const XML_Char *new_base = "/local/file/name.xml"; + const char *text = + "\n" + " \n" + "]>\n" + "\n" + "&en;" + ""; + ExtOption options[] = { + { "foo", "" }, + { "bar", "" }, + { NULL, NULL } + }; int i; + const int max_alloc_count = 70; - for (i = 0; i < 5; i++) { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; - if (XML_SetBase(parser, new_base) == XML_STATUS_OK) + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) break; + + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); } if (i == 0) - fail("Base set despite failing allocator"); - else if (i == 5) - fail("Base not set with allocation count 5"); + fail("Success despite failing allocator"); + else if (i == max_alloc_count) + fail("Failed even at full allocation count"); } END_TEST - static Suite * make_suite(void) { @@ -3479,6 +11647,7 @@ TCase *tc_namespace = tcase_create("XML namespaces"); TCase *tc_misc = tcase_create("miscellaneous tests"); TCase *tc_alloc = tcase_create("allocation tests"); + TCase *tc_nsalloc = 
tcase_create("namespace allocation tests"); suite_add_tcase(s, tc_basic); tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); @@ -3489,11 +11658,17 @@ tcase_add_test(tc_basic, test_bom_utf8); tcase_add_test(tc_basic, test_bom_utf16_be); tcase_add_test(tc_basic, test_bom_utf16_le); + tcase_add_test(tc_basic, test_nobom_utf16_le); tcase_add_test(tc_basic, test_illegal_utf8); tcase_add_test(tc_basic, test_utf8_auto_align); tcase_add_test(tc_basic, test_utf16); tcase_add_test(tc_basic, test_utf16_le_epilog_newline); + tcase_add_test(tc_basic, test_not_utf16); + tcase_add_test(tc_basic, test_bad_encoding); tcase_add_test(tc_basic, test_latin1_umlauts); + tcase_add_test(tc_basic, test_long_utf8_character); + tcase_add_test(tc_basic, test_long_latin1_attribute); + tcase_add_test(tc_basic, test_long_ascii_attribute); /* Regression test for SF bug #491986. */ tcase_add_test(tc_basic, test_danish_latin1); /* Regression test for SF bug #514281. */ @@ -3508,9 +11683,13 @@ tcase_add_test(tc_basic, test_line_number_after_error); tcase_add_test(tc_basic, test_column_number_after_error); tcase_add_test(tc_basic, test_really_long_lines); + tcase_add_test(tc_basic, test_really_long_encoded_lines); tcase_add_test(tc_basic, test_end_element_events); tcase_add_test(tc_basic, test_attr_whitespace_normalization); tcase_add_test(tc_basic, test_xmldecl_misplaced); + tcase_add_test(tc_basic, test_xmldecl_invalid); + tcase_add_test(tc_basic, test_xmldecl_missing_attr); + tcase_add_test(tc_basic, test_xmldecl_missing_value); tcase_add_test(tc_basic, test_unknown_encoding_internal_entity); tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); tcase_add_test(tc_basic, @@ -3522,9 +11701,18 @@ tcase_add_test(tc_basic, test_not_standalone_handler_accept); tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset_standalone); + tcase_add_test(tc_basic, + test_entity_with_external_subset_unless_standalone); tcase_add_test(tc_basic, 
test_wfc_no_recursive_entity_refs); tcase_add_test(tc_basic, test_ext_entity_set_encoding); + tcase_add_test(tc_basic, test_ext_entity_no_handler); + tcase_add_test(tc_basic, test_ext_entity_set_bom); + tcase_add_test(tc_basic, test_ext_entity_bad_encoding); + tcase_add_test(tc_basic, test_ext_entity_bad_encoding_2); + tcase_add_test(tc_basic, test_ext_entity_invalid_parse); + tcase_add_test(tc_basic, test_ext_entity_invalid_suspended_parse); tcase_add_test(tc_basic, test_dtd_default_handling); + tcase_add_test(tc_basic, test_dtd_attr_handling); tcase_add_test(tc_basic, test_empty_ns_without_namespaces); tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); @@ -3532,19 +11720,39 @@ tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); tcase_add_test(tc_basic, test_good_cdata_ascii); tcase_add_test(tc_basic, test_good_cdata_utf16); + tcase_add_test(tc_basic, test_good_cdata_utf16_le); + tcase_add_test(tc_basic, test_long_cdata_utf16); + tcase_add_test(tc_basic, test_multichar_cdata_utf16); + tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); tcase_add_test(tc_basic, test_bad_cdata); + tcase_add_test(tc_basic, test_bad_cdata_utf16); + tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); + tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); tcase_add_test(tc_basic, test_memory_allocation); tcase_add_test(tc_basic, test_default_current); tcase_add_test(tc_basic, test_dtd_elements); tcase_add_test(tc_basic, test_set_foreign_dtd); + tcase_add_test(tc_basic, test_foreign_dtd_not_standalone); + tcase_add_test(tc_basic, test_invalid_foreign_dtd); + tcase_add_test(tc_basic, test_foreign_dtd_with_doctype); + tcase_add_test(tc_basic, test_foreign_dtd_without_external_subset); + tcase_add_test(tc_basic, test_empty_foreign_dtd); tcase_add_test(tc_basic, test_set_base); tcase_add_test(tc_basic, test_attributes); tcase_add_test(tc_basic, 
test_reset_in_entity); tcase_add_test(tc_basic, test_resume_invalid_parse); tcase_add_test(tc_basic, test_resume_resuspended); + tcase_add_test(tc_basic, test_cdata_default); tcase_add_test(tc_basic, test_subordinate_reset); tcase_add_test(tc_basic, test_subordinate_suspend); + tcase_add_test(tc_basic, test_subordinate_xdecl_suspend); + tcase_add_test(tc_basic, test_subordinate_xdecl_abort); tcase_add_test(tc_basic, test_explicit_encoding); + tcase_add_test(tc_basic, test_trailing_cr); + tcase_add_test(tc_basic, test_ext_entity_trailing_cr); + tcase_add_test(tc_basic, test_trailing_rsqb); + tcase_add_test(tc_basic, test_ext_entity_trailing_rsqb); + tcase_add_test(tc_basic, test_ext_entity_good_cdata); tcase_add_test(tc_basic, test_user_parameters); tcase_add_test(tc_basic, test_ext_entity_ref_parameter); tcase_add_test(tc_basic, test_empty_parse); @@ -3553,7 +11761,110 @@ tcase_add_test(tc_basic, test_byte_info_at_end); tcase_add_test(tc_basic, test_byte_info_at_error); tcase_add_test(tc_basic, test_byte_info_at_cdata); + tcase_add_test(tc_basic, test_predefined_entities); tcase_add_test(tc_basic, test_invalid_tag_in_dtd); + tcase_add_test(tc_basic, test_not_predefined_entities); + tcase_add_test(tc_basic, test_ignore_section); + tcase_add_test(tc_basic, test_ignore_section_utf16); + tcase_add_test(tc_basic, test_ignore_section_utf16_be); + tcase_add_test(tc_basic, test_bad_ignore_section); + tcase_add_test(tc_basic, test_external_entity_values); + tcase_add_test(tc_basic, test_ext_entity_not_standalone); + tcase_add_test(tc_basic, test_ext_entity_value_abort); + tcase_add_test(tc_basic, test_bad_public_doctype); + tcase_add_test(tc_basic, test_attribute_enum_value); + tcase_add_test(tc_basic, test_predefined_entity_redefinition); + tcase_add_test(tc_basic, test_dtd_stop_processing); + tcase_add_test(tc_basic, test_public_notation_no_sysid); + tcase_add_test(tc_basic, test_nested_groups); + tcase_add_test(tc_basic, test_group_choice); + tcase_add_test(tc_basic, 
test_standalone_parameter_entity); + tcase_add_test(tc_basic, test_skipped_parameter_entity); + tcase_add_test(tc_basic, test_recursive_external_parameter_entity); + tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); + tcase_add_test(tc_basic, test_suspend_xdecl); + tcase_add_test(tc_basic, test_abort_epilog); + tcase_add_test(tc_basic, test_abort_epilog_2); + tcase_add_test(tc_basic, test_suspend_epilog); + tcase_add_test(tc_basic, test_unfinished_epilog); + tcase_add_test(tc_basic, test_partial_char_in_epilog); + tcase_add_test(tc_basic, test_hash_collision); + tcase_add_test(tc_basic, test_suspend_resume_internal_entity); + tcase_add_test(tc_basic, test_resume_entity_with_syntax_error); + tcase_add_test(tc_basic, test_suspend_resume_parameter_entity); + tcase_add_test(tc_basic, test_restart_on_error); + tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); + tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); + tcase_add_test(tc_basic, test_trailing_cr_in_att_value); + tcase_add_test(tc_basic, test_standalone_internal_entity); + tcase_add_test(tc_basic, test_skipped_external_entity); + tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); + tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); + tcase_add_test(tc_basic, test_param_entity_with_trailing_cr); + tcase_add_test(tc_basic, test_invalid_character_entity); + tcase_add_test(tc_basic, test_invalid_character_entity_2); + tcase_add_test(tc_basic, test_invalid_character_entity_3); + tcase_add_test(tc_basic, test_invalid_character_entity_4); + tcase_add_test(tc_basic, test_pi_handled_in_default); + tcase_add_test(tc_basic, test_comment_handled_in_default); + tcase_add_test(tc_basic, test_pi_yml); + tcase_add_test(tc_basic, test_pi_xnl); + tcase_add_test(tc_basic, test_pi_xmm); + tcase_add_test(tc_basic, test_utf16_pi); + tcase_add_test(tc_basic, test_utf16_be_pi); + tcase_add_test(tc_basic, test_utf16_be_comment); + tcase_add_test(tc_basic, 
test_utf16_le_comment); + tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); + tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); + tcase_add_test(tc_basic, test_unknown_encoding_success); + tcase_add_test(tc_basic, test_unknown_encoding_bad_name); + tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); + tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); + tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); + tcase_add_test(tc_basic, test_invalid_unknown_encoding); + tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); + tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom2); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom2); + tcase_add_test(tc_basic, test_ext_entity_utf16_be); + tcase_add_test(tc_basic, test_ext_entity_utf16_le); + tcase_add_test(tc_basic, test_ext_entity_utf16_unknown); + tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom); + tcase_add_test(tc_basic, test_utf8_in_cdata_section); + tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); + tcase_add_test(tc_basic, test_trailing_spaces_in_elements); + tcase_add_test(tc_basic, test_utf16_attribute); + tcase_add_test(tc_basic, test_utf16_second_attr); + tcase_add_test(tc_basic, test_attr_after_solidus); + tcase_add_test(tc_basic, test_utf16_pe); + tcase_add_test(tc_basic, test_bad_attr_desc_keyword); + tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); + tcase_add_test(tc_basic, test_bad_doctype); + 
tcase_add_test(tc_basic, test_bad_doctype_utf16); + tcase_add_test(tc_basic, test_bad_doctype_plus); + tcase_add_test(tc_basic, test_bad_doctype_star); + tcase_add_test(tc_basic, test_bad_doctype_query); + tcase_add_test(tc_basic, test_unknown_encoding_bad_ignore); + tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); + tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); + tcase_add_test(tc_basic, test_entity_public_utf16_be); + tcase_add_test(tc_basic, test_entity_public_utf16_le); + tcase_add_test(tc_basic, test_short_doctype); + tcase_add_test(tc_basic, test_short_doctype_2); + tcase_add_test(tc_basic, test_short_doctype_3); + tcase_add_test(tc_basic, test_long_doctype); + tcase_add_test(tc_basic, test_bad_entity); + tcase_add_test(tc_basic, test_bad_entity_2); + tcase_add_test(tc_basic, test_bad_entity_3); + tcase_add_test(tc_basic, test_bad_entity_4); + tcase_add_test(tc_basic, test_bad_notation); suite_add_tcase(s, tc_namespace); tcase_add_checked_fixture(tc_namespace, @@ -3567,33 +11878,123 @@ tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4); + tcase_add_test(tc_namespace, test_ns_unbound_prefix); tcase_add_test(tc_namespace, test_ns_default_with_empty_uri); tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes); + tcase_add_test(tc_namespace, test_ns_duplicate_hashes); tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute); tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element); tcase_add_test(tc_namespace, test_ns_parser_reset); + tcase_add_test(tc_namespace, test_ns_long_element); + tcase_add_test(tc_namespace, test_ns_mixed_prefix_atts); + tcase_add_test(tc_namespace, test_ns_extend_uri_buffer); + tcase_add_test(tc_namespace, test_ns_reserved_attributes); + tcase_add_test(tc_namespace, test_ns_reserved_attributes_2); + tcase_add_test(tc_namespace, 
test_ns_extremely_long_prefix); + tcase_add_test(tc_namespace, test_ns_unknown_encoding_success); + tcase_add_test(tc_namespace, test_ns_double_colon); + tcase_add_test(tc_namespace, test_ns_double_colon_element); + tcase_add_test(tc_namespace, test_ns_bad_attr_leafname); + tcase_add_test(tc_namespace, test_ns_bad_element_leafname); + tcase_add_test(tc_namespace, test_ns_utf16_leafname); + tcase_add_test(tc_namespace, test_ns_utf16_element_leafname); + tcase_add_test(tc_namespace, test_ns_utf16_doctype); + tcase_add_test(tc_namespace, test_ns_invalid_doctype); + tcase_add_test(tc_namespace, test_ns_double_colon_doctype); suite_add_tcase(s, tc_misc); tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); tcase_add_test(tc_misc, test_misc_alloc_create_parser); tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding); - tcase_add_test(tc_misc, test_misc_alloc_ns); tcase_add_test(tc_misc, test_misc_null_parser); - tcase_add_test(tc_misc, test_misc_alloc_ns_parse_buffer); tcase_add_test(tc_misc, test_misc_error_string); tcase_add_test(tc_misc, test_misc_version); + tcase_add_test(tc_misc, test_misc_features); tcase_add_test(tc_misc, test_misc_attribute_leak); + tcase_add_test(tc_misc, test_misc_utf16le); suite_add_tcase(s, tc_alloc); tcase_add_checked_fixture(tc_alloc, alloc_setup, alloc_teardown); + tcase_add_test(tc_alloc, test_alloc_parse_xdecl); + tcase_add_test(tc_alloc, test_alloc_parse_xdecl_2); + tcase_add_test(tc_alloc, test_alloc_parse_pi); + tcase_add_test(tc_alloc, test_alloc_parse_pi_2); + tcase_add_test(tc_alloc, test_alloc_parse_pi_3); + tcase_add_test(tc_alloc, test_alloc_parse_comment); + tcase_add_test(tc_alloc, test_alloc_parse_comment_2); tcase_add_test(tc_alloc, test_alloc_create_external_parser); tcase_add_test(tc_alloc, test_alloc_run_external_parser); tcase_add_test(tc_alloc, test_alloc_dtd_copy_default_atts); tcase_add_test(tc_alloc, test_alloc_external_entity); + tcase_add_test(tc_alloc, test_alloc_ext_entity_set_encoding); 
tcase_add_test(tc_alloc, test_alloc_internal_entity); tcase_add_test(tc_alloc, test_alloc_dtd_default_handling); tcase_add_test(tc_alloc, test_alloc_explicit_encoding); tcase_add_test(tc_alloc, test_alloc_set_base); + tcase_add_test(tc_alloc, test_alloc_realloc_buffer); + tcase_add_test(tc_alloc, test_alloc_ext_entity_realloc_buffer); + tcase_add_test(tc_alloc, test_alloc_realloc_many_attributes); + tcase_add_test(tc_alloc, test_alloc_public_entity_value); + tcase_add_test(tc_alloc, test_alloc_realloc_subst_public_entity_value); + tcase_add_test(tc_alloc, test_alloc_parse_public_doctype); + tcase_add_test(tc_alloc, test_alloc_parse_public_doctype_long_name); + tcase_add_test(tc_alloc, test_alloc_set_foreign_dtd); + tcase_add_test(tc_alloc, test_alloc_attribute_enum_value); + tcase_add_test(tc_alloc, test_alloc_realloc_attribute_enum_value); + tcase_add_test(tc_alloc, test_alloc_realloc_implied_attribute); + tcase_add_test(tc_alloc, test_alloc_realloc_default_attribute); + tcase_add_test(tc_alloc, test_alloc_notation); + tcase_add_test(tc_alloc, test_alloc_public_notation); + tcase_add_test(tc_alloc, test_alloc_system_notation); + tcase_add_test(tc_alloc, test_alloc_nested_groups); + tcase_add_test(tc_alloc, test_alloc_realloc_nested_groups); + tcase_add_test(tc_alloc, test_alloc_large_group); + tcase_add_test(tc_alloc, test_alloc_realloc_group_choice); + tcase_add_test(tc_alloc, test_alloc_pi_in_epilog); + tcase_add_test(tc_alloc, test_alloc_comment_in_epilog); + tcase_add_test(tc_alloc, test_alloc_realloc_long_attribute_value); + tcase_add_test(tc_alloc, test_alloc_attribute_whitespace); + tcase_add_test(tc_alloc, test_alloc_attribute_predefined_entity); + tcase_add_test(tc_alloc, test_alloc_long_attr_default_with_char_ref); + tcase_add_test(tc_alloc, test_alloc_long_attr_value); + tcase_add_test(tc_alloc, test_alloc_nested_entities); + tcase_add_test(tc_alloc, test_alloc_realloc_param_entity_newline); + tcase_add_test(tc_alloc, test_alloc_realloc_ce_extends_pe); 
+ tcase_add_test(tc_alloc, test_alloc_realloc_attributes); + tcase_add_test(tc_alloc, test_alloc_long_doc_name); + tcase_add_test(tc_alloc, test_alloc_long_base); + tcase_add_test(tc_alloc, test_alloc_long_public_id); + tcase_add_test(tc_alloc, test_alloc_long_entity_value); + tcase_add_test(tc_alloc, test_alloc_long_notation); + + suite_add_tcase(s, tc_nsalloc); + tcase_add_checked_fixture(tc_nsalloc, nsalloc_setup, nsalloc_teardown); + tcase_add_test(tc_nsalloc, test_nsalloc_xmlns); + tcase_add_test(tc_nsalloc, test_nsalloc_parse_buffer); + tcase_add_test(tc_nsalloc, test_nsalloc_long_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_long_uri); + tcase_add_test(tc_nsalloc, test_nsalloc_long_attr); + tcase_add_test(tc_nsalloc, test_nsalloc_long_attr_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_attributes); + tcase_add_test(tc_nsalloc, test_nsalloc_long_element); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_binding_uri); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_longer_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_long_namespace); + tcase_add_test(tc_nsalloc, test_nsalloc_less_long_namespace); + tcase_add_test(tc_nsalloc, test_nsalloc_long_context); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_2); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_3); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_4); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_5); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_6); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_7); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_ge_name); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_in_dtd); + tcase_add_test(tc_nsalloc, test_nsalloc_long_default_in_ext); + tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext); + 
tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element); return s; } diff -Nru expat-2.2.2/win32/expat.iss expat-2.2.3/win32/expat.iss --- expat-2.2.2/win32/expat.iss 2017-07-12 21:55:49.000000000 +0000 +++ expat-2.2.3/win32/expat.iss 2017-08-02 13:40:48.000000000 +0000 @@ -7,17 +7,17 @@ [Setup] AppName=Expat AppId=expat -AppVersion=2.2.2 -AppVerName=Expat 2.2.2 +AppVersion=2.2.3 +AppVerName=Expat 2.2.3 AppCopyright=Copyright � 1998-2017 Thai Open Source Software Center, Clark Cooper, and the Expat maintainers AppPublisher=The Expat Developers AppPublisherURL=http://www.libexpat.org/ AppSupportURL=http://www.libexpat.org/ AppUpdatesURL=http://www.libexpat.org/ -UninstallDisplayName=Expat XML Parser 2.2.2 -VersionInfoVersion=2.2.2 +UninstallDisplayName=Expat XML Parser 2.2.3 +VersionInfoVersion=2.2.3 -DefaultDirName={pf}\Expat 2.2.2 +DefaultDirName={pf}\Expat 2.2.3 UninstallFilesDir={app}\Uninstall Compression=lzma @@ -35,7 +35,7 @@ Flags: ignoreversion; Source: AUTHORS; DestDir: "{app}"; DestName: AUTHORS.txt Flags: ignoreversion; Source: Changes; DestDir: "{app}"; DestName: Changes.txt Flags: ignoreversion; Source: COPYING; DestDir: "{app}"; DestName: COPYING.txt -Flags: ignoreversion; Source: README; DestDir: "{app}"; DestName: README.txt +Flags: ignoreversion; Source: README.md; DestDir: "{app}"; DestName: README.txt Flags: ignoreversion; Source: doc\*.html; DestDir: "{app}\Doc" Flags: ignoreversion; Source: doc\*.css; DestDir: "{app}\Doc" Flags: ignoreversion; Source: doc\*.png; DestDir: "{app}\Doc"