diff -Nru libzeep-5.0.1/changelog libzeep-5.0.2/changelog --- libzeep-5.0.1/changelog 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/changelog 2020-11-14 07:31:12.000000000 +0000 @@ -1,3 +1,8 @@ +Version 5.0.2 +- Add support for building shared libraries +- Decoupled example code from rest, should now be build after installation, + or use the STAGE=1 option to make. + Version 5.0.1 - Update makefile to include changes made for the Debian package diff -Nru libzeep-5.0.1/configure libzeep-5.0.2/configure --- libzeep-5.0.1/configure 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/configure 2020-11-14 07:31:12.000000000 +0000 @@ -635,7 +635,6 @@ ac_subst_vars='LTLIBOBJS LIBOBJS -BUILD_DOCS_FALSE BUILD_DOCS_TRUE BJAM FOP @@ -653,6 +652,8 @@ ax_pthread_config MRC DEBUG +LZ_LT_VERSION +LZ_TL_CURRENT LIBTOOL_DEPS CXXCPP CPP @@ -744,8 +745,8 @@ ac_subst_files='' ac_user_opts=' enable_option_checking -enable_static enable_shared +enable_static with_pic enable_fast_install with_aix_soname @@ -1400,8 +1401,8 @@ --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --enable-static[=PKGS] build static libraries [default=yes] --enable-shared[=PKGS] build shared libraries [default=no] + --enable-static[=PKGS] build static libraries [default=yes] --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) @@ -3863,67 +3864,6 @@ test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' -# Check whether --enable-static was given. -if test "${enable_static+set}" = set; then : - enableval=$enable_static; p=${PACKAGE-default} - case $enableval in - yes) enable_static=yes ;; - no) enable_static=no ;; - *) - enable_static=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, - for pkg in $enableval; do - IFS=$lt_save_ifs - if test "X$pkg" = "X$p"; then - enable_static=yes - fi - done - IFS=$lt_save_ifs - ;; - esac -else - enable_static=yes -fi - - - - - - - - - -# Check whether --enable-shared was given. -if test "${enable_shared+set}" = set; then : - enableval=$enable_shared; p=${PACKAGE-default} - case $enableval in - yes) enable_shared=yes ;; - no) enable_shared=no ;; - *) - enable_shared=no - # Look at the argument we got. We use all the common list separators. - lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, - for pkg in $enableval; do - IFS=$lt_save_ifs - if test "X$pkg" = "X$p"; then - enable_shared=yes - fi - done - IFS=$lt_save_ifs - ;; - esac -else - enable_shared=no -fi - - - - - - - - case `pwd` in *\ * | *\ *) @@ -8287,6 +8227,35 @@ # Set options +# Check whether --enable-shared was given. +if test "${enable_shared+set}" = set; then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else + enable_shared=no +fi + + + + + + + @@ -8297,6 +8266,36 @@ + # Check whether --enable-static was given. +if test "${enable_static+set}" = set; then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else + enable_static=yes +fi + + + + + + + + # Check whether --with-pic was given. @@ -15865,6 +15864,16 @@ +LZ_CURRENT=5 +LZ_REVISION=0 +LZ_AGE=0 + +LZ_TL_CURRENT="${LZ_CURRENT}" +LZ_LT_VERSION="${LZ_CURRENT}:${LZ_REVISION}:${LZ_AGE}" + + + + # Extract the first word of "sh", so it can be a program name with args. set dummy sh; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 @@ -18129,22 +18138,18 @@ fi -if test "$enable_documentation" = "yes" && test "$DOXYGEN" = "nodoxygen" -o "$QUICKBOOX" = "noquickbook" -o "$XSLTPROC" = "noxsltproc" -o "$FOP" = "nofop" -o "$BJAM" = "nobjam"; then : +if test "$enable_documentation" = "yes"; then : + + if test "$DOXYGEN" = "nodoxygen" -o "$QUICKBOOX" = "noquickbook" -o "$XSLTPROC" = "noxsltproc" -o "$FOP" = "nofop" -o "$BJAM" = "nobjam"; then : as_fn_error $? "could not find the tools necessary to build the documentation" "$LINENO" 5 fi - if test "$enable_documentation" = "yes"; then - BUILD_DOCS_TRUE= - BUILD_DOCS_FALSE='#' -else - BUILD_DOCS_TRUE='#' - BUILD_DOCS_FALSE= -fi + BUILD_DOCS_TRUE=1 -BUILD_DIR="$abs_top_builddir" +fi -ac_config_files="$ac_config_files GNUmakefile lib-xml/GNUmakefile lib-json/GNUmakefile lib-http/GNUmakefile examples/GNUmakefile lib-xml/libzeep-xml.pc lib-json/libzeep-json.pc lib-http/libzeep-http.pc" +ac_config_files="$ac_config_files GNUmakefile libzeep.pc" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure @@ -18291,10 +18296,6 @@ LTLIBOBJS=$ac_ltlibobjs -if test -z "${BUILD_DOCS_TRUE}" && test -z "${BUILD_DOCS_FALSE}"; then - as_fn_error $? "conditional \"BUILD_DOCS\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 @@ -18865,10 +18866,10 @@ sed_quote_subst='$sed_quote_subst' double_quote_subst='$double_quote_subst' delay_variable_subst='$delay_variable_subst' -enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' -enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' shared_archive_member_spec='`$ECHO "$shared_archive_member_spec" | $SED "$delay_single_quote_subst"`' @@ -19248,13 +19249,7 @@ case $ac_config_target in "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;; - "lib-xml/GNUmakefile") CONFIG_FILES="$CONFIG_FILES lib-xml/GNUmakefile" ;; - "lib-json/GNUmakefile") CONFIG_FILES="$CONFIG_FILES lib-json/GNUmakefile" ;; - "lib-http/GNUmakefile") CONFIG_FILES="$CONFIG_FILES lib-http/GNUmakefile" ;; - "examples/GNUmakefile") CONFIG_FILES="$CONFIG_FILES examples/GNUmakefile" ;; - "lib-xml/libzeep-xml.pc") CONFIG_FILES="$CONFIG_FILES lib-xml/libzeep-xml.pc" ;; - "lib-json/libzeep-json.pc") CONFIG_FILES="$CONFIG_FILES lib-json/libzeep-json.pc" ;; - "lib-http/libzeep-http.pc") CONFIG_FILES="$CONFIG_FILES lib-http/libzeep-http.pc" ;; + "libzeep.pc") CONFIG_FILES="$CONFIG_FILES libzeep.pc" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac @@ -19729,15 +19724,15 @@ # ### BEGIN LIBTOOL CONFIG -# Whether or not to build static libraries. -build_old_libs=$enable_static +# Which release of libtool.m4 was used? +macro_version=$macro_version +macro_revision=$macro_revision # Whether or not to build shared libraries. build_libtool_libs=$enable_shared -# Which release of libtool.m4 was used? -macro_version=$macro_version -macro_revision=$macro_revision +# Whether or not to build static libraries. +build_old_libs=$enable_static # What type of objects to build. pic_mode=$pic_mode diff -Nru libzeep-5.0.1/configure.ac libzeep-5.0.2/configure.ac --- libzeep-5.0.1/configure.ac 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/configure.ac 2020-11-14 07:31:12.000000000 +0000 @@ -13,18 +13,29 @@ AC_PREFIX_DEFAULT(/usr/local) AC_PROG_INSTALL -AC_ENABLE_STATIC -AC_DISABLE_SHARED -AC_PROG_LIBTOOL + +dnl Shared libraries are not convenient +LT_INIT([disable-shared]) AC_SUBST(LIBTOOL_DEPS) +dnl versioning +LZ_CURRENT=5 +LZ_REVISION=0 +LZ_AGE=0 + +LZ_TL_CURRENT="${LZ_CURRENT}" +LZ_LT_VERSION="${LZ_CURRENT}:${LZ_REVISION}:${LZ_AGE}" + +AC_SUBST(LZ_TL_CURRENT) +AC_SUBST(LZ_LT_VERSION) + AC_PATH_PROG([SHELL], [sh]) AC_ARG_VAR([DEBUG], [Build a debug version of the library]) AC_ARG_VAR([MRC], [Specify a location for the mrc executable]) -dnl We really want to use mrc +dnl We really like to use mrc if test "x$MRC" = "x"; then AC_PATH_PROG([MRC], [mrc]) fi @@ -87,19 +98,15 @@ AC_PATH_TOOL([FOP], [fop], [nofop]) AC_PATH_TOOL([BJAM], [bjam], [nobjam]) -AS_IF([test "$enable_documentation" = "yes" && test "$DOXYGEN" = "nodoxygen" -o "$QUICKBOOX" = "noquickbook" -o "$XSLTPROC" = "noxsltproc" -o "$FOP" = "nofop" -o "$BJAM" = "nobjam"], - [AC_MSG_ERROR([could not find the tools necessary to build the documentation])]) - -AM_CONDITIONAL([BUILD_DOCS], [[test "$enable_documentation" = "yes"]]) +AS_IF([test "$enable_documentation" = "yes"], + [ + AS_IF([test "$DOXYGEN" = "nodoxygen" -o "$QUICKBOOX" = "noquickbook" -o "$XSLTPROC" = "noxsltproc" -o "$FOP" = "nofop" -o "$BJAM" = "nobjam"], + [AC_MSG_ERROR([could not find the tools necessary to build the documentation])]) -BUILD_DIR="$abs_top_builddir" + BUILD_DOCS_TRUE=1 + AC_SUBST(BUILD_DOCS_TRUE) +]) dnl Process Makefile.in to create Makefile AC_OUTPUT([GNUmakefile - lib-xml/GNUmakefile - lib-json/GNUmakefile - lib-http/GNUmakefile - examples/GNUmakefile - lib-xml/libzeep-xml.pc - lib-json/libzeep-json.pc - lib-http/libzeep-http.pc]) + libzeep.pc]) diff -Nru libzeep-5.0.1/debian/changelog libzeep-5.0.2/debian/changelog --- libzeep-5.0.1/debian/changelog 2020-11-10 12:22:59.000000000 +0000 +++ libzeep-5.0.2/debian/changelog 2020-11-16 13:18:01.000000000 +0000 @@ -1,14 +1,35 @@ -libzeep (5.0.1-0ubuntu1) hirsute; urgency=medium +libzeep (5.0.2-3) unstable; urgency=medium - * New upstream release. + * Fix autopkgtest control file (needed one more dependency) - -- Dimitri John Ledkov Tue, 10 Nov 2020 12:22:59 +0000 + -- Maarten L. Hekkelman Mon, 16 Nov 2020 14:18:01 +0100 + +libzeep (5.0.2-2) unstable; urgency=medium + + * Source only upload + + -- Andreas Tille Mon, 16 Nov 2020 08:21:10 +0100 + +libzeep (5.0.2-1) unstable; urgency=medium + + * Fix FTBFS (Closes: #974074) + * New upstream release + * Add autopkgtest + + -- Maarten L. Hekkelman Sat, 14 Nov 2020 12:44:57 +0100 + +libzeep (5.0.1-1) unstable; urgency=medium + + * Fix writing HTML, proper empty elements + * Fix endian issue in SHA algorithm + * more robust http tests + * Fix FTBFS: (Closes: #973526) + + -- Maarten L. Hekkelman Mon, 09 Nov 2020 10:04:29 +0100 libzeep (5.0.0-3) unstable; urgency=medium - * Update debian/control, moved depends from Build-Depends-indep to - Build-Depends - Closes: #973525, #973526 + * Update debian/control, moved depends from Build-Depends-indep to Build-Depends * Added mrc as requirement for build, suggestion for install -- Maarten L. Hekkelman Sat, 31 Oct 2020 10:26:02 +0100 diff -Nru libzeep-5.0.1/debian/control libzeep-5.0.2/debian/control --- libzeep-5.0.1/debian/control 2020-11-10 12:22:59.000000000 +0000 +++ libzeep-5.0.2/debian/control 2020-11-16 13:18:01.000000000 +0000 @@ -1,18 +1,17 @@ Source: libzeep -Maintainer: Ubuntu Developers -XSBC-Original-Maintainer: Debian Med Packaging Team +Maintainer: Debian Med Packaging Team Uploaders: Maarten L. Hekkelman , Andreas Tille Section: libs Priority: optional Build-Depends: debhelper-compat (= 13), autoconf-archive, - mrc, libboost-dev, libboost-program-options-dev, + libboost-date-time-dev, libboost-system-dev, libboost-tools-dev, - libboost-date-time-dev, + mrc, doxygen, docbook-website, xsltproc, @@ -27,9 +26,11 @@ Architecture: any Section: libdevel Depends: ${misc:Depends}, - libzeep5.0 (= ${binary:Version}), + libzeep5 (= ${binary:Version}), libboost-dev, libboost-program-options-dev, + libboost-date-time-dev, + libboost-system-dev, pkg-config Suggests: libzeep-doc, mrc Description: Development files for libzeep @@ -72,11 +73,11 @@ This specific package contains the documentation you can use to develop new software using libzeep. -Package: libzeep5.0 +Package: libzeep5 Architecture: any Depends: ${misc:Depends}, ${shlibs:Depends} -Description: Library files for libzeep +Description: Library file for libzeep Libzeep is a C++ library providing a validating XML parser, XML DOM tree implementation, XPath 1.0 support and code to create SOAP/REST servers as well as a full web application framework. diff -Nru libzeep-5.0.1/debian/libzeep5.0.docs libzeep-5.0.2/debian/libzeep5.0.docs --- libzeep-5.0.1/debian/libzeep5.0.docs 2020-09-10 09:33:17.000000000 +0000 +++ libzeep-5.0.2/debian/libzeep5.0.docs 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -README.md diff -Nru libzeep-5.0.1/debian/libzeep5.docs libzeep-5.0.2/debian/libzeep5.docs --- libzeep-5.0.1/debian/libzeep5.docs 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/debian/libzeep5.docs 2020-11-16 13:18:01.000000000 +0000 @@ -0,0 +1 @@ +README.md diff -Nru libzeep-5.0.1/debian/libzeep-dev.examples libzeep-5.0.2/debian/libzeep-dev.examples --- libzeep-5.0.1/debian/libzeep-dev.examples 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/debian/libzeep-dev.examples 2020-11-16 13:18:01.000000000 +0000 @@ -0,0 +1 @@ +examples/* \ No newline at end of file diff -Nru libzeep-5.0.1/debian/patches/docs-patch libzeep-5.0.2/debian/patches/docs-patch --- libzeep-5.0.1/debian/patches/docs-patch 2020-09-10 09:33:17.000000000 +0000 +++ libzeep-5.0.2/debian/patches/docs-patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -Description: Add some rules to find the right tools and files -Author: Maarten L. Hekkelman -Last-Update: Thu, 10 Sep 2020 08:55:19 +0200 - ---- a/doc/Jamfile.v2 -+++ b/doc/Jamfile.v2 -@@ -1,3 +1,16 @@ -+using boostbook -+ : /usr/share/xml/docbook/stylesheet/docbook-xsl/ -+ : /usr/share/xml/docbook/schema/dtd/4.2/ -+ : /usr/share/boostbook -+ ; -+ -+using xsltproc : /usr/bin/xsltproc ; -+ -+using doxygen : /usr/bin/doxygen ; -+ -+using fop : /usr/bin/fop ; -+ -+using quickbook : /usr/bin/quickbook ; - - doxygen autodoc - : diff -Nru libzeep-5.0.1/debian/patches/examples-makefile libzeep-5.0.2/debian/patches/examples-makefile --- libzeep-5.0.1/debian/patches/examples-makefile 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/debian/patches/examples-makefile 2020-11-16 13:18:01.000000000 +0000 @@ -0,0 +1,26 @@ +Description: Fix makefile for examples +Author: Maarten L. Hekkelman +Last-Update: Sat, 14 Nov 2020 11:41:05 +0100 +Forwarded: yes + +--- a/examples/GNUmakefile ++++ b/examples/GNUmakefile +@@ -50,9 +50,6 @@ + clean: + rm -rf $(OBJDIR)/* $(EXAMPLES:%=$(BINDIR)/%) + +-ZEEP_LIBS = http xml json +-BOOST_LIBS = system +- + define EXAMPLE_template = + + -include $$(OBJDIR)/$(1).d +@@ -61,7 +58,7 @@ + + $(BINDIR)/$(1): $$($(1)_OBJECTS) | $(BINDIR) + @ echo "ld> $(1)" +- @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) $(ZEEP_LIBS:%=-lzeep-%) $(BOOST_LIBS:%=-lboost_%) $(LIBS) ++ @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -lzeep $(LIBS) + + endef + diff -Nru libzeep-5.0.1/debian/patches/install-targets libzeep-5.0.2/debian/patches/install-targets --- libzeep-5.0.1/debian/patches/install-targets 2020-11-10 12:21:11.000000000 +0000 +++ libzeep-5.0.2/debian/patches/install-targets 2020-11-16 13:18:01.000000000 +0000 @@ -1,90 +1,58 @@ Description: Fix makefiles to install parts at the correct location Author: Maarten L. Hekkelman Last-Update: Thu, 10 Sep 2020 08:55:19 +0200 +Forwarded: not-needed -Index: libzeep-5.0.1/GNUmakefile.in -=================================================================== ---- libzeep-5.0.1.orig/GNUmakefile.in -+++ libzeep-5.0.1/GNUmakefile.in -@@ -16,7 +16,7 @@ CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ +--- a/GNUmakefile.in ++++ b/GNUmakefile.in +@@ -16,7 +16,7 @@ LDFLAGS = @LDFLAGS@ @BOOST_LDFLAGS@ - LIBS = @LIBS@ - --prefix = @prefix@ -+prefix = $(DESTDIR)/@prefix@ - exec_prefix = @exec_prefix@ - libdir = @libdir@ - includedir = @includedir@ -@@ -95,6 +95,14 @@ install-$(1)-lib: - install-$(1)-dev: - +$(MAKE) -C lib-$(1) install-dev - -+.PHONY: install-$(1)-lib -+install-$(1)-lib: -+ +$(MAKE) -C lib-$(1) install-lib -+ -+.PHONY: install-$(1)-dev -+install-$(1)-dev: -+ +$(MAKE) -C lib-$(1) install-dev -+ - lib-$(1): $(OUTPUT_DIR) - - endef -@@ -121,7 +129,17 @@ doc: - endif - - .PHONY: all --all: libraries test examples -+all: libraries -+ -+.PHONY: install-libs -+install-libs: $(ZEEP_LIBS:%=install-%-lib) -+ -+.PHONY: install-dev -+install-dev: $(ZEEP_LIBS:%=install-%-dev) -+ -+.PHONY: install-doc -+install-doc: doc -+ cd doc; for f in `find html -type f`; do install -D $$f $(docdir)/libzeep-doc/$$f; done - - .PHONY: install-libs - install-libs: $(ZEEP_LIBS:%=install-%-lib) -Index: libzeep-5.0.1/lib-http/GNUmakefile.in -=================================================================== ---- libzeep-5.0.1.orig/lib-http/GNUmakefile.in -+++ libzeep-5.0.1/lib-http/GNUmakefile.in -@@ -14,7 +14,7 @@ CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ - LDFLAGS = @LDFLAGS@ @LIBS@ @BOOST_LDFLAGS@ - LIBS = @LIBS@ + LIBS = @BOOST_PROGRAM_OPTIONS_LIB@ @BOOST_DATE_TIME_LIB@ @LIBS@ -prefix = @prefix@ +prefix = $(DESTDIR)@prefix@ exec_prefix = @exec_prefix@ libdir = @libdir@ - includedir = @includedir@ -Index: libzeep-5.0.1/lib-json/GNUmakefile.in -=================================================================== ---- libzeep-5.0.1.orig/lib-json/GNUmakefile.in -+++ libzeep-5.0.1/lib-json/GNUmakefile.in -@@ -15,7 +15,7 @@ CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ - LDFLAGS = @LDFLAGS@ @LIBS@ @BOOST_LDFLAGS@ - LIBS = @BOOST_DATE_TIME_LIB@ @LIBS@ + datarootdir = @datarootdir@ +@@ -125,12 +125,10 @@ + -include $(OBJECTS:%.lo=%.d) + + $(OBJDIR)/%.lo: %.cpp | $(OBJDIR) +- @ echo ">>" $< +- @ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< ++ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< + + $(OBJDIR)/%.o: %.cpp | $(OBJDIR) +- @ echo ">>" $< +- @ $(CXX) $(CXXFLAGS) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< ++ $(CXX) $(CXXFLAGS) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< + + $(LIB_TARGET): $(OBJECTS) + $(CXXLINK) -rpath $(libdir) $(OBJECTS) $(LDFLAGS) $(LIBS) +@@ -166,11 +164,15 @@ + .PHONY: install-lib + install-lib: $(LIB_TARGET) + install -d $(libdir) +- $(LIBTOOL) --mode=install install $(LIB_TARGET) $(libdir); ++ $(LIBTOOL) --mode=install install $(LIB_TARGET) $(libdir) ++ rm -f $(libdir)/libzeep.so $(libdir)/libzeep.a $(libdir)/libzeep.la + + # Install header files and .pc files + .PHONY: install-dev + install-dev: $(LIB_TARGET) libzeep.pc ++ install -d $(libdir) ++ $(LIBTOOL) --mode=install install $(LIB_TARGET) $(libdir) ++ rm -f $(libdir)/libzeep.so.5.0.0 $(libdir)/libzeep.so.5 + for dir in . ./http ./json ./xml ; do \ + install -d $(includedir)/zeep/$${dir}; \ + for hdr in include/zeep/$${dir}/*.hpp ; do \ +@@ -226,8 +228,7 @@ + endif --prefix = @prefix@ -+prefix = $(DESTDIR)@prefix@ - exec_prefix = @exec_prefix@ - libdir = @libdir@ - includedir = @includedir@ -Index: libzeep-5.0.1/lib-xml/GNUmakefile.in -=================================================================== ---- libzeep-5.0.1.orig/lib-xml/GNUmakefile.in -+++ libzeep-5.0.1/lib-xml/GNUmakefile.in -@@ -14,7 +14,7 @@ CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ - LDFLAGS = @LDFLAGS@ @LIBS@ @BOOST_LDFLAGS@ - LIBS = @LIBS@ + tests/$(1)-test: $(LIB_TARGET) $$($(1)_OBJECTS) | tests +- @ echo ">>> building $(1)-test" +- @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -L.libs -lzeep $(LIBS) ++ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -L.libs -lzeep $(LIBS) --prefix = @prefix@ -+prefix = $(DESTDIR)@prefix@ - exec_prefix = @exec_prefix@ - libdir = @libdir@ - includedir = @includedir@ + .PHONY: $(1)-test + $(1)-test: tests/$(1)-test diff -Nru libzeep-5.0.1/debian/patches/series libzeep-5.0.2/debian/patches/series --- libzeep-5.0.1/debian/patches/series 2020-11-10 12:22:46.000000000 +0000 +++ libzeep-5.0.2/debian/patches/series 2020-11-16 13:18:01.000000000 +0000 @@ -1,2 +1,2 @@ install-targets -docs-patch +examples-makefile diff -Nru libzeep-5.0.1/debian/rules libzeep-5.0.2/debian/rules --- libzeep-5.0.1/debian/rules 2020-09-10 16:14:56.000000000 +0000 +++ libzeep-5.0.2/debian/rules 2020-11-16 13:18:01.000000000 +0000 @@ -7,29 +7,17 @@ LC_ALL := C.UTF-8 export LC_ALL -export LIBRARY_VERSION=5.0 - %: dh $@ override_dh_auto_configure: - dh_auto_configure -- --enable-documentation + dh_auto_configure -- --enable-shared --enable-documentation override_dh_auto_install: - $(MAKE) DESTDIR=$(CURDIR)/debian/libzeep5.0 install-libs + $(MAKE) DESTDIR=$(CURDIR)/debian/libzeep5 install-lib $(MAKE) DESTDIR=$(CURDIR)/debian/libzeep-dev install-dev $(MAKE) DESTDIR=$(CURDIR)/debian/libzeep-doc install-doc -override_dh_auto_configure-arch: - dh_auto_configure -- --enable-shared --disable-documentation - -override_dh_auto_configure-indep: - dh_auto_configure -- --enable-shared - -override_dh_auto_build-indep: - cd $(CURDIR)/doc/ && bjam - find -type f -iname "*.md5" -delete - override_dh_auto_test: ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) $(MAKE) test @@ -39,6 +27,3 @@ ## cleaning up dependency_libs filed in *.la files sed -i "/dependency_libs/ s/'.*'/''/" `find . -name '*.la'` dh_install - -override_dh_makeshlibs: - dh_makeshlibs -- -v$(LIBRARY_VERSION) diff -Nru libzeep-5.0.1/debian/tests/control libzeep-5.0.2/debian/tests/control --- libzeep-5.0.1/debian/tests/control 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/debian/tests/control 2020-11-16 13:18:01.000000000 +0000 @@ -0,0 +1,3 @@ +Tests: run-build-test +Depends: build-essential, libzeep-dev +Restrictions: allow-stderr diff -Nru libzeep-5.0.1/debian/tests/run-build-test libzeep-5.0.2/debian/tests/run-build-test --- libzeep-5.0.1/debian/tests/run-build-test 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/debian/tests/run-build-test 2020-11-16 13:18:01.000000000 +0000 @@ -0,0 +1,18 @@ +#!/bin/bash +set -e +set -x + +pkg="libzeep-dev" + +if [ "$AUTOPKGTEST_TMP" = "" ] ; then + AUTOPKGTEST_TMP=`mktemp -d /tmp/${pkg}-test.XXXXXX` + trap "rm -rf $AUTOPKGTEST_TMP" 0 INT QUIT ABRT PIPE TERM +fi + +cp -r /usr/share/doc/${pkg}/examples/* $AUTOPKGTEST_TMP + +cd $AUTOPKGTEST_TMP + +make + +bin/serialize-xml \ No newline at end of file diff -Nru libzeep-5.0.1/doc/Jamfile.v2 libzeep-5.0.2/doc/Jamfile.v2 --- libzeep-5.0.1/doc/Jamfile.v2 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/doc/Jamfile.v2 2020-11-14 07:31:12.000000000 +0000 @@ -1,3 +1,16 @@ +using boostbook + : /usr/share/xml/docbook/stylesheet/docbook-xsl/ + : /usr/share/xml/docbook/schema/dtd/4.2/ + : /usr/share/boostbook + ; + +using xsltproc : /usr/bin/xsltproc ; + +using doxygen : /usr/bin/doxygen ; + +using fop : /usr/bin/fop ; + +using quickbook : /usr/bin/quickbook ; doxygen autodoc : diff -Nru libzeep-5.0.1/examples/GNUmakefile libzeep-5.0.2/examples/GNUmakefile --- libzeep-5.0.1/examples/GNUmakefile 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/examples/GNUmakefile 2020-11-14 07:31:12.000000000 +0000 @@ -0,0 +1,81 @@ +# Makefile for the libzeep-http library +# +# Copyright Maarten L. Hekkelman, UMC St. Radboud 2008-2013. +# Copyright Maarten L. Hekkelman, 2014-2019 +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) +# +# This makefile assumes you already installed libzeep + +.PHONY: firstTarget +firstTarget: all + +# main build variables +CXXFLAGS += -pthread -std=c++17 +WARNINGS += all no-multichar + +ifeq "$(DEBUG)" "1" +DEFINES += DEBUG +CXXFLAGS += -g -O0 +LDFLAGS += -g +else +CXXFLAGS += -O3 +DEFINES += NDEBUG +endif + +CXXFLAGS += $(DEFINES:%=-D%) +CXXFLAGS += $(WARNINGS:%=-W%) + +OBJDIR = obj +ifeq "$(DEBUG)" "1" + OBJDIR := $(OBJDIR).dbg +endif + +ifeq "$(STAGE)" "1" +CXXFLAGS += -I ../include +LDFLAGS += -L ../lib +endif + +BINDIR = bin + +$(OBJDIR) $(BINDIR): + mkdir -p $@ + +$(OBJDIR)/%.o: %.cpp | $(OBJDIR) + @ echo "cc>" $< + @ $(CXX) -MD -c -o $@ $< $(CFLAGS) $(CXXFLAGS) + +.PHONY: clean +clean: + rm -rf $(OBJDIR)/* $(EXAMPLES:%=$(BINDIR)/%) + +ZEEP_LIBS = http xml json +BOOST_LIBS = system + +define EXAMPLE_template = + +-include $$(OBJDIR)/$(1).d + +$(1)_OBJECTS = $$(OBJDIR)/$(1).o + +$(BINDIR)/$(1): $$($(1)_OBJECTS) | $(BINDIR) + @ echo "ld> $(1)" + @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) $(ZEEP_LIBS:%=-lzeep-%) $(BOOST_LIBS:%=-lboost_%) $(LIBS) + +endef + +EXAMPLES = \ + http-server-0 http-server-1 http-server-2 \ + serialize-xml synopsis-json synopsis-xml validating-xml-sample xpath-sample \ + synopsis-el-1 security-sample rest-sample rest-sample-2 + +$(foreach part,$(EXAMPLES),$(eval $(call EXAMPLE_template,$(part)))) + +.PHONY: examples +examples: $(EXAMPLES:%=$(BINDIR)/%) + +all: examples + +FORCE: + diff -Nru libzeep-5.0.1/examples/GNUmakefile.in libzeep-5.0.2/examples/GNUmakefile.in --- libzeep-5.0.1/examples/GNUmakefile.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/examples/GNUmakefile.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,120 +0,0 @@ -# Makefile for the libzeep-http library -# -# Copyright Maarten L. Hekkelman, UMC St. Radboud 2008-2013. -# Copyright Maarten L. Hekkelman, 2014-2019 -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -.PHONY: firstTarget -firstTarget: all - -CXX = @CXX@ -CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ -LDFLAGS = @LDFLAGS@ @BOOST_LDFLAGS@ -LIBS = @LIBS@ - -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir = @libdir@ -includedir = @includedir@ - -ifneq "$(CHECK_CONFIG)" "" - -GNUmakefile: ../config.status GNUmakefile.in - cd ..; $(SHELL) ./config.status - -../config.status: ../configure - cd ..; $(SHELL) ./config.status --recheck - -../configure: ../configure.ac - cd ..; autoconf - -endif - -# main build variables -CXXFLAGS += -I. -pthread -I ../include/ -WARNINGS += all no-multichar - -# Use the DEBUG flag to build debug versions of the code -DEBUG = @DEBUG@ - -# resource support? -MRC = @MRC@ -ifneq "x$(MRC)" "x" -USE_RSRC = 1 -DEFINES += USE_RSRC -else -USE_RSRC = 0 -endif - -ifeq "$(DEBUG)" "1" -DEFINES += DEBUG -CXXFLAGS += -g -O0 -LDFLAGS += -g -else -CXXFLAGS += -O3 -DEFINES += NDEBUG -endif - -# targets - -VPATH += src:test - -CXXFLAGS += $(DEFINES:%=-D%) -CXXFLAGS += $(WARNINGS:%=-W%) - -OBJDIR = obj -ifeq "$(DEBUG)" "1" - OBJDIR := $(OBJDIR).dbg -endif - -BINDIR = bin - -$(OBJDIR) $(BINDIR): - mkdir -p $@ - -$(OBJDIR)/%.o: %.cpp | $(OBJDIR) - @ echo "cc>" $< - @ $(CXX) -MD -c -o $@ $< $(CFLAGS) $(CXXFLAGS) - -$(OBJDIR)/dummy_rsrc.o: - $(MRC) -o $@ test.xml - -.PHONY: clean -clean: - rm -rf $(OBJDIR)/* $(EXAMPLES) - -ZEEP_LIBS = http xml json -BOOST_LIBS = system - -define EXAMPLE_template = - --include $$(OBJDIR)/$(1).d - -$(1)_OBJECTS = $$(OBJDIR)/$(1).o - -ifneq ($$(USE_RSRC),0) -$(1)_OBJECTS += $$(OBJDIR)/dummy_rsrc.o -endif - -$(BINDIR)/$(1): $$($(1)_OBJECTS) | $(BINDIR) - @ echo "ld> $(1)" - @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -L../lib $(ZEEP_LIBS:%=-lzeep-%) $(BOOST_LIBS:%=-lboost_%) $(LIBS) - -endef - -EXAMPLES = \ - http-server-0 http-server-1 http-server-2 \ - serialize-xml synopsis-json synopsis-xml validating-xml-sample xpath-sample \ - synopsis-el-1 security-sample rest-sample rest-sample-2 - -$(foreach part,$(EXAMPLES),$(eval $(call EXAMPLE_template,$(part)))) - -.PHONY: examples -examples: $(EXAMPLES:%=$(BINDIR)/%) - -all: examples - -FORCE: - diff -Nru libzeep-5.0.1/examples/http-server-1.cpp libzeep-5.0.2/examples/http-server-1.cpp --- libzeep-5.0.1/examples/http-server-1.cpp 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/examples/http-server-1.cpp 2020-11-14 07:31:12.000000000 +0000 @@ -1,7 +1,3 @@ -/* compile: -clang++ -o http-server-1 http-server-1.cpp -I ~/projects/boost_1_73_0 -DWEBAPP_USES_RESOURCES -I. -fPIC -pthread -std=c++17 -Wall -g -DDEBUG -I ../../include/ -L ../../lib -lzeep-http -lzeep-xml -lzeep-json dummy_rsrc.o -lstdc++fs -*/ - //[ simple_http_server #include #include diff -Nru libzeep-5.0.1/.gitignore libzeep-5.0.2/.gitignore --- libzeep-5.0.1/.gitignore 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/.gitignore 2020-11-14 07:31:12.000000000 +0000 @@ -15,10 +15,13 @@ config.log config.status GNUmakefile -*/GNUmakefile +lib-*/GNUmakefile aclocal.m4 libtool lib/ libzeep-*.pc examples/bin/ lib-xml/.gdb_history +tests/ +.libs/ +libzeep.la \ No newline at end of file diff -Nru libzeep-5.0.1/GNUmakefile.in libzeep-5.0.2/GNUmakefile.in --- libzeep-5.0.1/GNUmakefile.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/GNUmakefile.in 2020-11-14 07:31:12.000000000 +0000 @@ -12,20 +12,17 @@ firstTarget: all CXX = @CXX@ -CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ +CXXFLAGS = @CPPFLAGS@ @CXXFLAGS@ @BOOST_CPPFLAGS@ LDFLAGS = @LDFLAGS@ @BOOST_LDFLAGS@ -LIBS = @LIBS@ +LIBS = @BOOST_PROGRAM_OPTIONS_LIB@ @BOOST_DATE_TIME_LIB@ @LIBS@ prefix = @prefix@ exec_prefix = @exec_prefix@ libdir = @libdir@ +datarootdir = @datarootdir@ includedir = @includedir@ docdir = @docdir@ - -BOOST_LIBS = program_options -ZEEP_LIBS = http json xml - -LIBS += $(BOOST_LIBS:%=-lboost_%) $(ZEEP_LIBS:%=-lzeep-%) +pkgconfigdir = $(libdir)/pkgconfig ifneq "$(CHECK_CONFIG)" "" @@ -40,77 +37,115 @@ endif +LIB_NAME = @PACKAGE_NAME@ +LIB_VERSION = @LZ_LT_VERSION@ +LIB_TARGET = $(LIB_NAME).la +VERSION = @PACKAGE_VERSION@ +DIST_NAME = @PACKAGE_NAME@-@PACKAGE_VERSION@ + # libtool stuff LIBTOOL_DEPS = @LIBTOOL_DEPS@ libtool: $(LIBTOOL_DEPS) - $(SHELL) ./config.status --recheck + $(SHELL) ./config.status libtool LIBTOOL = $(SHELL) @abs_top_builddir@/libtool CXXCOMPILE = $(LIBTOOL) --silent --tag=CXX --mode=compile $(CXX) $(CXXFLAGS) -CXXLINK = $(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ +CXXLINK = $(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -version-info $(LIB_VERSION) -o $@ # main build variables -CXXFLAGS += -Wall -Wno-multichar - -VERSION = @PACKAGE_VERSION@ -DIST_NAME = @PACKAGE_NAME@ - -OUTPUT_DIR ?= ./lib +CXXFLAGS += -Wall -Wno-multichar -I include # documentation rules -@BUILD_DOCS_TRUE@BUILD_DOCS = 1 +BUILD_DOCS = @BUILD_DOCS_TRUE@ BJAM = @BJAM@ -# targets - -$(OUTPUT_DIR): - mkdir -p $(OUTPUT_DIR) +# resource support? +MRC = @MRC@ -ZEEP_LIB_PARTS = xml json http +VPATH += lib-http/src:lib-json/src:lib-xml/src:lib-http/test:lib-json/test:lib-xml/test -define ZEEPLIB_template = +# Debug support - .PHONY: $(1)_clean - $(1)_clean: - +$(MAKE) -C lib-$(1) clean +DEBUG = @DEBUG@ - .PHONY: $(1)_test - $(1)_test: libraries - +$(MAKE) -C lib-$(1) test - -.PHONY: lib-$(1) -lib-$(1): - +$(MAKE) -C lib-$(1) lib +ifeq "$(DEBUG)" "1" +DEFINES += DEBUG +CXXFLAGS += -g -O0 +LDFLAGS += -g +else +CXXFLAGS += -O3 +DEFINES += NDEBUG +endif -.PHONY: install-$(1) -install-$(1): - +$(MAKE) -C lib-$(1) install +CXXFLAGS += $(DEFINES:%=-D%) -.PHONY: install-$(1)-lib -install-$(1)-lib: - +$(MAKE) -C lib-$(1) install-lib +# Objects -.PHONY: install-$(1)-dev -install-$(1)-dev: - +$(MAKE) -C lib-$(1) install-dev +OBJDIR = obj +ifeq "$(DEBUG)" "1" + OBJDIR := $(OBJDIR).dbg +endif -lib-$(1): $(OUTPUT_DIR) +$(OBJDIR): + mkdir -p $(OBJDIR) -endef +OBJECTS = \ + $(OBJDIR)/connection.lo \ + $(OBJDIR)/controller.lo \ + $(OBJDIR)/controller-rsrc.lo \ + $(OBJDIR)/crypto.lo \ + $(OBJDIR)/daemon.lo \ + $(OBJDIR)/el-processing.lo \ + $(OBJDIR)/error-handler.lo \ + $(OBJDIR)/format.lo \ + $(OBJDIR)/glob.lo \ + $(OBJDIR)/html-controller.lo \ + $(OBJDIR)/login-controller.lo \ + $(OBJDIR)/message-parser.lo \ + $(OBJDIR)/preforked-server.lo \ + $(OBJDIR)/reply.lo \ + $(OBJDIR)/request.lo \ + $(OBJDIR)/rest-controller.lo \ + $(OBJDIR)/security.lo \ + $(OBJDIR)/server.lo \ + $(OBJDIR)/soap-controller.lo \ + $(OBJDIR)/tag-processor-v2.lo \ + $(OBJDIR)/tag-processor.lo \ + $(OBJDIR)/template-processor.lo \ + $(OBJDIR)/element.lo \ + $(OBJDIR)/json-parser.lo \ + $(OBJDIR)/character-classification.lo \ + $(OBJDIR)/doctype.lo \ + $(OBJDIR)/document.lo \ + $(OBJDIR)/node.lo \ + $(OBJDIR)/xml-parser.lo \ + $(OBJDIR)/xpath.lo + +-include $(OBJECTS:%.lo=%.d) + +$(OBJDIR)/%.lo: %.cpp | $(OBJDIR) + @ echo ">>" $< + @ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< + +$(OBJDIR)/%.o: %.cpp | $(OBJDIR) + @ echo ">>" $< + @ $(CXX) $(CXXFLAGS) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< -$(foreach part,$(ZEEP_LIB_PARTS),$(eval $(call ZEEPLIB_template,$(part)))) +$(LIB_TARGET): $(OBJECTS) + $(CXXLINK) -rpath $(libdir) $(OBJECTS) $(LDFLAGS) $(LIBS) -.PHONY: libraries -libraries: $(ZEEP_LIBS:%=lib-%) +libzeep.pc: libzeep.pc.in + ./config.status libzeep.pc -.PHONY: test -test: $(ZEEP_LIB_PARTS:%=%_test) +# Example code .PHONY: examples -examples: libraries +examples: $(LIB_TARGET) +$(MAKE) -C examples all +# Documentation + .PHONY: doc ifeq "$(BUILD_DOCS)" "1" doc: @@ -121,26 +156,106 @@ endif .PHONY: all -all: libraries test examples +all: $(LIB_TARGET) -.PHONY: install-libs -install-libs: $(ZEEP_LIBS:%=install-%-lib) +ifeq "$(BUILD_DOCS)" "1" +all: doc +endif + +# Install the libraries +.PHONY: install-lib +install-lib: $(LIB_TARGET) + install -d $(libdir) + $(LIBTOOL) --mode=install install $(LIB_TARGET) $(libdir); +# Install header files and .pc files .PHONY: install-dev -install-dev: $(ZEEP_LIBS:%=install-%-dev) +install-dev: $(LIB_TARGET) libzeep.pc + for dir in . ./http ./json ./xml ; do \ + install -d $(includedir)/zeep/$${dir}; \ + for hdr in include/zeep/$${dir}/*.hpp ; do \ + install $${hdr} $(includedir)/zeep/$${dir}; \ + done; \ + done + install -d $(pkgconfigdir) + install -m 644 libzeep.pc $(pkgconfigdir)/libzeep.pc; + cd examples; for d in `find . -type d`; do install -d $(docdir)/libzeep-dev/examples/$$d; done + cd examples; for f in `find . -type f`; do install -m644 $$f $(docdir)/libzeep-dev/examples/$$f; done .PHONY: install-doc install-doc: doc - cd doc; for f in `find html -type f`; do install -D $$f $(docdir)/libzeep-doc/$$f; done + cd doc; for d in `find html -type d`; do install -d $(docdir)/libzeep-doc/$$d; done + cd doc; for f in `find html -type f`; do install -m644 $$f $(docdir)/libzeep-doc/$$f; done .PHONY: install -install: $(ZEEP_LIBS:%=install-%) +install: install-lib install-dev +ifeq "$(BUILD_DOCS)" "1" + +$(MAKE) install-doc +endif + +# Test rules + +tests: + mkdir -p tests + +$(OBJDIR)/test_rsrc.o: lib-http/test/fragment-file.xhtml + $(MRC) -o $@ $< + +QUESTIONABLE_XML_TEST_IDS = \ + ibm-valid-P28-ibm28v02.xml \ + ibm-valid-P29-ibm29v01.xml \ + ibm-valid-P29-ibm29v02.xml \ + ibm-1-1-valid-P03-ibm03v09.xml \ + rmt-e2e-34 \ + rmt-e2e-55 \ + rmt-054 \ + rmt-ns10-006 \ + rmt-e3e-13 + +parser_PARAMS = lib-xml/test/XML-Test-Suite/xmlconf/xmlconf.xml $(QUESTIONABLE_XML_TEST_IDS:%=--questionable=%) --print-ids +xpath_PARAMS = lib-xml/test/XPath-Test-Suite/xpath-tests.xml + +define TEST_template = + +-include $$(OBJDIR)/$(1)-test.d + +$(1)_OBJECTS = $$(OBJDIR)/client-test-code.o $$(OBJDIR)/$(1)-test.o + +ifneq "x$(MRC)" "x" +$(1)_OBJECTS += $$(OBJDIR)/test_rsrc.o +endif + +tests/$(1)-test: $(LIB_TARGET) $$($(1)_OBJECTS) | tests + @ echo ">>> building $(1)-test" + @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -L.libs -lzeep $(LIBS) + +.PHONY: $(1)-test +$(1)-test: tests/$(1)-test + LD_LIBRARY_PATH=.libs tests/$(1)-test $$($(1)_PARAMS) + +endef + +TESTS = unit parser serializer xpath json crypto http processor webapp soap rest + +ifneq "x$(MRC)" "x" +TESTS += rsrc_webapp +endif + +$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part)))) + +lib-xml/test/XML-Test-Suite/xmlconf/xmlconf.xml: lib-xml/test/XML-Test-Suite.tbz + cd lib-xml/test; tar xf XML-Test-Suite.tbz + +parser-test: lib-xml/test/XML-Test-Suite/xmlconf/xmlconf.xml + +.PHONY: test +test: $(TESTS:%=%-test) .PHONY: clean -clean: $(ZEEP_LIB_PARTS:%=%_clean) +clean: + rm -rf $(LIB_TARGET) $(OBJDIR) $(DIST_NAME).tgz .libs tests lib-xml/test/XML-Test-Suite/ libzeep.pc $(MAKE) -C examples clean cd doc; $$(which $(BJAM) > /dev/null) && $(BJAM) clean || echo "No $(BJAM) installed, cannot clean doc" - rm -f $(DIST_NAME).tgz .PHONY: dist dist: doc @@ -150,6 +265,3 @@ find doc/html -depth | cpio -pd $(DIST_NAME) tar czf $(DIST_NAME).tgz $(DIST_NAME) rm -rf $(DIST_NAME) - -.PHONY: FORCE -FORCE: diff -Nru libzeep-5.0.1/lib-http/GNUmakefile.in libzeep-5.0.2/lib-http/GNUmakefile.in --- libzeep-5.0.1/lib-http/GNUmakefile.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-http/GNUmakefile.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,223 +0,0 @@ -# Makefile for the libzeep-http library -# -# Copyright Maarten L. Hekkelman, UMC St. Radboud 2008-2013. -# Copyright Maarten L. Hekkelman, 2014-2019 -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -.PHONY: firstTarget -firstTarget: lib - -CXX = @CXX@ -CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ -LDFLAGS = @LDFLAGS@ @LIBS@ @BOOST_LDFLAGS@ -LIBS = @LIBS@ - -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir = @libdir@ -includedir = @includedir@ -pkgconfigdir = $(libdir)/pkgconfig - -LIB_NAME = @PACKAGE_NAME@-http -LIB_TARGET = $(LIB_NAME).la -STAGE_DIR = @abs_top_builddir@/lib - -ifneq "$(CHECK_CONFIG)" "" - -GNUmakefile: ../config.status GNUmakefile.in - cd ..; $(SHELL) ./config.status - -../config.status: ../configure - cd ..; $(SHELL) ./config.status --recheck - -../configure: ../configure.ac - cd ..; autoconf - -endif - -# libtool stuff - -LIBTOOL = $(SHELL) @abs_top_builddir@/libtool -CXXCOMPILE = $(LIBTOOL) --silent --tag=CXX --mode=compile $(CXX) $(CXXFLAGS) -CXXLINK = $(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ - -# main build variables -CXXFLAGS += -I. -pthread -I ../include/ -WARNINGS += all no-multichar - -# Use the DEBUG flag to build debug versions of the code -DEBUG = @DEBUG@ - -# resource support? -MRC = @MRC@ -ifneq "x$(MRC)" "x" -USE_RSRC = 1 -DEFINES += USE_RSRC WEBAPP_USES_RESOURCES -else -USE_RSRC = 0 -endif - -ifeq "$(DEBUG)" "1" -DEFINES += DEBUG -CXXFLAGS += -g -O0 -LDFLAGS += -g -else -CXXFLAGS += -O3 -DEFINES += NDEBUG -endif - -# targets - -VPATH += src:test - -CXXFLAGS += $(DEFINES:%=-D%) -CXXFLAGS += $(WARNINGS:%=-W%) - -OBJDIR = obj -ifeq "$(DEBUG)" "1" - OBJDIR := $(OBJDIR).dbg -endif - -$(OBJDIR): - mkdir -p $(OBJDIR) - -OBJECTS = \ - $(OBJDIR)/connection.lo \ - $(OBJDIR)/controller.lo \ - $(OBJDIR)/crypto.lo \ - $(OBJDIR)/daemon.lo \ - $(OBJDIR)/el-processing.lo \ - $(OBJDIR)/error-handler.lo \ - $(OBJDIR)/format.lo \ - $(OBJDIR)/glob.lo \ - $(OBJDIR)/html-controller.lo \ - $(OBJDIR)/login-controller.lo \ - $(OBJDIR)/message-parser.lo \ - $(OBJDIR)/preforked-server.lo \ - $(OBJDIR)/reply.lo \ - $(OBJDIR)/request.lo \ - $(OBJDIR)/rest-controller.lo \ - $(OBJDIR)/security.lo \ - $(OBJDIR)/server.lo \ - $(OBJDIR)/soap-controller.lo \ - $(OBJDIR)/tag-processor-v2.lo \ - $(OBJDIR)/tag-processor.lo \ - $(OBJDIR)/template-processor.lo - -ifneq ($(USE_RSRC),0) -OBJECTS += \ - $(OBJDIR)/controller-rsrc.lo - -$(OBJDIR)/controller-rsrc.lo: src/mrsrc.h - -src/mrsrc.h: - $(MRC) --header > $@ - -endif - -HEADERS = \ - zeep/value-serializer.hpp \ - zeep/streambuf.hpp \ - zeep/crypto.hpp \ - zeep/config.hpp \ - zeep/exception.hpp \ - zeep/unicode-support.hpp \ - zeep/type-traits.hpp \ - zeep/http/message-parser.hpp \ - zeep/http/daemon.hpp \ - zeep/http/rest-controller.hpp \ - zeep/http/header.hpp \ - zeep/http/preforked-server.hpp \ - zeep/http/connection.hpp \ - zeep/http/reply.hpp \ - zeep/http/request.hpp \ - zeep/http/soap-controller.hpp \ - zeep/http/error-handler.hpp \ - zeep/http/html-controller.hpp \ - zeep/http/tag-processor.hpp \ - zeep/http/template-processor.hpp \ - zeep/http/controller.hpp \ - zeep/http/login-controller.hpp \ - zeep/http/security.hpp \ - zeep/http/server.hpp \ - zeep/http/el-processing.hpp - -$(LIB_TARGET): $(OBJECTS) - $(CXXLINK) -rpath $(libdir) $(OBJECTS) $(LIBS) - $(LIBTOOL) --mode=install cp $@ $(STAGE_DIR) - -.PHONY: lib -lib: $(LIB_TARGET) - -.PHONY: install-lib -install-lib: lib - install -d $(libdir) - $(LIBTOOL) --mode=install install $(LIB_TARGET) $(libdir) - -.PHONY: install-dev -install-dev: - install -d $(includedir)/zeep/http - for f in $(HEADERS); do install ../include/$$f $(includedir)/$$f; done - install -d $(pkgconfigdir) - install -m 644 $(LIB_NAME).pc $(pkgconfigdir)/$(LIB_NAME).pc - -.PHONY: install -install: install-lib install-dev - --include $(OBJECTS:%.lo=%.d) - -$(OBJDIR)/%.lo: %.cpp | $(OBJDIR) - @ echo ">>" $< - @ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< - -$(OBJDIR)/%.o: %.cpp | $(OBJDIR) - @ echo ">>" $< - @ $(CXX) $(CXXFLAGS) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< - -.PHONY: clean -clean: - rm -rf $(OBJDIR)/* $(LIB_TARGET) ../$(LIB_NAME)* ../.libs/$(LIB_NAME)* ../lib/$(LIB_NAME)* $(LIB_NAME).pc - -$(OBJDIR)/test_rsrc.o: test/fragment-file.xhtml src/mrsrc.h - $(MRC) -o $@ $< - -http_PARAMS = -webapp_PARAMS = - -ZEEP_LIBS = http xml json -BOOST_LIBS = system - -define TEST_template = - --include $$(OBJDIR)/$(1)-test.d - -$(1)_OBJECTS = $$(OBJDIR)/client-test-code.o $$(OBJDIR)/$(1)-test.o - -ifneq ($(USE_RSRC),0) -$(1)_OBJECTS += $$(OBJDIR)/test_rsrc.o -endif - -test/$(1)-test: lib $$($(1)_OBJECTS) - @ echo ">>> building $(1)-test" - @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -L../lib $(ZEEP_LIBS:%=-lzeep-%) $(BOOST_LIBS:%=-lboost_%) $(LIBS) - -.PHONY: $(1)-test -$(1)-test: test/$(1)-test - cd test; ./$(1)-test $$($(1)_PARAMS) - -endef - -TESTS = crypto http processor webapp soap rest - -ifneq ($(USE_RSRC),0) -TESTS += rsrc_webapp -endif - -$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part)))) - -.PHONY: test -test: $(TESTS:%=%-test) - -FORCE: diff -Nru libzeep-5.0.1/lib-http/libzeep-http.pc.in libzeep-5.0.2/lib-http/libzeep-http.pc.in --- libzeep-5.0.1/lib-http/libzeep-http.pc.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-http/libzeep-http.pc.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: libzeep-xml -Description: C++ library for building web applications -Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lzeep-http -lzeep-json -lzeep-xml -Cflags: -I${includedir} diff -Nru libzeep-5.0.1/lib-http/src/controller-rsrc.cpp libzeep-5.0.2/lib-http/src/controller-rsrc.cpp --- libzeep-5.0.1/lib-http/src/controller-rsrc.cpp 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-http/src/controller-rsrc.cpp 2020-11-14 07:31:12.000000000 +0000 @@ -9,11 +9,415 @@ #include #include -#include "mrsrc.h" - namespace ba = boost::algorithm; namespace fs = std::filesystem; +// -------------------------------------------------------------------- +// We have a special, private version of mrsrc here. To be able to create +// shared libraries and still be able to link when there's no mrc used. + +namespace mrsrc +{ + /// \brief Internal data structure as generated by mrc + struct rsrc_imp + { + unsigned int m_next; + unsigned int m_child; + unsigned int m_name; + unsigned int m_size; + unsigned int m_data; + }; +} + +extern const __attribute__((weak)) mrsrc::rsrc_imp gResourceIndex[]; +extern const __attribute__((weak)) char gResourceData[]; +extern const __attribute__((weak)) char gResourceName[]; + +namespace mrsrc +{ + class rsrc_data + { + public: + static rsrc_data& instance() + { + static rsrc_data s_instance; + return s_instance; + } + + const rsrc_imp* index() const { return m_index; } + + const char* data(unsigned int offset) const + { + return m_data + offset; + } + + const char* name(unsigned int offset) const + { + return m_name + offset; + } + + private: + + rsrc_data() + { + if (gResourceIndex and gResourceIndex and gResourceName) + { + m_index = gResourceIndex; + m_data = gResourceData; + m_name = gResourceName; + } + } + + rsrc_imp m_dummy = {}; + const rsrc_imp* m_index = &m_dummy; + const char* m_data = ""; + const char* m_name = ""; + }; + + /// \brief Class mrsrc::rsrc contains a pointer to the data in the + /// resource, as well as offering an iterator interface to its + /// children. + + class rsrc + { + public: + + rsrc() : m_impl(rsrc_data::instance().index()) {} + + rsrc(const rsrc& other) + : m_impl(other.m_impl) {} + + rsrc& operator=(const rsrc& other) + { + m_impl = other.m_impl; + return *this; + } + + rsrc(std::filesystem::path path); + + std::string name() const { return rsrc_data::instance().name(m_impl->m_name); } + + const char* data() const { return rsrc_data::instance().data(m_impl->m_data); } + + unsigned long size() const { return m_impl->m_size; } + + explicit operator bool() const { return m_impl != NULL and m_impl->m_size > 0; } + + template + class iterator_t + { + public: + + using iterator_category = std::input_iterator_tag; + using value_type = RSRC; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + iterator_t(const rsrc_imp* cur) + : m_cur(cur) {} + + iterator_t(const iterator_t& i) + : m_cur(i.m_cur) + { + } + + iterator_t& operator=(const iterator_t& i) + { + m_cur = i.m_cur; + return *this; + } + + reference operator*() { return m_cur; } + pointer operator->() { return& m_cur; } + + iterator_t& operator++() + { + if (m_cur.m_impl->m_next) + m_cur.m_impl = rsrc_data::instance().index() + m_cur.m_impl->m_next; + else + m_cur.m_impl = nullptr; + return *this; + } + + iterator_t operator++(int) + { + auto tmp(*this); + this->operator++(); + return tmp; + } + + bool operator==(const iterator_t& rhs) const { return m_cur.m_impl == rhs.m_cur.m_impl; } + bool operator!=(const iterator_t& rhs) const { return m_cur.m_impl != rhs.m_cur.m_impl; } + + private: + value_type m_cur; + }; + + using iterator = iterator_t; + + iterator begin() const + { + const rsrc_imp* impl = nullptr; + if (m_impl and m_impl->m_child) + impl = rsrc_data::instance().index() + m_impl->m_child; + return iterator(impl); + } + + iterator end() const + { + return iterator(nullptr); + } + + private: + rsrc(const rsrc_imp* imp) + : m_impl(imp) {} + + const rsrc_imp *m_impl; + }; + + inline rsrc::rsrc(std::filesystem::path p) + { + m_impl = rsrc_data::instance().index(); + + // using std::filesytem::path would have been natural here of course... + + auto pb = p.begin(); + auto pe = p.end(); + + while (m_impl != nullptr and pb != pe) + { + auto name = *pb++; + + const rsrc_imp* impl = nullptr; + for (rsrc child: *this) + { + if (child.name() == name) + { + impl = child.m_impl; + break; + } + } + + m_impl = impl; + } + + if (pb != pe) // not found + m_impl = nullptr; + } + + // -------------------------------------------------------------------- + + template + class basic_streambuf : public std::basic_streambuf + { + public: + + typedef CharT char_type; + typedef Traits traits_type; + typedef typename traits_type::int_type int_type; + typedef typename traits_type::pos_type pos_type; + typedef typename traits_type::off_type off_type; + + /// \brief constructor taking a \a path to the resource in memory + basic_streambuf(const std::string& path) + : m_rsrc(path) + { + init(); + } + + /// \brief constructor taking a \a rsrc + basic_streambuf(const rsrc& rsrc) + : m_rsrc(rsrc) + { + init(); + } + + basic_streambuf(const basic_streambuf&) = delete; + + basic_streambuf(basic_streambuf&& rhs) + : basic_streambuf(rhs.m_rsrc) + { + } + + basic_streambuf& operator=(const basic_streambuf&) = delete; + + basic_streambuf& operator=(basic_streambuf&& rhs) + { + swap(rhs); + return *this; + } + + void swap(basic_streambuf& rhs) + { + std::swap(m_begin, rhs.m_begin); + std::swap(m_end, rhs.m_end); + std::swap(m_current, rhs.m_current); + } + + private: + + void init() + { + m_begin = reinterpret_cast(m_rsrc.data()); + m_end = reinterpret_cast(m_rsrc.data() + m_rsrc.size()); + m_current = m_begin; + } + + int_type underflow() + { + if (m_current == m_end) + return traits_type::eof(); + + return traits_type::to_int_type(*m_current); + } + + int_type uflow() + { + if (m_current == m_end) + return traits_type::eof(); + + return traits_type::to_int_type(*m_current++); + } + + int_type pbackfail(int_type ch) + { + if (m_current == m_begin or (ch != traits_type::eof() and ch != m_current[-1])) + return traits_type::eof(); + + return traits_type::to_int_type(*--m_current); + } + + std::streamsize showmanyc() + { + assert(std::less_equal()(m_current, m_end)); + return m_end - m_current; + } + + pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which) + { + switch (dir) + { + case std::ios_base::beg: + m_current = m_begin + off; + break; + + case std::ios_base::end: + m_current = m_end + off; + break; + + case std::ios_base::cur: + m_current += off; + break; + + default: + break; + } + + if (m_current < m_begin) + m_current = m_begin; + + if (m_current > m_end) + m_current = m_end; + + return m_current - m_begin; + } + + pos_type seekpos(pos_type pos, std::ios_base::openmode which) + { + m_current = m_begin + pos; + + if (m_current < m_begin) + m_current = m_begin; + + if (m_current > m_end) + m_current = m_end; + + return m_current - m_begin; + } + + private: + rsrc m_rsrc; + const char_type* m_begin; + const char_type* m_end; + const char_type* m_current; + }; + + using streambuf = basic_streambuf>; + + // -------------------------------------------------------------------- + // class mrsrc::istream + + template + class basic_istream : public std::basic_istream + { + public: + typedef CharT char_type; + typedef Traits traits_type; + typedef typename traits_type::int_type int_type; + typedef typename traits_type::pos_type pos_type; + typedef typename traits_type::off_type off_type; + + private: + + using __streambuf_type = basic_streambuf; + using __istream_type = std::basic_istream; + + __streambuf_type m_buffer; + + public: + + basic_istream(const std::string& path) + : __istream_type(&m_buffer) + , m_buffer(path) + { + this->init(&m_buffer); + } + + basic_istream(rsrc& resource) + : __istream_type(&m_buffer) + , m_buffer(resource)\ + { + this->init(&m_buffer); + } + + basic_istream(const basic_istream&) = delete; + + basic_istream(basic_istream&& rhs) + : __istream_type(std::move(rhs)) + , m_buffer(std::move(rhs.m_buffer)) + { + __istream_type::set_rdbuf(&m_buffer); + } + + basic_istream& operator=(const basic_istream& ) = delete; + + basic_istream& operator=(basic_istream&& rhs) + { + __istream_type::operator=(std::move(rhs)); + m_buffer = std::move(rhs.m_buffer); + return *this; + } + + void swap(basic_istream& rhs) + { + __istream_type::swap(rhs); + m_buffer.swap(rhs.m_buffer); + } + + __streambuf_type* rdbuf() const + { + return const_cast<__streambuf_type*>(&m_buffer); + } + }; + + using istream = basic_istream>; +} + + + +// -------------------------------------------------------------------- + namespace zeep::http { diff -Nru libzeep-5.0.1/lib-http/test/processor-test.cpp libzeep-5.0.2/lib-http/test/processor-test.cpp --- libzeep-5.0.1/lib-http/test/processor-test.cpp 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-http/test/processor-test.cpp 2020-11-14 07:31:12.000000000 +0000 @@ -8,12 +8,14 @@ using namespace std; +#define DOCROOT "./lib-http/test/" + using json = zeep::json::element; using namespace zeep::xml::literals; void process_and_compare(zeep::xml::document& a, zeep::xml::document& b, const zeep::http::scope& scope = {}) { - zeep::http::template_processor p; + zeep::http::template_processor p(DOCROOT); zeep::http::tag_processor_v2 tp; tp.process_xml(a.child(), scope, "", p); @@ -376,7 +378,7 @@ )"_xml; - zeep::http::template_processor p; + zeep::http::template_processor p(DOCROOT); zeep::http::tag_processor_v2 tp; zeep::http::request req("GET", "/", { 1, 0 }, { { "Accept-Language", "nl, en-US;q=0.7, en;q=0.3" }}, ""); @@ -411,7 +413,7 @@ )"_xml; - zeep::http::template_processor p; + zeep::http::template_processor p(DOCROOT); zeep::http::tag_processor_v2 tp; zeep::http::request req("GET", "/", { 1, 0 }, { { "Accept-Language", "da, en-US;q=0.7, en;q=0.3" }}, ""); @@ -652,7 +654,7 @@ BOOST_AUTO_TEST_CASE(test_22a) { - zeep::http::template_processor p; + zeep::http::template_processor p(DOCROOT); zeep::xml::document doc1; p.load_template("fragment-file :: frag1", doc1); diff -Nru libzeep-5.0.1/lib-json/GNUmakefile.in libzeep-5.0.2/lib-json/GNUmakefile.in --- libzeep-5.0.1/lib-json/GNUmakefile.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-json/GNUmakefile.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,163 +0,0 @@ -# Makefile for the libzeep-json library -# -# Copyright Maarten L. Hekkelman, UMC St. Radboud 2008-2013. -# Copyright Maarten L. Hekkelman, 2014-2019 -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - - -.PHONY: firstTarget -firstTarget: lib - -CXX = @CXX@ -CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ -LDFLAGS = @LDFLAGS@ @LIBS@ @BOOST_LDFLAGS@ -LIBS = @BOOST_DATE_TIME_LIB@ @LIBS@ - -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir = @libdir@ -includedir = @includedir@ -pkgconfigdir = $(libdir)/pkgconfig - -LIB_NAME = @PACKAGE_NAME@-json -LIB_TARGET = $(LIB_NAME).la -STAGE_DIR = @abs_top_builddir@/lib - -ifneq "$(CHECK_CONFIG)" "" - -GNUmakefile: ../config.status GNUmakefile.in - cd ..; $(SHELL) ./config.status - -../config.status: ../configure - cd ..; $(SHELL) ./config.status --recheck - -../configure: ../configure.ac - cd ..; autoconf - -endif - -# libtool stuff - -LIBTOOL = $(SHELL) @abs_top_builddir@/libtool -CXXCOMPILE = $(LIBTOOL) --silent --tag=CXX --mode=compile $(CXX) $(CXXFLAGS) -CXXLINK = $(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ - -# main build variables -CXXFLAGS += -I. -pthread -I ../include/ -WARNINGS += all no-multichar - -# Use the DEBUG flag to build debug versions of the code -DEBUG = @DEBUG@ - -ifeq "$(DEBUG)" "1" -DEFINES += DEBUG -CXXFLAGS += -g -O0 -LDFLAGS += -g -else -CXXFLAGS += -O3 -DEFINES += NDEBUG -endif - -# targets - -VPATH += src:test - -CXXFLAGS += $(DEFINES:%=-D%) -CXXFLAGS += $(WARNINGS:%=-W%) - -OBJDIR = obj -ifeq "$(DEBUG)" "1" - OBJDIR := $(OBJDIR).dbg -endif - -$(OBJDIR): - mkdir -p $(OBJDIR) - -OBJECTS = \ - $(OBJDIR)/element.lo \ - $(OBJDIR)/parser.lo - -HEADERS = \ - zeep/value-serializer.hpp \ - zeep/config.hpp \ - zeep/exception.hpp \ - zeep/nvp.hpp \ - zeep/unicode-support.hpp \ - zeep/type-traits.hpp \ - zeep/json/factory.hpp \ - zeep/json/iterator.hpp \ - zeep/json/element.hpp \ - zeep/json/serializer.hpp \ - zeep/json/to_element.hpp \ - zeep/json/from_element.hpp \ - zeep/json/parser.hpp \ - zeep/json/type_traits.hpp \ - zeep/json/element_fwd.hpp - -$(LIB_TARGET): $(OBJECTS) - $(CXXLINK) -rpath $(libdir) $(OBJECTS) $(LIBS) - $(LIBTOOL) --mode=install cp $@ $(STAGE_DIR) - -.PHONY: lib -lib: $(LIB_TARGET) - -.PHONY: install-lib -install-lib: lib - install -d $(libdir) - $(LIBTOOL) --mode=install install $(LIB_TARGET) $(libdir) - -.PHONY: install-dev -install-dev: - install -d $(includedir)/zeep/json - for f in $(HEADERS); do install ../include/$$f $(includedir)/$$f; done - install -d $(pkgconfigdir) - install -m 644 $(LIB_NAME).pc $(pkgconfigdir)/$(LIB_NAME).pc - -.PHONY: install -install: install-lib install-dev - --include $(OBJECTS:%.lo=%.d) - -$(OBJDIR)/%.lo: %.cpp | $(OBJDIR) - @ echo ">>" $< - @ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< - -$(OBJDIR)/%.o: %.cpp | $(OBJDIR) - @ echo ">>" $< - @ $(CXX) $(CXXFLAGS) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< - -.PHONY: clean -clean: - rm -rf $(OBJDIR)/* $(LIB_TARGET) ../$(LIB_NAME)* ../.libs/$(LIB_NAME)* ../lib/$(LIB_NAME)* $(LIB_NAME).pc - -json_PARAMS = - -ZEEP_LIBS = xml json -BOOST_LIBS = - -define TEST_template = - --include $$(OBJDIR)/$(1)-test.d - -$(1)_OBJECTS = $$(OBJDIR)/$(1)-test.o - -test/$(1)-test: lib $$($(1)_OBJECTS) - @ echo ">>> building $(1)-test" - @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -L../lib $(ZEEP_LIBS:%=-lzeep-%) $(BOOST_LIBS:%=-lboost_%) $(LIBS) - -.PHONY: $(1)-test -$(1)-test: test/$(1)-test - cd test; ./$(1)-test $$($(1)_PARAMS) - -endef - -TESTS = json -$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part)))) - -.PHONY: test -test: $(TESTS:%=%-test) - -FORCE: - diff -Nru libzeep-5.0.1/lib-json/libzeep-json.pc.in libzeep-5.0.2/lib-json/libzeep-json.pc.in --- libzeep-5.0.1/lib-json/libzeep-json.pc.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-json/libzeep-json.pc.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: libzeep-xml -Description: C++ json library -Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lzeep-json -Cflags: -I${includedir} diff -Nru libzeep-5.0.1/lib-json/src/json-parser.cpp libzeep-5.0.2/lib-json/src/json-parser.cpp --- libzeep-5.0.1/lib-json/src/json-parser.cpp 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/lib-json/src/json-parser.cpp 2020-11-14 07:31:12.000000000 +0000 @@ -0,0 +1,664 @@ +// Copyright Maarten L. Hekkelman, Radboud University 2008-2013. +// Copyright Maarten L. Hekkelman, 2014-2020 +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#include + +#include +#include + +#include + +namespace zeep::json +{ + +// -------------------------------------------------------------------- + +class json_parser +{ + public: + json_parser(std::istream& is) + : m_is(is) + { + } + + void parse(json::element& object); + + private: + + enum class token_t : uint8_t + { + Eof, + LeftBrace, RightBrace, + LeftBracket, RightBracket, + Comma, + Colon, + String, + Integer, + Number, + True, + False, + Null, + Undef + }; + + std::string describe_token(token_t t) const + { + switch (t) + { + case token_t::Eof: return "end of data"; + case token_t::LeftBrace: return "left brace ('{')"; + case token_t::RightBrace: return "richt brace ('}')"; + case token_t::LeftBracket: return "left bracket ('[')"; + case token_t::RightBracket: return "right bracket (']')"; + case token_t::Comma: return "comma"; + case token_t::Colon: return "colon"; + case token_t::String: return "string"; + case token_t::Integer: return "integer"; + case token_t::Number: return "number"; + case token_t::True: return "true"; + case token_t::False: return "false"; + case token_t::Null: return "null"; + case token_t::Undef: return "undefined token"; + default: assert(false); return "???"; + } + } + + void match(token_t expected); + + void parse_value(json::element& e); + void parse_object(json::element& e); + void parse_array(json::element& e); + + uint8_t get_next_byte(); + unicode get_next_unicode(); + unicode get_next_char(); + void retract(); + + token_t get_next_token(); + + std::istream& m_is; + + // a minimal stack for ungetc like operations + unicode m_buffer[2]; + unicode* m_buffer_ptr = m_buffer; + + std::string m_token; + double m_token_float; + int64_t m_token_int; + token_t m_lookahead; +}; + +uint8_t json_parser::get_next_byte() +{ + int result = m_is.rdbuf()->sbumpc(); + + if (result == std::streambuf::traits_type::eof()) + result = 0; + + return static_cast(result); +} + +unicode json_parser::get_next_unicode() +{ + unicode result = get_next_byte(); + + if (result & 0x080) + { + unsigned char ch[3]; + + if ((result & 0x0E0) == 0x0C0) + { + ch[0] = get_next_byte(); + if ((ch[0] & 0x0c0) != 0x080) + throw std::runtime_error("Invalid utf-8"); + result = ((result & 0x01F) << 6) | (ch[0] & 0x03F); + } + else if ((result & 0x0F0) == 0x0E0) + { + ch[0] = get_next_byte(); + ch[1] = get_next_byte(); + if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080) + throw std::runtime_error("Invalid utf-8"); + result = ((result & 0x00F) << 12) | ((ch[0] & 0x03F) << 6) | (ch[1] & 0x03F); + } + else if ((result & 0x0F8) == 0x0F0) + { + ch[0] = get_next_byte(); + ch[1] = get_next_byte(); + ch[2] = get_next_byte(); + if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080 or (ch[2] & 0x0c0) != 0x080) + throw std::runtime_error("Invalid utf-8"); + result = ((result & 0x007) << 18) | ((ch[0] & 0x03F) << 12) | ((ch[1] & 0x03F) << 6) | (ch[2] & 0x03F); + + if (result > 0x10ffff) + throw std::runtime_error("invalid utf-8 character (out of range)"); + } + } + + return result; +} + +unicode json_parser::get_next_char() +{ + unicode result = 0; + + if (m_buffer_ptr > m_buffer) // if buffer is not empty we already did all the validity checks + result = *--m_buffer_ptr; + else + { + result = get_next_unicode(); + + if (result >= 0x080) + { + if (result == 0x0ffff or result == 0x0fffe) + throw std::runtime_error("character " + to_hex(result) + " is not allowed"); + + // surrogate support + else if (result >= 0x0D800 and result <= 0x0DBFF) + { + unicode uc2 = get_next_char(); + if (uc2 >= 0x0DC00 and uc2 <= 0x0DFFF) + result = (result - 0x0D800) * 0x400 + (uc2 - 0x0DC00) + 0x010000; + else + throw std::runtime_error("leading surrogate character without trailing surrogate character"); + } + else if (result >= 0x0DC00 and result <= 0x0DFFF) + throw std::runtime_error("trailing surrogate character without a leading surrogate"); + } + } + + // append(m_token, result); + // somehow, append refuses to inline, so we have to do it ourselves + if (result < 0x080) + m_token += (static_cast(result)); + else if (result < 0x0800) + { + char ch[2] = { + static_cast(0x0c0 | (result >> 6)), + static_cast(0x080 | (result & 0x3f))}; + m_token.append(ch, 2); + } + else if (result < 0x00010000) + { + char ch[3] = { + static_cast(0x0e0 | (result >> 12)), + static_cast(0x080 | ((result >> 6) & 0x3f)), + static_cast(0x080 | (result & 0x3f))}; + m_token.append(ch, 3); + } + else + { + char ch[4] = { + static_cast(0x0f0 | (result >> 18)), + static_cast(0x080 | ((result >> 12) & 0x3f)), + static_cast(0x080 | ((result >> 6) & 0x3f)), + static_cast(0x080 | (result & 0x3f))}; + m_token.append(ch, 4); + } + + return result; +} + +void json_parser::retract() +{ + assert(not m_token.empty()); + *m_buffer_ptr++ = pop_last_char(m_token); +} + +auto json_parser::get_next_token() -> token_t +{ + enum class state_t + { + Start, + Negative, + Zero, + Number, + NumberFraction, + NumberExpSign, + NumberExpDigit1, + NumberExpDigit2, + Literal, + String, + Escape, + EscapeHex1, + EscapeHex2, + EscapeHex3, + EscapeHex4 + } state = state_t::Start; + + token_t token = token_t::Undef; + double fraction = 1.0, exponent = 1; + bool negative = false, negativeExp = false; + + unicode hx; + + m_token.clear(); + + while (token == token_t::Undef) + { + unicode ch = get_next_char(); + + switch (state) + { + case state_t::Start: + switch (ch) + { + case 0: + token = token_t::Eof; + break; + case '{': + token = token_t::LeftBrace; + break; + case '}': + token = token_t::RightBrace; + break; + case '[': + token = token_t::LeftBracket; + break; + case ']': + token = token_t::RightBracket; + break; + case ',': + token = token_t::Comma; + break; + case ':': + token = token_t::Colon; + break; + case ' ': + case '\n': + case '\r': + case '\t': + m_token.clear(); + break; + case '"': + m_token.pop_back(); + state = state_t::String; + break; + case '-': + state = state_t::Negative; + break; + default: + if (ch == '0') + { + state = state_t::Zero; + m_token_int = 0; + } + else if (ch >= '1' and ch <= '9') + { + m_token_int = ch - '0'; + state = state_t::Number; + } + else if (ch < 128 and std::isalpha(ch)) + state = state_t::Literal; + else + throw zeep::exception("invalid character (" + (std::isprint(ch) ? std::string(1, ch) :to_hex(ch)) + ") in json"); + } + break; + + case state_t::Negative: + if (ch == '0') + { + state = state_t::Zero; + negative = true; + } + else if (ch >= '1' and ch <= '9') + { + state = state_t::Number; + m_token_int = ch - '0'; + negative = true; + } + else + throw zeep::exception("invalid character '-' in json"); + break; + + case state_t::Zero: +#if DISALLOW_LEADING_ZERO + if ((ch >= '0' and ch <= '9') or ch == '.') + throw zeep::exception("invalid number in json, should not start with zero"); +#else + if (ch >= '0' and ch <= '9') + throw zeep::exception("invalid number in json, should not start with zero"); + else if (ch == '.') + { + m_token_float = m_token_int = 0; + fraction = 0.1; + state = state_t::NumberFraction; + } +#endif + else + { + retract(); + m_token_int = 0; + token = token_t::Integer; + } + break; + + case state_t::Number: + if (ch >= '0' and ch <= '9') + m_token_int = 10 * m_token_int + (ch - '0'); + else if (ch == '.') + { + m_token_float = m_token_int; + fraction = 0.1; + state = state_t::NumberFraction; + } + else if (ch == 'e' or ch == 'E') + { + m_token_float = m_token_int; + state = state_t::NumberExpSign; + } + else + { + retract(); + token = token_t::Integer; + if (negative) + m_token_int = -m_token_int; + } + break; + + case state_t::NumberFraction: + if (ch >= '0' and ch <= '9') + { + m_token_float += fraction * (ch - '0'); + fraction /= 10; + } + else if (ch == 'e' or ch == 'E') + state = state_t::NumberExpSign; + else + { + retract(); + token = token_t::Number; + if (negative) + m_token_float = -m_token_float; + } + break; + + case state_t::NumberExpSign: + if (ch == '+') + state = state_t::NumberExpDigit1; + else if (ch == '-') + { + negativeExp = true; + state = state_t::NumberExpDigit1; + } + else if (ch >= '0' and ch <= '9') + { + exponent = (ch - '0'); + state = state_t::NumberExpDigit2; + } + break; + + case state_t::NumberExpDigit1: + if (ch >= '0' and ch <= '9') + { + exponent = (ch - '0'); + state = state_t::NumberExpDigit2; + } + else + throw zeep::exception("invalid floating point format in json"); + break; + + case state_t::NumberExpDigit2: + if (ch >= '0' and ch <= '9') + exponent = 10 * exponent + (ch - '0'); + else + { + retract(); + m_token_float *= std::pow(10, (negativeExp ? -1 : 1) * exponent); + if (negative) + m_token_float = -m_token_float; + token = token_t::Number; + } + break; + + case state_t::Literal: + if (ch > 128 or not std::isalpha(ch)) + { + retract(); + if (m_token == "true") + token = token_t::True; + else if (m_token == "false") + token = token_t::False; + else if (m_token == "null") + token = token_t::Null; + else + throw zeep::exception("Invalid literal found in json: " + m_token); + } + break; + + case state_t::String: + if (ch == '\"') + { + token = token_t::String; + m_token.pop_back(); + } + else if (ch == 0) + throw zeep::exception("Invalid unterminated string in json"); + else if (ch == '\\') + { + state = state_t::Escape; + m_token.pop_back(); + } + break; + + case state_t::Escape: + switch (ch) + { + case '"': + case '\\': + case '/': + break; + + case 'n': m_token.back() = '\n'; break; + case 't': m_token.back() = '\t'; break; + case 'r': m_token.back() = '\r'; break; + case 'f': m_token.back() = '\f'; break; + case 'b': m_token.back() = '\b'; break; + + case 'u': + state = state_t::EscapeHex1; + m_token.pop_back(); + break; + + default: + throw zeep::exception("Invalid escape sequence in json (\\" + std::string{static_cast(ch)} + ')'); + } + if (state == state_t::Escape) + state = state_t::String; + break; + + case state_t::EscapeHex1: + if (ch >= 0 and ch <= '9') + hx = ch - '0'; + else if (ch >= 'a' and ch <= 'f') + hx = 10 + ch - 'a'; + else if (ch >= 'A' and ch <= 'F') + hx = 10 + ch - 'A'; + else + throw zeep::exception("Invalid hex sequence in json"); + m_token.pop_back(); + state = state_t::EscapeHex2; + break; + + case state_t::EscapeHex2: + if (ch >= 0 and ch <= '9') + hx = 16 * hx + ch - '0'; + else if (ch >= 'a' and ch <= 'f') + hx = 16 * hx + 10 + ch - 'a'; + else if (ch >= 'A' and ch <= 'F') + hx = 16 * hx + 10 + ch - 'A'; + else + throw zeep::exception("Invalid hex sequence in json"); + m_token.pop_back(); + state = state_t::EscapeHex3; + break; + + case state_t::EscapeHex3: + if (ch >= 0 and ch <= '9') + hx = 16 * hx + ch - '0'; + else if (ch >= 'a' and ch <= 'f') + hx = 16 * hx + 10 + ch - 'a'; + else if (ch >= 'A' and ch <= 'F') + hx = 16 * hx + 10 + ch - 'A'; + else + throw zeep::exception("Invalid hex sequence in json"); + m_token.pop_back(); + state = state_t::EscapeHex4; + break; + + case state_t::EscapeHex4: + if (ch >= 0 and ch <= '9') + hx = 16 * hx + ch - '0'; + else if (ch >= 'a' and ch <= 'f') + hx = 16 * hx + 10 + ch - 'a'; + else if (ch >= 'A' and ch <= 'F') + hx = 16 * hx + 10 + ch - 'A'; + else + throw zeep::exception("Invalid hex sequence in json"); + m_token.pop_back(); + append(m_token, hx); + state = state_t::String; + break; + } + } + + return token; +} + +void json_parser::match(token_t expected) +{ + if (m_lookahead != expected) + throw zeep::exception("Syntax error in json, expected " + describe_token(expected) + " but found " + describe_token(m_lookahead)); + + m_lookahead = get_next_token(); +} + +void json_parser::parse_value(json::element& e) +{ + switch (m_lookahead) + { + case token_t::Eof: + break; + + case token_t::Null: + match(m_lookahead); + break; + + case token_t::False: + match(m_lookahead); + e = false; + break; + + case token_t::True: + match(m_lookahead); + e = true; + break; + + case token_t::Integer: + match(m_lookahead); + e = m_token_int; + break; + + case token_t::Number: + match(m_lookahead); + e = m_token_float; + break; + + case token_t::LeftBrace: + match(m_lookahead); + parse_object(e); + match(token_t::RightBrace); + break; + + case token_t::LeftBracket: + match(m_lookahead); + parse_array(e); + match(token_t::RightBracket); + break; + + case token_t::String: + e = m_token; + match(m_lookahead); + break; + + default: + throw std::runtime_error("Syntax error in json, unexpected token " + describe_token(m_lookahead)); + } +} + +void json_parser::parse_object(json::element& e) +{ + for (;;) + { + if (m_lookahead == token_t::RightBrace or m_lookahead == token_t::Eof) + break; + + auto name = m_token; + match(token_t::String); + match(token_t::Colon); + + json::element v; + parse_value(v); + e.emplace(name, v); + + if (m_lookahead != token_t::Comma) + break; + + match(m_lookahead); + } +} + +void json_parser::parse_array(json::element& e) +{ + for (;;) + { + if (m_lookahead == token_t::RightBracket or m_lookahead == token_t::Eof) + break; + + json::element v; + parse_value(v); + e.emplace_back(v); + + if (m_lookahead != token_t::Comma) + break; + + match(m_lookahead); + } +} + +void json_parser::parse(json::element& obj) +{ + m_lookahead = get_next_token(); + parse_value(obj); + if (m_lookahead != token_t::Eof) + throw zeep::exception("Extraneaous data after parsing json"); +} + +void parse_json(const std::string& json, element& object) +{ + std::istringstream s(json); + json_parser p(s); + + p.parse(object); +} + +void parse_json(std::istream& is, element& object) +{ + json_parser p(is); + p.parse(object); +} + +namespace literals +{ +element operator""_json(const char* s, size_t n) +{ + element result; + parse_json(std::string{s, n}, result); + return result; +} +} + +} diff -Nru libzeep-5.0.1/lib-json/src/parser.cpp libzeep-5.0.2/lib-json/src/parser.cpp --- libzeep-5.0.1/lib-json/src/parser.cpp 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-json/src/parser.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,664 +0,0 @@ -// Copyright Maarten L. Hekkelman, Radboud University 2008-2013. -// Copyright Maarten L. Hekkelman, 2014-2020 -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include - -#include -#include - -#include - -namespace zeep::json -{ - -// -------------------------------------------------------------------- - -class json_parser -{ - public: - json_parser(std::istream& is) - : m_is(is) - { - } - - void parse(json::element& object); - - private: - - enum class token_t : uint8_t - { - Eof, - LeftBrace, RightBrace, - LeftBracket, RightBracket, - Comma, - Colon, - String, - Integer, - Number, - True, - False, - Null, - Undef - }; - - std::string describe_token(token_t t) const - { - switch (t) - { - case token_t::Eof: return "end of data"; - case token_t::LeftBrace: return "left brace ('{')"; - case token_t::RightBrace: return "richt brace ('}')"; - case token_t::LeftBracket: return "left bracket ('[')"; - case token_t::RightBracket: return "right bracket (']')"; - case token_t::Comma: return "comma"; - case token_t::Colon: return "colon"; - case token_t::String: return "string"; - case token_t::Integer: return "integer"; - case token_t::Number: return "number"; - case token_t::True: return "true"; - case token_t::False: return "false"; - case token_t::Null: return "null"; - case token_t::Undef: return "undefined token"; - default: assert(false); return "???"; - } - } - - void match(token_t expected); - - void parse_value(json::element& e); - void parse_object(json::element& e); - void parse_array(json::element& e); - - uint8_t get_next_byte(); - unicode get_next_unicode(); - unicode get_next_char(); - void retract(); - - token_t get_next_token(); - - std::istream& m_is; - - // a minimal stack for ungetc like operations - unicode m_buffer[2]; - unicode* m_buffer_ptr = m_buffer; - - std::string m_token; - double m_token_float; - int64_t m_token_int; - token_t m_lookahead; -}; - -uint8_t json_parser::get_next_byte() -{ - int result = m_is.rdbuf()->sbumpc(); - - if (result == std::streambuf::traits_type::eof()) - result = 0; - - return static_cast(result); -} - -unicode json_parser::get_next_unicode() -{ - unicode result = get_next_byte(); - - if (result & 0x080) - { - unsigned char ch[3]; - - if ((result & 0x0E0) == 0x0C0) - { - ch[0] = get_next_byte(); - if ((ch[0] & 0x0c0) != 0x080) - throw std::runtime_error("Invalid utf-8"); - result = ((result & 0x01F) << 6) | (ch[0] & 0x03F); - } - else if ((result & 0x0F0) == 0x0E0) - { - ch[0] = get_next_byte(); - ch[1] = get_next_byte(); - if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080) - throw std::runtime_error("Invalid utf-8"); - result = ((result & 0x00F) << 12) | ((ch[0] & 0x03F) << 6) | (ch[1] & 0x03F); - } - else if ((result & 0x0F8) == 0x0F0) - { - ch[0] = get_next_byte(); - ch[1] = get_next_byte(); - ch[2] = get_next_byte(); - if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080 or (ch[2] & 0x0c0) != 0x080) - throw std::runtime_error("Invalid utf-8"); - result = ((result & 0x007) << 18) | ((ch[0] & 0x03F) << 12) | ((ch[1] & 0x03F) << 6) | (ch[2] & 0x03F); - - if (result > 0x10ffff) - throw std::runtime_error("invalid utf-8 character (out of range)"); - } - } - - return result; -} - -unicode json_parser::get_next_char() -{ - unicode result = 0; - - if (m_buffer_ptr > m_buffer) // if buffer is not empty we already did all the validity checks - result = *--m_buffer_ptr; - else - { - result = get_next_unicode(); - - if (result >= 0x080) - { - if (result == 0x0ffff or result == 0x0fffe) - throw std::runtime_error("character " + to_hex(result) + " is not allowed"); - - // surrogate support - else if (result >= 0x0D800 and result <= 0x0DBFF) - { - unicode uc2 = get_next_char(); - if (uc2 >= 0x0DC00 and uc2 <= 0x0DFFF) - result = (result - 0x0D800) * 0x400 + (uc2 - 0x0DC00) + 0x010000; - else - throw std::runtime_error("leading surrogate character without trailing surrogate character"); - } - else if (result >= 0x0DC00 and result <= 0x0DFFF) - throw std::runtime_error("trailing surrogate character without a leading surrogate"); - } - } - - // append(m_token, result); - // somehow, append refuses to inline, so we have to do it ourselves - if (result < 0x080) - m_token += (static_cast(result)); - else if (result < 0x0800) - { - char ch[2] = { - static_cast(0x0c0 | (result >> 6)), - static_cast(0x080 | (result & 0x3f))}; - m_token.append(ch, 2); - } - else if (result < 0x00010000) - { - char ch[3] = { - static_cast(0x0e0 | (result >> 12)), - static_cast(0x080 | ((result >> 6) & 0x3f)), - static_cast(0x080 | (result & 0x3f))}; - m_token.append(ch, 3); - } - else - { - char ch[4] = { - static_cast(0x0f0 | (result >> 18)), - static_cast(0x080 | ((result >> 12) & 0x3f)), - static_cast(0x080 | ((result >> 6) & 0x3f)), - static_cast(0x080 | (result & 0x3f))}; - m_token.append(ch, 4); - } - - return result; -} - -void json_parser::retract() -{ - assert(not m_token.empty()); - *m_buffer_ptr++ = pop_last_char(m_token); -} - -auto json_parser::get_next_token() -> token_t -{ - enum class state_t - { - Start, - Negative, - Zero, - Number, - NumberFraction, - NumberExpSign, - NumberExpDigit1, - NumberExpDigit2, - Literal, - String, - Escape, - EscapeHex1, - EscapeHex2, - EscapeHex3, - EscapeHex4 - } state = state_t::Start; - - token_t token = token_t::Undef; - double fraction = 1.0, exponent = 1; - bool negative = false, negativeExp = false; - - unicode hx; - - m_token.clear(); - - while (token == token_t::Undef) - { - unicode ch = get_next_char(); - - switch (state) - { - case state_t::Start: - switch (ch) - { - case 0: - token = token_t::Eof; - break; - case '{': - token = token_t::LeftBrace; - break; - case '}': - token = token_t::RightBrace; - break; - case '[': - token = token_t::LeftBracket; - break; - case ']': - token = token_t::RightBracket; - break; - case ',': - token = token_t::Comma; - break; - case ':': - token = token_t::Colon; - break; - case ' ': - case '\n': - case '\r': - case '\t': - m_token.clear(); - break; - case '"': - m_token.pop_back(); - state = state_t::String; - break; - case '-': - state = state_t::Negative; - break; - default: - if (ch == '0') - { - state = state_t::Zero; - m_token_int = 0; - } - else if (ch >= '1' and ch <= '9') - { - m_token_int = ch - '0'; - state = state_t::Number; - } - else if (ch < 128 and std::isalpha(ch)) - state = state_t::Literal; - else - throw zeep::exception("invalid character (" + (std::isprint(ch) ? std::string(1, ch) :to_hex(ch)) + ") in json"); - } - break; - - case state_t::Negative: - if (ch == '0') - { - state = state_t::Zero; - negative = true; - } - else if (ch >= '1' and ch <= '9') - { - state = state_t::Number; - m_token_int = ch - '0'; - negative = true; - } - else - throw zeep::exception("invalid character '-' in json"); - break; - - case state_t::Zero: -#if DISALLOW_LEADING_ZERO - if ((ch >= '0' and ch <= '9') or ch == '.') - throw zeep::exception("invalid number in json, should not start with zero"); -#else - if (ch >= '0' and ch <= '9') - throw zeep::exception("invalid number in json, should not start with zero"); - else if (ch == '.') - { - m_token_float = m_token_int = 0; - fraction = 0.1; - state = state_t::NumberFraction; - } -#endif - else - { - retract(); - m_token_int = 0; - token = token_t::Integer; - } - break; - - case state_t::Number: - if (ch >= '0' and ch <= '9') - m_token_int = 10 * m_token_int + (ch - '0'); - else if (ch == '.') - { - m_token_float = m_token_int; - fraction = 0.1; - state = state_t::NumberFraction; - } - else if (ch == 'e' or ch == 'E') - { - m_token_float = m_token_int; - state = state_t::NumberExpSign; - } - else - { - retract(); - token = token_t::Integer; - if (negative) - m_token_int = -m_token_int; - } - break; - - case state_t::NumberFraction: - if (ch >= '0' and ch <= '9') - { - m_token_float += fraction * (ch - '0'); - fraction /= 10; - } - else if (ch == 'e' or ch == 'E') - state = state_t::NumberExpSign; - else - { - retract(); - token = token_t::Number; - if (negative) - m_token_float = -m_token_float; - } - break; - - case state_t::NumberExpSign: - if (ch == '+') - state = state_t::NumberExpDigit1; - else if (ch == '-') - { - negativeExp = true; - state = state_t::NumberExpDigit1; - } - else if (ch >= '0' and ch <= '9') - { - exponent = (ch - '0'); - state = state_t::NumberExpDigit2; - } - break; - - case state_t::NumberExpDigit1: - if (ch >= '0' and ch <= '9') - { - exponent = (ch - '0'); - state = state_t::NumberExpDigit2; - } - else - throw zeep::exception("invalid floating point format in json"); - break; - - case state_t::NumberExpDigit2: - if (ch >= '0' and ch <= '9') - exponent = 10 * exponent + (ch - '0'); - else - { - retract(); - m_token_float *= std::pow(10, (negativeExp ? -1 : 1) * exponent); - if (negative) - m_token_float = -m_token_float; - token = token_t::Number; - } - break; - - case state_t::Literal: - if (ch > 128 or not std::isalpha(ch)) - { - retract(); - if (m_token == "true") - token = token_t::True; - else if (m_token == "false") - token = token_t::False; - else if (m_token == "null") - token = token_t::Null; - else - throw zeep::exception("Invalid literal found in json: " + m_token); - } - break; - - case state_t::String: - if (ch == '\"') - { - token = token_t::String; - m_token.pop_back(); - } - else if (ch == 0) - throw zeep::exception("Invalid unterminated string in json"); - else if (ch == '\\') - { - state = state_t::Escape; - m_token.pop_back(); - } - break; - - case state_t::Escape: - switch (ch) - { - case '"': - case '\\': - case '/': - break; - - case 'n': m_token.back() = '\n'; break; - case 't': m_token.back() = '\t'; break; - case 'r': m_token.back() = '\r'; break; - case 'f': m_token.back() = '\f'; break; - case 'b': m_token.back() = '\b'; break; - - case 'u': - state = state_t::EscapeHex1; - m_token.pop_back(); - break; - - default: - throw zeep::exception("Invalid escape sequence in json (\\" + std::string{static_cast(ch)} + ')'); - } - if (state == state_t::Escape) - state = state_t::String; - break; - - case state_t::EscapeHex1: - if (ch >= 0 and ch <= '9') - hx = ch - '0'; - else if (ch >= 'a' and ch <= 'f') - hx = 10 + ch - 'a'; - else if (ch >= 'A' and ch <= 'F') - hx = 10 + ch - 'A'; - else - throw zeep::exception("Invalid hex sequence in json"); - m_token.pop_back(); - state = state_t::EscapeHex2; - break; - - case state_t::EscapeHex2: - if (ch >= 0 and ch <= '9') - hx = 16 * hx + ch - '0'; - else if (ch >= 'a' and ch <= 'f') - hx = 16 * hx + 10 + ch - 'a'; - else if (ch >= 'A' and ch <= 'F') - hx = 16 * hx + 10 + ch - 'A'; - else - throw zeep::exception("Invalid hex sequence in json"); - m_token.pop_back(); - state = state_t::EscapeHex3; - break; - - case state_t::EscapeHex3: - if (ch >= 0 and ch <= '9') - hx = 16 * hx + ch - '0'; - else if (ch >= 'a' and ch <= 'f') - hx = 16 * hx + 10 + ch - 'a'; - else if (ch >= 'A' and ch <= 'F') - hx = 16 * hx + 10 + ch - 'A'; - else - throw zeep::exception("Invalid hex sequence in json"); - m_token.pop_back(); - state = state_t::EscapeHex4; - break; - - case state_t::EscapeHex4: - if (ch >= 0 and ch <= '9') - hx = 16 * hx + ch - '0'; - else if (ch >= 'a' and ch <= 'f') - hx = 16 * hx + 10 + ch - 'a'; - else if (ch >= 'A' and ch <= 'F') - hx = 16 * hx + 10 + ch - 'A'; - else - throw zeep::exception("Invalid hex sequence in json"); - m_token.pop_back(); - append(m_token, hx); - state = state_t::String; - break; - } - } - - return token; -} - -void json_parser::match(token_t expected) -{ - if (m_lookahead != expected) - throw zeep::exception("Syntax error in json, expected " + describe_token(expected) + " but found " + describe_token(m_lookahead)); - - m_lookahead = get_next_token(); -} - -void json_parser::parse_value(json::element& e) -{ - switch (m_lookahead) - { - case token_t::Eof: - break; - - case token_t::Null: - match(m_lookahead); - break; - - case token_t::False: - match(m_lookahead); - e = false; - break; - - case token_t::True: - match(m_lookahead); - e = true; - break; - - case token_t::Integer: - match(m_lookahead); - e = m_token_int; - break; - - case token_t::Number: - match(m_lookahead); - e = m_token_float; - break; - - case token_t::LeftBrace: - match(m_lookahead); - parse_object(e); - match(token_t::RightBrace); - break; - - case token_t::LeftBracket: - match(m_lookahead); - parse_array(e); - match(token_t::RightBracket); - break; - - case token_t::String: - e = m_token; - match(m_lookahead); - break; - - default: - throw std::runtime_error("Syntax eror in json, unexpected token " + describe_token(m_lookahead)); - } -} - -void json_parser::parse_object(json::element& e) -{ - for (;;) - { - if (m_lookahead == token_t::RightBrace or m_lookahead == token_t::Eof) - break; - - auto name = m_token; - match(token_t::String); - match(token_t::Colon); - - json::element v; - parse_value(v); - e.emplace(name, v); - - if (m_lookahead != token_t::Comma) - break; - - match(m_lookahead); - } -} - -void json_parser::parse_array(json::element& e) -{ - for (;;) - { - if (m_lookahead == token_t::RightBracket or m_lookahead == token_t::Eof) - break; - - json::element v; - parse_value(v); - e.emplace_back(v); - - if (m_lookahead != token_t::Comma) - break; - - match(m_lookahead); - } -} - -void json_parser::parse(json::element& obj) -{ - m_lookahead = get_next_token(); - parse_value(obj); - if (m_lookahead != token_t::Eof) - throw zeep::exception("Extraneaous data after parsing json"); -} - -void parse_json(const std::string& json, element& object) -{ - std::istringstream s(json); - json_parser p(s); - - p.parse(object); -} - -void parse_json(std::istream& is, element& object) -{ - json_parser p(is); - p.parse(object); -} - -namespace literals -{ -element operator""_json(const char* s, size_t n) -{ - element result; - parse_json(std::string{s, n}, result); - return result; -} -} - -} diff -Nru libzeep-5.0.1/lib-xml/GNUmakefile.in libzeep-5.0.2/lib-xml/GNUmakefile.in --- libzeep-5.0.1/lib-xml/GNUmakefile.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-xml/GNUmakefile.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,181 +0,0 @@ -# Makefile for the libzeep-xml library -# -# Copyright Maarten L. Hekkelman, UMC St. Radboud 2008-2013. -# Copyright Maarten L. Hekkelman, 2014-2019 -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -.PHONY: firstTarget -firstTarget: lib - -CXX = @CXX@ -CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ -LDFLAGS = @LDFLAGS@ @LIBS@ @BOOST_LDFLAGS@ -LIBS = @LIBS@ - -prefix = @prefix@ -exec_prefix = @exec_prefix@ -libdir = @libdir@ -includedir = @includedir@ -pkgconfigdir = $(libdir)/pkgconfig - -LIB_NAME = @PACKAGE_NAME@-xml -LIB_TARGET = $(LIB_NAME).la -STAGE_DIR = @abs_top_builddir@/lib - -ifneq "$(CHECK_CONFIG)" "" - -GNUmakefile: ../config.status GNUmakefile.in - cd ..; $(SHELL) ./config.status - -../config.status: ../configure - cd ..; $(SHELL) ./config.status --recheck - -../configure: ../configure.ac - cd ..; autoconf - -endif - -# libtool stuff - -LIBTOOL = $(SHELL) @abs_top_builddir@/libtool -CXXCOMPILE = $(LIBTOOL) --silent --tag=CXX --mode=compile $(CXX) $(CXXFLAGS) -CXXLINK = $(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ - -# main build variables -CXXFLAGS += -I. -pthread -I ../include/ -WARNINGS += all no-multichar - -# Use the DEBUG flag to build debug versions of the code -DEBUG = @DEBUG@ - -ifeq "$(DEBUG)" "1" -DEFINES += DEBUG -CXXFLAGS += -g -O0 -LDFLAGS += -g -else -CXXFLAGS += -O3 -DEFINES += NDEBUG -endif - -# targets - -VPATH += src:test - -CXXFLAGS += $(DEFINES:%=-D%) -CXXFLAGS += $(WARNINGS:%=-W%) - -OBJDIR = obj -ifeq "$(DEBUG)" "1" - OBJDIR := $(OBJDIR).dbg -endif - -$(OBJDIR): - mkdir -p $(OBJDIR) - -OBJECTS = \ - $(OBJDIR)/character-classification.lo \ - $(OBJDIR)/doctype.lo \ - $(OBJDIR)/document.lo \ - $(OBJDIR)/node.lo \ - $(OBJDIR)/parser.lo \ - $(OBJDIR)/xpath.lo - -HEADERS = \ - zeep/config.hpp \ - zeep/value-serializer.hpp \ - zeep/streambuf.hpp \ - zeep/exception.hpp \ - zeep/nvp.hpp \ - zeep/unicode-support.hpp \ - zeep/type-traits.hpp \ - zeep/xml/serialize.hpp \ - zeep/xml/xpath.hpp \ - zeep/xml/character-classification.hpp \ - zeep/xml/doctype.hpp \ - zeep/xml/node.hpp \ - zeep/xml/document.hpp \ - zeep/xml/parser.hpp - -$(LIB_TARGET): $(OBJECTS) - $(CXXLINK) -rpath $(libdir) $(OBJECTS) $(LIBS) - $(LIBTOOL) --mode=install cp $@ $(STAGE_DIR) - -.PHONY: lib -lib: $(LIB_TARGET) - -.PHONY: install-lib -install-lib: lib - install -d $(libdir) - $(LIBTOOL) --mode=install install $(LIB_TARGET) $(libdir) - -.PHONY: install-dev -install-dev: - install -d $(includedir)/zeep/xml - for f in $(HEADERS); do install ../include/$$f $(includedir)/$$f; done - install -d $(pkgconfigdir) - install -m 644 $(LIB_NAME).pc $(pkgconfigdir)/$(LIB_NAME).pc - -.PHONY: install -install: install-lib install-dev - --include $(OBJECTS:%.lo=%.d) - -$(OBJDIR)/%.lo: %.cpp | $(OBJDIR) - @ echo ">>" $< - @ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< - -$(OBJDIR)/%.o: %.cpp | $(OBJDIR) - @ echo ">>" $< - @ $(CXX) $(CXXFLAGS) -MT $@ -MD -MP -MF $(OBJDIR)/$*.d -c -o $@ $< - -.PHONY: clean -clean: - rm -rf $(OBJDIR)/* $(LIB_TARGET) ../$(LIB_NAME)* ../.libs/$(LIB_NAME)* ../lib/$(LIB_NAME)* $(LIB_NAME).pc - -QUESTIONABLE_XML_TEST_IDS = \ - ibm-valid-P28-ibm28v02.xml \ - ibm-valid-P29-ibm29v01.xml \ - ibm-valid-P29-ibm29v02.xml \ - ibm-1-1-valid-P03-ibm03v09.xml \ - rmt-e2e-34 \ - rmt-e2e-55 \ - rmt-054 \ - rmt-ns10-006 \ - rmt-e3e-13 - -parser_PARAMS = XML-Test-Suite/xmlconf/xmlconf.xml $(QUESTIONABLE_XML_TEST_IDS:%=--questionable=%) --print-ids -xpath_PARAMS = XPath-Test-Suite/xpath-tests.xml - -ZEEP_LIBS = xml -BOOST_LIBS = program_options - -define TEST_template = - --include $$(OBJDIR)/$(1)-test.d - -$(1)_OBJECTS = $$(OBJDIR)/$(1)-test.o - -test/$(1)-test: lib $$($(1)_OBJECTS) - @ echo ">>> building $(1)-test" - @ $(CXX) -o $$@ $$($(1)_OBJECTS) $$(CFLAGS) $$(CXXFLAGS) $(LDFLAGS) -L../lib $(ZEEP_LIBS:%=-lzeep-%) $(BOOST_LIBS:%=-lboost_%) $(LIBS) - -.PHONY: $(1)-test -$(1)-test: test/$(1)-test - cd test; ./$(1)-test $$($(1)_PARAMS) - -endef - -test/XML-Test-Suite/xmlconf/xmlconf.xml: test/XML-Test-Suite.tbz - cd test; tar xf XML-Test-Suite.tbz - -parser-test: test/XML-Test-Suite/xmlconf/xmlconf.xml - -TESTS = unit parser serializer xpath -$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part)))) - -.PHONY: test -test: $(TESTS:%=%-test) - -FORCE: diff -Nru libzeep-5.0.1/lib-xml/libzeep-xml.pc.in libzeep-5.0.2/lib-xml/libzeep-xml.pc.in --- libzeep-5.0.1/lib-xml/libzeep-xml.pc.in 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-xml/libzeep-xml.pc.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: libzeep-xml -Description: C++ xml library -Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lzeep-xml -Cflags: -I${includedir} diff -Nru libzeep-5.0.1/lib-xml/src/parser.cpp libzeep-5.0.2/lib-xml/src/parser.cpp --- libzeep-5.0.1/lib-xml/src/parser.cpp 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-xml/src/parser.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,4148 +0,0 @@ -// Copyright Maarten L. Hekkelman, Radboud University 2008-2013. -// Copyright Maarten L. Hekkelman, 2014-2020 -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include - -#include - -#include -#include - -namespace ba = boost::algorithm; - -namespace zeep::xml -{ - -bool is_absolute_path(const std::string& s) -{ - bool result = false; - - if (not s.empty()) - { - if (s[0] == '/') - result = true; - else if (isalpha(s[0])) - { - std::string::const_iterator ch = s.begin() + 1; - while (ch != s.end() and isalpha(*ch)) - ++ch; - result = ch != s.end() and *ch == ':'; - } - } - - return result; -} - -// #define url_hexdigit "[[:digit:]a-fA-F]" -// #define url_unreserved "[-[:alnum:]._~]" -// #define url_pct_encoded "%" url_hexdigit "{2}" -// #define url_sub_delims "[!$&'()*+,;=]" -// #define url_userinfo "(?:((?:" url_unreserved "|" url_pct_encoded "|" url_sub_delims ")+)@)?" -// #define url_scheme "[[:alpha:]][[:alnum:]]*" -// #define url_host "(\\[(?:[[:digit:]a-fA-F:]+)\\]|(?:" url_unreserved "|" url_pct_encoded "|" url_sub_delims ")+)" -// #define url_port "(?::([[:digit:]]+))?" -// #define url_pchar url_unreserved "|" url_pct_encoded "|" url_sub_delims "|:|@" -// #define url_path "(?:/((?:" url_pchar "|\\*|\\?|/)*))?" -// #define url_fragment "(?:#(?:" url_pchar ")*)?" -// -// const std::regex kURL_rx( url_scheme "://" url_userinfo url_host url_port url_path url_fragment); - -// const std::regex kURL_rx(R"([[:alpha:]][[:alnum:]]*:.+)"); - -bool is_valid_url(const std::string& url) -{ - // no, really... - auto cp = url.find(':'); - - return cp > 1 and cp != std::string::npos and std::isalpha(url[0]); - // return std::regex_match(url, kURL_rx); -} - -// parsing XML is somewhat like macro processing, -// we can encounter entities that need to be expanded into replacement text -// and so we declare data_source objects that can be stacked. - -class data_source; - -// exception generated by data_source - -class source_exception : public zeep::exception -{ -public: - source_exception(const std::string& msg) : exception(msg), m_wmsg(msg) {} - ~source_exception() throw() {} - - std::string m_wmsg; -}; - -// A data source can have a base dir which is the directory the data came from. -// This information is needed when a relative uri is found in an external ID. - -class data_source -{ -public: - data_source(const data_source&) = delete; - data_source& operator=(const data_source&) = delete; - - data_source() - : m_base("."), m_encoding(encoding_type::UTF8), m_line_nr(1) - { - static int sNextID = 0; - m_id = sNextID++; - } - - virtual ~data_source() {} - - // data_source is a virtual base class. Derivatives need to declare the next function. - virtual unicode get_next_char() = 0; - - void base(const std::string& dir) { m_base = dir; } - const std::string& base() const { return m_base; } - - encoding_type encoding() const { return m_encoding; } - virtual void encoding(encoding_type enc) { m_encoding = enc; } - virtual bool has_bom() { return false; } - - void version(float v) { m_version = v; } - - int id() const { return m_id; } - - int line_nr() const { return m_line_nr; } - void line_nr(int l) { m_line_nr = l; } - -protected: - std::string m_base; - encoding_type m_encoding; - float m_version = 1.0f; - int m_id; // for nesting checks - int m_line_nr; // for reporting errors -}; - -// -------------------------------------------------------------------- -// An std::istream implementation of data_source. - -class istream_data_source : public data_source -{ -public: - istream_data_source(std::istream& data) - : m_data(&data), m_owns_data(false) - { - guess_encoding(); - } - - istream_data_source(std::istream *data) - : m_data(data) - { - guess_encoding(); - } - - ~istream_data_source() - { - if (m_owns_data) - delete m_data; - } - - virtual bool has_bom() { return m_has_bom; } - - virtual unicode get_next_char(); - virtual void encoding(encoding_type enc); - -private: - void guess_encoding(); - void parse_text_decl(); - unicode next_utf8_char(); - unicode next_utf16le_char(); - unicode next_utf16be_char(); - unicode next_iso88591_char(); - unicode next_ascii_char(); - - unsigned char next_byte() - { - int result = m_data->rdbuf()->sbumpc(); - - if (result == std::streambuf::traits_type::eof()) - result = 0; - - return static_cast(result); - } - - std::istream* m_data; - bool m_owns_data = true; - unicode m_char_buffer = 0; // used in detecting \r\n algorithm - - typedef unicode (istream_data_source::*next_func)(void); - - next_func m_next; - bool m_has_bom = false; -}; - -void istream_data_source::guess_encoding() -{ - // see if there is a BOM - // if there isn't, we assume the data is UTF-8 - - int ch = m_data->rdbuf()->sgetc(); - if (ch != std::streambuf::traits_type::eof()) - { - char ch1 = static_cast(ch); - - if (ch1 == char(0xfe)) - { - char ch2 = m_data->rdbuf()->snextc(); - - if (ch2 == char(0xff)) - { - m_data->rdbuf()->snextc(); - m_encoding = encoding_type::UTF16BE; - m_has_bom = true; - } - else - m_data->rdbuf()->sungetc(); - } - else if (ch1 == char(0xff)) - { - char ch2 = m_data->rdbuf()->snextc(); - - if (ch2 == char(0xfe)) - { - m_data->rdbuf()->snextc(); - m_encoding = encoding_type::UTF16LE; - m_has_bom = true; - } - else - m_data->rdbuf()->sungetc(); - } - else if (ch1 == char(0xef)) - { - char ch2 = m_data->rdbuf()->snextc(); - char ch3 = m_data->rdbuf()->snextc(); - - if (ch2 == char(0xbb) and ch3 == char(0xbf)) - { - m_data->rdbuf()->snextc(); - m_encoding = encoding_type::UTF8; - m_has_bom = true; - } - else - { - m_data->rdbuf()->sungetc(); - m_data->rdbuf()->sputbackc(ch1); - } - } - } - - encoding(m_encoding); -} - -void istream_data_source::encoding(encoding_type enc) -{ - if (enc != m_encoding) - { - if (is_single_byte_encoding(enc) and is_single_byte_encoding(m_encoding)) - m_encoding = enc; - else - throw invalid_exception("Invalid encoding specified, incompatible with actual encoding"); - } - - data_source::encoding(enc); - - switch (m_encoding) - { - case encoding_type::UTF8: - m_next = &istream_data_source::next_utf8_char; - break; - case encoding_type::UTF16LE: - m_next = &istream_data_source::next_utf16le_char; - break; - case encoding_type::UTF16BE: - m_next = &istream_data_source::next_utf16be_char; - break; - case encoding_type::ISO88591: - m_next = &istream_data_source::next_iso88591_char; - break; - case encoding_type::ASCII: - m_next = &istream_data_source::next_ascii_char; - break; - default: break; - } -} - -unicode istream_data_source::next_utf8_char() -{ - unicode result = next_byte(); - - if (result & 0x080) - { - unsigned char ch[3]; - - if ((result & 0x0E0) == 0x0C0) - { - ch[0] = next_byte(); - if ((ch[0] & 0x0c0) != 0x080) - throw source_exception("Invalid utf-8"); - result = ((result & 0x01F) << 6) | (ch[0] & 0x03F); - } - else if ((result & 0x0F0) == 0x0E0) - { - ch[0] = next_byte(); - ch[1] = next_byte(); - if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080) - throw source_exception("Invalid utf-8"); - result = ((result & 0x00F) << 12) | ((ch[0] & 0x03F) << 6) | (ch[1] & 0x03F); - } - else if ((result & 0x0F8) == 0x0F0) - { - ch[0] = next_byte(); - ch[1] = next_byte(); - ch[2] = next_byte(); - if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080 or (ch[2] & 0x0c0) != 0x080) - throw source_exception("Invalid utf-8"); - result = ((result & 0x007) << 18) | ((ch[0] & 0x03F) << 12) | ((ch[1] & 0x03F) << 6) | (ch[2] & 0x03F); - - if (result > 0x10ffff) - throw source_exception("invalid utf-8 character (out of range)"); - } - } - - return result; -} - -unicode istream_data_source::next_utf16le_char() -{ - unsigned char c1 = next_byte(), c2 = next_byte(); - - unicode ch = (static_cast(c2) << 8) | c1; - - if (ch >= 0x080) - { - // surrogate support - if (ch >= 0x0D800 and ch <= 0x0DBFF) - { - unicode uc2 = next_utf16le_char(); - if (uc2 >= 0x0DC00 and uc2 <= 0x0DFFF) - ch = (ch - 0x0D800) * 0x400 + (uc2 - 0x0DC00) + 0x010000; - else - throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: leading surrogate character without trailing surrogate character"); - } - else if (ch >= 0x0DC00 and ch <= 0x0DFFF) - throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: trailing surrogate character without a leading surrogate"); - } - - return ch; -} - -unicode istream_data_source::next_utf16be_char() -{ - unsigned char c1 = next_byte(), c2 = next_byte(); - - unicode ch = (static_cast(c1) << 8) | c2; - - if (ch >= 0x080) - { - // surrogate support - if (ch >= 0x0D800 and ch <= 0x0DBFF) - { - unicode uc2 = next_utf16be_char(); - if (uc2 >= 0x0DC00 and uc2 <= 0x0DFFF) - ch = (ch - 0x0D800) * 0x400 + (uc2 - 0x0DC00) + 0x010000; - else - throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: leading surrogate character without trailing surrogate character"); - } - else if (ch >= 0x0DC00 and ch <= 0x0DFFF) - throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: trailing surrogate character without a leading surrogate"); - } - - return ch; -} - -unicode istream_data_source::next_iso88591_char() -{ - return (unicode)next_byte(); -} - -unicode istream_data_source::next_ascii_char() -{ - unicode c = next_byte(); - - if (c > 127) - throw not_wf_exception("Invalid ascii value"); - - return c; -} - -unicode istream_data_source::get_next_char() -{ - unicode ch = m_char_buffer; - - if (ch == 0) - ch = (this->*m_next)(); - else - m_char_buffer = 0; - - if (ch == 0x0ffff or ch == 0x0fffe) - throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: character " + to_hex(ch) + " is not allowed"); - - if (ch == '\r') - { - ch = (this->*m_next)(); - if (ch != '\n' and (m_version == 1.0 or ch != 0x85 or m_encoding == encoding_type::ASCII)) - m_char_buffer = ch; - ch = '\n'; - } - - if (m_encoding != encoding_type::ASCII) - { - if (m_version > 1.0 and ch == 0x85) - ch = '\n'; - else if (m_encoding != encoding_type::ISO88591 and m_version > 1.0 and ch == 0x2028) - ch = '\n'; - } - - if (ch == '\n') - ++m_line_nr; - - return ch; -} - -// -------------------------------------------------------------------- - -class string_data_source : public data_source -{ -public: - string_data_source(const std::string& data) - : m_data(data), m_ptr(m_data.begin()) - { - } - - unicode get_next_char() - { - unicode result = 0; - - if (m_ptr != m_data.end()) - std::tie(result, m_ptr) = get_first_char(m_ptr); - - if (result == '\n') - ++m_line_nr; - - return result; - } - -private: - - std::string m_data; - std::string::iterator m_ptr; -}; - -// -------------------------------------------------------------------- - -class entity_data_source : public string_data_source -{ -public: - entity_data_source(const std::string& text, const std::string& entity_path) - : string_data_source(text) - { - base(entity_path); - } -}; - -// -------------------------------------------------------------------- - -class parameter_entity_data_source : public string_data_source -{ -public: - parameter_entity_data_source(const std::string& data, const std::string& base_dir) - : string_data_source(" " + data + " ") - { - base(base_dir); - } -}; - -// -------------------------------------------------------------------- - -class valid_nesting_validator -{ -public: - valid_nesting_validator(data_source& source) - : m_id(source.id()) {} - - void check(data_source& source) - { - if (source.id() != m_id) - throw invalid_exception("proper nesting validation error"); - } - -private: - int m_id; -}; - -// -------------------------------------------------------------------- - -struct parser_imp -{ - parser_imp(std::istream& data, parser& parser); - - ~parser_imp(); - - // Here comes the parser part - void parse(bool validate, bool validate_ns); - - // the productions. Some are inlined below for obvious reasons. - // names of the productions try to follow those in the TR http://www.w3.org/TR/xml - void prolog(); - void xml_decl(); - void text_decl(); - - void s(bool at_least_one = false); - void eq(); - void misc(); - void element(doctype::validator& valid); - void content(doctype::validator& valid); - - void comment(); - void pi(); - - void pereference(); - - void doctypedecl(); - data_source* get_data_source(const std::string& pubid, std::string uri); - std::tuple read_external_id(); - void intsubset(); - void extsubset(); - void declsep(); - void conditionalsect(); - void ignoresectcontents(); - void markup_decl(); - void element_decl(); - void contentspec(doctype::element_& element); - doctype::content_spec_ptr cp(); - void attlist_decl(); - void notation_decl(); - void entity_decl(); - void parameter_entity_decl(); - void general_entity_decl(); - void entity_value(); - - // at several locations we need to parse out entity references from strings: - void parse_parameter_entity_declaration(std::string& s); - void parse_general_entity_declaration(std::string& s); - - // same goes for attribute values - std::string normalize_attribute_value(const std::string& s, bool isCDATA) - { - push_data_source(new string_data_source(s), false); - - std::string result = normalize_attribute_value(); - - if (m_standalone and result != s) - not_valid("Document cannot be standalone since an attribute was modified"); - - if (not isCDATA) - collapse_spaces(result); - - return result; - } - - std::string normalize_attribute_value(); - - void collapse_spaces(std::string& s); - - // The scanner is next. We recognize the following tokens: - enum XMLToken - { - Undef, - - Eq = '=', - QuestionMark = '?', - GreaterThan = '>', - OpenBracket = '[', - CloseBracket = ']', - OpenParenthesis = '(', - CloseParenthesis = ')', - Percent = '%', - Plus = '+', - Pipe = '|', - Asterisk = '*', - Slash = '/', - Comma = ',', - - Eof = 256, - Other, // - - // these are tokens for the markup - - XMLDecl, // - Content, // anything else up to the next element start - }; - - // for debugging and error reporting we have the following describing routine - constexpr const char* describe_token(XMLToken token) - { - switch (token) - { - case XMLToken::Undef: return "undefined"; - case XMLToken::Eq: return "="; - case XMLToken::QuestionMark: return "?"; - case XMLToken::GreaterThan: return ">"; - case XMLToken::OpenBracket: return "["; - case XMLToken::CloseBracket: return "]"; - case XMLToken::OpenParenthesis: return "("; - case XMLToken::CloseParenthesis:return ")"; - case XMLToken::Percent: return "%"; - case XMLToken::Plus: return "+"; - case XMLToken::Pipe: return "|"; - case XMLToken::Asterisk: return "*"; - case XMLToken::Slash: return "/"; - case XMLToken::Comma: return ","; - case XMLToken::Eof: return "end of file"; - case XMLToken::Other: return "an invalid character"; - case XMLToken::XMLDecl: return "'() const { return m_source; } - data_source& operator*() const { return *m_source; } - - bool inserted() const { return m_inserted; } - - parser_imp& m_impl; - data_source* m_source; - std::array m_buffer; - int m_buffer_offset; - XMLToken m_lookahead; - std::string m_token; - bool m_inserted; - }; - - void push_data_source(data_source* source, bool insert) - { - source->version(m_version); - m_source.emplace(this, source, insert); - } - - void pop_data_source() - { - assert(not m_source.empty()); - m_source.pop(); - } - - // And during parsing we keep track of the namespaces we encounter. - class ns_state - { - public: - ns_state(parser_imp *imp) - : m_parser_imp(imp), m_next(imp->m_ns) - { - m_parser_imp->m_ns = this; - } - - ~ns_state() - { - m_parser_imp->m_ns = m_next; - } - - std::string default_ns() - { - std::string result = m_default_ns; - if (result.empty() and m_next != nullptr) - result = m_next->default_ns(); - return result; - } - - void default_ns(const std::string& ns) - { - m_default_ns = ns; - } - - std::string ns_for_prefix(const std::string& prefix) - { - std::string result; - - if (m_unbound.count(prefix) == 0) - { - auto np = m_known.find(prefix); - if (np != m_known.end()) - result = np->second; - else if (m_next != nullptr) - result = m_next->ns_for_prefix(prefix); - } - - return result; - } - - void bind(const std::string& prefix, const std::string& uri) - { - m_known[prefix] = uri; - } - - void unbind(const std::string& prefix) - { - m_unbound.insert(prefix); - } - - bool is_known_prefix(const std::string& prefix) - { - bool result = false; - - if (not m_unbound.count(prefix)) - { - if (m_known.count(prefix)) - result = true; - else if (m_next != nullptr) - result = m_next->is_known_prefix(prefix); - } - - return result; - } - - bool is_known_uri(const std::string& uri) - { - return find_if(m_known.begin(), m_known.end(), [uri] (auto k) { return k.second == uri; }) != m_known.end() or - (m_next != nullptr and m_next->is_known_uri(uri)); - } - - private: - - parser_imp *m_parser_imp; - std::string m_default_ns; - ns_state *m_next; - - std::map m_known; - std::set m_unbound; - }; - - bool is_char(unicode uc) - { - return - m_version == 1.0 ? - is_valid_xml_1_0_char(uc) : - is_valid_xml_1_1_char(uc); - } - - bool is_space(unicode uc) - { - return uc == ' ' or uc == '\t' or uc == '\n' or uc == '\r'; - } - - bool is_space(const std::string& s) - { - return not s.empty() and s.find_first_not_of(" \t\r\n") == std::string::npos; - } - - bool is_referrable_char(unicode charref) - { - return - m_version == 1.0 ? - charref == 0x09 or - charref == 0x0A or - charref == 0x0D or - (charref > 0x01F and charref < 0x0D800) or - (charref > 0x0DFFF and charref < 0x0FFFE) or - (charref > 0x0FFFF and charref < 0x00110000) : - - // 1.1 - (charref > 0x0 and charref < 0x0D800) or - (charref > 0x0DFFF and charref < 0x0FFFE) or - (charref > 0x0FFFF and charref < 0x00110000) - ; - } - - parser& m_parser; - bool m_validating; - bool m_validating_ns; - bool m_has_dtd; - XMLToken m_lookahead; - std::string m_token; - - std::stack m_source; - - std::array m_buffer; - std::array::iterator m_buffer_ptr = m_buffer.begin(); - - float m_version = 1.0f; - encoding_type m_encoding = encoding_type::UTF8; - bool m_standalone; - - // parser state - bool m_external_subset = false; - bool m_internal_subset = false; - bool m_allow_peref = false; - bool m_in_declsep = false; - bool m_in_external_dtd = false; - bool m_in_content = false; - - std::vector m_entities_on_stack; - ns_state* m_ns; - - std::string m_root_element; - doctype::entity_list m_parameter_entities; - doctype::entity_list m_general_entities; - doctype::element_list m_doctype; - - std::set m_notations; - std::set m_ids; // attributes of type ID should be unique - std::set m_unresolved_ids; // keep track of IDREFS that were not found yet - - std::unique_ptr m_xmlSpaceAttr; -}; - -// -------------------------------------------------------------------- -// some inlines - -inline void parser_imp::s(bool at_least_one) -{ - if (at_least_one) - match(XMLToken::Space); - - while (m_lookahead == XMLToken::Space) - match(XMLToken::Space); -} - -inline void parser_imp::eq() -{ - s(); - match(XMLToken::Eq); - s(); -} - -// -------------------------------------------------------------------- - -parser_imp::parser_imp(std::istream& data, parser& parser) - : m_parser(parser), m_validating(true), m_has_dtd(false), m_lookahead(XMLToken::Eof) - , m_encoding(encoding_type::ASCII), m_standalone(false), m_ns(nullptr) -{ - push_data_source(new istream_data_source(data), false); - - m_encoding = m_source.top()->encoding(); - - // these entities are always recognized: - m_general_entities.push_back(new doctype::general_entity("lt", "<")); - m_general_entities.push_back(new doctype::general_entity("gt", ">")); - m_general_entities.push_back(new doctype::general_entity("amp", "&")); - m_general_entities.push_back(new doctype::general_entity("apos", "'")); - m_general_entities.push_back(new doctype::general_entity("quot", """)); - - m_xmlSpaceAttr.reset(new doctype::attribute_("xml:space", doctype::AttributeType::Enumerated, { "preserve", "default" })); -} - -parser_imp::~parser_imp() -{ - while (not m_source.empty()) - m_source.pop(); - - // there may be parameter_entity_data_source's left in the stack - // as a result of a validation error/exception - - for (doctype::entity *e : m_parameter_entities) - delete e; - - for (doctype::entity *e : m_general_entities) - delete e; - - for (doctype::element_ *e : m_doctype) - delete e; -} - -const doctype::entity& parser_imp::get_general_entity(const std::string& name) const -{ - auto e = std::find_if(m_general_entities.begin(), m_general_entities.end(), - [name](auto e) { return e->name() == name; }); - - if (e == m_general_entities.end()) - not_well_formed("undefined entity reference '" + name + "'"); - - if ((*e)->is_external() and m_standalone) - not_valid("Document cannot be standalone since entity " + name + " is defined externally"); - - return **e; -} - -const doctype::entity& parser_imp::get_parameter_entity(const std::string& name) const -{ - auto e = find_if(m_parameter_entities.begin(), m_parameter_entities.end(), - [name](auto e) { return e->name() == name; }); - - if (e == m_parameter_entities.end()) - not_well_formed("Undefined parameter entity '" + m_token + '\''); - - return **e; -} - -const doctype::element_ *parser_imp::get_element(const std::string& name) const -{ - const doctype::element_ *result = nullptr; - - auto e = find_if(m_doctype.begin(), m_doctype.end(), - [name](auto e) { return e->name() == name; }); - - if (e != m_doctype.end()) - result = *e; - - return result; -} - -unicode parser_imp::get_next_char() -{ - unicode result = 0; - - if (m_buffer_ptr > m_buffer.begin()) // if buffer is not empty we already did all the validity checks - result = *--m_buffer_ptr; - - if (result == 0) - { - while (not m_source.empty()) - { - try - { - result = m_source.top()->get_next_char(); - } - catch (source_exception& e) - { - not_well_formed(e.m_wmsg); - } - - if (result == 0 and m_source.top().inserted()) - { - m_source.pop(); - continue; - } - - break; - } - } - - append(m_token, result); - - return result; -} - -void parser_imp::retract() -{ - assert(not m_token.empty()); - - assert(m_buffer_ptr < m_buffer.end()); - *m_buffer_ptr++ = pop_last_char(m_token); -} - -void parser_imp::match(XMLToken token) -{ - if (m_lookahead != token) - { - std::string expected = describe_token(token); - std::string found = describe_token(m_lookahead); - - not_well_formed( - "Error parsing XML, expected '" + expected + "' but found '" + found + "' ('" + m_token + "')"); - } - - if (m_in_content) - m_lookahead = get_next_content(); - else - { - m_lookahead = get_next_token(); - - if (m_lookahead == XMLToken::PEReference and not m_in_declsep) - { - if (m_allow_peref) - pereference(); - else - not_well_formed("Invalid entity reference at this location"); - } - } -} - -void parser_imp::not_well_formed(const std::string& msg) const -{ - std::stringstream s; - if (m_source.empty()) - s << "Document not well-formed: " << msg; - else - s << "Document (line: " << m_source.top()->line_nr() << ") not well-formed: " << msg; - throw not_wf_exception(s.str()); -} - -void parser_imp::not_valid(const std::string& msg) const -{ - if (m_validating) - { - std::stringstream s; - - if (m_source.empty()) - s << "Document not valid: " << msg; - else - s << "Document (line: " << m_source.top()->line_nr() << ") not valid: " << msg; - - throw invalid_exception(s.str()); - } - else - m_parser.report_invalidation(msg); -} - -/* - get_next_token is a hand optimised scanner for tokens in the input stream. -*/ - -parser_imp::XMLToken parser_imp::get_next_token() -{ - enum State - { - state_Start = 0, - state_WhiteSpace = 10, - state_Tag = 20, - state_String = 30, - state_PERef = 40, - state_HashName = 49, - state_Name = 50, - state_CommentOrDoctype = 60, - state_Comment = 70, - state_DocTypeDecl = 80, - state_PI = 90, - }; - - XMLToken token = XMLToken::Undef; - unicode quote_char = 0; - int state = state_Start; - bool might_be_name = false; - - m_token.clear(); - - while (token == XMLToken::Undef) - { - unicode uc = get_next_char(); - - switch (state) - { - // start scanning. - case state_Start: - switch (uc) - { - case 0: - token = XMLToken::Eof; - break; - - case ' ': - case '\t': - case '\n': - state = state_WhiteSpace; - break; - - case '<': - state = state_Tag; - break; - - case '\'': - case '"': - { - state = state_String; - quote_char = uc; - break; - } - - case '%': - state = state_PERef; - break; - - case '#': - state = state_HashName; - break; - - case '=': token = XMLToken::Eq; break; - case '?': token = XMLToken::QuestionMark; break; - case '>': token = XMLToken::GreaterThan; break; - case '[': token = XMLToken::OpenBracket; break; - case ']': token = XMLToken::CloseBracket; break; - case '(': token = XMLToken::OpenParenthesis; break; - case ')': token = XMLToken::CloseParenthesis; break; - // case '%': token = XMLToken::Percent; break; - case '+': token = XMLToken::Plus; break; - case '|': token = XMLToken::Pipe; break; - case '*': token = XMLToken::Asterisk; break; - case '/': token = XMLToken::Slash; break; - case ',': token = XMLToken::Comma; break; - - default: - if (is_name_start_char(uc)) - { - might_be_name = true; - state = state_Name; - } - else if (is_name_char(uc)) - state = state_Name; - else if (is_char(uc)) - token = XMLToken::Other; - else - not_well_formed("Unexpected character: " + ((uc < 128 and std::isprint(uc)) ? std::string(1, uc) : to_hex(uc)) ); - - break; - } - break; - - // collect all whitespace - case state_WhiteSpace: - if (uc != ' ' and uc != '\t' and uc != '\n') - { - retract(); - token = XMLToken::Space; - } - break; - - // We scanned a < character, decide what to do next. - case state_Tag: - if (uc == '!') // comment or doctype thing - state = state_CommentOrDoctype; - else if (uc == '/') // end tag - token = XMLToken::ETag; - else if (uc == '?') // processing instruction - state = state_PI; - else // anything else - { - retract(); - token = XMLToken::STag; - } - break; - - // So we had - case state_Comment: - if (uc == '-') - token = XMLToken::Comment; - else - not_well_formed("Invalid formatted comment"); - break; - - // scan for processing instructions - case state_PI: - if (not is_name_char(uc)) - { - retract(); - - // we treat the xml processing instruction separately. - if (m_token.substr(2) == "xml") - token = XMLToken::XMLDecl; - else if (iequals(m_token.substr(2), "xml")) - not_well_formed(" in text - break; - - case ' ': - case '\t': - case '\n': - case '\r': - state = state_WhiteSpace; - break; - - default: - if (is_char(uc)) - state = state_Content; // anything else - else - not_well_formed("Unexpected character in content: " + (std::isprint(uc) ? std::string(1, uc) : to_hex(uc)) ); - break; - } - break; - - // collect all whitespace - case state_WhiteSpace: - if (not is_space(uc)) - { - retract(); - token = XMLToken::Space; - } - break; - - // content. Only stop collecting character when uc is special - case state_Content: - if (uc == ']') - state = state_Illegal; - else if (uc == 0 or uc == '<' or uc == '&') - { - retract(); - token = XMLToken::Content; - } - else if (not is_referrable_char(uc)) - not_well_formed("Illegal character in content text"); - break; - - // beginning of a tag? - case state_Tag: - if (uc == '/') - token = XMLToken::ETag; - else if (uc == '?') // processing instruction - state = state_PI; - else if (uc == '!') // comment or CDATA - state = state_CommentOrCDATA; - else - { - retract(); - token = XMLToken::STag; - } - break; - - // processing instructions - case state_PI: - if (not is_name_char(uc)) - { - retract(); - token = XMLToken::PI; - } - break; - - // comment or CDATA - case state_CommentOrCDATA: - if (uc == '-') // comment - state = state_Comment; - else if (uc == '[') - state = state_CDATA; // CDATA - else - not_well_formed("invalid content"); - break; - - case state_Comment: - if (uc == '-') - token = XMLToken::Comment; - else - not_well_formed("invalid content"); - break; - - // CDATA (we parsed ') - { - token = XMLToken::CDSect; - m_token = m_token.substr(9, m_token.length() - 12); - } - else if (uc == 0) - not_well_formed("runaway cdata section"); - else if (uc != ']') - state = state_CDATA + 2; - break; - - // reference, either a character reference or a general entity reference - case state_Reference: - if (uc == '#') - state = state_Reference + 2; - else if (is_name_start_char(uc)) - state = state_Reference + 1; - else - not_well_formed("stray ampersand found in content"); - break; - - case state_Reference + 1: - if (not is_name_char(uc)) - { - if (uc != ';') - not_well_formed("invalid entity found in content, missing semicolon?"); - token = XMLToken::Reference; - m_token = m_token.substr(1, m_token.length() - 2); - } - break; - - case state_Reference + 2: - if (uc == 'x') - state = state_Reference + 4; - else if (uc >= '0' and uc <= '9') - { - charref = uc - '0'; - state += 1; - } - else - not_well_formed("invalid character reference"); - break; - - case state_Reference + 3: - if (uc >= '0' and uc <= '9') - charref = charref * 10 + (uc - '0'); - else if (uc == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character in content text"); - m_token.clear(); - append(m_token, charref); - token = XMLToken::CharRef; - } - else - not_well_formed("invalid character reference"); - break; - - case state_Reference + 4: - if (uc >= 'a' and uc <= 'f') - { - charref = uc - 'a' + 10; - state += 1; - } - else if (uc >= 'A' and uc <= 'F') - { - charref = uc - 'A' + 10; - state += 1; - } - else if (uc >= '0' and uc <= '9') - { - charref = uc - '0'; - state += 1; - } - else - not_well_formed("invalid character reference"); - break; - - case state_Reference + 5: - if (uc >= 'a' and uc <= 'f') - charref = (charref << 4) + (uc - 'a' + 10); - else if (uc >= 'A' and uc <= 'F') - charref = (charref << 4) + (uc - 'A' + 10); - else if (uc >= '0' and uc <= '9') - charref = (charref << 4) + (uc - '0'); - else if (uc == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character in content text"); - m_token.clear(); - append(m_token, charref); - token = XMLToken::CharRef; - } - else - not_well_formed("invalid character reference"); - break; - - // ]]> is illegal - case state_Illegal: - if (uc == ']') - state += 1; - else - { - retract(); - state = state_Content; - } - break; - - case state_Illegal + 1: - if (uc == '>') - not_well_formed("the sequence ']]>' is illegal in content text"); - else if (uc != ']') - { - retract(); - retract(); - state = state_Content; - } - break; - - default: - assert(false); - not_well_formed("state reached that should not be reachable"); - } - } - - //#if DEBUG - // if (VERBOSE) - // std::cout << "content: " << describe_token(token) << " (" << m_token << ')' << std::endl; - //#endif - - return token; -} - -float parser_imp::parse_version() -{ - float result = -1; - - if (m_token.length() >= 3) - { - auto i = m_token.begin(); - if (*i == '1' and *(i + 1) == '.') - { - result = 1.0f; - float dec = 10; - - for (i += 2; i != m_token.end(); ++i) - { - if (*i < '0' or *i > '9') - { - result = -1; - break; - } - - result += (*i - '0') / dec; - dec += 10; - } - } - } - - if (result < 1.0 or result >= 2.0) - not_well_formed("Invalid version specified: '" + m_token + "'"); - - return result; -} - -void parser_imp::parse(bool validate, bool validate_ns) -{ - m_validating = validate; - m_validating_ns = validate_ns; - - m_lookahead = get_next_token(); - - prolog(); - - const doctype::element_ *e = get_element(m_root_element); - - if (m_has_dtd and e == nullptr and m_validating) - not_valid("Element '" + m_root_element + "' is not defined in DTD"); - - if (e) - { - doctype::content_spec_element allowed(m_root_element); - doctype::validator valid(&allowed); - - element(valid); - } - else - { - doctype::content_spec_any allowed; - doctype::validator valid(&allowed); - - element(valid); - } - - misc(); - - if (m_lookahead != XMLToken::Eof) - not_well_formed("garbage at end of file"); - - if (not m_unresolved_ids.empty()) - { - not_valid("document contains references to the following undefined ID's: '" + ba::join(m_unresolved_ids, ", ") + "'"); - } -} - -void parser_imp::prolog() -{ - xml_decl(); - - misc(); - - if (m_lookahead == XMLToken::DocType) - { - doctypedecl(); - misc(); - } - else if (m_validating) - not_valid("document type declaration is missing"); -} - -void parser_imp::xml_decl() -{ - if (m_lookahead == XMLToken::XMLDecl) - { - encoding_type encoding = m_encoding; - - if (m_encoding == encoding_type::UTF8) - m_source.top()->encoding(encoding_type::ASCII); - - match(XMLToken::XMLDecl); - - s(true); - if (m_token != "version") - not_well_formed("expected a version attribute in XML declaration"); - match(XMLToken::Name); - eq(); - - auto version = parse_version(); - - m_version = version; - - if (m_version >= 2.0f or m_version < 1.0f) - not_well_formed("This library only supports XML version 1.0 or 1.1"); - - m_source.top()->version(version); - match(XMLToken::String); - - if (m_lookahead == XMLToken::Space) - { - s(true); - - if (m_token == "encoding") - { - match(XMLToken::Name); - eq(); - ba::to_upper(m_token); - if (m_token == "US-ASCII") - encoding = encoding_type::ASCII; - else if (m_token == "ISO-8859-1") - encoding = encoding_type::ISO88591; - else if (m_token == "UTF-8") - encoding = encoding_type::UTF8; - else if (m_token == "UTF-16") - { - if (m_source.top()->encoding() != encoding_type::UTF16LE and m_source.top()->encoding() != encoding_type::UTF16BE) - not_well_formed("Inconsistent encoding attribute in XML declaration"); - encoding = m_source.top()->encoding(); - } - else - not_well_formed("Unsupported encoding value '" + m_token + "'"); - match(XMLToken::String); - - s(); - } - - if (m_token == "standalone") - { - match(XMLToken::Name); - eq(); - if (m_token != "yes" and m_token != "no") - not_well_formed("Invalid XML declaration, standalone value should be either yes or no"); - m_standalone = (m_token == "yes"); - match(XMLToken::String); - s(); - } - } - - m_encoding = encoding; - m_source.top()->encoding(encoding); - - match(XMLToken::QuestionMark); - match(XMLToken::GreaterThan); - - m_parser.xml_decl(m_encoding, m_standalone, m_version); - } - else - m_encoding = m_source.top()->encoding(); -} - -void parser_imp::text_decl() -{ - if (m_lookahead == XMLToken::XMLDecl) - { - encoding_type encoding = m_source.top()->encoding(); - if (encoding == encoding_type::UTF8 and not m_source.top()->has_bom()) - m_source.top()->encoding(encoding_type::ISO88591); - - match(XMLToken::XMLDecl); - - s(true); - - if (m_token == "version") - { - match(XMLToken::Name); - eq(); - - auto version = parse_version(); - if (version > m_version) - not_well_formed("Version mismatch between document and external entity"); - - match(XMLToken::String); - s(m_version == 1.0); - } - - if (m_token != "encoding") - { - if (m_version == 1.0) - not_well_formed("encoding attribute is mandatory in text declaration"); - } - else - { - match(XMLToken::Name); - eq(); - match(XMLToken::String); - s(); - } - - m_source.top()->encoding(encoding); - - match(XMLToken::QuestionMark); - match(XMLToken::GreaterThan); - } -} - -void parser_imp::misc() -{ - for (;;) - { - switch (m_lookahead) - { - case XMLToken::Space: - s(); - continue; - - case XMLToken::Comment: - comment(); - continue; - - case XMLToken::PI: - pi(); - continue; - default:; - } - - break; - } -} - -void parser_imp::doctypedecl() -{ - match(XMLToken::DocType); - - m_has_dtd = true; - - s(true); - - auto name = m_token; - match(XMLToken::Name); - - m_root_element = name; - - std::unique_ptr dtd; - - if (m_lookahead == XMLToken::Space) - { - s(true); - - if (m_lookahead == XMLToken::Name) - { - std::string pubid, uri; - - if (m_token == "SYSTEM") - { - match(XMLToken::Name); - s(true); - - uri = m_token; - - if (not is_valid_system_literal(uri)) - not_well_formed("invalid system literal"); - } - else if (m_token == "PUBLIC") - { - match(XMLToken::Name); - s(true); - - pubid = m_token; - match(XMLToken::String); - - // validate the public ID - if (not is_valid_public_id(pubid)) - not_well_formed("Invalid public ID"); - - s(true); - uri = m_token; - } - else - not_well_formed("Expected external id starting with either SYSTEM or PUBLIC"); - - match(XMLToken::String); - dtd.reset(get_data_source(pubid, uri)); - - if (m_validating and not dtd) - not_valid("Could not load DTD " + uri); - - m_parser.doctype_decl(m_root_element, pubid, uri); - } - - s(); - } - - if (m_lookahead == XMLToken::OpenBracket) - { - match(XMLToken::OpenBracket); - intsubset(); - match(XMLToken::CloseBracket); - - s(); - } - - // internal subset takes precedence over external subset, so - // if the external subset is defined, include it here. - if (dtd.get() != nullptr) - { - push_data_source(dtd.release(), false); - - m_external_subset = true; - m_in_external_dtd = true; - - m_lookahead = get_next_token(); - - text_decl(); - - extsubset(); - - match(XMLToken::Eof); - - pop_data_source(); - m_in_external_dtd = false; - } - - match(XMLToken::GreaterThan); - - // test if all ndata references can be resolved - - for (const doctype::entity *e : m_general_entities) - { - if (e->is_parsed() == false and m_notations.count(e->get_ndata()) == 0) - not_valid("Undefined NOTATION '" + e->get_ndata() + "'"); - } - - // and the notations in the doctype attlists - for (const doctype::element_ *element : m_doctype) - { - for (const doctype::attribute_ *attr : element->get_attributes()) - { - if (attr->get_type() != doctype::AttributeType::Notation) - continue; - - for (const std::string& n : attr->get_enums()) - { - if (m_notations.count(n) == 0) - not_valid("Undefined NOTATION '" + n + "'"); - } - } - } -} - -void parser_imp::pereference() -{ - const doctype::entity& e = get_parameter_entity(m_token); - - push_data_source(new parameter_entity_data_source(e.get_replacement(), e.get_path()), true); - - match(XMLToken::PEReference); -} - -void parser_imp::intsubset() -{ - save_state state_intsubset(m_internal_subset, true); - save_state state_allow_peref(m_allow_peref, true); - - for (;;) - { - switch (m_lookahead) - { - case XMLToken::Element: - case XMLToken::AttList: - case XMLToken::Entity: - case XMLToken::Notation: - markup_decl(); - continue; - - case XMLToken::PI: - pi(); - continue; - - case XMLToken::Comment: - comment(); - continue; - - case XMLToken::Space: - case XMLToken::PEReference: - declsep(); - continue; - default:; - } - - break; - } -} - -void parser_imp::declsep() -{ - save_state state_declsep(m_in_declsep, true); - - switch (m_lookahead) - { - case XMLToken::PEReference: - { - const doctype::entity& e = get_parameter_entity(m_token); - - match(XMLToken::PEReference); - - push_data_source(new parameter_entity_data_source(e.get_replacement(), e.get_path()), false); - - m_lookahead = get_next_token(); - extsubset(); - - match(XMLToken::Eof); - pop_data_source(); - - break; - } - - case XMLToken::Space: - s(); - break; - - default:; - } -} - -void parser_imp::extsubset() -{ - save_state state_extsubset(m_external_subset, true); - save_state state_allow_peref(m_allow_peref, true); - - for (;;) - { - switch (m_lookahead) - { - case XMLToken::Element: - case XMLToken::AttList: - case XMLToken::Entity: - case XMLToken::Notation: - markup_decl(); - continue; - - case XMLToken::IncludeIgnore: - conditionalsect(); - continue; - - case XMLToken::PI: - pi(); - continue; - - case XMLToken::Comment: - comment(); - continue; - - case XMLToken::Space: - case XMLToken::PEReference: - declsep(); - continue; - - default:; - } - - break; - } -} - -void parser_imp::conditionalsect() -{ - valid_nesting_validator check(*m_source.top()); - match(XMLToken::IncludeIgnore); - - s(); - - bool include = false; - - if (m_token == "INCLUDE") - include = true; - else if (m_token == "IGNORE") - include = false; - else if (m_lookahead == XMLToken::Name) - not_well_formed("Unexpected literal '" + m_token + "'"); - - match(XMLToken::Name); - - check.check(*m_source.top()); - - s(); - - if (include) - { - match(XMLToken::OpenBracket); - extsubset(); - match(XMLToken::CloseBracket); - match(XMLToken::CloseBracket); - check.check(*m_source.top()); - match(XMLToken::GreaterThan); - } - else - { - ignoresectcontents(); - check.check(*m_source.top()); - m_lookahead = get_next_token(); - } -} - -void parser_imp::ignoresectcontents() -{ - // yet another tricky routine, skip - - int state = 0; - bool done = false; - - while (not done) - { - unicode ch = get_next_char(); - if (ch == 0) - not_well_formed("runaway IGNORE section"); - - switch (state) - { - case 0: - if (ch == ']') - state = 1; - else if (ch == '<') - state = 10; - break; - - case 1: - if (ch == ']') - state = 2; - else - { - retract(); - state = 0; - } - break; - - case 2: - if (ch == '>') - done = true; - else if (ch != ']') - { - retract(); - state = 0; - } - break; - - case 10: - if (ch == '!') - state = 11; - else - { - retract(); - state = 0; - } - break; - - case 11: - if (ch == '[') - { - ignoresectcontents(); - state = 0; - } - else - { - retract(); - state = 0; - } - break; - } - } -} - -void parser_imp::markup_decl() -{ - save_state state_allow_peref(m_allow_peref, m_external_subset); - - switch (m_lookahead) - { - case XMLToken::Element: - element_decl(); - break; - - case XMLToken::AttList: - attlist_decl(); - break; - - case XMLToken::Entity: - entity_decl(); - break; - - case XMLToken::Notation: - notation_decl(); - break; - - case XMLToken::PI: - pi(); - break; - - case XMLToken::Comment: - comment(); - break; - - case XMLToken::Space: - s(); - break; - - default:; - } -} - -void parser_imp::element_decl() -{ - valid_nesting_validator check(*m_source.top()); - - match(XMLToken::Element); - s(true); - - std::string name = m_token; - if (ba::starts_with(name, "xmlns:")) - not_well_formed("Element names should not start with xmlns:"); - - auto e = std::find_if(m_doctype.begin(), m_doctype.end(), - [name](auto e) { return e->name() == name; }); - - if (e == m_doctype.end()) - e = m_doctype.insert(m_doctype.end(), new doctype::element_(name, true, m_in_external_dtd)); - else if ((*e)->is_declared()) - not_valid("duplicate element declaration for element '" + name + "'"); - - match(XMLToken::Name); - s(true); - - contentspec(**e); - s(); - - check.check(*m_source.top()); - match(XMLToken::GreaterThan); -} - -void parser_imp::contentspec(doctype::element_& element) -{ - if (m_lookahead == XMLToken::Name) - { - if (m_token == "EMPTY") - element.set_allowed(new doctype::content_spec_empty); - else if (m_token == "ANY") - element.set_allowed(new doctype::content_spec_any); - else - not_well_formed("Invalid element content specification"); - match(XMLToken::Name); - } - else - { - valid_nesting_validator check(*m_source.top()); - - match(XMLToken::OpenParenthesis); - - std::unique_ptr allowed; - - s(); - - bool mixed = false; - bool more = false; - - if (m_lookahead == XMLToken::PCData) // Mixed - { - mixed = true; - match(m_lookahead); - - s(); - - std::set seen; - - while (m_lookahead == XMLToken::Pipe) - { - more = true; - - match(XMLToken::Pipe); - s(); - - if (seen.count(m_token) > 0) - not_valid("no duplicates allowed in mixed content for element declaration"); - seen.insert(m_token); - - match(XMLToken::Name); - s(); - } - - doctype::content_spec_choice *choice = new doctype::content_spec_choice(true); - for (auto& c : seen) - choice->add(new doctype::content_spec_element(c)); - allowed.reset(choice); - } - else // children - { - allowed.reset(cp()); - - s(); - - if (m_lookahead == XMLToken::Comma) - { - doctype::content_spec_seq *seq = new doctype::content_spec_seq(allowed.release()); - allowed.reset(seq); - - more = true; - do - { - match(m_lookahead); - s(); - seq->add(cp()); - s(); - } while (m_lookahead == XMLToken::Comma); - } - else if (m_lookahead == XMLToken::Pipe) - { - doctype::content_spec_choice *choice = new doctype::content_spec_choice(allowed.release(), false); - allowed.reset(choice); - - more = true; - do - { - match(m_lookahead); - s(); - choice->add(cp()); - s(); - } while (m_lookahead == XMLToken::Pipe); - } - } - - s(); - - - check.check(*m_source.top()); - - match(XMLToken::CloseParenthesis); - - if (m_lookahead == XMLToken::Asterisk) - { - allowed.reset(new doctype::content_spec_repeated(allowed.release(), '*')); - match(XMLToken::Asterisk); - } - else if (more) - { - if (mixed) - { - allowed.reset(new doctype::content_spec_repeated(allowed.release(), '*')); - match(XMLToken::Asterisk); - } - else if (m_lookahead == XMLToken::Plus) - { - allowed.reset(new doctype::content_spec_repeated(allowed.release(), '+')); - match(XMLToken::Plus); - } - else if (m_lookahead == XMLToken::QuestionMark) - { - allowed.reset(new doctype::content_spec_repeated(allowed.release(), '?')); - match(XMLToken::QuestionMark); - } - } - - element.set_allowed(allowed.release()); - } -} - -doctype::content_spec_ptr parser_imp::cp() -{ - std::unique_ptr result; - - if (m_lookahead == XMLToken::OpenParenthesis) - { - valid_nesting_validator check(*m_source.top()); - - match(XMLToken::OpenParenthesis); - - s(); - result.reset(cp()); - s(); - if (m_lookahead == XMLToken::Comma) - { - doctype::content_spec_seq *seq = new doctype::content_spec_seq(result.release()); - result.reset(seq); - - do - { - match(m_lookahead); - s(); - seq->add(cp()); - s(); - } while (m_lookahead == XMLToken::Comma); - } - else if (m_lookahead == XMLToken::Pipe) - { - doctype::content_spec_choice *choice = new doctype::content_spec_choice(result.release(), false); - result.reset(choice); - - do - { - match(m_lookahead); - s(); - choice->add(cp()); - s(); - } while (m_lookahead == XMLToken::Pipe); - } - - s(); - check.check(*m_source.top()); - match(XMLToken::CloseParenthesis); - } - else - { - std::string name = m_token; - match(XMLToken::Name); - - result.reset(new doctype::content_spec_element(name)); - } - - switch (m_lookahead) - { - case XMLToken::Asterisk: - result.reset(new doctype::content_spec_repeated(result.release(), '*')); - match(XMLToken::Asterisk); - break; - case XMLToken::Plus: - result.reset(new doctype::content_spec_repeated(result.release(), '+')); - match(XMLToken::Plus); - break; - case XMLToken::QuestionMark: - result.reset(new doctype::content_spec_repeated(result.release(), '?')); - match(XMLToken::QuestionMark); - break; - default:; - } - - return result.release(); -} - -void parser_imp::entity_decl() -{ - match(XMLToken::Entity); - s(true); - - if (m_lookahead == XMLToken::Percent) // PEDecl - parameter_entity_decl(); - else - general_entity_decl(); -} - -void parser_imp::parameter_entity_decl() -{ - match(XMLToken::Percent); - s(true); - - std::string name = m_token; - match(XMLToken::Name); - - if (m_validating_ns and name.find(':') != std::string::npos) - not_well_formed("Entity names should not contain a colon"); - if (ba::starts_with(name, "xmlns:")) - not_well_formed("Entity names should not start with xmlns:"); - - s(true); - - std::string path; - std::string value; - - { - // PEDef is either a EntityValue... - if (m_lookahead == XMLToken::String) - { - value = m_token; - match(XMLToken::String); - - parse_parameter_entity_declaration(value); - } - else // ... or an external id - { - std::tie(path, value) = read_external_id(); - match(XMLToken::String); - } - - s(); - } - - match(XMLToken::GreaterThan); - - if (find_if(m_parameter_entities.begin(), m_parameter_entities.end(), - [name](auto e) { return e->name() == name; }) == m_parameter_entities.end()) - { - m_parameter_entities.push_back(new doctype::parameter_entity(name, value, path)); - } -} - -void parser_imp::general_entity_decl() -{ - std::string name = m_token; - match(XMLToken::Name); - s(true); - - if (m_validating_ns and name.find(':') != std::string::npos) - not_well_formed("Entity names should not contain a colon"); - if (ba::starts_with(name, "xmlns:")) - not_well_formed("Entity names should not start with xmlns:"); - - std::string value, ndata; - bool external = false; - bool parsed = true; - - if (m_lookahead == XMLToken::String) - { - value = m_token; - match(XMLToken::String); - - parse_general_entity_declaration(value); - } - else // ... or an ExternalID - { - std::tie(std::ignore, value) = read_external_id(); - match(XMLToken::String); - external = true; - - if (m_lookahead == XMLToken::Space) - { - s(true); - if (m_lookahead == XMLToken::Name and m_token == "NDATA") - { - match(XMLToken::Name); - s(true); - - parsed = false; - ndata = m_token; - - match(XMLToken::Name); - } - } - } - - s(); - - match(XMLToken::GreaterThan); - - if (std::find_if(m_general_entities.begin(), m_general_entities.end(), - [name](auto e) { return e->name() == name; }) == m_general_entities.end()) - { - m_general_entities.push_back(new doctype::general_entity(name, value, external, parsed)); - - if (not parsed) - m_general_entities.back()->set_ndata(ndata); - - if (m_in_external_dtd) - m_general_entities.back()->set_externally_defined(true); - } -} - -void parser_imp::attlist_decl() -{ - match(XMLToken::AttList); - s(true); - std::string element = m_token; - match(XMLToken::Name); - - auto dte = find_if(m_doctype.begin(), m_doctype.end(), - [element](auto e) { return e->name() == element; }); - - if (dte == m_doctype.end()) - dte = m_doctype.insert(m_doctype.end(), new doctype::element_(element, false, m_in_external_dtd)); - - // attribute defaults - - while (m_lookahead == XMLToken::Space) - { - s(true); - - if (m_lookahead != XMLToken::Name) - break; - - std::string name = m_token; - match(XMLToken::Name); - s(true); - - std::unique_ptr attribute; - - // att type: several possibilities: - if (m_lookahead == XMLToken::OpenParenthesis) // enumeration - { - std::vector enums; - - match(m_lookahead); - - s(); - - enums.push_back(m_token); - if (m_lookahead == XMLToken::Name) - match(XMLToken::Name); - else - match(XMLToken::NMToken); - - s(); - - while (m_lookahead == XMLToken::Pipe) - { - match(XMLToken::Pipe); - - s(); - - if (find(enums.begin(), enums.end(), m_token) != enums.end()) - not_valid("Duplicate token in enumerated attribute declaration ('" + m_token + "')"); - - enums.push_back(m_token); - if (m_lookahead == XMLToken::Name) - match(XMLToken::Name); - else - match(XMLToken::NMToken); - - s(); - } - - s(); - - match(XMLToken::CloseParenthesis); - - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::Enumerated, enums)); - } - else - { - std::string type = m_token; - match(XMLToken::Name); - - std::vector notations; - - if (type == "CDATA") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::CDATA)); - else if (type == "ID") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::ID)); - else if (type == "IDREF") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::IDREF)); - else if (type == "IDREFS") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::IDREFS)); - else if (type == "ENTITY") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::ENTITY)); - else if (type == "ENTITIES") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::ENTITIES)); - else if (type == "NMTOKEN") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::NMTOKEN)); - else if (type == "NMTOKENS") - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::NMTOKENS)); - else if (type == "NOTATION") - { - s(true); - match(XMLToken::OpenParenthesis); - s(); - - notations.push_back(m_token); - match(XMLToken::Name); - - s(); - - while (m_lookahead == XMLToken::Pipe) - { - match(XMLToken::Pipe); - - s(); - - if (find(notations.begin(), notations.end(), m_token) != notations.end()) - not_valid("Duplicate token in enumerated attribute declaration ('" + m_token + "')"); - notations.push_back(m_token); - match(XMLToken::Name); - - s(); - } - - s(); - - match(XMLToken::CloseParenthesis); - - attribute.reset(new doctype::attribute_(name, doctype::AttributeType::Notation, notations)); - } - else - not_well_formed("invalid attribute type"); - } - - // att def - - s(true); - - std::string value; - - switch (m_lookahead) - { - case XMLToken::Required: - match(m_lookahead); - attribute->set_default(doctype::AttributeDefault::Required, ""); - break; - - case XMLToken::Implied: - match(m_lookahead); - attribute->set_default(doctype::AttributeDefault::Implied, ""); - break; - - case XMLToken::Fixed: - { - match(m_lookahead); - if (attribute->get_type() == doctype::AttributeType::ID) - not_valid("the default declaration for an ID attribute declaration should be #IMPLIED or #REQUIRED"); - - s(true); - - std::string value = m_token; - normalize_attribute_value(value, attribute->get_type() == doctype::AttributeType::CDATA); - if (not value.empty() and not attribute->validate_value(value, m_general_entities)) - { - not_valid("default value '" + value + "' for attribute '" + name + "' is not valid"); - } - - attribute->set_default(doctype::AttributeDefault::Fixed, value); - match(XMLToken::String); - break; - } - - default: - { - if (attribute->get_type() == doctype::AttributeType::ID) - not_valid("the default declaration for an ID attribute declaration should be #IMPLIED or #REQUIRED"); - - if (m_standalone) - not_valid("Document cannot be standalone since there is a default value for an attribute"); - - std::string value = m_token; - normalize_attribute_value(value, attribute->get_type() == doctype::AttributeType::CDATA); - collapse_spaces(value); - if (not value.empty() and not attribute->validate_value(value, m_general_entities)) - { - not_valid("default value '" + value + "' for attribute '" + name + "' is not valid"); - } - attribute->set_default(doctype::AttributeDefault::None, value); - match(XMLToken::String); - break; - } - } - - if (attribute->get_type() == doctype::AttributeType::ID) - { - const doctype::attribute_list& atts = (*dte)->get_attributes(); - if (std::find_if(atts.begin(), atts.end(), - [](auto a) { return a->get_type() == doctype::AttributeType::ID; }) != atts.end()) - not_valid("only one attribute per element can have the ID type"); - } - - attribute->set_external(m_in_external_dtd); - // attribute->version(m_version); - (*dte)->add_attribute(attribute.release()); - } - - match(XMLToken::GreaterThan); -} - -void parser_imp::notation_decl() -{ - match(XMLToken::Notation); - s(true); - - std::string name = m_token, pubid, sysid; - - if (m_validating_ns and name.find(':') != std::string::npos) - not_well_formed("Notation names should not contain a colon"); - - if (m_notations.count(name) > 0) - not_valid("notation names should be unique"); - m_notations.insert(name); - - match(XMLToken::Name); - s(true); - - if (m_token == "SYSTEM") - { - match(XMLToken::Name); - s(true); - - sysid = m_token; - match(XMLToken::String); - - if (not is_valid_system_literal(sysid)) - not_well_formed("invalid system literal"); - } - else if (m_token == "PUBLIC") - { - match(XMLToken::Name); - s(true); - - pubid = m_token; - match(XMLToken::String); - - // validate the public ID - if (not is_valid_public_id(pubid)) - not_well_formed("Invalid public ID"); - - s(); - - if (m_lookahead == XMLToken::String) - { - sysid = m_token; - match(XMLToken::String); - } - } - else - not_well_formed("Expected either SYSTEM or PUBLIC"); - - s(); - - match(XMLToken::GreaterThan); - - collapse_spaces(sysid); - - ba::replace_all(pubid, "\t", " "); - ba::replace_all(pubid, "\n", " "); - collapse_spaces(pubid); - - m_parser.notation_decl(name, sysid, pubid); -} - -data_source* parser_imp::get_data_source(const std::string& pubid, std::string uri) -{ - data_source *result = nullptr; - - std::istream *is = m_parser.external_entity_ref(m_source.top()->base(), pubid, uri); - if (is != nullptr) - { - result = new istream_data_source(is); - - std::string::size_type s = uri.rfind('/'); - if (s == std::string::npos) - result->base(m_source.top()->base()); - else - { - uri.erase(s, std::string::npos); - - if (is_absolute_path(uri)) - result->base(uri); - else - result->base(m_source.top()->base() + '/' + uri); - } - } - - return result; -} - -std::tuple parser_imp::read_external_id() -{ - std::string result; - std::string path; - - std::string pubid, uri; - - if (m_token == "SYSTEM") - { - match(XMLToken::Name); - s(true); - - uri = m_token; - - if (not is_valid_system_literal(uri)) - not_well_formed("invalid system literal"); - } - else if (m_token == "PUBLIC") - { - match(XMLToken::Name); - s(true); - - pubid = m_token; - match(XMLToken::String); - - // validate the public ID - if (not is_valid_public_id(pubid)) - not_well_formed("Invalid public ID"); - - s(true); - uri = m_token; - } - else - not_well_formed("Expected external id starting with either SYSTEM or PUBLIC"); - - std::unique_ptr data(get_data_source(pubid, uri)); - - if (data) - { - push_data_source(data.release(), false); - - path = m_source.top()->base(); - - m_lookahead = get_next_token(); - - text_decl(); - - if (m_lookahead != XMLToken::Eof) - { - result = m_token; - - while (m_buffer_ptr > m_buffer.begin()) - append(result, *--m_buffer_ptr); - - while (unicode ch = m_source.top()->get_next_char()) - append(result, ch); - } - - pop_data_source(); - } - - return std::make_tuple(path, result); -} - -void parser_imp::parse_parameter_entity_declaration(std::string& s) -{ - std::string result; - - int state = 0; - unicode charref = 0; - std::string name; - int open = 0; - - for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) - { - unicode c = *i; - - switch (state) - { - case 0: - if (c == '&') - state = 1; - else if (c == '%') - { - if (m_allow_peref) - { - name.clear(); - state = 20; - } - else - not_well_formed("parameter entities may not occur in declarations that are not in an external subset"); - } - else if (c == '<') - { - ++open; - append(result, c); - } - else if (c == '>') - { - --open; - append(result, c); - } - else if (not is_char(c)) - not_well_formed("Invalid character in entity value"); - else - append(result, c); - break; - - case 1: - if (c == '#') - state = 2; - else - { - result += '&'; - append(result, c); - state = 0; - } - break; - - case 2: - if (c == 'x') - state = 4; - else if (c >= '0' and c <= '9') - { - charref = c - '0'; - state = 3; - } - else - not_well_formed("invalid character reference"); - break; - - case 3: - if (c >= '0' and c <= '9') - charref = charref * 10 + (c - '0'); - else if (c == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character referenced: " + to_hex(charref) + '\''); - - append(result, charref); - state = 0; - } - else - not_well_formed("invalid character reference"); - break; - - case 4: - if (c >= 'a' and c <= 'f') - { - charref = c - 'a' + 10; - state = 5; - } - else if (c >= 'A' and c <= 'F') - { - charref = c - 'A' + 10; - state = 5; - } - else if (c >= '0' and c <= '9') - { - charref = c - '0'; - state = 5; - } - else - not_well_formed("invalid character reference"); - break; - - case 5: - if (c >= 'a' and c <= 'f') - charref = (charref << 4) + (c - 'a' + 10); - else if (c >= 'A' and c <= 'F') - charref = (charref << 4) + (c - 'A' + 10); - else if (c >= '0' and c <= '9') - charref = (charref << 4) + (c - '0'); - else if (c == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); - - append(result, charref); - state = 0; - } - else - not_well_formed("invalid character reference"); - break; - - case 20: - if (c == ';') - { - const doctype::entity& e = get_parameter_entity(name); - result += e.get_replacement(); - state = 0; - } - else if (is_name_char(c)) - append(name, c); - else - not_well_formed("invalid parameter entity reference"); - break; - - default: - assert(false); - not_well_formed("invalid state"); - } - } - - if (state != 0) - not_well_formed("invalid reference"); - - if (open != 0) - not_valid("invalid reference"); - - swap(s, result); -} - -// parse out the general and parameter entity references in a value std::string -// for a general entity reference which is about to be stored. -void parser_imp::parse_general_entity_declaration(std::string& s) -{ - std::string result; - - int state = 0; - unicode charref = 0; - std::string name; - - auto sp = s.begin(); - auto se = s.end(); - - while (sp < se) - { - unicode c; - std::tie(c, sp) = get_first_char(sp); - - switch (state) - { - case 0: - if (c == '&') - state = 1; - else if (c == '%') - { - if (m_allow_peref) - { - name.clear(); - state = 20; - } - else - not_well_formed("parameter entities may not occur in declarations that are not in an external subset"); - } - else if (not is_char(c)) - not_well_formed("Invalid character in entity value"); - else - append(result, c); - break; - - case 1: - if (c == '#') - state = 2; - else if (is_name_start_char(c)) - { - name.clear(); - append(name, c); - state = 10; - } - break; - - case 2: - if (c == 'x') - state = 4; - else if (c >= '0' and c <= '9') - { - charref = c - '0'; - state = 3; - } - else - not_well_formed("invalid character reference"); - break; - - case 3: - if (c >= '0' and c <= '9') - charref = charref * 10 + (c - '0'); - else if (c == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); - - append(result, charref); - state = 0; - } - else - not_well_formed("invalid character reference"); - break; - - case 4: - if (c >= 'a' and c <= 'f') - { - charref = c - 'a' + 10; - state = 5; - } - else if (c >= 'A' and c <= 'F') - { - charref = c - 'A' + 10; - state = 5; - } - else if (c >= '0' and c <= '9') - { - charref = c - '0'; - state = 5; - } - else - not_well_formed("invalid character reference"); - break; - - case 5: - if (c >= 'a' and c <= 'f') - charref = (charref << 4) + (c - 'a' + 10); - else if (c >= 'A' and c <= 'F') - charref = (charref << 4) + (c - 'A' + 10); - else if (c >= '0' and c <= '9') - charref = (charref << 4) + (c - '0'); - else if (c == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); - - append(result, charref); - state = 0; - } - else - not_well_formed("invalid character reference"); - break; - - case 10: - if (c == ';') - { - result += '&'; - result += name; - result += ';'; - - state = 0; - } - else if (is_name_char(c)) - append(name, c); - else - not_well_formed("invalid entity reference"); - break; - - case 20: - if (c == ';') - { - const doctype::entity& e = get_parameter_entity(name); - result += e.get_replacement(); - state = 0; - } - else if (is_name_char(c)) - append(name, c); - else - not_well_formed("invalid parameter entity reference"); - break; - - default: - assert(false); - not_well_formed("invalid state"); - } - } - - if (state != 0) - not_well_formed("invalid reference"); - - swap(s, result); -} - -std::string parser_imp::normalize_attribute_value() -{ - std::string result; - - unicode charref = 0; - std::string name; - - enum State - { - state_Start, - state_ReferenceStart, - state_CharReferenceStart, - state_HexCharReference, - state_HexCharReference2, - state_DecCharReference, - state_EntityReference, - - } state = state_Start; - - for (;;) - { - unicode c = get_next_char(); - - if (c == 0) - break; - - if (c == '<') - not_well_formed("Attribute values may not contain '<' character"); - - switch (state) - { - case state_Start: - if (c == ' ' or c == '\t' or c == '\r' or c == '\n') - result += ' '; - else if (c == '&') - state = state_ReferenceStart; - else - append(result, c); - break; - - case state_ReferenceStart: - if (c == '#') - state = state_CharReferenceStart; - else if (is_name_start_char(c)) - { - name.clear(); - append(name, c); - state = state_EntityReference; - } - else - not_well_formed("invalid reference found in attribute value"); - break; - - case state_CharReferenceStart: - if (c == 'x') - state = state_HexCharReference; - else if (c >= '0' and c <= '9') - { - charref = c - '0'; - state = state_DecCharReference; - } - else - not_well_formed("invalid character reference"); - break; - - case state_DecCharReference: - if (c >= '0' and c <= '9') - charref = charref * 10 + (c - '0'); - else if (c == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); - - append(result, charref); - state = state_Start; - } - else - not_well_formed("invalid character reference"); - break; - - case state_HexCharReference: - if (c >= 'a' and c <= 'f') - { - charref = c - 'a' + 10; - state = state_HexCharReference2; - } - else if (c >= 'A' and c <= 'F') - { - charref = c - 'A' + 10; - state = state_HexCharReference2; - } - else if (c >= '0' and c <= '9') - { - charref = c - '0'; - state = state_HexCharReference2; - } - else - not_well_formed("invalid character reference"); - break; - - case state_HexCharReference2: - if (c >= 'a' and c <= 'f') - charref = (charref << 4) + (c - 'a' + 10); - else if (c >= 'A' and c <= 'F') - charref = (charref << 4) + (c - 'A' + 10); - else if (c >= '0' and c <= '9') - charref = (charref << 4) + (c - '0'); - else if (c == ';') - { - if (not is_referrable_char(charref)) - not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); - - append(result, charref); - state = state_Start; - } - else - not_well_formed("invalid character reference"); - break; - - case state_EntityReference: - if (c == ';') - { - if (std::find(m_entities_on_stack.begin(), m_entities_on_stack.end(), name) != m_entities_on_stack.end()) - not_well_formed("infinite recursion in nested entity references"); - - m_entities_on_stack.push_back(name); - - const doctype::entity& e = get_general_entity(name); - - if (e.is_external()) - not_well_formed("attribute value may not contain external entity reference"); - - if (e.is_externally_defined() and m_standalone) - not_well_formed("document marked as standalone but an external entity is referenced"); - - push_data_source(new entity_data_source(e.get_replacement(), m_source.top()->base()), false); - - std::string replacement = normalize_attribute_value(); - result += replacement; - - state = state_Start; - - m_entities_on_stack.pop_back(); - } - else if (is_name_char(c)) - append(name, c); - else - not_well_formed("invalid entity reference"); - break; - - default: - assert(false); - not_well_formed("invalid state"); - } - } - - if (state != state_Start) - not_well_formed("invalid reference"); - - m_source.pop(); - - return result; -} - -void parser_imp::collapse_spaces(std::string& s) -{ - auto i = s.begin(), o = s.begin();; - bool space = true; - - while (i != s.end()) - { - if (*i == ' ') - { - if (not space) - *o++ = ' '; - ++i; - space = true; - } - else - { - *o++ = *i++; - space = false; - } - } - - if (space and o != s.begin()) - --o; - - s.erase(o, s.end()); -} - -void parser_imp::element(doctype::validator& valid) -{ - save_state in_content(m_in_content, false); - - match(XMLToken::STag); - std::string name = m_token; - match(XMLToken::Name); - - if (not valid.allow(name)) - not_valid("element '" + name + "' not expected at this position"); - - const doctype::element_ *dte = get_element(name); - - if (m_has_dtd and dte == nullptr and m_validating) - not_valid("Element '" + name + "' is not defined in DTD"); - - doctype::validator sub_valid(dte); - - std::list attrs; - - ns_state ns(this); - std::set seen; - - for (;;) - { - if (m_lookahead != XMLToken::Space) - break; - - s(true); - - if (m_lookahead != XMLToken::Name) - break; - - std::string attr_name = m_token; - match(XMLToken::Name); - - if (seen.count(attr_name) > 0) - not_well_formed("multiple values for attribute '" + attr_name + "'"); - seen.insert(attr_name); - - eq(); - - const doctype::attribute_ *dta = nullptr; - if (dte != nullptr) - dta = dte->get_attribute(attr_name); - if (dta == nullptr and not m_validating and attr_name == "xml:space") - dta = m_xmlSpaceAttr.get(); - - if (dta == nullptr and m_validating) - not_valid("undeclared attribute '" + attr_name + "'"); - - std::string attr_value = normalize_attribute_value(m_token, dta == nullptr or dta->get_type() == doctype::AttributeType::CDATA); - match(XMLToken::String); - - if (m_validating and - dta != nullptr and - dta->get_default_type() == doctype::AttributeDefault::Fixed and - attr_value != std::get<1>(dta->get_default())) - { - not_valid("invalid value specified for fixed attribute"); - } - - // had a crash suddenly here deep down in ba::starts_with... - if (attr_name == "xmlns" or attr_name.compare(0, 6, "xmlns:", 6) == 0) // namespace support - { - if (not ((m_version > 1.0f and attr_value.empty()) or is_valid_url(attr_value))) - not_well_formed("Not a valid namespace URI: " + attr_value); - - if (not (m_version > 1.0f and attr_value.empty()) and ns.is_known_uri(attr_value)) - not_well_formed("This uri is repeated: " + attr_value); - - if (attr_value == "http://www.w3.org/XML/1998/namespace" or attr_value == "http://www.w3.org/2000/xmlns/") - not_well_formed("The xml namespace is reserved"); - - if (attr_name.length() == 5) - { - ns.default_ns(attr_value); - m_parser.start_namespace_decl("", attr_value); - } - else if (attr_name.length() == 6) - not_well_formed("Invalid xmlns: "); - else - { - std::string prefix = attr_name.substr(6); - - if (iequals(prefix, "xml") or iequals(prefix, "xmlns")) - not_well_formed(prefix + " is a preserved prefix"); - - if (m_version > 1.0f and attr_value.empty()) - ns.unbind(prefix); - else - { - ns.bind(prefix, attr_value); - m_parser.start_namespace_decl(prefix, attr_value); - } - } - - // if (not attr_value.empty()) - // ns.m_known_uris.insert(attr_value); - } - else - { - bool id = (attr_name == "xml:id"); - - if (dta != nullptr) - { - std::string v(attr_value); - - if (not dta->validate_value(attr_value, m_general_entities)) - { - if (dta == m_xmlSpaceAttr.get()) - not_well_formed("invalid value ('" + attr_value + "') for attribute " + attr_name + ""); - else - not_valid("invalid value ('" + attr_value + "') for attribute " + attr_name + ""); - } - - if (m_validating and m_standalone and dta->is_external() and v != attr_value) - not_valid("attribute value modified as a result of an external defined attlist declaration, which is not valid in a standalone document"); - - if (dta->get_type() == doctype::AttributeType::ID) - { - id = true; - - if (m_validating_ns and attr_value.find(':') != std::string::npos) - not_valid("ID attribute value should not contain a colon"); - - if (m_ids.count(attr_value) > 0) - { - not_valid("attribute value ('" + attr_value + "') for attribute '" + attr_name + "' is not unique"); - } - - m_ids.insert(attr_value); - - if (m_unresolved_ids.count(attr_value) > 0) - m_unresolved_ids.erase(attr_value); - } - else if (dta->get_type() == doctype::AttributeType::IDREF) - { - if (attr_value.empty()) - not_valid("attribute value for attribute '" + attr_name + "' may not be empty"); - - if (not m_ids.count(attr_value)) - m_unresolved_ids.insert(attr_value); - } - else if (dta->get_type() == doctype::AttributeType::IDREFS) - { - if (attr_value.empty()) - not_valid("attribute value for attribute '" + attr_name + "' may not be empty"); - - std::string::size_type b = 0, e = attr_value.find(' '); - while (e != std::string::npos) - { - if (e - b > 0) - { - std::string id = attr_value.substr(b, e); - if (not m_ids.count(id)) - m_unresolved_ids.insert(id); - } - b = e + 1; - e = attr_value.find(' ', b); - } - - if (b != std::string::npos and b < attr_value.length()) - { - std::string id = attr_value.substr(b); - if (not m_ids.count(id)) - m_unresolved_ids.insert(id); - } - } - } - - detail::attr attr; - attr.m_name = attr_name; - attr.m_value = attr_value; - attr.m_id = id; - - if (m_ns != nullptr and dta == nullptr) - { - std::string::size_type d = attr_name.find(':'); - if (d != std::string::npos) - { - if (attr_name.find(':', d + 1) != std::string::npos) - not_well_formed("Multiple colons in attribute name"); - - auto prefix = attr_name.substr(0, d); - if (not iequals(prefix, "xml")) - { - std::string ns = m_ns->ns_for_prefix(prefix); - - if (ns.empty()) - not_well_formed("Unbound attribute prefix"); - - attr.m_ns = ns; - attr.m_name = attr_name.substr(d + 1); - } - } - } - - attrs.push_back(attr); - } - } - - if (dte == nullptr) - { - if (name[0] == ':') - not_well_formed("Element name should not start with colon"); - - auto cp = name.find(':'); - if (cp != std::string::npos) - { - auto prefix = name.substr(0, cp); - if (not ns.is_known_prefix(prefix)) - not_well_formed("Unknown prefix for element " + name); - } - } - else // add missing attributes - { - for (const doctype::attribute_ *dta : dte->get_attributes()) - { - std::string attr_name = dta->name(); - - std::list::iterator attr = find_if(attrs.begin(), attrs.end(), - [attr_name](auto& a) { return a.m_name == attr_name; }); - - doctype::AttributeDefault defType; - std::string defValue; - - std::tie(defType, defValue) = dta->get_default(); - - if (defType == doctype::AttributeDefault::Required) - { - if (attr == attrs.end()) - not_valid("missing #REQUIRED attribute '" + attr_name + "' for element '" + name + "'"); - } - else if (not defValue.empty() and attr == attrs.end()) - { - if (m_validating and m_standalone and dta->is_external()) - not_valid("default value for attribute defined in external declaration which is not allowed in a standalone document"); - - detail::attr attr; - attr.m_name = attr_name; - attr.m_value = normalize_attribute_value(defValue, dta->get_type() == doctype::AttributeType::CDATA); - attr.m_id = false; - - if (m_ns != nullptr) - { - std::string::size_type d = attr_name.find(':'); - if (d != std::string::npos) - { - std::string ns = m_ns->ns_for_prefix(attr_name.substr(0, d)); - - if (not ns.empty()) - { - attr.m_ns = ns; - attr.m_name = attr_name.substr(d + 1); - } - } - } - - attrs.push_back(attr); - } - } - } - - // now find out the namespace we're supposed to pass - std::string uri, raw(name); - - std::string::size_type c = name.find(':'); - if (c != std::string::npos and c > 0) - { - uri = ns.ns_for_prefix(name.substr(0, c)); - name.erase(0, c + 1); - } - else - uri = ns.default_ns(); - - // sort the attributes (why? disabled to allow similar output) - attrs.sort([](auto& a, auto& b) { return a.m_name < b.m_name; }); - - if (m_lookahead == XMLToken::Slash) - { - match(XMLToken::Slash); - m_parser.start_element(name, uri, attrs); - m_parser.end_element(name, uri); - } - else - { - m_parser.start_element(name, uri, attrs); - - m_in_content = true; - match(XMLToken::GreaterThan); - - if (m_lookahead != XMLToken::ETag) - content(sub_valid); - - m_in_content = false; - - match(XMLToken::ETag); - - if (m_token != raw) - not_well_formed("end tag does not match start tag"); - - match(XMLToken::Name); - - s(); - - m_parser.end_element(name, uri); - } - - in_content.reset(); - match(XMLToken::GreaterThan); - - if (m_validating and dte != nullptr and not sub_valid.done()) - not_valid("missing child elements for element '" + dte->name() + "'"); -} - -void parser_imp::content(doctype::validator& valid) -{ - if (valid.get_content_spec() == doctype::ContentSpecType::Empty and m_lookahead != XMLToken::ETag) - not_valid("Content is not allowed in an element declared to be EMPTY"); - - do - { - switch (m_lookahead) - { - case XMLToken::Content: - case XMLToken::Space: - if (valid.get_content_spec() == doctype::ContentSpecType::Empty) - not_valid("character data not allowed in EMPTY element"); - else if (valid.get_content_spec() == doctype::ContentSpecType::Children and m_lookahead == XMLToken::Content) - not_valid("character data '" + m_token + "' not allowed in element"); - m_parser.character_data(m_token); - match(m_lookahead); - break; - - case XMLToken::CharRef: - if (valid.get_content_spec() == doctype::ContentSpecType::Empty) - not_valid("data not allowed in EMPTY element"); - else if (valid.get_content_spec() == doctype::ContentSpecType::Children and is_space(m_token)) - not_valid("Element may not contain reference to space"); - m_parser.character_data(m_token); - match(m_lookahead); - break; - - case XMLToken::Reference: - { - if (std::find(m_entities_on_stack.begin(), m_entities_on_stack.end(), m_token) != m_entities_on_stack.end()) - not_well_formed("infinite recursion of entity references"); - - m_entities_on_stack.push_back(m_token); - - const doctype::entity& e = get_general_entity(m_token); - - if (e.is_externally_defined() and m_standalone) - not_well_formed("document marked as standalone but an external entity is referenced"); - - if (not e.is_parsed()) - not_well_formed("content has a general entity reference to an unparsed entity"); - - push_data_source(new entity_data_source(e.get_replacement(), m_source.top()->base()), false); - - m_lookahead = get_next_content(); - - save_state in_external_dtd(m_in_external_dtd, e.is_externally_defined()); - - // a children production may not contain references to spaces - if (m_lookahead == XMLToken::Space and valid.get_content_spec() == doctype::ContentSpecType::Children) - { - auto space = m_token; - match(m_lookahead); - - if (m_lookahead == XMLToken::Eof) - not_valid("Element may not contain reference to space"); - m_parser.character_data(space); - } - - if (m_lookahead != XMLToken::Eof) - content(valid); - - if (m_lookahead != XMLToken::Eof) - not_well_formed("entity reference should be a valid content production"); - - pop_data_source(); - - match(XMLToken::Reference); - - m_entities_on_stack.pop_back(); - break; - } - - case XMLToken::STag: - element(valid); - break; - - case XMLToken::PI: - pi(); - break; - - case XMLToken::Comment: - comment(); - break; - - case XMLToken::CDSect: - if (valid.get_content_spec() != doctype::ContentSpecType::Mixed and valid.get_content_spec() != doctype::ContentSpecType::Any) - not_valid("character data '" + m_token + "' not allowed in element"); - - m_parser.start_cdata_section(); - m_parser.character_data(m_token); - - if (is_space(m_token) and valid.get_content_spec() == doctype::ContentSpecType::Children) - not_valid("Element may not contain CDATA section containing only space"); - - m_parser.end_cdata_section(); - - match(XMLToken::CDSect); - break; - - default: - match(XMLToken::Content); // will fail and report error - } - } while (m_lookahead != XMLToken::ETag and m_lookahead != XMLToken::Eof); -} - -void parser_imp::comment() -{ - save_state in_content(m_in_content, false); - - // m_lookahead == XMLToken::Comment - // read characters until we reach --> - // check all characters in between for validity - - enum - { - state_Start, - state_FirstHyphenSeen, - state_SecondHyphenSeen, - state_CommentClosed - } state = state_Start; - - m_token.clear(); - - while (state != state_CommentClosed) - { - unicode ch = get_next_char(); - - if (ch == 0) - not_well_formed("runaway comment"); - if (not is_char(ch)) - not_well_formed("illegal character in content: '" + to_hex(ch) + '\''); - - switch (state) - { - case state_Start: - if (ch == '-') - state = state_FirstHyphenSeen; - break; - - case state_FirstHyphenSeen: - if (ch == '-') - state = state_SecondHyphenSeen; - else - state = state_Start; - break; - - case state_SecondHyphenSeen: - if (ch == '>') - state = state_CommentClosed; - else - not_well_formed("double hyphen found in comment"); - break; - - case state_CommentClosed: - assert(false); - } - } - - assert(m_token.length() >= 3); - m_token.erase(m_token.end() - 3, m_token.end()); - m_parser.comment(m_token); - - in_content.reset(); - match(XMLToken::Comment); -} - -void parser_imp::pi() -{ - save_state in_content(m_in_content, false); - - // m_lookahead == XMLToken::PI - // read characters until we reach --> - // check all characters in between for validity - - std::string pi_target = m_token.substr(2); - - if (pi_target.empty()) - not_well_formed("processing instruction target missing"); - - if (m_validating_ns and pi_target.find(':') != std::string::npos) - not_well_formed("processing instruction name should not contain a colon"); - - // we treat the xml processing instruction separately. - if (m_token.substr(2) == "xml") - not_well_formed("xml declaration are only valid as the start of the file"); - else if (iequals(pi_target, "xml")) - not_well_formed("') - state = state_PIClosed; - else if (ch != '?') - state = state_Data; - break; - - case state_PIClosed: - assert(false); - } - } - - m_token.erase(m_token.end() - 2, m_token.end()); - m_parser.processing_instruction(pi_target, m_token); - - in_content.reset(); - match(XMLToken::PI); -} - -// -------------------------------------------------------------------- - -parser::parser(std::istream& data) - : m_impl(new parser_imp(data, *this)), m_istream(nullptr) -{ -} - -parser::parser(const std::string& data) -{ - m_istream = new std::istringstream(data); - m_impl = new parser_imp(*m_istream, *this); -} - -parser::~parser() -{ - delete m_impl; - delete m_istream; -} - -void parser::parse(bool validate, bool validate_ns) -{ - m_impl->parse(validate, validate_ns); -} - -void parser::xml_decl(encoding_type encoding, bool standalone, float version) -{ - if (xml_decl_handler) - xml_decl_handler(encoding, standalone, version); -} - -void parser::start_element(const std::string& name, const std::string& uri, const std::list &atts) -{ - if (start_element_handler) - start_element_handler(name, uri, atts); -} - -void parser::end_element(const std::string& name, const std::string& uri) -{ - if (end_element_handler) - end_element_handler(name, uri); -} - -void parser::character_data(const std::string& data) -{ - if (character_data_handler) - character_data_handler(data); -} - -void parser::processing_instruction(const std::string& target, const std::string& data) -{ - if (processing_instruction_handler) - processing_instruction_handler(target, data); -} - -void parser::comment(const std::string& data) -{ - if (comment_handler) - comment_handler(data); -} - -void parser::start_cdata_section() -{ - if (start_cdata_section_handler) - start_cdata_section_handler(); -} - -void parser::end_cdata_section() -{ - if (end_cdata_section_handler) - end_cdata_section_handler(); -} - -void parser::start_namespace_decl(const std::string& prefix, const std::string& uri) -{ - if (start_namespace_decl_handler) - start_namespace_decl_handler(prefix, uri); -} - -void parser::end_namespace_decl(const std::string& prefix) -{ - if (end_namespace_decl_handler) - end_namespace_decl_handler(prefix); -} - -void parser::doctype_decl(const std::string& root, const std::string& publicId, const std::string& uri) -{ - if (doctype_decl_handler) - doctype_decl_handler(root, publicId, uri); -} - -void parser::notation_decl(const std::string& name, const std::string& systemId, const std::string& publicId) -{ - if (notation_decl_handler) - notation_decl_handler(name, systemId, publicId); -} - -std::istream *parser::external_entity_ref(const std::string& base, const std::string& pubid, const std::string& uri) -{ - std::istream *result = nullptr; - if (external_entity_ref_handler) - result = external_entity_ref_handler(base, pubid, uri); - return result; -} - -void parser::report_invalidation(const std::string& msg) -{ - if (report_invalidation_handler) - report_invalidation_handler(msg); -} - -} // namespace zeep::xml diff -Nru libzeep-5.0.1/lib-xml/src/xml-parser.cpp libzeep-5.0.2/lib-xml/src/xml-parser.cpp --- libzeep-5.0.1/lib-xml/src/xml-parser.cpp 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/lib-xml/src/xml-parser.cpp 2020-11-14 07:31:12.000000000 +0000 @@ -0,0 +1,4148 @@ +// Copyright Maarten L. Hekkelman, Radboud University 2008-2013. +// Copyright Maarten L. Hekkelman, 2014-2020 +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include + +#include + +#include +#include + +namespace ba = boost::algorithm; + +namespace zeep::xml +{ + +bool is_absolute_path(const std::string& s) +{ + bool result = false; + + if (not s.empty()) + { + if (s[0] == '/') + result = true; + else if (isalpha(s[0])) + { + std::string::const_iterator ch = s.begin() + 1; + while (ch != s.end() and isalpha(*ch)) + ++ch; + result = ch != s.end() and *ch == ':'; + } + } + + return result; +} + +// #define url_hexdigit "[[:digit:]a-fA-F]" +// #define url_unreserved "[-[:alnum:]._~]" +// #define url_pct_encoded "%" url_hexdigit "{2}" +// #define url_sub_delims "[!$&'()*+,;=]" +// #define url_userinfo "(?:((?:" url_unreserved "|" url_pct_encoded "|" url_sub_delims ")+)@)?" +// #define url_scheme "[[:alpha:]][[:alnum:]]*" +// #define url_host "(\\[(?:[[:digit:]a-fA-F:]+)\\]|(?:" url_unreserved "|" url_pct_encoded "|" url_sub_delims ")+)" +// #define url_port "(?::([[:digit:]]+))?" +// #define url_pchar url_unreserved "|" url_pct_encoded "|" url_sub_delims "|:|@" +// #define url_path "(?:/((?:" url_pchar "|\\*|\\?|/)*))?" +// #define url_fragment "(?:#(?:" url_pchar ")*)?" +// +// const std::regex kURL_rx( url_scheme "://" url_userinfo url_host url_port url_path url_fragment); + +// const std::regex kURL_rx(R"([[:alpha:]][[:alnum:]]*:.+)"); + +bool is_valid_url(const std::string& url) +{ + // no, really... + auto cp = url.find(':'); + + return cp > 1 and cp != std::string::npos and std::isalpha(url[0]); + // return std::regex_match(url, kURL_rx); +} + +// parsing XML is somewhat like macro processing, +// we can encounter entities that need to be expanded into replacement text +// and so we declare data_source objects that can be stacked. + +class data_source; + +// exception generated by data_source + +class source_exception : public zeep::exception +{ +public: + source_exception(const std::string& msg) : exception(msg), m_wmsg(msg) {} + ~source_exception() throw() {} + + std::string m_wmsg; +}; + +// A data source can have a base dir which is the directory the data came from. +// This information is needed when a relative uri is found in an external ID. + +class data_source +{ +public: + data_source(const data_source&) = delete; + data_source& operator=(const data_source&) = delete; + + data_source() + : m_base("."), m_encoding(encoding_type::UTF8), m_line_nr(1) + { + static int sNextID = 0; + m_id = sNextID++; + } + + virtual ~data_source() {} + + // data_source is a virtual base class. Derivatives need to declare the next function. + virtual unicode get_next_char() = 0; + + void base(const std::string& dir) { m_base = dir; } + const std::string& base() const { return m_base; } + + encoding_type encoding() const { return m_encoding; } + virtual void encoding(encoding_type enc) { m_encoding = enc; } + virtual bool has_bom() { return false; } + + void version(float v) { m_version = v; } + + int id() const { return m_id; } + + int line_nr() const { return m_line_nr; } + void line_nr(int l) { m_line_nr = l; } + +protected: + std::string m_base; + encoding_type m_encoding; + float m_version = 1.0f; + int m_id; // for nesting checks + int m_line_nr; // for reporting errors +}; + +// -------------------------------------------------------------------- +// An std::istream implementation of data_source. + +class istream_data_source : public data_source +{ +public: + istream_data_source(std::istream& data) + : m_data(&data), m_owns_data(false) + { + guess_encoding(); + } + + istream_data_source(std::istream *data) + : m_data(data) + { + guess_encoding(); + } + + ~istream_data_source() + { + if (m_owns_data) + delete m_data; + } + + virtual bool has_bom() { return m_has_bom; } + + virtual unicode get_next_char(); + virtual void encoding(encoding_type enc); + +private: + void guess_encoding(); + void parse_text_decl(); + unicode next_utf8_char(); + unicode next_utf16le_char(); + unicode next_utf16be_char(); + unicode next_iso88591_char(); + unicode next_ascii_char(); + + unsigned char next_byte() + { + int result = m_data->rdbuf()->sbumpc(); + + if (result == std::streambuf::traits_type::eof()) + result = 0; + + return static_cast(result); + } + + std::istream* m_data; + bool m_owns_data = true; + unicode m_char_buffer = 0; // used in detecting \r\n algorithm + + typedef unicode (istream_data_source::*next_func)(void); + + next_func m_next; + bool m_has_bom = false; +}; + +void istream_data_source::guess_encoding() +{ + // see if there is a BOM + // if there isn't, we assume the data is UTF-8 + + int ch = m_data->rdbuf()->sgetc(); + if (ch != std::streambuf::traits_type::eof()) + { + char ch1 = static_cast(ch); + + if (ch1 == char(0xfe)) + { + char ch2 = m_data->rdbuf()->snextc(); + + if (ch2 == char(0xff)) + { + m_data->rdbuf()->snextc(); + m_encoding = encoding_type::UTF16BE; + m_has_bom = true; + } + else + m_data->rdbuf()->sungetc(); + } + else if (ch1 == char(0xff)) + { + char ch2 = m_data->rdbuf()->snextc(); + + if (ch2 == char(0xfe)) + { + m_data->rdbuf()->snextc(); + m_encoding = encoding_type::UTF16LE; + m_has_bom = true; + } + else + m_data->rdbuf()->sungetc(); + } + else if (ch1 == char(0xef)) + { + char ch2 = m_data->rdbuf()->snextc(); + char ch3 = m_data->rdbuf()->snextc(); + + if (ch2 == char(0xbb) and ch3 == char(0xbf)) + { + m_data->rdbuf()->snextc(); + m_encoding = encoding_type::UTF8; + m_has_bom = true; + } + else + { + m_data->rdbuf()->sungetc(); + m_data->rdbuf()->sputbackc(ch1); + } + } + } + + encoding(m_encoding); +} + +void istream_data_source::encoding(encoding_type enc) +{ + if (enc != m_encoding) + { + if (is_single_byte_encoding(enc) and is_single_byte_encoding(m_encoding)) + m_encoding = enc; + else + throw invalid_exception("Invalid encoding specified, incompatible with actual encoding"); + } + + data_source::encoding(enc); + + switch (m_encoding) + { + case encoding_type::UTF8: + m_next = &istream_data_source::next_utf8_char; + break; + case encoding_type::UTF16LE: + m_next = &istream_data_source::next_utf16le_char; + break; + case encoding_type::UTF16BE: + m_next = &istream_data_source::next_utf16be_char; + break; + case encoding_type::ISO88591: + m_next = &istream_data_source::next_iso88591_char; + break; + case encoding_type::ASCII: + m_next = &istream_data_source::next_ascii_char; + break; + default: break; + } +} + +unicode istream_data_source::next_utf8_char() +{ + unicode result = next_byte(); + + if (result & 0x080) + { + unsigned char ch[3]; + + if ((result & 0x0E0) == 0x0C0) + { + ch[0] = next_byte(); + if ((ch[0] & 0x0c0) != 0x080) + throw source_exception("Invalid utf-8"); + result = ((result & 0x01F) << 6) | (ch[0] & 0x03F); + } + else if ((result & 0x0F0) == 0x0E0) + { + ch[0] = next_byte(); + ch[1] = next_byte(); + if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080) + throw source_exception("Invalid utf-8"); + result = ((result & 0x00F) << 12) | ((ch[0] & 0x03F) << 6) | (ch[1] & 0x03F); + } + else if ((result & 0x0F8) == 0x0F0) + { + ch[0] = next_byte(); + ch[1] = next_byte(); + ch[2] = next_byte(); + if ((ch[0] & 0x0c0) != 0x080 or (ch[1] & 0x0c0) != 0x080 or (ch[2] & 0x0c0) != 0x080) + throw source_exception("Invalid utf-8"); + result = ((result & 0x007) << 18) | ((ch[0] & 0x03F) << 12) | ((ch[1] & 0x03F) << 6) | (ch[2] & 0x03F); + + if (result > 0x10ffff) + throw source_exception("invalid utf-8 character (out of range)"); + } + } + + return result; +} + +unicode istream_data_source::next_utf16le_char() +{ + unsigned char c1 = next_byte(), c2 = next_byte(); + + unicode ch = (static_cast(c2) << 8) | c1; + + if (ch >= 0x080) + { + // surrogate support + if (ch >= 0x0D800 and ch <= 0x0DBFF) + { + unicode uc2 = next_utf16le_char(); + if (uc2 >= 0x0DC00 and uc2 <= 0x0DFFF) + ch = (ch - 0x0D800) * 0x400 + (uc2 - 0x0DC00) + 0x010000; + else + throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: leading surrogate character without trailing surrogate character"); + } + else if (ch >= 0x0DC00 and ch <= 0x0DFFF) + throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: trailing surrogate character without a leading surrogate"); + } + + return ch; +} + +unicode istream_data_source::next_utf16be_char() +{ + unsigned char c1 = next_byte(), c2 = next_byte(); + + unicode ch = (static_cast(c1) << 8) | c2; + + if (ch >= 0x080) + { + // surrogate support + if (ch >= 0x0D800 and ch <= 0x0DBFF) + { + unicode uc2 = next_utf16be_char(); + if (uc2 >= 0x0DC00 and uc2 <= 0x0DFFF) + ch = (ch - 0x0D800) * 0x400 + (uc2 - 0x0DC00) + 0x010000; + else + throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: leading surrogate character without trailing surrogate character"); + } + else if (ch >= 0x0DC00 and ch <= 0x0DFFF) + throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: trailing surrogate character without a leading surrogate"); + } + + return ch; +} + +unicode istream_data_source::next_iso88591_char() +{ + return (unicode)next_byte(); +} + +unicode istream_data_source::next_ascii_char() +{ + unicode c = next_byte(); + + if (c > 127) + throw not_wf_exception("Invalid ascii value"); + + return c; +} + +unicode istream_data_source::get_next_char() +{ + unicode ch = m_char_buffer; + + if (ch == 0) + ch = (this->*m_next)(); + else + m_char_buffer = 0; + + if (ch == 0x0ffff or ch == 0x0fffe) + throw not_wf_exception("Document (line: " + std::to_string(m_line_nr) + " not well-formed: character " + to_hex(ch) + " is not allowed"); + + if (ch == '\r') + { + ch = (this->*m_next)(); + if (ch != '\n' and (m_version == 1.0 or ch != 0x85 or m_encoding == encoding_type::ASCII)) + m_char_buffer = ch; + ch = '\n'; + } + + if (m_encoding != encoding_type::ASCII) + { + if (m_version > 1.0 and ch == 0x85) + ch = '\n'; + else if (m_encoding != encoding_type::ISO88591 and m_version > 1.0 and ch == 0x2028) + ch = '\n'; + } + + if (ch == '\n') + ++m_line_nr; + + return ch; +} + +// -------------------------------------------------------------------- + +class string_data_source : public data_source +{ +public: + string_data_source(const std::string& data) + : m_data(data), m_ptr(m_data.begin()) + { + } + + unicode get_next_char() + { + unicode result = 0; + + if (m_ptr != m_data.end()) + std::tie(result, m_ptr) = get_first_char(m_ptr); + + if (result == '\n') + ++m_line_nr; + + return result; + } + +private: + + std::string m_data; + std::string::iterator m_ptr; +}; + +// -------------------------------------------------------------------- + +class entity_data_source : public string_data_source +{ +public: + entity_data_source(const std::string& text, const std::string& entity_path) + : string_data_source(text) + { + base(entity_path); + } +}; + +// -------------------------------------------------------------------- + +class parameter_entity_data_source : public string_data_source +{ +public: + parameter_entity_data_source(const std::string& data, const std::string& base_dir) + : string_data_source(" " + data + " ") + { + base(base_dir); + } +}; + +// -------------------------------------------------------------------- + +class valid_nesting_validator +{ +public: + valid_nesting_validator(data_source& source) + : m_id(source.id()) {} + + void check(data_source& source) + { + if (source.id() != m_id) + throw invalid_exception("proper nesting validation error"); + } + +private: + int m_id; +}; + +// -------------------------------------------------------------------- + +struct parser_imp +{ + parser_imp(std::istream& data, parser& parser); + + ~parser_imp(); + + // Here comes the parser part + void parse(bool validate, bool validate_ns); + + // the productions. Some are inlined below for obvious reasons. + // names of the productions try to follow those in the TR http://www.w3.org/TR/xml + void prolog(); + void xml_decl(); + void text_decl(); + + void s(bool at_least_one = false); + void eq(); + void misc(); + void element(doctype::validator& valid); + void content(doctype::validator& valid); + + void comment(); + void pi(); + + void pereference(); + + void doctypedecl(); + data_source* get_data_source(const std::string& pubid, std::string uri); + std::tuple read_external_id(); + void intsubset(); + void extsubset(); + void declsep(); + void conditionalsect(); + void ignoresectcontents(); + void markup_decl(); + void element_decl(); + void contentspec(doctype::element_& element); + doctype::content_spec_ptr cp(); + void attlist_decl(); + void notation_decl(); + void entity_decl(); + void parameter_entity_decl(); + void general_entity_decl(); + void entity_value(); + + // at several locations we need to parse out entity references from strings: + void parse_parameter_entity_declaration(std::string& s); + void parse_general_entity_declaration(std::string& s); + + // same goes for attribute values + std::string normalize_attribute_value(const std::string& s, bool isCDATA) + { + push_data_source(new string_data_source(s), false); + + std::string result = normalize_attribute_value(); + + if (m_standalone and result != s) + not_valid("Document cannot be standalone since an attribute was modified"); + + if (not isCDATA) + collapse_spaces(result); + + return result; + } + + std::string normalize_attribute_value(); + + void collapse_spaces(std::string& s); + + // The scanner is next. We recognize the following tokens: + enum XMLToken + { + Undef, + + Eq = '=', + QuestionMark = '?', + GreaterThan = '>', + OpenBracket = '[', + CloseBracket = ']', + OpenParenthesis = '(', + CloseParenthesis = ')', + Percent = '%', + Plus = '+', + Pipe = '|', + Asterisk = '*', + Slash = '/', + Comma = ',', + + Eof = 256, + Other, // + + // these are tokens for the markup + + XMLDecl, // + Content, // anything else up to the next element start + }; + + // for debugging and error reporting we have the following describing routine + constexpr const char* describe_token(XMLToken token) + { + switch (token) + { + case XMLToken::Undef: return "undefined"; + case XMLToken::Eq: return "="; + case XMLToken::QuestionMark: return "?"; + case XMLToken::GreaterThan: return ">"; + case XMLToken::OpenBracket: return "["; + case XMLToken::CloseBracket: return "]"; + case XMLToken::OpenParenthesis: return "("; + case XMLToken::CloseParenthesis:return ")"; + case XMLToken::Percent: return "%"; + case XMLToken::Plus: return "+"; + case XMLToken::Pipe: return "|"; + case XMLToken::Asterisk: return "*"; + case XMLToken::Slash: return "/"; + case XMLToken::Comma: return ","; + case XMLToken::Eof: return "end of file"; + case XMLToken::Other: return "an invalid character"; + case XMLToken::XMLDecl: return "'() const { return m_source; } + data_source& operator*() const { return *m_source; } + + bool inserted() const { return m_inserted; } + + parser_imp& m_impl; + data_source* m_source; + std::array m_buffer; + int m_buffer_offset; + XMLToken m_lookahead; + std::string m_token; + bool m_inserted; + }; + + void push_data_source(data_source* source, bool insert) + { + source->version(m_version); + m_source.emplace(this, source, insert); + } + + void pop_data_source() + { + assert(not m_source.empty()); + m_source.pop(); + } + + // And during parsing we keep track of the namespaces we encounter. + class ns_state + { + public: + ns_state(parser_imp *imp) + : m_parser_imp(imp), m_next(imp->m_ns) + { + m_parser_imp->m_ns = this; + } + + ~ns_state() + { + m_parser_imp->m_ns = m_next; + } + + std::string default_ns() + { + std::string result = m_default_ns; + if (result.empty() and m_next != nullptr) + result = m_next->default_ns(); + return result; + } + + void default_ns(const std::string& ns) + { + m_default_ns = ns; + } + + std::string ns_for_prefix(const std::string& prefix) + { + std::string result; + + if (m_unbound.count(prefix) == 0) + { + auto np = m_known.find(prefix); + if (np != m_known.end()) + result = np->second; + else if (m_next != nullptr) + result = m_next->ns_for_prefix(prefix); + } + + return result; + } + + void bind(const std::string& prefix, const std::string& uri) + { + m_known[prefix] = uri; + } + + void unbind(const std::string& prefix) + { + m_unbound.insert(prefix); + } + + bool is_known_prefix(const std::string& prefix) + { + bool result = false; + + if (not m_unbound.count(prefix)) + { + if (m_known.count(prefix)) + result = true; + else if (m_next != nullptr) + result = m_next->is_known_prefix(prefix); + } + + return result; + } + + bool is_known_uri(const std::string& uri) + { + return find_if(m_known.begin(), m_known.end(), [uri] (auto k) { return k.second == uri; }) != m_known.end() or + (m_next != nullptr and m_next->is_known_uri(uri)); + } + + private: + + parser_imp *m_parser_imp; + std::string m_default_ns; + ns_state *m_next; + + std::map m_known; + std::set m_unbound; + }; + + bool is_char(unicode uc) + { + return + m_version == 1.0 ? + is_valid_xml_1_0_char(uc) : + is_valid_xml_1_1_char(uc); + } + + bool is_space(unicode uc) + { + return uc == ' ' or uc == '\t' or uc == '\n' or uc == '\r'; + } + + bool is_space(const std::string& s) + { + return not s.empty() and s.find_first_not_of(" \t\r\n") == std::string::npos; + } + + bool is_referrable_char(unicode charref) + { + return + m_version == 1.0 ? + charref == 0x09 or + charref == 0x0A or + charref == 0x0D or + (charref > 0x01F and charref < 0x0D800) or + (charref > 0x0DFFF and charref < 0x0FFFE) or + (charref > 0x0FFFF and charref < 0x00110000) : + + // 1.1 + (charref > 0x0 and charref < 0x0D800) or + (charref > 0x0DFFF and charref < 0x0FFFE) or + (charref > 0x0FFFF and charref < 0x00110000) + ; + } + + parser& m_parser; + bool m_validating; + bool m_validating_ns; + bool m_has_dtd; + XMLToken m_lookahead; + std::string m_token; + + std::stack m_source; + + std::array m_buffer; + std::array::iterator m_buffer_ptr = m_buffer.begin(); + + float m_version = 1.0f; + encoding_type m_encoding = encoding_type::UTF8; + bool m_standalone; + + // parser state + bool m_external_subset = false; + bool m_internal_subset = false; + bool m_allow_peref = false; + bool m_in_declsep = false; + bool m_in_external_dtd = false; + bool m_in_content = false; + + std::vector m_entities_on_stack; + ns_state* m_ns; + + std::string m_root_element; + doctype::entity_list m_parameter_entities; + doctype::entity_list m_general_entities; + doctype::element_list m_doctype; + + std::set m_notations; + std::set m_ids; // attributes of type ID should be unique + std::set m_unresolved_ids; // keep track of IDREFS that were not found yet + + std::unique_ptr m_xmlSpaceAttr; +}; + +// -------------------------------------------------------------------- +// some inlines + +inline void parser_imp::s(bool at_least_one) +{ + if (at_least_one) + match(XMLToken::Space); + + while (m_lookahead == XMLToken::Space) + match(XMLToken::Space); +} + +inline void parser_imp::eq() +{ + s(); + match(XMLToken::Eq); + s(); +} + +// -------------------------------------------------------------------- + +parser_imp::parser_imp(std::istream& data, parser& parser) + : m_parser(parser), m_validating(true), m_has_dtd(false), m_lookahead(XMLToken::Eof) + , m_encoding(encoding_type::ASCII), m_standalone(false), m_ns(nullptr) +{ + push_data_source(new istream_data_source(data), false); + + m_encoding = m_source.top()->encoding(); + + // these entities are always recognized: + m_general_entities.push_back(new doctype::general_entity("lt", "<")); + m_general_entities.push_back(new doctype::general_entity("gt", ">")); + m_general_entities.push_back(new doctype::general_entity("amp", "&")); + m_general_entities.push_back(new doctype::general_entity("apos", "'")); + m_general_entities.push_back(new doctype::general_entity("quot", """)); + + m_xmlSpaceAttr.reset(new doctype::attribute_("xml:space", doctype::AttributeType::Enumerated, { "preserve", "default" })); +} + +parser_imp::~parser_imp() +{ + while (not m_source.empty()) + m_source.pop(); + + // there may be parameter_entity_data_source's left in the stack + // as a result of a validation error/exception + + for (doctype::entity *e : m_parameter_entities) + delete e; + + for (doctype::entity *e : m_general_entities) + delete e; + + for (doctype::element_ *e : m_doctype) + delete e; +} + +const doctype::entity& parser_imp::get_general_entity(const std::string& name) const +{ + auto e = std::find_if(m_general_entities.begin(), m_general_entities.end(), + [name](auto e) { return e->name() == name; }); + + if (e == m_general_entities.end()) + not_well_formed("undefined entity reference '" + name + "'"); + + if ((*e)->is_external() and m_standalone) + not_valid("Document cannot be standalone since entity " + name + " is defined externally"); + + return **e; +} + +const doctype::entity& parser_imp::get_parameter_entity(const std::string& name) const +{ + auto e = find_if(m_parameter_entities.begin(), m_parameter_entities.end(), + [name](auto e) { return e->name() == name; }); + + if (e == m_parameter_entities.end()) + not_well_formed("Undefined parameter entity '" + m_token + '\''); + + return **e; +} + +const doctype::element_ *parser_imp::get_element(const std::string& name) const +{ + const doctype::element_ *result = nullptr; + + auto e = find_if(m_doctype.begin(), m_doctype.end(), + [name](auto e) { return e->name() == name; }); + + if (e != m_doctype.end()) + result = *e; + + return result; +} + +unicode parser_imp::get_next_char() +{ + unicode result = 0; + + if (m_buffer_ptr > m_buffer.begin()) // if buffer is not empty we already did all the validity checks + result = *--m_buffer_ptr; + + if (result == 0) + { + while (not m_source.empty()) + { + try + { + result = m_source.top()->get_next_char(); + } + catch (source_exception& e) + { + not_well_formed(e.m_wmsg); + } + + if (result == 0 and m_source.top().inserted()) + { + m_source.pop(); + continue; + } + + break; + } + } + + append(m_token, result); + + return result; +} + +void parser_imp::retract() +{ + assert(not m_token.empty()); + + assert(m_buffer_ptr < m_buffer.end()); + *m_buffer_ptr++ = pop_last_char(m_token); +} + +void parser_imp::match(XMLToken token) +{ + if (m_lookahead != token) + { + std::string expected = describe_token(token); + std::string found = describe_token(m_lookahead); + + not_well_formed( + "Error parsing XML, expected '" + expected + "' but found '" + found + "' ('" + m_token + "')"); + } + + if (m_in_content) + m_lookahead = get_next_content(); + else + { + m_lookahead = get_next_token(); + + if (m_lookahead == XMLToken::PEReference and not m_in_declsep) + { + if (m_allow_peref) + pereference(); + else + not_well_formed("Invalid entity reference at this location"); + } + } +} + +void parser_imp::not_well_formed(const std::string& msg) const +{ + std::stringstream s; + if (m_source.empty()) + s << "Document not well-formed: " << msg; + else + s << "Document (line: " << m_source.top()->line_nr() << ") not well-formed: " << msg; + throw not_wf_exception(s.str()); +} + +void parser_imp::not_valid(const std::string& msg) const +{ + if (m_validating) + { + std::stringstream s; + + if (m_source.empty()) + s << "Document not valid: " << msg; + else + s << "Document (line: " << m_source.top()->line_nr() << ") not valid: " << msg; + + throw invalid_exception(s.str()); + } + else + m_parser.report_invalidation(msg); +} + +/* + get_next_token is a hand optimised scanner for tokens in the input stream. +*/ + +parser_imp::XMLToken parser_imp::get_next_token() +{ + enum State + { + state_Start = 0, + state_WhiteSpace = 10, + state_Tag = 20, + state_String = 30, + state_PERef = 40, + state_HashName = 49, + state_Name = 50, + state_CommentOrDoctype = 60, + state_Comment = 70, + state_DocTypeDecl = 80, + state_PI = 90, + }; + + XMLToken token = XMLToken::Undef; + unicode quote_char = 0; + int state = state_Start; + bool might_be_name = false; + + m_token.clear(); + + while (token == XMLToken::Undef) + { + unicode uc = get_next_char(); + + switch (state) + { + // start scanning. + case state_Start: + switch (uc) + { + case 0: + token = XMLToken::Eof; + break; + + case ' ': + case '\t': + case '\n': + state = state_WhiteSpace; + break; + + case '<': + state = state_Tag; + break; + + case '\'': + case '"': + { + state = state_String; + quote_char = uc; + break; + } + + case '%': + state = state_PERef; + break; + + case '#': + state = state_HashName; + break; + + case '=': token = XMLToken::Eq; break; + case '?': token = XMLToken::QuestionMark; break; + case '>': token = XMLToken::GreaterThan; break; + case '[': token = XMLToken::OpenBracket; break; + case ']': token = XMLToken::CloseBracket; break; + case '(': token = XMLToken::OpenParenthesis; break; + case ')': token = XMLToken::CloseParenthesis; break; + // case '%': token = XMLToken::Percent; break; + case '+': token = XMLToken::Plus; break; + case '|': token = XMLToken::Pipe; break; + case '*': token = XMLToken::Asterisk; break; + case '/': token = XMLToken::Slash; break; + case ',': token = XMLToken::Comma; break; + + default: + if (is_name_start_char(uc)) + { + might_be_name = true; + state = state_Name; + } + else if (is_name_char(uc)) + state = state_Name; + else if (is_char(uc)) + token = XMLToken::Other; + else + not_well_formed("Unexpected character: " + ((uc < 128 and std::isprint(uc)) ? std::string(1, uc) : to_hex(uc)) ); + + break; + } + break; + + // collect all whitespace + case state_WhiteSpace: + if (uc != ' ' and uc != '\t' and uc != '\n') + { + retract(); + token = XMLToken::Space; + } + break; + + // We scanned a < character, decide what to do next. + case state_Tag: + if (uc == '!') // comment or doctype thing + state = state_CommentOrDoctype; + else if (uc == '/') // end tag + token = XMLToken::ETag; + else if (uc == '?') // processing instruction + state = state_PI; + else // anything else + { + retract(); + token = XMLToken::STag; + } + break; + + // So we had + case state_Comment: + if (uc == '-') + token = XMLToken::Comment; + else + not_well_formed("Invalid formatted comment"); + break; + + // scan for processing instructions + case state_PI: + if (not is_name_char(uc)) + { + retract(); + + // we treat the xml processing instruction separately. + if (m_token.substr(2) == "xml") + token = XMLToken::XMLDecl; + else if (iequals(m_token.substr(2), "xml")) + not_well_formed(" in text + break; + + case ' ': + case '\t': + case '\n': + case '\r': + state = state_WhiteSpace; + break; + + default: + if (is_char(uc)) + state = state_Content; // anything else + else + not_well_formed("Unexpected character in content: " + (std::isprint(uc) ? std::string(1, uc) : to_hex(uc)) ); + break; + } + break; + + // collect all whitespace + case state_WhiteSpace: + if (not is_space(uc)) + { + retract(); + token = XMLToken::Space; + } + break; + + // content. Only stop collecting character when uc is special + case state_Content: + if (uc == ']') + state = state_Illegal; + else if (uc == 0 or uc == '<' or uc == '&') + { + retract(); + token = XMLToken::Content; + } + else if (not is_referrable_char(uc)) + not_well_formed("Illegal character in content text"); + break; + + // beginning of a tag? + case state_Tag: + if (uc == '/') + token = XMLToken::ETag; + else if (uc == '?') // processing instruction + state = state_PI; + else if (uc == '!') // comment or CDATA + state = state_CommentOrCDATA; + else + { + retract(); + token = XMLToken::STag; + } + break; + + // processing instructions + case state_PI: + if (not is_name_char(uc)) + { + retract(); + token = XMLToken::PI; + } + break; + + // comment or CDATA + case state_CommentOrCDATA: + if (uc == '-') // comment + state = state_Comment; + else if (uc == '[') + state = state_CDATA; // CDATA + else + not_well_formed("invalid content"); + break; + + case state_Comment: + if (uc == '-') + token = XMLToken::Comment; + else + not_well_formed("invalid content"); + break; + + // CDATA (we parsed ') + { + token = XMLToken::CDSect; + m_token = m_token.substr(9, m_token.length() - 12); + } + else if (uc == 0) + not_well_formed("runaway cdata section"); + else if (uc != ']') + state = state_CDATA + 2; + break; + + // reference, either a character reference or a general entity reference + case state_Reference: + if (uc == '#') + state = state_Reference + 2; + else if (is_name_start_char(uc)) + state = state_Reference + 1; + else + not_well_formed("stray ampersand found in content"); + break; + + case state_Reference + 1: + if (not is_name_char(uc)) + { + if (uc != ';') + not_well_formed("invalid entity found in content, missing semicolon?"); + token = XMLToken::Reference; + m_token = m_token.substr(1, m_token.length() - 2); + } + break; + + case state_Reference + 2: + if (uc == 'x') + state = state_Reference + 4; + else if (uc >= '0' and uc <= '9') + { + charref = uc - '0'; + state += 1; + } + else + not_well_formed("invalid character reference"); + break; + + case state_Reference + 3: + if (uc >= '0' and uc <= '9') + charref = charref * 10 + (uc - '0'); + else if (uc == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character in content text"); + m_token.clear(); + append(m_token, charref); + token = XMLToken::CharRef; + } + else + not_well_formed("invalid character reference"); + break; + + case state_Reference + 4: + if (uc >= 'a' and uc <= 'f') + { + charref = uc - 'a' + 10; + state += 1; + } + else if (uc >= 'A' and uc <= 'F') + { + charref = uc - 'A' + 10; + state += 1; + } + else if (uc >= '0' and uc <= '9') + { + charref = uc - '0'; + state += 1; + } + else + not_well_formed("invalid character reference"); + break; + + case state_Reference + 5: + if (uc >= 'a' and uc <= 'f') + charref = (charref << 4) + (uc - 'a' + 10); + else if (uc >= 'A' and uc <= 'F') + charref = (charref << 4) + (uc - 'A' + 10); + else if (uc >= '0' and uc <= '9') + charref = (charref << 4) + (uc - '0'); + else if (uc == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character in content text"); + m_token.clear(); + append(m_token, charref); + token = XMLToken::CharRef; + } + else + not_well_formed("invalid character reference"); + break; + + // ]]> is illegal + case state_Illegal: + if (uc == ']') + state += 1; + else + { + retract(); + state = state_Content; + } + break; + + case state_Illegal + 1: + if (uc == '>') + not_well_formed("the sequence ']]>' is illegal in content text"); + else if (uc != ']') + { + retract(); + retract(); + state = state_Content; + } + break; + + default: + assert(false); + not_well_formed("state reached that should not be reachable"); + } + } + + //#if DEBUG + // if (VERBOSE) + // std::cout << "content: " << describe_token(token) << " (" << m_token << ')' << std::endl; + //#endif + + return token; +} + +float parser_imp::parse_version() +{ + float result = -1; + + if (m_token.length() >= 3) + { + auto i = m_token.begin(); + if (*i == '1' and *(i + 1) == '.') + { + result = 1.0f; + float dec = 10; + + for (i += 2; i != m_token.end(); ++i) + { + if (*i < '0' or *i > '9') + { + result = -1; + break; + } + + result += (*i - '0') / dec; + dec += 10; + } + } + } + + if (result < 1.0 or result >= 2.0) + not_well_formed("Invalid version specified: '" + m_token + "'"); + + return result; +} + +void parser_imp::parse(bool validate, bool validate_ns) +{ + m_validating = validate; + m_validating_ns = validate_ns; + + m_lookahead = get_next_token(); + + prolog(); + + const doctype::element_ *e = get_element(m_root_element); + + if (m_has_dtd and e == nullptr and m_validating) + not_valid("Element '" + m_root_element + "' is not defined in DTD"); + + if (e) + { + doctype::content_spec_element allowed(m_root_element); + doctype::validator valid(&allowed); + + element(valid); + } + else + { + doctype::content_spec_any allowed; + doctype::validator valid(&allowed); + + element(valid); + } + + misc(); + + if (m_lookahead != XMLToken::Eof) + not_well_formed("garbage at end of file"); + + if (not m_unresolved_ids.empty()) + { + not_valid("document contains references to the following undefined ID's: '" + ba::join(m_unresolved_ids, ", ") + "'"); + } +} + +void parser_imp::prolog() +{ + xml_decl(); + + misc(); + + if (m_lookahead == XMLToken::DocType) + { + doctypedecl(); + misc(); + } + else if (m_validating) + not_valid("document type declaration is missing"); +} + +void parser_imp::xml_decl() +{ + if (m_lookahead == XMLToken::XMLDecl) + { + encoding_type encoding = m_encoding; + + if (m_encoding == encoding_type::UTF8) + m_source.top()->encoding(encoding_type::ASCII); + + match(XMLToken::XMLDecl); + + s(true); + if (m_token != "version") + not_well_formed("expected a version attribute in XML declaration"); + match(XMLToken::Name); + eq(); + + auto version = parse_version(); + + m_version = version; + + if (m_version >= 2.0f or m_version < 1.0f) + not_well_formed("This library only supports XML version 1.0 or 1.1"); + + m_source.top()->version(version); + match(XMLToken::String); + + if (m_lookahead == XMLToken::Space) + { + s(true); + + if (m_token == "encoding") + { + match(XMLToken::Name); + eq(); + ba::to_upper(m_token); + if (m_token == "US-ASCII") + encoding = encoding_type::ASCII; + else if (m_token == "ISO-8859-1") + encoding = encoding_type::ISO88591; + else if (m_token == "UTF-8") + encoding = encoding_type::UTF8; + else if (m_token == "UTF-16") + { + if (m_source.top()->encoding() != encoding_type::UTF16LE and m_source.top()->encoding() != encoding_type::UTF16BE) + not_well_formed("Inconsistent encoding attribute in XML declaration"); + encoding = m_source.top()->encoding(); + } + else + not_well_formed("Unsupported encoding value '" + m_token + "'"); + match(XMLToken::String); + + s(); + } + + if (m_token == "standalone") + { + match(XMLToken::Name); + eq(); + if (m_token != "yes" and m_token != "no") + not_well_formed("Invalid XML declaration, standalone value should be either yes or no"); + m_standalone = (m_token == "yes"); + match(XMLToken::String); + s(); + } + } + + m_encoding = encoding; + m_source.top()->encoding(encoding); + + match(XMLToken::QuestionMark); + match(XMLToken::GreaterThan); + + m_parser.xml_decl(m_encoding, m_standalone, m_version); + } + else + m_encoding = m_source.top()->encoding(); +} + +void parser_imp::text_decl() +{ + if (m_lookahead == XMLToken::XMLDecl) + { + encoding_type encoding = m_source.top()->encoding(); + if (encoding == encoding_type::UTF8 and not m_source.top()->has_bom()) + m_source.top()->encoding(encoding_type::ISO88591); + + match(XMLToken::XMLDecl); + + s(true); + + if (m_token == "version") + { + match(XMLToken::Name); + eq(); + + auto version = parse_version(); + if (version > m_version) + not_well_formed("Version mismatch between document and external entity"); + + match(XMLToken::String); + s(m_version == 1.0); + } + + if (m_token != "encoding") + { + if (m_version == 1.0) + not_well_formed("encoding attribute is mandatory in text declaration"); + } + else + { + match(XMLToken::Name); + eq(); + match(XMLToken::String); + s(); + } + + m_source.top()->encoding(encoding); + + match(XMLToken::QuestionMark); + match(XMLToken::GreaterThan); + } +} + +void parser_imp::misc() +{ + for (;;) + { + switch (m_lookahead) + { + case XMLToken::Space: + s(); + continue; + + case XMLToken::Comment: + comment(); + continue; + + case XMLToken::PI: + pi(); + continue; + default:; + } + + break; + } +} + +void parser_imp::doctypedecl() +{ + match(XMLToken::DocType); + + m_has_dtd = true; + + s(true); + + auto name = m_token; + match(XMLToken::Name); + + m_root_element = name; + + std::unique_ptr dtd; + + if (m_lookahead == XMLToken::Space) + { + s(true); + + if (m_lookahead == XMLToken::Name) + { + std::string pubid, uri; + + if (m_token == "SYSTEM") + { + match(XMLToken::Name); + s(true); + + uri = m_token; + + if (not is_valid_system_literal(uri)) + not_well_formed("invalid system literal"); + } + else if (m_token == "PUBLIC") + { + match(XMLToken::Name); + s(true); + + pubid = m_token; + match(XMLToken::String); + + // validate the public ID + if (not is_valid_public_id(pubid)) + not_well_formed("Invalid public ID"); + + s(true); + uri = m_token; + } + else + not_well_formed("Expected external id starting with either SYSTEM or PUBLIC"); + + match(XMLToken::String); + dtd.reset(get_data_source(pubid, uri)); + + if (m_validating and not dtd) + not_valid("Could not load DTD " + uri); + + m_parser.doctype_decl(m_root_element, pubid, uri); + } + + s(); + } + + if (m_lookahead == XMLToken::OpenBracket) + { + match(XMLToken::OpenBracket); + intsubset(); + match(XMLToken::CloseBracket); + + s(); + } + + // internal subset takes precedence over external subset, so + // if the external subset is defined, include it here. + if (dtd.get() != nullptr) + { + push_data_source(dtd.release(), false); + + m_external_subset = true; + m_in_external_dtd = true; + + m_lookahead = get_next_token(); + + text_decl(); + + extsubset(); + + match(XMLToken::Eof); + + pop_data_source(); + m_in_external_dtd = false; + } + + match(XMLToken::GreaterThan); + + // test if all ndata references can be resolved + + for (const doctype::entity *e : m_general_entities) + { + if (e->is_parsed() == false and m_notations.count(e->get_ndata()) == 0) + not_valid("Undefined NOTATION '" + e->get_ndata() + "'"); + } + + // and the notations in the doctype attlists + for (const doctype::element_ *element : m_doctype) + { + for (const doctype::attribute_ *attr : element->get_attributes()) + { + if (attr->get_type() != doctype::AttributeType::Notation) + continue; + + for (const std::string& n : attr->get_enums()) + { + if (m_notations.count(n) == 0) + not_valid("Undefined NOTATION '" + n + "'"); + } + } + } +} + +void parser_imp::pereference() +{ + const doctype::entity& e = get_parameter_entity(m_token); + + push_data_source(new parameter_entity_data_source(e.get_replacement(), e.get_path()), true); + + match(XMLToken::PEReference); +} + +void parser_imp::intsubset() +{ + save_state state_intsubset(m_internal_subset, true); + save_state state_allow_peref(m_allow_peref, true); + + for (;;) + { + switch (m_lookahead) + { + case XMLToken::Element: + case XMLToken::AttList: + case XMLToken::Entity: + case XMLToken::Notation: + markup_decl(); + continue; + + case XMLToken::PI: + pi(); + continue; + + case XMLToken::Comment: + comment(); + continue; + + case XMLToken::Space: + case XMLToken::PEReference: + declsep(); + continue; + default:; + } + + break; + } +} + +void parser_imp::declsep() +{ + save_state state_declsep(m_in_declsep, true); + + switch (m_lookahead) + { + case XMLToken::PEReference: + { + const doctype::entity& e = get_parameter_entity(m_token); + + match(XMLToken::PEReference); + + push_data_source(new parameter_entity_data_source(e.get_replacement(), e.get_path()), false); + + m_lookahead = get_next_token(); + extsubset(); + + match(XMLToken::Eof); + pop_data_source(); + + break; + } + + case XMLToken::Space: + s(); + break; + + default:; + } +} + +void parser_imp::extsubset() +{ + save_state state_extsubset(m_external_subset, true); + save_state state_allow_peref(m_allow_peref, true); + + for (;;) + { + switch (m_lookahead) + { + case XMLToken::Element: + case XMLToken::AttList: + case XMLToken::Entity: + case XMLToken::Notation: + markup_decl(); + continue; + + case XMLToken::IncludeIgnore: + conditionalsect(); + continue; + + case XMLToken::PI: + pi(); + continue; + + case XMLToken::Comment: + comment(); + continue; + + case XMLToken::Space: + case XMLToken::PEReference: + declsep(); + continue; + + default:; + } + + break; + } +} + +void parser_imp::conditionalsect() +{ + valid_nesting_validator check(*m_source.top()); + match(XMLToken::IncludeIgnore); + + s(); + + bool include = false; + + if (m_token == "INCLUDE") + include = true; + else if (m_token == "IGNORE") + include = false; + else if (m_lookahead == XMLToken::Name) + not_well_formed("Unexpected literal '" + m_token + "'"); + + match(XMLToken::Name); + + check.check(*m_source.top()); + + s(); + + if (include) + { + match(XMLToken::OpenBracket); + extsubset(); + match(XMLToken::CloseBracket); + match(XMLToken::CloseBracket); + check.check(*m_source.top()); + match(XMLToken::GreaterThan); + } + else + { + ignoresectcontents(); + check.check(*m_source.top()); + m_lookahead = get_next_token(); + } +} + +void parser_imp::ignoresectcontents() +{ + // yet another tricky routine, skip + + int state = 0; + bool done = false; + + while (not done) + { + unicode ch = get_next_char(); + if (ch == 0) + not_well_formed("runaway IGNORE section"); + + switch (state) + { + case 0: + if (ch == ']') + state = 1; + else if (ch == '<') + state = 10; + break; + + case 1: + if (ch == ']') + state = 2; + else + { + retract(); + state = 0; + } + break; + + case 2: + if (ch == '>') + done = true; + else if (ch != ']') + { + retract(); + state = 0; + } + break; + + case 10: + if (ch == '!') + state = 11; + else + { + retract(); + state = 0; + } + break; + + case 11: + if (ch == '[') + { + ignoresectcontents(); + state = 0; + } + else + { + retract(); + state = 0; + } + break; + } + } +} + +void parser_imp::markup_decl() +{ + save_state state_allow_peref(m_allow_peref, m_external_subset); + + switch (m_lookahead) + { + case XMLToken::Element: + element_decl(); + break; + + case XMLToken::AttList: + attlist_decl(); + break; + + case XMLToken::Entity: + entity_decl(); + break; + + case XMLToken::Notation: + notation_decl(); + break; + + case XMLToken::PI: + pi(); + break; + + case XMLToken::Comment: + comment(); + break; + + case XMLToken::Space: + s(); + break; + + default:; + } +} + +void parser_imp::element_decl() +{ + valid_nesting_validator check(*m_source.top()); + + match(XMLToken::Element); + s(true); + + std::string name = m_token; + if (ba::starts_with(name, "xmlns:")) + not_well_formed("Element names should not start with xmlns:"); + + auto e = std::find_if(m_doctype.begin(), m_doctype.end(), + [name](auto e) { return e->name() == name; }); + + if (e == m_doctype.end()) + e = m_doctype.insert(m_doctype.end(), new doctype::element_(name, true, m_in_external_dtd)); + else if ((*e)->is_declared()) + not_valid("duplicate element declaration for element '" + name + "'"); + + match(XMLToken::Name); + s(true); + + contentspec(**e); + s(); + + check.check(*m_source.top()); + match(XMLToken::GreaterThan); +} + +void parser_imp::contentspec(doctype::element_& element) +{ + if (m_lookahead == XMLToken::Name) + { + if (m_token == "EMPTY") + element.set_allowed(new doctype::content_spec_empty); + else if (m_token == "ANY") + element.set_allowed(new doctype::content_spec_any); + else + not_well_formed("Invalid element content specification"); + match(XMLToken::Name); + } + else + { + valid_nesting_validator check(*m_source.top()); + + match(XMLToken::OpenParenthesis); + + std::unique_ptr allowed; + + s(); + + bool mixed = false; + bool more = false; + + if (m_lookahead == XMLToken::PCData) // Mixed + { + mixed = true; + match(m_lookahead); + + s(); + + std::set seen; + + while (m_lookahead == XMLToken::Pipe) + { + more = true; + + match(XMLToken::Pipe); + s(); + + if (seen.count(m_token) > 0) + not_valid("no duplicates allowed in mixed content for element declaration"); + seen.insert(m_token); + + match(XMLToken::Name); + s(); + } + + doctype::content_spec_choice *choice = new doctype::content_spec_choice(true); + for (auto& c : seen) + choice->add(new doctype::content_spec_element(c)); + allowed.reset(choice); + } + else // children + { + allowed.reset(cp()); + + s(); + + if (m_lookahead == XMLToken::Comma) + { + doctype::content_spec_seq *seq = new doctype::content_spec_seq(allowed.release()); + allowed.reset(seq); + + more = true; + do + { + match(m_lookahead); + s(); + seq->add(cp()); + s(); + } while (m_lookahead == XMLToken::Comma); + } + else if (m_lookahead == XMLToken::Pipe) + { + doctype::content_spec_choice *choice = new doctype::content_spec_choice(allowed.release(), false); + allowed.reset(choice); + + more = true; + do + { + match(m_lookahead); + s(); + choice->add(cp()); + s(); + } while (m_lookahead == XMLToken::Pipe); + } + } + + s(); + + + check.check(*m_source.top()); + + match(XMLToken::CloseParenthesis); + + if (m_lookahead == XMLToken::Asterisk) + { + allowed.reset(new doctype::content_spec_repeated(allowed.release(), '*')); + match(XMLToken::Asterisk); + } + else if (more) + { + if (mixed) + { + allowed.reset(new doctype::content_spec_repeated(allowed.release(), '*')); + match(XMLToken::Asterisk); + } + else if (m_lookahead == XMLToken::Plus) + { + allowed.reset(new doctype::content_spec_repeated(allowed.release(), '+')); + match(XMLToken::Plus); + } + else if (m_lookahead == XMLToken::QuestionMark) + { + allowed.reset(new doctype::content_spec_repeated(allowed.release(), '?')); + match(XMLToken::QuestionMark); + } + } + + element.set_allowed(allowed.release()); + } +} + +doctype::content_spec_ptr parser_imp::cp() +{ + std::unique_ptr result; + + if (m_lookahead == XMLToken::OpenParenthesis) + { + valid_nesting_validator check(*m_source.top()); + + match(XMLToken::OpenParenthesis); + + s(); + result.reset(cp()); + s(); + if (m_lookahead == XMLToken::Comma) + { + doctype::content_spec_seq *seq = new doctype::content_spec_seq(result.release()); + result.reset(seq); + + do + { + match(m_lookahead); + s(); + seq->add(cp()); + s(); + } while (m_lookahead == XMLToken::Comma); + } + else if (m_lookahead == XMLToken::Pipe) + { + doctype::content_spec_choice *choice = new doctype::content_spec_choice(result.release(), false); + result.reset(choice); + + do + { + match(m_lookahead); + s(); + choice->add(cp()); + s(); + } while (m_lookahead == XMLToken::Pipe); + } + + s(); + check.check(*m_source.top()); + match(XMLToken::CloseParenthesis); + } + else + { + std::string name = m_token; + match(XMLToken::Name); + + result.reset(new doctype::content_spec_element(name)); + } + + switch (m_lookahead) + { + case XMLToken::Asterisk: + result.reset(new doctype::content_spec_repeated(result.release(), '*')); + match(XMLToken::Asterisk); + break; + case XMLToken::Plus: + result.reset(new doctype::content_spec_repeated(result.release(), '+')); + match(XMLToken::Plus); + break; + case XMLToken::QuestionMark: + result.reset(new doctype::content_spec_repeated(result.release(), '?')); + match(XMLToken::QuestionMark); + break; + default:; + } + + return result.release(); +} + +void parser_imp::entity_decl() +{ + match(XMLToken::Entity); + s(true); + + if (m_lookahead == XMLToken::Percent) // PEDecl + parameter_entity_decl(); + else + general_entity_decl(); +} + +void parser_imp::parameter_entity_decl() +{ + match(XMLToken::Percent); + s(true); + + std::string name = m_token; + match(XMLToken::Name); + + if (m_validating_ns and name.find(':') != std::string::npos) + not_well_formed("Entity names should not contain a colon"); + if (ba::starts_with(name, "xmlns:")) + not_well_formed("Entity names should not start with xmlns:"); + + s(true); + + std::string path; + std::string value; + + { + // PEDef is either a EntityValue... + if (m_lookahead == XMLToken::String) + { + value = m_token; + match(XMLToken::String); + + parse_parameter_entity_declaration(value); + } + else // ... or an external id + { + std::tie(path, value) = read_external_id(); + match(XMLToken::String); + } + + s(); + } + + match(XMLToken::GreaterThan); + + if (find_if(m_parameter_entities.begin(), m_parameter_entities.end(), + [name](auto e) { return e->name() == name; }) == m_parameter_entities.end()) + { + m_parameter_entities.push_back(new doctype::parameter_entity(name, value, path)); + } +} + +void parser_imp::general_entity_decl() +{ + std::string name = m_token; + match(XMLToken::Name); + s(true); + + if (m_validating_ns and name.find(':') != std::string::npos) + not_well_formed("Entity names should not contain a colon"); + if (ba::starts_with(name, "xmlns:")) + not_well_formed("Entity names should not start with xmlns:"); + + std::string value, ndata; + bool external = false; + bool parsed = true; + + if (m_lookahead == XMLToken::String) + { + value = m_token; + match(XMLToken::String); + + parse_general_entity_declaration(value); + } + else // ... or an ExternalID + { + std::tie(std::ignore, value) = read_external_id(); + match(XMLToken::String); + external = true; + + if (m_lookahead == XMLToken::Space) + { + s(true); + if (m_lookahead == XMLToken::Name and m_token == "NDATA") + { + match(XMLToken::Name); + s(true); + + parsed = false; + ndata = m_token; + + match(XMLToken::Name); + } + } + } + + s(); + + match(XMLToken::GreaterThan); + + if (std::find_if(m_general_entities.begin(), m_general_entities.end(), + [name](auto e) { return e->name() == name; }) == m_general_entities.end()) + { + m_general_entities.push_back(new doctype::general_entity(name, value, external, parsed)); + + if (not parsed) + m_general_entities.back()->set_ndata(ndata); + + if (m_in_external_dtd) + m_general_entities.back()->set_externally_defined(true); + } +} + +void parser_imp::attlist_decl() +{ + match(XMLToken::AttList); + s(true); + std::string element = m_token; + match(XMLToken::Name); + + auto dte = find_if(m_doctype.begin(), m_doctype.end(), + [element](auto e) { return e->name() == element; }); + + if (dte == m_doctype.end()) + dte = m_doctype.insert(m_doctype.end(), new doctype::element_(element, false, m_in_external_dtd)); + + // attribute defaults + + while (m_lookahead == XMLToken::Space) + { + s(true); + + if (m_lookahead != XMLToken::Name) + break; + + std::string name = m_token; + match(XMLToken::Name); + s(true); + + std::unique_ptr attribute; + + // att type: several possibilities: + if (m_lookahead == XMLToken::OpenParenthesis) // enumeration + { + std::vector enums; + + match(m_lookahead); + + s(); + + enums.push_back(m_token); + if (m_lookahead == XMLToken::Name) + match(XMLToken::Name); + else + match(XMLToken::NMToken); + + s(); + + while (m_lookahead == XMLToken::Pipe) + { + match(XMLToken::Pipe); + + s(); + + if (find(enums.begin(), enums.end(), m_token) != enums.end()) + not_valid("Duplicate token in enumerated attribute declaration ('" + m_token + "')"); + + enums.push_back(m_token); + if (m_lookahead == XMLToken::Name) + match(XMLToken::Name); + else + match(XMLToken::NMToken); + + s(); + } + + s(); + + match(XMLToken::CloseParenthesis); + + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::Enumerated, enums)); + } + else + { + std::string type = m_token; + match(XMLToken::Name); + + std::vector notations; + + if (type == "CDATA") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::CDATA)); + else if (type == "ID") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::ID)); + else if (type == "IDREF") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::IDREF)); + else if (type == "IDREFS") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::IDREFS)); + else if (type == "ENTITY") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::ENTITY)); + else if (type == "ENTITIES") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::ENTITIES)); + else if (type == "NMTOKEN") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::NMTOKEN)); + else if (type == "NMTOKENS") + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::NMTOKENS)); + else if (type == "NOTATION") + { + s(true); + match(XMLToken::OpenParenthesis); + s(); + + notations.push_back(m_token); + match(XMLToken::Name); + + s(); + + while (m_lookahead == XMLToken::Pipe) + { + match(XMLToken::Pipe); + + s(); + + if (find(notations.begin(), notations.end(), m_token) != notations.end()) + not_valid("Duplicate token in enumerated attribute declaration ('" + m_token + "')"); + notations.push_back(m_token); + match(XMLToken::Name); + + s(); + } + + s(); + + match(XMLToken::CloseParenthesis); + + attribute.reset(new doctype::attribute_(name, doctype::AttributeType::Notation, notations)); + } + else + not_well_formed("invalid attribute type"); + } + + // att def + + s(true); + + std::string value; + + switch (m_lookahead) + { + case XMLToken::Required: + match(m_lookahead); + attribute->set_default(doctype::AttributeDefault::Required, ""); + break; + + case XMLToken::Implied: + match(m_lookahead); + attribute->set_default(doctype::AttributeDefault::Implied, ""); + break; + + case XMLToken::Fixed: + { + match(m_lookahead); + if (attribute->get_type() == doctype::AttributeType::ID) + not_valid("the default declaration for an ID attribute declaration should be #IMPLIED or #REQUIRED"); + + s(true); + + std::string value = m_token; + normalize_attribute_value(value, attribute->get_type() == doctype::AttributeType::CDATA); + if (not value.empty() and not attribute->validate_value(value, m_general_entities)) + { + not_valid("default value '" + value + "' for attribute '" + name + "' is not valid"); + } + + attribute->set_default(doctype::AttributeDefault::Fixed, value); + match(XMLToken::String); + break; + } + + default: + { + if (attribute->get_type() == doctype::AttributeType::ID) + not_valid("the default declaration for an ID attribute declaration should be #IMPLIED or #REQUIRED"); + + if (m_standalone) + not_valid("Document cannot be standalone since there is a default value for an attribute"); + + std::string value = m_token; + normalize_attribute_value(value, attribute->get_type() == doctype::AttributeType::CDATA); + collapse_spaces(value); + if (not value.empty() and not attribute->validate_value(value, m_general_entities)) + { + not_valid("default value '" + value + "' for attribute '" + name + "' is not valid"); + } + attribute->set_default(doctype::AttributeDefault::None, value); + match(XMLToken::String); + break; + } + } + + if (attribute->get_type() == doctype::AttributeType::ID) + { + const doctype::attribute_list& atts = (*dte)->get_attributes(); + if (std::find_if(atts.begin(), atts.end(), + [](auto a) { return a->get_type() == doctype::AttributeType::ID; }) != atts.end()) + not_valid("only one attribute per element can have the ID type"); + } + + attribute->set_external(m_in_external_dtd); + // attribute->version(m_version); + (*dte)->add_attribute(attribute.release()); + } + + match(XMLToken::GreaterThan); +} + +void parser_imp::notation_decl() +{ + match(XMLToken::Notation); + s(true); + + std::string name = m_token, pubid, sysid; + + if (m_validating_ns and name.find(':') != std::string::npos) + not_well_formed("Notation names should not contain a colon"); + + if (m_notations.count(name) > 0) + not_valid("notation names should be unique"); + m_notations.insert(name); + + match(XMLToken::Name); + s(true); + + if (m_token == "SYSTEM") + { + match(XMLToken::Name); + s(true); + + sysid = m_token; + match(XMLToken::String); + + if (not is_valid_system_literal(sysid)) + not_well_formed("invalid system literal"); + } + else if (m_token == "PUBLIC") + { + match(XMLToken::Name); + s(true); + + pubid = m_token; + match(XMLToken::String); + + // validate the public ID + if (not is_valid_public_id(pubid)) + not_well_formed("Invalid public ID"); + + s(); + + if (m_lookahead == XMLToken::String) + { + sysid = m_token; + match(XMLToken::String); + } + } + else + not_well_formed("Expected either SYSTEM or PUBLIC"); + + s(); + + match(XMLToken::GreaterThan); + + collapse_spaces(sysid); + + ba::replace_all(pubid, "\t", " "); + ba::replace_all(pubid, "\n", " "); + collapse_spaces(pubid); + + m_parser.notation_decl(name, sysid, pubid); +} + +data_source* parser_imp::get_data_source(const std::string& pubid, std::string uri) +{ + data_source *result = nullptr; + + std::istream *is = m_parser.external_entity_ref(m_source.top()->base(), pubid, uri); + if (is != nullptr) + { + result = new istream_data_source(is); + + std::string::size_type s = uri.rfind('/'); + if (s == std::string::npos) + result->base(m_source.top()->base()); + else + { + uri.erase(s, std::string::npos); + + if (is_absolute_path(uri)) + result->base(uri); + else + result->base(m_source.top()->base() + '/' + uri); + } + } + + return result; +} + +std::tuple parser_imp::read_external_id() +{ + std::string result; + std::string path; + + std::string pubid, uri; + + if (m_token == "SYSTEM") + { + match(XMLToken::Name); + s(true); + + uri = m_token; + + if (not is_valid_system_literal(uri)) + not_well_formed("invalid system literal"); + } + else if (m_token == "PUBLIC") + { + match(XMLToken::Name); + s(true); + + pubid = m_token; + match(XMLToken::String); + + // validate the public ID + if (not is_valid_public_id(pubid)) + not_well_formed("Invalid public ID"); + + s(true); + uri = m_token; + } + else + not_well_formed("Expected external id starting with either SYSTEM or PUBLIC"); + + std::unique_ptr data(get_data_source(pubid, uri)); + + if (data) + { + push_data_source(data.release(), false); + + path = m_source.top()->base(); + + m_lookahead = get_next_token(); + + text_decl(); + + if (m_lookahead != XMLToken::Eof) + { + result = m_token; + + while (m_buffer_ptr > m_buffer.begin()) + append(result, *--m_buffer_ptr); + + while (unicode ch = m_source.top()->get_next_char()) + append(result, ch); + } + + pop_data_source(); + } + + return std::make_tuple(path, result); +} + +void parser_imp::parse_parameter_entity_declaration(std::string& s) +{ + std::string result; + + int state = 0; + unicode charref = 0; + std::string name; + int open = 0; + + for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) + { + unicode c = *i; + + switch (state) + { + case 0: + if (c == '&') + state = 1; + else if (c == '%') + { + if (m_allow_peref) + { + name.clear(); + state = 20; + } + else + not_well_formed("parameter entities may not occur in declarations that are not in an external subset"); + } + else if (c == '<') + { + ++open; + append(result, c); + } + else if (c == '>') + { + --open; + append(result, c); + } + else if (not is_char(c)) + not_well_formed("Invalid character in entity value"); + else + append(result, c); + break; + + case 1: + if (c == '#') + state = 2; + else + { + result += '&'; + append(result, c); + state = 0; + } + break; + + case 2: + if (c == 'x') + state = 4; + else if (c >= '0' and c <= '9') + { + charref = c - '0'; + state = 3; + } + else + not_well_formed("invalid character reference"); + break; + + case 3: + if (c >= '0' and c <= '9') + charref = charref * 10 + (c - '0'); + else if (c == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character referenced: " + to_hex(charref) + '\''); + + append(result, charref); + state = 0; + } + else + not_well_formed("invalid character reference"); + break; + + case 4: + if (c >= 'a' and c <= 'f') + { + charref = c - 'a' + 10; + state = 5; + } + else if (c >= 'A' and c <= 'F') + { + charref = c - 'A' + 10; + state = 5; + } + else if (c >= '0' and c <= '9') + { + charref = c - '0'; + state = 5; + } + else + not_well_formed("invalid character reference"); + break; + + case 5: + if (c >= 'a' and c <= 'f') + charref = (charref << 4) + (c - 'a' + 10); + else if (c >= 'A' and c <= 'F') + charref = (charref << 4) + (c - 'A' + 10); + else if (c >= '0' and c <= '9') + charref = (charref << 4) + (c - '0'); + else if (c == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); + + append(result, charref); + state = 0; + } + else + not_well_formed("invalid character reference"); + break; + + case 20: + if (c == ';') + { + const doctype::entity& e = get_parameter_entity(name); + result += e.get_replacement(); + state = 0; + } + else if (is_name_char(c)) + append(name, c); + else + not_well_formed("invalid parameter entity reference"); + break; + + default: + assert(false); + not_well_formed("invalid state"); + } + } + + if (state != 0) + not_well_formed("invalid reference"); + + if (open != 0) + not_valid("invalid reference"); + + swap(s, result); +} + +// parse out the general and parameter entity references in a value std::string +// for a general entity reference which is about to be stored. +void parser_imp::parse_general_entity_declaration(std::string& s) +{ + std::string result; + + int state = 0; + unicode charref = 0; + std::string name; + + auto sp = s.begin(); + auto se = s.end(); + + while (sp < se) + { + unicode c; + std::tie(c, sp) = get_first_char(sp); + + switch (state) + { + case 0: + if (c == '&') + state = 1; + else if (c == '%') + { + if (m_allow_peref) + { + name.clear(); + state = 20; + } + else + not_well_formed("parameter entities may not occur in declarations that are not in an external subset"); + } + else if (not is_char(c)) + not_well_formed("Invalid character in entity value"); + else + append(result, c); + break; + + case 1: + if (c == '#') + state = 2; + else if (is_name_start_char(c)) + { + name.clear(); + append(name, c); + state = 10; + } + break; + + case 2: + if (c == 'x') + state = 4; + else if (c >= '0' and c <= '9') + { + charref = c - '0'; + state = 3; + } + else + not_well_formed("invalid character reference"); + break; + + case 3: + if (c >= '0' and c <= '9') + charref = charref * 10 + (c - '0'); + else if (c == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); + + append(result, charref); + state = 0; + } + else + not_well_formed("invalid character reference"); + break; + + case 4: + if (c >= 'a' and c <= 'f') + { + charref = c - 'a' + 10; + state = 5; + } + else if (c >= 'A' and c <= 'F') + { + charref = c - 'A' + 10; + state = 5; + } + else if (c >= '0' and c <= '9') + { + charref = c - '0'; + state = 5; + } + else + not_well_formed("invalid character reference"); + break; + + case 5: + if (c >= 'a' and c <= 'f') + charref = (charref << 4) + (c - 'a' + 10); + else if (c >= 'A' and c <= 'F') + charref = (charref << 4) + (c - 'A' + 10); + else if (c >= '0' and c <= '9') + charref = (charref << 4) + (c - '0'); + else if (c == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); + + append(result, charref); + state = 0; + } + else + not_well_formed("invalid character reference"); + break; + + case 10: + if (c == ';') + { + result += '&'; + result += name; + result += ';'; + + state = 0; + } + else if (is_name_char(c)) + append(name, c); + else + not_well_formed("invalid entity reference"); + break; + + case 20: + if (c == ';') + { + const doctype::entity& e = get_parameter_entity(name); + result += e.get_replacement(); + state = 0; + } + else if (is_name_char(c)) + append(name, c); + else + not_well_formed("invalid parameter entity reference"); + break; + + default: + assert(false); + not_well_formed("invalid state"); + } + } + + if (state != 0) + not_well_formed("invalid reference"); + + swap(s, result); +} + +std::string parser_imp::normalize_attribute_value() +{ + std::string result; + + unicode charref = 0; + std::string name; + + enum State + { + state_Start, + state_ReferenceStart, + state_CharReferenceStart, + state_HexCharReference, + state_HexCharReference2, + state_DecCharReference, + state_EntityReference, + + } state = state_Start; + + for (;;) + { + unicode c = get_next_char(); + + if (c == 0) + break; + + if (c == '<') + not_well_formed("Attribute values may not contain '<' character"); + + switch (state) + { + case state_Start: + if (c == ' ' or c == '\t' or c == '\r' or c == '\n') + result += ' '; + else if (c == '&') + state = state_ReferenceStart; + else + append(result, c); + break; + + case state_ReferenceStart: + if (c == '#') + state = state_CharReferenceStart; + else if (is_name_start_char(c)) + { + name.clear(); + append(name, c); + state = state_EntityReference; + } + else + not_well_formed("invalid reference found in attribute value"); + break; + + case state_CharReferenceStart: + if (c == 'x') + state = state_HexCharReference; + else if (c >= '0' and c <= '9') + { + charref = c - '0'; + state = state_DecCharReference; + } + else + not_well_formed("invalid character reference"); + break; + + case state_DecCharReference: + if (c >= '0' and c <= '9') + charref = charref * 10 + (c - '0'); + else if (c == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); + + append(result, charref); + state = state_Start; + } + else + not_well_formed("invalid character reference"); + break; + + case state_HexCharReference: + if (c >= 'a' and c <= 'f') + { + charref = c - 'a' + 10; + state = state_HexCharReference2; + } + else if (c >= 'A' and c <= 'F') + { + charref = c - 'A' + 10; + state = state_HexCharReference2; + } + else if (c >= '0' and c <= '9') + { + charref = c - '0'; + state = state_HexCharReference2; + } + else + not_well_formed("invalid character reference"); + break; + + case state_HexCharReference2: + if (c >= 'a' and c <= 'f') + charref = (charref << 4) + (c - 'a' + 10); + else if (c >= 'A' and c <= 'F') + charref = (charref << 4) + (c - 'A' + 10); + else if (c >= '0' and c <= '9') + charref = (charref << 4) + (c - '0'); + else if (c == ';') + { + if (not is_referrable_char(charref)) + not_well_formed("Illegal character referenced: '" + to_hex(charref) + '\''); + + append(result, charref); + state = state_Start; + } + else + not_well_formed("invalid character reference"); + break; + + case state_EntityReference: + if (c == ';') + { + if (std::find(m_entities_on_stack.begin(), m_entities_on_stack.end(), name) != m_entities_on_stack.end()) + not_well_formed("infinite recursion in nested entity references"); + + m_entities_on_stack.push_back(name); + + const doctype::entity& e = get_general_entity(name); + + if (e.is_external()) + not_well_formed("attribute value may not contain external entity reference"); + + if (e.is_externally_defined() and m_standalone) + not_well_formed("document marked as standalone but an external entity is referenced"); + + push_data_source(new entity_data_source(e.get_replacement(), m_source.top()->base()), false); + + std::string replacement = normalize_attribute_value(); + result += replacement; + + state = state_Start; + + m_entities_on_stack.pop_back(); + } + else if (is_name_char(c)) + append(name, c); + else + not_well_formed("invalid entity reference"); + break; + + default: + assert(false); + not_well_formed("invalid state"); + } + } + + if (state != state_Start) + not_well_formed("invalid reference"); + + m_source.pop(); + + return result; +} + +void parser_imp::collapse_spaces(std::string& s) +{ + auto i = s.begin(), o = s.begin();; + bool space = true; + + while (i != s.end()) + { + if (*i == ' ') + { + if (not space) + *o++ = ' '; + ++i; + space = true; + } + else + { + *o++ = *i++; + space = false; + } + } + + if (space and o != s.begin()) + --o; + + s.erase(o, s.end()); +} + +void parser_imp::element(doctype::validator& valid) +{ + save_state in_content(m_in_content, false); + + match(XMLToken::STag); + std::string name = m_token; + match(XMLToken::Name); + + if (not valid.allow(name)) + not_valid("element '" + name + "' not expected at this position"); + + const doctype::element_ *dte = get_element(name); + + if (m_has_dtd and dte == nullptr and m_validating) + not_valid("Element '" + name + "' is not defined in DTD"); + + doctype::validator sub_valid(dte); + + std::list attrs; + + ns_state ns(this); + std::set seen; + + for (;;) + { + if (m_lookahead != XMLToken::Space) + break; + + s(true); + + if (m_lookahead != XMLToken::Name) + break; + + std::string attr_name = m_token; + match(XMLToken::Name); + + if (seen.count(attr_name) > 0) + not_well_formed("multiple values for attribute '" + attr_name + "'"); + seen.insert(attr_name); + + eq(); + + const doctype::attribute_ *dta = nullptr; + if (dte != nullptr) + dta = dte->get_attribute(attr_name); + if (dta == nullptr and not m_validating and attr_name == "xml:space") + dta = m_xmlSpaceAttr.get(); + + if (dta == nullptr and m_validating) + not_valid("undeclared attribute '" + attr_name + "'"); + + std::string attr_value = normalize_attribute_value(m_token, dta == nullptr or dta->get_type() == doctype::AttributeType::CDATA); + match(XMLToken::String); + + if (m_validating and + dta != nullptr and + dta->get_default_type() == doctype::AttributeDefault::Fixed and + attr_value != std::get<1>(dta->get_default())) + { + not_valid("invalid value specified for fixed attribute"); + } + + // had a crash suddenly here deep down in ba::starts_with... + if (attr_name == "xmlns" or attr_name.compare(0, 6, "xmlns:", 6) == 0) // namespace support + { + if (not ((m_version > 1.0f and attr_value.empty()) or is_valid_url(attr_value))) + not_well_formed("Not a valid namespace URI: " + attr_value); + + if (not (m_version > 1.0f and attr_value.empty()) and ns.is_known_uri(attr_value)) + not_well_formed("This uri is repeated: " + attr_value); + + if (attr_value == "http://www.w3.org/XML/1998/namespace" or attr_value == "http://www.w3.org/2000/xmlns/") + not_well_formed("The xml namespace is reserved"); + + if (attr_name.length() == 5) + { + ns.default_ns(attr_value); + m_parser.start_namespace_decl("", attr_value); + } + else if (attr_name.length() == 6) + not_well_formed("Invalid xmlns: "); + else + { + std::string prefix = attr_name.substr(6); + + if (iequals(prefix, "xml") or iequals(prefix, "xmlns")) + not_well_formed(prefix + " is a preserved prefix"); + + if (m_version > 1.0f and attr_value.empty()) + ns.unbind(prefix); + else + { + ns.bind(prefix, attr_value); + m_parser.start_namespace_decl(prefix, attr_value); + } + } + + // if (not attr_value.empty()) + // ns.m_known_uris.insert(attr_value); + } + else + { + bool id = (attr_name == "xml:id"); + + if (dta != nullptr) + { + std::string v(attr_value); + + if (not dta->validate_value(attr_value, m_general_entities)) + { + if (dta == m_xmlSpaceAttr.get()) + not_well_formed("invalid value ('" + attr_value + "') for attribute " + attr_name + ""); + else + not_valid("invalid value ('" + attr_value + "') for attribute " + attr_name + ""); + } + + if (m_validating and m_standalone and dta->is_external() and v != attr_value) + not_valid("attribute value modified as a result of an external defined attlist declaration, which is not valid in a standalone document"); + + if (dta->get_type() == doctype::AttributeType::ID) + { + id = true; + + if (m_validating_ns and attr_value.find(':') != std::string::npos) + not_valid("ID attribute value should not contain a colon"); + + if (m_ids.count(attr_value) > 0) + { + not_valid("attribute value ('" + attr_value + "') for attribute '" + attr_name + "' is not unique"); + } + + m_ids.insert(attr_value); + + if (m_unresolved_ids.count(attr_value) > 0) + m_unresolved_ids.erase(attr_value); + } + else if (dta->get_type() == doctype::AttributeType::IDREF) + { + if (attr_value.empty()) + not_valid("attribute value for attribute '" + attr_name + "' may not be empty"); + + if (not m_ids.count(attr_value)) + m_unresolved_ids.insert(attr_value); + } + else if (dta->get_type() == doctype::AttributeType::IDREFS) + { + if (attr_value.empty()) + not_valid("attribute value for attribute '" + attr_name + "' may not be empty"); + + std::string::size_type b = 0, e = attr_value.find(' '); + while (e != std::string::npos) + { + if (e - b > 0) + { + std::string id = attr_value.substr(b, e); + if (not m_ids.count(id)) + m_unresolved_ids.insert(id); + } + b = e + 1; + e = attr_value.find(' ', b); + } + + if (b != std::string::npos and b < attr_value.length()) + { + std::string id = attr_value.substr(b); + if (not m_ids.count(id)) + m_unresolved_ids.insert(id); + } + } + } + + detail::attr attr; + attr.m_name = attr_name; + attr.m_value = attr_value; + attr.m_id = id; + + if (m_ns != nullptr and dta == nullptr) + { + std::string::size_type d = attr_name.find(':'); + if (d != std::string::npos) + { + if (attr_name.find(':', d + 1) != std::string::npos) + not_well_formed("Multiple colons in attribute name"); + + auto prefix = attr_name.substr(0, d); + if (not iequals(prefix, "xml")) + { + std::string ns = m_ns->ns_for_prefix(prefix); + + if (ns.empty()) + not_well_formed("Unbound attribute prefix"); + + attr.m_ns = ns; + attr.m_name = attr_name.substr(d + 1); + } + } + } + + attrs.push_back(attr); + } + } + + if (dte == nullptr) + { + if (name[0] == ':') + not_well_formed("Element name should not start with colon"); + + auto cp = name.find(':'); + if (cp != std::string::npos) + { + auto prefix = name.substr(0, cp); + if (not ns.is_known_prefix(prefix)) + not_well_formed("Unknown prefix for element " + name); + } + } + else // add missing attributes + { + for (const doctype::attribute_ *dta : dte->get_attributes()) + { + std::string attr_name = dta->name(); + + std::list::iterator attr = find_if(attrs.begin(), attrs.end(), + [attr_name](auto& a) { return a.m_name == attr_name; }); + + doctype::AttributeDefault defType; + std::string defValue; + + std::tie(defType, defValue) = dta->get_default(); + + if (defType == doctype::AttributeDefault::Required) + { + if (attr == attrs.end()) + not_valid("missing #REQUIRED attribute '" + attr_name + "' for element '" + name + "'"); + } + else if (not defValue.empty() and attr == attrs.end()) + { + if (m_validating and m_standalone and dta->is_external()) + not_valid("default value for attribute defined in external declaration which is not allowed in a standalone document"); + + detail::attr attr; + attr.m_name = attr_name; + attr.m_value = normalize_attribute_value(defValue, dta->get_type() == doctype::AttributeType::CDATA); + attr.m_id = false; + + if (m_ns != nullptr) + { + std::string::size_type d = attr_name.find(':'); + if (d != std::string::npos) + { + std::string ns = m_ns->ns_for_prefix(attr_name.substr(0, d)); + + if (not ns.empty()) + { + attr.m_ns = ns; + attr.m_name = attr_name.substr(d + 1); + } + } + } + + attrs.push_back(attr); + } + } + } + + // now find out the namespace we're supposed to pass + std::string uri, raw(name); + + std::string::size_type c = name.find(':'); + if (c != std::string::npos and c > 0) + { + uri = ns.ns_for_prefix(name.substr(0, c)); + name.erase(0, c + 1); + } + else + uri = ns.default_ns(); + + // sort the attributes (why? disabled to allow similar output) + attrs.sort([](auto& a, auto& b) { return a.m_name < b.m_name; }); + + if (m_lookahead == XMLToken::Slash) + { + match(XMLToken::Slash); + m_parser.start_element(name, uri, attrs); + m_parser.end_element(name, uri); + } + else + { + m_parser.start_element(name, uri, attrs); + + m_in_content = true; + match(XMLToken::GreaterThan); + + if (m_lookahead != XMLToken::ETag) + content(sub_valid); + + m_in_content = false; + + match(XMLToken::ETag); + + if (m_token != raw) + not_well_formed("end tag does not match start tag"); + + match(XMLToken::Name); + + s(); + + m_parser.end_element(name, uri); + } + + in_content.reset(); + match(XMLToken::GreaterThan); + + if (m_validating and dte != nullptr and not sub_valid.done()) + not_valid("missing child elements for element '" + dte->name() + "'"); +} + +void parser_imp::content(doctype::validator& valid) +{ + if (valid.get_content_spec() == doctype::ContentSpecType::Empty and m_lookahead != XMLToken::ETag) + not_valid("Content is not allowed in an element declared to be EMPTY"); + + do + { + switch (m_lookahead) + { + case XMLToken::Content: + case XMLToken::Space: + if (valid.get_content_spec() == doctype::ContentSpecType::Empty) + not_valid("character data not allowed in EMPTY element"); + else if (valid.get_content_spec() == doctype::ContentSpecType::Children and m_lookahead == XMLToken::Content) + not_valid("character data '" + m_token + "' not allowed in element"); + m_parser.character_data(m_token); + match(m_lookahead); + break; + + case XMLToken::CharRef: + if (valid.get_content_spec() == doctype::ContentSpecType::Empty) + not_valid("data not allowed in EMPTY element"); + else if (valid.get_content_spec() == doctype::ContentSpecType::Children and is_space(m_token)) + not_valid("Element may not contain reference to space"); + m_parser.character_data(m_token); + match(m_lookahead); + break; + + case XMLToken::Reference: + { + if (std::find(m_entities_on_stack.begin(), m_entities_on_stack.end(), m_token) != m_entities_on_stack.end()) + not_well_formed("infinite recursion of entity references"); + + m_entities_on_stack.push_back(m_token); + + const doctype::entity& e = get_general_entity(m_token); + + if (e.is_externally_defined() and m_standalone) + not_well_formed("document marked as standalone but an external entity is referenced"); + + if (not e.is_parsed()) + not_well_formed("content has a general entity reference to an unparsed entity"); + + push_data_source(new entity_data_source(e.get_replacement(), m_source.top()->base()), false); + + m_lookahead = get_next_content(); + + save_state in_external_dtd(m_in_external_dtd, e.is_externally_defined()); + + // a children production may not contain references to spaces + if (m_lookahead == XMLToken::Space and valid.get_content_spec() == doctype::ContentSpecType::Children) + { + auto space = m_token; + match(m_lookahead); + + if (m_lookahead == XMLToken::Eof) + not_valid("Element may not contain reference to space"); + m_parser.character_data(space); + } + + if (m_lookahead != XMLToken::Eof) + content(valid); + + if (m_lookahead != XMLToken::Eof) + not_well_formed("entity reference should be a valid content production"); + + pop_data_source(); + + match(XMLToken::Reference); + + m_entities_on_stack.pop_back(); + break; + } + + case XMLToken::STag: + element(valid); + break; + + case XMLToken::PI: + pi(); + break; + + case XMLToken::Comment: + comment(); + break; + + case XMLToken::CDSect: + if (valid.get_content_spec() != doctype::ContentSpecType::Mixed and valid.get_content_spec() != doctype::ContentSpecType::Any) + not_valid("character data '" + m_token + "' not allowed in element"); + + m_parser.start_cdata_section(); + m_parser.character_data(m_token); + + if (is_space(m_token) and valid.get_content_spec() == doctype::ContentSpecType::Children) + not_valid("Element may not contain CDATA section containing only space"); + + m_parser.end_cdata_section(); + + match(XMLToken::CDSect); + break; + + default: + match(XMLToken::Content); // will fail and report error + } + } while (m_lookahead != XMLToken::ETag and m_lookahead != XMLToken::Eof); +} + +void parser_imp::comment() +{ + save_state in_content(m_in_content, false); + + // m_lookahead == XMLToken::Comment + // read characters until we reach --> + // check all characters in between for validity + + enum + { + state_Start, + state_FirstHyphenSeen, + state_SecondHyphenSeen, + state_CommentClosed + } state = state_Start; + + m_token.clear(); + + while (state != state_CommentClosed) + { + unicode ch = get_next_char(); + + if (ch == 0) + not_well_formed("runaway comment"); + if (not is_char(ch)) + not_well_formed("illegal character in content: '" + to_hex(ch) + '\''); + + switch (state) + { + case state_Start: + if (ch == '-') + state = state_FirstHyphenSeen; + break; + + case state_FirstHyphenSeen: + if (ch == '-') + state = state_SecondHyphenSeen; + else + state = state_Start; + break; + + case state_SecondHyphenSeen: + if (ch == '>') + state = state_CommentClosed; + else + not_well_formed("double hyphen found in comment"); + break; + + case state_CommentClosed: + assert(false); + } + } + + assert(m_token.length() >= 3); + m_token.erase(m_token.end() - 3, m_token.end()); + m_parser.comment(m_token); + + in_content.reset(); + match(XMLToken::Comment); +} + +void parser_imp::pi() +{ + save_state in_content(m_in_content, false); + + // m_lookahead == XMLToken::PI + // read characters until we reach --> + // check all characters in between for validity + + std::string pi_target = m_token.substr(2); + + if (pi_target.empty()) + not_well_formed("processing instruction target missing"); + + if (m_validating_ns and pi_target.find(':') != std::string::npos) + not_well_formed("processing instruction name should not contain a colon"); + + // we treat the xml processing instruction separately. + if (m_token.substr(2) == "xml") + not_well_formed("xml declaration are only valid as the start of the file"); + else if (iequals(pi_target, "xml")) + not_well_formed("') + state = state_PIClosed; + else if (ch != '?') + state = state_Data; + break; + + case state_PIClosed: + assert(false); + } + } + + m_token.erase(m_token.end() - 2, m_token.end()); + m_parser.processing_instruction(pi_target, m_token); + + in_content.reset(); + match(XMLToken::PI); +} + +// -------------------------------------------------------------------- + +parser::parser(std::istream& data) + : m_impl(new parser_imp(data, *this)), m_istream(nullptr) +{ +} + +parser::parser(const std::string& data) +{ + m_istream = new std::istringstream(data); + m_impl = new parser_imp(*m_istream, *this); +} + +parser::~parser() +{ + delete m_impl; + delete m_istream; +} + +void parser::parse(bool validate, bool validate_ns) +{ + m_impl->parse(validate, validate_ns); +} + +void parser::xml_decl(encoding_type encoding, bool standalone, float version) +{ + if (xml_decl_handler) + xml_decl_handler(encoding, standalone, version); +} + +void parser::start_element(const std::string& name, const std::string& uri, const std::list &atts) +{ + if (start_element_handler) + start_element_handler(name, uri, atts); +} + +void parser::end_element(const std::string& name, const std::string& uri) +{ + if (end_element_handler) + end_element_handler(name, uri); +} + +void parser::character_data(const std::string& data) +{ + if (character_data_handler) + character_data_handler(data); +} + +void parser::processing_instruction(const std::string& target, const std::string& data) +{ + if (processing_instruction_handler) + processing_instruction_handler(target, data); +} + +void parser::comment(const std::string& data) +{ + if (comment_handler) + comment_handler(data); +} + +void parser::start_cdata_section() +{ + if (start_cdata_section_handler) + start_cdata_section_handler(); +} + +void parser::end_cdata_section() +{ + if (end_cdata_section_handler) + end_cdata_section_handler(); +} + +void parser::start_namespace_decl(const std::string& prefix, const std::string& uri) +{ + if (start_namespace_decl_handler) + start_namespace_decl_handler(prefix, uri); +} + +void parser::end_namespace_decl(const std::string& prefix) +{ + if (end_namespace_decl_handler) + end_namespace_decl_handler(prefix); +} + +void parser::doctype_decl(const std::string& root, const std::string& publicId, const std::string& uri) +{ + if (doctype_decl_handler) + doctype_decl_handler(root, publicId, uri); +} + +void parser::notation_decl(const std::string& name, const std::string& systemId, const std::string& publicId) +{ + if (notation_decl_handler) + notation_decl_handler(name, systemId, publicId); +} + +std::istream *parser::external_entity_ref(const std::string& base, const std::string& pubid, const std::string& uri) +{ + std::istream *result = nullptr; + if (external_entity_ref_handler) + result = external_entity_ref_handler(base, pubid, uri); + return result; +} + +void parser::report_invalidation(const std::string& msg) +{ + if (report_invalidation_handler) + report_invalidation_handler(msg); +} + +} // namespace zeep::xml diff -Nru libzeep-5.0.1/lib-xml/test/parser-test.cpp libzeep-5.0.2/lib-xml/test/parser-test.cpp --- libzeep-5.0.1/lib-xml/test/parser-test.cpp 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/lib-xml/test/parser-test.cpp 2020-11-14 07:31:12.000000000 +0000 @@ -367,6 +367,7 @@ ("single", po::value(), "Test a single XML file") ("dump", po::value(), "Dump the structure of a single XML file") ("print-ids", "Print the ID's of failed tests") + ("conf", po::value(), "Configuration file") ; po::positional_options_description p; @@ -420,7 +421,6 @@ } else { - fs::path xmlconfFile("XML-Test-Suite/xmlconf/xmlconf.xml"); if (vm.count("test")) xmlconfFile = vm["test"].as(); diff -Nru libzeep-5.0.1/libzeep.pc libzeep-5.0.2/libzeep.pc --- libzeep-5.0.1/libzeep.pc 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/libzeep.pc 2020-11-14 07:31:12.000000000 +0000 @@ -0,0 +1,10 @@ +prefix=/usr/local +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: libzeep +Description: C++ library for building web applications +Version: 5.0 +Libs: -L${libdir} -lzeep +Cflags: -I${includedir} diff -Nru libzeep-5.0.1/libzeep.pc.in libzeep-5.0.2/libzeep.pc.in --- libzeep-5.0.1/libzeep.pc.in 1970-01-01 00:00:00.000000000 +0000 +++ libzeep-5.0.2/libzeep.pc.in 2020-11-14 07:31:12.000000000 +0000 @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libzeep +Description: C++ library for building web applications +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lzeep +Cflags: -I${includedir} diff -Nru libzeep-5.0.1/.travis.yml libzeep-5.0.2/.travis.yml --- libzeep-5.0.1/.travis.yml 2020-11-09 08:47:48.000000000 +0000 +++ libzeep-5.0.2/.travis.yml 2020-11-14 07:31:12.000000000 +0000 @@ -2,15 +2,20 @@ os: - linux - # - osx + - osx dist: focal -# osx_image: xcode12 +osx_image: xcode12 compiler: - gcc - # - clang + - clang + +arch: + - amd64 + - ppc64le + - s390x addons: apt: @@ -27,12 +32,13 @@ - fop before_install: - - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew update ; fi - - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install boost make; fi + - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew update ; fi + - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install make; fi script: - ./configure - if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake; else make; fi + - if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake test; else make test; fi - if [ "$TRAVIS_OS_NAME" = "osx" ]; then sudo gmake install; else sudo make install; fi jobs: