--- libxml2-2.7.8.dfsg.orig/tree.c +++ libxml2-2.7.8.dfsg/tree.c @@ -678,11 +678,13 @@ * XML_BUFFER_ALLOC_EXACT - use exact sizes, keeps memory usage down * XML_BUFFER_ALLOC_DOUBLEIT - double buffer when extra needed, * improves performance + * XML_BUFFER_ALLOC_BOUNDED - limit the upper size of the buffer */ void xmlSetBufferAllocationScheme(xmlBufferAllocationScheme scheme) { if ((scheme == XML_BUFFER_ALLOC_EXACT) || - (scheme == XML_BUFFER_ALLOC_DOUBLEIT)) + (scheme == XML_BUFFER_ALLOC_DOUBLEIT) || + (scheme == XML_BUFFER_ALLOC_BOUNDED)) xmlBufferAllocScheme = scheme; } @@ -1567,6 +1569,7 @@ else if ((ent != NULL) && (ent->children == NULL)) { xmlNodePtr temp; + ent->children = (xmlNodePtr) -1; ent->children = xmlStringGetNodeList(doc, (const xmlChar*)node->content); ent->owner = 1; @@ -7099,6 +7102,19 @@ size = buf->use + len + 100; #endif + if (buf->alloc == XML_BUFFER_ALLOC_BOUNDED) { + /* + * Used to provide parsing limits + */ + if ((buf->use + len >= XML_MAX_TEXT_LENGTH) || + (buf->size >= XML_MAX_TEXT_LENGTH)) { + xmlTreeErrMemory("buffer error: text too long"); + return(0); + } + if (size >= XML_MAX_TEXT_LENGTH) + size = XML_MAX_TEXT_LENGTH; + } + if ((buf->alloc == XML_BUFFER_ALLOC_IO) && (buf->contentIO != NULL)) { size_t start_buf = buf->content - buf->contentIO; @@ -7209,7 +7225,15 @@ return(0); if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return(0); - + if (buf->alloc == XML_BUFFER_ALLOC_BOUNDED) { + /* + * Used to provide parsing limits + */ + if (size >= XML_MAX_TEXT_LENGTH) { + xmlTreeErrMemory("buffer error: text too long"); + return(0); + } + } /* Don't resize if we don't have to */ if (size < buf->size) return 1; @@ -7388,6 +7412,15 @@ } needSize = buf->use + len + 2; if (needSize > buf->size){ + if (buf->alloc == XML_BUFFER_ALLOC_BOUNDED) { + /* + * Used to provide parsing limits + */ + if (needSize >= XML_MAX_TEXT_LENGTH) { + xmlTreeErrMemory("buffer error: text too long"); + return(-1); + } + } if (!xmlBufferResize(buf, needSize)){ xmlTreeErrMemory("growing buffer"); return XML_ERR_NO_MEMORY; --- libxml2-2.7.8.dfsg.orig/libxml-2.0-uninstalled.pc.in +++ libxml2-2.7.8.dfsg/libxml-2.0-uninstalled.pc.in @@ -8,5 +8,6 @@ Version: @VERSION@ Description: libXML library version2. Requires: -Libs: -L${libdir} -lxml2 @THREAD_LIBS@ @Z_LIBS@ @ICONV_LIBS@ @M_LIBS@ @LIBS@ +Libs: -L${libdir} -lxml2 +Libs.private: @BASE_THREAD_LIBS@ @THREAD_LIBS@ @Z_LIBS@ @ICONV_LIBS@ @M_LIBS@ @LIBS@ Cflags: -I${includedir} @XML_INCLUDEDIR@ @XML_CFLAGS@ --- libxml2-2.7.8.dfsg.orig/libxml.h +++ libxml2-2.7.8.dfsg/libxml.h @@ -13,6 +13,9 @@ #ifndef _LARGEFILE_SOURCE #define _LARGEFILE_SOURCE #endif +#ifndef _LARGEFILE64_SOURCE +#define _LARGEFILE64_SOURCE +#endif #ifndef _FILE_OFFSET_BITS #define _FILE_OFFSET_BITS 64 #endif --- libxml2-2.7.8.dfsg.orig/xmllint.c +++ libxml2-2.7.8.dfsg/xmllint.c @@ -2976,7 +2976,7 @@ printf("\t--huge : remove any internal arbitrary parser limits\n"); printf("\t--noent : substitute entity references by their value\n"); printf("\t--noout : don't output the result tree\n"); - printf("\t--path 'paths': provide a set of paths for resources\n"); + printf("\t--path 'paths' : provide a set of paths for resources\n"); printf("\t--load-trace : print trace of all external entites loaded\n"); printf("\t--nonet : refuse to fetch DTDs or entities over network\n"); printf("\t--nocompact : do not generate compact text nodes\n"); @@ -3032,7 +3032,7 @@ printf("\t--catalogs : use SGML catalogs from $SGML_CATALOG_FILES\n"); printf("\t otherwise XML Catalogs starting from \n"); printf("\t %s are activated by default\n", XML_XML_DEFAULT_CATALOG); - printf("\t--nocatalogs: deactivate all catalogs\n"); + printf("\t--nocatalogs : deactivate all catalogs\n"); #endif printf("\t--auto : generate a small doc on the fly\n"); #ifdef LIBXML_XINCLUDE_ENABLED --- libxml2-2.7.8.dfsg.orig/configure.in +++ libxml2-2.7.8.dfsg/configure.in @@ -70,6 +70,8 @@ AC_LIBTOOL_WIN32_DLL AM_PROG_LIBTOOL +AM_MAINTAINER_MODE + dnl dnl if the system support linker version scripts for symbol versioning dnl then add it @@ -84,7 +86,7 @@ esac fi AC_SUBST(VERSION_SCRIPT_FLAGS) -AM_CONDITIONAL([USE_VERSION_SCRIPT], [test -z "$VERSION_SCRIPT_FLAGS"]) +AM_CONDITIONAL([USE_VERSION_SCRIPT], [test -n "$VERSION_SCRIPT_FLAGS"]) dnl dnl We process the AC_ARG_WITH first so that later we can modify @@ -475,6 +477,7 @@ AC_CHECK_FUNCS(finite isnand fp_class class fpclass) AC_CHECK_FUNCS(strftime localtime gettimeofday ftime) AC_CHECK_FUNCS(stat _stat signal) +AC_CHECK_FUNCS(rand srand time) dnl Checking the standard string functions availability AC_CHECK_FUNCS(printf sprintf fprintf snprintf vfprintf vsprintf vsnprintf sscanf,, @@ -1319,7 +1322,7 @@ *) M_LIBS="-lm" ;; esac -XML_LIBS="-lxml2 $Z_LIBS $THREAD_LIBS $ICONV_LIBS $M_LIBS $LIBS" +XML_LIBS="-lxml2" XML_LIBTOOLLIBS="libxml2.la" AC_SUBST(WITH_ICONV) --- libxml2-2.7.8.dfsg.orig/HTMLparser.c +++ libxml2-2.7.8.dfsg/HTMLparser.c @@ -300,6 +300,7 @@ #define UPP(val) (toupper(ctxt->input->cur[(val)])) #define CUR_PTR ctxt->input->cur +#define BASE_PTR ctxt->input->base #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ @@ -2431,6 +2432,10 @@ (*in == '_') || (*in == '-') || (*in == ':') || (*in == '.')) in++; + + if (in == ctxt->input->end) + return(NULL); + if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); @@ -2474,6 +2479,10 @@ NEXTL(l); c = CUR_CHAR(l); } + + if (ctxt->input->base > ctxt->input->cur - len) + return(NULL); + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); } @@ -2725,31 +2734,43 @@ static xmlChar * htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { - const xmlChar *q; + size_t len = 0, startPosition = 0; xmlChar *ret = NULL; if (CUR == '"') { NEXT; - q = CUR_PTR; - while ((IS_CHAR_CH(CUR)) && (CUR != '"')) + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while ((IS_CHAR_CH(CUR)) && (CUR != '"')) { NEXT; + len++; + } if (!IS_CHAR_CH(CUR)) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished SystemLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR+startPosition), len); NEXT; } } else if (CUR == '\'') { NEXT; - q = CUR_PTR; - while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) { NEXT; + len++; + } if (!IS_CHAR_CH(CUR)) { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished SystemLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR+startPosition), len); NEXT; } } else { @@ -2773,32 +2794,47 @@ static xmlChar * htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { - const xmlChar *q; + size_t len = 0, startPosition = 0; xmlChar *ret = NULL; /* * Name ::= (Letter | '_') (NameChar)* */ if (CUR == '"') { NEXT; - q = CUR_PTR; - while (IS_PUBIDCHAR_CH(CUR)) NEXT; + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while (IS_PUBIDCHAR_CH(CUR)) { + len++; + NEXT; + } + if (CUR != '"') { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished PubidLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR + startPosition), len); NEXT; } } else if (CUR == '\'') { NEXT; - q = CUR_PTR; - while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')) - NEXT; + + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){ + len++; + NEXT; + } + if (CUR != '\'') { htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, "Unfinished PubidLiteral\n", NULL, NULL); } else { - ret = xmlStrndup(q, CUR_PTR - q); + ret = xmlStrndup((BASE_PTR + startPosition), len); NEXT; } } else { @@ -3194,12 +3230,17 @@ ctxt->instate = state; return; } + len = 0; + buf[len] = 0; q = CUR_CHAR(ql); + if (!IS_CHAR(q)) + goto unfinished; NEXTL(ql); r = CUR_CHAR(rl); + if (!IS_CHAR(r)) + goto unfinished; NEXTL(rl); cur = CUR_CHAR(l); - len = 0; while (IS_CHAR(cur) && ((cur != '>') || (r != '-') || (q != '-'))) { @@ -3230,18 +3271,20 @@ } } buf[len] = 0; - if (!IS_CHAR(cur)) { - htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n