--- icu-3.8.orig/debian/icu-doc.install +++ icu-3.8/debian/icu-doc.install @@ -0,0 +1 @@ +usr/share/doc/icu/html usr/share/doc/icu-doc --- icu-3.8.orig/debian/libicu-dev.install +++ icu-3.8/debian/libicu-dev.install @@ -0,0 +1,8 @@ +usr/lib/lib*.so +usr/lib/lib*.a +usr/lib/icu +usr/include +usr/bin +usr/sbin +usr/share/icu +usr/share/man --- icu-3.8.orig/debian/lib32icu38.shlibs +++ icu-3.8/debian/lib32icu38.shlibs @@ -0,0 +1,7 @@ +libicudata 38 lib32icu38 (>= 3.8-5) +libicui18n 38 lib32icu38 (>= 3.8-5) +libicuio 38 lib32icu38 (>= 3.8-5) +libicule 38 lib32icu38 (>= 3.8-5) +libiculx 38 lib32icu38 (>= 3.8-5) +libicutu 38 lib32icu38 (>= 3.8-5) +libicuuc 38 lib32icu38 (>= 3.8-5) --- icu-3.8.orig/debian/libicu38.shlibs +++ icu-3.8/debian/libicu38.shlibs @@ -0,0 +1,7 @@ +libicudata 38 libicu38 (>= 3.8-5) +libicui18n 38 libicu38 (>= 3.8-5) +libicuio 38 libicu38 (>= 3.8-5) +libicule 38 libicu38 (>= 3.8-5) +libiculx 38 libicu38 (>= 3.8-5) +libicutu 38 libicu38 (>= 3.8-5) +libicuuc 38 libicu38 (>= 3.8-5) --- icu-3.8.orig/debian/control +++ icu-3.8/debian/control @@ -0,0 +1,71 @@ +Source: icu +Section: libs +Priority: optional +Maintainer: Ubuntu Core Developers +XSBC-Original-Maintainer: Jay Berkenbilt +Standards-Version: 3.7.3 +Build-Depends: cdbs, debhelper (>= 5), doxygen, gcc-multilib [amd64 kfreebsd-amd64 ppc64] | gcc-4.1 (<< 4.1.2) [amd64 kfreebsd-amd64 ppc64], + g++-multilib [amd64 kfreebsd-amd64 ppc64] | g++-4.1 (<< 4.1.2) [amd64 kfreebsd-amd64 ppc64], + libc6-dev-i386 [amd64], libc0.1-dev-i386 [kfreebsd-amd64], libc6-dev-powerpc [ppc64] + +Package: libicu38 +Section: libs +Architecture: any +Depends: ${shlibs:Depends} +Replaces: icu, icu-locales +Conflicts: icu, icu-locales +Description: International Components for Unicode + ICU is a C++ and C library that provides robust and full-featured + Unicode and locale support. This package contains the runtime + libraries for ICU. 
+ +Package: libicu38-dbg +Section: libs +Priority: extra +Architecture: any +Depends: libicu38 (= ${binary:Version}) +Description: International Components for Unicode + ICU is a C++ and C library that provides robust and full-featured + Unicode and locale support. This package contains debugging symbols + for the libraries. + +Package: libicu-dev +Section: libdevel +Architecture: any +Depends: libicu38 (= ${binary:Version}), libc6-dev | libc-dev +Replaces: libicu34-dev, libicu36-dev +Conflicts: libicu34-dev, libicu36-dev +Suggests: icu-doc +Description: Development files for International Components for Unicode + ICU is a C++ and C library that provides robust and full-featured + Unicode and locale support. This package contains the development + files for ICU along with programs used to manipulate data files found + in the ICU sources. + +Package: lib32icu38 +Section: libs +Architecture: amd64 ppc64 kfreebsd-amd64 +Depends: ${shlibs:Depends} +Description: International Components for Unicode (32-bit) + ICU is a C++ and C library that provides robust and full-featured + Unicode and locale support. This package contains the runtime + libraries for ICU. + +Package: lib32icu-dev +Section: libdevel +Architecture: amd64 ppc64 kfreebsd-amd64 +Depends: libicu-dev (= ${binary:Version}), lib32icu38 (= ${binary:Version}) +Suggests: icu-doc +Description: Development files for International Components for Unicode (32-bit) + ICU is a C++ and C library that provides robust and full-featured + Unicode and locale support. This package contains the development + files for ICU along with programs used to manipulate data files found + in the ICU sources. + +Package: icu-doc +Section: doc +Architecture: all +Description: API documentation for ICU classes and functions + ICU is a C++ and C library that provides robust and full-featured + Unicode and locale support. This package contains HTML files + documenting the ICU APIs. 
--- icu-3.8.orig/debian/patches/05-redhat.icu6002.patch +++ icu-3.8/debian/patches/05-redhat.icu6002.patch @@ -0,0 +1,411 @@ +# +# Description: the HZ converter must restrict DBCS codes to bytes 21..7D +# (required to fix CVE-2009-0153). See: +# https://bugzilla.redhat.com/show_bug.cgi?id=503071 +# Upstream: http://bugs.icu-project.org/trac/ticket/6002 +# +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_ext.c icu-3.8.new/build-tree/icu/source/common/ucnv_ext.c +--- icu/source/common/ucnv_ext.c 2009-10-07 11:32:22.241950584 -0500 ++++ icu/source/common/ucnv_ext.c 2009-10-07 11:32:29.253215734 -0500 +@@ -1036,15 +1036,13 @@ + /* enumerate the from-Unicode trie table */ + c=0; /* keep track of the current code point while enumerating */ + +- if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || +- filter==UCNV_SET_FILTER_DBCS_ONLY || +- filter==UCNV_SET_FILTER_SJIS || +- filter==UCNV_SET_FILTER_GR94DBCS ++ if(filter==UCNV_SET_FILTER_2022_CN) { ++ minLength=3; ++ } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || ++ filter!=UCNV_SET_FILTER_NONE + ) { + /* DBCS-only, ignore single-byte results */ + minLength=2; +- } else if(filter==UCNV_SET_FILTER_2022_CN) { +- minLength=3; + } else { + minLength=1; + } +@@ -1099,8 +1097,15 @@ + break; + case UCNV_SET_FILTER_GR94DBCS: + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && +- (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe-0xa1a1) && +- (uint8_t)(value-0xa1)<=(0xfe-0xa1))) { ++ (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && ++ (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { ++ continue; ++ } ++ break; ++ case UCNV_SET_FILTER_HZ: ++ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && ++ (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && ++ (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { + continue; + } + break; +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c 
icu-3.8.new/build-tree/icu/source/common/ucnvhz.c +--- icu/source/common/ucnvhz.c 2009-10-07 11:32:22.241950584 -0500 ++++ icu/source/common/ucnvhz.c 2009-10-07 11:32:29.273207453 -0500 +@@ -72,7 +72,7 @@ + cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ)); + if(cnv->extraInfo != NULL){ + uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ)); +- ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode); ++ ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("GBK",errorCode); + } + else { + *errorCode = U_MEMORY_ALLOCATION_ERROR; +@@ -141,7 +141,7 @@ + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + UChar32 targetUniChar = 0x0000; +- UChar mySourceChar = 0x0000; ++ int32_t mySourceChar = 0x0000; + UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); + tempBuf[0]=0; + tempBuf[1]=0; +@@ -156,90 +156,71 @@ + + mySourceChar= (unsigned char) *mySource++; + +- switch(mySourceChar){ ++ if(args->converter->mode == UCNV_TILDE) { ++ /* second byte after ~ */ ++ args->converter->mode=0; ++ switch(mySourceChar) { + case 0x0A: +- if(args->converter->mode ==UCNV_TILDE){ +- args->converter->mode=0; +- +- } +- *(myTarget++)=(UChar)mySourceChar; ++ /* no output for ~\n (line-continuation marker) */ + continue; +- + case UCNV_TILDE: +- if(args->converter->mode ==UCNV_TILDE){ +- *(myTarget++)=(UChar)mySourceChar; +- args->converter->mode=0; +- continue; +- ++ if(args->offsets) { ++ args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); + } +- else if(args->converter->toUnicodeStatus !=0){ +- args->converter->mode=0; +- break; +- } +- else{ +- args->converter->mode = UCNV_TILDE; +- continue; +- } +- +- ++ *(myTarget++)=(UChar)mySourceChar; ++ continue; + case UCNV_OPEN_BRACE: +- if(args->converter->mode == UCNV_TILDE){ +- args->converter->mode=0; +- myData->isStateDBCS = TRUE; +- continue; +- } +- else{ +- break; +- } +- +- ++ myData->isStateDBCS = TRUE; ++ 
continue; + case UCNV_CLOSE_BRACE: +- if(args->converter->mode == UCNV_TILDE){ +- args->converter->mode=0; +- myData->isStateDBCS = FALSE; +- continue; +- } +- else{ +- break; +- } +- ++ myData->isStateDBCS = FALSE; ++ continue; + default: + /* if the first byte is equal to TILDE and the trail byte + * is not a valid byte then it is an error condition + */ +- if(args->converter->mode == UCNV_TILDE){ +- args->converter->mode=0; +- mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); +- goto SAVE_STATE; +- } +- ++ mySourceChar = 0x7e00 | mySourceChar; ++ targetUniChar = 0xffff; + break; +- +- } +- +- if(myData->isStateDBCS){ ++ } ++ } else if(myData->isStateDBCS) { + if(args->converter->toUnicodeStatus == 0x00){ +- args->converter->toUnicodeStatus = (UChar) mySourceChar; ++ /* lead byte */ ++ if(mySourceChar == UCNV_TILDE) { ++ args->converter->mode = UCNV_TILDE; ++ } else { ++ /* add another bit to distinguish a 0 byte from not having seen a lead byte */ ++ args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); ++ } + continue; + } + else{ +- tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ; +- tempBuf[1] = (char) (mySourceChar+0x80); +- mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); ++ /* trail byte */ ++ uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; ++ if( (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21) && ++ (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21) ++ ) { ++ tempBuf[0] = (char) (leadByte+0x80) ; ++ tempBuf[1] = (char) (mySourceChar+0x80); ++ targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, ++ tempBuf, 2, args->converter->useFallback); ++ } else { ++ targetUniChar = 0xffff; ++ } ++ /* add another bit so that the code below writes 2 bytes in case of error */ ++ mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; + args->converter->toUnicodeStatus =0x00; +- targetUniChar = 
ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, +- tempBuf, 2, args->converter->useFallback); + } + } + else{ +- if(args->converter->fromUnicodeStatus == 0x00){ +- targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, +- mySource - 1, 1, args->converter->useFallback); +- } +- else{ +- goto SAVE_STATE; ++ if(mySourceChar == UCNV_TILDE) { ++ args->converter->mode = UCNV_TILDE; ++ continue; ++ } else if(mySourceChar <= 0x7f) { ++ targetUniChar = (UChar)mySourceChar; /* ASCII */ ++ } else { ++ targetUniChar = 0xffff; + } +- + } + if(targetUniChar < 0xfffe){ + if(args->offsets) { +@@ -248,26 +229,17 @@ + + *(myTarget++)=(UChar)targetUniChar; + } +- else if(targetUniChar>=0xfffe){ +-SAVE_STATE: ++ else /* targetUniChar>=0xfffe */ { + if(targetUniChar == 0xfffe){ + *err = U_INVALID_CHAR_FOUND; + } + else{ + *err = U_ILLEGAL_CHAR_FOUND; + } +- if(myData->isStateDBCS){ +- /* this should never occur since isStateDBCS is set to true +- * only after tempBuf[0] and tempBuf[1] +- * are set to the input .. 
just to please BEAM +- */ +- if(tempBuf[0]==0 || tempBuf[1]==0){ +- *err = U_INTERNAL_PROGRAM_ERROR; +- }else{ +- args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80); +- args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80); +- args->converter->toULength=2; +- } ++ if(mySourceChar > 0xff){ ++ args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); ++ args->converter->toUBytes[1] = (uint8_t)mySourceChar; ++ args->converter->toULength=2; + } + else{ + args->converter->toUBytes[0] = (uint8_t)mySourceChar; +@@ -328,16 +300,21 @@ + escSeq = TILDE_ESCAPE; + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); + continue; +- } +- else{ ++ } else if(mySourceChar <= 0x7f) { ++ length = 1; ++ targetUniChar = mySourceChar; ++ } else { + length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, + mySourceChar,&targetUniChar,args->converter->useFallback); +- +- } +- /* only DBCS or SBCS characters are expected*/ +- /* DB haracters with high bit set to 1 are expected */ +- if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&& length==2)){ +- targetUniChar= missingCharMarker; ++ /* we can only use lead bytes 21..7D and trail bytes 21..7E */ ++ if( length == 2 && ++ (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && ++ (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) ++ ) { ++ targetUniChar -= 0x8080; ++ } else { ++ targetUniChar = missingCharMarker; ++ } + } + if (targetUniChar != missingCharMarker){ + myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); +@@ -360,22 +337,22 @@ + + if(isTargetUCharDBCS){ + if( myTargetIndex > 8) -0x80); ++ myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); + if(offsets){ + *(offsets++) = mySourceIndex-1; + } + if(myTargetIndex < targetLength){ +- myTarget[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80); ++ myTarget[myTargetIndex++] =(char) targetUniChar; + if(offsets){ + *(offsets++) = mySourceIndex-1; + } + }else{ 
+- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80); ++ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + }else{ +- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80); +- args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80); ++ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); ++ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + +@@ -524,15 +501,14 @@ + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { +- /* the tilde '~' is hardcoded in the converter */ +- sa->add(sa->set, 0x7e); ++ /* HZ converts all of ASCII */ ++ sa->addRange(sa->set, 0, 0x7f); + + /* add all of the code points that the sub-converter handles */ +- /* ucnv_MBCSGetFilteredUnicodeSetForUnicode(((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, sa, which, UCNV_SET_FILTER_GR94DBCS, pErrorCode); */ +- ((UConverterDataHZ*)cnv->extraInfo)-> +- gbConverter->sharedData->impl-> +- getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, +- sa, which, pErrorCode); ++ ucnv_MBCSGetFilteredUnicodeSetForUnicode( ++ ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, ++ sa, which, UCNV_SET_FILTER_HZ, ++ pErrorCode); + } + + static const UConverterImpl _HZImpl={ +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c +--- icu/source/common/ucnvmbcs.c 2009-10-07 11:32:22.251959209 -0500 ++++ icu/source/common/ucnvmbcs.c 2009-10-07 11:32:29.273207453 -0500 +@@ -625,8 +625,21 @@ + /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). 
*/ + do { + if( ((st3&1)!=0 || useFallback) && +- (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfefe-0xa1a1) && +- (uint8_t)(value-0xa1)<=(0xfe-0xa1) ++ (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) && ++ (uint8_t)(value-0xa1)<=(0xfe - 0xa1) ++ ) { ++ sa->add(sa->set, c); ++ } ++ st3>>=1; ++ stage3+=2; /* +=st3Multiplier */ ++ } while((++c&0xf)!=0); ++ break; ++ case UCNV_SET_FILTER_HZ: ++ /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */ ++ do { ++ if( ((st3&1)!=0 || useFallback) && ++ (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) && ++ (uint8_t)(value-0xa1)<=(0xfe - 0xa1) + ) { + sa->add(sa->set, c); + } +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.h icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.h +--- icu/source/common/ucnvmbcs.h 2009-10-07 11:32:22.251959209 -0500 ++++ icu/source/common/ucnvmbcs.h 2009-10-07 11:32:29.273207453 -0500 +@@ -493,6 +493,7 @@ + UCNV_SET_FILTER_2022_CN, + UCNV_SET_FILTER_SJIS, + UCNV_SET_FILTER_GR94DBCS, ++ UCNV_SET_FILTER_HZ, + UCNV_SET_FILTER_COUNT + } UConverterSetFilter; + +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/ncnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/ncnvtst.c +--- icu/source/test/cintltst/ncnvtst.c 2007-09-13 18:17:36.000000000 -0500 ++++ icu/source/test/cintltst/ncnvtst.c 2009-10-07 11:32:29.283205342 -0500 +@@ -1928,7 +1928,7 @@ + #if !UCONFIG_NO_LEGACY_CONVERSION + { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }, + { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff }, +- { "HZ", 0x410, 0x44f, 0x4e00, 0x4eff, 0xac00, 0xd7ff }, ++ /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */ + { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff } + #else + { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff } +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/intltest/convtest.cpp 
icu-3.8.new/build-tree/icu/source/test/intltest/convtest.cpp +--- icu/source/test/intltest/convtest.cpp 2009-10-07 11:32:22.251959209 -0500 ++++ icu/source/test/intltest/convtest.cpp 2009-10-07 11:32:29.283205342 -0500 +@@ -538,7 +538,7 @@ + "Shift-JIS", + "ibm-1390", // EBCDIC_STATEFUL table + "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table +- // "HZ", TODO(markus): known bug, the set incorrectly contains [\u02CA\u02CB\u02D9\u2010\u2013\u2015...] ++ "HZ", + "ISO-2022-JP", + "JIS7", + "ISO-2022-CN", +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt +--- icu/source/test/testdata/conversion.txt 2009-10-07 11:32:22.251959209 -0500 ++++ icu/source/test/testdata/conversion.txt 2009-10-07 11:32:29.283205342 -0500 +@@ -48,6 +48,14 @@ + toUnicode { + Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" } + Cases { ++ // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e ++ { ++ "HZ", ++ :bin{ 7e7b21212120217e217f772100007e217e7d207e7e807e0a2b }, ++ "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd ~\ufffd+", ++ :intvector{ 2,4,6,8,10,12,14,18,19,21,24 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } + // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and + // using the Shift-JIS table for JIS X 0208 (ticket #5797) + { +@@ -1349,6 +1357,14 @@ + :int{0} + } + ++ // HZ ++ { ++ "HZ", ++ "[\u0410-\u044f\u4e00\u4e01\u4e03]", ++ "[\u4e02\u4e04-\u4e06\uac00-\ud7ff]", ++ :int{0} ++ } ++ + // DBCS-only + { + "ibm-971", --- icu-3.8.orig/debian/patches/03-redhat.icu5797.patch +++ icu-3.8/debian/patches/03-redhat.icu5797.patch @@ -0,0 +1,751 @@ +# +# Description: use Shift-JIS table for ISO 2022-JP (required to fix +# CVE-2009-0153). 
See: https://bugzilla.redhat.com/show_bug.cgi?id=503071 +# Upstream: http://bugs.icu-project.org/trac/ticket/5797 +# +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c +--- icu/source/common/ucnv2022.c 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnv2022.c 2009-10-07 11:31:52.331962609 -0500 +@@ -472,8 +472,7 @@ + if(jpCharsetMasks[version]&CSM(ISO8859_7)) { + myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode); + } +- myConverterData->myConverterArray[JISX201] = ucnv_loadSharedData("JISX0201", NULL, errorCode); +- myConverterData->myConverterArray[JISX208] = ucnv_loadSharedData("jisx-208", NULL, errorCode); ++ myConverterData->myConverterArray[JISX208] = ucnv_loadSharedData("Shift-JIS", NULL, errorCode); + if(jpCharsetMasks[version]&CSM(JISX212)) { + myConverterData->myConverterArray[JISX212] = ucnv_loadSharedData("jisx-212", NULL, errorCode); + } +@@ -1040,14 +1039,6 @@ + length=3; + } + } +- /* +- * TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space. +- * Pass in parameter for type of output bytes, for validation and shifting: +- * - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20? +- * (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.) +- * - A1-FE: Subtract 80 after range check. +- * - SJIS: Shift DBCS result to 21-7E x 21-7E. +- */ + /* is this code point assigned, or do we use fallbacks? */ + if((stage2Entry&(1<<(16+(c&0xf))))!=0) { + /* assigned */ +@@ -1105,6 +1096,23 @@ + } + } + ++/* ++ * Check that the result is a 2-byte value with each byte in the range A1..FE ++ * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte ++ * to move it to the ISO 2022 range 21..7E. ++ * Return 0 if out of range. 
++ */ ++static U_INLINE uint32_t ++_2022FromGR94DBCS(uint32_t value) { ++ if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && ++ (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) ++ ) { ++ return value - 0x8080; /* shift down to 21..7e byte range */ ++ } else { ++ return 0; /* not valid for ISO 2022 */ ++ } ++} ++ + #ifdef U_ENABLE_GENERIC_ISO_2022 + + /********************************************************************************** +@@ -1233,7 +1241,7 @@ + } + else{ + cnv->toUBytes[0] =(char) sourceChar; +- cnv->toULength = 2; ++ cnv->toULength = 1; + } + + if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){ +@@ -1344,6 +1352,181 @@ + * TODO: Implement a priority technique where the users are allowed to set the priority of code pages + */ + ++/* Map 00..7F to Unicode according to JIS X 0201. */ ++static U_INLINE uint32_t ++jisx201ToU(uint32_t value) { ++ if(value < 0x5c) { ++ return value; ++ } else if(value == 0x5c) { ++ return 0xa5; ++ } else if(value == 0x7e) { ++ return 0x203e; ++ } else /* value <= 0x7f */ { ++ return value; ++ } ++} ++ ++/* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */ ++static U_INLINE uint32_t ++jisx201FromU(uint32_t value) { ++ if(value<=0x7f) { ++ if(value!=0x5c && value!=0x7e) { ++ return value; ++ } ++ } else if(value==0xa5) { ++ return 0x5c; ++ } else if(value==0x203e) { ++ return 0x7e; ++ } ++ return 0xfffe; ++} ++ ++/* ++ * Take a valid Shift-JIS byte pair, check that it is in the range corresponding ++ * to JIS X 0208, and convert it to a pair of 21..7E bytes. ++ * Return 0 if the byte pair is out of range. 
++ */ ++static U_INLINE uint32_t ++_2022FromSJIS(uint32_t value) { ++ uint8_t trail; ++ ++ if(value > 0xEFFC) { ++ return 0; /* beyond JIS X 0208 */ ++ } ++ ++ trail = (uint8_t)value; ++ ++ value &= 0xff00; /* lead byte */ ++ if(value <= 0x9f00) { ++ value -= 0x7000; ++ } else /* 0xe000 <= value <= 0xef00 */ { ++ value -= 0xb000; ++ } ++ value <<= 1; ++ ++ if(trail <= 0x9e) { ++ value -= 0x100; ++ if(trail <= 0x7e) { ++ value |= trail - 0x1f; ++ } else { ++ value |= trail - 0x20; ++ } ++ } else /* trail <= 0xfc */ { ++ value |= trail - 0x7e; ++ } ++ return value; ++} ++ ++/* ++ * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS. ++ * If either byte is outside 21..7E make sure that the result is not valid ++ * for Shift-JIS so that the converter catches it. ++ * Some invalid byte values already turn into equally invalid Shift-JIS ++ * byte values and need not be tested explicitly. ++ */ ++static U_INLINE void ++_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) { ++ if(c1&1) { ++ ++c1; ++ if(c2 <= 0x5f) { ++ c2 += 0x1f; ++ } else if(c2 <= 0x7e) { ++ c2 += 0x20; ++ } else { ++ c2 = 0; /* invalid */ ++ } ++ } else { ++ if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) { ++ c2 += 0x7e; ++ } else { ++ c2 = 0; /* invalid */ ++ } ++ } ++ c1 >>= 1; ++ if(c1 <= 0x2f) { ++ c1 += 0x70; ++ } else if(c1 <= 0x3f) { ++ c1 += 0xb0; ++ } else { ++ c1 = 0; /* invalid */ ++ } ++ bytes[0] = (char)c1; ++ bytes[1] = (char)c2; ++} ++ ++/* ++ * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS) ++ * Katakana. ++ * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks ++ * because Shift-JIS roundtrips half-width Katakana to single bytes. ++ * These were the only fallbacks in ICU's jisx-208.ucm file. 
++ */ ++static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = { ++ 0x2123, /* U+FF61 */ ++ 0x2156, ++ 0x2157, ++ 0x2122, ++ 0x2126, ++ 0x2572, ++ 0x2521, ++ 0x2523, ++ 0x2525, ++ 0x2527, ++ 0x2529, ++ 0x2563, ++ 0x2565, ++ 0x2567, ++ 0x2543, ++ 0x213C, /* U+FF70 */ ++ 0x2522, ++ 0x2524, ++ 0x2526, ++ 0x2528, ++ 0x252A, ++ 0x252B, ++ 0x252D, ++ 0x252F, ++ 0x2531, ++ 0x2533, ++ 0x2535, ++ 0x2537, ++ 0x2539, ++ 0x253B, ++ 0x253D, ++ 0x253F, /* U+FF80 */ ++ 0x2541, ++ 0x2544, ++ 0x2546, ++ 0x2548, ++ 0x254A, ++ 0x254B, ++ 0x254C, ++ 0x254D, ++ 0x254E, ++ 0x254F, ++ 0x2552, ++ 0x2555, ++ 0x2558, ++ 0x255B, ++ 0x255E, ++ 0x255F, /* U+FF90 */ ++ 0x2560, ++ 0x2561, ++ 0x2562, ++ 0x2564, ++ 0x2566, ++ 0x2568, ++ 0x2569, ++ 0x256A, ++ 0x256B, ++ 0x256C, ++ 0x256D, ++ 0x256F, ++ 0x2573, ++ 0x212B, ++ 0x212C /* U+FF9F */ ++}; ++ + static void + UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) { + UConverter *cnv = args->converter; +@@ -1499,7 +1682,7 @@ + } + break; + case HWKANA_7BIT: +- if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) { ++ if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { + if(converterData->version==3) { + /* JIS7: use G1 (SO) */ + /* Shift U+FF61..U+FF9F to bytes 21..5F. 
*/ +@@ -1526,13 +1709,34 @@ + break; + case JISX201: + /* G0 SBCS */ +- len2 = MBCS_SINGLE_FROM_UCHAR32( ++ value = jisx201FromU(sourceChar); ++ if(value <= 0x7f) { ++ targetValue = value; ++ len = 1; ++ cs = cs0; ++ g = 0; ++ useFallback = FALSE; ++ } ++ break; ++ case JISX208: ++ /* G0 DBCS from Shift-JIS table */ ++ len2 = MBCS_FROM_UCHAR32_ISO2022( + converterData->myConverterArray[cs0], + sourceChar, &value, +- useFallback); +- if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) { +- targetValue = value; +- len = len2; ++ useFallback, MBCS_OUTPUT_2); ++ if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ ++ value = _2022FromSJIS(value); ++ if(value != 0) { ++ targetValue = value; ++ len = len2; ++ cs = cs0; ++ g = 0; ++ useFallback = FALSE; ++ } ++ } else if(len == 0 && useFallback && ++ (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { ++ targetValue = hwkana_fb[sourceChar - HWKANA_START]; ++ len = -2; + cs = cs0; + g = 0; + useFallback = FALSE; +@@ -1564,17 +1768,10 @@ + * Check for valid bytes for the encoding scheme. + * This is necessary because the sub-converter (windows-949) + * has a broader encoding scheme than is valid for 2022. +- * +- * Check that the result is a 2-byte value with each byte in the range A1..FE +- * (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte +- * to move it to the ISO 2022 range 21..7E. 
+ */ +- if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && +- (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) +- ) { +- value -= 0x8080; /* shift down to 21..7e byte range */ +- } else { +- break; /* not valid for ISO 2022 */ ++ value = _2022FromGR94DBCS(value); ++ if(value == 0) { ++ break; + } + } + targetValue = value; +@@ -1750,7 +1947,7 @@ + static void + UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, + UErrorCode* err){ +- char tempBuf[3]; ++ char tempBuf[2]; + const char *mySource = (char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; +@@ -1868,10 +2065,7 @@ + break; + case JISX201: + if(mySourceChar <= 0x7f) { +- targetUniChar = +- _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( +- myData->myConverterArray[cs], +- mySourceChar); ++ targetUniChar = jisx201ToU(mySourceChar); + } + break; + case HWKANA_7BIT: +@@ -1885,8 +2079,13 @@ + if(mySource < mySourceLimit) { + char trailByte; + getTrailByte: +- tempBuf[0] = (char) (mySourceChar); +- tempBuf[1] = trailByte = *mySource++; ++ trailByte = *mySource++; ++ if(cs == JISX208) { ++ _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf); ++ } else { ++ tempBuf[0] = (char)mySourceChar; ++ tempBuf[1] = trailByte; ++ } + mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); + } else { +@@ -3190,6 +3389,9 @@ + /* open a set and initialize it with code points that are algorithmically round-tripped */ + switch(cnvData->locale[0]){ + case 'j': ++ /* include JIS X 0201 which is hardcoded */ ++ sa->add(sa->set, 0xa5); ++ sa->add(sa->set, 0x203e); + if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { + /* include Latin-1 for some variants of JP */ + sa->addRange(sa->set, 0, 0xff); +@@ -3198,6 +3400,11 @@ + sa->addRange(sa->set, 0, 0x7f); + } + if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) { ++ /* ++ * TODO(markus): If and when 
ucnv_getUnicodeSet() supports fallbacks, ++ * we need to include half-width Katakana for all JP variants because ++ * JIS X 0208 has hardcoded fallbacks for them. ++ */ + /* include half-width Katakana for JP */ + sa->addRange(sa->set, HWKANA_START, HWKANA_END); + } +@@ -3217,15 +3424,7 @@ + break; + } + +- /* +- * Version-specific for CN: +- * CN version 0 does not map CNS planes 3..7 although +- * they are all available in the CNS conversion table; +- * CN version 1 does map them all. +- * The two versions create different Unicode sets. +- */ +- for (i=0; imyConverterArray[i]!=NULL) { ++#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */ + if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && + cnvData->version==0 && i==CNS_11643 + ) { +@@ -3235,9 +3434,33 @@ + sa, UCNV_ROUNDTRIP_SET, + 0, 0x81, 0x82, + pErrorCode); ++ } ++#endif ++ ++ for (i=0; imyConverterArray[i]!=NULL) { ++ if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && ++ cnvData->version==0 && i==CNS_11643 ++ ) { ++ /* ++ * Version-specific for CN: ++ * CN version 0 does not map CNS planes 3..7 although ++ * they are all available in the CNS conversion table; ++ * CN version 1 (-EXT) does map them all. ++ * The two versions create different Unicode sets. ++ */ ++ filter=UCNV_SET_FILTER_2022_CN; ++ } else if(cnvData->locale[0]=='j' && i==JISX208) { ++ /* ++ * Only add code points that map to Shift-JIS codes ++ * corresponding to JIS X 0208. 
++ */ ++ filter=UCNV_SET_FILTER_SJIS; + } else { +- ucnv_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode); ++ filter=UCNV_SET_FILTER_NONE; + } ++ ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode); + } + } + +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c +--- icu/source/common/ucnvmbcs.c 2007-09-13 18:17:58.000000000 -0500 ++++ icu/source/common/ucnvmbcs.c 2009-10-07 11:31:52.351957135 -0500 +@@ -362,6 +362,8 @@ + + /* Miscellaneous ------------------------------------------------------------ */ + ++#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */ ++ + /* similar to ucnv_MBCSGetNextUChar() but recursive */ + static void + _getUnicodeSetForBytes(const UConverterSharedData *sharedData, +@@ -454,11 +456,14 @@ + pErrorCode); + } + ++#endif ++ + U_CFUNC void +-ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, +- const USetAdder *sa, +- UConverterUnicodeSet which, +- UErrorCode *pErrorCode) { ++ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, ++ const USetAdder *sa, ++ UConverterUnicodeSet which, ++ UConverterSetFilter filter, ++ UErrorCode *pErrorCode) { + const UConverterMBCSTable *mbcsTable; + const uint16_t *table; + +@@ -512,50 +517,26 @@ + c+=1024; /* empty stage 2 block */ + } + } +- } else if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY) { +- /* ignore single-byte results */ ++ } else { + const uint32_t *stage2; +- const uint16_t *stage3, *results; ++ const uint8_t *stage3, *bytes; ++ uint32_t st3Multiplier; ++ uint32_t value; + +- results=(const uint16_t *)mbcsTable->fromUnicodeBytes; +- +- for(st1=0; st1(maxStage1>>1)) { +- stage2=(const uint32_t *)table+st2; +- for(st2=0; st2<64; ++st2) { +- if((st3=stage2[st2])!=0) { +- /* read the stage 3 block */ +- 
stage3=results+16*(uint32_t)(uint16_t)st3; +- +- /* get the roundtrip flags for the stage 3 block */ +- st3>>=16; ++ bytes=mbcsTable->fromUnicodeBytes; + +- /* +- * Add code points for which the roundtrip flag is set. +- * Once we get a set for fallback mappings, we have to check +- * non-roundtrip stage 3 results for whether they are 0. +- * See ucnv_MBCSFromUnicodeWithOffsets() for details. +- * +- * Ignore single-byte results (<0x100). +- */ +- do { +- if((st3&1)!=0 && *stage3>=0x100) { +- sa->add(sa->set, c); +- } +- st3>>=1; +- ++stage3; +- } while((++c&0xf)!=0); +- } else { +- c+=16; /* empty stage 3 block */ +- } +- } +- } else { +- c+=1024; /* empty stage 2 block */ +- } ++ switch(mbcsTable->outputType) { ++ case MBCS_OUTPUT_3: ++ case MBCS_OUTPUT_4_EUC: ++ st3Multiplier=3; ++ break; ++ case MBCS_OUTPUT_4: ++ st3Multiplier=4; ++ break; ++ default: ++ st3Multiplier=2; ++ break; + } +- } else { +- const uint32_t *stage2; + + for(st1=0; st1>=16; + +@@ -572,12 +556,49 @@ + * non-roundtrip stage 3 results for whether they are 0. + * See ucnv_MBCSFromUnicodeWithOffsets() for details. + */ +- do { +- if(st3&1) { +- sa->add(sa->set, c); +- } +- st3>>=1; +- } while((++c&0xf)!=0); ++ switch(filter) { ++ case UCNV_SET_FILTER_NONE: ++ do { ++ if(st3&1) { ++ sa->add(sa->set, c); ++ } ++ st3>>=1; ++ } while((++c&0xf)!=0); ++ break; ++ case UCNV_SET_FILTER_DBCS_ONLY: ++ /* Ignore single-byte results (<0x100). */ ++ do { ++ if((st3&1)!=0 && *((const uint16_t *)stage3)>=0x100) { ++ sa->add(sa->set, c); ++ } ++ st3>>=1; ++ stage3+=2; /* +=st3Multiplier */ ++ } while((++c&0xf)!=0); ++ break; ++ case UCNV_SET_FILTER_2022_CN: ++ /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. 
*/ ++ do { ++ if((st3&1)!=0 && ((value=*stage3)==0x81 || value==0x82)) { ++ sa->add(sa->set, c); ++ } ++ st3>>=1; ++ stage3+=3; /* +=st3Multiplier */ ++ } while((++c&0xf)!=0); ++ break; ++ case UCNV_SET_FILTER_SJIS: ++ /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */ ++ do { ++ if((st3&1)!=0 && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) { ++ sa->add(sa->set, c); ++ } ++ st3>>=1; ++ stage3+=2; /* +=st3Multiplier */ ++ } while((++c&0xf)!=0); ++ break; ++ default: ++ *pErrorCode=U_INTERNAL_PROGRAM_ERROR; ++ return; ++ } + } else { + c+=16; /* empty stage 3 block */ + } +@@ -591,6 +612,19 @@ + ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode); + } + ++U_CFUNC void ++ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, ++ const USetAdder *sa, ++ UConverterUnicodeSet which, ++ UErrorCode *pErrorCode) { ++ ucnv_MBCSGetFilteredUnicodeSetForUnicode( ++ sharedData, sa, which, ++ sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? ++ UCNV_SET_FILTER_DBCS_ONLY : ++ UCNV_SET_FILTER_NONE, ++ pErrorCode); ++} ++ + static void + ucnv_MBCSGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.h icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.h +--- icu/source/common/ucnvmbcs.h 2007-09-13 18:17:58.000000000 -0500 ++++ icu/source/common/ucnvmbcs.h 2009-10-07 11:31:52.351957135 -0500 +@@ -456,6 +456,7 @@ + ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode); + ++#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */ + /* + * Internal function returning a UnicodeSet for toUnicode() conversion. + * Currently only used for ISO-2022-CN, and only handles roundtrip mappings. 
+@@ -470,6 +471,7 @@ + UConverterUnicodeSet which, + uint8_t state, int32_t lowByte, int32_t highByte, + UErrorCode *pErrorCode); ++#endif + + /* + * Internal function returning a UnicodeSet for toUnicode() conversion. +@@ -481,9 +483,30 @@ + */ + U_CFUNC void + ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, +- const USetAdder *sa, +- UConverterUnicodeSet which, +- UErrorCode *pErrorCode); ++ const USetAdder *sa, ++ UConverterUnicodeSet which, ++ UErrorCode *pErrorCode); ++ ++typedef enum UConverterSetFilter { ++ UCNV_SET_FILTER_NONE, ++ UCNV_SET_FILTER_DBCS_ONLY, ++ UCNV_SET_FILTER_2022_CN, ++ UCNV_SET_FILTER_SJIS, ++ UCNV_SET_FILTER_COUNT ++} UConverterSetFilter; ++ ++/* ++ * Same as ucnv_MBCSGetUnicodeSetForUnicode() but ++ * the set can be filtered by encoding scheme. ++ * Used by stateful converters which share regular conversion tables ++ * but only use a subset of their mappings. ++ */ ++U_CFUNC void ++ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, ++ const USetAdder *sa, ++ UConverterUnicodeSet which, ++ UConverterSetFilter filter, ++ UErrorCode *pErrorCode); + + #endif + +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nucnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nucnvtst.c +--- icu/source/test/cintltst/nucnvtst.c 2007-09-13 18:17:34.000000000 -0500 ++++ icu/source/test/cintltst/nucnvtst.c 2009-10-07 11:31:52.351957135 -0500 +@@ -3202,7 +3202,7 @@ + 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, + 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, + 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, +- 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, ++ 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, + 0x201D, 0x3014, 0x000D, 0x000A, + 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, + 
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, +@@ -3730,7 +3730,7 @@ + 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, + 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, + 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, +- 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, ++ 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, + 0x201D, 0x000D, 0x000A, + 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, + 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/udatatst.c icu-3.8.new/build-tree/icu/source/test/cintltst/udatatst.c +--- icu/source/test/cintltst/udatatst.c 2007-09-13 18:17:36.000000000 -0500 ++++ icu/source/test/cintltst/udatatst.c 2009-10-07 11:31:52.351957135 -0500 +@@ -1281,7 +1281,7 @@ + * MBCS conversion table file without extension, + * to test swapping and preflighting of UTF-8-friendly mbcsIndex[]. 
+ */ +- {"jisx-208", "cnv", ucnv_swap}, ++ {"jisx-212", "cnv", ucnv_swap}, + #endif + + #if !UCONFIG_NO_CONVERSION +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt +--- icu/source/test/testdata/conversion.txt 2007-09-13 18:17:46.000000000 -0500 ++++ icu/source/test/testdata/conversion.txt 2009-10-07 11:31:52.351957135 -0500 +@@ -48,6 +48,15 @@ + toUnicode { + Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" } + Cases { ++ // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and ++ // using the Shift-JIS table for JIS X 0208 (ticket #5797) ++ { ++ "ISO-2022-JP", ++ :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 }, ++ "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e", ++ :intvector{ 3,4,5,9,11,13,15,17,19,21,23,25,27 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } + // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets() + { + "ISO-8859-3", +@@ -495,6 +504,15 @@ + fromUnicode { + Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" } + Cases { ++ // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and ++ // using the Shift-JIS table for JIS X 0208 (ticket #5797) ++ { ++ "ISO-2022-JP", ++ "\u203e\xa5\u4e00\ufa10\u6f3e\u0391", ++ :bin{ 1b284a7e5c1b2442306c222e5f2126211b2842 }, ++ :intvector{ 0,0,0,0,1,2,2,2,2,2,3,3,4,4,5,5,5,5,5 }, ++ :int{1}, :int{0}, "", "?=\u3013", "" // U+3013 Geta Mark converts to 222e ++ } + // Verify that mappings that would result in byte values outside 20..7F (for SBCS) + // or 21..7E (for DBCS) are not used. 
+ // ibm-9005_X110-2007.ucm (ISO 8859-7, .F=1b2e46): +@@ -1293,13 +1311,13 @@ + // versions of ISO-2022-JP + { + "ISO-2022-JP", +- "[\x00-\x0d\x10-\x1a\x1c-\x7f\u0391-\u03a1\uff61-\uff9f\u4e00\u4e01\uffe5]", +- "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\uffe6-\U0010ffff]", ++ "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]", ++ "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]", + :int{0} + } + { + "ISO-2022-JP-2", +- "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\uff61-\uff9f\u4e00-\u4e05\uffe6]", ++ "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]", + "[\x0e\x0f\x1b\uffe7-\U0010ffff]", + :int{0} + } --- icu-3.8.orig/debian/patches/07-CVE-2009-0153.patch +++ icu-3.8/debian/patches/07-CVE-2009-0153.patch @@ -0,0 +1,592 @@ +# +# Description: fix improper handling of invalid byte sequences during Unicode +# conversion. 
Requires the following: +# http://bugs.icu-project.org/trac/ticket/5797 +# http://bugs.icu-project.org/trac/ticket/6001 +# http://bugs.icu-project.org/trac/ticket/6002 +# Patch: https://bugzilla.redhat.com/show_bug.cgi?id=503071 +# +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c +--- icu/source/common/ucnv2022.c 2009-10-07 11:33:25.051981563 -0500 ++++ icu/source/common/ucnv2022.c 2009-10-07 11:33:30.351949076 -0500 +@@ -1973,6 +1973,7 @@ + mySourceChar = args->converter->toUBytes[0]; + args->converter->toULength = 0; + cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; ++ targetUniChar = missingCharMarker; + goto getTrailByte; + } + +@@ -2102,18 +2103,45 @@ + default: + /* G0 DBCS */ + if(mySource < mySourceLimit) { ++ int leadIsOk, trailIsOk; + char trailByte; + getTrailByte: +- trailByte = *mySource++; +- if(cs == JISX208) { +- _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf); +- } else { +- tempBuf[0] = (char)mySourceChar; +- tempBuf[1] = trailByte; +- } +- mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); +- targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); +- } else { ++ trailByte = *mySource; ++ /* ++ * Ticket 5691: consistent illegal sequences: ++ * - We include at least the first byte in the illegal sequence. ++ * - If any of the non-initial bytes could be the start of a character, ++ * we stop the illegal sequence before the first one of those. ++ * ++ * In ISO-2022 DBCS, if both bytes are valid or both bytes are outside ++ * the 21..7e range, then we treat them as a pair. ++ * Otherwise (valid lead byte + illegal trail byte, or vice versa) ++ * we report only the first byte as the illegal sequence. 
++ */ ++ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); ++ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); ++ if (leadIsOk == trailIsOk) { ++ ++mySource; ++ uint32_t tmpSourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); ++ if (leadIsOk) { ++ if(cs == JISX208) { ++ _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf); ++ mySourceChar = tmpSourceChar; ++ } else { ++ /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ ++ mySourceChar = tmpSourceChar; ++ if (cs == KSC5601) { ++ tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ ++ } ++ tempBuf[0] = (char)(tmpSourceChar >> 8); ++ tempBuf[1] = (char)(tmpSourceChar); ++ } ++ targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); ++ } else { ++ mySourceChar = tmpSourceChar; ++ } ++ } ++ } else { + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength = 1; + goto endloop; +@@ -2254,7 +2282,12 @@ + } + /* only DBCS or SBCS characters are expected*/ + /* DB characters with high bit set to 1 are expected */ +- if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){ ++ if( length > 2 || length==0 || ++ (length == 1 && targetByteUnit > 0x7f) || ++ (length == 2 && ++ ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) || ++ (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1))) ++ ) { + targetByteUnit=missingCharMarker; + } + if (targetByteUnit != missingCharMarker){ +@@ -2583,17 +2616,36 @@ + myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */ + if(myData->toU2022State.g == 1) { + if(mySource < mySourceLimit) { ++ int leadIsOk, trailIsOk; + char trailByte; + getTrailByte: +- trailByte = *mySource++; +- tempBuf[0] = (char)(mySourceChar + 0x80); +- tempBuf[1] = (char)(trailByte + 0x80); +- mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); +- if((mySourceChar & 0x8080) == 0) { +- 
targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); ++ targetUniChar = missingCharMarker; ++ trailByte = *mySource; ++ /* ++ * Ticket 5691: consistent illegal sequences: ++ * - We include at least the first byte in the illegal sequence. ++ * - If any of the non-initial bytes could be the start of a character, ++ * we stop the illegal sequence before the first one of those. ++ * ++ * In ISO-2022 DBCS, if both bytes are valid or both bytes are outside ++ * the 21..7e range, then we treat them as a pair. ++ * Otherwise (valid lead byte + illegal trail byte, or vice versa) ++ * we report only the first byte as the illegal sequence. ++ */ ++ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); ++ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); ++ if (leadIsOk == trailIsOk) { ++ ++mySource; ++ if (leadIsOk) { ++ tempBuf[0] = (char)(mySourceChar + 0x80); ++ tempBuf[1] = (char)(trailByte + 0x80); ++ targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); ++ } else { ++ leadIsOk = TRUE; /* TODO: remove */ ++ } ++ mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); + } else { +- /* illegal bytes > 0x7f */ +- targetUniChar = missingCharMarker; ++ trailIsOk = TRUE; /* TODO: remove */ + } + } else { + args->converter->toUBytes[0] = (uint8_t)mySourceChar; +@@ -2601,8 +2653,10 @@ + break; + } + } +- else{ ++ else if(mySourceChar <= 0x7f) { + targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback); ++ } else { ++ targetUniChar = 0xffff; + } + if(targetUniChar < 0xfffe){ + if(args->offsets) { +@@ -3099,6 +3153,7 @@ + /* continue with a partial double-byte character */ + mySourceChar = args->converter->toUBytes[0]; + args->converter->toULength = 0; ++ targetUniChar = missingCharMarker; + goto getTrailByte; + } + +@@ -3178,29 +3233,48 @@ + UConverterSharedData *cnv; + StateEnum tempState; + int32_t tempBufLen; ++ int leadIsOk, trailIsOk; + char trailByte; + getTrailByte: +- 
trailByte = *mySource++; +- tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; +- if(tempState > CNS_11643_0) { +- cnv = myData->myConverterArray[CNS_11643]; +- tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); +- tempBuf[1] = (char) (mySourceChar); +- tempBuf[2] = trailByte; +- tempBufLen = 3; +- +- }else{ +- cnv = myData->myConverterArray[tempState]; +- tempBuf[0] = (char) (mySourceChar); +- tempBuf[1] = trailByte; +- tempBufLen = 2; ++ trailByte = *mySource; ++ /* ++ * Ticket 5691: consistent illegal sequences: ++ * - We include at least the first byte in the illegal sequence. ++ * - If any of the non-initial bytes could be the start of a character, ++ * we stop the illegal sequence before the first one of those. ++ * ++ * In ISO-2022 DBCS, if both bytes are valid or both bytes are outside ++ * the 21..7e range, then we treat them as a pair. ++ * Otherwise (valid lead byte + illegal trail byte, or vice versa) ++ * we report only the first byte as the illegal sequence. ++ */ ++ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); ++ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); ++ if (leadIsOk == trailIsOk) { ++ ++mySource; ++ if (leadIsOk) { ++ tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; ++ if(tempState >= CNS_11643_0) { ++ cnv = myData->myConverterArray[CNS_11643]; ++ tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); ++ tempBuf[1] = (char) (mySourceChar); ++ tempBuf[2] = trailByte; ++ tempBufLen = 3; ++ ++ }else{ ++ cnv = myData->myConverterArray[tempState]; ++ tempBuf[0] = (char) (mySourceChar); ++ tempBuf[1] = trailByte; ++ tempBufLen = 2; ++ } ++ targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); ++ } ++ mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); + } +- mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); + if(pToU2022State->g>=2) { + /* return from a single-shift state to the previous one */ + pToU2022State->g=pToU2022State->prevG; + } +- targetUniChar = 
ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); + } else { + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength = 1; +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c icu-3.8.new/build-tree/icu/source/common/ucnvhz.c +--- icu/source/common/ucnvhz.c 2009-10-07 11:33:25.061953264 -0500 ++++ icu/source/common/ucnvhz.c 2009-10-07 11:33:30.363220525 -0500 +@@ -215,19 +215,35 @@ + } + else{ + /* trail byte */ ++ int leadIsOk, trailIsOk; + uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; +- if( (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21) && +- (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21) +- ) { +- tempBuf[0] = (char) (leadByte+0x80) ; +- tempBuf[1] = (char) (mySourceChar+0x80); +- targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, +- tempBuf, 2, args->converter->useFallback); ++ targetUniChar = 0xffff; ++ /* ++ * Ticket 5691: consistent illegal sequences: ++ * - We include at least the first byte in the illegal sequence. ++ * - If any of the non-initial bytes could be the start of a character, ++ * we stop the illegal sequence before the first one of those. ++ * ++ * In HZ DBCS, if both bytes are valid or both bytes are outside ++ * the 21..7d/7e range, then we treat them as a pair. ++ * Otherwise (valid lead byte + illegal trail byte, or vice versa) ++ * we report only the first byte as the illegal sequence. 
++ */ ++ leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); ++ trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); ++ if (leadIsOk == trailIsOk) { ++ if (leadIsOk) { ++ tempBuf[0] = (char) (leadByte+0x80) ; ++ tempBuf[1] = (char) (mySourceChar+0x80); ++ targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, ++ tempBuf, 2, args->converter->useFallback); ++ } ++ /* add another bit so that the code below writes 2 bytes in case of error */ ++ mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; + } else { +- targetUniChar = 0xffff; ++ --mySource; ++ mySourceChar = (int32_t)leadByte; + } +- /* add another bit so that the code below writes 2 bytes in case of error */ +- mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; + args->converter->toUnicodeStatus =0x00; + } + } +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c +--- icu/source/common/ucnvmbcs.c 2009-10-07 11:33:24.991952135 -0500 ++++ icu/source/common/ucnvmbcs.c 2009-10-07 11:33:30.363220525 -0500 +@@ -1,7 +1,7 @@ + /* + ****************************************************************************** + * +-* Copyright (C) 2000-2007, International Business Machines ++* Copyright (C) 2000-2008, International Business Machines + * Corporation and others. All Rights Reserved. + * + ****************************************************************************** +@@ -1791,6 +1791,65 @@ + pArgs->offsets=offsets; + } + ++static UBool ++hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) { ++ const int32_t *row=stateTable[state]; ++ int32_t b, entry; ++ /* First test for final entries in this state for some commonly valid byte values. 
*/ ++ entry=row[0xa1]; ++ if( !MBCS_ENTRY_IS_TRANSITION(entry) && ++ MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL ++ ) { ++ return TRUE; ++ } ++ entry=row[0x41]; ++ if( !MBCS_ENTRY_IS_TRANSITION(entry) && ++ MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL ++ ) { ++ return TRUE; ++ } ++ /* Then test for final entries in this state. */ ++ for(b=0; b<=0xff; ++b) { ++ entry=row[b]; ++ if( !MBCS_ENTRY_IS_TRANSITION(entry) && ++ MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL ++ ) { ++ return TRUE; ++ } ++ } ++ /* Then recurse for transition entries. */ ++ for(b=0; b<=0xff; ++b) { ++ entry=row[b]; ++ if( MBCS_ENTRY_IS_TRANSITION(entry) && ++ hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)) ++ ) { ++ return TRUE; ++ } ++ } ++ return FALSE; ++} ++ ++/* ++ * Is byte b a single/lead byte in this state? ++ * Recurse for transition states, because here we don't want to say that ++ * b is a lead byte if all byte sequences that start with b are illegal. ++ */ ++static UBool ++isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) { ++ const int32_t *row=stateTable[state]; ++ int32_t entry=row[b]; ++ if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ ++ return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)); ++ } else { ++ uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); ++ if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { ++ return FALSE; /* SI/SO are illegal for DBCS-only conversion */ ++ } else { ++ return action!=MBCS_STATE_ILLEGAL; ++ } ++ } ++} ++ + U_CFUNC void + ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { +@@ -2146,6 +2205,34 @@ + sourceIndex=nextSourceIndex; + } else if(U_FAILURE(*pErrorCode)) { + /* callback(illegal) */ ++ if(byteIndex>1) { ++ /* ++ * Ticket 5691: consistent illegal sequences: ++ * - We include at least the first byte in the illegal sequence. 
++ * - If any of the non-initial bytes could be the start of a character, ++ * we stop the illegal sequence before the first one of those. ++ */ ++ UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); ++ int8_t i; ++ for(i=1; ++ isource); ++ byteIndex=i; /* length of reported illegal byte sequence */ ++ if(backOutDistance<=bytesFromThisBuffer) { ++ source-=backOutDistance; ++ } else { ++ /* Back out bytes from the previous buffer: Need to replay them. */ ++ cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); ++ /* preToULength is negative! */ ++ uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength); ++ source=(const uint8_t *)pArgs->source; ++ } ++ } ++ } + break; + } else /* unassigned sequences indicated with byteIndex>0 */ { + /* try an extension mapping */ +@@ -2156,6 +2243,7 @@ + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); ++ /* TODO: nextSourceIndex+=diff instead of nextSourceIndex+diff ?? */ + sourceIndex=nextSourceIndex+(int32_t)(source-(const uint8_t *)pArgs->source); + + if(U_FAILURE(*pErrorCode)) { +@@ -2447,15 +2535,37 @@ + + if(c<0) { + if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSourcetoUBytes; + cnv->toULength=(int8_t)(source-lastSource); + do { + *bytes++=*lastSource++; + } while(lastSourcesharedData->mbcs.dbcsOnlyState!=0); ++ uint8_t *bytes=cnv->toUBytes; ++ *bytes++=*lastSource++; /* first byte */ ++ if(lastSource==source) { ++ cnv->toULength=1; ++ } else /* lastSourcetoULength=i; ++ source=lastSource; ++ } + } else { + /* no output because of empty input or only state changes */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nccbtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nccbtst.c +--- icu/source/test/cintltst/nccbtst.c 2007-09-13 18:17:34.000000000 -0500 ++++ icu/source/test/cintltst/nccbtst.c 2009-10-07 11:33:30.363220525 -0500 +@@ -2497,13 +2497,13 @@ + + + static const uint8_t text943[] = { +- 0x82, 0xa9, 0x82, 
0x20, /*0xc8,*/ 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; +- static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57}; +- static const UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57}; ++ 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; ++ static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; ++ static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; + static const UChar toUnicode943stop[]= { 0x304b}; + +- static const int32_t fromIBM943Offssub[] = {0, 2, 4, 5, 7}; +- static const int32_t fromIBM943Offsskip[] = { 0, 4, 5, 7}; ++ static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; ++ static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; + static const int32_t fromIBM943Offsstop[] = { 0}; + + gInBufferSize = inputsize; +@@ -2537,9 +2537,9 @@ + { + static const uint8_t sampleText[] = { + 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, +- 0xff, /*0x82, 0xa9,*/ 0x32, 0x33}; +- static const UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063, 0xfffd,/*0x304b,*/ 0x0032, 0x0033}; +- static const int32_t fromIBM943Offssub[] = {0, 2, 3, 4, 5, 7, 8}; ++ 0xff, 0x32, 0x33}; ++ static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; ++ static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; + /*checking illegal value for ibm-943 with substitute*/ + gInBufferSize = inputsize; + gOutBufferSize = outputsize; +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nucnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nucnvtst.c +--- icu/source/test/cintltst/nucnvtst.c 2009-10-07 11:33:25.071957485 -0500 ++++ icu/source/test/cintltst/nucnvtst.c 2009-10-07 11:33:30.373199482 -0500 +@@ -2608,7 +2608,7 @@ + TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); + /*Test for the condition where there is an invalid character*/ + { +- 
static const uint8_t source2[]={0xa1, 0x01}; ++ static const uint8_t source2[]={0xa1, 0x80}; + TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); + } + /*Test for the condition where we have a truncated char*/ +@@ -3901,11 +3901,11 @@ + TestISO_2022_KR() { + /* test input */ + static const uint16_t in[]={ +- 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D +- ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04 ++ 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D ++ ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 + ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 + ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB +- ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2 ++ ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 + ,0x53E3,0x53E4,0x000A,0x000D}; + const UChar* uSource; + const UChar* uSourceLimit; +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt +--- icu/source/test/testdata/conversion.txt 2009-10-07 11:33:25.071957485 -0500 ++++ icu/source/test/testdata/conversion.txt 2009-10-07 11:33:30.373199482 -0500 +@@ -48,12 +48,83 @@ + toUnicode { + Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" } + Cases { ++ // Test ticket 5691: consistent illegal sequences ++ // Unfortunately, we cannot use the Shift-JIS examples from the ticket ++ // comments because our Shift-JIS table is Windows-compatible and ++ // therefore has no illegal single bytes. Same for GBK. ++ // Instead, we use the stricter GB 18030 also for 2-byte examples. 
++ // The byte sequences are generally slightly different from the ticket ++ // comment, simply using assigned characters rather than just ++ // theoretically valid sequences. ++ { ++ "gb18030", ++ :bin{ 618140813c81ff7a }, ++ "a\u4e02\\x81<\\x81\\xFFz", ++ :intvector{ 0,1,3,3,3,3,4,5,5,5,5,5,5,5,5,7 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } ++ { ++ "EUC-JP", ++ :bin{ 618fb0a98fb03c8f3cb0a97a }, ++ "a\u4e28\\x8F\\xB0<\\x8F<\u9022z", ++ :intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } ++ { ++ "gb18030", ++ :bin{ 618130fc318130fc8181303c3e813cfc817a }, ++ "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z", ++ :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } ++ { ++ "UTF-8", ++ :bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a }, ++ "a\U00040042\\xF1\\x80\\x81<\\xF1\\x80\\x81\\xFF\\xF1\\x80\\xFF<\\xF1\\xFF<>z", ++ :intvector{ 0,1,1,5,5,5,5,5,5,5,5,5,5,5,5,8,9,9,9,9,9,9,9,9,9,9,9,9,12,12,12,12,13,13,13,13,13,13,13,13,15,15,15,15,16,17,17,17,17,18,18,18,18,19,20,21 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } ++ { ++ "ISO-2022-JP-2", ++ :bin{ 1b24424141af4142affe41431b2842 }, ++ "\u758f\\xAF\u758e\\xAF\\xFE\u790e", ++ :intvector{ 3,5,5,5,5,6,8,8,8,8,8,8,8,8,10 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } ++ { ++ "ibm-25546", ++ :bin{ 411b242943420e4141af4142affe41430f5a }, ++ "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ", ++ :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } ++ { ++ "ISO-2022-KR", ++ :bin{ 411b242943420e4141af4142affe41430f5a }, ++ "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ", ++ :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } ++ { ++ "ISO-2022-CN", ++ :bin{ 411b242941420e4141af4142affe41430f5a }, ++ "AB\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z", ++ :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 }, ++ :int{1}, :int{0}, "", "&C", 
:bin{""} ++ } ++ { ++ "HZ", ++ :bin{ 417e7b4141af4142affe41437e7d5a }, ++ "A\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z", ++ :intvector{ 0,3,5,5,5,5,6,8,8,8,8,8,8,8,8,10,14 }, ++ :int{1}, :int{0}, "", "&C", :bin{""} ++ } + // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e + { + "HZ", + :bin{ 7e7b21212120217e217f772100007e217e7d207e7e807e0a2b }, +- "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd ~\ufffd+", +- :intvector{ 2,4,6,8,10,12,14,18,19,21,24 }, ++ "\u3000\ufffd\ufffd\u3013\ufffd\ufffd\u9ccc\ufffd\ufffd ~\ufffd+", ++ :intvector{ 2,4,5,6,8,9,10,12,14,18,19,21,24 }, + :int{1}, :int{1}, "", "?", :bin{""} + } + // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and +@@ -61,8 +132,8 @@ + { + "ISO-2022-JP", + :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 }, +- "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e", +- :intvector{ 3,4,5,9,11,13,15,17,19,21,23,25,27 }, ++ "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\ufffd\ufffd\u25b2\ufffd\ufffd\u6f3e", ++ :intvector{ 3,4,5,9,11,12,13,14,16,17,19,20,21,22,23,25,26,27 }, + :int{1}, :int{1}, "", "?", :bin{""} + } + // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets() --- icu-3.8.orig/debian/patches/06-CVE-2008-1036.patch +++ icu-3.8/debian/patches/06-CVE-2008-1036.patch @@ -0,0 +1,557 @@ +# +# Description: fix cross-site scripting attack via invalid character sequences +# Ubuntu: https://bugs.launchpad.net/ubuntu/+source/icu/+bug/341834 +# Patch: http://bugs.icu-project.org/trac/search?q=%22ticket:6175:%22&noquickjump=1&changeset=on +# Patch: https://bugzilla.redhat.com/attachment.cgi?id=321139 (thanks Red Hat) +# +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c +--- icu/source/common/ucnv2022.c 2009-10-07 11:32:59.372006488 -0500 ++++ icu/source/common/ucnv2022.c 2009-10-07 11:33:04.801949089 -0500 
+@@ -201,6 +201,7 @@ + #ifdef U_ENABLE_GENERIC_ISO_2022 + UBool isFirstBuffer; + #endif ++ UBool isEmptySegment; + char name[30]; + char locale[3]; + }UConverterDataISO2022; +@@ -609,6 +610,7 @@ + if(choice<=UCNV_RESET_TO_UNICODE) { + uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); + myConverterData->key = 0; ++ myConverterData->isEmptySegment = FALSE; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); +@@ -814,6 +816,7 @@ + if(chosenConverterName == NULL) { + /* SS2 or SS3 */ + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; ++ _this->toUCallbackReason = UCNV_UNASSIGNED; + return; + } + +@@ -935,6 +938,8 @@ + } + if(U_SUCCESS(*err)) { + _this->toULength = 0; ++ } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { ++ _this->toUCallbackReason = UCNV_UNASSIGNED; + } + } + +@@ -1986,6 +1991,7 @@ + continue; + } else { + /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ ++ myData->isEmptySegment = FALSE; /* reset this, we have a different error */ + break; + } + +@@ -1997,21 +2003,39 @@ + continue; + } else { + /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ ++ myData->isEmptySegment = FALSE; /* reset this, we have a different error */ + break; + } + + case ESC_2022: + mySource--; + escape: +- changeState_2022(args->converter,&(mySource), +- mySourceLimit, ISO_2022_JP,err); ++ { ++ const char * mySourceBefore = mySource; ++ int8_t toULengthBefore = args->converter->toULength; ++ ++ changeState_2022(args->converter,&(mySource), ++ mySourceLimit, ISO_2022_JP,err); ++ ++ /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */ ++ if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { ++ *err = U_ILLEGAL_ESCAPE_SEQUENCE; ++ args->converter->toUCallbackReason = UCNV_IRREGULAR; ++ args->converter->toULength = toULengthBefore + (mySource - mySourceBefore); ++ } ++ } + + /* invalid or illegal escape sequence */ 
+ if(U_FAILURE(*err)){ + args->target = myTarget; + args->source = mySource; ++ myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ + return; + } ++ /* If we successfully completed an escape sequence, we begin a new segment, empty so far */ ++ if(myData->key==0) { ++ myData->isEmptySegment = TRUE; ++ } + continue; + + /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */ +@@ -2028,6 +2052,7 @@ + /* falls through */ + default: + /* convert one or two bytes */ ++ myData->isEmptySegment = FALSE; + cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; + if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 && + !IS_JP_DBCS(cs) +@@ -2524,15 +2549,27 @@ + + if(mySourceChar==UCNV_SI){ + myData->toU2022State.g = 0; ++ if (myData->isEmptySegment) { ++ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ ++ *err = U_ILLEGAL_ESCAPE_SEQUENCE; ++ args->converter->toUCallbackReason = UCNV_IRREGULAR; ++ args->converter->toUBytes[0] = mySourceChar; ++ args->converter->toULength = 1; ++ args->target = myTarget; ++ args->source = mySource; ++ return; ++ } + /*consume the source */ + continue; + }else if(mySourceChar==UCNV_SO){ + myData->toU2022State.g = 1; ++ myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ + /*consume the source */ + continue; + }else if(mySourceChar==ESC_2022){ + mySource--; + escape: ++ myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */ + changeState_2022(args->converter,&(mySource), + mySourceLimit, ISO_2022_KR, err); + if(U_FAILURE(*err)){ +@@ -2543,6 +2580,7 @@ + continue; + } + ++ myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */ + if(myData->toU2022State.g == 1) { + if(mySource < mySourceLimit) { + char trailByte; +@@ -3075,27 +3113,52 @@ + switch(mySourceChar){ + case UCNV_SI: + pToU2022State->g=0; ++ if 
(myData->isEmptySegment) { ++ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ ++ *err = U_ILLEGAL_ESCAPE_SEQUENCE; ++ args->converter->toUCallbackReason = UCNV_IRREGULAR; ++ args->converter->toUBytes[0] = mySourceChar; ++ args->converter->toULength = 1; ++ args->target = myTarget; ++ args->source = mySource; ++ return; ++ } + continue; + + case UCNV_SO: + if(pToU2022State->cs[1] != 0) { + pToU2022State->g=1; ++ myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ + continue; + } else { + /* illegal to have SO before a matching designator */ ++ myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */ + break; + } + + case ESC_2022: + mySource--; + escape: +- changeState_2022(args->converter,&(mySource), +- mySourceLimit, ISO_2022_CN,err); ++ { ++ const char * mySourceBefore = mySource; ++ int8_t toULengthBefore = args->converter->toULength; ++ ++ changeState_2022(args->converter,&(mySource), ++ mySourceLimit, ISO_2022_CN,err); ++ ++ /* After SO there must be at least one character before a designator (designator error handled separately) */ ++ if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { ++ *err = U_ILLEGAL_ESCAPE_SEQUENCE; ++ args->converter->toUCallbackReason = UCNV_IRREGULAR; ++ args->converter->toULength = toULengthBefore + (mySource - mySourceBefore); ++ } ++ } + + /* invalid or illegal escape sequence */ + if(U_FAILURE(*err)){ + args->target = myTarget; + args->source = mySource; ++ myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ + return; + } + continue; +@@ -3109,6 +3172,7 @@ + /* falls through */ + default: + /* convert one or two bytes */ ++ myData->isEmptySegment = FALSE; + if(pToU2022State->g != 0) { + if(mySource < mySourceLimit) { + UConverterSharedData *cnv; +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_bld.c icu-3.8.new/build-tree/icu/source/common/ucnv_bld.c 
+--- icu/source/common/ucnv_bld.c 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnv_bld.c 2009-10-07 11:33:04.821949771 -0500 +@@ -932,6 +932,7 @@ + myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; + myUConverter->subChars = (uint8_t *)myUConverter->subUChars; + uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); ++ myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ + + if(mySharedConverterData->impl->open != NULL) { + mySharedConverterData->impl->open(myUConverter, realName, locale, options, err); +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_bld.h icu-3.8.new/build-tree/icu/source/common/ucnv_bld.h +--- icu/source/common/ucnv_bld.h 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnv_bld.h 2009-10-07 11:33:04.821949771 -0500 +@@ -1,6 +1,6 @@ + /* + ********************************************************************** +-* Copyright (C) 1999-2006, International Business Machines ++* Copyright (C) 1999-2006,2008 International Business Machines + * Corporation and others. All Rights Reserved. 
+ ********************************************************************** + * +@@ -226,6 +226,9 @@ + char preToU[UCNV_EXT_MAX_BYTES]; + int8_t preFromULength, preToULength; /* negative: replay */ + int8_t preToUFirstLength; /* length of first character */ ++ ++ /* new fields for ICU 4.0 */ ++ UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */ + }; + + U_CDECL_END /* end of UConverter */ +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv.c icu-3.8.new/build-tree/icu/source/common/ucnv.c +--- icu/source/common/ucnv.c 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnv.c 2009-10-07 11:33:04.821949771 -0500 +@@ -1528,11 +1528,14 @@ + cnv->toULength=0; + + /* call the callback function */ ++ if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { ++ cnv->toUCallbackReason = UCNV_UNASSIGNED; ++ } + cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, + cnv->invalidCharBuffer, errorInputLength, +- (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ? +- UCNV_UNASSIGNED : UCNV_ILLEGAL, ++ cnv->toUCallbackReason, + err); ++ cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ + + /* + * loop back to the offset handling +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c icu-3.8.new/build-tree/icu/source/common/ucnvhz.c +--- icu/source/common/ucnvhz.c 2009-10-07 11:32:59.422000252 -0500 ++++ icu/source/common/ucnvhz.c 2009-10-07 11:33:04.821949771 -0500 +@@ -59,6 +59,7 @@ + UBool isEscapeAppended; + UBool isStateDBCS; + UBool isTargetUCharDBCS; ++ UBool isEmptySegment; + }UConverterDataHZ; + + +@@ -98,6 +99,7 @@ + cnv->mode=0; + if(cnv->extraInfo != NULL){ + ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; ++ ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; + } + } + if(choice!=UCNV_RESET_TO_UNICODE) { +@@ -130,6 +132,10 @@ + * from-GB code '~}' ($7E7D) is outside the defined GB range.) 
+ * + * Source: RFC 1842 ++* ++* Note that the formal syntax in RFC 1842 is invalid. I assume that the ++* intended definition of single-byte-segment is as follows (pedberg): ++* single-byte-segment = single-byte-seq 1*single-byte-char + */ + + +@@ -168,12 +174,23 @@ + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); + } + *(myTarget++)=(UChar)mySourceChar; ++ myData->isEmptySegment = FALSE; + continue; + case UCNV_OPEN_BRACE: +- myData->isStateDBCS = TRUE; +- continue; + case UCNV_CLOSE_BRACE: +- myData->isStateDBCS = FALSE; ++ myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); ++ if (myData->isEmptySegment) { ++ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ ++ *err = U_ILLEGAL_ESCAPE_SEQUENCE; ++ args->converter->toUCallbackReason = UCNV_IRREGULAR; ++ args->converter->toUBytes[0] = UCNV_TILDE; ++ args->converter->toUBytes[1] = mySourceChar; ++ args->converter->toULength = 2; ++ args->target = myTarget; ++ args->source = mySource; ++ return; ++ } ++ myData->isEmptySegment = TRUE; + continue; + default: + /* if the first byte is equal to TILDE and the trail byte +@@ -181,6 +198,7 @@ + */ + mySourceChar = 0x7e00 | mySourceChar; + targetUniChar = 0xffff; ++ myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + break; + } + } else if(myData->isStateDBCS) { +@@ -191,6 +209,7 @@ + } else { + /* add another bit to distinguish a 0 byte from not having seen a lead byte */ + args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); ++ myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ + } + continue; + } +@@ -218,8 +237,10 @@ + continue; + } else if(mySourceChar <= 0x7f) { + targetUniChar = (UChar)mySourceChar; /* ASCII */ ++ myData->isEmptySegment = FALSE; /* the segment has something valid */ + } else { + targetUniChar = 0xffff; ++ 
myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + } + } + if(targetUniChar < 0xfffe){ +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nucnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nucnvtst.c +--- icu/source/test/cintltst/nucnvtst.c 2009-10-07 11:32:59.321951109 -0500 ++++ icu/source/test/cintltst/nucnvtst.c 2009-10-07 11:33:04.821949771 -0500 +@@ -81,6 +81,7 @@ + static void TestJitterbug2411(void); + static void TestJB5275(void); + static void TestJB5275_1(void); ++static void TestJitterbug6175(void); + #endif + + static void TestRoundTrippingAllUTF(void); +@@ -297,6 +298,7 @@ + #if !UCONFIG_NO_LEGACY_CONVERSION + addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); + addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); ++ addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); + #endif + + } +@@ -4456,6 +4458,70 @@ + free(offsets); + } + ++/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ ++typedef struct { ++ const char * converterName; ++ const char * inputText; ++ int inputTextLength; ++} EmptySegmentTest; ++ ++/* Callback for TestJitterbug6175, should only get called for empty segment errors */ ++static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, ++ int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { ++ if (reason > UCNV_IRREGULAR) { ++ return; ++ } ++ if (reason != UCNV_IRREGULAR) { ++ log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); ++ } ++ /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ ++ *err = U_ZERO_ERROR; ++ ucnv_cbToUWriteSub(toArgs,0,err); ++} ++ ++enum { kEmptySegmentToUCharsMax = 64 }; ++static void TestJitterbug6175(void) { ++ static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 
0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; ++ static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; ++ static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; ++ static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; ++ static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; ++ static const EmptySegmentTest emptySegmentTests[] = { ++ /* converterName inputText inputTextLength */ ++ { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, ++ { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, ++ { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, ++ { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, ++ { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, ++ /* terminator: */ ++ { NULL, NULL, 0, } ++ }; ++ const EmptySegmentTest * testPtr; ++ for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { ++ UErrorCode err = U_ZERO_ERROR; ++ UConverter * cnv = ucnv_open(testPtr->converterName, &err); ++ if (U_FAILURE(err)) { ++ log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); ++ return; ++ } ++ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); ++ if (U_FAILURE(err)) { ++ log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); ++ ucnv_close(cnv); ++ return; ++ } ++ { ++ UChar toUChars[kEmptySegmentToUCharsMax]; ++ UChar * toUCharsPtr = toUChars; ++ const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; ++ const char * inCharsPtr = testPtr->inputText; ++ const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; ++ ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); ++ } ++ ucnv_close(cnv); ++ } ++} ++ + static void + 
TestEBCDIC_STATEFUL() { + /* test input */ +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt +--- icu/source/test/testdata/conversion.txt 2009-10-07 11:32:59.431954490 -0500 ++++ icu/source/test/testdata/conversion.txt 2009-10-07 11:33:04.821949771 -0500 +@@ -1,6 +1,6 @@ + //******************************************************************************* + // +-// Copyright (C) 2003-2007, International Business Machines ++// Copyright (C) 2003-2008, International Business Machines + // Corporation and others. All Rights Reserved. + // + // file name: conversion.txt +@@ -199,6 +199,21 @@ + :intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 }, + :int{1}, :int{1}, "", "&", :bin{""} + } ++ // empty segment (using substitution and stop) ++ { ++ "ISO-2022-KR", ++ :bin{ 1b242943610e0f620d0a }, ++ "a\uFFFDb\u000D\u000A", ++ :intvector{ 4, 6, 7, 8, 9 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } ++ { ++ "ISO-2022-KR", ++ :bin{ 1b242943610e0f620d0a }, ++ "a", ++ :intvector{ 4 }, ++ :int{1}, :int{1}, "illesc", ".", :bin{"0f"} ++ } + + // ISO-2022-JP + +@@ -249,6 +264,21 @@ + :bin{ 41c15c1b284a5cc242 }, "A\uff81\\\xa5\uff82B", :intvector{ 0, 1, 2, 6, 7, 8 }, + :int{1}, :int{1}, "", ".", :bin{""} + } ++ // empty segment (using substitution and stop) ++ { ++ "ISO-2022-JP", ++ :bin{ 61621b24421b284263640d0a }, ++ "ab\uFFFDcd\u000D\u000A", ++ :intvector{ 0, 1, 5, 8, 9, 10, 11 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } ++ { ++ "ISO-2022-JP", ++ :bin{ 61621b24421b284263640d0a }, ++ "ab", ++ :intvector{ 0, 1 }, ++ :int{1}, :int{1}, "illesc", ".", :bin{"1b2842"} ++ } + + // ISO-2022-CN + +@@ -319,6 +349,36 @@ + :bin{ 411b242b491b4f2121 }, "\x41", :intvector{ 0 }, + :int{1}, :int{1}, "unsuppesc", ".", :bin{ 1b242b49 } + } ++ // empty segment 1 (using substitution and stop) ++ { ++ "ISO-2022-CN", ++ :bin{ 611b242941620e0f1b242a481b4e6a65630d0a }, ++ "ab\uFFFD\u994Cc\u000D\u000A", ++ 
:intvector{ 0, 5, 7, 14, 16, 17, 18 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } ++ { ++ "ISO-2022-CN", ++ :bin{ 611b242941620e0f1b242a481b4e6a65630d0a }, ++ "ab", ++ :intvector{ 0, 5 }, ++ :int{1}, :int{1}, "illesc", ".", :bin{"0f"} ++ } ++ // empty segment 2 (using substitution and stop) ++ { ++ "ISO-2022-CN", ++ :bin{ 611b242941620e1b24294768640f630d0a }, ++ "ab\uFFFD\u5F70c\u000D\u000A", ++ :intvector{ 0, 5, 7, 11, 14, 15, 16 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } ++ { ++ "ISO-2022-CN", ++ :bin{ 611b242941620e1b24294768640f630d0a }, ++ "ab", ++ :intvector{ 0, 5 }, ++ :int{1}, :int{1}, "illesc", ".", :bin{"1b242947"} ++ } + + // ISO-2022 SBCS + // [U_ENABLE_GENERIC_ISO_2022] +@@ -333,6 +393,39 @@ + // :int{1}, :int{1}, "", ".", :bin{""} + //} + ++ // HZ-GB-2312 ++ ++ // empty segment 1 (using substitution and stop) ++ { ++ "HZ-GB-2312", ++ :bin{ 61627e7b7e7d6364 }, ++ "ab\uFFFDcd", ++ :intvector{ 0, 1, 4, 6, 7 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } ++ { ++ "HZ-GB-2312", ++ :bin{ 61627e7b7e7d63640d0a }, ++ "ab", ++ :intvector{ 0, 1 }, ++ :int{1}, :int{1}, "illesc", ".", :bin{"7e7d"} ++ } ++ // empty segment 2 & legal redundant switches (using substitution and stop) ++ { ++ "HZ-GB-2312", ++ :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d }, ++ "ab\u4E0D\u7A7A\uFFFD\u4E00cdef\uFFFD", ++ :intvector{ 0, 1, 4, 6, 10, 12, 16, 17, 20, 21, 24 }, ++ :int{1}, :int{1}, "", "?", :bin{""} ++ } ++ { ++ "HZ-GB-2312", ++ :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d }, ++ "ab\u4E0D\u7A7A", ++ :intvector{ 0, 1, 4, 6 }, ++ :int{1}, :int{1}, "illesc", ".", :bin{"7e7b"} ++ } ++ + // DBCS-only extensions + { + "ibm-970", --- icu-3.8.orig/debian/patches/04-redhat.icu6001.patch +++ icu-3.8/debian/patches/04-redhat.icu6001.patch @@ -0,0 +1,764 @@ +# +# Description: ucnv_getUnicodeSet(): add choice of set of code points with +# roundtrip or fallback mappings (required to fix CVE-2009-0153). 
See: +# https://bugzilla.redhat.com/show_bug.cgi?id=503071 +# Upstream: http://bugs.icu-project.org/trac/ticket/6001 +# +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c +--- icu/source/common/ucnv2022.c 2009-10-07 11:32:05.613200656 -0500 ++++ icu/source/common/ucnv2022.c 2009-10-07 11:32:11.463208021 -0500 +@@ -3399,11 +3399,19 @@ + /* include ASCII for JP */ + sa->addRange(sa->set, 0, 0x7f); + } +- if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) { ++ if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { + /* +- * TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks, +- * we need to include half-width Katakana for all JP variants because +- * JIS X 0208 has hardcoded fallbacks for them. ++ * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 ++ * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) ++ * use half-width Katakana. ++ * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) ++ * half-width Katakana via the ESC ( I sequence. ++ * However, we only emit (fromUnicode) half-width Katakana according to the ++ * definition of each variant. ++ * ++ * When including fallbacks, ++ * we need to include half-width Katakana Unicode code points for all JP variants because ++ * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). + */ + /* include half-width Katakana for JP */ + sa->addRange(sa->set, HWKANA_START, HWKANA_END); +@@ -3457,6 +3465,12 @@ + * corresponding to JIS X 0208. + */ + filter=UCNV_SET_FILTER_SJIS; ++ } else if(i==KSC5601) { ++ /* ++ * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) ++ * are broader than GR94. 
++ */ ++ filter=UCNV_SET_FILTER_GR94DBCS; + } else { + filter=UCNV_SET_FILTER_NONE; + } +@@ -3472,6 +3486,9 @@ + sa->remove(sa->set, 0x0e); + sa->remove(sa->set, 0x0f); + sa->remove(sa->set, 0x1b); ++ ++ /* ISO 2022 converters do not convert C1 controls either */ ++ sa->removeRange(sa->set, 0x80, 0x9f); + } + + static const UConverterImpl _ISO2022Impl={ +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_ext.c icu-3.8.new/build-tree/icu/source/common/ucnv_ext.c +--- icu/source/common/ucnv_ext.c 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnv_ext.c 2009-10-07 11:32:11.483207693 -0500 +@@ -946,7 +946,7 @@ + ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, + const int32_t *cx, + const USetAdder *sa, +- UConverterUnicodeSet which, ++ UBool useFallback, + int32_t minLength, + UChar32 c, + UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, +@@ -966,7 +966,7 @@ + value=*fromUSectionValues++; + + if( value!=0 && +- UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) && ++ (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) && + UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength + ) { + if(c>=0) { +@@ -987,12 +987,14 @@ + /* no mapping, do nothing */ + } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { + ucnv_extGetUnicodeSetString( +- sharedData, cx, sa, which, minLength, ++ sharedData, cx, sa, useFallback, minLength, + U_SENTINEL, s, length+1, + (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), + pErrorCode); +- } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== +- UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) && ++ } else if((useFallback ? 
++ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : ++ ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== ++ UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && + UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength + ) { + sa->addString(sa->set, s, length+1); +@@ -1004,6 +1006,7 @@ + ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, ++ UConverterSetFilter filter, + UErrorCode *pErrorCode) { + const int32_t *cx; + const uint16_t *stage12, *stage3, *ps2, *ps3; +@@ -1011,6 +1014,7 @@ + + uint32_t value; + int32_t st1, stage1Length, st2, st3, minLength; ++ UBool useFallback; + + UChar s[UCNV_EXT_MAX_UCHARS]; + UChar32 c; +@@ -1027,12 +1031,20 @@ + + stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; + ++ useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); ++ + /* enumerate the from-Unicode trie table */ + c=0; /* keep track of the current code point while enumerating */ + +- if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) { ++ if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || ++ filter==UCNV_SET_FILTER_DBCS_ONLY || ++ filter==UCNV_SET_FILTER_SJIS || ++ filter==UCNV_SET_FILTER_GR94DBCS ++ ) { + /* DBCS-only, ignore single-byte results */ + minLength=2; ++ } else if(filter==UCNV_SET_FILTER_2022_CN) { ++ minLength=3; + } else { + minLength=1; + } +@@ -1064,14 +1076,41 @@ + length=0; + U16_APPEND_UNSAFE(s, length, c); + ucnv_extGetUnicodeSetString( +- sharedData, cx, sa, which, minLength, ++ sharedData, cx, sa, useFallback, minLength, + c, s, length, + (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), + pErrorCode); +- } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== +- UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) && ++ } else if((useFallback ? 
++ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : ++ ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== ++ UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && + UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength + ) { ++ switch(filter) { ++ case UCNV_SET_FILTER_2022_CN: ++ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { ++ continue; ++ } ++ break; ++ case UCNV_SET_FILTER_SJIS: ++ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { ++ continue; ++ } ++ break; ++ case UCNV_SET_FILTER_GR94DBCS: ++ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && ++ (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe-0xa1a1) && ++ (uint8_t)(value-0xa1)<=(0xfe-0xa1))) { ++ continue; ++ } ++ break; ++ default: ++ /* ++ * UCNV_SET_FILTER_NONE, ++ * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength ++ */ ++ break; ++ } + sa->add(sa->set, c); + } + } while((++c&0xf)!=0); +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_ext.h icu-3.8.new/build-tree/icu/source/common/ucnv_ext.h +--- icu/source/common/ucnv_ext.h 2007-09-13 18:17:58.000000000 -0500 ++++ icu/source/common/ucnv_ext.h 2009-10-07 11:32:11.483207693 -0500 +@@ -382,10 +382,20 @@ + UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, + UErrorCode *pErrorCode); + ++/* ++ * Add code points and strings to the set according to the extension mappings. ++ * Limitation on the UConverterSetFilter: ++ * The filters currently assume that they are used with 1:1 mappings. ++ * They only apply to single input code points, and then they pass through ++ * only mappings with single-charset-code results. ++ * For example, the Shift-JIS filter only works for 2-byte results and tests ++ * that those 2 bytes are in the JIS X 0208 range of Shift-JIS. 
++ */ + U_CFUNC void + ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, ++ UConverterSetFilter filter, + UErrorCode *pErrorCode); + + /* toUnicode helpers -------------------------------------------------------- */ +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c icu-3.8.new/build-tree/icu/source/common/ucnvhz.c +--- icu/source/common/ucnvhz.c 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnvhz.c 2009-10-07 11:32:11.483207693 -0500 +@@ -1,6 +1,6 @@ + /* + ********************************************************************** +-* Copyright (C) 2000-2006, International Business Machines ++* Copyright (C) 2000-2007, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * file name: ucnvhz.c +@@ -528,6 +528,7 @@ + sa->add(sa->set, 0x7e); + + /* add all of the code points that the sub-converter handles */ ++ /* ucnv_MBCSGetFilteredUnicodeSetForUnicode(((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, sa, which, UCNV_SET_FILTER_GR94DBCS, pErrorCode); */ + ((UConverterDataHZ*)cnv->extraInfo)-> + gbConverter->sharedData->impl-> + getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_lmb.c icu-3.8.new/build-tree/icu/source/common/ucnv_lmb.c +--- icu/source/common/ucnv_lmb.c 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnv_lmb.c 2009-10-07 11:32:11.483207693 -0500 +@@ -1,6 +1,6 @@ + /* + ********************************************************************** +-* Copyright (C) 2000-2006, International Business Machines ++* Copyright (C) 2000-2007, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ********************************************************************** + * file name: ucnv_lmb.cpp +@@ -536,7 +536,7 @@ + NULL,\ + NULL,\ + _LMBCSSafeClone,\ +- _LMBCSGetUnicodeSet\ ++ ucnv_getCompleteUnicodeSet\ + };\ + static const UConverterStaticData _LMBCSStaticData##n={\ + sizeof(UConverterStaticData),\ +@@ -662,15 +662,14 @@ + return &newLMBCS->cnv; + } + +-static void +-_LMBCSGetUnicodeSet(const UConverter *cnv, +- const USetAdder *sa, +- UConverterUnicodeSet which, +- UErrorCode *pErrorCode) { +- /* all but U+F6xx, see LMBCS explanation above (search for F6xx) */ +- sa->addRange(sa->set, 0, 0xf5ff); +- sa->addRange(sa->set, 0xf700, 0x10ffff); +-} ++/* ++ * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117) ++ * which added all code points except for U+F6xx ++ * because those cannot be represented in the Unicode group. ++ * However, it turns out that windows-950 has roundtrips for all of U+F6xx ++ * which means that LMBCS can convert all Unicode code points after all. ++ * We now simply use ucnv_getCompleteUnicodeSet(). ++ */ + + /* + Here's the basic helper function that we use when converting from +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c +--- icu/source/common/ucnvmbcs.c 2009-10-07 11:32:05.613200656 -0500 ++++ icu/source/common/ucnvmbcs.c 2009-10-07 11:32:11.483207693 -0500 +@@ -485,9 +485,23 @@ + + if(mbcsTable->outputType==MBCS_OUTPUT_1) { + const uint16_t *stage2, *stage3, *results; ++ uint16_t minValue; + + results=(const uint16_t *)mbcsTable->fromUnicodeBytes; + ++ /* ++ * Set a threshold variable for selecting which mappings to use. ++ * See ucnv_MBCSSingleFromBMPWithOffsets() and ++ * MBCS_SINGLE_RESULT_FROM_U() for details. 
++ */ ++ if(which==UCNV_ROUNDTRIP_SET) { ++ /* use only roundtrips */ ++ minValue=0xf00; ++ } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { ++ /* use all roundtrip and fallback results */ ++ minValue=0x800; ++ } ++ + for(st1=0; st1maxStage1) { +@@ -497,15 +511,8 @@ + /* read the stage 3 block */ + stage3=results+st3; + +- /* +- * Add code points for which the roundtrip flag is set. +- * Once we get a set for fallback mappings, we have to use +- * a threshold variable with a value of 0x800. +- * See ucnv_MBCSSingleFromBMPWithOffsets() and +- * MBCS_SINGLE_RESULT_FROM_U() for details. +- */ + do { +- if(*stage3++>=0xf00) { ++ if(*stage3++>=minValue) { + sa->add(sa->set, c); + } + } while((++c&0xf)!=0); +@@ -522,9 +529,12 @@ + const uint8_t *stage3, *bytes; + uint32_t st3Multiplier; + uint32_t value; ++ UBool useFallback; + + bytes=mbcsTable->fromUnicodeBytes; + ++ useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); ++ + switch(mbcsTable->outputType) { + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4_EUC: +@@ -551,9 +561,8 @@ + st3>>=16; + + /* +- * Add code points for which the roundtrip flag is set. +- * Once we get a set for fallback mappings, we have to check +- * non-roundtrip stage 3 results for whether they are 0. ++ * Add code points for which the roundtrip flag is set, ++ * or which map to non-zero bytes if we use fallbacks. + * See ucnv_MBCSFromUnicodeWithOffsets() for details. + */ + switch(filter) { +@@ -561,6 +570,23 @@ + do { + if(st3&1) { + sa->add(sa->set, c); ++ stage3+=st3Multiplier; ++ } else if(useFallback) { ++ uint8_t b=0; ++ switch(st3Multiplier) { ++ case 4: ++ b|=*stage3++; ++ case 3: ++ b|=*stage3++; ++ case 2: ++ b|=stage3[0]|stage3[1]; ++ stage3+=2; ++ default: ++ break; ++ } ++ if(b!=0) { ++ sa->add(sa->set, c); ++ } + } + st3>>=1; + } while((++c&0xf)!=0); +@@ -568,7 +594,7 @@ + case UCNV_SET_FILTER_DBCS_ONLY: + /* Ignore single-byte results (<0x100). 
*/ + do { +- if((st3&1)!=0 && *((const uint16_t *)stage3)>=0x100) { ++ if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) { + sa->add(sa->set, c); + } + st3>>=1; +@@ -578,7 +604,7 @@ + case UCNV_SET_FILTER_2022_CN: + /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */ + do { +- if((st3&1)!=0 && ((value=*stage3)==0x81 || value==0x82)) { ++ if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) { + sa->add(sa->set, c); + } + st3>>=1; +@@ -588,7 +614,20 @@ + case UCNV_SET_FILTER_SJIS: + /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */ + do { +- if((st3&1)!=0 && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) { ++ if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) { ++ sa->add(sa->set, c); ++ } ++ st3>>=1; ++ stage3+=2; /* +=st3Multiplier */ ++ } while((++c&0xf)!=0); ++ break; ++ case UCNV_SET_FILTER_GR94DBCS: ++ /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). 
*/ ++ do { ++ if( ((st3&1)!=0 || useFallback) && ++ (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfefe-0xa1a1) && ++ (uint8_t)(value-0xa1)<=(0xfe-0xa1) ++ ) { + sa->add(sa->set, c); + } + st3>>=1; +@@ -609,7 +648,7 @@ + } + } + +- ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode); ++ ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode); + } + + U_CFUNC void +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.h icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.h +--- icu/source/common/ucnvmbcs.h 2009-10-07 11:32:05.613200656 -0500 ++++ icu/source/common/ucnvmbcs.h 2009-10-07 11:32:11.483207693 -0500 +@@ -492,6 +492,7 @@ + UCNV_SET_FILTER_DBCS_ONLY, + UCNV_SET_FILTER_2022_CN, + UCNV_SET_FILTER_SJIS, ++ UCNV_SET_FILTER_GR94DBCS, + UCNV_SET_FILTER_COUNT + } UConverterSetFilter; + +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_set.c icu-3.8.new/build-tree/icu/source/common/ucnv_set.c +--- icu/source/common/ucnv_set.c 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/ucnv_set.c 2009-10-07 11:32:11.483207693 -0500 +@@ -1,7 +1,7 @@ + /* + ******************************************************************************* + * +-* Copyright (C) 2003-2005, International Business Machines ++* Copyright (C) 2003-2007, International Business Machines + * Corporation and others. All Rights Reserved. 
+ * + ******************************************************************************* +@@ -52,7 +52,8 @@ + uset_add, + uset_addRange, + uset_addString, +- uset_remove ++ uset_remove, ++ uset_removeRange + }; + sa.set=setFillIn; + +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/unicode/ucnv.h icu-3.8.new/build-tree/icu/source/common/unicode/ucnv.h +--- icu/source/common/unicode/ucnv.h 2007-09-13 18:17:54.000000000 -0500 ++++ icu/source/common/unicode/ucnv.h 2009-10-07 11:32:11.483207693 -0500 +@@ -870,6 +870,8 @@ + typedef enum UConverterUnicodeSet { + /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */ + UCNV_ROUNDTRIP_SET, ++ /** Select the set of Unicode code points with roundtrip or fallback mappings. @draft ICU 4.0 */ ++ UCNV_ROUNDTRIP_AND_FALLBACK_SET, + /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */ + UCNV_SET_COUNT + } UConverterUnicodeSet; +@@ -878,11 +880,16 @@ + /** + * Returns the set of Unicode code points that can be converted by an ICU converter. + * +- * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET): ++ * Returns one of several kinds of set: ++ * ++ * 1. UCNV_ROUNDTRIP_SET ++ * + * The set of all Unicode code points that can be roundtrip-converted +- * (converted without any data loss) with the converter. ++ * (converted without any data loss) with the converter (ucnv_fromUnicode()). + * This set will not include code points that have fallback mappings + * or are only the result of reverse fallback mappings. ++ * This set will also not include PUA code points with fallbacks, although ++ * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback(). + * See UTR #22 "Character Mapping Markup Language" + * at http://www.unicode.org/reports/tr22/ + * +@@ -893,6 +900,12 @@ + * by comparing its roundtrip set with the set of ExemplarCharacters from + * ICU's locale data or other sources + * ++ * 2. 
UCNV_ROUNDTRIP_AND_FALLBACK_SET ++ * ++ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode()) ++ * when fallbacks are turned on (see ucnv_setFallback()). ++ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks). ++ * + * In the future, there may be more UConverterUnicodeSet choices to select + * sets with different properties. + * +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/uset_imp.h icu-3.8.new/build-tree/icu/source/common/uset_imp.h +--- icu/source/common/uset_imp.h 2007-09-13 18:18:00.000000000 -0500 ++++ icu/source/common/uset_imp.h 2009-10-07 11:32:11.483207693 -0500 +@@ -36,6 +36,9 @@ + typedef void U_CALLCONV + USetRemove(USet *set, UChar32 c); + ++typedef void U_CALLCONV ++USetRemoveRange(USet *set, UChar32 start, UChar32 end); ++ + /** + * Interface for adding items to a USet, to keep low-level code from + * statically depending on the USet implementation. +@@ -47,6 +50,7 @@ + USetAddRange *addRange; + USetAddString *addString; + USetRemove *remove; ++ USetRemoveRange *removeRange; + }; + typedef struct USetAdder USetAdder; + +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/intltest/convtest.cpp icu-3.8.new/build-tree/icu/source/test/intltest/convtest.cpp +--- icu/source/test/intltest/convtest.cpp 2007-09-13 18:17:42.000000000 -0500 ++++ icu/source/test/intltest/convtest.cpp 2009-10-07 11:32:11.483207693 -0500 +@@ -70,6 +70,7 @@ + case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break; + case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break; + case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break; ++ case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break; + default: name=""; break; //needed to end loop + } + } +@@ -465,6 +466,183 @@ + } + } + ++U_CDECL_BEGIN ++static void U_CALLCONV ++getUnicodeSetCallback(const void *context, ++ UConverterFromUnicodeArgs *fromUArgs, ++ const 
UChar* codeUnits, ++ int32_t length, ++ UChar32 codePoint, ++ UConverterCallbackReason reason, ++ UErrorCode *pErrorCode) { ++ if(reason<=UCNV_IRREGULAR) { ++ ((UnicodeSet *)context)->remove(codePoint); // the converter cannot convert this code point ++ *pErrorCode=U_ZERO_ERROR; // skip ++ } // else ignore the reset, close and clone calls. ++} ++U_CDECL_END ++ ++// Compare ucnv_getUnicodeSet() with the set of characters that can be converted. ++void ++ConversionTest::TestGetUnicodeSet2() { ++ // Build a string with all code points. ++ UChar32 cpLimit; ++ int32_t s0Length; ++ if(quick) { ++ cpLimit=s0Length=0x10000; // BMP only ++ } else { ++ cpLimit=0x110000; ++ s0Length=0x10000+0x200000; // BMP + surrogate pairs ++ } ++ UChar *s0=new UChar[s0Length]; ++ if(s0==NULL) { ++ return; ++ } ++ UChar *s=s0; ++ UChar32 c; ++ UChar c2; ++ // low BMP ++ for(c=0; c<=0xd7ff; ++c) { ++ *s++=(UChar)c; ++ } ++ // trail surrogates ++ for(c=0xdc00; c<=0xdfff; ++c) { ++ *s++=(UChar)c; ++ } ++ // lead surrogates ++ // (after trails so that there is not even one surrogate pair in between) ++ for(c=0xd800; c<=0xdbff; ++c) { ++ *s++=(UChar)c; ++ } ++ // high BMP ++ for(c=0xe000; c<=0xffff; ++c) { ++ *s++=(UChar)c; ++ } ++ // supplementary code points = surrogate pairs ++ if(cpLimit==0x110000) { ++ for(c=0xd800; c<=0xdbff; ++c) { ++ for(c2=0xdc00; c2<=0xdfff; ++c2) { ++ *s++=(UChar)c; ++ *s++=c2; ++ } ++ } ++ } ++ ++ static const char *const cnvNames[]={ ++ "UTF-8", ++ "UTF-7", ++ "UTF-16", ++ "US-ASCII", ++ "ISO-8859-1", ++ "windows-1252", ++ "Shift-JIS", ++ "ibm-1390", // EBCDIC_STATEFUL table ++ "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table ++ // "HZ", TODO(markus): known bug, the set incorrectly contains [\u02CA\u02CB\u02D9\u2010\u2013\u2015...] 
++ "ISO-2022-JP", ++ "JIS7", ++ "ISO-2022-CN", ++ "ISO-2022-CN-EXT", ++ "LMBCS" ++ }; ++ char buffer[1024]; ++ int32_t i; ++ for(i=0; i100) { ++ out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); ++ } ++ errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d", ++ cnvNames[i], which); ++ errln(out); ++ } ++ ++ // are there items that must not be in the set but are? ++ (diffSet=set).removeAll(expected); ++ if(!diffSet.isEmpty()) { ++ diffSet.toPattern(out, TRUE); ++ if(out.length()>100) { ++ out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); ++ } ++ errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d", ++ cnvNames[i], which); ++ errln(out); ++ } ++ } ++ } ++ } ++ ++ delete [] s0; ++} ++ + // open testdata or ICU data converter ------------------------------------- *** + + UConverter * +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/intltest/convtest.h icu-3.8.new/build-tree/icu/source/test/intltest/convtest.h +--- icu/source/test/intltest/convtest.h 2007-09-13 18:17:40.000000000 -0500 ++++ icu/source/test/intltest/convtest.h 2009-10-07 11:32:11.493207494 -0500 +@@ -72,6 +72,7 @@ + void TestToUnicode(); + void TestFromUnicode(); + void TestGetUnicodeSet(); ++ void TestGetUnicodeSet2(); + + private: + UBool +diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt +--- icu/source/test/testdata/conversion.txt 2009-10-07 11:32:05.613200656 -0500 ++++ icu/source/test/testdata/conversion.txt 2009-10-07 11:32:11.493207494 -0500 +@@ -1311,16 +1311,29 @@ + // versions of ISO-2022-JP + { + "ISO-2022-JP", +- "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]", +- "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]", ++ "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2015\u203e\u4e00\u4e01\uffe5]", ++ 
"[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u2014\u301c\u4e02\u4e27-\u4e29\u4fe0\u663b\u9eb5\ufa0e-\ufa2d\uff61-\uff9f\uffe4\uffe6-\U0010ffff]", + :int{0} +- } ++ } + { + "ISO-2022-JP-2", +- "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]", +- "[\x0e\x0f\x1b\uffe7-\U0010ffff]", ++ "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uffe6]", ++ "[\x0e\x0f\x1b\uff61-\uff9f\uffe4\uffe7-\U0010ffff]", ++ :int{0} ++ } ++ { ++ "JIS7", ++ "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uff61-\uff9f\uffe6]", ++ "[\x0e\x0f\x1b\uffe4\uffe7-\U0010ffff]", + :int{0} + } ++ // with fallbacks ++ { ++ "ISO-2022-JP", ++ "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2014\u2015\u203e\u301c\u4e00\u4e01\u4fe0\u9eb5\uff61-\uff9f\uffe5]", ++ "[\x0e\x0f\x1b\xa6\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\u663b\ufa0e-\ufa2d\uffe4\uffe6-\U0010ffff]", ++ :int{1} ++ } + + // versions of ISO-2022-CN + { +@@ -1352,6 +1365,14 @@ + :int{0} + } + ++ // LMBCS ++ { ++ "LMBCS", ++ "[\x00-\U0010ffff]", ++ "[]", ++ :int{0} ++ } ++ + // extensions + { + "ibm-1390", --- icu-3.8.orig/debian/patches/00-cve-2007-4770-4771.patch +++ icu-3.8/debian/patches/00-cve-2007-4770-4771.patch @@ -0,0 +1,350 @@ +Index: source/i18n/regexcmp.cpp +=================================================================== +--- source/i18n/regexcmp.cpp (revision 23291) ++++ source/i18n/regexcmp.cpp (revision 23292) +@@ -2,7 +2,7 @@ + // + // file: regexcmp.cpp + // +-// Copyright (C) 2002-2007 International Business Machines Corporation and others. ++// Copyright (C) 2002-2008 International Business Machines Corporation and others. + // All Rights Reserved. 
+ // + // This file contains the ICU regular expression compiler, which is responsible +@@ -1186,14 +1186,17 @@ + // Because capture groups can be forward-referenced by back-references, + // we fill the operand with the capture group number. At the end + // of compilation, it will be changed to the variable's location. +- U_ASSERT(groupNum > 0); +- int32_t op; +- if (fModeFlags & UREGEX_CASE_INSENSITIVE) { +- op = URX_BUILD(URX_BACKREF_I, groupNum); ++ if (groupNum < 1) { ++ error(U_REGEX_INVALID_BACK_REF); + } else { +- op = URX_BUILD(URX_BACKREF, groupNum); ++ int32_t op; ++ if (fModeFlags & UREGEX_CASE_INSENSITIVE) { ++ op = URX_BUILD(URX_BACKREF_I, groupNum); ++ } else { ++ op = URX_BUILD(URX_BACKREF, groupNum); ++ } ++ fRXPat->fCompiledPat->addElement(op, *fStatus); + } +- fRXPat->fCompiledPat->addElement(op, *fStatus); + } + break; + +Index: source/i18n/rematch.cpp +=================================================================== +--- source/i18n/rematch.cpp (revision 23291) ++++ source/i18n/rematch.cpp (revision 23292) +@@ -1,6 +1,6 @@ + /* + ************************************************************************** +-* Copyright (C) 2002-2007 International Business Machines Corporation * ++* Copyright (C) 2002-2008 International Business Machines Corporation * + * and others. All rights reserved. * + ************************************************************************** + */ +@@ -30,6 +30,15 @@ + + U_NAMESPACE_BEGIN + ++// Limit the size of the back track stack, to avoid system failures caused ++// by heap exhaustion. Units are in 32 bit words, not bytes. ++// This value puts ICU's limits higher than most other regexp implementations, ++// which use recursion rather than the heap, and take more storage per ++// backtrack point. ++// This constant is _temporary_. Proper API to control the value will added. 
++// ++static const int32_t BACKTRACK_STACK_CAPACITY = 8000000; ++ + //----------------------------------------------------------------------------- + // + // Constructor and Destructor +@@ -53,8 +62,9 @@ + } + if (fStack == NULL || fData == NULL) { + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; ++ } else { ++ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); + } +- + reset(RegexStaticSets::gStaticSets->fEmptyString); + } + +@@ -78,6 +88,8 @@ + } + if (fStack == NULL || fData == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; ++ } else { ++ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); + } + reset(input); + } +@@ -102,6 +114,8 @@ + } + if (fStack == NULL || fData == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; ++ } else { ++ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); + } + reset(RegexStaticSets::gStaticSets->fEmptyString); + } +@@ -1014,6 +1028,14 @@ + inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) { + // push storage for a new frame. + int32_t *newFP = fStack->reserveBlock(frameSize, status); ++ if (newFP == NULL) { ++ // Heap allocation error on attempted stack expansion. ++ // We need to return a writable stack frame, so just return the ++ // previous frame. The match operation will stop quickly ++ // becuase of the error status, after which the frame will never ++ // be looked at again. ++ return fp; ++ } + fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack. + + // New stack frame = copy of old top frame. +@@ -1029,8 +1051,8 @@ + fp->fPatIdx = savePatIdx; + return (REStackFrame *)newFP; + } +- +- ++ ++ + //-------------------------------------------------------------------------------- + // + // MatchAt This is the actual matching engine. 
+@@ -2261,6 +2283,7 @@ + } + + if (U_FAILURE(status)) { ++ isMatch = FALSE; + break; + } + } +Index: source/test/intltest/regextst.h +=================================================================== +--- source/test/intltest/regextst.h (revision 23291) ++++ source/test/intltest/regextst.h (revision 23292) +@@ -1,6 +1,6 @@ + /******************************************************************** + * COPYRIGHT: +- * Copyright (c) 2002-2007, International Business Machines Corporation and ++ * Copyright (c) 2002-2008, International Business Machines Corporation and + * others. All Rights Reserved. + ********************************************************************/ + +@@ -30,6 +30,7 @@ + virtual void Extended(); + virtual void Errors(); + virtual void PerlTests(); ++ virtual void Bug6149(); + + // The following functions are internal to the regexp tests. + virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line); +Index: source/test/intltest/regextst.cpp +=================================================================== +--- source/test/intltest/regextst.cpp (revision 23291) ++++ source/test/intltest/regextst.cpp (revision 23292) +@@ -1,6 +1,6 @@ + /******************************************************************** + * COPYRIGHT: +- * Copyright (c) 2002-2007, International Business Machines Corporation and ++ * Copyright (c) 2002-2008, International Business Machines Corporation and + * others. All Rights Reserved. + ********************************************************************/ + +@@ -66,6 +66,10 @@ + case 6: name = "PerlTests"; + if (exec) PerlTests(); + break; ++ case 7: name = "Bug 6149"; ++ if (exec) Bug6149(); ++ break; ++ + + + default: name = ""; +@@ -1639,6 +1643,12 @@ + + // Ticket 5389 + REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); ++ ++ // Invalid Back Reference \0 ++ // For ICU 3.8 and earlier ++ // For ICU versions newer than 3.8, \0 introduces an octal escape. 
++ // ++ REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF); + + } + +@@ -2122,6 +2132,26 @@ + } + + ++//-------------------------------------------------------------- ++// ++// Bug6149 Verify limits to heap expansion for backtrack stack. ++// Use this pattern, ++// "(a?){1,}" ++// The zero-length match will repeat forever. ++// (That this goes into a loop is another bug) ++// ++//--------------------------------------------------------------- ++void RegexTest::Bug6149() { ++ UnicodeString pattern("(a?){1,}"); ++ UnicodeString s("xyz"); ++ uint32_t flags = 0; ++ UErrorCode status = U_ZERO_ERROR; ++ ++ RegexMatcher matcher(pattern, s, flags, status); ++ UBool result = false; ++ REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR); ++ REGEX_ASSERT(result == FALSE); ++ } + + #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ + +Index: source/common/uvectr32.cpp +=================================================================== +--- source/common/uvectr32.cpp (revision 23291) ++++ source/common/uvectr32.cpp (revision 23292) +@@ -1,6 +1,6 @@ + /* + ****************************************************************************** +-* Copyright (C) 1999-2003, International Business Machines Corporation and * ++* Copyright (C) 1999-2008, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ****************************************************************************** + * Date Name Description +@@ -26,6 +26,7 @@ + UVector32::UVector32(UErrorCode &status) : + count(0), + capacity(0), ++ maxCapacity(0), + elements(NULL) + { + _init(DEFUALT_CAPACITY, status); +@@ -34,6 +35,7 @@ + UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) : + count(0), + capacity(0), ++ maxCapacity(0), + elements(0) + { + _init(initialCapacity, status); +@@ -46,6 +48,9 @@ + if (initialCapacity < 1) { + initialCapacity = DEFUALT_CAPACITY; + } ++ if (maxCapacity>0 && maxCapacity= minimumCapacity) { + return TRUE; +- } else { +- int32_t newCap = capacity * 2; +- if (newCap < minimumCapacity) { +- newCap = minimumCapacity; +- } +- int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); +- if (newElems == 0) { +- status = U_MEMORY_ALLOCATION_ERROR; +- return FALSE; +- } +- uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); +- uprv_free(elements); +- elements = newElems; +- capacity = newCap; +- return TRUE; + } ++ if (maxCapacity>0 && minimumCapacity>maxCapacity) { ++ status = U_BUFFER_OVERFLOW_ERROR; ++ return FALSE; ++ } ++ int32_t newCap = capacity * 2; ++ if (newCap < minimumCapacity) { ++ newCap = minimumCapacity; ++ } ++ if (maxCapacity > 0 && newCap > maxCapacity) { ++ newCap = maxCapacity; ++ } ++ int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); ++ if (newElems == 0) { ++ status = U_MEMORY_ALLOCATION_ERROR; ++ return FALSE; ++ } ++ uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); ++ uprv_free(elements); ++ elements = newElems; ++ capacity = newCap; ++ return TRUE; + } + ++void UVector32::setMaxCapacity(int32_t limit) { ++ U_ASSERT(limit >= 0); ++ maxCapacity = limit; ++ if (maxCapacity < 0) { ++ maxCapacity = 0; ++ } ++} ++ + /** + * Change the size of this vector as follows: If newSize is smaller, + * then truncate the array, possibly deleting held elements for i >= +Index: source/common/uvectr32.h 
+=================================================================== +--- source/common/uvectr32.h (revision 23291) ++++ source/common/uvectr32.h (revision 23292) +@@ -1,6 +1,6 @@ + /* + ********************************************************************** +-* Copyright (C) 1999-2006, International Business Machines ++* Copyright (C) 1999-2008, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + */ +@@ -61,6 +61,8 @@ + int32_t count; + + int32_t capacity; ++ ++ int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. + + int32_t* elements; + +@@ -162,6 +164,14 @@ + int32_t *getBuffer() const; + + /** ++ * Set the maximum allowed buffer capacity for this vector/stack. ++ * Default with no limit set is unlimited, go until malloc() fails. ++ * A Limit of zero means unlimited capacity. ++ * Units are vector elements (32 bits each), not bytes. ++ */ ++ void setMaxCapacity(int32_t limit); ++ ++ /** + * ICU "poor man's RTTI", returns a UClassID for this class. + */ + static UClassID U_EXPORT2 getStaticClassID(); +@@ -221,7 +231,9 @@ + } + + inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) { +- ensureCapacity(count+size, status); ++ if (ensureCapacity(count+size, status) == FALSE) { ++ return NULL; ++ } + int32_t *rp = elements+count; + count += size; + return rp; --- icu-3.8.orig/debian/patches/icu-3.6-setBreakType.patch +++ icu-3.8/debian/patches/icu-3.6-setBreakType.patch @@ -0,0 +1,17 @@ +--- icu/source/common/unicode/rbbi.h-old 2007-10-31 15:52:08.000000000 +0100 ++++ icu/source/common/unicode/rbbi.h 2007-10-31 15:52:47.000000000 +0100 +@@ -611,12 +611,14 @@ + virtual int32_t getBreakType() const; + #endif + ++public: + /** + * Set the type of the break iterator. + * @internal + */ + virtual void setBreakType(int32_t type); + ++protected: + /** + * Common initialization function, used by constructors and bufferClone. 
+ * (Also used by DictionaryBasedBreakIterator::createBufferClone().) --- icu-3.8.orig/debian/libicu38.lintian +++ icu-3.8/debian/libicu38.lintian @@ -0,0 +1,6 @@ +# libicu38 installs multiple shared libraries, none of which is +# actually called libicu.so.38, but all of which are libicu*.so.38. +libicu38: package-name-doesnt-match-sonames +# libicudata.so.38.0 contains static data only +libicu38: shared-lib-without-dependency-information +libicu38: shlib-without-PT_GNU_STACK-section usr/lib/libicudata.so.38.0 --- icu-3.8.orig/debian/lib32icu38.install +++ icu-3.8/debian/lib32icu38.install @@ -0,0 +1 @@ +usr/lib32/lib*.so.* --- icu-3.8.orig/debian/copyright +++ icu-3.8/debian/copyright @@ -0,0 +1,44 @@ +This package was debianized by Jay Berkenbilt on +August 5, 2005. + +The original source was downloaded from +ftp://ftp.software.ibm.com/software/globalization/icu/3.6/icu4c-3_6-src.tgz + +The main web sites for ICU are +http://icu.sourceforge.net/ +http://www.ibm.com/software/globalization/icu/ + +ICU License - ICU 1.8.1 and later + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2001 International Business Machines Corporation and others +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL +INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING +FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION +WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, use +or other dealings in this Software without prior written authorization +of the copyright holder. + +-------------------------------------------------------------------------------- +All trademarks and registered trademarks mentioned herein are the property of their respective owners. --- icu-3.8.orig/debian/lib32icu-dev.install +++ icu-3.8/debian/lib32icu-dev.install @@ -0,0 +1,3 @@ +usr/lib32/lib*.so +usr/lib32/lib*.a +usr/lib32/icu --- icu-3.8.orig/debian/libicu38.install +++ icu-3.8/debian/libicu38.install @@ -0,0 +1 @@ +usr/lib/lib*.so.* --- icu-3.8.orig/debian/icu-doc.doc-base +++ icu-3.8/debian/icu-doc.doc-base @@ -0,0 +1,13 @@ +Document: icu-doc +Title: ICU API Documentation +Author: IBM Corporation and Others +Abstract: This manual describes the APIs of the + International Components for Unicode C/C++ + library. It is a useful reference for the + ICU programmer. 
+Section: Libs + +Format: HTML +Index: /usr/share/doc/icu-doc/html/index.html +Files: /usr/share/doc/icu-doc/html/*.html + --- icu-3.8.orig/debian/changelog +++ icu-3.8/debian/changelog @@ -0,0 +1,396 @@ +icu (3.8-6ubuntu0.2) hardy-security; urgency=low + + * SECURITY UPDATE: fix improper handling of invalid byte sequences + during Unicode conversion + - debian/07-CVE-2009-0153.patch: backported patch thanks to RedHat via + Debian + - 03-redhat.icu5797.patch, 04-redhat.icu6001.patch, and + 05-redhat.icu6002.patch required for applying 07-CVE-2009-0153.patch + with 06-CVE-2008-1036.patch needing adjustments. Patch from Debian. + - CVE-2009-0153 + + -- Jamie Strandboge Wed, 07 Oct 2009 11:33:48 -0500 + +icu (3.8-6ubuntu0.1) hardy-security; urgency=low + + * SECURITY UPDATE: Cross-site scripting attack via invalid character + sequences (LP: #341834) + - debian/patches/03-cve-2008-1036.patch: Improve parsing logic in + source/common/{ucnv2022.c,ucnv_bld.*,ucnv.c,ucnvhz.c} to replace + invalid character sequences. Also, add test case to + source/test/{cintltst/nucnvtst.c,testdata/conversion.txt}. + - CVE-2008-1036 + + -- Marc Deslauriers Wed, 25 Mar 2009 09:55:21 -0400 + +icu (3.8-6) unstable; urgency=high + + * Add debian/patches/00-cve-2007-4770-4771.patch created with + svn diff -c 23292 \ + http://source.icu-project.org/repos/icu/icu/branches/maint/maint-3-8 + to address the following security vulnerabilities: + - CVE-2007-4770: reference to non-existent capture group may + cause access to invalid memory + - CVE-2007-4771: buffer overflow in regexcmp.cpp + (Closes: #463688) + * Updated standards version to 3.7.3: no changes required. + + -- Jay Berkenbilt Thu, 07 Feb 2008 12:58:34 -0500 + +icu (3.8-5) unstable; urgency=low + + * Filter out extraneous dependencies among different versions of the + library packages. 
(Closes: #451767, 451978) + + -- Jay Berkenbilt Sat, 01 Dec 2007 09:47:32 -0500 + +icu (3.8-4) experimental; urgency=low + + * Include changes from 3.6-10. + + -- Jay Berkenbilt Sun, 18 Nov 2007 11:04:16 -0500 + +icu (3.6-10) unstable; urgency=low + + * It appears that amd64 requires 32-bit libraries to be in + /emul/ia32-linux/usr/lib instead of /usr/lib32. Following zlib's + example of moving them around for amd64 only. (Closes: #451495) + + -- Jay Berkenbilt Sun, 18 Nov 2007 11:03:10 -0500 + +icu (3.8-3) experimental; urgency=low + + * Include changes from 3.6-9. + * Include -dbg package with unstripped versions of the libraries. + + -- Jay Berkenbilt Sat, 17 Nov 2007 15:02:36 -0500 + +icu (3.6-9) unstable; urgency=low + + * Yet another 32-bit library fix. Files were installed in /32 because + of the debian/tmp32 thing. How did this ever work? (Closes: #451495) + + -- Jay Berkenbilt Sat, 17 Nov 2007 12:12:18 -0500 + +icu (3.8-2) experimental; urgency=low + + * Include changes from 3.6-8. (Closes: #448747) + + -- Jay Berkenbilt Tue, 06 Nov 2007 20:58:09 -0500 + +icu (3.6-8) unstable; urgency=low + + * Clean up 32-bit library patch to avoid excessive and unnecessary runs + of configure. (Closes: #447771) + * make setBreakType public in rbbi.h; needed by OpenOffice.org. This + patch is included in OpenOffice.org's internal ICU. Including it here + allows OpenOffice.org to continue to use this ICU package. Thanks + Rene Engelhard. (Closes: #448745) + * Rename debian/watch.not-yet to debian/no-watch so it won't get picked + up even though it's not supposed to. ICU's ftp site uses a structure + that isn't supported by uscan. (Closes: #449701) + + -- Jay Berkenbilt Tue, 06 Nov 2007 20:56:38 -0500 + +icu (3.8-1) experimental; urgency=low + + * New upstream release. All previously included patches have been + incorporated into upstream. 
+ + -- Jay Berkenbilt Sat, 20 Oct 2007 11:53:32 -0400 + +icu (3.6-7) unstable; urgency=low + + * Fix bug in which 32-bit library installs were overwriting files for + 64-bit libraries on amd64. Thanks Robert Millan for the patch. + (Closes: #447275) + + -- Jay Berkenbilt Sat, 20 Oct 2007 11:30:12 -0400 + +icu (3.6-6) unstable; urgency=low + + * Oops: fixed one more problem with 32-bit builds on a 64-bit platform. + Thanks Aaron Ucko. (Closes: #398778) + + -- Jay Berkenbilt Mon, 17 Sep 2007 15:19:59 -0400 + +icu (3.6-5) unstable; urgency=low + + * Add additional Build-Depends for 64-bit platforms. Thanks Robert + Millan. (Closes: #398778) + + -- Jay Berkenbilt Mon, 17 Sep 2007 10:42:32 -0400 + +icu (3.6-4) unstable; urgency=low + + * Accepted patch from Robert Millan (with very slight, mostly cosmetic + modifications) to build 32-bit libraries on 64-bit architectures. + Many thanks to Robert Millan for supplying this patch! (Closes: + #398778) + + -- Jay Berkenbilt Sat, 15 Sep 2007 21:42:33 -0400 + +icu (3.8~d01-1) experimental; urgency=low + + * New upstream release + * Configure with weak reference to thread library. (Closes: #389260) + * The development package no longer has the library soname in its name. + It is now just libicu-dev. + + -- Jay Berkenbilt Sat, 04 Aug 2007 11:04:49 -0400 + +icu (3.6-3) unstable; urgency=low + + * Include patch from Samuel Thibault to allow icu to build on gnu hurd. + (Closes: #414446) + + -- Jay Berkenbilt Tue, 10 Jul 2007 17:31:56 -0400 + +icu (3.6-2) unstable; urgency=low + + * Include patch to fix error in IndicClassTables to fix + worstCaseExpansion for Sinhala. Thanks to Harshula for forwarding + this. + + -- Jay Berkenbilt Mon, 27 Nov 2006 21:19:09 -0500 + +icu (3.6-1) unstable; urgency=low + + * New upstream release + * Provide libicu34-dev since ICU 3.6 provides backward compatible + interfaces in addition to new ones. 
+ + -- Jay Berkenbilt Tue, 19 Sep 2006 12:10:41 -0400 + +icu (3.6~d02-1) experimental; urgency=low + + * New upstream release. + * Remove special optimization hack to work around now-fixed m68k build + problems. (Closes: #360743) + * Update standards version. No changes required. + + -- Jay Berkenbilt Tue, 15 Aug 2006 16:34:34 -0400 + +icu (3.4.1a-1) unstable; urgency=low + + * Upstream re-released 3.4.1 without changing the version number because + the header file with 3.4.1 still said it was 3.4. Unfortunately, the + debian 3.4.1 package had already been uploaded. This "3.4.1a" release + now matches upstream's 3.4.1. + + -- Jay Berkenbilt Wed, 29 Mar 2006 22:19:08 -0500 + +icu (3.4.1-1) unstable; urgency=low + + * New upstream release + + -- Jay Berkenbilt Fri, 3 Mar 2006 23:07:52 -0500 + +icu (3.4-4) unstable; urgency=low + + * Build with g++ 4.0 with -fno-strict-aliasing to work around g++ 4.0 + bugs that impact ICU. Future versions should work properly with the + latest g++ without any special flags. (Closes: #342970) + * Enable static libraries. + + -- Jay Berkenbilt Sun, 22 Jan 2006 11:36:59 -0500 + +icu (3.4-3) unstable; urgency=low + + * Explicitly build with g++ 3.4. The current ICU fails its test suite + with 4.0 but not with 3.4. Future versions should work properly with + 4.0. + + -- Jay Berkenbilt Sat, 19 Nov 2005 11:29:31 -0500 + +icu (3.4-2) unstable; urgency=low + + * Remove some extraneous build steps that may cause problems with + autobuilders. + + -- Jay Berkenbilt Sat, 13 Aug 2005 12:41:35 -0400 + +icu (3.4-1) unstable; urgency=low + + * New upstream release + * Completely new packaging + + -- Jay Berkenbilt Fri, 5 Aug 2005 21:57:15 -0400 + +icu (2.1-3) unstable; urgency=low + + * New maintainer as per discussion with Ivo. + * g++ 4.0 transition: libicu21c102 is now libicu21c2. + * Accepted changes from NMU below for now. This change will be + reversed soon when icu is updated to the current upstream version. 
+ The icu28 package will also be removed at that time, as per discussion + with the icu28 maintainer. Closes: #301316 + * Add shlibs files + + -- Jay Berkenbilt Sat, 9 Jul 2005 13:33:35 -0400 + +icu (2.1-2.1) unstable; urgency=medium + + * Rename icu-doc to icu21-doc. icu-doc is built by the icu28 package. + + -- Matthias Klose Sat, 21 May 2005 22:44:31 +0200 + +icu (2.1-2) unstable; urgency=low + + * debian/control: Changed maintainer, added Daniel Glassey as Uploader. + (Reference: + http://lists.debian.org/debian-devel/2003/debian-devel-200308/msg01963.html) + + -- Ivo Timmermans Sun, 18 Jan 2004 23:52:03 +0100 + +icu (2.6.1-1) experimental; urgency=low + + * New upstream version. + * Ivo Timmermans: + * debian/rules Don't create arch-all packages in the binary-arch + target. Closes: #184403 + + -- Ivo Timmermans Thu, 6 Nov 2003 09:03:44 +0100 + +icu (2.6-1) experimental; urgency=low + + * New upstream version. Closes: #162975 + * debian/control: New maintainers + * Daniel Glassey: + * debian/rules Don't use --enable-static as it bloats the packages + * debian/rules Change the optimisations to -O3 and -O to get it to build + * debian/rules get the latest config.{sub,guess} from /usr/share/misc + so add build-dep on autotools-dev + * debian/postinst gencnval is now in {prefix}/bin + * Ivo Timmermans: + * debian/control Tightened debhelper build dependency + * debian/control Update Standards-Version + + -- Daniel Glassey Wed, 3 Sep 2003 12:39:35 +0200 + +icu (2.1-1.2) unstable; urgency=low + + * NMU. + * Updated source/config.{sub,guess}. Closes: #182697 + + -- Ivo Timmermans Fri, 7 Mar 2003 20:58:23 +0100 + +icu (2.1-1.1) unstable; urgency=low + + * NMU. + * debian/control: Go through G++ ABI transition. Closes: #180124 + * source/common/unicode/docmain.h: Fix \mainpage and \section tags, + so doxygen doesn't get confused any more. Closes: #178344 + * debian/copyright: Added upstream URL. 
Closes: #165780 + + -- Ivo Timmermans Fri, 14 Feb 2003 15:21:56 +0100 + +icu (2.1-1) unstable; urgency=low + + * ICU 2.1 release. + * Changed the icu package description. Closes: 142886 + * Use -O1 for CXXFLAGS for OS/390. Closes: 143021 + + -- Yves Arrouye Mon, 15 Apr 2002 14:03:12 -0700 + +icu (2.0.2-1) unstable; urgency=low + + * Minor release of ICU with fixes for threading and strTo/FromWCS + + -- Yves Arrouye Tue, 2 Apr 2002 09:06:00 -0800 + +icu (2.0-2.1pre20020318-1) unstable; urgency=low + + * Use the library number in the development package too. + * ICU changed to version 2.1. + + -- Yves Arrouye Tue, 19 Mar 2002 18:38:37 -0800 + +icu (2.0-2.1pre20020303-1) unstable; urgency=low + + * Fixed a crash in uconv when no argument is passed to -f or -t. + * Other upstream changes. + * Fresh upload with an up to date orig tar file so that future + diffs won't be 6 megabytes long! + + -- Yves Arrouye Sun, 3 Mar 2002 15:31:13 -0800 + +icu (2.0-2.1pre-1) unstable; urgency=low + + * Prerelease of 2.1 with a working upgraded uconv(1). + + -- Yves Arrouye Fri, 1 Mar 2002 21:51:47 -0800 + +icu (2.0-3) unstable; urgency=low + + * Renamed doc-base to icu-doc.doc-base. Closes: 127487 + + -- Yves Arrouye Fri, 18 Jan 2002 22:33:53 -0800 + +icu (2.0-2) unstable; urgency=low + + * Fixed a bug in uprv_uca_cloneTempTable(). Closes: 128484 + * Update Debian bugs status. Closes: 104642 + * Use the official 2.0 tarball as the original tar. + * Added manual pages for every tool. + + -- Yves Arrouye Wed, 16 Jan 2002 20:45:42 -0800 + +icu (2.0-1) unstable; urgency=low + + * Update to ICU version 2.0. + + -- Yves Arrouye Sat, 10 Nov 2001 21:58:19 -0800 + +icu (1.8.1-2) unstable; urgency=low + + * Updated copyright file. Closes: 112488 + * Updated icu-locales description. Closes: 75499 + + -- Yves Arrouye Sat, 10 Nov 2001 21:24:58 -0800 + +icu (1.8.1-1.1) unstable; urgency=low + + * NMU to resolve build failures on ia64 and (hopefully) hppa. 
+ * config.{sub|guess} update + * source/tools/ctestfw/ctest.c: Add static declaration to global + variables local to that module to avoid @gprel relocation errors. + Closes: 104642 + + -- Yves Arrouye Sat, 10 Nov 2001 21:24:44 -0800 + +icu (1.8.1-1) unstable; urgency=low + + * Update to ICU version 1.8.1. + + -- root Mon, 21 May 2001 15:27:36 -0700 + +icu (1.7-1) unstable; urgency=low + + * Update to ICU version 1.7. + + -- Yves Arrouye Tue, 21 Nov 2000 22:54:52 -0800 + +icu (1.6.0.1-20001113-2) unstable; urgency=low + + * New snapshot with better ISO-2022. + + -- Yves Arrouye Mon, 13 Nov 2000 21:05:00 -0800 + +icu (1.6.0.1-20001027-1) unstable; urgency=low + + * Move architecture-dependent files into /usr/lib, instead of + /usr/share. + * Move convrtrs.txt into /etc/icu, make it a conffile, and generate + /usr/lib/icu/1.6.0.1/cnvalias.dat from it at postinst time. + * Manage a /usr/lib/icu/current symbolic link across installations + of the libicuXX packages. The symlink will always point to the + highest numbered version of ICU. + + -- Yves Arrouye Fri, 27 Oct 2000 15:40:12 -0700 + +icu (1.6.0.1-20001017-1) unstable; urgency=low + + * Initial Release. + + -- Yves Arrouye Tue, 24 Oct 2000 16:14:12 -0700 --- icu-3.8.orig/debian/no-watch +++ icu-3.8/debian/no-watch @@ -0,0 +1,6 @@ +# This doesn't always work because ICU upstream doesn't always have +# the latest version in the latest directory. See debian bug 375138 +# for details. + +version=3 +ftp://ftp.software.ibm.com/software/globalization/icu/(?:\d+\.[\d\.]+)/icu-([\d\.]+).tgz --- icu-3.8.orig/debian/rules +++ icu-3.8/debian/rules @@ -0,0 +1,120 @@ +#!/usr/bin/make -f + +DEB_HOST_ARCH_CPU ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_CPU) + +# To distinguish variables that are truly local to this file (rather +# than for use by cdbs), we adopt the convention of starting local +# variables' names with l_. 
+ +l_PWD := $(shell pwd) +l_STAMPS := debian/l_stamps +l_RUN_CHECK := 1 +l_CFLAGS := -g -Wall +ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) + l_CFLAGS += -O0 +else + l_CFLAGS += -O2 +endif +# configure invocation common to the native and 32-bit builds +l_CONFIGURE = CC="gcc" CXX="g++" CPPFLAGS="" LDFLAGS="" \ + ./configure \ + --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr \ + --includedir="\$${prefix}/include" \ + --mandir="\$${prefix}/share/man" --infodir="\$${prefix}/share/info" \ + --sysconfdir=/etc --localstatedir=/var \ + --disable-maintainer-mode --disable-dependency-tracking +# configure options specific to this package +l_CONFIGURE += --disable-samples --enable-static --enable-weak-threads + +ifneq (, $(filter $(DEB_HOST_ARCH_CPU), amd64 ppc64 kfreebsd-amd64)) +build32 := build32 +endif + +# Variables used by cdbs + +VERSION := $(shell dpkg-parsechangelog | \ + awk '/Version:/ {print $$2}' | cut -d- -f 1) + +DEB_TAR_SRCDIR = icu/source +DEB_COMPRESS_EXCLUDE = html examples +DEB_INSTALL_EXAMPLES_libicu-dev = \ + build-tree/$(DEB_TAR_SRCDIR)/samples/* + +# Overridden for 32-bit packages on 64-bit platforms +DEB_DH_INSTALL_SOURCEDIR=debian/tmp + +DEB_DBG_PACKAGE_libicu38 = libicu38-dbg + +# Include cdbs rules files. +include /usr/share/cdbs/1/rules/tarball.mk +include /usr/share/cdbs/1/rules/simple-patchsys.mk +include /usr/share/cdbs/1/rules/debhelper.mk + +cleanbuilddir:: + $(RM) -r $(l_STAMPS) + +# As of 0.4.21, cdbs creates but doesn't remove debian/compat. It +# creates it conditionally, so this doesn't have a trivial fix. 
+clean:: + $(RM) debian/compat *.cdbs-config_list + $(RM) -rf debian/tmp32 + $(RM) debian/stamp-configure debian/stamp-configure32 + +post-patches:: + chmod a+x $(DEB_SRCDIR)/configure +ifneq (, $(build32)) + cp -a $(DEB_SRCDIR) $(DEB_SRCDIR)-build32 +endif + +configure/libicu38 configure/libicu-dev:: debian/stamp-configure +debian/stamp-configure: + cd $(DEB_SRCDIR) && \ + CFLAGS="$(l_CFLAGS)" CXXFLAGS="$(l_CFLAGS)" \ + $(l_CONFIGURE) + touch debian/stamp-configure + +configure/lib32icu38 configure/lib32icu-dev:: debian/stamp-configure32 +debian/stamp-configure32: + cd $(DEB_SRCDIR)-build32 && \ + CFLAGS="$(l_CFLAGS) -m32" CXXFLAGS="$(l_CFLAGS) -m32" \ + $(l_CONFIGURE) --libdir=/usr/lib32 + touch debian/stamp-configure32 + +build/libicu38 build/libicu-dev:: + $(MAKE) -C $(DEB_SRCDIR) + +build/lib32icu38 build/lib32icu-dev:: + $(MAKE) -C $(DEB_SRCDIR)-build32 + +install/libicu38 install/libicu-dev:: + $(MAKE) -C $(DEB_SRCDIR) install DESTDIR=$(CURDIR)/debian/tmp + +binary-install/lib32icu38 binary-install/lib32icu-dev:: DEB_DH_INSTALL_SOURCEDIR=debian/tmp32 +install/lib32icu38 install/lib32icu-dev:: + $(MAKE) -C $(DEB_SRCDIR)-build32 install DESTDIR=$(CURDIR)/debian/tmp32 + +install/icu-doc:: install/libicu38 install/libicu-dev + $(MAKE) -C $(DEB_SRCDIR) install-doc DESTDIR=$(CURDIR)/debian/tmp + +ifeq ($(DEB_HOST_ARCH),amd64) +# On amd64 only, it appears that the 32-bit libraries must be moved from +# /usr/lib32 to /emul/ia32-linux/usr/lib. +binary-install/lib32icu38 binary-install/lib32icu-dev:: + mkdir -p debian/$(cdbs_curpkg)/emul/ia32-linux/usr + mv debian/$(cdbs_curpkg)/usr/lib32 debian/$(cdbs_curpkg)/emul/ia32-linux/usr/lib +endif + +# As per upstream, icuswap is deprecated and should not be +# distributed. 
+binary-post-install/libicu-dev::
+	rm debian/$(cdbs_curpkg)/usr/sbin/icuswap
+
+# Install lintian override files
+binary-post-install/%::
+	if [ -f debian/$*.lintian ]; then \
+		mkdir -p debian/$*/usr/share/lintian/overrides && \
+		cp -p debian/$*.lintian debian/$*/usr/share/lintian/overrides/$*; \
+	fi
+
+binary-predeb/%::
+	perl debian/fix_substvars.pl debian/$*.substvars 'lib(32)?icu.*'
--- icu-3.8.orig/debian/fix_substvars.pl
+++ icu-3.8/debian/fix_substvars.pl
@@ -0,0 +1,63 @@
+#
+# Remove any packages whose names match the given pattern from the
+# shlibs:Depends entry in the given substvars file.
+#
+# Usage: fix_substvars.pl substvars-file pattern
+#
+# The pattern is a Perl regular expression that must match an entire
+# package name.  The substvars file is rewritten in place; if it does
+# not exist, the script exits successfully without doing anything.
+#
+
+use strict;
+use warnings;
+
+my $whoami = ($0 =~ m,([^/\\]*)$,) ? $1 : $0;
+
+die "usage: $whoami substvars-file pattern\n" unless @ARGV == 2;
+my ($file, $pattern) = @ARGV;
+
+# A missing substvars file is not an error: there is nothing to filter.
+if (! -f $file)
+{
+    exit 0;
+}
+
+# Slurp every line; the filtered copy is written back to the same file.
+open(my $in_fh, '<', $file) or die "$whoami: can't open $file: $!\n";
+my @in = <$in_fh>;
+close($in_fh);
+
+my @out = ();
+for my $line (@in)
+{
+    if ($line =~ m/^(shlibs:Depends=)(.*)/)
+    {
+        my $prefix = $1;
+        my $contents = $2;
+        my @kept = ();
+        foreach my $item (split(/,\s*/, $contents))
+        {
+            # The package name is everything up to the first
+            # whitespace; anything after it is kept untouched.
+            $item =~ m/^(\S+)/
+                or die "$whoami: can't parse '$item' in $file\n";
+            my $pkg = $1;
+            # Group the pattern so that any alternation inside it
+            # stays anchored at both ends.
+            if ($pkg !~ m/^(?:$pattern)$/)
+            {
+                push(@kept, $item);
+            }
+        }
+        push(@out, $prefix . join(', ', @kept) . "\n");
+    }
+    else
+    {
+        push(@out, $line);
+    }
+}
+
+open(my $out_fh, '>', $file) or die "$whoami: can't open $file: $!\n";
+print $out_fh @out or die "$whoami: can't write $file: $!\n";
+close($out_fh) or die "$whoami: can't write $file: $!\n";