--- icu-3.8.orig/debian/icu-doc.install
+++ icu-3.8/debian/icu-doc.install
@@ -0,0 +1 @@
+usr/share/doc/icu/html	usr/share/doc/icu-doc
--- icu-3.8.orig/debian/libicu-dev.install
+++ icu-3.8/debian/libicu-dev.install
@@ -0,0 +1,8 @@
+usr/lib/lib*.so
+usr/lib/lib*.a
+usr/lib/icu
+usr/include
+usr/bin
+usr/sbin
+usr/share/icu
+usr/share/man
--- icu-3.8.orig/debian/lib32icu38.shlibs
+++ icu-3.8/debian/lib32icu38.shlibs
@@ -0,0 +1,7 @@
+libicudata 38 lib32icu38 (>= 3.8-5)
+libicui18n 38 lib32icu38 (>= 3.8-5)
+libicuio 38 lib32icu38 (>= 3.8-5)
+libicule 38 lib32icu38 (>= 3.8-5)
+libiculx 38 lib32icu38 (>= 3.8-5)
+libicutu 38 lib32icu38 (>= 3.8-5)
+libicuuc 38 lib32icu38 (>= 3.8-5)
--- icu-3.8.orig/debian/libicu38.shlibs
+++ icu-3.8/debian/libicu38.shlibs
@@ -0,0 +1,7 @@
+libicudata 38 libicu38 (>= 3.8-5)
+libicui18n 38 libicu38 (>= 3.8-5)
+libicuio 38 libicu38 (>= 3.8-5)
+libicule 38 libicu38 (>= 3.8-5)
+libiculx 38 libicu38 (>= 3.8-5)
+libicutu 38 libicu38 (>= 3.8-5)
+libicuuc 38 libicu38 (>= 3.8-5)
--- icu-3.8.orig/debian/control
+++ icu-3.8/debian/control
@@ -0,0 +1,71 @@
+Source: icu
+Section: libs
+Priority: optional
+Maintainer: Ubuntu Core Developers <ubuntu-devel-discuss@lists.ubuntu.com>
+XSBC-Original-Maintainer: Jay Berkenbilt <qjb@debian.org>
+Standards-Version: 3.7.3
+Build-Depends: cdbs, debhelper (>= 5), doxygen, gcc-multilib [amd64 kfreebsd-amd64 ppc64] | gcc-4.1 (<< 4.1.2) [amd64 kfreebsd-amd64 ppc64],
+ g++-multilib [amd64 kfreebsd-amd64 ppc64] | g++-4.1 (<< 4.1.2) [amd64 kfreebsd-amd64 ppc64],
+ libc6-dev-i386 [amd64], libc0.1-dev-i386 [kfreebsd-amd64], libc6-dev-powerpc [ppc64]
+
+Package: libicu38
+Section: libs
+Architecture: any
+Depends: ${shlibs:Depends}
+Replaces: icu, icu-locales
+Conflicts: icu, icu-locales
+Description: International Components for Unicode
+ ICU is a C++ and C library that provides robust and full-featured
+ Unicode and locale support.  This package contains the runtime
+ libraries for ICU.
+
+Package: libicu38-dbg
+Section: libs
+Priority: extra
+Architecture: any
+Depends: libicu38 (= ${binary:Version})
+Description: International Components for Unicode (debugging symbols)
+ ICU is a C++ and C library that provides robust and full-featured
+ Unicode and locale support.  This package contains debugging symbols
+ for the libraries.
+
+Package: libicu-dev
+Section: libdevel
+Architecture: any
+Depends: libicu38 (= ${binary:Version}), libc6-dev | libc-dev
+Replaces: libicu34-dev, libicu36-dev
+Conflicts: libicu34-dev, libicu36-dev
+Suggests: icu-doc
+Description: Development files for International Components for Unicode
+ ICU is a C++ and C library that provides robust and full-featured
+ Unicode and locale support.  This package contains the development
+ files for ICU along with programs used to manipulate data files found
+ in the ICU sources.
+
+Package: lib32icu38
+Section: libs
+Architecture: amd64 ppc64 kfreebsd-amd64
+Depends: ${shlibs:Depends}
+Description: International Components for Unicode (32-bit)
+ ICU is a C++ and C library that provides robust and full-featured
+ Unicode and locale support.  This package contains the runtime
+ libraries for ICU.
+
+Package: lib32icu-dev
+Section: libdevel
+Architecture: amd64 ppc64 kfreebsd-amd64
+Depends: libicu-dev (= ${binary:Version}), lib32icu38 (= ${binary:Version})
+Suggests: icu-doc
+Description: Development files for International Components for Unicode (32-bit)
+ ICU is a C++ and C library that provides robust and full-featured
+ Unicode and locale support.  This package contains the development
+ files for ICU along with programs used to manipulate data files found
+ in the ICU sources.
+
+Package: icu-doc
+Section: doc
+Architecture: all
+Description: API documentation for ICU classes and functions
+ ICU is a C++ and C library that provides robust and full-featured
+ Unicode and locale support.  This package contains HTML files
+ documenting the ICU APIs.
--- icu-3.8.orig/debian/patches/05-redhat.icu6002.patch
+++ icu-3.8/debian/patches/05-redhat.icu6002.patch
@@ -0,0 +1,411 @@
+#
+# Description: the HZ converter must restrict DBCS codes to lead bytes
+#  21..7D and trail bytes 21..7E (required to fix CVE-2009-0153). See:
+#  https://bugzilla.redhat.com/show_bug.cgi?id=503071
+# Upstream: http://bugs.icu-project.org/trac/ticket/6002
+#
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_ext.c icu-3.8.new/build-tree/icu/source/common/ucnv_ext.c
+--- icu/source/common/ucnv_ext.c	2009-10-07 11:32:22.241950584 -0500
++++ icu/source/common/ucnv_ext.c	2009-10-07 11:32:29.253215734 -0500
+@@ -1036,15 +1036,13 @@
+     /* enumerate the from-Unicode trie table */
+     c=0; /* keep track of the current code point while enumerating */
+ 
+-    if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
+-        filter==UCNV_SET_FILTER_DBCS_ONLY ||
+-        filter==UCNV_SET_FILTER_SJIS ||
+-        filter==UCNV_SET_FILTER_GR94DBCS
++    if(filter==UCNV_SET_FILTER_2022_CN) {
++        minLength=3;
++    } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
++               filter!=UCNV_SET_FILTER_NONE
+     ) {
+         /* DBCS-only, ignore single-byte results */
+         minLength=2;
+-    } else if(filter==UCNV_SET_FILTER_2022_CN) {
+-        minLength=3;
+     } else {
+         minLength=1;
+     }
+@@ -1099,8 +1097,15 @@
+                                 break;
+                             case UCNV_SET_FILTER_GR94DBCS:
+                                 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+-                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe-0xa1a1) &&
+-                                     (uint8_t)(value-0xa1)<=(0xfe-0xa1))) {
++                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
++                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
++                                    continue;
++                                }
++                                break;
++                            case UCNV_SET_FILTER_HZ:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
++                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
++                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+                                     continue;
+                                 }
+                                 break;
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c icu-3.8.new/build-tree/icu/source/common/ucnvhz.c
+--- icu/source/common/ucnvhz.c	2009-10-07 11:32:22.241950584 -0500
++++ icu/source/common/ucnvhz.c	2009-10-07 11:32:29.273207453 -0500
+@@ -72,7 +72,7 @@
+     cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ));
+     if(cnv->extraInfo != NULL){
+         uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ));
+-        ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
++        ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("GBK",errorCode);
+     }
+     else {
+         *errorCode = U_MEMORY_ALLOCATION_ERROR;
+@@ -141,7 +141,7 @@
+     UChar *myTarget = args->target;
+     const char *mySourceLimit = args->sourceLimit;
+     UChar32 targetUniChar = 0x0000;
+-    UChar mySourceChar = 0x0000;
++    int32_t mySourceChar = 0x0000;
+     UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
+     tempBuf[0]=0; 
+     tempBuf[1]=0;
+@@ -156,90 +156,71 @@
+             
+             mySourceChar= (unsigned char) *mySource++;
+ 
+-            switch(mySourceChar){
++            if(args->converter->mode == UCNV_TILDE) {
++                /* second byte after ~ */
++                args->converter->mode=0;
++                switch(mySourceChar) {
+                 case 0x0A:
+-                    if(args->converter->mode ==UCNV_TILDE){
+-                        args->converter->mode=0;
+-                        
+-                    }
+-                    *(myTarget++)=(UChar)mySourceChar;
++                    /* no output for ~\n (line-continuation marker) */
+                     continue;
+-            
+                 case UCNV_TILDE:
+-                    if(args->converter->mode ==UCNV_TILDE){
+-                        *(myTarget++)=(UChar)mySourceChar;
+-                        args->converter->mode=0;
+-                        continue;
+-                        
++                    if(args->offsets) {
++                        args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
+                     }
+-                    else if(args->converter->toUnicodeStatus !=0){
+-                        args->converter->mode=0;
+-                        break;
+-                    }
+-                    else{
+-                        args->converter->mode = UCNV_TILDE;
+-                        continue;
+-                    }
+-                
+-                
++                    *(myTarget++)=(UChar)mySourceChar;
++                    continue;
+                 case UCNV_OPEN_BRACE:
+-                    if(args->converter->mode == UCNV_TILDE){
+-                        args->converter->mode=0;
+-                        myData->isStateDBCS = TRUE;
+-                        continue;
+-                    }
+-                    else{
+-                        break;
+-                    }
+-               
+-                
++                    myData->isStateDBCS = TRUE;
++                    continue;
+                 case UCNV_CLOSE_BRACE:
+-                    if(args->converter->mode == UCNV_TILDE){
+-                        args->converter->mode=0;
+-                         myData->isStateDBCS = FALSE;
+-                        continue;
+-                    }
+-                    else{
+-                        break;
+-                    }
+-                
++                    myData->isStateDBCS = FALSE;
++                    continue;
+                 default:
+                      /* if the first byte is equal to TILDE and the trail byte
+                      * is not a valid byte then it is an error condition
+                      */
+-                    if(args->converter->mode == UCNV_TILDE){
+-                        args->converter->mode=0;
+-                        mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
+-                        goto SAVE_STATE;
+-                    }
+-                    
++                    mySourceChar = 0x7e00 | mySourceChar;
++                    targetUniChar = 0xffff;
+                     break;
+-
+-            }
+-             
+-            if(myData->isStateDBCS){
++                }
++            } else if(myData->isStateDBCS) {
+                 if(args->converter->toUnicodeStatus == 0x00){
+-                    args->converter->toUnicodeStatus = (UChar) mySourceChar;
++                    /* lead byte */
++                    if(mySourceChar == UCNV_TILDE) {
++                        args->converter->mode = UCNV_TILDE;
++                    } else {
++                        /* add another bit to distinguish a 0 byte from not having seen a lead byte */
++                        args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
++                    }
+                     continue;
+                 }
+                 else{
+-                    tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
+-                    tempBuf[1] = (char) (mySourceChar+0x80);
+-                    mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
++                    /* trail byte */
++                    uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
++                    if( (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21) &&
++                        (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21)
++                    ) {
++                        tempBuf[0] = (char) (leadByte+0x80) ;
++                        tempBuf[1] = (char) (mySourceChar+0x80);
++                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
++                            tempBuf, 2, args->converter->useFallback);
++                    } else {
++                        targetUniChar = 0xffff;
++                    }
++                    /* add another bit so that the code below writes 2 bytes in case of error */
++                    mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+                     args->converter->toUnicodeStatus =0x00;
+-                    targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+-                        tempBuf, 2, args->converter->useFallback);
+                 }
+             }
+             else{
+-                if(args->converter->fromUnicodeStatus == 0x00){
+-                    targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+-                        mySource - 1, 1, args->converter->useFallback);
+-                }
+-                else{
+-                    goto SAVE_STATE;
++                if(mySourceChar == UCNV_TILDE) {
++                    args->converter->mode = UCNV_TILDE;
++                    continue;
++                } else if(mySourceChar <= 0x7f) {
++                    targetUniChar = (UChar)mySourceChar;  /* ASCII */
++                } else {
++                    targetUniChar = 0xffff;
+                 }
+-
+             }
+             if(targetUniChar < 0xfffe){
+                 if(args->offsets) {
+@@ -248,26 +229,17 @@
+ 
+                 *(myTarget++)=(UChar)targetUniChar;
+             }
+-            else if(targetUniChar>=0xfffe){
+-SAVE_STATE:
++            else /* targetUniChar>=0xfffe */ {
+                 if(targetUniChar == 0xfffe){
+                     *err = U_INVALID_CHAR_FOUND;
+                 }
+                 else{
+                     *err = U_ILLEGAL_CHAR_FOUND;
+                 }
+-                if(myData->isStateDBCS){
+-                    /* this should never occur since isStateDBCS is set to true 
+-                     * only after tempBuf[0] and tempBuf[1]
+-                     * are set to the input ..  just to please BEAM 
+-                     */
+-                    if(tempBuf[0]==0 || tempBuf[1]==0){
+-                        *err = U_INTERNAL_PROGRAM_ERROR;
+-                    }else{
+-                        args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80);
+-                        args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80);
+-                        args->converter->toULength=2;
+-                    }
++                if(mySourceChar > 0xff){
++                    args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
++                    args->converter->toUBytes[1] = (uint8_t)mySourceChar;
++                    args->converter->toULength=2;
+                 }
+                 else{
+                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+@@ -328,16 +300,21 @@
+                 escSeq = TILDE_ESCAPE;
+                 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
+                 continue;
+-            }
+-            else{
++            } else if(mySourceChar <= 0x7f) {
++                length = 1;
++                targetUniChar = mySourceChar;
++            } else {
+                 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
+                     mySourceChar,&targetUniChar,args->converter->useFallback);
+-
+-            }
+-            /* only DBCS or SBCS characters are expected*/
+-            /* DB haracters with high bit set to 1 are expected */
+-            if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&& length==2)){
+-                targetUniChar= missingCharMarker;
++                /* we can only use lead bytes 21..7D and trail bytes 21..7E */
++                if( length == 2 &&
++                    (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
++                    (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
++                ) {
++                    targetUniChar -= 0x8080;
++                } else {
++                    targetUniChar = missingCharMarker;
++                }
+             }
+             if (targetUniChar != missingCharMarker){
+                myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);     
+@@ -360,22 +337,22 @@
+             
+                 if(isTargetUCharDBCS){
+                     if( myTargetIndex <targetLength){
+-                        myTarget[myTargetIndex++] =(char) ((targetUniChar >> 8) -0x80);
++                        myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
+                         if(offsets){
+                             *(offsets++) = mySourceIndex-1;
+                         }
+                         if(myTargetIndex < targetLength){
+-                            myTarget[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80);
++                            myTarget[myTargetIndex++] =(char) targetUniChar;
+                             if(offsets){
+                                 *(offsets++) = mySourceIndex-1;
+                             }
+                         }else{
+-                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
++                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
+                             *err = U_BUFFER_OVERFLOW_ERROR;
+                         } 
+                     }else{
+-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80);
+-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
++                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
++                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
+                         *err = U_BUFFER_OVERFLOW_ERROR;
+                     }
+ 
+@@ -524,15 +501,14 @@
+                   const USetAdder *sa,
+                   UConverterUnicodeSet which,
+                   UErrorCode *pErrorCode) {
+-    /* the tilde '~' is hardcoded in the converter */
+-    sa->add(sa->set, 0x7e);
++    /* HZ converts all of ASCII */
++    sa->addRange(sa->set, 0, 0x7f);
+ 
+     /* add all of the code points that the sub-converter handles */
+-    /* ucnv_MBCSGetFilteredUnicodeSetForUnicode(((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, sa, which, UCNV_SET_FILTER_GR94DBCS, pErrorCode); */
+-    ((UConverterDataHZ*)cnv->extraInfo)->
+-        gbConverter->sharedData->impl->
+-            getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
+-                          sa, which, pErrorCode);
++    ucnv_MBCSGetFilteredUnicodeSetForUnicode(
++        ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
++        sa, which, UCNV_SET_FILTER_HZ,
++        pErrorCode);
+ }
+ 
+ static const UConverterImpl _HZImpl={
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c
+--- icu/source/common/ucnvmbcs.c	2009-10-07 11:32:22.251959209 -0500
++++ icu/source/common/ucnvmbcs.c	2009-10-07 11:32:29.273207453 -0500
+@@ -625,8 +625,21 @@
+                             /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
+                             do {
+                                 if( ((st3&1)!=0 || useFallback) &&
+-                                    (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfefe-0xa1a1) &&
+-                                    (uint8_t)(value-0xa1)<=(0xfe-0xa1)
++                                    (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
++                                    (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
++                                ) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_HZ:
++                            /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
++                            do {
++                                if( ((st3&1)!=0 || useFallback) &&
++                                    (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
++                                    (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
+                                 ) {
+                                     sa->add(sa->set, c);
+                                 }
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.h icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.h
+--- icu/source/common/ucnvmbcs.h	2009-10-07 11:32:22.251959209 -0500
++++ icu/source/common/ucnvmbcs.h	2009-10-07 11:32:29.273207453 -0500
+@@ -493,6 +493,7 @@
+     UCNV_SET_FILTER_2022_CN,
+     UCNV_SET_FILTER_SJIS,
+     UCNV_SET_FILTER_GR94DBCS,
++    UCNV_SET_FILTER_HZ,
+     UCNV_SET_FILTER_COUNT
+ } UConverterSetFilter;
+ 
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/ncnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/ncnvtst.c
+--- icu/source/test/cintltst/ncnvtst.c	2007-09-13 18:17:36.000000000 -0500
++++ icu/source/test/cintltst/ncnvtst.c	2009-10-07 11:32:29.283205342 -0500
+@@ -1928,7 +1928,7 @@
+ #if !UCONFIG_NO_LEGACY_CONVERSION
+         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
+         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
+-        { "HZ", 0x410, 0x44f, 0x4e00, 0x4eff, 0xac00, 0xd7ff },
++        /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
+         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
+ #else
+         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/intltest/convtest.cpp icu-3.8.new/build-tree/icu/source/test/intltest/convtest.cpp
+--- icu/source/test/intltest/convtest.cpp	2009-10-07 11:32:22.251959209 -0500
++++ icu/source/test/intltest/convtest.cpp	2009-10-07 11:32:29.283205342 -0500
+@@ -538,7 +538,7 @@
+         "Shift-JIS",
+         "ibm-1390",  // EBCDIC_STATEFUL table
+         "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
+-        // "HZ", TODO(markus): known bug, the set incorrectly contains [\u02CA\u02CB\u02D9\u2010\u2013\u2015...]
++        "HZ",
+         "ISO-2022-JP",
+         "JIS7",
+         "ISO-2022-CN",
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt
+--- icu/source/test/testdata/conversion.txt	2009-10-07 11:32:22.251959209 -0500
++++ icu/source/test/testdata/conversion.txt	2009-10-07 11:32:29.283205342 -0500
+@@ -48,6 +48,14 @@
+     toUnicode {
+       Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
+       Cases {
++        // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e
++        {
++          "HZ",
++          :bin{ 7e7b21212120217e217f772100007e217e7d207e7e807e0a2b },
++          "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd ~\ufffd+",
++          :intvector{ 2,4,6,8,10,12,14,18,19,21,24 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
+         // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
+         // using the Shift-JIS table for JIS X 0208 (ticket #5797)
+         {
+@@ -1349,6 +1357,14 @@
+           :int{0}
+         }
+ 
++        // HZ
++        {
++          "HZ",
++          "[\u0410-\u044f\u4e00\u4e01\u4e03]",
++          "[\u4e02\u4e04-\u4e06\uac00-\ud7ff]",
++          :int{0}
++        }
++        
+         // DBCS-only
+         {
+           "ibm-971",
--- icu-3.8.orig/debian/patches/03-redhat.icu5797.patch
+++ icu-3.8/debian/patches/03-redhat.icu5797.patch
@@ -0,0 +1,751 @@
+#
+# Description: use the Shift-JIS table for ISO-2022-JP (required to fix
+#  CVE-2009-0153). See: https://bugzilla.redhat.com/show_bug.cgi?id=503071
+# Upstream: http://bugs.icu-project.org/trac/ticket/5797
+#
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c
+--- icu/source/common/ucnv2022.c	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnv2022.c	2009-10-07 11:31:52.331962609 -0500
+@@ -472,8 +472,7 @@
+             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
+                 myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode);
+             }
+-            myConverterData->myConverterArray[JISX201]      = ucnv_loadSharedData("JISX0201", NULL, errorCode);
+-            myConverterData->myConverterArray[JISX208]      = ucnv_loadSharedData("jisx-208", NULL, errorCode);
++            myConverterData->myConverterArray[JISX208]      = ucnv_loadSharedData("Shift-JIS", NULL, errorCode);
+             if(jpCharsetMasks[version]&CSM(JISX212)) {
+                 myConverterData->myConverterArray[JISX212]  = ucnv_loadSharedData("jisx-212", NULL, errorCode);
+             }
+@@ -1040,14 +1039,6 @@
+                 length=3;
+             }
+         }
+-        /*
+-         * TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space.
+-         * Pass in parameter for type of output bytes, for validation and shifting:
+-         * - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20?
+-         *   (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.)
+-         * - A1-FE: Subtract 80 after range check.
+-         * - SJIS: Shift DBCS result to 21-7E x 21-7E.
+-         */
+         /* is this code point assigned, or do we use fallbacks? */
+         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
+             /* assigned */
+@@ -1105,6 +1096,23 @@
+     }
+ }
+ 
++/*
++ * Check that the result is a 2-byte value with each byte in the range A1..FE
++ * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
++ * to move it to the ISO 2022 range 21..7E.
++ * Return 0 if out of range.
++ */
++static U_INLINE uint32_t
++_2022FromGR94DBCS(uint32_t value) {
++    if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
++        (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
++    ) {
++        return value - 0x8080;  /* shift down to 21..7e byte range */
++    } else {
++        return 0;  /* not valid for ISO 2022 */
++    }
++}
++
+ #ifdef U_ENABLE_GENERIC_ISO_2022
+ 
+ /**********************************************************************************
+@@ -1233,7 +1241,7 @@
+     }
+     else{
+         cnv->toUBytes[0] =(char) sourceChar;
+-        cnv->toULength = 2;
++        cnv->toULength = 1;
+     }
+ 
+     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
+@@ -1344,6 +1352,181 @@
+ * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
+ */
+ 
++/* Map 00..7F to Unicode according to JIS X 0201. */
++static U_INLINE uint32_t
++jisx201ToU(uint32_t value) {
++    if(value < 0x5c) {
++        return value;
++    } else if(value == 0x5c) {
++        return 0xa5;
++    } else if(value == 0x7e) {
++        return 0x203e;
++    } else /* value <= 0x7f */ {
++        return value;
++    }
++}
++
++/* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
++static U_INLINE uint32_t
++jisx201FromU(uint32_t value) {
++    if(value<=0x7f) {
++        if(value!=0x5c && value!=0x7e) {
++            return value;
++        }
++    } else if(value==0xa5) {
++        return 0x5c;
++    } else if(value==0x203e) {
++        return 0x7e;
++    }
++    return 0xfffe;
++}
++
++/*
++ * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
++ * to JIS X 0208, and convert it to a pair of 21..7E bytes.
++ * Return 0 if the byte pair is out of range.
++ */
++static U_INLINE uint32_t
++_2022FromSJIS(uint32_t value) {
++    uint8_t trail;
++
++    if(value > 0xEFFC) {
++        return 0;  /* beyond JIS X 0208 */
++    }
++
++    trail = (uint8_t)value;
++
++    value &= 0xff00;  /* lead byte */
++    if(value <= 0x9f00) {
++        value -= 0x7000;
++    } else /* 0xe000 <= value <= 0xef00 */ {
++        value -= 0xb000;
++    }
++    value <<= 1;
++
++    if(trail <= 0x9e) {
++        value -= 0x100;
++        if(trail <= 0x7e) {
++            value |= trail - 0x1f;
++        } else {
++            value |= trail - 0x20;
++        }
++    } else /* trail <= 0xfc */ {
++        value |= trail - 0x7e;
++    }
++    return value;
++}
++
++/*
++ * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
++ * If either byte is outside 21..7E make sure that the result is not valid
++ * for Shift-JIS so that the converter catches it.
++ * Some invalid byte values already turn into equally invalid Shift-JIS
++ * byte values and need not be tested explicitly.
++ */
++static U_INLINE void
++_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
++    if(c1&1) {
++        ++c1;
++        if(c2 <= 0x5f) {
++            c2 += 0x1f;
++        } else if(c2 <= 0x7e) {
++            c2 += 0x20;
++        } else {
++            c2 = 0;  /* invalid */
++        }
++    } else {
++        if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
++            c2 += 0x7e;
++        } else {
++            c2 = 0;  /* invalid */
++        }
++    }
++    c1 >>= 1;
++    if(c1 <= 0x2f) {
++        c1 += 0x70;
++    } else if(c1 <= 0x3f) {
++        c1 += 0xb0;
++    } else {
++        c1 = 0;  /* invalid */
++    }
++    bytes[0] = (char)c1;
++    bytes[1] = (char)c2;
++}
++
++/*
++ * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
++ * Katakana.
++ * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
++ * because Shift-JIS roundtrips half-width Katakana to single bytes.
++ * These were the only fallbacks in ICU's jisx-208.ucm file.
++ */
++static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
++    0x2123,  /* U+FF61 */
++    0x2156,
++    0x2157,
++    0x2122,
++    0x2126,
++    0x2572,
++    0x2521,
++    0x2523,
++    0x2525,
++    0x2527,
++    0x2529,
++    0x2563,
++    0x2565,
++    0x2567,
++    0x2543,
++    0x213C,  /* U+FF70 */
++    0x2522,
++    0x2524,
++    0x2526,
++    0x2528,
++    0x252A,
++    0x252B,
++    0x252D,
++    0x252F,
++    0x2531,
++    0x2533,
++    0x2535,
++    0x2537,
++    0x2539,
++    0x253B,
++    0x253D,
++    0x253F,  /* U+FF80 */
++    0x2541,
++    0x2544,
++    0x2546,
++    0x2548,
++    0x254A,
++    0x254B,
++    0x254C,
++    0x254D,
++    0x254E,
++    0x254F,
++    0x2552,
++    0x2555,
++    0x2558,
++    0x255B,
++    0x255E,
++    0x255F,  /* U+FF90 */
++    0x2560,
++    0x2561,
++    0x2562,
++    0x2564,
++    0x2566,
++    0x2568,
++    0x2569,
++    0x256A,
++    0x256B,
++    0x256C,
++    0x256D,
++    0x256F,
++    0x2573,
++    0x212B,
++    0x212C   /* U+FF9F */
++};
++
+ static void
+ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
+     UConverter *cnv = args->converter;
+@@ -1499,7 +1682,7 @@
+                     }
+                     break;
+                 case HWKANA_7BIT:
+-                    if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) {
++                    if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
+                         if(converterData->version==3) {
+                             /* JIS7: use G1 (SO) */
+                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
+@@ -1526,13 +1709,34 @@
+                     break;
+                 case JISX201:
+                     /* G0 SBCS */
+-                    len2 = MBCS_SINGLE_FROM_UCHAR32(
++                    value = jisx201FromU(sourceChar);
++                    if(value <= 0x7f) {
++                        targetValue = value;
++                        len = 1;
++                        cs = cs0;
++                        g = 0;
++                        useFallback = FALSE;
++                    }
++                    break;
++                case JISX208:
++                    /* G0 DBCS from Shift-JIS table */
++                    len2 = MBCS_FROM_UCHAR32_ISO2022(
+                                 converterData->myConverterArray[cs0],
+                                 sourceChar, &value,
+-                                useFallback);
+-                    if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) {
+-                        targetValue = value;
+-                        len = len2;
++                                useFallback, MBCS_OUTPUT_2);
++                    if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
++                        value = _2022FromSJIS(value);
++                        if(value != 0) {
++                            targetValue = value;
++                            len = len2;
++                            cs = cs0;
++                            g = 0;
++                            useFallback = FALSE;
++                        }
++                    } else if(len == 0 && useFallback &&
++                              (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
++                        targetValue = hwkana_fb[sourceChar - HWKANA_START];
++                        len = -2;
+                         cs = cs0;
+                         g = 0;
+                         useFallback = FALSE;
+@@ -1564,17 +1768,10 @@
+                              * Check for valid bytes for the encoding scheme.
+                              * This is necessary because the sub-converter (windows-949)
+                              * has a broader encoding scheme than is valid for 2022.
+-                             *
+-                             * Check that the result is a 2-byte value with each byte in the range A1..FE
+-                             * (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte
+-                             * to move it to the ISO 2022 range 21..7E.
+                              */
+-                            if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
+-                                (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
+-                            ) {
+-                                value -= 0x8080;  /* shift down to 21..7e byte range */
+-                            } else {
+-                                break;  /* not valid for ISO 2022 */
++                            value = _2022FromGR94DBCS(value);
++                            if(value == 0) {
++                                break;
+                             }
+                         }
+                         targetValue = value;
+@@ -1750,7 +1947,7 @@
+ static void
+ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
+                                                UErrorCode* err){
+-    char tempBuf[3];
++    char tempBuf[2];
+     const char *mySource = (char *) args->source;
+     UChar *myTarget = args->target;
+     const char *mySourceLimit = args->sourceLimit;
+@@ -1868,10 +2065,7 @@
+                     break;
+                 case JISX201:
+                     if(mySourceChar <= 0x7f) {
+-                        targetUniChar =
+-                            _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
+-                                myData->myConverterArray[cs],
+-                                mySourceChar);
++                        targetUniChar = jisx201ToU(mySourceChar);
+                     }
+                     break;
+                 case HWKANA_7BIT:
+@@ -1885,8 +2079,13 @@
+                     if(mySource < mySourceLimit) {
+                         char trailByte;
+ getTrailByte:
+-                        tempBuf[0] = (char) (mySourceChar);
+-                        tempBuf[1] = trailByte = *mySource++;
++                        trailByte = *mySource++;
++                        if(cs == JISX208) {
++                            _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf);
++                        } else {
++                            tempBuf[0] = (char)mySourceChar;
++                            tempBuf[1] = trailByte;
++                        }
+                         mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
+                     } else {
+@@ -3190,6 +3389,9 @@
+     /* open a set and initialize it with code points that are algorithmically round-tripped */
+     switch(cnvData->locale[0]){
+     case 'j':
++        /* include JIS X 0201 which is hardcoded */
++        sa->add(sa->set, 0xa5);
++        sa->add(sa->set, 0x203e);
+         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
+             /* include Latin-1 for some variants of JP */
+             sa->addRange(sa->set, 0, 0xff);
+@@ -3198,6 +3400,11 @@
+             sa->addRange(sa->set, 0, 0x7f);
+         }
+         if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
++            /*
++             * TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks,
++             * we need to include half-width Katakana for all JP variants because
++             * JIS X 0208 has hardcoded fallbacks for them.
++             */
+             /* include half-width Katakana for JP */
+             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
+         }
+@@ -3217,15 +3424,7 @@
+         break;
+     }
+ 
+-    /*
+-     * Version-specific for CN:
+-     * CN version 0 does not map CNS planes 3..7 although
+-     * they are all available in the CNS conversion table;
+-     * CN version 1 does map them all.
+-     * The two versions create different Unicode sets.
+-     */
+-    for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
+-        if(cnvData->myConverterArray[i]!=NULL) {
++#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
+             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+                 cnvData->version==0 && i==CNS_11643
+             ) {
+@@ -3235,9 +3434,33 @@
+                         sa, UCNV_ROUNDTRIP_SET,
+                         0, 0x81, 0x82,
+                         pErrorCode);
++            }
++#endif
++
++    for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
++        UConverterSetFilter filter;
++        if(cnvData->myConverterArray[i]!=NULL) {
++            if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
++                cnvData->version==0 && i==CNS_11643
++            ) {
++                /*
++                 * Version-specific for CN:
++                 * CN version 0 does not map CNS planes 3..7 although
++                 * they are all available in the CNS conversion table;
++                 * CN version 1 (-EXT) does map them all.
++                 * The two versions create different Unicode sets.
++                 */
++                filter=UCNV_SET_FILTER_2022_CN;
++            } else if(cnvData->locale[0]=='j' && i==JISX208) {
++                /*
++                 * Only add code points that map to Shift-JIS codes
++                 * corresponding to JIS X 0208.
++                 */
++                filter=UCNV_SET_FILTER_SJIS;
+             } else {
+-                ucnv_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);
++                filter=UCNV_SET_FILTER_NONE;
+             }
++            ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
+         }
+     }
+ 
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c
+--- icu/source/common/ucnvmbcs.c	2007-09-13 18:17:58.000000000 -0500
++++ icu/source/common/ucnvmbcs.c	2009-10-07 11:31:52.351957135 -0500
+@@ -362,6 +362,8 @@
+ 
+ /* Miscellaneous ------------------------------------------------------------ */
+ 
++#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
++
+ /* similar to ucnv_MBCSGetNextUChar() but recursive */
+ static void
+ _getUnicodeSetForBytes(const UConverterSharedData *sharedData,
+@@ -454,11 +456,14 @@
+         pErrorCode);
+ }
+ 
++#endif
++
+ U_CFUNC void
+-ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+-                             const USetAdder *sa,
+-                             UConverterUnicodeSet which,
+-                             UErrorCode *pErrorCode) {
++ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
++                                         const USetAdder *sa,
++                                         UConverterUnicodeSet which,
++                                         UConverterSetFilter filter,
++                                         UErrorCode *pErrorCode) {
+     const UConverterMBCSTable *mbcsTable;
+     const uint16_t *table;
+ 
+@@ -512,50 +517,26 @@
+                 c+=1024; /* empty stage 2 block */
+             }
+         }
+-    } else if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY) {
+-        /* ignore single-byte results */
++    } else {
+         const uint32_t *stage2;
+-        const uint16_t *stage3, *results;
++        const uint8_t *stage3, *bytes;
++        uint32_t st3Multiplier;
++        uint32_t value;
+ 
+-        results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
+-
+-        for(st1=0; st1<maxStage1; ++st1) {
+-            st2=table[st1];
+-            if(st2>(maxStage1>>1)) {
+-                stage2=(const uint32_t *)table+st2;
+-                for(st2=0; st2<64; ++st2) {
+-                    if((st3=stage2[st2])!=0) {
+-                        /* read the stage 3 block */
+-                        stage3=results+16*(uint32_t)(uint16_t)st3;
+-
+-                        /* get the roundtrip flags for the stage 3 block */
+-                        st3>>=16;
++        bytes=mbcsTable->fromUnicodeBytes;
+ 
+-                        /*
+-                         * Add code points for which the roundtrip flag is set.
+-                         * Once we get a set for fallback mappings, we have to check
+-                         * non-roundtrip stage 3 results for whether they are 0.
+-                         * See ucnv_MBCSFromUnicodeWithOffsets() for details.
+-                         *
+-                         * Ignore single-byte results (<0x100).
+-                         */
+-                        do {
+-                            if((st3&1)!=0 && *stage3>=0x100) {
+-                                sa->add(sa->set, c);
+-                            }
+-                            st3>>=1;
+-                            ++stage3;
+-                        } while((++c&0xf)!=0);
+-                    } else {
+-                        c+=16; /* empty stage 3 block */
+-                    }
+-                }
+-            } else {
+-                c+=1024; /* empty stage 2 block */
+-            }
++        switch(mbcsTable->outputType) {
++        case MBCS_OUTPUT_3:
++        case MBCS_OUTPUT_4_EUC:
++            st3Multiplier=3;
++            break;
++        case MBCS_OUTPUT_4:
++            st3Multiplier=4;
++            break;
++        default:
++            st3Multiplier=2;
++            break;
+         }
+-    } else {
+-        const uint32_t *stage2;
+ 
+         for(st1=0; st1<maxStage1; ++st1) {
+             st2=table[st1];
+@@ -563,6 +544,9 @@
+                 stage2=(const uint32_t *)table+st2;
+                 for(st2=0; st2<64; ++st2) {
+                     if((st3=stage2[st2])!=0) {
++                        /* read the stage 3 block */
++                        stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3;
++
+                         /* get the roundtrip flags for the stage 3 block */
+                         st3>>=16;
+ 
+@@ -572,12 +556,49 @@
+                          * non-roundtrip stage 3 results for whether they are 0.
+                          * See ucnv_MBCSFromUnicodeWithOffsets() for details.
+                          */
+-                        do {
+-                            if(st3&1) {
+-                                sa->add(sa->set, c);
+-                            }
+-                            st3>>=1;
+-                        } while((++c&0xf)!=0);
++                        switch(filter) {
++                        case UCNV_SET_FILTER_NONE:
++                            do {
++                                if(st3&1) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_DBCS_ONLY:
++                             /* Ignore single-byte results (<0x100). */
++                            do {
++                                if((st3&1)!=0 && *((const uint16_t *)stage3)>=0x100) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_2022_CN:
++                             /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
++                            do {
++                                if((st3&1)!=0 && ((value=*stage3)==0x81 || value==0x82)) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=3;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_SJIS:
++                             /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
++                            do {
++                                if((st3&1)!=0 && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        default:
++                            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
++                            return;
++                        }
+                     } else {
+                         c+=16; /* empty stage 3 block */
+                     }
+@@ -591,6 +612,19 @@
+     ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode);
+ }
+ 
++U_CFUNC void
++ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
++                                 const USetAdder *sa,
++                                 UConverterUnicodeSet which,
++                                 UErrorCode *pErrorCode) {
++    ucnv_MBCSGetFilteredUnicodeSetForUnicode(  /* forward to the filtered implementation */
++        sharedData, sa, which,
++        sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?  /* filter is chosen from the table's output type */
++            UCNV_SET_FILTER_DBCS_ONLY :  /* DBCS-only table: ignore single-byte results (<0x100) */
++            UCNV_SET_FILTER_NONE,  /* any other table: no filtering */
++        pErrorCode);
++}
++
+ static void
+ ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
+                    const USetAdder *sa,
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.h icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.h
+--- icu/source/common/ucnvmbcs.h	2007-09-13 18:17:58.000000000 -0500
++++ icu/source/common/ucnvmbcs.h	2009-10-07 11:31:52.351957135 -0500
+@@ -456,6 +456,7 @@
+ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode);
+ 
++#if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
+ /*
+  * Internal function returning a UnicodeSet for toUnicode() conversion.
+  * Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
+@@ -470,6 +471,7 @@
+                            UConverterUnicodeSet which,
+                            uint8_t state, int32_t lowByte, int32_t highByte,
+                            UErrorCode *pErrorCode);
++#endif
+ 
+ /*
+  * Internal function returning a UnicodeSet for toUnicode() conversion.
+@@ -481,9 +483,30 @@
+  */
+ U_CFUNC void
+ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
+-                             const USetAdder *sa,
+-                             UConverterUnicodeSet which,
+-                             UErrorCode *pErrorCode);
++                                 const USetAdder *sa,
++                                 UConverterUnicodeSet which,
++                                 UErrorCode *pErrorCode);
++
++typedef enum UConverterSetFilter {
++    UCNV_SET_FILTER_NONE,  /* no filtering */
++    UCNV_SET_FILTER_DBCS_ONLY,  /* ignore single-byte results (<0x100) */
++    UCNV_SET_FILTER_2022_CN,  /* only CNS 11643 planes 1 & 2 (first result byte 0x81 or 0x82), for non-EXT ISO-2022-CN */
++    UCNV_SET_FILTER_SJIS,  /* only Shift-JIS codes corresponding to JIS X 0208 (0x8140..0xeffc) */
++    UCNV_SET_FILTER_COUNT  /* number of filter values above */
++} UConverterSetFilter;
++
++/*
++ * Same as ucnv_MBCSGetUnicodeSetForUnicode() but the set can be filtered
++ * by encoding scheme: filter (a UConverterSetFilter) selects which mappings are added.
++ * Used by stateful converters which share regular conversion tables
++ * but only use a subset of their mappings.
++ */
++U_CFUNC void
++ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
++                                         const USetAdder *sa,
++                                         UConverterUnicodeSet which,
++                                         UConverterSetFilter filter,
++                                         UErrorCode *pErrorCode);
+ 
+ #endif
+ 
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nucnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nucnvtst.c
+--- icu/source/test/cintltst/nucnvtst.c	2007-09-13 18:17:34.000000000 -0500
++++ icu/source/test/cintltst/nucnvtst.c	2009-10-07 11:31:52.351957135 -0500
+@@ -3202,7 +3202,7 @@
+         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
+         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
+         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
+-        0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
++        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+         0x201D, 0x3014, 0x000D, 0x000A,
+         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
+         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
+@@ -3730,7 +3730,7 @@
+         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
+         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
+         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
+-        0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
++        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
+         0x201D, 0x000D, 0x000A,
+         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
+         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/udatatst.c icu-3.8.new/build-tree/icu/source/test/cintltst/udatatst.c
+--- icu/source/test/cintltst/udatatst.c	2007-09-13 18:17:36.000000000 -0500
++++ icu/source/test/cintltst/udatatst.c	2009-10-07 11:31:52.351957135 -0500
+@@ -1281,7 +1281,7 @@
+      * MBCS conversion table file without extension,
+      * to test swapping and preflighting of UTF-8-friendly mbcsIndex[].
+      */
+-    {"jisx-208",                 "cnv", ucnv_swap},
++    {"jisx-212",                 "cnv", ucnv_swap},
+ #endif
+ 
+ #if !UCONFIG_NO_CONVERSION
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt
+--- icu/source/test/testdata/conversion.txt	2007-09-13 18:17:46.000000000 -0500
++++ icu/source/test/testdata/conversion.txt	2009-10-07 11:31:52.351957135 -0500
+@@ -48,6 +48,15 @@
+     toUnicode {
+       Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
+       Cases {
++        // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
++        // using the Shift-JIS table for JIS X 0208 (ticket #5797)
++        {
++          "ISO-2022-JP",
++          :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 },
++          "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
++          :intvector{ 3,4,5,9,11,13,15,17,19,21,23,25,27 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
+         // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets()
+         {
+           "ISO-8859-3",
+@@ -495,6 +504,15 @@
+     fromUnicode {
+       Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
+       Cases {
++        // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
++        // using the Shift-JIS table for JIS X 0208 (ticket #5797)
++        {
++          "ISO-2022-JP",
++          "\u203e\xa5\u4e00\ufa10\u6f3e\u0391",
++          :bin{       1b284a7e5c1b2442306c222e5f2126211b2842 },
++          :intvector{ 0,0,0,0,1,2,2,2,2,2,3,3,4,4,5,5,5,5,5 },
++          :int{1}, :int{0}, "", "?=\u3013", ""  // U+3013 Geta Mark converts to 222e
++        }
+         // Verify that mappings that would result in byte values outside 20..7F (for SBCS)
+         // or 21..7E (for DBCS) are not used.
+         // ibm-9005_X110-2007.ucm (ISO 8859-7, <ESC>.F=1b2e46):
+@@ -1293,13 +1311,13 @@
+         // versions of ISO-2022-JP
+         {
+           "ISO-2022-JP",
+-          "[\x00-\x0d\x10-\x1a\x1c-\x7f\u0391-\u03a1\uff61-\uff9f\u4e00\u4e01\uffe5]",
+-          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\uffe6-\U0010ffff]",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]",
++          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]",
+           :int{0}
+         }   
+         {
+           "ISO-2022-JP-2",
+-          "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\uff61-\uff9f\u4e00-\u4e05\uffe6]",
++          "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]",
+           "[\x0e\x0f\x1b\uffe7-\U0010ffff]",
+           :int{0}
+         }
--- icu-3.8.orig/debian/patches/07-CVE-2009-0153.patch
+++ icu-3.8/debian/patches/07-CVE-2009-0153.patch
@@ -0,0 +1,592 @@
+#
+# Description: fix improper handling of invalid byte sequences during Unicode
+#  conversion. Requires the changes from these upstream ICU tickets:
+#  http://bugs.icu-project.org/trac/ticket/5797
+#  http://bugs.icu-project.org/trac/ticket/6001
+#  http://bugs.icu-project.org/trac/ticket/6002
+# Patch: https://bugzilla.redhat.com/show_bug.cgi?id=503071
+#
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c
+--- icu/source/common/ucnv2022.c	2009-10-07 11:33:25.051981563 -0500
++++ icu/source/common/ucnv2022.c	2009-10-07 11:33:30.351949076 -0500
+@@ -1973,6 +1973,7 @@
+         mySourceChar = args->converter->toUBytes[0];
+         args->converter->toULength = 0;
+         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
++        targetUniChar = missingCharMarker;
+         goto getTrailByte;
+     }
+ 
+@@ -2102,18 +2103,45 @@
+                 default:
+                     /* G0 DBCS */
+                     if(mySource < mySourceLimit) {
++                        int leadIsOk, trailIsOk;
+                         char trailByte;
+ getTrailByte:
+-                        trailByte = *mySource++;
+-                        if(cs == JISX208) {
+-                            _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf);
+-                        } else {
+-                            tempBuf[0] = (char)mySourceChar;
+-                            tempBuf[1] = trailByte;
+-                        }
+-                        mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+-                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
+-                    } else {
++                        trailByte = *mySource;
++                        /*
++                         * Ticket 5691: consistent illegal sequences:
++                         * - We include at least the first byte in the illegal sequence.
++                         * - If any of the non-initial bytes could be the start of a character,
++                         *   we stop the illegal sequence before the first one of those.
++                         *
++                         * In ISO-2022 DBCS, if both bytes are valid or both bytes are outside
++                         * the 21..7e range, then we treat them as a pair.
++                         * Otherwise (valid lead byte + illegal trail byte, or vice versa)
++                         * we report only the first byte as the illegal sequence.
++                         */
++                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
++                        if (leadIsOk == trailIsOk) {
++                            uint32_t tmpSourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); /* declared before any statement (C89) */
++                            ++mySource; /* consume the trail byte: lead and trail are handled as one pair */
++                            if (leadIsOk) {
++                                if(cs == JISX208) {
++                                    _2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf);
++                                    mySourceChar = tmpSourceChar;
++                                } else {
++                                    /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
++                                    mySourceChar = tmpSourceChar;
++                                    if (cs == KSC5601) {
++                                        tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
++                                    }
++                                    tempBuf[0] = (char)(tmpSourceChar >> 8);
++                                    tempBuf[1] = (char)(tmpSourceChar);
++                                }
++                                targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
++                            } else {
++                                mySourceChar = tmpSourceChar; /* both bytes outside 21..7e: keep the pair in mySourceChar */
++                            }
++                          }
++                      } else {
+                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+                         args->converter->toULength = 1;
+                         goto endloop;
+@@ -2254,7 +2282,12 @@
+             }
+             /* only DBCS or SBCS characters are expected*/
+             /* DB characters with high bit set to 1 are expected */
+-            if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
++            if( length > 2 || length==0 ||
++                (length == 1 && targetByteUnit > 0x7f) ||
++                (length == 2 &&
++                    ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
++                    (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
++            ) {
+                 targetByteUnit=missingCharMarker;
+             }
+             if (targetByteUnit != missingCharMarker){
+@@ -2583,17 +2616,36 @@
+             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
+             if(myData->toU2022State.g == 1) {
+                 if(mySource < mySourceLimit) {
++                    int leadIsOk, trailIsOk;
+                     char trailByte;
+ getTrailByte:
+-                    trailByte = *mySource++;
+-                    tempBuf[0] = (char)(mySourceChar + 0x80);
+-                    tempBuf[1] = (char)(trailByte + 0x80);
+-                    mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+-                    if((mySourceChar & 0x8080) == 0) {
+-                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
++                    targetUniChar = missingCharMarker;
++                    trailByte = *mySource;
++                    /*
++                     * Ticket 5691: consistent illegal sequences:
++                     * - We include at least the first byte in the illegal sequence.
++                     * - If any of the non-initial bytes could be the start of a character,
++                     *   we stop the illegal sequence before the first one of those.
++                     *
++                     * In ISO-2022 DBCS, if both bytes are valid or both bytes are outside
++                     * the 21..7e range, then we treat them as a pair.
++                     * Otherwise (valid lead byte + illegal trail byte, or vice versa)
++                     * we report only the first byte as the illegal sequence.
++                     */
++                    leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                    trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
++                    if (leadIsOk == trailIsOk) {
++                        ++mySource;
++                        if (leadIsOk) {
++                            tempBuf[0] = (char)(mySourceChar + 0x80);
++                            tempBuf[1] = (char)(trailByte + 0x80);
++                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
++                        } else {
++                            leadIsOk = TRUE; /* TODO: remove */
++                        }
++                        mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+                     } else {
+-                        /* illegal bytes > 0x7f */
+-                        targetUniChar = missingCharMarker;
++                        trailIsOk = TRUE; /* TODO: remove */
+                     }
+                 } else {
+                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+@@ -2601,8 +2653,10 @@
+                     break;
+                 }
+             }
+-            else{
++            else if(mySourceChar <= 0x7f) {
+                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
++            } else {
++                targetUniChar = 0xffff;
+             }
+             if(targetUniChar < 0xfffe){
+                 if(args->offsets) {
+@@ -3099,6 +3153,7 @@
+         /* continue with a partial double-byte character */
+         mySourceChar = args->converter->toUBytes[0];
+         args->converter->toULength = 0;
++        targetUniChar = missingCharMarker;
+         goto getTrailByte;
+     }
+ 
+@@ -3178,29 +3233,48 @@
+                         UConverterSharedData *cnv;
+                         StateEnum tempState;
+                         int32_t tempBufLen;
++                        int leadIsOk, trailIsOk;
+                         char trailByte;
+ getTrailByte:
+-                        trailByte = *mySource++;
+-                        tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
+-                        if(tempState > CNS_11643_0) {
+-                            cnv = myData->myConverterArray[CNS_11643];
+-                            tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
+-                            tempBuf[1] = (char) (mySourceChar);
+-                            tempBuf[2] = trailByte;
+-                            tempBufLen = 3;
+-
+-                        }else{
+-                            cnv = myData->myConverterArray[tempState];
+-                            tempBuf[0] = (char) (mySourceChar);
+-                            tempBuf[1] = trailByte;
+-                            tempBufLen = 2;
++                        trailByte = *mySource;
++                        /*
++                         * Ticket 5691: consistent illegal sequences:
++                         * - We include at least the first byte in the illegal sequence.
++                         * - If any of the non-initial bytes could be the start of a character,
++                         *   we stop the illegal sequence before the first one of those.
++                         *
++                         * In ISO-2022 DBCS, if both bytes are valid or both bytes are outside
++                         * the 21..7e range, then we treat them as a pair.
++                         * Otherwise (valid lead byte + illegal trail byte, or vice versa)
++                         * we report only the first byte as the illegal sequence.
++                         */
++                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
++                        if (leadIsOk == trailIsOk) {
++                            ++mySource;
++                            if (leadIsOk) {
++                                tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
++                                if(tempState >= CNS_11643_0) {
++                                    cnv = myData->myConverterArray[CNS_11643];
++                                    tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
++                                    tempBuf[1] = (char) (mySourceChar);
++                                    tempBuf[2] = trailByte;
++                                    tempBufLen = 3;
++
++                                }else{
++                                    cnv = myData->myConverterArray[tempState];
++                                    tempBuf[0] = (char) (mySourceChar);
++                                    tempBuf[1] = trailByte;
++                                    tempBufLen = 2;
++                                }
++                                targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
++                            }
++                            mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+                         }
+-                        mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
+                         if(pToU2022State->g>=2) {
+                             /* return from a single-shift state to the previous one */
+                             pToU2022State->g=pToU2022State->prevG;
+                         }
+-                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
+                     } else {
+                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
+                         args->converter->toULength = 1;
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c icu-3.8.new/build-tree/icu/source/common/ucnvhz.c
+--- icu/source/common/ucnvhz.c	2009-10-07 11:33:25.061953264 -0500
++++ icu/source/common/ucnvhz.c	2009-10-07 11:33:30.363220525 -0500
+@@ -215,19 +215,35 @@
+                 }
+                 else{
+                     /* trail byte */
++                    int leadIsOk, trailIsOk;
+                     uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
+-                    if( (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21) &&
+-                        (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21)
+-                    ) {
+-                        tempBuf[0] = (char) (leadByte+0x80) ;
+-                        tempBuf[1] = (char) (mySourceChar+0x80);
+-                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+-                            tempBuf, 2, args->converter->useFallback);
++                    targetUniChar = 0xffff;
++                    /*
++                     * Ticket 5691: consistent illegal sequences:
++                     * - We include at least the first byte in the illegal sequence.
++                     * - If any of the non-initial bytes could be the start of a character,
++                     *   we stop the illegal sequence before the first one of those.
++                     *
++                     * In HZ DBCS, if both bytes are valid or both bytes are outside
++                     * the 21..7d/7e range, then we treat them as a pair.
++                     * Otherwise (valid lead byte + illegal trail byte, or vice versa)
++                     * we report only the first byte as the illegal sequence.
++                     */
++                    leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
++                    trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
++                    if (leadIsOk == trailIsOk) {
++                        if (leadIsOk) {
++                            tempBuf[0] = (char) (leadByte+0x80) ;
++                            tempBuf[1] = (char) (mySourceChar+0x80);
++                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
++                                tempBuf, 2, args->converter->useFallback);
++                        }
++                        /* add another bit so that the code below writes 2 bytes in case of error */
++                        mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+                     } else {
+-                        targetUniChar = 0xffff;
++                        --mySource;
++                        mySourceChar = (int32_t)leadByte;
+                     }
+-                    /* add another bit so that the code below writes 2 bytes in case of error */
+-                    mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+                     args->converter->toUnicodeStatus =0x00;
+                 }
+             }
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c
+--- icu/source/common/ucnvmbcs.c	2009-10-07 11:33:24.991952135 -0500
++++ icu/source/common/ucnvmbcs.c	2009-10-07 11:33:30.363220525 -0500
+@@ -1,7 +1,7 @@
+ /*
+ ******************************************************************************
+ *
+-*   Copyright (C) 2000-2007, International Business Machines
++*   Copyright (C) 2000-2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ ******************************************************************************
+@@ -1791,6 +1791,65 @@
+     pArgs->offsets=offsets;
+ }
+ 
++static UBool  /* TRUE if some byte sequence continuing from `state` reaches a non-illegal final entry */
++hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
++    const int32_t *row=stateTable[state];
++    int32_t b, entry;
++    /* First test for final entries in this state for some commonly valid byte values (0xa1, then 0x41). */
++    entry=row[0xa1];
++    if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
++        MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
++    ) {
++        return TRUE;
++    }
++    entry=row[0x41];
++    if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
++        MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
++    ) {
++        return TRUE;
++    }
++    /* Then scan all 256 byte values for any final entry in this state that is not illegal. */
++    for(b=0; b<=0xff; ++b) {
++        entry=row[b];
++        if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
++            MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
++        ) {
++            return TRUE;
++        }
++    }
++    /* Then recurse into the target state of each transition entry; any hit there also counts. */
++    for(b=0; b<=0xff; ++b) {
++        entry=row[b];
++        if( MBCS_ENTRY_IS_TRANSITION(entry) &&
++            hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry))
++        ) {
++            return TRUE;
++        }
++    }
++    return FALSE;   /* no final entry other than illegal ones was found from this state */
++}
++
++/*
++ * Is byte b a single/lead byte in this state? A transition entry counts only if
++ * hasValidTrailBytes() finds a legal continuation (b is no lead byte if every sequence
++ * starting with b is illegal); a final entry counts unless illegal, or SI/SO when isDBCSOnly.
++ */
++static UBool
++isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
++    const int32_t *row=stateTable[state];
++    int32_t entry=row[b];
++    if(MBCS_ENTRY_IS_TRANSITION(entry)) {   /* lead byte */
++        return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry));
++    } else {   /* final entry: b would be a complete single-byte unit */
++        uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
++        if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
++            return FALSE;   /* SI/SO are illegal for DBCS-only conversion */
++        } else {
++            return action!=MBCS_STATE_ILLEGAL;
++        }
++    }
++}
++
+ U_CFUNC void
+ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode) {
+@@ -2146,6 +2205,34 @@
+             sourceIndex=nextSourceIndex;
+         } else if(U_FAILURE(*pErrorCode)) {
+             /* callback(illegal) */
++            if(byteIndex>1) {
++                /*
++                 * Ticket 5691: consistent illegal sequences:
++                 * - We include at least the first byte in the illegal sequence.
++                 * - If any of the non-initial bytes could be the start of a character,
++                 *   we stop the illegal sequence before the first one of those.
++                 */
++                UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
++                int8_t i;
++                for(i=1;
++                    i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
++                    ++i) {}
++                if(i<byteIndex) {
++                    /* Back out some bytes. */
++                    int8_t backOutDistance=byteIndex-i;
++                    int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
++                    byteIndex=i;  /* length of reported illegal byte sequence */
++                    if(backOutDistance<=bytesFromThisBuffer) {
++                        source-=backOutDistance;
++                    } else {
++                        /* Back out bytes from the previous buffer: Need to replay them. */
++                        cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
++                        /* preToULength is negative! */
++                        uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
++                        source=(const uint8_t *)pArgs->source;
++                    }
++                }
++            }
+             break;
+         } else /* unassigned sequences indicated with byteIndex>0 */ {
+             /* try an extension mapping */
+@@ -2156,6 +2243,7 @@
+                               &offsets, sourceIndex,
+                               pArgs->flush,
+                               pErrorCode);
++            /* TODO: nextSourceIndex+=diff instead of nextSourceIndex+diff ?? */
+             sourceIndex=nextSourceIndex+(int32_t)(source-(const uint8_t *)pArgs->source);
+ 
+             if(U_FAILURE(*pErrorCode)) {
+@@ -2447,15 +2535,37 @@
+ 
+     if(c<0) {
+         if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
+-            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+-        }
+-        if(U_FAILURE(*pErrorCode)) {
+             /* incomplete character byte sequence */
+             uint8_t *bytes=cnv->toUBytes;
+             cnv->toULength=(int8_t)(source-lastSource);
+             do {
+                 *bytes++=*lastSource++;
+             } while(lastSource<source);
++            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
++        } else if(U_FAILURE(*pErrorCode)) {
++            /* callback(illegal) */
++            /*
++             * Ticket 5691: consistent illegal sequences:
++             * - We include at least the first byte in the illegal sequence.
++             * - If any of the non-initial bytes could be the start of a character,
++             *   we stop the illegal sequence before the first one of those.
++             */
++            UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
++            uint8_t *bytes=cnv->toUBytes;
++            *bytes++=*lastSource++;     /* first byte */
++            if(lastSource==source) {
++                cnv->toULength=1;
++            } else /* lastSource<source: multi-byte character */ {
++                int8_t i;
++                for(i=1;
++                    lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
++                    ++i
++                ) {
++                    *bytes++=*lastSource++;
++                }
++                cnv->toULength=i;
++                source=lastSource;
++            }
+         } else {
+             /* no output because of empty input or only state changes */
+             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nccbtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nccbtst.c
+--- icu/source/test/cintltst/nccbtst.c	2007-09-13 18:17:34.000000000 -0500
++++ icu/source/test/cintltst/nccbtst.c	2009-10-07 11:33:30.363220525 -0500
+@@ -2497,13 +2497,13 @@
+ 
+ 
+     static const uint8_t text943[] = {
+-        0x82, 0xa9, 0x82, 0x20, /*0xc8,*/  0x61, 0x8a, 0xbf, 0x8e, 0x9a };
+-    static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22,  0x5b57};
+-    static const UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22,  0x5b57};
++        0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
++    static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
++    static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
+     static const UChar toUnicode943stop[]= { 0x304b};
+ 
+-    static const int32_t  fromIBM943Offssub[]  = {0, 2, 4, 5, 7};
+-    static const int32_t  fromIBM943Offsskip[] = { 0, 4, 5, 7};
++    static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
++    static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
+     static const int32_t  fromIBM943Offsstop[] = { 0};
+ 
+     gInBufferSize = inputsize;
+@@ -2537,9 +2537,9 @@
+ {
+     static const uint8_t sampleText[] = {
+         0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
+-        0xff, /*0x82, 0xa9,*/ 0x32, 0x33};
+-    static const UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063,  0xfffd,/*0x304b,*/ 0x0032, 0x0033};
+-    static const int32_t  fromIBM943Offssub[]  = {0, 2, 3, 4, 5, 7, 8};
++        0xff, 0x32, 0x33};
++    static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
++    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
+     /*checking illegal value for ibm-943 with substitute*/ 
+     gInBufferSize = inputsize;
+     gOutBufferSize = outputsize;
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nucnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nucnvtst.c
+--- icu/source/test/cintltst/nucnvtst.c	2009-10-07 11:33:25.071957485 -0500
++++ icu/source/test/cintltst/nucnvtst.c	2009-10-07 11:33:30.373199482 -0500
+@@ -2608,7 +2608,7 @@
+     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
+     /*Test for the condition where there is an invalid character*/
+     {
+-        static const uint8_t source2[]={0xa1, 0x01};
++        static const uint8_t source2[]={0xa1, 0x80};
+         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
+     }
+     /*Test for the condition where we have a truncated char*/
+@@ -3901,11 +3901,11 @@
+ TestISO_2022_KR() {
+     /* test input */
+     static const uint16_t in[]={
+-                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
+-                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
++                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
++                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
+                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
+                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
+-                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
++                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
+                    ,0x53E3,0x53E4,0x000A,0x000D};
+     const UChar* uSource;
+     const UChar* uSourceLimit;
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt
+--- icu/source/test/testdata/conversion.txt	2009-10-07 11:33:25.071957485 -0500
++++ icu/source/test/testdata/conversion.txt	2009-10-07 11:33:30.373199482 -0500
+@@ -48,12 +48,83 @@
+     toUnicode {
+       Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
+       Cases {
++        // Test ticket 5691: consistent illegal sequences
++        // Unfortunately, we cannot use the Shift-JIS examples from the ticket
++        // comments because our Shift-JIS table is Windows-compatible and
++        // therefore has no illegal single bytes. Same for GBK.
++        // Instead, we use the stricter GB 18030 also for 2-byte examples.
++        // The byte sequences are generally slightly different from the ticket
++        // comment, simply using assigned characters rather than just
++        // theoretically valid sequences.
++        {
++          "gb18030",
++          :bin{ 618140813c81ff7a },
++          "a\u4e02\\x81<\\x81\\xFFz",
++          :intvector{ 0,1,3,3,3,3,4,5,5,5,5,5,5,5,5,7 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "EUC-JP",
++          :bin{ 618fb0a98fb03c8f3cb0a97a },
++          "a\u4e28\\x8F\\xB0<\\x8F<\u9022z",
++          :intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "gb18030",
++          :bin{ 618130fc318130fc8181303c3e813cfc817a },
++          "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z",
++          :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "UTF-8",
++          :bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a },
++          "a\U00040042\\xF1\\x80\\x81<\\xF1\\x80\\x81\\xFF\\xF1\\x80\\xFF<\\xF1\\xFF<>z",
++          :intvector{ 0,1,1,5,5,5,5,5,5,5,5,5,5,5,5,8,9,9,9,9,9,9,9,9,9,9,9,9,12,12,12,12,13,13,13,13,13,13,13,13,15,15,15,15,16,17,17,17,17,18,18,18,18,19,20,21 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-JP-2",
++          :bin{ 1b24424141af4142affe41431b2842 },
++          "\u758f\\xAF\u758e\\xAF\\xFE\u790e",
++          :intvector{ 3,5,5,5,5,6,8,8,8,8,8,8,8,8,10 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ibm-25546",
++          :bin{ 411b242943420e4141af4142affe41430f5a },
++          "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
++          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-KR",
++          :bin{ 411b242943420e4141af4142affe41430f5a },
++          "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
++          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "ISO-2022-CN",
++          :bin{ 411b242941420e4141af4142affe41430f5a },
++          "AB\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
++          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
++        {
++          "HZ",
++          :bin{ 417e7b4141af4142affe41437e7d5a },
++          "A\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
++          :intvector{ 0,3,5,5,5,5,6,8,8,8,8,8,8,8,8,10,14 },
++          :int{1}, :int{0}, "", "&C", :bin{""}
++        }
+         // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e
+         {
+           "HZ",
+           :bin{ 7e7b21212120217e217f772100007e217e7d207e7e807e0a2b },
+-          "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd ~\ufffd+",
+-          :intvector{ 2,4,6,8,10,12,14,18,19,21,24 },
++          "\u3000\ufffd\ufffd\u3013\ufffd\ufffd\u9ccc\ufffd\ufffd ~\ufffd+",
++          :intvector{ 2,4,5,6,8,9,10,12,14,18,19,21,24 },
+           :int{1}, :int{1}, "", "?", :bin{""}
+         }
+         // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
+@@ -61,8 +132,8 @@
+         {
+           "ISO-2022-JP",
+           :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 },
+-          "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
+-          :intvector{ 3,4,5,9,11,13,15,17,19,21,23,25,27 },
++          "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\ufffd\ufffd\u25b2\ufffd\ufffd\u6f3e",
++          :intvector{ 3,4,5,9,11,12,13,14,16,17,19,20,21,22,23,25,26,27 },
+           :int{1}, :int{1}, "", "?", :bin{""}
+         }
+         // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets()
--- icu-3.8.orig/debian/patches/06-CVE-2008-1036.patch
+++ icu-3.8/debian/patches/06-CVE-2008-1036.patch
@@ -0,0 +1,557 @@
+#
+# Description: fix cross-site scripting attack via invalid character sequences
+# Ubuntu: https://bugs.launchpad.net/ubuntu/+source/icu/+bug/341834
+# Patch: http://bugs.icu-project.org/trac/search?q=%22ticket:6175:%22&noquickjump=1&changeset=on
+# Patch: https://bugzilla.redhat.com/attachment.cgi?id=321139 (thanks Red Hat)
+#
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c
+--- icu/source/common/ucnv2022.c	2009-10-07 11:32:59.372006488 -0500
++++ icu/source/common/ucnv2022.c	2009-10-07 11:33:04.801949089 -0500
+@@ -201,6 +201,7 @@
+ #ifdef U_ENABLE_GENERIC_ISO_2022
+     UBool isFirstBuffer;
+ #endif
++    UBool isEmptySegment;
+     char name[30];
+     char locale[3];
+ }UConverterDataISO2022;
+@@ -609,6 +610,7 @@
+     if(choice<=UCNV_RESET_TO_UNICODE) {
+         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
+         myConverterData->key = 0;
++        myConverterData->isEmptySegment = FALSE;
+     }
+     if(choice!=UCNV_RESET_TO_UNICODE) {
+         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
+@@ -814,6 +816,7 @@
+             if(chosenConverterName == NULL) {
+                 /* SS2 or SS3 */
+                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
++                _this->toUCallbackReason = UCNV_UNASSIGNED;
+                 return;
+             }
+ 
+@@ -935,6 +938,8 @@
+     }
+     if(U_SUCCESS(*err)) {
+         _this->toULength = 0;
++    } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
++        _this->toUCallbackReason = UCNV_UNASSIGNED;
+     }
+ }
+ 
+@@ -1986,6 +1991,7 @@
+                     continue;
+                 } else {
+                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
++                    myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
+                     break;
+                 }
+ 
+@@ -1997,21 +2003,39 @@
+                     continue;
+                 } else {
+                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
++                    myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
+                     break;
+                 }
+ 
+             case ESC_2022:
+                 mySource--;
+ escape:
+-                changeState_2022(args->converter,&(mySource),
+-                    mySourceLimit, ISO_2022_JP,err);
++                {
++                    const char * mySourceBefore = mySource;
++                    int8_t toULengthBefore = args->converter->toULength;
++
++                    changeState_2022(args->converter,&(mySource),
++                        mySourceLimit, ISO_2022_JP,err);
++
++                    /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
++                    if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
++                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                        args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
++                    }
++                }
+ 
+                 /* invalid or illegal escape sequence */
+                 if(U_FAILURE(*err)){
+                     args->target = myTarget;
+                     args->source = mySource;
++                    myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
+                     return;
+                 }
++                /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
++                if(myData->key==0) {
++                    myData->isEmptySegment = TRUE;
++                }
+                 continue;
+ 
+             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
+@@ -2028,6 +2052,7 @@
+                 /* falls through */
+             default:
+                 /* convert one or two bytes */
++                myData->isEmptySegment = FALSE;
+                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
+                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
+                     !IS_JP_DBCS(cs)
+@@ -2524,15 +2549,27 @@
+ 
+             if(mySourceChar==UCNV_SI){
+                 myData->toU2022State.g = 0;
++                if (myData->isEmptySegment) {
++                    myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
++                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                    args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                    args->converter->toUBytes[0] = mySourceChar;
++                    args->converter->toULength = 1;
++                    args->target = myTarget;
++                    args->source = mySource;
++                    return;
++                }
+                 /*consume the source */
+                 continue;
+             }else if(mySourceChar==UCNV_SO){
+                 myData->toU2022State.g = 1;
++                myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
+                 /*consume the source */
+                 continue;
+             }else if(mySourceChar==ESC_2022){
+                 mySource--;
+ escape:
++                myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
+                 changeState_2022(args->converter,&(mySource),
+                                 mySourceLimit, ISO_2022_KR, err);
+                 if(U_FAILURE(*err)){
+@@ -2543,6 +2580,7 @@
+                 continue;
+             }
+ 
++            myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
+             if(myData->toU2022State.g == 1) {
+                 if(mySource < mySourceLimit) {
+                     char trailByte;
+@@ -3075,27 +3113,52 @@
+             switch(mySourceChar){
+             case UCNV_SI:
+                 pToU2022State->g=0;
++                if (myData->isEmptySegment) {
++                    myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
++                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                    args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                    args->converter->toUBytes[0] = mySourceChar;
++                    args->converter->toULength = 1;
++                    args->target = myTarget;
++                    args->source = mySource;
++                    return;
++                }
+                 continue;
+ 
+             case UCNV_SO:
+                 if(pToU2022State->cs[1] != 0) {
+                     pToU2022State->g=1;
++                    myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
+                     continue;
+                 } else {
+                     /* illegal to have SO before a matching designator */
++                    myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
+                     break;
+                 }
+ 
+             case ESC_2022:
+                 mySource--;
+ escape:
+-                changeState_2022(args->converter,&(mySource),
+-                    mySourceLimit, ISO_2022_CN,err);
++                {
++                    const char * mySourceBefore = mySource;
++                    int8_t toULengthBefore = args->converter->toULength;
++
++                    changeState_2022(args->converter,&(mySource),
++                        mySourceLimit, ISO_2022_CN,err);
++
++                    /* After SO there must be at least one character before a designator (designator error handled separately) */
++                    if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
++                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                        args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
++                    }
++                }
+ 
+                 /* invalid or illegal escape sequence */
+                 if(U_FAILURE(*err)){
+                     args->target = myTarget;
+                     args->source = mySource;
++                    myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
+                     return;
+                 }
+                 continue;
+@@ -3109,6 +3172,7 @@
+                 /* falls through */
+             default:
+                 /* convert one or two bytes */
++                myData->isEmptySegment = FALSE;
+                 if(pToU2022State->g != 0) {
+                     if(mySource < mySourceLimit) {
+                         UConverterSharedData *cnv;
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_bld.c icu-3.8.new/build-tree/icu/source/common/ucnv_bld.c
+--- icu/source/common/ucnv_bld.c	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnv_bld.c	2009-10-07 11:33:04.821949771 -0500
+@@ -932,6 +932,7 @@
+     myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen;
+     myUConverter->subChars = (uint8_t *)myUConverter->subUChars;
+     uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
++    myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */
+ 
+     if(mySharedConverterData->impl->open != NULL) {
+         mySharedConverterData->impl->open(myUConverter, realName, locale, options, err);
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_bld.h icu-3.8.new/build-tree/icu/source/common/ucnv_bld.h
+--- icu/source/common/ucnv_bld.h	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnv_bld.h	2009-10-07 11:33:04.821949771 -0500
+@@ -1,6 +1,6 @@
+ /*
+ **********************************************************************
+-*   Copyright (C) 1999-2006, International Business Machines
++*   Copyright (C) 1999-2006,2008 International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *
+@@ -226,6 +226,9 @@
+     char preToU[UCNV_EXT_MAX_BYTES];
+     int8_t preFromULength, preToULength;    /* negative: replay */
+     int8_t preToUFirstLength;               /* length of first character */
++
++    /* new fields for ICU 4.0 */
++    UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */
+ };
+ 
+ U_CDECL_END /* end of UConverter */
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv.c icu-3.8.new/build-tree/icu/source/common/ucnv.c
+--- icu/source/common/ucnv.c	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnv.c	2009-10-07 11:33:04.821949771 -0500
+@@ -1528,11 +1528,14 @@
+             cnv->toULength=0;
+ 
+             /* call the callback function */
++            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
++                cnv->toUCallbackReason = UCNV_UNASSIGNED;
++            }
+             cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
+                 cnv->invalidCharBuffer, errorInputLength,
+-                (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
+-                    UCNV_UNASSIGNED : UCNV_ILLEGAL,
++                cnv->toUCallbackReason,
+                 err);
++            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
+ 
+             /*
+              * loop back to the offset handling
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c icu-3.8.new/build-tree/icu/source/common/ucnvhz.c
+--- icu/source/common/ucnvhz.c	2009-10-07 11:32:59.422000252 -0500
++++ icu/source/common/ucnvhz.c	2009-10-07 11:33:04.821949771 -0500
+@@ -59,6 +59,7 @@
+     UBool isEscapeAppended;
+     UBool isStateDBCS;
+     UBool isTargetUCharDBCS;
++    UBool isEmptySegment;
+ }UConverterDataHZ;
+ 
+ 
+@@ -98,6 +99,7 @@
+         cnv->mode=0;
+         if(cnv->extraInfo != NULL){
+             ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
++            ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
+         }
+     }
+     if(choice!=UCNV_RESET_TO_UNICODE) {
+@@ -130,6 +132,10 @@
+ *   from-GB code '~}' ($7E7D) is outside the defined GB range.)
+ *
+ *   Source: RFC 1842
++*
++*   Note that the formal syntax in RFC 1842 is invalid. I assume that the
++*   intended definition of single-byte-segment is as follows (pedberg):
++*   single-byte-segment = single-byte-seq 1*single-byte-char
+ */
+ 
+ 
+@@ -168,12 +174,23 @@
+                         args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
+                     }
+                     *(myTarget++)=(UChar)mySourceChar;
++                    myData->isEmptySegment = FALSE;
+                     continue;
+                 case UCNV_OPEN_BRACE:
+-                    myData->isStateDBCS = TRUE;
+-                    continue;
+                 case UCNV_CLOSE_BRACE:
+-                    myData->isStateDBCS = FALSE;
++                    myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
++                    if (myData->isEmptySegment) {
++                        myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
++                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
++                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
++                        args->converter->toUBytes[0] = UCNV_TILDE;
++                        args->converter->toUBytes[1] = mySourceChar;
++                        args->converter->toULength = 2;
++                        args->target = myTarget;
++                        args->source = mySource;
++                        return;
++                    }
++                    myData->isEmptySegment = TRUE;
+                     continue;
+                 default:
+                      /* if the first byte is equal to TILDE and the trail byte
+@@ -181,6 +198,7 @@
+                      */
+                     mySourceChar = 0x7e00 | mySourceChar;
+                     targetUniChar = 0xffff;
++                    myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+                     break;
+                 }
+             } else if(myData->isStateDBCS) {
+@@ -191,6 +209,7 @@
+                     } else {
+                         /* add another bit to distinguish a 0 byte from not having seen a lead byte */
+                         args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
++                        myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
+                     }
+                     continue;
+                 }
+@@ -218,8 +237,10 @@
+                     continue;
+                 } else if(mySourceChar <= 0x7f) {
+                     targetUniChar = (UChar)mySourceChar;  /* ASCII */
++                    myData->isEmptySegment = FALSE; /* the segment has something valid */
+                 } else {
+                     targetUniChar = 0xffff;
++                    myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+                 }
+             }
+             if(targetUniChar < 0xfffe){
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/cintltst/nucnvtst.c icu-3.8.new/build-tree/icu/source/test/cintltst/nucnvtst.c
+--- icu/source/test/cintltst/nucnvtst.c	2009-10-07 11:32:59.321951109 -0500
++++ icu/source/test/cintltst/nucnvtst.c	2009-10-07 11:33:04.821949771 -0500
+@@ -81,6 +81,7 @@
+ static void TestJitterbug2411(void);
+ static void TestJB5275(void);
+ static void TestJB5275_1(void);
++static void TestJitterbug6175(void);
+ #endif
+ 
+ static void TestRoundTrippingAllUTF(void);
+@@ -297,6 +298,7 @@
+ #if !UCONFIG_NO_LEGACY_CONVERSION
+    addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
+    addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
++   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
+ #endif
+ 
+ }
+@@ -4456,6 +4458,70 @@
+     free(offsets);
+ }
+ 
++/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
++typedef struct {
++    const char *    converterName; /* name handed to ucnv_open(); NULL marks the end of the table */
++    const char *    inputText; /* raw input bytes handed to ucnv_toUnicode() */
++    int             inputTextLength; /* number of bytes in inputText */
++} EmptySegmentTest;
++
++/* Callback for TestJitterbug6175, should only get called for empty segment errors */
++static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
++                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
++    if (reason > UCNV_IRREGULAR) { /* presumably the non-error notifications (reset/close/clone) declared after UCNV_IRREGULAR - confirm against ucnv_err.h */
++        return;
++    }
++    if (reason != UCNV_IRREGULAR) { /* any remaining reason other than UCNV_IRREGULAR is reported as a test failure */
++        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
++    }
++    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
++    *err = U_ZERO_ERROR; /* clear the reported error before writing the substitution */
++    ucnv_cbToUWriteSub(toArgs,0,err);
++}
++
++enum { kEmptySegmentToUCharsMax = 64 }; /* capacity, in UChars, of the output buffer used by TestJitterbug6175 */
++static void TestJitterbug6175(void) { /* feeds each empty-segment input through its converter with the checking callback installed */
++    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; /* a, b, ESC $ B, ESC ( B with nothing in between, c, d, CR, LF */
++    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; /* ESC $ ) C, a, SO immediately followed by SI, b, CR, LF */
++    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; /* a, ESC $ ) A, b, SO immediately followed by SI, ESC $ * H, ESC N, 6A 65, c, CR, LF */
++    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; /* a, ESC $ ) A, b, SO immediately followed by ESC $ ) G, 68 64, SI, c, CR, LF */
++    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; /* a, b, tilde-open-brace immediately followed by tilde-close-brace, c, d */
++    static const EmptySegmentTest emptySegmentTests[] = {
++        /* converterName inputText    inputTextLength */
++        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
++        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
++        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
++        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
++        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
++        /* terminator: */
++        { NULL,          NULL,        0,                  }
++    };
++    const EmptySegmentTest * testPtr;
++    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { /* stops at the NULL-name terminator entry */
++        UErrorCode   err = U_ZERO_ERROR;
++        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
++        if (U_FAILURE(err)) {
++            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
++            return; /* abandons the remaining table entries as well */
++        }
++        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
++        if (U_FAILURE(err)) {
++            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
++            ucnv_close(cnv);
++            return; /* abandons the remaining table entries as well */
++        }
++        {
++            UChar         toUChars[kEmptySegmentToUCharsMax];
++            UChar *       toUCharsPtr = toUChars;
++            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
++            const char *  inCharsPtr = testPtr->inputText;
++            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
++            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); /* no offsets array, flush is TRUE; err and the converted text are not inspected here, the callback does the checking */
++        }
++        ucnv_close(cnv);
++    }
++}
++
+ static void
+ TestEBCDIC_STATEFUL() {
+     /* test input */
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt
+--- icu/source/test/testdata/conversion.txt	2009-10-07 11:32:59.431954490 -0500
++++ icu/source/test/testdata/conversion.txt	2009-10-07 11:33:04.821949771 -0500
+@@ -1,6 +1,6 @@
+ //*******************************************************************************
+ //
+-//   Copyright (C) 2003-2007, International Business Machines
++//   Copyright (C) 2003-2008, International Business Machines
+ //   Corporation and others.  All Rights Reserved.
+ //
+ //   file name:  conversion.txt
+@@ -199,6 +199,21 @@
+           :intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
+           :int{1}, :int{1}, "", "&", :bin{""}
+         }
++        // empty segment (using substitution and stop)
++        {
++          "ISO-2022-KR",
++          :bin{ 1b242943610e0f620d0a },
++          "a\uFFFDb\u000D\u000A",
++          :intvector{ 4, 6, 7, 8, 9 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-KR",
++          :bin{ 1b242943610e0f620d0a },
++          "a",
++          :intvector{ 4 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
++        }
+ 
+         // ISO-2022-JP
+ 
+@@ -249,6 +264,21 @@
+           :bin{ 41c15c1b284a5cc242 }, "A\uff81\\\xa5\uff82B", :intvector{ 0, 1, 2, 6, 7, 8 },
+           :int{1}, :int{1}, "", ".", :bin{""}
+         }
++        // empty segment (using substitution and stop)
++        {
++          "ISO-2022-JP",
++          :bin{ 61621b24421b284263640d0a },
++          "ab\uFFFDcd\u000D\u000A",
++          :intvector{ 0, 1, 5, 8, 9, 10, 11 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-JP",
++          :bin{ 61621b24421b284263640d0a },
++          "ab",
++          :intvector{ 0, 1 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"1b2842"}
++        }
+ 
+         // ISO-2022-CN
+ 
+@@ -319,6 +349,36 @@
+           :bin{ 411b242b491b4f2121 }, "\x41", :intvector{ 0 },
+           :int{1}, :int{1}, "unsuppesc", ".", :bin{ 1b242b49 }
+         }
++        // empty segment 1 (using substitution and stop)
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
++          "ab\uFFFD\u994Cc\u000D\u000A",
++          :intvector{ 0, 5, 7, 14, 16, 17, 18 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e0f1b242a481b4e6a65630d0a },
++          "ab",
++          :intvector{ 0, 5 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"0f"}
++        }
++        // empty segment 2 (using substitution and stop)
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e1b24294768640f630d0a },
++          "ab\uFFFD\u5F70c\u000D\u000A",
++          :intvector{ 0, 5, 7, 11, 14, 15, 16 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "ISO-2022-CN",
++          :bin{ 611b242941620e1b24294768640f630d0a },
++          "ab",
++          :intvector{ 0, 5 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"1b242947"}
++        }
+ 
+         // ISO-2022 SBCS
+         // [U_ENABLE_GENERIC_ISO_2022]
+@@ -333,6 +393,39 @@
+         //  :int{1}, :int{1}, "", ".", :bin{""}
+         //}
+ 
++        // HZ-GB-2312
++
++        // empty segment 1 (using substitution and stop)
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b7e7d6364 },
++          "ab\uFFFDcd",
++          :intvector{ 0, 1, 4, 6, 7 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b7e7d63640d0a },
++          "ab",
++          :intvector{ 0, 1 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"7e7d"}
++        }
++        // empty segment 2 & legal redundant switches (using substitution and stop)
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d },
++          "ab\u4E0D\u7A7A\uFFFD\u4E00cdef\uFFFD",
++          :intvector{ 0, 1, 4, 6, 10, 12, 16, 17, 20, 21, 24 },
++          :int{1}, :int{1}, "", "?", :bin{""}
++        }
++        {
++          "HZ-GB-2312",
++          :bin{ 61627e7b323b3f557e7b7e7b523b7e7d63647e7d65667e7d7e7d },
++          "ab\u4E0D\u7A7A",
++          :intvector{ 0, 1, 4, 6 },
++          :int{1}, :int{1}, "illesc", ".", :bin{"7e7b"}
++        }
++
+         // DBCS-only extensions
+         {
+           "ibm-970",
--- icu-3.8.orig/debian/patches/04-redhat.icu6001.patch
+++ icu-3.8/debian/patches/04-redhat.icu6001.patch
@@ -0,0 +1,764 @@
+#
+# Description: ucnv_getUnicodeSet(): add choice of set of code points with
+#  roundtrip or fallback mappings (required to fix CVE-2009-0153). See:
+#  https://bugzilla.redhat.com/show_bug.cgi?id=503071
+# Upstream: http://bugs.icu-project.org/trac/ticket/6001
+#
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv2022.c icu-3.8.new/build-tree/icu/source/common/ucnv2022.c
+--- icu/source/common/ucnv2022.c	2009-10-07 11:32:05.613200656 -0500
++++ icu/source/common/ucnv2022.c	2009-10-07 11:32:11.463208021 -0500
+@@ -3399,11 +3399,19 @@
+             /* include ASCII for JP */
+             sa->addRange(sa->set, 0, 0x7f);
+         }
+-        if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
++        if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
+             /*
+-             * TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks,
+-             * we need to include half-width Katakana for all JP variants because
+-             * JIS X 0208 has hardcoded fallbacks for them.
++             * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
++             * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
++             * use half-width Katakana.
++             * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
++             * half-width Katakana via the ESC ( I sequence.
++             * However, we only emit (fromUnicode) half-width Katakana according to the
++             * definition of each variant.
++             *
++             * When including fallbacks,
++             * we need to include half-width Katakana Unicode code points for all JP variants because
++             * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
+              */
+             /* include half-width Katakana for JP */
+             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
+@@ -3457,6 +3465,12 @@
+                  * corresponding to JIS X 0208.
+                  */
+                 filter=UCNV_SET_FILTER_SJIS;
++            } else if(i==KSC5601) {
++                /*
++                 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
++                 * are broader than GR94.
++                 */
++                filter=UCNV_SET_FILTER_GR94DBCS;
+             } else {
+                 filter=UCNV_SET_FILTER_NONE;
+             }
+@@ -3472,6 +3486,9 @@
+     sa->remove(sa->set, 0x0e);
+     sa->remove(sa->set, 0x0f);
+     sa->remove(sa->set, 0x1b);
++
++    /* ISO 2022 converters do not convert C1 controls either */
++    sa->removeRange(sa->set, 0x80, 0x9f);
+ }
+ 
+ static const UConverterImpl _ISO2022Impl={
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_ext.c icu-3.8.new/build-tree/icu/source/common/ucnv_ext.c
+--- icu/source/common/ucnv_ext.c	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnv_ext.c	2009-10-07 11:32:11.483207693 -0500
+@@ -946,7 +946,7 @@
+ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
+                             const int32_t *cx,
+                             const USetAdder *sa,
+-                            UConverterUnicodeSet which,
++                            UBool useFallback,
+                             int32_t minLength,
+                             UChar32 c,
+                             UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
+@@ -966,7 +966,7 @@
+     value=*fromUSectionValues++;
+ 
+     if( value!=0 &&
+-        UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) &&
++        (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) &&
+         UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
+     ) {
+         if(c>=0) {
+@@ -987,12 +987,14 @@
+             /* no mapping, do nothing */
+         } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
+             ucnv_extGetUnicodeSetString(
+-                sharedData, cx, sa, which, minLength,
++                sharedData, cx, sa, useFallback, minLength,
+                 U_SENTINEL, s, length+1,
+                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
+                 pErrorCode);
+-        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+-                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
++        } else if((useFallback ?
++                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
++                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
++                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
+                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
+         ) {
+             sa->addString(sa->set, s, length+1);
+@@ -1004,6 +1006,7 @@
+ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
+                       const USetAdder *sa,
+                       UConverterUnicodeSet which,
++                      UConverterSetFilter filter,
+                       UErrorCode *pErrorCode) {
+     const int32_t *cx;
+     const uint16_t *stage12, *stage3, *ps2, *ps3;
+@@ -1011,6 +1014,7 @@
+ 
+     uint32_t value;
+     int32_t st1, stage1Length, st2, st3, minLength;
++    UBool useFallback;
+ 
+     UChar s[UCNV_EXT_MAX_UCHARS];
+     UChar32 c;
+@@ -1027,12 +1031,20 @@
+ 
+     stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
+ 
++    useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
++
+     /* enumerate the from-Unicode trie table */
+     c=0; /* keep track of the current code point while enumerating */
+ 
+-    if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
++    if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
++        filter==UCNV_SET_FILTER_DBCS_ONLY ||
++        filter==UCNV_SET_FILTER_SJIS ||
++        filter==UCNV_SET_FILTER_GR94DBCS
++    ) {
+         /* DBCS-only, ignore single-byte results */
+         minLength=2;
++    } else if(filter==UCNV_SET_FILTER_2022_CN) {
++        minLength=3;
+     } else {
+         minLength=1;
+     }
+@@ -1064,14 +1076,41 @@
+                             length=0;
+                             U16_APPEND_UNSAFE(s, length, c);
+                             ucnv_extGetUnicodeSetString(
+-                                sharedData, cx, sa, which, minLength,
++                                sharedData, cx, sa, useFallback, minLength,
+                                 c, s, length,
+                                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
+                                 pErrorCode);
+-                        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+-                                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
++                        } else if((useFallback ?
++                                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
++                                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
++                                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
+                                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
+                         ) {
++                            switch(filter) {
++                            case UCNV_SET_FILTER_2022_CN:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
++                                    continue;
++                                }
++                                break;
++                            case UCNV_SET_FILTER_SJIS:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
++                                    continue;
++                                }
++                                break;
++                            case UCNV_SET_FILTER_GR94DBCS:
++                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
++                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe-0xa1a1) &&
++                                     (uint8_t)(value-0xa1)<=(0xfe-0xa1))) {
++                                    continue;
++                                }
++                                break;
++                            default:
++                                /*
++                                 * UCNV_SET_FILTER_NONE,
++                                 * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
++                                 */
++                                break;
++                            }
+                             sa->add(sa->set, c);
+                         }
+                     } while((++c&0xf)!=0);
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_ext.h icu-3.8.new/build-tree/icu/source/common/ucnv_ext.h
+--- icu/source/common/ucnv_ext.h	2007-09-13 18:17:58.000000000 -0500
++++ icu/source/common/ucnv_ext.h	2009-10-07 11:32:11.483207693 -0500
+@@ -382,10 +382,20 @@
+                            UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
+                            UErrorCode *pErrorCode);
+ 
++/*
++ * Add code points and strings to the set according to the extension mappings.
++ * Limitation on the UConverterSetFilter:
++ * The filters currently assume that they are used with 1:1 mappings.
++ * They only apply to single input code points, and then they pass through
++ * only mappings with single-charset-code results.
++ * For example, the Shift-JIS filter only works for 2-byte results and tests
++ * that those 2 bytes are in the JIS X 0208 range of Shift-JIS.
++ */
+ U_CFUNC void
+ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
+                       const USetAdder *sa,
+                       UConverterUnicodeSet which,
++                      UConverterSetFilter filter,
+                       UErrorCode *pErrorCode);
+ 
+ /* toUnicode helpers -------------------------------------------------------- */
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvhz.c icu-3.8.new/build-tree/icu/source/common/ucnvhz.c
+--- icu/source/common/ucnvhz.c	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnvhz.c	2009-10-07 11:32:11.483207693 -0500
+@@ -1,6 +1,6 @@
+ /*  
+ **********************************************************************
+-*   Copyright (C) 2000-2006, International Business Machines
++*   Copyright (C) 2000-2007, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *   file name:  ucnvhz.c
+@@ -528,6 +528,7 @@
+     sa->add(sa->set, 0x7e);
+ 
+     /* add all of the code points that the sub-converter handles */
++    /* ucnv_MBCSGetFilteredUnicodeSetForUnicode(((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, sa, which, UCNV_SET_FILTER_GR94DBCS, pErrorCode); */
+     ((UConverterDataHZ*)cnv->extraInfo)->
+         gbConverter->sharedData->impl->
+             getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_lmb.c icu-3.8.new/build-tree/icu/source/common/ucnv_lmb.c
+--- icu/source/common/ucnv_lmb.c	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnv_lmb.c	2009-10-07 11:32:11.483207693 -0500
+@@ -1,6 +1,6 @@
+ /*  
+ **********************************************************************
+-*   Copyright (C) 2000-2006, International Business Machines
++*   Copyright (C) 2000-2007, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *   file name:  ucnv_lmb.cpp
+@@ -536,7 +536,7 @@
+     NULL,\
+     NULL,\
+     _LMBCSSafeClone,\
+-    _LMBCSGetUnicodeSet\
++    ucnv_getCompleteUnicodeSet\
+ };\
+ static const UConverterStaticData _LMBCSStaticData##n={\
+   sizeof(UConverterStaticData),\
+@@ -662,15 +662,14 @@
+     return &newLMBCS->cnv;
+ }
+ 
+-static void
+-_LMBCSGetUnicodeSet(const UConverter *cnv,
+-                   const USetAdder *sa,
+-                   UConverterUnicodeSet which,
+-                   UErrorCode *pErrorCode) {
+-    /* all but U+F6xx, see LMBCS explanation above (search for F6xx) */
+-    sa->addRange(sa->set, 0, 0xf5ff);
+-    sa->addRange(sa->set, 0xf700, 0x10ffff);
+-}
++/*
++ * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117)
++ * which added all code points except for U+F6xx
++ * because those cannot be represented in the Unicode group.
++ * However, it turns out that windows-950 has roundtrips for all of U+F6xx
++ * which means that LMBCS can convert all Unicode code points after all.
++ * We now simply use ucnv_getCompleteUnicodeSet().
++ */
+ 
+ /* 
+    Here's the basic helper function that we use when converting from
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.c icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.c
+--- icu/source/common/ucnvmbcs.c	2009-10-07 11:32:05.613200656 -0500
++++ icu/source/common/ucnvmbcs.c	2009-10-07 11:32:11.483207693 -0500
+@@ -485,9 +485,23 @@
+ 
+     if(mbcsTable->outputType==MBCS_OUTPUT_1) {
+         const uint16_t *stage2, *stage3, *results;
++        uint16_t minValue;
+ 
+         results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
+ 
++        /*
++         * Set a threshold variable for selecting which mappings to use.
++         * See ucnv_MBCSSingleFromBMPWithOffsets() and
++         * MBCS_SINGLE_RESULT_FROM_U() for details.
++         */
++        if(which==UCNV_ROUNDTRIP_SET) {
++            /* use only roundtrips */
++            minValue=0xf00;
++        } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
++            /* use all roundtrip and fallback results */
++            minValue=0x800;
++        }
++
+         for(st1=0; st1<maxStage1; ++st1) {
+             st2=table[st1];
+             if(st2>maxStage1) {
+@@ -497,15 +511,8 @@
+                         /* read the stage 3 block */
+                         stage3=results+st3;
+ 
+-                        /*
+-                         * Add code points for which the roundtrip flag is set.
+-                         * Once we get a set for fallback mappings, we have to use
+-                         * a threshold variable with a value of 0x800.
+-                         * See ucnv_MBCSSingleFromBMPWithOffsets() and
+-                         * MBCS_SINGLE_RESULT_FROM_U() for details.
+-                         */
+                         do {
+-                            if(*stage3++>=0xf00) {
++                            if(*stage3++>=minValue) {
+                                 sa->add(sa->set, c);
+                             }
+                         } while((++c&0xf)!=0);
+@@ -522,9 +529,12 @@
+         const uint8_t *stage3, *bytes;
+         uint32_t st3Multiplier;
+         uint32_t value;
++        UBool useFallback;
+ 
+         bytes=mbcsTable->fromUnicodeBytes;
+ 
++        useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
++
+         switch(mbcsTable->outputType) {
+         case MBCS_OUTPUT_3:
+         case MBCS_OUTPUT_4_EUC:
+@@ -551,9 +561,8 @@
+                         st3>>=16;
+ 
+                         /*
+-                         * Add code points for which the roundtrip flag is set.
+-                         * Once we get a set for fallback mappings, we have to check
+-                         * non-roundtrip stage 3 results for whether they are 0.
++                         * Add code points for which the roundtrip flag is set,
++                         * or which map to non-zero bytes if we use fallbacks.
+                          * See ucnv_MBCSFromUnicodeWithOffsets() for details.
+                          */
+                         switch(filter) {
+@@ -561,6 +570,23 @@
+                             do {
+                                 if(st3&1) {
+                                     sa->add(sa->set, c);
++                                    stage3+=st3Multiplier;
++                                } else if(useFallback) {
++                                    uint8_t b=0;
++                                    switch(st3Multiplier) {
++                                    case 4:
++                                        b|=*stage3++;
++                                    case 3:
++                                        b|=*stage3++;
++                                    case 2:
++                                        b|=stage3[0]|stage3[1];
++                                        stage3+=2;
++                                    default:
++                                        break;
++                                    }
++                                    if(b!=0) {
++                                        sa->add(sa->set, c);
++                                    }
+                                 }
+                                 st3>>=1;
+                             } while((++c&0xf)!=0);
+@@ -568,7 +594,7 @@
+                         case UCNV_SET_FILTER_DBCS_ONLY:
+                              /* Ignore single-byte results (<0x100). */
+                             do {
+-                                if((st3&1)!=0 && *((const uint16_t *)stage3)>=0x100) {
++                                if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) {
+                                     sa->add(sa->set, c);
+                                 }
+                                 st3>>=1;
+@@ -578,7 +604,7 @@
+                         case UCNV_SET_FILTER_2022_CN:
+                              /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
+                             do {
+-                                if((st3&1)!=0 && ((value=*stage3)==0x81 || value==0x82)) {
++                                if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) {
+                                     sa->add(sa->set, c);
+                                 }
+                                 st3>>=1;
+@@ -588,7 +614,20 @@
+                         case UCNV_SET_FILTER_SJIS:
+                              /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
+                             do {
+-                                if((st3&1)!=0 && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
++                                if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
++                                    sa->add(sa->set, c);
++                                }
++                                st3>>=1;
++                                stage3+=2;  /* +=st3Multiplier */
++                            } while((++c&0xf)!=0);
++                            break;
++                        case UCNV_SET_FILTER_GR94DBCS:
++                            /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
++                            do {
++                                if( ((st3&1)!=0 || useFallback) &&
++                                    (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfefe-0xa1a1) &&
++                                    (uint8_t)(value-0xa1)<=(0xfe-0xa1)
++                                ) {
+                                     sa->add(sa->set, c);
+                                 }
+                                 st3>>=1;
+@@ -609,7 +648,7 @@
+         }
+     }
+ 
+-    ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode);
++    ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
+ }
+ 
+ U_CFUNC void
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnvmbcs.h icu-3.8.new/build-tree/icu/source/common/ucnvmbcs.h
+--- icu/source/common/ucnvmbcs.h	2009-10-07 11:32:05.613200656 -0500
++++ icu/source/common/ucnvmbcs.h	2009-10-07 11:32:11.483207693 -0500
+@@ -492,6 +492,7 @@
+     UCNV_SET_FILTER_DBCS_ONLY,
+     UCNV_SET_FILTER_2022_CN,
+     UCNV_SET_FILTER_SJIS,
++    UCNV_SET_FILTER_GR94DBCS,
+     UCNV_SET_FILTER_COUNT
+ } UConverterSetFilter;
+ 
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/ucnv_set.c icu-3.8.new/build-tree/icu/source/common/ucnv_set.c
+--- icu/source/common/ucnv_set.c	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/ucnv_set.c	2009-10-07 11:32:11.483207693 -0500
+@@ -1,7 +1,7 @@
+ /*
+ *******************************************************************************
+ *
+-*   Copyright (C) 2003-2005, International Business Machines
++*   Copyright (C) 2003-2007, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ *******************************************************************************
+@@ -52,7 +52,8 @@
+             uset_add,
+             uset_addRange,
+             uset_addString,
+-            uset_remove
++            uset_remove,
++            uset_removeRange
+         };
+         sa.set=setFillIn;
+ 
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/unicode/ucnv.h icu-3.8.new/build-tree/icu/source/common/unicode/ucnv.h
+--- icu/source/common/unicode/ucnv.h	2007-09-13 18:17:54.000000000 -0500
++++ icu/source/common/unicode/ucnv.h	2009-10-07 11:32:11.483207693 -0500
+@@ -870,6 +870,8 @@
+ typedef enum UConverterUnicodeSet {
+     /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+     UCNV_ROUNDTRIP_SET,
++    /** Select the set of Unicode code points with roundtrip or fallback mappings. @draft ICU 4.0 */
++    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+     /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
+     UCNV_SET_COUNT
+ } UConverterUnicodeSet;
+@@ -878,11 +880,16 @@
+ /**
+  * Returns the set of Unicode code points that can be converted by an ICU converter.
+  *
+- * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET):
++ * Returns one of several kinds of set:
++ *
++ * 1. UCNV_ROUNDTRIP_SET
++ *
+  * The set of all Unicode code points that can be roundtrip-converted
+- * (converted without any data loss) with the converter.
++ * (converted without any data loss) with the converter (ucnv_fromUnicode()).
+  * This set will not include code points that have fallback mappings
+  * or are only the result of reverse fallback mappings.
++ * This set will also not include PUA code points with fallbacks, although
++ * ucnv_fromUnicode() will always use those mappings despite ucnv_setFallback().
+  * See UTR #22 "Character Mapping Markup Language"
+  * at http://www.unicode.org/reports/tr22/
+  *
+@@ -893,6 +900,12 @@
+  *   by comparing its roundtrip set with the set of ExemplarCharacters from
+  *   ICU's locale data or other sources
+  *
++ * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
++ *
++ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
++ * when fallbacks are turned on (see ucnv_setFallback()).
++ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
++ *
+  * In the future, there may be more UConverterUnicodeSet choices to select
+  * sets with different properties.
+  *
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/common/uset_imp.h icu-3.8.new/build-tree/icu/source/common/uset_imp.h
+--- icu/source/common/uset_imp.h	2007-09-13 18:18:00.000000000 -0500
++++ icu/source/common/uset_imp.h	2009-10-07 11:32:11.483207693 -0500
+@@ -36,6 +36,9 @@
+ typedef void U_CALLCONV
+ USetRemove(USet *set, UChar32 c);
+ 
++typedef void U_CALLCONV
++USetRemoveRange(USet *set, UChar32 start, UChar32 end);
++
+ /**
+  * Interface for adding items to a USet, to keep low-level code from
+  * statically depending on the USet implementation.
+@@ -47,6 +50,7 @@
+     USetAddRange *addRange;
+     USetAddString *addString;
+     USetRemove *remove;
++    USetRemoveRange *removeRange;
+ };
+ typedef struct USetAdder USetAdder;
+ 
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/intltest/convtest.cpp icu-3.8.new/build-tree/icu/source/test/intltest/convtest.cpp
+--- icu/source/test/intltest/convtest.cpp	2007-09-13 18:17:42.000000000 -0500
++++ icu/source/test/intltest/convtest.cpp	2009-10-07 11:32:11.483207693 -0500
+@@ -70,6 +70,7 @@
+         case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
+         case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
+         case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
++        case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
+         default: name=""; break; //needed to end loop
+     }
+ }
+@@ -465,6 +466,183 @@
+     }
+ }
+ 
++U_CDECL_BEGIN
++static void U_CALLCONV  // from-Unicode callback installed by TestGetUnicodeSet2()
++getUnicodeSetCallback(const void *context,  // the UnicodeSet to prune (cast below)
++                      UConverterFromUnicodeArgs *fromUArgs,
++                      const UChar* codeUnits,
++                      int32_t length,
++                      UChar32 codePoint,  // the code point the callback is invoked for
++                      UConverterCallbackReason reason,
++                      UErrorCode *pErrorCode) {
++    if(reason<=UCNV_IRREGULAR) {  // a conversion problem, not a reset/close/clone notification
++        ((UnicodeSet *)context)->remove(codePoint);  // the converter cannot convert this code point
++        *pErrorCode=U_ZERO_ERROR;                    // skip: clear the error so conversion continues
++    }  // else ignore the reset, close and clone calls.
++}
++U_CDECL_END
++
++// Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
++void
++ConversionTest::TestGetUnicodeSet2() {
++    // Build a string with all code points.
++    UChar32 cpLimit;
++    int32_t s0Length;
++    if(quick) {
++        cpLimit=s0Length=0x10000;  // BMP only
++    } else {
++        cpLimit=0x110000;
++        s0Length=0x10000+0x200000;  // BMP + surrogate pairs
++    }
++    UChar *s0=new UChar[s0Length];
++    if(s0==NULL) {
++        return;
++    }
++    UChar *s=s0;
++    UChar32 c;
++    UChar c2;
++    // low BMP
++    for(c=0; c<=0xd7ff; ++c) {
++        *s++=(UChar)c;
++    }
++    // trail surrogates
++    for(c=0xdc00; c<=0xdfff; ++c) {
++        *s++=(UChar)c;
++    }
++    // lead surrogates
++    // (after trails so that there is not even one surrogate pair in between)
++    for(c=0xd800; c<=0xdbff; ++c) {
++        *s++=(UChar)c;
++    }
++    // high BMP
++    for(c=0xe000; c<=0xffff; ++c) {
++        *s++=(UChar)c;
++    }
++    // supplementary code points = surrogate pairs
++    if(cpLimit==0x110000) {
++        for(c=0xd800; c<=0xdbff; ++c) {
++            for(c2=0xdc00; c2<=0xdfff; ++c2) {
++                *s++=(UChar)c;
++                *s++=c2;
++            }
++        }
++    }
++
++    static const char *const cnvNames[]={
++        "UTF-8",
++        "UTF-7",
++        "UTF-16",
++        "US-ASCII",
++        "ISO-8859-1",
++        "windows-1252",
++        "Shift-JIS",
++        "ibm-1390",  // EBCDIC_STATEFUL table
++        "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
++        // "HZ", TODO(markus): known bug, the set incorrectly contains [\u02CA\u02CB\u02D9\u2010\u2013\u2015...]
++        "ISO-2022-JP",
++        "JIS7",
++        "ISO-2022-CN",
++        "ISO-2022-CN-EXT",
++        "LMBCS"
++    };
++    char buffer[1024];
++    for(int32_t i=0; i<LENGTHOF(cnvNames); ++i) {
++        UErrorCode errorCode=U_ZERO_ERROR;
++        UConverter *cnv=cnv_open(cnvNames[i], errorCode);
++        if(U_FAILURE(errorCode)) {
++            errln("failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
++            continue;
++        }
++        UnicodeSet expected;
++        ucnv_setFromUCallBack(cnv, getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
++        if(U_FAILURE(errorCode)) {
++            errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
++            ucnv_close(cnv);
++            continue;
++        }
++        UConverterUnicodeSet which;
++        for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
++            if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
++                ucnv_setFallback(cnv, TRUE);
++            }
++            expected.add(0, cpLimit-1);  // start full; the callback removes what cannot be converted
++            s=s0;
++            UBool flush;
++            do {
++                char *t=buffer;
++                flush=(UBool)(s==s0+s0Length);  // last call (no input left) flushes the converter
++                ucnv_fromUnicode(cnv, &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
++                if(U_FAILURE(errorCode)) {
++                    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
++                        errorCode=U_ZERO_ERROR;
++                        continue;  // output is discarded; only the callback side effects matter
++                    } else {
++                        break;  // unexpected error, should not occur
++                    }
++                }
++            } while(!flush);
++            UnicodeSet set;
++            ucnv_getUnicodeSet(cnv, (USet *)&set, which, &errorCode);
++            if(cpLimit<0x110000) {
++                set.remove(cpLimit, 0x10ffff);
++            }
++            if(which==UCNV_ROUNDTRIP_SET) {
++                // ignore PUA code points because they will be converted even if they
++                // are fallbacks and when other fallbacks are turned off,
++                // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
++                expected.remove(0xe000, 0xf8ff);
++                expected.remove(0xf0000, 0xffffd);
++                expected.remove(0x100000, 0x10fffd);
++                set.remove(0xe000, 0xf8ff);
++                set.remove(0xf0000, 0xffffd);
++                set.remove(0x100000, 0x10fffd);
++            }
++            if(set!=expected) {
++                // First try to see if we have different sets because ucnv_getUnicodeSet()
++                // added strings: The above conversion method does not tell us what strings might be convertible.
++                // Remove strings from the set and compare again.
++                // Unfortunately, there are no good, direct set methods for finding out whether there are strings
++                // in the set, nor for enumerating or removing just them.
++                // Intersect all code points with the set. The intersection will not contain strings.
++                UnicodeSet temp(0, 0x10ffff);
++                temp.retainAll(set);
++                set=temp;
++            }
++            if(set!=expected) {
++                UnicodeSet diffSet;
++                UnicodeString out;
++
++                // are there items that must be in the set but are not?
++                (diffSet=expected).removeAll(set);
++                if(!diffSet.isEmpty()) {
++                    diffSet.toPattern(out, TRUE);
++                    if(out.length()>100) {
++                        out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
++                    }
++                    errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
++                            cnvNames[i], which);
++                    errln(out);
++                }
++
++                // are there items that must not be in the set but are?
++                (diffSet=set).removeAll(expected);
++                if(!diffSet.isEmpty()) {
++                    diffSet.toPattern(out, TRUE);
++                    if(out.length()>100) {
++                        out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
++                    }
++                    errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
++                            cnvNames[i], which);
++                    errln(out);
++                }
++            }
++        }
++        ucnv_close(cnv);  // release the converter opened for this name (was leaked on every iteration)
++    }
++
++    delete [] s0;
++}
++
+ // open testdata or ICU data converter ------------------------------------- ***
+ 
+ UConverter *
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/intltest/convtest.h icu-3.8.new/build-tree/icu/source/test/intltest/convtest.h
+--- icu/source/test/intltest/convtest.h	2007-09-13 18:17:40.000000000 -0500
++++ icu/source/test/intltest/convtest.h	2009-10-07 11:32:11.493207494 -0500
+@@ -72,6 +72,7 @@
+     void TestToUnicode();
+     void TestFromUnicode();
+     void TestGetUnicodeSet();
++    void TestGetUnicodeSet2();
+ 
+ private:
+     UBool
+diff -Nur -x '*.orig' -x '*~' icu-3.8/build-tree/icu/source/test/testdata/conversion.txt icu-3.8.new/build-tree/icu/source/test/testdata/conversion.txt
+--- icu/source/test/testdata/conversion.txt	2009-10-07 11:32:05.613200656 -0500
++++ icu/source/test/testdata/conversion.txt	2009-10-07 11:32:11.493207494 -0500
+@@ -1311,16 +1311,29 @@
+         // versions of ISO-2022-JP
+         {
+           "ISO-2022-JP",
+-          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]",
+-          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2015\u203e\u4e00\u4e01\uffe5]",
++          "[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u2014\u301c\u4e02\u4e27-\u4e29\u4fe0\u663b\u9eb5\ufa0e-\ufa2d\uff61-\uff9f\uffe4\uffe6-\U0010ffff]",
+           :int{0}
+-        }   
++        }
+         {
+           "ISO-2022-JP-2",
+-          "[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]",
+-          "[\x0e\x0f\x1b\uffe7-\U0010ffff]",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uffe6]",
++          "[\x0e\x0f\x1b\uff61-\uff9f\uffe4\uffe7-\U0010ffff]",
++          :int{0}
++        }
++        {
++          "JIS7",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa0-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\u4e00-\u4e05\u4fe0\u663b\uff61-\uff9f\uffe6]",
++          "[\x0e\x0f\x1b\uffe4\uffe7-\U0010ffff]",
+           :int{0}
+         }
++        // with fallbacks
++        {
++          "ISO-2022-JP",
++          "[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u2014\u2015\u203e\u301c\u4e00\u4e01\u4fe0\u9eb5\uff61-\uff9f\uffe5]",
++          "[\x0e\x0f\x1b\xa6\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\u663b\ufa0e-\ufa2d\uffe4\uffe6-\U0010ffff]",
++          :int{1}
++        }
+ 
+         // versions of ISO-2022-CN
+         {
+@@ -1352,6 +1365,14 @@
+           :int{0}
+         }
+ 
++        // LMBCS
++        {
++          "LMBCS",
++          "[\x00-\U0010ffff]",
++          "[]",
++          :int{0}
++        }
++
+         // extensions
+         {
+           "ibm-1390",
--- icu-3.8.orig/debian/patches/00-cve-2007-4770-4771.patch
+++ icu-3.8/debian/patches/00-cve-2007-4770-4771.patch
@@ -0,0 +1,350 @@
+Index: source/i18n/regexcmp.cpp
+===================================================================
+--- source/i18n/regexcmp.cpp	(revision 23291)
++++ source/i18n/regexcmp.cpp	(revision 23292)
+@@ -2,7 +2,7 @@
+ //
+ //  file:  regexcmp.cpp
+ //
+-//  Copyright (C) 2002-2007 International Business Machines Corporation and others.
++//  Copyright (C) 2002-2008 International Business Machines Corporation and others.
+ //  All Rights Reserved.
+ //
+ //  This file contains the ICU regular expression compiler, which is responsible
+@@ -1186,14 +1186,17 @@
+             // Because capture groups can be forward-referenced by back-references,
+             //  we fill the operand with the capture group number.  At the end
+             //  of compilation, it will be changed to the variable's location.
+-            U_ASSERT(groupNum > 0);
+-            int32_t  op;
+-            if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
+-                op = URX_BUILD(URX_BACKREF_I, groupNum);
++            if (groupNum < 1) { 
++                error(U_REGEX_INVALID_BACK_REF);
+             } else {
+-                op = URX_BUILD(URX_BACKREF, groupNum);
++                int32_t  op;
++                if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
++                    op = URX_BUILD(URX_BACKREF_I, groupNum);
++                } else {
++                    op = URX_BUILD(URX_BACKREF, groupNum);
++                }
++                fRXPat->fCompiledPat->addElement(op, *fStatus);
+             }
+-            fRXPat->fCompiledPat->addElement(op, *fStatus);
+         }
+         break;
+ 
+Index: source/i18n/rematch.cpp
+===================================================================
+--- source/i18n/rematch.cpp	(revision 23291)
++++ source/i18n/rematch.cpp	(revision 23292)
+@@ -1,6 +1,6 @@
+ /*
+ **************************************************************************
+-*   Copyright (C) 2002-2007 International Business Machines Corporation  *
++*   Copyright (C) 2002-2008 International Business Machines Corporation  *
+ *   and others. All rights reserved.                                     *
+ **************************************************************************
+ */
+@@ -30,6 +30,15 @@
+ 
+ U_NAMESPACE_BEGIN
+ 
++// Limit the size of the back track stack, to avoid system failures caused
++//   by heap exhaustion.  Units are in 32 bit words, not bytes.
++// This value puts ICU's limits higher than most other regexp implementations,
++//  which use recursion rather than the heap, and take more storage per
++//  backtrack point.
++// This constant is _temporary_.  Proper API to control the value will be added.
++//
++static const int32_t BACKTRACK_STACK_CAPACITY = 8000000;
++
+ //-----------------------------------------------------------------------------
+ //
+ //   Constructor and Destructor
+@@ -53,8 +62,9 @@
+     }
+     if (fStack == NULL || fData == NULL) {
+         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
++    } else {
++        fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
+     }
+-        
+     reset(RegexStaticSets::gStaticSets->fEmptyString);
+ }
+ 
+@@ -78,6 +88,8 @@
+     }
+     if (fStack == NULL || fData == NULL) {
+         status = U_MEMORY_ALLOCATION_ERROR;
++    } else {
++        fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
+     }
+     reset(input);
+ }
+@@ -102,6 +114,8 @@
+     }
+     if (fStack == NULL || fData == NULL) {
+         status = U_MEMORY_ALLOCATION_ERROR;
++    } else {
++        fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
+     }
+     reset(RegexStaticSets::gStaticSets->fEmptyString);
+ }
+@@ -1014,6 +1028,14 @@
+ inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) {
+     // push storage for a new frame. 
+     int32_t *newFP = fStack->reserveBlock(frameSize, status);
++    if (newFP == NULL) {
++        // Heap allocation error on attempted stack expansion.
++        // We need to return a writable stack frame, so just return the
++        //    previous frame.  The match operation will stop quickly
++        //    because of the error status, after which the frame will never
++        //    be looked at again.
++        return fp;
++    }
+     fp = (REStackFrame *)(newFP - frameSize);  // in case of realloc of stack.
+     
+     // New stack frame = copy of old top frame.
+@@ -1029,8 +1051,8 @@
+     fp->fPatIdx = savePatIdx;
+     return (REStackFrame *)newFP;
+ }
+-    
+-            
++
++
+ //--------------------------------------------------------------------------------
+ //
+ //   MatchAt      This is the actual matching engine.
+@@ -2261,6 +2283,7 @@
+         }
+ 
+         if (U_FAILURE(status)) {
++            isMatch = FALSE;
+             break;
+         }
+     }
+Index: source/test/intltest/regextst.h
+===================================================================
+--- source/test/intltest/regextst.h	(revision 23291)
++++ source/test/intltest/regextst.h	(revision 23292)
+@@ -1,6 +1,6 @@
+ /********************************************************************
+  * COPYRIGHT: 
+- * Copyright (c) 2002-2007, International Business Machines Corporation and
++ * Copyright (c) 2002-2008, International Business Machines Corporation and
+  * others. All Rights Reserved.
+  ********************************************************************/
+ 
+@@ -30,6 +30,7 @@
+     virtual void Extended();
+     virtual void Errors();
+     virtual void PerlTests();
++    virtual void Bug6149();
+ 
+     // The following functions are internal to the regexp tests.
+     virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line);
+Index: source/test/intltest/regextst.cpp
+===================================================================
+--- source/test/intltest/regextst.cpp	(revision 23291)
++++ source/test/intltest/regextst.cpp	(revision 23292)
+@@ -1,6 +1,6 @@
+ /********************************************************************
+  * COPYRIGHT:
+- * Copyright (c) 2002-2007, International Business Machines Corporation and
++ * Copyright (c) 2002-2008, International Business Machines Corporation and
+  * others. All Rights Reserved.
+  ********************************************************************/
+ 
+@@ -66,6 +66,10 @@
+         case 6: name = "PerlTests";
+             if (exec) PerlTests();
+             break;
++        case 7: name = "Bug 6149";
++            if (exec) Bug6149();
++            break;
++            
+ 
+ 
+         default: name = "";
+@@ -1639,6 +1643,12 @@
+ 
+     // Ticket 5389
+     REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX);
++    
++    // Invalid Back Reference \0
++    //    For ICU 3.8 and earlier
++    //    For ICU versions newer than 3.8, \0 introduces an octal escape.
++    //
++    REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF);
+ 
+ }
+ 
+@@ -2122,6 +2132,26 @@
+ }
+ 
+ 
++//--------------------------------------------------------------
++//
++//  Bug6149   Verify limits to heap expansion for backtrack stack.
++//             Use this pattern,
++//                 "(a?){1,}"
++//             The zero-length match will repeat forever.
++//                (That this goes into a loop is another bug)
++//
++//---------------------------------------------------------------
++void RegexTest::Bug6149() {
++    UnicodeString pattern("(a?){1,}");
++    UnicodeString s("xyz");
++    uint32_t flags = 0;
++    UErrorCode status = U_ZERO_ERROR;
++    
++    RegexMatcher  matcher(pattern, s, flags, status);
++    UBool result = false;
++    REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR);
++    REGEX_ASSERT(result == FALSE);
++ }
+ 
+ #endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
+ 
+Index: source/common/uvectr32.cpp
+===================================================================
+--- source/common/uvectr32.cpp	(revision 23291)
++++ source/common/uvectr32.cpp	(revision 23292)
+@@ -1,6 +1,6 @@
+ /*
+ ******************************************************************************
+-* Copyright (C) 1999-2003, International Business Machines Corporation and   *
++* Copyright (C) 1999-2008, International Business Machines Corporation and   *
+ * others. All Rights Reserved.                                               *
+ ******************************************************************************
+ *   Date        Name        Description
+@@ -26,6 +26,7 @@
+ UVector32::UVector32(UErrorCode &status) :
+     count(0),
+     capacity(0),
++    maxCapacity(0),
+     elements(NULL)
+ {
+     _init(DEFUALT_CAPACITY, status);
+@@ -34,6 +35,7 @@
+ UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) :
+     count(0),
+     capacity(0),
++    maxCapacity(0),
+     elements(0)
+ {
+     _init(initialCapacity, status);
+@@ -46,6 +48,9 @@
+     if (initialCapacity < 1) {
+         initialCapacity = DEFUALT_CAPACITY;
+     }
++    if (maxCapacity>0 && maxCapacity<initialCapacity) {
++        initialCapacity = maxCapacity;
++    }
+     elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity);
+     if (elements == 0) {
+         status = U_MEMORY_ALLOCATION_ERROR;
+@@ -189,24 +194,38 @@
+ UBool UVector32::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
+     if (capacity >= minimumCapacity) {
+         return TRUE;
+-    } else {
+-        int32_t newCap = capacity * 2;
+-        if (newCap < minimumCapacity) {
+-            newCap = minimumCapacity;
+-        }
+-        int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap);
+-        if (newElems == 0) {
+-            status = U_MEMORY_ALLOCATION_ERROR;
+-            return FALSE;
+-        }
+-        uprv_memcpy(newElems, elements, sizeof(elements[0]) * count);
+-        uprv_free(elements);
+-        elements = newElems;
+-        capacity = newCap;
+-        return TRUE;
+     }
++    if (maxCapacity>0 && minimumCapacity>maxCapacity) {
++        status = U_BUFFER_OVERFLOW_ERROR;
++        return FALSE;
++    }
++    int32_t newCap = capacity * 2;
++    if (newCap < minimumCapacity) {
++        newCap = minimumCapacity;
++    }
++    if (maxCapacity > 0 && newCap > maxCapacity) {
++        newCap = maxCapacity;
++    }
++    int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap);
++    if (newElems == 0) {
++        status = U_MEMORY_ALLOCATION_ERROR;
++        return FALSE;
++    }
++    uprv_memcpy(newElems, elements, sizeof(elements[0]) * count);
++    uprv_free(elements);
++    elements = newElems;
++    capacity = newCap;
++    return TRUE;
+ }
+ 
++void UVector32::setMaxCapacity(int32_t limit) {
++    U_ASSERT(limit >= 0);
++    maxCapacity = limit;
++    if (maxCapacity < 0) {
++        maxCapacity = 0;
++    }
++}
++
+ /**
+  * Change the size of this vector as follows: If newSize is smaller,
+  * then truncate the array, possibly deleting held elements for i >=
+Index: source/common/uvectr32.h
+===================================================================
+--- source/common/uvectr32.h	(revision 23291)
++++ source/common/uvectr32.h	(revision 23292)
+@@ -1,6 +1,6 @@
+ /*
+ **********************************************************************
+-*   Copyright (C) 1999-2006, International Business Machines
++*   Copyright (C) 1999-2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ */
+@@ -61,6 +61,8 @@
+     int32_t   count;
+ 
+     int32_t   capacity;
++    
++    int32_t   maxCapacity;   // Limit beyond which capacity is not permitted to grow.
+ 
+     int32_t*  elements;
+ 
+@@ -162,6 +164,14 @@
+     int32_t *getBuffer() const;
+ 
+     /**
++     * Set the maximum allowed buffer capacity for this vector/stack.
++     * Default with no limit set is unlimited, go until malloc() fails.
++     * A Limit of zero means unlimited capacity.
++     * Units are vector elements (32 bits each), not bytes.
++     */
++    void setMaxCapacity(int32_t limit);
++
++    /**
+      * ICU "poor man's RTTI", returns a UClassID for this class.
+      */
+     static UClassID U_EXPORT2 getStaticClassID();
+@@ -221,7 +231,9 @@
+ }
+ 
+ inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) {
+-    ensureCapacity(count+size, status);
++    if (ensureCapacity(count+size, status) == FALSE) {
++        return NULL;
++    }
+     int32_t  *rp = elements+count;
+     count += size;
+     return rp;
--- icu-3.8.orig/debian/patches/icu-3.6-setBreakType.patch
+++ icu-3.8/debian/patches/icu-3.6-setBreakType.patch
@@ -0,0 +1,17 @@
+--- icu/source/common/unicode/rbbi.h-old	2007-10-31 15:52:08.000000000 +0100
++++ icu/source/common/unicode/rbbi.h	2007-10-31 15:52:47.000000000 +0100
+@@ -611,12 +611,14 @@
+     virtual int32_t getBreakType() const;
+ #endif
+ 
++public:
+     /**
+       * Set the type of the break iterator.
+       * @internal
+       */
+     virtual void setBreakType(int32_t type);
+ 
++protected:
+     /**
+       * Common initialization function, used by constructors and bufferClone.
+       *   (Also used by DictionaryBasedBreakIterator::createBufferClone().)
--- icu-3.8.orig/debian/libicu38.lintian
+++ icu-3.8/debian/libicu38.lintian
@@ -0,0 +1,6 @@
+# libicu38 installs multiple shared libraries, none of which is
+# actually called libicu.so.38, but all of which are libicu*.so.38.
+libicu38: package-name-doesnt-match-sonames
+# libicudata.so.38.0 contains static data only
+libicu38: shared-lib-without-dependency-information
+libicu38: shlib-without-PT_GNU_STACK-section usr/lib/libicudata.so.38.0
--- icu-3.8.orig/debian/lib32icu38.install
+++ icu-3.8/debian/lib32icu38.install
@@ -0,0 +1 @@
+usr/lib32/lib*.so.*
--- icu-3.8.orig/debian/copyright
+++ icu-3.8/debian/copyright
@@ -0,0 +1,44 @@
+This package was debianized by Jay Berkenbilt <qjb@debian.org> on
+August 5, 2005.
+
+The original source was downloaded from
+ftp://ftp.software.ibm.com/software/globalization/icu/3.6/icu4c-3_6-src.tgz
+
+The main web sites for ICU are
+http://icu.sourceforge.net/
+http://www.ibm.com/software/globalization/icu/
+
+ICU License - ICU 1.8.1 and later
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2001 International Business Machines Corporation and others
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.
+
+--------------------------------------------------------------------------------
+All trademarks and registered trademarks mentioned herein are the property of their respective owners.
--- icu-3.8.orig/debian/lib32icu-dev.install
+++ icu-3.8/debian/lib32icu-dev.install
@@ -0,0 +1,3 @@
+usr/lib32/lib*.so
+usr/lib32/lib*.a
+usr/lib32/icu
--- icu-3.8.orig/debian/libicu38.install
+++ icu-3.8/debian/libicu38.install
@@ -0,0 +1 @@
+usr/lib/lib*.so.*
--- icu-3.8.orig/debian/icu-doc.doc-base
+++ icu-3.8/debian/icu-doc.doc-base
@@ -0,0 +1,13 @@
+Document: icu-doc
+Title: ICU API Documentation
+Author: IBM Corporation and Others
+Abstract: This manual describes the APIs of the
+ International Components for Unicode C/C++
+ library. It is a useful reference for the
+ ICU programmer.
+Section: Libs
+
+Format: HTML
+Index: /usr/share/doc/icu-doc/html/index.html
+Files: /usr/share/doc/icu-doc/html/*.html
+
--- icu-3.8.orig/debian/changelog
+++ icu-3.8/debian/changelog
@@ -0,0 +1,396 @@
+icu (3.8-6ubuntu0.2) hardy-security; urgency=low
+
+  * SECURITY UPDATE: fix improper handling of invalid byte sequences
+    during Unicode conversion
+    - debian/07-CVE-2009-0153.patch: backported patch thanks to RedHat via
+      Debian
+    - 03-redhat.icu5797.patch, 04-redhat.icu6001.patch, and
+      05-redhat.icu6002.patch required for applying 07-CVE-2009-0153.patch
+      with 06-CVE-2008-1036.patch needing adjustments. Patch from Debian.
+    - CVE-2009-0153
+
+ -- Jamie Strandboge <jamie@ubuntu.com>  Wed, 07 Oct 2009 11:33:48 -0500
+
+icu (3.8-6ubuntu0.1) hardy-security; urgency=low
+
+  * SECURITY UPDATE: Cross-site scripting attack via invalid character
+    sequences (LP: #341834)
+    - debian/patches/03-cve-2008-1036.patch: Improve parsing logic in
+      source/common/{ucnv2022.c,ucnv_bld.*,ucnv.c,ucnvhz.c} to replace
+      invalid character sequences. Also, add test case to
+      source/test/{cintltst/nucnvtst.c,testdata/conversion.txt}.
+    - CVE-2008-1036
+
+ -- Marc Deslauriers <marc.deslauriers@ubuntu.com>  Wed, 25 Mar 2009 09:55:21 -0400
+
+icu (3.8-6) unstable; urgency=high
+
+  * Add debian/patches/00-cve-2007-4770-4771.patch created with
+    svn diff -c 23292 \
+    http://source.icu-project.org/repos/icu/icu/branches/maint/maint-3-8
+    to address the following security vulnerabilities:
+     - CVE-2007-4770: reference to non-existent capture group may
+       cause access to invalid memory
+     - CVE-2007-4771: buffer overflow in regexcmp.cpp
+    (Closes: #463688)
+  * Updated standards version to 3.7.3: no changes required.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Thu, 07 Feb 2008 12:58:34 -0500
+
+icu (3.8-5) unstable; urgency=low
+
+  * Filter out extraneous dependencies among different versions of the
+    library packages. (Closes: #451767, 451978)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 01 Dec 2007 09:47:32 -0500
+
+icu (3.8-4) experimental; urgency=low
+
+  * Include changes from 3.6-10.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sun, 18 Nov 2007 11:04:16 -0500
+
+icu (3.6-10) unstable; urgency=low
+
+  * It appears that amd64 requires 32-bit libraries to be in
+    /emul/ia32-linux/usr/lib instead of /usr/lib32.  Following zlib's
+    example of moving them around for amd64 only. (Closes: #451495)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sun, 18 Nov 2007 11:03:10 -0500
+
+icu (3.8-3) experimental; urgency=low
+
+  * Include changes from 3.6-9.
+  * Include -dbg package with unstripped versions of the libraries.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 17 Nov 2007 15:02:36 -0500
+
+icu (3.6-9) unstable; urgency=low
+
+  * Yet another 32-bit library fix.  Files were installed in /32 because
+    of the debian/tmp32 thing.  How did this ever work? (Closes: #451495)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 17 Nov 2007 12:12:18 -0500
+
+icu (3.8-2) experimental; urgency=low
+
+  * Include changes from 3.6-8.  (Closes: #448747)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Tue, 06 Nov 2007 20:58:09 -0500
+
+icu (3.6-8) unstable; urgency=low
+
+  * Clean up 32-bit library patch to avoid excessive and unnecessary runs
+    of configure. (Closes: #447771)
+  * make setBreakType public in rbbi.h; needed by OpenOffice.org.  This
+    patch is included in OpenOffice.org's internal ICU.  Including it here
+    allows OpenOffice.org to continue to use this ICU package.  Thanks
+    Rene Engelhard.  (Closes: #448745)
+  * Rename debian/watch.not-yet to debian/no-watch so it won't get picked
+    up even though it's not supposed to.  ICU's ftp site uses a structure
+    that isn't supported by uscan.  (Closes: #449701)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Tue, 06 Nov 2007 20:56:38 -0500
+
+icu (3.8-1) experimental; urgency=low
+
+  * New upstream release.  All previously included patches have been
+    incorporated into upstream.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 20 Oct 2007 11:53:32 -0400
+
+icu (3.6-7) unstable; urgency=low
+
+  * Fix bug in which 32-bit library installs were overwriting files for
+    64-bit libraries on amd64.  Thanks Robert Millan for the patch.
+    (Closes: #447275)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 20 Oct 2007 11:30:12 -0400
+
+icu (3.6-6) unstable; urgency=low
+
+  * Oops: fixed one more problem with 32-bit builds on a 64-bit platform.
+    Thanks Aaron Ucko.  (Closes: #398778)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Mon, 17 Sep 2007 15:19:59 -0400
+
+icu (3.6-5) unstable; urgency=low
+
+  * Add additional Build-Depends for 64-bit platforms.  Thanks Robert
+    Millan.  (Closes: #398778)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Mon, 17 Sep 2007 10:42:32 -0400
+
+icu (3.6-4) unstable; urgency=low
+
+  * Accepted patch from Robert Millan (with very slight, mostly cosmetic
+    modifications) to build 32-bit libraries on 64-bit architectures.
+    Many thanks to Robert Millan for supplying this patch!  (Closes:
+    #398778)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 15 Sep 2007 21:42:33 -0400
+
+icu (3.8~d01-1) experimental; urgency=low
+
+  * New upstream release
+  * Configure with weak reference to thread library.  (Closes: #389260)
+  * The development package no longer has the library soname in its name.
+    It is now just libicu-dev.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 04 Aug 2007 11:04:49 -0400
+
+icu (3.6-3) unstable; urgency=low
+
+  * Include patch from Samuel Thibault to allow icu to build on gnu hurd.
+    (Closes: #414446)
+
+ -- Jay Berkenbilt <qjb@debian.org>  Tue, 10 Jul 2007 17:31:56 -0400
+
+icu (3.6-2) unstable; urgency=low
+
+  * Include patch to fix error in IndicClassTables to fix
+    worstCaseExpansion for Sinhala.  Thanks to Harshula for forwarding
+    this.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Mon, 27 Nov 2006 21:19:09 -0500
+
+icu (3.6-1) unstable; urgency=low
+
+  * New upstream release
+  * Provide libicu34-dev since ICU 3.6 provides backward compatible
+    interfaces in addition to new ones.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Tue, 19 Sep 2006 12:10:41 -0400
+
+icu (3.6~d02-1) experimental; urgency=low
+
+  * New upstream release.
+  * Remove special optimization hack to work around now-fixed m68k build
+    problems.  (Closes: #360743)
+  * Update standards version.  No changes required.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Tue, 15 Aug 2006 16:34:34 -0400
+
+icu (3.4.1a-1) unstable; urgency=low
+
+  * Upstream re-released 3.4.1 without changing the version number because
+    the header file with 3.4.1 still said it was 3.4.  Unfortunately, the
+    debian 3.4.1 package had already been uploaded.  This "3.4.1a" release
+    now matches upstream's 3.4.1.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Wed, 29 Mar 2006 22:19:08 -0500
+
+icu (3.4.1-1) unstable; urgency=low
+
+  * New upstream release
+
+ -- Jay Berkenbilt <qjb@debian.org>  Fri,  3 Mar 2006 23:07:52 -0500
+
+icu (3.4-4) unstable; urgency=low
+
+  * Build with g++ 4.0 with -fno-strict-aliasing to work around g++ 4.0
+    bugs that impact ICU.  Future versions should work properly with the
+    latest g++ without any special flags.  (Closes: #342970)
+  * Enable static libraries.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sun, 22 Jan 2006 11:36:59 -0500
+
+icu (3.4-3) unstable; urgency=low
+
+  * Explicitly build with g++ 3.4.  The current ICU fails its test suite
+    with 4.0 but not with 3.4.  Future versions should work properly with
+    4.0.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 19 Nov 2005 11:29:31 -0500
+
+icu (3.4-2) unstable; urgency=low
+
+  * Remove some extraneous build steps that may cause problems with
+    autobuilders.
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat, 13 Aug 2005 12:41:35 -0400
+
+icu (3.4-1) unstable; urgency=low
+
+  * New upstream release
+  * Completely new packaging
+
+ -- Jay Berkenbilt <qjb@debian.org>  Fri,  5 Aug 2005 21:57:15 -0400
+
+icu (2.1-3) unstable; urgency=low
+
+  * New maintainer as per discussion with Ivo.
+  * g++ 4.0 transition: libicu21c102 is now libicu21c2.
+  * Accepted changes from NMU below for now.  This change will be
+    reversed soon when icu is updated to the current upstream version.
+    The icu28 package will also be removed at that time, as per discussion
+    with the icu28 maintainer.  Closes: #301316
+  * Add shlibs files
+
+ -- Jay Berkenbilt <qjb@debian.org>  Sat,  9 Jul 2005 13:33:35 -0400
+
+icu (2.1-2.1) unstable; urgency=medium
+
+  * Rename icu-doc to icu21-doc. icu-doc is built by the icu28 package.
+
+ -- Matthias Klose <doko@debian.org>  Sat, 21 May 2005 22:44:31 +0200
+
+icu (2.1-2) unstable; urgency=low
+
+  * debian/control: Changed maintainer, added Daniel Glassey as Uploader.
+    (Reference:
+    http://lists.debian.org/debian-devel/2003/debian-devel-200308/msg01963.html)
+
+ -- Ivo Timmermans <ivo@debian.org>  Sun, 18 Jan 2004 23:52:03 +0100
+
+icu (2.6.1-1) experimental; urgency=low
+
+  * New upstream version.
+  * Ivo Timmermans:
+      * debian/rules Don't create arch-all packages in the binary-arch
+        target.  Closes: #184403
+
+ -- Ivo Timmermans <ivo@debian.org>  Thu,  6 Nov 2003 09:03:44 +0100
+
+icu (2.6-1) experimental; urgency=low
+
+  * New upstream version. Closes: #162975
+  * debian/control: New maintainers
+  * Daniel Glassey:
+      * debian/rules Don't use --enable-static as it bloats the packages
+      * debian/rules Change the optimisations to -O3 and -O to get it to build
+      * debian/rules get the latest config.{sub,guess} from /usr/share/misc
+        so add build-dep on autotools-dev
+      * debian/postinst gencnval is now in {prefix}/bin
+  * Ivo Timmermans:
+      * debian/control Tightened debhelper build dependency
+      * debian/control Update Standards-Version
+
+ -- Daniel Glassey <wdg@debian.org>  Wed,  3 Sep 2003 12:39:35 +0200
+
+icu (2.1-1.2) unstable; urgency=low
+
+  * NMU.
+  * Updated source/config.{sub,guess}.  Closes: #182697
+
+ -- Ivo Timmermans <ivo@debian.org>  Fri,  7 Mar 2003 20:58:23 +0100
+
+icu (2.1-1.1) unstable; urgency=low
+
+  * NMU.
+  * debian/control: Go through G++ ABI transition. Closes: #180124
+  * source/common/unicode/docmain.h: Fix \mainpage and \section tags,
+    so doxygen doesn't get confused any more. Closes: #178344
+  * debian/copyright: Added upstream URL. Closes: #165780
+
+ -- Ivo Timmermans <ivo@debian.org>  Fri, 14 Feb 2003 15:21:56 +0100
+
+icu (2.1-1) unstable; urgency=low
+
+  * ICU 2.1 release.
+  * Changed the icu package description. Closes: 142886
+  * Use -O1 for CXXFLAGS for OS/390. Closes: 143021
+
+ -- Yves Arrouye <yves@debian.org>  Mon, 15 Apr 2002 14:03:12 -0700
+
+icu (2.0.2-1) unstable; urgency=low
+
+  * Minor release of ICU with fixes for threading and strTo/FromWCS
+
+ -- Yves Arrouye <yves@debian.org>  Tue,  2 Apr 2002 09:06:00 -0800
+
+icu (2.0-2.1pre20020318-1) unstable; urgency=low
+
+  * Use the library number in the development package too.
+  * ICU changed to version 2.1.
+
+ -- Yves Arrouye <yves@debian.org>  Tue, 19 Mar 2002 18:38:37 -0800
+
+icu (2.0-2.1pre20020303-1) unstable; urgency=low
+
+  * Fixed a crash in uconv when no argument is passed to -f or -t.
+  * Other upstream changes.
+  * Fresh upload with an up to date orig tar file so that future
+    diffs won't be 6 megabytes long!
+
+ -- Yves Arrouye <yves@debian.org>  Sun,  3 Mar 2002 15:31:13 -0800
+
+icu (2.0-2.1pre-1) unstable; urgency=low
+
+  * Prerelease of 2.1 with a working upgraded uconv(1).
+
+ -- Yves Arrouye <yves@debian.org>  Fri,  1 Mar 2002 21:51:47 -0800
+
+icu (2.0-3) unstable; urgency=low
+
+  * Renamed doc-base to icu-doc.doc-base. Closes: 127487
+
+ -- Yves Arrouye <yves@debian.org>  Fri, 18 Jan 2002 22:33:53 -0800
+
+icu (2.0-2) unstable; urgency=low
+
+  * Fixed a bug in uprv_uca_cloneTempTable().  Closes: 128484
+  * Update Debian bugs status.  Closes: 104642
+  * Use the official 2.0 tarball as the original tar.
+  * Added manual pages for every tool.
+
+ -- Yves Arrouye <yves@debian.org>  Wed, 16 Jan 2002 20:45:42 -0800
+
+icu (2.0-1) unstable; urgency=low
+
+  * Update to ICU version 2.0.
+
+ -- Yves Arrouye <yves@debian.org>  Sat, 10 Nov 2001 21:58:19 -0800
+
+icu (1.8.1-2) unstable; urgency=low
+
+  * Updated copyright file. Closes: 112488
+  * Updated icu-locales description. Closes: 75499
+
+ -- Yves Arrouye <yves@debian.org>  Sat, 10 Nov 2001 21:24:58 -0800
+
+icu (1.8.1-1.1) unstable; urgency=low
+
+  * NMU to resolve build failures on ia64 and (hopefully) hppa.
+  * config.{sub|guess} update
+  * source/tools/ctestfw/ctest.c: Add static declaration to global
+    variables local to that module to avoid @gprel relocation errors.
+    Closes: 104642
+
+ -- Yves Arrouye <yves@debian.org>  Sat, 10 Nov 2001 21:24:44 -0800
+
+icu (1.8.1-1) unstable; urgency=low
+
+  * Update to ICU version 1.8.1.
+
+ -- root <yves@debian.org>  Mon, 21 May 2001 15:27:36 -0700
+
+icu (1.7-1) unstable; urgency=low
+
+  * Update to ICU version 1.7.
+
+ -- Yves Arrouye <yves@debian.org>  Tue, 21 Nov 2000 22:54:52 -0800
+
+icu (1.6.0.1-20001113-2) unstable; urgency=low
+
+  * New snapshot with better ISO-2022.
+
+ -- Yves Arrouye <yves@debian.org>  Mon, 13 Nov 2000 21:05:00 -0800
+
+icu (1.6.0.1-20001027-1) unstable; urgency=low
+
+  * Move architecture-dependent files into /usr/lib, instead of
+    /usr/share.
+  * Move convrtrs.txt into /etc/icu, make it a conffile, and generate
+    /usr/lib/icu/1.6.0.1/cnvalias.dat from it at postinst time.
+  * Manage a /usr/lib/icu/current symbolic link across installations
+    of the libicuXX packages. The symlink will always point to the
+    highest numbered version of ICU.
+
+ -- Yves Arrouye <yves@debian.org>  Fri, 27 Oct 2000 15:40:12 -0700
+
+icu (1.6.0.1-20001017-1) unstable; urgency=low
+
+  * Initial Release.
+
+ -- Yves Arrouye <yves@debian.org>  Tue, 24 Oct 2000 16:14:12 -0700
--- icu-3.8.orig/debian/no-watch
+++ icu-3.8/debian/no-watch
@@ -0,0 +1,6 @@
+# This doesn't always work because ICU upstream doesn't always have
+# the latest version in the latest directory.  See debian bug 375138
+# for details.
+
+version=3
+ftp://ftp.software.ibm.com/software/globalization/icu/(?:\d+\.[\d\.]+)/icu-([\d\.]+).tgz
--- icu-3.8.orig/debian/rules
+++ icu-3.8/debian/rules
@@ -0,0 +1,120 @@
+#!/usr/bin/make -f
+
+DEB_HOST_ARCH_CPU	?= $(shell dpkg-architecture -qDEB_HOST_ARCH_CPU)
+
+# To distinguish variables that are truly local to this file (rather
+# than for use by cdbs), we adopt the convention of starting local
+# variables' names with l_.
+
+l_PWD := $(shell pwd)
+l_STAMPS := debian/l_stamps
+l_RUN_CHECK := 1
+l_CFLAGS := -g -Wall
+ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
+	l_CFLAGS += -O0
+else
+	l_CFLAGS += -O2
+endif
+# common configure cruft
+l_CONFIGURE = CC="gcc" CXX="g++" CPPFLAGS="" LDFLAGS="" \
+	./configure \
+	--build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr \
+	--includedir="\$${prefix}/include" \
+	--mandir="\$${prefix}/share/man" --infodir="\$${prefix}/share/info" \
+	--sysconfdir=/etc --localstatedir=/var \
+	--disable-maintainer-mode --disable-dependency-tracking
+# specific to this package
+l_CONFIGURE += --disable-samples --enable-static --enable-weak-threads
+
+ifneq (, $(filter $(DEB_HOST_ARCH_CPU), amd64 ppc64 kfreebsd-amd64))
+build32 := build32
+endif
+
+# Variables used by cdbs
+# VERSION: the changelog version with everything after the first "-" cut off.
+VERSION := $(shell dpkg-parsechangelog | \
+             awk '/^Version:/ {print $$2}' | cut -d- -f 1)
+
+DEB_TAR_SRCDIR = icu/source
+DEB_COMPRESS_EXCLUDE = html examples
+DEB_INSTALL_EXAMPLES_libicu-dev = \
+	build-tree/$(DEB_TAR_SRCDIR)/samples/*
+
+# Overridden for 32-bit packages on 64-bit platforms
+DEB_DH_INSTALL_SOURCEDIR=debian/tmp
+
+DEB_DBG_PACKAGE_libicu38 = libicu38-dbg
+
+# Include cdbs rules files.
+include /usr/share/cdbs/1/rules/tarball.mk
+include /usr/share/cdbs/1/rules/simple-patchsys.mk
+include /usr/share/cdbs/1/rules/debhelper.mk
+
+cleanbuilddir::
+	$(RM) -r $(l_STAMPS)
+
+# As of version 0.4.21, cdbs creates but doesn't remove debian/compat.
+# It creates it conditionally, so this doesn't have a trivial fix.
+clean::
+	$(RM) debian/compat *.cdbs-config_list
+	$(RM) -rf debian/tmp32
+	$(RM) debian/stamp-configure debian/stamp-configure32
+# The 32-bit build, when enabled, is done in its own copy of the tree.
+post-patches::
+	chmod a+x $(DEB_SRCDIR)/configure
+ifneq (, $(build32))
+	cp -a $(DEB_SRCDIR) $(DEB_SRCDIR)-build32
+endif
+
+configure/libicu38 configure/libicu-dev:: debian/stamp-configure
+debian/stamp-configure:
+	cd $(DEB_SRCDIR) && \
+		CFLAGS="$(l_CFLAGS)" CXXFLAGS="$(l_CFLAGS)" \
+		$(l_CONFIGURE)
+	touch debian/stamp-configure
+
+configure/lib32icu38 configure/lib32icu-dev:: debian/stamp-configure32
+debian/stamp-configure32:
+	cd $(DEB_SRCDIR)-build32 && \
+		CFLAGS="$(l_CFLAGS) -m32" CXXFLAGS="$(l_CFLAGS) -m32" \
+		$(l_CONFIGURE) --libdir=/usr/lib32
+	touch debian/stamp-configure32
+
+build/libicu38 build/libicu-dev::
+	$(MAKE) -C $(DEB_SRCDIR)
+
+build/lib32icu38 build/lib32icu-dev::
+	$(MAKE) -C $(DEB_SRCDIR)-build32
+
+install/libicu38 install/libicu-dev::
+	$(MAKE) -C $(DEB_SRCDIR) install DESTDIR=$(CURDIR)/debian/tmp
+# 32-bit packages use debian/tmp32 as DESTDIR and DEB_DH_INSTALL_SOURCEDIR.
+binary-install/lib32icu38 binary-install/lib32icu-dev:: DEB_DH_INSTALL_SOURCEDIR=debian/tmp32
+install/lib32icu38 install/lib32icu-dev::
+	$(MAKE) -C $(DEB_SRCDIR)-build32 install DESTDIR=$(CURDIR)/debian/tmp32
+
+install/icu-doc:: install/libicu38 install/libicu-dev
+	$(MAKE) -C $(DEB_SRCDIR) install-doc DESTDIR=$(CURDIR)/debian/tmp
+
+ifeq ($(DEB_HOST_ARCH),amd64)
+# On amd64 only, it appears that the 32-bit libraries need to go in
+# emul/ia32-linux/usr/lib instead of usr/lib32, so move them there.
+binary-install/lib32icu38 binary-install/lib32icu-dev::
+	mkdir -p debian/$(cdbs_curpkg)/emul/ia32-linux/usr
+	mv debian/$(cdbs_curpkg)/usr/lib32 debian/$(cdbs_curpkg)/emul/ia32-linux/usr/lib
+endif
+
+# As per upstream, icuswap is deprecated and should not be
+# distributed.
+binary-post-install/libicu-dev::
+	rm debian/$(cdbs_curpkg)/usr/sbin/icuswap
+
+# Install lintian override files
+binary-post-install/%::
+	if [ -f debian/$*.lintian ]; then \
+	    mkdir -p debian/$*/usr/share/lintian/overrides && \
+	    cp -p debian/$*.lintian debian/$*/usr/share/lintian/overrides/$*; \
+	fi
+# Drop packages named libicu*/lib32icu* from each package's shlibs:Depends.
+binary-predeb/%::
+	perl debian/fix_substvars.pl debian/$*.substvars 'lib(32)?icu.*'
--- icu-3.8.orig/debian/fix_substvars.pl
+++ icu-3.8/debian/fix_substvars.pl
@@ -0,0 +1,52 @@
+#
+# Remove any packages whose names match the given pattern from the
+# shlibs:Depends entry in the given substvars file.  All other lines
+# are kept unchanged, and the file is rewritten in place.
+#
+
+use strict;
+use warnings;
+my $whoami = ($0 =~ m,([^/\\]*)$,) ? $1 : $0;
+
+die "usage: $whoami substvars-file pattern\n" unless @ARGV == 2;
+my ($file, $pattern) = @ARGV;
+# There is nothing to do when the substvars file is not present.
+exit 0 if ! -f $file;
+
+# Group the pattern so an alternation in it is anchored as a whole.
+my $pkg_re = qr/^(?:$pattern)$/;
+
+open(my $in_fh, '<', $file) or die "$whoami: can't open $file: $!\n";
+my @in = (<$in_fh>);
+close($in_fh);
+my @out = ();
+
+for my $line (@in)
+{
+    # Anchor the variable name so that no other variable can match.
+    if ($line =~ m/^(shlibs:Depends=)(.*)/)
+    {
+	my ($prefix, $contents) = ($1, $2);
+	my @new = ();
+	foreach my $i (split(/,\s*/, $contents))
+	{
+	    # The package name is the first word of each dependency.
+	    $i =~ m/^(\S+)/
+		or die "$whoami: $file: malformed dependency '$i'\n";
+	    my $pkg = $1;
+	    push(@new, $i) unless $pkg =~ $pkg_re;
+	}
+	push(@out, $prefix . join(', ', @new) . "\n");
+    }
+    else
+    {
+	push(@out, $line);
+    }
+}
+
+# Rewrite the file in place.  Output is buffered, so a failed write
+# may only show up at close; check that too.
+open(my $out_fh, '>', $file)
+    or die "$whoami: can't open $file for writing: $!\n";
+print $out_fh @out or die "$whoami: can't write $file: $!\n";
+close($out_fh) or die "$whoami: can't close $file: $!\n";