diff -Nru bzip3-1.2.2/debian/changelog bzip3-1.3.1/debian/changelog --- bzip3-1.2.2/debian/changelog 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/changelog 2023-06-12 05:06:28.000000000 +0000 @@ -1,3 +1,18 @@ +bzip3 (1.3.1-1) unstable; urgency=medium + + * New upstream released. + * d/patches: Remove d/patches + * d/control: + * Bump Standards-Version to 4.6.2. + + -- Nobuhiro Iwamatsu Mon, 12 Jun 2023 14:06:28 +0900 + +bzip3 (1.2.3-1) experimental; urgency=medium + + * New upstream released. + + -- Nobuhiro Iwamatsu Thu, 30 Mar 2023 14:34:15 +0900 + bzip3 (1.2.2-2) unstable; urgency=medium * Add d/patches/CVE-2023-29415.patch. Fix CVE-2023-29415. (Closes: #1034177) diff -Nru bzip3-1.2.2/debian/control bzip3-1.3.1/debian/control --- bzip3-1.2.2/debian/control 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/control 2023-06-12 05:06:28.000000000 +0000 @@ -5,7 +5,7 @@ Build-Depends: debhelper-compat (= 13), pkg-config, git-core -Standards-Version: 4.6.1 +Standards-Version: 4.6.2 Rules-Requires-Root: no Vcs-Browser: https://salsa.debian.org/debian/bzip3 Vcs-Git: https://salsa.debian.org/debian/bzip3.git diff -Nru bzip3-1.2.2/debian/patches/CVE-2023-29415.patch bzip3-1.3.1/debian/patches/CVE-2023-29415.patch --- bzip3-1.2.2/debian/patches/CVE-2023-29415.patch 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/patches/CVE-2023-29415.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -From 56c24ca1f8f25e648d42154369b6962600f76465 Mon Sep 17 00:00:00 2001 -From: Kamila Szewczyk <27734421+kspalaiologos@users.noreply.github.com> -Date: Mon, 27 Mar 2023 13:46:27 +0200 -Subject: [PATCH] xwrite: nop when len=0 / size=0 - ---- - src/main.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/main.c b/src/main.c -index cd438a4..bffd798 100644 ---- a/src/main.c -+++ b/src/main.c -@@ -76,6 +76,8 @@ static void help() { - } - - static void xwrite(const void * data, size_t size, size_t len, FILE * des) { -+ if (len == 0 || size == 0) -+ return; - if (fwrite(data, size, len, des) != len) { - fprintf(stderr, "Write error: %s\n", strerror(errno)); - exit(1); --- -2.40.1 - diff -Nru bzip3-1.2.2/debian/patches/CVE-2023-29416.patch bzip3-1.3.1/debian/patches/CVE-2023-29416.patch --- bzip3-1.2.2/debian/patches/CVE-2023-29416.patch 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/patches/CVE-2023-29416.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -From 31b8de1d321a99d99dbb18af3d8c5d7cbfacf708 Mon Sep 17 00:00:00 2001 -From: Kamila Szewczyk -Date: Sun, 26 Mar 2023 08:02:46 +0200 -Subject: [PATCH] tentative libsais safety patches - -Origin: ttps://github.com/kspalaiologos/bzip3/commit/bfa5bf82b53715dfedf048e5859a46cf248668ff -Bug: http://sourceware.org/bugzilla/show_bug.cgi?id=9697 -Bug-Debian: http://bugs.debian.org/1034177 ---- - include/common.h | 1 + - src/libbz3.c | 6 ++++-- - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/include/common.h b/include/common.h -index 589456b..cc927d0 100644 ---- a/include/common.h -+++ b/include/common.h -@@ -22,6 +22,7 @@ - - #define KiB(x) ((x)*1024) - #define MiB(x) ((x)*1024 * 1024) -+#define BWT_BOUND(x) ((x) + 128) - - #include - #include -diff --git a/src/libbz3.c b/src/libbz3.c -index 772f3cd..fe7252b 100644 ---- a/src/libbz3.c -+++ b/src/libbz3.c -@@ -509,8 +509,8 @@ BZIP3_API struct bz3_state * bz3_new(s32 block_size) { - bz3_state->cm_state = malloc(sizeof(state)); - - bz3_state->swap_buffer = malloc(bz3_bound(block_size)); -- bz3_state->sais_array = malloc((block_size + 2) * sizeof(s32)); -- memset(bz3_state->sais_array, 0, sizeof(s32) * (block_size + 2)); -+ bz3_state->sais_array = malloc(BWT_BOUND(block_size) * sizeof(s32)); -+ memset(bz3_state->sais_array, 0, sizeof(s32) * BWT_BOUND(block_size)); - - bz3_state->lzp_lut = calloc(1 << LZP_DICTIONARY, sizeof(s32)); - -@@ -689,6 +689,8 @@ BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_s - } - - // Undo BWT -+ memset(state->sais_array, 0, sizeof(s32) * BWT_BOUND(state->block_size)); -+ memset(b2, 0, size_src); - if (libsais_unbwt(b1, b2, state->sais_array, size_src, NULL, bwt_idx) < 0) { - state->last_error = BZ3_ERR_BWT; - return -1; --- -2.40.1 - diff -Nru bzip3-1.2.2/debian/patches/CVE-2023-29418.patch bzip3-1.3.1/debian/patches/CVE-2023-29418.patch --- bzip3-1.2.2/debian/patches/CVE-2023-29418.patch 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/patches/CVE-2023-29418.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,95 +0,0 @@ -From aae16d107f804f69000c09cd92027a140968cc9d Mon Sep 17 00:00:00 2001 -From: Kamila Szewczyk -Date: Sun, 19 Mar 2023 14:25:04 +0100 -Subject: [PATCH] safety checks for decoding/testing when the buffer size and - block size are inconsistent, tentative CRC32 version for BE systems (needs - further verification on SPARC?) - ---- - src/libbz3.c | 11 +++++++++-- - src/main.c | 19 +++++++++++++++++++ - 2 files changed, 28 insertions(+), 2 deletions(-) - -diff --git a/src/libbz3.c b/src/libbz3.c -index 3a4bc0f..313dfe6 100644 ---- a/src/libbz3.c -+++ b/src/libbz3.c -@@ -61,8 +61,15 @@ static const u32 crc32Table[256] = { - }; - - static u32 crc32sum(u32 crc, u8 * RESTRICT buf, size_t size) { -- while (size--) crc = crc32Table[(crc ^ *(buf++)) & 0xff] ^ (crc >> 8); -- return crc; -+ // Test endianness. The code needs to be different for LE and BE systems. -+ u32 test = 1; -+ if (*(u8 *) &test) { -+ while (size--) crc = crc32Table[(crc ^ *(buf++)) & 0xff] ^ (crc >> 8); -+ return crc; -+ } else { -+ while (size--) crc = crc32Table[((crc >> 24) ^ *(buf++)) & 0xff] ^ (crc << 8); -+ return crc; -+ } - } - - /* LZP code. These constants were manually tuned to give the best compression ratio while using relatively -diff --git a/src/main.c b/src/main.c -index 712c510..f993b13 100644 ---- a/src/main.c -+++ b/src/main.c -@@ -242,6 +242,10 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size - new_size = read_neutral_s32(byteswap_buf); - xread_noeof(&byteswap_buf, 1, 4, input_des); - old_size = read_neutral_s32(byteswap_buf); -+ if(old_size > block_size + 31) { -+ fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); -+ return 1; -+ } - xread_noeof(buffer, 1, new_size, input_des); - bytes_read += 8 + new_size; - if (bz3_decode_block(state, buffer, new_size, old_size) == -1) { -@@ -259,6 +263,10 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size - new_size = read_neutral_s32(byteswap_buf); - xread_noeof(&byteswap_buf, 1, 4, input_des); - old_size = read_neutral_s32(byteswap_buf); -+ if(old_size > block_size + 31) { -+ fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); -+ return 1; -+ } - xread_noeof(buffer, 1, new_size, input_des); - bytes_read += 8 + new_size; - if (bz3_decode_block(state, buffer, new_size, old_size) == -1) { -@@ -332,6 +340,10 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size - sizes[i] = read_neutral_s32(byteswap_buf); - xread_noeof(&byteswap_buf, 1, 4, input_des); - old_sizes[i] = read_neutral_s32(byteswap_buf); -+ if(old_sizes[i] > block_size + 31) { -+ fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); -+ return 1; -+ } - xread_noeof(buffers[i], 1, sizes[i], input_des); - bytes_read += 8 + sizes[i]; - } -@@ -356,6 +368,10 @@ static int process(FILE * input_des, FILE * output_des, int mode, int block_size - sizes[i] = read_neutral_s32(byteswap_buf); - xread_noeof(&byteswap_buf, 1, 4, input_des); - old_sizes[i] = read_neutral_s32(byteswap_buf); -+ if(old_sizes[i] > block_size + 31) { -+ fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); -+ return 1; -+ } - xread_noeof(buffers[i], 1, sizes[i], input_des); - bytes_read += 8 + sizes[i]; - } -@@ -691,6 +707,9 @@ int main(int argc, char * argv[]) { - output_des = mode != MODE_TEST ? open_output(output, force) : NULL; - input_des = open_input(input); - -+ if(output != f2) -+ free(output); -+ - int r = process(input_des, output_des, mode, block_size, workers, verbose, input); - - fclose(input_des); --- -2.40.1 - diff -Nru bzip3-1.2.2/debian/patches/CVE-2023-29419.patch bzip3-1.3.1/debian/patches/CVE-2023-29419.patch --- bzip3-1.2.2/debian/patches/CVE-2023-29419.patch 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/patches/CVE-2023-29419.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -From 8ec8ce7d3d58bf42dabc47e4cc53aa27051bd602 Mon Sep 17 00:00:00 2001 -From: Kamila Szewczyk -Date: Wed, 22 Mar 2023 13:56:28 +0100 -Subject: [PATCH] mRLE: decoding bounds - ---- - src/libbz3.c | 17 +++++++++++++---- - 1 file changed, 13 insertions(+), 4 deletions(-) - -diff --git a/src/libbz3.c b/src/libbz3.c -index fdcafe6..836286e 100644 ---- a/src/libbz3.c -+++ b/src/libbz3.c -@@ -257,28 +257,33 @@ static s32 mrlec(u8 * in, s32 inlen, u8 * out) { - return op; - } - --static void mrled(u8 * RESTRICT in, u8 * RESTRICT out, s32 outlen) { -+static int mrled(u8 * RESTRICT in, u8 * RESTRICT out, s32 outlen, s32 maxin) { - s32 op = 0, ip = 0; - - s32 c, pc = -1; - s32 t[256] = { 0 }; - s32 run = 0; - -+ if(maxin < 32) -+ return 1; -+ - for (s32 i = 0; i < 32; ++i) { - c = in[ip++]; - for (s32 j = 0; j < 8; ++j) t[i * 8 + j] = (c >> j) & 1; - } - -- while (op < outlen) { -+ while (op < outlen && ip < maxin) { - c = in[ip++]; - if (t[c]) { -- for (run = 0; (pc = in[ip++]) == 255; run += 255) -+ for (run = 0; (pc = in[ip++]) == 255 && ip < maxin; run += 255) - ; - run += pc + 1; - for (; run > 0 && op < outlen; --run) out[op++] = c; - } else - out[op++] = c; - } -+ -+ return op != outlen; - } - - /* The entropy coder. Uses an arithmetic coder implementation outlined in Matt Mahoney's DCE. */ -@@ -694,7 +699,11 @@ BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_s - } - - if (model & 4) { -- mrled(b1, b2, orig_size); -+ int err = mrled(b1, b2, orig_size, size_src); -+ if(err) { -+ state->last_error = BZ3_ERR_CRC; -+ return -1; -+ } - size_src = orig_size; - swap(b1, b2); - } --- -2.40.1 - diff -Nru bzip3-1.2.2/debian/patches/CVE-2023-29420.patch bzip3-1.3.1/debian/patches/CVE-2023-29420.patch --- bzip3-1.2.2/debian/patches/CVE-2023-29420.patch 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/patches/CVE-2023-29420.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -From bb06deb85f1c249838eb938e0dab271d4194f8fa Mon Sep 17 00:00:00 2001 -From: Kamila Szewczyk -Date: Wed, 22 Mar 2023 13:58:25 +0100 -Subject: [PATCH] store mode: check for truncation - ---- - src/libbz3.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/libbz3.c b/src/libbz3.c -index 836286e..be896e2 100644 ---- a/src/libbz3.c -+++ b/src/libbz3.c -@@ -620,7 +620,7 @@ BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_s - } - - if (bwt_idx == -1) { -- if (data_size - 8 > 64) { -+ if (data_size - 8 > 64 || data_size < 8) { - state->last_error = BZ3_ERR_MALFORMED_HEADER; - return -1; - } --- -2.40.1 - diff -Nru bzip3-1.2.2/debian/patches/CVE-2023-29421.patch bzip3-1.3.1/debian/patches/CVE-2023-29421.patch --- bzip3-1.2.2/debian/patches/CVE-2023-29421.patch 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/patches/CVE-2023-29421.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,25 +0,0 @@ -From 33b1951f153c3c5dc8ed736b9110437e1a619b7d Mon Sep 17 00:00:00 2001 -From: Kamila Szewczyk -Date: Wed, 22 Mar 2023 14:37:32 +0100 -Subject: [PATCH] strict check for size_src overflow of badly bounded b1 - ---- - src/libbz3.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/libbz3.c b/src/libbz3.c -index be896e2..31e11bf 100644 ---- a/src/libbz3.c -+++ b/src/libbz3.c -@@ -710,7 +710,7 @@ BZIP3_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_s - - state->last_error = BZ3_OK; - -- if (size_src > bz3_bound(state->block_size) || size_src < 0) { -+ if (size_src > state->block_size || size_src < 0) { - state->last_error = BZ3_ERR_MALFORMED_HEADER; - return -1; - } --- -2.40.1 - diff -Nru bzip3-1.2.2/debian/patches/series bzip3-1.3.1/debian/patches/series --- bzip3-1.2.2/debian/patches/series 2023-05-06 14:59:33.000000000 +0000 +++ bzip3-1.3.1/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 @@ -1,6 +0,0 @@ -CVE-2023-29415.patch -CVE-2023-29416.patch -CVE-2023-29418.patch -CVE-2023-29419.patch -CVE-2023-29420.patch -CVE-2023-29421.patch diff -Nru bzip3-1.2.2/examples/compress-file.c bzip3-1.3.1/examples/compress-file.c --- bzip3-1.2.2/examples/compress-file.c 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/examples/compress-file.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,44 +0,0 @@ - -/* Compress a file SEQUENTIALLY (i.e. *not* in parallel) using bzip3 high level API with block size of 6 MB. */ -/* This is just a demonstration of bzip3 library usage, it does not contain all the necessary error checks and will not - * support cross-endian encoding/decoding. */ - -#include -#include -#include - -#define MB (1024 * 1024) - -int main(int argc, char ** argv) { - if (argc != 3) { - printf("Usage: %s \n"); - return 1; - } - - // Read the entire input file to memory: - FILE * fp = fopen(argv[1], "rb"); - fseek(fp, 0, SEEK_END); - size_t size = ftell(fp); - fseek(fp, 0, SEEK_SET); - uint8_t * buffer = malloc(size); - fread(buffer, 1, size, fp); - fclose(fp); - - // Compress the file: - size_t out_size = bz3_bound(size); - uint8_t * outbuf = malloc(out_size); - int bzerr = bz3_compress(6 * MB, buffer, outbuf, size, &out_size); - if (bzerr != BZ3_OK) { - printf("bz3_compress() failed with error code %d", bzerr); - return 1; - } - - FILE * outfp = fopen(argv[2], "wb"); - /* XXX: Doesn't preserve endianess. We should write the `size_t` value manually with known endianess. */ - fwrite(&size, 1, sizeof(size_t), outfp); - fwrite(outbuf, 1, out_size, outfp); - fclose(outfp); - - printf("OK, %d => %d\n", size, out_size); - return 0; -} diff -Nru bzip3-1.2.2/examples/decompress-file.c bzip3-1.3.1/examples/decompress-file.c --- bzip3-1.2.2/examples/decompress-file.c 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/examples/decompress-file.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ - -/* Decompress a file SEQUENTIALLY (i.e. *not* in parallel) using bzip3 high level API. */ -/* This is just a demonstration of bzip3 library usage, it does not contain all the necessary error checks and will not - * support cross-endian encoding/decoding. */ - -#include -#include -#include - -int main(int argc, char ** argv) { - if (argc != 3) { - printf("Usage: %s "); - return 1; - } - - // Read the entire input file to memory: - FILE * fp = fopen(argv[1], "rb"); - fseek(fp, 0, SEEK_END); - size_t size = ftell(fp); - fseek(fp, 0, SEEK_SET); - uint8_t * buffer = malloc(size); - fread(buffer, 1, size, fp); - fclose(fp); - - // Decompress the file: - size_t orig_size = *(size_t *)buffer; - uint8_t * outbuf = malloc(orig_size); - int bzerr = bz3_decompress(buffer + sizeof(size_t), outbuf, size - sizeof(size_t), &orig_size); - if (bzerr != BZ3_OK) { - printf("bz3_decompress() failed with error code %d", bzerr); - return 1; - } - - FILE * outfp = fopen(argv[2], "wb"); - fwrite(outbuf, 1, orig_size, outfp); - fclose(outfp); - - printf("OK, %d => %d", size, orig_size); - return 0; -} diff -Nru bzip3-1.2.2/examples/fuzz.c bzip3-1.3.1/examples/fuzz.c --- bzip3-1.2.2/examples/fuzz.c 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/examples/fuzz.c 2023-05-20 07:59:05.000000000 +0000 @@ -28,7 +28,7 @@ fread(buffer, 1, size, fp); fclose(fp); - if(size < 64) { + if (size < 64) { // Too small. return 0; } diff -Nru bzip3-1.2.2/.github/workflows/build.yml bzip3-1.3.1/.github/workflows/build.yml --- bzip3-1.2.2/.github/workflows/build.yml 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/.github/workflows/build.yml 2023-05-20 07:59:05.000000000 +0000 @@ -56,7 +56,7 @@ run: make roundtrip build-archs: - name: Build Matrix for non-x86 architectures + name: Build Matrix for non-x86 architectures (Debian Stretch) needs: [ dist ] strategy: fail-fast: false @@ -76,7 +76,39 @@ name: Run in the container with: arch: ${{ matrix.arch }} - distro: stretch + distro: bullseye + shell: /bin/sh + dockerRunArgs: | + --volume "${PWD}:/bzip3" + install: | + apt update -q -y + apt install -q -y clang gcc make + run: | + cd /bzip3 + ./configure CC=${{ matrix.compiler }} --${{ matrix.feature }} --disable-arch-native --disable-link-time-optimization + make && make roundtrip + build-archs-ubuntu: + name: Build Matrix for non-x86 architectures (Ubuntu Latest) + needs: [ dist ] + strategy: + fail-fast: false + matrix: + compiler: [ clang, gcc ] + feature: [ with-pthread, without-pthread ] + arch: [ riscv64 ] + runs-on: ubuntu-latest + steps: + - name: Download source package artifact + uses: actions/download-artifact@v3 + with: + name: bzip3-${{ github.sha }} + - name: Extract source package + run: tar --strip-components=1 -xf bzip3-${{ github.sha}}.tar.gz + - uses: uraimo/run-on-arch-action@v2 + name: Run in the container + with: + arch: ${{ matrix.arch }} + distro: ubuntu_latest shell: /bin/sh dockerRunArgs: | --volume "${PWD}:/bzip3" diff -Nru bzip3-1.2.2/.github/workflows/release.yml bzip3-1.3.1/.github/workflows/release.yml --- bzip3-1.2.2/.github/workflows/release.yml 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/.github/workflows/release.yml 2023-05-20 07:59:05.000000000 +0000 @@ -54,9 +54,9 @@ fail-fast: false matrix: target: - - [ "x86_64-linux", "--enable-static-exe", "" ] - - [ "x86_64", "CC=x86_64-w64-mingw32-gcc --host x86_64-w64-mingw32 --enable-static-exe", "gcc-mingw-w64-x86-64" ] - - [ "i686", "CC=i686-w64-mingw32-gcc --host i686-w64-mingw32 --enable-static-exe", "gcc-mingw-w64-i686" ] + - [ "x86_64-linux", "--enable-static-exe --disable-arch-native", "" ] + - [ "x86_64", "CC=x86_64-w64-mingw32-gcc --host x86_64-w64-mingw32 --enable-static-exe --disable-arch-native", "gcc-mingw-w64-x86-64" ] + - [ "i686", "CC=i686-w64-mingw32-gcc --host i686-w64-mingw32 --enable-static-exe --disable-arch-native", "gcc-mingw-w64-i686" ] steps: - name: Download source package artifact uses: actions/download-artifact@v3 diff -Nru bzip3-1.2.2/include/common.h bzip3-1.3.1/include/common.h --- bzip3-1.2.2/include/common.h 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/include/common.h 2023-05-20 07:59:05.000000000 +0000 @@ -1,7 +1,7 @@ /* * BZip3 - A spiritual successor to BZip2. - * Copyright (C) 2022 Kamila Szewczyk + * Copyright (C) 2022-2023 Kamila Szewczyk * * This program is free software: you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free @@ -22,6 +22,7 @@ #define KiB(x) ((x)*1024) #define MiB(x) ((x)*1024 * 1024) +#define BWT_BOUND(x) ((x) + 128) #include #include @@ -57,10 +58,10 @@ #if defined(__has_builtin) #if __has_builtin(__builtin_prefetch) - #define HAS_BUILTIN_PREFECTCH + #define HAS_BUILTIN_PREFETCH #endif #elif defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 4)) - #define HAS_BUILTIN_PREFECTCH + #define HAS_BUILTIN_PREFETCH #endif #if defined(__has_builtin) @@ -71,7 +72,7 @@ #define HAS_BUILTIN_BSWAP16 #endif -#if defined(HAS_BUILTIN_PREFECTCH) +#if defined(HAS_BUILTIN_PREFETCH) #define prefetch(address) __builtin_prefetch((const void *)(address), 0, 0) #define prefetchw(address) __builtin_prefetch((const void *)(address), 1, 0) #elif defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(i386) || defined(__i386__) || \ diff -Nru bzip3-1.2.2/include/libbz3.h bzip3-1.3.1/include/libbz3.h --- bzip3-1.2.2/include/libbz3.h 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/include/libbz3.h 2023-05-20 07:59:05.000000000 +0000 @@ -1,7 +1,7 @@ /* * BZip3 - A spiritual successor to BZip2. - * Copyright (C) 2022 Kamila Szewczyk + * Copyright (C) 2022-2023 Kamila Szewczyk * * This program is free software: you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free diff -Nru bzip3-1.2.2/include/libsais.h bzip3-1.3.1/include/libsais.h --- bzip3-1.2.2/include/libsais.h 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/include/libsais.h 2023-05-20 07:59:05.000000000 +0000 @@ -22,7 +22,7 @@ The stability patches that fix undefined behaviour in unbwt routines: Copyright (c) 2022 Kamila Szewczyk - + Licensed under the same license as the original software. --*/ diff -Nru bzip3-1.2.2/Makefile.am bzip3-1.3.1/Makefile.am --- bzip3-1.2.2/Makefile.am 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/Makefile.am 2023-05-20 07:59:05.000000000 +0000 @@ -59,11 +59,11 @@ # Begin developer convenience targets .PHONY: format -format: $(bzip3_SOURCES) $(include_HEADERS) $(noinst_HEADERS) +format: $(bzip3_SOURCES) $(libbzip3_la_SOURCES) $(include_HEADERS) $(noinst_HEADERS) clang-format -i $^ examples/*.c .PHONY: cloc -cloc: $(bzip3_SOURCES) $(include_HEADERS) $(noinst_HEADERS) +cloc: $(bzip3_SOURCES) $(libbzip3_la_SOURCES) $(include_HEADERS) $(noinst_HEADERS) cloc $^ CLEANFILES += LICENSE2 @@ -76,3 +76,4 @@ ./$(BZIP3) -v -feb 6 $(srcdir)/LICENSE $(builddir)/LICENSE.bz3 ./$(BZIP3) -v -d $(builddir)/LICENSE.bz3 $(builddir)/LICENSE2 cmp $(srcdir)/LICENSE $(builddir)/LICENSE2 + -command -v md5sum >/dev/null 2>&1 && md5sum $(builddir)/LICENSE.bz3 diff -Nru bzip3-1.2.2/NEWS bzip3-1.3.1/NEWS --- bzip3-1.2.2/NEWS 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/NEWS 2023-05-20 07:59:05.000000000 +0000 @@ -66,3 +66,23 @@ * add the `-k` compatibility flag. * use `env` to detect the shell in bzip3 utility scripts * update libtool to v2.4.7 + +v1.2.3: +* fix an important regression introduced in pull request #55 regarding I/O in main.c +* slightly enlargen the SAIS buffer beyond the documented recommended size to avoid some memory errors in libsais +* properly handle a scenario where the individual block original size is larger than the block size declared in the file header +* further security fixes: strict check for size_src overflow of badly bounded b1, store mode: checking for truncation + mRLE: decoding bounds, bz3_bound in bz3_decompress + +v1.3.0: +* resolve alignment issues on SPARC/s390x. +* fix the security issues arising from libsais. +* due to these changes, updating is strongly encouraged. + +v1.3.1: +* Verbose mode in the tool now prints the extra statistics also during testing, not just encoding or decoding. +* Update the CI pipeline to Debian Bullseye. +* Fix a minor issue with side effects in RLE decoding. +* Explicitly disable `-march=native` for releases. +* Fix a bug in the tool reported by Adam Borowski regarding -t/-d input buffer checks. +* Fix an issue with the current input offset not being taken into account in bz3_compress. diff -Nru bzip3-1.2.2/README.md bzip3-1.3.1/README.md --- bzip3-1.2.2/README.md 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/README.md 2023-05-20 07:59:05.000000000 +0000 @@ -89,6 +89,8 @@ * lrzip + lzma - 64'774'202 bytes. * lrzip + bzip2 - 75'685'065 bytes. +For further benchmarks against Turbo-Range-Coder and BSC, check [powturbo's benchmark](https://github.com/powturbo/Turbo-Range-Coder) of bzip3, bzip2, bsc and others. + ## Disclaimers **I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED.** @@ -103,6 +105,18 @@ **Bzip3's performance is _heavily_ dependent on the compiler. x64 Linux clang13 builds usually can go as high as 17MiB/s compression and 23MiB/s decompression _per thread_. Windows and 32-bit builds might be considerably slower.** +Bzip3 has been tested on the following architectures: +- x86 +- x86_64 +- armv6 +- armv7 +- aarch64 +- ppc64le +- mips +- mips64 +- sparc +- s390x + ## Corpus benchmarks ![visualisation of the benchmarks](etc/benchmark.png) diff -Nru bzip3-1.2.2/src/libbz3.c bzip3-1.3.1/src/libbz3.c --- bzip3-1.2.2/src/libbz3.c 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/src/libbz3.c 2023-05-20 07:59:05.000000000 +0000 @@ -1,7 +1,7 @@ /* * BZip3 - A spiritual successor to BZip2. - * Copyright (C) 2022 Kamila Szewczyk + * Copyright (C) 2022-2023 Kamila Szewczyk * * This program is free software: you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free @@ -61,7 +61,7 @@ }; static u32 crc32sum(u32 crc, u8 * RESTRICT buf, size_t size) { - while (size--) crc = crc32Table[(crc ^ *(buf++)) & 0xff] ^ (crc >> 8); + while (size--) crc = crc32Table[((crc >> 24) ^ *(buf++)) & 0xff] ^ (crc << 8); return crc; } @@ -80,6 +80,13 @@ #define MATCH 0xf2 +static u32 lzp_upcast(const u8 * ptr) { + // val = *(u32 *)ptr; - written this way to avoid UB + u32 val; + memcpy(&val, ptr, sizeof(val)); + return val; +} + static s32 lzp_encode_block(const u8 * RESTRICT in, const u8 * in_end, u8 * RESTRICT out, u8 * out_end, s32 * RESTRICT lut) { const u8 * ins = in; @@ -101,11 +108,11 @@ const u8 * RESTRICT ref = ins + val; if (memcmp(in + LZP_MIN_MATCH - 4, ref + LZP_MIN_MATCH - 4, sizeof(u32)) == 0 && memcmp(in, ref, sizeof(u32)) == 0) { - if (heur > in && *(u32 *)heur != *(u32 *)(ref + (heur - in))) goto not_found; + if (heur > in && lzp_upcast(heur) != lzp_upcast(ref + (heur - in))) goto not_found; s32 len = 4; for (; in + len < in_end - LZP_MIN_MATCH - 32; len += sizeof(u32)) { - if (*(u32 *)(in + len) != *(u32 *)(ref + len)) break; + if (lzp_upcast(in + len) != lzp_upcast(ref + len)) break; } if (len < LZP_MIN_MATCH) { @@ -179,7 +186,7 @@ } const u8 * ref = outs + val; - u8 * oe = out + len; + const u8 * oe = out + len; if (oe > out_end) oe = out_end; while (out < oe) *out++ = *ref++; @@ -257,28 +264,33 @@ return op; } -static void mrled(u8 * RESTRICT in, u8 * RESTRICT out, s32 outlen) { +static int mrled(u8 * RESTRICT in, u8 * RESTRICT out, s32 outlen, s32 maxin) { s32 op = 0, ip = 0; s32 c, pc = -1; s32 t[256] = { 0 }; s32 run = 0; + if(maxin < 32) + return 1; + for (s32 i = 0; i < 32; ++i) { c = in[ip++]; for (s32 j = 0; j < 8; ++j) t[i * 8 + j] = (c >> j) & 1; } - while (op < outlen) { + while (op < outlen && ip < maxin) { c = in[ip++]; if (t[c]) { - for (run = 0; (pc = in[ip++]) == 255; run += 255) + for (run = 0; ip < maxin && (pc = in[ip++]) == 255; run += 255) ; run += pc + 1; for (; run > 0 && op < outlen; --run) out[op++] = c; } else out[op++] = c; } + + return op != outlen; } /* The entropy coder. Uses an arithmetic coder implementation outlined in Matt Mahoney's DCE. */ @@ -497,8 +509,8 @@ bz3_state->cm_state = malloc(sizeof(state)); bz3_state->swap_buffer = malloc(bz3_bound(block_size)); - bz3_state->sais_array = malloc((block_size + 2) * sizeof(s32)); - memset(bz3_state->sais_array, 0, sizeof(s32) * (block_size + 2)); + bz3_state->sais_array = malloc(BWT_BOUND(block_size) * sizeof(s32)); + memset(bz3_state->sais_array, 0, sizeof(s32) * BWT_BOUND(block_size)); bz3_state->lzp_lut = calloc(1 << LZP_DICTIONARY, sizeof(s32)); @@ -615,7 +627,7 @@ } if (bwt_idx == -1) { - if (data_size - 8 > 64) { + if (data_size - 8 > 64 || data_size < 8) { state->last_error = BZ3_ERR_MALFORMED_HEADER; return -1; } @@ -677,6 +689,8 @@ } // Undo BWT + memset(state->sais_array, 0, sizeof(s32) * BWT_BOUND(state->block_size)); + memset(b2, 0, size_src); if (libsais_unbwt(b1, b2, state->sais_array, size_src, NULL, bwt_idx) < 0) { state->last_error = BZ3_ERR_BWT; return -1; @@ -694,14 +708,18 @@ } if (model & 4) { - mrled(b1, b2, orig_size); + int err = mrled(b1, b2, orig_size, size_src); + if(err) { + state->last_error = BZ3_ERR_CRC; + return -1; + } size_src = orig_size; swap(b1, b2); } state->last_error = BZ3_OK; - if (size_src > bz3_bound(state->block_size) || size_src < 0) { + if (size_src > state->block_size || size_src < 0) { state->last_error = BZ3_ERR_MALFORMED_HEADER; return -1; } @@ -814,10 +832,11 @@ *out_size += 13; // Compress and write the blocks. + size_t in_offset = 0; for (u32 i = 0; i < n_blocks; i++) { s32 size = block_size; if (i == n_blocks - 1) size = in_size % block_size; - memcpy(compression_buf, in, size); + memcpy(compression_buf, in + in_offset, size); s32 out_size_block = bz3_encode_block(state, compression_buf, size); if (bz3_last_error(state) != BZ3_OK) { s8 last_error = state->last_error; @@ -829,6 +848,7 @@ write_neutral_s32(out + *out_size, out_size_block); write_neutral_s32(out + *out_size + 4, size); *out_size += out_size_block + 8; + in_offset += size; } bz3_free(state); @@ -849,7 +869,7 @@ struct bz3_state * state = bz3_new(block_size); if (!state) return BZ3_ERR_INIT; - u8 * compression_buf = malloc(block_size); + u8 * compression_buf = malloc(bz3_bound(block_size)); if (!compression_buf) { bz3_free(state); return BZ3_ERR_INIT; diff -Nru bzip3-1.2.2/src/main.c bzip3-1.3.1/src/main.c --- bzip3-1.2.2/src/main.c 2023-01-10 14:43:49.000000000 +0000 +++ bzip3-1.3.1/src/main.c 2023-05-20 07:59:05.000000000 +0000 @@ -1,7 +1,7 @@ /* * BZip3 - A spiritual successor to BZip2. - * Copyright (C) 2022 Kamila Szewczyk + * Copyright (C) 2022-2023 Kamila Szewczyk * * This program is free software: you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free @@ -47,7 +47,7 @@ static void version() { fprintf(stdout, "bzip3 " VERSION "\n" - "Copyright (C) by Kamila Szewczyk, 2022.\n" + "Copyright (C) by Kamila Szewczyk, 2022-2023.\n" "License: GNU Lesser GPL version 3 \n"); } @@ -76,6 +76,8 @@ } static void xwrite(const void * data, size_t size, size_t len, FILE * des) { + if (len == 0 || size == 0) + return; if (fwrite(data, size, len, des) != len) { fprintf(stderr, "Write error: %s\n", strerror(errno)); exit(1); @@ -141,7 +143,8 @@ } } -static int process(FILE * input_des, FILE * output_des, int mode, int block_size, int workers, int verbose, char * file_name) { +static int process(FILE * input_des, FILE * output_des, int mode, int block_size, int workers, int verbose, + char * file_name) { uint64_t bytes_read = 0, bytes_written = 0; if ((mode == MODE_ENCODE && isatty(fileno(output_des))) || @@ -150,6 +153,9 @@ return 1; } + // Reset errno after the isatty() call. + errno = 0; + u8 byteswap_buf[4]; switch (mode) { @@ -201,7 +207,7 @@ return 1; } - u8 * buffer = malloc(block_size + block_size / 50 + 32); + u8 * buffer = malloc(bz3_bound(block_size)); if (!buffer) { fprintf(stderr, "Failed to allocate memory.\n"); @@ -214,6 +220,8 @@ read_count = xread(buffer, 1, block_size, input_des); bytes_read += read_count; + if (read_count == 0) break; + s32 new_size = bz3_encode_block(state, buffer, read_count); if (new_size == -1) { fprintf(stderr, "Failed to encode a block: %s\n", bz3_strerror(state)); @@ -236,6 +244,10 @@ new_size = read_neutral_s32(byteswap_buf); xread_noeof(&byteswap_buf, 1, 4, input_des); old_size = read_neutral_s32(byteswap_buf); + if (old_size > bz3_bound(block_size) || new_size > bz3_bound(block_size)) { + fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); + return 1; + } xread_noeof(buffer, 1, new_size, input_des); bytes_read += 8 + new_size; if (bz3_decode_block(state, buffer, new_size, old_size) == -1) { @@ -253,8 +265,13 @@ new_size = read_neutral_s32(byteswap_buf); xread_noeof(&byteswap_buf, 1, 4, input_des); old_size = read_neutral_s32(byteswap_buf); + if (old_size > bz3_bound(block_size) || new_size > bz3_bound(block_size)) { + fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); + return 1; + } xread_noeof(buffer, 1, new_size, input_des); bytes_read += 8 + new_size; + bytes_written += old_size; if (bz3_decode_block(state, buffer, new_size, old_size) == -1) { fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state)); return 1; @@ -326,6 +343,10 @@ sizes[i] = read_neutral_s32(byteswap_buf); xread_noeof(&byteswap_buf, 1, 4, input_des); old_sizes[i] = read_neutral_s32(byteswap_buf); + if (old_sizes[i] > bz3_bound(block_size) || sizes[i] > bz3_bound(block_size)) { + fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); + return 1; + } xread_noeof(buffers[i], 1, sizes[i], input_des); bytes_read += 8 + sizes[i]; } @@ -350,8 +371,13 @@ sizes[i] = read_neutral_s32(byteswap_buf); xread_noeof(&byteswap_buf, 1, 4, input_des); old_sizes[i] = read_neutral_s32(byteswap_buf); + if (old_sizes[i] > bz3_bound(block_size) || sizes[i] > bz3_bound(block_size)) { + fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); + return 1; + } xread_noeof(buffers[i], 1, sizes[i], input_des); bytes_read += 8 + sizes[i]; + bytes_written += old_sizes[i]; } bz3_decode_blocks(states, buffers, sizes, old_sizes, i); for (s32 j = 0; j < i; j++) { @@ -370,14 +396,17 @@ } #endif - if(verbose) { - if(file_name) fprintf(stderr, " %s:", file_name); - if(mode == MODE_ENCODE) - fprintf(stderr, "\t%"PRIu64" -> %"PRIu64" bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, (double)bytes_written * 100.0 / bytes_read, (double)bytes_written * 8.0 / bytes_read); - else if(mode == MODE_DECODE) - fprintf(stderr, "\t%"PRIu64" -> %"PRIu64" bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, (double)bytes_read * 100.0 / bytes_written, (double)bytes_read * 8.0 / bytes_written); + if (verbose) { + if (file_name) fprintf(stderr, " %s:", file_name); + if (mode == MODE_ENCODE) + fprintf(stderr, "\t%" PRIu64 " -> %" PRIu64 " bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, + (double)bytes_written * 100.0 / bytes_read, (double)bytes_written * 8.0 / bytes_read); + else if (mode == MODE_DECODE) + fprintf(stderr, "\t%" PRIu64 " -> %" PRIu64 " bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, + (double)bytes_read * 100.0 / bytes_written, (double)bytes_read * 8.0 / bytes_written); else - fprintf(stderr, "OK, %"PRIu64" bytes read.\n", bytes_read); + fprintf(stderr, "\tOK, %" PRIu64 " -> %" PRIu64 " bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, + (double)bytes_read * 100.0 / bytes_written, (double)bytes_read * 8.0 / bytes_written); } return 0; @@ -489,7 +518,8 @@ case '?': fprintf(stderr, "Try 'bzip3 --help' for more information.\n"); return 1; - case 'e': case 'z': + case 'e': + case 'z': mode = MODE_ENCODE; break; case 'd': @@ -685,6 +715,8 @@ output_des = mode != MODE_TEST ? open_output(output, force) : NULL; input_des = open_input(input); + if (output != f2) free(output); + int r = process(input_des, output_des, mode, block_size, workers, verbose, input); fclose(input_des);