diff -Nru zopfli-0~git130414/CONTRIBUTORS zopfli-1.0.0/CONTRIBUTORS --- zopfli-0~git130414/CONTRIBUTORS 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/CONTRIBUTORS 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,6 @@ +Mark Adler +Jyrki Alakuijala +Daniel Reed +Huzaifa Sidhpurwala +Péter Szabó +Lode Vandevenne diff -Nru zopfli-0~git130414/README zopfli-1.0.0/README --- zopfli-0~git130414/README 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/README 2013-04-25 16:09:00.000000000 +0000 @@ -1,23 +1,29 @@ Zopfli Compression Algorithm is a compression library programmed in C to perform very good, but slow, deflate or zlib compression. -zopfli.c is separate from the library and contains an example program to create -very well compressed gzip files. +The basic function to compress data is ZopfliCompress in zopfli.h. Use the +ZopfliOptions object to set parameters that affect the speed and compression. +Use the ZopfliInitOptions function to place the default values in the +ZopfliOptions first. -The basic functions to compress data are Deflate in deflate.h, ZlibCompress in -zlib_container.h and GzipCompress in gzip_container.h. Use the Options -object to set parameters that affect the speed and compression. Use the -InitOptions function to place the default values in the Options first. +ZopfliCompress supports deflate, gzip and zlib output format with a parameter. +To support only one individual format, you can instead use ZopfliDeflate in +deflate.h, ZopfliZlibCompress in zlib_container.h or ZopfliGzipCompress in +gzip_container.h. 
-Deflate creates a valid deflate stream in memory, see: +ZopfliDeflate creates a valid deflate stream in memory, see: http://www.ietf.org/rfc/rfc1951.txt -ZlibCompress creates a valid zlib stream in memory, see: +ZopfliZlibCompress creates a valid zlib stream in memory, see: http://www.ietf.org/rfc/rfc1950.txt -GzipCompress creates a valid gzip stream in memory, see: +ZopfliGzipCompress creates a valid gzip stream in memory, see: http://www.ietf.org/rfc/rfc1952.txt This library can only compress, not decompress. Existing zlib or deflate libraries can decompress the data. +zopfli_bin.c is separate from the library and contains an example program to +create very well compressed gzip files. Currently the makefile builds this +program with the library statically linked in. + Zopfli Compression Algorithm was created by Lode Vandevenne and Jyrki Alakuijala, based on an algorithm by Jyrki Alakuijala. diff -Nru zopfli-0~git130414/blocksplitter.c zopfli-1.0.0/blocksplitter.c --- zopfli-0~git130414/blocksplitter.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/blocksplitter.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,338 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "blocksplitter.h" - -#include -#include -#include - -#include "deflate.h" -#include "lz77.h" -#include "squeeze.h" -#include "tree.h" - -/* -The "f" for the FindMinimum function below. -i: the current parameter of f(i) -context: for your implementation -*/ -typedef double FindMinimumFun(size_t i, void* context); - -/* -Finds minimum of function f(i) where is is of type size_t, f(i) is of type -double, i is in range start-end (excluding end). -*/ -static size_t FindMinimum(FindMinimumFun f, void* context, - size_t start, size_t end) { - if (end - start < 1024) { - double best = LARGE_FLOAT; - size_t result = start; - size_t i; - for (i = start; i < end; i++) { - double v = f(i, context); - if (v < best) { - best = v; - result = i; - } - } - return result; - } else { - /* Try to find minimum faster by recursively checking multiple points. */ -#define NUM 9 /* Good value: 9. */ - size_t i; - size_t p[NUM]; - double vp[NUM]; - size_t besti; - double best; - double lastbest = LARGE_FLOAT; - size_t pos = start; - - for (;;) { - if (end - start <= NUM) break; - - for (i = 0; i < NUM; i++) { - p[i] = start + (i + 1) * ((end - start) / (NUM + 1)); - vp[i] = f(p[i], context); - } - besti = 0; - best = vp[0]; - for (i = 1; i < NUM; i++) { - if (vp[i] < best) { - best = vp[i]; - besti = i; - } - } - if (best > lastbest) break; - - start = besti == 0 ? start : p[besti - 1]; - end = besti == NUM - 1 ? end : p[besti + 1]; - - pos = p[besti]; - lastbest = best; - } - return pos; -#undef NUM - } -} - -/* -Returns estimated cost of a block in bits. It includes the size to encode the -tree and the size to encode all literal, length and distance symbols and their -extra bits. 
- -litlens: lz77 lit/lengths -dists: ll77 distances -lstart: start of block -lend: end of block (not inclusive) -*/ -double EstimateCost(const unsigned short* litlens, const unsigned short* dists, - size_t lstart, size_t lend) { - return CalculateBlockSize(litlens, dists, lstart, lend, 2); -} - -typedef struct SplitCostContext { - const unsigned short* litlens; - const unsigned short* dists; - size_t llsize; - size_t start; - size_t end; -} SplitCostContext; - - -/* -Gets the cost which is the sum of the cost of the left and the right section -of the data. -type: FindMinimumFun -*/ -static double SplitCost(size_t i, void* context) { - SplitCostContext* c = (SplitCostContext*)context; - return EstimateCost(c->litlens, c->dists, c->start, i) + - EstimateCost(c->litlens, c->dists, i, c->end); -} - -static void AddSorted(size_t value, size_t** out, size_t* outsize) { - size_t i; - APPEND_DATA(value, out, outsize); - if (*outsize > 0) { - for (i = 0; i < *outsize - 1; i++) { - if ((*out)[i] > value) { - size_t j; - for (j = *outsize - 1; j > i; j--) { - (*out)[j] = (*out)[j - 1]; - } - (*out)[i] = value; - break; - } - } - } -} - -/* -Prints the block split points as decimal and hex values in the terminal. -*/ -static void PrintBlockSplitPoints(const unsigned short* litlens, - const unsigned short* dists, - size_t llsize, const size_t* lz77splitpoints, - size_t nlz77points) { - size_t* splitpoints = 0; - size_t npoints = 0; - size_t i; - /* The input is given as lz77 indices, but we want to see the uncompressed - index values. */ - size_t pos = 0; - if (nlz77points > 0) { - for (i = 0; i < llsize; i++) { - size_t length = dists[i] == 0 ? 
1 : litlens[i]; - if (lz77splitpoints[npoints] == i) { - APPEND_DATA(pos, &splitpoints, &npoints); - if (npoints == nlz77points) break; - } - pos += length; - } - } - assert(npoints == nlz77points); - - fprintf(stderr, "block split points: "); - for (i = 0; i < npoints; i++) { - fprintf(stderr, "%d ", (int)splitpoints[i]); - } - fprintf(stderr, "(hex:"); - for (i = 0; i < npoints; i++) { - fprintf(stderr, " %x", (int)splitpoints[i]); - } - fprintf(stderr, ")\n"); - - free(splitpoints); -} - -/* -Finds next block to try to split, the largest of the available ones. -The largest is chosen to make sure that if only a limited amount of blocks is -requested, their sizes are spread evenly. -llsize: the size of the LL77 data, which is the size of the done array here. -done: array indicating which blocks starting at that position are no longer - splittable (splitting them increases rather than decreases cost). -splitpoints: the splitpoints found so far. -npoints: the amount of splitpoints found so far. -lstart: output variable, giving start of block. -lend: output variable, giving end of block. -returns 1 if a block was found, 0 if no block found (all are done). -*/ -static int FindLargestSplittableBlock( - size_t llsize, const unsigned char* done, - const size_t* splitpoints, size_t npoints, - size_t* lstart, size_t* lend) { - size_t longest = 0; - int found = 0; - size_t i; - for (i = 0; i <= npoints; i++) { - size_t start = i == 0 ? 0 : splitpoints[i - 1]; - size_t end = i == npoints ? 
llsize - 1 : splitpoints[i]; - if (!done[start] && end - start > longest) { - *lstart = start; - *lend = end; - found = 1; - longest = end - start; - } - } - return found; -} - -void BlockSplitLZ77(const Options* options, - const unsigned short* litlens, const unsigned short* dists, - size_t llsize, size_t maxblocks, - size_t** splitpoints, size_t* npoints) { - size_t lstart, lend; - size_t i; - size_t llpos = 0; - size_t numblocks = 1; - unsigned char* done; - double splitcost, origcost; - - if (llsize < 10) return; /* This code fails on tiny files. */ - - done = (unsigned char*)malloc(llsize); - if (!done) exit(-1); /* Allocation failed. */ - for (i = 0; i < llsize; i++) done[i] = 0; - - lstart = 0; - lend = llsize; - for (;;) { - SplitCostContext c; - - if (maxblocks > 0 && numblocks >= maxblocks) { - break; - } - - c.litlens = litlens; - c.dists = dists; - c.llsize = llsize; - c.start = lstart; - c.end = lend; - assert(lstart < lend); - llpos = FindMinimum(SplitCost, &c, lstart + 1, lend); - - assert(llpos > lstart); - assert(llpos < lend); - - splitcost = EstimateCost(litlens, dists, lstart, llpos) + - EstimateCost(litlens, dists, llpos, lend); - origcost = EstimateCost(litlens, dists, lstart, lend); - - if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) { - done[lstart] = 1; - } else { - AddSorted(llpos, splitpoints, npoints); - numblocks++; - } - - if (!FindLargestSplittableBlock( - llsize, done, *splitpoints, *npoints, &lstart, &lend)) { - break; /* No further split will probably reduce compression. 
*/ - } - - if (lend - lstart < 10) { - break; - } - } - - if (options->verbose) { - PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints); - } - - free(done); -} - -void BlockSplit(const Options* options, - const unsigned char* in, size_t instart, size_t inend, - size_t maxblocks, size_t** splitpoints, size_t* npoints) { - size_t pos = 0; - size_t i; - BlockState s; - size_t* lz77splitpoints = 0; - size_t nlz77points = 0; - LZ77Store store; - - InitLZ77Store(&store); - - s.options = options; - s.blockstart = instart; - s.blockend = inend; -#ifdef USE_LONGEST_MATCH_CACHE - s.lmc = 0; -#endif - - *npoints = 0; - *splitpoints = 0; - - /* Unintuitively, Using a simple LZ77 method here instead of LZ77Optimal - results in better blocks. */ - LZ77Greedy(&s, in, instart, inend, &store); - - BlockSplitLZ77(options, store.litlens, store.dists, store.size, maxblocks, - &lz77splitpoints, &nlz77points); - - /* Convert LZ77 positions to positions in the uncompressed input. */ - pos = instart; - if (nlz77points > 0) { - for (i = 0; i < store.size; i++) { - size_t length = store.dists[i] == 0 ? 1 : store.litlens[i]; - if (lz77splitpoints[*npoints] == i) { - APPEND_DATA(pos, splitpoints, npoints); - if (*npoints == nlz77points) break; - } - pos += length; - } - } - assert(*npoints == nlz77points); - - free(lz77splitpoints); - CleanLZ77Store(&store); -} - -void BlockSplitSimple(const unsigned char* in, size_t instart, size_t inend, - size_t blocksize, size_t** splitpoints, size_t* npoints) { - size_t i = instart; - while (i < inend) { - APPEND_DATA(i, splitpoints, npoints); - i += blocksize; - } - (void)in; -} diff -Nru zopfli-0~git130414/blocksplitter.h zopfli-1.0.0/blocksplitter.h --- zopfli-0~git130414/blocksplitter.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/blocksplitter.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,74 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -Functions to choose good boundaries for block splitting. Deflate allows encoding -the data in multiple blocks, with a separate Huffman tree for each block. The -Huffman tree itself requires some bytes to encode, so by choosing certain -blocks, you can either hurt, or enhance compression. These functions choose good -ones that enhance it. -*/ - -#ifndef ZOPFLI_BLOCKSPLITTER_H_ -#define ZOPFLI_BLOCKSPLITTER_H_ - -#include - -#include "util.h" - - -/* -Does blocksplitting on LZ77 data. -The output splitpoints are indices in the LZ77 data. -litlens: lz77 lit/lengths -dists: lz77 distances -llsize: size of litlens and dists -maxblocks: set a limit to the amount of blocks. Set to 0 to mean no limit. -*/ -void BlockSplitLZ77(const Options* options, - const unsigned short* litlens, const unsigned short* dists, - size_t llsize, size_t maxblocks, - size_t** splitpoints, size_t* npoints); - -/* -Does blocksplitting on uncompressed data. -The output splitpoints are indices in the uncompressed bytes. - -options: general program options. -in: uncompressed input data -instart: where to start splitting -inend: where to end splitting (not inclusive) -maxblocks: maximum amount of blocks to split into, or 0 for no limit -splitpoints: dynamic array to put the resulting split point coordinates into. 
- The coordinates are indices in the input array. -npoints: pointer to amount of splitpoints, for the dynamic array. The amount of - blocks is the amount of splitpoitns + 1. -*/ -void BlockSplit(const Options* options, - const unsigned char* in, size_t instart, size_t inend, - size_t maxblocks, size_t** splitpoints, size_t* npoints); - -/* -Divides the input into equal blocks, does not even take LZ77 lengths into -account. -*/ -void BlockSplitSimple(const unsigned char* in, size_t instart, size_t inend, - size_t blocksize, size_t** splitpoints, size_t* npoints); - -#endif /* ZOPFLI_BLOCKSPLITTER_H_ */ diff -Nru zopfli-0~git130414/cache.c zopfli-1.0.0/cache.c --- zopfli-0~git130414/cache.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/cache.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "cache.h" - -#include -#include -#include - -#ifdef USE_LONGEST_MATCH_CACHE - -void InitLongestMatchCache(size_t blocksize, LongestMatchCache* lmc) { - size_t i; - lmc->length = (unsigned short*)malloc(sizeof(unsigned short) * blocksize); - lmc->dist = (unsigned short*)malloc(sizeof(unsigned short) * blocksize); - /* Rather large amount of memory. 
*/ - lmc->sublen = (unsigned char*)malloc(NUM_CACHED_LENGTHS * 3 * blocksize); - - /* length > 0 and dist 0 is invalid combination, which indicates on purpose - that this cache value is not filled in yet. */ - for (i = 0; i < blocksize; i++) lmc->length[i] = 1; - for (i = 0; i < blocksize; i++) lmc->dist[i] = 0; - for (i = 0; i < NUM_CACHED_LENGTHS * blocksize * 3; i++) lmc->sublen[i] = 0; -} - -void CleanLongestMatchCache(LongestMatchCache* lmc) { - free(lmc->length); - free(lmc->dist); - free(lmc->sublen); -} - -void SublenToCache(const unsigned short* sublen, size_t pos, size_t length, - LongestMatchCache* lmc) { - size_t i; - size_t j = 0; - unsigned bestlength = 0; - unsigned char* cache; - -#if NUM_CACHED_LENGTHS == 0 - return; -#endif - - cache = &lmc->sublen[NUM_CACHED_LENGTHS * pos * 3]; - if (length < 3) return; - for (i = 3; i <= length; i++) { - if (i == length || sublen[i] != sublen[i + 1]) { - cache[j * 3] = i - 3; - cache[j * 3 + 1] = sublen[i] % 256; - cache[j * 3 + 2] = (sublen[i] >> 8) % 256; - bestlength = i; - j++; - if (j >= NUM_CACHED_LENGTHS) break; - } - } - if (j < NUM_CACHED_LENGTHS) { - assert(bestlength == length); - cache[(NUM_CACHED_LENGTHS - 1) * 3] = bestlength - 3; - } else { - assert(bestlength <= length); - } - assert(bestlength == MaxCachedSublen(lmc, pos, length)); -} - -void CacheToSublen(const LongestMatchCache* lmc, size_t pos, size_t length, - unsigned short* sublen) { - size_t i, j; - unsigned maxlength = MaxCachedSublen(lmc, pos, length); - unsigned prevlength = 0; - unsigned char* cache; -#if NUM_CACHED_LENGTHS == 0 - return; -#endif - if (length < 3) return; - cache = &lmc->sublen[NUM_CACHED_LENGTHS * pos * 3]; - for (j = 0; j < NUM_CACHED_LENGTHS; j++) { - unsigned length = cache[j * 3] + 3; - unsigned dist = cache[j * 3 + 1] + 256 * cache[j * 3 + 2]; - for (i = prevlength; i <= length; i++) { - sublen[i] = dist; - } - if (length == maxlength) break; - prevlength = length + 1; - } -} - -/* -Returns the length up to 
which could be stored in the cache. -*/ -unsigned MaxCachedSublen(const LongestMatchCache* lmc, - size_t pos, size_t length) { - unsigned char* cache; -#if NUM_CACHED_LENGTHS == 0 - return 0; -#endif - cache = &lmc->sublen[NUM_CACHED_LENGTHS * pos * 3]; - (void)length; - if (cache[1] == 0 && cache[2] == 0) return 0; /* No sublen cached. */ - return cache[(NUM_CACHED_LENGTHS - 1) * 3] + 3; -} - -#endif /* USE_LONGEST_MATCH_CACHE */ diff -Nru zopfli-0~git130414/cache.h zopfli-1.0.0/cache.h --- zopfli-0~git130414/cache.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/cache.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,63 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -The cache that speeds up FindLongestMatch of lz77.c. -*/ - -#ifndef ZOPFLI_CACHE_H_ -#define ZOPFLI_CACHE_H_ - -#include "util.h" - -#ifdef USE_LONGEST_MATCH_CACHE - -/* -Cache used by FindLongestMatch to remember previously found length/dist values. -This is needed because the squeeze runs will ask these values multiple times for -the same position. -Uses large amounts of memory, since it has to remember the distance belonging -to every possible shorter-than-the-best length (the so called "sublen" array). 
-*/ -typedef struct LongestMatchCache { - unsigned short* length; - unsigned short* dist; - unsigned char* sublen; /* For each length, the distance */ -} LongestMatchCache; - -/* Initializes the LongestMatchCache. */ -void InitLongestMatchCache(size_t blocksize, LongestMatchCache* lmc); - -/* Frees up the memory of the LongestMatchCache. */ -void CleanLongestMatchCache(LongestMatchCache* lmc); - -/* Stores sublen array in the cache. */ -void SublenToCache(const unsigned short* sublen, size_t pos, size_t length, - LongestMatchCache* lmc); - -/* Extracts sublen array from the cache. */ -void CacheToSublen(const LongestMatchCache* lmc, size_t pos, size_t length, - unsigned short* sublen); -/* Returns the length up to which could be stored in the cache. */ -unsigned MaxCachedSublen(const LongestMatchCache* lmc, - size_t pos, size_t length); - -#endif /* USE_LONGEST_MATCH_CACHE */ - -#endif /* ZOPFLI_CACHE_H_ */ diff -Nru zopfli-0~git130414/debian/changelog zopfli-1.0.0/debian/changelog --- zopfli-0~git130414/debian/changelog 2013-03-03 13:47:28.000000000 +0000 +++ zopfli-1.0.0/debian/changelog 2013-12-22 00:23:47.000000000 +0000 @@ -1,3 +1,10 @@ +zopfli (1.0.0-1) unstable; urgency=low + + * New upstream release. + * Update Standards-Version to 3.9.5 . + + -- Laszlo Boszormenyi (GCS) Sun, 22 Dec 2013 01:20:05 +0100 + zopfli (0~git130414-1) unstable; urgency=low * Initial release (closes: #702170). 
diff -Nru zopfli-0~git130414/debian/control zopfli-1.0.0/debian/control --- zopfli-0~git130414/debian/control 2013-03-03 13:22:41.000000000 +0000 +++ zopfli-1.0.0/debian/control 2013-12-22 00:21:45.000000000 +0000 @@ -1,9 +1,9 @@ Source: zopfli Section: utils Priority: optional -Maintainer: Laszlo Boszormenyi (GCS) +Maintainer: Laszlo Boszormenyi (GCS) Build-Depends: debhelper (>= 9), dpkg-dev (>= 1.16.1~) -Standards-Version: 3.9.4 +Standards-Version: 3.9.5 Homepage: http://code.google.com/p/zopfli/ Package: zopfli diff -Nru zopfli-0~git130414/debian/copyright zopfli-1.0.0/debian/copyright --- zopfli-0~git130414/debian/copyright 2013-03-03 13:14:58.000000000 +0000 +++ zopfli-1.0.0/debian/copyright 2013-12-22 00:22:17.000000000 +0000 @@ -27,7 +27,7 @@ the file `/usr/share/common-licenses/Apache-2.0'. Files: debian/* -Copyright: Copyright (C) 2010-2012 Laszlo Boszormenyi (GCS) +Copyright: Copyright (C) 2010- Laszlo Boszormenyi (GCS) License: GPL-3+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru zopfli-0~git130414/debian/patches/hardened_build.patch zopfli-1.0.0/debian/patches/hardened_build.patch --- zopfli-0~git130414/debian/patches/hardened_build.patch 2013-03-03 13:50:40.000000000 +0000 +++ zopfli-1.0.0/debian/patches/hardened_build.patch 2013-12-22 00:26:28.000000000 +0000 @@ -6,12 +6,13 @@ --- ---- zopfli-0~git130414.orig/makefile -+++ zopfli-0~git130414/makefile +--- zopfli/makefile ++++ zopfli/makefile @@ -1,5 +1,5 @@ make: -- gcc *.c -O2 -W -Wall -Wextra -ansi -pedantic -lm -o zopfli -+ gcc *.c -O2 -W -Wall -Wextra -ansi -pedantic -lm $(CPPFLAGS) $(LDFLAGS) -o zopfli +- gcc src/zopfli/*.c -O2 -W -Wall -Wextra -ansi -pedantic -lm -o zopfli ++ gcc src/zopfli/*.c -O2 -W -Wall -Wextra -ansi -pedantic -lm $(CPPFLAGS) $(LDFLAGS) -o zopfli debug: - gcc *.c -g3 -lm -o zopfli +- gcc src/zopfli/*.c -g3 -lm -o zopfli ++ gcc src/zopfli/*.c -g3 -lm $(CPPFLAGS) $(LDFLAGS) 
-o zopfli diff -Nru zopfli-0~git130414/debian/rules zopfli-1.0.0/debian/rules --- zopfli-0~git130414/debian/rules 2013-03-03 13:51:13.000000000 +0000 +++ zopfli-1.0.0/debian/rules 2013-12-22 00:20:37.000000000 +0000 @@ -4,10 +4,6 @@ # Uncomment this to turn on verbose mode. #export DH_VERBOSE=1 -export DEB_BUILD_MAINT_OPTIONS = hardening=+all -DPKG_EXPORT_BUILDFLAGS = 1 -include /usr/share/dpkg/buildflags.mk - override_dh_auto_configure: %: diff -Nru zopfli-0~git130414/deflate.c zopfli-1.0.0/deflate.c --- zopfli-0~git130414/deflate.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/deflate.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,675 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "deflate.h" - -#include -#include -#include - -#include "blocksplitter.h" -#include "lz77.h" -#include "squeeze.h" -#include "tree.h" - -static void AddBit(int bit, - unsigned char* bp, unsigned char** out, size_t* outsize) { - if (((*bp) & 7) == 0) APPEND_DATA(0, out, outsize); - (*out)[*outsize - 1] |= bit << ((*bp) & 7); - (*bp)++; -} - -static void AddBits(unsigned symbol, unsigned length, - unsigned char* bp, unsigned char** out, size_t* outsize) { - /* TODO(lode): make more efficient (add more bits at once). 
*/ - unsigned i; - for (i = 0; i < length; i++) { - unsigned bit = (symbol >> i) & 1; - if (((*bp) & 7) == 0) APPEND_DATA(0, out, outsize); - (*out)[*outsize - 1] |= bit << ((*bp) & 7); - (*bp)++; - } -} - -/* -Adds bits, like AddBits, but the order is inverted. The deflate specification -uses both orders in one standard. -*/ -static void AddHuffmanBits(unsigned symbol, unsigned length, - unsigned char* bp, unsigned char** out, - size_t* outsize) { - /* TODO(lode): make more efficient (add more bits at once). */ - unsigned i; - for (i = 0; i < length; i++) { - unsigned bit = (symbol >> (length - i - 1)) & 1; - if (((*bp) & 7) == 0) APPEND_DATA(0, out, outsize); - (*out)[*outsize - 1] |= bit << ((*bp) & 7); - (*bp)++; - } -} - -/* -Ensures there are at least 2 distance codes to support buggy decoders. -Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1 -distance code (with length > 0), even though it's valid according to the -deflate spec to have 0 distance codes. On top of that, some mobile phones -require at least two distance codes. To support these decoders too (but -potentially at the cost of a few bytes), add dummy code lengths of 1. -References to this bug can be found in the changelog of -Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0. - -d_lengths: the 32 lengths of the distance codes. -*/ -static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) { - int num_dist_codes = 0; /* Amount of non-zero distance codes */ - int i; - for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) { - if (d_lengths[i]) num_dist_codes++; - if (num_dist_codes >= 2) return; /* Two or more codes is fine. */ - } - - if (num_dist_codes == 0) { - d_lengths[0] = d_lengths[1] = 1; - } else if (num_dist_codes == 1) { - d_lengths[d_lengths[0] ? 1 : 0] = 1; - } -} - -/* -Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE. 
-*/ -size_t CalculateTreeSize(const unsigned* ll_lengths, const unsigned* d_lengths, - size_t* ll_counts, size_t* d_counts) { - unsigned char* dummy = 0; - size_t dummysize = 0; - unsigned char bp = 0; - - (void)ll_counts; - (void)d_counts; - - AddDynamicTree(ll_lengths, d_lengths, &bp, &dummy, &dummysize); - free(dummy); - - return dummysize * 8 + (bp & 7); -} - -void AddDynamicTree(const unsigned* ll_lengths, const unsigned* d_lengths, - unsigned char* bp, unsigned char** out, size_t* outsize) { - unsigned* lld_lengths = 0; /* All litlen and dist lengthts with ending zeros - trimmed together in one array. */ - unsigned lld_total; /* Size of lld_lengths. */ - unsigned* rle = 0; /* Runlength encoded version of lengths of litlen and dist - trees. */ - unsigned* rle_bits = 0; /* Extra bits for rle values 16, 17 and 18. */ - size_t rle_size = 0; /* Size of rle array. */ - size_t rle_bits_size = 0; /* Should have same value as rle_size. */ - unsigned hlit = 29; /* 286 - 257 */ - unsigned hdist = 29; /* 32 - 1, but gzip does not like hdist > 29.*/ - unsigned hclen; - size_t i, j; - size_t clcounts[19]; - unsigned clcl[19]; /* Code length code lengths. */ - unsigned clsymbols[19]; - /* The order in which code length code lengths are encoded as per deflate. */ - unsigned order[19] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 - }; - - /* Trim zeros. */ - while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--; - while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--; - - lld_total = hlit + 257 + hdist + 1; - lld_lengths = (unsigned*)malloc(sizeof(*lld_lengths) * lld_total); - if (!lld_lengths) exit(-1); /* Allocation failed. */ - - for (i = 0; i < lld_total; i++) { - lld_lengths[i] = i < 257 + hlit - ? 
ll_lengths[i] : d_lengths[i - 257 - hlit]; - assert(lld_lengths[i] < 16); - } - - for (i = 0; i < lld_total; i++) { - size_t count = 0; - for (j = i; j < lld_total && lld_lengths[i] == lld_lengths[j]; j++) { - count++; - } - if (count >= 4 || (count >= 3 && lld_lengths[i] == 0)) { - if (lld_lengths[i] == 0) { - if (count > 10) { - if (count > 138) count = 138; - APPEND_DATA(18, &rle, &rle_size); - APPEND_DATA(count - 11, &rle_bits, &rle_bits_size); - } else { - APPEND_DATA(17, &rle, &rle_size); - APPEND_DATA(count - 3, &rle_bits, &rle_bits_size); - } - } else { - unsigned repeat = count - 1; /* Since the first one is hardcoded. */ - APPEND_DATA(lld_lengths[i], &rle, &rle_size); - APPEND_DATA(0, &rle_bits, &rle_bits_size); - while (repeat >= 6) { - APPEND_DATA(16, &rle, &rle_size); - APPEND_DATA(6 - 3, &rle_bits, &rle_bits_size); - repeat -= 6; - } - if (repeat >= 3) { - APPEND_DATA(16, &rle, &rle_size); - APPEND_DATA(3 - 3, &rle_bits, &rle_bits_size); - repeat -= 3; - } - while (repeat != 0) { - APPEND_DATA(lld_lengths[i], &rle, &rle_size); - APPEND_DATA(0, &rle_bits, &rle_bits_size); - repeat--; - } - } - - i += count - 1; - } else { - APPEND_DATA(lld_lengths[i], &rle, &rle_size); - APPEND_DATA(0, &rle_bits, &rle_bits_size); - } - assert(rle[rle_size - 1] <= 18); - } - - for (i = 0; i < 19; i++) { - clcounts[i] = 0; - } - for (i = 0; i < rle_size; i++) { - clcounts[rle[i]]++; - } - - CalculateBitLengths(clcounts, 19, 7, clcl); - LengthsToSymbols(clcl, 19, 7, clsymbols); - - hclen = 15; - /* Trim zeros. */ - while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--; - - AddBits(hlit, 5, bp, out, outsize); - AddBits(hdist, 5, bp, out, outsize); - AddBits(hclen, 4, bp, out, outsize); - - for (i = 0; i < hclen + 4; i++) { - AddBits(clcl[order[i]], 3, bp, out, outsize); - } - - for (i = 0; i < rle_size; i++) { - unsigned symbol = clsymbols[rle[i]]; - AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize); - /* Extra bits. 
*/ - if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize); - else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize); - else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize); - } - - free(lld_lengths); - free(rle); - free(rle_bits); -} - -/* -Adds all lit/len and dist codes from the lists as huffman symbols. Does not add -end code 256. expected_data_size is the uncompressed block size, used for -assert, but you can set it to 0 to not do the assertion. -*/ -void AddLZ77Data(const unsigned short* litlens, const unsigned short* dists, - size_t lstart, size_t lend, - size_t expected_data_size, - const unsigned* ll_symbols, const unsigned* ll_lengths, - const unsigned* d_symbols, const unsigned* d_lengths, - unsigned char* bp, unsigned char** out, size_t* outsize) { - size_t testlength = 0; - size_t i; - - for (i = lstart; i < lend; i++) { - unsigned dist = dists[i]; - unsigned litlen = litlens[i]; - if (dist == 0) { - assert(litlen < 256); - assert(ll_lengths[litlen] > 0); - AddHuffmanBits(ll_symbols[litlen], ll_lengths[litlen], bp, out, outsize); - testlength++; - } else { - unsigned lls = GetLengthSymbol(litlen); - unsigned ds = GetDistSymbol(dist); - assert(litlen >= 3 && litlen <= 288); - assert(ll_lengths[lls] > 0); - assert(d_lengths[ds] > 0); - AddHuffmanBits(ll_symbols[lls], ll_lengths[lls], bp, out, outsize); - AddBits(GetLengthExtraBitsValue(litlen), GetLengthExtraBits(litlen), - bp, out, outsize); - AddHuffmanBits(d_symbols[ds], d_lengths[ds], bp, out, outsize); - AddBits(GetDistExtraBitsValue(dist), GetDistExtraBits(dist), - bp, out, outsize); - testlength += litlen; - } - } - assert(expected_data_size == 0 || testlength == expected_data_size); -} - -void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) { - size_t i; - for (i = 0; i < 144; i++) ll_lengths[i] = 8; - for (i = 144; i < 256; i++) ll_lengths[i] = 9; - for (i = 256; i < 280; i++) ll_lengths[i] = 7; - for (i = 280; i < 288; i++) ll_lengths[i] = 8; - for (i = 0; i 
< 32; i++) d_lengths[i] = 5; -} - -/* -Calculates size of the part after the header and tree of an LZ77 block, in bits. -*/ -size_t CalculateBlockSymbolSize(const unsigned* ll_lengths, - const unsigned* d_lengths, - const unsigned short* litlens, - const unsigned short* dists, - size_t lstart, size_t lend) { - size_t result = 0; - size_t i; - for (i = lstart; i < lend; i++) { - if (dists[i] == 0) { - result += ll_lengths[litlens[i]]; - } else { - result += ll_lengths[GetLengthSymbol(litlens[i])]; - result += d_lengths[GetDistSymbol(dists[i])]; - result += GetLengthExtraBits(litlens[i]); - result += GetDistExtraBits(dists[i]); - } - } - result += ll_lengths[256]; /*end symbol*/ - return result; -} - -double CalculateBlockSize( - const unsigned short* litlens, const unsigned short* dists, - size_t lstart, size_t lend, int btype) { - size_t ll_counts[288]; - size_t d_counts[32]; - - unsigned ll_lengths[288]; - unsigned d_lengths[32]; - - double result = 3; /*bfinal and btype bits*/ - - assert(btype == 1 || btype == 2); /* This is not for uncompressed blocks. */ - - if(btype == 1) { - GetFixedTree(ll_lengths, d_lengths); - } else { - GetLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts); - CalculateBitLengths(ll_counts, 288, 15, ll_lengths); - CalculateBitLengths(d_counts, 32, 15, d_lengths); - PatchDistanceCodesForBuggyDecoders(d_lengths); - result += CalculateTreeSize(ll_lengths, d_lengths, ll_counts, d_counts); - } - - result += CalculateBlockSymbolSize( - ll_lengths, d_lengths, litlens, dists, lstart, lend); - - return result; -} - -/* -Adds a deflate block with the given LZ77 data to the output. -options: global program options -btype: the block type, must be 1 or 2 -final: whether to set the "final" bit on this block, must be the last block -litlens: literal/length array of the LZ77 data, in the same format as in - LZ77Store. -dists: distance array of the LZ77 data, in the same format as in LZ77Store. 
-lstart: where to start in the LZ77 data -lend: where to end in the LZ77 data (not inclusive) -expected_data_size: the uncompressed block size, used for assert, but you can - set it to 0 to not do the assertion. -bp: output bit pointer -out: dynamic output array to append to -outsize: dynamic output array size -*/ -void AddLZ77Block(const Options* options, int btype, int final, - const unsigned short* litlens, const unsigned short* dists, - size_t lstart, size_t lend, - size_t expected_data_size, - unsigned char* bp, unsigned char** out, size_t* outsize) { - size_t ll_counts[288]; - size_t d_counts[32]; - unsigned ll_lengths[288]; - unsigned d_lengths[32]; - unsigned ll_symbols[288]; - unsigned d_symbols[32]; - size_t detect_block_size = *outsize; - size_t compressed_size; - size_t uncompressed_size = 0; - size_t i; - - AddBit(final, bp, out, outsize); - AddBit(btype & 1, bp, out, outsize); - AddBit((btype & 2) >> 1, bp, out, outsize); - - if (btype == 1) { - /* Fixed block. */ - GetFixedTree(ll_lengths, d_lengths); - } else { - /* Dynamic block. */ - unsigned detect_tree_size; - assert(btype == 2); - GetLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts); - CalculateBitLengths(ll_counts, 288, 15, ll_lengths); - CalculateBitLengths(d_counts, 32, 15, d_lengths); - PatchDistanceCodesForBuggyDecoders(d_lengths); - detect_tree_size = *outsize; - AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize); - if (options->verbose) { - fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size)); - } - - /* Assert that for every present symbol, the code length is non-zero. */ - /* TODO(lode): remove this in release version. 
*/ - for (i = 0; i < 288; i++) assert(ll_counts[i] == 0 || ll_lengths[i] > 0); - for (i = 0; i < 32; i++) assert(d_counts[i] == 0 || d_lengths[i] > 0); - } - - LengthsToSymbols(ll_lengths, 288, 15, ll_symbols); - LengthsToSymbols(d_lengths, 32, 15, d_symbols); - - detect_block_size = *outsize; - AddLZ77Data(litlens, dists, lstart, lend, expected_data_size, - ll_symbols, ll_lengths, d_symbols, d_lengths, - bp, out, outsize); - /* End symbol. */ - AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize); - - for (i = lstart; i < lend; i++) { - uncompressed_size += dists[i] == 0 ? 1 : litlens[i]; - } - compressed_size = *outsize - detect_block_size; - if (options->verbose) { - fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n", - (int)compressed_size, (int)(compressed_size / 1024), - (int)(uncompressed_size)); - } -} - -void DeflateDynamicBlock(const Options* options, int final, - const unsigned char* in, size_t instart, size_t inend, - unsigned char* bp, - unsigned char** out, size_t* outsize) { - BlockState s; - size_t blocksize = inend - instart; - LZ77Store store; - int btype = 2; - - InitLZ77Store(&store); - - s.options = options; - s.blockstart = instart; - s.blockend = inend; -#ifdef USE_LONGEST_MATCH_CACHE - s.lmc = (LongestMatchCache*)malloc(sizeof(LongestMatchCache)); - InitLongestMatchCache(blocksize, s.lmc); -#endif - - LZ77Optimal(&s, in, instart, inend, &store); - - /* For small block, encoding with fixed tree can be smaller. 
For large block, - don't bother doing this expensive test, dynamic tree will be better.*/ - if (store.size < 1000) { - double dyncost, fixedcost; - LZ77Store fixedstore; - InitLZ77Store(&fixedstore); - LZ77OptimalFixed(&s, in, instart, inend, &fixedstore); - dyncost = CalculateBlockSize(store.litlens, store.dists, 0, store.size, 2); - fixedcost = CalculateBlockSize(fixedstore.litlens, fixedstore.dists, - 0, fixedstore.size, 1); - if (fixedcost < dyncost) { - btype = 1; - CleanLZ77Store(&store); - store = fixedstore; - } else { - CleanLZ77Store(&fixedstore); - } - } - - AddLZ77Block(s.options, btype, final, - store.litlens, store.dists, 0, store.size, - blocksize, bp, out, outsize); - -#ifdef USE_LONGEST_MATCH_CACHE - CleanLongestMatchCache(s.lmc); - free(s.lmc); -#endif - CleanLZ77Store(&store); -} - -void DeflateFixedBlock(const Options* options, int final, - const unsigned char* in, size_t instart, size_t inend, - unsigned char* bp, - unsigned char** out, size_t* outsize) { - BlockState s; - size_t blocksize = inend - instart; - LZ77Store store; - - InitLZ77Store(&store); - - s.options = options; - s.blockstart = instart; - s.blockend = inend; -#ifdef USE_LONGEST_MATCH_CACHE - s.lmc = (LongestMatchCache*)malloc(sizeof(LongestMatchCache)); - InitLongestMatchCache(blocksize, s.lmc); -#endif - - LZ77OptimalFixed(&s, in, instart, inend, &store); - - AddLZ77Block(s.options, 1, final, store.litlens, store.dists, 0, store.size, - blocksize, bp, out, outsize); - -#ifdef USE_LONGEST_MATCH_CACHE - CleanLongestMatchCache(s.lmc); - free(s.lmc); -#endif - CleanLZ77Store(&store); -} - -void DeflateNonCompressedBlock(const Options* options, int final, - const unsigned char* in, size_t instart, - size_t inend, - unsigned char* bp, - unsigned char** out, size_t* outsize) { - size_t i; - size_t blocksize = inend - instart; - unsigned short nlen = ~blocksize; - - (void)options; - assert(blocksize < 65536); /* Non compressed blocks are max this size. 
*/ - - AddBit(final, bp, out, outsize); - /* BTYPE 00 */ - AddBit(0, bp, out, outsize); - AddBit(0, bp, out, outsize); - - /* Any bits of input up to the next byte boundary are ignored. */ - *bp = 0; - - APPEND_DATA(blocksize % 256, out, outsize); - APPEND_DATA((blocksize / 256) % 256, out, outsize); - APPEND_DATA(nlen % 256, out, outsize); - APPEND_DATA((nlen / 256) % 256, out, outsize); - - for (i = instart; i < inend; i++) { - APPEND_DATA(in[i], out, outsize); - } -} - -void DeflateBlock(const Options* options, - int btype, int final, - const unsigned char* in, size_t instart, size_t inend, - unsigned char* bp, unsigned char** out, size_t* outsize) { - if (btype == 0) { - DeflateNonCompressedBlock( - options, final, in, instart, inend, bp, out, outsize); - } else if (btype == 1) { - DeflateFixedBlock(options, final, in, instart, inend, bp, out, outsize); - } else { - assert (btype == 2); - DeflateDynamicBlock(options, final, in, instart, inend, bp, out, outsize); - } -} - -/* -Does squeeze strategy where first block splitting is done, then each block is -squeezed. -Parameters: see description of the Deflate function. -*/ -void DeflateSplittingFirst(const Options* options, int btype, int final, - const unsigned char* in, - size_t instart, size_t inend, - unsigned char* bp, - unsigned char** out, size_t* outsize) { - size_t i; - size_t* splitpoints = 0; - size_t npoints = 0; - if (btype == 0) { - BlockSplitSimple(in, instart, inend, 65535, &splitpoints, &npoints); - } else if (btype == 1) { - /* If all blocks are fixed tree, splitting into separate blocks only - increases the total size. Leave npoints at 0, this represents 1 block. */ - } else { - BlockSplit(options, in, instart, inend, - options->blocksplittingmax, &splitpoints, &npoints); - } - - for (i = 0; i <= npoints; i++) { - size_t start = i == 0 ? instart : splitpoints[i - 1]; - size_t end = i == npoints ? 
inend : splitpoints[i]; - DeflateBlock(options, btype, i == npoints && final, in, start, end, - bp, out, outsize); - } - - free(splitpoints); -} - -/* -Does squeeze strategy where first the best possible lz77 is done, and then based -on that data, block splitting is done. -Parameters: see description of the Deflate function. -*/ -void DeflateSplittingLast(const Options* options, int btype, int final, - const unsigned char* in, - size_t instart, size_t inend, - unsigned char* bp, - unsigned char** out, size_t* outsize) { - size_t i; - BlockState s; - LZ77Store store; - size_t* splitpoints = 0; - size_t npoints = 0; - - if (btype == 0) { - /* This function only supports LZ77 compression. DeflateSplittingFirst - supports the special case of noncompressed data. Punt it to that one. */ - DeflateSplittingFirst(options, btype, final, - in, instart, inend, - bp, out, outsize); - } - assert(btype == 1 || btype == 2); - - InitLZ77Store(&store); - - s.options = options; - s.blockstart = instart; - s.blockend = inend; -#ifdef USE_LONGEST_MATCH_CACHE - s.lmc = (LongestMatchCache*)malloc(sizeof(LongestMatchCache)); - InitLongestMatchCache(inend - instart, s.lmc); -#endif - - if (btype == 2) { - LZ77Optimal(&s, in, instart, inend, &store); - } else { - assert (btype == 1); - LZ77OptimalFixed(&s, in, instart, inend, &store); - } - - if (btype == 1) { - /* If all blocks are fixed tree, splitting into separate blocks only - increases the total size. Leave npoints at 0, this represents 1 block. */ - } else { - BlockSplitLZ77(options, store.litlens, store.dists, store.size, - options->blocksplittingmax, &splitpoints, &npoints); - } - - for (i = 0; i <= npoints; i++) { - size_t start = i == 0 ? 0 : splitpoints[i - 1]; - size_t end = i == npoints ? 
store.size : splitpoints[i]; - AddLZ77Block(options, btype, i == npoints && final, - store.litlens, store.dists, start, end, 0, - bp, out, outsize); - } - -#ifdef USE_LONGEST_MATCH_CACHE - CleanLongestMatchCache(s.lmc); - free(s.lmc); -#endif - - CleanLZ77Store(&store); -} - -/* -Deflate a part, to allow Deflate() to use multiple master blocks if needed. -It is possible to call this function multiple times in a row, shifting -instart and inend to next bytes of the data. If instart is larger than 0, then -previous bytes are used as the initial dictionary for LZ77. -This function will usually output multiple deflate blocks. If final is 1, then -the final bit will be set on the last block. -*/ -static void DeflatePart(const Options* options, int btype, int final, - const unsigned char* in, size_t instart, size_t inend, - unsigned char* bp, unsigned char** out, - size_t* outsize) { - if (options->blocksplitting) { - if (options->blocksplittinglast) { - DeflateSplittingLast(options, btype, final, in, instart, inend, - bp, out, outsize); - } else { - DeflateSplittingFirst(options, btype, final, in, instart, inend, - bp, out, outsize); - } - } else { - DeflateBlock(options, btype, final, in, instart, inend, bp, out, outsize); - } -} - -void Deflate(const Options* options, int btype, int final, - const unsigned char* in, size_t insize, - unsigned char* bp, unsigned char** out, size_t* outsize) { -#if MASTER_BLOCK_SIZE == 0 - DeflatePart(options, btype, final, in, 0, insize, bp, out, outsize); -#else - size_t i = 0; - while (i < insize) { - int masterfinal = (i + MASTER_BLOCK_SIZE >= insize); - int final2 = final && masterfinal; - size_t size = masterfinal ? 
insize - i : MASTER_BLOCK_SIZE; - DeflatePart(options, btype, final2, in, i, i + size, bp, out, outsize); - i += size; - } -#endif -} diff -Nru zopfli-0~git130414/deflate.h zopfli-1.0.0/deflate.h --- zopfli-0~git130414/deflate.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/deflate.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#ifndef ZOPFLI_DEFLATE_H_ -#define ZOPFLI_DEFLATE_H_ - -/* -Functions to compress compatible with the deflate specification. -*/ - -#include "util.h" - -/* -Compresses according to the deflate specification and append the compressed -result to the output. -This function will usually output multiple deflate blocks. If final is 1, then -the final bit will be set on the last block. - -options: global program options -btype: the deflate block type. Use 2 for best compression. - -0: non compressed blocks (00) - -1: blocks with fixed tree (01) - -2: blocks with dynamic tree (10) -final: whether this is the last section of the input, sets the final bit to the - last deflate block. -in: the input bytes -insize: number of input bytes -bp: bit pointer for the output array. This must initially be 0, and for - consecutive calls must be reused (it can have values from 0-7). 
This is - because deflate appends blocks as bit-based data, rather than on byte - boundaries. -out: pointer to the dynamic output array to which the result is appended. Must - be freed after use. -outsize: pointer to the dynamic output array size. -*/ -void Deflate(const Options* options, int btype, int final, - const unsigned char* in, size_t insize, - unsigned char* bp, unsigned char** out, size_t* outsize); - -/* -Outputs the tree to a dynamic block (btype 10) according to the deflate -specification. -*/ -void AddDynamicTree(const unsigned* ll_lengths, const unsigned* d_lengths, - unsigned char* bp, unsigned char** out, size_t* outsize); - -/* -Calculates block size in bits. -litlens: lz77 lit/lengths -dists: ll77 distances -lstart: start of block -lend: end of block (not inclusive) -*/ -double CalculateBlockSize( - const unsigned short* litlens, const unsigned short* dists, - size_t lstart, size_t lend, int btype); -#endif /* ZOPFLI_DEFLATE_H_ */ diff -Nru zopfli-0~git130414/gzip_container.c zopfli-1.0.0/gzip_container.c --- zopfli-0~git130414/gzip_container.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/gzip_container.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,115 +0,0 @@ -/* -Copyright 2013 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "gzip_container.h" - -#include - -#include "deflate.h" - -/* Table of CRCs of all 8-bit messages. */ -static unsigned long crc_table[256]; - -/* Flag: has the table been computed? Initially false. */ -static int crc_table_computed = 0; - -/* Makes the table for a fast CRC. */ -void MakeCRCTable() { - unsigned long c; - int n, k; - for (n = 0; n < 256; n++) { - c = (unsigned long) n; - for (k = 0; k < 8; k++) { - if (c & 1) { - c = 0xedb88320L ^ (c >> 1); - } else { - c = c >> 1; - } - } - crc_table[n] = c; - } - crc_table_computed = 1; -} - - -/* -Updates a running crc with the bytes buf[0..len-1] and returns -the updated crc. The crc should be initialized to zero. -*/ -unsigned long UpdateCRC(unsigned long crc, - const unsigned char *buf, size_t len) { - unsigned long c = crc ^ 0xffffffffL; - unsigned n; - - if (!crc_table_computed) - MakeCRCTable(); - for (n = 0; n < len; n++) { - c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8); - } - return c ^ 0xffffffffL; -} - -/* Returns the CRC of the bytes buf[0..len-1]. */ -unsigned long CRC(const unsigned char* buf, int len) { - return UpdateCRC(0L, buf, len); -} - -/* -Compresses the data according to the gzip specification. -*/ -void GzipCompress(const Options* options, - const unsigned char* in, size_t insize, - unsigned char** out, size_t* outsize) { - unsigned long crcvalue = CRC(in, insize); - unsigned char bp = 0; - - APPEND_DATA(31, out, outsize); /* ID1 */ - APPEND_DATA(139, out, outsize); /* ID2 */ - APPEND_DATA(8, out, outsize); /* CM */ - APPEND_DATA(0, out, outsize); /* FLG */ - /* MTIME */ - APPEND_DATA(0, out, outsize); - APPEND_DATA(0, out, outsize); - APPEND_DATA(0, out, outsize); - APPEND_DATA(0, out, outsize); - - APPEND_DATA(2, out, outsize); /* XFL, 2 indicates best compression. */ - APPEND_DATA(3, out, outsize); /* OS follows Unix conventions. 
*/ - - Deflate(options, 2 /* Dynamic block */, 1, in, insize, &bp, out, outsize); - - /* CRC */ - APPEND_DATA(crcvalue % 256, out, outsize); - APPEND_DATA((crcvalue >> 8) % 256, out, outsize); - APPEND_DATA((crcvalue >> 16) % 256, out, outsize); - APPEND_DATA((crcvalue >> 24) % 256, out, outsize); - - /* ISIZE */ - APPEND_DATA(insize % 256, out, outsize); - APPEND_DATA((insize >> 8) % 256, out, outsize); - APPEND_DATA((insize >> 16) % 256, out, outsize); - APPEND_DATA((insize >> 24) % 256, out, outsize); - - if (options->verbose) { - fprintf(stderr, - "Original Size: %d, Compressed: %d, Compression: %f%% Removed\n", - (int)insize, (int)*outsize, - 100.0f * (float)(insize - *outsize) / (float)insize); - } -} diff -Nru zopfli-0~git130414/gzip_container.h zopfli-1.0.0/gzip_container.h --- zopfli-0~git130414/gzip_container.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/gzip_container.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -/* -Copyright 2013 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#ifndef ZOPFLI_GZIP_H_ -#define ZOPFLI_GZIP_H_ - -/* -Functions to compress according to the Gzip specification. -*/ - -#include "util.h" - -/* -Compresses according to the gzip specification and append the compressed -result to the output. 
- -options: global program options -out: pointer to the dynamic output array to which the result is appended. Must - be freed after use. -outsize: pointer to the dynamic output array size. -*/ -void GzipCompress(const Options* options, - const unsigned char* in, size_t insize, - unsigned char** out, size_t* outsize); - -#endif /* ZOPFLI_GZIP_H_ */ diff -Nru zopfli-0~git130414/hash.c zopfli-1.0.0/hash.c --- zopfli-0~git130414/hash.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/hash.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "hash.h" - -#include -#include -#include - -#define HASH_SHIFT 5 -#define HASH_MASK 32767 - -void InitHash(size_t window_size, Hash* h) { - size_t i; - - h->val = 0; - h->head = (int*)malloc(sizeof(*h->head) * 65536); - h->prev = (unsigned short*)malloc(sizeof(*h->prev) * window_size); - h->hashval = (int*)malloc(sizeof(*h->hashval) * window_size); - for (i = 0; i < 65536; i++) { - h->head[i] = -1; /* -1 indicates no head so far. */ - } - for (i = 0; i < window_size; i++) { - h->prev[i] = i; /* If prev[j] == j, then prev[j] is uninitialized. 
*/ - h->hashval[i] = -1; - } - -#ifdef USE_HASH_SAME - h->same = (unsigned short*)malloc(sizeof(*h->same) * window_size); - for (i = 0; i < window_size; i++) { - h->same[i] = 0; - } -#endif - -#ifdef USE_HASH_SAME_HASH - h->val2 = 0; - h->head2 = (int*)malloc(sizeof(*h->head2) * 65536); - h->prev2 = (unsigned short*)malloc(sizeof(*h->prev2) * window_size); - h->hashval2 = (int*)malloc(sizeof(*h->hashval2) * window_size); - for (i = 0; i < 65536; i++) { - h->head2[i] = -1; - } - for (i = 0; i < window_size; i++) { - h->prev2[i] = i; - h->hashval2[i] = -1; - } -#endif -} - -void CleanHash(Hash* h) { - free(h->head); - free(h->prev); - free(h->hashval); - -#ifdef USE_HASH_SAME_HASH - free(h->head2); - free(h->prev2); - free(h->hashval2); -#endif - -#ifdef USE_HASH_SAME - free(h->same); -#endif -} - -/* -Update the sliding hash value with the given byte. All calls to this function -must be made on consecutive input characters. Since the hash value exists out -of multiple input bytes, a few warmups with this function are needed initially. -*/ -static void UpdateHashValue(Hash* h, unsigned char c) { - h->val = (((h->val) << HASH_SHIFT) ^ (c)) & HASH_MASK; -} - -void UpdateHash(const unsigned char* array, size_t pos, size_t end, Hash* h) { - unsigned short hpos = pos & WINDOW_MASK; -#ifdef USE_HASH_SAME - size_t amount = 0; -#endif - - UpdateHashValue(h, pos + MIN_MATCH <= end ? array[pos + MIN_MATCH - 1] : 0); - h->hashval[hpos] = h->val; - if (h->head[h->val] != -1 && h->hashval[h->head[h->val]] == h->val) { - h->prev[hpos] = h->head[h->val]; - } - else h->prev[hpos] = hpos; - h->head[h->val] = hpos; - -#ifdef USE_HASH_SAME - /* Update "same". 
*/ - if (h->same[(pos - 1) & WINDOW_MASK] > 1) { - amount = h->same[(pos - 1) & WINDOW_MASK] - 1; - } - while (pos + amount + 1 < end && - array[pos] == array[pos + amount + 1] && amount < (unsigned short)(-1)) { - amount++; - } - h->same[hpos] = amount; -#endif - -#ifdef USE_HASH_SAME_HASH - h->val2 = ((h->same[hpos] - MIN_MATCH) & 255) ^ h->val; - h->hashval2[hpos] = h->val2; - if (h->head2[h->val2] != -1 && h->hashval2[h->head2[h->val2]] == h->val2) { - h->prev2[hpos] = h->head2[h->val2]; - } - else h->prev2[hpos] = hpos; - h->head2[h->val2] = hpos; -#endif -} - -void WarmupHash(const unsigned char* array, size_t pos, size_t end, Hash* h) { - (void)end; - UpdateHashValue(h, array[pos + 0]); - UpdateHashValue(h, array[pos + 1]); -} diff -Nru zopfli-0~git130414/hash.h zopfli-1.0.0/hash.h --- zopfli-0~git130414/hash.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/hash.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,68 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -The hash for FindLongestMatch of lz77.c. -*/ - -#ifndef ZOPFLI_HASH_H_ -#define ZOPFLI_HASH_H_ - -#include "util.h" - -typedef struct Hash { - int* head; /* Hash value to index of its most recent occurance. */ - unsigned short* prev; /* Index to index of prev. occurance of same hash. */ - int* hashval; /* Index to hash value at this index. 
*/ - int val; /* Current hash value. */ - -#ifdef USE_HASH_SAME_HASH - /* Fields with similar purpose as the above hash, but for the second hash with - a value that is calculated differently. */ - int* head2; /* Hash value to index of its most recent occurance. */ - unsigned short* prev2; /* Index to index of prev. occurance of same hash. */ - int* hashval2; /* Index to hash value at this index. */ - int val2; /* Current hash value. */ -#endif - -#ifdef USE_HASH_SAME - unsigned short* same; /* Amount of repetitions of same byte after this .*/ -#endif -} Hash; - -/* Allocates and initializes all fields of Hash. */ -void InitHash(size_t window_size, Hash* h); - -/* Frees all fields of Hash. */ -void CleanHash(Hash* h); - -/* -Updates the hash values based on the current position in the array. All calls -to this must be made for consecutive bytes. -*/ -void UpdateHash(const unsigned char* array, size_t pos, size_t end, Hash* h); - -/* -Prepopulates hash: -Fills in the initial values in the hash, before UpdateHash can be used -correctly. -*/ -void WarmupHash(const unsigned char* array, size_t pos, size_t end, Hash* h); - -#endif /* ZOPFLI_HASH_H_ */ diff -Nru zopfli-0~git130414/katajainen.c zopfli-1.0.0/katajainen.c --- zopfli-0~git130414/katajainen.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/katajainen.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,251 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -Bounded package merge algorithm, based on the paper -"A Fast and Space-Economical Algorithm for Length-Limited Coding -Jyrki Katajainen, Alistair Moffat, Andrew Turpin". -*/ - -#include "katajainen.h" -#include -#include - -typedef struct Node Node; - -/* -Nodes forming chains. Also used to represent leaves. -*/ -struct Node { - size_t weight; /* Total weight (symbol count) of this chain. */ - Node* tail; /* Previous node(s) of this chain, or 0 if none. */ - int count; /* Leaf symbol index, or number of leaves before this chain. */ - char inuse; /* Tracking for garbage collection. */ -}; - -/* -Memory pool for nodes. -*/ -typedef struct NodePool { - Node* nodes; /* The pool. */ - Node* next; /* Pointer to a possibly free node in the pool. */ - int size; /* Size of the memory pool. */ -} NodePool; - -/* -Initializes a chain node with the given values and marks it as in use. -*/ -static void InitNode(size_t weight, int count, Node* tail, Node* node) { - node->weight = weight; - node->count = count; - node->tail = tail; - node->inuse = 1; -} - -/* -Finds a free location in the memory pool. Performs garbage collection if needed. -lists: If given, used to mark in-use nodes during garbage collection. -maxbits: Size of lists. -pool: Memory pool to get free node from. -*/ -static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) { - for (;;) { - if (pool->next >= &pool->nodes[pool->size]) { - /* Garbage collection. */ - int i; - for (i = 0; i < pool->size; i++) { - pool->nodes[i].inuse = 0; - } - if (lists) { - for (i = 0; i < maxbits * 2; i++) { - Node* node; - for (node = lists[i / 2][i % 2]; node; node = node->tail) { - node->inuse = 1; - } - } - } - pool->next = &pool->nodes[0]; - } - if (!pool->next->inuse) break; /* Found one. */ - pool->next++; - } - return pool->next++; -} - - -/* -Performs a Boundary Package-Merge step. 
Puts a new chain in the given list. The -new chain is, depending on the weights, a leaf or a combination of two chains -from the previous list. -lists: The lists of chains. -maxbits: Number of lists. -leaves: The leaves, one per symbol. -numsymbols: Number of leaves. -pool: the node memory pool. -index: The index of the list in which a new chain or leaf is required. -final: Whether this is the last time this function is called. If it is then it - is no more needed to recursively call self. -*/ -static void BoundaryPM(Node* (*lists)[2], int maxbits, - Node* leaves, int numsymbols, NodePool* pool, int index, char final) { - Node* newchain; - Node* oldchain; - int lastcount = lists[index][1]->count; /* Count of last chain of list. */ - - if (index == 0 && lastcount >= numsymbols) return; - - newchain = GetFreeNode(lists, maxbits, pool); - oldchain = lists[index][1]; - - /* These are set up before the recursive calls below, so that there is a list - pointing to the new node, to let the garbage collection know it's in use. */ - lists[index][0] = oldchain; - lists[index][1] = newchain; - - if (index == 0) { - /* New leaf node in list 0. */ - InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain); - } else { - size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight; - if (lastcount < numsymbols && sum > leaves[lastcount].weight) { - /* New leaf inserted in list, so count is incremented. */ - InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail, - newchain); - } else { - InitNode(sum, lastcount, lists[index - 1][1], newchain); - if (!final) { - /* Two lookahead chains of previous list used up, create new ones. */ - BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0); - BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0); - } - } - } -} - -/* -Initializes each list with as lookahead chains the two leaves with lowest -weights. 
-*/ -static void InitLists( - NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) { - int i; - Node* node0 = GetFreeNode(0, maxbits, pool); - Node* node1 = GetFreeNode(0, maxbits, pool); - InitNode(leaves[0].weight, 1, 0, node0); - InitNode(leaves[1].weight, 2, 0, node1); - for (i = 0; i < maxbits; i++) { - lists[i][0] = node0; - lists[i][1] = node1; - } -} - -/* -Converts result of boundary package-merge to the bitlengths. The result in the -last chain of the last list contains the amount of active leaves in each list. -chain: Chain to extract the bit length from (last chain from last list). -*/ -static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) { - Node* node; - for (node = chain; node; node = node->tail) { - int i; - for (i = 0; i < node->count; i++) { - bitlengths[leaves[i].count]++; - } - } -} - -/* -Comparator for sorting the leaves. Has the function signature for qsort. -*/ -static int LeafComparator(const void* a, const void* b) { - return ((const Node*)a)->weight - ((const Node*)b)->weight; -} - -int LengthLimitedCodeLengths( - const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) { - NodePool pool; - int i; - int numsymbols = 0; /* Amount of symbols with frequency > 0. */ - int numBoundaryPMRuns; - - /* Array of lists of chains. Each list requires only two lookahead chains at - a time, so each list is a array of two Node*'s. */ - Node* (*lists)[2]; - - /* One leaf per symbol. Only numsymbols leaves will be used. */ - Node* leaves = (Node*)malloc(n * sizeof(*leaves)); - - /* Initialize all bitlengths at 0. */ - for (i = 0; i < n; i++) { - bitlengths[i] = 0; - } - - /* Count used symbols and place them in the leaves. */ - for (i = 0; i < n; i++) { - if (frequencies[i]) { - leaves[numsymbols].weight = frequencies[i]; - leaves[numsymbols].count = i; /* Index of symbol this leaf represents. */ - numsymbols++; - } - } - - /* Check special cases and error conditions. 
*/ - if ((1 << maxbits) < numsymbols) { - free(leaves); - return 1; /* Error, too few maxbits to represent symbols. */ - } - if (numsymbols == 0) { - free(leaves); - return 0; /* No symbols at all. OK. */ - } - if (numsymbols == 1) { - bitlengths[leaves[0].count] = 1; - free(leaves); - return 0; /* Only one symbol, give it bitlength 1, not 0. OK. */ - } - - /* Sort the leaves from lightest to heaviest. */ - qsort(leaves, numsymbols, sizeof(Node), LeafComparator); - - /* Initialize node memory pool. */ - pool.size = 2 * maxbits * (maxbits + 1); - pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes)); - pool.next = pool.nodes; - for (i = 0; i < pool.size; i++) { - pool.nodes[i].inuse = 0; - } - - lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists)); - InitLists(&pool, leaves, maxbits, lists); - - /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two - are already created in the initialization. Each BoundaryPM run creates one. */ - numBoundaryPMRuns = 2 * numsymbols - 4; - for (i = 0; i < numBoundaryPMRuns; i++) { - char final = i == numBoundaryPMRuns - 1; - BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final); - } - - ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths); - - free(lists); - free(leaves); - free(pool.nodes); - return 0; /* OK. */ -} diff -Nru zopfli-0~git130414/katajainen.h zopfli-1.0.0/katajainen.h --- zopfli-0~git130414/katajainen.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/katajainen.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#ifndef ZOPFLI_KATAJAINEN_H_ -#define ZOPFLI_KATAJAINEN_H_ - -#include <string.h> - -/* -Outputs minimum-redundancy length-limited code bitlengths for symbols with the -given counts. The bitlengths are limited by maxbits. - -The output is tailored for DEFLATE: symbols that never occur, get a bit length -of 0, and if only a single symbol occurs at least once, its bitlength will be 1, -and not 0 as would theoretically be needed for a single symbol. - -frequencies: The amount of occurances of each symbol. -n: The amount of symbols. -maxbits: Maximum bit length, inclusive. -bitlengths: Output, the bitlengths for the symbol prefix codes. -return: 0 for OK, non-0 for error. -*/ -int LengthLimitedCodeLengths( - const size_t* frequencies, int n, int maxbits, unsigned* bitlengths); - -#endif /* ZOPFLI_KATAJAINEN_H_ */ diff -Nru zopfli-0~git130414/lz77.c zopfli-1.0.0/lz77.c --- zopfli-0~git130414/lz77.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/lz77.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,461 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License.
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "lz77.h" - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> - -void InitLZ77Store(LZ77Store* store) { - store->size = 0; - store->litlens = 0; - store->dists = 0; -} - -void CleanLZ77Store(LZ77Store* store) { - free(store->litlens); - free(store->dists); -} - -void CopyLZ77Store( - const LZ77Store* source, LZ77Store* dest) { - size_t i; - CleanLZ77Store(dest); - dest->litlens = - (unsigned short*)malloc(sizeof(*dest->litlens) * source->size); - dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size); - - if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */ - - dest->size = source->size; - for (i = 0; i < source->size; i++) { - dest->litlens[i] = source->litlens[i]; - dest->dists[i] = source->dists[i]; - } -} - -/* -Appends the length and distance to the LZ77 arrays of the LZ77Store. -context must be a LZ77Store*. -*/ -void StoreLitLenDist(unsigned short length, unsigned short dist, - LZ77Store* store) { - size_t size2 = store->size; /* Needed for using APPEND_DATA twice. */ - APPEND_DATA(length, &store->litlens, &store->size); - APPEND_DATA(dist, &store->dists, &size2); -} - -/* -Gets the value of the length given the distance. Typically, the value of the -length is the length, but if the distance is very long, decrease the value of -the length a bit to make up for the fact that long distances use large amounts -of extra bits.
-*/ -static int GetLengthValue(int length, int distance) { - /* - At distance > 1024, using length 3 is no longer good, due to the large amount - of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and - this seems to be the sweet spot. - */ - return distance > 1024 ? length - 1 : length; -} - -void VerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, - unsigned short dist, unsigned short length) { - - /* TODO(lode): make this only run in a debug compile, it's for assert only. */ - size_t i; - - assert(pos + length <= datasize); - for (i = 0; i < length; i++) { - if (data[pos - dist + i] != data[pos + i]) { - assert(data[pos - dist + i] == data[pos + i]); - break; - } - } -} - -/* -Finds how long the match of scan and match is. Can be used to find how many -bytes starting from scan, and from match, are equal. Returns the last byte -after scan, which is still equal to the correspondinb byte after match. -scan is the position to compare -match is the earlier position to compare. -end is the last possible byte, beyond which to stop looking. -safe_end is a few (8) bytes before end, for comparing multiple bytes at once. -*/ -static const unsigned char* GetMatch(const unsigned char* scan, - const unsigned char* match, - const unsigned char* end, - const unsigned char* safe_end) { - - if (sizeof(size_t) == 8) { - /* 8 checks at once per array bounds check (size_t is 64-bit). */ - while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) { - scan += 8; - match += 8; - } - } else if (sizeof(unsigned int) == 4) { - /* 4 checks at once per array bounds check (unsigned int is 32-bit). */ - while (scan < safe_end - && *((unsigned int*)scan) == *((unsigned int*)match)) { - scan += 4; - match += 4; - } - } else { - /* do 8 checks at once per array bounds check. 
*/ - while (scan < safe_end && *scan == *match && *++scan == *++match - && *++scan == *++match && *++scan == *++match - && *++scan == *++match && *++scan == *++match - && *++scan == *++match && *++scan == *++match) { - scan++; match++; - } - } - - /* The remaining few bytes. */ - while (scan != end && *scan == *match) { - scan++; match++; - } - - return scan; -} - -#ifdef USE_LONGEST_MATCH_CACHE -/* -Gets distance, length and sublen values from the cache if possible. -Returns 1 if it got the values from the cache, 0 if not. -Updates the limit value to a smaller one if possible with more limited -information from the cache. -*/ -int TryGetFromLongestMatchCache(BlockState* s, size_t pos, size_t* limit, - unsigned short* sublen, unsigned short* distance, unsigned short* length) { - /* The LMC cache starts at the beginning of the block rather than the - beginning of the whole array. */ - size_t lmcpos = pos - s->blockstart; - - /* Length > 0 and dist 0 is invalid combination, which indicates on purpose - that this cache value is not filled in yet. 
*/ - unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || - s->lmc->dist[lmcpos] != 0); - unsigned char limit_ok_for_cache = cache_available && (*limit == MAX_MATCH || - s->lmc->length[lmcpos] <= *limit || - (sublen && MaxCachedSublen(s->lmc, - lmcpos, s->lmc->length[lmcpos]) >= *limit)); - - if (s->lmc && limit_ok_for_cache && cache_available) { - if (!sublen || s->lmc->length[lmcpos] - <= MaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) { - *length = s->lmc->length[lmcpos]; - if (*length > *limit) *length = *limit; - if (sublen) { - CacheToSublen(s->lmc, lmcpos, *length, sublen); - *distance = sublen[*length]; - if (*limit == MAX_MATCH && *length >= MIN_MATCH) { - assert(sublen[*length] == s->lmc->dist[lmcpos]); - } - } else { - *distance = s->lmc->dist[lmcpos]; - } - return 1; - } - /* Can't use much of the cache, since the "sublens" need to be calculated, - but at least we already know when to stop. */ - *limit = s->lmc->length[lmcpos]; - } - - return 0; -} - -/* -Stores the found sublen, distance and length in the longest match cache, if -possible. -*/ -void StoreInLongestMatchCache(BlockState* s, size_t pos, size_t limit, - const unsigned short* sublen, - unsigned short distance, unsigned short length) { - /* The LMC cache starts at the beginning of the block rather than the - beginning of the whole array. */ - size_t lmcpos = pos - s->blockstart; - - /* Length > 0 and dist 0 is invalid combination, which indicates on purpose - that this cache value is not filled in yet. */ - unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || - s->lmc->dist[lmcpos] != 0); - - if (s->lmc && limit == MAX_MATCH && sublen && !cache_available) { - assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0); - s->lmc->dist[lmcpos] = length < MIN_MATCH ? 0 : distance; - s->lmc->length[lmcpos] = length < MIN_MATCH ? 
0 : length; - assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0)); - SublenToCache(sublen, lmcpos, length, s->lmc); - } -} -#endif - -void FindLongestMatch(BlockState* s, const Hash* h, const unsigned char* array, - size_t pos, size_t size, size_t limit, - unsigned short* sublen, unsigned short* distance, unsigned short* length) { - unsigned short hpos = pos & WINDOW_MASK, p, pp; - unsigned short bestdist = 0; - unsigned short bestlength = 1; - const unsigned char* scan; - const unsigned char* match; - const unsigned char* arrayend; - const unsigned char* arrayend_safe; -#if MAX_CHAIN_HITS < WINDOW_SIZE - int chain_counter = MAX_CHAIN_HITS; /* For quitting early. */ -#endif - - unsigned dist = 0; /* Not unsigned short on purpose. */ - - int* hhead = h->head; - unsigned short* hprev = h->prev; - int* hhashval = h->hashval; - int hval = h->val; - -#ifdef USE_LONGEST_MATCH_CACHE - if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) { - assert(pos + *length <= size); - return; - } -#endif - - assert(limit <= MAX_MATCH); - assert(limit >= MIN_MATCH); - assert(pos < size); - - if (size - pos < MIN_MATCH) { - /* The rest of the code assumes there are at least MIN_MATCH bytes to - try. */ - *length = 0; - *distance = 0; - return; - } - - if (pos + limit > size) { - limit = size - pos; - } - arrayend = &array[pos] + limit; - arrayend_safe = arrayend - 8; - - assert(hval < 65536); - - pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */ - p = hprev[pp]; - - assert(pp == hpos); - - dist = p < pp ? pp - p : ((WINDOW_SIZE - p) + pp); - - /* Go through all distances. */ - while (dist < WINDOW_SIZE) { - unsigned short currentlength = 0; - - assert(p < WINDOW_SIZE); - assert(p == hprev[pp]); - assert(hhashval[p] == hval); - - if (dist > 0) { - assert(pos < size); - assert(dist <= pos); - scan = &array[pos]; - match = &array[pos - dist]; - - /* Testing the byte at position bestlength first, goes slightly faster. 
*/ - if (pos + bestlength >= size - || *(scan + bestlength) == *(match + bestlength)) { - -#ifdef USE_HASH_SAME - unsigned short same0 = h->same[pos & WINDOW_MASK]; - if (same0 > 2 && *scan == *match) { - unsigned short same1 = h->same[(pos - dist) & WINDOW_MASK]; - unsigned short same = same0 < same1 ? same0 : same1; - if (same > limit) same = limit; - scan += same; - match += same; - } -#endif - scan = GetMatch(scan, match, arrayend, arrayend_safe); - currentlength = scan - &array[pos]; /* The found length. */ - } - - if (currentlength > bestlength) { - if (sublen) { - unsigned short j; - for (j = bestlength + 1; j <= currentlength; j++) { - sublen[j] = dist; - } - } - bestdist = dist; - bestlength = currentlength; - if (currentlength >= limit) break; - } - } - - -#ifdef USE_HASH_SAME_HASH - /* Switch to the other hash once this will be more efficient. */ - if (hhead != h->head2 && bestlength >= h->same[hpos] && - h->val2 == h->hashval2[p]) { - /* Now use the hash that encodes the length and first byte. */ - hhead = h->head2; - hprev = h->prev2; - hhashval = h->hashval2; - hval = h->val2; - } -#endif - - pp = p; - p = hprev[p]; - if (p == pp) break; /* Uninited prev value. */ - - dist += p < pp ? pp - p : ((WINDOW_SIZE - p) + pp); - -#if MAX_CHAIN_HITS < WINDOW_SIZE - chain_counter--; - if (chain_counter <= 0) break; -#endif - } - -#ifdef USE_LONGEST_MATCH_CACHE - StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength); -#endif - - assert(bestlength <= limit); - - *distance = bestdist; - *length = bestlength; - assert(pos + *length <= size); -} - -void LZ77Greedy(BlockState* s, const unsigned char* in, - size_t instart, size_t inend, - LZ77Store* store) { - size_t i = 0, j; - unsigned short leng; - unsigned short dist; - int lengvalue; - size_t windowstart = instart > WINDOW_SIZE ? instart - WINDOW_SIZE : 0; - unsigned short dummysublen[259]; - - Hash hash; - Hash* h = &hash; - -#ifdef LAZY_MATCHING - /* Lazy matching. 
*/ - unsigned prev_length = 0; - unsigned prev_match = 0; - int prevlengvalue; - int match_available = 0; -#endif - - if (instart == inend) return; - - InitHash(WINDOW_SIZE, h); - WarmupHash(in, windowstart, inend, h); - for (i = windowstart; i < instart; i++) { - UpdateHash(in, i, inend, h); - } - - for (i = instart; i < inend; i++) { - UpdateHash(in, i, inend, h); - - FindLongestMatch(s, h, in, i, inend, MAX_MATCH, dummysublen, &dist, &leng); - lengvalue = GetLengthValue(leng, dist); - -#ifdef LAZY_MATCHING - /* Lazy matching. */ - prevlengvalue = GetLengthValue(prev_length, prev_match); - if (match_available) { - match_available = 0; - if (lengvalue > prevlengvalue + 1) { - StoreLitLenDist(in[i - 1], 0, store); - if (lengvalue >= MIN_MATCH && lengvalue < MAX_MATCH) { - match_available = 1; - prev_length = leng; - prev_match = dist; - continue; - } - } else { - /* Add previous to output. */ - leng = prev_length; - dist = prev_match; - lengvalue = prevlengvalue; - /* Add to output. */ - VerifyLenDist(in, inend, i - 1, dist, leng); - StoreLitLenDist(leng, dist, store); - for (j = 2; j < leng; j++) { - assert(i < inend); - i++; - UpdateHash(in, i, inend, h); - } - continue; - } - } - else if (lengvalue >= MIN_MATCH && leng < MAX_MATCH) { - match_available = 1; - prev_length = leng; - prev_match = dist; - continue; - } - /* End of lazy matching. */ -#endif - - /* Add to output. 
*/ - if (lengvalue >= MIN_MATCH) { - VerifyLenDist(in, inend, i, dist, leng); - StoreLitLenDist(leng, dist, store); - } else { - leng = 1; - StoreLitLenDist(in[i], 0, store); - } - for (j = 1; j < leng; j++) { - assert(i < inend); - i++; - UpdateHash(in, i, inend, h); - } - } - - CleanHash(h); -} - -void GetLZ77Counts(const unsigned short* litlens, const unsigned short* dists, - size_t start, size_t end, - size_t* ll_count, size_t* d_count) { - size_t i; - - for (i = 0; i < 288; i++) { - ll_count[i] = 0; - } - for (i = 0; i < 32; i++) { - d_count[i] = 0; - } - - for (i = start; i < end; i++) { - if (dists[i] == 0) { - ll_count[litlens[i]]++; - } else { - ll_count[GetLengthSymbol(litlens[i])]++; - d_count[GetDistSymbol(dists[i])]++; - } - } - - ll_count[256] = 1; /* End symbol. */ -} diff -Nru zopfli-0~git130414/lz77.h zopfli-1.0.0/lz77.h --- zopfli-0~git130414/lz77.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/lz77.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,126 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -Functions for basic LZ77 compression and utilities for the "squeeze" LZ77 -compression. -*/ - -#ifndef ZOPFLI_LZ77_H_ -#define ZOPFLI_LZ77_H_ - -#include <stdlib.h> - -#include "cache.h" -#include "hash.h" -#include "util.h" - -/* -Stores lit/length and dist pairs for LZ77.
-litlens: Contains the literal symbols or length values. -dists: Indicates the distance, or 0 to indicate that there is no distance and -litlens contains a literal instead of a length. -litlens and dists both have the same size. -*/ -typedef struct LZ77Store { - unsigned short* litlens; /* Lit or len. */ - unsigned short* dists; /* If 0: indicates literal in corresponding litlens, - if > 0: length in corresponding litlens, this is the distance. */ - size_t size; -} LZ77Store; - -void InitLZ77Store(LZ77Store* store); -void CleanLZ77Store(LZ77Store* store); -void CopyLZ77Store(const LZ77Store* source, LZ77Store* dest); -void StoreLitLenDist(unsigned short length, unsigned short dist, - LZ77Store* store); - -/* -Some state information for compressing a block. -This is currently a bit under-used (with mainly only the longest match cache), -but is kept for easy future expansion. -*/ -typedef struct BlockState { - const Options* options; - -#ifdef USE_LONGEST_MATCH_CACHE - /* Cache for length/distance pairs found so far. */ - LongestMatchCache* lmc; -#endif - - /* The start (inclusive) and end (not inclusive) of the current block. */ - size_t blockstart; - size_t blockend; -} BlockState; - -/* -Finds the longest match (length and corresponding distance) for LZ77 -compression. -Even when not using "sublen", it can be more efficient to provide an array, -because only then the caching is used. -array: the data -pos: position in the data to find the match for -size: size of the data -limit: limit length to maximum this value (default should be 258). This allows - finding a shorter dist for that length (= less extra bits). Must be - in the range [MIN_MATCH, MAX_MATCH]. -sublen: output array of 259 elements, or null. Has, for each length, the - smallest distance required to reach this length. Only 256 of its 259 values - are used, the first 3 are ignored (the shortest length is 3. It is purely - for convenience that the array is made 3 longer). 
-*/ - -void FindLongestMatch( - BlockState *s, const Hash* h, const unsigned char* array, - size_t pos, size_t size, size_t limit, - unsigned short* sublen, unsigned short* distance, unsigned short* length); - -/* -Verifies if length and dist are indeed valid, only used for assertion. -*/ -void VerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, - unsigned short dist, unsigned short length); - -/* -Counts the number of literal, length and distance symbols in the given lz77 -arrays. -litlens: lz77 lit/lengths -dists: ll77 distances -start: where to begin counting in litlens and dists -end: where to stop counting in litlens and dists (not inclusive) -ll_count: count of each lit/len symbol, must have size 288 (see deflate - standard) -d_count: count of each dist symbol, must have size 32 (see deflate standard) -*/ -void GetLZ77Counts(const unsigned short* litlens, const unsigned short* dists, - size_t start, size_t end, - size_t* ll_count, size_t* d_count); - -/* -Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than -with the slow but better "squeeze" implementation. -The result is placed in the LZ77Store. -If instart is larger than 0, it uses values before instart as starting -dictionary. 
-*/ -void LZ77Greedy(BlockState* s, const unsigned char* in, - size_t instart, size_t inend, - LZ77Store* store); - -#endif /* ZOPFLI_LZ77_H_ */ diff -Nru zopfli-0~git130414/makefile zopfli-1.0.0/makefile --- zopfli-0~git130414/makefile 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/makefile 2013-04-25 16:09:00.000000000 +0000 @@ -1,5 +1,5 @@ make: - gcc *.c -O2 -W -Wall -Wextra -ansi -pedantic -lm -o zopfli + gcc src/zopfli/*.c -O2 -W -Wall -Wextra -ansi -pedantic -lm -o zopfli debug: - gcc *.c -g3 -lm -o zopfli + gcc src/zopfli/*.c -g3 -lm -o zopfli diff -Nru zopfli-0~git130414/squeeze.c zopfli-1.0.0/squeeze.c --- zopfli-0~git130414/squeeze.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/squeeze.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,528 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "squeeze.h" - -#include <assert.h> -#include <math.h> -#include <stdio.h> - -#include "blocksplitter.h" -#include "deflate.h" -#include "tree.h" -#include "util.h" - -typedef struct SymbolStats { - /* The literal and length symbols. */ - size_t litlens[288]; - /* The 32 unique dist symbols, not the 32768 possible dists. */ - size_t dists[32]; - - double ll_symbols[288]; /* Length of each lit/len symbol in bits. */ - double d_symbols[32]; /* Length of each dist symbol in bits. */ -} SymbolStats; - -/* Sets everything to 0.
*/ -static void InitStats(SymbolStats* stats) { - memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0])); - memset(stats->dists, 0, 32 * sizeof(stats->dists[0])); - - memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0])); - memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0])); -} - -static void CopyStats(SymbolStats* source, SymbolStats* dest) { - memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0])); - memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0])); - - memcpy(dest->ll_symbols, source->ll_symbols, - 288 * sizeof(dest->ll_symbols[0])); - memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0])); -} - -/* Adds the bit lengths. */ -static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1, - const SymbolStats* stats2, double w2, - SymbolStats* result) { - size_t i; - for (i = 0; i < 288; i++) { - result->litlens[i] = - (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2); - } - for (i = 0; i < 32; i++) { - result->dists[i] = - (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2); - } - result->litlens[256] = 1; /* End symbol. */ -} - -/* Get random number: "Multiply-With-Carry" generator of G. Marsaglia */ -static unsigned int Ran() { - static unsigned int m_w = 1; - static unsigned int m_z = 2; - m_z = 36969 * (m_z & 65535) + (m_z >> 16); - m_w = 18000 * (m_w & 65535) + (m_w >> 16); - return (m_z << 16) + m_w; /* 32-bit result. */ -} - -static void RandomizeFreqs(size_t* freqs, int n) { - int i; - for (i = 0; i < n; i++) { - if ((Ran() >> 4) % 3 == 0) freqs[i] = freqs[Ran() % n]; - } -} - -static void RandomizeStatFreqs(SymbolStats* stats) { - RandomizeFreqs(stats->litlens, 288); - RandomizeFreqs(stats->dists, 32); - stats->litlens[256] = 1; /* End symbol. 
*/ -} - -static void ClearStatFreqs(SymbolStats* stats) { - size_t i; - for (i = 0; i < 288; i++) stats->litlens[i] = 0; - for (i = 0; i < 32; i++) stats->dists[i] = 0; -} - -/* -Function that calculates a cost based on a model for the given LZ77 symbol. -litlen: means literal symbol if dist is 0, length otherwise. -*/ -typedef double CostModelFun(unsigned litlen, unsigned dist, void* context); - -/* -Cost model which should exactly match fixed tree. -type: CostModelFun -*/ -static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) { - (void)unused; - if (dist == 0) { - if (litlen <= 143) return 8; - else return 9; - } else { - int dbits = GetDistExtraBits(dist); - int lbits = GetLengthExtraBits(litlen); - int lsym = GetLengthSymbol(litlen); - double cost = 0; - if (lsym <= 279) cost += 7; - else cost += 8; - cost += 5; /* Every dist symbol has length 5. */ - return cost + dbits + lbits; - } -} - -/* -Cost model based on symbol statistics. -type: CostModelFun -*/ -static double GetCostStat(unsigned litlen, unsigned dist, void* context) { - SymbolStats* stats = (SymbolStats*)context; - if (dist == 0) { - return stats->ll_symbols[litlen]; - } else { - int lsym = GetLengthSymbol(litlen); - int lbits = GetLengthExtraBits(litlen); - int dsym = GetDistSymbol(dist); - int dbits = GetDistExtraBits(dist); - return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits; - } -} - -/* -Finds the minimum possible cost this cost model can return for valid length and -distance symbols. -*/ -static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) { - double mincost; - int bestlength = 0; /* length that has lowest cost in the cost model */ - int bestdist = 0; /* distance that has lowest cost in the cost model */ - int i; - /* - Table of distances that have a different distance symbol in the deflate - specification. Each value is the first distance that has a new symbol. 
Only - different symbols affect the cost model so only these need to be checked. - See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes). - */ - static const int dsymbols[30] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, - 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 - }; - - mincost = LARGE_FLOAT; - for (i = 3; i < 259; i++) { - double c = costmodel(i, 1, costcontext); - if (c < mincost) { - bestlength = i; - mincost = c; - } - } - - mincost = LARGE_FLOAT; - for (i = 0; i < 30; i++) { - double c = costmodel(3, dsymbols[i], costcontext); - if (c < mincost) { - bestdist = dsymbols[i]; - mincost = c; - } - } - - return costmodel(bestlength, bestdist, costcontext); -} - -/* -Performs the forward pass for "squeeze". Gets the most optimal length to reach -every byte from a previous byte, using cost calculations. -s: the BlockState -in: the input data array -instart: where to start -inend: where to stop (not inclusive) -costmodel: function to calculate the cost of some lit/len/dist pair. -costcontext: abstract context for the costmodel function -length_array: output array of size (inend - instart) which will receive the best - length to reach this byte from a previous byte. -returns the cost that was, according to the costmodel, needed to get to the end. -*/ -static double GetBestLengths(BlockState *s, - const unsigned char* in, - size_t instart, size_t inend, - CostModelFun* costmodel, void* costcontext, - unsigned short* length_array) { - /* Best cost to get here so far. */ - size_t blocksize = inend - instart; - float* costs; - size_t i = 0, k; - unsigned short leng; - unsigned short dist; - unsigned short sublen[259]; - size_t windowstart = instart > WINDOW_SIZE ? 
instart - WINDOW_SIZE : 0; - Hash hash; - Hash* h = &hash; - double result; - double mincost = GetCostModelMinCost(costmodel, costcontext); - - if (instart == inend) return 0; - - costs = (float*)malloc(sizeof(float) * (blocksize + 1)); - if (!costs) exit(-1); /* Allocation failed. */ - - InitHash(WINDOW_SIZE, h); - WarmupHash(in, windowstart, inend, h); - for (i = windowstart; i < instart; i++) { - UpdateHash(in, i, inend, h); - } - - for (i = 1; i < blocksize + 1; i++) costs[i] = LARGE_FLOAT; - costs[0] = 0; /* Because it's the start. */ - length_array[0] = 0; - - for (i = instart; i < inend; i++) { - size_t j = i - instart; /* Index in the costs array and length_array. */ - UpdateHash(in, i, inend, h); - -#ifdef SHORTCUT_LONG_REPETITIONS - /* If we're in a long repetition of the same character and have more than - MAX_MATCH characters before and after our position. */ - if (h->same[i & WINDOW_MASK] > MAX_MATCH * 2 - && i > instart + MAX_MATCH + 1 - && i + MAX_MATCH * 2 + 1 < inend - && h->same[(i - MAX_MATCH) & WINDOW_MASK] > MAX_MATCH) { - double symbolcost = costmodel(MAX_MATCH, 1, costcontext); - /* Set the length to reach each one to MAX_MATCH, and the cost to the - cost corresponding to that length. Doing this, we skip MAX_MATCH - values to avoid calling FindLongestMatch. */ - for (k = 0; k < MAX_MATCH; k++) { - costs[j + MAX_MATCH] = costs[j] + symbolcost; - length_array[j + MAX_MATCH] = MAX_MATCH; - i++; - j++; - UpdateHash(in, i, inend, h); - } - } -#endif - - FindLongestMatch(s, h, in, i, inend, MAX_MATCH, sublen, &dist, &leng); - - /* Literal. */ - if (i + 1 <= inend) { - double newCost = costs[j] + costmodel(in[i], 0, costcontext); - assert(newCost >= 0); - if (newCost < costs[j + 1]) { - costs[j + 1] = newCost; - length_array[j + 1] = 1; - } - } - /* Lengths. */ - for (k = 3; k <= leng && i + k <= inend; k++) { - double newCost; - - /* Calling the cost model is expensive, avoid this if we are already at - the minimum possible cost that it can return. 
*/ - if (costs[j + k] - costs[j] <= mincost) continue; - - newCost = costs[j] + costmodel(k, sublen[k], costcontext); - assert(newCost >= 0); - if (newCost < costs[j + k]) { - assert(k <= MAX_MATCH); - costs[j + k] = newCost; - length_array[j + k] = k; - } - } - } - - assert(costs[blocksize] >= 0); - result = costs[blocksize]; - - CleanHash(h); - free(costs); - - return result; -} - -/* -Calculates the optimal path of lz77 lengths to use, from the calculated -length_array. The length_array must contain the optimal length to reach that -byte. The path will be filled with the lengths to use, so its data size will be -the amount of lz77 symbols. -*/ -static void TraceBackwards(size_t size, const unsigned short* length_array, - unsigned short** path, size_t* pathsize) { - size_t index = size; - if (size == 0) return; - for (;;) { - APPEND_DATA(length_array[index], path, pathsize); - assert(length_array[index] <= index); - assert(length_array[index] <= MAX_MATCH); - assert(length_array[index] != 0); - index -= length_array[index]; - if (index == 0) break; - } - - /* Mirror result. */ - for (index = 0; index < *pathsize / 2; index++) { - unsigned short temp = (*path)[index]; - (*path)[index] = (*path)[*pathsize - index - 1]; - (*path)[*pathsize - index - 1] = temp; - } -} - -static void FollowPath(BlockState* s, - const unsigned char* in, size_t instart, size_t inend, - unsigned short* path, size_t pathsize, - LZ77Store* store) { - size_t i, j, pos = 0; - size_t windowstart = instart > WINDOW_SIZE ? 
instart - WINDOW_SIZE : 0; - - size_t total_length_test = 0; - - Hash hash; - Hash* h = &hash; - - if (instart == inend) return; - - InitHash(WINDOW_SIZE, h); - WarmupHash(in, windowstart, inend, h); - for (i = windowstart; i < instart; i++) { - UpdateHash(in, i, inend, h); - } - - pos = instart; - for (i = 0; i < pathsize; i++) { - unsigned short length = path[i]; - unsigned short dummy_length; - unsigned short dist; - assert(pos < inend); - - UpdateHash(in, pos, inend, h); - - /* Add to output. */ - if (length >= MIN_MATCH) { - /* Get the distance by recalculating longest match. The found length - should match the length from the path. */ - FindLongestMatch(s, h, in, pos, inend, length, 0, &dist, &dummy_length); - assert(!(dummy_length != length && length > 2 && dummy_length > 2)); - VerifyLenDist(in, inend, pos, dist, length); - StoreLitLenDist(length, dist, store); - total_length_test += length; - } else { - length = 1; - StoreLitLenDist(in[pos], 0, store); - total_length_test++; - } - - - assert(pos + length <= inend); - for (j = 1; j < length; j++) { - UpdateHash(in, pos + j, inend, h); - } - - pos += length; - } - - CleanHash(h); -} - -/* Calculates the entropy of the statistics */ -static void CalculateStatistics(SymbolStats* stats) { - CalculateEntropy(stats->litlens, 288, stats->ll_symbols); - CalculateEntropy(stats->dists, 32, stats->d_symbols); -} - -/* Appends the symbol statistics from the store. */ -static void GetStatistics(const LZ77Store* store, SymbolStats* stats) { - size_t i; - for (i = 0; i < store->size; i++) { - if (store->dists[i] == 0) { - stats->litlens[store->litlens[i]]++; - } else { - stats->litlens[GetLengthSymbol(store->litlens[i])]++; - stats->dists[GetDistSymbol(store->dists[i])]++; - } - } - stats->litlens[256] = 1; /* End symbol. */ - - CalculateStatistics(stats); -} - -/* -Does a single run for LZ77Optimal. For good compression, repeated runs with -updated statistics should be performed. 
- -s: the block state -in: the input data array -instart: where to start -inend: where to stop (not inclusive) -path: pointer to dynamically allocated memory to store the path -pathsize: pointer to the size of the dynamic path array -length_array: array if size (inend - instart) used to store lengths -costmodel: function to use as the cost model for this squeeze run -costcontext: abstract context for the costmodel function -store: place to output the LZ77 data -returns the cost that was, according to the costmodel, needed to get to the end. - This is not the actual cost. -*/ -static double LZ77OptimalRun(BlockState* s, - const unsigned char* in, size_t instart, size_t inend, - unsigned short** path, size_t* pathsize, - unsigned short* length_array, CostModelFun* costmodel, - void* costcontext, LZ77Store* store) { - double cost = GetBestLengths( - s, in, instart, inend, costmodel, costcontext, length_array); - free(*path); - *path = 0; - *pathsize = 0; - TraceBackwards(inend - instart, length_array, path, pathsize); - FollowPath(s, in, instart, inend, *path, *pathsize, store); - assert(cost < LARGE_FLOAT); - return cost; -} - -void LZ77Optimal(BlockState *s, - const unsigned char* in, size_t instart, size_t inend, - LZ77Store* store) { - /* Dist to get to here with smallest cost. */ - size_t blocksize = inend - instart; - unsigned short* length_array = - (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); - unsigned short* path = 0; - size_t pathsize = 0; - LZ77Store currentstore; - SymbolStats stats, beststats, laststats; - int i; - double cost; - double bestcost = LARGE_FLOAT; - double lastcost = 0; - /* Try randomizing the costs a bit once the size stabilizes. */ - int lastrandomstep = -1; - - if (!length_array) exit(-1); /* Allocation failed. */ - - InitStats(&stats); - InitLZ77Store(¤tstore); - - /* Do regular deflate, then loop multiple shortest path runs, each time using - the statistics of the previous run. */ - - /* Initial run. 
*/ - LZ77Greedy(s, in, instart, inend, ¤tstore); - GetStatistics(¤tstore, &stats); - - /* Repeat statistics with each time the cost model from the previous stat - run. */ - for (i = 0; i < s->options->numiterations; i++) { - CleanLZ77Store(¤tstore); - InitLZ77Store(¤tstore); - LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, - length_array, GetCostStat, (void*)&stats, - ¤tstore); - cost = CalculateBlockSize(currentstore.litlens, currentstore.dists, - 0, currentstore.size, 2); - if (cost < bestcost) { - /* Copy to the output store. */ - CopyLZ77Store(¤tstore, store); - CopyStats(&stats, &beststats); - bestcost = cost; - } - CopyStats(&stats, &laststats); - ClearStatFreqs(&stats); - GetStatistics(¤tstore, &stats); - if (lastrandomstep != -1) { - /* This makes it converge slower but better. Do it only once the - randomness kicks in so that if the user does few iterations, it gives a - better result sooner. */ - AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats); - CalculateStatistics(&stats); - } - if (i > 5 && cost == lastcost) { - CopyStats(&beststats, &stats); - RandomizeStatFreqs(&stats); - CalculateStatistics(&stats); - lastrandomstep = i; - } - lastcost = cost; - } - - free(length_array); - free(path); - CleanLZ77Store(¤tstore); -} - -void LZ77OptimalFixed(BlockState *s, - const unsigned char* in, size_t instart, size_t inend, - LZ77Store* store) -{ - /* Dist to get to here with smallest cost. */ - size_t blocksize = inend - instart; - unsigned short* length_array = - (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); - unsigned short* path = 0; - size_t pathsize = 0; - - if (!length_array) exit(-1); /* Allocation failed. */ - - s->blockstart = instart; - s->blockend = inend; - - /* Shortest path for fixed tree This one should give the shortest possible - result for fixed tree, no repeated runs are needed since the tree is known. 
*/ - LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, - length_array, GetCostFixed, 0, store); - - free(length_array); - free(path); -} diff -Nru zopfli-0~git130414/squeeze.h zopfli-1.0.0/squeeze.h --- zopfli-0~git130414/squeeze.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/squeeze.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -The squeeze functions do enhanced LZ77 compression by optimal parsing with a -cost model, rather than greedily choosing the longest length or using a single -step of lazy matching like regular implementations. - -Since the cost model is based on the Huffman tree that can only be calculated -after the LZ77 data is generated, there is a chicken and egg problem, and -multiple runs are done with updated cost models to converge to a better -solution. -*/ - -#ifndef ZOPFLI_SQUEEZE_H_ -#define ZOPFLI_SQUEEZE_H_ - -#include "lz77.h" - -/* -Calculates lit/len and dist pairs for given data. -If instart is larger than 0, it uses values before instart as starting -dictionary. -*/ -void LZ77Optimal(BlockState *s, - const unsigned char* in, size_t instart, size_t inend, - LZ77Store* store); - -/* -Does the same as LZ77Optimal, but optimized for the fixed tree of the deflate -standard. -The fixed tree rarely gives the best compression. 
But this gives the best -possible LZ77 encoding possible with the fixed tree. -This does not create or output any fixed tree, only LZ77 data optimized for -using with a fixed tree. -If instart is larger than 0, it uses values before instart as starting -dictionary. -*/ -void LZ77OptimalFixed(BlockState *s, - const unsigned char* in, size_t instart, size_t inend, - LZ77Store* store); - -#endif /* ZOPFLI_SQUEEZE_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/blocksplitter.c zopfli-1.0.0/src/zopfli/blocksplitter.c --- zopfli-0~git130414/src/zopfli/blocksplitter.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/blocksplitter.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,344 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "blocksplitter.h" + +#include +#include +#include + +#include "deflate.h" +#include "lz77.h" +#include "squeeze.h" +#include "tree.h" +#include "util.h" + +/* +The "f" for the FindMinimum function below. +i: the current parameter of f(i) +context: for your implementation +*/ +typedef double FindMinimumFun(size_t i, void* context); + +/* +Finds minimum of function f(i) where is is of type size_t, f(i) is of type +double, i is in range start-end (excluding end). 
+*/ +static size_t FindMinimum(FindMinimumFun f, void* context, + size_t start, size_t end) { + if (end - start < 1024) { + double best = ZOPFLI_LARGE_FLOAT; + size_t result = start; + size_t i; + for (i = start; i < end; i++) { + double v = f(i, context); + if (v < best) { + best = v; + result = i; + } + } + return result; + } else { + /* Try to find minimum faster by recursively checking multiple points. */ +#define NUM 9 /* Good value: 9. */ + size_t i; + size_t p[NUM]; + double vp[NUM]; + size_t besti; + double best; + double lastbest = ZOPFLI_LARGE_FLOAT; + size_t pos = start; + + for (;;) { + if (end - start <= NUM) break; + + for (i = 0; i < NUM; i++) { + p[i] = start + (i + 1) * ((end - start) / (NUM + 1)); + vp[i] = f(p[i], context); + } + besti = 0; + best = vp[0]; + for (i = 1; i < NUM; i++) { + if (vp[i] < best) { + best = vp[i]; + besti = i; + } + } + if (best > lastbest) break; + + start = besti == 0 ? start : p[besti - 1]; + end = besti == NUM - 1 ? end : p[besti + 1]; + + pos = p[besti]; + lastbest = best; + } + return pos; +#undef NUM + } +} + +/* +Returns estimated cost of a block in bits. It includes the size to encode the +tree and the size to encode all literal, length and distance symbols and their +extra bits. + +litlens: lz77 lit/lengths +dists: ll77 distances +lstart: start of block +lend: end of block (not inclusive) +*/ +static double EstimateCost(const unsigned short* litlens, + const unsigned short* dists, + size_t lstart, size_t lend) { + return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2); +} + +typedef struct SplitCostContext { + const unsigned short* litlens; + const unsigned short* dists; + size_t llsize; + size_t start; + size_t end; +} SplitCostContext; + + +/* +Gets the cost which is the sum of the cost of the left and the right section +of the data. 
+type: FindMinimumFun +*/ +static double SplitCost(size_t i, void* context) { + SplitCostContext* c = (SplitCostContext*)context; + return EstimateCost(c->litlens, c->dists, c->start, i) + + EstimateCost(c->litlens, c->dists, i, c->end); +} + +static void AddSorted(size_t value, size_t** out, size_t* outsize) { + size_t i; + ZOPFLI_APPEND_DATA(value, out, outsize); + if (*outsize > 0) { + for (i = 0; i < *outsize - 1; i++) { + if ((*out)[i] > value) { + size_t j; + for (j = *outsize - 1; j > i; j--) { + (*out)[j] = (*out)[j - 1]; + } + (*out)[i] = value; + break; + } + } + } +} + +/* +Prints the block split points as decimal and hex values in the terminal. +*/ +static void PrintBlockSplitPoints(const unsigned short* litlens, + const unsigned short* dists, + size_t llsize, const size_t* lz77splitpoints, + size_t nlz77points) { + size_t* splitpoints = 0; + size_t npoints = 0; + size_t i; + /* The input is given as lz77 indices, but we want to see the uncompressed + index values. */ + size_t pos = 0; + if (nlz77points > 0) { + for (i = 0; i < llsize; i++) { + size_t length = dists[i] == 0 ? 1 : litlens[i]; + if (lz77splitpoints[npoints] == i) { + ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints); + if (npoints == nlz77points) break; + } + pos += length; + } + } + assert(npoints == nlz77points); + + fprintf(stderr, "block split points: "); + for (i = 0; i < npoints; i++) { + fprintf(stderr, "%d ", (int)splitpoints[i]); + } + fprintf(stderr, "(hex:"); + for (i = 0; i < npoints; i++) { + fprintf(stderr, " %x", (int)splitpoints[i]); + } + fprintf(stderr, ")\n"); + + free(splitpoints); +} + +/* +Finds next block to try to split, the largest of the available ones. +The largest is chosen to make sure that if only a limited amount of blocks is +requested, their sizes are spread evenly. +llsize: the size of the LL77 data, which is the size of the done array here. 
+done: array indicating which blocks starting at that position are no longer + splittable (splitting them increases rather than decreases cost). +splitpoints: the splitpoints found so far. +npoints: the amount of splitpoints found so far. +lstart: output variable, giving start of block. +lend: output variable, giving end of block. +returns 1 if a block was found, 0 if no block found (all are done). +*/ +static int FindLargestSplittableBlock( + size_t llsize, const unsigned char* done, + const size_t* splitpoints, size_t npoints, + size_t* lstart, size_t* lend) { + size_t longest = 0; + int found = 0; + size_t i; + for (i = 0; i <= npoints; i++) { + size_t start = i == 0 ? 0 : splitpoints[i - 1]; + size_t end = i == npoints ? llsize - 1 : splitpoints[i]; + if (!done[start] && end - start > longest) { + *lstart = start; + *lend = end; + found = 1; + longest = end - start; + } + } + return found; +} + +void ZopfliBlockSplitLZ77(const ZopfliOptions* options, + const unsigned short* litlens, + const unsigned short* dists, + size_t llsize, size_t maxblocks, + size_t** splitpoints, size_t* npoints) { + size_t lstart, lend; + size_t i; + size_t llpos = 0; + size_t numblocks = 1; + unsigned char* done; + double splitcost, origcost; + + if (llsize < 10) return; /* This code fails on tiny files. */ + + done = (unsigned char*)malloc(llsize); + if (!done) exit(-1); /* Allocation failed. 
*/ + for (i = 0; i < llsize; i++) done[i] = 0; + + lstart = 0; + lend = llsize; + for (;;) { + SplitCostContext c; + + if (maxblocks > 0 && numblocks >= maxblocks) { + break; + } + + c.litlens = litlens; + c.dists = dists; + c.llsize = llsize; + c.start = lstart; + c.end = lend; + assert(lstart < lend); + llpos = FindMinimum(SplitCost, &c, lstart + 1, lend); + + assert(llpos > lstart); + assert(llpos < lend); + + splitcost = EstimateCost(litlens, dists, lstart, llpos) + + EstimateCost(litlens, dists, llpos, lend); + origcost = EstimateCost(litlens, dists, lstart, lend); + + if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) { + done[lstart] = 1; + } else { + AddSorted(llpos, splitpoints, npoints); + numblocks++; + } + + if (!FindLargestSplittableBlock( + llsize, done, *splitpoints, *npoints, &lstart, &lend)) { + break; /* No further split will probably reduce compression. */ + } + + if (lend - lstart < 10) { + break; + } + } + + if (options->verbose) { + PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints); + } + + free(done); +} + +void ZopfliBlockSplit(const ZopfliOptions* options, + const unsigned char* in, size_t instart, size_t inend, + size_t maxblocks, size_t** splitpoints, size_t* npoints) { + size_t pos = 0; + size_t i; + ZopfliBlockState s; + size_t* lz77splitpoints = 0; + size_t nlz77points = 0; + ZopfliLZ77Store store; + + ZopfliInitLZ77Store(&store); + + s.options = options; + s.blockstart = instart; + s.blockend = inend; +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + s.lmc = 0; +#endif + + *npoints = 0; + *splitpoints = 0; + + /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal + results in better blocks. */ + ZopfliLZ77Greedy(&s, in, instart, inend, &store); + + ZopfliBlockSplitLZ77(options, + store.litlens, store.dists, store.size, maxblocks, + &lz77splitpoints, &nlz77points); + + /* Convert LZ77 positions to positions in the uncompressed input. 
*/ + pos = instart; + if (nlz77points > 0) { + for (i = 0; i < store.size; i++) { + size_t length = store.dists[i] == 0 ? 1 : store.litlens[i]; + if (lz77splitpoints[*npoints] == i) { + ZOPFLI_APPEND_DATA(pos, splitpoints, npoints); + if (*npoints == nlz77points) break; + } + pos += length; + } + } + assert(*npoints == nlz77points); + + free(lz77splitpoints); + ZopfliCleanLZ77Store(&store); +} + +void ZopfliBlockSplitSimple(const unsigned char* in, + size_t instart, size_t inend, + size_t blocksize, + size_t** splitpoints, size_t* npoints) { + size_t i = instart; + while (i < inend) { + ZOPFLI_APPEND_DATA(i, splitpoints, npoints); + i += blocksize; + } + (void)in; +} diff -Nru zopfli-0~git130414/src/zopfli/blocksplitter.h zopfli-1.0.0/src/zopfli/blocksplitter.h --- zopfli-0~git130414/src/zopfli/blocksplitter.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/blocksplitter.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,77 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +Functions to choose good boundaries for block splitting. Deflate allows encoding +the data in multiple blocks, with a separate Huffman tree for each block. The +Huffman tree itself requires some bytes to encode, so by choosing certain +blocks, you can either hurt, or enhance compression. 
These functions choose good +ones that enhance it. +*/ + +#ifndef ZOPFLI_BLOCKSPLITTER_H_ +#define ZOPFLI_BLOCKSPLITTER_H_ + +#include + +#include "zopfli.h" + + +/* +Does blocksplitting on LZ77 data. +The output splitpoints are indices in the LZ77 data. +litlens: lz77 lit/lengths +dists: lz77 distances +llsize: size of litlens and dists +maxblocks: set a limit to the amount of blocks. Set to 0 to mean no limit. +*/ +void ZopfliBlockSplitLZ77(const ZopfliOptions* options, + const unsigned short* litlens, + const unsigned short* dists, + size_t llsize, size_t maxblocks, + size_t** splitpoints, size_t* npoints); + +/* +Does blocksplitting on uncompressed data. +The output splitpoints are indices in the uncompressed bytes. + +options: general program options. +in: uncompressed input data +instart: where to start splitting +inend: where to end splitting (not inclusive) +maxblocks: maximum amount of blocks to split into, or 0 for no limit +splitpoints: dynamic array to put the resulting split point coordinates into. + The coordinates are indices in the input array. +npoints: pointer to amount of splitpoints, for the dynamic array. The amount of + blocks is the amount of splitpoitns + 1. +*/ +void ZopfliBlockSplit(const ZopfliOptions* options, + const unsigned char* in, size_t instart, size_t inend, + size_t maxblocks, size_t** splitpoints, size_t* npoints); + +/* +Divides the input into equal blocks, does not even take LZ77 lengths into +account. +*/ +void ZopfliBlockSplitSimple(const unsigned char* in, + size_t instart, size_t inend, + size_t blocksize, + size_t** splitpoints, size_t* npoints); + +#endif /* ZOPFLI_BLOCKSPLITTER_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/cache.c zopfli-1.0.0/src/zopfli/cache.c --- zopfli-0~git130414/src/zopfli/cache.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/cache.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,119 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "cache.h" + +#include +#include +#include + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + +void ZopfliInitCache(size_t blocksize, ZopfliLongestMatchCache* lmc) { + size_t i; + lmc->length = (unsigned short*)malloc(sizeof(unsigned short) * blocksize); + lmc->dist = (unsigned short*)malloc(sizeof(unsigned short) * blocksize); + /* Rather large amount of memory. */ + lmc->sublen = (unsigned char*)malloc(ZOPFLI_CACHE_LENGTH * 3 * blocksize); + + /* length > 0 and dist 0 is invalid combination, which indicates on purpose + that this cache value is not filled in yet. 
*/ + for (i = 0; i < blocksize; i++) lmc->length[i] = 1; + for (i = 0; i < blocksize; i++) lmc->dist[i] = 0; + for (i = 0; i < ZOPFLI_CACHE_LENGTH * blocksize * 3; i++) lmc->sublen[i] = 0; +} + +void ZopfliCleanCache(ZopfliLongestMatchCache* lmc) { + free(lmc->length); + free(lmc->dist); + free(lmc->sublen); +} + +void ZopfliSublenToCache(const unsigned short* sublen, + size_t pos, size_t length, + ZopfliLongestMatchCache* lmc) { + size_t i; + size_t j = 0; + unsigned bestlength = 0; + unsigned char* cache; + +#if ZOPFLI_CACHE_LENGTH == 0 + return; +#endif + + cache = &lmc->sublen[ZOPFLI_CACHE_LENGTH * pos * 3]; + if (length < 3) return; + for (i = 3; i <= length; i++) { + if (i == length || sublen[i] != sublen[i + 1]) { + cache[j * 3] = i - 3; + cache[j * 3 + 1] = sublen[i] % 256; + cache[j * 3 + 2] = (sublen[i] >> 8) % 256; + bestlength = i; + j++; + if (j >= ZOPFLI_CACHE_LENGTH) break; + } + } + if (j < ZOPFLI_CACHE_LENGTH) { + assert(bestlength == length); + cache[(ZOPFLI_CACHE_LENGTH - 1) * 3] = bestlength - 3; + } else { + assert(bestlength <= length); + } + assert(bestlength == ZopfliMaxCachedSublen(lmc, pos, length)); +} + +void ZopfliCacheToSublen(const ZopfliLongestMatchCache* lmc, + size_t pos, size_t length, + unsigned short* sublen) { + size_t i, j; + unsigned maxlength = ZopfliMaxCachedSublen(lmc, pos, length); + unsigned prevlength = 0; + unsigned char* cache; +#if ZOPFLI_CACHE_LENGTH == 0 + return; +#endif + if (length < 3) return; + cache = &lmc->sublen[ZOPFLI_CACHE_LENGTH * pos * 3]; + for (j = 0; j < ZOPFLI_CACHE_LENGTH; j++) { + unsigned length = cache[j * 3] + 3; + unsigned dist = cache[j * 3 + 1] + 256 * cache[j * 3 + 2]; + for (i = prevlength; i <= length; i++) { + sublen[i] = dist; + } + if (length == maxlength) break; + prevlength = length + 1; + } +} + +/* +Returns the length up to which could be stored in the cache. 
+*/ +unsigned ZopfliMaxCachedSublen(const ZopfliLongestMatchCache* lmc, + size_t pos, size_t length) { + unsigned char* cache; +#if ZOPFLI_CACHE_LENGTH == 0 + return 0; +#endif + cache = &lmc->sublen[ZOPFLI_CACHE_LENGTH * pos * 3]; + (void)length; + if (cache[1] == 0 && cache[2] == 0) return 0; /* No sublen cached. */ + return cache[(ZOPFLI_CACHE_LENGTH - 1) * 3] + 3; +} + +#endif /* ZOPFLI_LONGEST_MATCH_CACHE */ diff -Nru zopfli-0~git130414/src/zopfli/cache.h zopfli-1.0.0/src/zopfli/cache.h --- zopfli-0~git130414/src/zopfli/cache.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/cache.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,66 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +The cache that speeds up ZopfliFindLongestMatch of lz77.c. +*/ + +#ifndef ZOPFLI_CACHE_H_ +#define ZOPFLI_CACHE_H_ + +#include "util.h" + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + +/* +Cache used by ZopfliFindLongestMatch to remember previously found length/dist +values. +This is needed because the squeeze runs will ask these values multiple times for +the same position. +Uses large amounts of memory, since it has to remember the distance belonging +to every possible shorter-than-the-best length (the so called "sublen" array). 
+*/ +typedef struct ZopfliLongestMatchCache { + unsigned short* length; + unsigned short* dist; + unsigned char* sublen; +} ZopfliLongestMatchCache; + +/* Initializes the ZopfliLongestMatchCache. */ +void ZopfliInitCache(size_t blocksize, ZopfliLongestMatchCache* lmc); + +/* Frees up the memory of the ZopfliLongestMatchCache. */ +void ZopfliCleanCache(ZopfliLongestMatchCache* lmc); + +/* Stores sublen array in the cache. */ +void ZopfliSublenToCache(const unsigned short* sublen, + size_t pos, size_t length, + ZopfliLongestMatchCache* lmc); + +/* Extracts sublen array from the cache. */ +void ZopfliCacheToSublen(const ZopfliLongestMatchCache* lmc, + size_t pos, size_t length, + unsigned short* sublen); +/* Returns the length up to which could be stored in the cache. */ +unsigned ZopfliMaxCachedSublen(const ZopfliLongestMatchCache* lmc, + size_t pos, size_t length); + +#endif /* ZOPFLI_LONGEST_MATCH_CACHE */ + +#endif /* ZOPFLI_CACHE_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/deflate.c zopfli-1.0.0/src/zopfli/deflate.c --- zopfli-0~git130414/src/zopfli/deflate.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/deflate.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,698 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "deflate.h" + +#include +#include +#include + +#include "blocksplitter.h" +#include "lz77.h" +#include "squeeze.h" +#include "tree.h" + +static void AddBit(int bit, + unsigned char* bp, unsigned char** out, size_t* outsize) { + if (((*bp) & 7) == 0) ZOPFLI_APPEND_DATA(0, out, outsize); + (*out)[*outsize - 1] |= bit << ((*bp) & 7); + (*bp)++; +} + +static void AddBits(unsigned symbol, unsigned length, + unsigned char* bp, unsigned char** out, size_t* outsize) { + /* TODO(lode): make more efficient (add more bits at once). */ + unsigned i; + for (i = 0; i < length; i++) { + unsigned bit = (symbol >> i) & 1; + if (((*bp) & 7) == 0) ZOPFLI_APPEND_DATA(0, out, outsize); + (*out)[*outsize - 1] |= bit << ((*bp) & 7); + (*bp)++; + } +} + +/* +Adds bits, like AddBits, but the order is inverted. The deflate specification +uses both orders in one standard. +*/ +static void AddHuffmanBits(unsigned symbol, unsigned length, + unsigned char* bp, unsigned char** out, + size_t* outsize) { + /* TODO(lode): make more efficient (add more bits at once). */ + unsigned i; + for (i = 0; i < length; i++) { + unsigned bit = (symbol >> (length - i - 1)) & 1; + if (((*bp) & 7) == 0) ZOPFLI_APPEND_DATA(0, out, outsize); + (*out)[*outsize - 1] |= bit << ((*bp) & 7); + (*bp)++; + } +} + +/* +Ensures there are at least 2 distance codes to support buggy decoders. +Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1 +distance code (with length > 0), even though it's valid according to the +deflate spec to have 0 distance codes. On top of that, some mobile phones +require at least two distance codes. To support these decoders too (but +potentially at the cost of a few bytes), add dummy code lengths of 1. +References to this bug can be found in the changelog of +Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0. 
+ +d_lengths: the 32 lengths of the distance codes. +*/ +static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) { + int num_dist_codes = 0; /* Amount of non-zero distance codes */ + int i; + for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) { + if (d_lengths[i]) num_dist_codes++; + if (num_dist_codes >= 2) return; /* Two or more codes is fine. */ + } + + if (num_dist_codes == 0) { + d_lengths[0] = d_lengths[1] = 1; + } else if (num_dist_codes == 1) { + d_lengths[d_lengths[0] ? 1 : 0] = 1; + } +} + +static void AddDynamicTree(const unsigned* ll_lengths, + const unsigned* d_lengths, + unsigned char* bp, + unsigned char** out, size_t* outsize) { + unsigned* lld_lengths = 0; /* All litlen and dist lengthts with ending zeros + trimmed together in one array. */ + unsigned lld_total; /* Size of lld_lengths. */ + unsigned* rle = 0; /* Runlength encoded version of lengths of litlen and dist + trees. */ + unsigned* rle_bits = 0; /* Extra bits for rle values 16, 17 and 18. */ + size_t rle_size = 0; /* Size of rle array. */ + size_t rle_bits_size = 0; /* Should have same value as rle_size. */ + unsigned hlit = 29; /* 286 - 257 */ + unsigned hdist = 29; /* 32 - 1, but gzip does not like hdist > 29.*/ + unsigned hclen; + size_t i, j; + size_t clcounts[19]; + unsigned clcl[19]; /* Code length code lengths. */ + unsigned clsymbols[19]; + /* The order in which code length code lengths are encoded as per deflate. */ + unsigned order[19] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + + /* Trim zeros. */ + while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--; + while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--; + + lld_total = hlit + 257 + hdist + 1; + lld_lengths = (unsigned*)malloc(sizeof(*lld_lengths) * lld_total); + if (!lld_lengths) exit(-1); /* Allocation failed. */ + + for (i = 0; i < lld_total; i++) { + lld_lengths[i] = i < 257 + hlit + ? 
ll_lengths[i] : d_lengths[i - 257 - hlit]; + assert(lld_lengths[i] < 16); + } + + for (i = 0; i < lld_total; i++) { + size_t count = 0; + for (j = i; j < lld_total && lld_lengths[i] == lld_lengths[j]; j++) { + count++; + } + if (count >= 4 || (count >= 3 && lld_lengths[i] == 0)) { + if (lld_lengths[i] == 0) { + if (count > 10) { + if (count > 138) count = 138; + ZOPFLI_APPEND_DATA(18, &rle, &rle_size); + ZOPFLI_APPEND_DATA(count - 11, &rle_bits, &rle_bits_size); + } else { + ZOPFLI_APPEND_DATA(17, &rle, &rle_size); + ZOPFLI_APPEND_DATA(count - 3, &rle_bits, &rle_bits_size); + } + } else { + unsigned repeat = count - 1; /* Since the first one is hardcoded. */ + ZOPFLI_APPEND_DATA(lld_lengths[i], &rle, &rle_size); + ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size); + while (repeat >= 6) { + ZOPFLI_APPEND_DATA(16, &rle, &rle_size); + ZOPFLI_APPEND_DATA(6 - 3, &rle_bits, &rle_bits_size); + repeat -= 6; + } + if (repeat >= 3) { + ZOPFLI_APPEND_DATA(16, &rle, &rle_size); + ZOPFLI_APPEND_DATA(3 - 3, &rle_bits, &rle_bits_size); + repeat -= 3; + } + while (repeat != 0) { + ZOPFLI_APPEND_DATA(lld_lengths[i], &rle, &rle_size); + ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size); + repeat--; + } + } + + i += count - 1; + } else { + ZOPFLI_APPEND_DATA(lld_lengths[i], &rle, &rle_size); + ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size); + } + assert(rle[rle_size - 1] <= 18); + } + + for (i = 0; i < 19; i++) { + clcounts[i] = 0; + } + for (i = 0; i < rle_size; i++) { + clcounts[rle[i]]++; + } + + ZopfliCalculateBitLengths(clcounts, 19, 7, clcl); + ZopfliLengthsToSymbols(clcl, 19, 7, clsymbols); + + hclen = 15; + /* Trim zeros. 
*/ + while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--; + + AddBits(hlit, 5, bp, out, outsize); + AddBits(hdist, 5, bp, out, outsize); + AddBits(hclen, 4, bp, out, outsize); + + for (i = 0; i < hclen + 4; i++) { + AddBits(clcl[order[i]], 3, bp, out, outsize); + } + + for (i = 0; i < rle_size; i++) { + unsigned symbol = clsymbols[rle[i]]; + AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize); + /* Extra bits. */ + if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize); + else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize); + else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize); + } + + free(lld_lengths); + free(rle); + free(rle_bits); +} + +/* +Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE. +*/ +static size_t CalculateTreeSize(const unsigned* ll_lengths, + const unsigned* d_lengths, + size_t* ll_counts, size_t* d_counts) { + unsigned char* dummy = 0; + size_t dummysize = 0; + unsigned char bp = 0; + + (void)ll_counts; + (void)d_counts; + + AddDynamicTree(ll_lengths, d_lengths, &bp, &dummy, &dummysize); + free(dummy); + + return dummysize * 8 + (bp & 7); +} + +/* +Adds all lit/len and dist codes from the lists as huffman symbols. Does not add +end code 256. expected_data_size is the uncompressed block size, used for +assert, but you can set it to 0 to not do the assertion. 
/*
Writes the LZ77 symbols in the range [lstart, lend) to the output as huffman
codes. The end code 256 is not written, the caller has to add it.
litlens, dists: the LZ77 data. A dists entry of 0 marks a literal whose byte
    value is the litlens entry, otherwise the pair is a (length, distance)
    match.
expected_data_size: the uncompressed size of the range, only used in an
    assert. Pass 0 to skip that check.
ll_symbols, ll_lengths: huffman codes and code lengths of the literal/length
    alphabet.
d_symbols, d_lengths: huffman codes and code lengths of the distance alphabet.
bp, out, outsize: bit pointer and dynamic output array to append to.
*/
static void AddLZ77Data(const unsigned short* litlens,
                        const unsigned short* dists,
                        size_t lstart, size_t lend,
                        size_t expected_data_size,
                        const unsigned* ll_symbols, const unsigned* ll_lengths,
                        const unsigned* d_symbols, const unsigned* d_lengths,
                        unsigned char* bp,
                        unsigned char** out, size_t* outsize) {
  size_t decoded_size = 0;  /* Amount of uncompressed bytes represented. */
  size_t pos;

  for (pos = lstart; pos < lend; pos++) {
    unsigned distance = dists[pos];
    unsigned value = litlens[pos];
    unsigned length_symbol;
    unsigned dist_symbol;

    if (distance == 0) {
      /* Literal byte: a single huffman code, no extra bits. */
      assert(value < 256);
      assert(ll_lengths[value] > 0);
      AddHuffmanBits(ll_symbols[value], ll_lengths[value], bp, out, outsize);
      decoded_size++;
      continue;
    }

    /* Match: length code + extra bits, then distance code + extra bits. */
    length_symbol = ZopfliGetLengthSymbol(value);
    dist_symbol = ZopfliGetDistSymbol(distance);
    assert(value >= 3 && value <= 288);
    assert(ll_lengths[length_symbol] > 0);
    assert(d_lengths[dist_symbol] > 0);
    AddHuffmanBits(ll_symbols[length_symbol], ll_lengths[length_symbol],
                   bp, out, outsize);
    AddBits(ZopfliGetLengthExtraBitsValue(value),
            ZopfliGetLengthExtraBits(value),
            bp, out, outsize);
    AddHuffmanBits(d_symbols[dist_symbol], d_lengths[dist_symbol],
                   bp, out, outsize);
    AddBits(ZopfliGetDistExtraBitsValue(distance),
            ZopfliGetDistExtraBits(distance),
            bp, out, outsize);
    decoded_size += value;
  }

  assert(expected_data_size == 0 || decoded_size == expected_data_size);
}

/*
Fills in the code lengths of the fixed tree: literal/length symbols 0-143 get
8 bits, 144-255 get 9 bits, 256-279 get 7 bits and 280-287 get 8 bits. All 32
distance symbols get 5 bits.
ll_lengths: output, must have room for 288 values.
d_lengths: output, must have room for 32 values.
*/
static void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) {
  size_t symbol;

  for (symbol = 0; symbol < 288; symbol++) {
    unsigned bits;
    if (symbol < 144) {
      bits = 8;
    } else if (symbol < 256) {
      bits = 9;
    } else if (symbol < 280) {
      bits = 7;
    } else {
      bits = 8;
    }
    ll_lengths[symbol] = bits;
  }

  for (symbol = 0; symbol < 32; symbol++) {
    d_lengths[symbol] = 5;
  }
}
/*
Calculates the size, in bits, of the part of an LZ77 block that comes after the
header and the tree: the huffman codes and extra bits of all symbols in
[lstart, lend) plus the end symbol 256.
ll_lengths, d_lengths: code lengths of the literal/length and distance codes.
litlens, dists: the LZ77 data, a dists entry of 0 marks a literal.
*/
static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
                                       const unsigned* d_lengths,
                                       const unsigned short* litlens,
                                       const unsigned short* dists,
                                       size_t lstart, size_t lend) {
  size_t total_bits = 0;
  size_t pos;

  for (pos = lstart; pos < lend; pos++) {
    unsigned short value = litlens[pos];
    unsigned short distance = dists[pos];

    if (distance == 0) {
      /* Literal: only its huffman code. */
      total_bits += ll_lengths[value];
      continue;
    }

    /* Match: both huffman codes and both groups of extra bits. */
    total_bits += ll_lengths[ZopfliGetLengthSymbol(value)];
    total_bits += d_lengths[ZopfliGetDistSymbol(distance)];
    total_bits += ZopfliGetLengthExtraBits(value);
    total_bits += ZopfliGetDistExtraBits(distance);
  }

  /* The end symbol closes every block. */
  total_bits += ll_lengths[256];
  return total_bits;
}

/*
Calculates the size, in bits, of a deflate block holding the LZ77 data in
[lstart, lend): 3 header bits, the tree (dynamic blocks only) and the symbols.
litlens: lz77 lit/lengths
dists: lz77 distances
lstart: start of block
lend: end of block (not inclusive)
btype: 1 for the fixed tree, 2 for a dynamic tree built from the symbol counts
    of the range. Uncompressed blocks (0) are not supported.
*/
double ZopfliCalculateBlockSize(const unsigned short* litlens,
                                const unsigned short* dists,
                                size_t lstart, size_t lend, int btype) {
  size_t ll_counts[288];
  size_t d_counts[32];
  unsigned ll_lengths[288];
  unsigned d_lengths[32];
  double bits = 3;  /* BFINAL and BTYPE. */

  assert(btype == 1 || btype == 2);  /* This is not for uncompressed blocks. */

  if (btype != 1) {
    /* Dynamic tree: derive the code lengths from the symbol histogram and
    account for the encoded tree itself. */
    ZopfliLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts);
    ZopfliCalculateBitLengths(ll_counts, 288, 15, ll_lengths);
    ZopfliCalculateBitLengths(d_counts, 32, 15, d_lengths);
    PatchDistanceCodesForBuggyDecoders(d_lengths);
    bits += CalculateTreeSize(ll_lengths, d_lengths, ll_counts, d_counts);
  } else {
    GetFixedTree(ll_lengths, d_lengths);
  }

  bits += CalculateBlockSymbolSize(
      ll_lengths, d_lengths, litlens, dists, lstart, lend);
  return bits;
}
+lstart: where to start in the LZ77 data +lend: where to end in the LZ77 data (not inclusive) +expected_data_size: the uncompressed block size, used for assert, but you can + set it to 0 to not do the assertion. +bp: output bit pointer +out: dynamic output array to append to +outsize: dynamic output array size +*/ +static void AddLZ77Block(const ZopfliOptions* options, int btype, int final, + const unsigned short* litlens, + const unsigned short* dists, + size_t lstart, size_t lend, + size_t expected_data_size, + unsigned char* bp, unsigned char** out, size_t* outsize) { + size_t ll_counts[288]; + size_t d_counts[32]; + unsigned ll_lengths[288]; + unsigned d_lengths[32]; + unsigned ll_symbols[288]; + unsigned d_symbols[32]; + size_t detect_block_size = *outsize; + size_t compressed_size; + size_t uncompressed_size = 0; + size_t i; + + AddBit(final, bp, out, outsize); + AddBit(btype & 1, bp, out, outsize); + AddBit((btype & 2) >> 1, bp, out, outsize); + + if (btype == 1) { + /* Fixed block. */ + GetFixedTree(ll_lengths, d_lengths); + } else { + /* Dynamic block. */ + unsigned detect_tree_size; + assert(btype == 2); + ZopfliLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts); + ZopfliCalculateBitLengths(ll_counts, 288, 15, ll_lengths); + ZopfliCalculateBitLengths(d_counts, 32, 15, d_lengths); + PatchDistanceCodesForBuggyDecoders(d_lengths); + detect_tree_size = *outsize; + AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize); + if (options->verbose) { + fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size)); + } + + /* Assert that for every present symbol, the code length is non-zero. */ + /* TODO(lode): remove this in release version. 
*/ + for (i = 0; i < 288; i++) assert(ll_counts[i] == 0 || ll_lengths[i] > 0); + for (i = 0; i < 32; i++) assert(d_counts[i] == 0 || d_lengths[i] > 0); + } + + ZopfliLengthsToSymbols(ll_lengths, 288, 15, ll_symbols); + ZopfliLengthsToSymbols(d_lengths, 32, 15, d_symbols); + + detect_block_size = *outsize; + AddLZ77Data(litlens, dists, lstart, lend, expected_data_size, + ll_symbols, ll_lengths, d_symbols, d_lengths, + bp, out, outsize); + /* End symbol. */ + AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize); + + for (i = lstart; i < lend; i++) { + uncompressed_size += dists[i] == 0 ? 1 : litlens[i]; + } + compressed_size = *outsize - detect_block_size; + if (options->verbose) { + fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n", + (int)compressed_size, (int)(compressed_size / 1024), + (int)(uncompressed_size)); + } +} + +static void DeflateDynamicBlock(const ZopfliOptions* options, int final, + const unsigned char* in, + size_t instart, size_t inend, + unsigned char* bp, + unsigned char** out, size_t* outsize) { + ZopfliBlockState s; + size_t blocksize = inend - instart; + ZopfliLZ77Store store; + int btype = 2; + + ZopfliInitLZ77Store(&store); + + s.options = options; + s.blockstart = instart; + s.blockend = inend; +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache)); + ZopfliInitCache(blocksize, s.lmc); +#endif + + ZopfliLZ77Optimal(&s, in, instart, inend, &store); + + /* For small block, encoding with fixed tree can be smaller. 
For large block, + don't bother doing this expensive test, dynamic tree will be better.*/ + if (store.size < 1000) { + double dyncost, fixedcost; + ZopfliLZ77Store fixedstore; + ZopfliInitLZ77Store(&fixedstore); + ZopfliLZ77OptimalFixed(&s, in, instart, inend, &fixedstore); + dyncost = ZopfliCalculateBlockSize(store.litlens, store.dists, + 0, store.size, 2); + fixedcost = ZopfliCalculateBlockSize(fixedstore.litlens, fixedstore.dists, + 0, fixedstore.size, 1); + if (fixedcost < dyncost) { + btype = 1; + ZopfliCleanLZ77Store(&store); + store = fixedstore; + } else { + ZopfliCleanLZ77Store(&fixedstore); + } + } + + AddLZ77Block(s.options, btype, final, + store.litlens, store.dists, 0, store.size, + blocksize, bp, out, outsize); + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + ZopfliCleanCache(s.lmc); + free(s.lmc); +#endif + ZopfliCleanLZ77Store(&store); +} + +static void DeflateFixedBlock(const ZopfliOptions* options, int final, + const unsigned char* in, + size_t instart, size_t inend, + unsigned char* bp, + unsigned char** out, size_t* outsize) { + ZopfliBlockState s; + size_t blocksize = inend - instart; + ZopfliLZ77Store store; + + ZopfliInitLZ77Store(&store); + + s.options = options; + s.blockstart = instart; + s.blockend = inend; +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache)); + ZopfliInitCache(blocksize, s.lmc); +#endif + + ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store); + + AddLZ77Block(s.options, 1, final, store.litlens, store.dists, 0, store.size, + blocksize, bp, out, outsize); + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + ZopfliCleanCache(s.lmc); + free(s.lmc); +#endif + ZopfliCleanLZ77Store(&store); +} + +static void DeflateNonCompressedBlock(const ZopfliOptions* options, int final, + const unsigned char* in, size_t instart, + size_t inend, + unsigned char* bp, + unsigned char** out, size_t* outsize) { + size_t i; + size_t blocksize = inend - instart; + unsigned short nlen = ~blocksize; + + 
(void)options; + assert(blocksize < 65536); /* Non compressed blocks are max this size. */ + + AddBit(final, bp, out, outsize); + /* BTYPE 00 */ + AddBit(0, bp, out, outsize); + AddBit(0, bp, out, outsize); + + /* Any bits of input up to the next byte boundary are ignored. */ + *bp = 0; + + ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize); + ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize); + ZOPFLI_APPEND_DATA(nlen % 256, out, outsize); + ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize); + + for (i = instart; i < inend; i++) { + ZOPFLI_APPEND_DATA(in[i], out, outsize); + } +} + +static void DeflateBlock(const ZopfliOptions* options, + int btype, int final, + const unsigned char* in, size_t instart, size_t inend, + unsigned char* bp, + unsigned char** out, size_t* outsize) { + if (btype == 0) { + DeflateNonCompressedBlock( + options, final, in, instart, inend, bp, out, outsize); + } else if (btype == 1) { + DeflateFixedBlock(options, final, in, instart, inend, bp, out, outsize); + } else { + assert (btype == 2); + DeflateDynamicBlock(options, final, in, instart, inend, bp, out, outsize); + } +} + +/* +Does squeeze strategy where first block splitting is done, then each block is +squeezed. +Parameters: see description of the ZopfliDeflate function. +*/ +static void DeflateSplittingFirst(const ZopfliOptions* options, + int btype, int final, + const unsigned char* in, + size_t instart, size_t inend, + unsigned char* bp, + unsigned char** out, size_t* outsize) { + size_t i; + size_t* splitpoints = 0; + size_t npoints = 0; + if (btype == 0) { + ZopfliBlockSplitSimple(in, instart, inend, 65535, &splitpoints, &npoints); + } else if (btype == 1) { + /* If all blocks are fixed tree, splitting into separate blocks only + increases the total size. Leave npoints at 0, this represents 1 block. 
*/ + } else { + ZopfliBlockSplit(options, in, instart, inend, + options->blocksplittingmax, &splitpoints, &npoints); + } + + for (i = 0; i <= npoints; i++) { + size_t start = i == 0 ? instart : splitpoints[i - 1]; + size_t end = i == npoints ? inend : splitpoints[i]; + DeflateBlock(options, btype, i == npoints && final, in, start, end, + bp, out, outsize); + } + + free(splitpoints); +} + +/* +Does squeeze strategy where first the best possible lz77 is done, and then based +on that data, block splitting is done. +Parameters: see description of the ZopfliDeflate function. +*/ +static void DeflateSplittingLast(const ZopfliOptions* options, + int btype, int final, + const unsigned char* in, + size_t instart, size_t inend, + unsigned char* bp, + unsigned char** out, size_t* outsize) { + size_t i; + ZopfliBlockState s; + ZopfliLZ77Store store; + size_t* splitpoints = 0; + size_t npoints = 0; + + if (btype == 0) { + /* This function only supports LZ77 compression. DeflateSplittingFirst + supports the special case of noncompressed data. Punt it to that one. */ + DeflateSplittingFirst(options, btype, final, + in, instart, inend, + bp, out, outsize); + } + assert(btype == 1 || btype == 2); + + ZopfliInitLZ77Store(&store); + + s.options = options; + s.blockstart = instart; + s.blockend = inend; +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache)); + ZopfliInitCache(inend - instart, s.lmc); +#endif + + if (btype == 2) { + ZopfliLZ77Optimal(&s, in, instart, inend, &store); + } else { + assert (btype == 1); + ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store); + } + + if (btype == 1) { + /* If all blocks are fixed tree, splitting into separate blocks only + increases the total size. Leave npoints at 0, this represents 1 block. 
*/ + } else { + ZopfliBlockSplitLZ77(options, store.litlens, store.dists, store.size, + options->blocksplittingmax, &splitpoints, &npoints); + } + + for (i = 0; i <= npoints; i++) { + size_t start = i == 0 ? 0 : splitpoints[i - 1]; + size_t end = i == npoints ? store.size : splitpoints[i]; + AddLZ77Block(options, btype, i == npoints && final, + store.litlens, store.dists, start, end, 0, + bp, out, outsize); + } + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + ZopfliCleanCache(s.lmc); + free(s.lmc); +#endif + + ZopfliCleanLZ77Store(&store); +} + +/* +Deflate a part, to allow ZopfliDeflate() to use multiple master blocks if +needed. +It is possible to call this function multiple times in a row, shifting +instart and inend to next bytes of the data. If instart is larger than 0, then +previous bytes are used as the initial dictionary for LZ77. +This function will usually output multiple deflate blocks. If final is 1, then +the final bit will be set on the last block. +*/ +void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final, + const unsigned char* in, size_t instart, size_t inend, + unsigned char* bp, unsigned char** out, + size_t* outsize) { + if (options->blocksplitting) { + if (options->blocksplittinglast) { + DeflateSplittingLast(options, btype, final, in, instart, inend, + bp, out, outsize); + } else { + DeflateSplittingFirst(options, btype, final, in, instart, inend, + bp, out, outsize); + } + } else { + DeflateBlock(options, btype, final, in, instart, inend, bp, out, outsize); + } +} + +void ZopfliDeflate(const ZopfliOptions* options, int btype, int final, + const unsigned char* in, size_t insize, + unsigned char* bp, unsigned char** out, size_t* outsize) { +#if ZOPFLI_MASTER_BLOCK_SIZE == 0 + ZopfliDeflatePart(options, btype, final, in, 0, insize, bp, out, outsize); +#else + size_t i = 0; + while (i < insize) { + int masterfinal = (i + ZOPFLI_MASTER_BLOCK_SIZE >= insize); + int final2 = final && masterfinal; + size_t size = masterfinal ? 
insize - i : ZOPFLI_MASTER_BLOCK_SIZE; + ZopfliDeflatePart(options, btype, final2, + in, i, i + size, bp, out, outsize); + i += size; + } +#endif + if (options->verbose) { + fprintf(stderr, + "Original Size: %d, Deflate: %d, Compression: %f%% Removed\n", + (int)insize, (int)*outsize, + 100.0 * (double)(insize - *outsize) / (double)insize); + } +} diff -Nru zopfli-0~git130414/src/zopfli/deflate.h zopfli-1.0.0/src/zopfli/deflate.h --- zopfli-0~git130414/src/zopfli/deflate.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/deflate.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,77 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#ifndef ZOPFLI_DEFLATE_H_ +#define ZOPFLI_DEFLATE_H_ + +/* +Functions to compress according to the DEFLATE specification, using the +"squeeze" LZ77 compression backend. +*/ + +#include "zopfli.h" + +/* +Compresses according to the deflate specification and append the compressed +result to the output. +This function will usually output multiple deflate blocks. If final is 1, then +the final bit will be set on the last block. + +options: global program options +btype: the deflate block type. Use 2 for best compression. 
+ -0: non compressed blocks (00) + -1: blocks with fixed tree (01) + -2: blocks with dynamic tree (10) +final: whether this is the last section of the input, sets the final bit to the + last deflate block. +in: the input bytes +insize: number of input bytes +bp: bit pointer for the output array. This must initially be 0, and for + consecutive calls must be reused (it can have values from 0-7). This is + because deflate appends blocks as bit-based data, rather than on byte + boundaries. +out: pointer to the dynamic output array to which the result is appended. Must + be freed after use. +outsize: pointer to the dynamic output array size. +*/ +void ZopfliDeflate(const ZopfliOptions* options, int btype, int final, + const unsigned char* in, size_t insize, + unsigned char* bp, unsigned char** out, size_t* outsize); + +/* +Like ZopfliDeflate, but allows to specify start and end byte with instart and +inend. Only that part is compressed, but earlier bytes are still used for the +back window. +*/ +void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final, + const unsigned char* in, size_t instart, size_t inend, + unsigned char* bp, unsigned char** out, + size_t* outsize); + +/* +Calculates block size in bits. +litlens: lz77 lit/lengths +dists: ll77 distances +lstart: start of block +lend: end of block (not inclusive) +*/ +double ZopfliCalculateBlockSize(const unsigned short* litlens, + const unsigned short* dists, + size_t lstart, size_t lend, int btype); +#endif /* ZOPFLI_DEFLATE_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/gzip_container.c zopfli-1.0.0/src/zopfli/gzip_container.c --- zopfli-0~git130414/src/zopfli/gzip_container.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/gzip_container.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,117 @@ +/* +Copyright 2013 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
/* Table of CRCs of all 8-bit messages. */
static unsigned long crc_table[256];

/* Flag: has the table been computed? Initially false. */
static int crc_table_computed = 0;

/*
Makes the table for a fast CRC: entry n is the result of running the byte n
through 8 steps of the reflected CRC-32 polynomial 0xedb88320. Sets
crc_table_computed when done.
*/
static void MakeCRCTable(void) {
  unsigned long c;
  int n, k;
  for (n = 0; n < 256; n++) {
    c = (unsigned long) n;
    for (k = 0; k < 8; k++) {
      if (c & 1) {
        c = 0xedb88320L ^ (c >> 1);
      } else {
        c = c >> 1;
      }
    }
    crc_table[n] = c;
  }
  crc_table_computed = 1;
}

/*
Updates a running crc with the bytes buf[0..len-1] and returns
the updated crc. The crc should be initialized to zero.
The table is built on the first call.
*/
static unsigned long UpdateCRC(unsigned long crc,
                               const unsigned char *buf, size_t len) {
  unsigned long c = crc ^ 0xffffffffL;
  size_t n;  /* Same type as len, a narrower index never reaches a large len. */

  if (!crc_table_computed)
    MakeCRCTable();
  for (n = 0; n < len; n++) {
    c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
  }
  return c ^ 0xffffffffL;
}

/*
Returns the CRC of the bytes buf[0..len-1].
len is a size_t like the sizes the callers pass in, so that large sizes are
not converted to a negative int on the way.
*/
static unsigned long CRC(const unsigned char* buf, size_t len) {
  return UpdateCRC(0L, buf, len);
}
+*/ +void ZopfliGzipCompress(const ZopfliOptions* options, + const unsigned char* in, size_t insize, + unsigned char** out, size_t* outsize) { + unsigned long crcvalue = CRC(in, insize); + unsigned char bp = 0; + + ZOPFLI_APPEND_DATA(31, out, outsize); /* ID1 */ + ZOPFLI_APPEND_DATA(139, out, outsize); /* ID2 */ + ZOPFLI_APPEND_DATA(8, out, outsize); /* CM */ + ZOPFLI_APPEND_DATA(0, out, outsize); /* FLG */ + /* MTIME */ + ZOPFLI_APPEND_DATA(0, out, outsize); + ZOPFLI_APPEND_DATA(0, out, outsize); + ZOPFLI_APPEND_DATA(0, out, outsize); + ZOPFLI_APPEND_DATA(0, out, outsize); + + ZOPFLI_APPEND_DATA(2, out, outsize); /* XFL, 2 indicates best compression. */ + ZOPFLI_APPEND_DATA(3, out, outsize); /* OS follows Unix conventions. */ + + ZopfliDeflate(options, 2 /* Dynamic block */, 1, + in, insize, &bp, out, outsize); + + /* CRC */ + ZOPFLI_APPEND_DATA(crcvalue % 256, out, outsize); + ZOPFLI_APPEND_DATA((crcvalue >> 8) % 256, out, outsize); + ZOPFLI_APPEND_DATA((crcvalue >> 16) % 256, out, outsize); + ZOPFLI_APPEND_DATA((crcvalue >> 24) % 256, out, outsize); + + /* ISIZE */ + ZOPFLI_APPEND_DATA(insize % 256, out, outsize); + ZOPFLI_APPEND_DATA((insize >> 8) % 256, out, outsize); + ZOPFLI_APPEND_DATA((insize >> 16) % 256, out, outsize); + ZOPFLI_APPEND_DATA((insize >> 24) % 256, out, outsize); + + if (options->verbose) { + fprintf(stderr, + "Original Size: %d, Gzip: %d, Compression: %f%% Removed\n", + (int)insize, (int)*outsize, + 100.0 * (double)(insize - *outsize) / (double)insize); + } +} diff -Nru zopfli-0~git130414/src/zopfli/gzip_container.h zopfli-1.0.0/src/zopfli/gzip_container.h --- zopfli-0~git130414/src/zopfli/gzip_container.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/gzip_container.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,42 @@ +/* +Copyright 2013 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#ifndef ZOPFLI_GZIP_H_ +#define ZOPFLI_GZIP_H_ + +/* +Functions to compress according to the Gzip specification. +*/ + +#include "zopfli.h" + +/* +Compresses according to the gzip specification and append the compressed +result to the output. + +options: global program options +out: pointer to the dynamic output array to which the result is appended. Must + be freed after use. +outsize: pointer to the dynamic output array size. +*/ +void ZopfliGzipCompress(const ZopfliOptions* options, + const unsigned char* in, size_t insize, + unsigned char** out, size_t* outsize); + +#endif /* ZOPFLI_GZIP_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/hash.c zopfli-1.0.0/src/zopfli/hash.c --- zopfli-0~git130414/src/zopfli/hash.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/hash.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,135 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "hash.h" + +#include +#include +#include + +#define HASH_SHIFT 5 +#define HASH_MASK 32767 + +void ZopfliInitHash(size_t window_size, ZopfliHash* h) { + size_t i; + + h->val = 0; + h->head = (int*)malloc(sizeof(*h->head) * 65536); + h->prev = (unsigned short*)malloc(sizeof(*h->prev) * window_size); + h->hashval = (int*)malloc(sizeof(*h->hashval) * window_size); + for (i = 0; i < 65536; i++) { + h->head[i] = -1; /* -1 indicates no head so far. */ + } + for (i = 0; i < window_size; i++) { + h->prev[i] = i; /* If prev[j] == j, then prev[j] is uninitialized. */ + h->hashval[i] = -1; + } + +#ifdef ZOPFLI_HASH_SAME + h->same = (unsigned short*)malloc(sizeof(*h->same) * window_size); + for (i = 0; i < window_size; i++) { + h->same[i] = 0; + } +#endif + +#ifdef ZOPFLI_HASH_SAME_HASH + h->val2 = 0; + h->head2 = (int*)malloc(sizeof(*h->head2) * 65536); + h->prev2 = (unsigned short*)malloc(sizeof(*h->prev2) * window_size); + h->hashval2 = (int*)malloc(sizeof(*h->hashval2) * window_size); + for (i = 0; i < 65536; i++) { + h->head2[i] = -1; + } + for (i = 0; i < window_size; i++) { + h->prev2[i] = i; + h->hashval2[i] = -1; + } +#endif +} + +void ZopfliCleanHash(ZopfliHash* h) { + free(h->head); + free(h->prev); + free(h->hashval); + +#ifdef ZOPFLI_HASH_SAME_HASH + free(h->head2); + free(h->prev2); + free(h->hashval2); +#endif + +#ifdef ZOPFLI_HASH_SAME + free(h->same); +#endif +} + +/* +Update the sliding hash value with the given byte. All calls to this function +must be made on consecutive input characters. Since the hash value exists out +of multiple input bytes, a few warmups with this function are needed initially. 
+*/ +static void UpdateHashValue(ZopfliHash* h, unsigned char c) { + h->val = (((h->val) << HASH_SHIFT) ^ (c)) & HASH_MASK; +} + +void ZopfliUpdateHash(const unsigned char* array, size_t pos, size_t end, + ZopfliHash* h) { + unsigned short hpos = pos & ZOPFLI_WINDOW_MASK; +#ifdef ZOPFLI_HASH_SAME + size_t amount = 0; +#endif + + UpdateHashValue(h, pos + ZOPFLI_MIN_MATCH <= end ? + array[pos + ZOPFLI_MIN_MATCH - 1] : 0); + h->hashval[hpos] = h->val; + if (h->head[h->val] != -1 && h->hashval[h->head[h->val]] == h->val) { + h->prev[hpos] = h->head[h->val]; + } + else h->prev[hpos] = hpos; + h->head[h->val] = hpos; + +#ifdef ZOPFLI_HASH_SAME + /* Update "same". */ + if (h->same[(pos - 1) & ZOPFLI_WINDOW_MASK] > 1) { + amount = h->same[(pos - 1) & ZOPFLI_WINDOW_MASK] - 1; + } + while (pos + amount + 1 < end && + array[pos] == array[pos + amount + 1] && amount < (unsigned short)(-1)) { + amount++; + } + h->same[hpos] = amount; +#endif + +#ifdef ZOPFLI_HASH_SAME_HASH + h->val2 = ((h->same[hpos] - ZOPFLI_MIN_MATCH) & 255) ^ h->val; + h->hashval2[hpos] = h->val2; + if (h->head2[h->val2] != -1 && h->hashval2[h->head2[h->val2]] == h->val2) { + h->prev2[hpos] = h->head2[h->val2]; + } + else h->prev2[hpos] = hpos; + h->head2[h->val2] = hpos; +#endif +} + +void ZopfliWarmupHash(const unsigned char* array, size_t pos, size_t end, + ZopfliHash* h) { + (void)end; + UpdateHashValue(h, array[pos + 0]); + UpdateHashValue(h, array[pos + 1]); +} diff -Nru zopfli-0~git130414/src/zopfli/hash.h zopfli-1.0.0/src/zopfli/hash.h --- zopfli-0~git130414/src/zopfli/hash.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/hash.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,70 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
/*
State of the hash chains. The fields are allocated by ZopfliInitHash and
released by ZopfliCleanHash.
*/
typedef struct ZopfliHash {
  int* head;  /* Hash value to index of its most recent occurrence. */
  unsigned short* prev;  /* Index to index of prev. occurrence of same hash. */
  int* hashval;  /* Index to hash value at this index. */
  int val;  /* Current hash value. */

#ifdef ZOPFLI_HASH_SAME_HASH
  /* Fields with similar purpose as the above hash, but for the second hash with
  a value that is calculated differently.  */
  int* head2;  /* Hash value to index of its most recent occurrence. */
  unsigned short* prev2;  /* Index to index of prev. occurrence of same hash. */
  int* hashval2;  /* Index to hash value at this index. */
  int val2;  /* Current hash value. */
#endif

#ifdef ZOPFLI_HASH_SAME
  unsigned short* same;  /* Amount of repetitions of same byte after this. */
#endif
} ZopfliHash;
typedef struct Node Node;

/*
Nodes forming chains. Also used to represent leaves.
A chain is a singly linked list through the tail pointers.
*/
struct Node {
  size_t weight;  /* Total weight (symbol count) of this chain. */
  Node* tail;  /* Previous node(s) of this chain, or 0 if none. */
  int count;  /* Leaf symbol index, or number of leaves before this chain. */
  char inuse;  /* Tracking for garbage collection. */
};

/*
Memory pool for nodes.
Nodes are handed out by advancing next through the nodes array.
*/
typedef struct NodePool {
  Node* nodes;  /* The pool. */
  Node* next;  /* Pointer to a possibly free node in the pool. */
  int size;  /* Size of the memory pool. */
} NodePool;
/*
Initializes a chain node with the given values and marks it as in use.
weight: total weight of the chain that ends in this node.
count: leaf symbol index, or number of leaves before this chain.
tail: the rest of the chain, or 0 if none.
node: the node to initialize.
*/
static void InitNode(size_t weight, int count, Node* tail, Node* node) {
  node->weight = weight;
  node->count = count;
  node->tail = tail;
  node->inuse = 1;
}

/*
Finds a free location in the memory pool. Performs garbage collection if needed.
lists: If given, used to mark in-use nodes during garbage collection.
maxbits: Size of lists.
pool: Memory pool to get free node from.
Returns the free node. pool->next is left pointing just past it. The node is
not marked as in use here, InitNode does that.
NOTE(review): if every node of the pool is reachable from lists, the loop never
ends. Presumably the caller sizes the pool so that this cannot happen - confirm.
*/
static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) {
  for (;;) {
    if (pool->next >= &pool->nodes[pool->size]) {
      /* Garbage collection. */
      /* Mark everything as free, then mark again whatever is reachable through
      the tail pointers from the two chains of each list. */
      int i;
      for (i = 0; i < pool->size; i++) {
        pool->nodes[i].inuse = 0;
      }
      if (lists) {
        for (i = 0; i < maxbits * 2; i++) {
          Node* node;
          for (node = lists[i / 2][i % 2]; node; node = node->tail) {
            node->inuse = 1;
          }
        }
      }
      /* Restart the search at the beginning of the pool. */
      pool->next = &pool->nodes[0];
    }
    if (!pool->next->inuse) break;  /* Found one. */
    pool->next++;
  }
  return pool->next++;
}


/*
Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
new chain is, depending on the weights, a leaf or a combination of two chains
from the previous list.
lists: The lists of chains.
maxbits: Number of lists.
leaves: The leaves, one per symbol.
numsymbols: Number of leaves.
pool: the node memory pool.
index: The index of the list in which a new chain or leaf is required.
final: Whether this is the last time this function is called. If it is then it
  is no more needed to recursively call self.
*/
static void BoundaryPM(Node* (*lists)[2], int maxbits,
    Node* leaves, int numsymbols, NodePool* pool, int index, char final) {
  Node* newchain;
  Node* oldchain;
  int lastcount = lists[index][1]->count;  /* Count of last chain of list. */

  /* List 0 only holds leaves. Once all of them are in it, there is nothing
  left to add. */
  if (index == 0 && lastcount >= numsymbols) return;

  newchain = GetFreeNode(lists, maxbits, pool);
  oldchain = lists[index][1];

  /* These are set up before the recursive calls below, so that there is a list
  pointing to the new node, to let the garbage collection know it's in use. */
  lists[index][0] = oldchain;
  lists[index][1] = newchain;

  if (index == 0) {
    /* New leaf node in list 0. */
    InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain);
  } else {
    /* Weight of the package made of the two lookahead chains of the previous
    list. */
    size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
    if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
      /* New leaf inserted in list, so count is incremented. */
      InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail,
          newchain);
    } else {
      /* The package is taken: the new chain continues the last chain of the
      previous list. */
      InitNode(sum, lastcount, lists[index - 1][1], newchain);
      if (!final) {
        /* Two lookahead chains of previous list used up, create new ones. */
        BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
        BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
      }
    }
  }
}

/*
Initializes each list with as lookahead chains the two leaves with lowest
weights.
All lists share the same two nodes, built from leaves[0] and leaves[1], so
leaves must hold at least two entries.
*/
static void InitLists(
    NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
  int i;
  /* No lists are passed: there are no chains yet that a garbage collection
  could mark. */
  Node* node0 = GetFreeNode(0, maxbits, pool);
  Node* node1 = GetFreeNode(0, maxbits, pool);
  InitNode(leaves[0].weight, 1, 0, node0);
  InitNode(leaves[1].weight, 2, 0, node1);
  for (i = 0; i < maxbits; i++) {
    lists[i][0] = node0;
    lists[i][1] = node1;
  }
}
+*/ +static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) { + Node* node; + for (node = chain; node; node = node->tail) { + int i; + for (i = 0; i < node->count; i++) { + bitlengths[leaves[i].count]++; + } + } +} + +/* Orders leaves by ascending weight for qsort. Compares instead of subtracting: +a size_t difference converted to int can report the wrong sign or a false 0. */ +static int LeafComparator(const void* a, const void* b) { + size_t wa = ((const Node*)a)->weight, wb = ((const Node*)b)->weight; + return (wa > wb) - (wa < wb); +} + +int ZopfliLengthLimitedCodeLengths( + const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) { + NodePool pool; + int i; + int numsymbols = 0; /* Amount of symbols with frequency > 0. */ + int numBoundaryPMRuns; + + /* Array of lists of chains. Each list requires only two lookahead chains at + a time, so each list is an array of two Node*'s. */ + Node* (*lists)[2]; + + /* One leaf per symbol. Only numsymbols leaves will be used. */ + Node* leaves = (Node*)malloc(n * sizeof(*leaves)); + if (n > 0 && !leaves) exit(-1); /* Allocation failed. */ + /* Initialize all bitlengths at 0. */ + for (i = 0; i < n; i++) { + bitlengths[i] = 0; + } + + /* Count used symbols and place them in the leaves. */ + for (i = 0; i < n; i++) { + if (frequencies[i]) { + leaves[numsymbols].weight = frequencies[i]; + leaves[numsymbols].count = i; /* Index of symbol this leaf represents. */ + numsymbols++; + } + } + + /* Check special cases and error conditions. */ + if ((1 << maxbits) < numsymbols) { + free(leaves); + return 1; /* Error, too few maxbits to represent symbols. */ + } + if (numsymbols == 0) { + free(leaves); + return 0; /* No symbols at all. OK. */ + } + if (numsymbols == 1) { + bitlengths[leaves[0].count] = 1; + free(leaves); + return 0; /* Only one symbol, give it bitlength 1, not 0. OK. */ + } + + /* Sort the leaves from lightest to heaviest. */ + qsort(leaves, numsymbols, sizeof(Node), LeafComparator); + + /* Initialize node memory pool.
*/ + pool.size = 2 * maxbits * (maxbits + 1); + pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes)); + lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists)); + if (!pool.nodes || !lists) exit(-1); /* Allocation failed. */ + pool.next = pool.nodes; + for (i = 0; i < pool.size; i++) { + pool.nodes[i].inuse = 0; + } + InitLists(&pool, leaves, maxbits, lists); + + /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two + are already created in the initialization. Each BoundaryPM run creates one. */ + numBoundaryPMRuns = 2 * numsymbols - 4; + for (i = 0; i < numBoundaryPMRuns; i++) { + char final = i == numBoundaryPMRuns - 1; + BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final); + } + + ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths); + + free(lists); + free(leaves); + free(pool.nodes); + return 0; /* OK. */ +} diff -Nru zopfli-0~git130414/src/zopfli/katajainen.h zopfli-1.0.0/src/zopfli/katajainen.h --- zopfli-0~git130414/src/zopfli/katajainen.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/katajainen.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,42 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#ifndef ZOPFLI_KATAJAINEN_H_ +#define ZOPFLI_KATAJAINEN_H_ + +#include + +/* +Outputs minimum-redundancy length-limited code bitlengths for symbols with the +given counts.
The bitlengths are limited by maxbits. + +The output is tailored for DEFLATE: symbols that never occur, get a bit length +of 0, and if only a single symbol occurs at least once, its bitlength will be 1, +and not 0 as would theoretically be needed for a single symbol. + +frequencies: The number of occurrences of each symbol. +n: The number of symbols. +maxbits: Maximum bit length, inclusive. +bitlengths: Output, the bitlengths for the symbol prefix codes. +return: 0 for OK, non-0 for error. +*/ +int ZopfliLengthLimitedCodeLengths( + const size_t* frequencies, int n, int maxbits, unsigned* bitlengths); + +#endif /* ZOPFLI_KATAJAINEN_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/lz77.c zopfli-1.0.0/src/zopfli/lz77.c --- zopfli-0~git130414/src/zopfli/lz77.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/lz77.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,482 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+ +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "lz77.h" +#include "util.h" + +#include +#include +#include + +void ZopfliInitLZ77Store(ZopfliLZ77Store* store) { + store->size = 0; + store->litlens = 0; + store->dists = 0; +} + +void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) { + free(store->litlens); + free(store->dists); +} + +void ZopfliCopyLZ77Store( + const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) { + size_t i; + ZopfliCleanLZ77Store(dest); + dest->litlens = + (unsigned short*)malloc(sizeof(*dest->litlens) * source->size); + dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size); + + /* malloc(0) may return NULL; an empty source is not a failed allocation. */ + if (source->size > 0 && (!dest->litlens || !dest->dists)) exit(-1); + dest->size = source->size; + for (i = 0; i < source->size; i++) { + dest->litlens[i] = source->litlens[i]; + dest->dists[i] = source->dists[i]; + } +} + +/* +Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store. +store is the ZopfliLZ77Store that receives the new pair. +*/ +void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist, + ZopfliLZ77Store* store) { + size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */ + ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size); + ZOPFLI_APPEND_DATA(dist, &store->dists, &size2); +} + +/* +Gets a score of the length given the distance. Typically, the score of the +length is the length itself, but if the distance is very long, decrease the +score of the length a bit to make up for the fact that long distances use large +amounts of extra bits. + +This is not an accurate score, it is a heuristic only for the greedy LZ77 +implementation. More accurate cost models are employed later. Making this +heuristic more accurate may hurt rather than improve compression. + +The two direct uses of this heuristic are: +-avoid using a length of 3 in combination with a long distance. This only has + an effect if length == 3.
+-make a slightly better choice between the two options of the lazy matching. + +Indirectly, this affects: +-the block split points if the default of block splitting first is used, in a + rather unpredictable way +-the first zopfli run, so it affects the chance of the first run being closer + to the optimal output +*/ +static int GetLengthScore(int length, int distance) { + /* + At 1024, the distance uses 9+ extra bits and this seems to be the sweet spot + on tested files. + */ + return distance > 1024 ? length - 1 : length; +} + +void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, + unsigned short dist, unsigned short length) { + + /* TODO(lode): make this only run in a debug compile, it's for assert only. */ + size_t i; + + assert(pos + length <= datasize); + for (i = 0; i < length; i++) { + if (data[pos - dist + i] != data[pos + i]) { + assert(data[pos - dist + i] == data[pos + i]); + break; + } + } +} + +/* +Finds how long the match of scan and match is. Can be used to find how many +bytes starting from scan, and from match, are equal. Returns a pointer just +past the last byte from scan that equals the corresponding byte from match. +scan is the position to compare +match is the earlier position to compare. +end is the last possible byte, beyond which to stop looking. +safe_end is a few (8) bytes before end, for comparing multiple bytes at once. +*/ +static const unsigned char* GetMatch(const unsigned char* scan, + const unsigned char* match, + const unsigned char* end, + const unsigned char* safe_end) { + + if (sizeof(size_t) == 8) { + /* 8 checks at once per array bounds check (size_t is 64-bit). */ + while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) { + scan += 8; + match += 8; + } + } else if (sizeof(unsigned int) == 4) { + /* 4 checks at once per array bounds check (unsigned int is 32-bit).
*/ + while (scan < safe_end + && *((unsigned int*)scan) == *((unsigned int*)match)) { + scan += 4; + match += 4; + } + } else { + /* do 8 checks at once per array bounds check. */ + while (scan < safe_end && *scan == *match && *++scan == *++match + && *++scan == *++match && *++scan == *++match + && *++scan == *++match && *++scan == *++match + && *++scan == *++match && *++scan == *++match) { + scan++; match++; + } + } + + /* The remaining few bytes. */ + while (scan != end && *scan == *match) { + scan++; match++; + } + + return scan; +} + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE +/* +Gets distance, length and sublen values from the cache if possible. +Returns 1 if it got the values from the cache, 0 if not. +Updates the limit value to a smaller one if possible with more limited +information from the cache. +*/ +static int TryGetFromLongestMatchCache(ZopfliBlockState* s, + size_t pos, size_t* limit, + unsigned short* sublen, unsigned short* distance, unsigned short* length) { + /* The LMC cache starts at the beginning of the block rather than the + beginning of the whole array. */ + size_t lmcpos = pos - s->blockstart; + + /* Length > 0 and dist 0 is invalid combination, which indicates on purpose + that this cache value is not filled in yet. 
*/ + unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || + s->lmc->dist[lmcpos] != 0); + unsigned char limit_ok_for_cache = cache_available && + (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit || + (sublen && ZopfliMaxCachedSublen(s->lmc, + lmcpos, s->lmc->length[lmcpos]) >= *limit)); + + if (s->lmc && limit_ok_for_cache && cache_available) { + if (!sublen || s->lmc->length[lmcpos] + <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) { + *length = s->lmc->length[lmcpos]; + if (*length > *limit) *length = *limit; + if (sublen) { + ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen); + *distance = sublen[*length]; + if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) { + assert(sublen[*length] == s->lmc->dist[lmcpos]); + } + } else { + *distance = s->lmc->dist[lmcpos]; + } + return 1; + } + /* Can't use much of the cache, since the "sublens" need to be calculated, + but at least we already know when to stop. */ + *limit = s->lmc->length[lmcpos]; + } + + return 0; +} + +/* +Stores the found sublen, distance and length in the longest match cache, if +possible. +*/ +static void StoreInLongestMatchCache(ZopfliBlockState* s, + size_t pos, size_t limit, + const unsigned short* sublen, + unsigned short distance, unsigned short length) { + /* The LMC cache starts at the beginning of the block rather than the + beginning of the whole array. */ + size_t lmcpos = pos - s->blockstart; + + /* Length > 0 and dist 0 is invalid combination, which indicates on purpose + that this cache value is not filled in yet. */ + unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || + s->lmc->dist[lmcpos] != 0); + + if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) { + assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0); + s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance; + s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 
0 : length; + assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0)); + ZopfliSublenToCache(sublen, lmcpos, length, s->lmc); + } +} +#endif + +void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h, + const unsigned char* array, + size_t pos, size_t size, size_t limit, + unsigned short* sublen, unsigned short* distance, unsigned short* length) { + unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp; + unsigned short bestdist = 0; + unsigned short bestlength = 1; + const unsigned char* scan; + const unsigned char* match; + const unsigned char* arrayend; + const unsigned char* arrayend_safe; +#if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE + int chain_counter = ZOPFLI_MAX_CHAIN_HITS; /* For quitting early. */ +#endif + + unsigned dist = 0; /* Not unsigned short on purpose. */ + + int* hhead = h->head; + unsigned short* hprev = h->prev; + int* hhashval = h->hashval; + int hval = h->val; + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) { + assert(pos + *length <= size); + return; + } +#endif + + assert(limit <= ZOPFLI_MAX_MATCH); + assert(limit >= ZOPFLI_MIN_MATCH); + assert(pos < size); + + if (size - pos < ZOPFLI_MIN_MATCH) { + /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to + try. */ + *length = 0; + *distance = 0; + return; + } + + if (pos + limit > size) { + limit = size - pos; + } + arrayend = &array[pos] + limit; + arrayend_safe = arrayend - 8; + + assert(hval < 65536); + + pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */ + p = hprev[pp]; + + assert(pp == hpos); + + dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp); + + /* Go through all distances. 
*/ + while (dist < ZOPFLI_WINDOW_SIZE) { + unsigned short currentlength = 0; + + assert(p < ZOPFLI_WINDOW_SIZE); + assert(p == hprev[pp]); + assert(hhashval[p] == hval); + + if (dist > 0) { + assert(pos < size); + assert(dist <= pos); + scan = &array[pos]; + match = &array[pos - dist]; + + /* Testing the byte at position bestlength first, goes slightly faster. */ + if (pos + bestlength >= size + || *(scan + bestlength) == *(match + bestlength)) { + +#ifdef ZOPFLI_HASH_SAME + unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK]; + if (same0 > 2 && *scan == *match) { + unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK]; + unsigned short same = same0 < same1 ? same0 : same1; + if (same > limit) same = limit; + scan += same; + match += same; + } +#endif + scan = GetMatch(scan, match, arrayend, arrayend_safe); + currentlength = scan - &array[pos]; /* The found length. */ + } + + if (currentlength > bestlength) { + if (sublen) { + unsigned short j; + for (j = bestlength + 1; j <= currentlength; j++) { + sublen[j] = dist; + } + } + bestdist = dist; + bestlength = currentlength; + if (currentlength >= limit) break; + } + } + + +#ifdef ZOPFLI_HASH_SAME_HASH + /* Switch to the other hash once this will be more efficient. */ + if (hhead != h->head2 && bestlength >= h->same[hpos] && + h->val2 == h->hashval2[p]) { + /* Now use the hash that encodes the length and first byte. */ + hhead = h->head2; + hprev = h->prev2; + hhashval = h->hashval2; + hval = h->val2; + } +#endif + + pp = p; + p = hprev[p]; + if (p == pp) break; /* Uninited prev value. */ + + dist += p < pp ? 
pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp); + +#if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE + chain_counter--; + if (chain_counter <= 0) break; +#endif + } + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength); +#endif + + assert(bestlength <= limit); + + *distance = bestdist; + *length = bestlength; + assert(pos + *length <= size); +} + +void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in, + size_t instart, size_t inend, + ZopfliLZ77Store* store) { + size_t i = 0, j; + unsigned short leng; + unsigned short dist; + int lengthscore; + size_t windowstart = instart > ZOPFLI_WINDOW_SIZE + ? instart - ZOPFLI_WINDOW_SIZE : 0; + unsigned short dummysublen[259]; + + ZopfliHash hash; + ZopfliHash* h = &hash; + +#ifdef ZOPFLI_LAZY_MATCHING + /* Lazy matching. */ + unsigned prev_length = 0; + unsigned prev_match = 0; + int prevlengthscore; + int match_available = 0; +#endif + + if (instart == inend) return; + + ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h); + ZopfliWarmupHash(in, windowstart, inend, h); + for (i = windowstart; i < instart; i++) { + ZopfliUpdateHash(in, i, inend, h); + } + + for (i = instart; i < inend; i++) { + ZopfliUpdateHash(in, i, inend, h); + + ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen, + &dist, &leng); + lengthscore = GetLengthScore(leng, dist); + +#ifdef ZOPFLI_LAZY_MATCHING + /* Lazy matching. */ + prevlengthscore = GetLengthScore(prev_length, prev_match); + if (match_available) { + match_available = 0; + if (lengthscore > prevlengthscore + 1) { + ZopfliStoreLitLenDist(in[i - 1], 0, store); + if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) { + match_available = 1; + prev_length = leng; + prev_match = dist; + continue; + } + } else { + /* Add previous to output. */ + leng = prev_length; + dist = prev_match; + lengthscore = prevlengthscore; + /* Add to output. 
*/ + ZopfliVerifyLenDist(in, inend, i - 1, dist, leng); + ZopfliStoreLitLenDist(leng, dist, store); + for (j = 2; j < leng; j++) { + assert(i < inend); + i++; + ZopfliUpdateHash(in, i, inend, h); + } + continue; + } + } + else if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) { + match_available = 1; + prev_length = leng; + prev_match = dist; + continue; + } + /* End of lazy matching. */ +#endif + + /* Add to output. */ + if (lengthscore >= ZOPFLI_MIN_MATCH) { + ZopfliVerifyLenDist(in, inend, i, dist, leng); + ZopfliStoreLitLenDist(leng, dist, store); + } else { + leng = 1; + ZopfliStoreLitLenDist(in[i], 0, store); + } + for (j = 1; j < leng; j++) { + assert(i < inend); + i++; + ZopfliUpdateHash(in, i, inend, h); + } + } + + ZopfliCleanHash(h); +} + +void ZopfliLZ77Counts(const unsigned short* litlens, + const unsigned short* dists, + size_t start, size_t end, + size_t* ll_count, size_t* d_count) { + size_t i; + + for (i = 0; i < 288; i++) { + ll_count[i] = 0; + } + for (i = 0; i < 32; i++) { + d_count[i] = 0; + } + + for (i = start; i < end; i++) { + if (dists[i] == 0) { + ll_count[litlens[i]]++; + } else { + ll_count[ZopfliGetLengthSymbol(litlens[i])]++; + d_count[ZopfliGetDistSymbol(dists[i])]++; + } + } + + ll_count[256] = 1; /* End symbol. */ +} diff -Nru zopfli-0~git130414/src/zopfli/lz77.h zopfli-1.0.0/src/zopfli/lz77.h --- zopfli-0~git130414/src/zopfli/lz77.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/lz77.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,126 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +Functions for basic LZ77 compression and utilities for the "squeeze" LZ77 +compression. +*/ + +#ifndef ZOPFLI_LZ77_H_ +#define ZOPFLI_LZ77_H_ + +#include + +#include "cache.h" +#include "hash.h" +#include "zopfli.h" + +/* +Stores lit/length and dist pairs for LZ77. +litlens: Contains the literal symbols or length values. +dists: Indicates the distance, or 0 to indicate that there is no distance and +litlens contains a literal instead of a length. +litlens and dists both have the same size. +*/ +typedef struct ZopfliLZ77Store { + unsigned short* litlens; /* Lit or len. */ + unsigned short* dists; /* If 0: indicates literal in corresponding litlens, + if > 0: length in corresponding litlens, this is the distance. */ + size_t size; +} ZopfliLZ77Store; + +void ZopfliInitLZ77Store(ZopfliLZ77Store* store); +void ZopfliCleanLZ77Store(ZopfliLZ77Store* store); +void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest); +void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist, + ZopfliLZ77Store* store); + +/* +Some state information for compressing a block. +This is currently a bit under-used (with mainly only the longest match cache), +but is kept for easy future expansion. +*/ +typedef struct ZopfliBlockState { + const ZopfliOptions* options; + +#ifdef ZOPFLI_LONGEST_MATCH_CACHE + /* Cache for length/distance pairs found so far. 
*/ + ZopfliLongestMatchCache* lmc; +#endif + + /* The start (inclusive) and end (not inclusive) of the current block. */ + size_t blockstart; + size_t blockend; +} ZopfliBlockState; + +/* +Finds the longest match (length and corresponding distance) for LZ77 +compression. +Even when not using "sublen", it can be more efficient to provide an array, +because only then the caching is used. +array: the data +pos: position in the data to find the match for +size: size of the data +limit: limit length to maximum this value (default should be 258). This allows + finding a shorter dist for that length (= less extra bits). Must be + in the range [ZOPFLI_MIN_MATCH, ZOPFLI_MAX_MATCH]. +sublen: output array of 259 elements, or null. Has, for each length, the + smallest distance required to reach this length. Only 256 of its 259 values + are used, the first 3 are ignored (the shortest length is 3. It is purely + for convenience that the array is made 3 longer). +*/ +void ZopfliFindLongestMatch( + ZopfliBlockState *s, const ZopfliHash* h, const unsigned char* array, + size_t pos, size_t size, size_t limit, + unsigned short* sublen, unsigned short* distance, unsigned short* length); + +/* +Verifies if length and dist are indeed valid, only used for assertion. +*/ +void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, + unsigned short dist, unsigned short length); + +/* +Counts the number of literal, length and distance symbols in the given lz77 +arrays. 
+litlens: lz77 lit/lengths +dists: lz77 distances +start: where to begin counting in litlens and dists +end: where to stop counting in litlens and dists (not inclusive) +ll_count: count of each lit/len symbol, must have size 288 (see deflate + standard) +d_count: count of each dist symbol, must have size 32 (see deflate standard) +*/ +void ZopfliLZ77Counts(const unsigned short* litlens, + const unsigned short* dists, + size_t start, size_t end, + size_t* ll_count, size_t* d_count); + +/* +Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than +with the slow but better "squeeze" implementation. +The result is placed in the ZopfliLZ77Store. +If instart is larger than 0, it uses values before instart as starting +dictionary. +*/ +void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in, + size_t instart, size_t inend, + ZopfliLZ77Store* store); + +#endif /* ZOPFLI_LZ77_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/squeeze.c zopfli-1.0.0/src/zopfli/squeeze.c --- zopfli-0~git130414/src/zopfli/squeeze.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/squeeze.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,546 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+ +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "squeeze.h" + +#include +#include +#include + +#include "blocksplitter.h" +#include "deflate.h" +#include "tree.h" +#include "util.h" + +typedef struct SymbolStats { + /* The literal and length symbols. */ + size_t litlens[288]; + /* The 32 unique dist symbols, not the 32768 possible dists. */ + size_t dists[32]; + + double ll_symbols[288]; /* Length of each lit/len symbol in bits. */ + double d_symbols[32]; /* Length of each dist symbol in bits. */ +} SymbolStats; + +/* Sets everything to 0. */ +static void InitStats(SymbolStats* stats) { + memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0])); + memset(stats->dists, 0, 32 * sizeof(stats->dists[0])); + + memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0])); + memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0])); +} + +static void CopyStats(SymbolStats* source, SymbolStats* dest) { + memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0])); + memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0])); + + memcpy(dest->ll_symbols, source->ll_symbols, + 288 * sizeof(dest->ll_symbols[0])); + memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0])); +} + +/* Sets result's counts to the w1/w2-weighted sum of stats1 and stats2. */ +static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1, + const SymbolStats* stats2, double w2, + SymbolStats* result) { + size_t i; + for (i = 0; i < 288; i++) { + result->litlens[i] = + (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2); + } + for (i = 0; i < 32; i++) { + result->dists[i] = + (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2); + } + result->litlens[256] = 1; /* End symbol. */ +} + +typedef struct RanState { + unsigned int m_w, m_z; +} RanState; + +static void InitRanState(RanState* state) { + state->m_w = 1; + state->m_z = 2; +} + +/* Get random number: "Multiply-With-Carry" generator of G.
Marsaglia */ +static unsigned int Ran(RanState* state) { + state->m_z = 36969 * (state->m_z & 65535) + (state->m_z >> 16); + state->m_w = 18000 * (state->m_w & 65535) + (state->m_w >> 16); + return (state->m_z << 16) + state->m_w; /* 32-bit result. */ +} + +static void RandomizeFreqs(RanState* state, size_t* freqs, int n) { + int i; + for (i = 0; i < n; i++) { + if ((Ran(state) >> 4) % 3 == 0) freqs[i] = freqs[Ran(state) % n]; + } +} + +static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) { + RandomizeFreqs(state, stats->litlens, 288); + RandomizeFreqs(state, stats->dists, 32); + stats->litlens[256] = 1; /* End symbol. */ +} + +static void ClearStatFreqs(SymbolStats* stats) { + size_t i; + for (i = 0; i < 288; i++) stats->litlens[i] = 0; + for (i = 0; i < 32; i++) stats->dists[i] = 0; +} + +/* +Function that calculates a cost based on a model for the given LZ77 symbol. +litlen: means literal symbol if dist is 0, length otherwise. +*/ +typedef double CostModelFun(unsigned litlen, unsigned dist, void* context); + +/* +Cost model which should exactly match fixed tree. +type: CostModelFun +*/ +static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) { + (void)unused; + if (dist == 0) { + if (litlen <= 143) return 8; + else return 9; + } else { + int dbits = ZopfliGetDistExtraBits(dist); + int lbits = ZopfliGetLengthExtraBits(litlen); + int lsym = ZopfliGetLengthSymbol(litlen); + double cost = 0; + if (lsym <= 279) cost += 7; + else cost += 8; + cost += 5; /* Every dist symbol has length 5. */ + return cost + dbits + lbits; + } +} + +/* +Cost model based on symbol statistics. 
+type: CostModelFun +*/ +static double GetCostStat(unsigned litlen, unsigned dist, void* context) { + SymbolStats* stats = (SymbolStats*)context; + if (dist == 0) { + return stats->ll_symbols[litlen]; + } else { + int lsym = ZopfliGetLengthSymbol(litlen); + int lbits = ZopfliGetLengthExtraBits(litlen); + int dsym = ZopfliGetDistSymbol(dist); + int dbits = ZopfliGetDistExtraBits(dist); + return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits; + } +} + +/* +Finds the minimum possible cost this cost model can return for valid length and +distance symbols. +*/ +static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) { + double mincost; + int bestlength = 0; /* length that has lowest cost in the cost model */ + int bestdist = 0; /* distance that has lowest cost in the cost model */ + int i; + /* + Table of distances that have a different distance symbol in the deflate + specification. Each value is the first distance that has a new symbol. Only + different symbols affect the cost model so only these need to be checked. + See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes). + */ + static const int dsymbols[30] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 + }; + + mincost = ZOPFLI_LARGE_FLOAT; + for (i = 3; i < 259; i++) { + double c = costmodel(i, 1, costcontext); + if (c < mincost) { + bestlength = i; + mincost = c; + } + } + + mincost = ZOPFLI_LARGE_FLOAT; + for (i = 0; i < 30; i++) { + double c = costmodel(3, dsymbols[i], costcontext); + if (c < mincost) { + bestdist = dsymbols[i]; + mincost = c; + } + } + + return costmodel(bestlength, bestdist, costcontext); +} + +/* +Performs the forward pass for "squeeze". Gets the most optimal length to reach +every byte from a previous byte, using cost calculations. 
+s: the ZopfliBlockState +in: the input data array +instart: where to start +inend: where to stop (not inclusive) +costmodel: function to calculate the cost of some lit/len/dist pair. +costcontext: abstract context for the costmodel function +length_array: output array of size (inend - instart) which will receive the best + length to reach this byte from a previous byte. +returns the cost that was, according to the costmodel, needed to get to the end. +*/ +static double GetBestLengths(ZopfliBlockState *s, + const unsigned char* in, + size_t instart, size_t inend, + CostModelFun* costmodel, void* costcontext, + unsigned short* length_array) { + /* Best cost to get here so far. */ + size_t blocksize = inend - instart; + float* costs; + size_t i = 0, k; + unsigned short leng; + unsigned short dist; + unsigned short sublen[259]; + size_t windowstart = instart > ZOPFLI_WINDOW_SIZE + ? instart - ZOPFLI_WINDOW_SIZE : 0; + ZopfliHash hash; + ZopfliHash* h = &hash; + double result; + double mincost = GetCostModelMinCost(costmodel, costcontext); + + if (instart == inend) return 0; + + costs = (float*)malloc(sizeof(float) * (blocksize + 1)); + if (!costs) exit(-1); /* Allocation failed. */ + + ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h); + ZopfliWarmupHash(in, windowstart, inend, h); + for (i = windowstart; i < instart; i++) { + ZopfliUpdateHash(in, i, inend, h); + } + + for (i = 1; i < blocksize + 1; i++) costs[i] = ZOPFLI_LARGE_FLOAT; + costs[0] = 0; /* Because it's the start. */ + length_array[0] = 0; + + for (i = instart; i < inend; i++) { + size_t j = i - instart; /* Index in the costs array and length_array. */ + ZopfliUpdateHash(in, i, inend, h); + +#ifdef ZOPFLI_SHORTCUT_LONG_REPETITIONS + /* If we're in a long repetition of the same character and have more than + ZOPFLI_MAX_MATCH characters before and after our position. 
*/ + if (h->same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH * 2 + && i > instart + ZOPFLI_MAX_MATCH + 1 + && i + ZOPFLI_MAX_MATCH * 2 + 1 < inend + && h->same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK] + > ZOPFLI_MAX_MATCH) { + double symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1, costcontext); + /* Set the length to reach each one to ZOPFLI_MAX_MATCH, and the cost to + the cost corresponding to that length. Doing this, we skip + ZOPFLI_MAX_MATCH values to avoid calling ZopfliFindLongestMatch. */ + for (k = 0; k < ZOPFLI_MAX_MATCH; k++) { + costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost; + length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH; + i++; + j++; + ZopfliUpdateHash(in, i, inend, h); + } + } +#endif + + ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, sublen, + &dist, &leng); + + /* Literal. */ + if (i + 1 <= inend) { + double newCost = costs[j] + costmodel(in[i], 0, costcontext); + assert(newCost >= 0); + if (newCost < costs[j + 1]) { + costs[j + 1] = newCost; + length_array[j + 1] = 1; + } + } + /* Lengths. */ + for (k = 3; k <= leng && i + k <= inend; k++) { + double newCost; + + /* Calling the cost model is expensive, avoid this if we are already at + the minimum possible cost that it can return. */ + if (costs[j + k] - costs[j] <= mincost) continue; + + newCost = costs[j] + costmodel(k, sublen[k], costcontext); + assert(newCost >= 0); + if (newCost < costs[j + k]) { + assert(k <= ZOPFLI_MAX_MATCH); + costs[j + k] = newCost; + length_array[j + k] = k; + } + } + } + + assert(costs[blocksize] >= 0); + result = costs[blocksize]; + + ZopfliCleanHash(h); + free(costs); + + return result; +} + +/* +Calculates the optimal path of lz77 lengths to use, from the calculated +length_array. The length_array must contain the optimal length to reach that +byte. The path will be filled with the lengths to use, so its data size will be +the amount of lz77 symbols. 
+*/ +static void TraceBackwards(size_t size, const unsigned short* length_array, + unsigned short** path, size_t* pathsize) { + size_t index = size; + if (size == 0) return; + for (;;) { + ZOPFLI_APPEND_DATA(length_array[index], path, pathsize); + assert(length_array[index] <= index); + assert(length_array[index] <= ZOPFLI_MAX_MATCH); + assert(length_array[index] != 0); + index -= length_array[index]; + if (index == 0) break; + } + + /* Mirror result. */ + for (index = 0; index < *pathsize / 2; index++) { + unsigned short temp = (*path)[index]; + (*path)[index] = (*path)[*pathsize - index - 1]; + (*path)[*pathsize - index - 1] = temp; + } +} + +static void FollowPath(ZopfliBlockState* s, + const unsigned char* in, size_t instart, size_t inend, + unsigned short* path, size_t pathsize, + ZopfliLZ77Store* store) { + size_t i, j, pos = 0; + size_t windowstart = instart > ZOPFLI_WINDOW_SIZE + ? instart - ZOPFLI_WINDOW_SIZE : 0; + + size_t total_length_test = 0; + + ZopfliHash hash; + ZopfliHash* h = &hash; + + if (instart == inend) return; + + ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h); + ZopfliWarmupHash(in, windowstart, inend, h); + for (i = windowstart; i < instart; i++) { + ZopfliUpdateHash(in, i, inend, h); + } + + pos = instart; + for (i = 0; i < pathsize; i++) { + unsigned short length = path[i]; + unsigned short dummy_length; + unsigned short dist; + assert(pos < inend); + + ZopfliUpdateHash(in, pos, inend, h); + + /* Add to output. */ + if (length >= ZOPFLI_MIN_MATCH) { + /* Get the distance by recalculating longest match. The found length + should match the length from the path. 
*/ + ZopfliFindLongestMatch(s, h, in, pos, inend, length, 0, + &dist, &dummy_length); + assert(!(dummy_length != length && length > 2 && dummy_length > 2)); + ZopfliVerifyLenDist(in, inend, pos, dist, length); + ZopfliStoreLitLenDist(length, dist, store); + total_length_test += length; + } else { + length = 1; + ZopfliStoreLitLenDist(in[pos], 0, store); + total_length_test++; + } + + + assert(pos + length <= inend); + for (j = 1; j < length; j++) { + ZopfliUpdateHash(in, pos + j, inend, h); + } + + pos += length; + } + + ZopfliCleanHash(h); +} + +/* Calculates the entropy of the statistics */ +static void CalculateStatistics(SymbolStats* stats) { + ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols); + ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols); +} + +/* Appends the symbol statistics from the store. */ +static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) { + size_t i; + for (i = 0; i < store->size; i++) { + if (store->dists[i] == 0) { + stats->litlens[store->litlens[i]]++; + } else { + stats->litlens[ZopfliGetLengthSymbol(store->litlens[i])]++; + stats->dists[ZopfliGetDistSymbol(store->dists[i])]++; + } + } + stats->litlens[256] = 1; /* End symbol. */ + + CalculateStatistics(stats); +} + +/* +Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs +with updated statistics should be performed. + +s: the block state +in: the input data array +instart: where to start +inend: where to stop (not inclusive) +path: pointer to dynamically allocated memory to store the path +pathsize: pointer to the size of the dynamic path array +length_array: array of size (inend - instart) used to store lengths +costmodel: function to use as the cost model for this squeeze run +costcontext: abstract context for the costmodel function +store: place to output the LZ77 data +returns the cost that was, according to the costmodel, needed to get to the end. + This is not the actual cost. 
+*/ +static double LZ77OptimalRun(ZopfliBlockState* s, + const unsigned char* in, size_t instart, size_t inend, + unsigned short** path, size_t* pathsize, + unsigned short* length_array, CostModelFun* costmodel, + void* costcontext, ZopfliLZ77Store* store) { + double cost = GetBestLengths( + s, in, instart, inend, costmodel, costcontext, length_array); + free(*path); + *path = 0; + *pathsize = 0; + TraceBackwards(inend - instart, length_array, path, pathsize); + FollowPath(s, in, instart, inend, *path, *pathsize, store); + assert(cost < ZOPFLI_LARGE_FLOAT); + return cost; +} + +void ZopfliLZ77Optimal(ZopfliBlockState *s, + const unsigned char* in, size_t instart, size_t inend, + ZopfliLZ77Store* store) { + /* Dist to get to here with smallest cost. */ + size_t blocksize = inend - instart; + unsigned short* length_array = + (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); + unsigned short* path = 0; + size_t pathsize = 0; + ZopfliLZ77Store currentstore; + SymbolStats stats, beststats, laststats; + int i; + double cost; + double bestcost = ZOPFLI_LARGE_FLOAT; + double lastcost = 0; + /* Try randomizing the costs a bit once the size stabilizes. */ + RanState ran_state; + int lastrandomstep = -1; + + if (!length_array) exit(-1); /* Allocation failed. */ + + InitRanState(&ran_state); + InitStats(&stats); + ZopfliInitLZ77Store(&currentstore); + + /* Do regular deflate, then loop multiple shortest path runs, each time using + the statistics of the previous run. */ + + /* Initial run. */ + ZopfliLZ77Greedy(s, in, instart, inend, &currentstore); + GetStatistics(&currentstore, &stats); + + /* Repeat statistics with each time the cost model from the previous stat + run. 
*/ + for (i = 0; i < s->options->numiterations; i++) { + ZopfliCleanLZ77Store(&currentstore); + ZopfliInitLZ77Store(&currentstore); + LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, + length_array, GetCostStat, (void*)&stats, + &currentstore); + cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists, + 0, currentstore.size, 2); + if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) { + fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost); + } + if (cost < bestcost) { + /* Copy to the output store. */ + ZopfliCopyLZ77Store(&currentstore, store); + CopyStats(&stats, &beststats); + bestcost = cost; + } + CopyStats(&stats, &laststats); + ClearStatFreqs(&stats); + GetStatistics(&currentstore, &stats); + if (lastrandomstep != -1) { + /* This makes it converge slower but better. Do it only once the + randomness kicks in so that if the user does few iterations, it gives a + better result sooner. */ + AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats); + CalculateStatistics(&stats); + } + if (i > 5 && cost == lastcost) { + CopyStats(&beststats, &stats); + RandomizeStatFreqs(&ran_state, &stats); + CalculateStatistics(&stats); + lastrandomstep = i; + } + lastcost = cost; + } + + free(length_array); + free(path); + ZopfliCleanLZ77Store(&currentstore); +} + +void ZopfliLZ77OptimalFixed(ZopfliBlockState *s, + const unsigned char* in, + size_t instart, size_t inend, + ZopfliLZ77Store* store) +{ + /* Dist to get to here with smallest cost. */ + size_t blocksize = inend - instart; + unsigned short* length_array = + (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); + unsigned short* path = 0; + size_t pathsize = 0; + + if (!length_array) exit(-1); /* Allocation failed. */ + + s->blockstart = instart; + s->blockend = inend; + + /* Shortest path for fixed tree This one should give the shortest possible + result for fixed tree, no repeated runs are needed since the tree is known. 
*/ + LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, + length_array, GetCostFixed, 0, store); + + free(length_array); + free(path); +} diff -Nru zopfli-0~git130414/src/zopfli/squeeze.h zopfli-1.0.0/src/zopfli/squeeze.h --- zopfli-0~git130414/src/zopfli/squeeze.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/squeeze.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,60 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +The squeeze functions do enhanced LZ77 compression by optimal parsing with a +cost model, rather than greedily choosing the longest length or using a single +step of lazy matching like regular implementations. + +Since the cost model is based on the Huffman tree that can only be calculated +after the LZ77 data is generated, there is a chicken and egg problem, and +multiple runs are done with updated cost models to converge to a better +solution. +*/ + +#ifndef ZOPFLI_SQUEEZE_H_ +#define ZOPFLI_SQUEEZE_H_ + +#include "lz77.h" + +/* +Calculates lit/len and dist pairs for given data. +If instart is larger than 0, it uses values before instart as starting +dictionary. 
+*/ +void ZopfliLZ77Optimal(ZopfliBlockState *s, + const unsigned char* in, size_t instart, size_t inend, + ZopfliLZ77Store* store); + +/* +Does the same as ZopfliLZ77Optimal, but optimized for the fixed tree of the +deflate standard. +The fixed tree never gives the best compression. But this gives the best +possible LZ77 encoding possible with the fixed tree. +This does not create or output any fixed tree, only LZ77 data optimized for +using with a fixed tree. +If instart is larger than 0, it uses values before instart as starting +dictionary. +*/ +void ZopfliLZ77OptimalFixed(ZopfliBlockState *s, + const unsigned char* in, + size_t instart, size_t inend, + ZopfliLZ77Store* store); + +#endif /* ZOPFLI_SQUEEZE_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/tree.c zopfli-1.0.0/src/zopfli/tree.c --- zopfli-0~git130414/src/zopfli/tree.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/tree.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,101 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "tree.h" + +#include +#include +#include +#include + +#include "katajainen.h" +#include "util.h" + +void ZopfliLengthsToSymbols(const unsigned* lengths, size_t n, unsigned maxbits, + unsigned* symbols) { + size_t* bl_count = (size_t*)malloc(sizeof(size_t) * (maxbits + 1)); + size_t* next_code = (size_t*)malloc(sizeof(size_t) * (maxbits + 1)); + unsigned bits, i; + unsigned code; + + for (i = 0; i < n; i++) { + symbols[i] = 0; + } + + /* 1) Count the number of codes for each code length. Let bl_count[N] be the + number of codes of length N, N >= 1. */ + for (bits = 0; bits <= maxbits; bits++) { + bl_count[bits] = 0; + } + for (i = 0; i < n; i++) { + assert(lengths[i] <= maxbits); + bl_count[lengths[i]]++; + } + /* 2) Find the numerical value of the smallest code for each code length. */ + code = 0; + bl_count[0] = 0; + for (bits = 1; bits <= maxbits; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = code; + } + /* 3) Assign numerical values to all codes, using consecutive values for all + codes of the same length with the base values determined at step 2. */ + for (i = 0; i < n; i++) { + unsigned len = lengths[i]; + if (len != 0) { + symbols[i] = next_code[len]; + next_code[len]++; + } + } + + free(bl_count); + free(next_code); +} + +void ZopfliCalculateEntropy(const size_t* count, size_t n, double* bitlengths) { + static const double kInvLog2 = 1.4426950408889; /* 1.0 / log(2.0) */ + unsigned sum = 0; + unsigned i; + double log2sum; + for (i = 0; i < n; ++i) { + sum += count[i]; + } + log2sum = (sum == 0 ? 
log(n) : log(sum)) * kInvLog2; + for (i = 0; i < n; ++i) { + /* When the count of the symbol is 0, but its cost is requested anyway, it + means the symbol will appear at least once anyway, so give it the cost as if + its count is 1.*/ + if (count[i] == 0) bitlengths[i] = log2sum; + else bitlengths[i] = log2sum - log(count[i]) * kInvLog2; + /* Depending on compiler and architecture, the above subtraction of two + floating point numbers may give a negative result very close to zero + instead of zero (e.g. -5.973954e-17 with gcc 4.1.2 on Ubuntu 11.4). Clamp + it to zero. These floating point imprecisions do not affect the cost model + significantly so this is ok. */ + if (bitlengths[i] < 0 && bitlengths[i] > -1e-5) bitlengths[i] = 0; + assert(bitlengths[i] >= 0); + } +} + +void ZopfliCalculateBitLengths(const size_t* count, size_t n, int maxbits, + unsigned* bitlengths) { + int error = ZopfliLengthLimitedCodeLengths(count, n, maxbits, bitlengths); + (void) error; + assert(!error); +} diff -Nru zopfli-0~git130414/src/zopfli/tree.h zopfli-1.0.0/src/zopfli/tree.h --- zopfli-0~git130414/src/zopfli/tree.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/tree.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,51 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +Utilities for creating and using Huffman trees. 
+*/ + +#ifndef ZOPFLI_TREE_H_ +#define ZOPFLI_TREE_H_ + +#include + +/* +Calculates the bitlengths for the Huffman tree, based on the counts of each +symbol. +*/ +void ZopfliCalculateBitLengths(const size_t* count, size_t n, int maxbits, + unsigned *bitlengths); + +/* +Converts a series of Huffman tree bitlengths, to the bit values of the symbols. +*/ +void ZopfliLengthsToSymbols(const unsigned* lengths, size_t n, unsigned maxbits, + unsigned* symbols); + +/* +Calculates the entropy of each symbol, based on the counts of each symbol. The +result is similar to the result of ZopfliCalculateBitLengths, but with the +actual theoretical bit lengths according to the entropy. Since the resulting +values are fractional, they cannot be used to encode the tree specified by +DEFLATE. +*/ +void ZopfliCalculateEntropy(const size_t* count, size_t n, double* bitlengths); + +#endif /* ZOPFLI_TREE_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/util.c zopfli-1.0.0/src/zopfli/util.c --- zopfli-0~git130414/src/zopfli/util.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/util.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,213 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "util.h" + +#include "zopfli.h" + +#include +#include +#include + +int ZopfliGetDistExtraBits(int dist) { +#ifdef __GNUC__ + if (dist < 5) return 0; + return (31 ^ __builtin_clz(dist - 1)) - 1; /* log2(dist - 1) - 1 */ +#else + if (dist < 5) return 0; + else if (dist < 9) return 1; + else if (dist < 17) return 2; + else if (dist < 33) return 3; + else if (dist < 65) return 4; + else if (dist < 129) return 5; + else if (dist < 257) return 6; + else if (dist < 513) return 7; + else if (dist < 1025) return 8; + else if (dist < 2049) return 9; + else if (dist < 4097) return 10; + else if (dist < 8193) return 11; + else if (dist < 16385) return 12; + else return 13; +#endif +} + +int ZopfliGetDistExtraBitsValue(int dist) { +#ifdef __GNUC__ + if (dist < 5) { + return 0; + } else { + int l = 31 ^ __builtin_clz(dist - 1); /* log2(dist - 1) */ + return (dist - (1 + (1 << l))) & ((1 << (l - 1)) - 1); + } +#else + if (dist < 5) return 0; + else if (dist < 9) return (dist - 5) & 1; + else if (dist < 17) return (dist - 9) & 3; + else if (dist < 33) return (dist - 17) & 7; + else if (dist < 65) return (dist - 33) & 15; + else if (dist < 129) return (dist - 65) & 31; + else if (dist < 257) return (dist - 129) & 63; + else if (dist < 513) return (dist - 257) & 127; + else if (dist < 1025) return (dist - 513) & 255; + else if (dist < 2049) return (dist - 1025) & 511; + else if (dist < 4097) return (dist - 2049) & 1023; + else if (dist < 8193) return (dist - 4097) & 2047; + else if (dist < 16385) return (dist - 8193) & 4095; + else return (dist - 16385) & 8191; +#endif +} + +int ZopfliGetDistSymbol(int dist) { +#ifdef __GNUC__ + if (dist < 5) { + return dist - 1; + } else { + int l = (31 ^ __builtin_clz(dist - 1)); /* log2(dist - 1) */ + int r = ((dist - 1) >> (l - 1)) & 1; + return l * 2 + r; + } +#else + if (dist < 193) { + if (dist < 13) { /* dist 0..13. 
*/ + if (dist < 5) return dist - 1; + else if (dist < 7) return 4; + else if (dist < 9) return 5; + else return 6; + } else { /* dist 13..193. */ + if (dist < 17) return 7; + else if (dist < 25) return 8; + else if (dist < 33) return 9; + else if (dist < 49) return 10; + else if (dist < 65) return 11; + else if (dist < 97) return 12; + else if (dist < 129) return 13; + else return 14; + } + } else { + if (dist < 2049) { /* dist 193..2049. */ + if (dist < 257) return 15; + else if (dist < 385) return 16; + else if (dist < 513) return 17; + else if (dist < 769) return 18; + else if (dist < 1025) return 19; + else if (dist < 1537) return 20; + else return 21; + } else { /* dist 2049..32768. */ + if (dist < 3073) return 22; + else if (dist < 4097) return 23; + else if (dist < 6145) return 24; + else if (dist < 8193) return 25; + else if (dist < 12289) return 26; + else if (dist < 16385) return 27; + else if (dist < 24577) return 28; + else return 29; + } + } +#endif +} + +int ZopfliGetLengthExtraBits(int l) { + static const int table[259] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 + }; + return table[l]; +} + +int ZopfliGetLengthExtraBitsValue(int l) { + static const int table[259] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0, + 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, + 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0 + }; + return table[l]; +} + +/* +Returns symbol in range [257-285] (inclusive). +*/ +int ZopfliGetLengthSymbol(int l) { + static const int table[259] = { + 0, 0, 0, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 265, 266, 266, 267, 267, 268, 268, + 269, 269, 269, 269, 270, 270, 270, 270, + 271, 271, 271, 271, 272, 272, 272, 272, + 273, 273, 273, 273, 273, 273, 273, 273, + 274, 274, 274, 274, 274, 274, 274, 274, + 275, 275, 275, 275, 275, 275, 275, 275, + 276, 276, 276, 276, 276, 276, 276, 276, + 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, + 278, 278, 278, 278, 278, 278, 278, 278, + 278, 278, 278, 278, 278, 278, 278, 278, + 279, 279, 279, 279, 279, 279, 279, 279, + 279, 279, 279, 279, 279, 279, 279, 279, + 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, + 281, 281, 281, 281, 281, 281, 281, 281, + 281, 281, 281, 281, 281, 281, 281, 281, + 281, 281, 281, 281, 281, 281, 281, 281, + 281, 281, 281, 281, 281, 281, 281, 281, + 282, 282, 282, 282, 282, 282, 282, 282, + 282, 282, 282, 282, 282, 282, 282, 282, + 282, 282, 282, 282, 282, 
282, 282, 282, + 282, 282, 282, 282, 282, 282, 282, 282, + 283, 283, 283, 283, 283, 283, 283, 283, + 283, 283, 283, 283, 283, 283, 283, 283, + 283, 283, 283, 283, 283, 283, 283, 283, + 283, 283, 283, 283, 283, 283, 283, 283, + 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 285 + }; + return table[l]; +} + +void ZopfliInitOptions(ZopfliOptions* options) { + options->verbose = 0; + options->verbose_more = 0; + options->numiterations = 15; + options->blocksplitting = 1; + options->blocksplittinglast = 0; + options->blocksplittingmax = 15; +} diff -Nru zopfli-0~git130414/src/zopfli/util.h zopfli-1.0.0/src/zopfli/util.h --- zopfli-0~git130414/src/zopfli/util.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/util.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,175 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +Several utilities, including: #defines to try different compression results, +basic deflate specification values and generic program options. +*/ + +#ifndef ZOPFLI_UTIL_H_ +#define ZOPFLI_UTIL_H_ + +#include +#include + +/* Minimum and maximum length that can be encoded in deflate. */ +#define ZOPFLI_MAX_MATCH 258 +#define ZOPFLI_MIN_MATCH 3 + +/* +The window size for deflate. 
Must be a power of two. This should be 32768, the +maximum possible by the deflate spec. Anything less hurts compression more than +speed. +*/ +#define ZOPFLI_WINDOW_SIZE 32768 + +/* +The window mask used to wrap indices into the window. This is why the +window size must be a power of two. +*/ +#define ZOPFLI_WINDOW_MASK (ZOPFLI_WINDOW_SIZE - 1) + +/* +A block structure of huge, non-smart, blocks to divide the input into, to allow +operating on huge files without exceeding memory, such as the 1GB wiki9 corpus. +The whole compression algorithm, including the smarter block splitting, will +be executed independently on each huge block. +Dividing into huge blocks hurts compression, but not much relative to the size. +Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks. +*/ +#define ZOPFLI_MASTER_BLOCK_SIZE 20000000 + +/* +Used to initialize costs for example +*/ +#define ZOPFLI_LARGE_FLOAT 1e30 + +/* +For longest match cache. max 256. Uses huge amounts of memory but makes it +faster. Uses this many times three bytes per single byte of the input data. +This is so because longest match finding has to find the exact distance +that belongs to each length for the best lz77 strategy. +Good values: e.g. 5, 8. +*/ +#define ZOPFLI_CACHE_LENGTH 8 + +/* +limit the max hash chain hits for this hash value. This has an effect only +on files where the hash value is the same very often. On these files, this +gives worse compression (the value should ideally be 32768, which is the +ZOPFLI_WINDOW_SIZE, while zlib uses 4096 even for best level), but makes it +faster on some specific files. +Good value: e.g. 8192. +*/ +#define ZOPFLI_MAX_CHAIN_HITS 8192 + +/* +Whether to use the longest match cache for ZopfliFindLongestMatch. This cache +consumes a lot of memory but speeds it up. No effect on compression size. 
+*/ +#define ZOPFLI_LONGEST_MATCH_CACHE + +/* +Enable to remember amount of successive identical bytes in the hash chain for +finding longest match +required for ZOPFLI_HASH_SAME_HASH and ZOPFLI_SHORTCUT_LONG_REPETITIONS +This has no effect on the compression result, and enabling it increases speed. +*/ +#define ZOPFLI_HASH_SAME + +/* +Switch to a faster hash based on the info from ZOPFLI_HASH_SAME once the +best length so far is long enough. This is way faster for files with lots of +identical bytes, on which the compressor is otherwise too slow. Regular files +are unaffected or maybe a tiny bit slower. +This has no effect on the compression result, only on speed. +*/ +#define ZOPFLI_HASH_SAME_HASH + +/* +Enable this, to avoid slowness for files which are a repetition of the same +character more than a multiple of ZOPFLI_MAX_MATCH times. This should not affect +the compression result. +*/ +#define ZOPFLI_SHORTCUT_LONG_REPETITIONS + +/* +Whether to use lazy matching in the greedy LZ77 implementation. This gives a +better result of ZopfliLZ77Greedy, but the effect this has on the optimal LZ77 +varies from file to file. +*/ +#define ZOPFLI_LAZY_MATCHING + +/* +Gets the symbol for the given length, cfr. the DEFLATE spec. +Returns the symbol in the range [257-285] (inclusive) +*/ +int ZopfliGetLengthSymbol(int l); + +/* Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. */ +int ZopfliGetLengthExtraBits(int l); + +/* Gets value of the extra bits for the given length, cfr. the DEFLATE spec. */ +int ZopfliGetLengthExtraBitsValue(int l); + +/* Gets the symbol for the given dist, cfr. the DEFLATE spec. */ +int ZopfliGetDistSymbol(int dist); + +/* Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. */ +int ZopfliGetDistExtraBits(int dist); + +/* Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. 
*/ +int ZopfliGetDistExtraBitsValue(int dist); + +/* +Appends value to dynamically allocated memory, doubling its allocation size +whenever needed. + +value: the value to append, type T +data: pointer to the dynamic array to append to, type T** +size: pointer to the size of the array to append to, type size_t*. This is the +size that you consider the array to be, not the internal allocation size. +Precondition: allocated size of data is at least a power of two greater than or +equal to *size. +*/ +#ifdef __cplusplus /* C++ cannot assign void* from malloc to *data */ +#define ZOPFLI_APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ + if (!((*size) & ((*size) - 1))) {\ + /*double alloc size if it's a power of two*/\ + void** data_void = reinterpret_cast<void**>(data);\ + *data_void = (*size) == 0 ? malloc(sizeof(**data))\ + : realloc((*data), (*size) * 2 * sizeof(**data));\ + }\ + (*data)[(*size)] = (value);\ + (*size)++;\ +} +#else /* C gives problems with strict-aliasing rules for (void**) cast */ +#define ZOPFLI_APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ + if (!((*size) & ((*size) - 1))) {\ + /*double alloc size if it's a power of two*/\ + (*data) = (*size) == 0 ? malloc(sizeof(**data))\ + : realloc((*data), (*size) * 2 * sizeof(**data));\ + }\ + (*data)[(*size)] = (value);\ + (*size)++;\ +} +#endif + + +#endif /* ZOPFLI_UTIL_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/zlib_container.c zopfli-1.0.0/src/zopfli/zlib_container.c --- zopfli-0~git130414/src/zopfli/zlib_container.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/zlib_container.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,79 @@ +/* +Copyright 2013 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "zlib_container.h" +#include "util.h" + +#include + +#include "deflate.h" + + +/* Calculates the adler32 checksum of the data */ +static unsigned adler32(const unsigned char* data, size_t size) +{ + static const unsigned sums_overflow = 5550; + unsigned s1 = 1; + unsigned s2 = 1 >> 16; + + while (size > 0) { + size_t amount = size > sums_overflow ? sums_overflow : size; + size -= amount; + while (amount > 0) { + s1 += (*data++); + s2 += s1; + amount--; + } + s1 %= 65521; + s2 %= 65521; + } + + return (s2 << 16) | s1; +} + +void ZopfliZlibCompress(const ZopfliOptions* options, + const unsigned char* in, size_t insize, + unsigned char** out, size_t* outsize) { + unsigned char bitpointer = 0; + unsigned checksum = adler32(in, (unsigned)insize); + unsigned cmf = 120; /* CM 8, CINFO 7. 
See zlib spec.*/ + unsigned flevel = 0; + unsigned fdict = 0; + unsigned cmfflg = 256 * cmf + fdict * 32 + flevel * 64; + unsigned fcheck = 31 - cmfflg % 31; + cmfflg += fcheck; + + ZOPFLI_APPEND_DATA(cmfflg / 256, out, outsize); + ZOPFLI_APPEND_DATA(cmfflg % 256, out, outsize); + + ZopfliDeflate(options, 2 /* dynamic block */, 1 /* final */, + in, insize, &bitpointer, out, outsize); + + ZOPFLI_APPEND_DATA((checksum >> 24) % 256, out, outsize); + ZOPFLI_APPEND_DATA((checksum >> 16) % 256, out, outsize); + ZOPFLI_APPEND_DATA((checksum >> 8) % 256, out, outsize); + ZOPFLI_APPEND_DATA(checksum % 256, out, outsize); + + if (options->verbose) { + fprintf(stderr, + "Original Size: %d, Zlib: %d, Compression: %f%% Removed\n", + (int)insize, (int)*outsize, + 100.0 * (double)(insize - *outsize) / (double)insize); + } +} diff -Nru zopfli-0~git130414/src/zopfli/zlib_container.h zopfli-1.0.0/src/zopfli/zlib_container.h --- zopfli-0~git130414/src/zopfli/zlib_container.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/zlib_container.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,42 @@ +/* +Copyright 2013 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#ifndef ZOPFLI_ZLIB_H_ +#define ZOPFLI_ZLIB_H_ + +/* +Functions to compress according to the Zlib specification. 
+*/ + +#include "zopfli.h" + +/* +Compresses according to the zlib specification and append the compressed +result to the output. + +options: global program options +out: pointer to the dynamic output array to which the result is appended. Must + be freed after use. +outsize: pointer to the dynamic output array size. +*/ +void ZopfliZlibCompress(const ZopfliOptions* options, + const unsigned char* in, size_t insize, + unsigned char** out, size_t* outsize); + +#endif /* ZOPFLI_ZLIB_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/zopfli.h zopfli-1.0.0/src/zopfli/zopfli.h --- zopfli-0~git130414/src/zopfli/zopfli.h 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/zopfli.h 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,88 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#ifndef ZOPFLI_ZOPFLI_H_ +#define ZOPFLI_ZOPFLI_H_ + +#include /* for size_t */ + +/* +Options used throughout the program. +*/ +typedef struct ZopfliOptions { + /* Whether to print output */ + int verbose; + + /* Whether to print more detailed output */ + int verbose_more; + + /* + Maximum amount of times to rerun forward and backward pass to optimize LZ77 + compression cost. Good values: 10, 15 for small files, 5 for files over + several MB in size or it will be too slow. 
+ */ + int numiterations; + + /* + If true, splits the data in multiple deflate blocks with optimal choice + for the block boundaries. Block splitting gives better compression. Default: + true (1). + */ + int blocksplitting; + + /* + If true, chooses the optimal block split points only after doing the iterative + LZ77 compression. If false, chooses the block split points first, then does + iterative LZ77 on each individual block. Depending on the file, either first + or last gives the best compression. Default: false (0). + */ + int blocksplittinglast; + + /* + Maximum amount of blocks to split into (0 for unlimited, but this can give + extreme results that hurt compression on some files). Default value: 15. + */ + int blocksplittingmax; +} ZopfliOptions; + +/* Initializes options with default values. */ +void ZopfliInitOptions(ZopfliOptions* options); + +/* Output format */ +typedef enum { + ZOPFLI_FORMAT_GZIP, + ZOPFLI_FORMAT_ZLIB, + ZOPFLI_FORMAT_DEFLATE +} ZopfliFormat; + +/* +Compresses according to the given output format and appends the result to the +output. + +options: global program options +output_type: the output format to use +out: pointer to the dynamic output array to which the result is appended. Must + be freed after use +outsize: pointer to the dynamic output array size +*/ +void ZopfliCompress(const ZopfliOptions* options, ZopfliFormat output_type, + const unsigned char* in, size_t insize, + unsigned char** out, size_t* outsize); + +#endif /* ZOPFLI_ZOPFLI_H_ */ diff -Nru zopfli-0~git130414/src/zopfli/zopfli_bin.c zopfli-1.0.0/src/zopfli/zopfli_bin.c --- zopfli-0~git130414/src/zopfli/zopfli_bin.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/zopfli_bin.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,203 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +/* +Zopfli compressor program. It can output gzip-, zlib- or deflate-compatible +data. By default it creates a .gz file. This tool can only compress, not +decompress. Decompression can be done by any standard gzip, zlib or deflate +decompressor. +*/ + +#include +#include +#include +#include + +#include "deflate.h" +#include "gzip_container.h" +#include "zlib_container.h" + +/* +Loads a file into a memory array. +*/ +static void LoadFile(const char* filename, + unsigned char** out, size_t* outsize) { + FILE* file; + + *out = 0; + *outsize = 0; + file = fopen(filename, "rb"); + if (!file) return; + + fseek(file , 0 , SEEK_END); + *outsize = ftell(file); + rewind(file); + + *out = (unsigned char*)malloc(*outsize); + + if (*outsize && (*out)) { + size_t testsize = fread(*out, 1, *outsize, file); + if (testsize != *outsize) { + /* It could be a directory */ + free(*out); + *out = 0; + *outsize = 0; + } + } + + assert(!(*outsize) || out); /* If size is not zero, out must be allocated. */ + fclose(file); +} + +/* +Saves a file from a memory array, overwriting the file if it existed. 
+*/ +static void SaveFile(const char* filename, + const unsigned char* in, size_t insize) { + FILE* file = fopen(filename, "wb" ); + assert(file); + fwrite((char*)in, 1, insize, file); + fclose(file); +} + +/* +outfilename: filename to write output to, or 0 to write to stdout instead +*/ +static void CompressFile(const ZopfliOptions* options, + ZopfliFormat output_type, + const char* infilename, + const char* outfilename) { + unsigned char* in; + size_t insize; + unsigned char* out = 0; + size_t outsize = 0; + LoadFile(infilename, &in, &insize); + if (insize == 0) { + fprintf(stderr, "Invalid filename: %s\n", infilename); + return; + } + + ZopfliCompress(options, output_type, in, insize, &out, &outsize); + + if (outfilename) { + SaveFile(outfilename, out, outsize); + } else { + size_t i; + for (i = 0; i < outsize; i++) { + /* Works only if terminal does not convert newlines. */ + printf("%c", out[i]); + } + } + + free(out); + free(in); +} + +/* +Add two strings together. Size does not matter. Result must be freed. +*/ +static char* AddStrings(const char* str1, const char* str2) { + size_t len = strlen(str1) + strlen(str2); + char* result = (char*)malloc(len + 1); + if (!result) exit(-1); /* Allocation failed. 
*/ + strcpy(result, str1); + strcat(result, str2); + return result; +} + +static char StringsEqual(const char* str1, const char* str2) { + return strcmp(str1, str2) == 0; +} + +int main(int argc, char* argv[]) { + ZopfliOptions options; + ZopfliFormat output_type = ZOPFLI_FORMAT_GZIP; + const char* filename = 0; + int output_to_stdout = 0; + int i; + + ZopfliInitOptions(&options); + + for (i = 1; i < argc; i++) { + const char* arg = argv[i]; + if (StringsEqual(arg, "-v")) options.verbose = 1; + else if (StringsEqual(arg, "-c")) output_to_stdout = 1; + else if (StringsEqual(arg, "--deflate")) { + output_type = ZOPFLI_FORMAT_DEFLATE; + } + else if (StringsEqual(arg, "--zlib")) output_type = ZOPFLI_FORMAT_ZLIB; + else if (StringsEqual(arg, "--gzip")) output_type = ZOPFLI_FORMAT_GZIP; + else if (StringsEqual(arg, "--splitlast")) options.blocksplittinglast = 1; + else if (arg[0] == '-' && arg[1] == '-' && arg[2] == 'i' + && arg[3] >= '0' && arg[3] <= '9') { + options.numiterations = atoi(arg + 3); + } + else if (StringsEqual(arg, "-h")) { + fprintf(stderr, + "Usage: zopfli [OPTION]... FILE\n" + " -h gives this help\n" + " -c write the result on standard output, instead of disk" + " filename + '.gz'\n" + " -v verbose mode\n" + " --i# perform # iterations (default 15). More gives" + " more compression but is slower." 
+ " Examples: --i10, --i50, --i1000\n"); + fprintf(stderr, + " --gzip output to gzip format (default)\n" + " --zlib output to zlib format instead of gzip\n" + " --deflate output to deflate format instead of gzip\n" + " --splitlast do block splitting last instead of first\n"); + return 0; + } + } + + if (options.numiterations < 1) { + fprintf(stderr, "Error: must have 1 or more iterations"); + return 0; + } + + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-') { + char* outfilename; + filename = argv[i]; + if (output_to_stdout) { + outfilename = 0; + } else if (output_type == ZOPFLI_FORMAT_GZIP) { + outfilename = AddStrings(filename, ".gz"); + } else if (output_type == ZOPFLI_FORMAT_ZLIB) { + outfilename = AddStrings(filename, ".zlib"); + } else { + assert(output_type == ZOPFLI_FORMAT_DEFLATE); + outfilename = AddStrings(filename, ".deflate"); + } + if (options.verbose && outfilename) { + fprintf(stderr, "Saving to: %s\n", outfilename); + } + CompressFile(&options, output_type, filename, outfilename); + free(outfilename); + } + } + + if (!filename) { + fprintf(stderr, + "Please provide filename\nFor help, type: %s -h\n", argv[0]); + } + + return 0; +} diff -Nru zopfli-0~git130414/src/zopfli/zopfli_lib.c zopfli-1.0.0/src/zopfli/zopfli_lib.c --- zopfli-0~git130414/src/zopfli/zopfli_lib.c 1970-01-01 00:00:00.000000000 +0000 +++ zopfli-1.0.0/src/zopfli/zopfli_lib.c 2013-04-25 16:09:00.000000000 +0000 @@ -0,0 +1,43 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + +Author: lode.vandevenne@gmail.com (Lode Vandevenne) +Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) +*/ + +#include "zopfli.h" + +#include "deflate.h" +#include "gzip_container.h" +#include "zlib_container.h" + +#include + +void ZopfliCompress(const ZopfliOptions* options, ZopfliFormat output_type, + const unsigned char* in, size_t insize, + unsigned char** out, size_t* outsize) +{ + if (output_type == ZOPFLI_FORMAT_GZIP) { + ZopfliGzipCompress(options, in, insize, out, outsize); + } else if (output_type == ZOPFLI_FORMAT_ZLIB) { + ZopfliZlibCompress(options, in, insize, out, outsize); + } else if (output_type == ZOPFLI_FORMAT_DEFLATE) { + unsigned char bp = 0; + ZopfliDeflate(options, 2 /* Dynamic block */, 1, + in, insize, &bp, out, outsize); + } else { + assert(0); + } +} diff -Nru zopfli-0~git130414/tree.c zopfli-1.0.0/tree.c --- zopfli-0~git130414/tree.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/tree.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,101 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "tree.h" - -#include -#include -#include -#include - -#include "katajainen.h" -#include "util.h" - -void LengthsToSymbols(const unsigned* lengths, size_t n, unsigned maxbits, - unsigned* symbols) { - size_t* bl_count = (size_t*)malloc(sizeof(size_t) * (maxbits + 1)); - size_t* next_code = (size_t*)malloc(sizeof(size_t) * (maxbits + 1)); - unsigned bits, i; - unsigned code; - - for (i = 0; i < n; i++) { - symbols[i] = 0; - } - - /* 1) Count the number of codes for each code length. Let bl_count[N] be the - number of codes of length N, N >= 1. */ - for (bits = 0; bits <= maxbits; bits++) { - bl_count[bits] = 0; - } - for (i = 0; i < n; i++) { - assert(lengths[i] <= maxbits); - bl_count[lengths[i]]++; - } - /* 2) Find the numerical value of the smallest code for each code length. */ - code = 0; - bl_count[0] = 0; - for (bits = 1; bits <= maxbits; bits++) { - code = (code + bl_count[bits-1]) << 1; - next_code[bits] = code; - } - /* 3) Assign numerical values to all codes, using consecutive values for all - codes of the same length with the base values determined at step 2. */ - for (i = 0; i < n; i++) { - unsigned len = lengths[i]; - if (len != 0) { - symbols[i] = next_code[len]; - next_code[len]++; - } - } - - free(bl_count); - free(next_code); -} - -void CalculateEntropy(const size_t* count, size_t n, double* bitlengths) { - static const double kInvLog2 = 1.4426950408889; /* 1.0 / log(2.0) */ - unsigned sum = 0; - unsigned i; - double log2sum; - for (i = 0; i < n; ++i) { - sum += count[i]; - } - log2sum = (sum == 0 ? 
log(n) : log(sum)) * kInvLog2; - for (i = 0; i < n; ++i) { - /* When the count of the symbol is 0, but its cost is requested anyway, it - means the symbol will appear at least once anyway, so give it the cost as if - its count is 1.*/ - if (count[i] == 0) bitlengths[i] = log2sum; - else bitlengths[i] = log2sum - log(count[i]) * kInvLog2; - /* Depending on compiler and architecture, the above subtraction of two - floating point numbers may give a negative result very close to zero - instead of zero (e.g. -5.973954e-17 with gcc 4.1.2 on Ubuntu 11.4). Clamp - it to zero. These floating point imprecisions do not affect the cost model - significantly so this is ok. */ - if (bitlengths[i] < 0 && bitlengths[i] > -1e-5) bitlengths[i] = 0; - assert(bitlengths[i] >= 0); - } -} - -void CalculateBitLengths(const size_t* count, size_t n, int maxbits, - unsigned* bitlengths) { - int error = LengthLimitedCodeLengths(count, n, maxbits, bitlengths); - (void) error; - assert(!error); -} diff -Nru zopfli-0~git130414/tree.h zopfli-1.0.0/tree.h --- zopfli-0~git130414/tree.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/tree.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -Utilities for creating and using Huffman trees. 
-*/ - -#ifndef ZOPFLI_TREE_H_ -#define ZOPFLI_TREE_H_ - -#include - -/* -Calculates the bitlengths for the Huffman tree, based on the counts of each -symbol. -*/ -void CalculateBitLengths(const size_t* count, size_t n, int maxbits, - unsigned *bitlengths); - -/* -Converts a series of Huffman tree bitlengths, to the bit values of the symbols. -*/ -void LengthsToSymbols(const unsigned* lengths, size_t n, unsigned maxbits, - unsigned* symbols); - -/* -Calculates the entropy of each symbol, based on the counts of each symbol. The -result is similar to the result of CalculateBitLengths, but with the actual -theoritical bit lengths according to the entropy. Since the resulting values -are fractional, they cannot be used to encode the tree specified by DEFLATE. -*/ -void CalculateEntropy(const size_t* count, size_t n, double* bitlengths); - -#endif /* ZOPFLI_TREE_H_ */ diff -Nru zopfli-0~git130414/util.c zopfli-1.0.0/util.c --- zopfli-0~git130414/util.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/util.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,210 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "util.h" - -#include -#include -#include - -int GetDistExtraBits(int dist) { -#ifdef __GNUC__ - if (dist < 5) return 0; - return (31 ^ __builtin_clz(dist - 1)) - 1; /* log2(dist - 1) - 1 */ -#else - if (dist < 5) return 0; - else if (dist < 9) return 1; - else if (dist < 17) return 2; - else if (dist < 33) return 3; - else if (dist < 65) return 4; - else if (dist < 129) return 5; - else if (dist < 257) return 6; - else if (dist < 513) return 7; - else if (dist < 1025) return 8; - else if (dist < 2049) return 9; - else if (dist < 4097) return 10; - else if (dist < 8193) return 11; - else if (dist < 16385) return 12; - else return 13; -#endif -} - -int GetDistExtraBitsValue(int dist) { -#ifdef __GNUC__ - if (dist < 5) { - return 0; - } else { - int l = 31 ^ __builtin_clz(dist - 1); /* log2(dist - 1) */ - return (dist - (1 + (1 << l))) & ((1 << (l - 1)) - 1); - } -#else - if (dist < 5) return 0; - else if (dist < 9) return (dist - 5) & 1; - else if (dist < 17) return (dist - 9) & 3; - else if (dist < 33) return (dist - 17) & 7; - else if (dist < 65) return (dist - 33) & 15; - else if (dist < 129) return (dist - 65) & 31; - else if (dist < 257) return (dist - 129) & 63; - else if (dist < 513) return (dist - 257) & 127; - else if (dist < 1025) return (dist - 513) & 255; - else if (dist < 2049) return (dist - 1025) & 511; - else if (dist < 4097) return (dist - 2049) & 1023; - else if (dist < 8193) return (dist - 4097) & 2047; - else if (dist < 16385) return (dist - 8193) & 4095; - else return (dist - 16385) & 8191; -#endif -} - -int GetDistSymbol(int dist) { -#ifdef __GNUC__ - if (dist < 5) { - return dist - 1; - } else { - int l = (31 ^ __builtin_clz(dist - 1)); /* log2(dist - 1) */ - int r = ((dist - 1) >> (l - 1)) & 1; - return l * 2 + r; - } -#else - if (dist < 193) { - if (dist < 13) { /* dist 0..13. 
*/ - if (dist < 5) return dist - 1; - else if (dist < 7) return 4; - else if (dist < 9) return 5; - else return 6; - } else { /* dist 13..193. */ - if (dist < 17) return 7; - else if (dist < 25) return 8; - else if (dist < 33) return 9; - else if (dist < 49) return 10; - else if (dist < 65) return 11; - else if (dist < 97) return 12; - else if (dist < 129) return 13; - else return 14; - } - } else { - if (dist < 2049) { /* dist 193..2049. */ - if (dist < 257) return 15; - else if (dist < 385) return 16; - else if (dist < 513) return 17; - else if (dist < 769) return 18; - else if (dist < 1025) return 19; - else if (dist < 1537) return 20; - else return 21; - } else { /* dist 2049..32768. */ - if (dist < 3073) return 22; - else if (dist < 4097) return 23; - else if (dist < 6145) return 24; - else if (dist < 8193) return 25; - else if (dist < 12289) return 26; - else if (dist < 16385) return 27; - else if (dist < 24577) return 28; - else return 29; - } - } -#endif -} - -int GetLengthExtraBits(int l) { - static const int table[259] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 - }; - return table[l]; -} - -int GetLengthExtraBitsValue(int l) { - static const int table[259] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0, - 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, - 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0 - }; - return table[l]; -} - -/* -Returns symbol in range [257-285] (inclusive). -*/ -int GetLengthSymbol(int l) { - static const int table[259] = { - 0, 0, 0, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 265, 266, 266, 267, 267, 268, 268, - 269, 269, 269, 269, 270, 270, 270, 270, - 271, 271, 271, 271, 272, 272, 272, 272, - 273, 273, 273, 273, 273, 273, 273, 273, - 274, 274, 274, 274, 274, 274, 274, 274, - 275, 275, 275, 275, 275, 275, 275, 275, - 276, 276, 276, 276, 276, 276, 276, 276, - 277, 277, 277, 277, 277, 277, 277, 277, - 277, 277, 277, 277, 277, 277, 277, 277, - 278, 278, 278, 278, 278, 278, 278, 278, - 278, 278, 278, 278, 278, 278, 278, 278, - 279, 279, 279, 279, 279, 279, 279, 279, - 279, 279, 279, 279, 279, 279, 279, 279, - 280, 280, 280, 280, 280, 280, 280, 280, - 280, 280, 280, 280, 280, 280, 280, 280, - 281, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, - 282, 282, 282, 282, 282, 282, 282, 282, - 282, 282, 282, 282, 282, 282, 282, 282, - 282, 282, 282, 282, 282, 282, 282, 282, - 
282, 282, 282, 282, 282, 282, 282, 282, - 283, 283, 283, 283, 283, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, - 284, 284, 284, 284, 284, 284, 284, 284, - 284, 284, 284, 284, 284, 284, 284, 284, - 284, 284, 284, 284, 284, 284, 284, 284, - 284, 284, 284, 284, 284, 284, 284, 285 - }; - return table[l]; -} - -void InitOptions(Options* options) { - options->verbose = 0; - options->numiterations = 15; - options->blocksplitting = 1; - options->blocksplittinglast = 0; - options->blocksplittingmax = 15; -} diff -Nru zopfli-0~git130414/util.h zopfli-1.0.0/util.h --- zopfli-0~git130414/util.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/util.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,214 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -Several utilities, including: #defines to try different compression results, -basic deflate specification values and generic program options. -*/ - -#ifndef ZOPFLI_UTIL_H_ -#define ZOPFLI_UTIL_H_ - -#include -#include - -/* Minimum and maximum length that can be encoded in deflate. */ -#define MAX_MATCH 258 -#define MIN_MATCH 3 - -/* -The window size for deflate. Must be a power of two. This should be 32768, the -maximum possible by the deflate spec. 
Anything less hurts compression more than -speed. -*/ -#define WINDOW_SIZE 32768 - -/* -The window mask used to wrap indices into the window. This is why the -window size must be a power of two. -*/ -#define WINDOW_MASK (WINDOW_SIZE - 1) - -/* -A block structure of huge, non-smart, blocks to divide the input into, to allow -operating on huge files without exceeding memory, such as the 1GB wiki9 corpus. -The whole compression algorithm, including the smarter block splitting, will -be executed independently on each huge block. -Dividing into huge blocks hurts compression, but not much relative to the size. -Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks. -*/ -#define MASTER_BLOCK_SIZE 20000000 - -/* -Used to initialize costs for example -*/ -#define LARGE_FLOAT 1e30 - -/* -For longest match cache. max 256. Uses huge amounts of memory but makes it -faster. Uses this many times three bytes per single byte of the input data. -This is so because longest match finding has to find the exact distance -that belongs to each length for the best lz77 strategy. -Good values: e.g. 5, 8. -*/ -#define NUM_CACHED_LENGTHS 8 - -/* -limit the max hash chain hits for this hash value. This has an effect only -on files where the hash value is the same very often. On these files, this -gives worse compression (the value should ideally be 32768, which is the -WINDOW_SIZE, while zlib uses 4096 even for best level), but makes it faster on -some specific files. -Good value: e.g. 8192. -*/ -#define MAX_CHAIN_HITS 8192 - -/* -Whether to use the longest match cache for FindLongestMatch. This cache -consumes a lot of memory but speeds it up. No effect on compression size. -*/ -#define USE_LONGEST_MATCH_CACHE - -/* -Enable to remember amount of successive identical bytes in the hash chain for -finding longest match -required for USE_HASH_SAME_HASH and SHORTCUT_LONG_REPETITIONS -This has no effect on the compression result, and enabling it increases speed. 
-*/ -#define USE_HASH_SAME - -/* -Switch to a faster hash based on the info from USE_HASH_SAME once the -best length so far is long enough. This is way faster for files with lots of -identical bytes, on which the compressor is otherwise too slow. Regular files -are unaffected or maybe a tiny bit slower. -This has no effect on the compression result, only on speed. -*/ -#define USE_HASH_SAME_HASH - -/* -Enable this, to avoid slowness for files which are a repetition of the same -character more than a multiple of MAX_MATCH times. This should not affect the -compression result. -*/ -#define SHORTCUT_LONG_REPETITIONS - -/* -Whether to use lazy matching in the greedy LZ77 implementation. This gives a -better result of LZ77Greedy, but the effect this has on the optimal LZ77 -varies from file to file. -*/ -#define LAZY_MATCHING - -/* -Gets the symbol for the given length, cfr. the DEFLATE spec. -Returns the symbol in the range [257-285] (inclusive) -*/ -int GetLengthSymbol(int l); - -/* Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. */ -int GetLengthExtraBits(int l); - -/* Gets value of the extra bits for the given length, cfr. the DEFLATE spec. */ -int GetLengthExtraBitsValue(int l); - -/* Gets the symbol for the given dist, cfr. the DEFLATE spec. */ -int GetDistSymbol(int dist); - -/* Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. */ -int GetDistExtraBits(int dist); - -/* Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. */ -int GetDistExtraBitsValue(int dist); - -/* -Options used throughout the program. -*/ -typedef struct Options { - /* Whether to print output */ - int verbose; - - /* - Maximum amount of times to rerun forward and backward pass to optimize LZ77 - compression cost. Good values: 10, 15 for small files, 5 for files over - several MB in size or it will be too slow. 
- */ - int numiterations; - - /* - If true, splits the data in multiple deflate blocks with optimal choice - for the block boundaries. Block splitting gives better compression. Default: - true (1). - */ - int blocksplitting; - - /* - If true, chooses the optimal block split points only after doing the iterative - LZ77 compression. If false, chooses the block split points first, then does - iterative LZ77 on each individual block. Depending on the file, either first - or last gives the best compression. Default: false (0). - */ - int blocksplittinglast; - - /* - Maximum amount of blocks to split into (0 for unlimited, but this can give - extreme results that hurt compression on some files). Default value: 15. - */ - int blocksplittingmax; -} Options; - -/* Initializes options with default values. */ -void InitOptions(Options* options); - -/* -Appends value to dynamically allocated memory, doubling its allocation size -whenever needed. - -value: the value to append, type T -data: pointer to the dynamic array to append to, type T** -size: pointer to the size of the array to append to, type size_t*. This is the -size that you consider the array to be, not the internal allocation size. -Precondition: allocated size of data is at least a power of two greater than or -equal than *size. -*/ -#ifdef __cplusplus /* C++ cannot assign void* from malloc to *data */ -#define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ - if (!((*size) & ((*size) - 1))) {\ - /*double alloc size if it's a power of two*/\ - void** data_void = reinterpret_cast(data);\ - *data_void = (*size) == 0 ? 
malloc(sizeof(**data))\ - : realloc((*data), (*size) * 2 * sizeof(**data));\ - }\ - (*data)[(*size)] = (value);\ - (*size)++;\ -} -#else /* C gives problems with strict-aliasing rules for (void**) cast */ -#define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ - if (!((*size) & ((*size) - 1))) {\ - /*double alloc size if it's a power of two*/\ - (*data) = (*size) == 0 ? malloc(sizeof(**data))\ - : realloc((*data), (*size) * 2 * sizeof(**data));\ - }\ - (*data)[(*size)] = (value);\ - (*size)++;\ -} -#endif - - -#endif /* ZOPFLI_UTIL_H_ */ diff -Nru zopfli-0~git130414/zlib_container.c zopfli-1.0.0/zlib_container.c --- zopfli-0~git130414/zlib_container.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/zlib_container.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -/* -Copyright 2013 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#include "zlib_container.h" - -#include - -#include "deflate.h" - - -/* Calculates the adler32 checksum of the data */ -static unsigned adler32(const unsigned char* data, size_t size) -{ - static const unsigned sums_overflow = 5550; - unsigned s1 = 1; - unsigned s2 = 1 >> 16; - - while (size > 0) { - size_t amount = size > sums_overflow ? 
sums_overflow : size; - size -= amount; - while (amount > 0) { - s1 += (*data++); - s2 += s1; - amount--; - } - s1 %= 65521; - s2 %= 65521; - } - - return (s2 << 16) | s1; -} - -void ZlibCompress(const Options* options, - const unsigned char* in, size_t insize, - unsigned char** out, size_t* outsize) { - unsigned char bitpointer = 0; - unsigned checksum = adler32(in, (unsigned)insize); - unsigned cmf = 120; /* CM 8, CINFO 7. See zlib spec.*/ - unsigned flevel = 0; - unsigned fdict = 0; - unsigned cmfflg = 256 * cmf + fdict * 32 + flevel * 64; - unsigned fcheck = 31 - cmfflg % 31; - cmfflg += fcheck; - - APPEND_DATA(cmfflg / 256, out, outsize); - APPEND_DATA(cmfflg % 256, out, outsize); - - Deflate(options, 2 /* dynamic block */, 1 /* final */, - in, insize, &bitpointer, out, outsize); - - APPEND_DATA((checksum >> 24) % 256, out, outsize); - APPEND_DATA((checksum >> 16) % 256, out, outsize); - APPEND_DATA((checksum >> 8) % 256, out, outsize); - APPEND_DATA(checksum % 256, out, outsize); - - if (options->verbose) { - fprintf(stderr, - "Original Size: %d, Compressed: %d, Compression: %f%% Removed\n", - (int)insize, (int)*outsize, - 100.0f * (float)(insize - *outsize) / (float)insize); - } -} diff -Nru zopfli-0~git130414/zlib_container.h zopfli-1.0.0/zlib_container.h --- zopfli-0~git130414/zlib_container.h 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/zlib_container.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -/* -Copyright 2013 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -#ifndef ZOPFLI_ZLIB_H_ -#define ZOPFLI_ZLIB_H_ - -/* -Functions to compress according to the Zlib specification. -*/ - -#include "util.h" - -/* -Compresses according to the zlib specification and append the compressed -result to the output. - -options: global program options -out: pointer to the dynamic output array to which the result is appended. Must - be freed after use. -outsize: pointer to the dynamic output array size. -*/ -void ZlibCompress(const Options* options, - const unsigned char* in, size_t insize, - unsigned char** out, size_t* outsize); - -#endif /* ZOPFLI_ZLIB_H_ */ diff -Nru zopfli-0~git130414/zopfli.c zopfli-1.0.0/zopfli.c --- zopfli-0~git130414/zopfli.c 2013-03-03 12:21:32.000000000 +0000 +++ zopfli-1.0.0/zopfli.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,216 +0,0 @@ -/* -Copyright 2011 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: lode.vandevenne@gmail.com (Lode Vandevenne) -Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) -*/ - -/* -Zopfli compressor program. It can output gzip-, zlib- or deflate-compatible -data. By default it creates a .gz file. This tool can only compress, not -decompress. Decompression can be done by any standard gzip, zlib or deflate -decompressor. 
-*/ - -#include -#include -#include -#include - -#include "deflate.h" -#include "gzip_container.h" -#include "zlib_container.h" - -/* -Loads a file into a memory array. -*/ -static void LoadFile(const char* filename, - unsigned char** out, size_t* outsize) { - FILE* file; - - *out = 0; - *outsize = 0; - file = fopen(filename, "rb"); - if (!file) return; - - fseek(file , 0 , SEEK_END); - *outsize = ftell(file); - rewind(file); - - *out = (unsigned char*)malloc(*outsize); - - if (*outsize && (*out)) { - size_t testsize = fread(*out, 1, *outsize, file); - if (testsize != *outsize) { - /* It could be a directory */ - free(*out); - *out = 0; - *outsize = 0; - } - } - - assert(!(*outsize) || out); /* If size is not zero, out must be allocated. */ - fclose(file); -} - -/* -Saves a file from a memory array, overwriting the file if it existed. -*/ -static void SaveFile(const char* filename, - const unsigned char* in, size_t insize) { - FILE* file = fopen(filename, "wb" ); - assert(file); - fwrite((char*)in, 1, insize, file); - fclose(file); -} - -typedef enum { - OUTPUT_GZIP, - OUTPUT_ZLIB, - OUTPUT_DEFLATE -} OutputType; - -/* -outfilename: filename to write output to, or 0 to write to stdout instead -*/ -void CompressFile(const Options* options, - OutputType output_type, - const char* infilename, - const char* outfilename) { - unsigned char* in; - size_t insize; - unsigned char* out = 0; - size_t outsize = 0; - LoadFile(infilename, &in, &insize); - if (insize == 0) { - fprintf(stderr, "Invalid filename: %s\n", infilename); - return; - } - if (output_type == OUTPUT_GZIP) { - GzipCompress(options, in, insize, &out, &outsize); - } else if (output_type == OUTPUT_ZLIB) { - ZlibCompress(options, in, insize, &out, &outsize); - } else if (output_type == OUTPUT_DEFLATE) { - unsigned char bp = 0; - Deflate(options, 2 /* Dynamic block */, 1, in, insize, &bp, &out, &outsize); - } else { - assert(0); - } - if (outfilename) { - SaveFile(outfilename, out, outsize); - } else { - size_t 
i; - for (i = 0; i < outsize; i++) { - /* Works only if terminal does not convert newlines. */ - printf("%c", out[i]); - } - } - - free(out); - free(in); -} - -/* -Add two strings together. Size does not matter. Result must be freed. -*/ -static char* AddStrings(const char* str1, const char* str2) { - size_t len = strlen(str1) + strlen(str2); - char* result = (char*)malloc(len + 1); - if (!result) exit(-1); /* Allocation failed. */ - strcpy(result, str1); - strcat(result, str2); - return result; -} - -static char StringsEqual(const char* str1, const char* str2) { - return strcmp(str1, str2) == 0; -} - -int main(int argc, char* argv[]) { - Options options; - const char* filename = 0; - int output_to_stdout = 0; - int i; - OutputType output_type = OUTPUT_GZIP; - - InitOptions(&options); - - for (i = 1; i < argc; i++) { - if (StringsEqual(argv[i], "-v")) options.verbose = 1; - else if (StringsEqual(argv[i], "-c")) output_to_stdout = 1; - else if (StringsEqual(argv[i], "--deflate")) output_type = OUTPUT_DEFLATE; - else if (StringsEqual(argv[i], "--zlib")) output_type = OUTPUT_ZLIB; - else if (StringsEqual(argv[i], "--gzip")) output_type = OUTPUT_GZIP; - else if (StringsEqual(argv[i], "--i5")) options.numiterations = 5; - else if (StringsEqual(argv[i], "--i10")) options.numiterations = 10; - else if (StringsEqual(argv[i], "--i15")) options.numiterations = 15; - else if (StringsEqual(argv[i], "--i25")) options.numiterations = 25; - else if (StringsEqual(argv[i], "--i50")) options.numiterations = 50; - else if (StringsEqual(argv[i], "--i100")) options.numiterations = 100; - else if (StringsEqual(argv[i], "--i250")) options.numiterations = 250; - else if (StringsEqual(argv[i], "--i500")) options.numiterations = 500; - else if (StringsEqual(argv[i], "--i1000")) options.numiterations = 1000; - else if (StringsEqual(argv[i], "-h")) { - fprintf(stderr, "Usage: zopfli [OPTION]... 
FILE\n" - " -h gives this help\n" - " -c write the result on standard output, instead of disk" - " filename + '.gz'\n" - " -v verbose mode\n" - " --gzip output to gzip format (default)\n" - " --deflate output to deflate format instead of gzip\n" - " --zlib output to zlib format instead of gzip\n"); - fprintf(stderr, " --i5 less compression, but faster\n" - " --i10 less compression, but faster\n" - " --i15 default compression, 15 iterations\n" - " --i25 more compression, but slower\n" - " --i50 more compression, but slower\n" - " --i100 more compression, but slower\n" - " --i250 more compression, but slower\n" - " --i500 more compression, but slower\n" - " --i1000 more compression, but slower\n"); - return 0; - } - } - - for (i = 1; i < argc; i++) { - if (argv[i][0] != '-') { - char* outfilename; - filename = argv[i]; - if (output_to_stdout) { - outfilename = 0; - } else if (output_type == OUTPUT_GZIP) { - outfilename = AddStrings(filename, ".gz"); - } else if (output_type == OUTPUT_ZLIB) { - outfilename = AddStrings(filename, ".zlib"); - } else if (output_type == OUTPUT_DEFLATE) { - outfilename = AddStrings(filename, ".deflate"); - } else { - assert(0); - } - if (options.verbose && outfilename) { - fprintf(stderr, "Saving to: %s\n", outfilename); - } - CompressFile(&options, output_type, filename, outfilename); - free(outfilename); - } - } - - if (!filename) { - fprintf(stderr, - "Please provide filename\nFor help, type: %s -h\n", argv[0]); - } - - return 0; -}