diff -Nru gzrt-0.6+ds1/ChangeLog gzrt-0.8/ChangeLog --- gzrt-0.6+ds1/ChangeLog 2012-02-10 15:11:39.000000000 +0000 +++ gzrt-0.8/ChangeLog 2013-10-02 20:41:21.000000000 +0000 @@ -1,4 +1,20 @@ -New for release 06. (2012-02-09) +New for release 0.8 (2013-10-03) + o Eliminate call to fsync(), resulting in 99% speed improvement + o Add ability to read from the standard input stream and write to + standard output for pipeline support. + o Thanks to Alexey Yurchenko (ayurchen@gmail.com) for the above suggestions. + o Fix incorrect errpos tracker (probably caused some of the core dumps + people had reported) + o Fix verbose logging to fix stream positions being incorrect (had not + been updated after the program moved from mmap to a read buffer) + o Also, move verbose logging from stdout to stderr + o Misc error reporting updates + +New for release 0.7 (2013-02-02) + o Fix =/== confusion in read_internal error check (via Shawn + Cokus (cokus@ucla.edu)) + +New for release 0.6 (2012-02-09) o Patches from Paul Wise (pabs@debian.org) for stability and memory leaks New for release 0.5 (2006-08-29) diff -Nru gzrt-0.6+ds1/README gzrt-0.8/README --- gzrt-0.6+ds1/README 2012-02-10 15:11:39.000000000 +0000 +++ gzrt-0.8/README 2013-10-02 20:27:46.000000000 +0000 @@ -19,19 +19,28 @@ USAGE: -gzrecover [ -hsv ] [-o ] +gzrecover [ -hpsVv ] [-o ] [filename] +If no input filename is specified, gzrecover reads from the standard input. By default, gzrecover writes its output to .recovered. If the -original filename ended in .gz, that extension is removed. Options include: +original filename ended in .gz, that extension is removed. The default +output filename when reading from the standard input is "stdin.recovered". 
+ +Options include: -o - Sets the output file name +-p - Write output to standard output for pipeline support -s - Splits each recovered segment into its own file, with numeric suffixes (.1, .2, etc) (UNTESTED) -h - Print the help message -v - Verbose logging on +-V - Print version number + +-o and -p cannot be specified at the same time. -Note that gzrecover will run much more slowly than regular gunzip does. -The more corruption in the file, the more slowly it runs. +Note that gzrecover will run slower than regular gunzip does, but has +been significantly improved in speed since the last release. The more +corruption in the file, the more slowly it runs. Running gzrecover on an uncorrupted gzip file should simply uncompress it. However, substituting gzrecover for gunzip on a regular basis is not @@ -81,9 +90,9 @@ COPYRIGHT NOTICE gzrecover written by Aaron M. Renn (arenn@urbanophile.com) -Copyright (c) 2002-2012 Aaron M. Renn. +Copyright (c) 2002-2013 Aaron M. Renn. This code is licensed under the same GNU General Public License v2 -(or at your option, any later version) at GNU tar. See +(or at your option, any later version). See http://www.gnu.org/licenses/gpl.html diff -Nru gzrt-0.6+ds1/README.build gzrt-0.8/README.build --- gzrt-0.6+ds1/README.build 2012-02-10 15:11:39.000000000 +0000 +++ gzrt-0.8/README.build 2013-10-01 20:26:09.000000000 +0000 @@ -20,7 +20,7 @@ COPYRIGHT NOTICE gzrecover written by Aaron M. Renn (arenn@urbanophile.com) -Copyright (c) 2002-2012 Aaron M. Renn. +Copyright (c) 2002-2013 Aaron M. Renn. This code is licensed under the same GNU General Public License v2 (or at your option, any later version) at GNU tar. 
See diff -Nru gzrt-0.6+ds1/debian/changelog gzrt-0.8/debian/changelog --- gzrt-0.6+ds1/debian/changelog 2012-06-12 04:43:49.000000000 +0000 +++ gzrt-0.8/debian/changelog 2013-10-08 06:13:34.000000000 +0000 @@ -1,3 +1,11 @@ +gzrt (0.8-1) unstable; urgency=low + + * New upstream release + - tarball doesn't have binary file, drop get-orig-source + * Bump Standards-Version, no changes needed + + -- Paul Wise Tue, 08 Oct 2013 14:13:32 +0800 + gzrt (0.6+ds1-1) unstable; urgency=low * New upstream release diff -Nru gzrt-0.6+ds1/debian/control gzrt-0.8/debian/control --- gzrt-0.6+ds1/debian/control 2012-05-31 09:23:30.000000000 +0000 +++ gzrt-0.8/debian/control 2013-10-08 06:02:23.000000000 +0000 @@ -2,14 +2,19 @@ Section: utils Priority: optional Maintainer: Paul Wise -Build-Depends: debhelper (>= 9), zlib1g-dev -Standards-Version: 3.9.3 +Build-Depends: + debhelper (>= 9), + zlib1g-dev +Standards-Version: 3.9.4 Homepage: http://www.urbanophile.com/arenn/hacking/gzrt/ Package: gzrt Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends} -Recommends: cpio +Depends: + ${misc:Depends}, + ${shlibs:Depends} +Recommends: + cpio Description: gzip recovery toolkit gzrecover will attempt to skip over corrupted data in a gzip archive, thereby allowing the remaining data to be recovered. 
diff -Nru gzrt-0.6+ds1/debian/rules gzrt-0.8/debian/rules --- gzrt-0.6+ds1/debian/rules 2012-06-12 04:26:20.000000000 +0000 +++ gzrt-0.8/debian/rules 2013-10-08 05:56:23.000000000 +0000 @@ -6,17 +6,3 @@ %: dh $@ --parallel - -UVERSION=$(shell dpkg-parsechangelog | sed -n 's/^Version: \([^+]*\)+ds[0-9]*-.*$$/\1/p') -DVERSION=$(shell dpkg-parsechangelog | sed -n 's/^Version: \(.*\)-.*$$/\1/p') - -get-orig-source: - rm -f gzrt-$(UVERSION).tar.gz gzrt-$(UVERSION) - wget http://www.urbanophile.com/arenn/hacking/gzrt/gzrt-$(UVERSION).tar.gz - tar zxf gzrt-$(UVERSION).tar.gz - # Pre-built binary, unused - rm gzrt-$(UVERSION)/gzrecover - mv gzrt-$(UVERSION) gzrt-$(DVERSION).orig - tar zcf ../gzrt_$(DVERSION).orig.tar.gz gzrt-$(DVERSION).orig - rm -r gzrt-$(UVERSION).tar.gz gzrt-$(DVERSION).orig - diff -Nru gzrt-0.6+ds1/gzrecover.1 gzrt-0.8/gzrecover.1 --- gzrt-0.6+ds1/gzrecover.1 2012-02-10 15:11:39.000000000 +0000 +++ gzrt-0.8/gzrecover.1 2013-10-02 20:26:03.000000000 +0000 @@ -13,6 +13,10 @@ .br Show usage statement. .TP +.B \-V +.br +Display version number. +.TP .B \-v .br Turn on verbose mode. @@ -25,6 +29,10 @@ .I file .br Set the output file. +.TP +.B \-p +.br +Write recovered data to stdout instead of file. .SH SEE ALSO .BR gzip (1), .BR cpio (1). diff -Nru gzrt-0.6+ds1/gzrecover.c gzrt-0.8/gzrecover.c --- gzrt-0.6+ds1/gzrecover.c 2012-02-10 15:11:39.000000000 +0000 +++ gzrt-0.8/gzrecover.c 2013-10-02 20:53:14.000000000 +0000 @@ -1,7 +1,7 @@ /************************************************************************* * gzrecover - A program to recover data from corrupted gzip files * - * Copyright (c) 2002-2012 Aaron M. Renn (arenn@urbanophile.com) + * Copyright (c) 2002-2013 Aaron M. 
Renn (arenn@urbanophile.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published @@ -28,24 +28,27 @@ #include #include +#define VERSION "0.8" + /* Global contants */ #define DEFAULT_INBUF_SIZE (1024*1024) #define DEFAULT_OUTBUF_SIZE (64*1024) -static const char *optstring = "ho:sv"; -static const char *usage = "Usage: gzrecover [-hsv] [-o ] "; +static const char *optstring = "ho:psVv"; +static const char *usage = "Usage: gzrecover [-hpsVv] [-o ] [infile]"; /* Global Variables */ static int split_mode = 0; static int verbose_mode = 0; static int outfile_specified = 0; +static int stdout_specified = 0; static char *user_outname; static size_t inbuf_size = DEFAULT_INBUF_SIZE; static size_t outbuf_size = DEFAULT_OUTBUF_SIZE; /* Display usage string and exit */ void -show_usage(int exit_status) +show_usage_and_exit(int exit_status) { fprintf(stderr, "%s\n", usage); exit(exit_status); @@ -62,8 +65,9 @@ for (;;) { rc = read(fd, buf, count); - if ((rc == -1) && (errno = EINTR)) + if ((rc == -1) && ((errno == EINTR) || (errno == EAGAIN))) continue; + return(rc); } } @@ -76,6 +80,10 @@ char *outfile, *ptr; static int suffix = 1; + /* Just return standard output if that is specified */ + if (stdout_specified) + return STDOUT_FILENO; + /* Build the output file name */ if (outfile_specified) outfile = (char *)malloc(strlen(user_outname) + 9); @@ -108,7 +116,7 @@ if( ofd == -1 ){ throw_error("open") } if (verbose_mode) - fprintf(stdout, "Opened output file for writing: %s\n", outfile); + fprintf(stderr, "Opened output file for writing: %s\n", outfile); free(outfile); @@ -126,11 +134,7 @@ d_stream->avail_in = bufsize; rc = inflateInit2(d_stream, -15); /* Don't ask why -15 - I don't know */ - if (rc != Z_OK) - { - perror("inflateInit2"); - exit(1); - } + if (rc != Z_OK) { throw_error("inflateInit2"); } } /* Skip gzip header stuff we don't care about */ @@ -181,9 +185,9 @@ int main(int argc, 
char **argv) { - int opt, rc, ifd, ofd, founderr=0, foundgood=0; - ssize_t bytes_read=0; - off_t errpos=0, errinc=0; + int opt, rc, rc2, ifd, ofd, founderr=0, foundgood=0; + ssize_t bytes_read=0, tot_written=0; + off_t errpos=0, errinc=0, readpos=0; char *infile; unsigned char *inbuf, *outbuf; z_stream d_stream; @@ -194,13 +198,18 @@ switch (opt) { case 'h': - show_usage(0); + show_usage_and_exit(0); + break; case 'o': user_outname = optarg; outfile_specified = 1; break; + case 'p': + stdout_specified = 1; + break; + case 's': split_mode = 1; break; @@ -209,23 +218,42 @@ verbose_mode = 1; break; + case 'V': + fprintf(stderr, "gzrecover %s\n", VERSION); + break; + default: - show_usage(1); + show_usage_and_exit(1); } } - - if (optind == argc) - show_usage(1); - infile = argv[optind]; - /* Open input file and memory map */ + /* Either output to stdout (-p) or specify filename (-o) but not both */ + if (outfile_specified && stdout_specified) + { + fprintf(stderr, "gzrecover: Cannot specify output filename (-o) and stdout (-p) simultaneously.\n"); + show_usage_and_exit(1); + } + + /* Allocate our read buffer */ inbuf = (unsigned char *)malloc(inbuf_size); if( inbuf == 0 ){ throw_error("malloc") } - ifd = open(infile, O_RDONLY); + + /* Open input file using name or set to standard input if no file + specified */ + if (optind == argc) + { + infile = "stdin"; + ifd = STDIN_FILENO; + } + else + { + infile = argv[optind]; + ifd = open(infile, O_RDONLY); + } if( ifd == -1 ){ free(inbuf); throw_error("open") } if (verbose_mode) - fprintf(stdout, "Opened input file for reading: %s\n", infile); + fprintf(stderr, "Opened input file for reading: %s\n", infile); /* Open output file & initialize output buffer */ ofd = open_outfile(infile); @@ -238,16 +266,18 @@ if (bytes_read == 0) { if (verbose_mode) - fprintf(stdout, "File is empty\n"); + fprintf(stderr, "File is empty\n"); close(ifd); close(ofd); free(inbuf); free(outbuf); return(0); } + readpos = bytes_read; 
init_zlib(&d_stream, inbuf, bytes_read); - skip_gzip_header(&d_stream); + /* Assume there's a valid gzip header at the beginning of the file */ + skip_gzip_header(&d_stream); /* Finally - decompress this bad boy */ for (;;) @@ -265,11 +295,19 @@ if ((rc != Z_OK) && (rc != Z_STREAM_END)) { foundgood = 0; + + /* If founderr flag is not set, this is our first error. So set + * the error flag, reset the increment counter to 0, and + * read more data from the stream if necessary + */ if (!founderr) { - // FIXME: errpos not correct - errpos = inbuf_size - d_stream.avail_in; founderr = 1; + errpos = bytes_read - d_stream.avail_in; + + if (verbose_mode) + fprintf(stderr, "Found error at byte %d in input stream\n", + (int)(readpos - (bytes_read - errpos))); if (d_stream.avail_in == 0) { @@ -277,47 +315,67 @@ if( bytes_read == -1 ){ throw_error("read") } if (bytes_read == 0) break; + readpos += bytes_read; errinc = 0; inflateEnd(&d_stream); init_zlib(&d_stream, inbuf, bytes_read); continue; } - - if (verbose_mode) - fprintf(stdout, "Found error at byte %d in input stream\n", - (int)errpos); } + /* Note that we fall through to here from above unless we + * had to do a re-read on the stream. Increment the + * error increment counter, then re-initialize zlib from + * the point of the original error + the value of the increment + * counter (which starts out at 1). Each time through we keep + * incrementing one more byte through the buffer until we + * either find a good byte, or exhaust it and have to re-read. 
+ */ inflateEnd(&d_stream); ++errinc; - if( inbuf_size > (size_t)(errpos+errinc) ) + + /* More left to try in our buffer */ + if (bytes_read > (size_t)(errpos+errinc) ) { - init_zlib(&d_stream, inbuf+errpos+errinc, inbuf_size - (errpos+errinc)); + init_zlib(&d_stream, inbuf+errpos+errinc, bytes_read - (errpos+errinc)); } + /* Nothing left in our buffer - read again */ else { bytes_read = read_internal(ifd, inbuf, inbuf_size); if( bytes_read == -1 ){ throw_error("read") } if (bytes_read == 0) break; + readpos += bytes_read; - errinc = 0; inflateEnd(&d_stream); init_zlib(&d_stream, inbuf, bytes_read); + + /* Reset errpos and errinc to zero, but leave the founderr + flag as true */ + errpos = 0; + errinc = 0; } continue; } + /* If we make it here, we were able to decompress data. If the + * founderr flag says we were previously in an error state, that means + * we are starting to decode again after bypassing a region of + * corruption. Reset the various flags and counters. If we are in + * split mode, open the next increment of output files. 
+ */ if (founderr & !foundgood) { foundgood = 1; founderr = 0; errinc = 0; + if (verbose_mode) - fprintf(stdout, "Found good data at byte %d in input stream\n", - (int)(errpos + errinc)); + fprintf(stderr, "Found good data at byte %d in input stream\n", + (int)(readpos - (bytes_read - d_stream.avail_in))); if (split_mode) { @@ -327,16 +385,18 @@ } /* Write decompressed output - should really handle short write counts */ - if( -1 == write(ofd, outbuf, outbuf_size - d_stream.avail_out) ){ throw_error("write") } - fsync(ofd); + rc2 = write(ofd, outbuf, outbuf_size - d_stream.avail_out); + if ( rc2 == -1 ){ throw_error("write") } + tot_written += rc2; /* We've exhausted our input buffer, read some more */ if (d_stream.avail_in == 0) { bytes_read = read_internal(ifd, inbuf, inbuf_size); - if( bytes_read == -1 ){ perror("read"); exit(1); } + if( bytes_read == -1 ){ throw_error("read"); } if (bytes_read == 0) break; + readpos += bytes_read; errinc = 0; d_stream.next_in = inbuf; @@ -344,15 +404,13 @@ } /* In we get a false alarm on end of file, we need to handle that to. - * Reset to one byte past where it occurs */ + * Reset to one byte past where it occurs. This seems to happen + * quite a bit + */ if (rc == Z_STREAM_END) { off_t tmppos = d_stream.avail_in; - if (verbose_mode) - fprintf(stdout, "Premature end of stream at %zd\n", - inbuf_size - d_stream.avail_in); - inflateEnd(&d_stream); if ((unsigned char *)d_stream.next_in == inbuf) { @@ -375,8 +433,8 @@ close(ifd); if (verbose_mode) - fprintf(stdout, "Total decompressed output = %d bytes\n", - (int)d_stream.total_out); + fprintf(stderr, "Total decompressed output = %d bytes\n", + (int)tot_written); free(inbuf); free(outbuf);