diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/AUTHORS golang-github-golang-snappy-0.0.2/AUTHORS
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/AUTHORS	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/AUTHORS	2020-07-07 13:17:29.000000000 +0000
@@ -8,8 +8,10 @@
 
 # Please keep the list sorted.
 
+Amazon.com, Inc
 Damian Gryski <dgryski@gmail.com>
 Google Inc.
 Jan Mercl <0xjnml@gmail.com>
+Klaus Post <klauspost@gmail.com>
 Rodolfo Carvalho <rhcarvalho@gmail.com>
 Sebastien Binet <seb.binet@gmail.com>
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/cmd/snappytool/main.cpp golang-github-golang-snappy-0.0.2/cmd/snappytool/main.cpp
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/cmd/snappytool/main.cpp	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/cmd/snappytool/main.cpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,77 +0,0 @@
-/*
-To build the snappytool binary:
-g++ main.cpp /usr/lib/libsnappy.a -o snappytool
-or, if you have built the C++ snappy library from source:
-g++ main.cpp /path/to/your/snappy/.libs/libsnappy.a -o snappytool
-after running "make" from your snappy checkout directory.
-*/
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "snappy.h"
-
-#define N 1000000
-
-char dst[N];
-char src[N];
-
-int main(int argc, char** argv) {
-  // Parse args.
-  if (argc != 2) {
-    fprintf(stderr, "exactly one of -d or -e must be given\n");
-    return 1;
-  }
-  bool decode = strcmp(argv[1], "-d") == 0;
-  bool encode = strcmp(argv[1], "-e") == 0;
-  if (decode == encode) {
-    fprintf(stderr, "exactly one of -d or -e must be given\n");
-    return 1;
-  }
-
-  // Read all of stdin into src[:s].
-  size_t s = 0;
-  while (1) {
-    if (s == N) {
-      fprintf(stderr, "input too large\n");
-      return 1;
-    }
-    ssize_t n = read(0, src+s, N-s);
-    if (n == 0) {
-      break;
-    }
-    if (n < 0) {
-      fprintf(stderr, "read error: %s\n", strerror(errno));
-      // TODO: handle EAGAIN, EINTR?
-      return 1;
-    }
-    s += n;
-  }
-
-  // Encode or decode src[:s] to dst[:d], and write to stdout.
-  size_t d = 0;
-  if (encode) {
-    if (N < snappy::MaxCompressedLength(s)) {
-      fprintf(stderr, "input too large after encoding\n");
-      return 1;
-    }
-    snappy::RawCompress(src, s, dst, &d);
-  } else {
-    if (!snappy::GetUncompressedLength(src, s, &d)) {
-      fprintf(stderr, "could not get uncompressed length\n");
-      return 1;
-    }
-    if (N < d) {
-      fprintf(stderr, "input too large after decoding\n");
-      return 1;
-    }
-    if (!snappy::RawUncompress(src, s, dst)) {
-      fprintf(stderr, "input was not valid Snappy-compressed data\n");
-      return 1;
-    }
-  }
-  write(1, dst, d);
-  return 0;
-}
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/cmd/snappytool/main.go golang-github-golang-snappy-0.0.2/cmd/snappytool/main.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/cmd/snappytool/main.go	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/cmd/snappytool/main.go	2020-07-07 13:17:29.000000000 +0000
@@ -0,0 +1,46 @@
+package main
+
+import (
+	"errors"
+	"flag"
+	"io/ioutil"
+	"os"
+
+	"github.com/golang/snappy"
+)
+
+var (
+	decode = flag.Bool("d", false, "decode")
+	encode = flag.Bool("e", false, "encode")
+)
+
+func run() error {
+	flag.Parse()
+	if *decode == *encode {
+		return errors.New("exactly one of -d or -e must be given")
+	}
+
+	in, err := ioutil.ReadAll(os.Stdin)
+	if err != nil {
+		return err
+	}
+
+	out := []byte(nil)
+	if *decode {
+		out, err = snappy.Decode(nil, in)
+		if err != nil {
+			return err
+		}
+	} else {
+		out = snappy.Encode(nil, in)
+	}
+	_, err = os.Stdout.Write(out)
+	return err
+}
+
+func main() {
+	if err := run(); err != nil {
+		os.Stderr.WriteString(err.Error() + "\n")
+		os.Exit(1)
+	}
+}
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/CONTRIBUTORS golang-github-golang-snappy-0.0.2/CONTRIBUTORS
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/CONTRIBUTORS	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/CONTRIBUTORS	2020-07-07 13:17:29.000000000 +0000
@@ -28,7 +28,9 @@
 
 Damian Gryski <dgryski@gmail.com>
 Jan Mercl <0xjnml@gmail.com>
+Jonathan Swinney <jswinney@amazon.com>
 Kai Backman <kaib@golang.org>
+Klaus Post <klauspost@gmail.com>
 Marc-Antoine Ruel <maruel@chromium.org>
 Nigel Tao <nigeltao@golang.org>
 Rob Pike <r@golang.org>
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/changelog golang-github-golang-snappy-0.0.2/debian/changelog
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/changelog	2018-11-30 19:04:33.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/changelog	2021-01-31 14:45:02.000000000 +0000
@@ -1,3 +1,33 @@
+golang-github-golang-snappy (0.0.2-2) unstable; urgency=medium
+
+  * Team upload.
+  * Fix failure on go1.16 and 32bit system
+    + Backport upstream patch for go1.16
+    + Skip failed test on 32bit system
+      The new test doesn't pass on old version too.
+
+ -- Shengjing Zhu <zhsj@debian.org>  Sun, 31 Jan 2021 22:45:02 +0800
+
+golang-github-golang-snappy (0.0.2-1) unstable; urgency=medium
+
+  * Team upload.
+
+  [ Debian Janitor ]
+  * Apply multi-arch hints.
+    + golang-github-golang-snappy-dev: Add Multi-Arch: foreign.
+
+  [ Shengjing Zhu ]
+  * New upstream version
+  * Add uscan watch file
+  * Update Section to golang
+  * Bump debhelper-compat to 13
+  * Update Standards-Version to 4.5.1 (no changes)
+  * Add Rules-Requires-Root
+  * Add autopkgtest-go
+  * Remove unused dpkg source local-options
+
+ -- Shengjing Zhu <zhsj@debian.org>  Sun, 31 Jan 2021 04:54:08 +0800
+
 golang-github-golang-snappy (0.0+git20160529.d9eb7a3-3) unstable; urgency=medium
 
   * Team upload.
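The rewritten cmd/snappytool above exercises the library's block-format entry points directly, so the following minimal sketch round-trips a buffer the same way a `snappytool -e | snappytool -d` pipeline would. It assumes only the public snappy.Encode and snappy.Decode calls shown in main.go; the sample input is an arbitrary placeholder:

    // Block-format round trip, mirroring the -e and -d paths of cmd/snappytool.
    package main

    import (
    	"bytes"
    	"fmt"

    	"github.com/golang/snappy"
    )

    func main() {
    	in := bytes.Repeat([]byte("snappy "), 32) // placeholder input

    	enc := snappy.Encode(nil, in)       // what the -e path writes to stdout
    	dec, err := snappy.Decode(nil, enc) // what the -d path does in reverse
    	if err != nil {
    		panic(err)
    	}
    	fmt.Printf("%d bytes -> %d compressed, round-trips: %t\n",
    		len(in), len(enc), bytes.Equal(dec, in))
    }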
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/compat golang-github-golang-snappy-0.0.2/debian/compat
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/compat	2018-11-30 19:03:40.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/compat	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-11
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/control golang-github-golang-snappy-0.0.2/debian/control
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/control	2018-11-30 19:03:44.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/control	2021-01-31 14:45:02.000000000 +0000
@@ -1,25 +1,28 @@
 Source: golang-github-golang-snappy
-Section: devel
+Section: golang
 Priority: optional
 Maintainer: Debian Go Packaging Team <team+pkg-go@tracker.debian.org>
 Uploaders: Hilko Bengen <bengen@debian.org>,
-           Martín Ferrari <tincho@debian.org>
-Build-Depends: debhelper (>= 11~),
+           Martín Ferrari <tincho@debian.org>,
+Build-Depends: debhelper-compat (= 13),
                dh-golang (>= 1.17~),
                golang-any,
-Standards-Version: 4.2.1
+Standards-Version: 4.5.1
 Homepage: https://github.com/golang/snappy
 Vcs-Browser: https://salsa.debian.org/go-team/packages/golang-github-golang-snappy
 Vcs-Git: https://salsa.debian.org/go-team/packages/golang-github-golang-snappy.git
 XS-Go-Import-Path: github.com/golang/snappy
+Rules-Requires-Root: no
+Testsuite: autopkgtest-pkg-go
 
 Package: golang-github-golang-snappy-dev
 Architecture: all
 Depends: ${misc:Depends},
          ${shlibs:Depends},
-Replaces: golang-snappy-go-dev (<< 0.0+git20150730.723cc1e-3~)
-Breaks: golang-snappy-go-dev (<< 0.0+git20150730.723cc1e-3~)
-Provides: golang-snappy-go-dev
+Replaces: golang-snappy-go-dev (<< 0.0+git20150730.723cc1e-3~),
+Breaks: golang-snappy-go-dev (<< 0.0+git20150730.723cc1e-3~),
+Provides: golang-snappy-go-dev,
+Multi-Arch: foreign
 Description: Implementation of the Snappy compression format in Go
  Snappy is a compression/decompression library. It does not aim for maximum
  compression, or compatibility with any other compression library; instead, it
@@ -36,7 +39,8 @@
 Package: golang-snappy-go-dev
 Section: oldlibs
 Architecture: all
-Depends: ${misc:Depends}, golang-github-golang-snappy-dev
+Depends: golang-github-golang-snappy-dev,
+         ${misc:Depends},
 Description: Transitional package for golang-github-golang-snappy-dev
  This is a transitional package to ease upgrades to the
  golang-github-golang-snappy-dev package. It can safely be removed.
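The "Fix failure on go1.16 and 32bit system" changelog entry above is implemented by the two quilt patches that follow: the first backports the upstream arm64 assembler fixes, and the second appends a go-fuzz-discovered test case that only behaves as expected when the platform int is 8 bytes wide. A standalone sketch of that word-size gate, using the same unsafe.Sizeof check as the patch:

    // unsafe.Sizeof(int(0)) is 8 on 64-bit platforms and 4 on 32-bit ones,
    // which is how patch 0002 below decides whether to append the test case.
    package main

    import (
    	"fmt"
    	"unsafe"
    )

    func main() {
    	if unsafe.Sizeof(int(0)) == 8 {
    		fmt.Println("64-bit int: include the large-decodedLen test case")
    	} else {
    		fmt.Println("32-bit int: skip it, as the Debian patch does")
    	}
    }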
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/patches/0001-Backport-go1.16-fix-on-arm64.patch golang-github-golang-snappy-0.0.2/debian/patches/0001-Backport-go1.16-fix-on-arm64.patch
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/patches/0001-Backport-go1.16-fix-on-arm64.patch	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/patches/0001-Backport-go1.16-fix-on-arm64.patch	2021-01-31 14:45:02.000000000 +0000
@@ -0,0 +1,267 @@
+From: Shengjing Zhu <zhsj@debian.org>
+Date: Sun, 31 Jan 2021 22:33:53 +0800
+Subject: Backport go1.16 fix on arm64
+
+Origin: backport, https://github.com/golang/snappy/pull/56
+---
+ decode_arm64.s | 45 +++++++++++++-------------------
+ encode_arm64.s | 81 +++++++++++++++++++++++++++-------------------------------
+ 2 files changed, 55 insertions(+), 71 deletions(-)
+
+diff --git a/decode_arm64.s b/decode_arm64.s
+index bfafa0c..7a3ead1 100644
+--- a/decode_arm64.s
++++ b/decode_arm64.s
+@@ -70,7 +70,7 @@ loop:
+ 	// x := uint32(src[s] >> 2)
+ 	// switch
+ 	MOVW $60, R1
+-	ADD R4>>2, ZR, R4
++	LSRW $2, R4, R4
+ 	CMPW R4, R1
+ 	BLS tagLit60Plus
+ 
+@@ -111,13 +111,12 @@ doLit:
+ 	// is contiguous in memory and so it needs to leave enough source bytes to
+ 	// read the next tag without refilling buffers, but Go's Decode assumes
+ 	// contiguousness (the src argument is a []byte).
+-	MOVD $16, R1
+-	CMP R1, R4
+-	BGT callMemmove
+-	CMP R1, R2
+-	BLT callMemmove
+-	CMP R1, R3
+-	BLT callMemmove
++	CMP $16, R4
++	BGT callMemmove
++	CMP $16, R2
++	BLT callMemmove
++	CMP $16, R3
++	BLT callMemmove
+ 
+ 	// !!! Implement the copy from src to dst as a 16-byte load and store.
+ 	// (Decode's documentation says that dst and src must not overlap.)
+@@ -130,9 +129,8 @@ doLit:
+ 	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
+ 	// 16-byte loads and stores. This technique probably wouldn't be as
+ 	// effective on architectures that are fussier about alignment.
+-
+-	VLD1 0(R6), [V0.B16]
+-	VST1 [V0.B16], 0(R7)
++	LDP 0(R6), (R14, R15)
++	STP (R14, R15), 0(R7)
+ 
+ 	// d += length
+ 	// s += length
+@@ -210,8 +208,7 @@ tagLit61:
+ 	B doLit
+ 
+ tagLit62Plus:
+-	MOVW $62, R1
+-	CMPW R1, R4
++	CMPW $62, R4
+ 	BHI tagLit63
+ 
+ 	// case x == 62:
+@@ -273,10 +270,9 @@ tagCopy:
+ 	// We have a copy tag. We assume that:
+ 	//	- R3 == src[s] & 0x03
+ 	//	- R4 == src[s]
+-	MOVD $2, R1
+-	CMP R1, R3
+-	BEQ tagCopy2
+-	BGT tagCopy4
++	CMP $2, R3
++	BEQ tagCopy2
++	BGT tagCopy4
+ 
+ 	// case tagCopy1:
+ 	// s += 2
+@@ -346,13 +342,11 @@ doCopy:
+ 	// }
+ 	// copy 16 bytes
+ 	// d += length
+-	MOVD $16, R1
+-	MOVD $8, R0
+-	CMP R1, R4
++	CMP $16, R4
+ 	BGT slowForwardCopy
+-	CMP R0, R5
++	CMP $8, R5
+ 	BLT slowForwardCopy
+-	CMP R1, R14
++	CMP $16, R14
+ 	BLT slowForwardCopy
+ 	MOVD 0(R15), R2
+ 	MOVD R2, 0(R7)
+@@ -426,8 +420,7 @@ makeOffsetAtLeast8:
+ 	//	// The two previous lines together means that d-offset, and therefore
+ 	//	// R15, is unchanged.
+ 	// }
+-	MOVD $8, R1
+-	CMP R1, R5
++	CMP $8, R5
+ 	BGE fixUpSlowForwardCopy
+ 	MOVD (R15), R3
+ 	MOVD R3, (R7)
+@@ -477,9 +470,7 @@ verySlowForwardCopy:
+ 	ADD $1, R15, R15
+ 	ADD $1, R7, R7
+ 	SUB $1, R4, R4
+-	MOVD $0, R1
+-	CMP R1, R4
+-	BNE verySlowForwardCopy
++	CBNZ R4, verySlowForwardCopy
+ 	B loop
+ 
+ 	// The code above handles copy tags.
+diff --git a/encode_arm64.s b/encode_arm64.s
+index 1f565ee..bf83667 100644
+--- a/encode_arm64.s
++++ b/encode_arm64.s
+@@ -35,11 +35,9 @@ TEXT ·emitLiteral(SB), NOSPLIT, $32-56
+ 	MOVW R3, R4
+ 	SUBW $1, R4, R4
+ 
+-	MOVW $60, R2
+-	CMPW R2, R4
++	CMPW $60, R4
+ 	BLT oneByte
+-	MOVW $256, R2
+-	CMPW R2, R4
++	CMPW $256, R4
+ 	BLT twoBytes
+ 
+ threeBytes:
+@@ -98,8 +96,7 @@ TEXT ·emitCopy(SB), NOSPLIT, $0-48
+ 
+ loop0:
+ 	// for length >= 68 { etc }
+-	MOVW $68, R2
+-	CMPW R2, R3
++	CMPW $68, R3
+ 	BLT step1
+ 
+ 	// Emit a length 64 copy, encoded as 3 bytes.
+@@ -112,9 +109,8 @@ loop0:
+ 
+ step1:
+ 	// if length > 64 { etc }
+-	MOVD $64, R2
+-	CMP R2, R3
+-	BLE step2
++	CMP $64, R3
++	BLE step2
+ 
+ 	// Emit a length 60 copy, encoded as 3 bytes.
+ 	MOVD $0xee, R2
+@@ -125,11 +121,9 @@ step1:
+ 
+ step2:
+ 	// if length >= 12 || offset >= 2048 { goto step3 }
+-	MOVD $12, R2
+-	CMP R2, R3
++	CMP $12, R3
+ 	BGE step3
+-	MOVW $2048, R2
+-	CMPW R2, R11
++	CMPW $2048, R11
+ 	BGE step3
+ 
+ 	// Emit the remaining copy, encoded as 2 bytes.
+@@ -295,27 +289,24 @@ varTable:
+ 	// var table [maxTableSize]uint16
+ 	//
+ 	// In the asm code, unlike the Go code, we can zero-initialize only the
+-	// first tableSize elements. Each uint16 element is 2 bytes and each VST1
+-	// writes 64 bytes, so we can do only tableSize/32 writes instead of the
+-	// 2048 writes that would zero-initialize all of table's 32768 bytes.
+-	// This clear could overrun the first tableSize elements, but it won't
+-	// overrun the allocated stack size.
++	// first tableSize elements. Each uint16 element is 2 bytes and each
++	// iterations writes 64 bytes, so we can do only tableSize/32 writes
++	// instead of the 2048 writes that would zero-initialize all of table's
++	// 32768 bytes. This clear could overrun the first tableSize elements, but
++	// it won't overrun the allocated stack size.
+ 	ADD $128, RSP, R17
+ 	MOVD R17, R4
+ 
+ 	// !!! R6 = &src[tableSize]
+ 	ADD R6<<1, R17, R6
+ 
+-	// zero the SIMD registers
+-	VEOR V0.B16, V0.B16, V0.B16
+-	VEOR V1.B16, V1.B16, V1.B16
+-	VEOR V2.B16, V2.B16, V2.B16
+-	VEOR V3.B16, V3.B16, V3.B16
+-
+ memclr:
+-	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(R4)
+-	CMP R4, R6
+-	BHI memclr
++	STP.P (ZR, ZR), 64(R4)
++	STP (ZR, ZR), -48(R4)
++	STP (ZR, ZR), -32(R4)
++	STP (ZR, ZR), -16(R4)
++	CMP R4, R6
++	BHI memclr
+ 
+ 	// !!! R6 = &src[0]
+ 	MOVD R7, R6
+@@ -404,8 +395,7 @@ fourByteMatch:
+ 	// on inputMargin in encode.go.
+ 	MOVD R7, R3
+ 	SUB R10, R3, R3
+-	MOVD $16, R2
+-	CMP R2, R3
++	CMP $16, R3
+ 	BLE emitLiteralFastPath
+ 
+ 	// ----------------------------------------
+@@ -454,18 +444,21 @@ inlineEmitLiteralMemmove:
+ 	MOVD R3, 24(RSP)
+ 
+ 	// Finish the "d +=" part of "d += emitLiteral(etc)".
+-	ADD R3, R8, R8
+-	MOVD R7, 80(RSP)
+-	MOVD R8, 88(RSP)
+-	MOVD R15, 120(RSP)
+-	CALL runtime·memmove(SB)
+-	MOVD 64(RSP), R5
+-	MOVD 72(RSP), R6
+-	MOVD 80(RSP), R7
+-	MOVD 88(RSP), R8
+-	MOVD 96(RSP), R9
+-	MOVD 120(RSP), R15
+-	B inner1
++	ADD R3, R8, R8
++	MOVD R7, 80(RSP)
++	MOVD R8, 88(RSP)
++	MOVD R15, 120(RSP)
++	CALL runtime·memmove(SB)
++	MOVD 64(RSP), R5
++	MOVD 72(RSP), R6
++	MOVD 80(RSP), R7
++	MOVD 88(RSP), R8
++	MOVD 96(RSP), R9
++	MOVD 120(RSP), R15
++	ADD $128, RSP, R17
++	MOVW $0xa7bd, R16
++	MOVKW $(0x1e35<<16), R16
++	B inner1
+ 
+ inlineEmitLiteralEnd:
+ 	// End inline of the emitLiteral call.
+@@ -489,9 +482,9 @@ emitLiteralFastPath:
+ 	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
+ 	// 16-byte loads and stores. This technique probably wouldn't be as
+ 	// effective on architectures that are fussier about alignment.
+-	VLD1 0(R10), [V0.B16]
+-	VST1 [V0.B16], 0(R8)
+-	ADD R3, R8, R8
++	LDP 0(R10), (R0, R1)
++	STP (R0, R1), 0(R8)
++	ADD R3, R8, R8
+ 
+ inner1:
+ 	// for { etc }
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/patches/0002-Skip-failed-test-on-32bit-system.patch golang-github-golang-snappy-0.0.2/debian/patches/0002-Skip-failed-test-on-32bit-system.patch
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/patches/0002-Skip-failed-test-on-32bit-system.patch	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/patches/0002-Skip-failed-test-on-32bit-system.patch	2021-01-31 14:45:02.000000000 +0000
@@ -0,0 +1,51 @@
+From: Shengjing Zhu <zhsj@debian.org>
+Date: Sun, 31 Jan 2021 22:41:42 +0800
+Subject: Skip failed test on 32bit system
+
+The test doesn't pass on old version too, not a regression.
+
+Bug: https://github.com/golang/snappy/issues/58
+---
+ snappy_test.go | 20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/snappy_test.go b/snappy_test.go
+index d7c3ea6..c310ba1 100644
+--- a/snappy_test.go
++++ b/snappy_test.go
+@@ -19,6 +19,7 @@ import (
+ 	"runtime"
+ 	"strings"
+ 	"testing"
++	"unsafe"
+ )
+ 
+ var (
+@@ -316,13 +317,22 @@ func TestDecode(t *testing.T) {
+ 		"\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
+ 		"abcdbc",
+ 		nil,
+-	}, {
+-		`decodedLen=0; tagCopy4, 4 extra length|offset bytes; with msb set (0x93); discovered by go-fuzz`,
+-		"\x00\xfc000\x93",
+-		"",
+-		ErrCorrupt,
+ 	}}
+ 
++	if unsafe.Sizeof(int(0)) == 8 {
++		testCases = append(testCases, struct {
++			desc    string
++			input   string
++			want    string
++			wantErr error
++		}{
++			`decodedLen=0; tagCopy4, 4 extra length|offset bytes; with msb set (0x93); discovered by go-fuzz`,
++			"\x00\xfc000\x93",
++			"",
++			ErrCorrupt,
++		})
++	}
++
+ 	const (
+ 		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
+ 		// not present in either the input or the output. It is written to dBuf
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/patches/series golang-github-golang-snappy-0.0.2/debian/patches/series
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/patches/series	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/patches/series	2021-01-31 14:45:02.000000000 +0000
@@ -0,0 +1,2 @@
+0001-Backport-go1.16-fix-on-arm64.patch
+0002-Skip-failed-test-on-32bit-system.patch
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/rules golang-github-golang-snappy-0.0.2/debian/rules
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/rules	2018-11-30 19:03:24.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/rules	2021-01-31 14:45:02.000000000 +0000
@@ -1,16 +1,6 @@
 #!/usr/bin/make -f
 
-export DH_GOLANG_INSTALL_EXTRA = testdata
+export DH_GOLANG_EXCLUDES := cmd/snappytool
 
 %:
 	dh $@ --buildsystem=golang --with=golang
-
-DEBVERS ?= $(shell dpkg-parsechangelog | sed -n -e 's/^Version: //p')
-VERSION ?= $(shell echo '$(DEBVERS)' | sed 's/^[[:digit:]]*://; s/[-].*//')
-DEBPKGNAME ?= $(shell dpkg-parsechangelog | sed -n -e 's/^Source: //p')
-
-gen-orig-tgz:
-	if [ ! -f ../$(DEBPKGNAME)_$(VERSION).orig.tar.gz ] ; then \
-		git archive --format=tar.gz --prefix=$(DEBPKGNAME)-$(VERSION)/ \
-			upstream/$(VERSION) >../$(DEBPKGNAME)_$(VERSION).orig.tar.gz ; \
-	fi
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/watch golang-github-golang-snappy-0.0.2/debian/watch
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/debian/watch	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/debian/watch	2021-01-31 14:45:02.000000000 +0000
@@ -0,0 +1,4 @@
+version=4
+opts="mode=git, pgpmode=none" \
+ https://github.com/golang/snappy \
+ refs/tags/v?@ANY_VERSION@ debian
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_amd64.go golang-github-golang-snappy-0.0.2/decode_amd64.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_amd64.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/decode_amd64.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,14 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-package snappy
-
-// decode has the same semantics as in decode_other.go.
-//
-//go:noescape
-func decode(dst, src []byte) int
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_arm64.s golang-github-golang-snappy-0.0.2/decode_arm64.s
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_arm64.s	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/decode_arm64.s	2020-07-07 13:17:29.000000000 +0000
@@ -0,0 +1,503 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The asm code generally follows the pure Go code in decode_other.go, except
+// where marked with a "!!!".
+
+// func decode(dst, src []byte) int
+//
+// All local variables fit into registers. The non-zero stack size is only to
+// spill registers and push args when issuing a CALL. The register allocation:
+//	- R2	scratch
+//	- R3	scratch
+//	- R4	length or x
+//	- R5	offset
+//	- R6	&src[s]
+//	- R7	&dst[d]
+//	+ R8	dst_base
+//	+ R9	dst_len
+//	+ R10	dst_base + dst_len
+//	+ R11	src_base
+//	+ R12	src_len
+//	+ R13	src_base + src_len
+//	- R14	used by doCopy
+//	- R15	used by doCopy
+//
+// The registers R8-R13 (marked with a "+") are set at the start of the
+// function, and after a CALL returns, and are not otherwise modified.
+//
+// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7.
+// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
+TEXT ·decode(SB), NOSPLIT, $56-56
+	// Initialize R6, R7 and R8-R13.
+	MOVD dst_base+0(FP), R8
+	MOVD dst_len+8(FP), R9
+	MOVD R8, R7
+	MOVD R8, R10
+	ADD R9, R10, R10
+	MOVD src_base+24(FP), R11
+	MOVD src_len+32(FP), R12
+	MOVD R11, R6
+	MOVD R11, R13
+	ADD R12, R13, R13
+
+loop:
+	// for s < len(src)
+	CMP R13, R6
+	BEQ end
+
+	// R4 = uint32(src[s])
+	//
+	// switch src[s] & 0x03
+	MOVBU (R6), R4
+	MOVW R4, R3
+	ANDW $3, R3
+	MOVW $1, R1
+	CMPW R1, R3
+	BGE tagCopy
+
+	// ----------------------------------------
+	// The code below handles literal tags.
+
+	// case tagLiteral:
+	// x := uint32(src[s] >> 2)
+	// switch
+	MOVW $60, R1
+	ADD R4>>2, ZR, R4
+	CMPW R4, R1
+	BLS tagLit60Plus
+
+	// case x < 60:
+	// s++
+	ADD $1, R6, R6
+
+doLit:
+	// This is the end of the inner "switch", when we have a literal tag.
+	//
+	// We assume that R4 == x and x fits in a uint32, where x is the variable
+	// used in the pure Go decode_other.go code.
+
+	// length = int(x) + 1
+	//
+	// Unlike the pure Go code, we don't need to check if length <= 0 because
+	// R4 can hold 64 bits, so the increment cannot overflow.
+	ADD $1, R4, R4
+
+	// Prepare to check if copying length bytes will run past the end of dst or
+	// src.
+	//
+	// R2 = len(dst) - d
+	// R3 = len(src) - s
+	MOVD R10, R2
+	SUB R7, R2, R2
+	MOVD R13, R3
+	SUB R6, R3, R3
+
+	// !!! Try a faster technique for short (16 or fewer bytes) copies.
+	//
+	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
+	//   goto callMemmove // Fall back on calling runtime·memmove.
+	// }
+	//
+	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
+	// against 21 instead of 16, because it cannot assume that all of its input
+	// is contiguous in memory and so it needs to leave enough source bytes to
+	// read the next tag without refilling buffers, but Go's Decode assumes
+	// contiguousness (the src argument is a []byte).
+	MOVD $16, R1
+	CMP R1, R4
+	BGT callMemmove
+	CMP R1, R2
+	BLT callMemmove
+	CMP R1, R3
+	BLT callMemmove
+
+	// !!! Implement the copy from src to dst as a 16-byte load and store.
+	// (Decode's documentation says that dst and src must not overlap.)
+	//
+	// This always copies 16 bytes, instead of only length bytes, but that's
+	// OK. If the input is a valid Snappy encoding then subsequent iterations
+	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
+	// non-nil error), so the overrun will be ignored.
+	//
+	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
+	// 16-byte loads and stores. This technique probably wouldn't be as
+	// effective on architectures that are fussier about alignment.
+
+	VLD1 0(R6), [V0.B16]
+	VST1 [V0.B16], 0(R7)
+
+	// d += length
+	// s += length
+	ADD R4, R7, R7
+	ADD R4, R6, R6
+	B loop
+
+callMemmove:
+	// if length > len(dst)-d || length > len(src)-s { etc }
+	CMP R2, R4
+	BGT errCorrupt
+	CMP R3, R4
+	BGT errCorrupt
+
+	// copy(dst[d:], src[s:s+length])
+	//
+	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
+	// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
+	// three registers to the stack, to save local variables across the CALL.
+	MOVD R7, 8(RSP)
+	MOVD R6, 16(RSP)
+	MOVD R4, 24(RSP)
+	MOVD R7, 32(RSP)
+	MOVD R6, 40(RSP)
+	MOVD R4, 48(RSP)
+	CALL runtime·memmove(SB)
+
+	// Restore local variables: unspill registers from the stack and
+	// re-calculate R8-R13.
+	MOVD 32(RSP), R7
+	MOVD 40(RSP), R6
+	MOVD 48(RSP), R4
+	MOVD dst_base+0(FP), R8
+	MOVD dst_len+8(FP), R9
+	MOVD R8, R10
+	ADD R9, R10, R10
+	MOVD src_base+24(FP), R11
+	MOVD src_len+32(FP), R12
+	MOVD R11, R13
+	ADD R12, R13, R13
+
+	// d += length
+	// s += length
+	ADD R4, R7, R7
+	ADD R4, R6, R6
+	B loop
+
+tagLit60Plus:
+	// !!! This fragment does the
+	//
+	// s += x - 58; if uint(s) > uint(len(src)) { etc }
+	//
+	// checks. In the asm version, we code it once instead of once per switch case.
+	ADD R4, R6, R6
+	SUB $58, R6, R6
+	MOVD R6, R3
+	SUB R11, R3, R3
+	CMP R12, R3
+	BGT errCorrupt
+
+	// case x == 60:
+	MOVW $61, R1
+	CMPW R1, R4
+	BEQ tagLit61
+	BGT tagLit62Plus
+
+	// x = uint32(src[s-1])
+	MOVBU -1(R6), R4
+	B doLit
+
+tagLit61:
+	// case x == 61:
+	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
+	MOVHU -2(R6), R4
+	B doLit
+
+tagLit62Plus:
+	MOVW $62, R1
+	CMPW R1, R4
+	BHI tagLit63
+
+	// case x == 62:
+	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+	MOVHU -3(R6), R4
+	MOVBU -1(R6), R3
+	ORR R3<<16, R4
+	B doLit
+
+tagLit63:
+	// case x == 63:
+	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+	MOVWU -4(R6), R4
+	B doLit
+
+	// The code above handles literal tags.
+	// ----------------------------------------
+	// The code below handles copy tags.
+
+tagCopy4:
+	// case tagCopy4:
+	// s += 5
+	ADD $5, R6, R6
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVD R6, R3
+	SUB R11, R3, R3
+	CMP R12, R3
+	BGT errCorrupt
+
+	// length = 1 + int(src[s-5])>>2
+	MOVD $1, R1
+	ADD R4>>2, R1, R4
+
+	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+	MOVWU -4(R6), R5
+	B doCopy
+
+tagCopy2:
+	// case tagCopy2:
+	// s += 3
+	ADD $3, R6, R6
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVD R6, R3
+	SUB R11, R3, R3
+	CMP R12, R3
+	BGT errCorrupt
+
+	// length = 1 + int(src[s-3])>>2
+	MOVD $1, R1
+	ADD R4>>2, R1, R4
+
+	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
+	MOVHU -2(R6), R5
+	B doCopy
+
+tagCopy:
+	// We have a copy tag. We assume that:
+	//	- R3 == src[s] & 0x03
+	//	- R4 == src[s]
+	MOVD $2, R1
+	CMP R1, R3
+	BEQ tagCopy2
+	BGT tagCopy4
+
+	// case tagCopy1:
+	// s += 2
+	ADD $2, R6, R6
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVD R6, R3
+	SUB R11, R3, R3
+	CMP R12, R3
+	BGT errCorrupt
+
+	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
+	MOVD R4, R5
+	AND $0xe0, R5
+	MOVBU -1(R6), R3
+	ORR R5<<3, R3, R5
+
+	// length = 4 + int(src[s-2])>>2&0x7
+	MOVD $7, R1
+	AND R4>>2, R1, R4
+	ADD $4, R4, R4
+
+doCopy:
+	// This is the end of the outer "switch", when we have a copy tag.
+	//
+	// We assume that:
+	//	- R4 == length && R4 > 0
+	//	- R5 == offset
+
+	// if offset <= 0 { etc }
+	MOVD $0, R1
+	CMP R1, R5
+	BLE errCorrupt
+
+	// if d < offset { etc }
+	MOVD R7, R3
+	SUB R8, R3, R3
+	CMP R5, R3
+	BLT errCorrupt
+
+	// if length > len(dst)-d { etc }
+	MOVD R10, R3
+	SUB R7, R3, R3
+	CMP R3, R4
+	BGT errCorrupt
+
+	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
+	//
+	// Set:
+	//	- R14 = len(dst)-d
+	//	- R15 = &dst[d-offset]
+	MOVD R10, R14
+	SUB R7, R14, R14
+	MOVD R7, R15
+	SUB R5, R15, R15
+
+	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
+	//
+	// First, try using two 8-byte load/stores, similar to the doLit technique
+	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
+	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
+	// and not one 16-byte load/store, and the first store has to be before the
+	// second load, due to the overlap if offset is in the range [8, 16).
+	//
+	// if length > 16 || offset < 8 || len(dst)-d < 16 {
+	//   goto slowForwardCopy
+	// }
+	// copy 16 bytes
+	// d += length
+	MOVD $16, R1
+	MOVD $8, R0
+	CMP R1, R4
+	BGT slowForwardCopy
+	CMP R0, R5
+	BLT slowForwardCopy
+	CMP R1, R14
+	BLT slowForwardCopy
+	MOVD 0(R15), R2
+	MOVD R2, 0(R7)
+	MOVD 8(R15), R3
+	MOVD R3, 8(R7)
+	ADD R4, R7, R7
+	B loop
+
+slowForwardCopy:
+	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
+	// can still try 8-byte load stores, provided we can overrun up to 10 extra
+	// bytes. As above, the overrun will be fixed up by subsequent iterations
+	// of the outermost loop.
+	//
+	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
+	// commentary says:
+	//
+	// ----
+	//
+	// The main part of this loop is a simple copy of eight bytes at a time
+	// until we've copied (at least) the requested amount of bytes. However,
+	// if d and d-offset are less than eight bytes apart (indicating a
+	// repeating pattern of length < 8), we first need to expand the pattern in
+	// order to get the correct results. For instance, if the buffer looks like
+	// this, with the eight-byte <d-offset> and <d> patterns marked as
+	// intervals:
+	//
+	//	abxxxxxxxxxxxx
+	//	[------]           d-offset
+	//	  [------]         d
+	//
+	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
+	// once, after which we can move <d> two bytes without moving <d-offset>:
+	//
+	//	ababxxxxxxxxxx
+	//	[------]           d-offset
+	//	    [------]       d
+	//
+	// and repeat the exercise until the two no longer overlap.
+	//
+	// This allows us to do very well in the special case of one single byte
+	// repeated many times, without taking a big hit for more general cases.
+	//
+	// The worst case of extra writing past the end of the match occurs when
+	// offset == 1 and length == 1; the last copy will read from byte positions
+	// [0..7] and write to [4..11], whereas it was only supposed to write to
+	// position 1. Thus, ten excess bytes.
+	//
+	// ----
+	//
+	// That "10 byte overrun" worst case is confirmed by Go's
+	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
+	// and finishSlowForwardCopy algorithm.
+	//
+	// if length > len(dst)-d-10 {
+	//   goto verySlowForwardCopy
+	// }
+	SUB $10, R14, R14
+	CMP R14, R4
+	BGT verySlowForwardCopy
+
+makeOffsetAtLeast8:
+	// !!! As above, expand the pattern so that offset >= 8 and we can use
+	// 8-byte load/stores.
+	//
+	// for offset < 8 {
+	//   copy 8 bytes from dst[d-offset:] to dst[d:]
+	//   length -= offset
+	//   d      += offset
+	//   offset += offset
+	//   // The two previous lines together means that d-offset, and therefore
+	//   // R15, is unchanged.
+	// }
+	MOVD $8, R1
+	CMP R1, R5
+	BGE fixUpSlowForwardCopy
+	MOVD (R15), R3
+	MOVD R3, (R7)
+	SUB R5, R4, R4
+	ADD R5, R7, R7
+	ADD R5, R5, R5
+	B makeOffsetAtLeast8
+
+fixUpSlowForwardCopy:
+	// !!! Add length (which might be negative now) to d (implied by R7 being
+	// &dst[d]) so that d ends up at the right place when we jump back to the
+	// top of the loop. Before we do that, though, we save R7 to R2 so that, if
+	// length is positive, copying the remaining length bytes will write to the
+	// right place.
+	MOVD R7, R2
+	ADD R4, R7, R7
+
+finishSlowForwardCopy:
+	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
+	// length means that we overrun, but as above, that will be fixed up by
+	// subsequent iterations of the outermost loop.
+	MOVD $0, R1
+	CMP R1, R4
+	BLE loop
+	MOVD (R15), R3
+	MOVD R3, (R2)
+	ADD $8, R15, R15
+	ADD $8, R2, R2
+	SUB $8, R4, R4
+	B finishSlowForwardCopy
+
+verySlowForwardCopy:
+	// verySlowForwardCopy is a simple implementation of forward copy. In C
+	// parlance, this is a do/while loop instead of a while loop, since we know
+	// that length > 0. In Go syntax:
+	//
+	// for {
+	//   dst[d] = dst[d - offset]
+	//   d++
+	//   length--
+	//   if length == 0 {
+	//     break
+	//   }
+	// }
+	MOVB (R15), R3
+	MOVB R3, (R7)
+	ADD $1, R15, R15
+	ADD $1, R7, R7
+	SUB $1, R4, R4
+	MOVD $0, R1
+	CMP R1, R4
+	BNE verySlowForwardCopy
+	B loop
+
+	// The code above handles copy tags.
+	// ----------------------------------------
+
+end:
+	// This is the end of the "for s < len(src)".
+	//
+	// if d != len(dst) { etc }
+	CMP R10, R7
+	BNE errCorrupt
+
+	// return 0
+	MOVD $0, ret+48(FP)
+	RET
+
+errCorrupt:
+	// return decodeErrCodeCorrupt
+	MOVD $1, R2
+	MOVD R2, ret+48(FP)
+	RET
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_asm.go golang-github-golang-snappy-0.0.2/decode_asm.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_asm.go	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/decode_asm.go	2020-07-07 13:17:29.000000000 +0000
@@ -0,0 +1,15 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+// +build amd64 arm64
+
+package snappy
+
+// decode has the same semantics as in decode_other.go.
+//
+//go:noescape
+func decode(dst, src []byte) int
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode.go golang-github-golang-snappy-0.0.2/decode.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/decode.go	2020-07-07 13:17:29.000000000 +0000
@@ -52,6 +52,8 @@
 // Otherwise, a newly allocated slice will be returned.
 //
 // The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Decode handles the Snappy block format, not the Snappy stream format.
 func Decode(dst, src []byte) ([]byte, error) {
 	dLen, s, err := decodedLen(src)
 	if err != nil {
@@ -83,6 +85,8 @@
 }
 
 // Reader is an io.Reader that can read Snappy-compressed bytes.
+//
+// Reader handles the Snappy stream format, not the Snappy block format.
 type Reader struct {
 	r   io.Reader
 	err error
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_other.go golang-github-golang-snappy-0.0.2/decode_other.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/decode_other.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/decode_other.go	2020-07-07 13:17:29.000000000 +0000
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !amd64 appengine !gc noasm
+// +build !amd64,!arm64 appengine !gc noasm
 
 package snappy
 
@@ -85,14 +85,28 @@
 			if offset <= 0 || d < offset || length > len(dst)-d {
 				return decodeErrCodeCorrupt
 			}
-			// Copy from an earlier sub-slice of dst to a later sub-slice. Unlike
-			// the built-in copy function, this byte-by-byte copy always runs
+			// Copy from an earlier sub-slice of dst to a later sub-slice.
+			// If no overlap, use the built-in copy:
+			if offset >= length {
+				copy(dst[d:d+length], dst[d-offset:])
+				d += length
+				continue
+			}
+
+			// Unlike the built-in copy function, this byte-by-byte copy always runs
 			// forwards, even if the slices overlap. Conceptually, this is:
 			//
 			//	d += forwardCopy(dst[d:d+length], dst[d-offset:])
-			for end := d + length; d != end; d++ {
-				dst[d] = dst[d-offset]
+			//
+			// We align the slices into a and b and show the compiler they are the same size.
+			// This allows the loop to run without bounds checks.
+			a := dst[d : d+length]
+			b := dst[d-offset:]
+			b = b[:len(a)]
+			for i := range a {
+				a[i] = b[i]
 			}
+			d += length
 		}
 	if d != len(dst) {
 		return decodeErrCodeCorrupt
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_amd64.go golang-github-golang-snappy-0.0.2/encode_amd64.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_amd64.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/encode_amd64.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,29 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-package snappy
-
-// emitLiteral has the same semantics as in encode_other.go.
-//
-//go:noescape
-func emitLiteral(dst, lit []byte) int
-
-// emitCopy has the same semantics as in encode_other.go.
-//
-//go:noescape
-func emitCopy(dst []byte, offset, length int) int
-
-// extendMatch has the same semantics as in encode_other.go.
-//
-//go:noescape
-func extendMatch(src []byte, i, j int) int
-
-// encodeBlock has the same semantics as in encode_other.go.
-//
-//go:noescape
-func encodeBlock(dst, src []byte) (d int)
\ No newline at end of file
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_arm64.s golang-github-golang-snappy-0.0.2/encode_arm64.s
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_arm64.s	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/encode_arm64.s	2020-07-07 13:17:29.000000000 +0000
@@ -0,0 +1,729 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The asm code generally follows the pure Go code in encode_other.go, except
+// where marked with a "!!!".
+
+// ----------------------------------------------------------------------------
+
+// func emitLiteral(dst, lit []byte) int
+//
+// All local variables fit into registers. The register allocation:
+//	- R3	len(lit)
+//	- R4	n
+//	- R6	return value
+//	- R8	&dst[i]
+//	- R10	&lit[0]
+//
+// The 32 bytes of stack space is to call runtime·memmove.
+//
+// The unusual register allocation of local variables, such as R10 for the
+// source pointer, matches the allocation used at the call site in encodeBlock,
+// which makes it easier to manually inline this function.
+TEXT ·emitLiteral(SB), NOSPLIT, $32-56
+	MOVD dst_base+0(FP), R8
+	MOVD lit_base+24(FP), R10
+	MOVD lit_len+32(FP), R3
+	MOVD R3, R6
+	MOVW R3, R4
+	SUBW $1, R4, R4
+
+	MOVW $60, R2
+	CMPW R2, R4
+	BLT oneByte
+	MOVW $256, R2
+	CMPW R2, R4
+	BLT twoBytes
+
+threeBytes:
+	MOVD $0xf4, R2
+	MOVB R2, 0(R8)
+	MOVW R4, 1(R8)
+	ADD $3, R8, R8
+	ADD $3, R6, R6
+	B memmove
+
+twoBytes:
+	MOVD $0xf0, R2
+	MOVB R2, 0(R8)
+	MOVB R4, 1(R8)
+	ADD $2, R8, R8
+	ADD $2, R6, R6
+	B memmove
+
+oneByte:
+	LSLW $2, R4, R4
+	MOVB R4, 0(R8)
+	ADD $1, R8, R8
+	ADD $1, R6, R6
+
+memmove:
+	MOVD R6, ret+48(FP)
+
+	// copy(dst[i:], lit)
+	//
+	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+	// R8, R10 and R3 as arguments.
+	MOVD R8, 8(RSP)
+	MOVD R10, 16(RSP)
+	MOVD R3, 24(RSP)
+	CALL runtime·memmove(SB)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func emitCopy(dst []byte, offset, length int) int
+//
+// All local variables fit into registers. The register allocation:
+//	- R3	length
+//	- R7	&dst[0]
+//	- R8	&dst[i]
+//	- R11	offset
+//
+// The unusual register allocation of local variables, such as R11 for the
+// offset, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·emitCopy(SB), NOSPLIT, $0-48
+	MOVD dst_base+0(FP), R8
+	MOVD R8, R7
+	MOVD offset+24(FP), R11
+	MOVD length+32(FP), R3
+
+loop0:
+	// for length >= 68 { etc }
+	MOVW $68, R2
+	CMPW R2, R3
+	BLT step1
+
+	// Emit a length 64 copy, encoded as 3 bytes.
+	MOVD $0xfe, R2
+	MOVB R2, 0(R8)
+	MOVW R11, 1(R8)
+	ADD $3, R8, R8
+	SUB $64, R3, R3
+	B loop0
+
+step1:
+	// if length > 64 { etc }
+	MOVD $64, R2
+	CMP R2, R3
+	BLE step2
+
+	// Emit a length 60 copy, encoded as 3 bytes.
+	MOVD $0xee, R2
+	MOVB R2, 0(R8)
+	MOVW R11, 1(R8)
+	ADD $3, R8, R8
+	SUB $60, R3, R3
+
+step2:
+	// if length >= 12 || offset >= 2048 { goto step3 }
+	MOVD $12, R2
+	CMP R2, R3
+	BGE step3
+	MOVW $2048, R2
+	CMPW R2, R11
+	BGE step3
+
+	// Emit the remaining copy, encoded as 2 bytes.
+	MOVB R11, 1(R8)
+	LSRW $3, R11, R11
+	AND $0xe0, R11, R11
+	SUB $4, R3, R3
+	LSLW $2, R3
+	AND $0xff, R3, R3
+	ORRW R3, R11, R11
+	ORRW $1, R11, R11
+	MOVB R11, 0(R8)
+	ADD $2, R8, R8
+
+	// Return the number of bytes written.
+	SUB R7, R8, R8
+	MOVD R8, ret+40(FP)
+	RET
+
+step3:
+	// Emit the remaining copy, encoded as 3 bytes.
+	SUB $1, R3, R3
+	AND $0xff, R3, R3
+	LSLW $2, R3, R3
+	ORRW $2, R3, R3
+	MOVB R3, 0(R8)
+	MOVW R11, 1(R8)
+	ADD $3, R8, R8
+
+	// Return the number of bytes written.
+	SUB R7, R8, R8
+	MOVD R8, ret+40(FP)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func extendMatch(src []byte, i, j int) int
+//
+// All local variables fit into registers. The register allocation:
+//	- R6	&src[0]
+//	- R7	&src[j]
+//	- R13	&src[len(src) - 8]
+//	- R14	&src[len(src)]
+//	- R15	&src[i]
+//
+// The unusual register allocation of local variables, such as R15 for a source
+// pointer, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·extendMatch(SB), NOSPLIT, $0-48
+	MOVD src_base+0(FP), R6
+	MOVD src_len+8(FP), R14
+	MOVD i+24(FP), R15
+	MOVD j+32(FP), R7
+	ADD R6, R14, R14
+	ADD R6, R15, R15
+	ADD R6, R7, R7
+	MOVD R14, R13
+	SUB $8, R13, R13
+
+cmp8:
+	// As long as we are 8 or more bytes before the end of src, we can load and
+	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+	CMP R13, R7
+	BHI cmp1
+	MOVD (R15), R3
+	MOVD (R7), R4
+	CMP R4, R3
+	BNE bsf
+	ADD $8, R15, R15
+	ADD $8, R7, R7
+	B cmp8
+
+bsf:
+	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
+	// the index of the first byte that differs.
+	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
+	// combination of which finds the least significant bit which is set.
+	// The arm64 architecture is little-endian, and the shift by 3 converts
+	// a bit index to a byte index.
+	EOR R3, R4, R4
+	RBIT R4, R4
+	CLZ R4, R4
+	ADD R4>>3, R7, R7
+
+	// Convert from &src[ret] to ret.
+	SUB R6, R7, R7
+	MOVD R7, ret+40(FP)
+	RET
+
+cmp1:
+	// In src's tail, compare 1 byte at a time.
+	CMP R7, R14
+	BLS extendMatchEnd
+	MOVB (R15), R3
+	MOVB (R7), R4
+	CMP R4, R3
+	BNE extendMatchEnd
+	ADD $1, R15, R15
+	ADD $1, R7, R7
+	B cmp1
+
+extendMatchEnd:
+	// Convert from &src[ret] to ret.
+	SUB R6, R7, R7
+	MOVD R7, ret+40(FP)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func encodeBlock(dst, src []byte) (d int)
+//
+// All local variables fit into registers, other than "var table". The register
+// allocation:
+//	- R3	.	.
+//	- R4	.	.
+//	- R5	64	shift
+//	- R6	72	&src[0], tableSize
+//	- R7	80	&src[s]
+//	- R8	88	&dst[d]
+//	- R9	96	sLimit
+//	- R10	.	&src[nextEmit]
+//	- R11	104	prevHash, currHash, nextHash, offset
+//	- R12	112	&src[base], skip
+//	- R13	.	&src[nextS], &src[len(src) - 8]
+//	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
+//	- R15	120	candidate
+//	- R16	.	hash constant, 0x1e35a7bd
+//	- R17	.	&table
+//	- .	128	table
+//
+// The second column (64, 72, etc) is the stack offset to spill the registers
+// when calling other functions. We could pack this slightly tighter, but it's
+// simpler to have a dedicated spill map independent of the function called.
+//
+// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
+// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
+// local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
+TEXT ·encodeBlock(SB), 0, $32896-56
+	MOVD dst_base+0(FP), R8
+	MOVD src_base+24(FP), R7
+	MOVD src_len+32(FP), R14
+
+	// shift, tableSize := uint32(32-8), 1<<8
+	MOVD $24, R5
+	MOVD $256, R6
+	MOVW $0xa7bd, R16
+	MOVKW $(0x1e35<<16), R16
+
+calcShift:
+	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+	//	shift--
+	// }
+	MOVD $16384, R2
+	CMP R2, R6
+	BGE varTable
+	CMP R14, R6
+	BGE varTable
+	SUB $1, R5, R5
+	LSL $1, R6, R6
+	B calcShift
+
+varTable:
+	// var table [maxTableSize]uint16
+	//
+	// In the asm code, unlike the Go code, we can zero-initialize only the
+	// first tableSize elements. Each uint16 element is 2 bytes and each VST1
+	// writes 64 bytes, so we can do only tableSize/32 writes instead of the
+	// 2048 writes that would zero-initialize all of table's 32768 bytes.
+	// This clear could overrun the first tableSize elements, but it won't
+	// overrun the allocated stack size.
+	ADD $128, RSP, R17
+	MOVD R17, R4
+
+	// !!! R6 = &src[tableSize]
+	ADD R6<<1, R17, R6
+
+	// zero the SIMD registers
+	VEOR V0.B16, V0.B16, V0.B16
+	VEOR V1.B16, V1.B16, V1.B16
+	VEOR V2.B16, V2.B16, V2.B16
+	VEOR V3.B16, V3.B16, V3.B16
+
+memclr:
+	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(R4)
+	CMP R4, R6
+	BHI memclr
+
+	// !!! R6 = &src[0]
+	MOVD R7, R6
+
+	// sLimit := len(src) - inputMargin
+	MOVD R14, R9
+	SUB $15, R9, R9
+
+	// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
+	// change for the rest of the function.
+	MOVD R5, 64(RSP)
+	MOVD R6, 72(RSP)
+	MOVD R9, 96(RSP)
+
+	// nextEmit := 0
+	MOVD R6, R10
+
+	// s := 1
+	ADD $1, R7, R7
+
+	// nextHash := hash(load32(src, s), shift)
+	MOVW 0(R7), R11
+	MULW R16, R11, R11
+	LSRW R5, R11, R11
+
+outer:
+	// for { etc }
+
+	// skip := 32
+	MOVD $32, R12
+
+	// nextS := s
+	MOVD R7, R13
+
+	// candidate := 0
+	MOVD $0, R15
+
+inner0:
+	// for { etc }
+
+	// s := nextS
+	MOVD R13, R7
+
+	// bytesBetweenHashLookups := skip >> 5
+	MOVD R12, R14
+	LSR $5, R14, R14
+
+	// nextS = s + bytesBetweenHashLookups
+	ADD R14, R13, R13
+
+	// skip += bytesBetweenHashLookups
+	ADD R14, R12, R12
+
+	// if nextS > sLimit { goto emitRemainder }
+	MOVD R13, R3
+	SUB R6, R3, R3
+	CMP R9, R3
+	BHI emitRemainder
+
+	// candidate = int(table[nextHash])
+	MOVHU 0(R17)(R11<<1), R15
+
+	// table[nextHash] = uint16(s)
+	MOVD R7, R3
+	SUB R6, R3, R3
+
+	MOVH R3, 0(R17)(R11<<1)
+
+	// nextHash = hash(load32(src, nextS), shift)
+	MOVW 0(R13), R11
+	MULW R16, R11
+	LSRW R5, R11, R11
+
+	// if load32(src, s) != load32(src, candidate) { continue } break
+	MOVW 0(R7), R3
+	MOVW (R6)(R15*1), R4
+	CMPW R4, R3
+	BNE inner0
+
+fourByteMatch:
+	// As per the encode_other.go code:
+	//
+	// A 4-byte match has been found. We'll later see etc.
+
+	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
+	// on inputMargin in encode.go.
+	MOVD R7, R3
+	SUB R10, R3, R3
+	MOVD $16, R2
+	CMP R2, R3
+	BLE emitLiteralFastPath
+
+	// ----------------------------------------
+	// Begin inline of the emitLiteral call.
+	//
+	// d += emitLiteral(dst[d:], src[nextEmit:s])
+
+	MOVW R3, R4
+	SUBW $1, R4, R4
+
+	MOVW $60, R2
+	CMPW R2, R4
+	BLT inlineEmitLiteralOneByte
+	MOVW $256, R2
+	CMPW R2, R4
+	BLT inlineEmitLiteralTwoBytes
+
+inlineEmitLiteralThreeBytes:
+	MOVD $0xf4, R1
+	MOVB R1, 0(R8)
+	MOVW R4, 1(R8)
+	ADD $3, R8, R8
+	B inlineEmitLiteralMemmove
+
+inlineEmitLiteralTwoBytes:
+	MOVD $0xf0, R1
+	MOVB R1, 0(R8)
+	MOVB R4, 1(R8)
+	ADD $2, R8, R8
+	B inlineEmitLiteralMemmove
+
+inlineEmitLiteralOneByte:
+	LSLW $2, R4, R4
+	MOVB R4, 0(R8)
+	ADD $1, R8, R8
+
+inlineEmitLiteralMemmove:
+	// Spill local variables (registers) onto the stack; call; unspill.
+	//
+	// copy(dst[i:], lit)
+	//
+	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+	// R8, R10 and R3 as arguments.
+	MOVD R8, 8(RSP)
+	MOVD R10, 16(RSP)
+	MOVD R3, 24(RSP)
+
+	// Finish the "d +=" part of "d += emitLiteral(etc)".
+	ADD R3, R8, R8
+	MOVD R7, 80(RSP)
+	MOVD R8, 88(RSP)
+	MOVD R15, 120(RSP)
+	CALL runtime·memmove(SB)
+	MOVD 64(RSP), R5
+	MOVD 72(RSP), R6
+	MOVD 80(RSP), R7
+	MOVD 88(RSP), R8
+	MOVD 96(RSP), R9
+	MOVD 120(RSP), R15
+	B inner1
+
+inlineEmitLiteralEnd:
+	// End inline of the emitLiteral call.
+	// ----------------------------------------
+
+emitLiteralFastPath:
+	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
+	MOVB R3, R4
+	SUBW $1, R4, R4
+	AND $0xff, R4, R4
+	LSLW $2, R4, R4
+	MOVB R4, (R8)
+	ADD $1, R8, R8
+
+	// !!! Implement the copy from lit to dst as a 16-byte load and store.
+	// (Encode's documentation says that dst and src must not overlap.)
+	//
+	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
+	// OK. Subsequent iterations will fix up the overrun.
+	//
+	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
+	// 16-byte loads and stores. This technique probably wouldn't be as
+	// effective on architectures that are fussier about alignment.
+	VLD1 0(R10), [V0.B16]
+	VST1 [V0.B16], 0(R8)
+	ADD R3, R8, R8
+
+inner1:
+	// for { etc }
+
+	// base := s
+	MOVD R7, R12
+
+	// !!! offset := base - candidate
+	MOVD R12, R11
+	SUB R15, R11, R11
+	SUB R6, R11, R11
+
+	// ----------------------------------------
+	// Begin inline of the extendMatch call.
+	//
+	// s = extendMatch(src, candidate+4, s+4)
+
+	// !!! R14 = &src[len(src)]
+	MOVD src_len+32(FP), R14
+	ADD R6, R14, R14
+
+	// !!! R13 = &src[len(src) - 8]
+	MOVD R14, R13
+	SUB $8, R13, R13
+
+	// !!! R15 = &src[candidate + 4]
+	ADD $4, R15, R15
+	ADD R6, R15, R15
+
+	// !!! s += 4
+	ADD $4, R7, R7
+
+inlineExtendMatchCmp8:
+	// As long as we are 8 or more bytes before the end of src, we can load and
+	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+	CMP R13, R7
+	BHI inlineExtendMatchCmp1
+	MOVD (R15), R3
+	MOVD (R7), R4
+	CMP R4, R3
+	BNE inlineExtendMatchBSF
+	ADD $8, R15, R15
+	ADD $8, R7, R7
+	B inlineExtendMatchCmp8
+
+inlineExtendMatchBSF:
+	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
+	// the index of the first byte that differs.
+	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
+	// combination of which finds the least significant bit which is set.
+	// The arm64 architecture is little-endian, and the shift by 3 converts
+	// a bit index to a byte index.
+	EOR R3, R4, R4
+	RBIT R4, R4
+	CLZ R4, R4
+	ADD R4>>3, R7, R7
+	B inlineExtendMatchEnd
+
+inlineExtendMatchCmp1:
+	// In src's tail, compare 1 byte at a time.
+	CMP R7, R14
+	BLS inlineExtendMatchEnd
+	MOVB (R15), R3
+	MOVB (R7), R4
+	CMP R4, R3
+	BNE inlineExtendMatchEnd
+	ADD $1, R15, R15
+	ADD $1, R7, R7
+	B inlineExtendMatchCmp1
+
+inlineExtendMatchEnd:
+	// End inline of the extendMatch call.
+	// ----------------------------------------
+
+	// ----------------------------------------
+	// Begin inline of the emitCopy call.
+	//
+	// d += emitCopy(dst[d:], base-candidate, s-base)
+
+	// !!! length := s - base
+	MOVD R7, R3
+	SUB R12, R3, R3
+
+inlineEmitCopyLoop0:
+	// for length >= 68 { etc }
+	MOVW $68, R2
+	CMPW R2, R3
+	BLT inlineEmitCopyStep1
+
+	// Emit a length 64 copy, encoded as 3 bytes.
+	MOVD $0xfe, R1
+	MOVB R1, 0(R8)
+	MOVW R11, 1(R8)
+	ADD $3, R8, R8
+	SUBW $64, R3, R3
+	B inlineEmitCopyLoop0
+
+inlineEmitCopyStep1:
+	// if length > 64 { etc }
+	MOVW $64, R2
+	CMPW R2, R3
+	BLE inlineEmitCopyStep2
+
+	// Emit a length 60 copy, encoded as 3 bytes.
+	MOVD $0xee, R1
+	MOVB R1, 0(R8)
+	MOVW R11, 1(R8)
+	ADD $3, R8, R8
+	SUBW $60, R3, R3
+
+inlineEmitCopyStep2:
+	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
+	MOVW $12, R2
+	CMPW R2, R3
+	BGE inlineEmitCopyStep3
+	MOVW $2048, R2
+	CMPW R2, R11
+	BGE inlineEmitCopyStep3
+
+	// Emit the remaining copy, encoded as 2 bytes.
+	MOVB R11, 1(R8)
+	LSRW $8, R11, R11
+	LSLW $5, R11, R11
+	SUBW $4, R3, R3
+	AND $0xff, R3, R3
+	LSLW $2, R3, R3
+	ORRW R3, R11, R11
+	ORRW $1, R11, R11
+	MOVB R11, 0(R8)
+	ADD $2, R8, R8
+	B inlineEmitCopyEnd
+
+inlineEmitCopyStep3:
+	// Emit the remaining copy, encoded as 3 bytes.
+	SUBW $1, R3, R3
+	LSLW $2, R3, R3
+	ORRW $2, R3, R3
+	MOVB R3, 0(R8)
+	MOVW R11, 1(R8)
+	ADD $3, R8, R8
+
+inlineEmitCopyEnd:
+	// End inline of the emitCopy call.
+	// ----------------------------------------
+
+	// nextEmit = s
+	MOVD R7, R10
+
+	// if s >= sLimit { goto emitRemainder }
+	MOVD R7, R3
+	SUB R6, R3, R3
+	CMP R3, R9
+	BLS emitRemainder
+
+	// As per the encode_other.go code:
+	//
+	// We could immediately etc.
+
+	// x := load64(src, s-1)
+	MOVD -1(R7), R14
+
+	// prevHash := hash(uint32(x>>0), shift)
+	MOVW R14, R11
+	MULW R16, R11, R11
+	LSRW R5, R11, R11
+
+	// table[prevHash] = uint16(s-1)
+	MOVD R7, R3
+	SUB R6, R3, R3
+	SUB $1, R3, R3
+
+	MOVHU R3, 0(R17)(R11<<1)
+
+	// currHash := hash(uint32(x>>8), shift)
+	LSR $8, R14, R14
+	MOVW R14, R11
+	MULW R16, R11, R11
+	LSRW R5, R11, R11
+
+	// candidate = int(table[currHash])
+	MOVHU 0(R17)(R11<<1), R15
+
+	// table[currHash] = uint16(s)
+	ADD $1, R3, R3
+	MOVHU R3, 0(R17)(R11<<1)
+
+	// if uint32(x>>8) == load32(src, candidate) { continue }
+	MOVW (R6)(R15*1), R4
+	CMPW R4, R14
+	BEQ inner1
+
+	// nextHash = hash(uint32(x>>16), shift)
+	LSR $8, R14, R14
+	MOVW R14, R11
+	MULW R16, R11, R11
+	LSRW R5, R11, R11
+
+	// s++
+	ADD $1, R7, R7
+
+	// break out of the inner1 for loop, i.e. continue the outer loop.
+	B outer
+
+emitRemainder:
+	// if nextEmit < len(src) { etc }
+	MOVD src_len+32(FP), R3
+	ADD R6, R3, R3
+	CMP R3, R10
+	BEQ encodeBlockEnd
+
+	// d += emitLiteral(dst[d:], src[nextEmit:])
+	//
+	// Push args.
+	MOVD R8, 8(RSP)
+	MOVD $0, 16(RSP)  // Unnecessary, as the callee ignores it, but conservative.
+	MOVD $0, 24(RSP)  // Unnecessary, as the callee ignores it, but conservative.
+	MOVD R10, 32(RSP)
+	SUB R10, R3, R3
+	MOVD R3, 40(RSP)
+	MOVD R3, 48(RSP)  // Unnecessary, as the callee ignores it, but conservative.
+
+	// Spill local variables (registers) onto the stack; call; unspill.
+	MOVD R8, 88(RSP)
+	CALL ·emitLiteral(SB)
+	MOVD 88(RSP), R8
+
+	// Finish the "d +=" part of "d += emitLiteral(etc)".
+	MOVD 56(RSP), R1
+	ADD R1, R8, R8
+
+encodeBlockEnd:
+	MOVD dst_base+0(FP), R3
+	SUB R3, R8, R8
+	MOVD R8, d+48(FP)
+	RET
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_asm.go golang-github-golang-snappy-0.0.2/encode_asm.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_asm.go	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/encode_asm.go	2020-07-07 13:17:29.000000000 +0000
@@ -0,0 +1,30 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+// +build amd64 arm64
+
+package snappy
+
+// emitLiteral has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitLiteral(dst, lit []byte) int
+
+// emitCopy has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitCopy(dst []byte, offset, length int) int
+
+// extendMatch has the same semantics as in encode_other.go.
+//
+//go:noescape
+func extendMatch(src []byte, i, j int) int
+
+// encodeBlock has the same semantics as in encode_other.go.
+//
+//go:noescape
+func encodeBlock(dst, src []byte) (d int)
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode.go golang-github-golang-snappy-0.0.2/encode.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/encode.go	2020-07-07 13:17:29.000000000 +0000
@@ -15,6 +15,8 @@
 // Otherwise, a newly allocated slice will be returned.
 //
 // The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Encode handles the Snappy block format, not the Snappy stream format.
 func Encode(dst, src []byte) []byte {
 	if n := MaxEncodedLen(len(src)); n < 0 {
 		panic(ErrTooLarge)
@@ -138,7 +140,9 @@
 	}
 }
 
-// Writer is an io.Writer than can write Snappy-compressed bytes.
+// Writer is an io.Writer that can write Snappy-compressed bytes.
+//
+// Writer handles the Snappy stream format, not the Snappy block format.
 type Writer struct {
 	w   io.Writer
 	err error
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_other.go golang-github-golang-snappy-0.0.2/encode_other.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/encode_other.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/encode_other.go	2020-07-07 13:17:29.000000000 +0000
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !amd64 appengine !gc noasm
+// +build !amd64,!arm64 appengine !gc noasm
 
 package snappy
 
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/go.mod golang-github-golang-snappy-0.0.2/go.mod
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/go.mod	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/go.mod	2020-07-07 13:17:29.000000000 +0000
@@ -0,0 +1 @@
+module github.com/golang/snappy
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/misc/main.cpp golang-github-golang-snappy-0.0.2/misc/main.cpp
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/misc/main.cpp	1970-01-01 00:00:00.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/misc/main.cpp	2020-07-07 13:17:29.000000000 +0000
@@ -0,0 +1,79 @@
+/*
+This is a C version of the cmd/snappytool Go program.
+
+To build the snappytool binary:
+g++ main.cpp /usr/lib/libsnappy.a -o snappytool
+or, if you have built the C++ snappy library from source:
+g++ main.cpp /path/to/your/snappy/.libs/libsnappy.a -o snappytool
+after running "make" from your snappy checkout directory.
+*/
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "snappy.h"
+
+#define N 1000000
+
+char dst[N];
+char src[N];
+
+int main(int argc, char** argv) {
+  // Parse args.
+  if (argc != 2) {
+    fprintf(stderr, "exactly one of -d or -e must be given\n");
+    return 1;
+  }
+  bool decode = strcmp(argv[1], "-d") == 0;
+  bool encode = strcmp(argv[1], "-e") == 0;
+  if (decode == encode) {
+    fprintf(stderr, "exactly one of -d or -e must be given\n");
+    return 1;
+  }
+
+  // Read all of stdin into src[:s].
+  size_t s = 0;
+  while (1) {
+    if (s == N) {
+      fprintf(stderr, "input too large\n");
+      return 1;
+    }
+    ssize_t n = read(0, src+s, N-s);
+    if (n == 0) {
+      break;
+    }
+    if (n < 0) {
+      fprintf(stderr, "read error: %s\n", strerror(errno));
+      // TODO: handle EAGAIN, EINTR?
+      return 1;
+    }
+    s += n;
+  }
+
+  // Encode or decode src[:s] to dst[:d], and write to stdout.
+  size_t d = 0;
+  if (encode) {
+    if (N < snappy::MaxCompressedLength(s)) {
+      fprintf(stderr, "input too large after encoding\n");
+      return 1;
+    }
+    snappy::RawCompress(src, s, dst, &d);
+  } else {
+    if (!snappy::GetUncompressedLength(src, s, &d)) {
+      fprintf(stderr, "could not get uncompressed length\n");
+      return 1;
+    }
+    if (N < d) {
+      fprintf(stderr, "input too large after decoding\n");
+      return 1;
+    }
+    if (!snappy::RawUncompress(src, s, dst)) {
+      fprintf(stderr, "input was not valid Snappy-compressed data\n");
+      return 1;
+    }
+  }
+  write(1, dst, d);
+  return 0;
+}
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/snappy.go golang-github-golang-snappy-0.0.2/snappy.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/snappy.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/snappy.go	2020-07-07 13:17:29.000000000 +0000
@@ -2,10 +2,21 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package snappy implements the snappy block-based compression format.
-// It aims for very high speeds and reasonable compression.
+// Package snappy implements the Snappy compression format. It aims for very
+// high speeds and reasonable compression.
 //
-// The C++ snappy implementation is at https://github.com/google/snappy
+// There are actually two Snappy formats: block and stream. They are related,
+// but different: trying to decompress block-compressed data as a Snappy stream
+// will fail, and vice versa. The block format is the Decode and Encode
+// functions and the stream format is the Reader and Writer types.
+//
+// The block format, the more common case, is used when the complete size (the
+// number of bytes) of the original data is known upfront, at the time
+// compression starts. The stream format, also known as the framing format, is
+// for when that isn't always true.
+//
+// The canonical, C++ implementation is at https://github.com/google/snappy and
+// it only implements the block format.
 package snappy // import "github.com/golang/snappy"
 
 import (
diff -Nru golang-github-golang-snappy-0.0+git20160529.d9eb7a3/snappy_test.go golang-github-golang-snappy-0.0.2/snappy_test.go
--- golang-github-golang-snappy-0.0+git20160529.d9eb7a3/snappy_test.go	2016-05-29 05:00:41.000000000 +0000
+++ golang-github-golang-snappy-0.0.2/snappy_test.go	2020-07-07 13:17:29.000000000 +0000
@@ -316,6 +316,11 @@
 		"\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
 		"abcdbc",
 		nil,
+	}, {
+		`decodedLen=0; tagCopy4, 4 extra length|offset bytes; with msb set (0x93); discovered by go-fuzz`,
+		"\x00\xfc000\x93",
+		"",
+		ErrCorrupt,
 	}}
 
 	const (
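The package comment added to snappy.go above draws the line between the two formats: Encode and Decode speak the block format, while Reader and Writer speak the stream (framing) format, and the two do not interoperate. A short sketch of both, assuming only the public API documented in this diff; the exact error returned when mixing formats is not specified here, only that decoding is expected to fail:

    package main

    import (
    	"bytes"
    	"fmt"
    	"io/ioutil"

    	"github.com/golang/snappy"
    )

    func main() {
    	src := []byte("hello, snappy")

    	// Block format: the whole input is known up front.
    	block := snappy.Encode(nil, src)
    	back, err := snappy.Decode(nil, block)
    	fmt.Printf("block round-trip: %q (err=%v)\n", back, err)

    	// Stream format: framed, for io.Writer/io.Reader pipelines.
    	var buf bytes.Buffer
    	w := snappy.NewWriter(&buf)
    	w.Write(src)
    	w.Close()
    	streamed := append([]byte(nil), buf.Bytes()...) // keep a copy before draining buf

    	out, _ := ioutil.ReadAll(snappy.NewReader(&buf))
    	fmt.Printf("stream round-trip: %q\n", out)

    	// Stream bytes are not a valid block, so this is expected to fail.
    	if _, err := snappy.Decode(nil, streamed); err != nil {
    		fmt.Println("decoding stream bytes as a block:", err)
    	}
    }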