Update dependencies

Ingo Oppermann
2024-04-29 16:37:17 +02:00
parent 4dab7b8e6c
commit 24cc87ae7e
1548 changed files with 70419 additions and 68575 deletions

View File

@@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
## Decompressor
Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
kindly supplied by [fuzzit.dev](https://fuzzit.dev/).

View File

@@ -17,7 +17,6 @@ import (
// for aligning the input.
type bitReader struct {
in []byte
off uint // next byte to read is at in[off - 1]
value uint64 // Maybe use [16]byte, but shifting is awkward.
bitsRead uint8
}
@@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error {
return errors.New("corrupt stream: too short")
}
b.in = in
b.off = uint(len(in))
// The highest bit of the last byte indicates where to start
v := in[len(in)-1]
if v == 0 {
@@ -69,21 +67,19 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 {
return
}
// 2 bounds checks.
v := b.in[b.off-4:]
v = v[:4]
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
// fillFastStart() assumes the bitreader is empty and there are at least 8 bytes to read.
func (b *bitReader) fillFastStart() {
// Do single re-slice to avoid bounds checks.
b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
v := b.in[len(b.in)-8:]
b.in = b.in[:len(b.in)-8]
b.value = binary.LittleEndian.Uint64(v)
b.bitsRead = 0
b.off -= 8
}
// fill() will make sure at least 32 bits are available.
@@ -91,25 +87,25 @@ func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off >= 4 {
v := b.in[b.off-4:]
v = v[:4]
if len(b.in) >= 4 {
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
return
}
for b.off > 0 {
b.value = (b.value << 8) | uint64(b.in[b.off-1])
b.bitsRead -= 8
b.off--
b.bitsRead -= uint8(8 * len(b.in))
for len(b.in) > 0 {
b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
b.in = b.in[:len(b.in)-1]
}
}
// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
return b.off == 0 && b.bitsRead >= 64
return len(b.in) == 0 && b.bitsRead >= 64
}
// overread returns true if more bits have been requested than is on the stream.
@@ -119,7 +115,7 @@ func (b *bitReader) overread() bool {
// remain returns the number of bits remaining.
func (b *bitReader) remain() uint {
return b.off*8 + 64 - uint(b.bitsRead)
return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
}
// close the bitstream and returns an error if out-of-buffer reads occurred.
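Taken together, the hunks above drop the explicit `off` cursor and instead consume `b.in` from its tail, so the slice length doubles as the read position. A minimal standalone sketch of the pattern (simplified from the diff, not the library's exact type):

```go
package main

import "fmt"

// sketchReader mirrors the shrinking-slice idea from the diff above:
// bytes are consumed from the tail of in, so len(in) is the cursor and
// each refill needs only a single re-slice.
type sketchReader struct {
	in       []byte
	value    uint64
	bitsRead uint8
}

func (b *sketchReader) fill() {
	if b.bitsRead < 32 {
		return
	}
	if len(b.in) >= 4 {
		v := b.in[len(b.in)-4:]
		b.in = b.in[:len(b.in)-4]
		low := uint32(v[0]) | uint32(v[1])<<8 | uint32(v[2])<<16 | uint32(v[3])<<24
		b.value = (b.value << 32) | uint64(low)
		b.bitsRead -= 32
		return
	}
	// Fewer than 4 bytes left: drain byte by byte.
	b.bitsRead -= uint8(8 * len(b.in))
	for len(b.in) > 0 {
		b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
		b.in = b.in[:len(b.in)-1]
	}
}

func main() {
	r := sketchReader{in: []byte{1, 2, 3, 4, 5}, bitsRead: 64}
	r.fill()
	fmt.Printf("%#x, %d byte(s) left\n", r.value, len(r.in)) // 0x5040302, 1 byte(s) left
}
```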

View File

@@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}
// reset and continue writing by appending to out.

View File

@@ -554,6 +554,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
if debugDecoder {
printf("Compression modes: 0b%b", compMode)
}
if compMode&3 != 0 {
return errors.New("corrupt block: reserved bits not zero")
}
for i := uint(0); i < 3; i++ {
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debugDecoder {
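The new `compMode&3` rejection follows the sequences-section header layout from RFC 8878; a hedged sketch of how the byte decomposes (illustrative helper, not code from this commit):

```go
// decodeCompModes splits the Sequences_Section compression-modes byte:
//   bits 7-6: literal lengths mode, bits 5-4: offsets mode,
//   bits 3-2: match lengths mode, bits 1-0: reserved, must be zero.
// Mode values: 0 = predefined, 1 = RLE, 2 = FSE-compressed, 3 = repeat.
func decodeCompModes(compMode byte) (ll, of, ml byte, ok bool) {
	if compMode&3 != 0 { // reserved bits set: corrupt block
		return 0, 0, 0, false
	}
	return compMode >> 6, (compMode >> 4) & 3, (compMode >> 2) & 3, true
}
```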

View File

@@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
if len(lits) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
} else if len(lits) > 32 {
} else if len(lits) > 16 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
} else {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(lits) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSizes(len(out), len(lits), single)
if len(out)+lh.size() >= len(lits) {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
if debugEncoder {
@@ -420,6 +427,16 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
return nil
}
// encodeRLE will encode an RLE block.
func (b *blockEnc) encodeRLE(val byte, length uint32) {
var bh blockHeader
bh.setLast(b.last)
bh.setSize(length)
bh.setType(blockTypeRLE)
b.output = bh.appendTo(b.output)
b.output = append(b.output, val)
}
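An RLE block is just a block header whose size field holds the regenerated length, followed by a single payload byte. A hypothetical decoding counterpart to `encodeRLE` (not part of this commit):

```go
import "bytes"

// decodeRLE expands an RLE block: the header carried the regenerated
// length, the body carried the one repeated byte.
func decodeRLE(val byte, length uint32) []byte {
	return bytes.Repeat([]byte{val}, int(length))
}
```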
// fuzzFseEncoder can be used to fuzz the FSE encoder.
func fuzzFseEncoder(data []byte) int {
if len(data) > maxSequences || len(data) < 2 {
@@ -472,6 +489,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.sequences) == 0 {
return b.encodeLits(b.literals, rawAllLits)
}
if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 {
// Check common RLE cases.
seq := b.sequences[0]
if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 {
// Offset == 1 and 0 or 1 literals.
b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen)
return nil
}
}
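The `seq.offset-3 == 1` test relies on the encoder's internal offset bias, the same convention visible in the dictionary builder's `newOffsets` bookkeeping further down: stored values above 3 are real offsets plus 3, while 1-3 select repeat offsets. A hedged sketch (`realOffset` is an illustrative helper and ignores the litLen == 0 special cases):

```go
// realOffset resolves the biased offset stored in a sequence: values
// above 3 are literal offsets shifted by +3; 1-3 pick a repeat offset.
func realOffset(stored uint32, repeats [3]uint32) uint32 {
	if stored > 3 {
		return stored - 3 // literal offset
	}
	return repeats[stored-1] // repeat code
}
```

A stored offset of 4 is therefore a literal offset of 1, i.e. "copy the previous byte", which with at most one leading literal is exactly a run-length-encoded block.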
// We want some difference to at least account for the headers.
saved := b.size - len(b.literals) - (b.size >> 6)
if saved < 16 {
@@ -503,7 +530,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.literals) >= 1024 && !raw {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
} else if len(b.literals) > 32 && !raw {
} else if len(b.literals) > 16 && !raw {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@@ -511,6 +538,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(b.literals) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSize(len(b.literals))
szRaw := lh.size()
lh.setSizes(len(out), len(b.literals), single)
szComp := lh.size()
if len(out)+szComp >= len(b.literals)+szRaw {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
lh.setType(literalsBlockRaw)
@@ -773,10 +811,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
ll.flush(llEnc.actualTableLog)
err = wr.close()
if err != nil {
return err
}
wr.close()
b.output = wr.out
// Maybe even add a bigger margin.

View File

@@ -95,42 +95,54 @@ type Header struct {
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
_, err := h.DecodeAndStrip(in)
return err
}
// DecodeAndStrip will decode the header from the beginning of the stream
// and on success return the remaining bytes.
// This will decode the frame header and the first block header if enough bytes are provided.
// It is recommended to provide at least HeaderMaxSize bytes.
// If the frame header cannot be read an error will be returned.
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
*h = Header{}
if len(in) < 4 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
if string(b) != frameMagic {
if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
return nil, ErrMagicMismatch
}
if len(in) < 4 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
h.Skippable = true
h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in)
return nil
return in[4:], nil
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
fhd, in := in[0], in[1:]
h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
return nil, errors.New("reserved bit set on frame header")
}
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
@@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error {
size = 4
}
if len(in) < int(size) {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
h.HeaderSize += int(size)
@@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error {
if fcsSize > 0 {
h.HasFCS = true
if len(in) < fcsSize {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
h.HeaderSize += int(fcsSize)
@@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error {
// Frame Header done, we will not fail from now on.
if len(in) < 3 {
return nil
return in, nil
}
tmp := in[:3]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
@@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error {
cSize := int(bh >> 3)
switch blockType {
case blockTypeReserved:
return nil
return in, nil
case blockTypeRLE:
h.FirstBlock.Compressed = true
h.FirstBlock.DecompressedSize = cSize
@@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error {
}
h.FirstBlock.OK = true
return nil
return in, nil
}
// AppendTo will append the encoded header to the dst slice.
// There is no error checking performed on the header values.
func (h *Header) AppendTo(dst []byte) ([]byte, error) {
if h.Skippable {
magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
magic[0] |= byte(h.SkippableID & 0xf)
dst = append(dst, magic[:]...)
f := h.SkippableSize
return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
}
f := frameHeader{
ContentSize: h.FrameContentSize,
WindowSize: uint32(h.WindowSize),
SingleSegment: h.SingleSegment,
Checksum: h.HasCheckSum,
DictID: h.DictionaryID,
}
return f.appendTo(dst), nil
}
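A hedged usage sketch of the two additions, `DecodeAndStrip` and `AppendTo`, using only the exported API shown above (the payload bytes are placeholders):

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Encode a frame header, then parse it back and keep the remainder.
	h := zstd.Header{WindowSize: 1 << 20}
	buf, err := h.AppendTo(nil)
	if err != nil {
		panic(err)
	}
	buf = append(buf, 0xde, 0xad) // pretend frame payload

	var parsed zstd.Header
	remain, err := parsed.DecodeAndStrip(buf)
	if err != nil {
		panic(err)
	}
	fmt.Println(parsed.WindowSize, len(remain)) // remain starts at the payload
}
```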

View File

@@ -82,7 +82,7 @@ var (
// can run multiple concurrent stateless decodes. It is even possible to
// use stateless decodes while a stream is being decoded.
//
// The Reset function can be used to initiate a new stream, which will considerably
// reduce the allocations normally caused by NewReader.
func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
initPredefined()

View File

@@ -1,10 +1,13 @@
package zstd
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"sort"
"github.com/klauspost/compress/huff0"
)
@@ -14,9 +17,8 @@ type dict struct {
litEnc *huff0.Scratch
llDec, ofDec, mlDec sequenceDec
//llEnc, ofEnc, mlEnc []*fseEncoder
offsets [3]int
content []byte
offsets [3]int
content []byte
}
const dictMagic = "\x37\xa4\x30\xec"
@@ -159,3 +161,374 @@ func InspectDictionary(b []byte) (interface {
d, err := loadDict(b)
return d, err
}
type BuildDictOptions struct {
// Dictionary ID.
ID uint32
// Content to use to create dictionary tables.
Contents [][]byte
// History to use for all blocks.
History []byte
// Offsets to use.
Offsets [3]int
// CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
// See https://github.com/facebook/zstd/issues/3724
CompatV155 bool
// Use the specified encoder level.
// The dictionary will be built using the specified encoder level,
// which will reflect speed and make the dictionary tailored for that level.
// If not set SpeedBestCompression will be used.
Level EncoderLevel
// DebugOut will write stats and other details here if set.
DebugOut io.Writer
}
func BuildDict(o BuildDictOptions) ([]byte, error) {
initPredefined()
hist := o.History
contents := o.Contents
debug := o.DebugOut != nil
println := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintln(o.DebugOut, args...)
}
}
printf := func(s string, args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintf(o.DebugOut, s, args...)
}
}
print := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprint(o.DebugOut, args...)
}
}
if int64(len(hist)) > dictMaxLength {
return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
}
if len(hist) < 8 {
return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
}
if len(contents) == 0 {
return nil, errors.New("no content provided")
}
d := dict{
id: o.ID,
litEnc: nil,
llDec: sequenceDec{},
ofDec: sequenceDec{},
mlDec: sequenceDec{},
offsets: o.Offsets,
content: hist,
}
block := blockEnc{lowMem: false}
block.init()
enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
if o.Level != 0 {
eOpts := encoderOptions{
level: o.Level,
blockSize: maxMatchLen,
windowSize: maxMatchLen,
dict: &d,
lowMem: false,
}
enc = eOpts.encoder()
} else {
o.Level = SpeedBestCompression
}
var (
remain [256]int
ll [256]int
ml [256]int
of [256]int
)
addValues := func(dst *[256]int, src []byte) {
for _, v := range src {
dst[v]++
}
}
addHist := func(dst *[256]int, src *[256]uint32) {
for i, v := range src {
dst[i] += int(v)
}
}
seqs := 0
nUsed := 0
litTotal := 0
newOffsets := make(map[uint32]int, 1000)
for _, b := range contents {
block.reset(nil)
if len(b) < 8 {
continue
}
nUsed++
enc.Reset(&d, true)
enc.Encode(&block, b)
addValues(&remain, block.literals)
litTotal += len(block.literals)
seqs += len(block.sequences)
block.genCodes()
addHist(&ll, block.coders.llEnc.Histogram())
addHist(&ml, block.coders.mlEnc.Histogram())
addHist(&of, block.coders.ofEnc.Histogram())
for i, seq := range block.sequences {
if i > 3 {
break
}
offset := seq.offset
if offset == 0 {
continue
}
if offset > 3 {
newOffsets[offset-3]++
} else {
newOffsets[uint32(o.Offsets[offset-1])]++
}
}
}
// Find most used offsets.
var sortedOffsets []uint32
for k := range newOffsets {
sortedOffsets = append(sortedOffsets, k)
}
sort.Slice(sortedOffsets, func(i, j int) bool {
a, b := sortedOffsets[i], sortedOffsets[j]
if a == b {
// Prefer the longer offset
return sortedOffsets[i] > sortedOffsets[j]
}
return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
})
if len(sortedOffsets) > 3 {
if debug {
print("Offsets:")
for i, v := range sortedOffsets {
if i > 20 {
break
}
printf("[%d: %d],", v, newOffsets[v])
}
println("")
}
sortedOffsets = sortedOffsets[:3]
}
for i, v := range sortedOffsets {
o.Offsets[i] = int(v)
}
if debug {
println("New repeat offsets", o.Offsets)
}
if nUsed == 0 || seqs == 0 {
return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
}
if debug {
println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
}
if seqs/nUsed < 512 {
// Use 512 as minimum.
nUsed = seqs / 512
}
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
hist := dst.Histogram()
var maxSym uint8
var maxCount int
var fakeLength int
for i, v := range src {
if v > 0 {
v = v / nUsed
if v == 0 {
v = 1
}
}
if v > maxCount {
maxCount = v
}
if v != 0 {
maxSym = uint8(i)
}
fakeLength += v
hist[i] = uint32(v)
}
dst.HistogramFinished(maxSym, maxCount)
dst.reUsed = false
dst.useRLE = false
err := dst.normalizeCount(fakeLength)
if err != nil {
return nil, err
}
if debug {
println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
}
return dst.writeCount(nil)
}
if debug {
print("Literal lengths: ")
}
llTable, err := copyHist(block.coders.llEnc, &ll)
if err != nil {
return nil, err
}
if debug {
print("Match lengths: ")
}
mlTable, err := copyHist(block.coders.mlEnc, &ml)
if err != nil {
return nil, err
}
if debug {
print("Offsets: ")
}
ofTable, err := copyHist(block.coders.ofEnc, &of)
if err != nil {
return nil, err
}
// Literal table
avgSize := litTotal
if avgSize > huff0.BlockSizeMax/2 {
avgSize = huff0.BlockSizeMax / 2
}
huffBuff := make([]byte, 0, avgSize)
// Target size
div := litTotal / avgSize
if div < 1 {
div = 1
}
if debug {
println("Huffman weights:")
}
for i, n := range remain[:] {
if n > 0 {
n = n / div
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
if debug {
printf("[%d: %d], ", i, n)
}
}
}
if o.CompatV155 && remain[255]/div == 0 {
huffBuff = append(huffBuff, 255)
}
scratch := &huff0.Scratch{TableLog: 11}
for tries := 0; tries < 255; tries++ {
scratch = &huff0.Scratch{TableLog: 11}
_, _, err = huff0.Compress1X(huffBuff, scratch)
if err == nil {
break
}
if debug {
printf("Try %d: Huffman error: %v\n", tries+1, err)
}
huffBuff = huffBuff[:0]
if tries == 250 {
if debug {
println("Huffman: Bailing out with predefined table")
}
// Bail out.... Just generate something
huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
for i := 0; i < 128; i++ {
huffBuff = append(huffBuff, byte(i))
}
continue
}
if errors.Is(err, huff0.ErrIncompressible) {
// Try truncating least common.
for i, n := range remain[:] {
if n > 0 {
n = n / (div * (i + 1))
if n > 0 {
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
huffBuff = append(huffBuff, 255)
}
if len(huffBuff) == 0 {
huffBuff = append(huffBuff, 0, 255)
}
}
if errors.Is(err, huff0.ErrUseRLE) {
for i, n := range remain[:] {
n = n / (div * (i + 1))
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
var out bytes.Buffer
out.Write([]byte(dictMagic))
out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
out.Write(scratch.OutTable)
if debug {
println("huff table:", len(scratch.OutTable), "bytes")
println("of table:", len(ofTable), "bytes")
println("ml table:", len(mlTable), "bytes")
println("ll table:", len(llTable), "bytes")
}
out.Write(ofTable)
out.Write(mlTable)
out.Write(llTable)
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
out.Write(hist)
if debug {
_, err := loadDict(out.Bytes())
if err != nil {
panic(err)
}
i, err := InspectDictionary(out.Bytes())
if err != nil {
panic(err)
}
println("ID:", i.ID())
println("Content size:", i.ContentSize())
println("Encoder:", i.LitEncoder() != nil)
println("Offsets:", i.Offsets())
var totalSize int
for _, b := range contents {
totalSize += len(b)
}
encWith := func(opts ...EOption) int {
enc, err := NewWriter(nil, opts...)
if err != nil {
panic(err)
}
defer enc.Close()
var dst []byte
var totalSize int
for _, b := range contents {
dst = enc.EncodeAll(b, dst[:0])
totalSize += len(dst)
}
return totalSize
}
plain := encWith(WithEncoderLevel(o.Level))
withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
println("Input size:", totalSize)
println("Plain Compressed:", plain)
println("Dict Compressed:", withDict)
println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
}
return out.Bytes(), nil
}
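A hedged usage sketch of the new `BuildDict` API; the sample slices are placeholders, and `{1, 4, 8}` mirrors zstd's default repeat offsets:

```go
import "github.com/klauspost/compress/zstd"

// buildExampleDict is a hypothetical wrapper: samples are representative
// inputs, history is the raw dictionary content (at least 8 bytes).
func buildExampleDict(samples [][]byte, history []byte) ([]byte, error) {
	return zstd.BuildDict(zstd.BuildDictOptions{
		ID:       0x12345678,
		Contents: samples,
		History:  history,
		Offsets:  [3]int{1, 4, 8}, // zstd's default repeat offsets
		Level:    zstd.SpeedBestCompression,
	})
}
```

The returned bytes can then be passed to `WithEncoderDict`, as the debug block above does when measuring savings.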

View File

@@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
ofc = ofCode(uint32(m.rep) & 3)
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@@ -135,8 +135,20 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
break
}
// Add block to history
s := e.addBlock(src)
blk.size = len(src)
// Check RLE first
if len(src) > zstdMinMatch {
ml := matchLen(src[1:], src)
if ml == len(src)-1 {
blk.literals = append(blk.literals, src[0])
blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
return
}
}
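The probe works because `matchLen(src[1:], src)` compares the input against itself shifted by one byte, and can only span the whole block when every byte equals its predecessor. A hedged equivalent without the internal `matchLen` helper:

```go
// isSingleByteRun reports whether src is one repeated byte, the
// condition under which the encoder above emits a single RLE sequence.
func isSingleByteRun(src []byte) bool {
	for i := 1; i < len(src); i++ {
		if src[i] != src[i-1] {
			return false
		}
	}
	return len(src) > 1
}
```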
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
@@ -197,17 +209,10 @@ encodeLoop:
// Set m to a match at offset if it looks like that will improve compression.
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
delta := s - offset
if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
return
}
if debugAsserts {
if offset <= 0 {
panic(offset)
}
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
// Try to quick reject if we already have a long match.
if m.length > 16 {
left := len(src) - int(m.s+m.length)
@@ -226,8 +231,10 @@ encodeLoop:
}
}
l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 {
if m.rep <= 0 {
// Extend candidate match backwards as far as possible.
// Do not extend repeats as we can assume they are optimal
// and offsets change if s == nextEmit.
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
@@ -238,7 +245,14 @@ encodeLoop:
l++
}
}
if debugAsserts {
if offset >= s {
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
}
if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
cand := match{offset: offset, s: s, length: l, rep: rep}
cand.estBits(bitsPerByte)
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
@@ -281,6 +295,7 @@ encodeLoop:
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
index0 := s + 1
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
@@ -334,41 +349,45 @@ encodeLoop:
}
if debugAsserts {
if best.offset >= best.s {
panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
}
if best.s < nextEmit {
panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
}
if best.offset < s-e.maxMatchOff {
panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
}
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
}
}
// We have a match, we can store the forward value
s = best.s
if best.rep > 0 {
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
if debugAsserts && s <= nextEmit {
panic("s <= nextEmit")
}
addLiterals(&seq, best.s)
// Repeat. If bit 4 is set, this is a non-lit repeat.
seq.offset = uint32(best.rep & 3)
if debugSequences {
println("repeat sequence", seq, "next s:", s)
println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
}
blk.sequences = append(blk.sequences, seq)
// Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
off := index0 + e.cur
for index0 < s {
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -377,6 +396,7 @@ encodeLoop:
off++
index0++
}
switch best.rep {
case 2, 4 | 1:
offset1, offset2 = offset2, offset1
@@ -385,13 +405,17 @@ encodeLoop:
case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2
}
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
@@ -418,19 +442,25 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
// Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
}
// Index old s + 1 -> s - 1
for index0 < s {
off := index0 + e.cur
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
off++
}
if s >= sLimit {
break encodeLoop
}
}

View File

@@ -102,9 +102,20 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
e.cur = e.maxMatchOff
break
}
// Add block to history
s := e.addBlock(src)
blk.size = len(src)
// Check RLE first
if len(src) > zstdMinMatch {
ml := matchLen(src[1:], src)
if ml == len(src)-1 {
blk.literals = append(blk.literals, src[0])
blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
return
}
}
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
@@ -145,7 +156,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@@ -162,6 +173,7 @@ encodeLoop:
off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@@ -258,7 +270,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@@ -498,15 +509,15 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
off += 2
}
cv = load6432(src, s)
@@ -672,7 +683,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@@ -691,6 +702,7 @@ encodeLoop:
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@@ -726,7 +738,6 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff
nextEmit = s
@@ -790,7 +801,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@@ -1024,18 +1034,18 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
off += 2
}
cv = load6432(src, s)

View File

@@ -227,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error {
DictID: e.o.dict.ID(),
}
dst, err := fh.appendTo(tmp[:0])
if err != nil {
return err
}
dst := fh.appendTo(tmp[:0])
s.headerWritten = true
s.wWg.Wait()
var n2 int
@@ -483,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
Checksum: false,
DictID: 0,
}
dst, _ = fh.appendTo(dst)
dst = fh.appendTo(dst)
// Write raw block as last one only.
var blk blockHeader
@@ -518,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
dst, err := fh.appendTo(dst)
if err != nil {
panic(err)
}
dst = fh.appendTo(dst)
// If we can do everything in one block, prefer that.
if len(src) <= e.o.blockSize {
@@ -581,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// Add padding with content from crypto/rand.Reader
if e.o.pad > 0 {
add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
var err error
dst, err = skippableFrame(dst, add, rand.Reader)
if err != nil {
panic(err)

View File

@@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption {
// The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer.
// The default value is determined by the compression level.
// The default value is determined by the compression level, capped at 8MB.
func WithWindowSize(n int) EOption {
return func(o *encoderOptions) error {
switch {
@@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
case SpeedDefault:
o.windowSize = 8 << 20
case SpeedBetterCompression:
o.windowSize = 16 << 20
o.windowSize = 8 << 20
case SpeedBestCompression:
o.windowSize = 32 << 20
o.windowSize = 8 << 20
}
}
if !o.customALEntropy {
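Since `SpeedBetterCompression` and `SpeedBestCompression` now default to an 8MB window, callers that relied on the old 16/32MB windows can restore them explicitly. A hedged sketch:

```go
import (
	"io"

	"github.com/klauspost/compress/zstd"
)

// newBigWindowWriter opts back into a 32 MB window after the defaults
// for the two highest levels dropped to 8 MB in this update.
func newBigWindowWriter(w io.Writer) (*zstd.Encoder, error) {
	return zstd.NewWriter(w,
		zstd.WithEncoderLevel(zstd.SpeedBestCompression),
		zstd.WithWindowSize(32<<20), // power of two in [MinWindowSize, MaxWindowSize]
	)
}
```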

View File

@@ -22,7 +22,7 @@ type frameHeader struct {
const maxHeaderSize = 14
func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
func (f frameHeader) appendTo(dst []byte) []byte {
dst = append(dst, frameMagic...)
var fhd uint8
if f.Checksum {
@@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
if f.SingleSegment {
dst = append(dst, uint8(f.ContentSize))
}
// Unless SingleSegment is set, frame sizes < 256 are not stored.
case 1:
f.ContentSize -= 256
dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))
@@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
default:
panic("invalid fcs")
}
return dst, nil
return dst
}
const skippableFrameHeader = 4 + 4

View File

@@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error {
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
symbolNext[i] = uint16(v)
v = 1
}
symbolNext[i] = uint16(v)
}
}
@@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error {
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
for {
// lowprob area
position = (position + step) & tableMask
if position <= highThreshold {
break
}
}
}
}
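For context, this loop is the classic FSE symbol spread. A hedged, self-contained sketch (the step formula is the conventional one from FSE, assumed here rather than taken from this diff):

```go
// spreadSymbols fills a decoding table: each symbol occupies norm[s]
// cells, visited with a fixed odd step so symbols interleave evenly.
// Cells above highThreshold are reserved for low-probability (-1)
// symbols, so the spread steps over them.
func spreadSymbols(norm []int16, tableLog uint) []byte {
	tableSize := 1 << tableLog
	tableMask := tableSize - 1
	step := tableSize>>1 + tableSize>>3 + 3 // conventional FSE step
	highThreshold := tableSize - 1
	dt := make([]byte, tableSize)
	position := 0
	for ss, v := range norm {
		if v == -1 { // low-probability symbol: one cell at the top
			dt[highThreshold] = byte(ss)
			highThreshold--
			continue
		}
		for i := 0; i < int(v); i++ {
			dt[position] = byte(ss)
			for {
				position = (position + step) & tableMask
				if position <= highThreshold {
					break
				}
			}
		}
	}
	return dt
}
```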

View File

@@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
return io.ErrUnexpectedEOF
}
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
@@ -452,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol)
// extra bits are stored in reverse order.
br.fill()
if s.maxBits <= 32 {
mo += br.getBits(moB)
ml += br.getBits(mlB)
ll += br.getBits(llB)
} else {
mo += br.getBits(moB)
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fill()
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
}
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
mo = s.adjustOffset(mo, ll, moB)
return
}
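The restructure works because of a bit budget the old branch obscured: `fill()` guarantees at least 32 readable bits, the offset carries the largest extra-bit count, and match-length plus literal-length extras never exceed 16+16 = 32 bits. A trivial hedged sketch of the condition:

```go
// needsSecondFill: only when the worst-case total of extra bits can
// exceed one 32-bit refill (s.maxBits > 32 in the code above) must the
// reader refill between the offset read and the ml/ll reads.
func needsSecondFill(moBits, mlBits, llBits uint8) bool {
	return int(moBits)+int(mlBits)+int(llBits) > 32
}
```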

View File

@@ -5,11 +5,11 @@
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
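The register and offset changes in this and every following assembly hunk fall out mechanically from the `bitReader` field removal above: dropping `off uint` shifts the later fields down by 8 bytes, and the slice length at offset 8 replaces `off` as the read cursor. A hedged layout sketch (64-bit platform; a slice header is pointer, len, cap):

```go
// Field offsets that the assembly reads, before and after the change.
type bitReaderOld struct {
	in       []byte // ptr at 0, len at 8, cap at 16
	off      uint   // 24 -> the old MOVQ 24(AX), SI
	value    uint64 // 32 -> the old MOVQ 32(AX), DX
	bitsRead uint8  // 40 -> the old MOVBQZX 40(AX), BX
}

type bitReaderNew struct {
	in       []byte // len at 8 -> the new MOVQ 8(CX), SI cursor
	value    uint64 // 24 -> the new MOVQ 24(CX), DX
	bitsRead uint8  // 32 -> the new MOVBQZX 32(CX), BX
}
```

Separately, the `SHRQ $0x10` + `MOVWQZX` pairs collapse into a single `SHRL $0x10`, and the `MOVQ $0x00001010` + `BEXTRQ` sequences do the same: writing a 32-bit register clears the upper half, so the 32-bit shift alone leaves the wanted 16-bit field zero-extended.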
@@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R14
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R14
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R14
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -301,9 +298,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -336,11 +333,11 @@ error_overread:
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R14
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R14
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R14
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -603,9 +597,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -638,11 +632,11 @@ error_overread:
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
BZHIQ R14, R15, R15
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -892,9 +883,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -927,11 +918,11 @@ error_overread:
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -1032,11 +1023,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
BZHIQ R14, R15, R15
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -1152,9 +1140,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -1797,11 +1785,11 @@ empty_seqs:
// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R13
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R13
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R13
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -2295,9 +2280,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2362,11 +2347,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
BZHIQ R13, R14, R14
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -2818,9 +2800,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2885,11 +2867,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R13
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R13
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R13
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3485,9 +3464,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -3552,11 +3531,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
BZHIQ R13, R14, R14
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -4110,9 +4086,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX

View File

@@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
for i := range seqs {
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)

View File

@@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
var written int64
var readHeader bool
{
var header []byte
var n int
header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
var n int
n, r.err = w.Write(header)
if r.err != nil {
return written, r.err