Update dependencies

Ingo Oppermann
2024-04-29 16:37:17 +02:00
parent 4dab7b8e6c
commit 24cc87ae7e
1548 changed files with 70419 additions and 68575 deletions

View File

@@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
## Decompressor
Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
kindly supplied by [fuzzit.dev](https://fuzzit.dev/).

View File

@@ -17,7 +17,6 @@ import (
// for aligning the input.
type bitReader struct {
in []byte
off uint // next byte to read is at in[off - 1]
value uint64 // Maybe use [16]byte, but shifting is awkward.
bitsRead uint8
}
@@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error {
return errors.New("corrupt stream: too short")
}
b.in = in
b.off = uint(len(in))
// The highest bit of the last byte indicates where to start
v := in[len(in)-1]
if v == 0 {
@@ -69,21 +67,19 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 {
return
}
// 2 bounds checks.
v := b.in[b.off-4:]
v = v[:4]
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
// fillFastStart() assumes the bitreader is empty and there are at least 8 bytes to read.
func (b *bitReader) fillFastStart() {
// Do single re-slice to avoid bounds checks.
b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
v := b.in[len(b.in)-8:]
b.in = b.in[:len(b.in)-8]
b.value = binary.LittleEndian.Uint64(v)
b.bitsRead = 0
b.off -= 8
}
// fill() will make sure at least 32 bits are available.
@@ -91,25 +87,25 @@ func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off >= 4 {
v := b.in[b.off-4:]
v = v[:4]
if len(b.in) >= 4 {
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
return
}
for b.off > 0 {
b.value = (b.value << 8) | uint64(b.in[b.off-1])
b.bitsRead -= 8
b.off--
b.bitsRead -= uint8(8 * len(b.in))
for len(b.in) > 0 {
b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
b.in = b.in[:len(b.in)-1]
}
}
// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
return b.off == 0 && b.bitsRead >= 64
return len(b.in) == 0 && b.bitsRead >= 64
}
// overread returns true if more bits have been requested than is on the stream.
@@ -119,7 +115,7 @@ func (b *bitReader) overread() bool {
// remain returns the number of bits remaining.
func (b *bitReader) remain() uint {
return b.off*8 + 64 - uint(b.bitsRead)
return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
}
// close the bitstream and returns an error if out-of-buffer reads occurred.
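Taken together, the hunks above drop the explicit `off` cursor and instead consume `b.in` from its tail, so the slice length doubles as the read position. A minimal standalone sketch of the pattern (simplified from the diff, not the library's exact type):

```go
package main

import "fmt"

// sketchReader mirrors the shrinking-slice idea from the diff above:
// bytes are consumed from the tail of in, so len(in) is the cursor and
// each refill needs only a single re-slice.
type sketchReader struct {
	in       []byte
	value    uint64
	bitsRead uint8
}

func (b *sketchReader) fill() {
	if b.bitsRead < 32 {
		return
	}
	if len(b.in) >= 4 {
		v := b.in[len(b.in)-4:]
		b.in = b.in[:len(b.in)-4]
		low := uint32(v[0]) | uint32(v[1])<<8 | uint32(v[2])<<16 | uint32(v[3])<<24
		b.value = (b.value << 32) | uint64(low)
		b.bitsRead -= 32
		return
	}
	// Fewer than 4 bytes left: drain byte by byte.
	b.bitsRead -= uint8(8 * len(b.in))
	for len(b.in) > 0 {
		b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
		b.in = b.in[:len(b.in)-1]
	}
}

func main() {
	r := sketchReader{in: []byte{1, 2, 3, 4, 5}, bitsRead: 64}
	r.fill()
	fmt.Printf("%#x, %d byte(s) left\n", r.value, len(r.in)) // 0x5040302, 1 byte(s) left
}
```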

View File

@@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}
// reset and continue writing by appending to out.

View File

@@ -554,6 +554,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
if debugDecoder {
printf("Compression modes: 0b%b", compMode)
}
if compMode&3 != 0 {
return errors.New("corrupt block: reserved bits not zero")
}
for i := uint(0); i < 3; i++ {
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debugDecoder {
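The new `compMode&3` rejection follows the sequences-section header layout from RFC 8878; a hedged sketch of how the byte decomposes (illustrative helper, not code from this commit):

```go
// decodeCompModes splits the Sequences_Section compression-modes byte:
//   bits 7-6: literal lengths mode, bits 5-4: offsets mode,
//   bits 3-2: match lengths mode, bits 1-0: reserved, must be zero.
// Mode values: 0 = predefined, 1 = RLE, 2 = FSE-compressed, 3 = repeat.
func decodeCompModes(compMode byte) (ll, of, ml byte, ok bool) {
	if compMode&3 != 0 { // reserved bits set: corrupt block
		return 0, 0, 0, false
	}
	return compMode >> 6, (compMode >> 4) & 3, (compMode >> 2) & 3, true
}
```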

View File

@@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
if len(lits) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
} else if len(lits) > 32 {
} else if len(lits) > 16 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
} else {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(lits) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSizes(len(out), len(lits), single)
if len(out)+lh.size() >= len(lits) {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
if debugEncoder {
@@ -420,6 +427,16 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
return nil
}
// encodeRLE will encode an RLE block.
func (b *blockEnc) encodeRLE(val byte, length uint32) {
var bh blockHeader
bh.setLast(b.last)
bh.setSize(length)
bh.setType(blockTypeRLE)
b.output = bh.appendTo(b.output)
b.output = append(b.output, val)
}
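An RLE block is just a block header whose size field holds the regenerated length, followed by a single payload byte. A hypothetical decoding counterpart to `encodeRLE` (not part of this commit):

```go
import "bytes"

// decodeRLE expands an RLE block: the header carried the regenerated
// length, the body carried the one repeated byte.
func decodeRLE(val byte, length uint32) []byte {
	return bytes.Repeat([]byte{val}, int(length))
}
```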
// fuzzFseEncoder can be used to fuzz the FSE encoder.
func fuzzFseEncoder(data []byte) int {
if len(data) > maxSequences || len(data) < 2 {
@@ -472,6 +489,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.sequences) == 0 {
return b.encodeLits(b.literals, rawAllLits)
}
if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 {
// Check common RLE cases.
seq := b.sequences[0]
if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 {
// Offset == 1 and 0 or 1 literals.
b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen)
return nil
}
}
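The `seq.offset-3 == 1` test relies on the encoder's internal offset bias, the same convention visible in the dictionary builder's `newOffsets` bookkeeping further down: stored values above 3 are real offsets plus 3, while 1-3 select repeat offsets. A hedged sketch (`realOffset` is an illustrative helper and ignores the litLen == 0 special cases):

```go
// realOffset resolves the biased offset stored in a sequence: values
// above 3 are literal offsets shifted by +3; 1-3 pick a repeat offset.
func realOffset(stored uint32, repeats [3]uint32) uint32 {
	if stored > 3 {
		return stored - 3 // literal offset
	}
	return repeats[stored-1] // repeat code
}
```

A stored offset of 4 is therefore a literal offset of 1, i.e. "copy the previous byte", which with at most one leading literal is exactly a run-length-encoded block.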
// We want some difference to at least account for the headers.
saved := b.size - len(b.literals) - (b.size >> 6)
if saved < 16 {
@@ -503,7 +530,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.literals) >= 1024 && !raw {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
} else if len(b.literals) > 32 && !raw {
} else if len(b.literals) > 16 && !raw {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@@ -511,6 +538,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(b.literals) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSize(len(b.literals))
szRaw := lh.size()
lh.setSizes(len(out), len(b.literals), single)
szComp := lh.size()
if len(out)+szComp >= len(b.literals)+szRaw {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
lh.setType(literalsBlockRaw)
@@ -773,10 +811,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
ll.flush(llEnc.actualTableLog)
err = wr.close()
if err != nil {
return err
}
wr.close()
b.output = wr.out
// Maybe even add a bigger margin.

View File

@@ -95,42 +95,54 @@ type Header struct {
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
_, err := h.DecodeAndStrip(in)
return err
}
// DecodeAndStrip will decode the header from the beginning of the stream
// and on success return the remaining bytes.
// This will decode the frame header and the first block header if enough bytes are provided.
// It is recommended to provide at least HeaderMaxSize bytes.
// If the frame header cannot be read an error will be returned.
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
*h = Header{}
if len(in) < 4 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
if string(b) != frameMagic {
if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
return nil, ErrMagicMismatch
}
if len(in) < 4 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
h.Skippable = true
h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in)
return nil
return in[4:], nil
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
fhd, in := in[0], in[1:]
h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
return nil, errors.New("reserved bit set on frame header")
}
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
@@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error {
size = 4
}
if len(in) < int(size) {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
h.HeaderSize += int(size)
@@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error {
if fcsSize > 0 {
h.HasFCS = true
if len(in) < fcsSize {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
h.HeaderSize += int(fcsSize)
@@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error {
// Frame Header done, we will not fail from now on.
if len(in) < 3 {
return nil
return in, nil
}
tmp := in[:3]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
@@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error {
cSize := int(bh >> 3)
switch blockType {
case blockTypeReserved:
return nil
return in, nil
case blockTypeRLE:
h.FirstBlock.Compressed = true
h.FirstBlock.DecompressedSize = cSize
@@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error {
}
h.FirstBlock.OK = true
return nil
return in, nil
}
// AppendTo will append the encoded header to the dst slice.
// There is no error checking performed on the header values.
func (h *Header) AppendTo(dst []byte) ([]byte, error) {
if h.Skippable {
magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
magic[0] |= byte(h.SkippableID & 0xf)
dst = append(dst, magic[:]...)
f := h.SkippableSize
return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
}
f := frameHeader{
ContentSize: h.FrameContentSize,
WindowSize: uint32(h.WindowSize),
SingleSegment: h.SingleSegment,
Checksum: h.HasCheckSum,
DictID: h.DictionaryID,
}
return f.appendTo(dst), nil
}
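A hedged usage sketch of the two additions, `DecodeAndStrip` and `AppendTo`, using only the exported API shown above (the payload bytes are placeholders):

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Encode a frame header, then parse it back and keep the remainder.
	h := zstd.Header{WindowSize: 1 << 20}
	buf, err := h.AppendTo(nil)
	if err != nil {
		panic(err)
	}
	buf = append(buf, 0xde, 0xad) // pretend frame payload

	var parsed zstd.Header
	remain, err := parsed.DecodeAndStrip(buf)
	if err != nil {
		panic(err)
	}
	fmt.Println(parsed.WindowSize, len(remain)) // remain starts at the payload
}
```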

View File

@@ -82,7 +82,7 @@ var (
// can run multiple concurrent stateless decodes. It is even possible to
// use stateless decodes while a stream is being decoded.
//
// The Reset function can be used to initiate a new stream, which will considerably
// reduce the allocations normally caused by NewReader.
func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
initPredefined()

View File

@@ -1,10 +1,13 @@
package zstd
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"sort"
"github.com/klauspost/compress/huff0"
)
@@ -14,9 +17,8 @@ type dict struct {
litEnc *huff0.Scratch
llDec, ofDec, mlDec sequenceDec
//llEnc, ofEnc, mlEnc []*fseEncoder
offsets [3]int
content []byte
offsets [3]int
content []byte
}
const dictMagic = "\x37\xa4\x30\xec"
@@ -159,3 +161,374 @@ func InspectDictionary(b []byte) (interface {
d, err := loadDict(b)
return d, err
}
type BuildDictOptions struct {
// Dictionary ID.
ID uint32
// Content to use to create dictionary tables.
Contents [][]byte
// History to use for all blocks.
History []byte
// Offsets to use.
Offsets [3]int
// CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
// See https://github.com/facebook/zstd/issues/3724
CompatV155 bool
// Use the specified encoder level.
// The dictionary will be built using the specified encoder level,
// which will reflect speed and make the dictionary tailored for that level.
// If not set SpeedBestCompression will be used.
Level EncoderLevel
// DebugOut will write stats and other details here if set.
DebugOut io.Writer
}
func BuildDict(o BuildDictOptions) ([]byte, error) {
initPredefined()
hist := o.History
contents := o.Contents
debug := o.DebugOut != nil
println := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintln(o.DebugOut, args...)
}
}
printf := func(s string, args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintf(o.DebugOut, s, args...)
}
}
print := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprint(o.DebugOut, args...)
}
}
if int64(len(hist)) > dictMaxLength {
return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
}
if len(hist) < 8 {
return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
}
if len(contents) == 0 {
return nil, errors.New("no content provided")
}
d := dict{
id: o.ID,
litEnc: nil,
llDec: sequenceDec{},
ofDec: sequenceDec{},
mlDec: sequenceDec{},
offsets: o.Offsets,
content: hist,
}
block := blockEnc{lowMem: false}
block.init()
enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
if o.Level != 0 {
eOpts := encoderOptions{
level: o.Level,
blockSize: maxMatchLen,
windowSize: maxMatchLen,
dict: &d,
lowMem: false,
}
enc = eOpts.encoder()
} else {
o.Level = SpeedBestCompression
}
var (
remain [256]int
ll [256]int
ml [256]int
of [256]int
)
addValues := func(dst *[256]int, src []byte) {
for _, v := range src {
dst[v]++
}
}
addHist := func(dst *[256]int, src *[256]uint32) {
for i, v := range src {
dst[i] += int(v)
}
}
seqs := 0
nUsed := 0
litTotal := 0
newOffsets := make(map[uint32]int, 1000)
for _, b := range contents {
block.reset(nil)
if len(b) < 8 {
continue
}
nUsed++
enc.Reset(&d, true)
enc.Encode(&block, b)
addValues(&remain, block.literals)
litTotal += len(block.literals)
seqs += len(block.sequences)
block.genCodes()
addHist(&ll, block.coders.llEnc.Histogram())
addHist(&ml, block.coders.mlEnc.Histogram())
addHist(&of, block.coders.ofEnc.Histogram())
for i, seq := range block.sequences {
if i > 3 {
break
}
offset := seq.offset
if offset == 0 {
continue
}
if offset > 3 {
newOffsets[offset-3]++
} else {
newOffsets[uint32(o.Offsets[offset-1])]++
}
}
}
// Find most used offsets.
var sortedOffsets []uint32
for k := range newOffsets {
sortedOffsets = append(sortedOffsets, k)
}
sort.Slice(sortedOffsets, func(i, j int) bool {
a, b := sortedOffsets[i], sortedOffsets[j]
if a == b {
// Prefer the longer offset
return sortedOffsets[i] > sortedOffsets[j]
}
return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
})
if len(sortedOffsets) > 3 {
if debug {
print("Offsets:")
for i, v := range sortedOffsets {
if i > 20 {
break
}
printf("[%d: %d],", v, newOffsets[v])
}
println("")
}
sortedOffsets = sortedOffsets[:3]
}
for i, v := range sortedOffsets {
o.Offsets[i] = int(v)
}
if debug {
println("New repeat offsets", o.Offsets)
}
if nUsed == 0 || seqs == 0 {
return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
}
if debug {
println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
}
if seqs/nUsed < 512 {
// Use 512 as minimum.
nUsed = seqs / 512
}
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
hist := dst.Histogram()
var maxSym uint8
var maxCount int
var fakeLength int
for i, v := range src {
if v > 0 {
v = v / nUsed
if v == 0 {
v = 1
}
}
if v > maxCount {
maxCount = v
}
if v != 0 {
maxSym = uint8(i)
}
fakeLength += v
hist[i] = uint32(v)
}
dst.HistogramFinished(maxSym, maxCount)
dst.reUsed = false
dst.useRLE = false
err := dst.normalizeCount(fakeLength)
if err != nil {
return nil, err
}
if debug {
println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
}
return dst.writeCount(nil)
}
if debug {
print("Literal lengths: ")
}
llTable, err := copyHist(block.coders.llEnc, &ll)
if err != nil {
return nil, err
}
if debug {
print("Match lengths: ")
}
mlTable, err := copyHist(block.coders.mlEnc, &ml)
if err != nil {
return nil, err
}
if debug {
print("Offsets: ")
}
ofTable, err := copyHist(block.coders.ofEnc, &of)
if err != nil {
return nil, err
}
// Literal table
avgSize := litTotal
if avgSize > huff0.BlockSizeMax/2 {
avgSize = huff0.BlockSizeMax / 2
}
huffBuff := make([]byte, 0, avgSize)
// Target size
div := litTotal / avgSize
if div < 1 {
div = 1
}
if debug {
println("Huffman weights:")
}
for i, n := range remain[:] {
if n > 0 {
n = n / div
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
if debug {
printf("[%d: %d], ", i, n)
}
}
}
if o.CompatV155 && remain[255]/div == 0 {
huffBuff = append(huffBuff, 255)
}
scratch := &huff0.Scratch{TableLog: 11}
for tries := 0; tries < 255; tries++ {
scratch = &huff0.Scratch{TableLog: 11}
_, _, err = huff0.Compress1X(huffBuff, scratch)
if err == nil {
break
}
if debug {
printf("Try %d: Huffman error: %v\n", tries+1, err)
}
huffBuff = huffBuff[:0]
if tries == 250 {
if debug {
println("Huffman: Bailing out with predefined table")
}
// Bail out.... Just generate something
huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
for i := 0; i < 128; i++ {
huffBuff = append(huffBuff, byte(i))
}
continue
}
if errors.Is(err, huff0.ErrIncompressible) {
// Try truncating least common.
for i, n := range remain[:] {
if n > 0 {
n = n / (div * (i + 1))
if n > 0 {
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
huffBuff = append(huffBuff, 255)
}
if len(huffBuff) == 0 {
huffBuff = append(huffBuff, 0, 255)
}
}
if errors.Is(err, huff0.ErrUseRLE) {
for i, n := range remain[:] {
n = n / (div * (i + 1))
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
var out bytes.Buffer
out.Write([]byte(dictMagic))
out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
out.Write(scratch.OutTable)
if debug {
println("huff table:", len(scratch.OutTable), "bytes")
println("of table:", len(ofTable), "bytes")
println("ml table:", len(mlTable), "bytes")
println("ll table:", len(llTable), "bytes")
}
out.Write(ofTable)
out.Write(mlTable)
out.Write(llTable)
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
out.Write(hist)
if debug {
_, err := loadDict(out.Bytes())
if err != nil {
panic(err)
}
i, err := InspectDictionary(out.Bytes())
if err != nil {
panic(err)
}
println("ID:", i.ID())
println("Content size:", i.ContentSize())
println("Encoder:", i.LitEncoder() != nil)
println("Offsets:", i.Offsets())
var totalSize int
for _, b := range contents {
totalSize += len(b)
}
encWith := func(opts ...EOption) int {
enc, err := NewWriter(nil, opts...)
if err != nil {
panic(err)
}
defer enc.Close()
var dst []byte
var totalSize int
for _, b := range contents {
dst = enc.EncodeAll(b, dst[:0])
totalSize += len(dst)
}
return totalSize
}
plain := encWith(WithEncoderLevel(o.Level))
withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
println("Input size:", totalSize)
println("Plain Compressed:", plain)
println("Dict Compressed:", withDict)
println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
}
return out.Bytes(), nil
}
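A hedged usage sketch of the new `BuildDict` API; the sample slices are placeholders, and `{1, 4, 8}` mirrors zstd's default repeat offsets:

```go
import "github.com/klauspost/compress/zstd"

// buildExampleDict is a hypothetical wrapper: samples are representative
// inputs, history is the raw dictionary content (at least 8 bytes).
func buildExampleDict(samples [][]byte, history []byte) ([]byte, error) {
	return zstd.BuildDict(zstd.BuildDictOptions{
		ID:       0x12345678,
		Contents: samples,
		History:  history,
		Offsets:  [3]int{1, 4, 8}, // zstd's default repeat offsets
		Level:    zstd.SpeedBestCompression,
	})
}
```

The returned bytes can then be passed to `WithEncoderDict`, as the debug block above does when measuring savings.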

View File

@@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
ofc = ofCode(uint32(m.rep) & 3)
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@@ -135,8 +135,20 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
break
}
// Add block to history
s := e.addBlock(src)
blk.size = len(src)
// Check RLE first
if len(src) > zstdMinMatch {
ml := matchLen(src[1:], src)
if ml == len(src)-1 {
blk.literals = append(blk.literals, src[0])
blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
return
}
}
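The probe works because `matchLen(src[1:], src)` compares the input against itself shifted by one byte, and can only span the whole block when every byte equals its predecessor. A hedged equivalent without the internal `matchLen` helper:

```go
// isSingleByteRun reports whether src is one repeated byte, the
// condition under which the encoder above emits a single RLE sequence.
func isSingleByteRun(src []byte) bool {
	for i := 1; i < len(src); i++ {
		if src[i] != src[i-1] {
			return false
		}
	}
	return len(src) > 1
}
```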
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
@@ -197,17 +209,10 @@ encodeLoop:
// Set m to a match at offset if it looks like that will improve compression.
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
delta := s - offset
if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
return
}
if debugAsserts {
if offset <= 0 {
panic(offset)
}
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
// Try to quick reject if we already have a long match.
if m.length > 16 {
left := len(src) - int(m.s+m.length)
@@ -226,8 +231,10 @@ encodeLoop:
}
}
l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 {
if m.rep <= 0 {
// Extend candidate match backwards as far as possible.
// Do not extend repeats as we can assume they are optimal
// and offsets change if s == nextEmit.
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
@@ -238,7 +245,14 @@ encodeLoop:
l++
}
}
if debugAsserts {
if offset >= s {
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
}
if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
cand := match{offset: offset, s: s, length: l, rep: rep}
cand.estBits(bitsPerByte)
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
@@ -281,6 +295,7 @@ encodeLoop:
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
index0 := s + 1
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
@@ -334,41 +349,45 @@ encodeLoop:
}
if debugAsserts {
if best.offset >= best.s {
panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
}
if best.s < nextEmit {
panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
}
if best.offset < s-e.maxMatchOff {
panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
}
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
}
}
// We have a match, we can store the forward value
s = best.s
if best.rep > 0 {
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
if debugAsserts && s <= nextEmit {
panic("s <= nextEmit")
}
addLiterals(&seq, best.s)
// Repeat. If bit 4 is set, this is a non-lit repeat.
seq.offset = uint32(best.rep & 3)
if debugSequences {
println("repeat sequence", seq, "next s:", s)
println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
}
blk.sequences = append(blk.sequences, seq)
// Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
off := index0 + e.cur
for index0 < s {
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -377,6 +396,7 @@ encodeLoop:
off++
index0++
}
switch best.rep {
case 2, 4 | 1:
offset1, offset2 = offset2, offset1
@@ -385,13 +405,17 @@ encodeLoop:
case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2
}
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
@@ -418,19 +442,25 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
// Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
}
// Index old s + 1 -> s - 1
for index0 < s {
off := index0 + e.cur
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
off++
}
if s >= sLimit {
break encodeLoop
}
}

View File

@@ -102,9 +102,20 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
e.cur = e.maxMatchOff
break
}
// Add block to history
s := e.addBlock(src)
blk.size = len(src)
// Check RLE first
if len(src) > zstdMinMatch {
ml := matchLen(src[1:], src)
if ml == len(src)-1 {
blk.literals = append(blk.literals, src[0])
blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
return
}
}
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
@@ -145,7 +156,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@@ -162,6 +173,7 @@ encodeLoop:
off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@@ -258,7 +270,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@@ -498,15 +509,15 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
off += 2
}
cv = load6432(src, s)
@@ -672,7 +683,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@@ -691,6 +702,7 @@ encodeLoop:
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@@ -726,7 +738,6 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff
nextEmit = s
@@ -790,7 +801,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@@ -1024,18 +1034,18 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
off += 2
}
cv = load6432(src, s)

View File

@@ -227,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error {
DictID: e.o.dict.ID(),
}
dst, err := fh.appendTo(tmp[:0])
if err != nil {
return err
}
dst := fh.appendTo(tmp[:0])
s.headerWritten = true
s.wWg.Wait()
var n2 int
@@ -483,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
Checksum: false,
DictID: 0,
}
dst, _ = fh.appendTo(dst)
dst = fh.appendTo(dst)
// Write raw block as last one only.
var blk blockHeader
@@ -518,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
dst, err := fh.appendTo(dst)
if err != nil {
panic(err)
}
dst = fh.appendTo(dst)
// If we can do everything in one block, prefer that.
if len(src) <= e.o.blockSize {
@@ -581,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// Add padding with content from crypto/rand.Reader
if e.o.pad > 0 {
add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
var err error
dst, err = skippableFrame(dst, add, rand.Reader)
if err != nil {
panic(err)

View File

@@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption {
// The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer.
// The default value is determined by the compression level.
// The default value is determined by the compression level, capped at 8MB.
func WithWindowSize(n int) EOption {
return func(o *encoderOptions) error {
switch {
@@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
case SpeedDefault:
o.windowSize = 8 << 20
case SpeedBetterCompression:
o.windowSize = 16 << 20
o.windowSize = 8 << 20
case SpeedBestCompression:
o.windowSize = 32 << 20
o.windowSize = 8 << 20
}
}
if !o.customALEntropy {
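Since `SpeedBetterCompression` and `SpeedBestCompression` now default to an 8MB window, callers that relied on the old 16/32MB windows can restore them explicitly. A hedged sketch:

```go
import (
	"io"

	"github.com/klauspost/compress/zstd"
)

// newBigWindowWriter opts back into a 32 MB window after the defaults
// for the two highest levels dropped to 8 MB in this update.
func newBigWindowWriter(w io.Writer) (*zstd.Encoder, error) {
	return zstd.NewWriter(w,
		zstd.WithEncoderLevel(zstd.SpeedBestCompression),
		zstd.WithWindowSize(32<<20), // power of two in [MinWindowSize, MaxWindowSize]
	)
}
```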

View File

@@ -22,7 +22,7 @@ type frameHeader struct {
const maxHeaderSize = 14
func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
func (f frameHeader) appendTo(dst []byte) []byte {
dst = append(dst, frameMagic...)
var fhd uint8
if f.Checksum {
@@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
if f.SingleSegment {
dst = append(dst, uint8(f.ContentSize))
}
// Unless SingleSegment is set, frame sizes < 256 are not stored.
case 1:
f.ContentSize -= 256
dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))
@@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
default:
panic("invalid fcs")
}
return dst, nil
return dst
}
const skippableFrameHeader = 4 + 4

View File

@@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error {
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
symbolNext[i] = uint16(v)
v = 1
}
symbolNext[i] = uint16(v)
}
}
@@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error {
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
for {
// lowprob area
position = (position + step) & tableMask
if position <= highThreshold {
break
}
}
}
}
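For context, this loop is the classic FSE symbol spread. A hedged, self-contained sketch (the step formula is the conventional one from FSE, assumed here rather than taken from this diff):

```go
// spreadSymbols fills a decoding table: each symbol occupies norm[s]
// cells, visited with a fixed odd step so symbols interleave evenly.
// Cells above highThreshold are reserved for low-probability (-1)
// symbols, so the spread steps over them.
func spreadSymbols(norm []int16, tableLog uint) []byte {
	tableSize := 1 << tableLog
	tableMask := tableSize - 1
	step := tableSize>>1 + tableSize>>3 + 3 // conventional FSE step
	highThreshold := tableSize - 1
	dt := make([]byte, tableSize)
	position := 0
	for ss, v := range norm {
		if v == -1 { // low-probability symbol: one cell at the top
			dt[highThreshold] = byte(ss)
			highThreshold--
			continue
		}
		for i := 0; i < int(v); i++ {
			dt[position] = byte(ss)
			for {
				position = (position + step) & tableMask
				if position <= highThreshold {
					break
				}
			}
		}
	}
	return dt
}
```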

View File

@@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
return io.ErrUnexpectedEOF
}
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
@@ -452,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol)
// extra bits are stored in reverse order.
br.fill()
if s.maxBits <= 32 {
mo += br.getBits(moB)
ml += br.getBits(mlB)
ll += br.getBits(llB)
} else {
mo += br.getBits(moB)
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fill()
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
}
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
mo = s.adjustOffset(mo, ll, moB)
return
}
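The restructure works because of a bit budget the old branch obscured: `fill()` guarantees at least 32 readable bits, the offset carries the largest extra-bit count, and match-length plus literal-length extras never exceed 16+16 = 32 bits. A trivial hedged sketch of the condition:

```go
// needsSecondFill: only when the worst-case total of extra bits can
// exceed one 32-bit refill (s.maxBits > 32 in the code above) must the
// reader refill between the offset read and the ml/ll reads.
func needsSecondFill(moBits, mlBits, llBits uint8) bool {
	return int(moBits)+int(mlBits)+int(llBits) > 32
}
```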

View File

@@ -5,11 +5,11 @@
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
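The register and offset changes in this and every following assembly hunk fall out mechanically from the `bitReader` field removal above: dropping `off uint` shifts the later fields down by 8 bytes, and the slice length at offset 8 replaces `off` as the read cursor. A hedged layout sketch (64-bit platform; a slice header is pointer, len, cap):

```go
// Field offsets that the assembly reads, before and after the change.
type bitReaderOld struct {
	in       []byte // ptr at 0, len at 8, cap at 16
	off      uint   // 24 -> the old MOVQ 24(AX), SI
	value    uint64 // 32 -> the old MOVQ 32(AX), DX
	bitsRead uint8  // 40 -> the old MOVBQZX 40(AX), BX
}

type bitReaderNew struct {
	in       []byte // len at 8 -> the new MOVQ 8(CX), SI cursor
	value    uint64 // 24 -> the new MOVQ 24(CX), DX
	bitsRead uint8  // 32 -> the new MOVBQZX 32(CX), BX
}
```

Separately, the `SHRQ $0x10` + `MOVWQZX` pairs collapse into a single `SHRL $0x10`, and the `MOVQ $0x00001010` + `BEXTRQ` sequences do the same: writing a 32-bit register clears the upper half, so the 32-bit shift alone leaves the wanted 16-bit field zero-extended.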
@@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R14
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R14
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R14
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -301,9 +298,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -336,11 +333,11 @@ error_overread:
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R14
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R14
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R14
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@@ -603,9 +597,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -638,11 +632,11 @@ error_overread:
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
BZHIQ R14, R15, R15
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -892,9 +883,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -927,11 +918,11 @@ error_overread:
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -1032,11 +1023,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
BZHIQ R14, R15, R15
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -1152,9 +1140,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -1797,11 +1785,11 @@ empty_seqs:
// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R13
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R13
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R13
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -2295,9 +2280,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2362,11 +2347,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
BZHIQ R13, R14, R14
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -2818,9 +2800,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2885,11 +2867,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R13
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R13
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R13
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@@ -3485,9 +3464,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -3552,11 +3531,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
BZHIQ R13, R14, R14
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@@ -4110,9 +4086,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX

View File

@@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
for i := range seqs {
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)

View File

@@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
var written int64
var readHeader bool
{
var header []byte
var n int
header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
var n int
n, r.err = w.Write(header)
if r.err != nil {
return written, r.err