diff --git a/bench_test.go b/bench_test.go index 6111baf9..eadfcdf3 100644 --- a/bench_test.go +++ b/bench_test.go @@ -63,7 +63,7 @@ func BenchmarkUncompress(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _, _ = lz4block.UncompressBlock(pg1661LZ4, buf) + _, _ = lz4block.UncompressBlock(pg1661LZ4, buf, nil) } } diff --git a/fuzz/lz4.go b/fuzz/lz4.go index 13a107ba..f199b1e5 100644 --- a/fuzz/lz4.go +++ b/fuzz/lz4.go @@ -81,7 +81,7 @@ func FuzzUncompressBlock(data []byte) int { } decomp = decomp[:len(data)] - n, err := lz4.UncompressBlock(data, decomp) + n, err := lz4.UncompressBlock(data, decomp, nil) if n > len(decomp) { panic("uncompressed length greater than buffer") } diff --git a/internal/lz4block/block.go b/internal/lz4block/block.go index 8b971da5..88319105 100644 --- a/internal/lz4block/block.go +++ b/internal/lz4block/block.go @@ -41,11 +41,11 @@ func CompressBlockBound(n int) int { return n + n/255 + 16 } -func UncompressBlock(src, dst []byte) (int, error) { +func UncompressBlock(src, dst, dict []byte) (int, error) { if len(src) == 0 { return 0, nil } - if di := decodeBlock(dst, src); di >= 0 { + if di := decodeBlock(dst, src, dict); di >= 0 { return di, nil } return 0, lz4errors.ErrInvalidSourceShortBuffer diff --git a/internal/lz4block/block_test.go b/internal/lz4block/block_test.go index bd068651..9a9b5d8b 100644 --- a/internal/lz4block/block_test.go +++ b/internal/lz4block/block_test.go @@ -59,7 +59,7 @@ func TestCompressUncompressBlock(t *testing.T) { // Uncompress the data. 
buf := make([]byte, len(src)) - n, err = lz4block.UncompressBlock(zbuf, buf) + n, err = lz4block.UncompressBlock(zbuf, buf, nil) if err != nil { t.Fatal(err) } else if n < 0 || n > len(buf) { diff --git a/internal/lz4block/decode_amd64.s b/internal/lz4block/decode_amd64.s index dd323300..dfcca572 100644 --- a/internal/lz4block/decode_amd64.s +++ b/internal/lz4block/decode_amd64.s @@ -16,9 +16,11 @@ // R11 &dst // R12 short output end // R13 short input end +// R14 &dict +// R15 &dict + len(dict) -// func decodeBlock(dst, src []byte) int -TEXT ·decodeBlock(SB), NOSPLIT, $48-56 +// func decodeBlock(dst, src, dict []byte) int +TEXT ·decodeBlock(SB), NOSPLIT, $48-80 MOVQ dst_base+0(FP), DI MOVQ DI, R11 MOVQ dst_len+8(FP), R8 @@ -30,6 +32,10 @@ TEXT ·decodeBlock(SB), NOSPLIT, $48-56 JE err_corrupt ADDQ SI, R9 + MOVQ dict_base+48(FP), R14 + MOVQ dict_len+56(FP), R15 + ADDQ R14, R15 + // shortcut ends // short output end MOVQ R8, R12 @@ -96,6 +102,8 @@ loop: // match length, we already have the offset. 
CMPQ CX, $0xF JEQ match_len_loop_pre + CMPQ DX, R11 + JLT match_len_loop_pre CMPQ DX, $8 JLT match_len_loop_pre CMPQ AX, R11 @@ -280,7 +288,7 @@ copy_match: // check BX is within dst // if BX < &dst CMPQ BX, R11 - JLT err_short_buf + JLT copy_match_from_dict // if offset + match_len < di LEAQ (BX)(CX*1), AX @@ -327,6 +335,81 @@ copy_interior_match: ADDQ CX, DI JMP loop +copy_match_from_dict: + // CX = match_len + // BX = &dst + (di - offset) + + // AX = offset - di = dict_bytes_available => count of bytes potentially covered by the dictionary + MOVQ R11, AX + SUBQ BX, AX + + // BX = &dict_end - dict_bytes_available + MOVQ R15, BX + SUBQ AX, BX + + // check BX is within dict + // if BX < &dict + CMPQ BX, R14 + JLT err_short_dict + + // if match_len < dict_bytes_available, the match fits entirely within the external dictionary: just copy + CMPQ CX, AX + JLT memmove_match + + // The match stretches over the dictionary and our block + // 1) copy what comes from the dictionary + // AX = dict_bytes_available = copy_size + // BX = &dict_end - copy_size + // CX = match_len + + // memmove(to, from, len) + MOVQ DI, 0(SP) + MOVQ BX, 8(SP) + MOVQ AX, 16(SP) + // store extra stuff we want to recover + // spill + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) + CALL runtime·memmove(SB) + + // restore registers + MOVQ 16(SP), AX // copy_size + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX // match_len + + // recalc initial values + MOVQ dst_base+0(FP), R8 + MOVQ R8, R11 // TODO: make these sensible numbers + ADDQ dst_len+8(FP), R8 + MOVQ src_base+24(FP), R9 + ADDQ src_len+32(FP), R9 + MOVQ dict_base+48(FP), R14 + MOVQ dict_len+56(FP), R15 + ADDQ R14, R15 + MOVQ R8, R12 + SUBQ $32, R12 + MOVQ R9, R13 + SUBQ $16, R13 + + // di+=copy_size + ADDQ AX, DI + + // 2) copy the rest from the current block + // CX = match_len - copy_size = rest_size + SUBQ AX, CX + MOVQ R11, BX + + // check if we have a copy overlap + // AX = &dst + rest_size + MOVQ CX, AX + ADDQ BX, AX + // if &dst + 
rest_size > di, copy byte by byte + CMPQ AX, DI + + JGT copy_match_loop + memmove_match: // memmove(to, from, len) MOVQ DI, 0(SP) @@ -354,18 +437,25 @@ memmove_match: SUBQ $32, R12 MOVQ R9, R13 SUBQ $16, R13 + MOVQ dict_base+48(FP), R14 + MOVQ dict_len+56(FP), R15 + ADDQ R14, R15 JMP loop err_corrupt: - MOVQ $-1, ret+48(FP) + MOVQ $-1, ret+72(FP) RET err_short_buf: - MOVQ $-2, ret+48(FP) + MOVQ $-2, ret+72(FP) + RET + +err_short_dict: + MOVQ $-3, ret+72(FP) RET end: SUBQ R11, DI - MOVQ DI, ret+48(FP) + MOVQ DI, ret+72(FP) RET diff --git a/internal/lz4block/decode_arm.s b/internal/lz4block/decode_arm.s index 64be9adc..defb00c7 100644 --- a/internal/lz4block/decode_arm.s +++ b/internal/lz4block/decode_arm.s @@ -19,12 +19,12 @@ #define minMatch $4 -// func decodeBlock(dst, src []byte) int -TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-28 - MOVW dst_base +0(FP), dst - MOVW dst_len +4(FP), dstend - MOVW src_base+12(FP), src - MOVW src_len +16(FP), srcend +// func decodeBlock(dst, src, dict []byte) int +TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-40 + MOVW dst_base +0(FP), dst + MOVW dst_len +4(FP), dstend + MOVW src_base +12(FP), src + MOVW src_len +16(FP), srcend CMP $0, srcend BEQ shortSrc @@ -183,7 +183,7 @@ copyMatchDone: end: SUB dstorig, dst, tmp1 - MOVW tmp1, ret+24(FP) + MOVW tmp1, ret+36(FP) RET // The three error cases have distinct labels so we can put different @@ -193,5 +193,5 @@ shortDst: shortSrc: corrupt: MOVW $-1, tmp1 - MOVW tmp1, ret+24(FP) + MOVW tmp1, ret+36(FP) RET diff --git a/internal/lz4block/decode_asm.go b/internal/lz4block/decode_asm.go index e26f8cd6..ca4b11aa 100644 --- a/internal/lz4block/decode_asm.go +++ b/internal/lz4block/decode_asm.go @@ -6,4 +6,4 @@ package lz4block //go:noescape -func decodeBlock(dst, src []byte) int +func decodeBlock(dst, src, dict []byte) int diff --git a/internal/lz4block/decode_other.go b/internal/lz4block/decode_other.go index 52df2f2b..7ab0cd6e 100644 --- a/internal/lz4block/decode_other.go +++ 
b/internal/lz4block/decode_other.go @@ -2,12 +2,15 @@ package lz4block -import "encoding/binary" +import ( + "encoding/binary" +) -func decodeBlock(dst, src []byte) (ret int) { +func decodeBlock(dst, src, dict []byte) (ret int) { // Restrict capacities so we don't read or write out of bounds. dst = dst[:len(dst):len(dst)] src = src[:len(src):len(src)] + dictLen := uint(len(dict)) const hasError = -2 defer func() { @@ -38,7 +41,7 @@ func decodeBlock(dst, src []byte) (ret int) { // if the match length (4..18) fits within the literals, then copy // all 18 bytes, even if not all are part of the literals. mLen += 4 - if offset := u16(src[si:]); mLen <= offset { + if offset := u16(src[si:]); mLen <= offset && offset < di { i := di - offset end := i + 18 if end > uint(len(dst)) { @@ -91,6 +94,38 @@ func decodeBlock(dst, src []byte) (ret int) { mLen += minMatch // Copy the match. + if di < offset { + // The match starts before our block, i.e. in the dictionary + if offset-di > mLen { + // The match is entirely contained in the dictionary. Just copy! + copy(dst[di:di+mLen], dict[dictLen+di-offset:dictLen+di-offset+mLen]) + di = di + mLen + } else { + // The match stretches over the dictionary and our block + copySize := offset - di + restSize := mLen - copySize + + copy(dst[di:di+copySize], dict[dictLen-copySize:]) + di = di + copySize + + if di < restSize { + // Overlap - we want to copy more than what we have available, + // so copy byte by byte. + copyFrom := 0 + endOfMatch := di + restSize + for di < endOfMatch { + dst[di] = dst[copyFrom] + di = di + 1 + copyFrom = copyFrom + 1 + } + } else { + copy(dst[di:di+restSize], dst[0:restSize]) + di = di + restSize + } + } + continue + } + expanded := dst[di-offset:] if mLen > offset { // Efficiently copy the match dst[di-offset:di] into the dst slice. 
diff --git a/internal/lz4block/decode_test.go b/internal/lz4block/decode_test.go index 017ee03f..6ba9ee6f 100644 --- a/internal/lz4block/decode_test.go +++ b/internal/lz4block/decode_test.go @@ -125,7 +125,7 @@ func TestBlockDecode(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { buf := make([]byte, len(test.exp)) - n := decodeBlock(buf, test.src) + n := decodeBlock(buf, test.src, nil) if n < 0 { t.Log(n) } @@ -167,7 +167,7 @@ func TestDecodeBlockInvalid(t *testing.T) { } dst = dst[:test.size] - r := decodeBlock(dst, []byte(test.src)) + r := decodeBlock(dst, []byte(test.src), nil) if r >= 0 { t.Errorf("no error for %s", test.name) } diff --git a/internal/lz4stream/block.go b/internal/lz4stream/block.go index c7b929fd..446c74a4 100644 --- a/internal/lz4stream/block.go +++ b/internal/lz4stream/block.go @@ -127,7 +127,7 @@ func (b *Blocks) initR(f *Frame, num int, src io.Reader) (chan []byte, error) { blocks <- c go func() { defer block.Close(f) - data, err := block.Uncompress(f, size.Get(), false) + data, err := block.Uncompress(f, size.Get(), nil, false) if err != nil { b.closeR(err) } else { @@ -303,12 +303,12 @@ func (b *FrameDataBlock) Read(f *Frame, src io.Reader, cum uint32) (uint32, erro return x, nil } -func (b *FrameDataBlock) Uncompress(f *Frame, dst []byte, sum bool) ([]byte, error) { +func (b *FrameDataBlock) Uncompress(f *Frame, dst, dict []byte, sum bool) ([]byte, error) { if b.Size.Uncompressed() { n := copy(dst, b.data) dst = dst[:n] } else { - n, err := lz4block.UncompressBlock(b.data, dst) + n, err := lz4block.UncompressBlock(b.data, dst, dict) if err != nil { return nil, err } diff --git a/internal/lz4stream/frame.go b/internal/lz4stream/frame.go index cfbd5674..18192a94 100644 --- a/internal/lz4stream/frame.go +++ b/internal/lz4stream/frame.go @@ -77,16 +77,16 @@ func (f *Frame) isLegacy() bool { return f.Magic == frameMagicLegacy } -func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) { +func (f 
*Frame) ParseHeaders(src io.Reader) error { if f.Magic > 0 { // Header already read. - return nil, nil + return nil } newFrame: var err error if f.Magic, err = f.readUint32(src); err != nil { - return nil, err + return err } switch m := f.Magic; { case m == frameMagic || m == frameMagicLegacy: @@ -94,19 +94,23 @@ newFrame: case m>>8 == frameSkipMagic>>8: skip, err := f.readUint32(src) if err != nil { - return nil, err + return err } if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil { - return nil, err + return err } goto newFrame default: - return nil, lz4errors.ErrInvalidFrame + return lz4errors.ErrInvalidFrame } if err := f.Descriptor.initR(f, src); err != nil { - return nil, err + return err } f.checksum.Reset() + return nil +} + +func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) { return f.Blocks.initR(f, num, src) } diff --git a/internal/lz4stream/frame_test.go b/internal/lz4stream/frame_test.go index 907a8071..1f850c07 100644 --- a/internal/lz4stream/frame_test.go +++ b/internal/lz4stream/frame_test.go @@ -99,7 +99,7 @@ func TestFrameDataBlock(t *testing.T) { t.Fatal(err) } buf := make([]byte, size) - buf, err := block.Uncompress(f, buf, false) + buf, err := block.Uncompress(f, buf, nil, false) if err != nil { t.Fatal(err) } diff --git a/lz4.go b/lz4.go index c585d406..a62022e0 100644 --- a/lz4.go +++ b/lz4.go @@ -35,7 +35,17 @@ func CompressBlockBound(n int) int { // // An error is returned if the source data is invalid or the destination buffer is too small. func UncompressBlock(src, dst []byte) (int, error) { - return lz4block.UncompressBlock(src, dst) + return lz4block.UncompressBlock(src, dst, nil) +} + +// UncompressBlockWithDict uncompresses the source buffer into the destination one using a +// dictionary, and returns the uncompressed size. +// +// The destination buffer must be sized appropriately. +// +// An error is returned if the source data is invalid or the destination buffer is too small. 
+func UncompressBlockWithDict(src, dst, dict []byte) (int, error) { + return lz4block.UncompressBlock(src, dst, dict) } // A Compressor compresses data into the LZ4 block format. diff --git a/options.go b/options.go index 4e1b6703..46a87380 100644 --- a/options.go +++ b/options.go @@ -2,10 +2,11 @@ package lz4 import ( "fmt" - "github.com/pierrec/lz4/v4/internal/lz4block" - "github.com/pierrec/lz4/v4/internal/lz4errors" "reflect" "runtime" + + "github.com/pierrec/lz4/v4/internal/lz4block" + "github.com/pierrec/lz4/v4/internal/lz4errors" ) //go:generate go run golang.org/x/tools/cmd/stringer -type=BlockSize,CompressionLevel -output options_gen.go diff --git a/reader.go b/reader.go index 403aaf69..f8458807 100644 --- a/reader.go +++ b/reader.go @@ -40,6 +40,7 @@ type Reader struct { idx int // size of pending data handler func(int) cum uint32 + dict []byte } func (*Reader) private() {} @@ -77,6 +78,15 @@ func (r *Reader) isNotConcurrent() bool { } func (r *Reader) init() error { + err := r.frame.ParseHeaders(r.src) + if err != nil { + return err + } + if !r.frame.Descriptor.Flags.BlockIndependence() { + // We can't decompress dependent blocks concurrently. + // Instead of throwing an error to the user, silently drop concurrency + r.num = 1 + } data, err := r.frame.InitR(r.src, r.num) if err != nil { return err @@ -162,10 +172,20 @@ func (r *Reader) read(buf []byte) (int, error) { direct = true dst = buf } - dst, err = block.Uncompress(r.frame, dst, true) + dst, err = block.Uncompress(r.frame, dst, r.dict, true) if err != nil { return 0, err } + if !r.frame.Descriptor.Flags.BlockIndependence() { + if len(r.dict)+len(dst) > 128*1024 { + preserveSize := 64*1024 - len(dst) + if preserveSize < 0 { + preserveSize = 0 + } + r.dict = r.dict[len(r.dict)-preserveSize:] + } + r.dict = append(r.dict, dst...) 
+ } r.cum += uint32(len(dst)) if direct { return len(dst), nil diff --git a/reader_test.go b/reader_test.go index 7c5beebc..ab3c2bc1 100644 --- a/reader_test.go +++ b/reader_test.go @@ -40,6 +40,10 @@ func TestReader(t *testing.T) { name: "testdata/Mark.Twain-Tom.Sawyer_long.txt.lz4", isText: true, }, + { + name: "testdata/Mark.Twain-Tom.Sawyer_linked.txt.lz4", + isText: true, + }, { name: "testdata/pg1661.txt.lz4", isText: false, diff --git a/testdata/Mark.Twain-Tom.Sawyer_linked.txt b/testdata/Mark.Twain-Tom.Sawyer_linked.txt new file mode 120000 index 00000000..40907708 --- /dev/null +++ b/testdata/Mark.Twain-Tom.Sawyer_linked.txt @@ -0,0 +1 @@ +Mark.Twain-Tom.Sawyer_long.txt \ No newline at end of file diff --git a/testdata/Mark.Twain-Tom.Sawyer_linked.txt.lz4 b/testdata/Mark.Twain-Tom.Sawyer_linked.txt.lz4 new file mode 100644 index 00000000..f78f77dd Binary files /dev/null and b/testdata/Mark.Twain-Tom.Sawyer_linked.txt.lz4 differ diff --git a/writer_test.go b/writer_test.go index e43b0fcb..a945f84a 100644 --- a/writer_test.go +++ b/writer_test.go @@ -175,7 +175,7 @@ func TestIssue51(t *testing.T) { zbuf = zbuf[:n] buf := make([]byte, 8192) - n, err = lz4block.UncompressBlock(zbuf, buf) + n, err = lz4block.UncompressBlock(zbuf, buf, nil) if err != nil { t.Fatal(err) }