Skip to content

Commit

Permalink
implement linked-block decompression
Browse files Browse the repository at this point in the history
  • Loading branch information
maxlaverse committed Apr 3, 2021
1 parent 284f056 commit 8fa51c0
Show file tree
Hide file tree
Showing 19 changed files with 206 additions and 41 deletions.
2 changes: 1 addition & 1 deletion bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func BenchmarkUncompress(b *testing.B) {
b.ResetTimer()

for i := 0; i < b.N; i++ {
_, _ = lz4block.UncompressBlock(pg1661LZ4, buf)
_, _ = lz4block.UncompressBlock(pg1661LZ4, buf, nil)
}
}

Expand Down
2 changes: 1 addition & 1 deletion fuzz/lz4.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func FuzzUncompressBlock(data []byte) int {
}
decomp = decomp[:len(data)]

n, err := lz4.UncompressBlock(data, decomp)
n, err := lz4.UncompressBlock(data, decomp, nil)
if n > len(decomp) {
panic("uncompressed length greater than buffer")
}
Expand Down
4 changes: 2 additions & 2 deletions internal/lz4block/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ func CompressBlockBound(n int) int {
return n + n/255 + 16
}

func UncompressBlock(src, dst []byte) (int, error) {
func UncompressBlock(src, dst, dict []byte) (int, error) {
if len(src) == 0 {
return 0, nil
}
if di := decodeBlock(dst, src); di >= 0 {
if di := decodeBlock(dst, src, dict); di >= 0 {
return di, nil
}
return 0, lz4errors.ErrInvalidSourceShortBuffer
Expand Down
2 changes: 1 addition & 1 deletion internal/lz4block/block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TestCompressUncompressBlock(t *testing.T) {

// Uncompress the data.
buf := make([]byte, len(src))
n, err = lz4block.UncompressBlock(zbuf, buf)
n, err = lz4block.UncompressBlock(zbuf, buf, nil)
if err != nil {
t.Fatal(err)
} else if n < 0 || n > len(buf) {
Expand Down
102 changes: 96 additions & 6 deletions internal/lz4block/decode_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
// R11 &dst
// R12 short output end
// R13 short input end
// R14 &dict
// R15 &dict + len(dict)

// func decodeBlock(dst, src []byte) int
TEXT ·decodeBlock(SB), NOSPLIT, $48-56
// func decodeBlock(dst, src, dict []byte) int
TEXT ·decodeBlock(SB), NOSPLIT, $48-80
MOVQ dst_base+0(FP), DI
MOVQ DI, R11
MOVQ dst_len+8(FP), R8
Expand All @@ -30,6 +32,10 @@ TEXT ·decodeBlock(SB), NOSPLIT, $48-56
JE err_corrupt
ADDQ SI, R9

MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
ADDQ R14, R15

// shortcut ends
// short output end
MOVQ R8, R12
Expand Down Expand Up @@ -96,6 +102,8 @@ loop:
// match length, we already have the offset.
CMPQ CX, $0xF
JEQ match_len_loop_pre
CMPQ DX, R11
JLT match_len_loop_pre
CMPQ DX, $8
JLT match_len_loop_pre
CMPQ AX, R11
Expand Down Expand Up @@ -280,7 +288,7 @@ copy_match:
// check BX is within dst
// if BX < &dst
CMPQ BX, R11
JLT err_short_buf
JLT copy_match_from_dict

// if offset + match_len < di
LEAQ (BX)(CX*1), AX
Expand Down Expand Up @@ -327,6 +335,81 @@ copy_interior_match:
ADDQ CX, DI
JMP loop

copy_match_from_dict:
// CX = match_len
// BX = &dst + (di - offset)

// AX = offset - di = dict_bytes_available => count of bytes potentially covered by the dictionary
MOVQ R11, AX
SUBQ BX, AX

// BX = &dict_end - dict_bytes_available
MOVQ R15, BX
SUBQ AX, BX

// check BX is within dict
// if BX < &dict
CMPQ BX, R14
JLT err_short_dict

// if match_len < dict_bytes_available, the match fits entirely within the external dictionary: just copy
CMPQ CX, AX
JLT memmove_match

// The match stretches over the dictionary and our block
// 1) copy what comes from the dictionary
// AX = dict_bytes_available = copy_size
// BX = &dict_end - copy_size
// CX = match_len

// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ BX, 8(SP)
MOVQ AX, 16(SP)
// store extra stuff we want to recover
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP)
CALL runtime·memmove(SB)

// restore registers
MOVQ 16(SP), AX // copy_size
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX // match_len

// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11 // TODO: make these sensible numbers
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
ADDQ R14, R15
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13

// di+=copy_size
ADDQ AX, DI

// 2) copy the rest from the current block
// CX = match_len - copy_size = rest_size
SUBQ AX, CX
MOVQ R11, BX

// check if we have a copy overlap
// AX = &dst + rest_size
MOVQ CX, AX
ADDQ BX, AX
// if &dst + rest_size > di, copy byte by byte
CMPQ AX, DI

JGT copy_match_loop

memmove_match:
// memmove(to, from, len)
MOVQ DI, 0(SP)
Expand Down Expand Up @@ -354,18 +437,25 @@ memmove_match:
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
ADDQ R14, R15

JMP loop

err_corrupt:
MOVQ $-1, ret+48(FP)
MOVQ $-1, ret+72(FP)
RET

err_short_buf:
MOVQ $-2, ret+48(FP)
MOVQ $-2, ret+72(FP)
RET

err_short_dict:
MOVQ $-3, ret+72(FP)
RET

end:
SUBQ R11, DI
MOVQ DI, ret+48(FP)
MOVQ DI, ret+72(FP)
RET
16 changes: 8 additions & 8 deletions internal/lz4block/decode_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@

#define minMatch $4

// func decodeBlock(dst, src []byte) int
TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-28
MOVW dst_base +0(FP), dst
MOVW dst_len +4(FP), dstend
MOVW src_base+12(FP), src
MOVW src_len +16(FP), srcend
// func decodeBlock(dst, src, dict []byte) int
TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-40
MOVW dst_base +0(FP), dst
MOVW dst_len +4(FP), dstend
MOVW src_base +12(FP), src
MOVW src_len +16(FP), srcend

CMP $0, srcend
BEQ shortSrc
Expand Down Expand Up @@ -183,7 +183,7 @@ copyMatchDone:

end:
SUB dstorig, dst, tmp1
MOVW tmp1, ret+24(FP)
MOVW tmp1, ret+36(FP)
RET

// The three error cases have distinct labels so we can put different
Expand All @@ -193,5 +193,5 @@ shortDst:
shortSrc:
corrupt:
MOVW $-1, tmp1
MOVW tmp1, ret+24(FP)
MOVW tmp1, ret+36(FP)
RET
2 changes: 1 addition & 1 deletion internal/lz4block/decode_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
package lz4block

//go:noescape
func decodeBlock(dst, src []byte) int
func decodeBlock(dst, src, dict []byte) int
41 changes: 38 additions & 3 deletions internal/lz4block/decode_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@

package lz4block

import "encoding/binary"
import (
"encoding/binary"
)

func decodeBlock(dst, src []byte) (ret int) {
func decodeBlock(dst, src, dict []byte) (ret int) {
// Restrict capacities so we don't read or write out of bounds.
dst = dst[:len(dst):len(dst)]
src = src[:len(src):len(src)]
dictLen := uint(len(dict))

const hasError = -2
defer func() {
Expand Down Expand Up @@ -38,7 +41,7 @@ func decodeBlock(dst, src []byte) (ret int) {
// if the match length (4..18) fits within the literals, then copy
// all 18 bytes, even if not all are part of the literals.
mLen += 4
if offset := u16(src[si:]); mLen <= offset {
if offset := u16(src[si:]); mLen <= offset && offset < di {
i := di - offset
end := i + 18
if end > uint(len(dst)) {
Expand Down Expand Up @@ -91,6 +94,38 @@ func decodeBlock(dst, src []byte) (ret int) {
mLen += minMatch

// Copy the match.
if di < offset {
// The match starts before the beginning of our block, i.e. inside the dictionary.
if offset-di > mLen {
// The match is entirely contained in the dictionary. Just copy!
copy(dst[di:di+mLen], dict[dictLen+di-offset:dictLen+di-offset+mLen])
di = di + mLen
} else {
// The match stretches over the dictionary and our block
copySize := offset - di
restSize := mLen - copySize

copy(dst[di:di+copySize], dict[dictLen-copySize:])
di = di + copySize

if di < restSize {
// Overlap: the rest of the match is longer than what has been written
// to dst so far, so copy byte by byte.
copyFrom := 0
endOfMatch := di + restSize
for di < endOfMatch {
dst[di] = dst[copyFrom]
di = di + 1
copyFrom = copyFrom + 1
}
} else {
copy(dst[di:di+restSize], dst[0:restSize])
di = di + restSize
}
}
continue
}

expanded := dst[di-offset:]
if mLen > offset {
// Efficiently copy the match dst[di-offset:di] into the dst slice.
Expand Down
4 changes: 2 additions & 2 deletions internal/lz4block/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func TestBlockDecode(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
buf := make([]byte, len(test.exp))
n := decodeBlock(buf, test.src)
n := decodeBlock(buf, test.src, nil)
if n < 0 {
t.Log(n)
}
Expand Down Expand Up @@ -167,7 +167,7 @@ func TestDecodeBlockInvalid(t *testing.T) {
}
dst = dst[:test.size]

r := decodeBlock(dst, []byte(test.src))
r := decodeBlock(dst, []byte(test.src), nil)
if r >= 0 {
t.Errorf("no error for %s", test.name)
}
Expand Down
6 changes: 3 additions & 3 deletions internal/lz4stream/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func (b *Blocks) initR(f *Frame, num int, src io.Reader) (chan []byte, error) {
blocks <- c
go func() {
defer block.Close(f)
data, err := block.Uncompress(f, size.Get(), false)
data, err := block.Uncompress(f, size.Get(), nil, false)
if err != nil {
b.closeR(err)
} else {
Expand Down Expand Up @@ -303,12 +303,12 @@ func (b *FrameDataBlock) Read(f *Frame, src io.Reader, cum uint32) (uint32, erro
return x, nil
}

func (b *FrameDataBlock) Uncompress(f *Frame, dst []byte, sum bool) ([]byte, error) {
func (b *FrameDataBlock) Uncompress(f *Frame, dst, dict []byte, sum bool) ([]byte, error) {
if b.Size.Uncompressed() {
n := copy(dst, b.data)
dst = dst[:n]
} else {
n, err := lz4block.UncompressBlock(b.data, dst)
n, err := lz4block.UncompressBlock(b.data, dst, dict)
if err != nil {
return nil, err
}
Expand Down
18 changes: 11 additions & 7 deletions internal/lz4stream/frame.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,36 +77,40 @@ func (f *Frame) isLegacy() bool {
return f.Magic == frameMagicLegacy
}

func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) {
func (f *Frame) ParseHeaders(src io.Reader) error {
if f.Magic > 0 {
// Header already read.
return nil, nil
return nil
}

newFrame:
var err error
if f.Magic, err = f.readUint32(src); err != nil {
return nil, err
return err
}
switch m := f.Magic; {
case m == frameMagic || m == frameMagicLegacy:
// All 16 values of frameSkipMagic are valid.
case m>>8 == frameSkipMagic>>8:
skip, err := f.readUint32(src)
if err != nil {
return nil, err
return err
}
if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil {
return nil, err
return err
}
goto newFrame
default:
return nil, lz4errors.ErrInvalidFrame
return lz4errors.ErrInvalidFrame
}
if err := f.Descriptor.initR(f, src); err != nil {
return nil, err
return err
}
f.checksum.Reset()
return nil
}

// InitR sets up block reading from src by delegating to f.Blocks.initR,
// passing num through unchanged. It returns the channel and error produced
// by that call.
//
// NOTE(review): frame header parsing appears to live in ParseHeaders now;
// presumably callers must invoke it before InitR — confirm against callers.
func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) {
	blocks, err := f.Blocks.initR(f, num, src)
	return blocks, err
}

Expand Down
Loading

0 comments on commit 8fa51c0

Please sign in to comment.