|
| 1 | +const std = @import("std"); |
| 2 | + |
/// History length; `max_window_len` is defined as twice this value.
pub const history_len = 32768;

/// When compressing and decompressing, the provided buffer is used as the
/// history window, so it must be at least this size.
pub const max_window_len = 2 * history_len;
| 8 | + |
| 9 | +/// Deflate is a lossless data compression file format that uses a combination |
| 10 | +/// of LZ77 and Huffman coding. |
| 11 | +pub const Compress = @import("flate/Compress.zig"); |
| 12 | + |
| 13 | +/// Inflate is the decoding process that consumes a Deflate bitstream and |
| 14 | +/// produces the original full-size data. |
| 15 | +pub const Decompress = @import("flate/Decompress.zig"); |
| 16 | + |
/// Wrapper format around a deflate bit stream. A container puts a header in
/// front of the deflate bit stream and a footer behind it. It can be gzip,
/// zlib or raw (no header, no footer, just the raw bit stream).
///
/// The zlib format is defined in RFC 1950: a 2 byte header and a 4 byte
/// footer holding an Adler-32 checksum.
///
/// The gzip format is defined in RFC 1952: a header of 10 or more bytes and a
/// footer made of a 4 byte CRC-32 checksum followed by 4 bytes of uncompressed
/// data length.
///
/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
pub const Container = enum {
    raw, // no header or footer
    gzip, // gzip header and footer
    zlib, // zlib header and footer

    /// Every container kind, in declaration order.
    pub const list = [_]Container{ .raw, .gzip, .zlib };

    /// Container-level failures: malformed header, or a footer checksum/size
    /// that does not match.
    pub const Error = error{
        BadGzipHeader,
        BadZlibHeader,
        WrongGzipChecksum,
        WrongGzipSize,
        WrongZlibChecksum,
    };

    // GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
    //  - ID1 (IDentification 1), always 0x1f
    //  - ID2 (IDentification 2), always 0x8b
    //  - CM (Compression Method), always 8 = deflate
    //  - FLG (Flags), all set to 0
    //  - 4 bytes, MTIME (Modification time), not used, all set to zero
    //  - XFL (eXtra FLags), all set to zero
    //  - OS (Operating System), 03 = Unix
    const gzip_header = [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 };

    // ZLIB two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
    // 1st byte:
    //  - High four bits: CINFO (compression info), 7 for the default deflate window size.
    //  - Low four bits: CM (compression method), 8 for deflate.
    // 2nd byte:
    //  - Top two bits: FLEVEL (compression level). 0=fastest, 1=fast, 2=default, 3=best.
    //  - Next bit: FDICT, set if a dictionary is given.
    //  - Low five bits: FCHECK, a mod-31 checksum.
    //
    // CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
    const zlib_header = [_]u8{ 0x78, 0b10_0_11100 };

    /// Total number of bytes the container adds: header plus footer.
    pub fn size(w: Container) usize {
        return w.headerSize() + w.footerSize();
    }

    /// Length in bytes of the header returned by `header`.
    pub fn headerSize(w: Container) usize {
        return w.header().len;
    }

    /// Length in bytes of the footer that `Hasher.writeFooter` emits.
    pub fn footerSize(w: Container) usize {
        return switch (w) {
            .raw => 0,
            .zlib => 4, // Adler-32
            .gzip => 8, // CRC-32 followed by ISIZE
        };
    }

    /// Returns the fixed header bytes for the container; empty for `.raw`.
    pub fn header(container: Container) []const u8 {
        return switch (container) {
            .raw => &.{},
            .zlib => &zlib_header,
            .gzip => &gzip_header,
        };
    }

    /// Running checksum state for the footer of each container kind.
    pub const Hasher = union(Container) {
        raw: void,
        gzip: struct {
            crc: std.hash.Crc32 = .init(),
            count: u32 = 0,
        },
        zlib: std.hash.Adler32,

        /// Creates a hasher in its default state for the requested container.
        pub fn init(kind: Container) Hasher {
            return switch (kind) {
                .raw => .{ .raw = {} },
                .zlib => .{ .zlib = .{} },
                .gzip => .{ .gzip = .{} },
            };
        }

        /// Returns the container kind this hasher was created for.
        pub fn container(h: Hasher) Container {
            return @as(Container, h);
        }

        /// Feeds `buf` into the checksum; a no-op for `.raw`.
        pub fn update(h: *Hasher, buf: []const u8) void {
            switch (h.*) {
                .zlib => |*adler32| adler32.update(buf),
                .gzip => |*gz| {
                    // The byte count wraps, matching the modulo 2^32 ISIZE field.
                    gz.count +%= @truncate(buf.len);
                    gz.crc.update(buf);
                },
                .raw => {},
            }
        }

        /// Writes the container footer to `writer`; nothing is written for `.raw`.
        pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
            switch (hasher.*) {
                .raw => {},
                .zlib => |*adler32| {
                    // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
                    // The footer is the 4 byte ADLER32 value: the Adler-32
                    // checksum of the uncompressed data (excluding any
                    // dictionary data).
                    const checksum: u32 = adler32.adler;
                    try writer.writeInt(u32, checksum, .big);
                },
                .gzip => |*gz| {
                    // GZIP 8 byte footer, both fields little-endian:
                    //  - 4 bytes, CRC32 (CRC-32)
                    //  - 4 bytes, ISIZE (Input SIZE) - size of the original
                    //    (uncompressed) input data modulo 2^32
                    const checksum: u32 = gz.crc.final();
                    try writer.writeInt(u32, checksum, .little);
                    const input_size: u32 = gz.count;
                    try writer.writeInt(u32, input_size, .little);
                },
            }
        }
    };

    /// Plain footer values per container kind, all defaulting to zero.
    /// NOTE(review): presumably filled in by the decompressor - confirm
    /// against Decompress.zig.
    pub const Metadata = union(Container) {
        raw: void,
        gzip: struct {
            crc: u32 = 0,
            count: u32 = 0,
        },
        zlib: struct {
            adler: u32 = 0,
        },

        /// Creates zero-initialized metadata for the requested container.
        pub fn init(kind: Container) Metadata {
            return switch (kind) {
                .raw => .{ .raw = {} },
                .zlib => .{ .zlib = .{} },
                .gzip => .{ .gzip = .{} },
            };
        }

        /// Returns the container kind this metadata belongs to.
        pub fn container(m: Metadata) Container {
            return @as(Container, m);
        }
    };
};
| 165 | + |
test {
    // Reference both submodules from this file's test block so they are
    // analyzed as part of it.
    _ = Decompress;
    _ = Compress;
}
0 commit comments