-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunpack.cpp
392 lines (333 loc) · 11.6 KB
/
unpack.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
#include "rar.hpp"
#include "coder.cpp"
#include "suballoc.cpp"
#include "model.cpp"
#include "unpackinline.cpp"
#ifdef RAR_SMP
#include "unpack50mt.cpp"
#endif
#ifndef SFX_MODULE
#include "unpack15.cpp"
#include "unpack20.cpp"
#include "unpack30.cpp"
#endif
#include "unpack50.cpp"
#include "unpack50frag.cpp"
Unpack::Unpack(ComprDataIO *DataIO)
:Inp(true),VMCodeInp(true)
{
UnpIO=DataIO;
Window=NULL;
Fragmented=false;
Suspended=false;
UnpSomeRead=false;
ExtraDist=false;
#ifdef RAR_SMP
MaxUserThreads=1;
UnpThreadPool=NULL;
ReadBufMT=NULL;
UnpThreadData=NULL;
#endif
AllocWinSize=0;
MaxWinSize=0;
MaxWinMask=0;
// Perform initialization, which should be done only once for all files.
// It prevents crash if first unpacked file has the wrong "true" Solid flag,
// so first DoUnpack call is made with the wrong "true" Solid value later.
UnpInitData(false);
#ifndef SFX_MODULE
// RAR 1.5 decompression initialization
UnpInitData15(false);
InitHuff();
#endif
}
Unpack::~Unpack()
{
#ifndef SFX_MODULE
InitFilters30(false);
#endif
Alloc.delete_l<byte>(Window); // delete Window;
#ifdef RAR_SMP
delete UnpThreadPool;
delete[] ReadBufMT;
delete[] UnpThreadData;
#endif
}
#ifdef RAR_SMP
void Unpack::SetThreads(uint Threads)
{
// More than 8 threads are unlikely to provide noticeable gain
// for unpacking, but would use the additional memory.
MaxUserThreads=Min(Threads,8);
UnpThreadPool=new ThreadPool(MaxUserThreads);
}
#endif
// We get 64-bit WinSize, so we still can check and quit for dictionaries
// exceeding a threshold in 32-bit builds. Then we convert WinSize to size_t
// MaxWinSize.
void Unpack::Init(uint64 WinSize,bool Solid)
{
// Minimum window size must be at least twice more than maximum possible
// size of filter block, which is 0x10000 in RAR now. If window size is
// smaller, we can have a block with never cleared flt->NextWindow flag
// in UnpWriteBuf(). Minimum window size 0x20000 would be enough, but let's
// use 0x40000 for extra safety and possible filter area size expansion.
const size_t MinAllocSize=0x40000;
if (WinSize<MinAllocSize)
WinSize=MinAllocSize;
#ifdef CLAMAV
// Unrar does not support window size greather than 1GB at this time.
// Any request for a window larger than 1GB should be ignored.
const size_t MaxAllocSize=0x40000000;
if (WinSize>MaxAllocSize)
WinSize=MaxAllocSize;
#endif
if (WinSize>Min(0x10000000000ULL,UNPACK_MAX_DICT)) // Window size must not exceed 1 TB.
throw std::bad_alloc();
// 32-bit build can't unpack dictionaries exceeding 32-bit even in theory.
// Also we've not verified if WrapUp and WrapDown work properly in 32-bit
// version and >2GB dictionary and if 32-bit version can handle >2GB
// distances. Since such version is unlikely to allocate >2GB anyway,
// we prohibit >2GB dictionaries for 32-bit build here.
if (WinSize>0x80000000 && sizeof(size_t)<=4)
throw std::bad_alloc();
// Solid block shall use the same window size for all files.
// But if Window isn't initialized when Solid is set, it means that
// first file in solid block doesn't have the solid flag. We initialize
// the window anyway for such malformed archive.
// Non-solid files shall use their specific window sizes,
// so current window size and unpack routine behavior doesn't depend on
// previously unpacked files and their extraction order.
if (!Solid || Window==nullptr)
{
MaxWinSize=(size_t)WinSize;
MaxWinMask=MaxWinSize-1;
}
// Use the already allocated window when processing non-solid files
// with reducing dictionary sizes.
if (WinSize<=AllocWinSize)
return;
// Archiving code guarantees that window size does not grow in the same
// solid stream. So if we are here, we are either creating a new window
// or increasing the size of non-solid window. So we could safely reject
// current window data without copying them to a new window.
if (Solid && (Window!=NULL || Fragmented && WinSize>FragWindow.GetWinSize()))
throw std::bad_alloc();
Alloc.delete_l<byte>(Window); // delete Window;
Window=nullptr;
try
{
if (!Fragmented)
Window=Alloc.new_l<byte>((size_t)WinSize,false); // Window=new byte[(size_t)WinSize];
}
catch (std::bad_alloc) // Use the fragmented window in this case.
{
}
if (Window==nullptr)
if (WinSize<0x1000000 || sizeof(size_t)>4)
throw std::bad_alloc(); // Exclude RAR4, small dictionaries and 64-bit.
else
{
if (WinSize>FragWindow.GetWinSize())
FragWindow.Init((size_t)WinSize);
Fragmented=true;
}
if (!Fragmented)
{
// Clean the window to generate the same output when unpacking corrupt
// RAR files, which may access unused areas of sliding dictionary.
// 2023.10.31: We've added FirstWinDone based unused area access check
// in Unpack::CopyString(), so this memset might be unnecessary now.
// memset(Window,0,(size_t)WinSize);
AllocWinSize=WinSize;
}
}
void Unpack::DoUnpack(uint Method,bool Solid)
{
// Methods <50 will crash in Fragmented mode when accessing NULL Window.
// They cannot be called in such mode now, but we check it below anyway
// just for extra safety.
switch(Method)
{
#ifndef SFX_MODULE
case 15: // RAR 1.5 compression.
if (!Fragmented)
Unpack15(Solid);
break;
case 20: // RAR 2.x compression.
case 26: // Files larger than 2GB.
if (!Fragmented)
Unpack20(Solid);
break;
case 29: // RAR 3.x compression.
if (!Fragmented)
Unpack29(Solid);
break;
#endif
case VER_PACK5: // 50. RAR 5.0 and 7.0 compression algorithms.
case VER_PACK7: // 70.
ExtraDist=(Method==VER_PACK7);
#ifdef RAR_SMP
if (MaxUserThreads>1)
{
// We do not use the multithreaded unpack routine to repack RAR archives
// in 'suspended' mode, because unlike the single threaded code it can
// write more than one dictionary for same loop pass. So we would need
// larger buffers of unknown size. Also we do not support multithreading
// in fragmented window mode.
if (!Fragmented)
{
Unpack5MT(Solid);
break;
}
}
#endif
Unpack5(Solid);
break;
}
}
void Unpack::UnpInitData(bool Solid)
{
if (!Solid)
{
OldDist[0]=OldDist[1]=OldDist[2]=OldDist[3]=(size_t)-1;
OldDistPtr=0;
LastDist=(uint)-1; // Initialize it to -1 like LastDist.
LastLength=0;
// memset(Window,0,MaxWinSize);
memset(&BlockTables,0,sizeof(BlockTables));
UnpPtr=WrPtr=0;
PrevPtr=0;
FirstWinDone=false;
WriteBorder=Min(MaxWinSize,UNPACK_MAX_WRITE);
}
// Filters never share several solid files, so we can safely reset them
// even in solid archive.
InitFilters();
Inp.InitBitInput();
WrittenFileSize=0;
ReadTop=0;
ReadBorder=0;
memset(&BlockHeader,0,sizeof(BlockHeader));
BlockHeader.BlockSize=-1; // '-1' means not defined yet.
#ifndef SFX_MODULE
UnpInitData20(Solid);
UnpInitData30(Solid);
#endif
UnpInitData50(Solid);
}
// LengthTable contains the length in bits for every element of alphabet.
// Dec is the structure to decode Huffman code/
// Size is size of length table and DecodeNum field in Dec structure,
void Unpack::MakeDecodeTables(byte *LengthTable,DecodeTable *Dec,uint Size)
{
// Size of alphabet and DecodePos array.
Dec->MaxNum=Size;
// Calculate how many entries for every bit length in LengthTable we have.
uint LengthCount[16];
memset(LengthCount,0,sizeof(LengthCount));
for (size_t I=0;I<Size;I++)
LengthCount[LengthTable[I] & 0xf]++;
// We must not calculate the number of zero length codes.
LengthCount[0]=0;
// Set the entire DecodeNum to zero.
memset(Dec->DecodeNum,0,Size*sizeof(*Dec->DecodeNum));
// Initialize not really used entry for zero length code.
Dec->DecodePos[0]=0;
// Start code for bit length 1 is 0.
Dec->DecodeLen[0]=0;
// Right aligned upper limit code for current bit length.
uint UpperLimit=0;
for (size_t I=1;I<16;I++)
{
// Adjust the upper limit code.
UpperLimit+=LengthCount[I];
// Left aligned upper limit code.
uint LeftAligned=UpperLimit<<(16-I);
// Prepare the upper limit code for next bit length.
UpperLimit*=2;
// Store the left aligned upper limit code.
Dec->DecodeLen[I]=(uint)LeftAligned;
// Every item of this array contains the sum of all preceding items.
// So it contains the start position in code list for every bit length.
Dec->DecodePos[I]=Dec->DecodePos[I-1]+LengthCount[I-1];
}
// Prepare the copy of DecodePos. We'll modify this copy below,
// so we cannot use the original DecodePos.
uint CopyDecodePos[ASIZE(Dec->DecodePos)];
memcpy(CopyDecodePos,Dec->DecodePos,sizeof(CopyDecodePos));
// For every bit length in the bit length table and so for every item
// of alphabet.
for (uint I=0;I<Size;I++)
{
// Get the current bit length.
byte CurBitLength=LengthTable[I] & 0xf;
if (CurBitLength!=0)
{
// Last position in code list for current bit length.
uint LastPos=CopyDecodePos[CurBitLength];
// Prepare the decode table, so this position in code list will be
// decoded to current alphabet item number.
Dec->DecodeNum[LastPos]=(ushort)I;
// We'll use next position number for this bit length next time.
// So we pass through the entire range of positions available
// for every bit length.
CopyDecodePos[CurBitLength]++;
}
}
// Define the number of bits to process in quick mode. We use more bits
// for larger alphabets. More bits means that more codes will be processed
// in quick mode, but also that more time will be spent to preparation
// of tables for quick decode.
switch (Size)
{
case NC:
case NC20:
case NC30:
Dec->QuickBits=MAX_QUICK_DECODE_BITS;
break;
default:
Dec->QuickBits=MAX_QUICK_DECODE_BITS>3 ? MAX_QUICK_DECODE_BITS-3 : 0;
break;
}
// Size of tables for quick mode.
uint QuickDataSize=1<<Dec->QuickBits;
// Bit length for current code, start from 1 bit codes. It is important
// to use 1 bit instead of 0 for minimum code length, so we are moving
// forward even when processing a corrupt archive.
uint CurBitLength=1;
// For every right aligned bit string which supports the quick decoding.
for (uint Code=0;Code<QuickDataSize;Code++)
{
// Left align the current code, so it will be in usual bit field format.
uint BitField=Code<<(16-Dec->QuickBits);
// Prepare the table for quick decoding of bit lengths.
// Find the upper limit for current bit field and adjust the bit length
// accordingly if necessary.
while (CurBitLength<ASIZE(Dec->DecodeLen) && BitField>=Dec->DecodeLen[CurBitLength])
CurBitLength++;
// Translation of right aligned bit string to bit length.
Dec->QuickLen[Code]=CurBitLength;
// Prepare the table for quick translation of position in code list
// to position in alphabet.
// Calculate the distance from the start code for current bit length.
uint Dist=BitField-Dec->DecodeLen[CurBitLength-1];
// Right align the distance.
Dist>>=(16-CurBitLength);
// Now we can calculate the position in the code list. It is the sum
// of first position for current bit length and right aligned distance
// between our bit field and start code for current bit length.
uint Pos;
if (CurBitLength<ASIZE(Dec->DecodePos) &&
(Pos=Dec->DecodePos[CurBitLength]+Dist)<Size)
{
// Define the code to alphabet number translation.
Dec->QuickNum[Code]=Dec->DecodeNum[Pos];
}
else
{
// Can be here for length table filled with zeroes only (empty).
Dec->QuickNum[Code]=0;
}
}
}