FreeNOS
Lz4Decompressor.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2020 Niek Linnenbank
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <FreeNOS/User.h>
19#include <Log.h>
20#include <ByteOrder.h>
21#include <MemoryBlock.h>
22#include "Lz4Decompressor.h"
23
24Lz4Decompressor::Lz4Decompressor(const void *data, const Size size)
25 : m_inputData(static_cast<const u8 *>(data))
26 , m_inputSize(size)
27 , m_frameDescSize(3)
28 , m_blockChecksums(false)
29 , m_contentChecksum(false)
30 , m_contentSize(0)
31 , m_blockMaximumSize(0)
32{
33}
34
36{
37 // Reset state
39
40 // Verify minimum input size
41 if (m_inputSize < 27)
42 {
43 ERROR("invalid size of input data: " << m_inputSize);
44 return InvalidArgument;
45 }
46
47 // Verify the input is an actual LZ4 frame
49 {
50 ERROR("invalid magic value " << readLe32(m_inputData) << " != " << FrameMagic);
51 return InvalidArgument;
52 }
53
54 // Read the FLG byte
55 const u8 flg = *(m_inputData + sizeof(u32));
56
57 // Verify the version bits
58 const u8 version = flg >> FrameVersionShift;
59 if (version != FrameVersion)
60 {
61 ERROR("invalid version value " << version << " != " << FrameVersion);
62 return InvalidArgument;
63 }
64
65 // This code only supports independent blocks
66 const bool independent = (flg >> FrameBlockIndShift) & 0x1;
67 if (!independent)
68 {
69 ERROR("inter-dependent blocks not supported");
70 return NotSupported;
71 }
72
73 // Check for block checksum flag
74 m_blockChecksums = (flg >> FrameBlockChkShift) & 0x1;
75
76 // Check for content size flag
77 if ((flg >> FrameContentSzShift) & 0x1)
78 {
79 m_contentSize = readLe64(m_inputData + sizeof(u32) + (sizeof(u8) * 2));
80 m_frameDescSize += 8;
81 }
82
83 // Content size must be non-zero
84 if (m_contentSize == 0)
85 {
86 ERROR("content size must not be zero");
87 return NotSupported;
88 }
89
90 // Check for the content checksum flag
91 m_contentChecksum = (flg >> FrameContentChkShift) & 0x1 ? true : false;
92
93 // Check for the DictID flag
94 if ((flg >> FrameDictIdShift) & 0x1)
95 {
96 m_frameDescSize += 4;
97 }
98
99 // Read the BD byte which contains the maximum block size
100 const u8 bd = *(m_inputData + sizeof(u32) + sizeof(u8));
101 switch (bd >> 4)
102 {
103 case 4:
105 break;
106 case 5:
108 break;
109 case 6:
111 break;
112 case 7:
114 break;
115 default:
116 {
117 ERROR("invalid maximum block size value: " << (bd >> 4));
118 return InvalidArgument;
119 }
120 }
121
122 return Success;
123}
124
129
131 const Size size) const
132{
133 const u8 *input = m_inputData + m_frameDescSize + sizeof(u32);
134 const u8 *inputEnd = m_inputData + m_inputSize;
135 u8 *output = static_cast<u8 *>(buffer);
136 Size copied = 0;
137
138 while (copied < size && input < inputEnd)
139 {
140 // Fetch the next block
141 const u32 blockSizeByte = readLe32(input);
142 const u32 blockSize = blockSizeByte & ~(1 << 31);
143 const bool isCompressed = blockSizeByte & (1 << 31) ? false : true;
144 Size uncompSize;
145
146 // Last block has the EndMark as size value
147 if (blockSize == EndMark)
148 {
149 break;
150 }
151 assert(blockSize <= m_blockMaximumSize);
152 input += sizeof(u32);
153
154 // Decompress the block
155 if (isCompressed)
156 {
157 uncompSize = decompress(input, blockSize, output, size - copied);
158 }
159 // Return data as-is when the block is not compressed
160 else
161 {
162 MemoryBlock::copy(output, input, blockSize);
163 uncompSize = blockSize;
164 }
165
166 // Move to the next block
167 copied += uncompSize;
168 output += uncompSize;
169 input += blockSize;
171 {
172 input += sizeof(u32);
173 }
174 }
175
176 return Success;
177}
178
179inline const u32 Lz4Decompressor::integerDecode(const u32 initial,
180 const u8 *next,
181 Size &byteCount) const
182{
183 u32 value = initial;
184
185 if (initial < 0xf)
186 {
187 return initial;
188 }
189
190 for (byteCount = 1; ; byteCount++)
191 {
192 const u8 byte = *next++;
193 value += byte;
194
195 if (byte < 0xff)
196 {
197 break;
198 }
199 }
200
201 return value;
202}
203
205 const Size inputSize,
206 u8 *output,
207 const Size outputSize) const
208{
209 const u8 *inputEnd = input + inputSize;
210 Size outputOffset = 0;
211
212 // Decompress the whole block
213 while (input < inputEnd && outputOffset < outputSize)
214 {
215 u32 literalBytes = 0;
216 u32 matchBytes = 0;
217
218 // Read the token
219 const u8 token = *input;
220 input++;
221
222 // Read literals count
223 const u32 literalsCount = integerDecode(token >> 4, input, literalBytes);
224 input += literalBytes;
225 DEBUG("token = " << token << " literalsCount = " << literalsCount << " literalBytes = " << literalBytes);
226
227 // Copy literals
228 if (literalsCount > 0)
229 {
230 MemoryBlock::copy(output + outputOffset, input, literalsCount);
231 input += literalsCount;
232 outputOffset += literalsCount;
233 }
234
235 // End of block reached? Last 5 bytes are only literals
236 if (input >= inputEnd)
237 {
238 break;
239 }
240
241 // Read match offset
242 const u16 off = readLe16(input);
243 assert(off <= outputOffset);
244 const u32 matchOffset = outputOffset - off;
245 input += sizeof(u16);
246
247 // Read match length
248 const u32 matchCount = integerDecode(token & 0xf, input, matchBytes) + 4u;
249 input += matchBytes;
250
251 // Copy the match from previous decoded bytes
252 DEBUG("matchOffset = " << matchOffset << " matchCount = " << matchCount);
253 MemoryBlock::copy(output + outputOffset, output + matchOffset, matchCount);
254 outputOffset += matchCount;
255 }
256
257 assert(outputOffset <= m_blockMaximumSize);
258 return outputOffset;
259}
Result initialize()
Initialize the decompressor.
u64 m_contentSize
Content size as specified in the frame header.
const Size m_inputSize
Total size in bytes of the compressed input data.
const u32 integerDecode(const u32 initial, const u8 *next, Size &byteCount) const
Decode input data as integer (little-endian, 32-bit unsigned)
Size m_blockMaximumSize
Maximum block size in bytes of the uncompressed content.
u64 getUncompressedSize() const
Get size of the uncompressed data.
const u8 * m_inputData
Compressed input data.
Size m_frameDescSize
Size of the frame descriptor in bytes.
bool m_blockChecksums
True if blocks have checksums enabled.
static const u32 EndMark
EndMark marks the end of the data stream.
bool m_contentChecksum
True if the input data buffer contains content checksum.
Lz4Decompressor(const void *data, const Size size)
Constructor function.
const u32 decompress(const u8 *input, const Size inputSize, u8 *output, const Size outputSize) const
Decompress a block of compressed data.
Result
Result codes.
static const u32 FrameMagic
Magic number value marks the start of the frame header.
Result read(void *buffer, const Size size) const
Reads compressed data.
static const u8 FrameVersion
Current supported version of the LZ4 algorithm.
static Size copy(void *dest, const void *src, Size count)
Copy memory from one place to another.
#define assert(exp)
Insert program diagnostics.
Definition assert.h:60
unsigned int u32
Unsigned 32-bit number.
Definition Types.h:53
#define MegaByte(v)
Convert megabytes to bytes.
Definition Macros.h:57
#define ERROR(msg)
Output an error message.
Definition Log.h:61
const u16 readLe16(const void *data)
Read 16-bit little endian integer.
Definition ByteOrder.h:356
const u64 readLe64(const void *data)
Read 64-bit little endian integer.
Definition ByteOrder.h:328
unsigned short u16
Unsigned 16-bit number.
Definition Types.h:56
#define KiloByte(v)
Convert kilobytes to bytes.
Definition Macros.h:54
unsigned int Size
Any sane size indicator cannot go negative.
Definition Types.h:128
unsigned long long u64
Unsigned 64-bit number.
Definition Types.h:50
const u32 readLe32(const void *data)
Read 32-bit little endian integer.
Definition ByteOrder.h:342
#define DEBUG(msg)
Output a debug message to standard output.
Definition Log.h:89
unsigned char u8
Unsigned 8-bit number.
Definition Types.h:59