mirror of
https://github.com/thorvg/thorvg.git
synced 2025-07-27 00:26:51 +00:00
common: remove redundant lzw decoder
This commit is contained in:
parent
42457a25c9
commit
bed38a4521
2 changed files with 0 additions and 214 deletions
|
@ -20,39 +20,6 @@
|
||||||
* SOFTWARE.
|
* SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
* Lempel–Ziv–Welch (LZW) decoder by Guilherme R. Lampert(guilherme.ronaldo.lampert@gmail.com)
|
|
||||||
|
|
||||||
* This is the compression scheme used by the GIF image format and the Unix 'compress' tool.
|
|
||||||
* The main differences in this implementation are that End Of Input (EOI) and Clear Codes (CC)
|
|
||||||
* are not stored in the output and the max code length in bits is 12, vs 16 in compress.
|
|
||||||
*
|
|
||||||
* EOI is simply detected by the end of the data stream, while CC happens if the
|
|
||||||
* dictionary gets filled. Data is written/read from bit streams, which handle
|
|
||||||
* byte-alignment for us in a transparent way.
|
|
||||||
|
|
||||||
* The decoder relies on the hardcoded data layout produced by the encoder, since
|
|
||||||
* no additional reconstruction data is added to the output, so they must match.
|
|
||||||
* The nice thing about LZW is that we can reconstruct the dictionary directly from
|
|
||||||
* the stream of codes generated by the encoder, so this avoids storing additional
|
|
||||||
* headers in the bit stream.
|
|
||||||
|
|
||||||
* The output code length is variable. It starts with the minimum number of bits
|
|
||||||
* required to store the base byte-sized dictionary and automatically increases
|
|
||||||
* as the dictionary gets larger (it starts at 9-bits and grows to 10-bits when
|
|
||||||
* code 512 is added, then 11-bits when 1024 is added, and so on). If the dictionary
|
|
||||||
* is filled (4096 items for a 12-bits dictionary), the whole thing is cleared and
|
|
||||||
* the process starts over. This is the main reason why the encoder and the decoder
|
|
||||||
* must match perfectly, since the lengths of the codes will not be specified with
|
|
||||||
* the data itself.
|
|
||||||
|
|
||||||
* USEFUL LINKS:
|
|
||||||
* https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
|
|
||||||
* http://rosettacode.org/wiki/LZW_compression
|
|
||||||
* http://www.cs.duke.edu/csed/curious/compression/lzw.html
|
|
||||||
* http://www.cs.cf.ac.uk/Dave/Multimedia/node214.html
|
|
||||||
* http://marknelson.us/1989/10/01/lzw-data-compression/
|
|
||||||
*/
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "tvgCommon.h"
|
#include "tvgCommon.h"
|
||||||
#include "tvgCompressor.h"
|
#include "tvgCompressor.h"
|
||||||
|
@ -60,186 +27,6 @@
|
||||||
namespace tvg {
|
namespace tvg {
|
||||||
|
|
||||||
|
|
||||||
/************************************************************************/
|
|
||||||
/* LZW Implementation */
|
|
||||||
/************************************************************************/
|
|
||||||
|
|
||||||
//LZW Dictionary helper:
|
|
||||||
constexpr int Nil = -1;                             //Sentinel: no previous code yet / end of a prefix chain.
constexpr int StartBits = 9;                        //Code width, in bits, that decoding starts with.
constexpr int MaxDictBits = 12;                     //Widest code width; growing past it resets the dictionary.
constexpr int FirstCode = 1 << (StartBits - 1);     //256: first index after the reserved byte range.
constexpr int MaxDictEntries = 1 << MaxDictBits;    //4096: capacity of the dictionary table.
|
|
||||||
|
|
||||||
/* Sequential bit reader over an externally owned byte buffer.
   Bits are consumed from the least significant bit of each byte upwards.
   Reading stops at whichever limit is hit first: the declared bit count or
   the declared byte count, so an inconsistent (bitCount > byteCount * 8)
   pair can never make the reader step past the end of the buffer. */
struct BitStreamReader
{
    const uint8_t* stream;       // Pointer to the external bit stream. Not owned by the reader.
    const int sizeInBytes;       // Size of the stream *in bytes*. Might include padding.
    const int sizeInBits;        // Size of the stream *in bits*, padding *not* included.
    int currBytePos = 0;         // Current byte being read in the stream.
    int nextBitPos = 0;          // Bit position within the current byte to access next. 0 to 7.
    int numBitsRead = 0;         // Total bits read from the stream so far. Never includes byte-rounding padding.

    BitStreamReader(const uint8_t* bitStream, const int byteCount, const int bitCount) : stream(bitStream), sizeInBytes(byteCount), sizeInBits(bitCount)
    {
    }

    /* Reads a single bit into bitOut (0 or 1).
       Returns false, leaving bitOut untouched, once the stream is exhausted. */
    bool readNextBit(int& bitOut)
    {
        if (isEndOfStream()) return false;  //We are done.

        const uint32_t mask = uint32_t(1) << nextBitPos;
        bitOut = (stream[currBytePos] & mask) ? 1 : 0;
        ++numBitsRead;

        if (++nextBitPos == 8) {
            nextBitPos = 0;
            ++currBytePos;
        }
        return true;
    }

    /* Reads up to bitCount bits and packs them into the result, first bit read
       into bit 0. If the stream ends early, the bits that could not be read stay zero.
       At most 64 bits are consumed per call, since shifting a 64-bit value
       by 64 or more is undefined behavior. */
    uint64_t readBitsU64(const int bitCount)
    {
        const int count = (bitCount < 64) ? bitCount : 64;
        uint64_t num = 0;
        for (int b = 0; b < count; ++b) {
            int bit;
            if (!readNextBit(bit)) break;
            if (bit) num |= uint64_t(1) << b;
        }
        return num;
    }

    /* True when no further bit can be read: the declared bit count was consumed,
       the byte buffer is exhausted, or there is no buffer at all. */
    bool isEndOfStream() const
    {
        return !stream || numBitsRead >= sizeInBits || currBytePos >= sizeInBytes;
    }
};
|
|
||||||
|
|
||||||
|
|
||||||
struct Dictionary
|
|
||||||
{
|
|
||||||
struct Entry
|
|
||||||
{
|
|
||||||
int code;
|
|
||||||
int value;
|
|
||||||
};
|
|
||||||
|
|
||||||
//Dictionary entries 0-255 are always reserved to the byte/ASCII range.
|
|
||||||
int size;
|
|
||||||
Entry entries[MaxDictEntries];
|
|
||||||
|
|
||||||
Dictionary()
|
|
||||||
{
|
|
||||||
/* First 256 dictionary entries are reserved to the byte/ASCII range.
|
|
||||||
Additional entries follow for the character sequences found in the input.
|
|
||||||
Up to 4096 - 256 (MaxDictEntries - FirstCode). */
|
|
||||||
size = FirstCode;
|
|
||||||
|
|
||||||
for (int i = 0; i < size; ++i) {
|
|
||||||
entries[i].code = Nil;
|
|
||||||
entries[i].value = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool add(const int code, const int value)
|
|
||||||
{
|
|
||||||
if (size == MaxDictEntries) return false;
|
|
||||||
entries[size].code = code;
|
|
||||||
entries[size].value = value;
|
|
||||||
++size;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool flush(int & codeBitsWidth)
|
|
||||||
{
|
|
||||||
if (size == (1 << codeBitsWidth)) {
|
|
||||||
++codeBitsWidth;
|
|
||||||
if (codeBitsWidth > MaxDictBits) {
|
|
||||||
//Clear the dictionary (except the first 256 byte entries).
|
|
||||||
codeBitsWidth = StartBits;
|
|
||||||
size = FirstCode;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/* Appends the low 8 bits of code to the output buffer.
   On success the write cursor and the decoded-byte counter both advance by one.
   Returns false, touching nothing, when outputSizeBytes bytes were already written. */
static bool outputByte(int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar)
{
    const bool hasRoom = bytesDecodedSoFar < outputSizeBytes;
    if (hasRoom) {
        *output = static_cast<uint8_t>(code);
        ++output;
        ++bytesDecodedSoFar;
    }
    return hasRoom;
}
|
|
||||||
|
|
||||||
|
|
||||||
static bool outputSequence(const Dictionary& dict, int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar, int& firstByte)
|
|
||||||
{
|
|
||||||
/* A sequence is stored backwards, so we have to write
|
|
||||||
it to a temp then output the buffer in reverse. */
|
|
||||||
int i = 0;
|
|
||||||
uint8_t sequence[MaxDictEntries];
|
|
||||||
|
|
||||||
do {
|
|
||||||
sequence[i++] = dict.entries[code].value;
|
|
||||||
code = dict.entries[code].code;
|
|
||||||
} while (code >= 0);
|
|
||||||
|
|
||||||
firstByte = sequence[--i];
|
|
||||||
|
|
||||||
for (; i >= 0; --i) {
|
|
||||||
if (!outputByte(sequence[i], output, outputSizeBytes, bytesDecodedSoFar)) return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes)
|
|
||||||
{
|
|
||||||
int code = Nil;
|
|
||||||
int prevCode = Nil;
|
|
||||||
int firstByte = 0;
|
|
||||||
int bytesDecoded = 0;
|
|
||||||
int codeBitsWidth = StartBits;
|
|
||||||
auto uncompressed = tvg::malloc<uint8_t*>(sizeof(uint8_t) * uncompressedSizeBytes);
|
|
||||||
auto ptr = uncompressed;
|
|
||||||
|
|
||||||
/* We'll reconstruct the dictionary based on the bit stream codes.
|
|
||||||
Unlike Huffman encoding, we don't store the dictionary as a prefix to the data. */
|
|
||||||
Dictionary dictionary;
|
|
||||||
BitStreamReader bitStream(compressed, compressedSizeBytes, compressedSizeBits);
|
|
||||||
|
|
||||||
/* We check to avoid an overflow of the user buffer.
|
|
||||||
If the buffer is smaller than the decompressed size, we break the loop and return the current decompression count. */
|
|
||||||
while (!bitStream.isEndOfStream()) {
|
|
||||||
code = static_cast<int>(bitStream.readBitsU64(codeBitsWidth));
|
|
||||||
|
|
||||||
if (prevCode == Nil) {
|
|
||||||
if (!outputByte(code, ptr, uncompressedSizeBytes, bytesDecoded)) break;
|
|
||||||
firstByte = code;
|
|
||||||
prevCode = code;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (code >= dictionary.size) {
|
|
||||||
if (!outputSequence(dictionary, prevCode, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break;
|
|
||||||
if (!outputByte(firstByte, ptr, uncompressedSizeBytes, bytesDecoded)) break;
|
|
||||||
} else if (!outputSequence(dictionary, code, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break;
|
|
||||||
|
|
||||||
dictionary.add(prevCode, firstByte);
|
|
||||||
if (dictionary.flush(codeBitsWidth)) prevCode = Nil;
|
|
||||||
else prevCode = code;
|
|
||||||
}
|
|
||||||
|
|
||||||
return uncompressed;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/************************************************************************/
|
/************************************************************************/
|
||||||
/* B64 Implementation */
|
/* B64 Implementation */
|
||||||
/************************************************************************/
|
/************************************************************************/
|
||||||
|
|
|
@ -27,7 +27,6 @@
|
||||||
|
|
||||||
namespace tvg
|
namespace tvg
|
||||||
{
|
{
|
||||||
uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes);
|
|
||||||
size_t b64Decode(const char* encoded, const size_t len, char** decoded);
|
size_t b64Decode(const char* encoded, const size_t len, char** decoded);
|
||||||
unsigned long djb2Encode(const char* str);
|
unsigned long djb2Encode(const char* str);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue