From bed38a4521a8c87c7bab02e2aab8dddaa2d6ec3b Mon Sep 17 00:00:00 2001 From: Hermet Park Date: Thu, 20 Feb 2025 16:30:30 +0900 Subject: [PATCH] common: remove redundant lzw decoder --- src/common/tvgCompressor.cpp | 213 ----------------------------------- src/common/tvgCompressor.h | 1 - 2 files changed, 214 deletions(-) diff --git a/src/common/tvgCompressor.cpp b/src/common/tvgCompressor.cpp index 8f8f7940..1ca008da 100644 --- a/src/common/tvgCompressor.cpp +++ b/src/common/tvgCompressor.cpp @@ -20,39 +20,6 @@ * SOFTWARE. */ -/* - * Lempel–Ziv–Welch (LZW) decoder by Guilherme R. Lampert(guilherme.ronaldo.lampert@gmail.com) - - * This is the compression scheme used by the GIF image format and the Unix 'compress' tool. - * Main differences from this implementation is that End Of Input (EOI) and Clear Codes (CC) - * are not stored in the output and the max code length in bits is 12, vs 16 in compress. - * - * EOI is simply detected by the end of the data stream, while CC happens if the - * dictionary gets filled. Data is written/read from bit streams, which handle - * byte-alignment for us in a transparent way. - - * The decoder relies on the hardcoded data layout produced by the encoder, since - * no additional reconstruction data is added to the output, so they must match. - * The nice thing about LZW is that we can reconstruct the dictionary directly from - * the stream of codes generated by the encoder, so this avoids storing additional - * headers in the bit stream. - - * The output code length is variable. It starts with the minimum number of bits - * required to store the base byte-sized dictionary and automatically increases - * as the dictionary gets larger (it starts at 9-bits and grows to 10-bits when - * code 512 is added, then 11-bits when 1024 is added, and so on). If the dictionary - * is filled (4096 items for a 12-bits dictionary), the whole thing is cleared and - * the process starts over. This is the main reason why the encoder and the decoder - * must match perfectly, since the lengths of the codes will not be specified with - * the data itself. - - * USEFUL LINKS: - * https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch - * http://rosettacode.org/wiki/LZW_compression - * http://www.cs.duke.edu/csed/curious/compression/lzw.html - * http://www.cs.cf.ac.uk/Dave/Multimedia/node214.html - * http://marknelson.us/1989/10/01/lzw-data-compression/ - */ #include "config.h" #include "tvgCommon.h" #include "tvgCompressor.h" @@ -60,186 +27,6 @@ namespace tvg { -/************************************************************************/ -/* LZW Implementation */ -/************************************************************************/ - -//LZW Dictionary helper: -constexpr int Nil = -1; -constexpr int MaxDictBits = 12; -constexpr int StartBits = 9; -constexpr int FirstCode = (1 << (StartBits - 1)); // 256 -constexpr int MaxDictEntries = (1 << MaxDictBits); // 4096 - -struct BitStreamReader -{ - const uint8_t* stream; // Pointer to the external bit stream. Not owned by the reader. - const int sizeInBytes; // Size of the stream *in bytes*. Might include padding. - const int sizeInBits; // Size of the stream *in bits*, padding *not* include. - int currBytePos = 0; // Current byte being read in the stream. - int nextBitPos = 0; // Bit position within the current byte to access next. 0 to 7. - int numBitsRead = 0; // Total bits read from the stream so far. Never includes byte-rounding padding. - - BitStreamReader(const uint8_t* bitStream, const int byteCount, const int bitCount) : stream(bitStream), sizeInBytes(byteCount), sizeInBits(bitCount) - { - } - - bool readNextBit(int& bitOut) - { - if (numBitsRead >= sizeInBits) return false; //We are done. - - const uint32_t mask = uint32_t(1) << nextBitPos; - bitOut = !!(stream[currBytePos] & mask); - ++numBitsRead; - - if (++nextBitPos == 8) { - nextBitPos = 0; - ++currBytePos; - } - return true; - } - - uint64_t readBitsU64(const int bitCount) - { - uint64_t num = 0; - for (int b = 0; b < bitCount; ++b) { - int bit; - if (!readNextBit(bit)) break; - /* Based on a "Stanford bit-hack": - http://graphics.stanford.edu/~seander/bithacks.html#ConditionalSetOrClearBitsWithoutBranching */ - const uint64_t mask = uint64_t(1) << b; - num = (num & ~mask) | (-bit & mask); - } - return num; - } - - bool isEndOfStream() const - { - return numBitsRead >= sizeInBits; - } -}; - - -struct Dictionary -{ - struct Entry - { - int code; - int value; - }; - - //Dictionary entries 0-255 are always reserved to the byte/ASCII range. - int size; - Entry entries[MaxDictEntries]; - - Dictionary() - { - /* First 256 dictionary entries are reserved to the byte/ASCII range. - Additional entries follow for the character sequences found in the input. - Up to 4096 - 256 (MaxDictEntries - FirstCode). */ - size = FirstCode; - - for (int i = 0; i < size; ++i) { - entries[i].code = Nil; - entries[i].value = i; - } - } - - bool add(const int code, const int value) - { - if (size == MaxDictEntries) return false; - entries[size].code = code; - entries[size].value = value; - ++size; - return true; - } - - bool flush(int & codeBitsWidth) - { - if (size == (1 << codeBitsWidth)) { - ++codeBitsWidth; - if (codeBitsWidth > MaxDictBits) { - //Clear the dictionary (except the first 256 byte entries). - codeBitsWidth = StartBits; - size = FirstCode; - return true; - } - } - return false; - } -}; - - -static bool outputByte(int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar) -{ - if (bytesDecodedSoFar >= outputSizeBytes) return false; - *output++ = static_cast(code); - ++bytesDecodedSoFar; - return true; -} - - -static bool outputSequence(const Dictionary& dict, int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar, int& firstByte) -{ - /* A sequence is stored backwards, so we have to write - it to a temp then output the buffer in reverse. */ - int i = 0; - uint8_t sequence[MaxDictEntries]; - - do { - sequence[i++] = dict.entries[code].value; - code = dict.entries[code].code; - } while (code >= 0); - - firstByte = sequence[--i]; - - for (; i >= 0; --i) { - if (!outputByte(sequence[i], output, outputSizeBytes, bytesDecodedSoFar)) return false; - } - return true; -} - - -uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes) -{ - int code = Nil; - int prevCode = Nil; - int firstByte = 0; - int bytesDecoded = 0; - int codeBitsWidth = StartBits; - auto uncompressed = tvg::malloc(sizeof(uint8_t) * uncompressedSizeBytes); - auto ptr = uncompressed; - - /* We'll reconstruct the dictionary based on the bit stream codes. - Unlike Huffman encoding, we don't store the dictionary as a prefix to the data. */ - Dictionary dictionary; - BitStreamReader bitStream(compressed, compressedSizeBytes, compressedSizeBits); - - /* We check to avoid an overflow of the user buffer. - If the buffer is smaller than the decompressed size, we break the loop and return the current decompression count. */ - while (!bitStream.isEndOfStream()) { - code = static_cast(bitStream.readBitsU64(codeBitsWidth)); - - if (prevCode == Nil) { - if (!outputByte(code, ptr, uncompressedSizeBytes, bytesDecoded)) break; - firstByte = code; - prevCode = code; - continue; - } - if (code >= dictionary.size) { - if (!outputSequence(dictionary, prevCode, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break; - if (!outputByte(firstByte, ptr, uncompressedSizeBytes, bytesDecoded)) break; - } else if (!outputSequence(dictionary, code, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break; - - dictionary.add(prevCode, firstByte); - if (dictionary.flush(codeBitsWidth)) prevCode = Nil; - else prevCode = code; - } - - return uncompressed; -} - - /************************************************************************/ /* B64 Implementation */ /************************************************************************/ diff --git a/src/common/tvgCompressor.h b/src/common/tvgCompressor.h index 914fa2d3..2dfda525 100644 --- a/src/common/tvgCompressor.h +++ b/src/common/tvgCompressor.h @@ -27,7 +27,6 @@ namespace tvg { - uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes); size_t b64Decode(const char* encoded, const size_t len, char** decoded); unsigned long djb2Encode(const char* str); }