common: remove redundant lzw decoder

2025-07-27 00:26:51 +00:00 · 2025-02-20 16:30:30 +09:00 · 2025-02-20 16:30:30 +09:00 · bed38a4521
commit bed38a4521
parent 42457a25c9
2 changed files with 0 additions and 214 deletions
--- a/src/common/tvgCompressor.cpp
+++ b/src/common/tvgCompressor.cpp
@ -20,39 +20,6 @@
 * SOFTWARE.
 */
 /*
 * Lempel–Ziv–Welch (LZW) decoder by Guilherme R. Lampert(guilherme.ronaldo.lampert@gmail.com)
 * This is the compression scheme used by the GIF image format and the Unix 'compress' tool.
 * The main differences in this implementation are that End Of Input (EOI) and Clear Codes (CC)
 * are not stored in the output and the max code length in bits is 12, vs 16 in compress.
 *
 * EOI is simply detected by the end of the data stream, while CC happens if the
 * dictionary gets filled. Data is written/read from bit streams, which handle
 * byte-alignment for us in a transparent way.
 * The decoder relies on the hardcoded data layout produced by the encoder, since
 * no additional reconstruction data is added to the output, so they must match.
 * The nice thing about LZW is that we can reconstruct the dictionary directly from
 * the stream of codes generated by the encoder, so this avoids storing additional
 * headers in the bit stream.
 * The output code length is variable. It starts with the minimum number of bits
 * required to store the base byte-sized dictionary and automatically increases
 * as the dictionary gets larger (it starts at 9-bits and grows to 10-bits when
 * code 512 is added, then 11-bits when 1024 is added, and so on). If the dictionary
 * is filled (4096 items for a 12-bits dictionary), the whole thing is cleared and
 * the process starts over. This is the main reason why the encoder and the decoder
 * must match perfectly, since the lengths of the codes will not be specified with
 * the data itself.
 * USEFUL LINKS:
 * https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
 * http://rosettacode.org/wiki/LZW_compression
 * http://www.cs.duke.edu/csed/curious/compression/lzw.html
 * http://www.cs.cf.ac.uk/Dave/Multimedia/node214.html
 * http://marknelson.us/1989/10/01/lzw-data-compression/
 */
 #include "config.h"
 #include "tvgCommon.h"
 #include "tvgCompressor.h"
@ -60,186 +27,6 @@
 namespace tvg {
 /************************************************************************/
 /* LZW Implementation                                                   */
 /************************************************************************/
 //Constants shared by the LZW dictionary and the decoder:
 constexpr int StartBits = 9;                         //Bit width of the codes at the start of decoding.
 constexpr int MaxDictBits = 12;                      //Widest code width before the dictionary is reset.
 constexpr int FirstCode = 1 << (StartBits - 1);      //256: first index after the single-byte entries.
 constexpr int MaxDictEntries = 1 << MaxDictBits;     //4096: dictionary capacity.
 constexpr int Nil = -1;                              //Marker for "no code" (no previous code / no prefix entry).
 /* Sequential reader over an externally owned, packed bit stream.
    Bits are consumed starting from the least significant bit of each byte.
    Reading stops at whichever limit comes first: sizeInBits or sizeInBytes. */
 struct BitStreamReader
 {
    const uint8_t* stream;       // Pointer to the external bit stream. Not owned by the reader.
    const int sizeInBytes;       // Size of the stream *in bytes*. Might include padding.
    const int sizeInBits;        // Size of the stream *in bits*, padding *not* included.
    int currBytePos = 0;         // Current byte being read in the stream.
    int nextBitPos = 0;          // Bit position within the current byte to access next. 0 to 7.
    int numBitsRead = 0;         // Total bits read from the stream so far. Never includes byte-rounding padding.
    BitStreamReader(const uint8_t* bitStream, const int byteCount, const int bitCount) : stream(bitStream), sizeInBytes(byteCount), sizeInBits(bitCount)
    {
    }
    /* Reads one bit into bitOut (0 or 1) and advances the cursor.
       Returns false, leaving bitOut untouched, once the stream is exhausted. */
    bool readNextBit(int& bitOut)
    {
        //Checking the byte bound as well keeps an oversized bit count from reading past the buffer.
        if (isEndOfStream()) return false; //We are done.
        const uint32_t mask = uint32_t(1) << nextBitPos;
        bitOut = !!(stream[currBytePos] & mask);
        ++numBitsRead;
        if (++nextBitPos == 8) {
            nextBitPos = 0;
            ++currBytePos;
        }
        return true;
    }
    /* Reads up to bitCount bits and packs them into the result, first bit read
       at bit 0. If the stream ends early, the bits read so far are returned.
       bitCount is expected to be in the range 0..64. */
    uint64_t readBitsU64(const int bitCount)
    {
        uint64_t num = 0;
        for (int b = 0; b < bitCount; ++b) {
            int bit;
            if (!readNextBit(bit)) break;
            //num starts zeroed, so only set bits need to be written.
            if (bit) num |= uint64_t(1) << b;
        }
        return num;
    }
    /* True when no more bits can be read, either because sizeInBits bits were
       consumed or because the cursor moved past the last byte of the buffer. */
    bool isEndOfStream() const
    {
        return numBitsRead >= sizeInBits || currBytePos >= sizeInBytes;
    }
 };
 /* Code table rebuilt by the LZW decoder while it consumes the bit stream.
    Every entry holds one byte (value) plus the index of its prefix entry (code);
    walking the code links until Nil visits a sequence from its last byte to its first. */
 struct Dictionary
 {
    struct Entry
    {
        int code;     //Index of the prefix entry, Nil when the entry has no prefix.
        int value;    //Byte contributed by this entry.
    };
    //Dictionary entries 0-255 are always reserved to the byte/ASCII range.
    int size;         //Number of entries currently in use.
    Entry entries[MaxDictEntries];
    Dictionary()
    {
        /* First 256 dictionary entries are reserved to the byte/ASCII range.
           Additional entries follow for the character sequences found in the input.
           Up to 4096 - 256 (MaxDictEntries - FirstCode).
           The unused tail is initialized too, so that a lookup of a code that has
           not been defined yet (corrupted stream) never reads indeterminate values. */
        size = FirstCode;
        for (int i = 0; i < MaxDictEntries; ++i) {
            entries[i].code  = Nil;
            entries[i].value = (i < FirstCode) ? i : 0;
        }
    }
    /* Appends a new (prefix code, byte) entry.
       Returns false, leaving the dictionary untouched, when it is already full. */
    bool add(const int code, const int value)
    {
        if (size == MaxDictEntries) return false;
        entries[size].code  = code;
        entries[size].value = value;
        ++size;
        return true;
    }
    /* Widens codeBitsWidth by one bit once the dictionary holds 2^codeBitsWidth entries.
       If the width would exceed MaxDictBits, the dictionary is reset to the 256 byte
       entries and the width goes back to StartBits. Returns true only on such a reset. */
    bool flush(int & codeBitsWidth)
    {
        if (size == (1 << codeBitsWidth)) {
            ++codeBitsWidth;
            if (codeBitsWidth > MaxDictBits) {
                //Clear the dictionary (except the first 256 byte entries).
                codeBitsWidth = StartBits;
                size = FirstCode;
                return true;
            }
        }
        return false;
    }
 };
 /* Stores the low 8 bits of code at the output cursor.
    On success the cursor and bytesDecodedSoFar both advance by one and true is returned.
    When outputSizeBytes bytes were already produced nothing is written and false is returned. */
 static bool outputByte(int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar)
 {
    const bool hasRoom = bytesDecodedSoFar < outputSizeBytes;
    if (hasRoom) {
        *output = static_cast<uint8_t>(code);
        ++output;
        ++bytesDecodedSoFar;
    }
    return hasRoom;
 }
 /* Writes the byte sequence that `code` stands for to the output buffer.
    firstByte receives the first byte of that sequence.
    Returns false when the output buffer is full, or when the dictionary chain is
    invalid (code out of range, or a chain longer than the dictionary itself, which
    can only happen with a corrupted stream). In the invalid case nothing is written. */
 static bool outputSequence(const Dictionary& dict, int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar, int& firstByte)
 {
    /* A sequence is stored backwards, so we have to write
       it to a temp then output the buffer in reverse. */
    int i = 0;
    uint8_t sequence[MaxDictEntries];
    do {
        //Guard both the dictionary index and the temp buffer against malformed chains.
        if (code < 0 || code >= MaxDictEntries || i >= MaxDictEntries) return false;
        sequence[i++] = static_cast<uint8_t>(dict.entries[code].value);
        code = dict.entries[code].code;
    } while (code >= 0);
    firstByte = sequence[--i];
    for (; i >= 0; --i) {
        if (!outputByte(sequence[i], output, outputSizeBytes, bytesDecodedSoFar)) return false;
    }
    return true;
 }
 uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes)
 {
    int code = Nil;
    int prevCode = Nil;
    int firstByte = 0;
    int bytesDecoded = 0;
    int codeBitsWidth = StartBits;
    auto uncompressed = tvg::malloc<uint8_t*>(sizeof(uint8_t) * uncompressedSizeBytes);
    auto ptr = uncompressed;
    /* We'll reconstruct the dictionary based on the bit stream codes.
       Unlike Huffman encoding, we don't store the dictionary as a prefix to the data. */
    Dictionary dictionary;
    BitStreamReader bitStream(compressed, compressedSizeBytes, compressedSizeBits);
    /* We check to avoid an overflow of the user buffer.
       If the buffer is smaller than the decompressed size, we break the loop and return the current decompression count. */
    while (!bitStream.isEndOfStream()) {
        code = static_cast<int>(bitStream.readBitsU64(codeBitsWidth));
        if (prevCode == Nil) {
            if (!outputByte(code, ptr, uncompressedSizeBytes, bytesDecoded)) break;
            firstByte = code;
            prevCode  = code;
            continue;
        }
        if (code >= dictionary.size) {
            if (!outputSequence(dictionary, prevCode, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break;
            if (!outputByte(firstByte, ptr, uncompressedSizeBytes, bytesDecoded)) break;
        } else if (!outputSequence(dictionary, code, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break;
        dictionary.add(prevCode, firstByte);
        if (dictionary.flush(codeBitsWidth)) prevCode = Nil;
        else prevCode = code;
    }
    return uncompressed;
 }
 /************************************************************************/
 /* B64 Implementation                                                   */
 /************************************************************************/
--- a/src/common/tvgCompressor.h
+++ b/src/common/tvgCompressor.h
@ -27,7 +27,6 @@
 namespace tvg
 {
    //Decodes an LZW bit stream (compressedSizeBits meaningful bits inside compressedSizeBytes bytes)
    //into a newly allocated buffer of uncompressedSizeBytes bytes and returns that buffer.
    //NOTE(review): the function never frees the buffer, so the caller presumably owns it - confirm.
    uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes);
    //NOTE(review): implementation not visible here; presumably decodes `len` characters of Base64 text
    //into *decoded and returns the decoded size - confirm against the definition.
    size_t b64Decode(const char* encoded, const size_t len, char** decoded);
    //NOTE(review): implementation not visible here; presumably the djb2 string hash of `str` - confirm.
    unsigned long djb2Encode(const char* str);
 }