From ce7f4cb7c33e06c8cb6e190a5901c219202cd731 Mon Sep 17 00:00:00 2001 From: Junsu Choi Date: Tue, 6 Feb 2024 12:09:29 +0900 Subject: [PATCH] loader/webp: Support static webp loader Built to libwebp code in to support the static option of webp loader. Code from :https://github.com/jacklicn/libwebp This forking version only contains the minimal webp decoding feature. Binary Size: +68kb Co-authored-by: Hermet Park issue: https://github.com/thorvg/thorvg/issues/1427 --- .github/workflows/build_ubuntu.yml | 2 +- .github/workflows/codeql.yml | 2 +- meson.build | 54 +- meson_options.txt | 4 +- src/examples/PictureWebp.cpp | 1 - src/loaders/external_webp/meson.build | 14 +- src/loaders/meson.build | 7 +- src/loaders/webp/dec/alpha.cpp | 167 ++ src/loaders/webp/dec/alphai.h | 55 + src/loaders/webp/dec/buffer.cpp | 238 +++ src/loaders/webp/dec/common.h | 54 + src/loaders/webp/dec/decode_vp8.h | 177 +++ src/loaders/webp/dec/frame.cpp | 713 +++++++++ src/loaders/webp/dec/io.cpp | 392 +++++ src/loaders/webp/dec/meson.build | 22 + src/loaders/webp/dec/quant.cpp | 110 ++ src/loaders/webp/dec/tree.cpp | 525 ++++++ src/loaders/webp/dec/vp8.cpp | 681 ++++++++ src/loaders/webp/dec/vp8i.h | 307 ++++ src/loaders/webp/dec/vp8l.cpp | 1580 +++++++++++++++++++ src/loaders/webp/dec/vp8li.h | 136 ++ src/loaders/webp/dec/webp.cpp | 674 ++++++++ src/loaders/webp/dec/webpi.h | 108 ++ src/loaders/webp/dsp/alpha_processing.cpp | 377 +++++ src/loaders/webp/dsp/argb.cpp | 68 + src/loaders/webp/dsp/cpu.cpp | 120 ++ src/loaders/webp/dsp/dec.cpp | 766 +++++++++ src/loaders/webp/dsp/dec_clip_tables.cpp | 366 +++++ src/loaders/webp/dsp/dsp.h | 319 ++++ src/loaders/webp/dsp/filters.cpp | 240 +++ src/loaders/webp/dsp/lossless.cpp | 636 ++++++++ src/loaders/webp/dsp/lossless.h | 303 ++++ src/loaders/webp/dsp/meson.build | 20 + src/loaders/webp/dsp/rescaler.cpp | 115 ++ src/loaders/webp/dsp/upsampling.cpp | 252 +++ src/loaders/webp/dsp/yuv.cpp | 166 ++ src/loaders/webp/dsp/yuv.h | 321 ++++ src/loaders/webp/meson.build | 17 + src/loaders/webp/tvgWebpLoader.cpp | 150 ++ src/loaders/webp/tvgWebpLoader.h | 48 + src/loaders/webp/utils/bit_reader.cpp | 216 +++ src/loaders/webp/utils/bit_reader.h | 168 ++ src/loaders/webp/utils/bit_reader_inl.h | 172 ++ src/loaders/webp/utils/color_cache.cpp | 48 + src/loaders/webp/utils/color_cache.h | 74 + src/loaders/webp/utils/endian_inl.h | 100 ++ src/loaders/webp/utils/huffman.cpp | 204 +++ src/loaders/webp/utils/huffman.h | 67 + src/loaders/webp/utils/meson.build | 22 + src/loaders/webp/utils/quant_levels_dec.cpp | 279 ++++ src/loaders/webp/utils/quant_levels_dec.h | 35 + src/loaders/webp/utils/random.cpp | 43 + src/loaders/webp/utils/random.h | 63 + src/loaders/webp/utils/rescaler.cpp | 82 + src/loaders/webp/utils/rescaler.h | 78 + src/loaders/webp/utils/utils.h | 68 + src/loaders/webp/webp/decode.h | 493 ++++++ src/loaders/webp/webp/format_constants.h | 88 ++ src/loaders/webp/webp/meson.build | 10 + src/loaders/webp/webp/types.h | 53 + 60 files changed, 12630 insertions(+), 40 deletions(-) create mode 100644 src/loaders/webp/dec/alpha.cpp create mode 100644 src/loaders/webp/dec/alphai.h create mode 100644 src/loaders/webp/dec/buffer.cpp create mode 100644 src/loaders/webp/dec/common.h create mode 100644 src/loaders/webp/dec/decode_vp8.h create mode 100644 src/loaders/webp/dec/frame.cpp create mode 100644 src/loaders/webp/dec/io.cpp create mode 100644 src/loaders/webp/dec/meson.build create mode 100644 src/loaders/webp/dec/quant.cpp create mode 100644 src/loaders/webp/dec/tree.cpp create mode 100644 src/loaders/webp/dec/vp8.cpp create mode 100644 src/loaders/webp/dec/vp8i.h create mode 100644 src/loaders/webp/dec/vp8l.cpp create mode 100644 src/loaders/webp/dec/vp8li.h create mode 100644 src/loaders/webp/dec/webp.cpp create mode 100644 src/loaders/webp/dec/webpi.h create mode 100644 src/loaders/webp/dsp/alpha_processing.cpp create mode 100644 src/loaders/webp/dsp/argb.cpp create mode 100644 src/loaders/webp/dsp/cpu.cpp create mode 100644 src/loaders/webp/dsp/dec.cpp create mode 100644 src/loaders/webp/dsp/dec_clip_tables.cpp create mode 100644 src/loaders/webp/dsp/dsp.h create mode 100644 src/loaders/webp/dsp/filters.cpp create mode 100644 src/loaders/webp/dsp/lossless.cpp create mode 100644 src/loaders/webp/dsp/lossless.h create mode 100644 src/loaders/webp/dsp/meson.build create mode 100644 src/loaders/webp/dsp/rescaler.cpp create mode 100644 src/loaders/webp/dsp/upsampling.cpp create mode 100644 src/loaders/webp/dsp/yuv.cpp create mode 100644 src/loaders/webp/dsp/yuv.h create mode 100644 src/loaders/webp/meson.build create mode 100644 src/loaders/webp/tvgWebpLoader.cpp create mode 100644 src/loaders/webp/tvgWebpLoader.h create mode 100644 src/loaders/webp/utils/bit_reader.cpp create mode 100644 src/loaders/webp/utils/bit_reader.h create mode 100644 src/loaders/webp/utils/bit_reader_inl.h create mode 100644 src/loaders/webp/utils/color_cache.cpp create mode 100644 src/loaders/webp/utils/color_cache.h create mode 100644 src/loaders/webp/utils/endian_inl.h create mode 100644 src/loaders/webp/utils/huffman.cpp create mode 100644 src/loaders/webp/utils/huffman.h create mode 100644 src/loaders/webp/utils/meson.build create mode 100644 src/loaders/webp/utils/quant_levels_dec.cpp create mode 100644 src/loaders/webp/utils/quant_levels_dec.h create mode 100644 src/loaders/webp/utils/random.cpp create mode 100644 src/loaders/webp/utils/random.h create mode 100644 src/loaders/webp/utils/rescaler.cpp create mode 100644 src/loaders/webp/utils/rescaler.h create mode 100644 src/loaders/webp/utils/utils.h create mode 100644 src/loaders/webp/webp/decode.h create mode 100644 src/loaders/webp/webp/format_constants.h create mode 100644 src/loaders/webp/webp/meson.build create mode 100644 src/loaders/webp/webp/types.h diff --git a/.github/workflows/build_ubuntu.yml b/.github/workflows/build_ubuntu.yml index 7895429d..19d52b17 100644 --- a/.github/workflows/build_ubuntu.yml +++ b/.github/workflows/build_ubuntu.yml @@ -87,7 +87,7 @@ jobs: - name: Build & Run memcheck Script(ASAN) run: | sudo rm -rf ./build - meson setup build -Db_sanitize="address,undefined" -Dloaders="all, webp_beta" -Dsavers="all" -Dtests="true" -Dbindings="capi" + meson setup build -Db_sanitize="address,undefined" -Dloaders="all" -Dsavers="all" -Dtests="true" -Dbindings="capi" sudo ninja -C build install export PATH=$PATH:~/.local/bin/ chmod +x "${GITHUB_WORKSPACE}/.github/workflows/memcheck_asan.sh" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 518079c7..822b8fc0 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -71,7 +71,7 @@ jobs: sudo pip3 install meson sudo apt-get install libturbojpeg0-dev libpng-dev libwebp-dev - meson . build -Dbindings="capi" -Dloaders="all, webp_beta" -Dsavers="all" -Dtools="all" + meson . build -Dbindings="capi" -Dloaders="all" -Dsavers="all" -Dtools="all" sudo ninja -C build install diff --git a/meson.build b/meson.build index e9cf13dd..d46eb560 100644 --- a/meson.build +++ b/meson.build @@ -62,7 +62,7 @@ if all_loaders or get_option('loaders').contains('ttf') == true config_h.set10('THORVG_TTF_LOADER_SUPPORT', true) endif -if get_option('loaders').contains('webp_beta') == true +if all_loaders or get_option('loaders').contains('webp') == true config_h.set10('THORVG_WEBP_LOADER_SUPPORT', true) endif @@ -134,31 +134,31 @@ endif summary = ''' Summary: - ThorVG version: @0@ - Build Type: @1@ - Prefix: @2@ - Multi-Tasking: @3@ - SIMD Instruction: @4@ - Raster Engine (SW): @5@ - Raster Engine (GL): @6@ - Raster Engine (WG): @7@ - Loader (TVG): @8@ - Loader (SVG): @9@ - Loader (TTF): @10@ - Loader (LOTTIE): @11@ - Loader (PNG): @12@ - Loader (JPG): @13@ - Loader (WEBP_BETA): @14@ - Saver (TVG): @15@ - Saver (GIF): @16@ - Binding (CAPI): @17@ - Binding (WASM_BETA): @18@ - Log Message: @19@ - Tests: @20@ - Examples: @21@ - Tool (Svg2Tvg): @22@ - Tool (Svg2Png): @23@ - Tool (Lottie2Gif): @24@ + ThorVG version: @0@ + Build Type: @1@ + Prefix: @2@ + Multi-Tasking: @3@ + SIMD Instruction: @4@ + Raster Engine (SW): @5@ + Raster Engine (GL_BETA): @6@ + Raster Engine (WG_BETA): @7@ + Loader (TVG): @8@ + Loader (SVG): @9@ + Loader (TTF): @10@ + Loader (LOTTIE): @11@ + Loader (PNG): @12@ + Loader (JPG): @13@ + Loader (WEBP): @14@ + Saver (TVG): @15@ + Saver (GIF): @16@ + Binding (CAPI): @17@ + Binding (WASM_BETA): @18@ + Log Message: @19@ + Tests: @20@ + Examples: @21@ + Tool (Svg2Tvg): @22@ + Tool (Svg2Png): @23@ + Tool (Lottie2Gif): @24@ '''.format( meson.project_version(), @@ -175,7 +175,7 @@ Summary: all_loaders or get_option('loaders').contains('lottie'), all_loaders or get_option('loaders').contains('png'), all_loaders or get_option('loaders').contains('jpg'), - get_option('loaders').contains('webp_beta'), + all_loaders or get_option('loaders').contains('webp'), all_savers or get_option('savers').contains('tvg'), all_savers or get_option('savers').contains('gif'), get_option('bindings').contains('capi'), diff --git a/meson_options.txt b/meson_options.txt index c9e6c89a..b2501ceb 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -6,9 +6,9 @@ option('engines', option('loaders', type: 'array', - choices: ['', 'tvg', 'svg', 'png', 'jpg', 'lottie', 'ttf', 'webp_beta', 'all'], + choices: ['', 'tvg', 'svg', 'png', 'jpg', 'lottie', 'ttf', 'webp', 'all'], value: ['svg', 'tvg', 'lottie'], - description: 'Enable File Loaders in thorvg ("all" does not include "*_beta".)') + description: 'Enable File Loaders in thorvg') option('savers', type: 'array', diff --git a/src/examples/PictureWebp.cpp b/src/examples/PictureWebp.cpp index 31a9eaeb..b5b2c50d 100644 --- a/src/examples/PictureWebp.cpp +++ b/src/examples/PictureWebp.cpp @@ -75,7 +75,6 @@ void tvgDrawCmds(tvg::Canvas* canvas) picture->translate(400, 0); picture->scale(0.8); canvas->push(std::move(picture)); - } diff --git a/src/loaders/external_webp/meson.build b/src/loaders/external_webp/meson.build index 70cdead6..69c3e336 100644 --- a/src/loaders/external_webp/meson.build +++ b/src/loaders/external_webp/meson.build @@ -3,10 +3,12 @@ source_file = [ 'tvgWebpLoader.cpp', ] -webp_dep = dependency('libwebp', required: true) +webp_dep = dependency('libwebp', required: false) -subloader_dep += [declare_dependency( - include_directories : include_directories('.'), - dependencies : webp_dep, - sources : source_file - )] \ No newline at end of file +if webp_dep.found() + subloader_dep += [declare_dependency( + include_directories : include_directories('.'), + dependencies : webp_dep, + sources : source_file + )] +endif \ No newline at end of file diff --git a/src/loaders/meson.build b/src/loaders/meson.build index 16526802..41db2a13 100644 --- a/src/loaders/meson.build +++ b/src/loaders/meson.build @@ -38,11 +38,14 @@ if all_loaders or get_option('loaders').contains('jpg') == true endif endif -if get_option('loaders').contains('webp_beta') == true +if all_loaders or get_option('loaders').contains('webp') == true if get_option('static') == true - message('static webp is not available, disable webp_beta loader.') + subdir('webp') else subdir('external_webp') + if not webp_dep.found() + subdir('webp') + endif endif endif diff --git a/src/loaders/webp/dec/alpha.cpp b/src/loaders/webp/dec/alpha.cpp new file mode 100644 index 00000000..3d3df148 --- /dev/null +++ b/src/loaders/webp/dec/alpha.cpp @@ -0,0 +1,167 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Alpha-plane decompression. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include +#include "./alphai.h" +#include "./vp8i.h" +#include "./vp8li.h" +#include "../dsp/dsp.h" +#include "../utils/quant_levels_dec.h" +#include "../utils/utils.h" +#include "../webp/format_constants.h" + +//------------------------------------------------------------------------------ +// ALPHDecoder object. + +ALPHDecoder* ALPHNew(void) { + ALPHDecoder* const dec = (ALPHDecoder*)calloc(1ULL, sizeof(*dec)); + return dec; +} + +void ALPHDelete(ALPHDecoder* const dec) { + if (dec != NULL) { + VP8LDelete(dec->vp8l_dec_); + dec->vp8l_dec_ = NULL; + free(dec); + } +} + +//------------------------------------------------------------------------------ +// Decoding. + +// Initialize alpha decoding by parsing the alpha header and decoding the image +// header for alpha data stored using lossless compression. +// Returns false in case of error in alpha header (data too short, invalid +// compression method or filter, error in lossless header data etc). +static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data, + size_t data_size, int width, int height, uint8_t* output) { + int ok = 0; + const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN; + const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN; + int rsrv; + + assert(width > 0 && height > 0); + assert(data != NULL && output != NULL); + + dec->width_ = width; + dec->height_ = height; + + if (data_size <= ALPHA_HEADER_LEN) { + return 0; + } + + dec->method_ = (data[0] >> 0) & 0x03; + dec->filter_ = static_cast((data[0] >> 2) & 0x03); + dec->pre_processing_ = (data[0] >> 4) & 0x03; + rsrv = (data[0] >> 6) & 0x03; + if (dec->method_ < ALPHA_NO_COMPRESSION || + dec->method_ > ALPHA_LOSSLESS_COMPRESSION || + dec->filter_ >= WEBP_FILTER_LAST || + dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS || + rsrv != 0) { + return 0; + } + + if (dec->method_ == ALPHA_NO_COMPRESSION) { + const size_t alpha_decoded_size = dec->width_ * dec->height_; + ok = (alpha_data_size >= alpha_decoded_size); + } else { + assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION); + ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output); + } + VP8FiltersInit(); + return ok; +} + +// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha +// starting from row number 'row'. It assumes that rows up to (row - 1) have +// already been decoded. +// Returns false in case of bitstream error. +static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) { + ALPHDecoder* const alph_dec = dec->alph_dec_; + const int width = alph_dec->width_; + const int height = alph_dec->height_; + WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_]; + uint8_t* const output = dec->alpha_plane_; + if (alph_dec->method_ == ALPHA_NO_COMPRESSION) { + const size_t offset = row * width; + const size_t num_pixels = num_rows * width; + assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels); + memcpy(dec->alpha_plane_ + offset, + dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels); + } else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION + assert(alph_dec->vp8l_dec_ != NULL); + if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) { + return 0; + } + } + + if (unfilter_func != NULL) { + unfilter_func(width, height, width, row, num_rows, output); + } + + if (row + num_rows == dec->pic_hdr_.height_) { + dec->is_alpha_decoded_ = 1; + } + return 1; +} + +//------------------------------------------------------------------------------ +// Main entry point. + +const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, + int row, int num_rows) { + const int width = dec->pic_hdr_.width_; + const int height = dec->pic_hdr_.height_; + + if (row < 0 || num_rows <= 0 || row + num_rows > height) { + return NULL; // sanity check. + } + + if (row == 0) { + // Initialize decoding. + assert(dec->alpha_plane_ != NULL); + dec->alph_dec_ = ALPHNew(); + if (dec->alph_dec_ == NULL) return NULL; + if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_, + width, height, dec->alpha_plane_)) { + ALPHDelete(dec->alph_dec_); + dec->alph_dec_ = NULL; + return NULL; + } + // if we allowed use of alpha dithering, check whether it's needed at all + if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) { + dec->alpha_dithering_ = 0; // disable dithering + } else { + num_rows = height; // decode everything in one pass + } + } + + if (!dec->is_alpha_decoded_) { + int ok = 0; + assert(dec->alph_dec_ != NULL); + ok = ALPHDecode(dec, row, num_rows); + if (ok && dec->alpha_dithering_ > 0) { + ok = WebPDequantizeLevels(dec->alpha_plane_, width, height, + dec->alpha_dithering_); + } + if (!ok || dec->is_alpha_decoded_) { + ALPHDelete(dec->alph_dec_); + dec->alph_dec_ = NULL; + } + if (!ok) return NULL; // Error. + } + + // Return a pointer to the current decoded row. + return dec->alpha_plane_ + row * width; +} diff --git a/src/loaders/webp/dec/alphai.h b/src/loaders/webp/dec/alphai.h new file mode 100644 index 00000000..058e883c --- /dev/null +++ b/src/loaders/webp/dec/alphai.h @@ -0,0 +1,55 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Alpha decoder: internal header. +// +// Author: Urvang (urvang@google.com) + +#ifndef WEBP_DEC_ALPHAI_H_ +#define WEBP_DEC_ALPHAI_H_ + +#include "./webpi.h" +#include "../dsp/dsp.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP8LDecoder; // Defined in dec/vp8li.h. + +typedef struct ALPHDecoder ALPHDecoder; +struct ALPHDecoder { + int width_; + int height_; + int method_; + WEBP_FILTER_TYPE filter_; + int pre_processing_; + struct VP8LDecoder* vp8l_dec_; + VP8Io io_; + int use_8b_decode; // Although alpha channel requires only 1 byte per + // pixel, sometimes VP8LDecoder may need to allocate + // 4 bytes per pixel internally during decode. +}; + +//------------------------------------------------------------------------------ +// internal functions. Not public. + +// Allocates a new alpha decoder instance. +ALPHDecoder* ALPHNew(void); + +// Clears and deallocates an alpha decoder instance. +void ALPHDelete(ALPHDecoder* const dec); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_DEC_ALPHAI_H_ */ diff --git a/src/loaders/webp/dec/buffer.cpp b/src/loaders/webp/dec/buffer.cpp new file mode 100644 index 00000000..bdddcc93 --- /dev/null +++ b/src/loaders/webp/dec/buffer.cpp @@ -0,0 +1,238 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Everything about WebPDecBuffer +// +// Author: Skal (pascal.massimino@gmail.com) + +#include + +#include "./vp8i.h" +#include "./webpi.h" +#include "../utils/utils.h" + +//------------------------------------------------------------------------------ +// WebPDecBuffer + +// Number of bytes per pixel for the different color-spaces. +static const int kModeBpp[MODE_LAST] = { + 3, 4, 3, 4, 4, 2, 2, + 4, 4, 4, 2, // pre-multiplied modes + 1, 1 }; + +// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE. +// Convert to an integer to handle both the unsigned/signed enum cases +// without the need for casting to remove type limit warnings. +static int IsValidColorspace(int webp_csp_mode) { + return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST); +} + +static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { + int ok = 1; + const WEBP_CSP_MODE mode = buffer->colorspace; + const int width = buffer->width; + const int height = buffer->height; + if (!IsValidColorspace(mode)) { + ok = 0; + } else if (!WebPIsRGBMode(mode)) { // YUV checks + const WebPYUVABuffer* const buf = &buffer->u.YUVA; + const int y_stride = abs(buf->y_stride); + const int u_stride = abs(buf->u_stride); + const int v_stride = abs(buf->v_stride); + const int a_stride = abs(buf->a_stride); + const uint64_t y_size = (uint64_t)y_stride * height; + const uint64_t u_size = (uint64_t)u_stride * ((height + 1) / 2); + const uint64_t v_size = (uint64_t)v_stride * ((height + 1) / 2); + const uint64_t a_size = (uint64_t)a_stride * height; + ok &= (y_size <= buf->y_size); + ok &= (u_size <= buf->u_size); + ok &= (v_size <= buf->v_size); + ok &= (y_stride >= width); + ok &= (u_stride >= (width + 1) / 2); + ok &= (v_stride >= (width + 1) / 2); + ok &= (buf->y != NULL); + ok &= (buf->u != NULL); + ok &= (buf->v != NULL); + if (mode == MODE_YUVA) { + ok &= (a_stride >= width); + ok &= (a_size <= buf->a_size); + ok &= (buf->a != NULL); + } + } else { // RGB checks + const WebPRGBABuffer* const buf = &buffer->u.RGBA; + const int stride = abs(buf->stride); + const uint64_t size = (uint64_t)stride * height; + ok &= (size <= buf->size); + ok &= (stride >= width * kModeBpp[mode]); + ok &= (buf->rgba != NULL); + } + return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM; +} + +static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { + const int w = buffer->width; + const int h = buffer->height; + const WEBP_CSP_MODE mode = buffer->colorspace; + + if (w <= 0 || h <= 0 || !IsValidColorspace(mode)) { + return VP8_STATUS_INVALID_PARAM; + } + + if (!buffer->is_external_memory && buffer->private_memory == NULL) { + uint8_t* output; + int uv_stride = 0, a_stride = 0; + uint64_t uv_size = 0, a_size = 0, total_size; + // We need memory and it hasn't been allocated yet. + // => initialize output buffer, now that dimensions are known. + const int stride = w * kModeBpp[mode]; + const uint64_t size = (uint64_t)stride * h; + + if (!WebPIsRGBMode(mode)) { + uv_stride = (w + 1) / 2; + uv_size = (uint64_t)uv_stride * ((h + 1) / 2); + if (mode == MODE_YUVA) { + a_stride = w; + a_size = (uint64_t)a_stride * h; + } + } + total_size = size + 2 * uv_size + a_size; + + // Security/sanity checks + output = (uint8_t*)malloc(total_size * sizeof(*output)); + if (output == NULL) { + return VP8_STATUS_OUT_OF_MEMORY; + } + buffer->private_memory = output; + + if (!WebPIsRGBMode(mode)) { // YUVA initialization + WebPYUVABuffer* const buf = &buffer->u.YUVA; + buf->y = output; + buf->y_stride = stride; + buf->y_size = (size_t)size; + buf->u = output + size; + buf->u_stride = uv_stride; + buf->u_size = (size_t)uv_size; + buf->v = output + size + uv_size; + buf->v_stride = uv_stride; + buf->v_size = (size_t)uv_size; + if (mode == MODE_YUVA) { + buf->a = output + size + 2 * uv_size; + } + buf->a_size = (size_t)a_size; + buf->a_stride = a_stride; + } else { // RGBA initialization + WebPRGBABuffer* const buf = &buffer->u.RGBA; + buf->rgba = output; + buf->stride = stride; + buf->size = (size_t)size; + } + } + return CheckDecBuffer(buffer); +} + +VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) { + if (buffer == NULL) { + return VP8_STATUS_INVALID_PARAM; + } + if (WebPIsRGBMode(buffer->colorspace)) { + WebPRGBABuffer* const buf = &buffer->u.RGBA; + buf->rgba += (buffer->height - 1) * buf->stride; + buf->stride = -buf->stride; + } else { + WebPYUVABuffer* const buf = &buffer->u.YUVA; + const int H = buffer->height; + buf->y += (H - 1) * buf->y_stride; + buf->y_stride = -buf->y_stride; + buf->u += ((H - 1) >> 1) * buf->u_stride; + buf->u_stride = -buf->u_stride; + buf->v += ((H - 1) >> 1) * buf->v_stride; + buf->v_stride = -buf->v_stride; + if (buf->a != NULL) { + buf->a += (H - 1) * buf->a_stride; + buf->a_stride = -buf->a_stride; + } + } + return VP8_STATUS_OK; +} + +VP8StatusCode WebPAllocateDecBuffer(int w, int h, + const WebPDecoderOptions* const options, + WebPDecBuffer* const out) { + VP8StatusCode status; + if (out == NULL || w <= 0 || h <= 0) { + return VP8_STATUS_INVALID_PARAM; + } + if (options != NULL) { // First, apply options if there is any. + if (options->use_cropping) { + const int cw = options->crop_width; + const int ch = options->crop_height; + const int x = options->crop_left & ~1; + const int y = options->crop_top & ~1; + if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) { + return VP8_STATUS_INVALID_PARAM; // out of frame boundary. + } + w = cw; + h = ch; + } + if (options->use_scaling) { + if (options->scaled_width <= 0 || options->scaled_height <= 0) { + return VP8_STATUS_INVALID_PARAM; + } + w = options->scaled_width; + h = options->scaled_height; + } + } + out->width = w; + out->height = h; + + // Then, allocate buffer for real. + status = AllocateBuffer(out); + if (status != VP8_STATUS_OK) return status; + + // Use the stride trick if vertical flip is needed. + if (options != NULL && options->flip) { + status = WebPFlipBuffer(out); + } + return status; +} + +//------------------------------------------------------------------------------ +// constructors / destructors + +int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) { + if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) { + return 0; // version mismatch + } + if (buffer == NULL) return 0; + memset(buffer, 0, sizeof(*buffer)); + return 1; +} + +void WebPFreeDecBuffer(WebPDecBuffer* buffer) { + if (buffer != NULL) { + if (!buffer->is_external_memory) { + free(buffer->private_memory); + } + buffer->private_memory = NULL; + } +} + +void WebPCopyDecBuffer(const WebPDecBuffer* const src, + WebPDecBuffer* const dst) { + if (src != NULL && dst != NULL) { + *dst = *src; + if (src->private_memory != NULL) { + dst->is_external_memory = 1; // dst buffer doesn't own the memory. + dst->private_memory = NULL; + } + } +} + +//------------------------------------------------------------------------------ + diff --git a/src/loaders/webp/dec/common.h b/src/loaders/webp/dec/common.h new file mode 100644 index 00000000..6961e224 --- /dev/null +++ b/src/loaders/webp/dec/common.h @@ -0,0 +1,54 @@ +// Copyright 2015 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Definitions and macros common to encoding and decoding +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_DEC_COMMON_H_ +#define WEBP_DEC_COMMON_H_ + +// intra prediction modes +enum { B_DC_PRED = 0, // 4x4 modes + B_TM_PRED = 1, + B_VE_PRED = 2, + B_HE_PRED = 3, + B_RD_PRED = 4, + B_VR_PRED = 5, + B_LD_PRED = 6, + B_VL_PRED = 7, + B_HD_PRED = 8, + B_HU_PRED = 9, + NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10 + + // Luma16 or UV modes + DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED, + H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED, + B_PRED = NUM_BMODES, // refined I4x4 mode + NUM_PRED_MODES = 4, + + // special modes + B_DC_PRED_NOTOP = 4, + B_DC_PRED_NOLEFT = 5, + B_DC_PRED_NOTOPLEFT = 6, + NUM_B_DC_MODES = 7 }; + +enum { MB_FEATURE_TREE_PROBS = 3, + NUM_MB_SEGMENTS = 4, + NUM_REF_LF_DELTAS = 4, + NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT + MAX_NUM_PARTITIONS = 8, + // Probabilities + NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC + NUM_BANDS = 8, + NUM_CTX = 3, + NUM_PROBAS = 11 + }; + +#endif // WEBP_DEC_COMMON_H_ diff --git a/src/loaders/webp/dec/decode_vp8.h b/src/loaders/webp/dec/decode_vp8.h new file mode 100644 index 00000000..236342fa --- /dev/null +++ b/src/loaders/webp/dec/decode_vp8.h @@ -0,0 +1,177 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Low-level API for VP8 decoder +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_WEBP_DECODE_VP8_H_ +#define WEBP_WEBP_DECODE_VP8_H_ + +#include "../webp/decode.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// Lower-level API +// +// These functions provide fine-grained control of the decoding process. +// The call flow should resemble: +// +// VP8Io io; +// VP8InitIo(&io); +// io.data = data; +// io.data_size = size; +// /* customize io's functions (setup()/put()/teardown()) if needed. */ +// +// VP8Decoder* dec = VP8New(); +// bool ok = VP8Decode(dec); +// if (!ok) printf("Error: %s\n", VP8StatusMessage(dec)); +// VP8Delete(dec); +// return ok; + +// Input / Output +typedef struct VP8Io VP8Io; +typedef int (*VP8IoPutHook)(const VP8Io* io); +typedef int (*VP8IoSetupHook)(VP8Io* io); +typedef void (*VP8IoTeardownHook)(const VP8Io* io); + +struct VP8Io { + // set by VP8GetHeaders() + int width, height; // picture dimensions, in pixels (invariable). + // These are the original, uncropped dimensions. + // The actual area passed to put() is stored + // in mb_w / mb_h fields. + + // set before calling put() + int mb_y; // position of the current rows (in pixels) + int mb_w; // number of columns in the sample + int mb_h; // number of rows in the sample + const uint8_t* y, *u, *v; // rows to copy (in yuv420 format) + int y_stride; // row stride for luma + int uv_stride; // row stride for chroma + + void* opaque; // user data + + // called when fresh samples are available. Currently, samples are in + // YUV420 format, and can be up to width x 24 in size (depending on the + // in-loop filtering level, e.g.). Should return false in case of error + // or abort request. The actual size of the area to update is mb_w x mb_h + // in size, taking cropping into account. + VP8IoPutHook put; + + // called just before starting to decode the blocks. + // Must return false in case of setup error, true otherwise. If false is + // returned, teardown() will NOT be called. But if the setup succeeded + // and true is returned, then teardown() will always be called afterward. + VP8IoSetupHook setup; + + // Called just after block decoding is finished (or when an error occurred + // during put()). Is NOT called if setup() failed. + VP8IoTeardownHook teardown; + + // this is a recommendation for the user-side yuv->rgb converter. This flag + // is set when calling setup() hook and can be overwritten by it. It then + // can be taken into consideration during the put() method. + int fancy_upsampling; + + // Input buffer. + size_t data_size; + const uint8_t* data; + + // If true, in-loop filtering will not be performed even if present in the + // bitstream. Switching off filtering may speed up decoding at the expense + // of more visible blocking. Note that output will also be non-compliant + // with the VP8 specifications. + int bypass_filtering; + + // Cropping parameters. + int use_cropping; + int crop_left, crop_right, crop_top, crop_bottom; + + // Scaling parameters. + int use_scaling; + int scaled_width, scaled_height; + + // If non NULL, pointer to the alpha data (if present) corresponding to the + // start of the current row (That is: it is pre-offset by mb_y and takes + // cropping into account). + const uint8_t* a; +}; + +// Internal, version-checked, entry point +int VP8InitIoInternal(VP8Io* const, int); + + +// Main decoding object. This is an opaque structure. +typedef struct VP8Decoder VP8Decoder; + +// Create a new decoder object. +VP8Decoder* VP8New(void); + +// Must be called to make sure 'io' is initialized properly. +// Returns false in case of version mismatch. Upon such failure, no other +// decoding function should be called (VP8Decode, VP8GetHeaders, ...) +static WEBP_INLINE int VP8InitIo(VP8Io* const io) { + return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION); +} + +// Decode the VP8 frame header. Returns true if ok. +// Note: 'io->data' must be pointing to the start of the VP8 frame header. +int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io); + +// Decode a picture. Will call VP8GetHeaders() if it wasn't done already. +// Returns false in case of error. +int VP8Decode(VP8Decoder* const dec, VP8Io* const io); + +// Return current status of the decoder: +VP8StatusCode VP8Status(VP8Decoder* const dec); + +// return readable string corresponding to the last status. +const char* VP8StatusMessage(VP8Decoder* const dec); + +// Resets the decoder in its initial state, reclaiming memory. +// Not a mandatory call between calls to VP8Decode(). +void VP8Clear(VP8Decoder* const dec); + +// Destroy the decoder object. +void VP8Delete(VP8Decoder* const dec); + +//------------------------------------------------------------------------------ +// Miscellaneous VP8/VP8L bitstream probing functions. + +// Returns true if the next 3 bytes in data contain the VP8 signature. +WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size); + +// Validates the VP8 data-header and retrieves basic header information viz +// width and height. Returns 0 in case of formatting error. *width/*height +// can be passed NULL. +WEBP_EXTERN(int) VP8GetInfo( + const uint8_t* data, + size_t data_size, // data available so far + size_t chunk_size, // total data size expected in the chunk + int* const width, int* const height); + +// Returns true if the next byte(s) in data is a VP8L signature. +WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size); + +// Validates the VP8L data-header and retrieves basic header information viz +// width, height and alpha. Returns 0 in case of formatting error. +// width/height/has_alpha can be passed NULL. +WEBP_EXTERN(int) VP8LGetInfo( + const uint8_t* data, size_t data_size, // data available so far + int* const width, int* const height, int* const has_alpha); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_WEBP_DECODE_VP8_H_ */ diff --git a/src/loaders/webp/dec/frame.cpp b/src/loaders/webp/dec/frame.cpp new file mode 100644 index 00000000..1f388a38 --- /dev/null +++ b/src/loaders/webp/dec/frame.cpp @@ -0,0 +1,713 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Frame-reconstruction function. Memory allocation. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include +#include "./vp8i.h" +#include "../utils/utils.h" + +#define ALIGN_CST (32 - 1) +#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) + +//------------------------------------------------------------------------------ +// Main reconstruction function. + +static const int kScan[16] = { + 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, + 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, + 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, + 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS +}; + +static int CheckMode(int mb_x, int mb_y, int mode) { + if (mode == B_DC_PRED) { + if (mb_x == 0) { + return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; + } else { + return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; + } + } + return mode; +} + +static void Copy32b(uint8_t* const dst, const uint8_t* const src) { + memcpy(dst, src, 4); +} + +static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, + uint8_t* const dst) { + switch (bits >> 30) { + case 3: + VP8Transform(src, dst, 0); + break; + case 2: + VP8TransformAC3(src, dst); + break; + case 1: + VP8TransformDC(src, dst); + break; + default: + break; + } +} + +static void DoUVTransform(uint32_t bits, const int16_t* const src, + uint8_t* const dst) { + if (bits & 0xff) { // any non-zero coeff at all? + if (bits & 0xaa) { // any non-zero AC coefficient? + VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V + } else { + VP8TransformDCUV(src, dst); + } + } +} + +static void ReconstructRow(const VP8Decoder* const dec, + const VP8ThreadContext* ctx) { + int j; + int mb_x; + const int mb_y = ctx->mb_y_; + const int cache_id = ctx->id_; + uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; + uint8_t* const u_dst = dec->yuv_b_ + U_OFF; + uint8_t* const v_dst = dec->yuv_b_ + V_OFF; + + // Initialize left-most block. + for (j = 0; j < 16; ++j) { + y_dst[j * BPS - 1] = 129; + } + for (j = 0; j < 8; ++j) { + u_dst[j * BPS - 1] = 129; + v_dst[j * BPS - 1] = 129; + } + + // Init top-left sample on left column too. + if (mb_y > 0) { + y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; + } else { + // we only need to do this init once at block (0,0). + // Afterward, it remains valid for the whole topmost row. + memset(y_dst - BPS - 1, 127, 16 + 4 + 1); + memset(u_dst - BPS - 1, 127, 8 + 1); + memset(v_dst - BPS - 1, 127, 8 + 1); + } + + // Reconstruct one row. + for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { + const VP8MBData* const block = ctx->mb_data_ + mb_x; + + // Rotate in the left samples from previously decoded block. We move four + // pixels at a time for alignment reason, and because of in-loop filter. + if (mb_x > 0) { + for (j = -1; j < 16; ++j) { + Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); + } + for (j = -1; j < 8; ++j) { + Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); + Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); + } + } + { + // bring top samples into the cache + VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; + const int16_t* const coeffs = block->coeffs_; + uint32_t bits = block->non_zero_y_; + int n; + + if (mb_y > 0) { + memcpy(y_dst - BPS, top_yuv[0].y, 16); + memcpy(u_dst - BPS, top_yuv[0].u, 8); + memcpy(v_dst - BPS, top_yuv[0].v, 8); + } + + // predict and add residuals + if (block->is_i4x4_) { // 4x4 + uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); + + if (mb_y > 0) { + if (mb_x >= dec->mb_w_ - 1) { // on rightmost border + memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); + } else { + memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); + } + } + // replicate the top-right pixels below + top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; + + // predict and add residuals for all 4x4 blocks in turn. + for (n = 0; n < 16; ++n, bits <<= 2) { + uint8_t* const dst = y_dst + kScan[n]; + VP8PredLuma4[block->imodes_[n]](dst); + DoTransform(bits, coeffs + n * 16, dst); + } + } else { // 16x16 + const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]); + VP8PredLuma16[pred_func](y_dst); + if (bits != 0) { + for (n = 0; n < 16; ++n, bits <<= 2) { + DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); + } + } + } + { + // Chroma + const uint32_t bits_uv = block->non_zero_uv_; + const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); + VP8PredChroma8[pred_func](u_dst); + VP8PredChroma8[pred_func](v_dst); + DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); + DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); + } + + // stash away top samples for next block + if (mb_y < dec->mb_h_ - 1) { + memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); + memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); + memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); + } + } + // Transfer reconstructed samples from yuv_b_ cache to final destination. + { + const int y_offset = cache_id * 16 * dec->cache_y_stride_; + const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; + uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; + uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; + uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; + for (j = 0; j < 16; ++j) { + memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); + } + for (j = 0; j < 8; ++j) { + memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); + memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); + } + } + } +} + +//------------------------------------------------------------------------------ +// Filtering + +// kFilterExtraRows[] = How many extra lines are needed on the MB boundary +// for caching, given a filtering level. +// Simple filter: up to 2 luma samples are read and 1 is written. +// Complex filter: up to 4 luma samples are read and 3 are written. Same for +// U/V, so it's 8 samples total (because of the 2x upsampling). +static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; + +static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) { + const VP8ThreadContext* const ctx = &dec->thread_ctx_; + const int cache_id = ctx->id_; + const int y_bps = dec->cache_y_stride_; + const VP8FInfo* const f_info = ctx->f_info_ + mb_x; + uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16; + const int ilevel = f_info->f_ilevel_; + const int limit = f_info->f_limit_; + if (limit == 0) { + return; + } + assert(limit >= 3); + if (dec->filter_type_ == 1) { // simple + if (mb_x > 0) { + VP8SimpleHFilter16(y_dst, y_bps, limit + 4); + } + if (f_info->f_inner_) { + VP8SimpleHFilter16i(y_dst, y_bps, limit); + } + if (mb_y > 0) { + VP8SimpleVFilter16(y_dst, y_bps, limit + 4); + } + if (f_info->f_inner_) { + VP8SimpleVFilter16i(y_dst, y_bps, limit); + } + } else { // complex + const int uv_bps = dec->cache_uv_stride_; + uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8; + uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8; + const int hev_thresh = f_info->hev_thresh_; + if (mb_x > 0) { + VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); + VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); + } + if (f_info->f_inner_) { + VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); + VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); + } + if (mb_y > 0) { + VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); + VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); + } + if (f_info->f_inner_) { + VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); + VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); + } + } +} + +// Filter the decoded macroblock row (if needed) +static void FilterRow(const VP8Decoder* const dec) { + int mb_x; + const int mb_y = dec->thread_ctx_.mb_y_; + assert(dec->thread_ctx_.filter_row_); + for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) { + DoFilter(dec, mb_x, mb_y); + } +} + +//------------------------------------------------------------------------------ +// Precompute the filtering strength for each segment and each i4x4/i16x16 mode. + +static void PrecomputeFilterStrengths(VP8Decoder* const dec) { + if (dec->filter_type_ > 0) { + int s; + const VP8FilterHeader* const hdr = &dec->filter_hdr_; + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + int i4x4; + // First, compute the initial level + int base_level; + if (dec->segment_hdr_.use_segment_) { + base_level = dec->segment_hdr_.filter_strength_[s]; + if (!dec->segment_hdr_.absolute_delta_) { + base_level += hdr->level_; + } + } else { + base_level = hdr->level_; + } + for (i4x4 = 0; i4x4 <= 1; ++i4x4) { + VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; + int level = base_level; + if (hdr->use_lf_delta_) { + level += hdr->ref_lf_delta_[0]; + if (i4x4) { + level += hdr->mode_lf_delta_[0]; + } + } + level = (level < 0) ? 0 : (level > 63) ? 63 : level; + if (level > 0) { + int ilevel = level; + if (hdr->sharpness_ > 0) { + if (hdr->sharpness_ > 4) { + ilevel >>= 2; + } else { + ilevel >>= 1; + } + if (ilevel > 9 - hdr->sharpness_) { + ilevel = 9 - hdr->sharpness_; + } + } + if (ilevel < 1) ilevel = 1; + info->f_ilevel_ = ilevel; + info->f_limit_ = 2 * level + ilevel; + info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0; + } else { + info->f_limit_ = 0; // no filtering + } + info->f_inner_ = i4x4; + } + } + } +} + +//------------------------------------------------------------------------------ +// Dithering + +#define DITHER_AMP_TAB_SIZE 12 +static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = { + // roughly, it's dqm->uv_mat_[1] + 8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1 +}; + +void VP8InitDithering(const WebPDecoderOptions* const options, + VP8Decoder* const dec) { + assert(dec != NULL); + if (options != NULL) { + const int d = options->dithering_strength; + const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1; + const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100); + if (f > 0) { + int s; + int all_amp = 0; + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + VP8QuantMatrix* const dqm = &dec->dqm_[s]; + if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) { + // TODO(skal): should we specially dither more for uv_quant_ < 0? + const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; + dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; + } + all_amp |= dqm->dither_; + } + if (all_amp != 0) { + VP8InitRandom(&dec->dithering_rg_, 1.0f); + dec->dither_ = 1; + } + } + // potentially allow alpha dithering + dec->alpha_dithering_ = options->alpha_dithering_strength; + if (dec->alpha_dithering_ > 100) { + dec->alpha_dithering_ = 100; + } else if (dec->alpha_dithering_ < 0) { + dec->alpha_dithering_ = 0; + } + } +} + +// minimal amp that will provide a non-zero dithering effect +#define MIN_DITHER_AMP 4 +#define DITHER_DESCALE 4 +#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) +#define DITHER_AMP_BITS 8 +#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) + +static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) { + int i, j; + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) { + // TODO: could be made faster with SSE2 + const int bits = + VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER; + // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100 + const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE; + const int v = (int)dst[i] + delta; + dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v; + } + dst += bps; + } +} + +static void DitherRow(VP8Decoder* const dec) { + int mb_x; + assert(dec->dither_); + for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) { + const VP8ThreadContext* const ctx = &dec->thread_ctx_; + const VP8MBData* const data = ctx->mb_data_ + mb_x; + const int cache_id = ctx->id_; + const int uv_bps = dec->cache_uv_stride_; + if (data->dither_ >= MIN_DITHER_AMP) { + uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8; + uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8; + Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_); + Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_); + } + } +} + +//------------------------------------------------------------------------------ +// This function is called after a row of macroblocks is finished decoding. +// It also takes into account the following restrictions: +// * In case of in-loop filtering, we must hold off sending some of the bottom +// pixels as they are yet unfiltered. They will be when the next macroblock +// row is decoded. Meanwhile, we must preserve them by rotating them in the +// cache area. This doesn't hold for the very bottom row of the uncropped +// picture of course. +// * we must clip the remaining pixels against the cropping area. The VP8Io +// struct must have the following fields set correctly before calling put(): + +#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB + +// Finalize and transmit a complete row. Return false in case of user-abort. +static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { + int ok = 1; + const VP8ThreadContext* const ctx = &dec->thread_ctx_; + const int cache_id = ctx->id_; + const int extra_y_rows = kFilterExtraRows[dec->filter_type_]; + const int ysize = extra_y_rows * dec->cache_y_stride_; + const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_; + const int y_offset = cache_id * 16 * dec->cache_y_stride_; + const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; + uint8_t* const ydst = dec->cache_y_ - ysize + y_offset; + uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset; + uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset; + const int mb_y = ctx->mb_y_; + const int is_first_row = (mb_y == 0); + const int is_last_row = (mb_y >= dec->br_mb_y_ - 1); + + if (ctx->filter_row_) { + FilterRow(dec); + } + + if (dec->dither_) { + DitherRow(dec); + } + + if (io->put != NULL) { + int y_start = MACROBLOCK_VPOS(mb_y); + int y_end = MACROBLOCK_VPOS(mb_y + 1); + if (!is_first_row) { + y_start -= extra_y_rows; + io->y = ydst; + io->u = udst; + io->v = vdst; + } else { + io->y = dec->cache_y_ + y_offset; + io->u = dec->cache_u_ + uv_offset; + io->v = dec->cache_v_ + uv_offset; + } + + if (!is_last_row) { + y_end -= extra_y_rows; + } + if (y_end > io->crop_bottom) { + y_end = io->crop_bottom; // make sure we don't overflow on last row. + } + io->a = NULL; + if (dec->alpha_data_ != NULL && y_start < y_end) { + // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a + // good idea. + io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start); + if (io->a == NULL) { + return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, + "Could not decode alpha data."); + } + } + if (y_start < io->crop_top) { + const int delta_y = io->crop_top - y_start; + y_start = io->crop_top; + assert(!(delta_y & 1)); + io->y += dec->cache_y_stride_ * delta_y; + io->u += dec->cache_uv_stride_ * (delta_y >> 1); + io->v += dec->cache_uv_stride_ * (delta_y >> 1); + if (io->a != NULL) { + io->a += io->width * delta_y; + } + } + if (y_start < y_end) { + io->y += io->crop_left; + io->u += io->crop_left >> 1; + io->v += io->crop_left >> 1; + if (io->a != NULL) { + io->a += io->crop_left; + } + io->mb_y = y_start - io->crop_top; + io->mb_w = io->crop_right - io->crop_left; + io->mb_h = y_end - y_start; + ok = io->put(io); + } + } + // rotate top samples if needed + if (cache_id + 1 == dec->num_caches_) { + if (!is_last_row) { + memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize); + memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize); + memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize); + } + } + + return ok; +} + +#undef MACROBLOCK_VPOS + +//------------------------------------------------------------------------------ + +int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) { + int ok = 1; + VP8ThreadContext* const ctx = &dec->thread_ctx_; + const int filter_row = + (dec->filter_type_ > 0) && + (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_); + // ctx->id_ and ctx->f_info_ are already set + ctx->mb_y_ = dec->mb_y_; + ctx->filter_row_ = filter_row; + ReconstructRow(dec, ctx); + ok = FinishRow(dec, io); + + return ok; +} + +//------------------------------------------------------------------------------ +// Finish setting up the decoding parameter once user's setup() is called. + +VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { + // Call setup() first. This may trigger additional decoding features on 'io'. + // Note: Afterward, we must call teardown() no matter what. + if (io->setup != NULL && !io->setup(io)) { + VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed"); + return dec->status_; + } + + // Disable filtering per user request + if (io->bypass_filtering) { + dec->filter_type_ = 0; + } + // TODO(skal): filter type / strength / sharpness forcing + + // Define the area where we can skip in-loop filtering, in case of cropping. + // + // 'Simple' filter reads two luma samples outside of the macroblock + // and filters one. It doesn't filter the chroma samples. Hence, we can + // avoid doing the in-loop filtering before crop_top/crop_left position. + // For the 'Complex' filter, 3 samples are read and up to 3 are filtered. + // Means: there's a dependency chain that goes all the way up to the + // top-left corner of the picture (MB #0). We must filter all the previous + // macroblocks. + // TODO(skal): add an 'approximate_decoding' option, that won't produce + // a 1:1 bit-exactness for complex filtering? + { + const int extra_pixels = kFilterExtraRows[dec->filter_type_]; + if (dec->filter_type_ == 2) { + // For complex filter, we need to preserve the dependency chain. + dec->tl_mb_x_ = 0; + dec->tl_mb_y_ = 0; + } else { + // For simple filter, we can filter only the cropped region. + // We include 'extra_pixels' on the other side of the boundary, since + // vertical or horizontal filtering of the previous macroblock can + // modify some abutting pixels. + dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4; + dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4; + if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0; + if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0; + } + // We need some 'extra' pixels on the right/bottom. + dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4; + dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4; + if (dec->br_mb_x_ > dec->mb_w_) { + dec->br_mb_x_ = dec->mb_w_; + } + if (dec->br_mb_y_ > dec->mb_h_) { + dec->br_mb_y_ = dec->mb_h_; + } + } + PrecomputeFilterStrengths(dec); + return VP8_STATUS_OK; +} + +int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) { + int ok = 1; + if (io->teardown != NULL) { + io->teardown(io); + } + return ok; +} + +#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case + +// Initialize multi/single-thread worker +static int InitThreadContext(VP8Decoder* const dec) { + dec->cache_id_ = 0; + dec->num_caches_ = ST_CACHE_LINES; + return 1; +} + +#undef ST_CACHE_LINES + +//------------------------------------------------------------------------------ +// Memory setup + +static int AllocateMemory(VP8Decoder* const dec) { + const int num_caches = dec->num_caches_; + const int mb_w = dec->mb_w_; + // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise. + const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t); + const size_t top_size = sizeof(VP8TopSamples) * mb_w; + const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB); + const size_t f_info_size = (dec->filter_type_ > 0) ? mb_w * sizeof(VP8FInfo) : 0; + const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_); + const size_t mb_data_size = mb_w * sizeof(*dec->mb_data_); + const size_t cache_height = (16 * num_caches + + kFilterExtraRows[dec->filter_type_]) * 3 / 2; + const size_t cache_size = top_size * cache_height; + // alpha_size is the only one that scales as width x height. + const uint64_t alpha_size = (dec->alpha_data_ != NULL) ? + (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; + const uint64_t needed = (uint64_t)intra_pred_mode_size + + top_size + mb_info_size + f_info_size + + yuv_size + mb_data_size + + cache_size + alpha_size + ALIGN_CST; + uint8_t* mem; + + if (needed != (size_t)needed) return 0; // check for overflow + if (needed > dec->mem_size_) { + free(dec->mem_); + dec->mem_size_ = 0; + dec->mem_ = malloc(needed * sizeof(uint8_t)); + if (dec->mem_ == NULL) { + return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, + "no memory during frame initialization."); + } + // down-cast is ok, thanks to WebPSafeAlloc() above. + dec->mem_size_ = (size_t)needed; + } + + mem = (uint8_t*)dec->mem_; + dec->intra_t_ = (uint8_t*)mem; + mem += intra_pred_mode_size; + + dec->yuv_t_ = (VP8TopSamples*)mem; + mem += top_size; + + dec->mb_info_ = ((VP8MB*)mem) + 1; + mem += mb_info_size; + + dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL; + mem += f_info_size; + dec->thread_ctx_.id_ = 0; + dec->thread_ctx_.f_info_ = dec->f_info_; + + mem = (uint8_t*)DO_ALIGN(mem); + assert((yuv_size & ALIGN_CST) == 0); + dec->yuv_b_ = (uint8_t*)mem; + mem += yuv_size; + + dec->mb_data_ = (VP8MBData*)mem; + dec->thread_ctx_.mb_data_ = (VP8MBData*)mem; + mem += mb_data_size; + + dec->cache_y_stride_ = 16 * mb_w; + dec->cache_uv_stride_ = 8 * mb_w; + { + const int extra_rows = kFilterExtraRows[dec->filter_type_]; + const int extra_y = extra_rows * dec->cache_y_stride_; + const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_; + dec->cache_y_ = ((uint8_t*)mem) + extra_y; + dec->cache_u_ = dec->cache_y_ + + 16 * num_caches * dec->cache_y_stride_ + extra_uv; + dec->cache_v_ = dec->cache_u_ + + 8 * num_caches * dec->cache_uv_stride_ + extra_uv; + dec->cache_id_ = 0; + } + mem += cache_size; + + // alpha plane + dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL; + mem += alpha_size; + assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_); + + // note: left/top-info is initialized once for all. + memset(dec->mb_info_ - 1, 0, mb_info_size); + VP8InitScanline(dec); // initialize left too. + + // initialize top + memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size); + + return 1; +} + +static void InitIo(VP8Decoder* const dec, VP8Io* io) { + // prepare 'io' + io->mb_y = 0; + io->y = dec->cache_y_; + io->u = dec->cache_u_; + io->v = dec->cache_v_; + io->y_stride = dec->cache_y_stride_; + io->uv_stride = dec->cache_uv_stride_; + io->a = NULL; +} + +int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { + if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. + if (!AllocateMemory(dec)) return 0; + InitIo(dec, io); + VP8DspInit(); // Init critical function pointers and look-up tables. + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/src/loaders/webp/dec/io.cpp b/src/loaders/webp/dec/io.cpp new file mode 100644 index 00000000..d0d05496 --- /dev/null +++ b/src/loaders/webp/dec/io.cpp @@ -0,0 +1,392 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// functions for sample output. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include +#include +#include "../dec/vp8i.h" +#include "./webpi.h" +#include "../dsp/dsp.h" +#include "../dsp/yuv.h" +#include "../utils/utils.h" + + +// Point-sampling U/V sampler. +static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { + WebPDecBuffer* const output = p->output; + WebPRGBABuffer* const buf = &output->u.RGBA; + uint8_t* const dst = buf->rgba + io->mb_y * buf->stride; + WebPSamplerProcessPlane(io->y, io->y_stride, + io->u, io->v, io->uv_stride, + dst, buf->stride, io->mb_w, io->mb_h, + WebPSamplers[output->colorspace]); + return io->mb_h; +} + +//------------------------------------------------------------------------------ +// Fancy upsampling + +#ifdef FANCY_UPSAMPLING +static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { + int num_lines_out = io->mb_h; // a priori guess + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace]; + const uint8_t* cur_y = io->y; + const uint8_t* cur_u = io->u; + const uint8_t* cur_v = io->v; + const uint8_t* top_u = p->tmp_u; + const uint8_t* top_v = p->tmp_v; + int y = io->mb_y; + const int y_end = io->mb_y + io->mb_h; + const int mb_w = io->mb_w; + const int uv_w = (mb_w + 1) / 2; + + if (y == 0) { + // First line is special cased. We mirror the u/v samples at boundary. + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w); + } else { + // We can finish the left-over line from previous call. + upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, + dst - buf->stride, dst, mb_w); + ++num_lines_out; + } + // Loop over each output pairs of row. + for (; y + 2 < y_end; y += 2) { + top_u = cur_u; + top_v = cur_v; + cur_u += io->uv_stride; + cur_v += io->uv_stride; + dst += 2 * buf->stride; + cur_y += 2 * io->y_stride; + upsample(cur_y - io->y_stride, cur_y, + top_u, top_v, cur_u, cur_v, + dst - buf->stride, dst, mb_w); + } + // move to last row + cur_y += io->y_stride; + if (io->crop_top + y_end < io->crop_bottom) { + // Save the unfinished samples for next call (as we're not done yet). + memcpy(p->tmp_y, cur_y, mb_w * sizeof(*p->tmp_y)); + memcpy(p->tmp_u, cur_u, uv_w * sizeof(*p->tmp_u)); + memcpy(p->tmp_v, cur_v, uv_w * sizeof(*p->tmp_v)); + // The fancy upsampler leaves a row unfinished behind + // (except for the very last row) + num_lines_out--; + } else { + // Process the very last row of even-sized picture + if (!(y_end & 1)) { + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, + dst + buf->stride, NULL, mb_w); + } + } + return num_lines_out; +} + +#endif /* FANCY_UPSAMPLING */ + +//------------------------------------------------------------------------------ + + +static int GetAlphaSourceRow(const VP8Io* const io, + const uint8_t** alpha, int* const num_rows) { + int start_y = io->mb_y; + *num_rows = io->mb_h; + + // Compensate for the 1-line delay of the fancy upscaler. + // This is similar to EmitFancyRGB(). + if (io->fancy_upsampling) { + if (start_y == 0) { + // We don't process the last row yet. It'll be done during the next call. + --*num_rows; + } else { + --start_y; + // Fortunately, *alpha data is persistent, so we can go back + // one row and finish alpha blending, now that the fancy upscaler + // completed the YUV->RGB interpolation. + *alpha -= io->width; + } + if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) { + // If it's the very last call, we process all the remaining rows! + *num_rows = io->crop_bottom - io->crop_top - start_y; + } + } + return start_y; +} + +static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { + const uint8_t* alpha = io->a; + if (alpha != NULL) { + const int mb_w = io->mb_w; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int alpha_first = + (colorspace == MODE_ARGB || colorspace == MODE_Argb); + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + int num_rows; + const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); + uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; + uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3); + const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w, + num_rows, dst, buf->stride); + + // has_alpha is true if there's non-trivial alpha to premultiply with. + if (has_alpha && WebPIsPremultipliedMode(colorspace)) { + WebPApplyAlphaMultiply(base_rgba, alpha_first, + mb_w, num_rows, buf->stride); + } + } + return 0; +} + + +//------------------------------------------------------------------------------ +// RGBA rescaling + +static int ExportRGB(WebPDecParams* const p, int y_pos) { + const WebPYUV444Converter convert = + WebPYUV444Converters[p->output->colorspace]; + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride; + int num_lines_out = 0; + // For RGB rescaling, because of the YUV420, current scan position + // U/V can be +1/-1 line from the Y one. Hence the double test. + while (WebPRescalerHasPendingOutput(&p->scaler_y) && + WebPRescalerHasPendingOutput(&p->scaler_u)) { + assert(p->last_y + y_pos + num_lines_out < p->output->height); + assert(p->scaler_u.y_accum == p->scaler_v.y_accum); + WebPRescalerExportRow(&p->scaler_y, 0); + WebPRescalerExportRow(&p->scaler_u, 0); + WebPRescalerExportRow(&p->scaler_v, 0); + convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst, + dst, p->scaler_y.dst_width); + dst += buf->stride; + ++num_lines_out; + } + return num_lines_out; +} + +static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { + const int mb_h = io->mb_h; + const int uv_mb_h = (mb_h + 1) >> 1; + int j = 0, uv_j = 0; + int num_lines_out = 0; + while (j < mb_h) { + const int y_lines_in = + WebPRescalerImport(&p->scaler_y, mb_h - j, + io->y + j * io->y_stride, io->y_stride); + const int u_lines_in = + WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j, + io->u + uv_j * io->uv_stride, io->uv_stride); + const int v_lines_in = + WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j, + io->v + uv_j * io->uv_stride, io->uv_stride); + (void)v_lines_in; // remove a gcc warning + assert(u_lines_in == v_lines_in); + j += y_lines_in; + uv_j += u_lines_in; + num_lines_out += ExportRGB(p, num_lines_out); + } + return num_lines_out; +} + +static int ExportAlpha(WebPDecParams* const p, int y_pos) { + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int alpha_first = + (colorspace == MODE_ARGB || colorspace == MODE_Argb); + uint8_t* dst = base_rgba + (alpha_first ? 0 : 3); + int num_lines_out = 0; + const int is_premult_alpha = WebPIsPremultipliedMode(colorspace); + uint32_t alpha_mask = 0xff; + const int width = p->scaler_a.dst_width; + + while (WebPRescalerHasPendingOutput(&p->scaler_a)) { + int i; + assert(p->last_y + y_pos + num_lines_out < p->output->height); + WebPRescalerExportRow(&p->scaler_a, 0); + for (i = 0; i < width; ++i) { + const uint32_t alpha_value = p->scaler_a.dst[i]; + dst[4 * i] = alpha_value; + alpha_mask &= alpha_value; + } + dst += buf->stride; + ++num_lines_out; + } + if (is_premult_alpha && alpha_mask != 0xff) { + WebPApplyAlphaMultiply(base_rgba, alpha_first, + width, num_lines_out, buf->stride); + } + return num_lines_out; +} + +static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { + if (io->a != NULL) { + WebPRescaler* const scaler = &p->scaler_a; + int j = 0; + int pos = 0; + while (j < io->mb_h) { + j += WebPRescalerImport(scaler, io->mb_h - j, + io->a + j * io->width, io->width); + pos += p->emit_alpha_row(p, pos); + } + } + return 0; +} + +static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { + const int has_alpha = WebPIsAlphaMode(p->output->colorspace); + const int out_width = io->scaled_width; + const int out_height = io->scaled_height; + const int uv_in_width = (io->mb_w + 1) >> 1; + const int uv_in_height = (io->mb_h + 1) >> 1; + const size_t work_size = 2 * out_width; // scratch memory for one rescaler + int32_t* work; // rescalers work area + uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion + size_t tmp_size1, tmp_size2, total_size; + + tmp_size1 = 3 * work_size; + tmp_size2 = 3 * out_width; + if (has_alpha) { + tmp_size1 += work_size; + tmp_size2 += out_width; + } + total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp); + p->memory = calloc(1ULL, total_size); + if (p->memory == NULL) { + return 0; // memory error + } + work = (int32_t*)p->memory; + tmp = (uint8_t*)(work + tmp_size1); + WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h, + tmp + 0 * out_width, out_width, out_height, 0, 1, + io->mb_w, out_width, io->mb_h, out_height, + work + 0 * work_size); + WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height, + tmp + 1 * out_width, out_width, out_height, 0, 1, + io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, + work + 1 * work_size); + WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height, + tmp + 2 * out_width, out_width, out_height, 0, 1, + io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, + work + 2 * work_size); + p->emit = EmitRescaledRGB; + WebPInitYUV444Converters(); + + if (has_alpha) { + WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, + tmp + 3 * out_width, out_width, out_height, 0, 1, + io->mb_w, out_width, io->mb_h, out_height, + work + 3 * work_size); + p->emit_alpha = EmitRescaledAlphaRGB; + p->emit_alpha_row = ExportAlpha; + WebPInitAlphaProcessing(); + } + return 1; +} + +//------------------------------------------------------------------------------ +// Default custom functions + +static int CustomSetup(VP8Io* io) { + WebPDecParams* const p = (WebPDecParams*)io->opaque; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int is_rgb = WebPIsRGBMode(colorspace); + const int is_alpha = WebPIsAlphaMode(colorspace); + + p->memory = NULL; + p->emit = NULL; + p->emit_alpha = NULL; + p->emit_alpha_row = NULL; + + if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) { + return 0; + } + if (is_alpha && WebPIsPremultipliedMode(colorspace)) { + WebPInitUpsamplers(); + } + if (io->use_scaling) { + const int ok = InitRGBRescaler(io, p); + if (!ok) { + return 0; // memory error + } + } else { + if (is_rgb) { + p->emit = EmitSampledRGB; // default + if (io->fancy_upsampling) { +#ifdef FANCY_UPSAMPLING + const int uv_width = (io->mb_w + 1) >> 1; + p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width)); + if (p->memory == NULL) { + return 0; // memory error. + } + p->tmp_y = (uint8_t*)p->memory; + p->tmp_u = p->tmp_y + io->mb_w; + p->tmp_v = p->tmp_u + uv_width; + p->emit = EmitFancyRGB; + WebPInitUpsamplers(); +#endif + } else { + WebPInitSamplers(); + } + } + if (is_alpha) { // need transparency output + p->emit_alpha = EmitAlphaRGB; + if (is_rgb) WebPInitAlphaProcessing(); + } + } + + if (is_rgb) { + VP8YUVInit(); + } + return 1; +} + +//------------------------------------------------------------------------------ + +static int CustomPut(const VP8Io* io) { + WebPDecParams* const p = (WebPDecParams*)io->opaque; + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int num_lines_out; + assert(!(io->mb_y & 1)); + + if (mb_w <= 0 || mb_h <= 0) { + return 0; + } + num_lines_out = p->emit(io, p); + if (p->emit_alpha != NULL) { + p->emit_alpha(io, p); + } + p->last_y += num_lines_out; + return 1; +} + +//------------------------------------------------------------------------------ + +static void CustomTeardown(const VP8Io* io) { + WebPDecParams* const p = (WebPDecParams*)io->opaque; + free(p->memory); + p->memory = NULL; +} + +//------------------------------------------------------------------------------ +// Main entry point + +void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) { + io->put = CustomPut; + io->setup = CustomSetup; + io->teardown = CustomTeardown; + io->opaque = params; +} + +//------------------------------------------------------------------------------ diff --git a/src/loaders/webp/dec/meson.build b/src/loaders/webp/dec/meson.build new file mode 100644 index 00000000..7a35e281 --- /dev/null +++ b/src/loaders/webp/dec/meson.build @@ -0,0 +1,22 @@ +source_file = [ + 'alphai.h', + 'common.h', + 'decode_vp8.h', + 'vp8i.h', + 'vp8li.h', + 'webpi.h', + 'alpha.cpp', + 'buffer.cpp', + 'frame.cpp', + 'io.cpp', + 'quant.cpp', + 'tree.cpp', + 'vp8.cpp', + 'vp8l.cpp', + 'webp.cpp' +] + +webp_deb += [declare_dependency( + include_directories : include_directories('.'), + sources : source_file +)] diff --git a/src/loaders/webp/dec/quant.cpp b/src/loaders/webp/dec/quant.cpp new file mode 100644 index 00000000..5b648f94 --- /dev/null +++ b/src/loaders/webp/dec/quant.cpp @@ -0,0 +1,110 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Quantizer initialization +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./vp8i.h" + +static WEBP_INLINE int clip(int v, int M) { + return v < 0 ? 0 : v > M ? M : v; +} + +// Paragraph 14.1 +static const uint8_t kDcTable[128] = { + 4, 5, 6, 7, 8, 9, 10, 10, + 11, 12, 13, 14, 15, 16, 17, 17, + 18, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 25, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, + 37, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 76, 77, 78, 79, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, + 91, 93, 95, 96, 98, 100, 101, 102, + 104, 106, 108, 110, 112, 114, 116, 118, + 122, 124, 126, 128, 130, 132, 134, 136, + 138, 140, 143, 145, 148, 151, 154, 157 +}; + +static const uint16_t kAcTable[128] = { + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, + 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, 88, 90, 92, + 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, + 131, 134, 137, 140, 143, 146, 149, 152, + 155, 158, 161, 164, 167, 170, 173, 177, + 181, 185, 189, 193, 197, 201, 205, 209, + 213, 217, 221, 225, 229, 234, 239, 245, + 249, 254, 259, 264, 269, 274, 279, 284 +}; + +//------------------------------------------------------------------------------ +// Paragraph 9.6 + +void VP8ParseQuant(VP8Decoder* const dec) { + VP8BitReader* const br = &dec->br_; + const int base_q0 = VP8GetValue(br, 7); + const int dqy1_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; + const int dqy2_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; + const int dqy2_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; + const int dquv_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; + const int dquv_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; + + const VP8SegmentHeader* const hdr = &dec->segment_hdr_; + int i; + + for (i = 0; i < NUM_MB_SEGMENTS; ++i) { + int q; + if (hdr->use_segment_) { + q = hdr->quantizer_[i]; + if (!hdr->absolute_delta_) { + q += base_q0; + } + } else { + if (i > 0) { + dec->dqm_[i] = dec->dqm_[0]; + continue; + } else { + q = base_q0; + } + } + { + VP8QuantMatrix* const m = &dec->dqm_[i]; + m->y1_mat_[0] = kDcTable[clip(q + dqy1_dc, 127)]; + m->y1_mat_[1] = kAcTable[clip(q + 0, 127)]; + + m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2; + // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. + // The smallest precision for that is '(x*6349) >> 12' but 16 is a good + // word size. + m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16; + if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8; + + m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)]; + m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)]; + + m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation + } + } +} + +//------------------------------------------------------------------------------ + diff --git a/src/loaders/webp/dec/tree.cpp b/src/loaders/webp/dec/tree.cpp new file mode 100644 index 00000000..c2007ea7 --- /dev/null +++ b/src/loaders/webp/dec/tree.cpp @@ -0,0 +1,525 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Coding trees and probas +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./vp8i.h" +#include "../utils/bit_reader_inl.h" + +#define USE_GENERIC_TREE + +#ifdef USE_GENERIC_TREE +static const int8_t kYModesIntra4[18] = { + -B_DC_PRED, 1, + -B_TM_PRED, 2, + -B_VE_PRED, 3, + 4, 6, + -B_HE_PRED, 5, + -B_RD_PRED, -B_VR_PRED, + -B_LD_PRED, 7, + -B_VL_PRED, 8, + -B_HD_PRED, -B_HU_PRED +}; +#endif + +//------------------------------------------------------------------------------ +// Default probabilities + +// Paragraph 13.5 +static const uint8_t + CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = { + { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } + }, + { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + }, + { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + }, + { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } + }, + { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } + }, + { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } + }, + { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } + }, + { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } + }, + { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } + }, + { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } + }, + { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } + }, + { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } + }, + { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } + }, + { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } + } + }, + { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } + }, + { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } + }, + { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } + }, + { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } + }, + { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } + }, + { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } + }, + { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } + }, + { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } + }, + { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } + }, + { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } + }, + { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } + }, + { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + } + } +}; + +// Paragraph 11.5 +static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = { + { { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, + { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, + { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, + { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, + { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, + { 121, 24, 80, 195, 26, 62, 44, 64, 85 }, + { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, + { 170, 46, 55, 19, 136, 160, 33, 206, 71 }, + { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, + { 81, 40, 11, 96, 182, 84, 29, 16, 36 } }, + { { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, + { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, + { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, + { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, + { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, + { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, + { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, + { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, + { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, + { 66, 45, 25, 102, 197, 189, 23, 18, 22 } }, + { { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, + { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, + { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, + { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, + { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, + { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, + { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, + { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, + { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, + { 62, 18, 78, 95, 85, 57, 50, 48, 51 } }, + { { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, + { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, + { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, + { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, + { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, + { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, + { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, + { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, + { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, + { 51, 50, 17, 168, 209, 192, 23, 25, 82 } }, + { { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, + { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, + { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, + { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, + { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, + { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, + { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, + { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, + { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, + { 87, 37, 9, 115, 59, 77, 64, 21, 47 } }, + { { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, + { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, + { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, + { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, + { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, + { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, + { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, + { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, + { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, + { 55, 38, 70, 124, 73, 102, 1, 34, 98 } }, + { { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, + { 95, 84, 53, 89, 128, 100, 113, 101, 45 }, + { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, + { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, + { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, + { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, + { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, + { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, + { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, + { 117, 20, 15, 36, 163, 128, 68, 1, 26 } }, + { { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, + { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, + { 68, 45, 128, 34, 1, 47, 11, 245, 171 }, + { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, + { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, + { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, + { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, + { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, + { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, + { 58, 15, 20, 82, 135, 57, 26, 121, 40 } }, + { { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, + { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, + { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, + { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, + { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, + { 56, 21, 39, 155, 60, 138, 23, 102, 213 }, + { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, + { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, + { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, + { 35, 27, 10, 146, 174, 171, 12, 26, 128 } }, + { { 190, 80, 35, 99, 180, 80, 126, 54, 45 }, + { 85, 126, 47, 87, 176, 51, 41, 20, 32 }, + { 101, 75, 128, 139, 118, 146, 116, 128, 85 }, + { 56, 41, 15, 176, 236, 85, 37, 9, 62 }, + { 71, 30, 17, 119, 118, 255, 17, 18, 138 }, + { 101, 38, 60, 138, 55, 70, 43, 26, 142 }, + { 146, 36, 19, 30, 171, 255, 97, 27, 20 }, + { 138, 45, 61, 62, 219, 1, 81, 188, 64 }, + { 32, 41, 20, 117, 151, 142, 20, 21, 163 }, + { 112, 19, 12, 61, 195, 128, 48, 4, 24 } } +}; + +void VP8ResetProba(VP8Proba* const proba) { + memset(proba->segments_, 255u, sizeof(proba->segments_)); + // proba->bands_[][] is initialized later +} + +static void ParseIntraMode(VP8BitReader* const br, + VP8Decoder* const dec, int mb_x) { + uint8_t* const top = dec->intra_t_ + 4 * mb_x; + uint8_t* const left = dec->intra_l_; + VP8MBData* const block = dec->mb_data_ + mb_x; + + // Note: we don't save segment map (yet), as we don't expect + // to decode more than 1 keyframe. + if (dec->segment_hdr_.update_map_) { + // Hardcoded tree parsing + block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0]) + ? VP8GetBit(br, dec->proba_.segments_[1]) + : 2 + VP8GetBit(br, dec->proba_.segments_[2]); + } else { + block->segment_ = 0; // default for intra + } + if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_); + + block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first + if (!block->is_i4x4_) { + // Hardcoded 16x16 intra-mode decision tree. + const int ymode = + VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED) + : (VP8GetBit(br, 163) ? V_PRED : DC_PRED); + block->imodes_[0] = ymode; + memset(top, ymode, 4 * sizeof(*top)); + memset(left, ymode, 4 * sizeof(*left)); + } else { + uint8_t* modes = block->imodes_; + int y; + for (y = 0; y < 4; ++y) { + int ymode = left[y]; + int x; + for (x = 0; x < 4; ++x) { + const uint8_t* const prob = kBModesProba[top[x]][ymode]; +#ifdef USE_GENERIC_TREE + // Generic tree-parsing + int i = kYModesIntra4[VP8GetBit(br, prob[0])]; + while (i > 0) { + i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])]; + } + ymode = -i; +#else + // Hardcoded tree parsing + ymode = !VP8GetBit(br, prob[0]) ? B_DC_PRED : + !VP8GetBit(br, prob[1]) ? B_TM_PRED : + !VP8GetBit(br, prob[2]) ? B_VE_PRED : + !VP8GetBit(br, prob[3]) ? + (!VP8GetBit(br, prob[4]) ? B_HE_PRED : + (!VP8GetBit(br, prob[5]) ? B_RD_PRED : B_VR_PRED)) : + (!VP8GetBit(br, prob[6]) ? B_LD_PRED : + (!VP8GetBit(br, prob[7]) ? B_VL_PRED : + (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED))); +#endif // USE_GENERIC_TREE + top[x] = ymode; + } + memcpy(modes, top, 4 * sizeof(*top)); + modes += 4; + left[y] = ymode; + } + } + // Hardcoded UVMode decision tree + block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED + : !VP8GetBit(br, 114) ? V_PRED + : VP8GetBit(br, 183) ? TM_PRED : H_PRED; +} + +int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) { + int mb_x; + for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { + ParseIntraMode(br, dec, mb_x); + } + return !dec->br_.eof_; +} + +//------------------------------------------------------------------------------ +// Paragraph 13 + +static const uint8_t + CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = { + { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + }, + { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 }, + { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 } + }, + { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + }, + { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 } + }, + { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + }, + { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + }, + { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } + } + } +}; + +// Paragraph 9.9 + +static const int kBands[16 + 1] = { + 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, + 0 // extra entry as sentinel +}; + +void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) { + VP8Proba* const proba = &dec->proba_; + int t, b, c, p; + for (t = 0; t < NUM_TYPES; ++t) { + for (b = 0; b < NUM_BANDS; ++b) { + for (c = 0; c < NUM_CTX; ++c) { + for (p = 0; p < NUM_PROBAS; ++p) { + const int v = VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ? + VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p]; + proba->bands_[t][b].probas_[c][p] = v; + } + } + } + for (b = 0; b < 16 + 1; ++b) { + proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]]; + } + } + dec->use_skip_proba_ = VP8Get(br); + if (dec->use_skip_proba_) { + dec->skip_p_ = VP8GetValue(br, 8); + } +} + diff --git a/src/loaders/webp/dec/vp8.cpp b/src/loaders/webp/dec/vp8.cpp new file mode 100644 index 00000000..cdb0e8a3 --- /dev/null +++ b/src/loaders/webp/dec/vp8.cpp @@ -0,0 +1,681 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// main entry for the decoder +// +// Author: Skal (pascal.massimino@gmail.com) + +#include + +#include "./alphai.h" +#include "./vp8i.h" +#include "./vp8li.h" +#include "./webpi.h" +#include "../utils/bit_reader_inl.h" +#include "../utils/utils.h" + +//------------------------------------------------------------------------------ + +int WebPGetDecoderVersion(void) { + return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION; +} + +//------------------------------------------------------------------------------ +// VP8Decoder + +static void SetOk(VP8Decoder* const dec) { + dec->status_ = VP8_STATUS_OK; + dec->error_msg_ = "OK"; +} + +int VP8InitIoInternal(VP8Io* const io, int version) { + if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) { + return 0; // mismatch error + } + if (io != NULL) { + memset(io, 0, sizeof(*io)); + } + return 1; +} + +VP8Decoder* VP8New(void) { + VP8Decoder* const dec = (VP8Decoder*)calloc(1ULL, sizeof(*dec)); + if (dec != NULL) { + SetOk(dec); + dec->ready_ = 0; + dec->num_parts_ = 1; + } + return dec; +} + +VP8StatusCode VP8Status(VP8Decoder* const dec) { + if (!dec) return VP8_STATUS_INVALID_PARAM; + return dec->status_; +} + +const char* VP8StatusMessage(VP8Decoder* const dec) { + if (dec == NULL) return "no object"; + if (!dec->error_msg_) return "OK"; + return dec->error_msg_; +} + +void VP8Delete(VP8Decoder* const dec) { + if (dec != NULL) { + VP8Clear(dec); + free(dec); + } +} + +int VP8SetError(VP8Decoder* const dec, + VP8StatusCode error, const char* const msg) { + // The oldest error reported takes precedence over the new one. + if (dec->status_ == VP8_STATUS_OK) { + dec->status_ = error; + dec->error_msg_ = msg; + dec->ready_ = 0; + } + return 0; +} + +//------------------------------------------------------------------------------ + +int VP8CheckSignature(const uint8_t* const data, size_t data_size) { + return (data_size >= 3 && + data[0] == 0x9d && data[1] == 0x01 && data[2] == 0x2a); +} + +int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size, + int* const width, int* const height) { + if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) { + return 0; // not enough data + } + // check signature + if (!VP8CheckSignature(data + 3, data_size - 3)) { + return 0; // Wrong signature. + } else { + const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16); + const int key_frame = !(bits & 1); + const int w = ((data[7] << 8) | data[6]) & 0x3fff; + const int h = ((data[9] << 8) | data[8]) & 0x3fff; + + if (!key_frame) { // Not a keyframe. + return 0; + } + + if (((bits >> 1) & 7) > 3) { + return 0; // unknown profile + } + if (!((bits >> 4) & 1)) { + return 0; // first frame is invisible! + } + if (((bits >> 5)) >= chunk_size) { // partition_length + return 0; // inconsistent size information. + } + if (w == 0 || h == 0) { + return 0; // We don't support both width and height to be zero. + } + + if (width) { + *width = w; + } + if (height) { + *height = h; + } + + return 1; + } +} + +//------------------------------------------------------------------------------ +// Header parsing + +static void ResetSegmentHeader(VP8SegmentHeader* const hdr) { + assert(hdr != NULL); + hdr->use_segment_ = 0; + hdr->update_map_ = 0; + hdr->absolute_delta_ = 1; + memset(hdr->quantizer_, 0, sizeof(hdr->quantizer_)); + memset(hdr->filter_strength_, 0, sizeof(hdr->filter_strength_)); +} + +// Paragraph 9.3 +static int ParseSegmentHeader(VP8BitReader* br, + VP8SegmentHeader* hdr, VP8Proba* proba) { + assert(br != NULL); + assert(hdr != NULL); + hdr->use_segment_ = VP8Get(br); + if (hdr->use_segment_) { + hdr->update_map_ = VP8Get(br); + if (VP8Get(br)) { // update data + int s; + hdr->absolute_delta_ = VP8Get(br); + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + hdr->quantizer_[s] = VP8Get(br) ? VP8GetSignedValue(br, 7) : 0; + } + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + hdr->filter_strength_[s] = VP8Get(br) ? VP8GetSignedValue(br, 6) : 0; + } + } + if (hdr->update_map_) { + int s; + for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) { + proba->segments_[s] = VP8Get(br) ? VP8GetValue(br, 8) : 255u; + } + } + } else { + hdr->update_map_ = 0; + } + return !br->eof_; +} + +// Paragraph 9.5 +// This function returns VP8_STATUS_SUSPENDED if we don't have all the +// necessary data in 'buf'. +// This case is not necessarily an error (for incremental decoding). +// Still, no bitreader is ever initialized to make it possible to read +// unavailable memory. +// If we don't even have the partitions' sizes, than VP8_STATUS_NOT_ENOUGH_DATA +// is returned, and this is an unrecoverable error. +// If the partitions were positioned ok, VP8_STATUS_OK is returned. +static VP8StatusCode ParsePartitions(VP8Decoder* const dec, + const uint8_t* buf, size_t size) { + VP8BitReader* const br = &dec->br_; + const uint8_t* sz = buf; + const uint8_t* buf_end = buf + size; + const uint8_t* part_start; + int last_part; + int p; + + dec->num_parts_ = 1 << VP8GetValue(br, 2); + last_part = dec->num_parts_ - 1; + part_start = buf + last_part * 3; + if (buf_end < part_start) { + // we can't even read the sizes with sz[]! That's a failure. + return VP8_STATUS_NOT_ENOUGH_DATA; + } + for (p = 0; p < last_part; ++p) { + const uint32_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16); + const uint8_t* part_end = part_start + psize; + if (part_end > buf_end) part_end = buf_end; + VP8InitBitReader(dec->parts_ + p, part_start, part_end); + part_start = part_end; + sz += 3; + } + VP8InitBitReader(dec->parts_ + last_part, part_start, buf_end); + return (part_start < buf_end) ? VP8_STATUS_OK : + VP8_STATUS_SUSPENDED; // Init is ok, but there's not enough data +} + +// Paragraph 9.4 +static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) { + VP8FilterHeader* const hdr = &dec->filter_hdr_; + hdr->simple_ = VP8Get(br); + hdr->level_ = VP8GetValue(br, 6); + hdr->sharpness_ = VP8GetValue(br, 3); + hdr->use_lf_delta_ = VP8Get(br); + if (hdr->use_lf_delta_) { + if (VP8Get(br)) { // update lf-delta? + int i; + for (i = 0; i < NUM_REF_LF_DELTAS; ++i) { + if (VP8Get(br)) { + hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6); + } + } + for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) { + if (VP8Get(br)) { + hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6); + } + } + } + } + dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2; + return !br->eof_; +} + +// Topmost call +int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { + const uint8_t* buf; + size_t buf_size; + VP8FrameHeader* frm_hdr; + VP8PictureHeader* pic_hdr; + VP8BitReader* br; + VP8StatusCode status; + + if (dec == NULL) { + return 0; + } + SetOk(dec); + if (io == NULL) { + return VP8SetError(dec, VP8_STATUS_INVALID_PARAM, + "null VP8Io passed to VP8GetHeaders()"); + } + buf = io->data; + buf_size = io->data_size; + if (buf_size < 4) { + return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, + "Truncated header."); + } + + // Paragraph 9.1 + { + const uint32_t bits = buf[0] | (buf[1] << 8) | (buf[2] << 16); + frm_hdr = &dec->frm_hdr_; + frm_hdr->key_frame_ = !(bits & 1); + frm_hdr->profile_ = (bits >> 1) & 7; + frm_hdr->show_ = (bits >> 4) & 1; + frm_hdr->partition_length_ = (bits >> 5); + if (frm_hdr->profile_ > 3) + return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, + "Incorrect keyframe parameters."); + if (!frm_hdr->show_) + return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, + "Frame not displayable."); + buf += 3; + buf_size -= 3; + } + + pic_hdr = &dec->pic_hdr_; + if (frm_hdr->key_frame_) { + // Paragraph 9.2 + if (buf_size < 7) { + return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, + "cannot parse picture header"); + } + if (!VP8CheckSignature(buf, buf_size)) { + return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, + "Bad code word"); + } + pic_hdr->width_ = ((buf[4] << 8) | buf[3]) & 0x3fff; + pic_hdr->xscale_ = buf[4] >> 6; // ratio: 1, 5/4 5/3 or 2 + pic_hdr->height_ = ((buf[6] << 8) | buf[5]) & 0x3fff; + pic_hdr->yscale_ = buf[6] >> 6; + buf += 7; + buf_size -= 7; + + dec->mb_w_ = (pic_hdr->width_ + 15) >> 4; + dec->mb_h_ = (pic_hdr->height_ + 15) >> 4; + // Setup default output area (can be later modified during io->setup()) + io->width = pic_hdr->width_; + io->height = pic_hdr->height_; + io->use_scaling = 0; + io->use_cropping = 0; + io->crop_top = 0; + io->crop_left = 0; + io->crop_right = io->width; + io->crop_bottom = io->height; + io->mb_w = io->width; // sanity check + io->mb_h = io->height; // ditto + + VP8ResetProba(&dec->proba_); + ResetSegmentHeader(&dec->segment_hdr_); + } + + // Check if we have all the partition #0 available, and initialize dec->br_ + // to read this partition (and this partition only). + if (frm_hdr->partition_length_ > buf_size) { + return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, + "bad partition length"); + } + + br = &dec->br_; + VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_); + buf += frm_hdr->partition_length_; + buf_size -= frm_hdr->partition_length_; + + if (frm_hdr->key_frame_) { + pic_hdr->colorspace_ = VP8Get(br); + pic_hdr->clamp_type_ = VP8Get(br); + } + if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) { + return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, + "cannot parse segment header"); + } + // Filter specs + if (!ParseFilterHeader(br, dec)) { + return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, + "cannot parse filter header"); + } + status = ParsePartitions(dec, buf, buf_size); + if (status != VP8_STATUS_OK) { + return VP8SetError(dec, status, "cannot parse partitions"); + } + + // quantizer change + VP8ParseQuant(dec); + + // Frame buffer marking + if (!frm_hdr->key_frame_) { + return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, + "Not a key frame."); + } + + VP8Get(br); // ignore the value of update_proba_ + + VP8ParseProba(br, dec); + + // sanitized state + dec->ready_ = 1; + return 1; +} + +//------------------------------------------------------------------------------ +// Residual decoding (Paragraph 13.2 / 13.3) + +static const uint8_t kCat3[] = { 173, 148, 140, 0 }; +static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; +static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; +static const uint8_t kCat6[] = + { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; +static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; +static const uint8_t kZigzag[16] = { + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2 +static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { + int v; + if (!VP8GetBit(br, p[3])) { + if (!VP8GetBit(br, p[4])) { + v = 2; + } else { + v = 3 + VP8GetBit(br, p[5]); + } + } else { + if (!VP8GetBit(br, p[6])) { + if (!VP8GetBit(br, p[7])) { + v = 5 + VP8GetBit(br, 159); + } else { + v = 7 + 2 * VP8GetBit(br, 165); + v += VP8GetBit(br, 145); + } + } else { + const uint8_t* tab; + const int bit1 = VP8GetBit(br, p[8]); + const int bit0 = VP8GetBit(br, p[9 + bit1]); + const int cat = 2 * bit1 + bit0; + v = 0; + for (tab = kCat3456[cat]; *tab; ++tab) { + v += v + VP8GetBit(br, *tab); + } + v += 3 + (8 << cat); + } + } + return v; +} + +// Returns the position of the last non-zero coeff plus one +static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[], + int ctx, const quant_t dq, int n, int16_t* out) { + const uint8_t* p = prob[n]->probas_[ctx]; + for (; n < 16; ++n) { + if (!VP8GetBit(br, p[0])) { + return n; // previous coeff was last non-zero coeff + } + while (!VP8GetBit(br, p[1])) { // sequence of zero coeffs + p = prob[++n]->probas_[0]; + if (n == 16) return 16; + } + { // non zero coeff + const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0]; + int v; + if (!VP8GetBit(br, p[2])) { + v = 1; + p = p_ctx[1]; + } else { + v = GetLargeValue(br, p); + p = p_ctx[2]; + } + out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0]; + } + } + return 16; +} + +static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) { + nz_coeffs <<= 2; + nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz; + return nz_coeffs; +} + +static void TransformWHT(const int16_t* in, int16_t* out) { + int tmp[16]; + int i; + for (i = 0; i < 4; ++i) { + const int a0 = in[0 + i] + in[12 + i]; + const int a1 = in[4 + i] + in[ 8 + i]; + const int a2 = in[4 + i] - in[ 8 + i]; + const int a3 = in[0 + i] - in[12 + i]; + tmp[0 + i] = a0 + a1; + tmp[8 + i] = a0 - a1; + tmp[4 + i] = a3 + a2; + tmp[12 + i] = a3 - a2; + } + for (i = 0; i < 4; ++i) { + const int dc = tmp[0 + i * 4] + 3; // w/ rounder + const int a0 = dc + tmp[3 + i * 4]; + const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; + const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; + const int a3 = dc - tmp[3 + i * 4]; + out[ 0] = (a0 + a1) >> 3; + out[16] = (a3 + a2) >> 3; + out[32] = (a0 - a1) >> 3; + out[48] = (a3 - a2) >> 3; + out += 64; + } +} + +static int ParseResiduals(VP8Decoder* const dec, + VP8MB* const mb, VP8BitReader* const token_br) { + const VP8BandProbas* (* const bands)[16 + 1] = dec->proba_.bands_ptr_; + const VP8BandProbas* const * ac_proba; + VP8MBData* const block = dec->mb_data_ + dec->mb_x_; + const VP8QuantMatrix* const q = &dec->dqm_[block->segment_]; + int16_t* dst = block->coeffs_; + VP8MB* const left_mb = dec->mb_info_ - 1; + uint8_t tnz, lnz; + uint32_t non_zero_y = 0; + uint32_t non_zero_uv = 0; + int x, y, ch; + uint32_t out_t_nz, out_l_nz; + int first; + + memset(dst, 0, 384 * sizeof(*dst)); + if (!block->is_i4x4_) { // parse DC + int16_t dc[16] = { 0 }; + const int ctx = mb->nz_dc_ + left_mb->nz_dc_; + const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc); + mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0); + if (nz > 1) { // more than just the DC -> perform the full transform + TransformWHT(dc, dst); + } else { // only DC is non-zero -> inlined simplified transform + int i; + const int dc0 = (dc[0] + 3) >> 3; + for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0; + } + first = 1; + ac_proba = bands[0]; + } else { + first = 0; + ac_proba = bands[3]; + } + + tnz = mb->nz_ & 0x0f; + lnz = left_mb->nz_ & 0x0f; + for (y = 0; y < 4; ++y) { + int l = lnz & 1; + uint32_t nz_coeffs = 0; + for (x = 0; x < 4; ++x) { + const int ctx = l + (tnz & 1); + const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst); + l = (nz > first); + tnz = (tnz >> 1) | (l << 7); + nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0); + dst += 16; + } + tnz >>= 4; + lnz = (lnz >> 1) | (l << 7); + non_zero_y = (non_zero_y << 8) | nz_coeffs; + } + out_t_nz = tnz; + out_l_nz = lnz >> 4; + + for (ch = 0; ch < 4; ch += 2) { + uint32_t nz_coeffs = 0; + tnz = mb->nz_ >> (4 + ch); + lnz = left_mb->nz_ >> (4 + ch); + for (y = 0; y < 2; ++y) { + int l = lnz & 1; + for (x = 0; x < 2; ++x) { + const int ctx = l + (tnz & 1); + const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst); + l = (nz > 0); + tnz = (tnz >> 1) | (l << 3); + nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0); + dst += 16; + } + tnz >>= 2; + lnz = (lnz >> 1) | (l << 5); + } + // Note: we don't really need the per-4x4 details for U/V blocks. + non_zero_uv |= nz_coeffs << (4 * ch); + out_t_nz |= (tnz << 4) << ch; + out_l_nz |= (lnz & 0xf0) << ch; + } + mb->nz_ = out_t_nz; + left_mb->nz_ = out_l_nz; + + block->non_zero_y_ = non_zero_y; + block->non_zero_uv_ = non_zero_uv; + + // We look at the mode-code of each block and check if some blocks have less + // than three non-zero coeffs (code < 2). This is to avoid dithering flat and + // empty blocks. + block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_; + + return !(non_zero_y | non_zero_uv); // will be used for further optimization +} + +//------------------------------------------------------------------------------ +// Main loop + +int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { + VP8MB* const left = dec->mb_info_ - 1; + VP8MB* const mb = dec->mb_info_ + dec->mb_x_; + VP8MBData* const block = dec->mb_data_ + dec->mb_x_; + int skip = dec->use_skip_proba_ ? block->skip_ : 0; + + if (!skip) { + skip = ParseResiduals(dec, mb, token_br); + } else { + left->nz_ = mb->nz_ = 0; + if (!block->is_i4x4_) { + left->nz_dc_ = mb->nz_dc_ = 0; + } + block->non_zero_y_ = 0; + block->non_zero_uv_ = 0; + block->dither_ = 0; + } + + if (dec->filter_type_ > 0) { // store filter info + VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_; + *finfo = dec->fstrengths_[block->segment_][block->is_i4x4_]; + finfo->f_inner_ |= !skip; + } + + return !token_br->eof_; +} + +void VP8InitScanline(VP8Decoder* const dec) { + VP8MB* const left = dec->mb_info_ - 1; + left->nz_ = 0; + left->nz_dc_ = 0; + memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_)); + dec->mb_x_ = 0; +} + +static int ParseFrame(VP8Decoder* const dec, VP8Io* io) { + for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) { + // Parse bitstream for this row. + VP8BitReader* const token_br = + &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)]; + if (!VP8ParseIntraModeRow(&dec->br_, dec)) { + return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, + "Premature end-of-partition0 encountered."); + } + for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) { + if (!VP8DecodeMB(dec, token_br)) { + return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, + "Premature end-of-file encountered."); + } + } + VP8InitScanline(dec); // Prepare for next scanline + + // Reconstruct, filter and emit the row. + if (!VP8ProcessRow(dec, io)) { + return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted."); + } + } + return 1; +} + +// Main entry point +int VP8Decode(VP8Decoder* const dec, VP8Io* const io) { + int ok = 0; + if (dec == NULL) { + return 0; + } + if (io == NULL) { + return VP8SetError(dec, VP8_STATUS_INVALID_PARAM, + "NULL VP8Io parameter in VP8Decode()."); + } + + if (!dec->ready_) { + if (!VP8GetHeaders(dec, io)) { + return 0; + } + } + assert(dec->ready_); + + // Finish setting up the decoding parameter. Will call io->setup(). + ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK); + if (ok) { // good to go. + // Will allocate memory and prepare everything. + if (ok) ok = VP8InitFrame(dec, io); + + // Main decoding loop + if (ok) ok = ParseFrame(dec, io); + + // Exit. + ok &= VP8ExitCritical(dec, io); + } + + if (!ok) { + VP8Clear(dec); + return 0; + } + + dec->ready_ = 0; + return ok; +} + +void VP8Clear(VP8Decoder* const dec) { + if (dec == NULL) { + return; + } + ALPHDelete(dec->alph_dec_); + dec->alph_dec_ = NULL; + free(dec->mem_); + dec->mem_ = NULL; + dec->mem_size_ = 0; + memset(&dec->br_, 0, sizeof(dec->br_)); + dec->ready_ = 0; +} + +//------------------------------------------------------------------------------ + diff --git a/src/loaders/webp/dec/vp8i.h b/src/loaders/webp/dec/vp8i.h new file mode 100644 index 00000000..0164f988 --- /dev/null +++ b/src/loaders/webp/dec/vp8i.h @@ -0,0 +1,307 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// VP8 decoder: internal header. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_DEC_VP8I_H_ +#define WEBP_DEC_VP8I_H_ + +#include // for memcpy() +#include "./common.h" +#include "./vp8li.h" +#include "../utils/bit_reader.h" +#include "../utils/random.h" +#include "../dsp/dsp.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// Various defines and enums + +// version numbers +#define DEC_MAJ_VERSION 0 +#define DEC_MIN_VERSION 4 +#define DEC_REV_VERSION 3 + +// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). +// Constraints are: We need to store one 16x16 block of luma samples (y), +// and two 8x8 chroma blocks (u/v). These are better be 16-bytes aligned, +// in order to be SIMD-friendly. We also need to store the top, left and +// top-left samples (from previously decoded blocks), along with four +// extra top-right samples for luma (intra4x4 prediction only). +// One possible layout is, using 32 * (17 + 9) bytes: +// +// .+------ <- only 1 pixel high +// .|yyyyt. +// .|yyyyt. +// .|yyyyt. +// .|yyyy.. +// .+--.+-- <- only 1 pixel high +// .|uu.|vv +// .|uu.|vv +// +// Every character is a 4x4 block, with legend: +// '.' = unused +// 'y' = y-samples 'u' = u-samples 'v' = u-samples +// '|' = left sample, '-' = top sample, '+' = top-left sample +// 't' = extra top-right sample for 4x4 modes +#define YUV_SIZE (BPS * 17 + BPS * 9) +#define Y_SIZE (BPS * 17) +#define Y_OFF (BPS * 1 + 8) +#define U_OFF (Y_OFF + BPS * 16 + BPS) +#define V_OFF (U_OFF + 16) + +// minimal width under which lossy multi-threading is always disabled +#define MIN_WIDTH_FOR_THREADS 512 + +//------------------------------------------------------------------------------ +// Headers + +typedef struct { + uint8_t key_frame_; + uint8_t profile_; + uint8_t show_; + uint32_t partition_length_; +} VP8FrameHeader; + +typedef struct { + uint16_t width_; + uint16_t height_; + uint8_t xscale_; + uint8_t yscale_; + uint8_t colorspace_; // 0 = YCbCr + uint8_t clamp_type_; +} VP8PictureHeader; + +// segment features +typedef struct { + int use_segment_; + int update_map_; // whether to update the segment map or not + int absolute_delta_; // absolute or delta values for quantizer and filter + int8_t quantizer_[NUM_MB_SEGMENTS]; // quantization changes + int8_t filter_strength_[NUM_MB_SEGMENTS]; // filter strength for segments +} VP8SegmentHeader; + +// probas associated to one of the contexts +typedef uint8_t VP8ProbaArray[NUM_PROBAS]; + +typedef struct { // all the probas associated to one band + VP8ProbaArray probas_[NUM_CTX]; +} VP8BandProbas; + +// Struct collecting all frame-persistent probabilities. +typedef struct { + uint8_t segments_[MB_FEATURE_TREE_PROBS]; + // Type: 0:Intra16-AC 1:Intra16-DC 2:Chroma 3:Intra4 + VP8BandProbas bands_[NUM_TYPES][NUM_BANDS]; + const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1]; +} VP8Proba; + +// Filter parameters +typedef struct { + int simple_; // 0=complex, 1=simple + int level_; // [0..63] + int sharpness_; // [0..7] + int use_lf_delta_; + int ref_lf_delta_[NUM_REF_LF_DELTAS]; + int mode_lf_delta_[NUM_MODE_LF_DELTAS]; +} VP8FilterHeader; + +//------------------------------------------------------------------------------ +// Informations about the macroblocks. + +typedef struct { // filter specs + uint8_t f_limit_; // filter limit in [3..189], or 0 if no filtering + uint8_t f_ilevel_; // inner limit in [1..63] + uint8_t f_inner_; // do inner filtering? + uint8_t hev_thresh_; // high edge variance threshold in [0..2] +} VP8FInfo; + +typedef struct { // Top/Left Contexts used for syntax-parsing + uint8_t nz_; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma) + uint8_t nz_dc_; // non-zero DC coeff (1bit) +} VP8MB; + +// Dequantization matrices +typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower). +typedef struct { + quant_t y1_mat_, y2_mat_, uv_mat_; + + int uv_quant_; // U/V quantizer value + int dither_; // dithering amplitude (0 = off, max=255) +} VP8QuantMatrix; + +// Data needed to reconstruct a macroblock +typedef struct { + int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4 + uint8_t is_i4x4_; // true if intra4x4 + uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes + uint8_t uvmode_; // chroma prediction mode + // bit-wise info about the content of each sub-4x4 blocks (in decoding order). + // Each of the 4x4 blocks for y/u/v is associated with a 2b code according to: + // code=0 -> no coefficient + // code=1 -> only DC + // code=2 -> first three coefficients are non-zero + // code=3 -> more than three coefficients are non-zero + // This allows to call specialized transform functions. + uint32_t non_zero_y_; + uint32_t non_zero_uv_; + uint8_t dither_; // local dithering strength (deduced from non_zero_*) + uint8_t skip_; + uint8_t segment_; +} VP8MBData; + +// Persistent information needed by the parallel processing +typedef struct { + int id_; // cache row to process (in [0..2]) + int mb_y_; // macroblock position of the row + int filter_row_; // true if row-filtering is needed + VP8FInfo* f_info_; // filter strengths (swapped with dec->f_info_) + VP8MBData* mb_data_; // reconstruction data (swapped with dec->mb_data_) + VP8Io io_; // copy of the VP8Io to pass to put() +} VP8ThreadContext; + +// Saved top samples, per macroblock. Fits into a cache-line. +typedef struct { + uint8_t y[16], u[8], v[8]; +} VP8TopSamples; + +//------------------------------------------------------------------------------ +// VP8Decoder: the main opaque structure handed over to user + +struct VP8Decoder { + VP8StatusCode status_; + int ready_; // true if ready to decode a picture with VP8Decode() + const char* error_msg_; // set when status_ is not OK. + + // Main data source + VP8BitReader br_; + + // headers + VP8FrameHeader frm_hdr_; + VP8PictureHeader pic_hdr_; + VP8FilterHeader filter_hdr_; + VP8SegmentHeader segment_hdr_; + + int cache_id_; // current cache row + int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3) + VP8ThreadContext thread_ctx_; // Thread context + + // dimension, in macroblock units. + int mb_w_, mb_h_; + + // Macroblock to process/filter, depending on cropping and filter_type. + int tl_mb_x_, tl_mb_y_; // top-left MB that must be in-loop filtered + int br_mb_x_, br_mb_y_; // last bottom-right MB that must be decoded + + // number of partitions. + int num_parts_; + // per-partition boolean decoders. + VP8BitReader parts_[MAX_NUM_PARTITIONS]; + + // Dithering strength, deduced from decoding options + int dither_; // whether to use dithering or not + VP8Random dithering_rg_; // random generator for dithering + + // dequantization (one set of DC/AC dequant factor per segment) + VP8QuantMatrix dqm_[NUM_MB_SEGMENTS]; + + // probabilities + VP8Proba proba_; + int use_skip_proba_; + uint8_t skip_p_; + + // Boundary data cache and persistent buffers. + uint8_t* intra_t_; // top intra modes values: 4 * mb_w_ + uint8_t intra_l_[4]; // left intra modes values + + VP8TopSamples* yuv_t_; // top y/u/v samples + + VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1) + VP8FInfo* f_info_; // filter strength info + uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE) + + uint8_t* cache_y_; // macroblock row for storing unfiltered samples + uint8_t* cache_u_; + uint8_t* cache_v_; + int cache_y_stride_; + int cache_uv_stride_; + + // main memory chunk for the above data. Persistent. + void* mem_; + size_t mem_size_; + + // Per macroblock non-persistent infos. + int mb_x_, mb_y_; // current position, in macroblock units + VP8MBData* mb_data_; // parsed reconstruction data + + // Filtering side-info + int filter_type_; // 0=off, 1=simple, 2=complex + VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type + + // Alpha + struct ALPHDecoder* alph_dec_; // alpha-plane decoder object + const uint8_t* alpha_data_; // compressed alpha data (if present) + size_t alpha_data_size_; + int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_ + uint8_t* alpha_plane_; // output. Persistent, contains the whole data. + int alpha_dithering_; // derived from decoding options (0=off, 100=full). +}; + +//------------------------------------------------------------------------------ +// internal functions. Not public. + +// in vp8.c +int VP8SetError(VP8Decoder* const dec, + VP8StatusCode error, const char* const msg); + +// in tree.c +void VP8ResetProba(VP8Proba* const proba); +void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec); +// parses one row of intra mode data in partition 0, returns !eof +int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec); + +// in quant.c +void VP8ParseQuant(VP8Decoder* const dec); + +// in frame.c +int VP8InitFrame(VP8Decoder* const dec, VP8Io* io); +// Call io->setup() and finish setting up scan parameters. +// After this call returns, one must always call VP8ExitCritical() with the +// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK +// if ok, otherwise sets and returns the error status on *dec. +VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io); +// Must always be called in pair with VP8EnterCritical(). +// Returns false in case of error. +int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io); +// Initialize dithering post-process if needed. +void VP8InitDithering(const WebPDecoderOptions* const options, + VP8Decoder* const dec); +// Process the last decoded row (filtering + output). +int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io); +// To be called at the start of a new scanline, to initialize predictors. +void VP8InitScanline(VP8Decoder* const dec); +// Decode one macroblock. Returns false if there is not enough data. +int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br); + +// in alpha.c +const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, + int row, int num_rows); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_DEC_VP8I_H_ */ diff --git a/src/loaders/webp/dec/vp8l.cpp b/src/loaders/webp/dec/vp8l.cpp new file mode 100644 index 00000000..8ddeb7ef --- /dev/null +++ b/src/loaders/webp/dec/vp8l.cpp @@ -0,0 +1,1580 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// main entry for the decoder +// +// Authors: Vikas Arora (vikaas.arora@gmail.com) +// Jyrki Alakuijala (jyrki@google.com) + +#include + +#include "./alphai.h" +#include "./vp8li.h" +#include "../dsp/dsp.h" +#include "../dsp/lossless.h" +#include "../dsp/yuv.h" +#include "../utils/huffman.h" +#include "../utils/utils.h" + +#define NUM_ARGB_CACHE_ROWS 16 + +static const int kCodeLengthLiterals = 16; +static const int kCodeLengthRepeatCode = 16; +static const int kCodeLengthExtraBits[3] = { 2, 3, 7 }; +static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 }; + +// ----------------------------------------------------------------------------- +// Five Huffman codes are used at each meta code: +// 1. green + length prefix codes + color cache codes, +// 2. alpha, +// 3. red, +// 4. blue, and, +// 5. distance prefix codes. +typedef enum { + GREEN = 0, + RED = 1, + BLUE = 2, + ALPHA = 3, + DIST = 4 +} HuffIndex; + +static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = { + NUM_LITERAL_CODES + NUM_LENGTH_CODES, + NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES, + NUM_DISTANCE_CODES +}; + +static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = { + 0, 1, 1, 1, 0 +}; + +#define NUM_CODE_LENGTH_CODES 19 +static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = { + 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +}; + +#define CODE_TO_PLANE_CODES 120 +static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { + 0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, + 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, + 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, + 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03, + 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c, + 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e, + 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, + 0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, + 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, + 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41, + 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f, + 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70 +}; + +// Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha +// and distance alphabets are constant (256 for red, blue and alpha, 40 for +// distance) and lookup table sizes for them in worst case are 630 and 410 +// respectively. Size of green alphabet depends on color cache size and is equal +// to 256 (green component values) + 24 (length prefix values) +// + color_cache_size (between 0 and 2048). +// All values computed for 8-bit first level lookup with Mark Adler's tool: +// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c +#define FIXED_TABLE_SIZE (630 * 3 + 410) +static const int kTableSize[12] = { + FIXED_TABLE_SIZE + 654, + FIXED_TABLE_SIZE + 656, + FIXED_TABLE_SIZE + 658, + FIXED_TABLE_SIZE + 662, + FIXED_TABLE_SIZE + 670, + FIXED_TABLE_SIZE + 686, + FIXED_TABLE_SIZE + 718, + FIXED_TABLE_SIZE + 782, + FIXED_TABLE_SIZE + 912, + FIXED_TABLE_SIZE + 1168, + FIXED_TABLE_SIZE + 1680, + FIXED_TABLE_SIZE + 2704 +}; + +static int DecodeImageStream(int xsize, int ysize, + int is_level0, + VP8LDecoder* const dec, + uint32_t** const decoded_data); + +//------------------------------------------------------------------------------ + +int VP8LCheckSignature(const uint8_t* const data, size_t size) { + return (size >= VP8L_FRAME_HEADER_SIZE && + data[0] == VP8L_MAGIC_BYTE && + (data[4] >> 5) == 0); // version +} + +static int ReadImageInfo(VP8LBitReader* const br, + int* const width, int* const height, + int* const has_alpha) { + if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0; + *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1; + *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1; + *has_alpha = VP8LReadBits(br, 1); + if (VP8LReadBits(br, VP8L_VERSION_BITS) != 0) return 0; + return !br->eos_; +} + +int VP8LGetInfo(const uint8_t* data, size_t data_size, + int* const width, int* const height, int* const has_alpha) { + if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) { + return 0; // not enough data + } else if (!VP8LCheckSignature(data, data_size)) { + return 0; // bad signature + } else { + int w, h, a; + VP8LBitReader br; + VP8LInitBitReader(&br, data, data_size); + if (!ReadImageInfo(&br, &w, &h, &a)) { + return 0; + } + if (width != NULL) *width = w; + if (height != NULL) *height = h; + if (has_alpha != NULL) *has_alpha = a; + return 1; + } +} + +//------------------------------------------------------------------------------ + +static WEBP_INLINE int GetCopyDistance(int distance_symbol, + VP8LBitReader* const br) { + int extra_bits, offset; + if (distance_symbol < 4) { + return distance_symbol + 1; + } + extra_bits = (distance_symbol - 2) >> 1; + offset = (2 + (distance_symbol & 1)) << extra_bits; + return offset + VP8LReadBits(br, extra_bits) + 1; +} + +static WEBP_INLINE int GetCopyLength(int length_symbol, + VP8LBitReader* const br) { + // Length and distance prefixes are encoded the same way. + return GetCopyDistance(length_symbol, br); +} + +static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { + if (plane_code > CODE_TO_PLANE_CODES) { + return plane_code - CODE_TO_PLANE_CODES; + } else { + const int dist_code = kCodeToPlane[plane_code - 1]; + const int yoffset = dist_code >> 4; + const int xoffset = 8 - (dist_code & 0xf); + const int dist = yoffset * xsize + xoffset; + return (dist >= 1) ? dist : 1; // dist<1 can happen if xsize is very small + } +} + +//------------------------------------------------------------------------------ +// Decodes the next Huffman code from bit-stream. +// FillBitWindow(br) needs to be called at minimum every second call +// to ReadSymbol, in order to pre-fetch enough bits. +static WEBP_INLINE int ReadSymbol(const HuffmanCode* table, + VP8LBitReader* const br) { + int nbits; + uint32_t val = VP8LPrefetchBits(br); + table += val & HUFFMAN_TABLE_MASK; + nbits = table->bits - HUFFMAN_TABLE_BITS; + if (nbits > 0) { + VP8LSetBitPos(br, br->bit_pos_ + HUFFMAN_TABLE_BITS); + val = VP8LPrefetchBits(br); + table += table->value; + table += val & ((1 << nbits) - 1); + } + VP8LSetBitPos(br, br->bit_pos_ + table->bits); + return table->value; +} + +static int ReadHuffmanCodeLengths( + VP8LDecoder* const dec, const int* const code_length_code_lengths, + int num_symbols, int* const code_lengths) { + int ok = 0; + VP8LBitReader* const br = &dec->br_; + int symbol; + int max_symbol; + int prev_code_len = DEFAULT_CODE_LENGTH; + HuffmanCode table[1 << LENGTHS_TABLE_BITS]; + + if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS, + code_length_code_lengths, + NUM_CODE_LENGTH_CODES)) { + goto End; + } + + if (VP8LReadBits(br, 1)) { // use length + const int length_nbits = 2 + 2 * VP8LReadBits(br, 3); + max_symbol = 2 + VP8LReadBits(br, length_nbits); + if (max_symbol > num_symbols) { + goto End; + } + } else { + max_symbol = num_symbols; + } + + symbol = 0; + while (symbol < num_symbols) { + const HuffmanCode* p; + int code_len; + if (max_symbol-- == 0) break; + VP8LFillBitWindow(br); + p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK]; + VP8LSetBitPos(br, br->bit_pos_ + p->bits); + code_len = p->value; + if (code_len < kCodeLengthLiterals) { + code_lengths[symbol++] = code_len; + if (code_len != 0) prev_code_len = code_len; + } else { + const int use_prev = (code_len == kCodeLengthRepeatCode); + const int slot = code_len - kCodeLengthLiterals; + const int extra_bits = kCodeLengthExtraBits[slot]; + const int repeat_offset = kCodeLengthRepeatOffsets[slot]; + int repeat = VP8LReadBits(br, extra_bits) + repeat_offset; + if (symbol + repeat > num_symbols) { + goto End; + } else { + const int length = use_prev ? prev_code_len : 0; + while (repeat-- > 0) code_lengths[symbol++] = length; + } + } + } + ok = 1; + + End: + if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + return ok; +} + +// 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman +// tree. +static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, + int* const code_lengths, HuffmanCode* const table) { + int ok = 0; + int size = 0; + VP8LBitReader* const br = &dec->br_; + const int simple_code = VP8LReadBits(br, 1); + + memset(code_lengths, 0, alphabet_size * sizeof(*code_lengths)); + + if (simple_code) { // Read symbols, codes & code lengths directly. + const int num_symbols = VP8LReadBits(br, 1) + 1; + const int first_symbol_len_code = VP8LReadBits(br, 1); + // The first code is either 1 bit or 8 bit code. + int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); + code_lengths[symbol] = 1; + // The second code (if present), is always 8 bit long. + if (num_symbols == 2) { + symbol = VP8LReadBits(br, 8); + code_lengths[symbol] = 1; + } + ok = 1; + } else { // Decode Huffman-coded code lengths. + int i; + int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 }; + const int num_codes = VP8LReadBits(br, 4) + 4; + if (num_codes > NUM_CODE_LENGTH_CODES) { + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + return 0; + } + + for (i = 0; i < num_codes; ++i) { + code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3); + } + ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size, + code_lengths); + } + + ok = ok && !br->eos_; + if (ok) { + size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, + code_lengths, alphabet_size); + } + if (!ok || size == 0) { + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + return 0; + } + return size; +} + +static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, + int color_cache_bits, int allow_recursion) { + int i, j; + VP8LBitReader* const br = &dec->br_; + VP8LMetadata* const hdr = &dec->hdr_; + uint32_t* huffman_image = NULL; + HTreeGroup* htree_groups = NULL; + HuffmanCode* huffman_tables = NULL; + HuffmanCode* next = NULL; + int num_htree_groups = 1; + int max_alphabet_size = 0; + int* code_lengths = NULL; + const int table_size = kTableSize[color_cache_bits]; + + if (allow_recursion && VP8LReadBits(br, 1)) { + // use meta Huffman codes. + const int huffman_precision = VP8LReadBits(br, 3) + 2; + const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision); + const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision); + const int huffman_pixs = huffman_xsize * huffman_ysize; + if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec, + &huffman_image)) { + goto Error; + } + hdr->huffman_subsample_bits_ = huffman_precision; + for (i = 0; i < huffman_pixs; ++i) { + // The huffman data is stored in red and green bytes. + const int group = (huffman_image[i] >> 8) & 0xffff; + huffman_image[i] = group; + if (group >= num_htree_groups) { + num_htree_groups = group + 1; + } + } + } + + if (br->eos_) goto Error; + + // Find maximum alphabet size for the htree group. + for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { + int alphabet_size = kAlphabetSize[j]; + if (j == 0 && color_cache_bits > 0) { + alphabet_size += 1 << color_cache_bits; + } + if (max_alphabet_size < alphabet_size) { + max_alphabet_size = alphabet_size; + } + } + + huffman_tables = (HuffmanCode*)malloc(num_htree_groups * table_size * sizeof(*huffman_tables)); + htree_groups = VP8LHtreeGroupsNew(num_htree_groups); + code_lengths = (int*)calloc((uint64_t)max_alphabet_size, sizeof(*code_lengths)); + + if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + goto Error; + } + + next = huffman_tables; + for (i = 0; i < num_htree_groups; ++i) { + HTreeGroup* const htree_group = &htree_groups[i]; + HuffmanCode** const htrees = htree_group->htrees; + int size; + int is_trivial_literal = 1; + for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { + int alphabet_size = kAlphabetSize[j]; + htrees[j] = next; + if (j == 0 && color_cache_bits > 0) { + alphabet_size += 1 << color_cache_bits; + } + size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next); + if (is_trivial_literal && kLiteralMap[j] == 1) { + is_trivial_literal = (next->bits == 0); + } + next += size; + if (size == 0) { + goto Error; + } + } + htree_group->is_trivial_literal = is_trivial_literal; + if (is_trivial_literal) { + const int red = htrees[RED][0].value; + const int blue = htrees[BLUE][0].value; + const int alpha = htrees[ALPHA][0].value; + htree_group->literal_arb = + ((uint32_t)alpha << 24) | (red << 16) | blue; + } + } + free(code_lengths); + + // All OK. Finalize pointers and return. + hdr->huffman_image_ = huffman_image; + hdr->num_htree_groups_ = num_htree_groups; + hdr->htree_groups_ = htree_groups; + hdr->huffman_tables_ = huffman_tables; + return 1; + + Error: + free(code_lengths); + free(huffman_image); + free(huffman_tables); + VP8LHtreeGroupsFree(htree_groups); + return 0; +} + +//------------------------------------------------------------------------------ +// Scaling. + +static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { + const int num_channels = 4; + const int in_width = io->mb_w; + const int out_width = io->scaled_width; + const int in_height = io->mb_h; + const int out_height = io->scaled_height; + const uint64_t work_size = 2 * num_channels * (uint64_t)out_width; + int32_t* work; // Rescaler work area. + const uint64_t scaled_data_size = num_channels * (uint64_t)out_width; + uint32_t* scaled_data; // Temporary storage for scaled BGRA data. + const uint64_t memory_size = sizeof(*dec->rescaler) + + work_size * sizeof(*work) + + scaled_data_size * sizeof(*scaled_data); + uint8_t* memory = (uint8_t*)calloc(memory_size, sizeof(*memory)); + if (memory == NULL) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + return 0; + } + assert(dec->rescaler_memory == NULL); + dec->rescaler_memory = memory; + + dec->rescaler = (WebPRescaler*)memory; + memory += sizeof(*dec->rescaler); + work = (int32_t*)memory; + memory += work_size * sizeof(*work); + scaled_data = (uint32_t*)memory; + + WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, + out_width, out_height, 0, num_channels, + in_width, out_width, in_height, out_height, work); + return 1; +} + +//------------------------------------------------------------------------------ +// Export to ARGB + +// We have special "export" function since we need to convert from BGRA +static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, + int rgba_stride, uint8_t* const rgba) { + uint32_t* const src = (uint32_t*)rescaler->dst; + const int dst_width = rescaler->dst_width; + int num_lines_out = 0; + while (WebPRescalerHasPendingOutput(rescaler)) { + uint8_t* const dst = rgba + num_lines_out * rgba_stride; + WebPRescalerExportRow(rescaler, 0); + WebPMultARGBRow(src, dst_width, 1); + VP8LConvertFromBGRA(src, dst_width, colorspace, dst); + ++num_lines_out; + } + return num_lines_out; +} + +// Emit scaled rows. +static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, + uint8_t* in, int in_stride, int mb_h, + uint8_t* const out, int out_stride) { + const WEBP_CSP_MODE colorspace = dec->output_->colorspace; + int num_lines_in = 0; + int num_lines_out = 0; + while (num_lines_in < mb_h) { + uint8_t* const row_in = in + num_lines_in * in_stride; + uint8_t* const row_out = out + num_lines_out * out_stride; + const int lines_left = mb_h - num_lines_in; + const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); + assert(needed_lines > 0 && needed_lines <= lines_left); + WebPMultARGBRows(row_in, in_stride, + dec->rescaler->src_width, needed_lines, 0); + WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride); + num_lines_in += needed_lines; + num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out); + } + return num_lines_out; +} + +// Emit rows without any scaling. +static int EmitRows(WEBP_CSP_MODE colorspace, + const uint8_t* row_in, int in_stride, + int mb_w, int mb_h, + uint8_t* const out, int out_stride) { + int lines = mb_h; + uint8_t* row_out = out; + while (lines-- > 0) { + VP8LConvertFromBGRA((const uint32_t*)row_in, mb_w, colorspace, row_out); + row_in += in_stride; + row_out += out_stride; + } + return mb_h; // Num rows out == num rows in. +} + +//------------------------------------------------------------------------------ +// Export to YUVA + +// TODO(skal): should be in yuv.c +static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos, + const WebPDecBuffer* const output) { + const WebPYUVABuffer* const buf = &output->u.YUVA; + // first, the luma plane + { + int i; + uint8_t* const y = buf->y + y_pos * buf->y_stride; + for (i = 0; i < width; ++i) { + const uint32_t p = src[i]; + y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff, + YUV_HALF); + } + } + + // then U/V planes + { + uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride; + uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride; + const int uv_width = width >> 1; + int i; + for (i = 0; i < uv_width; ++i) { + const uint32_t v0 = src[2 * i + 0]; + const uint32_t v1 = src[2 * i + 1]; + // VP8RGBToU/V expects four accumulated pixels. Hence we need to + // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less. + const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe); + const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe); + const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe); + if (!(y_pos & 1)) { // even lines: store values + u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2); + v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2); + } else { // odd lines: average with previous values + const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); + const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); + // Approximated average-of-four. But it's an acceptable diff. + u[i] = (u[i] + tmp_u + 1) >> 1; + v[i] = (v[i] + tmp_v + 1) >> 1; + } + } + if (width & 1) { // last pixel + const uint32_t v0 = src[2 * i + 0]; + const int r = (v0 >> 14) & 0x3fc; + const int g = (v0 >> 6) & 0x3fc; + const int b = (v0 << 2) & 0x3fc; + if (!(y_pos & 1)) { // even lines + u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2); + v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2); + } else { // odd lines (note: we could just skip this) + const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); + const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); + u[i] = (u[i] + tmp_u + 1) >> 1; + v[i] = (v[i] + tmp_v + 1) >> 1; + } + } + } + // Lastly, store alpha if needed. + if (buf->a != NULL) { + int i; + uint8_t* const a = buf->a + y_pos * buf->a_stride; + for (i = 0; i < width; ++i) a[i] = (src[i] >> 24); + } +} + +static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) { + WebPRescaler* const rescaler = dec->rescaler; + uint32_t* const src = (uint32_t*)rescaler->dst; + const int dst_width = rescaler->dst_width; + int num_lines_out = 0; + while (WebPRescalerHasPendingOutput(rescaler)) { + WebPRescalerExportRow(rescaler, 0); + WebPMultARGBRow(src, dst_width, 1); + ConvertToYUVA(src, dst_width, y_pos, dec->output_); + ++y_pos; + ++num_lines_out; + } + return num_lines_out; +} + +static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, + uint8_t* in, int in_stride, int mb_h) { + int num_lines_in = 0; + int y_pos = dec->last_out_row_; + while (num_lines_in < mb_h) { + const int lines_left = mb_h - num_lines_in; + const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); + WebPMultARGBRows(in, in_stride, dec->rescaler->src_width, needed_lines, 0); + WebPRescalerImport(dec->rescaler, lines_left, in, in_stride); + num_lines_in += needed_lines; + in += needed_lines * in_stride; + y_pos += ExportYUVA(dec, y_pos); + } + return y_pos; +} + +static int EmitRowsYUVA(const VP8LDecoder* const dec, + const uint8_t* in, int in_stride, + int mb_w, int num_rows) { + int y_pos = dec->last_out_row_; + while (num_rows-- > 0) { + ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output_); + in += in_stride; + ++y_pos; + } + return y_pos; +} + +//------------------------------------------------------------------------------ +// Cropping. + +// Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and +// crop options. Also updates the input data pointer, so that it points to the +// start of the cropped window. Note that pixels are in ARGB format even if +// 'in_data' is uint8_t*. +// Returns true if the crop window is not empty. +static int SetCropWindow(VP8Io* const io, int y_start, int y_end, + uint8_t** const in_data, int pixel_stride) { + assert(y_start < y_end); + assert(io->crop_left < io->crop_right); + if (y_end > io->crop_bottom) { + y_end = io->crop_bottom; // make sure we don't overflow on last row. + } + if (y_start < io->crop_top) { + const int delta = io->crop_top - y_start; + y_start = io->crop_top; + *in_data += delta * pixel_stride; + } + if (y_start >= y_end) return 0; // Crop window is empty. + + *in_data += io->crop_left * sizeof(uint32_t); + + io->mb_y = y_start - io->crop_top; + io->mb_w = io->crop_right - io->crop_left; + io->mb_h = y_end - y_start; + return 1; // Non-empty crop window. +} + +//------------------------------------------------------------------------------ + +static WEBP_INLINE int GetMetaIndex( + const uint32_t* const image, int xsize, int bits, int x, int y) { + if (bits == 0) return 0; + return image[xsize * (y >> bits) + (x >> bits)]; +} + +static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr, + int x, int y) { + const int meta_index = GetMetaIndex(hdr->huffman_image_, hdr->huffman_xsize_, + hdr->huffman_subsample_bits_, x, y); + assert(meta_index < hdr->num_htree_groups_); + return hdr->htree_groups_ + meta_index; +} + +//------------------------------------------------------------------------------ +// Main loop, with custom row-processing function + +typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row); + +static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows, + const uint32_t* const rows) { + int n = dec->next_transform_; + const int cache_pixs = dec->width_ * num_rows; + const int start_row = dec->last_row_; + const int end_row = start_row + num_rows; + const uint32_t* rows_in = rows; + uint32_t* const rows_out = dec->argb_cache_; + + // Inverse transforms. + // TODO: most transforms only need to operate on the cropped region only. + memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out)); + while (n-- > 0) { + VP8LTransform* const transform = &dec->transforms_[n]; + VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out); + rows_in = rows_out; + } +} + +// Special method for paletted alpha data. +static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows, + const uint8_t* const rows) { + const int start_row = dec->last_row_; + const int end_row = start_row + num_rows; + const uint8_t* rows_in = rows; + uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row; + VP8LTransform* const transform = &dec->transforms_[0]; + assert(dec->next_transform_ == 1); + assert(transform->type_ == COLOR_INDEXING_TRANSFORM); + VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in, + rows_out); +} + +// Processes (transforms, scales & color-converts) the rows decoded after the +// last call. +static void ProcessRows(VP8LDecoder* const dec, int row) { + const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_; + const int num_rows = row - dec->last_row_; + + if (num_rows <= 0) return; // Nothing to be done. + ApplyInverseTransforms(dec, num_rows, rows); + + // Emit output. + { + VP8Io* const io = dec->io_; + uint8_t* rows_data = (uint8_t*)dec->argb_cache_; + const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA + if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) { + // Nothing to output (this time). + } else { + const WebPDecBuffer* const output = dec->output_; + if (output->colorspace < MODE_YUV) { // convert to RGBA + const WebPRGBABuffer* const buf = &output->u.RGBA; + uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride; + const int num_rows_out = io->use_scaling ? + EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h, + rgba, buf->stride) : + EmitRows(output->colorspace, rows_data, in_stride, + io->mb_w, io->mb_h, rgba, buf->stride); + // Update 'last_out_row_'. + dec->last_out_row_ += num_rows_out; + } else { // convert to YUVA + dec->last_out_row_ = io->use_scaling ? + EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) : + EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h); + } + assert(dec->last_out_row_ <= output->height); + } + } + + // Update 'last_row_'. + dec->last_row_ = row; + assert(dec->last_row_ <= dec->height_); +} + +// Row-processing for the special case when alpha data contains only one +// transform (color indexing), and trivial non-green literals. +static int Is8bOptimizable(const VP8LMetadata* const hdr) { + int i; + if (hdr->color_cache_size_ > 0) return 0; + // When the Huffman tree contains only one symbol, we can skip the + // call to ReadSymbol() for red/blue/alpha channels. + for (i = 0; i < hdr->num_htree_groups_; ++i) { + HuffmanCode** const htrees = hdr->htree_groups_[i].htrees; + if (htrees[RED][0].bits > 0) return 0; + if (htrees[BLUE][0].bits > 0) return 0; + if (htrees[ALPHA][0].bits > 0) return 0; + } + return 1; +} + +static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) { + const int num_rows = row - dec->last_row_; + const uint8_t* const in = + (uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_; + if (num_rows > 0) { + ApplyInverseTransformsAlpha(dec, num_rows, in); + } + dec->last_row_ = dec->last_out_row_ = row; +} + +//------------------------------------------------------------------------------ +// Helper functions for fast pattern copy (8b and 32b) + +// cyclic rotation of pattern word +static WEBP_INLINE uint32_t Rotate8b(uint32_t V) { +#if defined(WORDS_BIGENDIAN) + return ((V & 0xff000000u) >> 24) | (V << 8); +#else + return ((V & 0xffu) << 24) | (V >> 8); +#endif +} + +// copy 1, 2 or 4-bytes pattern +static WEBP_INLINE void CopySmallPattern8b(const uint8_t* src, uint8_t* dst, + int length, uint32_t pattern) { + int i; + // align 'dst' to 4-bytes boundary. Adjust the pattern along the way. + while ((uintptr_t)dst & 3) { + *dst++ = *src++; + pattern = Rotate8b(pattern); + --length; + } + // Copy the pattern 4 bytes at a time. + for (i = 0; i < (length >> 2); ++i) { + ((uint32_t*)dst)[i] = pattern; + } + // Finish with left-overs. 'pattern' is still correctly positioned, + // so no Rotate8b() call is needed. + for (i <<= 2; i < length; ++i) { + dst[i] = src[i]; + } +} + +static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) { + const uint8_t* src = dst - dist; + if (length >= 8) { + uint32_t pattern = 0; + switch (dist) { + case 1: + pattern = src[0]; +#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much + pattern |= pattern << 8; + pattern |= pattern << 16; +#elif defined(WEBP_USE_MIPS_DSP_R2) + __asm__ volatile ("replv.qb %0, %0" : "+r"(pattern)); +#else + pattern = 0x01010101u * pattern; +#endif + break; + case 2: + memcpy(&pattern, src, sizeof(uint16_t)); +#if defined(__arm__) || defined(_M_ARM) + pattern |= pattern << 16; +#elif defined(WEBP_USE_MIPS_DSP_R2) + __asm__ volatile ("replv.ph %0, %0" : "+r"(pattern)); +#else + pattern = 0x00010001u * pattern; +#endif + break; + case 4: + memcpy(&pattern, src, sizeof(uint32_t)); + break; + default: + goto Copy; + break; + } + CopySmallPattern8b(src, dst, length, pattern); + return; + } + Copy: + if (dist >= length) { // no overlap -> use memcpy() + memcpy(dst, src, length * sizeof(*dst)); + } else { + int i; + for (i = 0; i < length; ++i) dst[i] = src[i]; + } +} + +// copy pattern of 1 or 2 uint32_t's +static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src, + uint32_t* dst, + int length, uint64_t pattern) { + int i; + if ((uintptr_t)dst & 4) { // Align 'dst' to 8-bytes boundary. + *dst++ = *src++; + pattern = (pattern >> 32) | (pattern << 32); + --length; + } + assert(0 == ((uintptr_t)dst & 7)); + for (i = 0; i < (length >> 1); ++i) { + ((uint64_t*)dst)[i] = pattern; // Copy the pattern 8 bytes at a time. + } + if (length & 1) { // Finish with left-over. + dst[i << 1] = src[i << 1]; + } +} + +static WEBP_INLINE void CopyBlock32b(uint32_t* const dst, + int dist, int length) { + const uint32_t* const src = dst - dist; + if (dist <= 2 && length >= 4 && ((uintptr_t)dst & 3) == 0) { + uint64_t pattern; + if (dist == 1) { + pattern = (uint64_t)src[0]; + pattern |= pattern << 32; + } else { + memcpy(&pattern, src, sizeof(pattern)); + } + CopySmallPattern32b(src, dst, length, pattern); + } else if (dist >= length) { // no overlap + memcpy(dst, src, length * sizeof(*dst)); + } else { + int i; + for (i = 0; i < length; ++i) dst[i] = src[i]; + } +} + +//------------------------------------------------------------------------------ + +static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data, + int width, int height, int last_row) { + int ok = 1; + int row = dec->last_pixel_ / width; + int col = dec->last_pixel_ % width; + VP8LBitReader* const br = &dec->br_; + VP8LMetadata* const hdr = &dec->hdr_; + const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row); + int pos = dec->last_pixel_; // current position + const int end = width * height; // End of data + const int last = width * last_row; // Last pixel to decode + const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; + const int mask = hdr->huffman_mask_; + assert(htree_group != NULL); + assert(pos < end); + assert(last_row <= height); + assert(Is8bOptimizable(hdr)); + + while (!br->eos_ && pos < last) { + int code; + // Only update when changing tile. + if ((col & mask) == 0) { + htree_group = GetHtreeGroupForPos(hdr, col, row); + } + VP8LFillBitWindow(br); + code = ReadSymbol(htree_group->htrees[GREEN], br); + if (code < NUM_LITERAL_CODES) { // Literal + data[pos] = code; + ++pos; + ++col; + if (col >= width) { + col = 0; + ++row; + if (row % NUM_ARGB_CACHE_ROWS == 0) { + ExtractPalettedAlphaRows(dec, row); + } + } + } else if (code < len_code_limit) { // Backward reference + int dist_code, dist; + const int length_sym = code - NUM_LITERAL_CODES; + const int length = GetCopyLength(length_sym, br); + const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br); + VP8LFillBitWindow(br); + dist_code = GetCopyDistance(dist_symbol, br); + dist = PlaneCodeToDistance(width, dist_code); + if (pos >= dist && end - pos >= length) { + CopyBlock8b(data + pos, dist, length); + } else { + ok = 0; + goto End; + } + pos += length; + col += length; + while (col >= width) { + col -= width; + ++row; + if (row % NUM_ARGB_CACHE_ROWS == 0) { + ExtractPalettedAlphaRows(dec, row); + } + } + if (pos < last && (col & mask)) { + htree_group = GetHtreeGroupForPos(hdr, col, row); + } + } else { // Not reached + ok = 0; + goto End; + } + assert(br->eos_ == VP8LIsEndOfStream(br)); + } + // Process the remaining rows corresponding to last row-block. + ExtractPalettedAlphaRows(dec, row); + + End: + if (!ok || (br->eos_ && pos < end)) { + ok = 0; + dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED + : VP8_STATUS_BITSTREAM_ERROR; + } else { + dec->last_pixel_ = (int)pos; + } + return ok; +} + +static void SaveState(VP8LDecoder* const dec, int last_pixel) { + assert(dec->incremental_); + dec->saved_br_ = dec->br_; + dec->saved_last_pixel_ = last_pixel; + if (dec->hdr_.color_cache_size_ > 0) { + VP8LColorCacheCopy(&dec->hdr_.color_cache_, &dec->hdr_.saved_color_cache_); + } +} + +static void RestoreState(VP8LDecoder* const dec) { + assert(dec->br_.eos_); + dec->status_ = VP8_STATUS_SUSPENDED; + dec->br_ = dec->saved_br_; + dec->last_pixel_ = dec->saved_last_pixel_; + if (dec->hdr_.color_cache_size_ > 0) { + VP8LColorCacheCopy(&dec->hdr_.saved_color_cache_, &dec->hdr_.color_cache_); + } +} + +#define SYNC_EVERY_N_ROWS 8 // minimum number of rows between check-points +static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, + int width, int height, int last_row, + ProcessRowsFunc process_func) { + int row = dec->last_pixel_ / width; + int col = dec->last_pixel_ % width; + VP8LBitReader* const br = &dec->br_; + VP8LMetadata* const hdr = &dec->hdr_; + HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row); + uint32_t* src = data + dec->last_pixel_; + uint32_t* last_cached = src; + uint32_t* const src_end = data + width * height; // End of data + uint32_t* const src_last = data + width * last_row; // Last pixel to decode + const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; + const int color_cache_limit = len_code_limit + hdr->color_cache_size_; + int next_sync_row = dec->incremental_ ? row : 1 << 24; + VP8LColorCache* const color_cache = + (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL; + const int mask = hdr->huffman_mask_; + assert(htree_group != NULL); + assert(src < src_end); + assert(src_last <= src_end); + + while (src < src_last) { + int code; + if (row >= next_sync_row) { + SaveState(dec, (int)(src - data)); + next_sync_row = row + SYNC_EVERY_N_ROWS; + } + // Only update when changing tile. Note we could use this test: + // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed + // but that's actually slower and needs storing the previous col/row. + if ((col & mask) == 0) { + htree_group = GetHtreeGroupForPos(hdr, col, row); + } + VP8LFillBitWindow(br); + code = ReadSymbol(htree_group->htrees[GREEN], br); + if (br->eos_) break; // early out + if (code < NUM_LITERAL_CODES) { // Literal + if (htree_group->is_trivial_literal) { + *src = htree_group->literal_arb | (code << 8); + } else { + int red, blue, alpha; + red = ReadSymbol(htree_group->htrees[RED], br); + VP8LFillBitWindow(br); + blue = ReadSymbol(htree_group->htrees[BLUE], br); + alpha = ReadSymbol(htree_group->htrees[ALPHA], br); + if (br->eos_) break; + *src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue; + } + AdvanceByOne: + ++src; + ++col; + if (col >= width) { + col = 0; + ++row; + if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) { + process_func(dec, row); + } + if (color_cache != NULL) { + while (last_cached < src) { + VP8LColorCacheInsert(color_cache, *last_cached++); + } + } + } + } else if (code < len_code_limit) { // Backward reference + int dist_code, dist; + const int length_sym = code - NUM_LITERAL_CODES; + const int length = GetCopyLength(length_sym, br); + const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br); + VP8LFillBitWindow(br); + dist_code = GetCopyDistance(dist_symbol, br); + dist = PlaneCodeToDistance(width, dist_code); + if (br->eos_) break; + if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) { + goto Error; + } else { + CopyBlock32b(src, dist, length); + } + src += length; + col += length; + while (col >= width) { + col -= width; + ++row; + if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) { + process_func(dec, row); + } + } + // Because of the check done above (before 'src' was incremented by + // 'length'), the following holds true. + assert(src <= src_end); + if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row); + if (color_cache != NULL) { + while (last_cached < src) { + VP8LColorCacheInsert(color_cache, *last_cached++); + } + } + } else if (code < color_cache_limit) { // Color cache + const int key = code - len_code_limit; + assert(color_cache != NULL); + while (last_cached < src) { + VP8LColorCacheInsert(color_cache, *last_cached++); + } + *src = VP8LColorCacheLookup(color_cache, key); + goto AdvanceByOne; + } else { // Not reached + goto Error; + } + assert(br->eos_ == VP8LIsEndOfStream(br)); + } + + if (dec->incremental_ && br->eos_ && src < src_end) { + RestoreState(dec); + } else if (!br->eos_) { + // Process the remaining rows corresponding to last row-block. + if (process_func != NULL) { + process_func(dec, row); + } + dec->status_ = VP8_STATUS_OK; + dec->last_pixel_ = (int)(src - data); // end-of-scan marker + } else { + // if not incremental, and we are past the end of buffer (eos_=1), then this + // is a real bitstream error. + goto Error; + } + return 1; + + Error: + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + return 0; +} + +// ----------------------------------------------------------------------------- +// VP8LTransform + +static void ClearTransform(VP8LTransform* const transform) { + free(transform->data_); + transform->data_ = NULL; +} + +// For security reason, we need to remap the color map to span +// the total possible bundled values, and not just the num_colors. +static int ExpandColorMap(int num_colors, VP8LTransform* const transform) { + int i; + const int final_num_colors = 1 << (8 >> transform->bits_); + uint32_t* const new_color_map = (uint32_t*)malloc((uint64_t)final_num_colors * sizeof(*new_color_map)); + if (new_color_map == NULL) { + return 0; + } else { + uint8_t* const data = (uint8_t*)transform->data_; + uint8_t* const new_data = (uint8_t*)new_color_map; + new_color_map[0] = transform->data_[0]; + for (i = 4; i < 4 * num_colors; ++i) { + // Equivalent to AddPixelEq(), on a byte-basis. + new_data[i] = (data[i] + new_data[i - 4]) & 0xff; + } + for (; i < 4 * final_num_colors; ++i) + new_data[i] = 0; // black tail. + free(transform->data_); + transform->data_ = new_color_map; + } + return 1; +} + +static int ReadTransform(int* const xsize, int const* ysize, + VP8LDecoder* const dec) { + int ok = 1; + VP8LBitReader* const br = &dec->br_; + VP8LTransform* transform = &dec->transforms_[dec->next_transform_]; + const VP8LImageTransformType type = + (VP8LImageTransformType)VP8LReadBits(br, 2); + + // Each transform type can only be present once in the stream. + if (dec->transforms_seen_ & (1U << type)) { + return 0; // Already there, let's not accept the second same transform. + } + dec->transforms_seen_ |= (1U << type); + + transform->type_ = type; + transform->xsize_ = *xsize; + transform->ysize_ = *ysize; + transform->data_ = NULL; + ++dec->next_transform_; + assert(dec->next_transform_ <= NUM_TRANSFORMS); + + switch (type) { + case PREDICTOR_TRANSFORM: + case CROSS_COLOR_TRANSFORM: + transform->bits_ = VP8LReadBits(br, 3) + 2; + ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_, + transform->bits_), + VP8LSubSampleSize(transform->ysize_, + transform->bits_), + 0, dec, &transform->data_); + break; + case COLOR_INDEXING_TRANSFORM: { + const int num_colors = VP8LReadBits(br, 8) + 1; + const int bits = (num_colors > 16) ? 0 + : (num_colors > 4) ? 1 + : (num_colors > 2) ? 2 + : 3; + *xsize = VP8LSubSampleSize(transform->xsize_, bits); + transform->bits_ = bits; + ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_); + ok = ok && ExpandColorMap(num_colors, transform); + break; + } + case SUBTRACT_GREEN: + break; + default: + assert(0); // can't happen + break; + } + + return ok; +} + +// ----------------------------------------------------------------------------- +// VP8LMetadata + +static void InitMetadata(VP8LMetadata* const hdr) { + assert(hdr != NULL); + memset(hdr, 0, sizeof(*hdr)); +} + +static void ClearMetadata(VP8LMetadata* const hdr) { + assert(hdr != NULL); + + free(hdr->huffman_image_); + free(hdr->huffman_tables_); + VP8LHtreeGroupsFree(hdr->htree_groups_); + VP8LColorCacheClear(&hdr->color_cache_); + VP8LColorCacheClear(&hdr->saved_color_cache_); + InitMetadata(hdr); +} + +// ----------------------------------------------------------------------------- +// VP8LDecoder + +VP8LDecoder* VP8LNew(void) { + VP8LDecoder* const dec = (VP8LDecoder*)calloc(1ULL, sizeof(*dec)); + if (dec == NULL) return NULL; + dec->status_ = VP8_STATUS_OK; + dec->state_ = READ_DIM; + + VP8LDspInit(); // Init critical function pointers. + + return dec; +} + +void VP8LClear(VP8LDecoder* const dec) { + int i; + if (dec == NULL) return; + ClearMetadata(&dec->hdr_); + + free(dec->pixels_); + dec->pixels_ = NULL; + for (i = 0; i < dec->next_transform_; ++i) { + ClearTransform(&dec->transforms_[i]); + } + dec->next_transform_ = 0; + dec->transforms_seen_ = 0; + + free(dec->rescaler_memory); + dec->rescaler_memory = NULL; + + dec->output_ = NULL; // leave no trace behind +} + +void VP8LDelete(VP8LDecoder* const dec) { + if (dec != NULL) { + VP8LClear(dec); + free(dec); + } +} + +static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) { + VP8LMetadata* const hdr = &dec->hdr_; + const int num_bits = hdr->huffman_subsample_bits_; + dec->width_ = width; + dec->height_ = height; + + hdr->huffman_xsize_ = VP8LSubSampleSize(width, num_bits); + hdr->huffman_mask_ = (num_bits == 0) ? ~0 : (1 << num_bits) - 1; +} + +static int DecodeImageStream(int xsize, int ysize, + int is_level0, + VP8LDecoder* const dec, + uint32_t** const decoded_data) { + int ok = 1; + int transform_xsize = xsize; + int transform_ysize = ysize; + VP8LBitReader* const br = &dec->br_; + VP8LMetadata* const hdr = &dec->hdr_; + uint32_t* data = NULL; + int color_cache_bits = 0; + + // Read the transforms (may recurse). + if (is_level0) { + while (ok && VP8LReadBits(br, 1)) { + ok = ReadTransform(&transform_xsize, &transform_ysize, dec); + } + } + + // Color cache + if (ok && VP8LReadBits(br, 1)) { + color_cache_bits = VP8LReadBits(br, 4); + ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS); + if (!ok) { + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + goto End; + } + } + + // Read the Huffman codes (may recurse). + ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize, + color_cache_bits, is_level0); + if (!ok) { + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + goto End; + } + + // Finish setting up the color-cache + if (color_cache_bits > 0) { + hdr->color_cache_size_ = 1 << color_cache_bits; + if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + ok = 0; + goto End; + } + } else { + hdr->color_cache_size_ = 0; + } + UpdateDecoder(dec, transform_xsize, transform_ysize); + + if (is_level0) { // level 0 complete + dec->state_ = READ_HDR; + goto End; + } + + { + const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize; + data = (uint32_t*)malloc(total_size * sizeof(*data)); + if (data == NULL) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + ok = 0; + goto End; + } + } + + // Use the Huffman trees to decode the LZ77 encoded data. + ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, + transform_ysize, NULL); + ok = ok && !br->eos_; + + End: + if (!ok) { + free(data); + ClearMetadata(hdr); + } else { + if (decoded_data != NULL) { + *decoded_data = data; + } else { + // We allocate image data in this function only for transforms. At level 0 + // (that is: not the transforms), we shouldn't have allocated anything. + assert(data == NULL); + assert(is_level0); + } + dec->last_pixel_ = 0; // Reset for future DECODE_DATA_FUNC() calls. + if (!is_level0) ClearMetadata(hdr); // Clean up temporary data behind. + } + return ok; +} + +//------------------------------------------------------------------------------ +// Allocate internal buffers dec->pixels_ and dec->argb_cache_. +static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { + const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_; + // Scratch buffer corresponding to top-prediction row for transforming the + // first row in the row-blocks. Not needed for paletted alpha. + const uint64_t cache_top_pixels = (uint16_t)final_width; + // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha. + const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS; + const uint64_t total_num_pixels = + num_pixels + cache_top_pixels + cache_pixels; + + assert(dec->width_ <= final_width); + dec->pixels_ = (uint32_t*)malloc(total_num_pixels * sizeof(uint32_t)); + if (dec->pixels_ == NULL) { + dec->argb_cache_ = NULL; // for sanity check + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + return 0; + } + dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels; + return 1; +} + +static int AllocateInternalBuffers8b(VP8LDecoder* const dec) { + const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_; + dec->argb_cache_ = NULL; // for sanity check + dec->pixels_ = (uint32_t*)malloc(total_num_pixels * sizeof(uint8_t)); + if (dec->pixels_ == NULL) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + return 0; + } + return 1; +} + +//------------------------------------------------------------------------------ + +// Special row-processing that only stores the alpha data. +static void ExtractAlphaRows(VP8LDecoder* const dec, int row) { + const int num_rows = row - dec->last_row_; + const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_; + + if (num_rows <= 0) return; // Nothing to be done. + ApplyInverseTransforms(dec, num_rows, in); + + // Extract alpha (which is stored in the green plane). + { + const int width = dec->io_->width; // the final width (!= dec->width_) + const int cache_pixs = width * num_rows; + uint8_t* const dst = (uint8_t*)dec->io_->opaque + width * dec->last_row_; + const uint32_t* const src = dec->argb_cache_; + int i; + for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff; + } + dec->last_row_ = dec->last_out_row_ = row; +} + +int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec, + const uint8_t* const data, size_t data_size, + uint8_t* const output) { + int ok = 0; + VP8LDecoder* dec; + VP8Io* io; + assert(alph_dec != NULL); + alph_dec->vp8l_dec_ = VP8LNew(); + if (alph_dec->vp8l_dec_ == NULL) return 0; + dec = alph_dec->vp8l_dec_; + + dec->width_ = alph_dec->width_; + dec->height_ = alph_dec->height_; + dec->io_ = &alph_dec->io_; + io = dec->io_; + + VP8InitIo(io); + WebPInitCustomIo(NULL, io); // Just a sanity Init. io won't be used. + io->opaque = output; + io->width = alph_dec->width_; + io->height = alph_dec->height_; + + dec->status_ = VP8_STATUS_OK; + VP8LInitBitReader(&dec->br_, data, data_size); + + if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) { + goto Err; + } + + // Special case: if alpha data uses only the color indexing transform and + // doesn't use color cache (a frequent case), we will use DecodeAlphaData() + // method that only needs allocation of 1 byte per pixel (alpha channel). + if (dec->next_transform_ == 1 && + dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM && + Is8bOptimizable(&dec->hdr_)) { + alph_dec->use_8b_decode = 1; + ok = AllocateInternalBuffers8b(dec); + } else { + // Allocate internal buffers (note that dec->width_ may have changed here). + alph_dec->use_8b_decode = 0; + ok = AllocateInternalBuffers32b(dec, alph_dec->width_); + } + + if (!ok) goto Err; + + return 1; + + Err: + VP8LDelete(alph_dec->vp8l_dec_); + alph_dec->vp8l_dec_ = NULL; + return 0; +} + +int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) { + VP8LDecoder* const dec = alph_dec->vp8l_dec_; + assert(dec != NULL); + assert(last_row <= dec->height_); + + if (dec->last_pixel_ == dec->width_ * dec->height_) { + return 1; // done + } + + // Decode (with special row processing). + return alph_dec->use_8b_decode ? + DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_, + last_row) : + DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, + last_row, ExtractAlphaRows); +} + +//------------------------------------------------------------------------------ + +int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) { + int width, height, has_alpha; + + if (dec == NULL) return 0; + if (io == NULL) { + dec->status_ = VP8_STATUS_INVALID_PARAM; + return 0; + } + + dec->io_ = io; + dec->status_ = VP8_STATUS_OK; + VP8LInitBitReader(&dec->br_, io->data, io->data_size); + if (!ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) { + dec->status_ = VP8_STATUS_BITSTREAM_ERROR; + goto Error; + } + dec->state_ = READ_DIM; + io->width = width; + io->height = height; + + if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error; + return 1; + + Error: + VP8LClear(dec); + assert(dec->status_ != VP8_STATUS_OK); + return 0; +} + +int VP8LDecodeImage(VP8LDecoder* const dec) { + VP8Io* io = NULL; + WebPDecParams* params = NULL; + + // Sanity checks. + if (dec == NULL) return 0; + + assert(dec->hdr_.huffman_tables_ != NULL); + assert(dec->hdr_.htree_groups_ != NULL); + assert(dec->hdr_.num_htree_groups_ > 0); + + io = dec->io_; + assert(io != NULL); + params = (WebPDecParams*)io->opaque; + assert(params != NULL); + + // Initialization. + if (dec->state_ != READ_DATA) { + dec->output_ = params->output; + assert(dec->output_ != NULL); + + if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) { + dec->status_ = VP8_STATUS_INVALID_PARAM; + goto Err; + } + + if (!AllocateInternalBuffers32b(dec, io->width)) goto Err; + + if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; + + if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) { + // need the alpha-multiply functions for premultiplied output or rescaling + WebPInitAlphaProcessing(); + } + if (dec->incremental_) { + if (dec->hdr_.color_cache_size_ > 0 && + dec->hdr_.saved_color_cache_.colors_ == NULL) { + if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_, + dec->hdr_.color_cache_.hash_bits_)) { + dec->status_ = VP8_STATUS_OUT_OF_MEMORY; + goto Err; + } + } + } + dec->state_ = READ_DATA; + } + + // Decode. + if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_, + dec->height_, ProcessRows)) { + goto Err; + } + + params->last_y = dec->last_out_row_; + return 1; + + Err: + VP8LClear(dec); + assert(dec->status_ != VP8_STATUS_OK); + return 0; +} + +//------------------------------------------------------------------------------ diff --git a/src/loaders/webp/dec/vp8li.h b/src/loaders/webp/dec/vp8li.h new file mode 100644 index 00000000..8886e47f --- /dev/null +++ b/src/loaders/webp/dec/vp8li.h @@ -0,0 +1,136 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Lossless decoder: internal header. +// +// Author: Skal (pascal.massimino@gmail.com) +// Vikas Arora(vikaas.arora@gmail.com) + +#ifndef WEBP_DEC_VP8LI_H_ +#define WEBP_DEC_VP8LI_H_ + +#include // for memcpy() +#include "./webpi.h" +#include "../utils/bit_reader.h" +#include "../utils/color_cache.h" +#include "../utils/huffman.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + READ_DATA = 0, + READ_HDR = 1, + READ_DIM = 2 +} VP8LDecodeState; + +typedef struct VP8LTransform VP8LTransform; +struct VP8LTransform { + VP8LImageTransformType type_; // transform type. + int bits_; // subsampling bits defining transform window. + int xsize_; // transform window X index. + int ysize_; // transform window Y index. + uint32_t *data_; // transform data. +}; + +typedef struct { + int color_cache_size_; + VP8LColorCache color_cache_; + VP8LColorCache saved_color_cache_; // for incremental + + int huffman_mask_; + int huffman_subsample_bits_; + int huffman_xsize_; + uint32_t *huffman_image_; + int num_htree_groups_; + HTreeGroup *htree_groups_; + HuffmanCode *huffman_tables_; +} VP8LMetadata; + +typedef struct VP8LDecoder VP8LDecoder; +struct VP8LDecoder { + VP8StatusCode status_; + VP8LDecodeState state_; + VP8Io *io_; + + const WebPDecBuffer *output_; // shortcut to io->opaque->output + + uint32_t *pixels_; // Internal data: either uint8_t* for alpha + // or uint32_t* for BGRA. + uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage. + + VP8LBitReader br_; + int incremental_; // if true, incremental decoding is expected + VP8LBitReader saved_br_; // note: could be local variables too + int saved_last_pixel_; + + int width_; + int height_; + int last_row_; // last input row decoded so far. + int last_pixel_; // last pixel decoded so far. However, it may + // not be transformed, scaled and + // color-converted yet. + int last_out_row_; // last row output so far. + + VP8LMetadata hdr_; + + int next_transform_; + VP8LTransform transforms_[NUM_TRANSFORMS]; + // or'd bitset storing the transforms types. + uint32_t transforms_seen_; + + uint8_t *rescaler_memory; // Working memory for rescaling work. + WebPRescaler *rescaler; // Common rescaler for all channels. +}; + +//------------------------------------------------------------------------------ +// internal functions. Not public. + +struct ALPHDecoder; // Defined in dec/alphai.h. + +// in vp8l.c + +// Decodes image header for alpha data stored using lossless compression. +// Returns false in case of error. +int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec, + const uint8_t* const data, size_t data_size, + uint8_t* const output); + +// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are +// already decoded in previous call(s), it will resume decoding from where it +// was paused. +// Returns false in case of bitstream error. +int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec, + int last_row); + +// Allocates and initialize a new lossless decoder instance. +VP8LDecoder* VP8LNew(void); + +// Decodes the image header. Returns false in case of error. +int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io); + +// Decodes an image. It's required to decode the lossless header before calling +// this function. Returns false in case of error, with updated dec->status_. +int VP8LDecodeImage(VP8LDecoder* const dec); + +// Resets the decoder in its initial state, reclaiming memory. +// Preserves the dec->status_ value. +void VP8LClear(VP8LDecoder* const dec); + +// Clears and deallocate a lossless decoder instance. +void VP8LDelete(VP8LDecoder* const dec); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_DEC_VP8LI_H_ */ diff --git a/src/loaders/webp/dec/webp.cpp b/src/loaders/webp/dec/webp.cpp new file mode 100644 index 00000000..7f6a5a44 --- /dev/null +++ b/src/loaders/webp/dec/webp.cpp @@ -0,0 +1,674 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Main decoding functions for WEBP images. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include + +#include "./vp8i.h" +#include "./vp8li.h" +#include "./webpi.h" + + +// VP8X Feature Flags. +typedef enum WebPFeatureFlags { + FRAGMENTS_FLAG = 0x00000001, + ANIMATION_FLAG = 0x00000002, + XMP_FLAG = 0x00000004, + EXIF_FLAG = 0x00000008, + ALPHA_FLAG = 0x00000010, + ICCP_FLAG = 0x00000020 +} WebPFeatureFlags; + + +//------------------------------------------------------------------------------ +// RIFF layout is: +// Offset tag +// 0...3 "RIFF" 4-byte tag +// 4...7 size of image data (including metadata) starting at offset 8 +// 8...11 "WEBP" our form-type signature +// The RIFF container (12 bytes) is followed by appropriate chunks: +// 12..15 "VP8 ": 4-bytes tags, signaling the use of VP8 video format +// 16..19 size of the raw VP8 image data, starting at offset 20 +// 20.... the VP8 bytes +// Or, +// 12..15 "VP8L": 4-bytes tags, signaling the use of VP8L lossless format +// 16..19 size of the raw VP8L image data, starting at offset 20 +// 20.... the VP8L bytes +// Or, +// 12..15 "VP8X": 4-bytes tags, describing the extended-VP8 chunk. +// 16..19 size of the VP8X chunk starting at offset 20. +// 20..23 VP8X flags bit-map corresponding to the chunk-types present. +// 24..26 Width of the Canvas Image. +// 27..29 Height of the Canvas Image. +// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8, +// VP8L, XMP, EXIF ...) +// All sizes are in little-endian order. +// Note: chunk data size must be padded to multiple of 2 when written. + +static WEBP_INLINE uint32_t get_le24(const uint8_t* const data) { + return data[0] | (data[1] << 8) | (data[2] << 16); +} + +static WEBP_INLINE uint32_t get_le32(const uint8_t* const data) { + return (uint32_t)get_le24(data) | (data[3] << 24); +} + +// Validates the RIFF container (if detected) and skips over it. +// If a RIFF container is detected, returns: +// VP8_STATUS_BITSTREAM_ERROR for invalid header, +// VP8_STATUS_NOT_ENOUGH_DATA for truncated data if have_all_data is true, +// and VP8_STATUS_OK otherwise. +// In case there are not enough bytes (partial RIFF container), return 0 for +// *riff_size. Else return the RIFF size extracted from the header. +static VP8StatusCode ParseRIFF(const uint8_t** const data, + size_t* const data_size, int have_all_data, + size_t* const riff_size) { + assert(data != NULL); + assert(data_size != NULL); + assert(riff_size != NULL); + + *riff_size = 0; // Default: no RIFF present. + if (*data_size >= RIFF_HEADER_SIZE && !memcmp(*data, "RIFF", TAG_SIZE)) { + if (memcmp(*data + 8, "WEBP", TAG_SIZE)) { + return VP8_STATUS_BITSTREAM_ERROR; // Wrong image file signature. + } else { + const uint32_t size = get_le32(*data + TAG_SIZE); + // Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn"). + if (size < TAG_SIZE + CHUNK_HEADER_SIZE) { + return VP8_STATUS_BITSTREAM_ERROR; + } + if (size > MAX_CHUNK_PAYLOAD) { + return VP8_STATUS_BITSTREAM_ERROR; + } + if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream. + } + // We have a RIFF container. Skip it. + *riff_size = size; + *data += RIFF_HEADER_SIZE; + *data_size -= RIFF_HEADER_SIZE; + } + } + return VP8_STATUS_OK; +} + +// Validates the VP8X header and skips over it. +// Returns VP8_STATUS_BITSTREAM_ERROR for invalid VP8X header, +// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and +// VP8_STATUS_OK otherwise. +// If a VP8X chunk is found, found_vp8x is set to true and *width_ptr, +// *height_ptr and *flags_ptr are set to the corresponding values extracted +// from the VP8X chunk. +static VP8StatusCode ParseVP8X(const uint8_t** const data, + size_t* const data_size, + int* const found_vp8x, + int* const width_ptr, int* const height_ptr, + uint32_t* const flags_ptr) { + const uint32_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE; + assert(data != NULL); + assert(data_size != NULL); + assert(found_vp8x != NULL); + + *found_vp8x = 0; + + if (*data_size < CHUNK_HEADER_SIZE) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data. + } + + if (!memcmp(*data, "VP8X", TAG_SIZE)) { + int width, height; + uint32_t flags; + const uint32_t chunk_size = get_le32(*data + TAG_SIZE); + if (chunk_size != VP8X_CHUNK_SIZE) { + return VP8_STATUS_BITSTREAM_ERROR; // Wrong chunk size. + } + + // Verify if enough data is available to validate the VP8X chunk. + if (*data_size < vp8x_size) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data. + } + flags = get_le32(*data + 8); + width = 1 + get_le24(*data + 12); + height = 1 + get_le24(*data + 15); + if (width * (uint64_t)height >= MAX_IMAGE_AREA) { + return VP8_STATUS_BITSTREAM_ERROR; // image is too large + } + + if (flags_ptr != NULL) *flags_ptr = flags; + if (width_ptr != NULL) *width_ptr = width; + if (height_ptr != NULL) *height_ptr = height; + // Skip over VP8X header bytes. + *data += vp8x_size; + *data_size -= vp8x_size; + *found_vp8x = 1; + } + return VP8_STATUS_OK; +} + +// Skips to the next VP8/VP8L chunk header in the data given the size of the +// RIFF chunk 'riff_size'. +// Returns VP8_STATUS_BITSTREAM_ERROR if any invalid chunk size is encountered, +// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and +// VP8_STATUS_OK otherwise. +// If an alpha chunk is found, *alpha_data and *alpha_size are set +// appropriately. +static VP8StatusCode ParseOptionalChunks(const uint8_t** const data, + size_t* const data_size, + size_t const riff_size, + const uint8_t** const alpha_data, + size_t* const alpha_size) { + const uint8_t* buf; + size_t buf_size; + uint32_t total_size = TAG_SIZE + // "WEBP". + CHUNK_HEADER_SIZE + // "VP8Xnnnn". + VP8X_CHUNK_SIZE; // data. + assert(data != NULL); + assert(data_size != NULL); + buf = *data; + buf_size = *data_size; + + assert(alpha_data != NULL); + assert(alpha_size != NULL); + *alpha_data = NULL; + *alpha_size = 0; + + while (1) { + uint32_t chunk_size; + uint32_t disk_chunk_size; // chunk_size with padding + + *data = buf; + *data_size = buf_size; + + if (buf_size < CHUNK_HEADER_SIZE) { // Insufficient data. + return VP8_STATUS_NOT_ENOUGH_DATA; + } + + chunk_size = get_le32(buf + TAG_SIZE); + if (chunk_size > MAX_CHUNK_PAYLOAD) { + return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size. + } + // For odd-sized chunk-payload, there's one byte padding at the end. + disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1; + total_size += disk_chunk_size; + + // Check that total bytes skipped so far does not exceed riff_size. + if (riff_size > 0 && (total_size > riff_size)) { + return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size. + } + + // Start of a (possibly incomplete) VP8/VP8L chunk implies that we have + // parsed all the optional chunks. + // Note: This check must occur before the check 'buf_size < disk_chunk_size' + // below to allow incomplete VP8/VP8L chunks. + if (!memcmp(buf, "VP8 ", TAG_SIZE) || + !memcmp(buf, "VP8L", TAG_SIZE)) { + return VP8_STATUS_OK; + } + + if (buf_size < disk_chunk_size) { // Insufficient data. + return VP8_STATUS_NOT_ENOUGH_DATA; + } + + if (!memcmp(buf, "ALPH", TAG_SIZE)) { // A valid ALPH header. + *alpha_data = buf + CHUNK_HEADER_SIZE; + *alpha_size = chunk_size; + } + + // We have a full and valid chunk; skip it. + buf += disk_chunk_size; + buf_size -= disk_chunk_size; + } +} + +// Validates the VP8/VP8L Header ("VP8 nnnn" or "VP8L nnnn") and skips over it. +// Returns VP8_STATUS_BITSTREAM_ERROR for invalid (chunk larger than +// riff_size) VP8/VP8L header, +// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and +// VP8_STATUS_OK otherwise. +// If a VP8/VP8L chunk is found, *chunk_size is set to the total number of bytes +// extracted from the VP8/VP8L chunk header. +// The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data. +static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr, + size_t* const data_size, int have_all_data, + size_t riff_size, size_t* const chunk_size, + int* const is_lossless) { + const uint8_t* const data = *data_ptr; + const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE); + const int is_vp8l = !memcmp(data, "VP8L", TAG_SIZE); + const uint32_t minimal_size = + TAG_SIZE + CHUNK_HEADER_SIZE; // "WEBP" + "VP8 nnnn" OR + // "WEBP" + "VP8Lnnnn" + assert(data != NULL); + assert(data_size != NULL); + assert(chunk_size != NULL); + assert(is_lossless != NULL); + + if (*data_size < CHUNK_HEADER_SIZE) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data. + } + + if (is_vp8 || is_vp8l) { + // Bitstream contains VP8/VP8L header. + const uint32_t size = get_le32(data + TAG_SIZE); + if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) { + return VP8_STATUS_BITSTREAM_ERROR; // Inconsistent size information. + } + if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) { + return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream. + } + // Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header. + *chunk_size = size; + *data_ptr += CHUNK_HEADER_SIZE; + *data_size -= CHUNK_HEADER_SIZE; + *is_lossless = is_vp8l; + } else { + // Raw VP8/VP8L bitstream (no header). + *is_lossless = VP8LCheckSignature(data, *data_size); + *chunk_size = *data_size; + } + + return VP8_STATUS_OK; +} + +//------------------------------------------------------------------------------ + +// Fetch '*width', '*height', '*has_alpha' and fill out 'headers' based on +// 'data'. All the output parameters may be NULL. If 'headers' is NULL only the +// minimal amount will be read to fetch the remaining parameters. +// If 'headers' is non-NULL this function will attempt to locate both alpha +// data (with or without a VP8X chunk) and the bitstream chunk (VP8/VP8L). +// Note: The following chunk sequences (before the raw VP8/VP8L data) are +// considered valid by this function: +// RIFF + VP8(L) +// RIFF + VP8X + (optional chunks) + VP8(L) +// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose. +// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose. +static VP8StatusCode ParseHeadersInternal(const uint8_t* data, + size_t data_size, + int* const width, + int* const height, + int* const has_alpha, + int* const has_animation, + int* const format, + WebPHeaderStructure* const headers) { + int canvas_width = 0; + int canvas_height = 0; + int image_width = 0; + int image_height = 0; + int found_riff = 0; + int found_vp8x = 0; + int animation_present = 0; + int fragments_present = 0; + const int have_all_data = (headers != NULL) ? headers->have_all_data : 0; + + VP8StatusCode status; + WebPHeaderStructure hdrs; + + if (data == NULL || data_size < RIFF_HEADER_SIZE) { + return VP8_STATUS_NOT_ENOUGH_DATA; + } + memset(&hdrs, 0, sizeof(hdrs)); + hdrs.data = data; + hdrs.data_size = data_size; + + // Skip over RIFF header. + status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size); + if (status != VP8_STATUS_OK) { + return status; // Wrong RIFF header / insufficient data. + } + found_riff = (hdrs.riff_size > 0); + + // Skip over VP8X. + { + uint32_t flags = 0; + status = ParseVP8X(&data, &data_size, &found_vp8x, + &canvas_width, &canvas_height, &flags); + if (status != VP8_STATUS_OK) { + return status; // Wrong VP8X / insufficient data. + } + animation_present = !!(flags & ANIMATION_FLAG); + fragments_present = !!(flags & FRAGMENTS_FLAG); + if (!found_riff && found_vp8x) { + // Note: This restriction may be removed in the future, if it becomes + // necessary to send VP8X chunk to the decoder. + return VP8_STATUS_BITSTREAM_ERROR; + } + if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG); + if (has_animation != NULL) *has_animation = animation_present; + if (format != NULL) *format = 0; // default = undefined + + image_width = canvas_width; + image_height = canvas_height; + if (found_vp8x && (animation_present || fragments_present) && + headers == NULL) { + status = VP8_STATUS_OK; + goto ReturnWidthHeight; // Just return features from VP8X header. + } + } + + if (data_size < TAG_SIZE) { + status = VP8_STATUS_NOT_ENOUGH_DATA; + goto ReturnWidthHeight; + } + + // Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH". + if ((found_riff && found_vp8x) || + (!found_riff && !found_vp8x && !memcmp(data, "ALPH", TAG_SIZE))) { + status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size, + &hdrs.alpha_data, &hdrs.alpha_data_size); + if (status != VP8_STATUS_OK) { + goto ReturnWidthHeight; // Invalid chunk size / insufficient data. + } + } + + // Skip over VP8/VP8L header. + status = ParseVP8Header(&data, &data_size, have_all_data, hdrs.riff_size, + &hdrs.compressed_size, &hdrs.is_lossless); + if (status != VP8_STATUS_OK) { + goto ReturnWidthHeight; // Wrong VP8/VP8L chunk-header / insufficient data. + } + if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) { + return VP8_STATUS_BITSTREAM_ERROR; + } + + if (format != NULL && !(animation_present || fragments_present)) { + *format = hdrs.is_lossless ? 2 : 1; + } + + if (!hdrs.is_lossless) { + if (data_size < VP8_FRAME_HEADER_SIZE) { + status = VP8_STATUS_NOT_ENOUGH_DATA; + goto ReturnWidthHeight; + } + // Validates raw VP8 data. + if (!VP8GetInfo(data, data_size, (uint32_t)hdrs.compressed_size, + &image_width, &image_height)) { + return VP8_STATUS_BITSTREAM_ERROR; + } + } else { + if (data_size < VP8L_FRAME_HEADER_SIZE) { + status = VP8_STATUS_NOT_ENOUGH_DATA; + goto ReturnWidthHeight; + } + // Validates raw VP8L data. + if (!VP8LGetInfo(data, data_size, &image_width, &image_height, has_alpha)) { + return VP8_STATUS_BITSTREAM_ERROR; + } + } + // Validates image size coherency. + if (found_vp8x) { + if (canvas_width != image_width || canvas_height != image_height) { + return VP8_STATUS_BITSTREAM_ERROR; + } + } + if (headers != NULL) { + *headers = hdrs; + headers->offset = data - headers->data; + assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD); + assert(headers->offset == headers->data_size - data_size); + } + ReturnWidthHeight: + if (status == VP8_STATUS_OK || + (status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) { + if (has_alpha != NULL) { + // If the data did not contain a VP8X/VP8L chunk the only definitive way + // to set this is by looking for alpha data (from an ALPH chunk). + *has_alpha |= (hdrs.alpha_data != NULL); + } + if (width != NULL) *width = image_width; + if (height != NULL) *height = image_height; + return VP8_STATUS_OK; + } else { + return status; + } +} + +// Skips over all valid chunks prior to the first VP8/VP8L frame header. +// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk), +// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE +// in the case of non-decodable features (animation for instance). +// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless +// fields are updated appropriately upon success. +static VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) { + VP8StatusCode status; + int has_animation = 0; + assert(headers != NULL); + // fill out headers, ignore width/height/has_alpha. + status = ParseHeadersInternal(headers->data, headers->data_size, + NULL, NULL, NULL, &has_animation, + NULL, headers); + if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) { + // TODO(jzern): full support of animation frames will require API additions. + if (has_animation) { + status = VP8_STATUS_UNSUPPORTED_FEATURE; + } + } + return status; +} + +//------------------------------------------------------------------------------ +// "Into" decoding variants + +// Main flow +static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, + WebPDecParams* const params) { + VP8StatusCode status; + VP8Io io; + WebPHeaderStructure headers; + + headers.data = data; + headers.data_size = data_size; + headers.have_all_data = 1; + status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks. + if (status != VP8_STATUS_OK) { + return status; + } + + assert(params != NULL); + VP8InitIo(&io); + io.data = headers.data + headers.offset; + io.data_size = headers.data_size - headers.offset; + WebPInitCustomIo(params, &io); // Plug the I/O functions. + + if (!headers.is_lossless) { + VP8Decoder* const dec = VP8New(); + if (dec == NULL) { + return VP8_STATUS_OUT_OF_MEMORY; + } + dec->alpha_data_ = headers.alpha_data; + dec->alpha_data_size_ = headers.alpha_data_size; + + // Decode bitstream header, update io->width/io->height. + if (!VP8GetHeaders(dec, &io)) { + status = dec->status_; // An error occurred. Grab error status. + } else { + // Allocate/check output buffers. + status = WebPAllocateDecBuffer(io.width, io.height, params->options, + params->output); + if (status == VP8_STATUS_OK) { // Decode + VP8InitDithering(params->options, dec); + if (!VP8Decode(dec, &io)) { + status = dec->status_; + } + } + } + VP8Delete(dec); + } else { + VP8LDecoder* const dec = VP8LNew(); + if (dec == NULL) { + return VP8_STATUS_OUT_OF_MEMORY; + } + if (!VP8LDecodeHeader(dec, &io)) { + status = dec->status_; // An error occurred. Grab error status. + } else { + // Allocate/check output buffers. + status = WebPAllocateDecBuffer(io.width, io.height, params->options, + params->output); + if (status == VP8_STATUS_OK) { // Decode + if (!VP8LDecodeImage(dec)) { + status = dec->status_; + } + } + } + VP8LDelete(dec); + } + + if (status != VP8_STATUS_OK) { + WebPFreeDecBuffer(params->output); + } + + if (params->options != NULL && params->options->flip) { + status = WebPFlipBuffer(params->output); + } + return status; +} + +//------------------------------------------------------------------------------ + +static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* const data, + size_t data_size, int* const width, int* const height, + WebPDecBuffer* const keep_info) { + WebPDecParams params; + WebPDecBuffer output; + + WebPInitDecBuffer(&output); + memset(¶ms, 0, sizeof(params)); + params.output = &output; + output.colorspace = mode; + + // Retrieve (and report back) the required dimensions from bitstream. + if (!WebPGetInfo(data, data_size, &output.width, &output.height)) { + return NULL; + } + if (width != NULL) *width = output.width; + if (height != NULL) *height = output.height; + + // Decode + if (DecodeInto(data, data_size, ¶ms) != VP8_STATUS_OK) { + return NULL; + } + if (keep_info != NULL) { // keep track of the side-info + WebPCopyDecBuffer(&output, keep_info); + } + // return decoded samples (don't clear 'output'!) + return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y; +} + + +static void DefaultFeatures(WebPBitstreamFeatures* const features) { + assert(features != NULL); + memset(features, 0, sizeof(*features)); +} + +static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size, + WebPBitstreamFeatures* const features) { + if (features == NULL || data == NULL) { + return VP8_STATUS_INVALID_PARAM; + } + DefaultFeatures(features); + + // Only parse enough of the data to retrieve the features. + return ParseHeadersInternal(data, data_size, + &features->width, &features->height, + &features->has_alpha, &features->has_animation, + &features->format, NULL); +} + +//------------------------------------------------------------------------------ +// Cropping and rescaling. + +int WebPIoInitFromOptions(const WebPDecoderOptions* const options, + VP8Io* const io, WEBP_CSP_MODE src_colorspace) { + const int W = io->width; + const int H = io->height; + int x = 0, y = 0, w = W, h = H; + + // Cropping + io->use_cropping = (options != NULL) && (options->use_cropping > 0); + if (io->use_cropping) { + w = options->crop_width; + h = options->crop_height; + x = options->crop_left; + y = options->crop_top; + if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420 + x &= ~1; + y &= ~1; + } + if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) { + return 0; // out of frame boundary error + } + } + io->crop_left = x; + io->crop_top = y; + io->crop_right = x + w; + io->crop_bottom = y + h; + io->mb_w = w; + io->mb_h = h; + + // Scaling + io->use_scaling = (options != NULL) && (options->use_scaling > 0); + if (io->use_scaling) { + if (options->scaled_width <= 0 || options->scaled_height <= 0) { + return 0; + } + io->scaled_width = options->scaled_width; + io->scaled_height = options->scaled_height; + } + + // Filter + io->bypass_filtering = options && options->bypass_filtering; + + // Fancy upsampler +#ifdef FANCY_UPSAMPLING + io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling); +#endif + + if (io->use_scaling) { + // disable filter (only for large downscaling ratio). + io->bypass_filtering = (io->scaled_width < W * 3 / 4) && + (io->scaled_height < H * 3 / 4); + io->fancy_upsampling = 0; + } + return 1; +} + +//------------------------------------------------------------------------------ + + +/************************************************************************/ +/* External Class Implementation */ +/************************************************************************/ + + +uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, + int* width, int* height) { + return Decode(MODE_BGRA, data, data_size, width, height, NULL); +} + + +int WebPGetInfo(const uint8_t* data, size_t data_size, + int* width, int* height) { + WebPBitstreamFeatures features; + + if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) { + return 0; + } + + if (width != NULL) { + *width = features.width; + } + if (height != NULL) { + *height = features.height; + } + + return 1; +} diff --git a/src/loaders/webp/dec/webpi.h b/src/loaders/webp/dec/webpi.h new file mode 100644 index 00000000..aed81cb5 --- /dev/null +++ b/src/loaders/webp/dec/webpi.h @@ -0,0 +1,108 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Internal header: WebP decoding parameters and custom IO on buffer +// +// Author: somnath@google.com (Somnath Banerjee) + +#ifndef WEBP_DEC_WEBPI_H_ +#define WEBP_DEC_WEBPI_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "../utils/rescaler.h" +#include "./decode_vp8.h" + +//------------------------------------------------------------------------------ +// WebPDecParams: Decoding output parameters. Transient internal object. + +typedef struct WebPDecParams WebPDecParams; +typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p); +typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos); + +struct WebPDecParams { + WebPDecBuffer* output; // output buffer. + uint8_t* tmp_y, *tmp_u, *tmp_v; // cache for the fancy upsampler + // or used for tmp rescaling + + int last_y; // coordinate of the line that was last output + const WebPDecoderOptions* options; // if not NULL, use alt decoding features + // rescalers + WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a; + void* memory; // overall scratch memory for the output work. + + OutputFunc emit; // output RGB or YUV samples + OutputFunc emit_alpha; // output alpha channel + OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values +}; + + +//------------------------------------------------------------------------------ +// Header parsing helpers + +// Structure storing a description of the RIFF headers. +typedef struct { + const uint8_t* data; // input buffer + size_t data_size; // input buffer size + int have_all_data; // true if all data is known to be available + size_t offset; // offset to main data chunk (VP8 or VP8L) + const uint8_t* alpha_data; // points to alpha chunk (if present) + size_t alpha_data_size; // alpha chunk size + size_t compressed_size; // VP8/VP8L compressed data size + size_t riff_size; // size of the riff payload (or 0 if absent) + int is_lossless; // true if a VP8L chunk is present +} WebPHeaderStructure; + + +//------------------------------------------------------------------------------ +// Misc utils + +// Initializes VP8Io with custom setup, io and teardown functions. The default +// hooks will use the supplied 'params' as io->opaque handle. +void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io); + +// Setup crop_xxx fields, mb_w and mb_h in io. 'src_colorspace' refers +// to the *compressed* format, not the output one. +int WebPIoInitFromOptions(const WebPDecoderOptions* const options, + VP8Io* const io, WEBP_CSP_MODE src_colorspace); + +//------------------------------------------------------------------------------ +// Internal functions regarding WebPDecBuffer memory (in buffer.c). +// Don't really need to be externally visible for now. + +// Prepare 'buffer' with the requested initial dimensions width/height. +// If no external storage is supplied, initializes buffer by allocating output +// memory and setting up the stride information. Validate the parameters. Return +// an error code in case of problem (no memory, or invalid stride / size / +// dimension / etc.). If *options is not NULL, also verify that the options' +// parameters are valid and apply them to the width/height dimensions of the +// output buffer. This takes cropping / scaling / rotation into account. +// Also incorporates the options->flip flag to flip the buffer parameters if +// needed. +VP8StatusCode WebPAllocateDecBuffer(int width, int height, + const WebPDecoderOptions* const options, + WebPDecBuffer* const buffer); + +// Flip buffer vertically by negating the various strides. +VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer); + +// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the +// memory (still held by 'src'). +void WebPCopyDecBuffer(const WebPDecBuffer* const src, + WebPDecBuffer* const dst); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_DEC_WEBPI_H_ */ diff --git a/src/loaders/webp/dsp/alpha_processing.cpp b/src/loaders/webp/dsp/alpha_processing.cpp new file mode 100644 index 00000000..bef13f41 --- /dev/null +++ b/src/loaders/webp/dsp/alpha_processing.cpp @@ -0,0 +1,377 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Utilities for processing transparent channel. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include +#include "./dsp.h" + +// Tables can be faster on some platform but incur some extra binary size (~2k). +// #define USE_TABLES_FOR_ALPHA_MULT + +// ----------------------------------------------------------------------------- + +#define MFIX 24 // 24bit fixed-point arithmetic +#define HALF ((1u << MFIX) >> 1) +#define KINV_255 ((1u << MFIX) / 255u) + +static uint32_t Mult(uint8_t x, uint32_t mult) { + const uint32_t v = (x * mult + HALF) >> MFIX; + assert(v <= 255); // <- 24bit precision is enough to ensure that. + return v; +} + +#ifdef USE_TABLES_FOR_ALPHA_MULT + +static const uint32_t kMultTables[2][256] = { + { // (255u << MFIX) / alpha + 0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000, + 0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2, + 0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000, + 0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8, + 0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3, + 0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492, + 0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3, + 0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8, + 0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7, + 0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0, + 0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4, + 0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6, + 0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace, + 0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a, + 0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf, + 0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b, + 0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d, + 0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec, + 0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9, + 0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249, + 0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70, + 0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213, + 0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10, + 0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5, + 0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed, + 0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a, + 0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741, + 0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0, + 0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e, + 0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157, + 0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67, + 0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4, + 0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc, + 0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b, + 0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830, + 0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be, + 0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276, + 0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc, + 0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2, + 0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358, + 0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0, + 0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465, + 0x01030c30, 0x01020612, 0x01010204, 0x01000000 }, + { // alpha * KINV_255 + 0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505, + 0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b, + 0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111, + 0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717, + 0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d, + 0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323, + 0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929, + 0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f, + 0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535, + 0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b, + 0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141, + 0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747, + 0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d, + 0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353, + 0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959, + 0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f, + 0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565, + 0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b, + 0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171, + 0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777, + 0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d, + 0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383, + 0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989, + 0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f, + 0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595, + 0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b, + 0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1, + 0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7, + 0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad, + 0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3, + 0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9, + 0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf, + 0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5, + 0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb, + 0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1, + 0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7, + 0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd, + 0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3, + 0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9, + 0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef, + 0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5, + 0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb, + 0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff } +}; + +static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) { + return kMultTables[!inverse][a]; +} + +#else + +static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) { + return inverse ? (255u << MFIX) / a : a * KINV_255; +} + +#endif // USE_TABLES_FOR_ALPHA_MULT + +void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) { + int x; + for (x = 0; x < width; ++x) { + const uint32_t argb = ptr[x]; + if (argb < 0xff000000u) { // alpha < 255 + if (argb <= 0x00ffffffu) { // alpha == 0 + ptr[x] = 0; + } else { + const uint32_t alpha = (argb >> 24) & 0xff; + const uint32_t scale = GetScale(alpha, inverse); + uint32_t out = argb & 0xff000000u; + out |= Mult(argb >> 0, scale) << 0; + out |= Mult(argb >> 8, scale) << 8; + out |= Mult(argb >> 16, scale) << 16; + ptr[x] = out; + } + } + } +} + +void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha, + int width, int inverse) { + int x; + for (x = 0; x < width; ++x) { + const uint32_t a = alpha[x]; + if (a != 255) { + if (a == 0) { + ptr[x] = 0; + } else { + const uint32_t scale = GetScale(a, inverse); + ptr[x] = Mult(ptr[x], scale); + } + } + } +} + +#undef KINV_255 +#undef HALF +#undef MFIX + +void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse); +void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, + int width, int inverse); + +//------------------------------------------------------------------------------ +// Generic per-plane calls + +void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, + int inverse) { + int n; + for (n = 0; n < num_rows; ++n) { + WebPMultARGBRow((uint32_t*)ptr, width, inverse); + ptr += stride; + } +} + +void WebPMultRows(uint8_t* ptr, int stride, + const uint8_t* alpha, int alpha_stride, + int width, int num_rows, int inverse) { + int n; + for (n = 0; n < num_rows; ++n) { + WebPMultRow(ptr, alpha, width, inverse); + ptr += stride; + alpha += alpha_stride; + } +} + +//------------------------------------------------------------------------------ +// Premultiplied modes + +// non dithered-modes + +// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.) +// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5), +// one can use instead: (x * a * 65793 + (1 << 23)) >> 24 +#if 1 // (int)(x * a / 255.) +#define MULTIPLIER(a) ((a) * 32897U) +#define PREMULTIPLY(x, m) (((x) * (m)) >> 23) +#else // (int)(x * a / 255. + .5) +#define MULTIPLIER(a) ((a) * 65793U) +#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24) +#endif + +static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first, + int w, int h, int stride) { + while (h-- > 0) { + uint8_t* const rgb = rgba + (alpha_first ? 1 : 0); + const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3); + int i; + for (i = 0; i < w; ++i) { + const uint32_t a = alpha[4 * i]; + if (a != 0xff) { + const uint32_t mult = MULTIPLIER(a); + rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult); + rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult); + rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult); + } + } + rgba += stride; + } +} +#undef MULTIPLIER +#undef PREMULTIPLY + +// rgbA4444 + +#define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15 + +static WEBP_INLINE uint8_t dither_hi(uint8_t x) { + return (x & 0xf0) | (x >> 4); +} + +static WEBP_INLINE uint8_t dither_lo(uint8_t x) { + return (x & 0x0f) | (x << 4); +} + +static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) { + return (x * m) >> 16; +} + +static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444, + int w, int h, int stride, + int rg_byte_pos /* 0 or 1 */) { + while (h-- > 0) { + int i; + for (i = 0; i < w; ++i) { + const uint32_t rg = rgba4444[2 * i + rg_byte_pos]; + const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)]; + const uint8_t a = ba & 0x0f; + const uint32_t mult = MULTIPLIER(a); + const uint8_t r = multiply(dither_hi(rg), mult); + const uint8_t g = multiply(dither_lo(rg), mult); + const uint8_t b = multiply(dither_hi(ba), mult); + rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f); + rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a; + } + rgba4444 += stride; + } +} +#undef MULTIPLIER + +static void ApplyAlphaMultiply_16b(uint8_t* rgba4444, + int w, int h, int stride) { +#ifdef WEBP_SWAP_16BIT_CSP + ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1); +#else + ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0); +#endif +} + +static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint8_t* dst, int dst_stride) { + uint32_t alpha_mask = 0xff; + int i, j; + + for (j = 0; j < height; ++j) { + for (i = 0; i < width; ++i) { + const uint32_t alpha_value = alpha[i]; + dst[4 * i] = alpha_value; + alpha_mask &= alpha_value; + } + alpha += alpha_stride; + dst += dst_stride; + } + + return (alpha_mask != 0xff); +} + +static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint32_t* dst, int dst_stride) { + int i, j; + for (j = 0; j < height; ++j) { + for (i = 0; i < width; ++i) { + dst[i] = alpha[i] << 8; // leave A/R/B channels zero'd. + } + alpha += alpha_stride; + dst += dst_stride; + } +} + +static int ExtractAlpha(const uint8_t* argb, int argb_stride, + int width, int height, + uint8_t* alpha, int alpha_stride) { + uint8_t alpha_mask = 0xff; + int i, j; + + for (j = 0; j < height; ++j) { + for (i = 0; i < width; ++i) { + const uint8_t alpha_value = argb[4 * i]; + alpha[i] = alpha_value; + alpha_mask &= alpha_value; + } + argb += argb_stride; + alpha += alpha_stride; + } + return (alpha_mask == 0xff); +} + +void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); +void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); +int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); +void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); +int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); + +//------------------------------------------------------------------------------ +// Init function + +extern void WebPInitAlphaProcessingMIPSdspR2(void); +extern void WebPInitAlphaProcessingSSE2(void); + +static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used = + (VP8CPUInfo)&alpha_processing_last_cpuinfo_used; + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { + if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return; + + WebPMultARGBRow = WebPMultARGBRowC; + WebPMultRow = WebPMultRowC; + WebPApplyAlphaMultiply = ApplyAlphaMultiply; + WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b; + WebPDispatchAlpha = DispatchAlpha; + WebPDispatchAlphaToGreen = DispatchAlphaToGreen; + WebPExtractAlpha = ExtractAlpha; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + WebPInitAlphaProcessingSSE2(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + WebPInitAlphaProcessingMIPSdspR2(); + } +#endif + } + alpha_processing_last_cpuinfo_used = VP8GetCPUInfo; +} diff --git a/src/loaders/webp/dsp/argb.cpp b/src/loaders/webp/dsp/argb.cpp new file mode 100644 index 00000000..cc1f9a96 --- /dev/null +++ b/src/loaders/webp/dsp/argb.cpp @@ -0,0 +1,68 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// ARGB making functions. +// +// Author: Djordje Pesut (djordje.pesut@imgtec.com) + +#include "./dsp.h" + +static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { + return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); +} + +static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, + const uint8_t* b, int len, uint32_t* out) { + int i; + for (i = 0; i < len; ++i) { + out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); + } +} + +static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b, + int len, int step, uint32_t* out) { + int i, offset = 0; + for (i = 0; i < len; ++i) { + out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]); + offset += step; + } +} + +void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*, + const uint8_t*, int, uint32_t*); +void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*, + int, int, uint32_t*); + +extern void VP8EncDspARGBInitMIPSdspR2(void); +extern void VP8EncDspARGBInitSSE2(void); + +static volatile VP8CPUInfo argb_last_cpuinfo_used = + (VP8CPUInfo)&argb_last_cpuinfo_used; + +WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) { + if (argb_last_cpuinfo_used == VP8GetCPUInfo) return; + + VP8PackARGB = PackARGB; + VP8PackRGB = PackRGB; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + VP8EncDspARGBInitSSE2(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8EncDspARGBInitMIPSdspR2(); + } +#endif + } + argb_last_cpuinfo_used = VP8GetCPUInfo; +} diff --git a/src/loaders/webp/dsp/cpu.cpp b/src/loaders/webp/dsp/cpu.cpp new file mode 100644 index 00000000..202d3121 --- /dev/null +++ b/src/loaders/webp/dsp/cpu.cpp @@ -0,0 +1,120 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// CPU detection +// +// Author: Christian Duvivier (cduvivier@google.com) + +#include "./dsp.h" + + +//------------------------------------------------------------------------------ +// SSE2 detection. +// + +// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC. +#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__) +static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { + __asm__ volatile ( + "mov %%ebx, %%edi\n" + "cpuid\n" + "xchg %%edi, %%ebx\n" + : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(info_type), "c"(0)); +} +#elif defined(__i386__) || defined(__x86_64__) +static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { + __asm__ volatile ( + "cpuid\n" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(info_type), "c"(0)); +} +#elif (defined(_M_X64) || defined(_M_IX86)) && \ + defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 +#include +#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 +#elif defined(WEBP_MSC_SSE2) +#define GetCPUInfo __cpuid +#endif + +// NaCl has no support for xgetbv or the raw opcode. +#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) +static WEBP_INLINE uint64_t xgetbv(void) { + const uint32_t ecx = 0; + uint32_t eax, edx; + // Use the raw opcode for xgetbv for compatibility with older toolchains. + __asm__ volatile ( + ".byte 0x0f, 0x01, 0xd0\n" + : "=a"(eax), "=d"(edx) : "c" (ecx)); + return ((uint64_t)edx << 32) | eax; +} +#elif (defined(_M_X64) || defined(_M_IX86)) && \ + defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 +#include +#define xgetbv() _xgetbv(0) +#elif defined(_MSC_VER) && defined(_M_IX86) +static WEBP_INLINE uint64_t xgetbv(void) { + uint32_t eax_, edx_; + __asm { + xor ecx, ecx // ecx = 0 + // Use the raw opcode for xgetbv for compatibility with older toolchains. + __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 + mov eax_, eax + mov edx_, edx + } + return ((uint64_t)edx_ << 32) | eax_; +} +#else +#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. +#endif + +#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) +static int x86CPUInfo(CPUFeature feature) { + int cpu_info[4]; + GetCPUInfo(cpu_info, 1); + if (feature == kSSE2) { + return 0 != (cpu_info[3] & 0x04000000); + } + + return 0; +} +VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; +#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. +static int AndroidCPUInfo(CPUFeature feature) { + const AndroidCpuFamily cpu_family = android_getCpuFamily(); + const uint64_t cpu_features = android_getCpuFeatures(); + if (feature == kNEON) { + return (cpu_family == ANDROID_CPU_FAMILY_ARM && + 0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)); + } + return 0; +} +VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; +#elif defined(WEBP_USE_NEON) +// define a dummy function to enable turning off NEON at runtime by setting +// VP8DecGetCPUInfo = NULL +static int armCPUInfo(CPUFeature feature) { + (void)feature; + return 1; +} +VP8CPUInfo VP8GetCPUInfo = armCPUInfo; +#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) +static int mipsCPUInfo(CPUFeature feature) { + if ((feature == kMIPS32) || (feature == kMIPSdspR2)) { + return 1; + } else { + return 0; + } + +} +VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; +#else +VP8CPUInfo VP8GetCPUInfo = NULL; +#endif + diff --git a/src/loaders/webp/dsp/dec.cpp b/src/loaders/webp/dsp/dec.cpp new file mode 100644 index 00000000..cefbdba0 --- /dev/null +++ b/src/loaders/webp/dsp/dec.cpp @@ -0,0 +1,766 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Speed-critical decoding functions, default plain-C implementations. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./dsp.h" +#include "../dec/vp8i.h" + +//------------------------------------------------------------------------------ + +static WEBP_INLINE uint8_t clip_8b(int v) { + return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; +} + +//------------------------------------------------------------------------------ +// Transforms (Paragraph 14.4) + +#define STORE(x, y, v) \ + dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) + +#define STORE2(y, dc, d, c) do { \ + const int DC = (dc); \ + STORE(0, y, DC + (d)); \ + STORE(1, y, DC + (c)); \ + STORE(2, y, DC - (c)); \ + STORE(3, y, DC - (d)); \ +} while (0) + +static const int kC1 = 20091 + (1 << 16); +static const int kC2 = 35468; +#define MUL(a, b) (((a) * (b)) >> 16) + +static void TransformOne(const int16_t* in, uint8_t* dst) { + int C[4 * 4], *tmp; + int i; + tmp = C; + for (i = 0; i < 4; ++i) { // vertical pass + const int a = in[0] + in[8]; // [-4096, 4094] + const int b = in[0] - in[8]; // [-4095, 4095] + const int c = MUL(in[4], kC2) - MUL(in[12], kC1); // [-3783, 3783] + const int d = MUL(in[4], kC1) + MUL(in[12], kC2); // [-3785, 3781] + tmp[0] = a + d; // [-7881, 7875] + tmp[1] = b + c; // [-7878, 7878] + tmp[2] = b - c; // [-7878, 7878] + tmp[3] = a - d; // [-7877, 7879] + tmp += 4; + in++; + } + // Each pass is expanding the dynamic range by ~3.85 (upper bound). + // The exact value is (2. + (kC1 + kC2) / 65536). + // After the second pass, maximum interval is [-3794, 3794], assuming + // an input in [-2048, 2047] interval. We then need to add a dst value + // in the [0, 255] range. + // In the worst case scenario, the input to clip_8b() can be as large as + // [-60713, 60968]. + tmp = C; + for (i = 0; i < 4; ++i) { // horizontal pass + const int dc = tmp[0] + 4; + const int a = dc + tmp[8]; + const int b = dc - tmp[8]; + const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); + const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); + STORE(0, 0, a + d); + STORE(1, 0, b + c); + STORE(2, 0, b - c); + STORE(3, 0, a - d); + tmp++; + dst += BPS; + } +} + +// Simplified transform when only in[0], in[1] and in[4] are non-zero +static void TransformAC3(const int16_t* in, uint8_t* dst) { + const int a = in[0] + 4; + const int c4 = MUL(in[4], kC2); + const int d4 = MUL(in[4], kC1); + const int c1 = MUL(in[1], kC2); + const int d1 = MUL(in[1], kC1); + STORE2(0, a + d4, d1, c1); + STORE2(1, a + c4, d1, c1); + STORE2(2, a - c4, d1, c1); + STORE2(3, a - d4, d1, c1); +} +#undef MUL +#undef STORE2 + +static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) { + TransformOne(in, dst); + if (do_two) { + TransformOne(in + 16, dst + 4); + } +} + +static void TransformUV(const int16_t* in, uint8_t* dst) { + VP8Transform(in + 0 * 16, dst, 1); + VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); +} + +static void TransformDC(const int16_t* in, uint8_t* dst) { + const int DC = in[0] + 4; + int i, j; + for (j = 0; j < 4; ++j) { + for (i = 0; i < 4; ++i) { + STORE(i, j, DC); + } + } +} + +static void TransformDCUV(const int16_t* in, uint8_t* dst) { + if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst); + if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4); + if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS); + if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4); +} + +#undef STORE + +//------------------------------------------------------------------------------ +// Paragraph 14.3 + +static void TransformWHT(const int16_t* in, int16_t* out) { + int tmp[16]; + int i; + for (i = 0; i < 4; ++i) { + const int a0 = in[0 + i] + in[12 + i]; + const int a1 = in[4 + i] + in[ 8 + i]; + const int a2 = in[4 + i] - in[ 8 + i]; + const int a3 = in[0 + i] - in[12 + i]; + tmp[0 + i] = a0 + a1; + tmp[8 + i] = a0 - a1; + tmp[4 + i] = a3 + a2; + tmp[12 + i] = a3 - a2; + } + for (i = 0; i < 4; ++i) { + const int dc = tmp[0 + i * 4] + 3; // w/ rounder + const int a0 = dc + tmp[3 + i * 4]; + const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; + const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; + const int a3 = dc - tmp[3 + i * 4]; + out[ 0] = (a0 + a1) >> 3; + out[16] = (a3 + a2) >> 3; + out[32] = (a0 - a1) >> 3; + out[48] = (a3 - a2) >> 3; + out += 64; + } +} + +void (*VP8TransformWHT)(const int16_t* in, int16_t* out); + +//------------------------------------------------------------------------------ +// Intra predictions + +#define DST(x, y) dst[(x) + (y) * BPS] + +static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) { + const uint8_t* top = dst - BPS; + const uint8_t* const clip0 = VP8kclip1 - top[-1]; + int y; + for (y = 0; y < size; ++y) { + const uint8_t* const clip = clip0 + dst[-1]; + int x; + for (x = 0; x < size; ++x) { + dst[x] = clip[top[x]]; + } + dst += BPS; + } +} +static void TM4(uint8_t* dst) { TrueMotion(dst, 4); } +static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); } +static void TM16(uint8_t* dst) { TrueMotion(dst, 16); } + +//------------------------------------------------------------------------------ +// 16x16 + +static void VE16(uint8_t* dst) { // vertical + int j; + for (j = 0; j < 16; ++j) { + memcpy(dst + j * BPS, dst - BPS, 16); + } +} + +static void HE16(uint8_t* dst) { // horizontal + int j; + for (j = 16; j > 0; --j) { + memset(dst, dst[-1], 16); + dst += BPS; + } +} + +static WEBP_INLINE void Put16(int v, uint8_t* dst) { + int j; + for (j = 0; j < 16; ++j) { + memset(dst + j * BPS, v, 16); + } +} + +static void DC16(uint8_t* dst) { // DC + int DC = 16; + int j; + for (j = 0; j < 16; ++j) { + DC += dst[-1 + j * BPS] + dst[j - BPS]; + } + Put16(DC >> 5, dst); +} + +static void DC16NoTop(uint8_t* dst) { // DC with top samples not available + int DC = 8; + int j; + for (j = 0; j < 16; ++j) { + DC += dst[-1 + j * BPS]; + } + Put16(DC >> 4, dst); +} + +static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available + int DC = 8; + int i; + for (i = 0; i < 16; ++i) { + DC += dst[i - BPS]; + } + Put16(DC >> 4, dst); +} + +static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples + Put16(0x80, dst); +} + +VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; + +//------------------------------------------------------------------------------ +// 4x4 + +#define AVG3(a, b, c) ((((uint32_t)a) + 2 * (b) + (c) + 2) >> 2) +#define AVG2(a, b) (((a) + (b) + 1) >> 1) + +static void VE4(uint8_t* dst) { // vertical + const uint8_t* top = dst - BPS; + const uint8_t vals[4] = { + (uint8_t)AVG3(top[-1], top[0], top[1]), + (uint8_t)AVG3(top[ 0], top[1], top[2]), + (uint8_t)AVG3(top[ 1], top[2], top[3]), + (uint8_t)AVG3(top[ 2], top[3], top[4]) + }; + int i; + for (i = 0; i < 4; ++i) { + memcpy(dst + i * BPS, vals, sizeof(vals)); + } +} + +static void HE4(uint8_t* dst) { // horizontal + const int A = dst[-1 - BPS]; + const int B = dst[-1]; + const int C = dst[-1 + BPS]; + const int D = dst[-1 + 2 * BPS]; + const int E = dst[-1 + 3 * BPS]; + *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(A, B, C); + *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(B, C, D); + *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(C, D, E); + *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E); +} + +static void DC4(uint8_t* dst) { // DC + uint32_t dc = 4; + int i; + for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; + dc >>= 3; + for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); +} + +static void RD4(uint8_t* dst) { // Down-right + const int I = dst[-1 + 0 * BPS]; + const int J = dst[-1 + 1 * BPS]; + const int K = dst[-1 + 2 * BPS]; + const int L = dst[-1 + 3 * BPS]; + const int X = dst[-1 - BPS]; + const int A = dst[0 - BPS]; + const int B = dst[1 - BPS]; + const int C = dst[2 - BPS]; + const int D = dst[3 - BPS]; + DST(0, 3) = AVG3(J, K, L); + DST(1, 3) = DST(0, 2) = AVG3(I, J, K); + DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); + DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); + DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); + DST(3, 1) = DST(2, 0) = AVG3(C, B, A); + DST(3, 0) = AVG3(D, C, B); +} + +static void LD4(uint8_t* dst) { // Down-Left + const int A = dst[0 - BPS]; + const int B = dst[1 - BPS]; + const int C = dst[2 - BPS]; + const int D = dst[3 - BPS]; + const int E = dst[4 - BPS]; + const int F = dst[5 - BPS]; + const int G = dst[6 - BPS]; + const int H = dst[7 - BPS]; + DST(0, 0) = AVG3(A, B, C); + DST(1, 0) = DST(0, 1) = AVG3(B, C, D); + DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); + DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); + DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); + DST(3, 2) = DST(2, 3) = AVG3(F, G, H); + DST(3, 3) = AVG3(G, H, H); +} + +static void VR4(uint8_t* dst) { // Vertical-Right + const int I = dst[-1 + 0 * BPS]; + const int J = dst[-1 + 1 * BPS]; + const int K = dst[-1 + 2 * BPS]; + const int X = dst[-1 - BPS]; + const int A = dst[0 - BPS]; + const int B = dst[1 - BPS]; + const int C = dst[2 - BPS]; + const int D = dst[3 - BPS]; + DST(0, 0) = DST(1, 2) = AVG2(X, A); + DST(1, 0) = DST(2, 2) = AVG2(A, B); + DST(2, 0) = DST(3, 2) = AVG2(B, C); + DST(3, 0) = AVG2(C, D); + + DST(0, 3) = AVG3(K, J, I); + DST(0, 2) = AVG3(J, I, X); + DST(0, 1) = DST(1, 3) = AVG3(I, X, A); + DST(1, 1) = DST(2, 3) = AVG3(X, A, B); + DST(2, 1) = DST(3, 3) = AVG3(A, B, C); + DST(3, 1) = AVG3(B, C, D); +} + +static void VL4(uint8_t* dst) { // Vertical-Left + const int A = dst[0 - BPS]; + const int B = dst[1 - BPS]; + const int C = dst[2 - BPS]; + const int D = dst[3 - BPS]; + const int E = dst[4 - BPS]; + const int F = dst[5 - BPS]; + const int G = dst[6 - BPS]; + const int H = dst[7 - BPS]; + DST(0, 0) = AVG2(A, B); + DST(1, 0) = DST(0, 2) = AVG2(B, C); + DST(2, 0) = DST(1, 2) = AVG2(C, D); + DST(3, 0) = DST(2, 2) = AVG2(D, E); + + DST(0, 1) = AVG3(A, B, C); + DST(1, 1) = DST(0, 3) = AVG3(B, C, D); + DST(2, 1) = DST(1, 3) = AVG3(C, D, E); + DST(3, 1) = DST(2, 3) = AVG3(D, E, F); + DST(3, 2) = AVG3(E, F, G); + DST(3, 3) = AVG3(F, G, H); +} + +static void HU4(uint8_t* dst) { // Horizontal-Up + const int I = dst[-1 + 0 * BPS]; + const int J = dst[-1 + 1 * BPS]; + const int K = dst[-1 + 2 * BPS]; + const int L = dst[-1 + 3 * BPS]; + DST(0, 0) = AVG2(I, J); + DST(2, 0) = DST(0, 1) = AVG2(J, K); + DST(2, 1) = DST(0, 2) = AVG2(K, L); + DST(1, 0) = AVG3(I, J, K); + DST(3, 0) = DST(1, 1) = AVG3(J, K, L); + DST(3, 1) = DST(1, 2) = AVG3(K, L, L); + DST(3, 2) = DST(2, 2) = + DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; +} + +static void HD4(uint8_t* dst) { // Horizontal-Down + const int I = dst[-1 + 0 * BPS]; + const int J = dst[-1 + 1 * BPS]; + const int K = dst[-1 + 2 * BPS]; + const int L = dst[-1 + 3 * BPS]; + const int X = dst[-1 - BPS]; + const int A = dst[0 - BPS]; + const int B = dst[1 - BPS]; + const int C = dst[2 - BPS]; + + DST(0, 0) = DST(2, 1) = AVG2(I, X); + DST(0, 1) = DST(2, 2) = AVG2(J, I); + DST(0, 2) = DST(2, 3) = AVG2(K, J); + DST(0, 3) = AVG2(L, K); + + DST(3, 0) = AVG3(A, B, C); + DST(2, 0) = AVG3(X, A, B); + DST(1, 0) = DST(3, 1) = AVG3(I, X, A); + DST(1, 1) = DST(3, 2) = AVG3(J, I, X); + DST(1, 2) = DST(3, 3) = AVG3(K, J, I); + DST(1, 3) = AVG3(L, K, J); +} + +#undef DST +#undef AVG3 +#undef AVG2 + +VP8PredFunc VP8PredLuma4[NUM_BMODES]; + +//------------------------------------------------------------------------------ +// Chroma + +static void VE8uv(uint8_t* dst) { // vertical + int j; + for (j = 0; j < 8; ++j) { + memcpy(dst + j * BPS, dst - BPS, 8); + } +} + +static void HE8uv(uint8_t* dst) { // horizontal + int j; + for (j = 0; j < 8; ++j) { + memset(dst, dst[-1], 8); + dst += BPS; + } +} + +// helper for chroma-DC predictions +static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) { + int j; + for (j = 0; j < 8; ++j) { + memset(dst + j * BPS, value, 8); + } +} + +static void DC8uv(uint8_t* dst) { // DC + int dc0 = 8; + int i; + for (i = 0; i < 8; ++i) { + dc0 += dst[i - BPS] + dst[-1 + i * BPS]; + } + Put8x8uv(dc0 >> 4, dst); +} + +static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples + int dc0 = 4; + int i; + for (i = 0; i < 8; ++i) { + dc0 += dst[i - BPS]; + } + Put8x8uv(dc0 >> 3, dst); +} + +static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples + int dc0 = 4; + int i; + for (i = 0; i < 8; ++i) { + dc0 += dst[-1 + i * BPS]; + } + Put8x8uv(dc0 >> 3, dst); +} + +static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing + Put8x8uv(0x80, dst); +} + +VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES]; + +//------------------------------------------------------------------------------ +// Edge filtering functions + +// 4 pixels in, 2 pixels out +static WEBP_INLINE void do_filter2(uint8_t* p, int step) { + const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; + const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892] + const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15] + const int a2 = VP8ksclip2[(a + 3) >> 3]; + p[-step] = VP8kclip1[p0 + a2]; + p[ 0] = VP8kclip1[q0 - a1]; +} + +// 4 pixels in, 4 pixels out +static WEBP_INLINE void do_filter4(uint8_t* p, int step) { + const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; + const int a = 3 * (q0 - p0); + const int a1 = VP8ksclip2[(a + 4) >> 3]; + const int a2 = VP8ksclip2[(a + 3) >> 3]; + const int a3 = (a1 + 1) >> 1; + p[-2*step] = VP8kclip1[p1 + a3]; + p[- step] = VP8kclip1[p0 + a2]; + p[ 0] = VP8kclip1[q0 - a1]; + p[ step] = VP8kclip1[q1 - a3]; +} + +// 6 pixels in, 6 pixels out +static WEBP_INLINE void do_filter6(uint8_t* p, int step) { + const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; + const int q0 = p[0], q1 = p[step], q2 = p[2*step]; + const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]]; + // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] + const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 + const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 + const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 + p[-3*step] = VP8kclip1[p2 + a3]; + p[-2*step] = VP8kclip1[p1 + a2]; + p[- step] = VP8kclip1[p0 + a1]; + p[ 0] = VP8kclip1[q0 - a1]; + p[ step] = VP8kclip1[q1 - a2]; + p[ 2*step] = VP8kclip1[q2 - a3]; +} + +static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) { + const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; + return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh); +} + +static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) { + const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step]; + return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t); +} + +static WEBP_INLINE int needs_filter2(const uint8_t* p, + int step, int t, int it) { + const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step]; + const int p0 = p[-step], q0 = p[0]; + const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step]; + if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0; + return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it && + VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it && + VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it; +} + +//------------------------------------------------------------------------------ +// Simple In-loop filtering (Paragraph 15.2) + +static void SimpleVFilter16(uint8_t* p, int stride, int thresh) { + int i; + const int thresh2 = 2 * thresh + 1; + for (i = 0; i < 16; ++i) { + if (needs_filter(p + i, stride, thresh2)) { + do_filter2(p + i, stride); + } + } +} + +static void SimpleHFilter16(uint8_t* p, int stride, int thresh) { + int i; + const int thresh2 = 2 * thresh + 1; + for (i = 0; i < 16; ++i) { + if (needs_filter(p + i * stride, 1, thresh2)) { + do_filter2(p + i * stride, 1); + } + } +} + +static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) { + int k; + for (k = 3; k > 0; --k) { + p += 4 * stride; + SimpleVFilter16(p, stride, thresh); + } +} + +static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) { + int k; + for (k = 3; k > 0; --k) { + p += 4; + SimpleHFilter16(p, stride, thresh); + } +} + +//------------------------------------------------------------------------------ +// Complex In-loop filtering (Paragraph 15.3) + +static WEBP_INLINE void FilterLoop26(uint8_t* p, + int hstride, int vstride, int size, + int thresh, int ithresh, int hev_thresh) { + const int thresh2 = 2 * thresh + 1; + while (size-- > 0) { + if (needs_filter2(p, hstride, thresh2, ithresh)) { + if (hev(p, hstride, hev_thresh)) { + do_filter2(p, hstride); + } else { + do_filter6(p, hstride); + } + } + p += vstride; + } +} + +static WEBP_INLINE void FilterLoop24(uint8_t* p, + int hstride, int vstride, int size, + int thresh, int ithresh, int hev_thresh) { + const int thresh2 = 2 * thresh + 1; + while (size-- > 0) { + if (needs_filter2(p, hstride, thresh2, ithresh)) { + if (hev(p, hstride, hev_thresh)) { + do_filter2(p, hstride); + } else { + do_filter4(p, hstride); + } + } + p += vstride; + } +} + +// on macroblock edges +static void VFilter16(uint8_t* p, int stride, + int thresh, int ithresh, int hev_thresh) { + FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh); +} + +static void HFilter16(uint8_t* p, int stride, + int thresh, int ithresh, int hev_thresh) { + FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh); +} + +// on three inner edges +static void VFilter16i(uint8_t* p, int stride, + int thresh, int ithresh, int hev_thresh) { + int k; + for (k = 3; k > 0; --k) { + p += 4 * stride; + FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh); + } +} + +static void HFilter16i(uint8_t* p, int stride, + int thresh, int ithresh, int hev_thresh) { + int k; + for (k = 3; k > 0; --k) { + p += 4; + FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh); + } +} + +// 8-pixels wide variant, for chroma filtering +static void VFilter8(uint8_t* u, uint8_t* v, int stride, + int thresh, int ithresh, int hev_thresh) { + FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh); + FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh); +} + +static void HFilter8(uint8_t* u, uint8_t* v, int stride, + int thresh, int ithresh, int hev_thresh) { + FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh); + FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh); +} + +static void VFilter8i(uint8_t* u, uint8_t* v, int stride, + int thresh, int ithresh, int hev_thresh) { + FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); + FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); +} + +static void HFilter8i(uint8_t* u, uint8_t* v, int stride, + int thresh, int ithresh, int hev_thresh) { + FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); + FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); +} + +//------------------------------------------------------------------------------ + +VP8DecIdct2 VP8Transform; +VP8DecIdct VP8TransformAC3; +VP8DecIdct VP8TransformUV; +VP8DecIdct VP8TransformDC; +VP8DecIdct VP8TransformDCUV; + +VP8LumaFilterFunc VP8VFilter16; +VP8LumaFilterFunc VP8HFilter16; +VP8ChromaFilterFunc VP8VFilter8; +VP8ChromaFilterFunc VP8HFilter8; +VP8LumaFilterFunc VP8VFilter16i; +VP8LumaFilterFunc VP8HFilter16i; +VP8ChromaFilterFunc VP8VFilter8i; +VP8ChromaFilterFunc VP8HFilter8i; +VP8SimpleFilterFunc VP8SimpleVFilter16; +VP8SimpleFilterFunc VP8SimpleHFilter16; +VP8SimpleFilterFunc VP8SimpleVFilter16i; +VP8SimpleFilterFunc VP8SimpleHFilter16i; + +extern void VP8DspInitSSE2(void); +extern void VP8DspInitSSE41(void); +extern void VP8DspInitNEON(void); +extern void VP8DspInitMIPS32(void); +extern void VP8DspInitMIPSdspR2(void); + +static volatile VP8CPUInfo dec_last_cpuinfo_used = + (VP8CPUInfo)&dec_last_cpuinfo_used; + +WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { + if (dec_last_cpuinfo_used == VP8GetCPUInfo) return; + + VP8InitClipTables(); + + VP8TransformWHT = TransformWHT; + VP8Transform = TransformTwo; + VP8TransformUV = TransformUV; + VP8TransformDC = TransformDC; + VP8TransformDCUV = TransformDCUV; + VP8TransformAC3 = TransformAC3; + + VP8VFilter16 = VFilter16; + VP8HFilter16 = HFilter16; + VP8VFilter8 = VFilter8; + VP8HFilter8 = HFilter8; + VP8VFilter16i = VFilter16i; + VP8HFilter16i = HFilter16i; + VP8VFilter8i = VFilter8i; + VP8HFilter8i = HFilter8i; + VP8SimpleVFilter16 = SimpleVFilter16; + VP8SimpleHFilter16 = SimpleHFilter16; + VP8SimpleVFilter16i = SimpleVFilter16i; + VP8SimpleHFilter16i = SimpleHFilter16i; + + VP8PredLuma4[0] = DC4; + VP8PredLuma4[1] = TM4; + VP8PredLuma4[2] = VE4; + VP8PredLuma4[3] = HE4; + VP8PredLuma4[4] = RD4; + VP8PredLuma4[5] = VR4; + VP8PredLuma4[6] = LD4; + VP8PredLuma4[7] = VL4; + VP8PredLuma4[8] = HD4; + VP8PredLuma4[9] = HU4; + + VP8PredLuma16[0] = DC16; + VP8PredLuma16[1] = TM16; + VP8PredLuma16[2] = VE16; + VP8PredLuma16[3] = HE16; + VP8PredLuma16[4] = DC16NoTop; + VP8PredLuma16[5] = DC16NoLeft; + VP8PredLuma16[6] = DC16NoTopLeft; + + VP8PredChroma8[0] = DC8uv; + VP8PredChroma8[1] = TM8uv; + VP8PredChroma8[2] = VE8uv; + VP8PredChroma8[3] = HE8uv; + VP8PredChroma8[4] = DC8uvNoTop; + VP8PredChroma8[5] = DC8uvNoLeft; + VP8PredChroma8[6] = DC8uvNoTopLeft; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + VP8DspInitSSE2(); +#if defined(WEBP_USE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + VP8DspInitSSE41(); + } +#endif + } +#endif +#if defined(WEBP_USE_NEON) + if (VP8GetCPUInfo(kNEON)) { + VP8DspInitNEON(); + } +#endif +#if defined(WEBP_USE_MIPS32) + if (VP8GetCPUInfo(kMIPS32)) { + VP8DspInitMIPS32(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8DspInitMIPSdspR2(); + } +#endif + } + dec_last_cpuinfo_used = VP8GetCPUInfo; +} diff --git a/src/loaders/webp/dsp/dec_clip_tables.cpp b/src/loaders/webp/dsp/dec_clip_tables.cpp new file mode 100644 index 00000000..edfffd0f --- /dev/null +++ b/src/loaders/webp/dsp/dec_clip_tables.cpp @@ -0,0 +1,366 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Clipping tables for filtering +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./dsp.h" + +//#define USE_STATIC_TABLES // undefine to have run-time table initialization + +#ifdef USE_STATIC_TABLES + +static const uint8_t abs0[255 + 255 + 1] = { + (uint8_t)0xff, (uint8_t)0xfe, (uint8_t)0xfd, (uint8_t)0xfc, (uint8_t)0xfb, (uint8_t)0xfa, (uint8_t)0xf9, (uint8_t)0xf8, (uint8_t)0xf7, (uint8_t)0xf6, (uint8_t)0xf5, (uint8_t)0xf4, + (uint8_t)0xf3, (uint8_t)0xf2, (uint8_t)0xf1, (uint8_t)0xf0, (uint8_t)0xef, (uint8_t)0xee, (uint8_t)0xed, (uint8_t)0xec, (uint8_t)0xeb, (uint8_t)0xea, (uint8_t)0xe9, (uint8_t)0xe8, + (uint8_t)0xe7, (uint8_t)0xe6, (uint8_t)0xe5, (uint8_t)0xe4, (uint8_t)0xe3, (uint8_t)0xe2, (uint8_t)0xe1, (uint8_t)0xe0, (uint8_t)0xdf, (uint8_t)0xde, (uint8_t)0xdd, (uint8_t)0xdc, + (uint8_t)0xdb, (uint8_t)0xda, (uint8_t)0xd9, (uint8_t)0xd8, (uint8_t)0xd7, (uint8_t)0xd6, (uint8_t)0xd5, (uint8_t)0xd4, (uint8_t)0xd3, (uint8_t)0xd2, (uint8_t)0xd1, (uint8_t)0xd0, + (uint8_t)0xcf, (uint8_t)0xce, (uint8_t)0xcd, (uint8_t)0xcc, (uint8_t)0xcb, (uint8_t)0xca, (uint8_t)0xc9, (uint8_t)0xc8, (uint8_t)0xc7, (uint8_t)0xc6, (uint8_t)0xc5, (uint8_t)0xc4, + (uint8_t)0xc3, (uint8_t)0xc2, (uint8_t)0xc1, (uint8_t)0xc0, (uint8_t)0xbf, (uint8_t)0xbe, (uint8_t)0xbd, (uint8_t)0xbc, (uint8_t)0xbb, (uint8_t)0xba, (uint8_t)0xb9, (uint8_t)0xb8, + (uint8_t)0xb7, (uint8_t)0xb6, (uint8_t)0xb5, (uint8_t)0xb4, (uint8_t)0xb3, (uint8_t)0xb2, (uint8_t)0xb1, (uint8_t)0xb0, (uint8_t)0xaf, (uint8_t)0xae, (uint8_t)0xad, (uint8_t)0xac, + (uint8_t)0xab, (uint8_t)0xaa, (uint8_t)0xa9, (uint8_t)0xa8, (uint8_t)0xa7, (uint8_t)0xa6, (uint8_t)0xa5, (uint8_t)0xa4, (uint8_t)0xa3, (uint8_t)0xa2, (uint8_t)0xa1, (uint8_t)0xa0, + (uint8_t)0x9f, (uint8_t)0x9e, (uint8_t)0x9d, (uint8_t)0x9c, (uint8_t)0x9b, (uint8_t)0x9a, (uint8_t)0x99, (uint8_t)0x98, (uint8_t)0x97, (uint8_t)0x96, (uint8_t)0x95, (uint8_t)0x94, + (uint8_t)0x93, (uint8_t)0x92, (uint8_t)0x91, (uint8_t)0x90, (uint8_t)0x8f, (uint8_t)0x8e, (uint8_t)0x8d, (uint8_t)0x8c, (uint8_t)0x8b, (uint8_t)0x8a, (uint8_t)0x89, (uint8_t)0x88, + (uint8_t)0x87, (uint8_t)0x86, (uint8_t)0x85, (uint8_t)0x84, (uint8_t)0x83, (uint8_t)0x82, (uint8_t)0x81, (uint8_t)0x80, (uint8_t)0x7f, (uint8_t)0x7e, (uint8_t)0x7d, (uint8_t)0x7c, + (uint8_t)0x7b, (uint8_t)0x7a, (uint8_t)0x79, (uint8_t)0x78, (uint8_t)0x77, (uint8_t)0x76, (uint8_t)0x75, (uint8_t)0x74, (uint8_t)0x73, (uint8_t)0x72, (uint8_t)0x71, (uint8_t)0x70, + (uint8_t)0x6f, (uint8_t)0x6e, (uint8_t)0x6d, (uint8_t)0x6c, (uint8_t)0x6b, (uint8_t)0x6a, (uint8_t)0x69, (uint8_t)0x68, (uint8_t)0x67, (uint8_t)0x66, (uint8_t)0x65, (uint8_t)0x64, + (uint8_t)0x63, (uint8_t)0x62, (uint8_t)0x61, (uint8_t)0x60, (uint8_t)0x5f, (uint8_t)0x5e, (uint8_t)0x5d, (uint8_t)0x5c, (uint8_t)0x5b, (uint8_t)0x5a, (uint8_t)0x59, (uint8_t)0x58, + (uint8_t)0x57, (uint8_t)0x56, (uint8_t)0x55, (uint8_t)0x54, (uint8_t)0x53, (uint8_t)0x52, (uint8_t)0x51, (uint8_t)0x50, (uint8_t)0x4f, (uint8_t)0x4e, (uint8_t)0x4d, (uint8_t)0x4c, + (uint8_t)0x4b, (uint8_t)0x4a, (uint8_t)0x49, (uint8_t)0x48, (uint8_t)0x47, (uint8_t)0x46, (uint8_t)0x45, (uint8_t)0x44, (uint8_t)0x43, (uint8_t)0x42, (uint8_t)0x41, (uint8_t)0x40, + (uint8_t)0x3f, (uint8_t)0x3e, (uint8_t)0x3d, (uint8_t)0x3c, (uint8_t)0x3b, (uint8_t)0x3a, (uint8_t)0x39, (uint8_t)0x38, (uint8_t)0x37, (uint8_t)0x36, (uint8_t)0x35, (uint8_t)0x34, + (uint8_t)0x33, (uint8_t)0x32, (uint8_t)0x31, (uint8_t)0x30, (uint8_t)0x2f, (uint8_t)0x2e, (uint8_t)0x2d, (uint8_t)0x2c, (uint8_t)0x2b, (uint8_t)0x2a, (uint8_t)0x29, (uint8_t)0x28, + (uint8_t)0x27, (uint8_t)0x26, (uint8_t)0x25, (uint8_t)0x24, (uint8_t)0x23, (uint8_t)0x22, (uint8_t)0x21, (uint8_t)0x20, (uint8_t)0x1f, (uint8_t)0x1e, (uint8_t)0x1d, (uint8_t)0x1c, + (uint8_t)0x1b, (uint8_t)0x1a, (uint8_t)0x19, (uint8_t)0x18, (uint8_t)0x17, (uint8_t)0x16, (uint8_t)0x15, (uint8_t)0x14, (uint8_t)0x13, (uint8_t)0x12, (uint8_t)0x11, (uint8_t)0x10, + (uint8_t)0x0f, (uint8_t)0x0e, (uint8_t)0x0d, (uint8_t)0x0c, (uint8_t)0x0b, (uint8_t)0x0a, (uint8_t)0x09, (uint8_t)0x08, (uint8_t)0x07, (uint8_t)0x06, (uint8_t)0x05, (uint8_t)0x04, + (uint8_t)0x03, (uint8_t)0x02, (uint8_t)0x01, (uint8_t)0x00, (uint8_t)0x01, (uint8_t)0x02, (uint8_t)0x03, (uint8_t)0x04, (uint8_t)0x05, (uint8_t)0x06, (uint8_t)0x07, (uint8_t)0x08, + (uint8_t)0x09, (uint8_t)0x0a, (uint8_t)0x0b, (uint8_t)0x0c, (uint8_t)0x0d, (uint8_t)0x0e, (uint8_t)0x0f, (uint8_t)0x10, (uint8_t)0x11, (uint8_t)0x12, (uint8_t)0x13, (uint8_t)0x14, + (uint8_t)0x15, (uint8_t)0x16, (uint8_t)0x17, (uint8_t)0x18, (uint8_t)0x19, (uint8_t)0x1a, (uint8_t)0x1b, (uint8_t)0x1c, (uint8_t)0x1d, (uint8_t)0x1e, (uint8_t)0x1f, (uint8_t)0x20, + (uint8_t)0x21, (uint8_t)0x22, (uint8_t)0x23, (uint8_t)0x24, (uint8_t)0x25, (uint8_t)0x26, (uint8_t)0x27, (uint8_t)0x28, (uint8_t)0x29, (uint8_t)0x2a, (uint8_t)0x2b, (uint8_t)0x2c, + (uint8_t)0x2d, (uint8_t)0x2e, (uint8_t)0x2f, (uint8_t)0x30, (uint8_t)0x31, (uint8_t)0x32, (uint8_t)0x33, (uint8_t)0x34, (uint8_t)0x35, (uint8_t)0x36, (uint8_t)0x37, (uint8_t)0x38, + (uint8_t)0x39, (uint8_t)0x3a, (uint8_t)0x3b, (uint8_t)0x3c, (uint8_t)0x3d, (uint8_t)0x3e, (uint8_t)0x3f, (uint8_t)0x40, (uint8_t)0x41, (uint8_t)0x42, (uint8_t)0x43, (uint8_t)0x44, + (uint8_t)0x45, (uint8_t)0x46, (uint8_t)0x47, (uint8_t)0x48, (uint8_t)0x49, (uint8_t)0x4a, (uint8_t)0x4b, (uint8_t)0x4c, (uint8_t)0x4d, (uint8_t)0x4e, (uint8_t)0x4f, (uint8_t)0x50, + (uint8_t)0x51, (uint8_t)0x52, (uint8_t)0x53, (uint8_t)0x54, (uint8_t)0x55, (uint8_t)0x56, (uint8_t)0x57, (uint8_t)0x58, (uint8_t)0x59, (uint8_t)0x5a, (uint8_t)0x5b, (uint8_t)0x5c, + (uint8_t)0x5d, (uint8_t)0x5e, (uint8_t)0x5f, (uint8_t)0x60, (uint8_t)0x61, (uint8_t)0x62, (uint8_t)0x63, (uint8_t)0x64, (uint8_t)0x65, (uint8_t)0x66, (uint8_t)0x67, (uint8_t)0x68, + (uint8_t)0x69, (uint8_t)0x6a, (uint8_t)0x6b, (uint8_t)0x6c, (uint8_t)0x6d, (uint8_t)0x6e, (uint8_t)0x6f, (uint8_t)0x70, (uint8_t)0x71, (uint8_t)0x72, (uint8_t)0x73, (uint8_t)0x74, + (uint8_t)0x75, (uint8_t)0x76, (uint8_t)0x77, (uint8_t)0x78, (uint8_t)0x79, (uint8_t)0x7a, (uint8_t)0x7b, (uint8_t)0x7c, (uint8_t)0x7d, (uint8_t)0x7e, (uint8_t)0x7f, (uint8_t)0x80, + (uint8_t)0x81, (uint8_t)0x82, (uint8_t)0x83, (uint8_t)0x84, (uint8_t)0x85, (uint8_t)0x86, (uint8_t)0x87, (uint8_t)0x88, (uint8_t)0x89, (uint8_t)0x8a, (uint8_t)0x8b, (uint8_t)0x8c, + (uint8_t)0x8d, (uint8_t)0x8e, (uint8_t)0x8f, (uint8_t)0x90, (uint8_t)0x91, (uint8_t)0x92, (uint8_t)0x93, (uint8_t)0x94, (uint8_t)0x95, (uint8_t)0x96, (uint8_t)0x97, (uint8_t)0x98, + (uint8_t)0x99, (uint8_t)0x9a, (uint8_t)0x9b, (uint8_t)0x9c, (uint8_t)0x9d, (uint8_t)0x9e, (uint8_t)0x9f, (uint8_t)0xa0, (uint8_t)0xa1, (uint8_t)0xa2, (uint8_t)0xa3, (uint8_t)0xa4, + (uint8_t)0xa5, (uint8_t)0xa6, (uint8_t)0xa7, (uint8_t)0xa8, (uint8_t)0xa9, (uint8_t)0xaa, (uint8_t)0xab, (uint8_t)0xac, (uint8_t)0xad, (uint8_t)0xae, (uint8_t)0xaf, (uint8_t)0xb0, + (uint8_t)0xb1, (uint8_t)0xb2, (uint8_t)0xb3, (uint8_t)0xb4, (uint8_t)0xb5, (uint8_t)0xb6, (uint8_t)0xb7, (uint8_t)0xb8, (uint8_t)0xb9, (uint8_t)0xba, (uint8_t)0xbb, (uint8_t)0xbc, + (uint8_t)0xbd, (uint8_t)0xbe, (uint8_t)0xbf, (uint8_t)0xc0, (uint8_t)0xc1, (uint8_t)0xc2, (uint8_t)0xc3, (uint8_t)0xc4, (uint8_t)0xc5, (uint8_t)0xc6, (uint8_t)0xc7, (uint8_t)0xc8, + (uint8_t)0xc9, (uint8_t)0xca, (uint8_t)0xcb, (uint8_t)0xcc, (uint8_t)0xcd, (uint8_t)0xce, (uint8_t)0xcf, (uint8_t)0xd0, (uint8_t)0xd1, (uint8_t)0xd2, (uint8_t)0xd3, (uint8_t)0xd4, + (uint8_t)0xd5, (uint8_t)0xd6, (uint8_t)0xd7, (uint8_t)0xd8, (uint8_t)0xd9, (uint8_t)0xda, (uint8_t)0xdb, (uint8_t)0xdc, (uint8_t)0xdd, (uint8_t)0xde, (uint8_t)0xdf, (uint8_t)0xe0, + (uint8_t)0xe1, (uint8_t)0xe2, (uint8_t)0xe3, (uint8_t)0xe4, (uint8_t)0xe5, (uint8_t)0xe6, (uint8_t)0xe7, (uint8_t)0xe8, (uint8_t)0xe9, (uint8_t)0xea, (uint8_t)0xeb, (uint8_t)0xec, + (uint8_t)0xed, (uint8_t)0xee, (uint8_t)0xef, (uint8_t)0xf0, (uint8_t)0xf1, (uint8_t)0xf2, (uint8_t)0xf3, (uint8_t)0xf4, (uint8_t)0xf5, (uint8_t)0xf6, (uint8_t)0xf7, (uint8_t)0xf8, + (uint8_t)0xf9, (uint8_t)0xfa, (uint8_t)0xfb, (uint8_t)0xfc, (uint8_t)0xfd, (uint8_t)0xfe, (uint8_t)0xff +}; + +static const int8_t sclip1[1020 + 1020 + 1] = { + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, + (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x80, (int8_t)0x81, (int8_t)0x82, (int8_t)0x83, (int8_t)0x84, (int8_t)0x85, (int8_t)0x86, (int8_t)0x87, + (int8_t)0x88, (int8_t)0x89, (int8_t)0x8a, (int8_t)0x8b, (int8_t)0x8c, (int8_t)0x8d, (int8_t)0x8e, (int8_t)0x8f, (int8_t)0x90, (int8_t)0x91, (int8_t)0x92, (int8_t)0x93, + (int8_t)0x94, (int8_t)0x95, (int8_t)0x96, (int8_t)0x97, (int8_t)0x98, (int8_t)0x99, (int8_t)0x9a, (int8_t)0x9b, (int8_t)0x9c, (int8_t)0x9d, (int8_t)0x9e, (int8_t)0x9f, + (int8_t)0xa0, (int8_t)0xa1, (int8_t)0xa2, (int8_t)0xa3, (int8_t)0xa4, (int8_t)0xa5, (int8_t)0xa6, (int8_t)0xa7, (int8_t)0xa8, (int8_t)0xa9, (int8_t)0xaa, (int8_t)0xab, + (int8_t)0xac, (int8_t)0xad, (int8_t)0xae, (int8_t)0xaf, (int8_t)0xb0, (int8_t)0xb1, (int8_t)0xb2, (int8_t)0xb3, (int8_t)0xb4, (int8_t)0xb5, (int8_t)0xb6, (int8_t)0xb7, + (int8_t)0xb8, (int8_t)0xb9, (int8_t)0xba, (int8_t)0xbb, (int8_t)0xbc, (int8_t)0xbd, (int8_t)0xbe, (int8_t)0xbf, (int8_t)0xc0, (int8_t)0xc1, (int8_t)0xc2, (int8_t)0xc3, + (int8_t)0xc4, (int8_t)0xc5, (int8_t)0xc6, (int8_t)0xc7, (int8_t)0xc8, (int8_t)0xc9, (int8_t)0xca, (int8_t)0xcb, (int8_t)0xcc, (int8_t)0xcd, (int8_t)0xce, (int8_t)0xcf, + (int8_t)0xd0, (int8_t)0xd1, (int8_t)0xd2, (int8_t)0xd3, (int8_t)0xd4, (int8_t)0xd5, (int8_t)0xd6, (int8_t)0xd7, (int8_t)0xd8, (int8_t)0xd9, (int8_t)0xda, (int8_t)0xdb, + (int8_t)0xdc, (int8_t)0xdd, (int8_t)0xde, (int8_t)0xdf, (int8_t)0xe0, (int8_t)0xe1, (int8_t)0xe2, (int8_t)0xe3, (int8_t)0xe4, (int8_t)0xe5, (int8_t)0xe6, (int8_t)0xe7, + (int8_t)0xe8, (int8_t)0xe9, (int8_t)0xea, (int8_t)0xeb, (int8_t)0xec, (int8_t)0xed, (int8_t)0xee, (int8_t)0xef, (int8_t)0xf0, (int8_t)0xf1, (int8_t)0xf2, (int8_t)0xf3, + (int8_t)0xf4, (int8_t)0xf5, (int8_t)0xf6, (int8_t)0xf7, (int8_t)0xf8, (int8_t)0xf9, (int8_t)0xfa, (int8_t)0xfb, (int8_t)0xfc, (int8_t)0xfd, (int8_t)0xfe, (int8_t)0xff, + (int8_t)0x00, (int8_t)0x01, (int8_t)0x02, (int8_t)0x03, (int8_t)0x04, (int8_t)0x05, (int8_t)0x06, (int8_t)0x07, (int8_t)0x08, (int8_t)0x09, (int8_t)0x0a, (int8_t)0x0b, + (int8_t)0x0c, (int8_t)0x0d, (int8_t)0x0e, (int8_t)0x0f, (int8_t)0x10, (int8_t)0x11, (int8_t)0x12, (int8_t)0x13, (int8_t)0x14, (int8_t)0x15, (int8_t)0x16, (int8_t)0x17, + (int8_t)0x18, (int8_t)0x19, (int8_t)0x1a, (int8_t)0x1b, (int8_t)0x1c, (int8_t)0x1d, (int8_t)0x1e, (int8_t)0x1f, (int8_t)0x20, (int8_t)0x21, (int8_t)0x22, (int8_t)0x23, + (int8_t)0x24, (int8_t)0x25, (int8_t)0x26, (int8_t)0x27, (int8_t)0x28, (int8_t)0x29, (int8_t)0x2a, (int8_t)0x2b, (int8_t)0x2c, (int8_t)0x2d, (int8_t)0x2e, (int8_t)0x2f, + (int8_t)0x30, (int8_t)0x31, (int8_t)0x32, (int8_t)0x33, (int8_t)0x34, (int8_t)0x35, (int8_t)0x36, (int8_t)0x37, (int8_t)0x38, (int8_t)0x39, (int8_t)0x3a, (int8_t)0x3b, + (int8_t)0x3c, (int8_t)0x3d, (int8_t)0x3e, (int8_t)0x3f, (int8_t)0x40, (int8_t)0x41, (int8_t)0x42, (int8_t)0x43, (int8_t)0x44, (int8_t)0x45, (int8_t)0x46, (int8_t)0x47, + (int8_t)0x48, (int8_t)0x49, (int8_t)0x4a, (int8_t)0x4b, (int8_t)0x4c, (int8_t)0x4d, (int8_t)0x4e, (int8_t)0x4f, (int8_t)0x50, (int8_t)0x51, (int8_t)0x52, (int8_t)0x53, + (int8_t)0x54, (int8_t)0x55, (int8_t)0x56, (int8_t)0x57, (int8_t)0x58, (int8_t)0x59, (int8_t)0x5a, (int8_t)0x5b, (int8_t)0x5c, (int8_t)0x5d, (int8_t)0x5e, (int8_t)0x5f, + (int8_t)0x60, (int8_t)0x61, (int8_t)0x62, (int8_t)0x63, (int8_t)0x64, (int8_t)0x65, (int8_t)0x66, (int8_t)0x67, (int8_t)0x68, (int8_t)0x69, (int8_t)0x6a, (int8_t)0x6b, + (int8_t)0x6c, (int8_t)0x6d, (int8_t)0x6e, (int8_t)0x6f, (int8_t)0x70, (int8_t)0x71, (int8_t)0x72, (int8_t)0x73, (int8_t)0x74, (int8_t)0x75, (int8_t)0x76, (int8_t)0x77, + (int8_t)0x78, (int8_t)0x79, (int8_t)0x7a, (int8_t)0x7b, (int8_t)0x7c, (int8_t)0x7d, (int8_t)0x7e, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, + (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f, (int8_t)0x7f +}; + +static const int8_t sclip2[112 + 112 + 1] = { + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, (int8_t)0xf0, + (int8_t)0xf0, (int8_t)0xf1, (int8_t)0xf2, (int8_t)0xf3, (int8_t)0xf4, (int8_t)0xf5, (int8_t)0xf6, (int8_t)0xf7, (int8_t)0xf8, (int8_t)0xf9, (int8_t)0xfa, (int8_t)0xfb, + (int8_t)0xfc, (int8_t)0xfd, (int8_t)0xfe, (int8_t)0xff, (int8_t)0x00, (int8_t)0x01, (int8_t)0x02, (int8_t)0x03, (int8_t)0x04, (int8_t)0x05, (int8_t)0x06, (int8_t)0x07, + (int8_t)0x08, (int8_t)0x09, (int8_t)0x0a, (int8_t)0x0b, (int8_t)0x0c, (int8_t)0x0d, (int8_t)0x0e, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, + (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f, (int8_t)0x0f +}; + +static const uint8_t clip1[255 + 511 + 1] = { + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, + (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x00, (uint8_t)0x01, (uint8_t)0x02, (uint8_t)0x03, (uint8_t)0x04, (uint8_t)0x05, (uint8_t)0x06, (uint8_t)0x07, (uint8_t)0x08, + (uint8_t)0x09, (uint8_t)0x0a, (uint8_t)0x0b, (uint8_t)0x0c, (uint8_t)0x0d, (uint8_t)0x0e, (uint8_t)0x0f, (uint8_t)0x10, (uint8_t)0x11, (uint8_t)0x12, (uint8_t)0x13, (uint8_t)0x14, + (uint8_t)0x15, (uint8_t)0x16, (uint8_t)0x17, (uint8_t)0x18, (uint8_t)0x19, (uint8_t)0x1a, (uint8_t)0x1b, (uint8_t)0x1c, (uint8_t)0x1d, (uint8_t)0x1e, (uint8_t)0x1f, (uint8_t)0x20, + (uint8_t)0x21, (uint8_t)0x22, (uint8_t)0x23, (uint8_t)0x24, (uint8_t)0x25, (uint8_t)0x26, (uint8_t)0x27, (uint8_t)0x28, (uint8_t)0x29, (uint8_t)0x2a, (uint8_t)0x2b, (uint8_t)0x2c, + (uint8_t)0x2d, (uint8_t)0x2e, (uint8_t)0x2f, (uint8_t)0x30, (uint8_t)0x31, (uint8_t)0x32, (uint8_t)0x33, (uint8_t)0x34, (uint8_t)0x35, (uint8_t)0x36, (uint8_t)0x37, (uint8_t)0x38, + (uint8_t)0x39, (uint8_t)0x3a, (uint8_t)0x3b, (uint8_t)0x3c, (uint8_t)0x3d, (uint8_t)0x3e, (uint8_t)0x3f, (uint8_t)0x40, (uint8_t)0x41, (uint8_t)0x42, (uint8_t)0x43, (uint8_t)0x44, + (uint8_t)0x45, (uint8_t)0x46, (uint8_t)0x47, (uint8_t)0x48, (uint8_t)0x49, (uint8_t)0x4a, (uint8_t)0x4b, (uint8_t)0x4c, (uint8_t)0x4d, (uint8_t)0x4e, (uint8_t)0x4f, (uint8_t)0x50, + (uint8_t)0x51, (uint8_t)0x52, (uint8_t)0x53, (uint8_t)0x54, (uint8_t)0x55, (uint8_t)0x56, (uint8_t)0x57, (uint8_t)0x58, (uint8_t)0x59, (uint8_t)0x5a, (uint8_t)0x5b, (uint8_t)0x5c, + (uint8_t)0x5d, (uint8_t)0x5e, (uint8_t)0x5f, (uint8_t)0x60, (uint8_t)0x61, (uint8_t)0x62, (uint8_t)0x63, (uint8_t)0x64, (uint8_t)0x65, (uint8_t)0x66, (uint8_t)0x67, (uint8_t)0x68, + (uint8_t)0x69, (uint8_t)0x6a, (uint8_t)0x6b, (uint8_t)0x6c, (uint8_t)0x6d, (uint8_t)0x6e, (uint8_t)0x6f, (uint8_t)0x70, (uint8_t)0x71, (uint8_t)0x72, (uint8_t)0x73, (uint8_t)0x74, + (uint8_t)0x75, (uint8_t)0x76, (uint8_t)0x77, (uint8_t)0x78, (uint8_t)0x79, (uint8_t)0x7a, (uint8_t)0x7b, (uint8_t)0x7c, (uint8_t)0x7d, (uint8_t)0x7e, (uint8_t)0x7f, (uint8_t)0x80, + (uint8_t)0x81, (uint8_t)0x82, (uint8_t)0x83, (uint8_t)0x84, (uint8_t)0x85, (uint8_t)0x86, (uint8_t)0x87, (uint8_t)0x88, (uint8_t)0x89, (uint8_t)0x8a, (uint8_t)0x8b, (uint8_t)0x8c, + (uint8_t)0x8d, (uint8_t)0x8e, (uint8_t)0x8f, (uint8_t)0x90, (uint8_t)0x91, (uint8_t)0x92, (uint8_t)0x93, (uint8_t)0x94, (uint8_t)0x95, (uint8_t)0x96, (uint8_t)0x97, (uint8_t)0x98, + (uint8_t)0x99, (uint8_t)0x9a, (uint8_t)0x9b, (uint8_t)0x9c, (uint8_t)0x9d, (uint8_t)0x9e, (uint8_t)0x9f, (uint8_t)0xa0, (uint8_t)0xa1, (uint8_t)0xa2, (uint8_t)0xa3, (uint8_t)0xa4, + (uint8_t)0xa5, (uint8_t)0xa6, (uint8_t)0xa7, (uint8_t)0xa8, (uint8_t)0xa9, (uint8_t)0xaa, (uint8_t)0xab, (uint8_t)0xac, (uint8_t)0xad, (uint8_t)0xae, (uint8_t)0xaf, (uint8_t)0xb0, + (uint8_t)0xb1, (uint8_t)0xb2, (uint8_t)0xb3, (uint8_t)0xb4, (uint8_t)0xb5, (uint8_t)0xb6, (uint8_t)0xb7, (uint8_t)0xb8, (uint8_t)0xb9, (uint8_t)0xba, (uint8_t)0xbb, (uint8_t)0xbc, + (uint8_t)0xbd, (uint8_t)0xbe, (uint8_t)0xbf, (uint8_t)0xc0, (uint8_t)0xc1, (uint8_t)0xc2, (uint8_t)0xc3, (uint8_t)0xc4, (uint8_t)0xc5, (uint8_t)0xc6, (uint8_t)0xc7, (uint8_t)0xc8, + (uint8_t)0xc9, (uint8_t)0xca, (uint8_t)0xcb, (uint8_t)0xcc, (uint8_t)0xcd, (uint8_t)0xce, (uint8_t)0xcf, (uint8_t)0xd0, (uint8_t)0xd1, (uint8_t)0xd2, (uint8_t)0xd3, (uint8_t)0xd4, + (uint8_t)0xd5, (uint8_t)0xd6, (uint8_t)0xd7, (uint8_t)0xd8, (uint8_t)0xd9, (uint8_t)0xda, (uint8_t)0xdb, (uint8_t)0xdc, (uint8_t)0xdd, (uint8_t)0xde, (uint8_t)0xdf, (uint8_t)0xe0, + (uint8_t)0xe1, (uint8_t)0xe2, (uint8_t)0xe3, (uint8_t)0xe4, (uint8_t)0xe5, (uint8_t)0xe6, (uint8_t)0xe7, (uint8_t)0xe8, (uint8_t)0xe9, (uint8_t)0xea, (uint8_t)0xeb, (uint8_t)0xec, + (uint8_t)0xed, (uint8_t)0xee, (uint8_t)0xef, (uint8_t)0xf0, (uint8_t)0xf1, (uint8_t)0xf2, (uint8_t)0xf3, (uint8_t)0xf4, (uint8_t)0xf5, (uint8_t)0xf6, (uint8_t)0xf7, (uint8_t)0xf8, + (uint8_t)0xf9, (uint8_t)0xfa, (uint8_t)0xfb, (uint8_t)0xfc, (uint8_t)0xfd, (uint8_t)0xfe, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, + (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff, (uint8_t)0xff +}; + +#else + +// uninitialized tables +static uint8_t abs0[255 + 255 + 1]; +static int8_t sclip1[1020 + 1020 + 1]; +static int8_t sclip2[112 + 112 + 1]; +static uint8_t clip1[255 + 511 + 1]; + +// We declare this variable 'volatile' to prevent instruction reordering +// and make sure it's set to true _last_ (so as to be thread-safe) +static volatile int tables_ok = 0; + +#endif + +const int8_t* const VP8ksclip1 = &sclip1[1020]; +const int8_t* const VP8ksclip2 = &sclip2[112]; +const uint8_t* const VP8kclip1 = &clip1[255]; +const uint8_t* const VP8kabs0 = &abs0[255]; + +WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) { +#if !defined(USE_STATIC_TABLES) + int i; + if (!tables_ok) { + for (i = -255; i <= 255; ++i) { + abs0[255 + i] = (i < 0) ? -i : i; + } + for (i = -1020; i <= 1020; ++i) { + sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i; + } + for (i = -112; i <= 112; ++i) { + sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; + } + for (i = -255; i <= 255 + 255; ++i) { + clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; + } + tables_ok = 1; + } +#endif // USE_STATIC_TABLES +} diff --git a/src/loaders/webp/dsp/dsp.h b/src/loaders/webp/dsp/dsp.h new file mode 100644 index 00000000..4446102a --- /dev/null +++ b/src/loaders/webp/dsp/dsp.h @@ -0,0 +1,319 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Speed-critical functions. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_DSP_DSP_H_ +#define WEBP_DSP_DSP_H_ + +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define BPS 32 // this is the common stride for enc/dec + +//------------------------------------------------------------------------------ +// CPU detection + +#if defined(__GNUC__) +# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) +# define LOCAL_GCC_PREREQ(maj, min) \ + (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) +#else +# define LOCAL_GCC_VERSION 0 +# define LOCAL_GCC_PREREQ(maj, min) 0 +#endif + +#ifdef __clang__ +# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) +# define LOCAL_CLANG_PREREQ(maj, min) \ + (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) +#else +# define LOCAL_CLANG_VERSION 0 +# define LOCAL_CLANG_PREREQ(maj, min) 0 +#endif // __clang__ + +#if defined(_MSC_VER) && _MSC_VER > 1310 && \ + (defined(_M_X64) || defined(_M_IX86)) +#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets +#endif + +// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp +// files without intrinsics, allowing the corresponding Init() to be called. +// Files containing intrinsics will need to be built targeting the instruction +// set so should succeed on one of the earlier tests. +#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2) +//#define WEBP_USE_SSE2 +#endif + +// This macro prevents thread_sanitizer from reporting known concurrent writes. +#define WEBP_TSAN_IGNORE_FUNCTION +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#undef WEBP_TSAN_IGNORE_FUNCTION +#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) +#endif +#endif + +typedef enum { + kSSE2, +} CPUFeature; +// returns true if the CPU supports the feature. +typedef int (*VP8CPUInfo)(CPUFeature feature); +WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo; + +//------------------------------------------------------------------------------ +// Init stub generator + +// Defines an init function stub to ensure each module exposes a symbol, +// avoiding a compiler warning. +#define WEBP_DSP_INIT_STUB(func) \ + extern void func(void); \ + WEBP_TSAN_IGNORE_FUNCTION void func(void) {} + +//------------------------------------------------------------------------------ +// Decoding + +typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst); +// when doing two transforms, coeffs is actually int16_t[2][16]. +typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two); +extern VP8DecIdct2 VP8Transform; +extern VP8DecIdct VP8TransformAC3; +extern VP8DecIdct VP8TransformUV; +extern VP8DecIdct VP8TransformDC; +extern VP8DecIdct VP8TransformDCUV; + +// *dst is the destination block, with stride BPS. Boundary samples are +// assumed accessible when needed. +typedef void (*VP8PredFunc)(uint8_t* dst); +extern VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */]; +extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */]; +extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */]; + +// clipping tables (for filtering) +extern const int8_t* const VP8ksclip1; // clips [-1020, 1020] to [-128, 127] +extern const int8_t* const VP8ksclip2; // clips [-112, 112] to [-16, 15] +extern const uint8_t* const VP8kclip1; // clips [-255,511] to [0,255] +extern const uint8_t* const VP8kabs0; // abs(x) for x in [-255,255] +// must be called first +void VP8InitClipTables(void); + +// simple filter (only for luma) +typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh); +extern VP8SimpleFilterFunc VP8SimpleVFilter16; +extern VP8SimpleFilterFunc VP8SimpleHFilter16; +extern VP8SimpleFilterFunc VP8SimpleVFilter16i; // filter 3 inner edges +extern VP8SimpleFilterFunc VP8SimpleHFilter16i; + +// regular filter (on both macroblock edges and inner edges) +typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride, + int thresh, int ithresh, int hev_t); +typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride, + int thresh, int ithresh, int hev_t); +// on outer edge +extern VP8LumaFilterFunc VP8VFilter16; +extern VP8LumaFilterFunc VP8HFilter16; +extern VP8ChromaFilterFunc VP8VFilter8; +extern VP8ChromaFilterFunc VP8HFilter8; + +// on inner edge +extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether +extern VP8LumaFilterFunc VP8HFilter16i; +extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether +extern VP8ChromaFilterFunc VP8HFilter8i; + +// must be called before anything using the above +void VP8DspInit(void); + +//------------------------------------------------------------------------------ +// WebP I/O + +//#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support + +// Convert a pair of y/u/v lines together to the output rgb/a colorspace. +// bottom_y can be NULL if only one line of output is needed (at top/bottom). +typedef void (*WebPUpsampleLinePairFunc)( + const uint8_t* top_y, const uint8_t* bottom_y, + const uint8_t* top_u, const uint8_t* top_v, + const uint8_t* cur_u, const uint8_t* cur_v, + uint8_t* top_dst, uint8_t* bottom_dst, int len); + +#ifdef FANCY_UPSAMPLING + +// Fancy upsampling functions to convert YUV to RGB(A) modes +extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; + +#endif // FANCY_UPSAMPLING + +// Per-row point-sampling methods. +typedef void (*WebPSamplerRowFunc)(const uint8_t* y, + const uint8_t* u, const uint8_t* v, + uint8_t* dst, int len); +// Generic function to apply 'WebPSamplerRowFunc' to the whole plane: +void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, + const uint8_t* u, const uint8_t* v, int uv_stride, + uint8_t* dst, int dst_stride, + int width, int height, WebPSamplerRowFunc func); + +// Sampling functions to convert rows of YUV to RGB(A) +extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */]; + +// General function for converting two lines of ARGB or RGBA. +// 'alpha_is_last' should be true if 0xff000000 is stored in memory as +// as 0x00, 0x00, 0x00, 0xff (little endian). +WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last); + +// YUV444->RGB converters +typedef void (*WebPYUV444Converter)(const uint8_t* y, + const uint8_t* u, const uint8_t* v, + uint8_t* dst, int len); + +extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */]; + +// Must be called before using the WebPUpsamplers[] (and for premultiplied +// colorspaces like rgbA, rgbA4444, etc) +void WebPInitUpsamplers(void); +// Must be called before using WebPSamplers[] +void WebPInitSamplers(void); +// Must be called before using WebPYUV444Converters[] +void WebPInitYUV444Converters(void); + +//------------------------------------------------------------------------------ +// Rescaler + +struct WebPRescaler; + +// Import a row of data and save its contribution in the rescaler. +// 'channel' denotes the channel number to be imported. +extern void (*WebPRescalerImportRow)(struct WebPRescaler* const wrk, + const uint8_t* const src, int channel); + +// Export one row (starting at x_out position) from rescaler. +extern void (*WebPRescalerExportRow)(struct WebPRescaler* const wrk, int x_out); + +// Plain-C implementation, as fall-back. +extern void WebPRescalerExportRowC(struct WebPRescaler* const wrk, int x_out); + +// Must be called first before using the above. +void WebPRescalerDspInit(void); + +//------------------------------------------------------------------------------ +// Utilities for processing transparent channel. + +// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h. +// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last). +extern void (*WebPApplyAlphaMultiply)( + uint8_t* rgba, int alpha_first, int w, int h, int stride); + +// Same, buf specifically for RGBA4444 format +extern void (*WebPApplyAlphaMultiply4444)( + uint8_t* rgba4444, int w, int h, int stride); + +// Dispatch the values from alpha[] plane to the ARGB destination 'dst'. +// Returns true if alpha[] plane has non-trivial values different from 0xff. +extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint8_t* dst, int dst_stride); + +// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the +// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units. +extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint32_t* dst, int dst_stride); + +// Extract the alpha values from 32b values in argb[] and pack them into alpha[] +// (this is the opposite of WebPDispatchAlpha). +// Returns true if there's only trivial 0xff alpha values. +extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride, + int width, int height, + uint8_t* alpha, int alpha_stride); + +// Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B). +// Un-Multiply operation transforms x into x * 255 / A. + +// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row. +extern void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse); + +// Same a WebPMultARGBRow(), but for several rows. +void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, + int inverse); + +// Same for a row of single values, with side alpha values. +extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, + int width, int inverse); + +// Same a WebPMultRow(), but for several 'num_rows' rows. +void WebPMultRows(uint8_t* ptr, int stride, + const uint8_t* alpha, int alpha_stride, + int width, int num_rows, int inverse); + +// Plain-C versions, used as fallback by some implementations. +void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha, + int width, int inverse); +void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse); + +// To be called first before using the above. +void WebPInitAlphaProcessing(void); + +// ARGB packing function: a/r/g/b input is rgba or bgra order. +extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r, + const uint8_t* g, const uint8_t* b, int len, + uint32_t* out); + +// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order. +extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, + int len, int step, uint32_t* out); + + +//------------------------------------------------------------------------------ +// Filter functions + +typedef enum { // Filter types. + WEBP_FILTER_NONE = 0, + WEBP_FILTER_HORIZONTAL, + WEBP_FILTER_VERTICAL, + WEBP_FILTER_GRADIENT, + WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker + WEBP_FILTER_BEST, // meta-types + WEBP_FILTER_FAST +} WEBP_FILTER_TYPE; + +typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height, + int stride, uint8_t* out); +typedef void (*WebPUnfilterFunc)(int width, int height, int stride, + int row, int num_rows, uint8_t* data); + +// Filter the given data using the given predictor. +// 'in' corresponds to a 2-dimensional pixel array of size (stride * height) +// in raster order. +// 'stride' is number of bytes per scan line (with possible padding). +// 'out' should be pre-allocated. +extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; + +// In-place reconstruct the original data from the given filtered data. +// The reconstruction will be done for 'num_rows' rows starting from 'row' +// (assuming rows upto 'row - 1' are already reconstructed). +extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; + +// To be called first before using the above. +void VP8FiltersInit(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_DSP_DSP_H_ */ diff --git a/src/loaders/webp/dsp/filters.cpp b/src/loaders/webp/dsp/filters.cpp new file mode 100644 index 00000000..5c30f2e4 --- /dev/null +++ b/src/loaders/webp/dsp/filters.cpp @@ -0,0 +1,240 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Spatial prediction using various filters +// +// Author: Urvang (urvang@google.com) + +#include "./dsp.h" +#include +#include +#include + +//------------------------------------------------------------------------------ +// Helpful macro. + +# define SANITY_CHECK(in, out) \ + assert(in != NULL); \ + assert(out != NULL); \ + assert(width > 0); \ + assert(height > 0); \ + assert(stride >= width); \ + assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \ + (void)height; // Silence unused warning. + +static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred, + uint8_t* dst, int length, int inverse) { + int i; + if (inverse) { + for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i]; + } else { + for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i]; + } +} + +//------------------------------------------------------------------------------ +// Horizontal filter. + +static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in, + int width, int height, int stride, + int row, int num_rows, + int inverse, uint8_t* out) { + const uint8_t* preds; + const size_t start_offset = row * stride; + const int last_row = row + num_rows; + SANITY_CHECK(in, out); + in += start_offset; + out += start_offset; + preds = inverse ? out : in; + + if (row == 0) { + // Leftmost pixel is the same as input for topmost scanline. + out[0] = in[0]; + PredictLine(in + 1, preds, out + 1, width - 1, inverse); + row = 1; + preds += stride; + in += stride; + out += stride; + } + + // Filter line-by-line. + while (row < last_row) { + // Leftmost pixel is predicted from above. + PredictLine(in, preds - stride, out, 1, inverse); + PredictLine(in + 1, preds, out + 1, width - 1, inverse); + ++row; + preds += stride; + in += stride; + out += stride; + } +} + +//------------------------------------------------------------------------------ +// Vertical filter. + +static WEBP_INLINE void DoVerticalFilter(const uint8_t* in, + int width, int height, int stride, + int row, int num_rows, + int inverse, uint8_t* out) { + const uint8_t* preds; + const size_t start_offset = row * stride; + const int last_row = row + num_rows; + SANITY_CHECK(in, out); + in += start_offset; + out += start_offset; + preds = inverse ? out : in; + + if (row == 0) { + // Very first top-left pixel is copied. + out[0] = in[0]; + // Rest of top scan-line is left-predicted. + PredictLine(in + 1, preds, out + 1, width - 1, inverse); + row = 1; + in += stride; + out += stride; + } else { + // We are starting from in-between. Make sure 'preds' points to prev row. + preds -= stride; + } + + // Filter line-by-line. + while (row < last_row) { + PredictLine(in, preds, out, width, inverse); + ++row; + preds += stride; + in += stride; + out += stride; + } +} + +//------------------------------------------------------------------------------ +// Gradient filter. + +static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) { + const int g = a + b - c; + return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit +} + +static WEBP_INLINE void DoGradientFilter(const uint8_t* in, + int width, int height, int stride, + int row, int num_rows, + int inverse, uint8_t* out) { + const uint8_t* preds; + const size_t start_offset = row * stride; + const int last_row = row + num_rows; + SANITY_CHECK(in, out); + in += start_offset; + out += start_offset; + preds = inverse ? out : in; + + // left prediction for top scan-line + if (row == 0) { + out[0] = in[0]; + PredictLine(in + 1, preds, out + 1, width - 1, inverse); + row = 1; + preds += stride; + in += stride; + out += stride; + } + + // Filter line-by-line. + while (row < last_row) { + int w; + // leftmost pixel: predict from above. + PredictLine(in, preds - stride, out, 1, inverse); + for (w = 1; w < width; ++w) { + const int pred = GradientPredictor(preds[w - 1], + preds[w - stride], + preds[w - stride - 1]); + out[w] = in[w] + (inverse ? pred : -pred); + } + ++row; + preds += stride; + in += stride; + out += stride; + } +} + +#undef SANITY_CHECK + +//------------------------------------------------------------------------------ + +static void HorizontalFilter(const uint8_t* data, int width, int height, + int stride, uint8_t* filtered_data) { + DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data); +} + +static void VerticalFilter(const uint8_t* data, int width, int height, + int stride, uint8_t* filtered_data) { + DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data); +} + + +static void GradientFilter(const uint8_t* data, int width, int height, + int stride, uint8_t* filtered_data) { + DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data); +} + + +//------------------------------------------------------------------------------ + +static void VerticalUnfilter(int width, int height, int stride, int row, + int num_rows, uint8_t* data) { + DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data); +} + +static void HorizontalUnfilter(int width, int height, int stride, int row, + int num_rows, uint8_t* data) { + DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data); +} + +static void GradientUnfilter(int width, int height, int stride, int row, + int num_rows, uint8_t* data) { + DoGradientFilter(data, width, height, stride, row, num_rows, 1, data); +} + +//------------------------------------------------------------------------------ +// Init function + +WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; +WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; + +extern void VP8FiltersInitMIPSdspR2(void); +extern void VP8FiltersInitSSE2(void); + +static volatile VP8CPUInfo filters_last_cpuinfo_used = + (VP8CPUInfo)&filters_last_cpuinfo_used; + +WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) { + if (filters_last_cpuinfo_used == VP8GetCPUInfo) return; + + WebPUnfilters[WEBP_FILTER_NONE] = NULL; + WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter; + WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter; + WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter; + + WebPFilters[WEBP_FILTER_NONE] = NULL; + WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter; + WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter; + WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter; + + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + VP8FiltersInitSSE2(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8FiltersInitMIPSdspR2(); + } +#endif + } + filters_last_cpuinfo_used = VP8GetCPUInfo; +} diff --git a/src/loaders/webp/dsp/lossless.cpp b/src/loaders/webp/dsp/lossless.cpp new file mode 100644 index 00000000..5ca77594 --- /dev/null +++ b/src/loaders/webp/dsp/lossless.cpp @@ -0,0 +1,636 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Image transforms and color space conversion methods for lossless decoder. +// +// Authors: Vikas Arora (vikaas.arora@gmail.com) +// Jyrki Alakuijala (jyrki@google.com) +// Urvang Joshi (urvang@google.com) + +#include "./dsp.h" + +#include +#include +#include "../dec/vp8li.h" +#include "../utils/endian_inl.h" +#include "./lossless.h" +#include "./yuv.h" + +#define MAX_DIFF_COST (1e30f) + +//------------------------------------------------------------------------------ +// Image transforms. + +// In-place sum of each component with mod 256. +static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { + const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); + const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); + *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); +} + +static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { + return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1); +} + +static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { + return Average2(Average2(a0, a2), a1); +} + +static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, + uint32_t a2, uint32_t a3) { + return Average2(Average2(a0, a1), Average2(a2, a3)); +} + +static WEBP_INLINE uint32_t Clip255(uint32_t a) { + if (a < 256) { + return a; + } + // return 0, when a is a negative integer. + // return 255, when a is positive. + return ~a >> 24; +} + +static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { + return Clip255(a + b - c); +} + +static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, + uint32_t c2) { + const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); + const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, + (c1 >> 16) & 0xff, + (c2 >> 16) & 0xff); + const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, + (c1 >> 8) & 0xff, + (c2 >> 8) & 0xff); + const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); + return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; +} + +static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { + return Clip255(a + (a - b) / 2); +} + +static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, + uint32_t c2) { + const uint32_t ave = Average2(c0, c1); + const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); + const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); + const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); + const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); + return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; +} + +// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined. +#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409 +# define LOCAL_INLINE __attribute__ ((noinline)) +#else +# define LOCAL_INLINE WEBP_INLINE +#endif + +static LOCAL_INLINE int Sub3(int a, int b, int c) { + const int pb = b - c; + const int pa = a - c; + return abs(pb) - abs(pa); +} + +#undef LOCAL_INLINE + +static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { + const int pa_minus_pb = + Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + + Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + + Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + + Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); + return (pa_minus_pb <= 0) ? a : b; +} + +//------------------------------------------------------------------------------ +// Predictors + +static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { + (void)top; + (void)left; + return ARGB_BLACK; +} +static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { + (void)top; + return left; +} +static uint32_t Predictor2(uint32_t left, const uint32_t* const top) { + (void)left; + return top[0]; +} +static uint32_t Predictor3(uint32_t left, const uint32_t* const top) { + (void)left; + return top[1]; +} +static uint32_t Predictor4(uint32_t left, const uint32_t* const top) { + (void)left; + return top[-1]; +} +static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { + const uint32_t pred = Average3(left, top[0], top[1]); + return pred; +} +static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { + const uint32_t pred = Average2(left, top[-1]); + return pred; +} +static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { + const uint32_t pred = Average2(left, top[0]); + return pred; +} +static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { + const uint32_t pred = Average2(top[-1], top[0]); + (void)left; + return pred; +} +static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { + const uint32_t pred = Average2(top[0], top[1]); + (void)left; + return pred; +} +static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { + const uint32_t pred = Average4(left, top[-1], top[0], top[1]); + return pred; +} +static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { + const uint32_t pred = Select(top[0], left, top[-1]); + return pred; +} +static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); + return pred; +} +static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); + return pred; +} + +//------------------------------------------------------------------------------ + +// Inverse prediction. +static void PredictorInverseTransform(const VP8LTransform* const transform, + int y_start, int y_end, uint32_t* data) { + const int width = transform->xsize_; + if (y_start == 0) { // First Row follows the L (mode=1) mode. + int x; + const uint32_t pred0 = Predictor0(data[-1], NULL); + AddPixelsEq(data, pred0); + for (x = 1; x < width; ++x) { + const uint32_t pred1 = Predictor1(data[x - 1], NULL); + AddPixelsEq(data + x, pred1); + } + data += width; + ++y_start; + } + + { + int y = y_start; + const int tile_width = 1 << transform->bits_; + const int mask = tile_width - 1; + const int safe_width = width & ~mask; + const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); + const uint32_t* pred_mode_base = + transform->data_ + (y >> transform->bits_) * tiles_per_row; + + while (y < y_end) { + const uint32_t pred2 = Predictor2(data[-1], data - width); + const uint32_t* pred_mode_src = pred_mode_base; + VP8LPredictorFunc pred_func; + int x = 1; + int t = 1; + // First pixel follows the T (mode=2) mode. + AddPixelsEq(data, pred2); + // .. the rest: + while (x < safe_width) { + pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf]; + for (; t < tile_width; ++t, ++x) { + const uint32_t pred = pred_func(data[x - 1], data + x - width); + AddPixelsEq(data + x, pred); + } + t = 0; + } + if (x < width) { + pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf]; + for (; x < width; ++x) { + const uint32_t pred = pred_func(data[x - 1], data + x - width); + AddPixelsEq(data + x, pred); + } + } + data += width; + ++y; + if ((y & mask) == 0) { // Use the same mask, since tiles are squares. + pred_mode_base += tiles_per_row; + } + } + } +} + +// Add green to blue and red channels (i.e. perform the inverse transform of +// 'subtract green'). +void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) { + int i; + for (i = 0; i < num_pixels; ++i) { + const uint32_t argb = data[i]; + const uint32_t green = ((argb >> 8) & 0xff); + uint32_t red_blue = (argb & 0x00ff00ffu); + red_blue += (green << 16) | green; + red_blue &= 0x00ff00ffu; + data[i] = (argb & 0xff00ff00u) | red_blue; + } +} + +static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, + int8_t color) { + return (uint32_t)((int)(color_pred) * color) >> 5; +} + +static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code, + VP8LMultipliers* const m) { + m->green_to_red_ = (color_code >> 0) & 0xff; + m->green_to_blue_ = (color_code >> 8) & 0xff; + m->red_to_blue_ = (color_code >> 16) & 0xff; +} + +void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data, + int num_pixels) { + int i; + for (i = 0; i < num_pixels; ++i) { + const uint32_t argb = data[i]; + const uint32_t green = argb >> 8; + const uint32_t red = argb >> 16; + uint32_t new_red = red; + uint32_t new_blue = argb; + new_red += ColorTransformDelta(m->green_to_red_, green); + new_red &= 0xff; + new_blue += ColorTransformDelta(m->green_to_blue_, green); + new_blue += ColorTransformDelta(m->red_to_blue_, new_red); + new_blue &= 0xff; + data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); + } +} + +// Color space inverse transform. +static void ColorSpaceInverseTransform(const VP8LTransform* const transform, + int y_start, int y_end, uint32_t* data) { + const int width = transform->xsize_; + const int tile_width = 1 << transform->bits_; + const int mask = tile_width - 1; + const int safe_width = width & ~mask; + const int remaining_width = width - safe_width; + const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); + int y = y_start; + const uint32_t* pred_row = + transform->data_ + (y >> transform->bits_) * tiles_per_row; + + while (y < y_end) { + const uint32_t* pred = pred_row; + VP8LMultipliers m = { 0, 0, 0 }; + const uint32_t* const data_safe_end = data + safe_width; + const uint32_t* const data_end = data + width; + while (data < data_safe_end) { + ColorCodeToMultipliers(*pred++, &m); + VP8LTransformColorInverse(&m, data, tile_width); + data += tile_width; + } + if (data < data_end) { // Left-overs using C-version. + ColorCodeToMultipliers(*pred++, &m); + VP8LTransformColorInverse(&m, data, remaining_width); + data += remaining_width; + } + ++y; + if ((y & mask) == 0) pred_row += tiles_per_row; + } +} + +// Separate out pixels packed together using pixel-bundling. +// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t). +#define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX, \ + GET_INDEX, GET_VALUE) \ +static void F_NAME(const TYPE* src, const uint32_t* const color_map, \ + TYPE* dst, int y_start, int y_end, int width) { \ + int y; \ + for (y = y_start; y < y_end; ++y) { \ + int x; \ + for (x = 0; x < width; ++x) { \ + *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ + } \ + } \ +} \ +STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \ + int y_start, int y_end, const TYPE* src, \ + TYPE* dst) { \ + int y; \ + const int bits_per_pixel = 8 >> transform->bits_; \ + const int width = transform->xsize_; \ + const uint32_t* const color_map = transform->data_; \ + if (bits_per_pixel < 8) { \ + const int pixels_per_byte = 1 << transform->bits_; \ + const int count_mask = pixels_per_byte - 1; \ + const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \ + for (y = y_start; y < y_end; ++y) { \ + uint32_t packed_pixels = 0; \ + int x; \ + for (x = 0; x < width; ++x) { \ + /* We need to load fresh 'packed_pixels' once every */ \ + /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \ + /* is a power of 2, so can just use a mask for that, instead of */ \ + /* decrementing a counter. */ \ + if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \ + *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \ + packed_pixels >>= bits_per_pixel; \ + } \ + } \ + } else { \ + VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width); \ + } \ +} + +COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b, + VP8GetARGBIndex, VP8GetARGBValue) +COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t, + 8b, VP8GetAlphaIndex, VP8GetAlphaValue) + +#undef COLOR_INDEX_INVERSE + +void VP8LInverseTransform(const VP8LTransform* const transform, + int row_start, int row_end, + const uint32_t* const in, uint32_t* const out) { + const int width = transform->xsize_; + assert(row_start < row_end); + assert(row_end <= transform->ysize_); + switch (transform->type_) { + case SUBTRACT_GREEN: + VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width); + break; + case PREDICTOR_TRANSFORM: + PredictorInverseTransform(transform, row_start, row_end, out); + if (row_end != transform->ysize_) { + // The last predicted row in this iteration will be the top-pred row + // for the first row in next iteration. + memcpy(out - width, out + (row_end - row_start - 1) * width, + width * sizeof(*out)); + } + break; + case CROSS_COLOR_TRANSFORM: + ColorSpaceInverseTransform(transform, row_start, row_end, out); + break; + case COLOR_INDEXING_TRANSFORM: + if (in == out && transform->bits_ > 0) { + // Move packed pixels to the end of unpacked region, so that unpacking + // can occur seamlessly. + // Also, note that this is the only transform that applies on + // the effective width of VP8LSubSampleSize(xsize_, bits_). All other + // transforms work on effective width of xsize_. + const int out_stride = (row_end - row_start) * width; + const int in_stride = (row_end - row_start) * + VP8LSubSampleSize(transform->xsize_, transform->bits_); + uint32_t* const src = out + out_stride - in_stride; + memmove(src, out, in_stride * sizeof(*src)); + ColorIndexInverseTransform(transform, row_start, row_end, src, out); + } else { + ColorIndexInverseTransform(transform, row_start, row_end, in, out); + } + break; + } +} + +//------------------------------------------------------------------------------ +// Color space conversion. + +static int is_big_endian(void) { + static const union { + uint16_t w; + uint8_t b[2]; + } tmp = { 1 }; + return (tmp.b[0] != 1); +} + +void VP8LConvertBGRAToRGB_C(const uint32_t* src, + int num_pixels, uint8_t* dst) { + const uint32_t* const src_end = src + num_pixels; + while (src < src_end) { + const uint32_t argb = *src++; + *dst++ = (argb >> 16) & 0xff; + *dst++ = (argb >> 8) & 0xff; + *dst++ = (argb >> 0) & 0xff; + } +} + +void VP8LConvertBGRAToRGBA_C(const uint32_t* src, + int num_pixels, uint8_t* dst) { + const uint32_t* const src_end = src + num_pixels; + while (src < src_end) { + const uint32_t argb = *src++; + *dst++ = (argb >> 16) & 0xff; + *dst++ = (argb >> 8) & 0xff; + *dst++ = (argb >> 0) & 0xff; + *dst++ = (argb >> 24) & 0xff; + } +} + +void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, + int num_pixels, uint8_t* dst) { + const uint32_t* const src_end = src + num_pixels; + while (src < src_end) { + const uint32_t argb = *src++; + const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); + const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); +#ifdef WEBP_SWAP_16BIT_CSP + *dst++ = ba; + *dst++ = rg; +#else + *dst++ = rg; + *dst++ = ba; +#endif + } +} + +void VP8LConvertBGRAToRGB565_C(const uint32_t* src, + int num_pixels, uint8_t* dst) { + const uint32_t* const src_end = src + num_pixels; + while (src < src_end) { + const uint32_t argb = *src++; + const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); + const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); +#ifdef WEBP_SWAP_16BIT_CSP + *dst++ = gb; + *dst++ = rg; +#else + *dst++ = rg; + *dst++ = gb; +#endif + } +} + +void VP8LConvertBGRAToBGR_C(const uint32_t* src, + int num_pixels, uint8_t* dst) { + const uint32_t* const src_end = src + num_pixels; + while (src < src_end) { + const uint32_t argb = *src++; + *dst++ = (argb >> 0) & 0xff; + *dst++ = (argb >> 8) & 0xff; + *dst++ = (argb >> 16) & 0xff; + } +} + +static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, + int swap_on_big_endian) { + if (is_big_endian() == swap_on_big_endian) { + const uint32_t* const src_end = src + num_pixels; + while (src < src_end) { + const uint32_t argb = *src++; + +#if !defined(WORDS_BIGENDIAN) +#if !defined(WEBP_REFERENCE_IMPLEMENTATION) + *(uint32_t*)dst = BSwap32(argb); +#else // WEBP_REFERENCE_IMPLEMENTATION + dst[0] = (argb >> 24) & 0xff; + dst[1] = (argb >> 16) & 0xff; + dst[2] = (argb >> 8) & 0xff; + dst[3] = (argb >> 0) & 0xff; +#endif +#else // WORDS_BIGENDIAN + dst[0] = (argb >> 0) & 0xff; + dst[1] = (argb >> 8) & 0xff; + dst[2] = (argb >> 16) & 0xff; + dst[3] = (argb >> 24) & 0xff; +#endif + dst += sizeof(argb); + } + } else { + memcpy(dst, src, num_pixels * sizeof(*src)); + } +} + +void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, + WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { + switch (out_colorspace) { + case MODE_RGB: + VP8LConvertBGRAToRGB(in_data, num_pixels, rgba); + break; + case MODE_RGBA: + VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba); + break; + case MODE_rgbA: + VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba); + WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); + break; + case MODE_BGR: + VP8LConvertBGRAToBGR(in_data, num_pixels, rgba); + break; + case MODE_BGRA: + CopyOrSwap(in_data, num_pixels, rgba, 1); + break; + case MODE_bgrA: + CopyOrSwap(in_data, num_pixels, rgba, 1); + WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); + break; + case MODE_ARGB: + CopyOrSwap(in_data, num_pixels, rgba, 0); + break; + case MODE_Argb: + CopyOrSwap(in_data, num_pixels, rgba, 0); + WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0); + break; + case MODE_RGBA_4444: + VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba); + break; + case MODE_rgbA_4444: + VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba); + WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0); + break; + case MODE_RGB_565: + VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba); + break; + default: + assert(0); // Code flow should not reach here. + } +} + +//------------------------------------------------------------------------------ + +VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed; +VP8LPredictorFunc VP8LPredictors[16]; + +VP8LTransformColorFunc VP8LTransformColorInverse; + +VP8LConvertFunc VP8LConvertBGRAToRGB; +VP8LConvertFunc VP8LConvertBGRAToRGBA; +VP8LConvertFunc VP8LConvertBGRAToRGBA4444; +VP8LConvertFunc VP8LConvertBGRAToRGB565; +VP8LConvertFunc VP8LConvertBGRAToBGR; + +VP8LMapARGBFunc VP8LMapColor32b; +VP8LMapAlphaFunc VP8LMapColor8b; + +extern void VP8LDspInitSSE2(void); +extern void VP8LDspInitNEON(void); +extern void VP8LDspInitMIPSdspR2(void); + +static volatile VP8CPUInfo lossless_last_cpuinfo_used = + (VP8CPUInfo)&lossless_last_cpuinfo_used; + +WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) { + if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return; + + VP8LPredictors[0] = Predictor0; + VP8LPredictors[1] = Predictor1; + VP8LPredictors[2] = Predictor2; + VP8LPredictors[3] = Predictor3; + VP8LPredictors[4] = Predictor4; + VP8LPredictors[5] = Predictor5; + VP8LPredictors[6] = Predictor6; + VP8LPredictors[7] = Predictor7; + VP8LPredictors[8] = Predictor8; + VP8LPredictors[9] = Predictor9; + VP8LPredictors[10] = Predictor10; + VP8LPredictors[11] = Predictor11; + VP8LPredictors[12] = Predictor12; + VP8LPredictors[13] = Predictor13; + VP8LPredictors[14] = Predictor0; // <- padding security sentinels + VP8LPredictors[15] = Predictor0; + + VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C; + + VP8LTransformColorInverse = VP8LTransformColorInverse_C; + + VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C; + VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C; + VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C; + VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C; + VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; + + VP8LMapColor32b = MapARGB; + VP8LMapColor8b = MapAlpha; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + VP8LDspInitSSE2(); + } +#endif +#if defined(WEBP_USE_NEON) + if (VP8GetCPUInfo(kNEON)) { + VP8LDspInitNEON(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8LDspInitMIPSdspR2(); + } +#endif + } + lossless_last_cpuinfo_used = VP8GetCPUInfo; +} + +//------------------------------------------------------------------------------ diff --git a/src/loaders/webp/dsp/lossless.h b/src/loaders/webp/dsp/lossless.h new file mode 100644 index 00000000..48b360e8 --- /dev/null +++ b/src/loaders/webp/dsp/lossless.h @@ -0,0 +1,303 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Image transforms and color space conversion methods for lossless decoder. +// +// Authors: Vikas Arora (vikaas.arora@gmail.com) +// Jyrki Alakuijala (jyrki@google.com) + +#ifndef WEBP_DSP_LOSSLESS_H_ +#define WEBP_DSP_LOSSLESS_H_ + +#include "../webp/types.h" +#include "../webp/decode.h" + +// #include "../enc/histogram.h" +#include "../utils/utils.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Not a trivial literal symbol. +#define VP8L_NON_TRIVIAL_SYM (0xffffffff) + +//------------------------------------------------------------------------------ +// Decoding + +typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top); +extern VP8LPredictorFunc VP8LPredictors[16]; + +typedef void (*VP8LProcessBlueAndRedFunc)(uint32_t* argb_data, int num_pixels); +extern VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed; + +typedef struct { + // Note: the members are uint8_t, so that any negative values are + // automatically converted to "mod 256" values. + uint8_t green_to_red_; + uint8_t green_to_blue_; + uint8_t red_to_blue_; +} VP8LMultipliers; +typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m, + uint32_t* argb_data, int num_pixels); +extern VP8LTransformColorFunc VP8LTransformColorInverse; + +struct VP8LTransform; // Defined in dec/vp8li.h. + +// Performs inverse transform of data given transform information, start and end +// rows. Transform will be applied to rows [row_start, row_end[. +// The *in and *out pointers refer to source and destination data respectively +// corresponding to the intermediate row (row_start). +void VP8LInverseTransform(const struct VP8LTransform* const transform, + int row_start, int row_end, + const uint32_t* const in, uint32_t* const out); + +// Color space conversion. +typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels, + uint8_t* dst); +extern VP8LConvertFunc VP8LConvertBGRAToRGB; +extern VP8LConvertFunc VP8LConvertBGRAToRGBA; +extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444; +extern VP8LConvertFunc VP8LConvertBGRAToRGB565; +extern VP8LConvertFunc VP8LConvertBGRAToBGR; + +// Converts from BGRA to other color spaces. +void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, + WEBP_CSP_MODE out_colorspace, uint8_t* const rgba); + +// color mapping related functions. +static WEBP_INLINE uint32_t VP8GetARGBIndex(uint32_t idx) { + return (idx >> 8) & 0xff; +} + +static WEBP_INLINE uint8_t VP8GetAlphaIndex(uint8_t idx) { + return idx; +} + +static WEBP_INLINE uint32_t VP8GetARGBValue(uint32_t val) { + return val; +} + +static WEBP_INLINE uint8_t VP8GetAlphaValue(uint32_t val) { + return (val >> 8) & 0xff; +} + +typedef void (*VP8LMapARGBFunc)(const uint32_t* src, + const uint32_t* const color_map, + uint32_t* dst, int y_start, + int y_end, int width); +typedef void (*VP8LMapAlphaFunc)(const uint8_t* src, + const uint32_t* const color_map, + uint8_t* dst, int y_start, + int y_end, int width); + +extern VP8LMapARGBFunc VP8LMapColor32b; +extern VP8LMapAlphaFunc VP8LMapColor8b; + +// Similar to the static method ColorIndexInverseTransform() that is part of +// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than +// uint32_t) arguments for 'src' and 'dst'. +void VP8LColorIndexInverseTransformAlpha( + const struct VP8LTransform* const transform, int y_start, int y_end, + const uint8_t* src, uint8_t* dst); + +// Expose some C-only fallback functions +void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, + uint32_t* data, int num_pixels); + +void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, + int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToRGB565_C(const uint32_t* src, + int num_pixels, uint8_t* dst); +void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst); +void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels); + +// Must be called before calling any of the above methods. +void VP8LDspInit(void); + +//------------------------------------------------------------------------------ +// Encoding + +extern VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; +extern VP8LTransformColorFunc VP8LTransformColor; +typedef void (*VP8LCollectColorBlueTransformsFunc)( + const uint32_t* argb, int stride, + int tile_width, int tile_height, + int green_to_blue, int red_to_blue, int histo[]); +extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms; + +typedef void (*VP8LCollectColorRedTransformsFunc)( + const uint32_t* argb, int stride, + int tile_width, int tile_height, + int green_to_red, int histo[]); +extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms; + +// Expose some C-only fallback functions +void VP8LTransformColor_C(const VP8LMultipliers* const m, + uint32_t* data, int num_pixels); +void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels); + +//------------------------------------------------------------------------------ +// Image transforms. + +void VP8LResidualImage(int width, int height, int bits, int low_effort, + uint32_t* const argb, uint32_t* const argb_scratch, + uint32_t* const image); + +void VP8LColorSpaceTransform(int width, int height, int bits, int quality, + uint32_t* const argb, uint32_t* image); + +//------------------------------------------------------------------------------ +// Misc methods. + +// Computes sampled size of 'size' when sampling using 'sampling bits'. +static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size, + uint32_t sampling_bits) { + return (size + (1 << sampling_bits) - 1) >> sampling_bits; +} + +// ----------------------------------------------------------------------------- +// Faster logarithm for integers. Small values use a look-up table. +#define LOG_LOOKUP_IDX_MAX 256 +extern const float kLog2Table[LOG_LOOKUP_IDX_MAX]; +extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX]; +typedef float (*VP8LFastLog2SlowFunc)(uint32_t v); + +extern VP8LFastLog2SlowFunc VP8LFastLog2Slow; +extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow; + +static WEBP_INLINE float VP8LFastLog2(uint32_t v) { + return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v); +} +// Fast calculation of v * log2(v) for integer input. +static WEBP_INLINE float VP8LFastSLog2(uint32_t v) { + return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v); +} + +// ----------------------------------------------------------------------------- +// Huffman-cost related functions. + +typedef double (*VP8LCostFunc)(const uint32_t* population, int length); +typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, + int length); + +extern VP8LCostFunc VP8LExtraCost; +extern VP8LCostCombinedFunc VP8LExtraCostCombined; + +typedef struct { // small struct to hold counters + int counts[2]; // index: 0=zero steak, 1=non-zero streak + int streaks[2][2]; // [zero/non-zero][streak<3 / streak>=3] +} VP8LStreaks; + +typedef VP8LStreaks (*VP8LCostCountFunc)(const uint32_t* population, + int length); +typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const uint32_t* X, + const uint32_t* Y, int length); + +extern VP8LCostCountFunc VP8LHuffmanCostCount; +extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount; + +// Get the symbol entropy for the distribution 'population'. +// Set 'trivial_sym', if there's only one symbol present in the distribution. +double VP8LPopulationCost(const uint32_t* const population, int length, + uint32_t* const trivial_sym); + +// Get the combined symbol entropy for the distributions 'X' and 'Y'. +double VP8LGetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, int length); + +// ----------------------------------------------------------------------------- +// PrefixEncode() + +static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) { + const int log_floor = BitsLog2Floor(n); + if (n == (n & ~(n - 1))) // zero or a power of two. + return log_floor; + else + return log_floor + 1; +} + +// Splitting of distance and length codes into prefixes and +// extra bits. The prefixes are encoded with an entropy code +// while the extra bits are stored just as normal bits. +static WEBP_INLINE void VP8LPrefixEncodeBitsNoLUT(int distance, int* const code, + int* const extra_bits) { + const int highest_bit = BitsLog2Floor(--distance); + const int second_highest_bit = (distance >> (highest_bit - 1)) & 1; + *extra_bits = highest_bit - 1; + *code = 2 * highest_bit + second_highest_bit; +} + +static WEBP_INLINE void VP8LPrefixEncodeNoLUT(int distance, int* const code, + int* const extra_bits, + int* const extra_bits_value) { + const int highest_bit = BitsLog2Floor(--distance); + const int second_highest_bit = (distance >> (highest_bit - 1)) & 1; + *extra_bits = highest_bit - 1; + *extra_bits_value = distance & ((1 << *extra_bits) - 1); + *code = 2 * highest_bit + second_highest_bit; +} + +#define PREFIX_LOOKUP_IDX_MAX 512 +typedef struct { + int8_t code_; + int8_t extra_bits_; +} VP8LPrefixCode; + +// These tables are derived using VP8LPrefixEncodeNoLUT. +extern const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX]; +extern const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX]; +static WEBP_INLINE void VP8LPrefixEncodeBits(int distance, int* const code, + int* const extra_bits) { + if (distance < PREFIX_LOOKUP_IDX_MAX) { + const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance]; + *code = prefix_code.code_; + *extra_bits = prefix_code.extra_bits_; + } else { + VP8LPrefixEncodeBitsNoLUT(distance, code, extra_bits); + } +} + +static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code, + int* const extra_bits, + int* const extra_bits_value) { + if (distance < PREFIX_LOOKUP_IDX_MAX) { + const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance]; + *code = prefix_code.code_; + *extra_bits = prefix_code.extra_bits_; + *extra_bits_value = kPrefixEncodeExtraBitsValue[distance]; + } else { + VP8LPrefixEncodeNoLUT(distance, code, extra_bits, extra_bits_value); + } +} + +// In-place difference of each component with mod 256. +static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) { + const uint32_t alpha_and_green = + 0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u); + const uint32_t red_and_blue = + 0xff00ff00u + (a & 0x00ff00ffu) - (b & 0x00ff00ffu); + return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); +} + +void VP8LBundleColorMap(const uint8_t* const row, int width, + int xbits, uint32_t* const dst); + +// Must be called before calling any of the above methods. +void VP8LEncDspInit(void); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_DSP_LOSSLESS_H_ diff --git a/src/loaders/webp/dsp/meson.build b/src/loaders/webp/dsp/meson.build new file mode 100644 index 00000000..9f9eb593 --- /dev/null +++ b/src/loaders/webp/dsp/meson.build @@ -0,0 +1,20 @@ +source_file = [ + 'yuv.h', + 'dsp.h', + 'lossless.h', + 'alpha_processing.cpp', + 'argb.cpp', + 'cpu.cpp', + 'dec.cpp', + 'dec_clip_tables.cpp', + 'filters.cpp', + 'lossless.cpp', + 'rescaler.cpp', + 'upsampling.cpp', + 'yuv.cpp' +] + +webp_deb += [declare_dependency( + include_directories : include_directories('.'), + sources : source_file +)] \ No newline at end of file diff --git a/src/loaders/webp/dsp/rescaler.cpp b/src/loaders/webp/dsp/rescaler.cpp new file mode 100644 index 00000000..640b462c --- /dev/null +++ b/src/loaders/webp/dsp/rescaler.cpp @@ -0,0 +1,115 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Rescaling functions + +#include "./dsp.h" +#include "../utils/rescaler.h" + +//------------------------------------------------------------------------------ +// Implementations of critical functions ImportRow / ExportRow + +#define ROUNDER (1 << (WEBP_RESCALER_RFIX - 1)) +#define MULT_FIX(x, y) (((int64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX) + +static void RescalerImportRowC(WebPRescaler* const wrk, + const uint8_t* const src, int channel) { + const int x_stride = wrk->num_channels; + const int x_out_max = wrk->dst_width * wrk->num_channels; + int x_in = channel; + int x_out; + int accum = 0; + if (!wrk->x_expand) { + int sum = 0; + for (x_out = channel; x_out < x_out_max; x_out += x_stride) { + accum += wrk->x_add; + for (; accum > 0; accum -= wrk->x_sub) { + sum += src[x_in]; + x_in += x_stride; + } + { // Emit next horizontal pixel. + const int32_t base = src[x_in]; + const int32_t frac = base * (-accum); + x_in += x_stride; + wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac; + // fresh fractional start for next pixel + sum = (int)MULT_FIX(frac, wrk->fx_scale); + } + } + } else { // simple bilinear interpolation + int left = src[channel], right = src[channel]; + for (x_out = channel; x_out < x_out_max; x_out += x_stride) { + if (accum < 0) { + left = right; + x_in += x_stride; + right = src[x_in]; + accum += wrk->x_add; + } + wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; + accum -= wrk->x_sub; + } + } + // Accumulate the contribution of the new row. + for (x_out = channel; x_out < x_out_max; x_out += x_stride) { + wrk->irow[x_out] += wrk->frow[x_out]; + } +} + +void WebPRescalerExportRowC(WebPRescaler* const wrk, int x_out) { + if (wrk->y_accum <= 0) { + uint8_t* const dst = wrk->dst; + int32_t* const irow = wrk->irow; + const int32_t* const frow = wrk->frow; + const int yscale = wrk->fy_scale * (-wrk->y_accum); + const int x_out_max = wrk->dst_width * wrk->num_channels; + for (; x_out < x_out_max; ++x_out) { + const int frac = (int)MULT_FIX(frow[x_out], yscale); + const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); + dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; + irow[x_out] = frac; // new fractional start + } + wrk->y_accum += wrk->y_add; + wrk->dst += wrk->dst_stride; + } +} + +#undef MULT_FIX +#undef ROUNDER + +//------------------------------------------------------------------------------ + +void (*WebPRescalerImportRow)(struct WebPRescaler* const wrk, + const uint8_t* const src, int channel); +void (*WebPRescalerExportRow)(struct WebPRescaler* const wrk, int x_out); + +extern void WebPRescalerDspInitMIPS32(void); +extern void WebPRescalerDspInitMIPSdspR2(void); + +static volatile VP8CPUInfo rescaler_last_cpuinfo_used = + (VP8CPUInfo)&rescaler_last_cpuinfo_used; + +WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) { + if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return; + + WebPRescalerImportRow = RescalerImportRowC; + WebPRescalerExportRow = WebPRescalerExportRowC; + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_MIPS32) + if (VP8GetCPUInfo(kMIPS32)) { + WebPRescalerDspInitMIPS32(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + WebPRescalerDspInitMIPSdspR2(); + } +#endif + } + rescaler_last_cpuinfo_used = VP8GetCPUInfo; +} diff --git a/src/loaders/webp/dsp/upsampling.cpp b/src/loaders/webp/dsp/upsampling.cpp new file mode 100644 index 00000000..151a32ee --- /dev/null +++ b/src/loaders/webp/dsp/upsampling.cpp @@ -0,0 +1,252 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// YUV to RGB upsampling functions. +// +// Author: somnath@google.com (Somnath Banerjee) + +#include "./dsp.h" +#include "./yuv.h" + +#include + +//------------------------------------------------------------------------------ +// Fancy upsampler + +#ifdef FANCY_UPSAMPLING + +// Fancy upsampling functions to convert YUV to RGB +WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST]; + +// Given samples laid out in a square as: +// [a b] +// [c d] +// we interpolate u/v as: +// ([9*a + 3*b + 3*c + d 3*a + 9*b + 3*c + d] + [8 8]) / 16 +// ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16 + +// We process u and v together stashed into 32bit (16bit each). +#define LOAD_UV(u, v) ((u) | ((v) << 16)) + +#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \ +static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ + const uint8_t* top_u, const uint8_t* top_v, \ + const uint8_t* cur_u, const uint8_t* cur_v, \ + uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ + int x; \ + const int last_pixel_pair = (len - 1) >> 1; \ + uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \ + uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]); /* left-sample */ \ + assert(top_y != NULL); \ + { \ + const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ + FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \ + } \ + if (bottom_y != NULL) { \ + const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ + FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst); \ + } \ + for (x = 1; x <= last_pixel_pair; ++x) { \ + const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */ \ + const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]); /* sample */ \ + /* precompute invariant values associated with first and second diagonals*/\ + const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u; \ + const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3; \ + const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3; \ + { \ + const uint32_t uv0 = (diag_12 + tl_uv) >> 1; \ + const uint32_t uv1 = (diag_03 + t_uv) >> 1; \ + FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ + top_dst + (2 * x - 1) * XSTEP); \ + FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), \ + top_dst + (2 * x - 0) * XSTEP); \ + } \ + if (bottom_y != NULL) { \ + const uint32_t uv0 = (diag_03 + l_uv) >> 1; \ + const uint32_t uv1 = (diag_12 + uv) >> 1; \ + FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ + bottom_dst + (2 * x - 1) * XSTEP); \ + FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), \ + bottom_dst + (2 * x + 0) * XSTEP); \ + } \ + tl_uv = t_uv; \ + l_uv = uv; \ + } \ + if (!(len & 1)) { \ + { \ + const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ + FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ + top_dst + (len - 1) * XSTEP); \ + } \ + if (bottom_y != NULL) { \ + const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ + FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ + bottom_dst + (len - 1) * XSTEP); \ + } \ + } \ +} + +// All variants implemented. +UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3) +UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3) +UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4) +UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4) +UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4) +UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2) +UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2) + +#undef LOAD_UV +#undef UPSAMPLE_FUNC + +#endif // FANCY_UPSAMPLING + +//------------------------------------------------------------------------------ + +#if !defined(FANCY_UPSAMPLING) +#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC) \ +static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \ + const uint8_t* top_u, const uint8_t* top_v, \ + const uint8_t* bot_u, const uint8_t* bot_v, \ + uint8_t* top_dst, uint8_t* bot_dst, int len) { \ + const int half_len = len >> 1; \ + int x; \ + assert(top_dst != NULL); \ + { \ + for (x = 0; x < half_len; ++x) { \ + FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x + 0); \ + FUNC(top_y[2 * x + 1], top_u[x], top_v[x], top_dst + 8 * x + 4); \ + } \ + if (len & 1) FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x); \ + } \ + if (bot_dst != NULL) { \ + for (x = 0; x < half_len; ++x) { \ + FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x + 0); \ + FUNC(bot_y[2 * x + 1], bot_u[x], bot_v[x], bot_dst + 8 * x + 4); \ + } \ + if (len & 1) FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x); \ + } \ +} + +DUAL_SAMPLE_FUNC(DualLineSamplerBGRA, VP8YuvToBgra) +DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb) +#undef DUAL_SAMPLE_FUNC + +#endif // !FANCY_UPSAMPLING + +WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) { + WebPInitUpsamplers(); + VP8YUVInit(); +#ifdef FANCY_UPSAMPLING + return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB]; +#else + return (alpha_is_last ? DualLineSamplerBGRA : DualLineSamplerARGB); +#endif +} + +//------------------------------------------------------------------------------ +// YUV444 converter + +#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \ +static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len) { \ + int i; \ + for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \ +} + +YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb, 3) +YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr, 3) +YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba, 4) +YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra, 4) +YUV444_FUNC(Yuv444ToArgb, VP8YuvToArgb, 4) +YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2) +YUV444_FUNC(Yuv444ToRgb565, VP8YuvToRgb565, 2) + +#undef YUV444_FUNC + +WebPYUV444Converter WebPYUV444Converters[MODE_LAST]; + +extern void WebPInitYUV444ConvertersMIPSdspR2(void); + +static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 = + (VP8CPUInfo)&upsampling_last_cpuinfo_used1; + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) { + if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return; + + WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb; + WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba; + WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr; + WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra; + WebPYUV444Converters[MODE_ARGB] = Yuv444ToArgb; + WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444; + WebPYUV444Converters[MODE_RGB_565] = Yuv444ToRgb565; + WebPYUV444Converters[MODE_rgbA] = Yuv444ToRgba; + WebPYUV444Converters[MODE_bgrA] = Yuv444ToBgra; + WebPYUV444Converters[MODE_Argb] = Yuv444ToArgb; + WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444; + + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + WebPInitYUV444ConvertersMIPSdspR2(); + } +#endif + } + upsampling_last_cpuinfo_used1 = VP8GetCPUInfo; +} + +//------------------------------------------------------------------------------ +// Main calls + +extern void WebPInitUpsamplersSSE2(void); +extern void WebPInitUpsamplersNEON(void); +extern void WebPInitUpsamplersMIPSdspR2(void); + +static volatile VP8CPUInfo upsampling_last_cpuinfo_used2 = + (VP8CPUInfo)&upsampling_last_cpuinfo_used2; + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { + if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return; + +#ifdef FANCY_UPSAMPLING + WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair; + WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair; + WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair; + WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair; + WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair; + WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair; + WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair; + WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair; + WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair; + WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair; + WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + WebPInitUpsamplersSSE2(); + } +#endif +#if defined(WEBP_USE_NEON) + if (VP8GetCPUInfo(kNEON)) { + WebPInitUpsamplersNEON(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + WebPInitUpsamplersMIPSdspR2(); + } +#endif + } +#endif // FANCY_UPSAMPLING + upsampling_last_cpuinfo_used2 = VP8GetCPUInfo; +} + +//------------------------------------------------------------------------------ diff --git a/src/loaders/webp/dsp/yuv.cpp b/src/loaders/webp/dsp/yuv.cpp new file mode 100644 index 00000000..086a9257 --- /dev/null +++ b/src/loaders/webp/dsp/yuv.cpp @@ -0,0 +1,166 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// YUV->RGB conversion functions +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./yuv.h" + +#if defined(WEBP_YUV_USE_TABLE) + +static int done = 0; + +static WEBP_INLINE uint8_t clip(int v, int max_value) { + return v < 0 ? 0 : v > max_value ? max_value : v; +} + +int16_t VP8kVToR[256], VP8kUToB[256]; +int32_t VP8kVToG[256], VP8kUToG[256]; +uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN]; +uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN]; + +WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) { + int i; + if (done) { + return; + } +#ifndef USE_YUVj + for (i = 0; i < 256; ++i) { + VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX; + VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF; + VP8kVToG[i] = -45773 * (i - 128); + VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX; + } + for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) { + const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX; + VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255); + VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15); + } +#else + for (i = 0; i < 256; ++i) { + VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX; + VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF; + VP8kVToG[i] = -46802 * (i - 128); + VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX; + } + for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) { + const int k = i; + VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255); + VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15); + } +#endif + + done = 1; +} + +#else + +WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {} + +#endif // WEBP_YUV_USE_TABLE + +//----------------------------------------------------------------------------- +// Plain-C version + +#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP) \ +static void FUNC_NAME(const uint8_t* y, \ + const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len) { \ + const uint8_t* const end = dst + (len & ~1) * XSTEP; \ + while (dst != end) { \ + FUNC(y[0], u[0], v[0], dst); \ + FUNC(y[1], u[0], v[0], dst + XSTEP); \ + y += 2; \ + ++u; \ + ++v; \ + dst += 2 * XSTEP; \ + } \ + if (len & 1) { \ + FUNC(y[0], u[0], v[0], dst); \ + } \ +} \ + +// All variants implemented. +ROW_FUNC(YuvToRgbRow, VP8YuvToRgb, 3) +ROW_FUNC(YuvToBgrRow, VP8YuvToBgr, 3) +ROW_FUNC(YuvToRgbaRow, VP8YuvToRgba, 4) +ROW_FUNC(YuvToBgraRow, VP8YuvToBgra, 4) +ROW_FUNC(YuvToArgbRow, VP8YuvToArgb, 4) +ROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2) +ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2) + +#undef ROW_FUNC + +// Main call for processing a plane with a WebPSamplerRowFunc function: +void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, + const uint8_t* u, const uint8_t* v, int uv_stride, + uint8_t* dst, int dst_stride, + int width, int height, WebPSamplerRowFunc func) { + int j; + for (j = 0; j < height; ++j) { + func(y, u, v, dst, width); + y += y_stride; + if (j & 1) { + u += uv_stride; + v += uv_stride; + } + dst += dst_stride; + } +} + +//----------------------------------------------------------------------------- +// Main call + +WebPSamplerRowFunc WebPSamplers[MODE_LAST]; + +extern void WebPInitSamplersSSE2(void); +extern void WebPInitSamplersMIPS32(void); +extern void WebPInitSamplersMIPSdspR2(void); + +static volatile VP8CPUInfo yuv_last_cpuinfo_used = + (VP8CPUInfo)&yuv_last_cpuinfo_used; + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) { + if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return; + + WebPSamplers[MODE_RGB] = YuvToRgbRow; + WebPSamplers[MODE_RGBA] = YuvToRgbaRow; + WebPSamplers[MODE_BGR] = YuvToBgrRow; + WebPSamplers[MODE_BGRA] = YuvToBgraRow; + WebPSamplers[MODE_ARGB] = YuvToArgbRow; + WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row; + WebPSamplers[MODE_RGB_565] = YuvToRgb565Row; + WebPSamplers[MODE_rgbA] = YuvToRgbaRow; + WebPSamplers[MODE_bgrA] = YuvToBgraRow; + WebPSamplers[MODE_Argb] = YuvToArgbRow; + WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + WebPInitSamplersSSE2(); + } +#endif // WEBP_USE_SSE2 +#if defined(WEBP_USE_MIPS32) + if (VP8GetCPUInfo(kMIPS32)) { + WebPInitSamplersMIPS32(); + } +#endif // WEBP_USE_MIPS32 +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + WebPInitSamplersMIPSdspR2(); + } +#endif // WEBP_USE_MIPS_DSP_R2 + } + yuv_last_cpuinfo_used = VP8GetCPUInfo; +} + +//----------------------------------------------------------------------------- diff --git a/src/loaders/webp/dsp/yuv.h b/src/loaders/webp/dsp/yuv.h new file mode 100644 index 00000000..8a47edd8 --- /dev/null +++ b/src/loaders/webp/dsp/yuv.h @@ -0,0 +1,321 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// inline YUV<->RGB conversion function +// +// The exact naming is Y'CbCr, following the ITU-R BT.601 standard. +// More information at: http://en.wikipedia.org/wiki/YCbCr +// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16 +// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128 +// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128 +// We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX). +// +// For the Y'CbCr to RGB conversion, the BT.601 specification reads: +// R = 1.164 * (Y-16) + 1.596 * (V-128) +// G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128) +// B = 1.164 * (Y-16) + 2.018 * (U-128) +// where Y is in the [16,235] range, and U/V in the [16,240] range. +// In the table-lookup version (WEBP_YUV_USE_TABLE), the common factor +// "1.164 * (Y-16)" can be handled as an offset in the VP8kClip[] table. +// So in this case the formulae should read: +// R = 1.164 * [Y + 1.371 * (V-128) ] - 18.624 +// G = 1.164 * [Y - 0.698 * (V-128) - 0.336 * (U-128)] - 18.624 +// B = 1.164 * [Y + 1.733 * (U-128)] - 18.624 +// once factorized. +// For YUV->RGB conversion, only 14bit fixed precision is used (YUV_FIX2). +// That's the maximum possible for a convenient ARM implementation. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_DSP_YUV_H_ +#define WEBP_DSP_YUV_H_ + +#include "./dsp.h" +#include "../dec/decode_vp8.h" + +// Define the following to use the LUT-based code: +// #define WEBP_YUV_USE_TABLE + +#if defined(WEBP_EXPERIMENTAL_FEATURES) +// Do NOT activate this feature for real compression. This is only experimental! +// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace. +// This colorspace is close to Rec.601's Y'CbCr model with the notable +// difference of allowing larger range for luma/chroma. +// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its +// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion +// #define USE_YUVj +#endif + +//------------------------------------------------------------------------------ +// YUV -> RGB conversion + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + YUV_FIX = 16, // fixed-point precision for RGB->YUV + YUV_HALF = 1 << (YUV_FIX - 1), + YUV_MASK = (256 << YUV_FIX) - 1, + YUV_RANGE_MIN = -227, // min value of r/g/b output + YUV_RANGE_MAX = 256 + 226, // max value of r/g/b output + + YUV_FIX2 = 14, // fixed-point precision for YUV->RGB + YUV_HALF2 = 1 << (YUV_FIX2 - 1), + YUV_MASK2 = (256 << YUV_FIX2) - 1 +}; + +// These constants are 14b fixed-point version of ITU-R BT.601 constants. +#define kYScale 19077 // 1.164 = 255 / 219 +#define kVToR 26149 // 1.596 = 255 / 112 * 0.701 +#define kUToG 6419 // 0.391 = 255 / 112 * 0.886 * 0.114 / 0.587 +#define kVToG 13320 // 0.813 = 255 / 112 * 0.701 * 0.299 / 0.587 +#define kUToB 33050 // 2.018 = 255 / 112 * 0.886 +#define kRCst (-kYScale * 16 - kVToR * 128 + YUV_HALF2) +#define kGCst (-kYScale * 16 + kUToG * 128 + kVToG * 128 + YUV_HALF2) +#define kBCst (-kYScale * 16 - kUToB * 128 + YUV_HALF2) + +//------------------------------------------------------------------------------ + +#if !defined(WEBP_YUV_USE_TABLE) + +// slower on x86 by ~7-8%, but bit-exact with the SSE2 version + +static WEBP_INLINE int VP8Clip8(int v) { + return ((v & ~YUV_MASK2) == 0) ? (v >> YUV_FIX2) : (v < 0) ? 0 : 255; +} + +static WEBP_INLINE int VP8YUVToR(int y, int v) { + return VP8Clip8(kYScale * y + kVToR * v + kRCst); +} + +static WEBP_INLINE int VP8YUVToG(int y, int u, int v) { + return VP8Clip8(kYScale * y - kUToG * u - kVToG * v + kGCst); +} + +static WEBP_INLINE int VP8YUVToB(int y, int u) { + return VP8Clip8(kYScale * y + kUToB * u + kBCst); +} + +static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v, + uint8_t* const rgb) { + rgb[0] = VP8YUVToR(y, v); + rgb[1] = VP8YUVToG(y, u, v); + rgb[2] = VP8YUVToB(y, u); +} + +static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v, + uint8_t* const bgr) { + bgr[0] = VP8YUVToB(y, u); + bgr[1] = VP8YUVToG(y, u, v); + bgr[2] = VP8YUVToR(y, v); +} + +static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v, + uint8_t* const rgb) { + const int r = VP8YUVToR(y, v); // 5 usable bits + const int g = VP8YUVToG(y, u, v); // 6 usable bits + const int b = VP8YUVToB(y, u); // 5 usable bits + const int rg = (r & 0xf8) | (g >> 5); + const int gb = ((g << 3) & 0xe0) | (b >> 3); +#ifdef WEBP_SWAP_16BIT_CSP + rgb[0] = gb; + rgb[1] = rg; +#else + rgb[0] = rg; + rgb[1] = gb; +#endif +} + +static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v, + uint8_t* const argb) { + const int r = VP8YUVToR(y, v); // 4 usable bits + const int g = VP8YUVToG(y, u, v); // 4 usable bits + const int b = VP8YUVToB(y, u); // 4 usable bits + const int rg = (r & 0xf0) | (g >> 4); + const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits +#ifdef WEBP_SWAP_16BIT_CSP + argb[0] = ba; + argb[1] = rg; +#else + argb[0] = rg; + argb[1] = ba; +#endif +} + +#else + +// Table-based version, not totally equivalent to the SSE2 version. +// Rounding diff is only +/-1 though. + +extern int16_t VP8kVToR[256], VP8kUToB[256]; +extern int32_t VP8kVToG[256], VP8kUToG[256]; +extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN]; +extern uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN]; + +static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v, + uint8_t* const rgb) { + const int r_off = VP8kVToR[v]; + const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; + const int b_off = VP8kUToB[u]; + rgb[0] = VP8kClip[y + r_off - YUV_RANGE_MIN]; + rgb[1] = VP8kClip[y + g_off - YUV_RANGE_MIN]; + rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN]; +} + +static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v, + uint8_t* const bgr) { + const int r_off = VP8kVToR[v]; + const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; + const int b_off = VP8kUToB[u]; + bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN]; + bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN]; + bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN]; +} + +static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v, + uint8_t* const rgb) { + const int r_off = VP8kVToR[v]; + const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; + const int b_off = VP8kUToB[u]; + const int rg = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) | + (VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5)); + const int gb = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) | + (VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3)); +#ifdef WEBP_SWAP_16BIT_CSP + rgb[0] = gb; + rgb[1] = rg; +#else + rgb[0] = rg; + rgb[1] = gb; +#endif +} + +static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v, + uint8_t* const argb) { + const int r_off = VP8kVToR[v]; + const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; + const int b_off = VP8kUToB[u]; + const int rg = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) | + VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]); + const int ba = (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4) | 0x0f; +#ifdef WEBP_SWAP_16BIT_CSP + argb[0] = ba; + argb[1] = rg; +#else + argb[0] = rg; + argb[1] = ba; +#endif +} + +#endif // WEBP_YUV_USE_TABLE + +//----------------------------------------------------------------------------- +// Alpha handling variants + +static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v, + uint8_t* const argb) { + argb[0] = 0xff; + VP8YuvToRgb(y, u, v, argb + 1); +} + +static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v, + uint8_t* const bgra) { + VP8YuvToBgr(y, u, v, bgra); + bgra[3] = 0xff; +} + +static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v, + uint8_t* const rgba) { + VP8YuvToRgb(y, u, v, rgba); + rgba[3] = 0xff; +} + +// Must be called before everything, to initialize the tables. +void VP8YUVInit(void); + +//----------------------------------------------------------------------------- +// SSE2 extra functions (mostly for upsampling_sse2.c) + +#if defined(WEBP_USE_SSE2) + +// When the following is defined, tables are initialized statically, adding ~12k +// to the binary size. Otherwise, they are initialized at run-time (small cost). +#define WEBP_YUV_USE_SSE2_TABLES + +#if defined(FANCY_UPSAMPLING) +// Process 32 pixels and store the result (24b or 32b per pixel) in *dst. +void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst); +void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst); +void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst); +void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst); +#endif // FANCY_UPSAMPLING + +// Must be called to initialize tables before using the functions. +void VP8YUVInitSSE2(void); + +#endif // WEBP_USE_SSE2 + +//------------------------------------------------------------------------------ +// RGB -> YUV conversion + +// Stub functions that can be called with various rounding values: +static WEBP_INLINE int VP8ClipUV(int uv, int rounding) { + uv = (uv + rounding + (128 << (YUV_FIX + 2))) >> (YUV_FIX + 2); + return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255; +} + +#ifndef USE_YUVj + +static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) { + const int luma = 16839 * r + 33059 * g + 6420 * b; + return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX; // no need to clip +} + +static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) { + const int u = -9719 * r - 19081 * g + 28800 * b; + return VP8ClipUV(u, rounding); +} + +static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) { + const int v = +28800 * r - 24116 * g - 4684 * b; + return VP8ClipUV(v, rounding); +} + +#else + +// This JPEG-YUV colorspace, only for comparison! +// These are also 16bit precision coefficients from Rec.601, but with full +// [0..255] output range. +static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) { + const int luma = 19595 * r + 38470 * g + 7471 * b; + return (luma + rounding) >> YUV_FIX; // no need to clip +} + +static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) { + const int u = -11058 * r - 21710 * g + 32768 * b; + return VP8ClipUV(u, rounding); +} + +static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) { + const int v = 32768 * r - 27439 * g - 5329 * b; + return VP8ClipUV(v, rounding); +} + +#endif // USE_YUVj + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_DSP_YUV_H_ */ diff --git a/src/loaders/webp/meson.build b/src/loaders/webp/meson.build new file mode 100644 index 00000000..5b069da6 --- /dev/null +++ b/src/loaders/webp/meson.build @@ -0,0 +1,17 @@ +webp_deb = [] + +subdir('dec') +subdir('dsp') +subdir('utils') +subdir('webp') + +source_file = [ + 'tvgWebpLoader.h', + 'tvgWebpLoader.cpp', +] + +subloader_dep += [declare_dependency( + include_directories : include_directories('.'), + dependencies : webp_deb, + sources : source_file + )] diff --git a/src/loaders/webp/tvgWebpLoader.cpp b/src/loaders/webp/tvgWebpLoader.cpp new file mode 100644 index 00000000..c9a7eb62 --- /dev/null +++ b/src/loaders/webp/tvgWebpLoader.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2024 the ThorVG project. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "webp/decode.h" +#include "tvgWebpLoader.h" + + +/************************************************************************/ +/* Internal Class Implementation */ +/************************************************************************/ + +void WebpLoader::clear() +{ + if (freeData) free(data); + data = nullptr; + freeData = false; +} + + +void WebpLoader::run(unsigned tid) +{ + //TODO: we can figure out the requested image format in advance. + surface.buf8 = WebPDecodeBGRA(data, size, nullptr, nullptr); + surface.stride = static_cast(w); + surface.w = static_cast(w); + surface.h = static_cast(h); + surface.cs = ColorSpace::ARGB8888; + surface.channelSize = sizeof(uint32_t); + surface.premultiplied = false; + + clear(); +} + + +/************************************************************************/ +/* External Class Implementation */ +/************************************************************************/ + +WebpLoader::WebpLoader() : ImageLoader(FileType::Webp) +{ +} + + +WebpLoader::~WebpLoader() +{ + clear(); + free(surface.buf8); +} + + +bool WebpLoader::open(const string& path) +{ + auto f = fopen(path.c_str(), "rb"); + if (!f) return false; + + fseek(f, 0, SEEK_END); + + size = ftell(f); + if (size == 0) { + fclose(f); + return false; + } + + data = (uint8_t*)malloc(size); + + fseek(f, 0, SEEK_SET); + auto ret = fread(data, sizeof(char), size, f); + if (ret < size) { + fclose(f); + return false; + } + + fclose(f); + + int width, height; + if (!WebPGetInfo(data, size, &width, &height)) return false; + + w = static_cast(width); + h = static_cast(height); + freeData = true; + + return true; +} + + +bool WebpLoader::open(const char* data, uint32_t size, bool copy) +{ + if (copy) { + this->data = (uint8_t*) malloc(size); + if (!this->data) return false; + memcpy((uint8_t*)this->data, data, size); + freeData = true; + } else { + this->data = (uint8_t*) data; + freeData = false; + } + + int width, height; + if (!WebPGetInfo(this->data, size, &width, &height)) return false; + + w = static_cast(width); + h = static_cast(height); + this->size = size; + + return true; +} + + +bool WebpLoader::read() +{ + if (!LoadModule::read()) return true; + + if (!data || w == 0 || h == 0) return false; + + TaskScheduler::request(this); + + return true; +} + + +bool WebpLoader::close() +{ + if (!LoadModule::close()) return false; + this->done(); + return true; +} + + +Surface* WebpLoader::bitmap() +{ + this->done(); + + return ImageLoader::bitmap(); +} \ No newline at end of file diff --git a/src/loaders/webp/tvgWebpLoader.h b/src/loaders/webp/tvgWebpLoader.h new file mode 100644 index 00000000..61655761 --- /dev/null +++ b/src/loaders/webp/tvgWebpLoader.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024 the ThorVG project. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _TVG_WEBP_LOADER_H_ +#define _TVG_WEBP_LOADER_H_ + +#include "tvgLoader.h" +#include "tvgTaskScheduler.h" + +class WebpLoader : public ImageLoader, public Task +{ +private: + uint8_t* data = nullptr; + uint32_t size = 0; + bool freeData = false; + + void clear(); + void run(unsigned tid) override; + +public: + WebpLoader(); + ~WebpLoader(); + + bool open(const string& path) override; + bool open(const char* data, uint32_t size, bool copy) override; + bool read() override; + bool close() override; + + Surface* bitmap() override; +}; + +#endif //_TVG_WEBP_LOADER_H_ \ No newline at end of file diff --git a/src/loaders/webp/utils/bit_reader.cpp b/src/loaders/webp/utils/bit_reader.cpp new file mode 100644 index 00000000..a268c646 --- /dev/null +++ b/src/loaders/webp/utils/bit_reader.cpp @@ -0,0 +1,216 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Boolean decoder non-inlined methods +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + +#include "./bit_reader_inl.h" + +//------------------------------------------------------------------------------ +// VP8BitReader + +void VP8InitBitReader(VP8BitReader* const br, + const uint8_t* const start, const uint8_t* const end) { + assert(br != NULL); + assert(start != NULL); + assert(start <= end); + br->range_ = 255 - 1; + br->buf_ = start; + br->buf_end_ = end; + br->value_ = 0; + br->bits_ = -8; // to load the very first 8bits + br->eof_ = 0; + VP8LoadNewBytes(br); +} + +void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) { + if (br->buf_ != NULL) { + br->buf_ += offset; + br->buf_end_ += offset; + } +} + +const uint8_t kVP8Log2Range[128] = { + 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0 +}; + +// range = ((range - 1) << kVP8Log2Range[range]) + 1 +const uint8_t kVP8NewRange[128] = { + 127, 127, 191, 127, 159, 191, 223, 127, + 143, 159, 175, 191, 207, 223, 239, 127, + 135, 143, 151, 159, 167, 175, 183, 191, + 199, 207, 215, 223, 231, 239, 247, 127, + 131, 135, 139, 143, 147, 151, 155, 159, + 163, 167, 171, 175, 179, 183, 187, 191, + 195, 199, 203, 207, 211, 215, 219, 223, + 227, 231, 235, 239, 243, 247, 251, 127, + 129, 131, 133, 135, 137, 139, 141, 143, + 145, 147, 149, 151, 153, 155, 157, 159, + 161, 163, 165, 167, 169, 171, 173, 175, + 177, 179, 181, 183, 185, 187, 189, 191, + 193, 195, 197, 199, 201, 203, 205, 207, + 209, 211, 213, 215, 217, 219, 221, 223, + 225, 227, 229, 231, 233, 235, 237, 239, + 241, 243, 245, 247, 249, 251, 253, 127 +}; + +void VP8LoadFinalBytes(VP8BitReader* const br) { + assert(br != NULL && br->buf_ != NULL); + // Only read 8bits at a time + if (br->buf_ < br->buf_end_) { + br->bits_ += 8; + br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8); + } else if (!br->eof_) { + br->value_ <<= 8; + br->bits_ += 8; + br->eof_ = 1; + } else { + br->bits_ = 0; // This is to avoid undefined behaviour with shifts. + } +} + +//------------------------------------------------------------------------------ +// Higher-level calls + +uint32_t VP8GetValue(VP8BitReader* const br, int bits) { + uint32_t v = 0; + while (bits-- > 0) { + v |= VP8GetBit(br, 0x80) << bits; + } + return v; +} + +int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) { + const int value = VP8GetValue(br, bits); + return VP8Get(br) ? -value : value; +} + +//------------------------------------------------------------------------------ +// VP8LBitReader + +#define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits. + +#if !defined(WEBP_FORCE_ALIGNED) && \ + (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ + defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(_M_X64)) +#define VP8L_USE_UNALIGNED_LOAD +#endif + +static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = { + 0, + 0x000001, 0x000003, 0x000007, 0x00000f, + 0x00001f, 0x00003f, 0x00007f, 0x0000ff, + 0x0001ff, 0x0003ff, 0x0007ff, 0x000fff, + 0x001fff, 0x003fff, 0x007fff, 0x00ffff, + 0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff, + 0x1fffff, 0x3fffff, 0x7fffff, 0xffffff +}; + +void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start, + size_t length) { + size_t i; + vp8l_val_t value = 0; + assert(br != NULL); + assert(start != NULL); + assert(length < 0xfffffff8u); // can't happen with a RIFF chunk. + + br->len_ = length; + br->val_ = 0; + br->bit_pos_ = 0; + br->eos_ = 0; + + if (length > sizeof(br->val_)) { + length = sizeof(br->val_); + } + for (i = 0; i < length; ++i) { + value |= (vp8l_val_t)start[i] << (8 * i); + } + br->val_ = value; + br->pos_ = length; + br->buf_ = start; +} + +void VP8LBitReaderSetBuffer(VP8LBitReader* const br, + const uint8_t* const buf, size_t len) { + assert(br != NULL); + assert(buf != NULL); + assert(len < 0xfffffff8u); // can't happen with a RIFF chunk. + br->buf_ = buf; + br->len_ = len; + // pos_ > len_ should be considered a param error. + br->eos_ = (br->pos_ > br->len_) || VP8LIsEndOfStream(br); +} + +static void VP8LSetEndOfStream(VP8LBitReader* const br) { + br->eos_ = 1; + br->bit_pos_ = 0; // To avoid undefined behaviour with shifts. +} + +// If not at EOS, reload up to VP8L_LBITS byte-by-byte +static void ShiftBytes(VP8LBitReader* const br) { + while (br->bit_pos_ >= 8 && br->pos_ < br->len_) { + br->val_ >>= 8; + br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (VP8L_LBITS - 8); + ++br->pos_; + br->bit_pos_ -= 8; + } + if (VP8LIsEndOfStream(br)) { + VP8LSetEndOfStream(br); + } +} + +void VP8LDoFillBitWindow(VP8LBitReader* const br) { + assert(br->bit_pos_ >= VP8L_WBITS); + // TODO(jzern): given the fixed read size it may be possible to force + // alignment in this block. +#if defined(VP8L_USE_UNALIGNED_LOAD) + if (br->pos_ + sizeof(br->val_) < br->len_) { + br->val_ >>= VP8L_WBITS; + br->bit_pos_ -= VP8L_WBITS; + // The expression below needs a little-endian arch to work correctly. + // This gives a large speedup for decoding speed. + br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) << + (VP8L_LBITS - VP8L_WBITS); + br->pos_ += VP8L_LOG8_WBITS; + return; + } +#endif + ShiftBytes(br); // Slow path. +} + +uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) { + assert(n_bits >= 0); + // Flag an error if end_of_stream or n_bits is more than allowed limit. + if (!br->eos_ && n_bits <= VP8L_MAX_NUM_BIT_READ) { + const uint32_t val = VP8LPrefetchBits(br) & kBitMask[n_bits]; + const int new_bits = br->bit_pos_ + n_bits; + br->bit_pos_ = new_bits; + ShiftBytes(br); + return val; + } else { + VP8LSetEndOfStream(br); + return 0; + } +} + +//------------------------------------------------------------------------------ diff --git a/src/loaders/webp/utils/bit_reader.h b/src/loaders/webp/utils/bit_reader.h new file mode 100644 index 00000000..4ebbe539 --- /dev/null +++ b/src/loaders/webp/utils/bit_reader.h @@ -0,0 +1,168 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Boolean decoder +// +// Author: Skal (pascal.massimino@gmail.com) +// Vikas Arora (vikaas.arora@gmail.com) + +#ifndef WEBP_UTILS_BIT_READER_H_ +#define WEBP_UTILS_BIT_READER_H_ + +#include +#ifdef _MSC_VER +#include // _byteswap_ulong +#endif +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// The Boolean decoder needs to maintain infinite precision on the value_ field. +// However, since range_ is only 8bit, we only need an active window of 8 bits +// for value_. Left bits (MSB) gets zeroed and shifted away when value_ falls +// below 128, range_ is updated, and fresh bits read from the bitstream are +// brought in as LSB. To avoid reading the fresh bits one by one (slow), we +// cache BITS of them ahead. The total of (BITS + 8) bits must fit into a +// natural register (with type bit_t). To fetch BITS bits from bitstream we +// use a type lbit_t. +// +// BITS can be any multiple of 8 from 8 to 56 (inclusive). +// Pick values that fit natural register size. + +#if defined(__i386__) || defined(_M_IX86) // x86 32bit +#define BITS 24 +#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit +#define BITS 56 +#elif defined(__arm__) || defined(_M_ARM) // ARM +#define BITS 24 +#elif defined(__mips__) // MIPS +#define BITS 24 +#else // reasonable default +#define BITS 24 // TODO(skal): test aarch64 and find the proper BITS value. +#endif + +//------------------------------------------------------------------------------ +// Derived types and constants: +// bit_t = natural register type for storing 'value_' (which is BITS+8 bits) +// range_t = register for 'range_' (which is 8bits only) + +#if (BITS > 24) +typedef uint64_t bit_t; +#else +typedef uint32_t bit_t; +#endif + +typedef uint32_t range_t; + +//------------------------------------------------------------------------------ +// Bitreader + +typedef struct VP8BitReader VP8BitReader; +struct VP8BitReader { + // boolean decoder (keep the field ordering as is!) + bit_t value_; // current value + range_t range_; // current range minus 1. In [127, 254] interval. + int bits_; // number of valid bits left + // read buffer + const uint8_t* buf_; // next byte to be read + const uint8_t* buf_end_; // end of read buffer + int eof_; // true if input is exhausted +}; + +// Initialize the bit reader and the boolean decoder. +void VP8InitBitReader(VP8BitReader* const br, + const uint8_t* const start, const uint8_t* const end); + +// Update internal pointers to displace the byte buffer by the +// relative offset 'offset'. +void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset); + +// return the next value made of 'num_bits' bits +uint32_t VP8GetValue(VP8BitReader* const br, int num_bits); +static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) { + return VP8GetValue(br, 1); +} + +// return the next value with sign-extension. +int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits); + +// bit_reader_inl.h will implement the following methods: +// static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) +// static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) +// and should be included by the .c files that actually need them. +// This is to avoid recompiling the whole library whenever this file is touched, +// and also allowing platform-specific ad-hoc hacks. + +// ----------------------------------------------------------------------------- +// Bitreader for lossless format + +// maximum number of bits (inclusive) the bit-reader can handle: +#define VP8L_MAX_NUM_BIT_READ 24 + +#define VP8L_LBITS 64 // Number of bits prefetched (= bit-size of vp8l_val_t). +#define VP8L_WBITS 32 // Minimum number of bytes ready after VP8LFillBitWindow. + +typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit. + +typedef struct { + vp8l_val_t val_; // pre-fetched bits + const uint8_t* buf_; // input byte buffer + size_t len_; // buffer length + size_t pos_; // byte position in buf_ + int bit_pos_; // current bit-reading position in val_ + int eos_; // true if a bit was read past the end of buffer +} VP8LBitReader; + +void VP8LInitBitReader(VP8LBitReader* const br, + const uint8_t* const start, + size_t length); + +// Sets a new data buffer. +void VP8LBitReaderSetBuffer(VP8LBitReader* const br, + const uint8_t* const buffer, size_t length); + +// Reads the specified number of bits from read buffer. +// Flags an error in case end_of_stream or n_bits is more than the allowed limit +// of VP8L_MAX_NUM_BIT_READ (inclusive). +// Flags eos_ if this read attempt is going to cross the read buffer. +uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits); + +// Return the prefetched bits, so they can be looked up. +static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) { + return (uint32_t)(br->val_ >> (br->bit_pos_ & (VP8L_LBITS - 1))); +} + +// Returns true if there was an attempt at reading bit past the end of +// the buffer. Doesn't set br->eos_ flag. +static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) { + assert(br->pos_ <= br->len_); + return br->eos_ || ((br->pos_ == br->len_) && (br->bit_pos_ > VP8L_LBITS)); +} + +// For jumping over a number of bits in the bit stream when accessed with +// VP8LPrefetchBits and VP8LFillBitWindow. +static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) { + br->bit_pos_ = val; + br->eos_ = VP8LIsEndOfStream(br); +} + +// Advances the read buffer by 4 bytes to make room for reading next 32 bits. +// Speed critical, but infrequent part of the code can be non-inlined. +extern void VP8LDoFillBitWindow(VP8LBitReader* const br); +static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) { + if (br->bit_pos_ >= VP8L_WBITS) VP8LDoFillBitWindow(br); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_UTILS_BIT_READER_H_ */ diff --git a/src/loaders/webp/utils/bit_reader_inl.h b/src/loaders/webp/utils/bit_reader_inl.h new file mode 100644 index 00000000..1f79c941 --- /dev/null +++ b/src/loaders/webp/utils/bit_reader_inl.h @@ -0,0 +1,172 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Specific inlined methods for boolean decoder [VP8GetBit() ...] +// This file should be included by the .c sources that actually need to call +// these methods. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_UTILS_BIT_READER_INL_H_ +#define WEBP_UTILS_BIT_READER_INL_H_ + +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + +#ifdef WEBP_FORCE_ALIGNED +#include // memcpy +#endif + +#include "../dsp/dsp.h" +#include "./bit_reader.h" +#include "./endian_inl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// Derived type lbit_t = natural type for memory I/O + +#if (BITS > 32) +typedef uint64_t lbit_t; +#elif (BITS > 16) +typedef uint32_t lbit_t; +#elif (BITS > 8) +typedef uint16_t lbit_t; +#else +typedef uint8_t lbit_t; +#endif + +extern const uint8_t kVP8Log2Range[128]; +extern const uint8_t kVP8NewRange[128]; + +// special case for the tail byte-reading +void VP8LoadFinalBytes(VP8BitReader* const br); + +//------------------------------------------------------------------------------ +// Inlined critical functions + +// makes sure br->value_ has at least BITS bits worth of data +static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) { + assert(br != NULL && br->buf_ != NULL); + // Read 'BITS' bits at a time if possible. + if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) { + // convert memory type to register type (with some zero'ing!) + bit_t bits; +#if defined(WEBP_FORCE_ALIGNED) + lbit_t in_bits; + memcpy(&in_bits, br->buf_, sizeof(in_bits)); +#elif defined(WEBP_USE_MIPS32) + // This is needed because of un-aligned read. + lbit_t in_bits; + lbit_t* p_buf_ = (lbit_t*)br->buf_; + __asm__ volatile( + ".set push \n\t" + ".set at \n\t" + ".set macro \n\t" + "ulw %[in_bits], 0(%[p_buf_]) \n\t" + ".set pop \n\t" + : [in_bits]"=r"(in_bits) + : [p_buf_]"r"(p_buf_) + : "memory", "at" + ); +#else + const lbit_t in_bits = *(const lbit_t*)br->buf_; +#endif + br->buf_ += BITS >> 3; +#if !defined(WORDS_BIGENDIAN) +#if (BITS > 32) + bits = BSwap64(in_bits); + bits >>= 64 - BITS; +#elif (BITS >= 24) + bits = BSwap32(in_bits); + bits >>= (32 - BITS); +#elif (BITS == 16) + bits = BSwap16(in_bits); +#else // BITS == 8 + bits = (bit_t)in_bits; +#endif // BITS > 32 +#else // WORDS_BIGENDIAN + bits = (bit_t)in_bits; + if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS); +#endif + br->value_ = bits | (br->value_ << BITS); + br->bits_ += BITS; + } else { + VP8LoadFinalBytes(br); // no need to be inlined + } +} + +// Read a bit with proba 'prob'. Speed-critical function! +static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) { + // Don't move this declaration! It makes a big speed difference to store + // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't + // alter br->range_ value. + range_t range = br->range_; + if (br->bits_ < 0) { + VP8LoadNewBytes(br); + } + { + const int pos = br->bits_; + const range_t split = (range * prob) >> 8; + const range_t value = (range_t)(br->value_ >> pos); +#if defined(__arm__) || defined(_M_ARM) // ARM-specific + const int bit = ((int)(split - value) >> 31) & 1; + if (value > split) { + range -= split + 1; + br->value_ -= (bit_t)(split + 1) << pos; + } else { + range = split; + } +#else // faster version on x86 + int bit; // Don't use 'const int bit = (value > split);", it's slower. + if (value > split) { + range -= split + 1; + br->value_ -= (bit_t)(split + 1) << pos; + bit = 1; + } else { + range = split; + bit = 0; + } +#endif + if (range <= (range_t)0x7e) { + const int shift = kVP8Log2Range[range]; + range = kVP8NewRange[range]; + br->bits_ -= shift; + } + br->range_ = range; + return bit; + } +} + +// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here) +static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) { + if (br->bits_ < 0) { + VP8LoadNewBytes(br); + } + { + const int pos = br->bits_; + const range_t split = br->range_ >> 1; + const range_t value = (range_t)(br->value_ >> pos); + const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0 + br->bits_ -= 1; + br->range_ += mask; + br->range_ |= 1; + br->value_ -= (bit_t)((split + 1) & mask) << pos; + return (v ^ mask) - mask; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_UTILS_BIT_READER_INL_H_ diff --git a/src/loaders/webp/utils/color_cache.cpp b/src/loaders/webp/utils/color_cache.cpp new file mode 100644 index 00000000..bedaa72a --- /dev/null +++ b/src/loaders/webp/utils/color_cache.cpp @@ -0,0 +1,48 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Color Cache for WebP Lossless +// +// Author: Jyrki Alakuijala (jyrki@google.com) + +#include +#include +#include +#include "./color_cache.h" +#include "../utils/utils.h" + +//------------------------------------------------------------------------------ +// VP8LColorCache. + +int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) { + const int hash_size = 1 << hash_bits; + assert(cc != NULL); + assert(hash_bits > 0); + cc->colors_ = (uint32_t*)calloc((uint64_t)hash_size, sizeof(*cc->colors_)); + if (cc->colors_ == NULL) return 0; + cc->hash_shift_ = 32 - hash_bits; + cc->hash_bits_ = hash_bits; + return 1; +} + +void VP8LColorCacheClear(VP8LColorCache* const cc) { + if (cc != NULL) { + free(cc->colors_); + cc->colors_ = NULL; + } +} + +void VP8LColorCacheCopy(const VP8LColorCache* const src, + VP8LColorCache* const dst) { + assert(src != NULL); + assert(dst != NULL); + assert(src->hash_bits_ == dst->hash_bits_); + memcpy(dst->colors_, src->colors_, + ((size_t)1u << dst->hash_bits_) * sizeof(*dst->colors_)); +} diff --git a/src/loaders/webp/utils/color_cache.h b/src/loaders/webp/utils/color_cache.h new file mode 100644 index 00000000..a1c4b3aa --- /dev/null +++ b/src/loaders/webp/utils/color_cache.h @@ -0,0 +1,74 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Color Cache for WebP Lossless +// +// Authors: Jyrki Alakuijala (jyrki@google.com) +// Urvang Joshi (urvang@google.com) + +#ifndef WEBP_UTILS_COLOR_CACHE_H_ +#define WEBP_UTILS_COLOR_CACHE_H_ + +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Main color cache struct. +typedef struct { + uint32_t *colors_; // color entries + int hash_shift_; // Hash shift: 32 - hash_bits_. + int hash_bits_; +} VP8LColorCache; + +static const uint32_t kHashMul = 0x1e35a7bd; + +static WEBP_INLINE uint32_t VP8LColorCacheLookup( + const VP8LColorCache* const cc, uint32_t key) { + assert(key <= (~0U >> cc->hash_shift_)); + return cc->colors_[key]; +} + +static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc, + uint32_t argb) { + const uint32_t key = (kHashMul * argb) >> cc->hash_shift_; + cc->colors_[key] = argb; +} + +static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc, + uint32_t argb) { + return (kHashMul * argb) >> cc->hash_shift_; +} + +static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc, + uint32_t argb) { + const uint32_t key = (kHashMul * argb) >> cc->hash_shift_; + return cc->colors_[key] == argb; +} + +//------------------------------------------------------------------------------ + +// Initializes the color cache with 'hash_bits' bits for the keys. +// Returns false in case of memory error. +int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits); + +void VP8LColorCacheCopy(const VP8LColorCache* const src, + VP8LColorCache* const dst); + +// Delete the memory associated to color cache. +void VP8LColorCacheClear(VP8LColorCache* const color_cache); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} +#endif + +#endif // WEBP_UTILS_COLOR_CACHE_H_ diff --git a/src/loaders/webp/utils/endian_inl.h b/src/loaders/webp/utils/endian_inl.h new file mode 100644 index 00000000..cd56c37f --- /dev/null +++ b/src/loaders/webp/utils/endian_inl.h @@ -0,0 +1,100 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Endian related functions. + +#ifndef WEBP_UTILS_ENDIAN_INL_H_ +#define WEBP_UTILS_ENDIAN_INL_H_ + +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + +#include "../dsp/dsp.h" +#include "../webp/types.h" + +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + +#if defined(WORDS_BIGENDIAN) +#define HToLE32 BSwap32 +#define HToLE16 BSwap16 +#else +#define HToLE32(x) (x) +#define HToLE16(x) (x) +#endif + +#if !defined(HAVE_CONFIG_H) +// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64 +#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3) +#define HAVE_BUILTIN_BSWAP32 +#define HAVE_BUILTIN_BSWAP64 +#endif +// clang-3.3 and gcc-4.8 have a builtin function for swap16 +#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3) +#define HAVE_BUILTIN_BSWAP16 +#endif +#endif // !HAVE_CONFIG_H + +static WEBP_INLINE uint16_t BSwap16(uint16_t x) { +#if defined(HAVE_BUILTIN_BSWAP16) + return __builtin_bswap16(x); +#elif defined(_MSC_VER) + return _byteswap_ushort(x); +#else + // gcc will recognize a 'rorw $8, ...' here: + return (x >> 8) | ((x & 0xff) << 8); +#endif // HAVE_BUILTIN_BSWAP16 +} + +static WEBP_INLINE uint32_t BSwap32(uint32_t x) { +#if defined(WEBP_USE_MIPS32_R2) + uint32_t ret; + __asm__ volatile ( + "wsbh %[ret], %[x] \n\t" + "rotr %[ret], %[ret], 16 \n\t" + : [ret]"=r"(ret) + : [x]"r"(x) + ); + return ret; +#elif defined(HAVE_BUILTIN_BSWAP32) + return __builtin_bswap32(x); +#elif defined(__i386__) || defined(__x86_64__) + uint32_t swapped_bytes; + __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint32_t)_byteswap_ulong(x); +#else + return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); +#endif // HAVE_BUILTIN_BSWAP32 +} + +static WEBP_INLINE uint64_t BSwap64(uint64_t x) { +#if defined(HAVE_BUILTIN_BSWAP64) + return __builtin_bswap64(x); +#elif defined(__x86_64__) + uint64_t swapped_bytes; + __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint64_t)_byteswap_uint64(x); +#else // generic code for swapping 64-bit values (suggested by bdb@) + x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); + x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); + x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8); + return x; +#endif // HAVE_BUILTIN_BSWAP64 +} + +#endif // WEBP_UTILS_ENDIAN_INL_H_ diff --git a/src/loaders/webp/utils/huffman.cpp b/src/loaders/webp/utils/huffman.cpp new file mode 100644 index 00000000..d7af79dd --- /dev/null +++ b/src/loaders/webp/utils/huffman.cpp @@ -0,0 +1,204 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Utilities for building and looking up Huffman trees. +// +// Author: Urvang Joshi (urvang@google.com) + +#include +#include +#include +#include "./huffman.h" +#include "../utils/utils.h" +#include "../webp/format_constants.h" + +// Huffman data read via DecodeImageStream is represented in two (red and green) +// bytes. +#define MAX_HTREE_GROUPS 0x10000 + +HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups) { + HTreeGroup* const htree_groups = (HTreeGroup*)malloc(num_htree_groups * sizeof(*htree_groups)); + if (htree_groups == NULL) { + return NULL; + } + assert(num_htree_groups <= MAX_HTREE_GROUPS); + return htree_groups; +} + +void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups) { + if (htree_groups != NULL) { + free(htree_groups); + } +} + +// Returns reverse(reverse(key, len) + 1, len), where reverse(key, len) is the +// bit-wise reversal of the len least significant bits of key. +static WEBP_INLINE uint32_t GetNextKey(uint32_t key, int len) { + uint32_t step = 1 << (len - 1); + while (key & step) { + step >>= 1; + } + return (key & (step - 1)) + step; +} + +// Stores code in table[0], table[step], table[2*step], ..., table[end]. +// Assumes that end is an integer multiple of step. +static WEBP_INLINE void ReplicateValue(HuffmanCode* table, + int step, int end, + HuffmanCode code) { + assert(end % step == 0); + do { + end -= step; + table[end] = code; + } while (end > 0); +} + +// Returns the table width of the next 2nd level table. count is the histogram +// of bit lengths for the remaining symbols, len is the code length of the next +// processed symbol +static WEBP_INLINE int NextTableBitSize(const int* const count, + int len, int root_bits) { + int left = 1 << (len - root_bits); + while (len < MAX_ALLOWED_CODE_LENGTH) { + left -= count[len]; + if (left <= 0) break; + ++len; + left <<= 1; + } + return len - root_bits; +} + +int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits, + const int code_lengths[], int code_lengths_size) { + HuffmanCode* table = root_table; // next available space in table + int total_size = 1 << root_bits; // total size root table + 2nd level table + int* sorted = NULL; // symbols sorted by code length + int len; // current code length + int symbol; // symbol index in original or sorted table + // number of codes of each length: + int count[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 }; + // offsets in sorted table for each length: + int offset[MAX_ALLOWED_CODE_LENGTH + 1]; + + assert(code_lengths_size != 0); + assert(code_lengths != NULL); + assert(root_table != NULL); + assert(root_bits > 0); + + // Build histogram of code lengths. + for (symbol = 0; symbol < code_lengths_size; ++symbol) { + if (code_lengths[symbol] > MAX_ALLOWED_CODE_LENGTH) { + return 0; + } + ++count[code_lengths[symbol]]; + } + + // Error, all code lengths are zeros. + if (count[0] == code_lengths_size) { + return 0; + } + + // Generate offsets into sorted symbol table by code length. + offset[1] = 0; + for (len = 1; len < MAX_ALLOWED_CODE_LENGTH; ++len) { + if (count[len] > (1 << len)) { + return 0; + } + offset[len + 1] = offset[len] + count[len]; + } + + sorted = (int*)malloc(code_lengths_size * sizeof(*sorted)); + if (sorted == NULL) { + return 0; + } + + // Sort symbols by length, by symbol order within each length. + for (symbol = 0; symbol < code_lengths_size; ++symbol) { + const int symbol_code_length = code_lengths[symbol]; + if (code_lengths[symbol] > 0) { + sorted[offset[symbol_code_length]++] = symbol; + } + } + + // Special case code with only one value. + if (offset[MAX_ALLOWED_CODE_LENGTH] == 1) { + HuffmanCode code; + code.bits = 0; + code.value = (uint16_t)sorted[0]; + ReplicateValue(table, 1, total_size, code); + free(sorted); + return total_size; + } + + { + int step; // step size to replicate values in current table + uint32_t low = -1; // low bits for current root entry + uint32_t mask = total_size - 1; // mask for low bits + uint32_t key = 0; // reversed prefix code + int num_nodes = 1; // number of Huffman tree nodes + int num_open = 1; // number of open branches in current tree level + int table_bits = root_bits; // key length of current table + int table_size = 1 << table_bits; // size of current table + symbol = 0; + // Fill in root table. + for (len = 1, step = 2; len <= root_bits; ++len, step <<= 1) { + num_open <<= 1; + num_nodes += num_open; + num_open -= count[len]; + if (num_open < 0) { + free(sorted); + return 0; + } + for (; count[len] > 0; --count[len]) { + HuffmanCode code; + code.bits = (uint8_t)len; + code.value = (uint16_t)sorted[symbol++]; + ReplicateValue(&table[key], step, table_size, code); + key = GetNextKey(key, len); + } + } + + // Fill in 2nd level tables and add pointers to root table. + for (len = root_bits + 1, step = 2; len <= MAX_ALLOWED_CODE_LENGTH; + ++len, step <<= 1) { + num_open <<= 1; + num_nodes += num_open; + num_open -= count[len]; + if (num_open < 0) { + free(sorted); + return 0; + } + for (; count[len] > 0; --count[len]) { + HuffmanCode code; + if ((key & mask) != low) { + table += table_size; + table_bits = NextTableBitSize(count, len, root_bits); + table_size = 1 << table_bits; + total_size += table_size; + low = key & mask; + root_table[low].bits = (uint8_t)(table_bits + root_bits); + root_table[low].value = (uint16_t)((table - root_table) - low); + } + code.bits = (uint8_t)(len - root_bits); + code.value = (uint16_t)sorted[symbol++]; + ReplicateValue(&table[key >> root_bits], step, table_size, code); + key = GetNextKey(key, len); + } + } + + // Check if tree is full. + if (num_nodes != 2 * offset[MAX_ALLOWED_CODE_LENGTH] - 1) { + free(sorted); + return 0; + } + } + + free(sorted); + return total_size; +} diff --git a/src/loaders/webp/utils/huffman.h b/src/loaders/webp/utils/huffman.h new file mode 100644 index 00000000..8b34da89 --- /dev/null +++ b/src/loaders/webp/utils/huffman.h @@ -0,0 +1,67 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Utilities for building and looking up Huffman trees. +// +// Author: Urvang Joshi (urvang@google.com) + +#ifndef WEBP_UTILS_HUFFMAN_H_ +#define WEBP_UTILS_HUFFMAN_H_ + +#include +#include "../webp/format_constants.h" +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define HUFFMAN_TABLE_BITS 8 +#define HUFFMAN_TABLE_MASK ((1 << HUFFMAN_TABLE_BITS) - 1) + +#define LENGTHS_TABLE_BITS 7 +#define LENGTHS_TABLE_MASK ((1 << LENGTHS_TABLE_BITS) - 1) + + +// Huffman lookup table entry +typedef struct { + uint8_t bits; // number of bits used for this symbol + uint16_t value; // symbol value or table offset +} HuffmanCode; + +// Huffman table group. +typedef struct HTreeGroup HTreeGroup; +struct HTreeGroup { + HuffmanCode* htrees[HUFFMAN_CODES_PER_META_CODE]; + int is_trivial_literal; // True, if huffman trees for Red, Blue & Alpha + // Symbols are trivial (have a single code). + uint32_t literal_arb; // If is_trivial_literal is true, this is the + // ARGB value of the pixel, with Green channel + // being set to zero. +}; + +// Creates the instance of HTreeGroup with specified number of tree-groups. +HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups); + +// Releases the memory allocated for HTreeGroup. +void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups); + +// Builds Huffman lookup table assuming code lengths are in symbol order. +// The 'code_lengths' is pre-allocated temporary memory buffer used for creating +// the huffman table. +// Returns built table size or 0 in case of error (invalid tree or +// memory error). +int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits, + const int code_lengths[], int code_lengths_size); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_UTILS_HUFFMAN_H_ diff --git a/src/loaders/webp/utils/meson.build b/src/loaders/webp/utils/meson.build new file mode 100644 index 00000000..69f05f44 --- /dev/null +++ b/src/loaders/webp/utils/meson.build @@ -0,0 +1,22 @@ +source_file = [ + 'bit_reader.h', + 'bit_reader_inl.h', + 'color_cache.h', + 'endian_inl.h', + 'huffman.h', + 'quant_levels_dec.h', + 'random.h', + 'rescaler.h', + 'utils.h', + 'bit_reader.cpp', + 'color_cache.cpp', + 'huffman.cpp', + 'quant_levels_dec.cpp', + 'random.cpp', + 'rescaler.cpp' +] + +webp_deb += [declare_dependency( + include_directories : include_directories('.'), + sources : source_file +)] \ No newline at end of file diff --git a/src/loaders/webp/utils/quant_levels_dec.cpp b/src/loaders/webp/utils/quant_levels_dec.cpp new file mode 100644 index 00000000..5d054e50 --- /dev/null +++ b/src/loaders/webp/utils/quant_levels_dec.cpp @@ -0,0 +1,279 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Implement gradient smoothing: we replace a current alpha value by its +// surrounding average if it's close enough (that is: the change will be less +// than the minimum distance between two quantized level). +// We use sliding window for computing the 2d moving average. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./quant_levels_dec.h" +#include "./utils.h" + +#include +#include // for memset + +// #define USE_DITHERING // uncomment to enable ordered dithering (not vital) + +#define FIX 16 // fix-point precision for averaging +#define LFIX 2 // extra precision for look-up table +#define LUT_SIZE ((1 << (8 + LFIX)) - 1) // look-up table size + +#if defined(USE_DITHERING) + +#define DFIX 4 // extra precision for ordered dithering +#define DSIZE 4 // dithering size (must be a power of two) +// cf. http://en.wikipedia.org/wiki/Ordered_dithering +static const uint8_t kOrderedDither[DSIZE][DSIZE] = { + { 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision + { 12, 4, 14, 6 }, + { 3, 11, 1, 9 }, + { 15, 7, 13, 5 } +}; + +#else +#define DFIX 0 +#endif + +typedef struct { + int width_, height_; // dimension + int row_; // current input row being processed + uint8_t* src_; // input pointer + uint8_t* dst_; // output pointer + + int radius_; // filter radius (=delay) + int scale_; // normalization factor, in FIX bits precision + + void* mem_; // all memory + + // various scratch buffers + uint16_t* start_; + uint16_t* cur_; + uint16_t* end_; + uint16_t* top_; + uint16_t* average_; + + // input levels distribution + int num_levels_; // number of quantized levels + int min_, max_; // min and max level values + int min_level_dist_; // smallest distance between two consecutive levels + + int16_t* correction_; // size = 1 + 2*LUT_SIZE -> ~4k memory +} SmoothParams; + +//------------------------------------------------------------------------------ + +#define CLIP_MASK (int)(~0U << (8 + DFIX)) +static WEBP_INLINE uint8_t clip_8b(int v) { + return (!(v & CLIP_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u; +} + +// vertical accumulation +static void VFilter(SmoothParams* const p) { + const uint8_t* src = p->src_; + const int w = p->width_; + uint16_t* const cur = p->cur_; + const uint16_t* const top = p->top_; + uint16_t* const out = p->end_; + uint16_t sum = 0; // all arithmetic is modulo 16bit + int x; + + for (x = 0; x < w; ++x) { + uint16_t new_value; + sum += src[x]; + new_value = top[x] + sum; + out[x] = new_value - cur[x]; // vertical sum of 'r' pixels. + cur[x] = new_value; + } + // move input pointers one row down + p->top_ = p->cur_; + p->cur_ += w; + if (p->cur_ == p->end_) p->cur_ = p->start_; // roll-over + // We replicate edges, as it's somewhat easier as a boundary condition. + // That's why we don't update the 'src' pointer on top/bottom area: + if (p->row_ >= 0 && p->row_ < p->height_ - 1) { + p->src_ += p->width_; + } +} + +// horizontal accumulation. We use mirror replication of missing pixels, as it's +// a little easier to implement (surprisingly). +static void HFilter(SmoothParams* const p) { + const uint16_t* const in = p->end_; + uint16_t* const out = p->average_; + const uint32_t scale = p->scale_; + const int w = p->width_; + const int r = p->radius_; + + int x; + for (x = 0; x <= r; ++x) { // left mirroring + const uint16_t delta = in[x + r - 1] + in[r - x]; + out[x] = (delta * scale) >> FIX; + } + for (; x < w - r; ++x) { // bulk middle run + const uint16_t delta = in[x + r] - in[x - r - 1]; + out[x] = (delta * scale) >> FIX; + } + for (; x < w; ++x) { // right mirroring + const uint16_t delta = + 2 * in[w - 1] - in[2 * w - 2 - r - x] - in[x - r - 1]; + out[x] = (delta * scale) >> FIX; + } +} + +// emit one filtered output row +static void ApplyFilter(SmoothParams* const p) { + const uint16_t* const average = p->average_; + const int w = p->width_; + const int16_t* const correction = p->correction_; +#if defined(USE_DITHERING) + const uint8_t* const dither = kOrderedDither[p->row_ % DSIZE]; +#endif + uint8_t* const dst = p->dst_; + int x; + for (x = 0; x < w; ++x) { + const int v = dst[x]; + if (v < p->max_ && v > p->min_) { + const int c = (v << DFIX) + correction[average[x] - (v << LFIX)]; +#if defined(USE_DITHERING) + dst[x] = clip_8b(c + dither[x % DSIZE]); +#else + dst[x] = clip_8b(c); +#endif + } + } + p->dst_ += w; // advance output pointer +} + +//------------------------------------------------------------------------------ +// Initialize correction table + +static void InitCorrectionLUT(int16_t* const lut, int min_dist) { + // The correction curve is: + // f(x) = x for x <= threshold2 + // f(x) = 0 for x >= threshold1 + // and a linear interpolation for range x=[threshold2, threshold1] + // (along with f(-x) = -f(x) symmetry). + // Note that: threshold2 = 3/4 * threshold1 + const int threshold1 = min_dist << LFIX; + const int threshold2 = (3 * threshold1) >> 2; + const int max_threshold = threshold2 << DFIX; + const int delta = threshold1 - threshold2; + int i; + for (i = 1; i <= LUT_SIZE; ++i) { + int c = (i <= threshold2) ? (i << DFIX) + : (i < threshold1) ? max_threshold * (threshold1 - i) / delta + : 0; + c >>= LFIX; + lut[+i] = +c; + lut[-i] = -c; + } + lut[0] = 0; +} + +static void CountLevels(const uint8_t* const data, int size, + SmoothParams* const p) { + int i, last_level; + uint8_t used_levels[256] = { 0 }; + p->min_ = 255; + p->max_ = 0; + for (i = 0; i < size; ++i) { + const int v = data[i]; + if (v < p->min_) p->min_ = v; + if (v > p->max_) p->max_ = v; + used_levels[v] = 1; + } + // Compute the mininum distance between two non-zero levels. + p->min_level_dist_ = p->max_ - p->min_; + last_level = -1; + for (i = 0; i < 256; ++i) { + if (used_levels[i]) { + ++p->num_levels_; + if (last_level >= 0) { + const int level_dist = i - last_level; + if (level_dist < p->min_level_dist_) { + p->min_level_dist_ = level_dist; + } + } + last_level = i; + } + } +} + +// Initialize all params. +static int InitParams(uint8_t* const data, int width, int height, + int radius, SmoothParams* const p) { + const int R = 2 * radius + 1; // total size of the kernel + + const size_t size_scratch_m = (R + 1) * width * sizeof(*p->start_); + const size_t size_m = width * sizeof(*p->average_); + const size_t size_lut = (1 + 2 * LUT_SIZE) * sizeof(*p->correction_); + const size_t total_size = size_scratch_m + size_m + size_lut; + uint8_t* mem = (uint8_t*)malloc(1U * total_size); + + if (mem == NULL) return 0; + p->mem_ = (void*)mem; + + p->start_ = (uint16_t*)mem; + p->cur_ = p->start_; + p->end_ = p->start_ + R * width; + p->top_ = p->end_ - width; + memset(p->top_, 0, width * sizeof(*p->top_)); + mem += size_scratch_m; + + p->average_ = (uint16_t*)mem; + mem += size_m; + + p->width_ = width; + p->height_ = height; + p->src_ = data; + p->dst_ = data; + p->radius_ = radius; + p->scale_ = (1 << (FIX + LFIX)) / (R * R); // normalization constant + p->row_ = -radius; + + // analyze the input distribution so we can best-fit the threshold + CountLevels(data, width * height, p); + + // correction table + p->correction_ = ((int16_t*)mem) + LUT_SIZE; + InitCorrectionLUT(p->correction_, p->min_level_dist_); + + return 1; +} + +static void CleanupParams(SmoothParams* const p) { + free(p->mem_); +} + +int WebPDequantizeLevels(uint8_t* const data, int width, int height, + int strength) { + const int radius = 4 * strength / 100; + if (strength < 0 || strength > 100) return 0; + if (data == NULL || width <= 0 || height <= 0) return 0; // bad params + if (radius > 0) { + SmoothParams p; + memset(&p, 0, sizeof(p)); + if (!InitParams(data, width, height, radius, &p)) return 0; + if (p.num_levels_ > 2) { + for (; p.row_ < p.height_; ++p.row_) { + VFilter(&p); // accumulate average of input + // Need to wait few rows in order to prime the filter, + // before emitting some output. + if (p.row_ >= p.radius_) { + HFilter(&p); + ApplyFilter(&p); + } + } + } + CleanupParams(&p); + } + return 1; +} diff --git a/src/loaders/webp/utils/quant_levels_dec.h b/src/loaders/webp/utils/quant_levels_dec.h new file mode 100644 index 00000000..9aab0680 --- /dev/null +++ b/src/loaders/webp/utils/quant_levels_dec.h @@ -0,0 +1,35 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Alpha plane de-quantization utility +// +// Author: Vikas Arora (vikasa@google.com) + +#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_ +#define WEBP_UTILS_QUANT_LEVELS_DEC_H_ + +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Apply post-processing to input 'data' of size 'width'x'height' assuming that +// the source was quantized to a reduced number of levels. +// Strength is in [0..100] and controls the amount of dithering applied. +// Returns false in case of error (data is NULL, invalid parameters, +// malloc failure, ...). +int WebPDequantizeLevels(uint8_t* const data, int width, int height, + int strength); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */ diff --git a/src/loaders/webp/utils/random.cpp b/src/loaders/webp/utils/random.cpp new file mode 100644 index 00000000..24e96ad6 --- /dev/null +++ b/src/loaders/webp/utils/random.cpp @@ -0,0 +1,43 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Pseudo-random utilities +// +// Author: Skal (pascal.massimino@gmail.com) + +#include +#include "./random.h" + +//------------------------------------------------------------------------------ + +// 31b-range values +static const uint32_t kRandomTable[VP8_RANDOM_TABLE_SIZE] = { + 0x0de15230, 0x03b31886, 0x775faccb, 0x1c88626a, 0x68385c55, 0x14b3b828, + 0x4a85fef8, 0x49ddb84b, 0x64fcf397, 0x5c550289, 0x4a290000, 0x0d7ec1da, + 0x5940b7ab, 0x5492577d, 0x4e19ca72, 0x38d38c69, 0x0c01ee65, 0x32a1755f, + 0x5437f652, 0x5abb2c32, 0x0faa57b1, 0x73f533e7, 0x685feeda, 0x7563cce2, + 0x6e990e83, 0x4730a7ed, 0x4fc0d9c6, 0x496b153c, 0x4f1403fa, 0x541afb0c, + 0x73990b32, 0x26d7cb1c, 0x6fcc3706, 0x2cbb77d8, 0x75762f2a, 0x6425ccdd, + 0x24b35461, 0x0a7d8715, 0x220414a8, 0x141ebf67, 0x56b41583, 0x73e502e3, + 0x44cab16f, 0x28264d42, 0x73baaefb, 0x0a50ebed, 0x1d6ab6fb, 0x0d3ad40b, + 0x35db3b68, 0x2b081e83, 0x77ce6b95, 0x5181e5f0, 0x78853bbc, 0x009f9494, + 0x27e5ed3c +}; + +void VP8InitRandom(VP8Random* const rg, float dithering) { + memcpy(rg->tab_, kRandomTable, sizeof(rg->tab_)); + rg->index1_ = 0; + rg->index2_ = 31; + rg->amp_ = (dithering < 0.0) ? 0 + : (dithering > 1.0) ? (1 << VP8_RANDOM_DITHER_FIX) + : (uint32_t)((1 << VP8_RANDOM_DITHER_FIX) * dithering); +} + +//------------------------------------------------------------------------------ + diff --git a/src/loaders/webp/utils/random.h b/src/loaders/webp/utils/random.h new file mode 100644 index 00000000..c392a615 --- /dev/null +++ b/src/loaders/webp/utils/random.h @@ -0,0 +1,63 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Pseudo-random utilities +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_UTILS_RANDOM_H_ +#define WEBP_UTILS_RANDOM_H_ + +#include +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define VP8_RANDOM_DITHER_FIX 8 // fixed-point precision for dithering +#define VP8_RANDOM_TABLE_SIZE 55 + +typedef struct { + int index1_, index2_; + uint32_t tab_[VP8_RANDOM_TABLE_SIZE]; + int amp_; +} VP8Random; + +// Initializes random generator with an amplitude 'dithering' in range [0..1]. +void VP8InitRandom(VP8Random* const rg, float dithering); + +// Returns a centered pseudo-random number with 'num_bits' amplitude. +// (uses D.Knuth's Difference-based random generator). +// 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision. +static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits, + int amp) { + int diff; + assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31); + diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_]; + if (diff < 0) diff += (1u << 31); + rg->tab_[rg->index1_] = diff; + if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0; + if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0; + // sign-extend, 0-center + diff = (int)((uint32_t)diff << 1) >> (32 - num_bits); + diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX; // restrict range + diff += 1 << (num_bits - 1); // shift back to 0.5-center + return diff; +} + +static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) { + return VP8RandomBits2(rg, num_bits, rg->amp_); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_UTILS_RANDOM_H_ */ diff --git a/src/loaders/webp/utils/rescaler.cpp b/src/loaders/webp/utils/rescaler.cpp new file mode 100644 index 00000000..cd45ae8d --- /dev/null +++ b/src/loaders/webp/utils/rescaler.cpp @@ -0,0 +1,82 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Rescaling functions +// +// Author: Skal (pascal.massimino@gmail.com) + +#include +#include +#include "../dsp/dsp.h" +#include "./rescaler.h" + +//------------------------------------------------------------------------------ + +void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, + uint8_t* const dst, int dst_width, int dst_height, + int dst_stride, int num_channels, int x_add, int x_sub, + int y_add, int y_sub, int32_t* const work) { + wrk->x_expand = (src_width < dst_width); + wrk->src_width = src_width; + wrk->src_height = src_height; + wrk->dst_width = dst_width; + wrk->dst_height = dst_height; + wrk->dst = dst; + wrk->dst_stride = dst_stride; + wrk->num_channels = num_channels; + // for 'x_expand', we use bilinear interpolation + wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; + wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; + wrk->y_accum = y_add; + wrk->y_add = y_add; + wrk->y_sub = y_sub; + wrk->fx_scale = (1 << WEBP_RESCALER_RFIX) / x_sub; + wrk->fy_scale = (1 << WEBP_RESCALER_RFIX) / y_sub; + wrk->fxy_scale = wrk->x_expand ? + ((int64_t)dst_height << WEBP_RESCALER_RFIX) / (x_sub * src_height) : + ((int64_t)dst_height << WEBP_RESCALER_RFIX) / (x_add * src_height); + wrk->irow = work; + wrk->frow = work + num_channels * dst_width; + + WebPRescalerDspInit(); +} + +//------------------------------------------------------------------------------ +// all-in-one calls + +int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) { + const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub; + return (num_lines > max_num_lines) ? max_num_lines : num_lines; +} + +int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, + const uint8_t* src, int src_stride) { + int total_imported = 0; + while (total_imported < num_lines && wrk->y_accum > 0) { + int channel; + for (channel = 0; channel < wrk->num_channels; ++channel) { + WebPRescalerImportRow(wrk, src, channel); + } + src += src_stride; + ++total_imported; + wrk->y_accum -= wrk->y_sub; + } + return total_imported; +} + +int WebPRescalerExport(WebPRescaler* const rescaler) { + int total_exported = 0; + while (WebPRescalerHasPendingOutput(rescaler)) { + WebPRescalerExportRow(rescaler, 0); + ++total_exported; + } + return total_exported; +} + +//------------------------------------------------------------------------------ diff --git a/src/loaders/webp/utils/rescaler.h b/src/loaders/webp/utils/rescaler.h new file mode 100644 index 00000000..dd138b1d --- /dev/null +++ b/src/loaders/webp/utils/rescaler.h @@ -0,0 +1,78 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Rescaling functions +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_UTILS_RESCALER_H_ +#define WEBP_UTILS_RESCALER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "../webp/types.h" + +#define WEBP_RESCALER_RFIX 30 // fixed-point precision for multiplies + +// Structure used for on-the-fly rescaling +typedef struct WebPRescaler WebPRescaler; +struct WebPRescaler { + int x_expand; // true if we're expanding in the x direction + int num_channels; // bytes to jump between pixels + int fy_scale, fx_scale; // fixed-point scaling factor + int64_t fxy_scale; // '' + // we need hpel-precise add/sub increments, for the downsampled U/V planes. + int y_accum; // vertical accumulator + int y_add, y_sub; // vertical increments (add ~= src, sub ~= dst) + int x_add, x_sub; // horizontal increments (add ~= src, sub ~= dst) + int src_width, src_height; // source dimensions + int dst_width, dst_height; // destination dimensions + uint8_t* dst; + int dst_stride; + int32_t* irow, *frow; // work buffer +}; + +// Initialize a rescaler given scratch area 'work' and dimensions of src & dst. +void WebPRescalerInit(WebPRescaler* const rescaler, + int src_width, int src_height, + uint8_t* const dst, + int dst_width, int dst_height, int dst_stride, + int num_channels, + int x_add, int x_sub, + int y_add, int y_sub, + int32_t* const work); + +// Returns the number of input lines needed next to produce one output line, +// considering that the maximum available input lines are 'max_num_lines'. +int WebPRescaleNeededLines(const WebPRescaler* const rescaler, + int max_num_lines); + +// Import multiple rows over all channels, until at least one row is ready to +// be exported. Returns the actual number of lines that were imported. +int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows, + const uint8_t* src, int src_stride); + +// Return true if there is pending output rows ready. +static WEBP_INLINE +int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) { + return (rescaler->y_accum <= 0); +} + +// Export as many rows as possible. Return the numbers of rows written. +int WebPRescalerExport(WebPRescaler* const rescaler); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_UTILS_RESCALER_H_ */ diff --git a/src/loaders/webp/utils/utils.h b/src/loaders/webp/utils/utils.h new file mode 100644 index 00000000..3dba9427 --- /dev/null +++ b/src/loaders/webp/utils/utils.h @@ -0,0 +1,68 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Misc. common utility functions +// +// Authors: Skal (pascal.massimino@gmail.com) +// Urvang (urvang@google.com) + +#ifndef WEBP_UTILS_UTILS_H_ +#define WEBP_UTILS_UTILS_H_ + +#include + +#include "../webp/types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// Returns (int)floor(log2(n)). n must be > 0. +// use GNU builtins where available. +#if defined(__GNUC__) && \ + ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) +static WEBP_INLINE int BitsLog2Floor(uint32_t n) { + return 31 ^ __builtin_clz(n); +} +#elif defined(_MSC_VER) && _MSC_VER > 1310 && \ + (defined(_M_X64) || defined(_M_IX86)) +#include +#pragma intrinsic(_BitScanReverse) + +static WEBP_INLINE int BitsLog2Floor(uint32_t n) { + uint32_t first_set_bit; + _BitScanReverse((unsigned long*)&first_set_bit, n); + return first_set_bit; +} +#else +static WEBP_INLINE int BitsLog2Floor(uint32_t n) { + int log = 0; + uint32_t value = n; + int i; + + for (i = 4; i >= 0; --i) { + const int shift = (1 << i); + const uint32_t x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + return log; +} +#endif + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_UTILS_UTILS_H_ */ diff --git a/src/loaders/webp/webp/decode.h b/src/loaders/webp/webp/decode.h new file mode 100644 index 00000000..e0dd5703 --- /dev/null +++ b/src/loaders/webp/webp/decode.h @@ -0,0 +1,493 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Main decoding functions for WebP images. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_WEBP_DECODE_H_ +#define WEBP_WEBP_DECODE_H_ + +#include "./types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define WEBP_DECODER_ABI_VERSION 0x0205 // MAJOR(8b) + MINOR(8b) + +// Note: forward declaring enumerations is not allowed in (strict) C and C++, +// the types are left here for reference. +// typedef enum VP8StatusCode VP8StatusCode; +// typedef enum WEBP_CSP_MODE WEBP_CSP_MODE; +typedef struct WebPRGBABuffer WebPRGBABuffer; +typedef struct WebPYUVABuffer WebPYUVABuffer; +typedef struct WebPDecBuffer WebPDecBuffer; +typedef struct WebPIDecoder WebPIDecoder; +typedef struct WebPBitstreamFeatures WebPBitstreamFeatures; +typedef struct WebPDecoderOptions WebPDecoderOptions; +typedef struct WebPDecoderConfig WebPDecoderConfig; + +// Return the decoder's version number, packed in hexadecimal using 8bits for +// each of major/minor/revision. E.g: v2.5.7 is 0x020507. +WEBP_EXTERN(int) WebPGetDecoderVersion(void); + +// Retrieve basic header information: width, height. +// This function will also validate the header and return 0 in +// case of formatting error. +// Pointers 'width' and 'height' can be passed NULL if deemed irrelevant. +WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size, + int* width, int* height); + +// Decodes WebP images pointed to by 'data' and returns RGBA samples, along +// with the dimensions in *width and *height. The ordering of samples in +// memory is R, G, B, A, R, G, B, A... in scan order (endian-independent). +// The returned pointer should be deleted calling free(). +// Returns NULL in case of error. +WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size, + int* width, int* height); + +// Same as WebPDecodeRGBA, but returning A, R, G, B, A, R, G, B... ordered data. +WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, size_t data_size, + int* width, int* height); + +// Same as WebPDecodeRGBA, but returning B, G, R, A, B, G, R, A... ordered data. +WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, size_t data_size, + int* width, int* height); + +// Same as WebPDecodeRGBA, but returning R, G, B, R, G, B... ordered data. +// If the bitstream contains transparency, it is ignored. +WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, size_t data_size, + int* width, int* height); + +// Same as WebPDecodeRGB, but returning B, G, R, B, G, R... ordered data. +WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size, + int* width, int* height); + + +// Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer +// returned is the Y samples buffer. Upon return, *u and *v will point to +// the U and V chroma data. These U and V buffers need NOT be free()'d, +// unlike the returned Y luma one. The dimension of the U and V planes +// are both (*width + 1) / 2 and (*height + 1)/ 2. +// Upon return, the Y buffer has a stride returned as '*stride', while U and V +// have a common stride returned as '*uv_stride'. +// Return NULL in case of error. +// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr +WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, size_t data_size, + int* width, int* height, + uint8_t** u, uint8_t** v, + int* stride, int* uv_stride); + +// These five functions are variants of the above ones, that decode the image +// directly into a pre-allocated buffer 'output_buffer'. The maximum storage +// available in this buffer is indicated by 'output_buffer_size'. If this +// storage is not sufficient (or an error occurred), NULL is returned. +// Otherwise, output_buffer is returned, for convenience. +// The parameter 'output_stride' specifies the distance (in bytes) +// between scanlines. Hence, output_buffer_size is expected to be at least +// output_stride x picture-height. +WEBP_EXTERN(uint8_t*) WebPDecodeRGBAInto( + const uint8_t* data, size_t data_size, + uint8_t* output_buffer, size_t output_buffer_size, int output_stride); +WEBP_EXTERN(uint8_t*) WebPDecodeARGBInto( + const uint8_t* data, size_t data_size, + uint8_t* output_buffer, size_t output_buffer_size, int output_stride); +WEBP_EXTERN(uint8_t*) WebPDecodeBGRAInto( + const uint8_t* data, size_t data_size, + uint8_t* output_buffer, size_t output_buffer_size, int output_stride); + +// RGB and BGR variants. Here too the transparency information, if present, +// will be dropped and ignored. +WEBP_EXTERN(uint8_t*) WebPDecodeRGBInto( + const uint8_t* data, size_t data_size, + uint8_t* output_buffer, size_t output_buffer_size, int output_stride); +WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto( + const uint8_t* data, size_t data_size, + uint8_t* output_buffer, size_t output_buffer_size, int output_stride); + +// WebPDecodeYUVInto() is a variant of WebPDecodeYUV() that operates directly +// into pre-allocated luma/chroma plane buffers. This function requires the +// strides to be passed: one for the luma plane and one for each of the +// chroma ones. The size of each plane buffer is passed as 'luma_size', +// 'u_size' and 'v_size' respectively. +// Pointer to the luma plane ('*luma') is returned or NULL if an error occurred +// during decoding (or because some buffers were found to be too small). +WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto( + const uint8_t* data, size_t data_size, + uint8_t* luma, size_t luma_size, int luma_stride, + uint8_t* u, size_t u_size, int u_stride, + uint8_t* v, size_t v_size, int v_stride); + +//------------------------------------------------------------------------------ +// Output colorspaces and buffer + +// Colorspaces +// Note: the naming describes the byte-ordering of packed samples in memory. +// For instance, MODE_BGRA relates to samples ordered as B,G,R,A,B,G,R,A,... +// Non-capital names (e.g.:MODE_Argb) relates to pre-multiplied RGB channels. +// RGBA-4444 and RGB-565 colorspaces are represented by following byte-order: +// RGBA-4444: [r3 r2 r1 r0 g3 g2 g1 g0], [b3 b2 b1 b0 a3 a2 a1 a0], ... +// RGB-565: [r4 r3 r2 r1 r0 g5 g4 g3], [g2 g1 g0 b4 b3 b2 b1 b0], ... +// In the case WEBP_SWAP_16BITS_CSP is defined, the bytes are swapped for +// these two modes: +// RGBA-4444: [b3 b2 b1 b0 a3 a2 a1 a0], [r3 r2 r1 r0 g3 g2 g1 g0], ... +// RGB-565: [g2 g1 g0 b4 b3 b2 b1 b0], [r4 r3 r2 r1 r0 g5 g4 g3], ... + +typedef enum WEBP_CSP_MODE { + MODE_RGB = 0, MODE_RGBA = 1, + MODE_BGR = 2, MODE_BGRA = 3, + MODE_ARGB = 4, MODE_RGBA_4444 = 5, + MODE_RGB_565 = 6, + // RGB-premultiplied transparent modes (alpha value is preserved) + MODE_rgbA = 7, + MODE_bgrA = 8, + MODE_Argb = 9, + MODE_rgbA_4444 = 10, + // YUV modes must come after RGB ones. + MODE_YUV = 11, MODE_YUVA = 12, // yuv 4:2:0 + MODE_LAST = 13 +} WEBP_CSP_MODE; + +// Some useful macros: +static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) { + return (mode == MODE_rgbA || mode == MODE_bgrA || mode == MODE_Argb || + mode == MODE_rgbA_4444); +} + +static WEBP_INLINE int WebPIsAlphaMode(WEBP_CSP_MODE mode) { + return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB || + mode == MODE_RGBA_4444 || mode == MODE_YUVA || + WebPIsPremultipliedMode(mode)); +} + +static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) { + return (mode < MODE_YUV); +} + +//------------------------------------------------------------------------------ +// WebPDecBuffer: Generic structure for describing the output sample buffer. + +struct WebPRGBABuffer { // view as RGBA + uint8_t* rgba; // pointer to RGBA samples + int stride; // stride in bytes from one scanline to the next. + size_t size; // total size of the *rgba buffer. +}; + +struct WebPYUVABuffer { // view as YUVA + uint8_t* y, *u, *v, *a; // pointer to luma, chroma U/V, alpha samples + int y_stride; // luma stride + int u_stride, v_stride; // chroma strides + int a_stride; // alpha stride + size_t y_size; // luma plane size + size_t u_size, v_size; // chroma planes size + size_t a_size; // alpha-plane size +}; + +// Output buffer +struct WebPDecBuffer { + WEBP_CSP_MODE colorspace; // Colorspace. + int width, height; // Dimensions. + int is_external_memory; // If true, 'internal_memory' pointer is not used. + union { + WebPRGBABuffer RGBA; + WebPYUVABuffer YUVA; + } u; // Nameless union of buffer parameters. + uint32_t pad[4]; // padding for later use + + uint8_t* private_memory; // Internally allocated memory (only when + // is_external_memory is false). Should not be used + // externally, but accessed via the buffer union. +}; + +// Internal, version-checked, entry point +WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int); + +// Initialize the structure as empty. Must be called before any other use. +// Returns false in case of version mismatch +static WEBP_INLINE int WebPInitDecBuffer(WebPDecBuffer* buffer) { + return WebPInitDecBufferInternal(buffer, WEBP_DECODER_ABI_VERSION); +} + +// Free any memory associated with the buffer. Must always be called last. +// Note: doesn't free the 'buffer' structure itself. +WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer); + +//------------------------------------------------------------------------------ +// Enumeration of the status codes + +typedef enum VP8StatusCode { + VP8_STATUS_OK = 0, + VP8_STATUS_OUT_OF_MEMORY, + VP8_STATUS_INVALID_PARAM, + VP8_STATUS_BITSTREAM_ERROR, + VP8_STATUS_UNSUPPORTED_FEATURE, + VP8_STATUS_SUSPENDED, + VP8_STATUS_USER_ABORT, + VP8_STATUS_NOT_ENOUGH_DATA +} VP8StatusCode; + +//------------------------------------------------------------------------------ +// Incremental decoding +// +// This API allows streamlined decoding of partial data. +// Picture can be incrementally decoded as data become available thanks to the +// WebPIDecoder object. This object can be left in a SUSPENDED state if the +// picture is only partially decoded, pending additional input. +// Code example: +// +// WebPInitDecBuffer(&buffer); +// buffer.colorspace = mode; +// ... +// WebPIDecoder* idec = WebPINewDecoder(&buffer); +// while (has_more_data) { +// // ... (get additional data) +// status = WebPIAppend(idec, new_data, new_data_size); +// if (status != VP8_STATUS_SUSPENDED || +// break; +// } +// +// // The above call decodes the current available buffer. +// // Part of the image can now be refreshed by calling to +// // WebPIDecGetRGB()/WebPIDecGetYUVA() etc. +// } +// WebPIDelete(idec); + +// Creates a new incremental decoder with the supplied buffer parameter. +// This output_buffer can be passed NULL, in which case a default output buffer +// is used (with MODE_RGB). Otherwise, an internal reference to 'output_buffer' +// is kept, which means that the lifespan of 'output_buffer' must be larger than +// that of the returned WebPIDecoder object. +// The supplied 'output_buffer' content MUST NOT be changed between calls to +// WebPIAppend() or WebPIUpdate() unless 'output_buffer.is_external_memory' is +// set to 1. In such a case, it is allowed to modify the pointers, size and +// stride of output_buffer.u.RGBA or output_buffer.u.YUVA, provided they remain +// within valid bounds. +// All other fields of WebPDecBuffer MUST remain constant between calls. +// Returns NULL if the allocation failed. +WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer); + +// This function allocates and initializes an incremental-decoder object, which +// will output the RGB/A samples specified by 'csp' into a preallocated +// buffer 'output_buffer'. The size of this buffer is at least +// 'output_buffer_size' and the stride (distance in bytes between two scanlines) +// is specified by 'output_stride'. +// Additionally, output_buffer can be passed NULL in which case the output +// buffer will be allocated automatically when the decoding starts. The +// colorspace 'csp' is taken into account for allocating this buffer. All other +// parameters are ignored. +// Returns NULL if the allocation failed, or if some parameters are invalid. +WEBP_EXTERN(WebPIDecoder*) WebPINewRGB( + WEBP_CSP_MODE csp, + uint8_t* output_buffer, size_t output_buffer_size, int output_stride); + +// This function allocates and initializes an incremental-decoder object, which +// will output the raw luma/chroma samples into a preallocated planes if +// supplied. The luma plane is specified by its pointer 'luma', its size +// 'luma_size' and its stride 'luma_stride'. Similarly, the chroma-u plane +// is specified by the 'u', 'u_size' and 'u_stride' parameters, and the chroma-v +// plane by 'v' and 'v_size'. And same for the alpha-plane. The 'a' pointer +// can be pass NULL in case one is not interested in the transparency plane. +// Conversely, 'luma' can be passed NULL if no preallocated planes are supplied. +// In this case, the output buffer will be automatically allocated (using +// MODE_YUVA) when decoding starts. All parameters are then ignored. +// Returns NULL if the allocation failed or if a parameter is invalid. +WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA( + uint8_t* luma, size_t luma_size, int luma_stride, + uint8_t* u, size_t u_size, int u_stride, + uint8_t* v, size_t v_size, int v_stride, + uint8_t* a, size_t a_size, int a_stride); + +// Deprecated version of the above, without the alpha plane. +// Kept for backward compatibility. +WEBP_EXTERN(WebPIDecoder*) WebPINewYUV( + uint8_t* luma, size_t luma_size, int luma_stride, + uint8_t* u, size_t u_size, int u_stride, + uint8_t* v, size_t v_size, int v_stride); + +// Deletes the WebPIDecoder object and associated memory. Must always be called +// if WebPINewDecoder, WebPINewRGB or WebPINewYUV succeeded. +WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* idec); + +// Copies and decodes the next available data. Returns VP8_STATUS_OK when +// the image is successfully decoded. Returns VP8_STATUS_SUSPENDED when more +// data is expected. Returns error in other cases. +WEBP_EXTERN(VP8StatusCode) WebPIAppend( + WebPIDecoder* idec, const uint8_t* data, size_t data_size); + +// A variant of the above function to be used when data buffer contains +// partial data from the beginning. In this case data buffer is not copied +// to the internal memory. +// Note that the value of the 'data' pointer can change between calls to +// WebPIUpdate, for instance when the data buffer is resized to fit larger data. +WEBP_EXTERN(VP8StatusCode) WebPIUpdate( + WebPIDecoder* idec, const uint8_t* data, size_t data_size); + +// Returns the RGB/A image decoded so far. Returns NULL if output params +// are not initialized yet. The RGB/A output type corresponds to the colorspace +// specified during call to WebPINewDecoder() or WebPINewRGB(). +// *last_y is the index of last decoded row in raster scan order. Some pointers +// (*last_y, *width etc.) can be NULL if corresponding information is not +// needed. +WEBP_EXTERN(uint8_t*) WebPIDecGetRGB( + const WebPIDecoder* idec, int* last_y, + int* width, int* height, int* stride); + +// Same as above function to get a YUVA image. Returns pointer to the luma +// plane or NULL in case of error. If there is no alpha information +// the alpha pointer '*a' will be returned NULL. +WEBP_EXTERN(uint8_t*) WebPIDecGetYUVA( + const WebPIDecoder* idec, int* last_y, + uint8_t** u, uint8_t** v, uint8_t** a, + int* width, int* height, int* stride, int* uv_stride, int* a_stride); + +// Deprecated alpha-less version of WebPIDecGetYUVA(): it will ignore the +// alpha information (if present). Kept for backward compatibility. +static WEBP_INLINE uint8_t* WebPIDecGetYUV( + const WebPIDecoder* idec, int* last_y, uint8_t** u, uint8_t** v, + int* width, int* height, int* stride, int* uv_stride) { + return WebPIDecGetYUVA(idec, last_y, u, v, NULL, width, height, + stride, uv_stride, NULL); +} + +// Generic call to retrieve information about the displayable area. +// If non NULL, the left/right/width/height pointers are filled with the visible +// rectangular area so far. +// Returns NULL in case the incremental decoder object is in an invalid state. +// Otherwise returns the pointer to the internal representation. This structure +// is read-only, tied to WebPIDecoder's lifespan and should not be modified. +WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea( + const WebPIDecoder* idec, int* left, int* top, int* width, int* height); + +//------------------------------------------------------------------------------ +// Advanced decoding parametrization +// +// Code sample for using the advanced decoding API +/* + // A) Init a configuration object + WebPDecoderConfig config; + CHECK(WebPInitDecoderConfig(&config)); + + // B) optional: retrieve the bitstream's features. + CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK); + + // C) Adjust 'config', if needed + config.no_fancy_upsampling = 1; + config.output.colorspace = MODE_BGRA; + // etc. + + // Note that you can also make config.output point to an externally + // supplied memory buffer, provided it's big enough to store the decoded + // picture. Otherwise, config.output will just be used to allocate memory + // and store the decoded picture. + + // D) Decode! + CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK); + + // E) Decoded image is now in config.output (and config.output.u.RGBA) + + // F) Reclaim memory allocated in config's object. It's safe to call + // this function even if the memory is external and wasn't allocated + // by WebPDecode(). + WebPFreeDecBuffer(&config.output); +*/ + +// Features gathered from the bitstream +struct WebPBitstreamFeatures { + int width; // Width in pixels, as read from the bitstream. + int height; // Height in pixels, as read from the bitstream. + int has_alpha; // True if the bitstream contains an alpha channel. + int has_animation; // True if the bitstream is an animation. + int format; // 0 = undefined (/mixed), 1 = lossy, 2 = lossless + + // Unused for now: + int no_incremental_decoding; // if true, using incremental decoding is not + // recommended. + int rotate; // TODO(later) + int uv_sampling; // should be 0 for now. TODO(later) + uint32_t pad[2]; // padding for later use +}; + +// Internal, version-checked, entry point +WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal( + const uint8_t*, size_t, WebPBitstreamFeatures*, int); + +// Retrieve features from the bitstream. The *features structure is filled +// with information gathered from the bitstream. +// Returns VP8_STATUS_OK when the features are successfully retrieved. Returns +// VP8_STATUS_NOT_ENOUGH_DATA when more data is needed to retrieve the +// features from headers. Returns error in other cases. +static WEBP_INLINE VP8StatusCode WebPGetFeatures( + const uint8_t* data, size_t data_size, + WebPBitstreamFeatures* features) { + return WebPGetFeaturesInternal(data, data_size, features, + WEBP_DECODER_ABI_VERSION); +} + +// Decoding options +struct WebPDecoderOptions { + int bypass_filtering; // if true, skip the in-loop filtering + int no_fancy_upsampling; // if true, use faster pointwise upsampler + int use_cropping; // if true, cropping is applied _first_ + int crop_left, crop_top; // top-left position for cropping. + // Will be snapped to even values. + int crop_width, crop_height; // dimension of the cropping area + int use_scaling; // if true, scaling is applied _afterward_ + int scaled_width, scaled_height; // final resolution + int use_threads; // if true, use multi-threaded decoding + int dithering_strength; // dithering strength (0=Off, 100=full) + int flip; // flip output vertically + int alpha_dithering_strength; // alpha dithering strength in [0..100] + + // Unused for now: + int force_rotation; // forced rotation (to be applied _last_) + int no_enhancement; // if true, discard enhancement layer + uint32_t pad[3]; // padding for later use +}; + +// Main object storing the configuration for advanced decoding. +struct WebPDecoderConfig { + WebPBitstreamFeatures input; // Immutable bitstream features (optional) + WebPDecBuffer output; // Output buffer (can point to external mem) + WebPDecoderOptions options; // Decoding options +}; + +// Internal, version-checked, entry point +WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int); + +// Initialize the configuration as empty. This function must always be +// called first, unless WebPGetFeatures() is to be called. +// Returns false in case of mismatched version. +static WEBP_INLINE int WebPInitDecoderConfig(WebPDecoderConfig* config) { + return WebPInitDecoderConfigInternal(config, WEBP_DECODER_ABI_VERSION); +} + +// Instantiate a new incremental decoder object with the requested +// configuration. The bitstream can be passed using 'data' and 'data_size' +// parameter, in which case the features will be parsed and stored into +// config->input. Otherwise, 'data' can be NULL and no parsing will occur. +// Note that 'config' can be NULL too, in which case a default configuration +// is used. +// The return WebPIDecoder object must always be deleted calling WebPIDelete(). +// Returns NULL in case of error (and config->status will then reflect +// the error condition). +WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size, + WebPDecoderConfig* config); + +// Non-incremental version. This version decodes the full data at once, taking +// 'config' into account. Returns decoding status (which should be VP8_STATUS_OK +// if the decoding was successful). +WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size, + WebPDecoderConfig* config); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* WEBP_WEBP_DECODE_H_ */ diff --git a/src/loaders/webp/webp/format_constants.h b/src/loaders/webp/webp/format_constants.h new file mode 100644 index 00000000..4c04b50c --- /dev/null +++ b/src/loaders/webp/webp/format_constants.h @@ -0,0 +1,88 @@ +// Copyright 2012 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Internal header for constants related to WebP file format. +// +// Author: Urvang (urvang@google.com) + +#ifndef WEBP_WEBP_FORMAT_CONSTANTS_H_ +#define WEBP_WEBP_FORMAT_CONSTANTS_H_ + +// Create fourcc of the chunk from the chunk tag characters. +#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24) + +// VP8 related constants. +#define VP8_SIGNATURE 0x9d012a // Signature in VP8 data. +#define VP8_MAX_PARTITION0_SIZE (1 << 19) // max size of mode partition +#define VP8_MAX_PARTITION_SIZE (1 << 24) // max size for token partition +#define VP8_FRAME_HEADER_SIZE 10 // Size of the frame header within VP8 data. + +// VP8L related constants. +#define VP8L_SIGNATURE_SIZE 1 // VP8L signature size. +#define VP8L_MAGIC_BYTE 0x2f // VP8L signature byte. +#define VP8L_IMAGE_SIZE_BITS 14 // Number of bits used to store + // width and height. +#define VP8L_VERSION_BITS 3 // 3 bits reserved for version. +#define VP8L_VERSION 0 // version 0 +#define VP8L_FRAME_HEADER_SIZE 5 // Size of the VP8L frame header. + +#define MAX_PALETTE_SIZE 256 +#define MAX_CACHE_BITS 11 +#define HUFFMAN_CODES_PER_META_CODE 5 +#define ARGB_BLACK 0xff000000 + +#define DEFAULT_CODE_LENGTH 8 +#define MAX_ALLOWED_CODE_LENGTH 15 + +#define NUM_LITERAL_CODES 256 +#define NUM_LENGTH_CODES 24 +#define NUM_DISTANCE_CODES 40 +#define CODE_LENGTH_CODES 19 + +#define MIN_HUFFMAN_BITS 2 // min number of Huffman bits +#define MAX_HUFFMAN_BITS 9 // max number of Huffman bits + +#define TRANSFORM_PRESENT 1 // The bit to be written when next data + // to be read is a transform. +#define NUM_TRANSFORMS 4 // Maximum number of allowed transform + // in a bitstream. +typedef enum { + PREDICTOR_TRANSFORM = 0, + CROSS_COLOR_TRANSFORM = 1, + SUBTRACT_GREEN = 2, + COLOR_INDEXING_TRANSFORM = 3 +} VP8LImageTransformType; + +// Alpha related constants. +#define ALPHA_HEADER_LEN 1 +#define ALPHA_NO_COMPRESSION 0 +#define ALPHA_LOSSLESS_COMPRESSION 1 +#define ALPHA_PREPROCESSED_LEVELS 1 + +// Mux related constants. +#define TAG_SIZE 4 // Size of a chunk tag (e.g. "VP8L"). +#define CHUNK_SIZE_BYTES 4 // Size needed to store chunk's size. +#define CHUNK_HEADER_SIZE 8 // Size of a chunk header. +#define RIFF_HEADER_SIZE 12 // Size of the RIFF header ("RIFFnnnnWEBP"). +#define ANMF_CHUNK_SIZE 16 // Size of an ANMF chunk. +#define ANIM_CHUNK_SIZE 6 // Size of an ANIM chunk. +#define FRGM_CHUNK_SIZE 6 // Size of a FRGM chunk. +#define VP8X_CHUNK_SIZE 10 // Size of a VP8X chunk. + +#define MAX_CANVAS_SIZE (1 << 24) // 24-bit max for VP8X width/height. +#define MAX_IMAGE_AREA (1ULL << 32) // 32-bit max for width x height. +#define MAX_LOOP_COUNT (1 << 16) // maximum value for loop-count +#define MAX_DURATION (1 << 24) // maximum duration +#define MAX_POSITION_OFFSET (1 << 24) // maximum frame/fragment x/y offset + +// Maximum chunk payload is such that adding the header and padding won't +// overflow a uint32_t. +#define MAX_CHUNK_PAYLOAD (~0U - CHUNK_HEADER_SIZE - 1) + +#endif /* WEBP_WEBP_FORMAT_CONSTANTS_H_ */ diff --git a/src/loaders/webp/webp/meson.build b/src/loaders/webp/webp/meson.build new file mode 100644 index 00000000..cbdcd4b5 --- /dev/null +++ b/src/loaders/webp/webp/meson.build @@ -0,0 +1,10 @@ +source_file = [ + 'decode.h', + 'format_constants.h', + 'types.h' +] + +webp_deb += [declare_dependency( + include_directories : include_directories('.'), + sources : source_file +)] \ No newline at end of file diff --git a/src/loaders/webp/webp/types.h b/src/loaders/webp/webp/types.h new file mode 100644 index 00000000..008fcbfb --- /dev/null +++ b/src/loaders/webp/webp/types.h @@ -0,0 +1,53 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Common types +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_WEBP_TYPES_H_ +#define WEBP_WEBP_TYPES_H_ + +#include // for size_t + +#ifndef _MSC_VER +#include +#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || \ + (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +#define WEBP_INLINE inline +#else +#define WEBP_INLINE +#endif +#else +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +typedef signed int int32_t; +typedef unsigned int uint32_t; +typedef unsigned long long int uint64_t; +typedef long long int int64_t; +#define WEBP_INLINE __forceinline +#endif /* _MSC_VER */ + +#ifndef WEBP_EXTERN +// This explicitly marks library functions and allows for changing the +// signature for e.g., Windows DLL builds. +# if defined(__GNUC__) && __GNUC__ >= 4 +# define WEBP_EXTERN(type) extern __attribute__ ((visibility ("default"))) type +# else +# define WEBP_EXTERN(type) extern type +# endif /* __GNUC__ >= 4 */ + +#endif /* WEBP_EXTERN */ + +// Macro to check ABI compatibility (same major revision number) +#define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8)) + +#endif /* WEBP_WEBP_TYPES_H_ */