From eb8539e0b41114f6a5897dc988de653a2f517961 Mon Sep 17 00:00:00 2001 From: Hermet Park Date: Sun, 28 May 2023 18:49:44 +0900 Subject: [PATCH] sw_engine fill: optimize linear/radial fill fetching. Save the separated for-loop by unifying the blending/composition in one fetching stage. --- src/lib/sw_engine/tvgSwCommon.h | 39 ++++++- src/lib/sw_engine/tvgSwFill.cpp | 178 ++++++++++++++++++++++++++---- src/lib/sw_engine/tvgSwRaster.cpp | 152 ++++--------------------- 3 files changed, 214 insertions(+), 155 deletions(-) diff --git a/src/lib/sw_engine/tvgSwCommon.h b/src/lib/sw_engine/tvgSwCommon.h index a01bcc4f..be72e4e6 100644 --- a/src/lib/sw_engine/tvgSwCommon.h +++ b/src/lib/sw_engine/tvgSwCommon.h @@ -240,6 +240,7 @@ struct SwImage typedef uint32_t(*SwJoin)(uint8_t r, uint8_t g, uint8_t b, uint8_t a); //color channel join typedef uint8_t(*SwAlpha)(uint8_t*); //blending alpha +typedef uint32_t(*SwBlendOp)(uint32_t s, uint32_t d, uint8_t a); //src, dst, alpha struct SwBlender { @@ -302,6 +303,38 @@ static inline SwCoord HALF_STROKE(float width) return TO_SWCOORD(width * 0.5f); } +static inline uint8_t _multiply(uint8_t c, uint8_t a) +{ + return ((c * a + 0xff) >> 8); +} + +static inline uint8_t _alpha(uint32_t c) +{ + return (c >> 24); +} + +static inline uint8_t _ialpha(uint32_t c) +{ + return (~c >> 24); +} + +static inline uint32_t opAlphaBlend(uint32_t s, uint32_t d, uint8_t a) +{ + auto t = ALPHA_BLEND(s, a); + return t + ALPHA_BLEND(d, _ialpha(t)); +} + +static inline uint32_t opBlend(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a) +{ + return s + ALPHA_BLEND(d, _ialpha(s)); +} + +static inline uint32_t opInterpolate(uint32_t s, uint32_t d, uint8_t a) +{ + return INTERPOLATE(s, d, a); +} + + int64_t mathMultiply(int64_t a, int64_t b); int64_t mathDivide(int64_t a, int64_t b); int64_t mathMulDiv(int64_t a, int64_t b, int64_t c); @@ -349,8 +382,10 @@ void imageFree(SwImage* image); bool fillGenColorTable(SwFill* fill, const Fill* fdata, const Matrix* transform, 
SwSurface* surface, uint32_t opacity, bool ctable); void fillReset(SwFill* fill); void fillFree(SwFill* fill); -void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len); -void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len); +void fillRasterLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, SwBlendOp op = nullptr, uint8_t a = 255); //blending ver. +void fillRasterLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, uint8_t* cmp, SwAlpha alpha, uint8_t csize, uint8_t opacity); //masking ver. +void fillRasterRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, SwBlendOp op = nullptr, uint8_t a = 255); //blending ver. +void fillRasterRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, uint8_t* cmp, SwAlpha alpha, uint8_t csize, uint8_t opacity); //masking ver. SwRleData* rleRender(SwRleData* rle, const SwOutline* outline, const SwBBox& renderRegion, bool antiAlias); SwRleData* rleRender(const SwBBox* bbox); diff --git a/src/lib/sw_engine/tvgSwFill.cpp b/src/lib/sw_engine/tvgSwFill.cpp index e14d2bdd..df7a1b57 100644 --- a/src/lib/sw_engine/tvgSwFill.cpp +++ b/src/lib/sw_engine/tvgSwFill.cpp @@ -233,7 +233,7 @@ static inline uint32_t _pixel(const SwFill* fill, float pos) /* External Class Implementation */ /************************************************************************/ -void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len) +void fillRasterRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, uint8_t* cmp, SwAlpha alpha, uint8_t csize, uint8_t opacity) { auto rx = (x + 0.5f) * fill->radial.a11 + (y + 0.5f) * fill->radial.a12 + fill->radial.shiftX; auto ry = (x + 0.5f) * fill->radial.a21 + (y + 0.5f) * fill->radial.a22 + fill->radial.shiftY; @@ -244,16 +244,125 @@ void fillFetchRadial(const SwFill* fill, 
uint32_t* dst, uint32_t y, uint32_t x, auto detFirstDerivative = 2.0f * (fill->radial.a11 * rx + fill->radial.a21 * ry) + 0.5f * detSecondDerivative; auto det = rx * rx + ry * ry; - for (uint32_t i = 0 ; i < len ; ++i) { - *dst = _pixel(fill, sqrtf(det)); - ++dst; - det += detFirstDerivative; - detFirstDerivative += detSecondDerivative; + if (opacity == 255) { + for (uint32_t i = 0 ; i < len ; ++i, ++dst, cmp += csize) { + *dst = opAlphaBlend(_pixel(fill, sqrtf(det)), *dst, alpha(cmp)); + det += detFirstDerivative; + detFirstDerivative += detSecondDerivative; + } + } else { + for (uint32_t i = 0 ; i < len ; ++i, ++dst, cmp += csize) { + *dst = opAlphaBlend(_pixel(fill, sqrtf(det)), *dst, _multiply(opacity, alpha(cmp))); + det += detFirstDerivative; + detFirstDerivative += detSecondDerivative; + } + } + +} + + +void fillRasterRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, SwBlendOp op, uint8_t a) +{ + auto rx = (x + 0.5f) * fill->radial.a11 + (y + 0.5f) * fill->radial.a12 + fill->radial.shiftX; + auto ry = (x + 0.5f) * fill->radial.a21 + (y + 0.5f) * fill->radial.a22 + fill->radial.shiftY; + + // detSecondDerivative = d(detFirstDerivative)/dx = d( d(det)/dx )/dx + auto detSecondDerivative = fill->radial.detSecDeriv; + // detFirstDerivative = d(det)/dx + auto detFirstDerivative = 2.0f * (fill->radial.a11 * rx + fill->radial.a21 * ry) + 0.5f * detSecondDerivative; + auto det = rx * rx + ry * ry; + + if (op) { + for (uint32_t i = 0 ; i < len ; ++i, ++dst) { + *dst = op(_pixel(fill, sqrtf(det)), *dst, a); + det += detFirstDerivative; + detFirstDerivative += detSecondDerivative; + } + } else { + for (uint32_t i = 0 ; i < len ; ++i, ++dst) { + *dst = _pixel(fill, sqrtf(det)); + det += detFirstDerivative; + detFirstDerivative += detSecondDerivative; + } } } -void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len) +void fillRasterLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t 
len, uint8_t* cmp, SwAlpha alpha, uint8_t csize, uint8_t opacity) +{ + //Rotation + float rx = x + 0.5f; + float ry = y + 0.5f; + float t = (fill->linear.dx * rx + fill->linear.dy * ry + fill->linear.offset) * (GRADIENT_STOP_SIZE - 1); + float inc = (fill->linear.dx) * (GRADIENT_STOP_SIZE - 1); + + if (opacity == 255) { + if (mathZero(inc)) { + auto color = _fixedPixel(fill, static_cast(t * FIXPT_SIZE)); + for (uint32_t i = 0; i < len; ++i, ++dst, cmp += csize) { + *dst = opAlphaBlend(color, *dst, alpha(cmp)); + } + return; + } + + auto vMax = static_cast(INT32_MAX >> (FIXPT_BITS + 1)); + auto vMin = -vMax; + auto v = t + (inc * len); + + //we can use fixed point math + if (v < vMax && v > vMin) { + auto t2 = static_cast(t * FIXPT_SIZE); + auto inc2 = static_cast(inc * FIXPT_SIZE); + for (uint32_t j = 0; j < len; ++j, ++dst, cmp += csize) { + *dst = opAlphaBlend(_fixedPixel(fill, t2), *dst, alpha(cmp)); + t2 += inc2; + } + //we have to fallback to float math + } else { + uint32_t counter = 0; + while (counter++ < len) { + *dst = opAlphaBlend(_pixel(fill, t / GRADIENT_STOP_SIZE), *dst, alpha(cmp)); + ++dst; + t += inc; + cmp += csize; + } + } + } else { + if (mathZero(inc)) { + auto color = _fixedPixel(fill, static_cast(t * FIXPT_SIZE)); + for (uint32_t i = 0; i < len; ++i, ++dst, cmp += csize) { + *dst = opAlphaBlend(color, *dst, _multiply(alpha(cmp), opacity)); + } + return; + } + + auto vMax = static_cast(INT32_MAX >> (FIXPT_BITS + 1)); + auto vMin = -vMax; + auto v = t + (inc * len); + + //we can use fixed point math + if (v < vMax && v > vMin) { + auto t2 = static_cast(t * FIXPT_SIZE); + auto inc2 = static_cast(inc * FIXPT_SIZE); + for (uint32_t j = 0; j < len; ++j, ++dst, cmp += csize) { + *dst = opAlphaBlend(_fixedPixel(fill, t2), *dst, _multiply(alpha(cmp), opacity)); + t2 += inc2; + } + //we have to fallback to float math + } else { + uint32_t counter = 0; + while (counter++ < len) { + *dst = opAlphaBlend(_pixel(fill, t / GRADIENT_STOP_SIZE), *dst, 
_multiply(opacity, alpha(cmp))); + ++dst; + t += inc; + cmp += csize; + } + } + } +} + + +void fillRasterLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, SwBlendOp op, uint8_t a) { //Rotation float rx = x + 0.5f; @@ -263,7 +372,13 @@ void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, if (mathZero(inc)) { auto color = _fixedPixel(fill, static_cast(t * FIXPT_SIZE)); - rasterRGBA32(dst, color, 0, len); + if (op) { + for (uint32_t i = 0; i < len; ++i, ++dst) { + *dst = op(color, *dst, a); + } + } else { + rasterRGBA32(dst, color, 0, len); + } return; } @@ -271,22 +386,41 @@ void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, auto vMin = -vMax; auto v = t + (inc * len); - //we can use fixed point math - if (v < vMax && v > vMin) { - auto t2 = static_cast(t * FIXPT_SIZE); - auto inc2 = static_cast(inc * FIXPT_SIZE); - for (uint32_t j = 0; j < len; ++j) { - *dst = _fixedPixel(fill, t2); - ++dst; - t2 += inc2; + if (op) { + //we can use fixed point math + if (v < vMax && v > vMin) { + auto t2 = static_cast(t * FIXPT_SIZE); + auto inc2 = static_cast(inc * FIXPT_SIZE); + for (uint32_t j = 0; j < len; ++j, ++dst) { + *dst = op(_fixedPixel(fill, t2), *dst, a); + t2 += inc2; + } + //we have to fallback to float math + } else { + uint32_t counter = 0; + while (counter++ < len) { + *dst = op(_pixel(fill, t / GRADIENT_STOP_SIZE), *dst, a); + ++dst; + t += inc; + } } - //we have to fallback to float math } else { - uint32_t counter = 0; - while (counter++ < len) { - *dst = _pixel(fill, t / GRADIENT_STOP_SIZE); - ++dst; - t += inc; + //we can use fixed point math + if (v < vMax && v > vMin) { + auto t2 = static_cast(t * FIXPT_SIZE); + auto inc2 = static_cast(inc * FIXPT_SIZE); + for (uint32_t j = 0; j < len; ++j, ++dst) { + *dst = _fixedPixel(fill, t2); + t2 += inc2; + } + //we have to fallback to float math + } else { + uint32_t counter = 0; + while (counter++ < len) { + *dst = _pixel(fill, t / 
GRADIENT_STOP_SIZE); + ++dst; + t += inc; + } } } } diff --git a/src/lib/sw_engine/tvgSwRaster.cpp b/src/lib/sw_engine/tvgSwRaster.cpp index c69cbc34..c8ba4764 100644 --- a/src/lib/sw_engine/tvgSwRaster.cpp +++ b/src/lib/sw_engine/tvgSwRaster.cpp @@ -37,17 +37,6 @@ /************************************************************************/ constexpr auto DOWN_SCALE_TOLERANCE = 0.5f; -static inline uint8_t _multiply(uint8_t c, uint8_t a) -{ - return ((c * a + 0xff) >> 8); -} - - -static inline uint32_t _ialpha(uint32_t c) -{ - return (~c >> 24); -} - static inline uint8_t _alpha(uint8_t* a) { @@ -926,18 +915,8 @@ static bool _rasterLinearGradientMaskedRect(SwSurface* surface, const SwBBox& re auto cbuffer = surface->compositor->image.buf8 + (region.min.y * surface->compositor->image.stride + region.min.x) * csize; auto alpha = surface->blender.alpha(surface->compositor->method); - auto sbuffer = static_cast(alloca(w * sizeof(uint32_t))); - if (!sbuffer) return false; - for (uint32_t y = 0; y < h; ++y) { - fillFetchLinear(fill, sbuffer, region.min.y + y, region.min.x, w); - auto dst = buffer; - auto cmp = cbuffer; - auto src = sbuffer; - for (uint32_t x = 0; x < w; ++x, ++dst, ++src, cmp += csize) { - auto tmp = ALPHA_BLEND(*src, alpha(cmp)); - *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); - } + fillRasterLinear(fill, buffer, region.min.y + y, region.min.x, w, cbuffer, alpha, csize, 255); buffer += surface->stride; cbuffer += surface->stride * csize; } @@ -953,15 +932,9 @@ static bool _rasterTranslucentLinearGradientRect(SwSurface* surface, const SwBBo auto h = static_cast(region.max.y - region.min.y); auto w = static_cast(region.max.x - region.min.x); - auto sbuffer = static_cast(alloca(w * sizeof(uint32_t))); - if (!sbuffer) return false; - for (uint32_t y = 0; y < h; ++y) { auto dst = buffer; - fillFetchLinear(fill, sbuffer, region.min.y + y, region.min.x, w); - for (uint32_t x = 0; x < w; ++x, ++dst) { - *dst = sbuffer[x] + ALPHA_BLEND(*dst, _ialpha(sbuffer[x])); 
- } + fillRasterLinear(fill, dst, region.min.y + y, region.min.x, w, opBlend); buffer += surface->stride; } return true; @@ -977,7 +950,7 @@ static bool _rasterSolidLinearGradientRect(SwSurface* surface, const SwBBox& reg auto h = static_cast(region.max.y - region.min.y); for (uint32_t y = 0; y < h; ++y) { - fillFetchLinear(fill, buffer + y * surface->stride, region.min.y + y, region.min.x, w); + fillRasterLinear(fill, buffer + y * surface->stride, region.min.y + y, region.min.x, w); } return true; } @@ -988,6 +961,7 @@ static bool _rasterLinearGradientRect(SwSurface* surface, const SwBBox& region, if (_compositing(surface)) { return _rasterLinearGradientMaskedRect(surface, region, fill); } else { + //OPTIMIZE_ME: Unify branches. if (fill->translucent) return _rasterTranslucentLinearGradientRect(surface, region, fill); else _rasterSolidLinearGradientRect(surface, region, fill); } @@ -1003,30 +977,15 @@ static bool _rasterLinearGradientMaskedRle(SwSurface* surface, const SwRleData* { if (fill->linear.len < FLT_EPSILON) return false; - auto buffer = static_cast(alloca(surface->w * sizeof(uint32_t))); - if (!buffer) return false; - auto span = rle->spans; auto csize = surface->compositor->image.channelSize; auto cbuffer = surface->compositor->image.buf8; auto alpha = surface->blender.alpha(surface->compositor->method); for (uint32_t i = 0; i < rle->size; ++i, ++span) { - fillFetchLinear(fill, buffer, span->y, span->x, span->len); auto dst = &surface->buf32[span->y * surface->stride + span->x]; auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + span->x) * csize]; - auto src = buffer; - if (span->coverage == 255) { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++src, cmp += csize) { - auto tmp = ALPHA_BLEND(*src, alpha(cmp)); - *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); - } - } else { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++src, cmp += csize) { - auto tmp = ALPHA_BLEND(*src, _multiply(span->coverage, alpha(cmp))); - *dst = tmp + 
ALPHA_BLEND(*dst, _ialpha(tmp)); - } - } + fillRasterLinear(fill, dst, span->y, span->x, span->len, cmp, alpha, csize, span->coverage); } return true; } @@ -1037,22 +996,11 @@ static bool _rasterTranslucentLinearGradientRle(SwSurface* surface, const SwRleD if (fill->linear.len < FLT_EPSILON) return false; auto span = rle->spans; - auto buffer = static_cast(alloca(surface->w * sizeof(uint32_t))); - if (!buffer) return false; for (uint32_t i = 0; i < rle->size; ++i, ++span) { auto dst = &surface->buf32[span->y * surface->stride + span->x]; - fillFetchLinear(fill, buffer, span->y, span->x, span->len); - if (span->coverage == 255) { - for (uint32_t x = 0; x < span->len; ++x, ++dst) { - *dst = buffer[x] + ALPHA_BLEND(*dst, _ialpha(buffer[x])); - } - } else { - for (uint32_t x = 0; x < span->len; ++x, ++dst) { - auto tmp = ALPHA_BLEND(buffer[x], span->coverage); - *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); - } - } + if (span->coverage == 255) fillRasterLinear(fill, dst, span->y, span->x, span->len, opBlend); + else fillRasterLinear(fill, dst, span->y, span->x, span->len, opAlphaBlend, span->coverage); } return true; } @@ -1062,21 +1010,12 @@ static bool _rasterSolidLinearGradientRle(SwSurface* surface, const SwRleData* r { if (fill->linear.len < FLT_EPSILON) return false; - auto buf = static_cast(alloca(surface->w * sizeof(uint32_t))); - if (!buf) return false; - auto span = rle->spans; for (uint32_t i = 0; i < rle->size; ++i, ++span) { - if (span->coverage == 255) { - fillFetchLinear(fill, surface->buf32 + span->y * surface->stride + span->x, span->y, span->x, span->len); - } else { - fillFetchLinear(fill, buf, span->y, span->x, span->len); - auto dst = &surface->buf32[span->y * surface->stride + span->x]; - for (uint32_t x = 0; x < span->len; ++x) { - dst[x] = INTERPOLATE(buf[x], dst[x], span->coverage); - } - } + auto dst = &surface->buf32[span->y * surface->stride + span->x]; + if (span->coverage == 255) fillRasterLinear(fill, dst, span->y, span->x, span->len); + 
else fillRasterLinear(fill, dst, span->y, span->x, span->len, opInterpolate, span->coverage); } return true; } @@ -1089,6 +1028,7 @@ static bool _rasterLinearGradientRle(SwSurface* surface, const SwRleData* rle, c if (_compositing(surface)) { return _rasterLinearGradientMaskedRle(surface, rle, fill); } else { + //OPTIMIZE_ME: Unify branches if (fill->translucent) return _rasterTranslucentLinearGradientRle(surface, rle, fill); else return _rasterSolidLinearGradientRle(surface, rle, fill); } @@ -1111,18 +1051,8 @@ static bool _rasterRadialGradientMaskedRect(SwSurface* surface, const SwBBox& re auto cbuffer = surface->compositor->image.buf8 + (region.min.y * surface->compositor->image.stride + region.min.x) * csize; auto alpha = surface->blender.alpha(surface->compositor->method); - auto sbuffer = static_cast(alloca(w * sizeof(uint32_t))); - if (!sbuffer) return false; - for (uint32_t y = 0; y < h; ++y) { - fillFetchRadial(fill, sbuffer, region.min.y + y, region.min.x, w); - auto dst = buffer; - auto cmp = cbuffer; - auto src = sbuffer; - for (uint32_t x = 0; x < w; ++x, ++dst, ++src, cmp += csize) { - auto tmp = ALPHA_BLEND(*src, alpha(cmp)); - *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); - } + fillRasterRadial(fill, buffer, region.min.y + y, region.min.x, w, cbuffer, alpha, csize, 255); buffer += surface->stride; cbuffer += surface->stride * csize; } @@ -1138,15 +1068,9 @@ static bool _rasterTranslucentRadialGradientRect(SwSurface* surface, const SwBBo auto h = static_cast(region.max.y - region.min.y); auto w = static_cast(region.max.x - region.min.x); - auto sbuffer = static_cast(alloca(w * sizeof(uint32_t))); - if (!sbuffer) return false; - for (uint32_t y = 0; y < h; ++y) { auto dst = buffer; - fillFetchRadial(fill, sbuffer, region.min.y + y, region.min.x, w); - for (uint32_t x = 0; x < w; ++x, ++dst) { - *dst = sbuffer[x] + ALPHA_BLEND(*dst, _ialpha(sbuffer[x])); - } + fillRasterRadial(fill, dst, region.min.y + y, region.min.x, w, opBlend); buffer += 
surface->stride; } return true; @@ -1162,8 +1086,7 @@ static bool _rasterSolidRadialGradientRect(SwSurface* surface, const SwBBox& reg auto w = static_cast(region.max.x - region.min.x); for (uint32_t y = 0; y < h; ++y) { - auto dst = &buffer[y * surface->stride]; - fillFetchRadial(fill, dst, region.min.y + y, region.min.x, w); + fillRasterRadial(fill, &buffer[y * surface->stride], region.min.y + y, region.min.x, w); } return true; } @@ -1174,6 +1097,7 @@ static bool _rasterRadialGradientRect(SwSurface* surface, const SwBBox& region, if (_compositing(surface)) { return _rasterRadialGradientMaskedRect(surface, region, fill); } else { + //OPTIMIZE_ME: Unify branches. if (fill->translucent) return _rasterTranslucentRadialGradientRect(surface, region, fill); else return _rasterSolidRadialGradientRect(surface, region, fill); } @@ -1189,30 +1113,15 @@ static bool _rasterRadialGradientMaskedRle(SwSurface* surface, const SwRleData* { if (fill->radial.a < FLT_EPSILON) return false; - auto buffer = static_cast(alloca(surface->w * sizeof(uint32_t))); - if (!buffer) return false; - auto span = rle->spans; auto csize = surface->compositor->image.channelSize; auto cbuffer = surface->compositor->image.buf8; auto alpha = surface->blender.alpha(surface->compositor->method); for (uint32_t i = 0; i < rle->size; ++i, ++span) { - fillFetchRadial(fill, buffer, span->y, span->x, span->len); auto dst = &surface->buf32[span->y * surface->stride + span->x]; auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + span->x) * csize]; - auto src = buffer; - if (span->coverage == 255) { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++src, cmp += csize) { - auto tmp = ALPHA_BLEND(*src, alpha(cmp)); - *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); - } - } else { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++src, cmp += csize) { - auto tmp = ALPHA_BLEND(*src, _multiply(span->coverage, alpha(cmp))); - *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); - } - } + fillRasterRadial(fill, 
dst, span->y, span->x, span->len, cmp, alpha, csize, span->coverage); } return true; } @@ -1223,22 +1132,11 @@ static bool _rasterTranslucentRadialGradientRle(SwSurface* surface, const SwRleD if (fill->radial.a < FLT_EPSILON) return false; auto span = rle->spans; - auto buffer = static_cast(alloca(surface->w * sizeof(uint32_t))); - if (!buffer) return false; for (uint32_t i = 0; i < rle->size; ++i, ++span) { auto dst = &surface->buf32[span->y * surface->stride + span->x]; - fillFetchRadial(fill, buffer, span->y, span->x, span->len); - if (span->coverage == 255) { - for (uint32_t x = 0; x < span->len; ++x, ++dst) { - *dst = buffer[x] + ALPHA_BLEND(*dst, _ialpha(buffer[x])); - } - } else { - for (uint32_t x = 0; x < span->len; ++x, ++dst) { - auto tmp = ALPHA_BLEND(buffer[x], span->coverage); - *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); - } - } + if (span->coverage == 255) fillRasterRadial(fill, dst, span->y, span->x, span->len, opBlend); + else fillRasterRadial(fill, dst, span->y, span->x, span->len, opAlphaBlend, span->coverage); } return true; } @@ -1248,21 +1146,12 @@ static bool _rasterSolidRadialGradientRle(SwSurface* surface, const SwRleData* r { if (fill->radial.a < FLT_EPSILON) return false; - auto buf = static_cast(alloca(surface->w * sizeof(uint32_t))); - if (!buf) return false; - auto span = rle->spans; for (uint32_t i = 0; i < rle->size; ++i, ++span) { auto dst = &surface->buf32[span->y * surface->stride + span->x]; - if (span->coverage == 255) { - fillFetchRadial(fill, dst, span->y, span->x, span->len); - } else { - fillFetchRadial(fill, buf, span->y, span->x, span->len); - for (uint32_t x = 0; x < span->len; ++x, ++dst) { - *dst = INTERPOLATE(buf[x], *dst, span->coverage); - } - } + if (span->coverage == 255) fillRasterRadial(fill, dst, span->y, span->x, span->len); + else fillRasterRadial(fill, dst, span->y, span->x, span->len, opInterpolate, span->coverage); } return true; } @@ -1275,6 +1164,7 @@ static bool _rasterRadialGradientRle(SwSurface* 
surface, const SwRleData* rle, c if (_compositing(surface)) { return _rasterRadialGradientMaskedRle(surface, rle, fill); } else { + //OPTIMIZE_ME: Unify branches. if (fill->translucent) _rasterTranslucentRadialGradientRle(surface, rle, fill); else return _rasterSolidRadialGradientRle(surface, rle, fill); }