From 535ea16b95a2e7560a5934b92d711d33a2f1d1f5 Mon Sep 17 00:00:00 2001 From: Hermet Park Date: Tue, 3 Jun 2025 00:56:05 +0900 Subject: [PATCH 1/3] sw_engine: enable render region clipping during rendering Implemented support for clipping shapes and images using a render region bounding box at render time. This allows partial drawing of content, laying the groundwork for the upcoming partial rendering functionality. For fast access to the drawing region within the linear RLE data, we introduced a binary search for the begin/end of the RLE spans instead of an additional y-index buffer. There is a reason for not using a y-index buffer: the shapes in the RLE are not single, continuous shapes but multiple shapes scattered across the space, which means that we would need a doubly-associated data structure per shape for y indexing, and preparing this data would not be sufficiently cheaper than a real-time binary search, especially for animated data. This also helps the current clipping performance by utilizing the newly introduced fast-clipping region access. 
issue: https://github.com/thorvg/thorvg/issues/1747 --- src/renderer/sw_engine/tvgSwCommon.h | 42 ++++- src/renderer/sw_engine/tvgSwRaster.cpp | 207 ++++++++++++--------- src/renderer/sw_engine/tvgSwRasterAvx.h | 34 ++-- src/renderer/sw_engine/tvgSwRasterC.h | 19 +- src/renderer/sw_engine/tvgSwRasterNeon.h | 27 ++- src/renderer/sw_engine/tvgSwRasterTexmap.h | 92 +++------ src/renderer/sw_engine/tvgSwRenderer.cpp | 69 ++++--- src/renderer/sw_engine/tvgSwRle.cpp | 11 +- 8 files changed, 270 insertions(+), 231 deletions(-) diff --git a/src/renderer/sw_engine/tvgSwCommon.h b/src/renderer/sw_engine/tvgSwCommon.h index 1a8fdc6b..8ab93e65 100644 --- a/src/renderer/sw_engine/tvgSwCommon.h +++ b/src/renderer/sw_engine/tvgSwCommon.h @@ -23,6 +23,7 @@ #ifndef _TVG_SW_COMMON_H_ #define _TVG_SW_COMMON_H_ +#include #include "tvgCommon.h" #include "tvgMath.h" #include "tvgRender.h" @@ -117,22 +118,47 @@ struct SwSpan uint16_t x, y; uint16_t len; uint8_t coverage; + + void fetch(const RenderRegion& bbox, int32_t& x, int32_t& len) const + { + x = std::max((int32_t)this->x, bbox.min.x); + len = std::min((int32_t)(this->x + this->len), bbox.max.x) - x; + } }; + struct SwRle { Array spans; - bool invalid() const + const SwSpan* fetch(const RenderRegion& bbox, const SwSpan** end) const { - return spans.empty(); + return fetch(bbox.min.y, bbox.max.y, end); } - bool valid() const + const SwSpan* fetch(int32_t min, uint32_t max, const SwSpan** end) const { - return !invalid(); + const SwSpan* begin; + + if (min <= spans.first().y) { + begin = spans.begin(); + } else { + auto comp = [](const SwSpan& span, int y) { return span.y < y; }; + begin = lower_bound(spans.begin(), spans.end(), min, comp); + } + if (end) { + if (max > spans.last().y) { + *end = spans.end(); + } else { + auto comp = [](int y, const SwSpan& span) { return y <= span.y; }; + *end = upper_bound(spans.begin(), spans.end(), max, comp); + } + } + return begin; } + bool invalid() const { return spans.empty(); } + bool 
valid() const { return !invalid(); } uint32_t size() const { return spans.count; } SwSpan* data() const { return spans.data; } }; @@ -566,11 +592,11 @@ SwOutline* mpoolReqDashOutline(SwMpool* mpool, unsigned idx); void mpoolRetDashOutline(SwMpool* mpool, unsigned idx); bool rasterCompositor(SwSurface* surface); -bool rasterGradientShape(SwSurface* surface, SwShape* shape, const Fill* fdata, uint8_t opacity); -bool rasterShape(SwSurface* surface, SwShape* shape, RenderColor& c); +bool rasterGradientShape(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, const Fill* fdata, uint8_t opacity); +bool rasterShape(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, RenderColor& c); bool rasterImage(SwSurface* surface, SwImage* image, const Matrix& transform, const RenderRegion& bbox, uint8_t opacity); -bool rasterStroke(SwSurface* surface, SwShape* shape, RenderColor& c); -bool rasterGradientStroke(SwSurface* surface, SwShape* shape, const Fill* fdata, uint8_t opacity); +bool rasterStroke(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, RenderColor& c); +bool rasterGradientStroke(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, const Fill* fdata, uint8_t opacity); bool rasterClear(SwSurface* surface, uint32_t x, uint32_t y, uint32_t w, uint32_t h, pixel_t val = 0); void rasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len); void rasterTranslucentPixel32(uint32_t* dst, uint32_t* src, uint32_t len, uint8_t opacity); diff --git a/src/renderer/sw_engine/tvgSwRaster.cpp b/src/renderer/sw_engine/tvgSwRaster.cpp index 16d8f45d..1a052a7c 100644 --- a/src/renderer/sw_engine/tvgSwRaster.cpp +++ b/src/renderer/sw_engine/tvgSwRaster.cpp @@ -472,37 +472,44 @@ static bool _rasterRect(SwSurface* surface, const RenderRegion& bbox, const Rend /* Rle */ /************************************************************************/ -static bool _rasterCompositeMaskedRle(SwSurface* surface, SwRle* rle, SwMask maskOp, uint8_t a) 
+static bool _rasterCompositeMaskedRle(SwSurface* surface, SwRle* rle, const RenderRegion& bbox, SwMask maskOp, uint8_t a) { auto cbuffer = surface->compositor->image.buf8; auto cstride = surface->compositor->image.stride; + const SwSpan* end; + int32_t x, len; uint8_t src; - ARRAY_FOREACH(span, rle->spans) { - auto cmp = &cbuffer[span->y * cstride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto cmp = &cbuffer[span->y * cstride + x]; if (span->coverage == 255) src = a; else src = MULTIPLY(a, span->coverage); auto ialpha = 255 - src; - for (auto x = 0; x < span->len; ++x, ++cmp) { + for (auto x = 0; x < len; ++x, ++cmp) { *cmp = maskOp(src, *cmp, ialpha); } } + return _compositeMaskImage(surface, &surface->compositor->image, surface->compositor->bbox); } -static bool _rasterDirectMaskedRle(SwSurface* surface, SwRle* rle, SwMask maskOp, uint8_t a) +static bool _rasterDirectMaskedRle(SwSurface* surface, SwRle* rle, const RenderRegion& bbox, SwMask maskOp, uint8_t a) { auto cbuffer = surface->compositor->image.buf8; auto cstride = surface->compositor->image.stride; + const SwSpan* end; + int32_t x, len; uint8_t src; - ARRAY_FOREACH(span, rle->spans) { - auto cmp = &cbuffer[span->y * cstride + span->x]; - auto dst = &surface->buf8[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto cmp = &cbuffer[span->y * cstride + x]; + auto dst = &surface->buf8[span->y * surface->stride + x]; if (span->coverage == 255) src = a; else src = MULTIPLY(a, span->coverage); - for (auto x = 0; x < span->len; ++x, ++cmp, ++dst) { + for (auto x = 0; x < len; ++x, ++cmp, ++dst) { auto tmp = maskOp(src, *cmp, 0); //not use alpha *dst = tmp + MULTIPLY(*dst, ~tmp); } @@ -511,7 +518,7 @@ static bool _rasterDirectMaskedRle(SwSurface* surface, SwRle* rle, SwMask maskOp } -static bool _rasterMaskedRle(SwSurface* surface, SwRle* rle, const RenderColor& 
c) +static bool _rasterMaskedRle(SwSurface* surface, SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { TVGLOG("SW_ENGINE", "Masked(%d) Rle", (int)surface->compositor->method); @@ -519,30 +526,33 @@ static bool _rasterMaskedRle(SwSurface* surface, SwRle* rle, const RenderColor& if (surface->channelSize != sizeof(uint8_t)) return false; auto maskOp = _getMaskOp(surface->compositor->method); - if (_direct(surface->compositor->method)) return _rasterDirectMaskedRle(surface, rle, maskOp, c.a); - else return _rasterCompositeMaskedRle(surface, rle, maskOp, c.a); + if (_direct(surface->compositor->method)) return _rasterDirectMaskedRle(surface, rle, bbox, maskOp, c.a); + else return _rasterCompositeMaskedRle(surface, rle, bbox, maskOp, c.a); return false; } -static bool _rasterMattedRle(SwSurface* surface, SwRle* rle, const RenderColor& c) +static bool _rasterMattedRle(SwSurface* surface, SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { TVGLOG("SW_ENGINE", "Matted(%d) Rle", (int)surface->compositor->method); auto cbuffer = surface->compositor->image.buf8; auto csize = surface->compositor->image.channelSize; auto alpha = surface->alpha(surface->compositor->method); + const SwSpan* end; + int32_t x, len; //32bit channels if (surface->channelSize == sizeof(uint32_t)) { uint32_t src; auto color = surface->join(c.r, c.g, c.b, c.a); - ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; - auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + span->x) * csize]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf32[span->y * surface->stride + x]; + auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + x) * csize]; if (span->coverage == 255) src = color; else src = ALPHA_BLEND(color, span->coverage); - for (uint32_t x = 0; x < span->len; ++x, ++dst, cmp += csize) { + for (auto x = 0; x < len; ++x, ++dst, cmp += 
csize) { auto tmp = ALPHA_BLEND(src, alpha(cmp)); *dst = tmp + ALPHA_BLEND(*dst, IA(tmp)); } @@ -550,12 +560,13 @@ static bool _rasterMattedRle(SwSurface* surface, SwRle* rle, const RenderColor& //8bit grayscale } else if (surface->channelSize == sizeof(uint8_t)) { uint8_t src; - ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf8[span->y * surface->stride + span->x]; - auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + span->x) * csize]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf8[span->y * surface->stride + x]; + auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + x) * csize]; if (span->coverage == 255) src = c.a; else src = MULTIPLY(c.a, span->coverage); - for (uint32_t x = 0; x < span->len; ++x, ++dst, cmp += csize) { + for (auto x = 0; x < len; ++x, ++dst, cmp += csize) { *dst = INTERPOLATE8(src, *dst, alpha(cmp)); } } @@ -564,20 +575,23 @@ static bool _rasterMattedRle(SwSurface* surface, SwRle* rle, const RenderColor& } -static bool _rasterBlendingRle(SwSurface* surface, const SwRle* rle, const RenderColor& c) +static bool _rasterBlendingRle(SwSurface* surface, const SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { if (surface->channelSize != sizeof(uint32_t)) return false; auto color = surface->join(c.r, c.g, c.b, c.a); + const SwSpan* end; + int32_t x, len; - ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf32[span->y * surface->stride + x]; if (span->coverage == 255) { - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { *dst = surface->blender(color, *dst, 255); } } else { - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { auto tmp = surface->blender(color, *dst, 255); *dst 
= INTERPOLATE(tmp, *dst, span->coverage); } @@ -587,44 +601,47 @@ static bool _rasterBlendingRle(SwSurface* surface, const SwRle* rle, const Rende } -static bool _rasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderColor& c) +static bool _rasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { #if defined(THORVG_AVX_VECTOR_SUPPORT) - return avxRasterTranslucentRle(surface, rle, c); + return avxRasterTranslucentRle(surface, rle, bbox, c); #elif defined(THORVG_NEON_VECTOR_SUPPORT) - return neonRasterTranslucentRle(surface, rle, c); + return neonRasterTranslucentRle(surface, rle, bbox, c); #else - return cRasterTranslucentRle(surface, rle, c); + return cRasterTranslucentRle(surface, rle, bbox, c); #endif } -static bool _rasterSolidRle(SwSurface* surface, const SwRle* rle, const RenderColor& c) +static bool _rasterSolidRle(SwSurface* surface, const SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { + const SwSpan* end; + int32_t x, len; + //32bit channels if (surface->channelSize == sizeof(uint32_t)) { auto color = surface->join(c.r, c.g, c.b, 255); - ARRAY_FOREACH(span, rle->spans) { - if (span->coverage == 255) { - rasterPixel32(surface->buf32 + span->y * surface->stride, color, span->x, span->len); - } else { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + if (span->coverage == 255) rasterPixel32(surface->buf32 + span->y * surface->stride, color, x, len); + else { + auto dst = &surface->buf32[span->y * surface->stride + x]; auto src = ALPHA_BLEND(color, span->coverage); auto ialpha = 255 - span->coverage; - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { *dst = src + ALPHA_BLEND(*dst, ialpha); } } } //8bit grayscale } else if (surface->channelSize == sizeof(uint8_t)) { - ARRAY_FOREACH(span, rle->spans) { - if (span->coverage == 
255) { - rasterGrayscale8(surface->buf8, span->coverage, span->y * surface->stride + span->x, span->len); - } else { - auto dst = &surface->buf8[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + if (span->coverage == 255) rasterGrayscale8(surface->buf8, span->coverage, span->y * surface->stride + x, len); + else { + auto dst = &surface->buf8[span->y * surface->stride + x]; auto ialpha = 255 - span->coverage; - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { *dst = span->coverage + MULTIPLY(*dst, ialpha); } } @@ -634,18 +651,18 @@ static bool _rasterSolidRle(SwSurface* surface, const SwRle* rle, const RenderCo } -static bool _rasterRle(SwSurface* surface, SwRle* rle, const RenderColor& c) +static bool _rasterRle(SwSurface* surface, SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { if (!rle || rle->invalid()) return false; if (_compositing(surface)) { - if (_matting(surface)) return _rasterMattedRle(surface, rle, c); - else return _rasterMaskedRle(surface, rle, c); + if (_matting(surface)) return _rasterMattedRle(surface, rle, bbox, c); + else return _rasterMaskedRle(surface, rle, bbox, c); } else if (_blending(surface)) { - return _rasterBlendingRle(surface, rle, c); + return _rasterBlendingRle(surface, rle, bbox, c); } else { - if (c.a == 255) return _rasterSolidRle(surface, rle, c); - else return _rasterTranslucentRle(surface, rle, c); + if (c.a == 255) return _rasterSolidRle(surface, rle, bbox, c); + else return _rasterTranslucentRle(surface, rle, bbox, c); } return false; } @@ -781,26 +798,29 @@ static bool _scaledRleImage(SwSurface* surface, const SwImage* image, const Matr /* RLE Direct Image */ /************************************************************************/ -static bool _rasterDirectMattedRleImage(SwSurface* surface, const SwImage* image, uint8_t opacity) +static bool _rasterDirectMattedRleImage(SwSurface* 
surface, const SwImage* image, const RenderRegion& bbox, uint8_t opacity) { TVGLOG("SW_ENGINE", "Direct Matted(%d) Rle Image", (int)surface->compositor->method); auto csize = surface->compositor->image.channelSize; auto cbuffer = surface->compositor->image.buf8; auto alpha = surface->alpha(surface->compositor->method); + const SwSpan* end; + int32_t x, len; - ARRAY_FOREACH(span, image->rle->spans) { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; - auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + span->x) * csize]; - auto img = image->buf32 + (span->y + image->oy) * image->stride + (span->x + image->ox); + for (auto span = image->rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf32[span->y * surface->stride + x]; + auto cmp = &cbuffer[(span->y * surface->compositor->image.stride + x) * csize]; + auto img = image->buf32 + (span->y + image->oy) * image->stride + (x + image->ox); auto a = MULTIPLY(span->coverage, opacity); if (a == 255) { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++img, cmp += csize) { + for (auto x = 0; x < len; ++x, ++dst, ++img, cmp += csize) { auto tmp = ALPHA_BLEND(*img, alpha(cmp)); *dst = tmp + ALPHA_BLEND(*dst, IA(tmp)); } } else { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++img, cmp += csize) { + for (auto x = 0; x < len; ++x, ++dst, ++img, cmp += csize) { auto tmp = ALPHA_BLEND(*img, MULTIPLY(a, alpha(cmp))); *dst = tmp + ALPHA_BLEND(*dst, IA(tmp)); } @@ -810,18 +830,22 @@ static bool _rasterDirectMattedRleImage(SwSurface* surface, const SwImage* image } -static bool _rasterDirectBlendingRleImage(SwSurface* surface, const SwImage* image, uint8_t opacity) +static bool _rasterDirectBlendingRleImage(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, uint8_t opacity) { - ARRAY_FOREACH(span, image->rle->spans) { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; - auto img = image->buf32 + (span->y + image->oy) 
* image->stride + (span->x + image->ox); + const SwSpan* end; + int32_t x, len; + + for (auto span = image->rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf32[span->y * surface->stride + x]; + auto img = image->buf32 + (span->y + image->oy) * image->stride + (x + image->ox); auto alpha = MULTIPLY(span->coverage, opacity); if (alpha == 255) { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++img) { + for (auto x = 0; x < len; ++x, ++dst, ++img) { *dst = surface->blender(*img, *dst, 255); } } else { - for (uint32_t x = 0; x < span->len; ++x, ++dst, ++img) { + for (auto x = 0; x < len; ++x, ++dst, ++img) { auto tmp = surface->blender(*img, *dst, 255); *dst = INTERPOLATE(tmp, *dst, MULTIPLY(alpha, A(*img))); } @@ -831,26 +855,30 @@ static bool _rasterDirectBlendingRleImage(SwSurface* surface, const SwImage* ima } -static bool _rasterDirectRleImage(SwSurface* surface, const SwImage* image, uint8_t opacity) +static bool _rasterDirectRleImage(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, uint8_t opacity) { - ARRAY_FOREACH(span, image->rle->spans) { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; - auto img = image->buf32 + (span->y + image->oy) * image->stride + (span->x + image->ox); + const SwSpan* end; + int32_t x, len; + + for (auto span = image->rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf32[span->y * surface->stride + x]; + auto img = image->buf32 + (span->y + image->oy) * image->stride + (x + image->ox); auto alpha = MULTIPLY(span->coverage, opacity); - rasterTranslucentPixel32(dst, img, span->len, alpha); + rasterTranslucentPixel32(dst, img, len, alpha); } return true; } -static bool _rasterDirectMaskedRleImage(SwSurface* surface, const SwImage* image, uint8_t opacity) +static bool _rasterDirectMaskedRleImage(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, uint8_t opacity) { TVGERR("SW_ENGINE", 
"Not Supported Direct Masked(%d) Rle Image", (int)surface->compositor->method); return false; } -static bool _directRleImage(SwSurface* surface, const SwImage* image, uint8_t opacity) +static bool _directRleImage(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, uint8_t opacity) { if (surface->channelSize == sizeof(uint8_t)) { TVGERR("SW_ENGINE", "Not supported grayscale rle image!"); @@ -858,12 +886,12 @@ static bool _directRleImage(SwSurface* surface, const SwImage* image, uint8_t op } if (_compositing(surface)) { - if (_matting(surface)) return _rasterDirectMattedRleImage(surface, image, opacity); - else return _rasterDirectMaskedRleImage(surface, image, opacity); + if (_matting(surface)) return _rasterDirectMattedRleImage(surface, image, bbox, opacity); + else return _rasterDirectMaskedRleImage(surface, image, bbox, opacity); } else if (_blending(surface)) { - return _rasterDirectBlendingRleImage(surface, image, opacity); + return _rasterDirectBlendingRleImage(surface, image, bbox, opacity); } else { - return _rasterDirectRleImage(surface, image, opacity); + return _rasterDirectRleImage(surface, image, bbox, opacity); } return false; } @@ -1185,14 +1213,14 @@ static bool _rasterImage(SwSurface* surface, SwImage* image, const Matrix& trans { //RLE Image if (image->rle) { - if (image->direct) return _directRleImage(surface, image, opacity); + if (image->direct) return _directRleImage(surface, image, bbox, opacity); else if (image->scaled) return _scaledRleImage(surface, image, transform, bbox, opacity); - else return _rasterTexmapPolygon(surface, image, transform, nullptr, opacity); + else return _rasterTexmapPolygon(surface, image, transform, bbox, opacity); //Whole Image } else { if (image->direct) return _directImage(surface, image, bbox, opacity); else if (image->scaled) return _scaledImage(surface, image, transform, bbox, opacity); - else return _rasterTexmapPolygon(surface, image, transform, &bbox, opacity); + else return 
_rasterTexmapPolygon(surface, image, transform, bbox, opacity); } } @@ -1666,36 +1694,35 @@ void rasterPremultiply(RenderSurface* surface) } -bool rasterGradientShape(SwSurface* surface, SwShape* shape, const Fill* fdata, uint8_t opacity) +bool rasterGradientShape(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, const Fill* fdata, uint8_t opacity) { if (!shape->fill) return false; if (auto color = fillFetchSolid(shape->fill, fdata)) { auto a = MULTIPLY(color->a, opacity); RenderColor c = {color->r, color->g, color->b, a}; - return a > 0 ? rasterShape(surface, shape, c) : true; + return a > 0 ? rasterShape(surface, shape, bbox, c) : true; } auto type = fdata->type(); if (shape->fastTrack) { - if (type == Type::LinearGradient) return _rasterLinearGradientRect(surface, shape->bbox, shape->fill); - else if (type == Type::RadialGradient)return _rasterRadialGradientRect(surface, shape->bbox, shape->fill); + if (type == Type::LinearGradient) return _rasterLinearGradientRect(surface, bbox, shape->fill); + else if (type == Type::RadialGradient)return _rasterRadialGradientRect(surface, bbox, shape->fill); } else if (shape->rle && shape->rle->valid()) { if (type == Type::LinearGradient) return _rasterLinearGradientRle(surface, shape->rle, shape->fill); else if (type == Type::RadialGradient) return _rasterRadialGradientRle(surface, shape->rle, shape->fill); - } - return false; + } return false; } -bool rasterGradientStroke(SwSurface* surface, SwShape* shape, const Fill* fdata, uint8_t opacity) +bool rasterGradientStroke(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, const Fill* fdata, uint8_t opacity) { if (!shape->stroke || !shape->stroke->fill || !shape->strokeRle || shape->strokeRle->invalid()) return false; if (auto color = fillFetchSolid(shape->stroke->fill, fdata)) { RenderColor c = {color->r, color->g, color->b, color->a}; c.a = MULTIPLY(c.a, opacity); - return c.a > 0 ? rasterStroke(surface, shape, c) : true; + return c.a > 0 ? 
rasterStroke(surface, shape, bbox, c) : true; } auto type = fdata->type(); @@ -1705,19 +1732,19 @@ bool rasterGradientStroke(SwSurface* surface, SwShape* shape, const Fill* fdata, } -bool rasterShape(SwSurface* surface, SwShape* shape, RenderColor& c) +bool rasterShape(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, RenderColor& c) { if (c.a < 255) { c.r = MULTIPLY(c.r, c.a); c.g = MULTIPLY(c.g, c.a); c.b = MULTIPLY(c.b, c.a); } - if (shape->fastTrack) return _rasterRect(surface, shape->bbox, c); - else return _rasterRle(surface, shape->rle, c); + if (shape->fastTrack) return _rasterRect(surface, bbox, c); + else return _rasterRle(surface, shape->rle, bbox, c); } -bool rasterStroke(SwSurface* surface, SwShape* shape, RenderColor& c) +bool rasterStroke(SwSurface* surface, SwShape* shape, const RenderRegion& bbox, RenderColor& c) { if (c.a < 255) { c.r = MULTIPLY(c.r, c.a); @@ -1725,7 +1752,7 @@ bool rasterStroke(SwSurface* surface, SwShape* shape, RenderColor& c) c.b = MULTIPLY(c.b, c.a); } - return _rasterRle(surface, shape->strokeRle, c); + return _rasterRle(surface, shape->strokeRle, bbox, c); } @@ -1791,4 +1818,4 @@ void rasterXYFlip(uint32_t* src, uint32_t* dst, int32_t stride, int32_t w, int32 } } } -} +} \ No newline at end of file diff --git a/src/renderer/sw_engine/tvgSwRasterAvx.h b/src/renderer/sw_engine/tvgSwRasterAvx.h index e2e8360d..3f53e877 100644 --- a/src/renderer/sw_engine/tvgSwRasterAvx.h +++ b/src/renderer/sw_engine/tvgSwRasterAvx.h @@ -158,47 +158,51 @@ static bool avxRasterTranslucentRect(SwSurface* surface, const RenderRegion& bbo } -static bool avxRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderColor& c) +static bool avxRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { + const SwSpan* end; + int32_t x, len; + //32bit channels if (surface->channelSize == sizeof(uint32_t)) { auto color = surface->join(c.r, c.g, c.b, c.a); uint32_t src; - 
ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; - + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + FETCH_BOUND(span, bbox); + span->fetch(bbox, x, len); if (span->coverage < 255) src = ALPHA_BLEND(color, span->coverage); else src = color; + auto dst = &surface->buf32[span->y * surface->stride + x]; auto ialpha = IA(src); //1. fill the not aligned memory (for 128-bit registers a 16-bytes alignment is required) - auto notAligned = ((uintptr_t)dst & 0xf) / 4; + int32_t notAligned = ((uintptr_t)dst & 0xf) / 4; if (notAligned) { - notAligned = (N_32BITS_IN_128REG - notAligned > span->len ? span->len : N_32BITS_IN_128REG - notAligned); - for (uint32_t x = 0; x < notAligned; ++x, ++dst) { + notAligned = (N_32BITS_IN_128REG - notAligned > len ? len : N_32BITS_IN_128REG - notAligned); + for (auto x = 0; x < notAligned; ++x, ++dst) { *dst = src + ALPHA_BLEND(*dst, ialpha); } } //2. fill the aligned memory using avx - N_32BITS_IN_128REG pixels processed at once //In order to avoid unnecessary avx variables declarations a check is made whether there are any iterations at all - uint32_t iterations = (span->len - notAligned) / N_32BITS_IN_128REG; - uint32_t avxFilled = 0; + int32_t iterations = (len - notAligned) / N_32BITS_IN_128REG; + int32_t avxFilled = 0; if (iterations > 0) { auto avxSrc = _mm_set1_epi32(src); auto avxIalpha = _mm_set1_epi8(ialpha); avxFilled = iterations * N_32BITS_IN_128REG; auto avxDst = (__m128i*)dst; - for (uint32_t x = 0; x < iterations; ++x, ++avxDst) { + for (auto x = 0; x < iterations; ++x, ++avxDst) { *avxDst = _mm_add_epi32(avxSrc, ALPHA_BLEND(*avxDst, avxIalpha)); } } //3. 
fill the remaining pixels - int32_t leftovers = span->len - notAligned - avxFilled; + auto leftovers = len - notAligned - avxFilled; dst += avxFilled; while (leftovers--) { *dst = src + ALPHA_BLEND(*dst, ialpha); @@ -211,12 +215,14 @@ static bool avxRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const } else if (surface->channelSize == sizeof(uint8_t)) { TVGLOG("SW_ENGINE", "Require AVX Optimization, Channel Size = %d", surface->channelSize); uint8_t src; - ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf8[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + FETCH_BOUND(span, bbox); + span->fetch(bbox, x, len); + auto dst = &surface->buf8[span->y * surface->stride + x]; if (span->coverage < 255) src = MULTIPLY(span->coverage, c.a); else src = c.a; auto ialpha = ~c.a; - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { *dst = src + MULTIPLY(*dst, ialpha); } } diff --git a/src/renderer/sw_engine/tvgSwRasterC.h b/src/renderer/sw_engine/tvgSwRasterC.h index 82b696be..8dcbf14c 100644 --- a/src/renderer/sw_engine/tvgSwRasterC.h +++ b/src/renderer/sw_engine/tvgSwRasterC.h @@ -92,30 +92,35 @@ static void inline cRasterPixels(PIXEL_T* dst, PIXEL_T val, uint32_t offset, int } -static bool inline cRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderColor& c) +static bool inline cRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { + const SwSpan* end; + int32_t x, len; + //32bit channels if (surface->channelSize == sizeof(uint32_t)) { auto color = surface->join(c.r, c.g, c.b, c.a); uint32_t src; - ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf32[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf32[span->y * surface->stride + x]; if (span->coverage < 255) src = 
ALPHA_BLEND(color, span->coverage); else src = color; auto ialpha = IA(src); - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { *dst = src + ALPHA_BLEND(*dst, ialpha); } } //8bit grayscale } else if (surface->channelSize == sizeof(uint8_t)) { uint8_t src; - ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf8[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + span->fetch(bbox, x, len); + auto dst = &surface->buf8[span->y * surface->stride + x]; if (span->coverage < 255) src = MULTIPLY(span->coverage, c.a); else src = c.a; auto ialpha = ~c.a; - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { *dst = src + MULTIPLY(*dst, ialpha); } } diff --git a/src/renderer/sw_engine/tvgSwRasterNeon.h b/src/renderer/sw_engine/tvgSwRasterNeon.h index d5c408c6..d8409b6d 100644 --- a/src/renderer/sw_engine/tvgSwRasterNeon.h +++ b/src/renderer/sw_engine/tvgSwRasterNeon.h @@ -89,20 +89,25 @@ static void neonRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int3 } -static bool neonRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderColor& c) +static bool neonRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const RenderRegion& bbox, const RenderColor& c) { + const SwSpan* end; + int32_t x, len; + //32bit channels if (surface->channelSize == sizeof(uint32_t)) { auto color = surface->join(c.r, c.g, c.b, c.a); uint32_t src; uint8x8_t *vDst = nullptr; - uint16_t align; + int32_t align; - ARRAY_FOREACH(span, rle->spans) { + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + FETCH_BOUND(span, bbox); + span->fetch(bbox, x, len); if (span->coverage < 255) src = ALPHA_BLEND(color, span->coverage); else src = color; - auto dst = &surface->buf32[span->y * surface->stride + span->x]; + auto dst = &surface->buf32[span->y * surface->stride + x]; auto ialpha = IA(src); if ((((uintptr_t) dst) & 0x7) != 0) 
{ @@ -118,11 +123,11 @@ static bool neonRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const uint8x8_t vSrc = (uint8x8_t) vdup_n_u32(src); uint8x8_t vIalpha = vdup_n_u8((uint8_t) ialpha); - for (uint32_t x = 0; x < (span->len - align) / 2; ++x) + for (int32_t x = 0; x < (len - align) / 2; ++x) vDst[x] = vadd_u8(vSrc, ALPHA_BLEND(vDst[x], vIalpha)); - auto leftovers = (span->len - align) % 2; - if (leftovers > 0) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], ialpha); + auto leftovers = (len - align) % 2; + if (leftovers > 0) dst[len - 1] = src + ALPHA_BLEND(dst[len - 1], ialpha); ++span; } @@ -130,12 +135,14 @@ static bool neonRasterTranslucentRle(SwSurface* surface, const SwRle* rle, const } else if (surface->channelSize == sizeof(uint8_t)) { TVGLOG("SW_ENGINE", "Require Neon Optimization, Channel Size = %d", surface->channelSize); uint8_t src; - ARRAY_FOREACH(span, rle->spans) { - auto dst = &surface->buf8[span->y * surface->stride + span->x]; + for (auto span = rle->fetch(bbox, &end); span < end; ++span) { + FETCH_BOUND(span, bbox); + span->fetch(bbox, x, len); + auto dst = &surface->buf8[span->y * surface->stride + x]; if (span->coverage < 255) src = MULTIPLY(span->coverage, c.a); else src = c.a; auto ialpha = ~c.a; - for (uint32_t x = 0; x < span->len; ++x, ++dst) { + for (auto x = 0; x < len; ++x, ++dst) { *dst = src + MULTIPLY(*dst, ialpha); } } diff --git a/src/renderer/sw_engine/tvgSwRasterTexmap.h b/src/renderer/sw_engine/tvgSwRasterTexmap.h index 62b0bea3..4cf46424 100644 --- a/src/renderer/sw_engine/tvgSwRasterTexmap.h +++ b/src/renderer/sw_engine/tvgSwRasterTexmap.h @@ -52,33 +52,25 @@ static float xa, xb, ua, va; //Y Range exception handling -static bool _arrange(const SwImage* image, const RenderRegion* bbox, int& yStart, int& yEnd) +static bool _arrange(const SwImage* image, const RenderRegion& bbox, int& yStart, int& yEnd) { - int32_t bboxTop, bboxBottom; - - if (bbox) { - bboxTop = bbox->min.y; - bboxBottom = bbox->max.y; - 
} else { - bboxTop = image->rle->spans.first().y; - bboxBottom = image->rle->spans.last().y; - } - - if (yStart < bboxTop) yStart = bboxTop; - if (yEnd > bboxBottom) yEnd = bboxBottom; + auto top = image->rle ? std::max((int)image->rle->spans.first().y, bbox.min.y) : (int)bbox.min.y; + auto bottom = image->rle? std::min((int)image->rle->spans.last().y, bbox.max.y) : (int)bbox.max.y; + if (yStart < top) yStart = top; + if (yEnd > bottom) yEnd = bottom; return yEnd > yStart; } -static bool _rasterMaskedPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion* bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity, uint8_t dirFlag = 0) +static bool _rasterMaskedPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity, uint8_t dirFlag = 0) { TVGERR("SW_ENGINE", "TODO: _rasterMaskedPolygonImageSegment()"); return false; } -static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion* bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity) +static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity) { float _dudx = dudx, _dvdx = dvdx; float _dxdya = dxdya, _dxdyb = dxdyb, _dudya = dudya, _dvdya = dvdya; @@ -89,26 +81,15 @@ static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage int32_t sh = static_cast(image->h); int32_t x1, x2, x, y, ar, ab, iru, irv, px, ay; int32_t vv = 0, uu = 0; - int32_t minx = INT32_MAX, maxx = 0; + auto minx = bbox.min.x; + auto maxx = bbox.max.x; float dx, u, v, iptr; uint32_t* buf; - SwSpan* span = nullptr; //used only when rle based. 
if (!_arrange(image, bbox, yStart, yEnd)) return; - //Loop through all lines in the segment - uint32_t spanIdx = 0; - - if (bbox) { - minx = bbox->min.x; - maxx = bbox->max.x; - } else { - span = image->rle->data(); - while (span->y < yStart) { - ++span; - ++spanIdx; - } - } + const SwSpan* span = nullptr; //used only when rle based. + if (image->rle) span = image->rle->fetch(yStart, yEnd, nullptr); y = yStart; @@ -116,16 +97,17 @@ static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage x1 = (int32_t)_xa; x2 = (int32_t)_xb; - if (!bbox) { + if (span) { minx = INT32_MAX; maxx = 0; //one single row, could be consisted of multiple spans. - while (span->y == y && spanIdx < image->rle->size()) { + while (span->y == y && span < image->rle->spans.end()) { if (minx > span->x) minx = span->x; if (maxx < span->x + span->len) maxx = span->x + span->len; ++span; - ++spanIdx; } + if (minx < bbox.min.x) minx = bbox.min.x; + if (maxx > bbox.max.x) maxx = bbox.max.x; } if (x1 < minx) x1 = minx; if (x2 > maxx) x2 = maxx; @@ -195,8 +177,7 @@ static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage _ua += _dudya; _va += _dvdya; - if (!bbox && spanIdx >= image->rle->size()) break; - + if (span && span >= image->rle->spans.end()) break; ++y; } xa = _xa; @@ -206,7 +187,7 @@ static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage } -static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion* bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity, bool matting) +static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity, bool matting) { float _dudx = dudx, _dvdx = dvdx; float _dxdya = dxdya, _dxdyb = dxdyb, _dudya = dudya, _dvdya = dvdya; @@ -217,10 +198,10 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image, int32_t sh = 
static_cast(image->h); int32_t x1, x2, x, y, ar, ab, iru, irv, px, ay; int32_t vv = 0, uu = 0; - int32_t minx = INT32_MAX, maxx = 0; + auto minx = bbox.min.x; + auto maxx = bbox.max.x; float dx, u, v, iptr; uint32_t* buf; - SwSpan* span = nullptr; //used only when rle based. //for matting(composition) auto csize = matting ? surface->compositor->image.channelSize: 0; @@ -229,19 +210,8 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image, if (!_arrange(image, bbox, yStart, yEnd)) return; - //Loop through all lines in the segment - uint32_t spanIdx = 0; - - if (bbox) { - minx = bbox->min.x; - maxx = bbox->max.x; - } else { - span = image->rle->data(); - while (span->y < yStart) { - ++span; - ++spanIdx; - } - } + const SwSpan* span = nullptr; //used only when rle based. + if (image->rle) span = image->rle->fetch(yStart, yEnd, nullptr); y = yStart; @@ -249,16 +219,17 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image, x1 = (int32_t)_xa; x2 = (int32_t)_xb; - if (!bbox) { + if (span) { minx = INT32_MAX; maxx = 0; //one single row, could be consisted of multiple spans. - while (span->y == y && spanIdx < image->rle->size()) { + while (span->y == y && span < image->rle->spans.end()) { if (minx > span->x) minx = span->x; if (maxx < span->x + span->len) maxx = span->x + span->len; ++span; - ++spanIdx; } + if (minx < bbox.min.x) minx = bbox.min.x; + if (maxx > bbox.max.x) maxx = bbox.max.x; } if (x1 < minx) x1 = minx; if (x2 > maxx) x2 = maxx; @@ -387,8 +358,7 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image, _ua += _dudya; _va += _dvdya; - if (!bbox && spanIdx >= image->rle->size()) break; - + if (span && span >= image->rle->spans.end()) break; ++y; } xa = _xa; @@ -399,7 +369,7 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image, /* This mapping algorithm is based on Mikael Kalms's. 
*/ -static void _rasterPolygonImage(SwSurface* surface, const SwImage* image, const RenderRegion* bbox, Polygon& polygon, AASpans* aaSpans, uint8_t opacity) +static void _rasterPolygonImage(SwSurface* surface, const SwImage* image, const RenderRegion& bbox, Polygon& polygon, AASpans* aaSpans, uint8_t opacity) { float x[3] = {polygon.vertex[0].pt.x, polygon.vertex[1].pt.x, polygon.vertex[2].pt.x}; float y[3] = {polygon.vertex[0].pt.y, polygon.vertex[1].pt.y, polygon.vertex[2].pt.y}; @@ -460,7 +430,7 @@ static void _rasterPolygonImage(SwSurface* surface, const SwImage* image, const if (tvg::equal(y[0], y[1])) side = x[0] > x[1]; if (tvg::equal(y[1], y[2])) side = x[2] > x[1]; - auto bboxTop = bbox ? bbox->min.y : image->rle->data()->y; //Normal Image or Rle Image? + auto bboxTop = image->rle ? std::max(bbox.min.y, (int)image->rle->data()->y) : bbox.min.y; auto compositing = _compositing(surface); //Composition required auto blending = _blending(surface); //Blending required @@ -576,7 +546,7 @@ static void _rasterPolygonImage(SwSurface* surface, const SwImage* image, const } -static AASpans* _AASpans(float ymin, float ymax, const SwImage* image, const RenderRegion* bbox) +static AASpans* _AASpans(float ymin, float ymax, const SwImage* image, const RenderRegion& bbox) { auto yStart = static_cast(ymin); auto yEnd = static_cast(ymax); @@ -863,7 +833,7 @@ static bool _apply(SwSurface* surface, AASpans* aaSpans) | / | 3 -- 2 */ -static bool _rasterTexmapPolygon(SwSurface* surface, const SwImage* image, const Matrix& transform, const RenderRegion* bbox, uint8_t opacity) +static bool _rasterTexmapPolygon(SwSurface* surface, const SwImage* image, const Matrix& transform, const RenderRegion& bbox, uint8_t opacity) { if (surface->channelSize == sizeof(uint8_t)) { TVGERR("SW_ENGINE", "Not supported grayscale Textmap polygon!"); @@ -871,7 +841,7 @@ static bool _rasterTexmapPolygon(SwSurface* surface, const SwImage* image, const } //Exceptions: No dedicated drawing area? 
- if ((!image->rle && !bbox) || (image->rle && image->rle->size() == 0)) return true; + if ((!image->rle && bbox.invalid()) || (image->rle && image->rle->size() == 0)) return true; /* Prepare vertices. shift XY coordinates to match the sub-pixeling technique. */ diff --git a/src/renderer/sw_engine/tvgSwRenderer.cpp b/src/renderer/sw_engine/tvgSwRenderer.cpp index c44f28cb..08ad4ee5 100644 --- a/src/renderer/sw_engine/tvgSwRenderer.cpp +++ b/src/renderer/sw_engine/tvgSwRenderer.cpp @@ -204,9 +204,7 @@ struct SwImageTask : SwTask if ((flags & (RenderUpdateFlag::Image | RenderUpdateFlag::Transform | RenderUpdateFlag::Color)) && (opacity > 0)) { imageReset(&image); if (!image.data || image.w == 0 || image.h == 0) goto end; - - if (!imagePrepare(&image, transform, clipBox, bbox, mpool, tid)) goto end; - + if (!imagePrepare(&image, transform, clipBox, bbox, mpool, tid)) goto end; if (clips.count > 0) { if (!imageGenRle(&image, bbox, false)) goto end; if (image.rle) { @@ -222,6 +220,7 @@ struct SwImageTask : SwTask } goto end; err: + bbox.reset(); rleReset(image.rle); end: imageDelOutline(&image, mpool, tid); @@ -234,31 +233,6 @@ struct SwImageTask : SwTask }; -static void _renderFill(SwShapeTask* task, SwSurface* surface) -{ - if (auto fill = task->rshape->fill) { - rasterGradientShape(surface, &task->shape, fill, task->opacity); - } else { - RenderColor c; - task->rshape->fillColor(&c.r, &c.g, &c.b, &c.a); - c.a = MULTIPLY(task->opacity, c.a); - if (c.a > 0) rasterShape(surface, &task->shape, c); - } -} - -static void _renderStroke(SwShapeTask* task, SwSurface* surface) -{ - if (auto strokeFill = task->rshape->strokeFill()) { - rasterGradientStroke(surface, &task->shape, strokeFill, task->opacity); - } else { - RenderColor c; - if (task->rshape->strokeFill(&c.r, &c.g, &c.b, &c.a)) { - c.a = MULTIPLY(task->opacity, c.a); - if (c.a > 0) rasterStroke(surface, &task->shape, c); - } - } -} - /************************************************************************/ /* 
External Class Implementation */ /************************************************************************/ @@ -299,10 +273,10 @@ bool SwRenderer::clear() bool SwRenderer::sync() { + //clear if the rendering was not triggered. ARRAY_FOREACH(p, tasks) { - if ((*p)->disposed) { - delete(*p); - } else { + if ((*p)->disposed) delete(*p); + else { (*p)->done(); (*p)->pushed = false; } @@ -400,6 +374,7 @@ bool SwRenderer::renderImage(RenderData data) if (task->opacity == 0) return true; + //full scene or partial rendering return rasterImage(surface, &task->image, task->transform, task->bbox, task->opacity); } @@ -413,13 +388,35 @@ bool SwRenderer::renderShape(RenderData data) if (task->opacity == 0) return true; - //Main raster stage + auto fill = [](SwShapeTask* task, SwSurface* surface, const RenderRegion& bbox) { + if (auto fill = task->rshape->fill) { + rasterGradientShape(surface, &task->shape, bbox, fill, task->opacity); + } else { + RenderColor c; + task->rshape->fillColor(&c.r, &c.g, &c.b, &c.a); + c.a = MULTIPLY(task->opacity, c.a); + if (c.a > 0) rasterShape(surface, &task->shape, bbox, c); + } + }; + + auto stroke = [](SwShapeTask* task, SwSurface* surface, const RenderRegion& bbox) { + if (auto strokeFill = task->rshape->strokeFill()) { + rasterGradientStroke(surface, &task->shape, bbox, strokeFill, task->opacity); + } else { + RenderColor c; + if (task->rshape->strokeFill(&c.r, &c.g, &c.b, &c.a)) { + c.a = MULTIPLY(task->opacity, c.a); + if (c.a > 0) rasterStroke(surface, &task->shape, bbox, c); + } + } + }; + if (task->rshape->strokeFirst()) { - _renderStroke(task, surface); - _renderFill(task, surface); + stroke(task, surface, task->bbox); + fill(task, surface, task->shape.bbox); } else { - _renderFill(task, surface); - _renderStroke(task, surface); + fill(task, surface, task->shape.bbox); + stroke(task, surface, task->bbox); } return true; diff --git a/src/renderer/sw_engine/tvgSwRle.cpp b/src/renderer/sw_engine/tvgSwRle.cpp index 58ebffa2..0b57be33 
100644 --- a/src/renderer/sw_engine/tvgSwRle.cpp +++ b/src/renderer/sw_engine/tvgSwRle.cpp @@ -881,10 +881,10 @@ bool rleClip(SwRle *rle, const SwRle *clip) Array out; out.reserve(std::max(rle->spans.count, clip->spans.count)); - auto spans = rle->data(); - auto end = rle->spans.end(); - auto cspans = clip->data(); - auto cend = clip->spans.end(); + const SwSpan *end; + auto spans = rle->fetch(clip->spans.first().y, clip->spans.last().y, &end); + const SwSpan *cend; + auto cspans = clip->fetch(rle->spans.first().y, rle->spans.last().y, &cend); while(spans < end && cspans < cend) { //align y-coordinates. @@ -928,9 +928,10 @@ bool rleClip(SwRle *rle, const RenderRegion* clip) Array out; out.reserve(rle->spans.count); auto data = out.data; + const SwSpan* end; uint16_t x, len; - ARRAY_FOREACH(p, rle->spans) { + for (auto p = rle->fetch(*clip, &end); p < end; ++p) { if (p->y >= max.y) break; if (p->y < min.y || p->x >= max.x || (p->x + p->len) <= min.x) continue; if (p->x < min.x) { From afeb7c024a6b18c3d41ae8721dd76a23bbeb81b5 Mon Sep 17 00:00:00 2001 From: Hermet Park Date: Thu, 5 Jun 2025 11:59:27 +0900 Subject: [PATCH 2/3] renderer: add partial rendering support Partial Rendering refers to a rendering technique where only a portion of the scene or screen is updated, rather than redrawing the entire output. It is commonly used as a performance optimization strategy, focusing on redrawing only the regions that have changed, often called dirty regions. This introduces RenderDirtyRegion, which assists in collecting a compact dirty region from render tasks. Each backend can utilize this class to support efficient partial rendering. This is implemented using a Line Sweep and Subdivision Merging O(NlogN). The basic per-frame workflow is as follows: 1. RenderDirtyRegion::prepare() //Call this in Renderer::preRender(). 2. RenderDirtyRegion::add() //Add all dirty paints for the frame before rendering. 3. 
RenderDirtyRegion::commit() //Generate the partial rendering region list before rendering. 4. RenderDirtyRegion::get() //Retrieve the current dirty region list and use it when drawing paints. 5. RenderDirtyRegion::clear() //Reset the state. issue: https://github.com/thorvg/thorvg/issues/1747 --- src/renderer/gl_engine/tvgGlRenderer.cpp | 6 +++ src/renderer/gl_engine/tvgGlRenderer.h | 1 + src/renderer/sw_engine/tvgSwRenderer.cpp | 6 +++ src/renderer/sw_engine/tvgSwRenderer.h | 1 + src/renderer/tvgRender.h | 64 +++++++++++++++++++++++- src/renderer/tvgScene.h | 14 ++++-- src/renderer/wg_engine/tvgWgRenderer.cpp | 6 +++ src/renderer/wg_engine/tvgWgRenderer.h | 1 + 8 files changed, 93 insertions(+), 6 deletions(-) diff --git a/src/renderer/gl_engine/tvgGlRenderer.cpp b/src/renderer/gl_engine/tvgGlRenderer.cpp index c9148145..357de999 100644 --- a/src/renderer/gl_engine/tvgGlRenderer.cpp +++ b/src/renderer/gl_engine/tvgGlRenderer.cpp @@ -859,6 +859,12 @@ bool GlRenderer::sync() } +void GlRenderer::damage(TVG_UNUSED const RenderRegion& region) +{ + //TODO: +} + + RenderRegion GlRenderer::region(RenderData data) { if (currentPass()->isEmpty()) return {}; diff --git a/src/renderer/gl_engine/tvgGlRenderer.h b/src/renderer/gl_engine/tvgGlRenderer.h index 7ab1e901..6879b63d 100644 --- a/src/renderer/gl_engine/tvgGlRenderer.h +++ b/src/renderer/gl_engine/tvgGlRenderer.h @@ -80,6 +80,7 @@ public: bool postRender() override; void dispose(RenderData data) override;; RenderRegion region(RenderData data) override; + void damage(const RenderRegion& region) override; RenderRegion viewport() override; bool viewport(const RenderRegion& vp) override; bool blend(BlendMethod method) override; diff --git a/src/renderer/sw_engine/tvgSwRenderer.cpp b/src/renderer/sw_engine/tvgSwRenderer.cpp index 08ad4ee5..775c2a8f 100644 --- a/src/renderer/sw_engine/tvgSwRenderer.cpp +++ b/src/renderer/sw_engine/tvgSwRenderer.cpp @@ -477,6 +477,12 @@ bool SwRenderer::blend(BlendMethod method) } +void 
SwRenderer::damage(TVG_UNUSED const RenderRegion& region) +{ + //TODO: +} + + RenderRegion SwRenderer::region(RenderData data) { return static_cast(data)->bounds(); diff --git a/src/renderer/sw_engine/tvgSwRenderer.h b/src/renderer/sw_engine/tvgSwRenderer.h index dc7673df..da9a0bc6 100644 --- a/src/renderer/sw_engine/tvgSwRenderer.h +++ b/src/renderer/sw_engine/tvgSwRenderer.h @@ -46,6 +46,7 @@ public: bool postRender() override; void dispose(RenderData data) override; RenderRegion region(RenderData data) override; + void damage(const RenderRegion& region) override; RenderRegion viewport() override; bool viewport(const RenderRegion& vp) override; bool blend(BlendMethod method) override; diff --git a/src/renderer/tvgRender.h b/src/renderer/tvgRender.h index 46bd0e82..9f18aa62 100644 --- a/src/renderer/tvgRender.h +++ b/src/renderer/tvgRender.h @@ -50,7 +50,6 @@ static inline RenderUpdateFlag operator|(const RenderUpdateFlag a, const RenderU return RenderUpdateFlag(uint16_t(a) | uint16_t(b)); } - struct RenderSurface { union { @@ -137,6 +136,68 @@ struct RenderRegion uint32_t h() const { return (uint32_t) sh(); } }; +struct RenderDirtyRegion +{ + void add(const RenderRegion& region) + { + if (!disabled && region.valid()) { + list[current].push(region); + } + } + + bool prepare(uint32_t count = 0) + { + if (disabled) return false; + + if (count > THRESHOLD) { + skip = true; + return false; + } + + count *= 120; //FIXME: enough? + + list[0].reserve(count); + list[1].reserve(count); + + return true; + } + + bool deactivated() + { + if (disabled || skip) return true; + return false; + } + + void clear() + { + list[0].clear(); + list[1].clear(); + skip = false; + } + + const Array& get() + { + return list[current]; + } + + void commit(); + +private: + void subdivide(Array& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs); + + /* We deactivate partial rendering if there are more than N moving elements. 
+ Imagine thousands of moving objects covering the entire screen, That case partial rendering will lose any benefits. + Even if they don't, the overhead of subdividing and merging partial regions + could be more expensive than simply rendering the full screen. + The number is experimentally confirmed and we are open to improve this. */ + static constexpr const uint32_t THRESHOLD = 5000; + + Array list[2]; //double buffer swapping + uint8_t current = 0; //list index. 0 or 1 + bool disabled = false; + bool skip = false; +}; + struct RenderPath { Array cmds; @@ -433,6 +494,7 @@ public: virtual bool renderImage(RenderData data) = 0; virtual bool postRender() = 0; virtual void dispose(RenderData data) = 0; + virtual void damage(const RenderRegion& region) = 0; virtual RenderRegion region(RenderData data) = 0; virtual RenderRegion viewport() = 0; virtual bool viewport(const RenderRegion& vp) = 0; diff --git a/src/renderer/tvgScene.h b/src/renderer/tvgScene.h index 506bb876..6ce29c94 100644 --- a/src/renderer/tvgScene.h +++ b/src/renderer/tvgScene.h @@ -105,7 +105,7 @@ struct SceneImpl : Scene RenderData update(RenderMethod* renderer, const Matrix& transform, Array& clips, uint8_t opacity, RenderUpdateFlag flag, TVG_UNUSED bool clipper) { - this->vport = renderer->viewport(); + vport = renderer->viewport(); if (needComposition(opacity)) { /* Overriding opacity value. 
If this scene is half-translucent, @@ -123,6 +123,9 @@ struct SceneImpl : Scene } } + if (compFlag) vport = bounds(renderer); + if (effects) renderer->damage(vport); + return nullptr; } @@ -134,7 +137,7 @@ struct SceneImpl : Scene renderer->blend(impl.blendMethod); if (compFlag) { - cmp = renderer->target(bounds(renderer), renderer->colorSpace(), static_cast(compFlag)); + cmp = renderer->target(vport, renderer->colorSpace(), static_cast(compFlag)); renderer->beginComposite(cmp, MaskMethod::None, opacity); } @@ -157,7 +160,7 @@ struct SceneImpl : Scene return ret; } - RenderRegion bounds(RenderMethod* renderer) const + RenderRegion bounds(RenderMethod* renderer) { if (paints.empty()) return {}; @@ -185,8 +188,8 @@ struct SceneImpl : Scene pRegion.max.x += eRegion.max.x; pRegion.max.y += eRegion.max.y; - pRegion.intersect(this->vport); - return pRegion; + vport = RenderRegion::intersect(renderer->viewport(), pRegion); + return vport; } Result bounds(Point* pt4, Matrix& m, bool obb, bool stroking) @@ -298,6 +301,7 @@ struct SceneImpl : Scene } delete(effects); effects = nullptr; + impl.renderer->damage(vport); } return Result::Success; } diff --git a/src/renderer/wg_engine/tvgWgRenderer.cpp b/src/renderer/wg_engine/tvgWgRenderer.cpp index af860865..a203e9ff 100644 --- a/src/renderer/wg_engine/tvgWgRenderer.cpp +++ b/src/renderer/wg_engine/tvgWgRenderer.cpp @@ -274,6 +274,12 @@ void WgRenderer::dispose(RenderData data) { } +void WgRenderer::damage(TVG_UNUSED const RenderRegion& region) +{ + //TODO: +} + + RenderRegion WgRenderer::region(RenderData data) { auto renderData = (WgRenderDataPaint*)data; diff --git a/src/renderer/wg_engine/tvgWgRenderer.h b/src/renderer/wg_engine/tvgWgRenderer.h index 50825279..317828ad 100644 --- a/src/renderer/wg_engine/tvgWgRenderer.h +++ b/src/renderer/wg_engine/tvgWgRenderer.h @@ -38,6 +38,7 @@ public: bool postRender() override; void dispose(RenderData data) override; RenderRegion region(RenderData data) override; + void damage(const 
RenderRegion& region) override; RenderRegion viewport() override; bool viewport(const RenderRegion& vp) override; bool blend(BlendMethod method) override; From 103a557b6cd8bf743c83374b3fb20a86ecd432f2 Mon Sep 17 00:00:00 2001 From: Hermet Park Date: Wed, 21 May 2025 17:30:59 +0900 Subject: [PATCH 3/3] sw_engine: add support for partial rendering This implements RenderDirtyRegion. issue: https://github.com/thorvg/thorvg/issues/1747 --- src/renderer/sw_engine/tvgSwRenderer.cpp | 110 ++++++++++++++----- src/renderer/sw_engine/tvgSwRenderer.h | 2 + src/renderer/tvgRender.cpp | 129 +++++++++++++++++++++++ src/renderer/tvgRender.h | 17 ++- 4 files changed, 232 insertions(+), 26 deletions(-) diff --git a/src/renderer/sw_engine/tvgSwRenderer.cpp b/src/renderer/sw_engine/tvgSwRenderer.cpp index 775c2a8f..acd4c3cb 100644 --- a/src/renderer/sw_engine/tvgSwRenderer.cpp +++ b/src/renderer/sw_engine/tvgSwRenderer.cpp @@ -40,7 +40,7 @@ struct SwTask : Task { SwSurface* surface = nullptr; SwMpool* mpool = nullptr; - RenderRegion bbox; //Rendering Region + RenderRegion bbox[2] = {{}, {}}; //Rendering Region 0:current, 1:previous Matrix transform; Array clips; RenderUpdateFlag flags = RenderUpdateFlag::None; @@ -52,7 +52,7 @@ struct SwTask : Task { //Can we skip the synchronization?
done(); - return bbox; + return bbox[0]; } virtual void dispose() = 0; @@ -92,7 +92,7 @@ struct SwShapeTask : SwTask bool clip(SwRle* target) override { if (shape.strokeRle) return rleClip(target, shape.strokeRle); - if (shape.fastTrack) return rleClip(target, &bbox); + if (shape.fastTrack) return rleClip(target, &bbox[0]); if (shape.rle) return rleClip(target, shape.rle); return false; } @@ -101,7 +101,7 @@ struct SwShapeTask : SwTask { //Invisible if (opacity == 0 && !clipper) { - bbox.reset(); + bbox[0].reset(); return; } @@ -115,7 +115,7 @@ struct SwShapeTask : SwTask updateFill = (MULTIPLY(rshape->color.a, opacity) || rshape->fill); if (updateShape) shapeReset(&shape); if (updateFill || clipper) { - if (shapePrepare(&shape, rshape, transform, bbox, renderBox, mpool, tid, clips.count > 0 ? true : false)) { + if (shapePrepare(&shape, rshape, transform, bbox[0], renderBox, mpool, tid, clips.count > 0 ? true : false)) { if (!shapeGenRle(&shape, rshape, antialiasing(strokeWidth))) goto err; } else { updateFill = false; @@ -135,7 +135,7 @@ struct SwShapeTask : SwTask if (updateShape || flags & RenderUpdateFlag::Stroke) { if (strokeWidth > 0.0f) { shapeResetStroke(&shape, rshape, transform); - if (!shapeGenStrokeRle(&shape, rshape, transform, bbox, renderBox, mpool, tid)) goto err; + if (!shapeGenStrokeRle(&shape, rshape, transform, bbox[0], renderBox, mpool, tid)) goto err; if (auto fill = rshape->strokeFill()) { auto ctable = (flags & RenderUpdateFlag::GradientStroke) ? 
true : false; if (ctable) shapeResetStrokeFill(&shape); @@ -157,12 +157,11 @@ struct SwShapeTask : SwTask if (!clipShapeRle && !clipStrokeRle) goto err; } - bbox = renderBox; //sync - + bbox[0] = renderBox; //sync return; err: - bbox.reset(); + bbox[0].reset(); shapeReset(&shape); rleReset(shape.strokeRle); shapeDelOutline(&shape, mpool, tid); @@ -188,7 +187,7 @@ struct SwImageTask : SwTask void run(unsigned tid) override { - auto clipBox = bbox; + auto clipBox = bbox[0]; //Convert colorspace if it's not aligned. rasterConvertCS(source, surface->cs); @@ -204,9 +203,11 @@ struct SwImageTask : SwTask if ((flags & (RenderUpdateFlag::Image | RenderUpdateFlag::Transform | RenderUpdateFlag::Color)) && (opacity > 0)) { imageReset(&image); if (!image.data || image.w == 0 || image.h == 0) goto end; - if (!imagePrepare(&image, transform, clipBox, bbox, mpool, tid)) goto end; + + if (!imagePrepare(&image, transform, clipBox, bbox[0], mpool, tid)) goto end; + if (clips.count > 0) { - if (!imageGenRle(&image, bbox, false)) goto end; + if (!imageGenRle(&image, bbox[0], false)) goto end; if (image.rle) { //Clear current task memorypool here if the clippers would use the same memory pool imageDelOutline(&image, mpool, tid); @@ -220,7 +221,7 @@ struct SwImageTask : SwTask } goto end; err: - bbox.reset(); + bbox[0].reset(); rleReset(image.rle); end: imageDelOutline(&image, mpool, tid); @@ -266,7 +267,10 @@ SwRenderer::~SwRenderer() bool SwRenderer::clear() { - if (surface) return rasterClear(surface, 0, 0, surface->w, surface->h); + if (surface) { + fulldraw = true; + return rasterClear(surface, 0, 0, surface->w, surface->h); + } return false; } @@ -334,7 +338,28 @@ bool SwRenderer::postUpdate() bool SwRenderer::preRender() { - return surface != nullptr; + if (!surface) return false; + if (fulldraw || !dirtyRegion.prepare(tasks.count)) return true; + + //TODO: optimize to remove this iteration. 
+ //collect the old and new dirty regions + ARRAY_FOREACH(p, tasks) { + auto task = *p; + task->done(); + auto& cur = task->bbox[0]; + auto& prv = task->bbox[1]; + //quick generous merge if two regions are close enough. + if (abs(cur.min.y - prv.min.y) < 5 && abs(cur.max.y - prv.max.y) < 5 && abs(cur.min.x - prv.min.x) < 5 && abs(cur.max.x - prv.max.x) < 5) { + dirtyRegion.add(RenderRegion::add(task->bbox[0], task->bbox[1])); + } else { + dirtyRegion.add(task->bbox[0]); + dirtyRegion.add(task->bbox[1]); + } + } + + dirtyRegion.commit(); + + return true; } @@ -359,9 +384,15 @@ bool SwRenderer::postRender() ARRAY_FOREACH(p, tasks) { if ((*p)->disposed) delete(*p); - else (*p)->pushed = false; + else { + (*p)->bbox[1] = (*p)->bbox[0]; + (*p)->pushed = false; + } } tasks.clear(); + dirtyRegion.clear(); + + fulldraw = false; return true; } @@ -375,7 +406,18 @@ bool SwRenderer::renderImage(RenderData data) if (task->opacity == 0) return true; //full scene or partial rendering - return rasterImage(surface, &task->image, task->transform, task->bbox, task->opacity); + if (fulldraw || task->pushed || dirtyRegion.deactivated()) { + rasterImage(surface, &task->image, task->transform, task->bbox[0], task->opacity); + } else { + ARRAY_FOREACH(p, dirtyRegion.get()) { + if (task->bbox[0].min.x >= p->max.x) break; //dirtyRegion is sorted in x order + if (task->bbox[0].intersected(*p)) { + auto bbox = RenderRegion::intersect(task->bbox[0], *p); + rasterImage(surface, &task->image, task->transform, bbox, task->opacity); + } + } + } + return true; } @@ -411,12 +453,27 @@ bool SwRenderer::renderShape(RenderData data) } }; - if (task->rshape->strokeFirst()) { - stroke(task, surface, task->bbox); - fill(task, surface, task->shape.bbox); + //full scene or partial rendering + if (fulldraw || task->pushed || dirtyRegion.deactivated()) { + if (task->rshape->strokeFirst()) { + stroke(task, surface, task->bbox[0]); + fill(task, surface, task->shape.bbox); + } else { + fill(task, surface,
task->shape.bbox); + stroke(task, surface, task->bbox[0]); + } } else { - fill(task, surface, task->shape.bbox); - stroke(task, surface, task->bbox); + //TODO: skip the stroke bbox if they are invalid. + ARRAY_FOREACH(p, dirtyRegion.get()) { + if (task->bbox[0].min.x >= p->max.x) break; //dirtyRegion is sorted in x order + if (task->rshape->strokeFirst()) { + if (task->bbox[0].intersected(*p)) stroke(task, surface, RenderRegion::intersect(task->bbox[0], *p)); + if (task->shape.bbox.intersected(*p)) fill(task, surface, RenderRegion::intersect(task->shape.bbox, *p)); + } else { + if (task->shape.bbox.intersected(*p)) fill(task, surface, RenderRegion::intersect(task->shape.bbox, *p)); + if (task->bbox[0].intersected(*p)) stroke(task, surface, RenderRegion::intersect(task->bbox[0], *p)); + } + } } return true; @@ -477,9 +534,9 @@ bool SwRenderer::blend(BlendMethod method) } -void SwRenderer::damage(TVG_UNUSED const RenderRegion& region) +void SwRenderer::damage(const RenderRegion& region) { - //TODO: + dirtyRegion.add(region); } @@ -686,6 +743,9 @@ void SwRenderer::dispose(RenderData data) task->done(); task->dispose(); + //should be updated for the region; the current paint is removed + dirtyRegion.add(task->bbox[0]); + if (task->pushed) task->disposed = true; else delete(task); } @@ -706,7 +766,7 @@ void* SwRenderer::prepareCommon(SwTask* task, const Matrix& transform, const Arr task->surface = surface; task->mpool = mpool; - task->bbox = RenderRegion::intersect(vport, {{0, 0}, {int32_t(surface->w), int32_t(surface->h)}}); + task->bbox[0] = RenderRegion::intersect(vport, {{0, 0}, {int32_t(surface->w), int32_t(surface->h)}}); task->transform = transform; task->clips = clips; task->opacity = opacity; diff --git a/src/renderer/sw_engine/tvgSwRenderer.h b/src/renderer/sw_engine/tvgSwRenderer.h index da9a0bc6..ca7e463a 100644 --- a/src/renderer/sw_engine/tvgSwRenderer.h +++ b/src/renderer/sw_engine/tvgSwRenderer.h @@ -71,12 +71,14 @@ public: static bool term(); private: + 
RenderDirtyRegion dirtyRegion; SwSurface* surface = nullptr; //active surface Array tasks; //async task list Array compositors; //render targets cache list SwMpool* mpool; //private memory pool RenderRegion vport; //viewport bool sharedMpool; //memory-pool behavior policy + bool fulldraw = true; //buffer is cleared (need to redraw full screen) SwRenderer(); ~SwRenderer(); diff --git a/src/renderer/tvgRender.cpp b/src/renderer/tvgRender.cpp index 78d7b41a..64dbcd27 100644 --- a/src/renderer/tvgRender.cpp +++ b/src/renderer/tvgRender.cpp @@ -20,6 +20,7 @@ * SOFTWARE. */ +#include <algorithm> #include "tvgMath.h" #include "tvgRender.h" @@ -116,6 +117,134 @@ void RenderRegion::intersect(const RenderRegion& rhs) if (max.y < min.y) max.y = min.y; } + +void RenderDirtyRegion::subdivide(Array<RenderRegion>& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs) +{ + RenderRegion temp[5]; + int cnt = 0; + temp[cnt++] = RenderRegion::intersect(lhs, rhs); + auto max = std::min(lhs.max.x, rhs.max.x); + + auto subtract = [&](RenderRegion& lhs, RenderRegion& rhs) { + //top + if (rhs.min.y < lhs.min.y) { + temp[cnt++] = {{rhs.min.x, rhs.min.y}, {rhs.max.x, lhs.min.y}}; + rhs.min.y = lhs.min.y; + } + //bottom + if (rhs.max.y > lhs.max.y) { + temp[cnt++] = {{rhs.min.x, lhs.max.y}, {rhs.max.x, rhs.max.y}}; + rhs.max.y = lhs.max.y; + } + //left + if (rhs.min.x < lhs.min.x) { + temp[cnt++] = {{rhs.min.x, rhs.min.y}, {lhs.min.x, rhs.max.y}}; + rhs.min.x = lhs.min.x; + } + //right + if (rhs.max.x > lhs.max.x) { + temp[cnt++] = {{lhs.max.x, rhs.min.y}, {rhs.max.x, rhs.max.y}}; + //rhs.max.x = lhs.max.x; + } + }; + + subtract(temp[0], lhs); + subtract(temp[0], rhs); + + //TODO: remove this + if (targets.reserved < targets.count + cnt - 1) { + TVGERR("RENDERER", "reserved: %d, required: %d (+%d)\n", targets.reserved, targets.count + cnt - 1, cnt - 1); + abort(); + } + + /* Note: We considered using a list to avoid memory shifting, + but ultimately, the array outperformed the list due to better cache locality.
*/ + + //shift data + auto dst = &targets[idx + cnt]; + memmove(dst, &targets[idx + 1], sizeof(RenderRegion) * (targets.count - idx - 1)); + memcpy(&targets[idx], temp, sizeof(RenderRegion) * cnt); + targets.count += (cnt - 1); + + //sorting by x coord again, only for the updated region + while (dst < targets.end() && dst->min.x < max) ++dst; + stable_sort(&targets[idx], dst, [](const RenderRegion& a, const RenderRegion& b) -> bool { + return a.min.x < b.min.x; + }); +} + +void RenderDirtyRegion::commit() +{ + if (skip || disabled) return; + + auto& targets = list[current]; + if (targets.empty()) return; + + if (targets.count > THRESHOLD) { + skip = true; + return; + } + + current = !current; //swapping buffers + + auto& output = list[current]; + + //sorting by x coord. guarantee the stable performance: O(NlogN) + stable_sort(targets.begin(), targets.end(), [](const RenderRegion& a, const RenderRegion& b) -> bool { + return a.min.x < b.min.x; + }); + + //Optimized using sweep-line algorithm: O(NlogN) + for (uint32_t i = 0; i < targets.count; ++i) { + auto& lhs = targets[i]; + if (lhs.invalid()) continue; + auto merged = false; + + for (uint32_t j = i + 1; j < targets.count; ++j) { + auto& rhs = targets[j]; + if (rhs.invalid()) continue; + if (lhs.max.x < rhs.min.x) break; //line sweeping + + //fully overlapped. drop lhs + if (rhs.contained(lhs)) { + merged = true; + break; + } + //fully overlapped.
drop rhs + if (lhs.contained(rhs)) { + rhs = {}; + continue; + } + //just merge & expand on x axis + if (lhs.min.y == rhs.min.y && lhs.max.y == rhs.max.y) { + if (lhs.min.x <= rhs.max.x && rhs.min.x <= lhs.max.x) { + rhs.min.x = std::min(lhs.min.x, rhs.min.x); + rhs.max.x = std::max(lhs.max.x, rhs.max.x); + merged = true; + break; + } + } + //just merge & expand on y axis + if (lhs.min.x == rhs.min.x && lhs.max.x == rhs.max.x) { + if (lhs.min.y <= rhs.max.y && rhs.min.y < lhs.max.y) { + rhs.min.y = std::min(lhs.min.y, rhs.min.y); + rhs.max.y = std::max(lhs.max.y, rhs.max.y); + merged = true; + break; + } + } + //subdivide regions + if (lhs.intersected(rhs)) { + subdivide(targets, j, lhs, rhs); + merged = true; + break; + } + } + if (!merged) output.push(lhs); //this region is completely isolated + lhs = {}; + } +} + /************************************************************************/ /* RenderTrimPath Class Implementation */ /************************************************************************/ diff --git a/src/renderer/tvgRender.h b/src/renderer/tvgRender.h index 9f18aa62..e13f4bdb 100644 --- a/src/renderer/tvgRender.h +++ b/src/renderer/tvgRender.h @@ -106,6 +106,11 @@ struct RenderRegion return {{std::max(lhs.min.x, rhs.min.x), std::max(lhs.min.y, rhs.min.y)}, {std::min(lhs.max.x, rhs.max.x), std::min(lhs.max.y, rhs.max.y)}}; } + static constexpr RenderRegion add(const RenderRegion& lhs, const RenderRegion& rhs) + { + return {{std::min(lhs.min.x, rhs.min.x), std::min(lhs.min.y, rhs.min.y)}, {std::max(lhs.max.x, rhs.max.x), std::max(lhs.max.y, rhs.max.y)}}; + } + void intersect(const RenderRegion& rhs); void add(const RenderRegion& rhs) @@ -116,6 +121,16 @@ struct RenderRegion if (rhs.max.y > max.y) max.y = rhs.max.y; } + bool contained(const RenderRegion& rhs) const + { + return (min.x <= rhs.min.x && max.x >= rhs.max.x && min.y <= rhs.min.y && max.y >= rhs.max.y); + } + + bool intersected(const RenderRegion& rhs) const + { + return (rhs.min.x
< max.x && rhs.max.x > min.x && rhs.min.y < max.y && rhs.max.y > min.y); + } + bool operator==(const RenderRegion& rhs) const { return (min.x == rhs.min.x && min.y == rhs.min.y && max.x == rhs.max.x && max.y == rhs.max.y); @@ -477,7 +492,7 @@ struct RenderEffectTritone : RenderEffect class RenderMethod { private: - uint32_t refCnt = 0; //reference count + uint32_t refCnt = 0; Key key; public: