From 07331eb76c155c8a0eda24e2e5eb94344013665e Mon Sep 17 00:00:00 2001 From: Hermet Park Date: Tue, 17 Jun 2025 15:50:31 +0900 Subject: [PATCH] renderer: add partial rendering support Partial Rendering refers to a rendering technique where only a portion of the scene or screen is updated, rather than redrawing the entire output. It is commonly used as a performance optimization strategy, focusing on redrawing only the regions that have changed, often called dirty regions. This introduces RenderDirtyRegion, which assists in collecting a compact dirty region from render tasks. For efficient data processing, this divides the screen region into partitions of a designated size and handles the partial rendering computation with a divide-and-conquer methodology. Each backend can utilize this class to support efficient partial rendering. This is implemented using a Line Sweep and Subdivision Merging O(NlogN). The basic per-frame workflow is as follows: 0. RenderDirtyRegion::init() //set the screen size to properly partition the regions 1. RenderDirtyRegion::prepare() //Call this in Renderer::preRender(). 2. RenderDirtyRegion::add() //Add all dirty paints for the frame before rendering. 3. RenderDirtyRegion::commit() //Generate the partial rendering region list before rendering. 4. RenderDirtyRegion::partition() //Get a certain partition 5. RenderDirtyRegion::get() //Retrieve the current dirty region list of a partition and use it when drawing paints. 6. RenderDirtyRegion::clear() //Reset the state. RenderMethod introduces 2 utilities for partial rendering: 1. RenderMethod::damage() //add a forced dirty region, especially useful for scene effects 2. 
RenderMethod::partial() //toggle the partial rendering feature issue: https://github.com/thorvg/thorvg/issues/1747 --- src/renderer/gl_engine/tvgGlRenderer.cpp | 13 ++ src/renderer/gl_engine/tvgGlRenderer.h | 8 +- src/renderer/sw_engine/tvgSwRenderer.cpp | 13 ++ src/renderer/sw_engine/tvgSwRenderer.h | 9 +- src/renderer/tvgPaint.h | 5 + src/renderer/tvgRender.cpp | 180 +++++++++++++++++++++++ src/renderer/tvgRender.h | 73 ++++++++- src/renderer/tvgScene.h | 9 +- src/renderer/wg_engine/tvgWgRenderer.cpp | 13 ++ src/renderer/wg_engine/tvgWgRenderer.h | 9 +- 10 files changed, 320 insertions(+), 12 deletions(-) diff --git a/src/renderer/gl_engine/tvgGlRenderer.cpp b/src/renderer/gl_engine/tvgGlRenderer.cpp index b26ef8ca..56d050a2 100644 --- a/src/renderer/gl_engine/tvgGlRenderer.cpp +++ b/src/renderer/gl_engine/tvgGlRenderer.cpp @@ -1479,6 +1479,19 @@ bool GlRenderer::postUpdate() } +void GlRenderer::damage(const RenderRegion& region) +{ + //TODO +} + + +bool GlRenderer::partial(bool disable) +{ + //TODO + return false; +} + + bool GlRenderer::term() { if (rendererCnt > 0) return false; diff --git a/src/renderer/gl_engine/tvgGlRenderer.h b/src/renderer/gl_engine/tvgGlRenderer.h index 347dcad2..794d130f 100644 --- a/src/renderer/gl_engine/tvgGlRenderer.h +++ b/src/renderer/gl_engine/tvgGlRenderer.h @@ -70,6 +70,7 @@ public: RT_None, }; + //main features bool preUpdate() override; RenderData prepare(const RenderShape& rshape, RenderData data, const Matrix& transform, Array& clips, uint8_t opacity, RenderUpdateFlag flags, bool clipper) override; RenderData prepare(RenderSurface* surface, RenderData data, const Matrix& transform, Array& clips, uint8_t opacity, RenderUpdateFlag flags) override; @@ -83,20 +84,25 @@ public: bool blend(BlendMethod method) override; ColorSpace colorSpace() override; const RenderSurface* mainSurface() override; - bool target(void* context, int32_t id, uint32_t w, uint32_t h); bool sync() override; bool clear() override; + //composition 
RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) override; bool beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) override; bool endComposite(RenderCompositor* cmp) override; + //post effects void prepare(RenderEffect* effect, const Matrix& transform) override; bool region(RenderEffect* effect) override; bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) override; void dispose(RenderEffect* effect) override; + //partial rendering + void damage(const RenderRegion& region) override; + bool partial(bool disable) override; + static GlRenderer* gen(uint32_t threads); static bool term(); diff --git a/src/renderer/sw_engine/tvgSwRenderer.cpp b/src/renderer/sw_engine/tvgSwRenderer.cpp index e44b7e27..d093cff7 100644 --- a/src/renderer/sw_engine/tvgSwRenderer.cpp +++ b/src/renderer/sw_engine/tvgSwRenderer.cpp @@ -347,6 +347,19 @@ bool SwRenderer::postRender() } +void SwRenderer::damage(const RenderRegion& region) +{ + //TODO +} + + +bool SwRenderer::partial(bool disable) +{ + //TODO + return false; +} + + bool SwRenderer::renderImage(RenderData data) { auto task = static_cast(data); diff --git a/src/renderer/sw_engine/tvgSwRenderer.h b/src/renderer/sw_engine/tvgSwRenderer.h index 6e21d19c..72e857e4 100644 --- a/src/renderer/sw_engine/tvgSwRenderer.h +++ b/src/renderer/sw_engine/tvgSwRenderer.h @@ -36,6 +36,7 @@ namespace tvg class SwRenderer : public RenderMethod { public: + //main features bool preUpdate() override; RenderData prepare(const RenderShape& rshape, RenderData data, const Matrix& transform, Array& clips, uint8_t opacity, RenderUpdateFlag flags, bool clipper) override; RenderData prepare(RenderSurface* surface, RenderData data, const Matrix& transform, Array& clips, uint8_t opacity, RenderUpdateFlag flags) override; @@ -49,23 +50,27 @@ public: bool blend(BlendMethod method) override; ColorSpace colorSpace() override; const RenderSurface* mainSurface() override; - bool 
clear() override; bool sync() override; bool target(pixel_t* data, uint32_t stride, uint32_t w, uint32_t h, ColorSpace cs); + //composition SwSurface* request(int channelSize, bool square); - RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) override; bool beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) override; bool endComposite(RenderCompositor* cmp) override; void clearCompositors(); + //post effects void prepare(RenderEffect* effect, const Matrix& transform) override; bool region(RenderEffect* effect) override; bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) override; void dispose(RenderEffect* effect) override; + //partial rendering + void damage(const RenderRegion& region) override; + bool partial(bool disable) override; + static SwRenderer* gen(uint32_t threads); static bool term(); diff --git a/src/renderer/tvgPaint.h b/src/renderer/tvgPaint.h index 05ddbe53..327eae4d 100644 --- a/src/renderer/tvgPaint.h +++ b/src/renderer/tvgPaint.h @@ -129,6 +129,11 @@ namespace tvg return refCnt; } + void damage(const RenderRegion& vport) + { + if (renderer) renderer->damage(vport); + } + void mark(CompositionFlag flag) { cmpFlag = CompositionFlag(uint8_t(cmpFlag) | uint8_t(flag)); diff --git a/src/renderer/tvgRender.cpp b/src/renderer/tvgRender.cpp index d943b821..cb2b6d48 100644 --- a/src/renderer/tvgRender.cpp +++ b/src/renderer/tvgRender.cpp @@ -20,6 +20,7 @@ * SOFTWARE. 
*/ +#include #include "tvgMath.h" #include "tvgRender.h" @@ -130,6 +131,185 @@ void RenderRegion::intersect(const RenderRegion& rhs) if (max.y < min.y) max.y = min.y; } + +void RenderDirtyRegion::init(uint32_t w, uint32_t h) +{ + auto cnt = int(sqrt(PARTITIONING)); + auto px = int32_t(w / cnt); + auto py = int32_t(h / cnt); + auto lx = int32_t(w % cnt); + auto ly = int32_t(h % cnt); + + //space partitioning + for (int y = 0; y < cnt; ++y) { + for (int x = 0; x < cnt; ++x) { + auto& partition = partitions[y * cnt + x]; + partition.list[0].reserve(64); + auto& region = partition.region; + region.min = {x * px, y * py}; + region.max = {region.min.x + px, region.min.y + py}; + //leftovers + if (x == cnt -1) region.max.x += lx; + if (y == cnt -1) region.max.y += ly; + } + } +} + + +void RenderDirtyRegion::add(const RenderRegion* prv, const RenderRegion* cur) +{ + if (disabled) return; + + auto pvalid = prv ? prv->valid() : false; + auto cvalid = cur ? cur->valid() : false; + if (!pvalid && !cvalid) return; + for (int idx = 0; idx < PARTITIONING; ++idx) { + auto& partition = partitions[idx]; + if (pvalid && prv->intersected(partition.region)) { + ScopedLock lock(key); + partition.list[partition.current].push(RenderRegion::intersect(*prv, partition.region)); + } + if (cvalid && cur->intersected(partition.region)) { + ScopedLock lock(key); + partition.list[partition.current].push(RenderRegion::intersect(*cur, partition.region)); + } + } +} + + +void RenderDirtyRegion::clear() +{ + for (int idx = 0; idx < PARTITIONING; ++idx) { + partitions[idx].list[0].clear(); + partitions[idx].list[1].clear(); + } +} + + +void RenderDirtyRegion::subdivide(Array& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs) +{ + RenderRegion temp[5]; + int cnt = 0; + temp[cnt++] = RenderRegion::intersect(lhs, rhs); + auto max = std::min(lhs.max.x, rhs.max.x); + + auto subtract = [&](RenderRegion& lhs, RenderRegion& rhs) { + //top + if (rhs.min.y < lhs.min.y) { + temp[cnt++] = {{rhs.min.x, 
rhs.min.y}, {rhs.max.x, lhs.min.y}}; + rhs.min.y = lhs.min.y; + } + //bottom + if (rhs.max.y > lhs.max.y) { + temp[cnt++] = {{rhs.min.x, lhs.max.y}, {rhs.max.x, rhs.max.y}}; + rhs.max.y = lhs.max.y; + } + //left + if (rhs.min.x < lhs.min.x) { + temp[cnt++] = {{rhs.min.x, rhs.min.y}, {lhs.min.x, rhs.max.y}}; + rhs.min.x = lhs.min.x; + } + //right + if (rhs.max.x > lhs.max.x) { + temp[cnt++] = {{lhs.max.x, rhs.min.y}, {rhs.max.x, rhs.max.y}}; + //rhs.max.x = lhs.max.x; + } + }; + + subtract(temp[0], lhs); + subtract(temp[0], rhs); + + /* Considered using a list to avoid memory shifting, + but ultimately, the array outperformed the list due to better cache locality. */ + + //shift data + auto dst = &targets[idx + cnt]; + memmove(dst, &targets[idx + 1], sizeof(RenderRegion) * (targets.count - idx - 1)); + memcpy(&targets[idx], temp, sizeof(RenderRegion) * cnt); + targets.count += (cnt - 1); + + //sorting by x coord again, only for the updated region + while (dst < targets.end() && dst->min.x < max) ++dst; + stable_sort(&targets[idx], dst, [](const RenderRegion& a, const RenderRegion& b) -> bool { + return a.min.x < b.min.x; + }); +} + + +void RenderDirtyRegion::commit() +{ + if (disabled) return; + + for (int idx = 0; idx < PARTITIONING; ++idx) { + auto current = partitions[idx].current; + auto& targets = partitions[idx].list[current]; + if (targets.empty()) return; + + current = !current; //swapping buffers + auto& output = partitions[idx].list[current]; + + targets.reserve(targets.count * 5); //one intersection can be divided up to 5 + output.reserve(targets.count); + + partitions[idx].current = current; + + //sorting by x coord. 
guarantee the stable performance: O(NlogN) + stable_sort(targets.begin(), targets.end(), [](const RenderRegion& a, const RenderRegion& b) -> bool { + return a.min.x < b.min.x; + }); + + //Optimized using sweep-line algorithm: O(NlogN) + for (uint32_t i = 0; i < targets.count; ++i) { + auto& lhs = targets[i]; + if (lhs.invalid()) continue; + auto merged = false; + + for (uint32_t j = i + 1; j < targets.count; ++j) { + auto& rhs = targets[j]; + if (rhs.invalid()) continue; + if (lhs.max.x < rhs.min.x) break; //line sweeping + + //fully overlapped. drop lhs + if (rhs.contained(lhs)) { + merged = true; + break; + } + //fully overlapped. replace the lhs with rhs + if (lhs.contained(rhs)) { + rhs = {}; + continue; + } + //just merge & expand on x axis + if (lhs.min.y == rhs.min.y && lhs.max.y == rhs.max.y) { + if (lhs.min.x <= rhs.max.x && rhs.min.x <= lhs.max.x) { + rhs.min.x = std::min(lhs.min.x, rhs.min.x); + rhs.max.x = std::max(lhs.max.x, rhs.max.x); + merged = true; + break; + } + } + //just merge & expand on y axis + if (lhs.min.x == rhs.min.x && lhs.max.x == rhs.max.x) { + if (lhs.min.y <= rhs.max.y && rhs.min.y < lhs.max.y) { + rhs.min.y = std::min(lhs.min.y, rhs.min.y); + rhs.max.y = std::max(lhs.max.y, rhs.max.y); + merged = true; + break; + } + } + //subdivide regions + if (lhs.intersected(rhs)) { + subdivide(targets, j, lhs, rhs); + merged = true; + break; + } + } + if (!merged) output.push(lhs); //this region is complete isolated + lhs = {}; + } + } +} + /************************************************************************/ /* RenderTrimPath Class Implementation */ /************************************************************************/ diff --git a/src/renderer/tvgRender.h b/src/renderer/tvgRender.h index 6d802661..938355ea 100644 --- a/src/renderer/tvgRender.h +++ b/src/renderer/tvgRender.h @@ -50,7 +50,6 @@ static inline RenderUpdateFlag operator|(const RenderUpdateFlag a, const RenderU return RenderUpdateFlag(uint16_t(a) | uint16_t(b)); } - struct 
RenderSurface { union { @@ -111,6 +110,11 @@ struct RenderRegion return ret; } + static constexpr RenderRegion add(const RenderRegion& lhs, const RenderRegion& rhs) + { + return {{std::min(lhs.min.x, rhs.min.x), std::min(lhs.min.y, rhs.min.y)}, {std::max(lhs.max.x, rhs.max.x), std::max(lhs.max.y, rhs.max.y)}}; + } + void intersect(const RenderRegion& rhs); void add(const RenderRegion& rhs) @@ -121,6 +125,16 @@ struct RenderRegion if (rhs.max.y > max.y) max.y = rhs.max.y; } + bool contained(const RenderRegion& rhs) + { + return (min.x <= rhs.min.x && max.x >= rhs.max.x && min.y <= rhs.min.y && max.y >= rhs.max.y); + } + + bool intersected(const RenderRegion& rhs) const + { + return (rhs.min.x < max.x && rhs.max.x > min.x && rhs.min.y < max.y && rhs.max.y > min.y); + } + bool operator==(const RenderRegion& rhs) const { return (min.x == rhs.min.x && min.y == rhs.min.y && max.x == rhs.max.x && max.y == rhs.max.y); @@ -141,6 +155,52 @@ struct RenderRegion uint32_t h() const { return (uint32_t) sh(); } }; +struct RenderDirtyRegion +{ +public: + static constexpr const int PARTITIONING = 16; //must be N*N + + void init(uint32_t w, uint32_t h); + void commit(); + void add(const RenderRegion* prv, const RenderRegion* cur); //collect the old and new dirty regions together + void clear(); + + bool deactivate(bool on) + { + std::swap(on, disabled); + return on; + } + + bool deactivated() + { + return disabled; + } + + const RenderRegion& partition(int idx) + { + return partitions[idx].region; + } + + const Array& get(int idx) + { + return partitions[idx].list[partitions[idx].current]; + } + +private: + void subdivide(Array& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs); + + struct Partition + { + RenderRegion region; + Array list[2]; //double buffer swapping + uint8_t current = 0; //double buffer swapping list index. 
0 or 1 + }; + + Key key; + Partition partitions[PARTITIONING]; + bool disabled = false; +}; + struct RenderPath { Array cmds; @@ -420,7 +480,7 @@ struct RenderEffectTritone : RenderEffect class RenderMethod { private: - uint32_t refCnt = 0; //reference count + uint32_t refCnt = 0; Key key; protected: @@ -448,11 +508,10 @@ public: virtual bool blend(BlendMethod method) = 0; virtual ColorSpace colorSpace() = 0; virtual const RenderSurface* mainSurface() = 0; - virtual bool clear() = 0; virtual bool sync() = 0; - //compositions + //composition virtual RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) = 0; virtual bool beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) = 0; virtual bool endComposite(RenderCompositor* cmp) = 0; @@ -462,6 +521,10 @@ public: virtual bool region(RenderEffect* effect) = 0; virtual bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) = 0; virtual void dispose(RenderEffect* effect) = 0; + + //partial rendering + virtual void damage(const RenderRegion& region) = 0; + virtual bool partial(bool disable) = 0; }; static inline bool MASK_REGION_MERGING(MaskMethod method) @@ -532,4 +595,4 @@ static inline uint8_t MULTIPLY(uint8_t c, uint8_t a) } -#endif //_TVG_RENDER_H_ +#endif //_TVG_RENDER_H_ \ No newline at end of file diff --git a/src/renderer/tvgScene.h b/src/renderer/tvgScene.h index c761645f..ff30e040 100644 --- a/src/renderer/tvgScene.h +++ b/src/renderer/tvgScene.h @@ -127,10 +127,13 @@ struct SceneImpl : Scene } } - //this viewport update is more performant than in bounds()? + //this viewport update is more performant than in bounds(). No idea. vport = renderer->viewport(); vdirty = true; + //bounds(renderer) here hinders parallelization. 
+ if (effects) impl.damage(vport); + return true; } @@ -257,7 +260,8 @@ struct SceneImpl : Scene { auto itr = paints.begin(); while (itr != paints.end()) { - PAINT((*itr))->unref(); + auto paint = PAINT((*itr)); + paint->unref(); paints.erase(itr++); } return Result::Success; @@ -310,6 +314,7 @@ struct SceneImpl : Scene } delete(effects); effects = nullptr; + impl.damage(vport); } return Result::Success; } diff --git a/src/renderer/wg_engine/tvgWgRenderer.cpp b/src/renderer/wg_engine/tvgWgRenderer.cpp index 9b250782..a96a7f92 100644 --- a/src/renderer/wg_engine/tvgWgRenderer.cpp +++ b/src/renderer/wg_engine/tvgWgRenderer.cpp @@ -581,6 +581,19 @@ bool WgRenderer::postUpdate() } +void WgRenderer::damage(const RenderRegion& region) +{ + //TODO +} + + +bool WgRenderer::partial(bool disable) +{ + //TODO + return false; +} + + bool WgRenderer::term() { if (rendererCnt > 0) return false; diff --git a/src/renderer/wg_engine/tvgWgRenderer.h b/src/renderer/wg_engine/tvgWgRenderer.h index dc3c5cf6..609d22b0 100644 --- a/src/renderer/wg_engine/tvgWgRenderer.h +++ b/src/renderer/wg_engine/tvgWgRenderer.h @@ -28,6 +28,7 @@ class WgRenderer : public RenderMethod { public: + //main features bool preUpdate() override; RenderData prepare(const RenderShape& rshape, RenderData data, const Matrix& transform, Array& clips, uint8_t opacity, RenderUpdateFlag flags, bool clipper) override; RenderData prepare(RenderSurface* surface, RenderData data, const Matrix& transform, Array& clips, uint8_t opacity, RenderUpdateFlag flags) override; @@ -41,21 +42,25 @@ public: bool blend(BlendMethod method) override; ColorSpace colorSpace() override; const RenderSurface* mainSurface() override; - bool clear() override; bool sync() override; - bool target(WGPUDevice device, WGPUInstance instance, void* target, uint32_t width, uint32_t height, int type = 0); + //composition RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) override; bool 
beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) override; bool endComposite(RenderCompositor* cmp) override; + //post effects void prepare(RenderEffect* effect, const Matrix& transform) override; bool region(RenderEffect* effect) override; bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) override; void dispose(RenderEffect* effect) override; + //partial rendering + void damage(const RenderRegion& region) override; + bool partial(bool disable) override; + static WgRenderer* gen(uint32_t threads); static bool term();