renderer: add partial rendering support

Partial Rendering refers to a rendering technique where
only a portion of the scene or screen is updated, rather
than redrawing the entire output. It is commonly used as
a performance optimization strategy, focusing on redrawing
only the regions that have changed, often called dirty regions.

This introduces RenderDirtyRegion, which assists
in collecting a compact dirty region from render tasks.

For efficient data processing, this divides the screen region
into partitions of a designated size and handles the partial rendering
computation with a divide-and-conquer methodology.

Each backend can utilize this class to support efficient partial rendering.
This is implemented using a Line Sweep and Subdivision Merging O(NlogN).

The basic per-frame workflow is as follows:

0. RenderDirtyRegion::init() //set the screen size to properly partition the regions
1. RenderDirtyRegion::prepare() //Call this in Renderer::preRender().
2. RenderDirtyRegion::add() //Add all dirty paints for the frame before rendering.
3. RenderDirtyRegion::commit() //Generate the partial rendering region list before rendering.
4. RenderDirtyRegion::partition() //Get a certain partition
5. RenderDirtyRegion::get() //Retrieve the current dirty region list of a partition and use it when drawing paints.
6. RenderDirtyRegion::clear() //Reset the state.

RenderMethod introduces 2 utilities for partial rendering:

1. RenderMethod::damage() //add a force dirty region, especially useful for scene effects
2. RenderMethod::partial() //toggle the partial rendering feature

issue: https://github.com/thorvg/thorvg/issues/1747
This commit is contained in:
Hermet Park 2025-06-17 15:50:31 +09:00 committed by Hermet Park
parent 8e8bfff6ab
commit 07331eb76c
10 changed files with 320 additions and 12 deletions

View file

@ -1479,6 +1479,19 @@ bool GlRenderer::postUpdate()
}
//Partial rendering hook: receives a region that should be treated as dirty.
//Not implemented for this backend yet: the region is currently ignored.
void GlRenderer::damage(const RenderRegion& region)
{
//TODO: record the region as dirty once this backend supports partial rendering
}
//Partial rendering hook: requests the feature to be disabled/enabled via `disable`.
//Not implemented for this backend yet: the argument is ignored and false is always returned.
//NOTE(review): the meaning of the return value is not defined in this stub - confirm against RenderMethod::partial().
bool GlRenderer::partial(bool disable)
{
//TODO: toggle the partial rendering once this backend supports it
return false;
}
bool GlRenderer::term()
{
if (rendererCnt > 0) return false;

View file

@ -70,6 +70,7 @@ public:
RT_None,
};
//main features
bool preUpdate() override;
RenderData prepare(const RenderShape& rshape, RenderData data, const Matrix& transform, Array<RenderData>& clips, uint8_t opacity, RenderUpdateFlag flags, bool clipper) override;
RenderData prepare(RenderSurface* surface, RenderData data, const Matrix& transform, Array<RenderData>& clips, uint8_t opacity, RenderUpdateFlag flags) override;
@ -83,20 +84,25 @@ public:
bool blend(BlendMethod method) override;
ColorSpace colorSpace() override;
const RenderSurface* mainSurface() override;
bool target(void* context, int32_t id, uint32_t w, uint32_t h);
bool sync() override;
bool clear() override;
//composition
RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) override;
bool beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) override;
bool endComposite(RenderCompositor* cmp) override;
//post effects
void prepare(RenderEffect* effect, const Matrix& transform) override;
bool region(RenderEffect* effect) override;
bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) override;
void dispose(RenderEffect* effect) override;
//partial rendering
void damage(const RenderRegion& region) override;
bool partial(bool disable) override;
static GlRenderer* gen(uint32_t threads);
static bool term();

View file

@ -347,6 +347,19 @@ bool SwRenderer::postRender()
}
//Partial rendering hook: receives a region that should be treated as dirty.
//Not implemented for this backend yet: the region is currently ignored.
void SwRenderer::damage(const RenderRegion& region)
{
//TODO: record the region as dirty once this backend supports partial rendering
}
//Partial rendering hook: requests the feature to be disabled/enabled via `disable`.
//Not implemented for this backend yet: the argument is ignored and false is always returned.
//NOTE(review): the meaning of the return value is not defined in this stub - confirm against RenderMethod::partial().
bool SwRenderer::partial(bool disable)
{
//TODO: toggle the partial rendering once this backend supports it
return false;
}
bool SwRenderer::renderImage(RenderData data)
{
auto task = static_cast<SwImageTask*>(data);

View file

@ -36,6 +36,7 @@ namespace tvg
class SwRenderer : public RenderMethod
{
public:
//main features
bool preUpdate() override;
RenderData prepare(const RenderShape& rshape, RenderData data, const Matrix& transform, Array<RenderData>& clips, uint8_t opacity, RenderUpdateFlag flags, bool clipper) override;
RenderData prepare(RenderSurface* surface, RenderData data, const Matrix& transform, Array<RenderData>& clips, uint8_t opacity, RenderUpdateFlag flags) override;
@ -49,23 +50,27 @@ public:
bool blend(BlendMethod method) override;
ColorSpace colorSpace() override;
const RenderSurface* mainSurface() override;
bool clear() override;
bool sync() override;
bool target(pixel_t* data, uint32_t stride, uint32_t w, uint32_t h, ColorSpace cs);
//composition
SwSurface* request(int channelSize, bool square);
RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) override;
bool beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) override;
bool endComposite(RenderCompositor* cmp) override;
void clearCompositors();
//post effects
void prepare(RenderEffect* effect, const Matrix& transform) override;
bool region(RenderEffect* effect) override;
bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) override;
void dispose(RenderEffect* effect) override;
//partial rendering
void damage(const RenderRegion& region) override;
bool partial(bool disable) override;
static SwRenderer* gen(uint32_t threads);
static bool term();

View file

@ -129,6 +129,11 @@ namespace tvg
return refCnt;
}
//Forwards a damaged (dirty) region to the attached renderer; no-op when no renderer is set.
void damage(const RenderRegion& vport)
{
    if (!renderer) return;
    renderer->damage(vport);
}
void mark(CompositionFlag flag)
{
cmpFlag = CompositionFlag(uint8_t(cmpFlag) | uint8_t(flag));

View file

@ -20,6 +20,7 @@
* SOFTWARE.
*/
#include <algorithm>
#include "tvgMath.h"
#include "tvgRender.h"
@ -130,6 +131,185 @@ void RenderRegion::intersect(const RenderRegion& rhs)
if (max.y < min.y) max.y = min.y;
}
void RenderDirtyRegion::init(uint32_t w, uint32_t h)
{
auto cnt = int(sqrt(PARTITIONING));
auto px = int32_t(w / cnt);
auto py = int32_t(h / cnt);
auto lx = int32_t(w % cnt);
auto ly = int32_t(h % cnt);
//space partitioning
for (int y = 0; y < cnt; ++y) {
for (int x = 0; x < cnt; ++x) {
auto& partition = partitions[y * cnt + x];
partition.list[0].reserve(64);
auto& region = partition.region;
region.min = {x * px, y * py};
region.max = {region.min.x + px, region.min.y + py};
//leftovers
if (x == cnt -1) region.max.x += lx;
if (y == cnt -1) region.max.y += ly;
}
}
}
void RenderDirtyRegion::add(const RenderRegion* prv, const RenderRegion* cur)
{
if (disabled) return;
auto pvalid = prv ? prv->valid() : false;
auto cvalid = cur ? cur->valid() : false;
if (!pvalid && !cvalid) return;
for (int idx = 0; idx < PARTITIONING; ++idx) {
auto& partition = partitions[idx];
if (pvalid && prv->intersected(partition.region)) {
ScopedLock lock(key);
partition.list[partition.current].push(RenderRegion::intersect(*prv, partition.region));
}
if (cvalid && cur->intersected(partition.region)) {
ScopedLock lock(key);
partition.list[partition.current].push(RenderRegion::intersect(*cur, partition.region));
}
}
}
void RenderDirtyRegion::clear()
{
for (int idx = 0; idx < PARTITIONING; ++idx) {
partitions[idx].list[0].clear();
partitions[idx].list[1].clear();
}
}
//Replaces the entry at targets[idx] with the pieces obtained by splitting two intersecting regions:
//the intersection of lhs and rhs, followed by the strips of lhs and rhs that lie outside of it.
//  targets: region array that receives the pieces in place.
//  idx:     slot that gets overwritten; the pieces occupy [idx, idx + piece count).
//  lhs/rhs: the two regions; both are modified (shrunk) while the strips are cut off.
//No capacity check is performed here.
//NOTE(review): assumes the caller reserved enough room in targets (commit() reserves count * 5) - confirm this
//also holds when subdivisions cascade.
void RenderDirtyRegion::subdivide(Array<RenderRegion>& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs)
{
//temp[0] is the intersection, the rest are the leftover strips (one intersection can be divided up to 5)
RenderRegion temp[5];
int cnt = 0;
temp[cnt++] = RenderRegion::intersect(lhs, rhs);
//smaller right edge of the two inputs, taken before they are modified; bounds the re-sorting range below
auto max = std::min(lhs.max.x, rhs.max.x);
//cuts the parts of `rhs` lying outside of `lhs` into strips appended to temp and shrinks `rhs` accordingly.
//note: the lambda parameters shadow the function parameters of the same name.
auto subtract = [&](RenderRegion& lhs, RenderRegion& rhs) {
//top: full-width strip above lhs
if (rhs.min.y < lhs.min.y) {
temp[cnt++] = {{rhs.min.x, rhs.min.y}, {rhs.max.x, lhs.min.y}};
rhs.min.y = lhs.min.y;
}
//bottom: full-width strip below lhs
if (rhs.max.y > lhs.max.y) {
temp[cnt++] = {{rhs.min.x, lhs.max.y}, {rhs.max.x, rhs.max.y}};
rhs.max.y = lhs.max.y;
}
//left: strip restricted to the (already shrunk) vertical range
if (rhs.min.x < lhs.min.x) {
temp[cnt++] = {{rhs.min.x, rhs.min.y}, {lhs.min.x, rhs.max.y}};
rhs.min.x = lhs.min.x;
}
//right: last cut, so rhs is not shrunk any further
if (rhs.max.x > lhs.max.x) {
temp[cnt++] = {{lhs.max.x, rhs.min.y}, {rhs.max.x, rhs.max.y}};
//rhs.max.x = lhs.max.x;
}
};
//strips of each input relative to the intersection
subtract(temp[0], lhs);
subtract(temp[0], rhs);
/* Considered using a list to avoid memory shifting,
but ultimately, the array outperformed the list due to better cache locality. */
//shift data: move the tail after idx behind the new pieces, then copy the pieces into [idx, idx + cnt)
auto dst = &targets[idx + cnt];
memmove(dst, &targets[idx + 1], sizeof(RenderRegion) * (targets.count - idx - 1));
memcpy(&targets[idx], temp, sizeof(RenderRegion) * cnt);
//slot idx was overwritten, so the array grows by cnt - 1 entries
targets.count += (cnt - 1);
//sorting by x coord again, only for the updated region:
//the range is extended over the following entries whose min.x is below `max`
while (dst < targets.end() && dst->min.x < max) ++dst;
stable_sort(&targets[idx], dst, [](const RenderRegion& a, const RenderRegion& b) -> bool {
return a.min.x < b.min.x;
});
}
//Builds the final dirty-region list of every partition from the regions collected by add().
//For each partition the collected list is sorted by min.x and swept: regions that are contained
//in, or mergeable with, a following region are folded into it, intersecting regions are
//subdivided, and the remaining isolated regions are pushed to the partition's other buffer,
//which becomes the current one (the list returned by get()).
//Partitions without collected regions are left untouched. Does nothing when disabled.
void RenderDirtyRegion::commit()
{
    if (disabled) return;

    for (int idx = 0; idx < PARTITIONING; ++idx) {
        auto current = partitions[idx].current;
        auto& targets = partitions[idx].list[current];
        //an empty partition must not stop the processing of the remaining partitions
        if (targets.empty()) continue;

        current = !current; //swapping buffers
        auto& output = partitions[idx].list[current];

        targets.reserve(targets.count * 5);  //one intersection can be divided up to 5
        output.reserve(targets.count);

        partitions[idx].current = current;

        //sorting by x coord. guarantee the stable performance: O(NlogN)
        stable_sort(targets.begin(), targets.end(), [](const RenderRegion& a, const RenderRegion& b) -> bool {
            return a.min.x < b.min.x;
        });

        //Optimized using sweep-line algorithm: O(NlogN)
        for (uint32_t i = 0; i < targets.count; ++i) {
            auto& lhs = targets[i];
            if (lhs.invalid()) continue;   //already consumed by an earlier step
            auto merged = false;

            for (uint32_t j = i + 1; j < targets.count; ++j) {
                auto& rhs = targets[j];
                if (rhs.invalid()) continue;
                if (lhs.max.x < rhs.min.x) break;   //line sweeping

                //fully overlapped. drop lhs
                if (rhs.contained(lhs)) {
                    merged = true;
                    break;
                }
                //fully overlapped. replace the lhs with rhs
                if (lhs.contained(rhs)) {
                    rhs = {};
                    continue;
                }
                //just merge & expand on x axis
                if (lhs.min.y == rhs.min.y && lhs.max.y == rhs.max.y) {
                    if (lhs.min.x <= rhs.max.x && rhs.min.x <= lhs.max.x) {
                        rhs.min.x = std::min(lhs.min.x, rhs.min.x);
                        rhs.max.x = std::max(lhs.max.x, rhs.max.x);
                        merged = true;
                        break;
                    }
                }
                //just merge & expand on y axis
                //NOTE(review): unlike the x axis, `rhs.min.y < lhs.max.y` is strict here - confirm whether this is intended.
                if (lhs.min.x == rhs.min.x && lhs.max.x == rhs.max.x) {
                    if (lhs.min.y <= rhs.max.y && rhs.min.y < lhs.max.y) {
                        rhs.min.y = std::min(lhs.min.y, rhs.min.y);
                        rhs.max.y = std::max(lhs.max.y, rhs.max.y);
                        merged = true;
                        break;
                    }
                }
                //subdivide regions: the pieces replace targets[j] and are visited by the following iterations
                if (lhs.intersected(rhs)) {
                    subdivide(targets, j, lhs, rhs);
                    merged = true;
                    break;
                }
            }
            if (!merged) output.push(lhs);  //this region is complete isolated
            lhs = {};   //mark the slot as consumed
        }
    }
}
/************************************************************************/
/* RenderTrimPath Class Implementation */
/************************************************************************/

View file

@ -50,7 +50,6 @@ static inline RenderUpdateFlag operator|(const RenderUpdateFlag a, const RenderU
return RenderUpdateFlag(uint16_t(a) | uint16_t(b));
}
struct RenderSurface
{
union {
@ -111,6 +110,11 @@ struct RenderRegion
return ret;
}
//Returns the bounding box that encloses both lhs and rhs.
static constexpr RenderRegion add(const RenderRegion& lhs, const RenderRegion& rhs)
{
    return {
        {std::min(lhs.min.x, rhs.min.x), std::min(lhs.min.y, rhs.min.y)},
        {std::max(lhs.max.x, rhs.max.x), std::max(lhs.max.y, rhs.max.y)}
    };
}
void intersect(const RenderRegion& rhs);
void add(const RenderRegion& rhs)
@ -121,6 +125,16 @@ struct RenderRegion
if (rhs.max.y > max.y) max.y = rhs.max.y;
}
bool contained(const RenderRegion& rhs)
{
return (min.x <= rhs.min.x && max.x >= rhs.max.x && min.y <= rhs.min.y && max.y >= rhs.max.y);
}
//Returns true when this region and rhs overlap with a non-empty area.
//Regions that merely touch along an edge are not considered intersecting.
bool intersected(const RenderRegion& rhs) const
{
    //separated (or only touching) on the x axis
    if (rhs.min.x >= max.x || rhs.max.x <= min.x) return false;
    //separated (or only touching) on the y axis
    if (rhs.min.y >= max.y || rhs.max.y <= min.y) return false;
    return true;
}
bool operator==(const RenderRegion& rhs) const
{
return (min.x == rhs.min.x && min.y == rhs.min.y && max.x == rhs.max.x && max.y == rhs.max.y);
@ -141,6 +155,52 @@ struct RenderRegion
uint32_t h() const { return (uint32_t) sh(); }
};
//Collects the dirty regions of a frame for partial rendering.
//The screen is split into PARTITIONING partitions; every partition keeps two region lists
//(double buffering) and an index telling which of them is the current one.
struct RenderDirtyRegion
{
public:
    static constexpr const int PARTITIONING = 16;   //must be N*N

    void init(uint32_t w, uint32_t h);   //partition the w x h screen
    void commit();                       //build the final region list of each partition
    void add(const RenderRegion* prv, const RenderRegion* cur);  //collect the old and new dirty regions together
    void clear();                        //empty the region lists of all partitions

    //Sets the disabled state to `on` and returns the previous state.
    bool deactivate(bool on)
    {
        std::swap(on, disabled);
        return on;
    }

    //Returns true when the dirty region collection is disabled.
    bool deactivated() const
    {
        return disabled;
    }

    //Returns the screen area covered by the partition `idx`. The index is not range-checked.
    const RenderRegion& partition(int idx) const
    {
        return partitions[idx].region;
    }

    //Returns the current region list of the partition `idx`. The index is not range-checked.
    const Array<RenderRegion>& get(int idx) const
    {
        return partitions[idx].list[partitions[idx].current];
    }

private:
    void subdivide(Array<RenderRegion>& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs);

    struct Partition
    {
        RenderRegion region;            //screen area covered by this partition
        Array<RenderRegion> list[2];    //double buffer swapping
        uint8_t current = 0;            //double buffer swapping list index. 0 or 1
    };

    Key key;                            //lock used by add() while pushing regions
    Partition partitions[PARTITIONING];
    bool disabled = false;
};
struct RenderPath
{
Array<PathCommand> cmds;
@ -420,7 +480,7 @@ struct RenderEffectTritone : RenderEffect
class RenderMethod
{
private:
uint32_t refCnt = 0; //reference count
uint32_t refCnt = 0;
Key key;
protected:
@ -448,11 +508,10 @@ public:
virtual bool blend(BlendMethod method) = 0;
virtual ColorSpace colorSpace() = 0;
virtual const RenderSurface* mainSurface() = 0;
virtual bool clear() = 0;
virtual bool sync() = 0;
//compositions
//composition
virtual RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) = 0;
virtual bool beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) = 0;
virtual bool endComposite(RenderCompositor* cmp) = 0;
@ -462,6 +521,10 @@ public:
virtual bool region(RenderEffect* effect) = 0;
virtual bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) = 0;
virtual void dispose(RenderEffect* effect) = 0;
//partial rendering
virtual void damage(const RenderRegion& region) = 0;
virtual bool partial(bool disable) = 0;
};
static inline bool MASK_REGION_MERGING(MaskMethod method)
@ -532,4 +595,4 @@ static inline uint8_t MULTIPLY(uint8_t c, uint8_t a)
}
#endif //_TVG_RENDER_H_
#endif //_TVG_RENDER_H_

View file

@ -127,10 +127,13 @@ struct SceneImpl : Scene
}
}
//this viewport update is more performant than in bounds()?
//this viewport update is more performant than in bounds(). No idea.
vport = renderer->viewport();
vdirty = true;
//bounds(renderer) here hinders parallelization.
if (effects) impl.damage(vport);
return true;
}
@ -257,7 +260,8 @@ struct SceneImpl : Scene
{
auto itr = paints.begin();
while (itr != paints.end()) {
PAINT((*itr))->unref();
auto paint = PAINT((*itr));
paint->unref();
paints.erase(itr++);
}
return Result::Success;
@ -310,6 +314,7 @@ struct SceneImpl : Scene
}
delete(effects);
effects = nullptr;
impl.damage(vport);
}
return Result::Success;
}

View file

@ -581,6 +581,19 @@ bool WgRenderer::postUpdate()
}
//Partial rendering hook: receives a region that should be treated as dirty.
//Not implemented for this backend yet: the region is currently ignored.
void WgRenderer::damage(const RenderRegion& region)
{
//TODO: record the region as dirty once this backend supports partial rendering
}
//Partial rendering hook: requests the feature to be disabled/enabled via `disable`.
//Not implemented for this backend yet: the argument is ignored and false is always returned.
//NOTE(review): the meaning of the return value is not defined in this stub - confirm against RenderMethod::partial().
bool WgRenderer::partial(bool disable)
{
//TODO: toggle the partial rendering once this backend supports it
return false;
}
bool WgRenderer::term()
{
if (rendererCnt > 0) return false;

View file

@ -28,6 +28,7 @@
class WgRenderer : public RenderMethod
{
public:
//main features
bool preUpdate() override;
RenderData prepare(const RenderShape& rshape, RenderData data, const Matrix& transform, Array<RenderData>& clips, uint8_t opacity, RenderUpdateFlag flags, bool clipper) override;
RenderData prepare(RenderSurface* surface, RenderData data, const Matrix& transform, Array<RenderData>& clips, uint8_t opacity, RenderUpdateFlag flags) override;
@ -41,21 +42,25 @@ public:
bool blend(BlendMethod method) override;
ColorSpace colorSpace() override;
const RenderSurface* mainSurface() override;
bool clear() override;
bool sync() override;
bool target(WGPUDevice device, WGPUInstance instance, void* target, uint32_t width, uint32_t height, int type = 0);
//composition
RenderCompositor* target(const RenderRegion& region, ColorSpace cs, CompositionFlag flags) override;
bool beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity) override;
bool endComposite(RenderCompositor* cmp) override;
//post effects
void prepare(RenderEffect* effect, const Matrix& transform) override;
bool region(RenderEffect* effect) override;
bool render(RenderCompositor* cmp, const RenderEffect* effect, bool direct) override;
void dispose(RenderEffect* effect) override;
//partial rendering
void damage(const RenderRegion& region) override;
bool partial(bool disable) override;
static WgRenderer* gen(uint32_t threads);
static bool term();