renderer/sw_engine: add partial rendering support (POC)

Partial Rendering refers to a rendering technique where
only a portion of the scene or screen is updated, rather
than redrawing the entire output. It is commonly used as
a performance optimization strategy, focusing on redrawing
only the regions that have changed, often called dirty regions.

This is implemented with a line-sweep and subdivision/merge strategy.

issue: https://github.com/thorvg/thorvg/issues/1747
This commit is contained in:
Hermet Park 2025-05-21 17:30:59 +09:00
parent c617c9ed70
commit 7e9b4bb382
13 changed files with 350 additions and 37 deletions

View file

@ -859,6 +859,12 @@ bool GlRenderer::sync()
}
//Partial-rendering hook: receives a region that has to be redrawn.
//This backend has no implementation yet, so the region is ignored.
void GlRenderer::damage(TVG_UNUSED const RenderRegion& region)
{
//TODO: record the damaged region once partial rendering is implemented for this backend
}
RenderRegion GlRenderer::region(RenderData data)
{
if (currentPass()->isEmpty()) return {};

View file

@ -80,6 +80,7 @@ public:
bool postRender() override;
void dispose(RenderData data) override;;
RenderRegion region(RenderData data) override;
void damage(const RenderRegion& region) override;
RenderRegion viewport() override;
bool viewport(const RenderRegion& vp) override;
bool blend(BlendMethod method) override;

View file

@ -147,10 +147,10 @@ struct SwRle
begin = lower_bound(spans.begin(), spans.end(), min, comp);
}
if (end) {
if (max >= spans.last().y) {
if (max > spans.last().y) {
*end = spans.end();
} else {
auto comp = [](int y, const SwSpan& span) { return y < span.y; };
auto comp = [](int y, const SwSpan& span) { return y <= span.y; };
*end = upper_bound(spans.begin(), spans.end(), max, comp);
}
}

View file

@ -237,7 +237,6 @@ static bool _compositeMaskImage(SwSurface* surface, const SwImage* image, const
return true;
}
#include "tvgSwRasterTexmap.h"
#include "tvgSwRasterC.h"
#include "tvgSwRasterAvx.h"

View file

@ -40,7 +40,7 @@ struct SwTask : Task
{
SwSurface* surface = nullptr;
SwMpool* mpool = nullptr;
RenderRegion bbox; //Rendering Region
RenderRegion bbox[2] = {{}, {}}; //Rendering Region 0:current, 1:prevous
Matrix transform;
Array<RenderData> clips;
RenderUpdateFlag flags = RenderUpdateFlag::None;
@ -52,7 +52,7 @@ struct SwTask : Task
{
//Can we skip the synchronization?
done();
return bbox;
return bbox[0];
}
virtual void dispose() = 0;
@ -92,7 +92,7 @@ struct SwShapeTask : SwTask
bool clip(SwRle* target) override
{
if (shape.strokeRle) return rleClip(target, shape.strokeRle);
if (shape.fastTrack) return rleClip(target, &bbox);
if (shape.fastTrack) return rleClip(target, &bbox[0]);
if (shape.rle) return rleClip(target, shape.rle);
return false;
}
@ -101,7 +101,7 @@ struct SwShapeTask : SwTask
{
//Invisible
if (opacity == 0 && !clipper) {
bbox.reset();
bbox[0].reset();
return;
}
@ -115,7 +115,7 @@ struct SwShapeTask : SwTask
updateFill = (MULTIPLY(rshape->color.a, opacity) || rshape->fill);
if (updateShape) shapeReset(&shape);
if (updateFill || clipper) {
if (shapePrepare(&shape, rshape, transform, bbox, renderBox, mpool, tid, clips.count > 0 ? true : false)) {
if (shapePrepare(&shape, rshape, transform, bbox[0], renderBox, mpool, tid, clips.count > 0 ? true : false)) {
if (!shapeGenRle(&shape, rshape, antialiasing(strokeWidth))) goto err;
} else {
updateFill = false;
@ -135,7 +135,7 @@ struct SwShapeTask : SwTask
if (updateShape || flags & RenderUpdateFlag::Stroke) {
if (strokeWidth > 0.0f) {
shapeResetStroke(&shape, rshape, transform);
if (!shapeGenStrokeRle(&shape, rshape, transform, bbox, renderBox, mpool, tid)) goto err;
if (!shapeGenStrokeRle(&shape, rshape, transform, bbox[0], renderBox, mpool, tid)) goto err;
if (auto fill = rshape->strokeFill()) {
auto ctable = (flags & RenderUpdateFlag::GradientStroke) ? true : false;
if (ctable) shapeResetStrokeFill(&shape);
@ -157,12 +157,11 @@ struct SwShapeTask : SwTask
if (!clipShapeRle && !clipStrokeRle) goto err;
}
bbox = renderBox; //sync
bbox[0] = renderBox; //sync
return;
err:
bbox.reset();
bbox[0].reset();
shapeReset(&shape);
rleReset(shape.strokeRle);
shapeDelOutline(&shape, mpool, tid);
@ -188,7 +187,7 @@ struct SwImageTask : SwTask
void run(unsigned tid) override
{
auto clipBox = bbox;
auto clipBox = bbox[0];
//Convert colorspace if it's not aligned.
rasterConvertCS(source, surface->cs);
@ -204,9 +203,11 @@ struct SwImageTask : SwTask
if ((flags & (RenderUpdateFlag::Image | RenderUpdateFlag::Transform | RenderUpdateFlag::Color)) && (opacity > 0)) {
imageReset(&image);
if (!image.data || image.w == 0 || image.h == 0) goto end;
if (!imagePrepare(&image, transform, clipBox, bbox, mpool, tid)) goto end;
if (!imagePrepare(&image, transform, clipBox, bbox[0], mpool, tid)) goto end;
if (clips.count > 0) {
if (!imageGenRle(&image, bbox, false)) goto end;
if (!imageGenRle(&image, bbox[0], false)) goto end;
if (image.rle) {
//Clear current task memorypool here if the clippers would use the same memory pool
imageDelOutline(&image, mpool, tid);
@ -220,7 +221,7 @@ struct SwImageTask : SwTask
}
goto end;
err:
bbox.reset();
bbox[0].reset();
rleReset(image.rle);
end:
imageDelOutline(&image, mpool, tid);
@ -266,7 +267,10 @@ SwRenderer::~SwRenderer()
bool SwRenderer::clear()
{
if (surface) return rasterClear(surface, 0, 0, surface->w, surface->h);
if (surface) {
fulldraw = true;
return rasterClear(surface, 0, 0, surface->w, surface->h);
}
return false;
}
@ -334,7 +338,28 @@ bool SwRenderer::postUpdate()
bool SwRenderer::preRender()
{
return surface != nullptr;
if (!surface) return false;
if (fulldraw || !dirtyRegion.prepare(tasks.count)) return true;
//TODO: optimize to remove this iteration.
//collect the old and new dirtry regions
ARRAY_FOREACH(p, tasks) {
auto task = *p;
task->done();
auto& cur = task->bbox[0];
auto& prv = task->bbox[1];
//quick generous merge if two regions are close enough.
if (abs(cur.min.y - prv.min.y) < 5 && abs(cur.max.y - prv.max.y) < 5 && abs(cur.min.x - prv.min.x) < 5 && abs(cur.max.x - prv.max.x) < 5) {
dirtyRegion.add(RenderRegion::add(task->bbox[0], task->bbox[1]));
} else {
dirtyRegion.add(task->bbox[0]);
dirtyRegion.add(task->bbox[1]);
}
}
dirtyRegion.commit();
return true;
}
@ -359,9 +384,15 @@ bool SwRenderer::postRender()
ARRAY_FOREACH(p, tasks) {
if ((*p)->disposed) delete(*p);
else (*p)->pushed = false;
else {
(*p)->bbox[1] = (*p)->bbox[0];
(*p)->pushed = false;
}
}
tasks.clear();
dirtyRegion.clear();
fulldraw = false;
return true;
}
@ -375,7 +406,18 @@ bool SwRenderer::renderImage(RenderData data)
if (task->opacity == 0) return true;
//full scene or partial rendering
return rasterImage(surface, &task->image, task->transform, task->bbox, task->opacity);
if (fulldraw || task->pushed || dirtyRegion.deactivated()) {
rasterImage(surface, &task->image, task->transform, task->bbox[0], task->opacity);
} else {
ARRAY_FOREACH(p, dirtyRegion.get()) {
if (task->bbox[0].min.x >= p->max.x) break; //dirtyRegion is sorted in x order
if (task->bbox[0].intersected(*p)) {
auto bbox = RenderRegion::intersect(task->bbox[0], *p);
rasterImage(surface, &task->image, task->transform, bbox, task->opacity);
}
}
}
return true;
}
@ -411,12 +453,27 @@ bool SwRenderer::renderShape(RenderData data)
}
};
//full scene or partial rendering
if (fulldraw || task->pushed || dirtyRegion.deactivated()) {
if (task->rshape->strokeFirst()) {
stroke(task, surface, task->bbox);
stroke(task, surface, task->bbox[0]);
fill(task, surface, task->shape.bbox);
} else {
fill(task, surface, task->shape.bbox);
stroke(task, surface, task->bbox);
stroke(task, surface, task->bbox[0]);
}
} else {
//TODO: skip the stroke bbox if they are invalid.
ARRAY_FOREACH(p, dirtyRegion.get()) {
if (task->bbox[0].min.x >= p->max.x) break; //dirtyRegion is sorted in x order
if (task->rshape->strokeFirst()) {
if (task->bbox[0].intersected(*p)) stroke(task, surface, RenderRegion::intersect(task->bbox[0], *p));
if (task->shape.bbox.intersected(*p)) fill(task, surface, RenderRegion::intersect(task->shape.bbox, *p));
} else {
if (task->shape.bbox.intersected(*p)) fill(task, surface, RenderRegion::intersect(task->shape.bbox, *p));
if (task->bbox[0].intersected(*p)) stroke(task, surface, RenderRegion::intersect(task->bbox[0], *p));
}
}
}
return true;
@ -483,6 +540,12 @@ RenderRegion SwRenderer::region(RenderData data)
}
//Partial-rendering hook: queues the given region for redraw by handing it
//to the renderer's dirty-region list.
void SwRenderer::damage(const RenderRegion& region)
{
dirtyRegion.add(region);
}
bool SwRenderer::beginComposite(RenderCompositor* cmp, MaskMethod method, uint8_t opacity)
{
if (!cmp) return false;
@ -680,6 +743,9 @@ void SwRenderer::dispose(RenderData data)
task->done();
task->dispose();
//should be updated for the region; the current paint is removed
dirtyRegion.add(task->bbox[0]);
if (task->pushed) task->disposed = true;
else delete(task);
}
@ -700,7 +766,7 @@ void* SwRenderer::prepareCommon(SwTask* task, const Matrix& transform, const Arr
task->surface = surface;
task->mpool = mpool;
task->bbox = RenderRegion::intersect(vport, {{0, 0}, {int32_t(surface->w), int32_t(surface->h)}});
task->bbox[0] = RenderRegion::intersect(vport, {{0, 0}, {int32_t(surface->w), int32_t(surface->h)}});
task->transform = transform;
task->clips = clips;
task->opacity = opacity;

View file

@ -46,6 +46,7 @@ public:
bool postRender() override;
void dispose(RenderData data) override;
RenderRegion region(RenderData data) override;
void damage(const RenderRegion& region) override;
RenderRegion viewport() override;
bool viewport(const RenderRegion& vp) override;
bool blend(BlendMethod method) override;
@ -70,12 +71,14 @@ public:
static bool term();
private:
RenderDirtyRegion dirtyRegion;
SwSurface* surface = nullptr; //active surface
Array<SwTask*> tasks; //async task list
Array<SwSurface*> compositors; //render targets cache list
SwMpool* mpool; //private memory pool
RenderRegion vport; //viewport
bool sharedMpool; //memory-pool behavior policy
bool fulldraw = true; //buffer is cleared (need to redraw full screen)
SwRenderer();
~SwRenderer();

View file

@ -94,9 +94,7 @@ struct Canvas::Impl
if (status == Status::Damaged) update(nullptr, false);
if (!renderer->preRender()) return Result::InsufficientCondition;
if (!PAINT(scene)->render(renderer) || !renderer->postRender()) return Result::InsufficientCondition;
if (!renderer->preRender() || !PAINT(scene)->render(renderer) || !renderer->postRender()) return Result::InsufficientCondition;
status = Status::Drawing;

View file

@ -89,6 +89,16 @@ namespace tvg {
uint16_t THORVG_VERSION_NUMBER();
#if 1 //for debugging
#include <sys/time.h>
//Debug helper: current wall-clock time from gettimeofday(), expressed in seconds
//with the microsecond part carried in the fraction.
static inline double THORVG_TIMESTAMP()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double micros = now.tv_usec / 1000000.0;
    return now.tv_sec + micros;
}
#endif
#define TVG_DELETE(PAINT) \
if (PAINT->refCnt() == 0) delete(PAINT)

View file

@ -20,6 +20,7 @@
* SOFTWARE.
*/
#include <algorithm>
#include "tvgMath.h"
#include "tvgRender.h"
@ -116,6 +117,143 @@ void RenderRegion::intersect(const RenderRegion& rhs)
if (max.y < min.y) max.y = min.y;
}
/* Splits two overlapping regions into non-overlapping pieces: their intersection
   plus the strips of lhs and rhs that stick out of it. The pieces overwrite the
   entry at targets[idx] (later entries are shifted back to make room), and the
   touched range is re-sorted by min.x afterwards.

   targets: region list being processed; it is modified in place.
   idx:     index of the entry that the generated pieces replace.
   lhs/rhs: the two overlapping regions; both are shrunk in place while carving.

   The list is never grown here: if the reserved capacity is insufficient the
   process is aborted (see the TODO below). */
void RenderDirtyRegion::subdivide(Array<RenderRegion>& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs)
{
//temp[0] is the intersection; on each of the four sides at most one of the two
//inputs can extend past the intersection, so at most four strips follow it.
RenderRegion temp[5];
int cnt = 0;
temp[cnt++] = RenderRegion::intersect(lhs, rhs);
//right edge of the intersection, taken before lhs/rhs are modified; used below to bound the re-sort range
auto max = std::min(lhs.max.x, rhs.max.x);
//Carves out of `rhs` the strips lying outside `lhs` and appends them to temp.
//Note: inside the lambda `lhs` is the intersection and `rhs` is the region being carved (the names shadow the outer ones).
//`rhs` is shrunk after each cut so that the following strips do not overlap the previous ones.
auto subtract = [&](RenderRegion& lhs, RenderRegion& rhs) {
//top
if (rhs.min.y < lhs.min.y) {
temp[cnt++] = {{rhs.min.x, rhs.min.y}, {rhs.max.x, lhs.min.y}};
rhs.min.y = lhs.min.y;
}
//bottom
if (rhs.max.y > lhs.max.y) {
temp[cnt++] = {{rhs.min.x, lhs.max.y}, {rhs.max.x, rhs.max.y}};
rhs.max.y = lhs.max.y;
}
//left
if (rhs.min.x < lhs.min.x) {
temp[cnt++] = {{rhs.min.x, rhs.min.y}, {lhs.min.x, rhs.max.y}};
rhs.min.x = lhs.min.x;
}
//right
if (rhs.max.x > lhs.max.x) {
temp[cnt++] = {{lhs.max.x, rhs.min.y}, {rhs.max.x, rhs.max.y}};
//last cut: shrinking rhs is not needed since nothing reads it afterwards
//rhs.max.x = lhs.max.x;
}
};
subtract(temp[0], lhs);
subtract(temp[0], rhs);
//TODO: remove this
//cnt pieces replace a single entry, so the list grows by (cnt - 1) entries
if (targets.reserved < targets.count + cnt - 1) {
TVGERR("RENDERER", "reserved: %d, required: %d (+%d)\n", targets.reserved, targets.count + cnt - 1, cnt - 1);
abort();
}
/* Note: We considered using a list to avoid memory shifting,
but ultimately, the array outperformed the list due to better cache locality. */
//shift data: move everything after idx back by (cnt - 1) slots, then drop the pieces into the gap
auto dst = &targets[idx + cnt];
memmove(dst, &targets[idx + 1], sizeof(RenderRegion) * (targets.count - idx - 1));
memcpy(&targets[idx], temp, sizeof(RenderRegion) * cnt);
targets.count += (cnt - 1);
//sorting by x coord again, only for the updated region:
//extend the range over the following entries that start left of the intersection's right edge
while (dst < targets.end() && dst->min.x < max) ++dst;
stable_sort(&targets[idx], dst, [](const RenderRegion& a, const RenderRegion& b) -> bool {
return a.min.x < b.min.x;
});
}
/* Converts the regions collected in the current list into the output list:
   entries are sorted by min.x, then repeatedly swept so that contained entries
   are dropped, entries sharing the same extent on one axis are merged along the
   other axis, and partially overlapping entries are subdivided. An entry that
   interacts with no later entry in the sweep is pushed to the output list.

   Nothing happens when the feature is disabled or skipped, or when no region
   was collected. If more than THREASHOLD regions were collected, `skip` is set
   and the method returns without producing any output. */
void RenderDirtyRegion::commit()
{
if (skip || disabled) return;
auto& targets = list[current];
if (targets.empty()) return;
if (targets.count > THREASHOLD) {
skip = true;
return;
}
current = !current; //swapping buffers: from here on list[current] (what get() returns) is the output list
auto& output = list[current];
auto damaged = true;
//sorting by x coord. guarantee the stable performance: O(NlogN)
stable_sort(targets.begin(), targets.end(), [](const RenderRegion& a, const RenderRegion& b) -> bool {
return a.min.x < b.min.x;
});
//O(N^2) ~ O(N^3)
//repeat the sweep until a whole pass finishes without any merge or subdivision
while (damaged) {
damaged = false;
for (uint32_t i = 0; i < targets.count; ++i) {
auto& lhs = targets[i];
//skip entries that were already consumed (reset to {}) -- presumably a default-constructed region reports invalid()
if (lhs.invalid()) continue;
auto merged = false;
for (uint32_t j = i + 1; j < targets.count; ++j) {
auto& rhs = targets[j];
if (rhs.invalid()) continue;
//line sweeping: entries are ordered by min.x, so once rhs starts right of lhs the scan for lhs stops
if (lhs.max.x < rhs.min.x) break;
//TODO: generous merge for preventing too much fragmentation
//rhs fully covers lhs: drop lhs, rhs stays in the list
if (rhs.contained(lhs)) {
merged = true;
break;
}
//lhs fully covers rhs: discard rhs and keep scanning with lhs
if (lhs.contained(rhs)) {
rhs = {};
continue;
}
//just merge & expand on x axis (same vertical extent): the union is stored in rhs
if (lhs.min.y == rhs.min.y && lhs.max.y == rhs.max.y) {
if (lhs.min.x <= rhs.max.x && rhs.min.x <= lhs.max.x) {
rhs.min.x = std::min(lhs.min.x, rhs.min.x);
rhs.max.x = std::max(lhs.max.x, rhs.max.x);
merged = true;
break;
}
}
//just merge & expand on y axis (same horizontal extent): the union is stored in rhs
//NOTE(review): the second test uses '<' while the x-axis case above uses '<=', so a rhs that
//starts exactly at lhs.max.y is not merged here -- confirm whether the asymmetry is intended.
if (lhs.min.x == rhs.min.x && lhs.max.x == rhs.max.x) {
if (lhs.min.y <= rhs.max.y && rhs.min.y < lhs.max.y) {
rhs.min.y = std::min(lhs.min.y, rhs.min.y);
rhs.max.y = std::max(lhs.max.y, rhs.max.y);
merged = true;
break;
}
}
//subdivide regions: the pieces replace the entry at j; lhs is consumed below
if (lhs.intersected(rhs)) {
subdivide(targets, j, lhs, rhs);
merged = true;
break;
}
}
if (merged) damaged = true; //regions are damaged, inspect again
else output.push(lhs); //this region is complete isolated
//lhs has been either emitted or absorbed into a later entry: reset it so later passes skip it
lhs = {};
}
}
}
/************************************************************************/
/* RenderTrimPath Class Implementation */
/************************************************************************/

View file

@ -25,6 +25,7 @@
#include <math.h>
#include <cstdarg>
#include <vector>
#include "tvgCommon.h"
#include "tvgArray.h"
#include "tvgLock.h"
@ -50,7 +51,6 @@ static inline RenderUpdateFlag operator|(const RenderUpdateFlag a, const RenderU
return RenderUpdateFlag(uint16_t(a) | uint16_t(b));
}
struct RenderSurface
{
union {
@ -107,6 +107,11 @@ struct RenderRegion
return {{std::max(lhs.min.x, rhs.min.x), std::max(lhs.min.y, rhs.min.y)}, {std::min(lhs.max.x, rhs.max.x), std::min(lhs.max.y, rhs.max.y)}};
}
//Returns the bounding region enclosing both lhs and rhs:
//the smaller of each min coordinate and the larger of each max coordinate.
static constexpr RenderRegion add(const RenderRegion& lhs, const RenderRegion& rhs)
{
    return {
        {(rhs.min.x < lhs.min.x) ? rhs.min.x : lhs.min.x, (rhs.min.y < lhs.min.y) ? rhs.min.y : lhs.min.y},
        {(lhs.max.x < rhs.max.x) ? rhs.max.x : lhs.max.x, (lhs.max.y < rhs.max.y) ? rhs.max.y : lhs.max.y}
    };
}
void intersect(const RenderRegion& rhs);
void add(const RenderRegion& rhs)
@ -117,6 +122,16 @@ struct RenderRegion
if (rhs.max.y > max.y) max.y = rhs.max.y;
}
bool contained(const RenderRegion& rhs)
{
return (min.x <= rhs.min.x && max.x >= rhs.max.x && min.y <= rhs.min.y && max.y >= rhs.max.y);
}
//Returns true when this region and rhs overlap; regions that merely share an edge do not count.
bool intersected(const RenderRegion& rhs) const
{
    //separated (or only touching) on the x axis
    if (rhs.min.x >= max.x || rhs.max.x <= min.x) return false;
    //separated (or only touching) on the y axis
    if (rhs.min.y >= max.y || rhs.max.y <= min.y) return false;
    return true;
}
bool operator==(const RenderRegion& rhs) const
{
return (min.x == rhs.min.x && min.y == rhs.min.y && max.x == rhs.max.x && max.y == rhs.max.y);
@ -137,6 +152,68 @@ struct RenderRegion
uint32_t h() const { return (uint32_t) sh(); }
};
/* Collects the regions that need to be redrawn and keeps them in two lists
   used as a double buffer: add() appends to list[current], and commit()
   processes that list into the other one and flips `current`. */
struct RenderDirtyRegion
{
    //Queues a region for redraw. Ignored while disabled or when the region is not valid.
    void add(const RenderRegion& region)
    {
        if (disabled || !region.valid()) return;
        list[current].push(region);
    }

    /* Reserves storage for both lists before regions are collected.
       count: number of elements expected to contribute regions.
       Returns false when dirty-region tracking is disabled, or when count
       exceeds THREASHOLD (in which case `skip` is set as well); true otherwise. */
    bool prepare(uint32_t count = 0)
    {
        if (disabled) return false;

        if (count > THREASHOLD) {
            skip = true;
            return false;
        }

        //headroom for the extra entries commit() may generate while subdividing
        count *= 120; //FIXME: enough?

        list[0].reserve(count);
        list[1].reserve(count);

        return true;
    }

    //True when the dirty regions must not be used: tracking is disabled or was skipped.
    bool deactivated() const
    {
        return disabled || skip;
    }

    //Empties both lists and withdraws the skip decision; `disabled` and `current` are left as they are.
    void clear()
    {
        list[0].clear();
        list[1].clear();
        skip = false;
    }

    //Returns the list selected by `current` (the output list once commit() has flipped the buffers).
    const Array<RenderRegion>& get() const
    {
        return list[current];
    }

    void commit();

private:
    void subdivide(Array<RenderRegion>& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs);

    /* We deactivate partial rendering if there are more than N moving elements.
       Imagine thousands of moving objects covering the entire screen. In that case partial rendering loses any benefit.
       Even if they don't, the overhead of subdividing and merging partial regions
       could be more expensive than simply rendering the full screen.
       The number is experimentally confirmed and we are open to improving this. */
    static constexpr const uint32_t THREASHOLD = 1000;

    Array<RenderRegion> list[2];  //double buffer swapping
    uint8_t current = 0;          //list index. 0 or 1
    bool disabled = false;
    bool skip = false;
};
struct RenderPath
{
Array<PathCommand> cmds;
@ -415,8 +492,9 @@ struct RenderEffectTritone : RenderEffect
class RenderMethod
{
private:
uint32_t refCnt = 0; //reference count
protected:
RenderDirtyRegion dirtyRegion;
uint32_t refCnt = 0;
Key key;
public:
@ -433,6 +511,7 @@ public:
virtual bool renderImage(RenderData data) = 0;
virtual bool postRender() = 0;
virtual void dispose(RenderData data) = 0;
virtual void damage(const RenderRegion& region) = 0;
virtual RenderRegion region(RenderData data) = 0;
virtual RenderRegion viewport() = 0;
virtual bool viewport(const RenderRegion& vp) = 0;

View file

@ -105,7 +105,7 @@ struct SceneImpl : Scene
RenderData update(RenderMethod* renderer, const Matrix& transform, Array<RenderData>& clips, uint8_t opacity, RenderUpdateFlag flag, TVG_UNUSED bool clipper)
{
this->vport = renderer->viewport();
vport = renderer->viewport();
if (needComposition(opacity)) {
/* Overriding opacity value. If this scene is half-translucent,
@ -123,6 +123,11 @@ struct SceneImpl : Scene
}
}
if (compFlag) vport = bounds(renderer);
//TODO: apply only for blur style effects
if (effects) renderer->damage(vport);
return nullptr;
}
@ -134,7 +139,7 @@ struct SceneImpl : Scene
renderer->blend(impl.blendMethod);
if (compFlag) {
cmp = renderer->target(bounds(renderer), renderer->colorSpace(), static_cast<CompositionFlag>(compFlag));
cmp = renderer->target(vport, renderer->colorSpace(), static_cast<CompositionFlag>(compFlag));
renderer->beginComposite(cmp, MaskMethod::None, opacity);
}
@ -157,7 +162,7 @@ struct SceneImpl : Scene
return ret;
}
RenderRegion bounds(RenderMethod* renderer) const
RenderRegion bounds(RenderMethod* renderer)
{
if (paints.empty()) return {};
@ -185,8 +190,8 @@ struct SceneImpl : Scene
pRegion.max.x += eRegion.max.x;
pRegion.max.y += eRegion.max.y;
pRegion.intersect(this->vport);
return pRegion;
vport = RenderRegion::intersect(renderer->viewport(), pRegion);
return vport;
}
Result bounds(Point* pt4, Matrix& m, bool obb, bool stroking)
@ -298,6 +303,7 @@ struct SceneImpl : Scene
}
delete(effects);
effects = nullptr;
impl.renderer->damage(vport);
}
return Result::Success;
}

View file

@ -274,6 +274,12 @@ void WgRenderer::dispose(RenderData data) {
}
//Partial-rendering hook: receives a region that has to be redrawn.
//This backend has no implementation yet, so the region is ignored.
void WgRenderer::damage(TVG_UNUSED const RenderRegion& region)
{
//TODO: record the damaged region once partial rendering is implemented for this backend
}
RenderRegion WgRenderer::region(RenderData data)
{
auto renderData = (WgRenderDataPaint*)data;

View file

@ -38,6 +38,7 @@ public:
bool postRender() override;
void dispose(RenderData data) override;
RenderRegion region(RenderData data) override;
void damage(const RenderRegion& region) override;
RenderRegion viewport() override;
bool viewport(const RenderRegion& vp) override;
bool blend(BlendMethod method) override;