sw_engine: add support for partial rendering

This implements RenderDirtyRegion.

issue: https://github.com/thorvg/thorvg/issues/1747
This commit is contained in:
Hermet Park 2025-05-21 17:30:59 +09:00
parent afeb7c024a
commit 103a557b6c
4 changed files with 232 additions and 26 deletions

View file

@ -40,7 +40,7 @@ struct SwTask : Task
{
SwSurface* surface = nullptr;
SwMpool* mpool = nullptr;
RenderRegion bbox; //Rendering Region
RenderRegion bbox[2] = {{}, {}}; //Rendering Region 0:current, 1:previous
Matrix transform;
Array<RenderData> clips;
RenderUpdateFlag flags = RenderUpdateFlag::None;
@ -52,7 +52,7 @@ struct SwTask : Task
{
//Can we skip the synchronization?
done();
return bbox;
return bbox[0];
}
virtual void dispose() = 0;
@ -92,7 +92,7 @@ struct SwShapeTask : SwTask
bool clip(SwRle* target) override
{
if (shape.strokeRle) return rleClip(target, shape.strokeRle);
if (shape.fastTrack) return rleClip(target, &bbox);
if (shape.fastTrack) return rleClip(target, &bbox[0]);
if (shape.rle) return rleClip(target, shape.rle);
return false;
}
@ -101,7 +101,7 @@ struct SwShapeTask : SwTask
{
//Invisible
if (opacity == 0 && !clipper) {
bbox.reset();
bbox[0].reset();
return;
}
@ -115,7 +115,7 @@ struct SwShapeTask : SwTask
updateFill = (MULTIPLY(rshape->color.a, opacity) || rshape->fill);
if (updateShape) shapeReset(&shape);
if (updateFill || clipper) {
if (shapePrepare(&shape, rshape, transform, bbox, renderBox, mpool, tid, clips.count > 0 ? true : false)) {
if (shapePrepare(&shape, rshape, transform, bbox[0], renderBox, mpool, tid, clips.count > 0 ? true : false)) {
if (!shapeGenRle(&shape, rshape, antialiasing(strokeWidth))) goto err;
} else {
updateFill = false;
@ -135,7 +135,7 @@ struct SwShapeTask : SwTask
if (updateShape || flags & RenderUpdateFlag::Stroke) {
if (strokeWidth > 0.0f) {
shapeResetStroke(&shape, rshape, transform);
if (!shapeGenStrokeRle(&shape, rshape, transform, bbox, renderBox, mpool, tid)) goto err;
if (!shapeGenStrokeRle(&shape, rshape, transform, bbox[0], renderBox, mpool, tid)) goto err;
if (auto fill = rshape->strokeFill()) {
auto ctable = (flags & RenderUpdateFlag::GradientStroke) ? true : false;
if (ctable) shapeResetStrokeFill(&shape);
@ -157,12 +157,11 @@ struct SwShapeTask : SwTask
if (!clipShapeRle && !clipStrokeRle) goto err;
}
bbox = renderBox; //sync
bbox[0] = renderBox; //sync
return;
err:
bbox.reset();
bbox[0].reset();
shapeReset(&shape);
rleReset(shape.strokeRle);
shapeDelOutline(&shape, mpool, tid);
@ -188,7 +187,7 @@ struct SwImageTask : SwTask
void run(unsigned tid) override
{
auto clipBox = bbox;
auto clipBox = bbox[0];
//Convert colorspace if it's not aligned.
rasterConvertCS(source, surface->cs);
@ -204,9 +203,11 @@ struct SwImageTask : SwTask
if ((flags & (RenderUpdateFlag::Image | RenderUpdateFlag::Transform | RenderUpdateFlag::Color)) && (opacity > 0)) {
imageReset(&image);
if (!image.data || image.w == 0 || image.h == 0) goto end;
if (!imagePrepare(&image, transform, clipBox, bbox, mpool, tid)) goto end;
if (!imagePrepare(&image, transform, clipBox, bbox[0], mpool, tid)) goto end;
if (clips.count > 0) {
if (!imageGenRle(&image, bbox, false)) goto end;
if (!imageGenRle(&image, bbox[0], false)) goto end;
if (image.rle) {
//Clear current task memorypool here if the clippers would use the same memory pool
imageDelOutline(&image, mpool, tid);
@ -220,7 +221,7 @@ struct SwImageTask : SwTask
}
goto end;
err:
bbox.reset();
bbox[0].reset();
rleReset(image.rle);
end:
imageDelOutline(&image, mpool, tid);
@ -266,7 +267,10 @@ SwRenderer::~SwRenderer()
bool SwRenderer::clear()
{
if (surface) return rasterClear(surface, 0, 0, surface->w, surface->h);
if (surface) {
fulldraw = true;
return rasterClear(surface, 0, 0, surface->w, surface->h);
}
return false;
}
@ -334,7 +338,28 @@ bool SwRenderer::postUpdate()
//Runs before the frame is rendered. Fails without a surface; otherwise, when a partial
//redraw is possible, gathers the current and previous bounding boxes of every task
//into dirtyRegion and commits them.
bool SwRenderer::preRender()
{
return surface != nullptr;
if (!surface) return false;
//nothing to collect for a full redraw, or when dirtyRegion.prepare() declines
//NOTE(review): prepare() presumably reserves room for tasks.count entries -- confirm
if (fulldraw || !dirtyRegion.prepare(tasks.count)) return true;
//TODO: optimize to remove this iteration.
//collect the old and new dirty regions
ARRAY_FOREACH(p, tasks) {
auto task = *p;
//the bboxes are read right below, so finish the task first
task->done();
auto& cur = task->bbox[0];
auto& prv = task->bbox[1];
//quick generous merge if two regions are close enough.
//(every edge moved by less than 5 units: add one enclosing region instead of two)
if (abs(cur.min.y - prv.min.y) < 5 && abs(cur.max.y - prv.max.y) < 5 && abs(cur.min.x - prv.min.x) < 5 && abs(cur.max.x - prv.max.x) < 5) {
dirtyRegion.add(RenderRegion::add(task->bbox[0], task->bbox[1]));
} else {
dirtyRegion.add(task->bbox[0]);
dirtyRegion.add(task->bbox[1]);
}
}
dirtyRegion.commit();
return true;
}
@ -359,9 +384,15 @@ bool SwRenderer::postRender()
ARRAY_FOREACH(p, tasks) {
if ((*p)->disposed) delete(*p);
else (*p)->pushed = false;
else {
(*p)->bbox[1] = (*p)->bbox[0];
(*p)->pushed = false;
}
}
tasks.clear();
dirtyRegion.clear();
fulldraw = false;
return true;
}
@ -375,7 +406,18 @@ bool SwRenderer::renderImage(RenderData data)
if (task->opacity == 0) return true;
//full scene or partial rendering
return rasterImage(surface, &task->image, task->transform, task->bbox, task->opacity);
if (fulldraw || task->pushed || dirtyRegion.deactivated()) {
rasterImage(surface, &task->image, task->transform, task->bbox[0], task->opacity);
} else {
ARRAY_FOREACH(p, dirtyRegion.get()) {
if (task->bbox[0].min.x >= p->max.x) break; //dirtyRegion is sorted in x order
if (task->bbox[0].intersected(*p)) {
auto bbox = RenderRegion::intersect(task->bbox[0], *p);
rasterImage(surface, &task->image, task->transform, bbox, task->opacity);
}
}
}
return true;
}
@ -411,12 +453,27 @@ bool SwRenderer::renderShape(RenderData data)
}
};
if (task->rshape->strokeFirst()) {
stroke(task, surface, task->bbox);
fill(task, surface, task->shape.bbox);
//full scene or partial rendering
if (fulldraw || task->pushed || dirtyRegion.deactivated()) {
if (task->rshape->strokeFirst()) {
stroke(task, surface, task->bbox[0]);
fill(task, surface, task->shape.bbox);
} else {
fill(task, surface, task->shape.bbox);
stroke(task, surface, task->bbox[0]);
}
} else {
fill(task, surface, task->shape.bbox);
stroke(task, surface, task->bbox);
//TODO: skip the stroke bbox if they are invalid.
ARRAY_FOREACH(p, dirtyRegion.get()) {
if (task->bbox[0].min.x >= p->max.x) break; //dirtyRegion is sorted in x order
if (task->rshape->strokeFirst()) {
if (task->bbox[0].intersected(*p)) stroke(task, surface, RenderRegion::intersect(task->bbox[0], *p));
if (task->shape.bbox.intersected(*p)) fill(task, surface, RenderRegion::intersect(task->shape.bbox, *p));
} else {
if (task->shape.bbox.intersected(*p)) fill(task, surface, RenderRegion::intersect(task->shape.bbox, *p));
if (task->bbox[0].intersected(*p)) stroke(task, surface, RenderRegion::intersect(task->bbox[0], *p));
}
}
}
return true;
@ -477,9 +534,9 @@ bool SwRenderer::blend(BlendMethod method)
}
void SwRenderer::damage(TVG_UNUSED const RenderRegion& region)
void SwRenderer::damage(const RenderRegion& region)
{
//TODO:
dirtyRegion.add(region);
}
@ -686,6 +743,9 @@ void SwRenderer::dispose(RenderData data)
task->done();
task->dispose();
//should be updated for the region; the current paint is removed
dirtyRegion.add(task->bbox[0]);
if (task->pushed) task->disposed = true;
else delete(task);
}
@ -706,7 +766,7 @@ void* SwRenderer::prepareCommon(SwTask* task, const Matrix& transform, const Arr
task->surface = surface;
task->mpool = mpool;
task->bbox = RenderRegion::intersect(vport, {{0, 0}, {int32_t(surface->w), int32_t(surface->h)}});
task->bbox[0] = RenderRegion::intersect(vport, {{0, 0}, {int32_t(surface->w), int32_t(surface->h)}});
task->transform = transform;
task->clips = clips;
task->opacity = opacity;

View file

@ -71,12 +71,14 @@ public:
static bool term();
private:
RenderDirtyRegion dirtyRegion;
SwSurface* surface = nullptr; //active surface
Array<SwTask*> tasks; //async task list
Array<SwSurface*> compositors; //render targets cache list
SwMpool* mpool; //private memory pool
RenderRegion vport; //viewport
bool sharedMpool; //memory-pool behavior policy
bool fulldraw = true; //buffer is cleared (need to redraw full screen)
SwRenderer();
~SwRenderer();

View file

@ -20,6 +20,7 @@
* SOFTWARE.
*/
#include <algorithm>
#include "tvgMath.h"
#include "tvgRender.h"
@ -116,6 +117,134 @@ void RenderRegion::intersect(const RenderRegion& rhs)
if (max.y < min.y) max.y = min.y;
}
//Splits two overlapping regions into non-overlapping pieces and writes them into targets.
//
//The pieces are the intersection of lhs and rhs, followed by the strips of each region
//that lie outside of that intersection. The first piece overwrites targets[idx] (the slot
//commit() passes for rhs), the remaining ones are inserted right after it, and the touched
//part of the array is sorted by min.x again.
//
//targets: region list kept sorted by min.x; it must already have spare capacity for the
//         extra pieces, otherwise this function logs an error and aborts.
//idx:     slot in targets that receives the first piece.
//lhs/rhs: the overlapping regions. Both are modified: they are trimmed while their
//         outer strips are cut off.
void RenderDirtyRegion::subdivide(Array<RenderRegion>& targets, uint32_t idx, RenderRegion& lhs, RenderRegion& rhs)
{
    //temp[0] is the intersection, the rest are the strips around it.
    //Each side of the intersection can be exceeded by only one of the two regions,
    //so no more than four strips are produced.
    RenderRegion temp[5];
    int cnt = 0;
    temp[cnt++] = RenderRegion::intersect(lhs, rhs);

    //right edge of the intersection; taken before the regions are trimmed below
    auto max = std::min(lhs.max.x, rhs.max.x);

    //cut off the parts of 'region' sticking out of 'core' and queue them as new pieces
    auto subtract = [&](const RenderRegion& core, RenderRegion& region) {
        //top
        if (region.min.y < core.min.y) {
            temp[cnt++] = {{region.min.x, region.min.y}, {region.max.x, core.min.y}};
            region.min.y = core.min.y;
        }
        //bottom
        if (region.max.y > core.max.y) {
            temp[cnt++] = {{region.min.x, core.max.y}, {region.max.x, region.max.y}};
            region.max.y = core.max.y;
        }
        //left
        if (region.min.x < core.min.x) {
            temp[cnt++] = {{region.min.x, region.min.y}, {core.min.x, region.max.y}};
            region.min.x = core.min.x;
        }
        //right (last cut, so region.max.x is intentionally left untouched)
        if (region.max.x > core.max.x) {
            temp[cnt++] = {{core.max.x, region.min.y}, {region.max.x, region.max.y}};
        }
    };

    subtract(temp[0], lhs);
    subtract(temp[0], rhs);

    //TODO: remove this
    if (targets.reserved < targets.count + cnt - 1) {
        TVGERR("RENDERER", "reserved: %d, required: %d (+%d)\n", targets.reserved, targets.count + cnt - 1, cnt - 1);
        abort();
    }

    /* Note: We considered using a list to avoid memory shifting,
       but ultimately, the array outperformed the list due to better cache locality. */

    //shift the entries behind idx to make room, then drop the pieces into the gap
    auto dst = &targets[idx + cnt];
    memmove(dst, &targets[idx + 1], sizeof(RenderRegion) * (targets.count - idx - 1));
    memcpy(&targets[idx], temp, sizeof(RenderRegion) * cnt);
    targets.count += (cnt - 1);

    //sorting by x coord again, only for the updated region:
    //extend the range over the shifted entries that start before the intersection's right edge
    while (dst < targets.end() && dst->min.x < max) ++dst;
    std::stable_sort(&targets[idx], dst, [](const RenderRegion& a, const RenderRegion& b) -> bool {
        return a.min.x < b.min.x;
    });
}
//Resolves the regions collected in the current list into a set of isolated regions.
//
//The collected regions are sorted by min.x and swept from left to right. Every region is
//compared with the ones following it and is either dropped (enclosed by another one),
//merged into a neighbour sharing the same span, split into non-overlapping pieces, or,
//if none of that applies, pushed to the other list as a finished region.
//
//Does nothing when the dirty region is disabled or skipped, or when nothing was collected.
//If more than THRESHOLD regions were collected, only the skip flag is raised.
void RenderDirtyRegion::commit()
{
    if (skip || disabled) return;

    auto& targets = list[current];
    if (targets.empty()) return;

    //too many regions to resolve
    if (targets.count > THRESHOLD) {
        skip = true;
        return;
    }

    current = !current; //swapping buffers
    auto& output = list[current];

    //sorting by x coord. the sweep below relies on this order
    std::stable_sort(targets.begin(), targets.end(), [](const RenderRegion& a, const RenderRegion& b) -> bool {
        return a.min.x < b.min.x;
    });

    //sweep-line over the x axis
    for (uint32_t i = 0; i < targets.count; ++i) {
        auto& lhs = targets[i];
        if (lhs.invalid()) continue;
        auto merged = false;

        for (uint32_t j = i + 1; j < targets.count; ++j) {
            auto& rhs = targets[j];
            if (rhs.invalid()) continue;
            if (lhs.max.x < rhs.min.x) break; //line sweeping: rhs starts beyond the right edge of lhs

            //lhs lies entirely inside rhs. drop lhs
            if (rhs.contained(lhs)) {
                merged = true;
                break;
            }
            //rhs lies entirely inside lhs. drop rhs and keep scanning
            if (lhs.contained(rhs)) {
                rhs = {};
                continue;
            }
            //same y span: just merge & expand on x axis (touching edges included)
            if (lhs.min.y == rhs.min.y && lhs.max.y == rhs.max.y) {
                if (lhs.min.x <= rhs.max.x && rhs.min.x <= lhs.max.x) {
                    rhs.min.x = std::min(lhs.min.x, rhs.min.x);
                    rhs.max.x = std::max(lhs.max.x, rhs.max.x);
                    merged = true;
                    break;
                }
            }
            //same x span: just merge & expand on y axis (touching edges included, as on the x axis)
            if (lhs.min.x == rhs.min.x && lhs.max.x == rhs.max.x) {
                if (lhs.min.y <= rhs.max.y && rhs.min.y <= lhs.max.y) {
                    rhs.min.y = std::min(lhs.min.y, rhs.min.y);
                    rhs.max.y = std::max(lhs.max.y, rhs.max.y);
                    merged = true;
                    break;
                }
            }
            //partially overlapped. subdivide regions; the pieces are stored from slot j on
            if (lhs.intersected(rhs)) {
                subdivide(targets, j, lhs, rhs);
                merged = true;
                break;
            }
        }
        if (!merged) output.push(lhs);  //this region is completely isolated
        lhs = {};  //consumed either way
    }
}
/************************************************************************/
/* RenderTrimPath Class Implementation */
/************************************************************************/

View file

@ -106,6 +106,11 @@ struct RenderRegion
return {{std::max(lhs.min.x, rhs.min.x), std::max(lhs.min.y, rhs.min.y)}, {std::min(lhs.max.x, rhs.max.x), std::min(lhs.max.y, rhs.max.y)}};
}
//Returns the smallest region enclosing both lhs and rhs.
//Neither operand is checked for validity; the result is a plain per-axis min/max.
static constexpr RenderRegion add(const RenderRegion& lhs, const RenderRegion& rhs)
{
    return {
        {std::min(lhs.min.x, rhs.min.x), std::min(lhs.min.y, rhs.min.y)},  //min corner
        {std::max(lhs.max.x, rhs.max.x), std::max(lhs.max.y, rhs.max.y)}   //max corner
    };
}
void intersect(const RenderRegion& rhs);
void add(const RenderRegion& rhs)
@ -116,6 +121,16 @@ struct RenderRegion
if (rhs.max.y > max.y) max.y = rhs.max.y;
}
bool contained(const RenderRegion& rhs)
{
return (min.x <= rhs.min.x && max.x >= rhs.max.x && min.y <= rhs.min.y && max.y >= rhs.max.y);
}
//Returns true if this region and rhs overlap with a non-zero area.
//Regions that merely share an edge or a corner are not considered intersected.
bool intersected(const RenderRegion& rhs) const
{
    //separated along the x axis
    if (rhs.min.x >= max.x || rhs.max.x <= min.x) return false;
    //separated along the y axis
    if (rhs.min.y >= max.y || rhs.max.y <= min.y) return false;
    return true;
}
bool operator==(const RenderRegion& rhs) const
{
return (min.x == rhs.min.x && min.y == rhs.min.y && max.x == rhs.max.x && max.y == rhs.max.y);
@ -477,7 +492,7 @@ struct RenderEffectTritone : RenderEffect
class RenderMethod
{
private:
uint32_t refCnt = 0; //reference count
uint32_t refCnt = 0;
Key key;
public: