sw_engine: texture mapping performance optimization
Some checks are pending
Android / build_x86_64 (push) Waiting to run
Android / build_aarch64 (push) Waiting to run
iOS / build_x86_64 (push) Waiting to run
iOS / build_arm64 (push) Waiting to run
macOS / build (push) Waiting to run
macOS / compact_test (push) Waiting to run
macOS / unit_test (push) Waiting to run
Ubuntu / build (push) Waiting to run
Ubuntu / compact_test (push) Waiting to run
Ubuntu / unit_test (push) Waiting to run
Windows / build (push) Waiting to run
Windows / compact_test (push) Waiting to run
Windows / unit_test (push) Waiting to run

- Replaced `modff()` with a custom fixed-point helper (`_modf()`),
boosting texture mapping performance by ~15%.

- Unified the full-opacity and partial-opacity code paths to reduce
binary size (-0.5 KB).

- Made minor changes to improve cache efficiency.
This commit is contained in:
Hermet Park 2025-06-10 19:12:07 +09:00 committed by Hermet Park
parent bfef89858f
commit 2e5af58592

View file

@ -70,6 +70,11 @@ static bool _arrange(const SwImage* image, const RenderRegion* bbox, int& yStart
return yEnd > yStart; return yEnd > yStart;
} }
/* Converts v into an inverted 8-bit weight: v is scaled by 256 and
   truncated to an integer, then the low 8 bits of that value are
   subtracted from 255. A zero fraction yields 255; a fraction just
   below 1 yields 0. Callers use the result as the interpolation
   weight (ar / ab) between neighbouring texels. */
static inline int32_t _modf(float v)
{
    const int scaled = (int)(v * 256.0f);  //truncating conversion to x256 fixed-point
    const int frac = scaled & 0xff;        //keep only the 8 fractional bits
    return 255 - frac;
}
static bool _rasterMaskedPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion* bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity, uint8_t dirFlag = 0) static bool _rasterMaskedPolygonImageSegment(SwSurface* surface, const SwImage* image, const RenderRegion* bbox, int yStart, int yEnd, AASpans* aaSpans, uint8_t opacity, uint8_t dirFlag = 0)
{ {
@ -90,7 +95,7 @@ static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage
int32_t x1, x2, x, y, ar, ab, iru, irv, px, ay; int32_t x1, x2, x, y, ar, ab, iru, irv, px, ay;
int32_t vv = 0, uu = 0; int32_t vv = 0, uu = 0;
int32_t minx = INT32_MAX, maxx = 0; int32_t minx = INT32_MAX, maxx = 0;
float dx, u, v, iptr; float dx, u, v;
uint32_t* buf; uint32_t* buf;
SwSpan* span = nullptr; //used only when rle based. SwSpan* span = nullptr; //used only when rle based.
@ -156,8 +161,8 @@ static void _rasterBlendingPolygonImageSegment(SwSurface* surface, const SwImage
if ((uint32_t) uu >= image->w || (uint32_t) vv >= image->h) continue; if ((uint32_t) uu >= image->w || (uint32_t) vv >= image->h) continue;
ar = (int)(255 * (1 - modff(u, &iptr))); ar = _modf(u);
ab = (int)(255 * (1 - modff(v, &iptr))); ab = _modf(v);
iru = uu + 1; iru = uu + 1;
irv = vv + 1; irv = vv + 1;
@ -220,7 +225,7 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image,
int32_t x1, x2, x, y, ar, ab, iru, irv, px, ay; int32_t x1, x2, x, y, ar, ab, iru, irv, px, ay;
int32_t vv = 0, uu = 0; int32_t vv = 0, uu = 0;
int32_t minx = INT32_MAX, maxx = 0; int32_t minx = INT32_MAX, maxx = 0;
float dx, u, v, iptr; float dx, u, v;
uint32_t* buf; uint32_t* buf;
SwSpan* span = nullptr; //used only when rle based. SwSpan* span = nullptr; //used only when rle based.
@ -283,7 +288,8 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image,
if (matting) cmp = &surface->compositor->image.buf8[(y * surface->compositor->image.stride + x1) * csize]; if (matting) cmp = &surface->compositor->image.buf8[(y * surface->compositor->image.stride + x1) * csize];
if (opacity == 255) { const auto fullOpacity = (opacity == 255);
//Draw horizontal line //Draw horizontal line
while (x++ < x2) { while (x++ < x2) {
uu = (int) u; uu = (int) u;
@ -291,8 +297,8 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image,
if ((uint32_t) uu >= image->w || (uint32_t) vv >= image->h) continue; if ((uint32_t) uu >= image->w || (uint32_t) vv >= image->h) continue;
ar = (int)(255.0f * (1.0f - modff(u, &iptr))); ar = _modf(u);
ab = (int)(255.0f * (1.0f - modff(v, &iptr))); ab = _modf(v);
iru = uu + 1; iru = uu + 1;
irv = vv + 1; irv = vv + 1;
@ -319,10 +325,11 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image,
} }
uint32_t src; uint32_t src;
if (matting) { if (matting) {
src = ALPHA_BLEND(px, alpha(cmp)); auto a = alpha(cmp);
src = fullOpacity ? ALPHA_BLEND(px, a) : ALPHA_BLEND(px, MULTIPLY(opacity, a));
cmp += csize; cmp += csize;
} else { } else {
src = px; src = fullOpacity ? px : ALPHA_BLEND(px, opacity);
} }
*buf = src + ALPHA_BLEND(*buf, IA(src)); *buf = src + ALPHA_BLEND(*buf, IA(src));
++buf; ++buf;
@ -331,55 +338,6 @@ static void _rasterPolygonImageSegment(SwSurface* surface, const SwImage* image,
u += _dudx; u += _dudx;
v += _dvdx; v += _dvdx;
} }
} else {
//Draw horizontal line
while (x++ < x2) {
uu = (int) u;
vv = (int) v;
if ((uint32_t) uu >= image->w || (uint32_t) vv >= image->h) continue;
ar = (int)(255.0f * (1.0f - modff(u, &iptr)));
ab = (int)(255.0f * (1.0f - modff(v, &iptr)));
iru = uu + 1;
irv = vv + 1;
px = *(sbuf + (vv * sw) + uu);
/* horizontal interpolate */
if (iru < sw) {
/* right pixel */
int px2 = *(sbuf + (vv * image->stride) + iru);
px = INTERPOLATE(px, px2, ar);
}
/* vertical interpolate */
if (irv < sh) {
/* bottom pixel */
int px2 = *(sbuf + (irv * image->stride) + uu);
/* horizontal interpolate */
if (iru < sw) {
/* bottom right pixel */
int px3 = *(sbuf + (irv * image->stride) + iru);
px2 = INTERPOLATE(px2, px3, ar);
}
px = INTERPOLATE(px, px2, ab);
}
uint32_t src;
if (matting) {
src = ALPHA_BLEND(px, MULTIPLY(opacity, alpha(cmp)));
cmp += csize;
} else {
src = ALPHA_BLEND(px, opacity);
}
*buf = src + ALPHA_BLEND(*buf, IA(src));
++buf;
//Step UV horizontally
u += _dudx;
v += _dvdx;
}
}
} }
//Step along both edges //Step along both edges
@ -657,14 +615,9 @@ static void _calcAAEdge(AASpans *aaSpans, int32_t eidx)
ptx[1] = tx[1]; \ ptx[1] = tx[1]; \
} while (0) } while (0)
struct Point
{
int32_t x, y;
};
int32_t y = 0; int32_t y = 0;
Point pEdge = {-1, -1}; //previous edge point SwPoint pEdge = {-1, -1}; //previous edge point
Point edgeDiff = {0, 0}; //temporary used for point distance SwPoint edgeDiff = {0, 0}; //temporary used for point distance
/* store bigger to tx[0] between prev and current edge's x positions. */ /* store bigger to tx[0] between prev and current edge's x positions. */
int32_t tx[2] = {0, 0}; int32_t tx[2] = {0, 0};
@ -790,26 +743,21 @@ static void _calcAAEdge(AASpans *aaSpans, int32_t eidx)
static void _apply(SwSurface* surface, AASpans* aaSpans) static void _apply(SwSurface* surface, AASpans* aaSpans)
{ {
auto end = surface->buf32 + surface->h * surface->stride; auto end = surface->buf32 + surface->h * surface->stride;
auto buf = surface->buf32 + surface->stride * aaSpans->yStart;
auto y = aaSpans->yStart; auto y = aaSpans->yStart;
uint32_t pixel; auto line = aaSpans->lines;
uint32_t pix;
uint32_t* dst; uint32_t* dst;
int32_t pos; int32_t pos;
//left side _calcAAEdge(aaSpans, 0); //left side
_calcAAEdge(aaSpans, 0); _calcAAEdge(aaSpans, 1); //right side
//right side
_calcAAEdge(aaSpans, 1);
while (y < aaSpans->yEnd) { while (y < aaSpans->yEnd) {
auto line = &aaSpans->lines[y - aaSpans->yStart]; if (line->x[1] - line->x[0] > 0) {
auto width = line->x[1] - line->x[0];
if (width > 0) {
auto offset = y * surface->stride;
//Left edge //Left edge
dst = surface->buf32 + (offset + line->x[0]); dst = buf + line->x[0];
if (line->x[0] > 1) pixel = *(dst - 1); pix = *(dst - ((line->x[0] > 1) ? 1 : 0));
else pixel = *dst;
pos = 1; pos = 1;
//exceptional handling. out of memory bound. //exceptional handling. out of memory bound.
@ -818,30 +766,29 @@ static void _apply(SwSurface* surface, AASpans* aaSpans)
} }
while (pos <= line->length[0]) { while (pos <= line->length[0]) {
*dst = INTERPOLATE(*dst, pixel, line->coverage[0] * pos); *dst = INTERPOLATE(*dst, pix, line->coverage[0] * pos);
++dst; ++dst;
++pos; ++pos;
} }
//Right edge //Right edge
dst = surface->buf32 + offset + line->x[1] - 1; dst = buf + line->x[1] - 1;
pix = *(dst + (line->x[1] < (int32_t)(surface->w - 1) ? 1 : 0));
if (line->x[1] < (int32_t)(surface->w - 1)) pixel = *(dst + 1);
else pixel = *dst;
pos = line->length[1]; pos = line->length[1];
//exceptional handling. out of memory bound. //exceptional handling. out of memory bound.
if (dst - pos < surface->buf32) --pos; if (dst - pos < surface->buf32) --pos;
while (pos > 0) { while (pos > 0) {
*dst = INTERPOLATE(*dst, pixel, 255 - (line->coverage[1] * pos)); *dst = INTERPOLATE(*dst, pix, 255 - (line->coverage[1] * pos));
--dst; --dst;
--pos; --pos;
} }
} }
y++; buf += surface->stride;
++line;
++y;
} }
tvg::free(aaSpans->lines); tvg::free(aaSpans->lines);
tvg::free(aaSpans); tvg::free(aaSpans);
} }