sw_engine: tweak the image down-scaler.

Adjust the sampling count according to the scale ratio.

This significantly improves performance
with no readily noticeable loss in image quality.

Lottie example performance has improved by 15%.
This commit is contained in:
Hermet Park 2024-01-18 18:53:27 +09:00 committed by Hermet Park
parent bcab78e7d6
commit 6fee4f44d4

View file

@ -257,8 +257,8 @@ static uint32_t _interpUpScaler(const uint32_t *img, TVG_UNUSED uint32_t stride,
auto ry2 = ry + 1; auto ry2 = ry + 1;
if (ry2 >= h) ry2 = h - 1; if (ry2 >= h) ry2 = h - 1;
auto dx = static_cast<size_t>((sx - rx) * 255.0f); auto dx = static_cast<uint8_t>((sx - rx) * 255.0f);
auto dy = static_cast<size_t>((sy - ry) * 255.0f); auto dy = static_cast<uint8_t>((sy - ry) * 255.0f);
auto c1 = img[rx + ry * w]; auto c1 = img[rx + ry * w];
auto c2 = img[rx2 + ry * w]; auto c2 = img[rx2 + ry * w];
@ -281,21 +281,23 @@ static uint32_t _interpDownScaler(const uint32_t *img, uint32_t stride, uint32_t
int32_t maxx = (int32_t)sx + n; int32_t maxx = (int32_t)sx + n;
if (maxx >= (int32_t)w) maxx = w; if (maxx >= (int32_t)w) maxx = w;
int32_t inc = (n / 2) + 1;
n = 0;
auto src = img + minx + miny * stride; auto src = img + minx + miny * stride;
for (auto y = miny; y < maxy; ++y) { for (auto y = miny; y < maxy; y += inc) {
auto p = src; auto p = src;
for (auto x = minx; x < maxx; ++x, ++p) { for (auto x = minx; x < maxx; x += inc, p += inc) {
c[0] += *p >> 24; c[0] += A(*p);
c[1] += (*p >> 16) & 0xff; c[1] += C1(*p);
c[2] += (*p >> 8) & 0xff; c[2] += C2(*p);
c[3] += *p & 0xff; c[3] += C3(*p);
++n;
} }
src += stride; src += (stride * inc);
} }
n = (maxy - miny) * (maxx - minx);
c[0] /= n; c[0] /= n;
c[1] /= n; c[1] /= n;
c[2] /= n; c[2] /= n;