From 3399da198ff2192968b97bdf501b257c6018d5d0 Mon Sep 17 00:00:00 2001
From: Hermet Park <hermet@lottiefiles.com>
Date: Mon, 5 Jun 2023 12:23:00 +0900
Subject: [PATCH] sw_engine fill: code clean up.

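Eliminate logic duplication by introducing a direct blend operation
(opDirect), so the gradient fill routines no longer need a separate
code path for a null blend op.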
---
 src/lib/sw_engine/tvgSwCommon.h   |  6 +++
 src/lib/sw_engine/tvgSwFill.cpp   | 62 +++++++++----------------------
 src/lib/sw_engine/tvgSwRaster.cpp | 17 +++------
 3 files changed, 28 insertions(+), 57 deletions(-)

diff --git a/src/lib/sw_engine/tvgSwCommon.h b/src/lib/sw_engine/tvgSwCommon.h
index 0181c8c0..3cdaf965 100644
--- a/src/lib/sw_engine/tvgSwCommon.h
+++ b/src/lib/sw_engine/tvgSwCommon.h
@@ -332,6 +332,11 @@ static inline uint32_t opBlend(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
     return s + ALPHA_BLEND(d, IALPHA(s));
 }
 
+static inline uint32_t opDirect(uint32_t s, TVG_UNUSED uint32_t d, TVG_UNUSED uint8_t a)
+{
+    return s;
+}
+
 static inline uint32_t opAddMask(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
     return opBlend(s, d, a);
@@ -405,6 +410,7 @@ void imageFree(SwImage* image);
 bool fillGenColorTable(SwFill* fill, const Fill* fdata, const Matrix* transform, SwSurface* surface, uint32_t opacity, bool ctable);
 void fillReset(SwFill* fill);
 void fillFree(SwFill* fill);
+//OPTIMIZE_ME: Skip the function pointer access
 void fillLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, SwBlendOp op, uint8_t a);                                         //blending ver.
 void fillLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, uint8_t* cmp, SwAlpha alpha, uint8_t csize, uint8_t opacity);     //masking ver.
 void fillRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, SwBlendOp op, uint8_t a);                                         //blending ver.
diff --git a/src/lib/sw_engine/tvgSwFill.cpp b/src/lib/sw_engine/tvgSwFill.cpp
index 1a432ea8..1684a371 100644
--- a/src/lib/sw_engine/tvgSwFill.cpp
+++ b/src/lib/sw_engine/tvgSwFill.cpp
@@ -271,18 +271,10 @@ void fillRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint3
     auto detFirstDerivative = 2.0f * (fill->radial.a11 * rx + fill->radial.a21 * ry) + 0.5f * detSecondDerivative;
     auto det = rx * rx + ry * ry;
 
-    if (op) {
-        for (uint32_t i = 0 ; i < len ; ++i, ++dst) {
-            *dst = op(_pixel(fill, sqrtf(det)), *dst, a);
-            det += detFirstDerivative;
-            detFirstDerivative += detSecondDerivative;
-        }
-    } else {
-        for (uint32_t i = 0 ; i < len ; ++i, ++dst) {
-            *dst = _pixel(fill, sqrtf(det));
-            det += detFirstDerivative;
-            detFirstDerivative += detSecondDerivative;
-        }
+    for (uint32_t i = 0 ; i < len ; ++i, ++dst) {
+        *dst = op(_pixel(fill, sqrtf(det)), *dst, a);
+        det += detFirstDerivative;
+        detFirstDerivative += detSecondDerivative;
     }
 }
 
@@ -385,41 +377,21 @@ void fillLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint3
     auto vMin = -vMax;
     auto v = t + (inc * len);
 
-    if (op) {
-        //we can use fixed point math
-        if (v < vMax && v > vMin) {
-            auto t2 = static_cast<int32_t>(t * FIXPT_SIZE);
-            auto inc2 = static_cast<int32_t>(inc * FIXPT_SIZE);
-            for (uint32_t j = 0; j < len; ++j, ++dst) {
-                *dst = op(_fixedPixel(fill, t2), *dst, a);
-                t2 += inc2;
-            }
-        //we have to fallback to float math
-        } else {
-            uint32_t counter = 0;
-            while (counter++ < len) {
-                *dst = op(_pixel(fill, t / GRADIENT_STOP_SIZE), *dst, a);
-                ++dst;
-                t += inc;
-            }
+    //we can use fixed point math
+    if (v < vMax && v > vMin) {
+        auto t2 = static_cast<int32_t>(t * FIXPT_SIZE);
+        auto inc2 = static_cast<int32_t>(inc * FIXPT_SIZE);
+        for (uint32_t j = 0; j < len; ++j, ++dst) {
+            *dst = op(_fixedPixel(fill, t2), *dst, a);
+            t2 += inc2;
         }
+    //we have to fallback to float math
     } else {
-        //we can use fixed point math
-        if (v < vMax && v > vMin) {
-            auto t2 = static_cast<int32_t>(t * FIXPT_SIZE);
-            auto inc2 = static_cast<int32_t>(inc * FIXPT_SIZE);
-            for (uint32_t j = 0; j < len; ++j, ++dst) {
-                *dst = _fixedPixel(fill, t2);
-                t2 += inc2;
-            }
-        //we have to fallback to float math
-        } else {
-            uint32_t counter = 0;
-            while (counter++ < len) {
-                *dst = _pixel(fill, t / GRADIENT_STOP_SIZE);
-                ++dst;
-                t += inc;
-            }
+        uint32_t counter = 0;
+        while (counter++ < len) {
+            *dst = op(_pixel(fill, t / GRADIENT_STOP_SIZE), *dst, a);
+            ++dst;
+            t += inc;
         }
     }
 }
diff --git a/src/lib/sw_engine/tvgSwRaster.cpp b/src/lib/sw_engine/tvgSwRaster.cpp
index c1284cab..e43bbf7d 100644
--- a/src/lib/sw_engine/tvgSwRaster.cpp
+++ b/src/lib/sw_engine/tvgSwRaster.cpp
@@ -44,11 +44,6 @@ struct FillLinear
         fillLinear(fill, dst, y, x, len, op, a);
     }
 
-    void operator()(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len)
-    {
-        fillLinear(fill, dst, y, x, len, nullptr, 255);
-    }
-
     void operator()(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, uint8_t* cmp, SwAlpha alpha, uint8_t csize, uint8_t opacity)
     {
         fillLinear(fill, dst, y, x, len, cmp, alpha, csize, opacity);
@@ -62,11 +57,6 @@ struct FillRadial
         fillRadial(fill, dst, y, x, len, op, a);
     }
 
-    void operator()(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len)
-    {
-        fillRadial(fill, dst, y, x, len, nullptr, 255);
-    }
-
     void operator()(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len, uint8_t* cmp, SwAlpha alpha, uint8_t csize, uint8_t opacity)
     {
         fillRadial(fill, dst, y, x, len, cmp, alpha, csize, opacity);
@@ -216,7 +206,9 @@ static inline uint32_t _halfScale(float scale)
     return halfScale;
 }
 
+
 //Bilinear Interpolation
+//OPTIMIZE_ME: Skip the function pointer access
 static uint32_t _interpUpScaler(const uint32_t *img, TVG_UNUSED uint32_t stride, uint32_t w, uint32_t h, float sx, float sy, TVG_UNUSED uint32_t n)
 {
     auto rx = (uint32_t)(sx);
@@ -239,6 +231,7 @@ static uint32_t _interpUpScaler(const uint32_t *img, TVG_UNUSED uint32_t stride,
 
 
 //2n x 2n Mean Kernel
+//OPTIMIZE_ME: Skip the function pointer access
 static uint32_t _interpDownScaler(const uint32_t *img, uint32_t stride, uint32_t w, uint32_t h, float sx, float sy, uint32_t n)
 {
     uint32_t rx = sx;
@@ -1460,7 +1453,7 @@ static bool _rasterSolidGradientRect(SwSurface* surface, const SwBBox& region, c
     auto h = static_cast<uint32_t>(region.max.y - region.min.y);
 
     for (uint32_t y = 0; y < h; ++y) {
-        fillMethod()(fill, buffer + y * surface->stride, region.min.y + y, region.min.x, w);
+        fillMethod()(fill, buffer + y * surface->stride, region.min.y + y, region.min.x, w, opDirect, 0);
     }
     return true;
 }
@@ -1597,7 +1590,7 @@ static bool _rasterSolidGradientRle(SwSurface* surface, const SwRleData* rle, co
 
     for (uint32_t i = 0; i < rle->size; ++i, ++span) {
         auto dst = &surface->buf32[span->y * surface->stride + span->x];
-        if (span->coverage == 255) fillMethod()(fill, dst, span->y, span->x, span->len);
+        if (span->coverage == 255) fillMethod()(fill, dst, span->y, span->x, span->len, opDirect, 0);
         else fillMethod()(fill, dst, span->y, span->x, span->len, opInterpolate, span->coverage);
     }
     return true;