sw_engine: rectify logic wrt overall blendings

- minor optimization of the unpremultiply logic - exception handling for the unpremultiply logic for anti-aliasing quality - appropriate alpha pre/unpre multiplication is applied - clean code++ - updated doc blending equation issue: https://github.com/thorvg/thorvg/issues/1944
2025-07-15 11:01:08 +00:00 · 2025-07-11 02:16:16 +09:00 · 2025-07-11 02:16:16 +09:00 · d2b2fb02f0
commit d2b2fb02f0
parent f6a9efcc3f
3 changed files with 105 additions and 113 deletions
--- a/inc/thorvg.h
+++ b/inc/thorvg.h
@ -200,15 +200,15 @@ enum class BlendMethod : uint8_t
    Normal = 0,        ///< Perform the alpha blending(default). S if (Sa == 255), otherwise (Sa * S) + (255 - Sa) * D
    Multiply,          ///< Takes the RGB channel values from 0 to 255 of each pixel in the top layer and multiples them with the values for the corresponding pixel from the bottom layer. (S * D)
    Screen,            ///< The values of the pixels in the two layers are inverted, multiplied, and then inverted again. (S + D) - (S * D)
-    Overlay,           ///< Combines Multiply and Screen blend modes. (2 * S * D) if (2 * D < Da), otherwise (Sa * Da) - 2 * (Da - S) * (Sa - D)
+    Overlay,           ///< Combines Multiply and Screen blend modes. (2 * S * D) if (D < 128), otherwise 255 - 2 * (255 - S) * (255 - D)
    Darken,            ///< Creates a pixel that retains the smallest components of the top and bottom layer pixels. min(S, D)
    Lighten,           ///< Only has the opposite action of Darken Only. max(S, D)
    ColorDodge,        ///< Divides the bottom layer by the inverted top layer. D / (255 - S)
    ColorBurn,         ///< Divides the inverted bottom layer by the top layer, and then inverts the result. 255 - (255 - D) / S
-    HardLight,         ///< The same as Overlay but with the color roles reversed. (2 * S * D) if (S < Sa), otherwise (Sa * Da) - 2 * (Da - S) * (Sa - D)
-    SoftLight,         ///< The same as Overlay but with applying pure black or white does not result in pure black or white. (1 - 2 * S) * (D ^ 2) + (2 * S * D)
+    HardLight,         ///< The same as Overlay but with the color roles reversed. (2 * S * D) if (S < 128), otherwise 255 - 2 * (255 - S) * (255 - D)
+    SoftLight,         ///< The same as Overlay but with applying pure black or white does not result in pure black or white. (255 - 2 * S) * (D * D) + (2 * S * D)
    Difference,        ///< Subtracts the bottom layer from the top layer or the other way around, to always get a non-negative value. (S - D) if (S > D), otherwise (D - S)
-    Exclusion,         ///< The result is twice the product of the top and bottom layers, subtracted from their sum. s + d - (2 * s * d)
+    Exclusion,         ///< The result is twice the product of the top and bottom layers, subtracted from their sum. S + D - (2 * S * D)
    Hue,               ///< Reserved. Not supported.
    Saturation,        ///< Reserved. Not supported.
    Color,             ///< Reserved. Not supported.
--- a/src/renderer/sw_engine/tvgSwCommon.h
+++ b/src/renderer/sw_engine/tvgSwCommon.h
@ -373,18 +373,34 @@ static inline uint8_t C3(uint32_t c)
    return (c);
 }

-static inline bool UNPREMULTIPLY(uint32_t color, RenderColor& out)
+static inline uint32_t PREMULTIPLY(uint32_t c, uint8_t a)
 {
-    out.a = A(color);
-    if (out.a == 0) return false;
+    return (c & 0xff000000) + ((((c >> 8) & 0xff) * a) & 0xff00) + ((((c & 0x00ff00ff) * a) >> 8) & 0x00ff00ff);
+}

-    out.r = C1(color) * 255 / out.a;
-    out.g = C2(color) * 255 / out.a;
-    out.b = C3(color) * 255 / out.a;
+static inline bool BLEND_UPRE(uint32_t c, RenderColor& o)
+{
+    o.a = A(c);
+    if (o.a == 0) return false;
+
+    o.r = C1(c);
+    o.g = C2(c);
+    o.b = C3(c);
+
+    if (o.a < 255) {
+        o.r = std::min(o.r * 255u / o.a, 255u);
+        o.g = std::min(o.g * 255u / o.a, 255u);
+        o.b = std::min(o.b * 255u / o.a, 255u);
+    }

    return true;
 }

+static inline uint32_t BLEND_PRE(uint32_t c1, uint32_t c2, uint8_t a)
+{
+    return ALPHA_BLEND(c1, a) + ALPHA_BLEND(c2, 255 - a);
+}
+
 static inline uint32_t opBlendInterp(uint32_t s, uint32_t d, uint8_t a)
 {
    return INTERPOLATE(s, d, a);
@ -411,160 +427,140 @@ static inline uint32_t opBlendDifference(uint32_t s, uint32_t d, TVG_UNUSED uint
 {
    if (d == 0) return s;

-    //if (s > d) => s - d
-    //else => d - s
-    auto c1 = (C1(s) > C1(d)) ? (C1(s) - C1(d)) : (C1(d) - C1(s));
-    auto c2 = (C2(s) > C2(d)) ? (C2(s) - C2(d)) : (C2(d) - C2(s));
-    auto c3 = (C3(s) > C3(d)) ? (C3(s) - C3(d)) : (C3(d) - C3(s));
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return (s > d) ? (s - d) : (d - s);
+    };
+
+    return JOIN(255, f(C1(s), C1(d)), f(C2(s), C2(d)), f(C3(s), C3(d)));
 }

 static inline uint32_t opBlendExclusion(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
    if (d == 0) return s;

-    // (s + d) - (2 * s * d)
-    auto c1 = tvg::clamp(C1(s) + C1(d) - 2 * MULTIPLY(C1(s), C1(d)), 0, 255);
-    auto c2 = tvg::clamp(C2(s) + C2(d) - 2 * MULTIPLY(C2(s), C2(d)), 0, 255);
-    auto c3 = tvg::clamp(C3(s) + C3(d) - 2 * MULTIPLY(C3(s), C3(d)), 0, 255);
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return tvg::clamp(s + d - 2 * MULTIPLY(s, d), 0, 255);
+    };
+
+    return JOIN(255, f(C1(s), C1(d)), f(C2(s), C2(d)), f(C3(s), C3(d)));
 }

 static inline uint32_t opBlendAdd(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
    if (d == 0) return s;

-    // s + d
-    auto c1 = std::min(C1(s) + C1(d), 255);
-    auto c2 = std::min(C2(s) + C2(d), 255);
-    auto c3 = std::min(C3(s) + C3(d), 255);
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return std::min(s + d, 255);
+    };
+
+    return JOIN(255, f(C1(s), C1(d)), f(C2(s), C2(d)), f(C3(s), C3(d)));
 }

 static inline uint32_t opBlendScreen(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
    if (d == 0) return s;

-    // s + d - s * d
-    auto c1 = C1(s) + C1(d) - MULTIPLY(C1(s), C1(d));
-    auto c2 = C2(s) + C2(d) - MULTIPLY(C2(s), C2(d));
-    auto c3 = C3(s) + C3(d) - MULTIPLY(C3(s), C3(d));
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return s + d - MULTIPLY(s, d);
+    };
+
+    return JOIN(255, f(C1(s), C1(d)), f(C2(s), C2(d)), f(C3(s), C3(d)));
 }

 static inline uint32_t opBlendMultiply(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
-    if (d == 0) return s;
-
    RenderColor o;
-    if (!UNPREMULTIPLY(d, o)) return 0;
+    if (!BLEND_UPRE(d, o)) return s;

-    // s * d
-    auto c1 = MULTIPLY(C1(s), o.r);
-    auto c2 = MULTIPLY(C2(s), o.g);
-    auto c3 = MULTIPLY(C3(s), o.b);
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return MULTIPLY(s, d);
+    };
+
+    return BLEND_PRE(JOIN(255, f(C1(s), o.r), f(C2(s), o.g), f(C3(s), o.b)), s, o.a);
 }


 static inline uint32_t opBlendOverlay(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
-    if (d == 0) return s;
-
    RenderColor o;
-    if (!UNPREMULTIPLY(d, o)) return 0;
+    if (!BLEND_UPRE(d, o)) return s;

-    // if (2 * d < da) => 2 * s * d,
-    // else => 1 - 2 * (1 - s) * (1 - d)
-    auto c1 = (o.r < 128) ? std::min(255, 2 * MULTIPLY(C1(s), o.r)) : (255 - std::min(255, 2 * MULTIPLY(255 - C1(s), 255 - o.r)));
-    auto c2 = (o.g < 128) ? std::min(255, 2 * MULTIPLY(C2(s), o.g)) : (255 - std::min(255, 2 * MULTIPLY(255 - C2(s), 255 - o.g)));
-    auto c3 = (o.b < 128) ? std::min(255, 2 * MULTIPLY(C3(s), o.b)) : (255 - std::min(255, 2 * MULTIPLY(255 - C3(s), 255 - o.b)));
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return (d < 128) ? std::min(255, 2 * MULTIPLY(s, d)) : (255 - std::min(255, 2 * MULTIPLY(255 - s, 255 - d)));
+    };
+
+    return BLEND_PRE(JOIN(255, f(C1(s), o.r), f(C2(s), o.g), f(C3(s), o.b)), s, o.a);
 }

 static inline uint32_t opBlendDarken(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
-    if (d == 0) return s;
-
    RenderColor o;
-    if (!UNPREMULTIPLY(d, o)) return 0;
+    if (!BLEND_UPRE(d, o)) return s;

-    // min(s, d)
-    auto c1 = std::min(C1(s), o.r);
-    auto c2 = std::min(C2(s), o.g);
-    auto c3 = std::min(C3(s), o.b);
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return std::min(s, d);
+    };
+
+    return BLEND_PRE(JOIN(255, f(C1(s), o.r), f(C2(s), o.g), f(C3(s), o.b)), s, o.a);
 }

 static inline uint32_t opBlendLighten(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
    if (d == 0) return s;

-    // max(s, d)
-    auto c1 = std::max(C1(s), C1(d));
-    auto c2 = std::max(C2(s), C2(d));
-    auto c3 = std::max(C3(s), C3(d));
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return std::max(s, d);
+    };
+
+    return JOIN(255, f(C1(s), C1(d)), f(C2(s), C2(d)), f(C3(s), C3(d)));
 }

 static inline uint32_t opBlendColorDodge(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
-    if (d == 0) return s;
+    RenderColor o;
+    if (!BLEND_UPRE(d, o)) return s;

-    // d / (1 - s)
-    s = 0xffffffff - s;
-    auto c1 = C1(d) == 0 ? 0 : (C1(s) == 0 ? 255 : std::min(C1(d) * 255 / C1(s), 255));
-    auto c2 = C2(d) == 0 ? 0 : (C2(s) == 0 ? 255 : std::min(C2(d) * 255 / C2(s), 255));
-    auto c3 = C3(d) == 0 ? 0 : (C3(s) == 0 ? 255 : std::min(C3(d) * 255 / C3(s), 255));
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return d == 0 ? 0 : (s == 255 ? 255 : std::min(d * 255 / (255 - s), 255));
+    };
+
+    return BLEND_PRE(JOIN(255, f(C1(s), o.r), f(C2(s), o.g), f(C3(s), o.b)), s, o.a);
 }

 static inline uint32_t opBlendColorBurn(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
-    if (d == 0) return s;
-
    RenderColor o;
-    if (!UNPREMULTIPLY(d, o)) o.r = o.g = o.b = 0;
+    if (!BLEND_UPRE(d, o)) return s;

-    // 1 - (1 - d) / s
-    auto ir = 255 - o.r;
-    auto ig = 255 - o.g;
-    auto ib = 255 - o.b;
+    auto f = [](uint8_t s, uint8_t d) {
+        return d == 255 ? 255 : (s == 0 ? 0 : 255 - std::min((255 - d) * 255 / s, 255));
+    };

-    auto c1 = o.r == 255 ? 255 : (C1(s) == 0 ? 0 : 255 - std::min(ir * 255 / C1(s), 255));
-    auto c2 = o.g == 255 ? 255 : (C2(s) == 0 ? 0 : 255 - std::min(ig * 255 / C2(s), 255));
-    auto c3 = o.b == 255 ? 255 : (C3(s) == 0 ? 0 : 255 - std::min(ib * 255 / C3(s), 255));
-
-    return JOIN(255, c1, c2, c3);
+    return BLEND_PRE(JOIN(255, f(C1(s), o.r), f(C2(s), o.g), f(C3(s), o.b)), s, o.a);
 }

 static inline uint32_t opBlendHardLight(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
-    if (d == 0) return s;
-
    RenderColor o;
-    if (!UNPREMULTIPLY(d, o)) o.r = o.g = o.b = 0;
+    if (!BLEND_UPRE(d, o)) return s;

-    // if (s < sa), (2 * s * d)
-    // else (sa * da) - 2 * (da - s) * (sa - d)
-    auto c1 = (C1(s) < 128) ? std::min(255, 2 * MULTIPLY(C1(s), o.r)) : (255 - std::min(255, 2 * MULTIPLY(255 - C1(s), 255 - o.r)));
-    auto c2 = (C2(s) < 128) ? std::min(255, 2 * MULTIPLY(C2(s), o.g)) : (255 - std::min(255, 2 * MULTIPLY(255 - C2(s), 255 - o.g)));
-    auto c3 = (C3(s) < 128) ? std::min(255, 2 * MULTIPLY(C3(s), o.b)) : (255 - std::min(255, 2 * MULTIPLY(255 - C3(s), 255 - o.b)));
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return (s < 128) ? std::min(255, 2 * MULTIPLY(s, d)) : (255 - std::min(255, 2 * MULTIPLY(255 - s, 255 - d)));
+    };
+
+    return BLEND_PRE(JOIN(255, f(C1(s), o.r), f(C2(s), o.g), f(C3(s), o.b)), s, o.a);
 }

 static inline uint32_t opBlendSoftLight(uint32_t s, uint32_t d, TVG_UNUSED uint8_t a)
 {
-    if (d == 0) return s;
-
    RenderColor o;
-    if (!UNPREMULTIPLY(d, o)) return 0;
+    if (!BLEND_UPRE(d, o)) return s;

-    //(255 - 2 * s) * (d * d) + (2 * s * d)
-    auto c1 = MULTIPLY(255 - std::min(255, 2 * C1(s)), MULTIPLY(o.r, o.r)) + std::min(255, 2 * MULTIPLY(C1(s), o.r));
-    auto c2 = MULTIPLY(255 - std::min(255, 2 * C2(s)), MULTIPLY(o.g, o.g)) + std::min(255, 2 * MULTIPLY(C2(s), o.g));
-    auto c3 = MULTIPLY(255 - std::min(255, 2 * C3(s)), MULTIPLY(o.b, o.b)) + std::min(255, 2 * MULTIPLY(C3(s), o.b));
-    return JOIN(255, c1, c2, c3);
+    auto f = [](uint8_t s, uint8_t d) {
+        return MULTIPLY(255 - std::min(255, 2 * s), MULTIPLY(d, d)) + std::min(255, 2 * MULTIPLY(s, d));
+    };
+
+    return BLEND_PRE(JOIN(255, f(C1(s), o.r), f(C2(s), o.g), f(C3(s), o.b)), s, o.a);
 }


--- a/src/renderer/sw_engine/tvgSwRaster.cpp
+++ b/src/renderer/sw_engine/tvgSwRaster.cpp
@ -591,8 +591,7 @@ static bool _rasterBlendingRle(SwSurface* surface, const SwRle* rle, const Rende
            }
        } else {
            for (auto x = 0; x < len; ++x, ++dst) {
-                auto tmp = surface->blender(color, *dst, 255);
-                *dst = INTERPOLATE(tmp, *dst, span->coverage);
+                *dst = INTERPOLATE(surface->blender(color, *dst, 255), *dst, span->coverage);
            }
        }
    }
@ -733,15 +732,13 @@ static bool _rasterScaledBlendingRleImage(SwSurface* surface, const SwImage& ima
            for (uint32_t x = static_cast<uint32_t>(span->x); x < static_cast<uint32_t>(span->x) + span->len; ++x, ++dst) {
                SCALED_IMAGE_RANGE_X
                auto src = scaleMethod(image.buf32, image.stride, image.w, image.h, sx, sy, miny, maxy, sampleSize);
-                auto tmp = surface->blender(src, *dst, 255);
-                *dst = INTERPOLATE(tmp, *dst, A(src));
+                *dst = INTERPOLATE(surface->blender(src, *dst, 255), *dst, A(src));
            }
        } else {
            for (uint32_t x = static_cast<uint32_t>(span->x); x < static_cast<uint32_t>(span->x) + span->len; ++x, ++dst) {
                SCALED_IMAGE_RANGE_X
                auto src = scaleMethod(image.buf32, image.stride, image.w, image.h, sx, sy, miny, maxy, sampleSize);
-                auto tmp = surface->blender(src, *dst, 255);
-                *dst = INTERPOLATE(tmp, *dst, MULTIPLY(alpha, A(src)));
+                *dst = INTERPOLATE(surface->blender(src, *dst, 255), *dst, MULTIPLY(alpha, A(src)));
            }
        }
    }
@ -822,8 +819,7 @@ static bool _rasterDirectBlendingRleImage(SwSurface* surface, const SwImage& ima
            }
        } else {
            for (auto x = 0; x < len; ++x, ++dst, ++img) {
-                auto tmp = surface->blender(*img, *dst, 255);
-                *dst = INTERPOLATE(tmp, *dst, MULTIPLY(alpha, A(*img)));
+                *dst = INTERPOLATE(surface->blender(*img, *dst, 255), *dst, MULTIPLY(alpha, A(*img)));
            }
        }
    }
@ -918,8 +914,7 @@ static bool _rasterScaledBlendingImage(SwSurface* surface, const SwImage& image,
        for (auto x = bbox.min.x; x < bbox.max.x; ++x, ++dst) {
            SCALED_IMAGE_RANGE_X
            auto src = scaleMethod(image.buf32, image.stride, image.w, image.h, sx, sy, miny, maxy, sampleSize);
-            auto tmp = surface->blender(src, *dst, 255);
-            *dst = INTERPOLATE(tmp, *dst, MULTIPLY(opacity, A(src)));
+            *dst = INTERPOLATE(surface->blender(src, *dst, 255), *dst, MULTIPLY(opacity, A(src)));
        }
    }
    return true;
@ -1533,9 +1528,11 @@ uint32_t rasterUnpremultiply(uint32_t data)
 {
    auto a = A(data);
    if (a == 255 || a == 0) return data;
-    auto r = C1(data) * 255 / a;
-    auto g = C2(data) * 255 / a;
-    auto b = C3(data) * 255 / a;
+
+    uint8_t r = std::min(C1(data) * 255u / a, 255u);
+    uint8_t g = std::min(C2(data) * 255u / a, 255u);
+    uint8_t b = std::min(C3(data) * 255u / a, 255u);
+
    return JOIN(a, r, g, b);
 }

@ -1571,9 +1568,8 @@ void rasterPremultiply(RenderSurface* surface)
        auto dst = buffer;
        for (uint32_t x = 0; x < surface->w; ++x, ++dst) {
            auto c = *dst;
-            auto a = (c >> 24);
-            if (a == 255) continue;
-            *dst = (c & 0xff000000) + ((((c >> 8) & 0xff) * a) & 0xff00) + ((((c & 0x00ff00ff) * a) >> 8) & 0x00ff00ff);
+            if (A(c) == 255) continue;
+            *dst = PREMULTIPLY(c, A(c));
        }
    }
 }