sw_engine: code refactoring.

Simplify the NEON code and fix an out-of-range access.
2025-06-12 15:34:22 +00:00 · 2021-08-30 16:54:43 +09:00 · 2021-08-30 16:54:43 +09:00 · 01e1fec367
commit 01e1fec367
parent 37e8011325
2 changed files with 35 additions and 44 deletions
--- a/src/lib/sw_engine/tvgSwRaster.cpp
+++ b/src/lib/sw_engine/tvgSwRaster.cpp
@ -118,18 +118,6 @@ static uint32_t _applyBilinearInterpolation(const uint32_t *img, uint32_t w, uin
 /* Rect                                                                 */
 /************************************************************************/
 // Forwards a translucent rectangle fill to the rasterizer backend chosen at
 // compile time: the AVX version when THORVG_AVX_VECTOR_SUPPORT is defined,
 // otherwise the NEON version when THORVG_NEON_VECTOR_SUPPORT is defined,
 // otherwise the plain C version. The arguments are passed through untouched
 // and the backend's result is returned as-is.
 static bool _translucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
 {
    #if defined(THORVG_AVX_VECTOR_SUPPORT)
        return avxRasterTranslucentRect(surface, region, color);
    #elif defined(THORVG_NEON_VECTOR_SUPPORT)
        return neonRasterTranslucentRect(surface, region, color);
    #else
        return cRasterTranslucentRect(surface, region, color);
    #endif
 }
 static bool _translucentRectAlphaMask(SwSurface* surface, const SwBBox& region, uint32_t color)
 {
    auto buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
@ -184,7 +172,14 @@ static bool _rasterTranslucentRect(SwSurface* surface, const SwBBox& region, uin
            return _translucentRectInvAlphaMask(surface, region, color);
        }
    }
-    return _translucentRect(surface, region, color);
+
 //No composite mask applied: pick the rasterizer backend at compile time.
 //AVX builds must call the AVX rasterizer; routing them to the C version
 //would make this branch identical to the fallback below.
 #if defined(THORVG_AVX_VECTOR_SUPPORT)
    return avxRasterTranslucentRect(surface, region, color);
 #elif defined(THORVG_NEON_VECTOR_SUPPORT)
    return neonRasterTranslucentRect(surface, region, color);
 #else
    return cRasterTranslucentRect(surface, region, color);
 #endif
 }
--- a/src/lib/sw_engine/tvgSwRasterNeon.h
+++ b/src/lib/sw_engine/tvgSwRasterNeon.h
@ -53,35 +53,35 @@ static inline bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData*
 {
    auto span = rle->spans;
    uint32_t src;
-    uint8x8_t *vDst = NULL;
+    uint8x8_t *vDst = nullptr;
    uint16_t align;
    for (uint32_t i = 0; i < rle->size; ++i) {
        auto dst = &surface->buffer[span->y * surface->stride + span->x];
        uint32_t align = 0;
        if ((((uint32_t) dst) & 0x7) != 0) {
-            vDst = (uint8x8_t*)(dst+1);
+            vDst = (uint8x8_t*)(dst + 1);
            align = 1;
        } else {
            vDst = (uint8x8_t*) dst;
            align = 0;
        }
        if (span->coverage < 255) src = ALPHA_BLEND(color, span->coverage);
        else src = color;
        auto ialpha = 255 - surface->blender.alpha(src);
        auto ialpha = 255 - surface->blender.alpha(src);
        uint8x8_t vSrc = (uint8x8_t) vdup_n_u32(src);
        uint8x8_t vIalpha = vdup_n_u8((uint8_t) ialpha);
-        uint32_t iterations = (span->len - align) / 2;
+        //fill not aligned byte
-        uint32_t left = (span->len - align) % 2;
+        if (align > 0) *dst = src + ALPHA_BLEND(*dst, ialpha);
-        //Fill not aligned byte
+        for (uint32_t x = 0; x < (span->len - align) / 2; ++x)
-        if (align) *dst = src + ALPHA_BLEND(*dst, ialpha);
+            vDst[x] = vadd_u8(vSrc, ALPHA_BLEND_NEON(vDst[x], vIalpha));
-        for (uint32_t x = 0; x < iterations; ++x) vDst[x] = vadd_u8(vSrc, ALPHA_BLEND_NEON(vDst[x], vIalpha));
+        auto leftovers = (span->len - align) % 2;
-
+        if (leftovers > 0) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], ialpha);
        if (left) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], ialpha);
        ++span;
    }
@ -91,36 +91,32 @@ static inline bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData*
 /*
  * Blends `color` over the rectangle `region` of `surface`, processing two
  * 32-bit pixels per NEON operation.
  *
  * Every pixel becomes `color + ALPHA_BLEND(pixel, ialpha)`, where
  * `ialpha = 255 - surface->blender.alpha(color)`.
  *
  * surface: target surface; `buffer`, `stride` and `blender.alpha` are used.
  * region:  the fill covers (max.x - min.x) columns and (max.y - min.y) rows
  *          starting at (min.x, min.y).
  * color:   the 32-bit color to blend in.
  *
  * Always returns true.
  */
 static inline bool neonRasterTranslucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
 {
     auto buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
     auto h = static_cast<uint32_t>(region.max.y - region.min.y);
     auto w = static_cast<uint32_t>(region.max.x - region.min.x);
     auto ialpha = 255 - surface->blender.alpha(color);

     //The same color/inverse alpha replicated over both pixels of a vector
     auto vColor = vreinterpret_u8_u32(vdup_n_u32(color));
     auto vIalpha = vdup_n_u8((uint8_t) ialpha);

     for (uint32_t y = 0; y < h; ++y) {
         auto dst = &buffer[y * surface->stride];

         //Number of leading pixels handled one by one (0 or 1)
         uint32_t head = 0;

         //The vector accesses below go through a uint8x8_t pointer, so peel
         //off one pixel when the row does not start on an 8-byte boundary.
         //uintptr_t keeps the full address on 64-bit targets, and an empty
         //row must not be touched at all.
         if (w > 0 && (reinterpret_cast<uintptr_t>(dst) & 0x7) != 0) {
             *dst = color + ALPHA_BLEND(*dst, ialpha);
             head = 1;
         }

         //Only the pixels after the head are left for the vector loop.
         //Counting from `w` instead would run one pair too far: past the end
         //of the row for even widths, or onto the tail pixel for odd widths.
         auto remaining = w - head;
         auto vDst = reinterpret_cast<uint8x8_t*>(dst + head);

         for (uint32_t x = 0; x < remaining / 2; ++x)
             vDst[x] = vadd_u8(vColor, ALPHA_BLEND_NEON(vDst[x], vIalpha));

         //A single pixel may remain at the end of the row
         if (remaining % 2 != 0) dst[w - 1] = color + ALPHA_BLEND(dst[w - 1], ialpha);
     }
     return true;
 }