diff --git a/src/lib/sw_engine/tvgSwCommon.h b/src/lib/sw_engine/tvgSwCommon.h index 61cffcc6..6327523a 100644 --- a/src/lib/sw_engine/tvgSwCommon.h +++ b/src/lib/sw_engine/tvgSwCommon.h @@ -363,49 +363,6 @@ bool rasterImage(SwSurface* surface, SwImage* image, const Matrix* transform, co bool rasterStroke(SwSurface* surface, SwShape* shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a); bool rasterGradientStroke(SwSurface* surface, SwShape* shape, unsigned id); bool rasterClear(SwSurface* surface); - -static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len) -{ -#if defined(THORVG_AVX_VECTOR_SUPPORT) - //1. calculate how many iterations we need to cover length - uint32_t iterations = len / 8; - uint32_t avxFilled = iterations * 8; - int32_t leftovers = 0; - - //2. set beginning of the array - dst+=offset; - __m256i_u* avxDst = (__m256i_u*) dst; - - //3. fill octets - for (uint32_t i = 0; i < iterations; ++i) { - *avxDst = _mm256_set1_epi32(val); - avxDst++; - } - - //4. fill leftovers (in first step we have to set pointer to place where avx job is done) - leftovers = len - avxFilled; - dst+= avxFilled; - - while (leftovers--) *dst++ = val; -#elif defined(THORVG_NEON_VECTOR_SUPPORT) - uint32_t iterations = len / 4; - uint32_t neonFilled = iterations * 4; - int32_t leftovers = 0; - - dst+=offset; - uint32x4_t vectorVal = { val, val, val, val }; - - for (uint32_t i = 0; i < iterations; ++i) { - vst1q_u32(dst, vectorVal); - dst += 4; - } - - leftovers = len - neonFilled; - while (leftovers--) *dst++ = val; -#else - dst += offset; - while (len--) *dst++ = val; -#endif -} +void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len); #endif /* _TVG_SW_COMMON_H_ */ diff --git a/src/lib/sw_engine/tvgSwRaster.cpp b/src/lib/sw_engine/tvgSwRaster.cpp index d0205484..a506db22 100644 --- a/src/lib/sw_engine/tvgSwRaster.cpp +++ b/src/lib/sw_engine/tvgSwRaster.cpp @@ -1066,6 +1066,49 @@ static bool _rasterOpaqueRadialGradientRle(SwSurface* surface, const SwRleData* /* External Class Implementation */ /************************************************************************/ +void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len) +{ +#if defined(THORVG_AVX_VECTOR_SUPPORT) + //1. calculate how many iterations we need to cover length + uint32_t iterations = len / 8; + uint32_t avxFilled = iterations * 8; + + //2. set beginning of the array + dst += offset; + __m256i_u* avxDst = (__m256i_u*) dst; + + //3. fill octets + for (uint32_t i = 0; i < iterations; ++i) { + *avxDst = _mm256_set1_epi32(val); + avxDst++; + } + + //4. fill leftovers (in first step we have to set pointer to place where avx job is done) + int32_t leftovers = len - avxFilled; + dst += avxFilled; + + while (leftovers--) *dst++ = val; +#elif defined(THORVG_NEON_VECTOR_SUPPORT) + uint32_t iterations = len / 4; + uint32_t neonFilled = iterations * 4; + + dst += offset; + uint32x4_t vectorVal = {val, val, val, val}; + + for (uint32_t i = 0; i < iterations; ++i) { + vst1q_u32(dst, vectorVal); + dst += 4; + } + + int32_t leftovers = len - neonFilled; + while (leftovers--) *dst++ = val; +#else + dst += offset; + while (len--) *dst++ = val; +#endif +} + + bool rasterCompositor(SwSurface* surface) { if (surface->cs == SwCanvas::ABGR8888) {