sw common: Fixed crash in rasterRGBA32

Description:
A crash was observed in the examples when a composite object was used.
It was caused by accessing memory that was not 32-byte aligned through
an __m256i pointer. The algorithm in this function was changed to use
the unaligned __m256i_u type, and the code was also simplified.
This commit is contained in:
Michal Szczecinski 2021-02-17 15:09:28 +01:00 committed by Hermet Park
parent 5e1d3772ca
commit f0ab7f4002

View file

@ -349,22 +349,26 @@ bool rasterClear(SwSurface* surface);
static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
{
#ifdef THORVG_AVX_VECTOR_SUPPORT
int32_t align = (8 - (offset % 8)) % 8;
//Vectorization
auto avxDst = (__m256i*)(dst + offset + align);
int32_t i = (len - align);
for (;i > 7; i -= 8, ++avxDst) {
*avxDst = _mm256_set1_epi32(val);
//1. calculate how many iterations we need to cover length
uint32_t iterations = len / 8;
uint32_t avxFilled = iterations * 8;
int32_t leftovers = 0;
//2. set beginning of the array
dst+=offset;
__m256i_u* avxDst = (__m256i_u*) dst;
//3. fill octets
for (uint32_t i = 0; i < iterations; ++i) {
*avxDst = _mm256_set1_epi32(val);
avxDst++;
}
//Alignment
if (align > 0) {
if (align > len) align -= (align - len);
auto tmp = dst + offset;
for (; align > 0; --align, ++tmp) *tmp = val;
}
//Pack Leftovers
dst += offset + (len - i);
while (i-- > 0) *(dst++) = val;
//4. fill leftovers (in first step we have to set pointer to place where avx job is done)
leftovers = len - avxFilled;
dst+= avxFilled;
while (leftovers--) *dst++ = val;
#else
dst += offset;
while (len--) *dst++ = val;