mirror of
https://github.com/thorvg/thorvg.git
synced 2025-06-15 12:34:30 +00:00
sw common: Fixed crash in rasterRGBA32
Description: A crash was observed in the examples when a composite object was used. It was caused by dereferencing an aligned __m256i pointer on memory that was not aligned to 32 bytes. The algorithm in this function was changed to use the unaligned __m256i_u type instead. The code was also simplified.
This commit is contained in:
parent
5e1d3772ca
commit
f0ab7f4002
1 changed file with 19 additions and 15 deletions
|
@ -349,22 +349,26 @@ bool rasterClear(SwSurface* surface);
|
||||||
static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
||||||
{
|
{
|
||||||
#ifdef THORVG_AVX_VECTOR_SUPPORT
|
#ifdef THORVG_AVX_VECTOR_SUPPORT
|
||||||
int32_t align = (8 - (offset % 8)) % 8;
|
//1. calculate how many iterations we need to cover length
|
||||||
//Vectorization
|
uint32_t iterations = len / 8;
|
||||||
auto avxDst = (__m256i*)(dst + offset + align);
|
uint32_t avxFilled = iterations * 8;
|
||||||
int32_t i = (len - align);
|
int32_t leftovers = 0;
|
||||||
for (;i > 7; i -= 8, ++avxDst) {
|
|
||||||
*avxDst = _mm256_set1_epi32(val);
|
//2. set beginning of the array
|
||||||
|
dst+=offset;
|
||||||
|
__m256i_u* avxDst = (__m256i_u*) dst;
|
||||||
|
|
||||||
|
//3. fill octets
|
||||||
|
for (uint32_t i = 0; i < iterations; ++i) {
|
||||||
|
*avxDst = _mm256_set1_epi32(val);
|
||||||
|
avxDst++;
|
||||||
}
|
}
|
||||||
//Alignment
|
|
||||||
if (align > 0) {
|
//4. fill leftovers (in first step we have to set pointer to place where avx job is done)
|
||||||
if (align > len) align -= (align - len);
|
leftovers = len - avxFilled;
|
||||||
auto tmp = dst + offset;
|
dst+= avxFilled;
|
||||||
for (; align > 0; --align, ++tmp) *tmp = val;
|
|
||||||
}
|
while (leftovers--) *dst++ = val;
|
||||||
//Pack Leftovers
|
|
||||||
dst += offset + (len - i);
|
|
||||||
while (i-- > 0) *(dst++) = val;
|
|
||||||
#else
|
#else
|
||||||
dst += offset;
|
dst += offset;
|
||||||
while (len--) *dst++ = val;
|
while (len--) *dst++ = val;
|
||||||
|
|
Loading…
Add table
Reference in a new issue