sw_engine: code refactoring.

Simplify the NEON code and fix the out-of-range access.
This commit is contained in:
Hermet Park 2021-08-30 16:54:43 +09:00 committed by GitHub
parent 37e8011325
commit 01e1fec367
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 44 deletions

View file

@ -118,18 +118,6 @@ static uint32_t _applyBilinearInterpolation(const uint32_t *img, uint32_t w, uin
/* Rect */
/************************************************************************/
/*
 * Fills `region` of `surface` with the translucent `color`.
 * The rasterizer is chosen at compile time: the AVX routine when
 * THORVG_AVX_VECTOR_SUPPORT is defined, otherwise the NEON routine when
 * THORVG_NEON_VECTOR_SUPPORT is defined, otherwise the plain C routine.
 * The chosen routine's result is returned unchanged.
 */
static bool _translucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
{
#ifdef THORVG_AVX_VECTOR_SUPPORT
    return avxRasterTranslucentRect(surface, region, color);
#else
  #ifdef THORVG_NEON_VECTOR_SUPPORT
    return neonRasterTranslucentRect(surface, region, color);
  #else
    return cRasterTranslucentRect(surface, region, color);
  #endif
#endif
}
static bool _translucentRectAlphaMask(SwSurface* surface, const SwBBox& region, uint32_t color)
{
auto buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
@ -184,7 +172,14 @@ static bool _rasterTranslucentRect(SwSurface* surface, const SwBBox& region, uin
return _translucentRectInvAlphaMask(surface, region, color);
}
}
return _translucentRect(surface, region, color);
#if defined(THORVG_AVX_VECTOR_SUPPORT)
return cRasterTranslucentRect(surface, region, color);
#elif defined(THORVG_NEON_VECTOR_SUPPORT)
return neonRasterTranslucentRect(surface, region, color);
#else
return cRasterTranslucentRect(surface, region, color);
#endif
}

View file

@ -53,35 +53,35 @@ static inline bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData*
{
auto span = rle->spans;
uint32_t src;
uint8x8_t *vDst = NULL;
uint8x8_t *vDst = nullptr;
uint16_t align;
for (uint32_t i = 0; i < rle->size; ++i) {
auto dst = &surface->buffer[span->y * surface->stride + span->x];
uint32_t align = 0;
if ((((uint32_t) dst) & 0x7) != 0) {
vDst = (uint8x8_t*)(dst+1);
vDst = (uint8x8_t*)(dst + 1);
align = 1;
} else {
vDst = (uint8x8_t*) dst;
align = 0;
}
if (span->coverage < 255) src = ALPHA_BLEND(color, span->coverage);
else src = color;
auto ialpha = 255 - surface->blender.alpha(src);
auto ialpha = 255 - surface->blender.alpha(src);
uint8x8_t vSrc = (uint8x8_t) vdup_n_u32(src);
uint8x8_t vIalpha = vdup_n_u8((uint8_t) ialpha);
uint32_t iterations = (span->len - align) / 2;
uint32_t left = (span->len - align) % 2;
//fill not aligned byte
if (align > 0) *dst = src + ALPHA_BLEND(*dst, ialpha);
//Fill not aligned byte
if (align) *dst = src + ALPHA_BLEND(*dst, ialpha);
for (uint32_t x = 0; x < (span->len - align) / 2; ++x)
vDst[x] = vadd_u8(vSrc, ALPHA_BLEND_NEON(vDst[x], vIalpha));
for (uint32_t x = 0; x < iterations; ++x) vDst[x] = vadd_u8(vSrc, ALPHA_BLEND_NEON(vDst[x], vIalpha));
if (left) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], ialpha);
auto leftovers = (span->len - align) % 2;
if (leftovers > 0) dst[span->len - 1] = src + ALPHA_BLEND(dst[span->len - 1], ialpha);
++span;
}
@ -91,36 +91,32 @@ static inline bool neonRasterTranslucentRle(SwSurface* surface, const SwRleData*
/*
 * Blends the translucent `color` over the rectangle `region` of `surface`,
 * processing two 32-bit pixels per 64-bit NEON vector.
 *
 * Each row is handled in up to three steps:
 *   1. if the row does not start on an 8-byte boundary, its first pixel is
 *      blended on its own so that the vector stores start aligned,
 *   2. the remaining pixels are blended pairwise with NEON,
 *   3. if an odd pixel is left over, it is the last pixel of the row and is
 *      blended on its own.
 *
 * Always returns true.
 */
static inline bool neonRasterTranslucentRect(SwSurface* surface, const SwBBox& region, uint32_t color)
{
    auto buffer = surface->buffer + (region.min.y * surface->stride) + region.min.x;
    auto h = static_cast<uint32_t>(region.max.y - region.min.y);
    auto w = static_cast<uint32_t>(region.max.x - region.min.x);

    //Nothing to draw. This also keeps (w - align) below from wrapping around.
    if (w == 0 || h == 0) return true;

    auto ialpha = 255 - surface->blender.alpha(color);

    //The same color / inverse alpha replicated over every lane of a 64-bit vector.
    auto vColor = vreinterpret_u8_u32(vdup_n_u32(color));
    auto vIalpha = vdup_n_u8(static_cast<uint8_t>(ialpha));

    for (uint32_t y = 0; y < h; ++y) {
        auto dst = &buffer[y * surface->stride];

        //Use uintptr_t for the address test: a 32-bit cast would truncate 64-bit pointers.
        uint32_t align = ((reinterpret_cast<uintptr_t>(dst) & 0x7) != 0) ? 1 : 0;

        //Fill the not aligned leading pixel.
        if (align) *dst = color + ALPHA_BLEND(*dst, ialpha);

        //Only the pixels after the leading one belong to the vector loop;
        //counting all `w` pixels here would run one pixel past the row.
        auto vDst = reinterpret_cast<uint8x8_t*>(dst + align);
        auto pairs = (w - align) / 2;
        for (uint32_t x = 0; x < pairs; ++x) vDst[x] = vadd_u8(vColor, ALPHA_BLEND_NEON(vDst[x], vIalpha));

        //An odd pixel left after the pairs is always the last one of the row.
        auto leftovers = (w - align) % 2;
        if (leftovers > 0) dst[w - 1] = color + ALPHA_BLEND(dst[w - 1], ialpha);
    }
    return true;
}