mirror of
https://github.com/thorvg/thorvg.git
synced 2025-06-08 13:43:43 +00:00
sw_engine: Implement AVX and NEON optimizations for RasterGrayscale
This commit is contained in:
parent
387d82a80e
commit
06b4b2c586
3 changed files with 41 additions and 2 deletions
|
@ -1755,8 +1755,13 @@ static bool _rasterRadialGradientRle(SwSurface* surface, const SwRleData* rle, c
|
|||
|
||||
void rasterGrayscale8(uint8_t *dst, uint8_t val, uint32_t offset, int32_t len)
|
||||
{
|
||||
//OPTIMIZE_ME: Support SIMD
|
||||
#if defined(THORVG_AVX_VECTOR_SUPPORT)
|
||||
avxRasterGrayscale8(dst, val, offset, len);
|
||||
#elif defined(THORVG_NEON_VECTOR_SUPPORT)
|
||||
neonRasterGrayscale8(dst, val, offset, len);
|
||||
#else
|
||||
cRasterPixels(dst, val, offset, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/*
|
||||
/*
|
||||
* Copyright (c) 2021 - 2024 the ThorVG project. All rights reserved.
|
||||
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
|
@ -62,6 +62,23 @@ static inline __m128i ALPHA_BLEND(__m128i c, __m128i a)
|
|||
}
|
||||
|
||||
|
||||
static void avxRasterGrayscale8(uint8_t* dst, uint8_t val, uint32_t offset, int32_t len)
|
||||
{
|
||||
dst += offset;
|
||||
|
||||
__m256i vecVal = _mm256_set1_epi8(val);
|
||||
|
||||
int32_t i = 0;
|
||||
for (; i <= len - 32; i += 32) {
|
||||
_mm256_storeu_si256((__m256i*)(dst + i), vecVal);
|
||||
}
|
||||
|
||||
for (; i < len; ++i) {
|
||||
dst[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void avxRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
||||
{
|
||||
//1. calculate how many iterations we need to cover the length
|
||||
|
|
|
@ -31,6 +31,23 @@ static inline uint8x8_t ALPHA_BLEND(uint8x8_t c, uint8x8_t a)
|
|||
}
|
||||
|
||||
|
||||
static void neonRasterGrayscale8(uint8_t* dst, uint8_t val, uint32_t offset, int32_t len)
|
||||
{
|
||||
dst += offset;
|
||||
|
||||
int32_t i = 0;
|
||||
uint8x16_t valVec = vdupq_n_u8(val);
|
||||
|
||||
for (; i <= len - 16; i += 16) {
|
||||
vst1q_u8(dst + i, valVec);
|
||||
}
|
||||
|
||||
for (; i < len; i++) {
|
||||
dst[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void neonRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
||||
{
|
||||
uint32_t iterations = len / 4;
|
||||
|
|
Loading…
Add table
Reference in a new issue