mirror of
https://github.com/thorvg/thorvg.git
synced 2025-06-08 21:53:41 +00:00
sw_engine: Implement AVX and NEON optimizations for RasterGrayscale
This commit is contained in:
parent
387d82a80e
commit
06b4b2c586
3 changed files with 41 additions and 2 deletions
|
@ -1755,8 +1755,13 @@ static bool _rasterRadialGradientRle(SwSurface* surface, const SwRleData* rle, c
|
||||||
|
|
||||||
/*
 * Entry point for filling a run of 8-bit grayscale pixels.
 *
 * All four arguments are forwarded unchanged to exactly one backend, chosen
 * at compile time:
 *   - avxRasterGrayscale8()  when THORVG_AVX_VECTOR_SUPPORT is defined,
 *   - neonRasterGrayscale8() when only THORVG_NEON_VECTOR_SUPPORT is defined,
 *   - cRasterPixels()        otherwise (generic fallback).
 */
void rasterGrayscale8(uint8_t *dst, uint8_t val, uint32_t offset, int32_t len)
{
#ifdef THORVG_AVX_VECTOR_SUPPORT
    avxRasterGrayscale8(dst, val, offset, len);
#elif defined(THORVG_NEON_VECTOR_SUPPORT)
    neonRasterGrayscale8(dst, val, offset, len);
#else
    cRasterPixels(dst, val, offset, len);
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2021 - 2024 the ThorVG project. All rights reserved.
|
* Copyright (c) 2021 - 2024 the ThorVG project. All rights reserved.
|
||||||
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
@ -62,6 +62,23 @@ static inline __m128i ALPHA_BLEND(__m128i c, __m128i a)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void avxRasterGrayscale8(uint8_t* dst, uint8_t val, uint32_t offset, int32_t len)
|
||||||
|
{
|
||||||
|
dst += offset;
|
||||||
|
|
||||||
|
__m256i vecVal = _mm256_set1_epi8(val);
|
||||||
|
|
||||||
|
int32_t i = 0;
|
||||||
|
for (; i <= len - 32; i += 32) {
|
||||||
|
_mm256_storeu_si256((__m256i*)(dst + i), vecVal);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < len; ++i) {
|
||||||
|
dst[i] = val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void avxRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
static void avxRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
||||||
{
|
{
|
||||||
//1. calculate how many iterations we need to cover the length
|
//1. calculate how many iterations we need to cover the length
|
||||||
|
|
|
@ -31,6 +31,23 @@ static inline uint8x8_t ALPHA_BLEND(uint8x8_t c, uint8x8_t a)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Write `val` into `len` consecutive bytes starting at `dst + offset`.
 *
 * Full 16-byte chunks are emitted with 128-bit NEON stores; the remaining
 * tail (fewer than 16 bytes) is written byte by byte. When `len` is zero
 * or negative nothing is written.
 */
static void neonRasterGrayscale8(uint8_t* dst, uint8_t val, uint32_t offset, int32_t len)
{
    uint8_t* cursor = dst + offset;
    int32_t remaining = len;

    //every byte lane of the 128-bit register carries the fill value
    const uint8x16_t fill = vdupq_n_u8(val);

    //bulk part: 16 bytes per store
    while (remaining >= 16) {
        vst1q_u8(cursor, fill);
        cursor += 16;
        remaining -= 16;
    }

    //tail: whatever did not fit into a full vector
    while (remaining > 0) {
        *cursor++ = val;
        --remaining;
    }
}
|
||||||
|
|
||||||
|
|
||||||
static void neonRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
static void neonRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
||||||
{
|
{
|
||||||
uint32_t iterations = len / 4;
|
uint32_t iterations = len / 4;
|
||||||
|
|
Loading…
Add table
Reference in a new issue