mirror of
https://github.com/thorvg/thorvg.git
synced 2025-06-07 21:23:32 +00:00
sw_engine common: Added neon version of rasterRGBA32 API.
Changes: - Added a 'neon' vector option to the build system - Introduced a NEON version of the rasterRGBA32 API, which improves the speed of the function on ARM CPUs by around 35%
This commit is contained in:
parent
386888bd11
commit
1acc25b5ac
4 changed files with 30 additions and 4 deletions
|
@ -42,6 +42,10 @@ if get_option('vectors').contains('avx') == true
|
|||
config_h.set10('THORVG_AVX_VECTOR_SUPPORT', true)
|
||||
endif
|
||||
|
||||
if get_option('vectors').contains('neon') == true
|
||||
config_h.set10('THORVG_NEON_VECTOR_SUPPORT', true)
|
||||
endif
|
||||
|
||||
if get_option('bindings').contains('capi') == true
|
||||
config_h.set10('THORVG_CAPI_BINDING_SUPPORT', true)
|
||||
endif
|
||||
|
|
|
@ -17,9 +17,9 @@ option('savers',
|
|||
description: 'Enable File Savers in thorvg')
|
||||
|
||||
option('vectors',
|
||||
type: 'array',
|
||||
choices: ['', 'avx'],
|
||||
value: [''],
|
||||
type: 'combo',
|
||||
choices: ['', 'avx', 'neon'],
|
||||
value: '',
|
||||
description: 'Enable CPU Vectorization(SIMD) in thorvg')
|
||||
|
||||
option('bindings',
|
||||
|
|
|
@ -29,6 +29,10 @@
|
|||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef THORVG_NEON_VECTOR_SUPPORT
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#include <sys/time.h>
|
||||
static double timeStamp()
|
||||
|
@ -362,7 +366,7 @@ bool rasterClear(SwSurface* surface);
|
|||
|
||||
static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
|
||||
{
|
||||
#ifdef THORVG_AVX_VECTOR_SUPPORT
|
||||
#if defined(THORVG_AVX_VECTOR_SUPPORT)
|
||||
//1. calculate how many iterations we need to cover length
|
||||
uint32_t iterations = len / 8;
|
||||
uint32_t avxFilled = iterations * 8;
|
||||
|
@ -382,6 +386,21 @@ static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, in
|
|||
leftovers = len - avxFilled;
|
||||
dst+= avxFilled;
|
||||
|
||||
while (leftovers--) *dst++ = val;
|
||||
#elif defined(THORVG_NEON_VECTOR_SUPPORT)
|
||||
uint32_t iterations = len / 4;
|
||||
uint32_t neonFilled = iterations * 4;
|
||||
int32_t leftovers = 0;
|
||||
|
||||
dst+=offset;
|
||||
uint32x4_t vectorVal = { val, val, val, val };
|
||||
|
||||
for (uint32_t i = 0; i < iterations; ++i) {
|
||||
vst1q_u32(dst, vectorVal);
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
leftovers = len - neonFilled;
|
||||
while (leftovers--) *dst++ = val;
|
||||
#else
|
||||
dst += offset;
|
||||
|
|
|
@ -5,6 +5,9 @@ if (cc.get_id() != 'msvc')
|
|||
if get_option('vectors').contains('avx')
|
||||
compiler_flags += ['-mavx']
|
||||
endif
|
||||
if get_option('vectors').contains('neon')
|
||||
compiler_flags += ['-mfpu=neon-vfpv4']
|
||||
endif
|
||||
if get_option('b_sanitize') == 'none'
|
||||
compiler_flags += ['-fno-exceptions', '-fno-rtti',
|
||||
'-fno-unwind-tables' , '-fno-asynchronous-unwind-tables',
|
||||
|
|
Loading…
Add table
Reference in a new issue