diff --git a/meson.build b/meson.build index 5563fbce..239ce518 100644 --- a/meson.build +++ b/meson.build @@ -42,6 +42,10 @@ if get_option('vectors').contains('avx') == true config_h.set10('THORVG_AVX_VECTOR_SUPPORT', true) endif +if get_option('vectors').contains('neon') == true + config_h.set10('THORVG_NEON_VECTOR_SUPPORT', true) +endif + if get_option('bindings').contains('capi') == true config_h.set10('THORVG_CAPI_BINDING_SUPPORT', true) endif diff --git a/meson_options.txt b/meson_options.txt index eaf8bd1c..a449efdd 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -17,9 +17,9 @@ option('savers', description: 'Enable File Savers in thorvg') option('vectors', - type: 'array', - choices: ['', 'avx'], - value: [''], + type: 'combo', + choices: ['', 'avx', 'neon'], + value: '', description: 'Enable CPU Vectorization(SIMD) in thorvg') option('bindings', diff --git a/src/lib/sw_engine/tvgSwCommon.h b/src/lib/sw_engine/tvgSwCommon.h index 4349884a..61cffcc6 100644 --- a/src/lib/sw_engine/tvgSwCommon.h +++ b/src/lib/sw_engine/tvgSwCommon.h @@ -29,6 +29,10 @@ #include #endif +#ifdef THORVG_NEON_VECTOR_SUPPORT + #include +#endif + #if 0 #include static double timeStamp() @@ -362,7 +366,7 @@ bool rasterClear(SwSurface* surface); static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len) { -#ifdef THORVG_AVX_VECTOR_SUPPORT +#if defined(THORVG_AVX_VECTOR_SUPPORT) //1. calculate how many iterations we need to cover length uint32_t iterations = len / 8; uint32_t avxFilled = iterations * 8; @@ -382,6 +386,21 @@ static inline void rasterRGBA32(uint32_t *dst, uint32_t val, uint32_t offset, in leftovers = len - avxFilled; dst+= avxFilled; + while (leftovers--) *dst++ = val; +#elif defined(THORVG_NEON_VECTOR_SUPPORT) + uint32_t iterations = len / 4; + uint32_t neonFilled = iterations * 4; + int32_t leftovers = 0; + + dst+=offset; + uint32x4_t vectorVal = { val, val, val, val }; + + for (uint32_t i = 0; i < iterations; ++i) { + vst1q_u32(dst, vectorVal); + dst += 4; + } + + leftovers = len - neonFilled; while (leftovers--) *dst++ = val; #else dst += offset; diff --git a/src/meson.build b/src/meson.build index 3302e330..fadc6117 100644 --- a/src/meson.build +++ b/src/meson.build @@ -5,6 +5,9 @@ if (cc.get_id() != 'msvc') if get_option('vectors').contains('avx') compiler_flags += ['-mavx'] endif + if get_option('vectors').contains('neon') + compiler_flags += ['-mfpu=neon-vfpv4'] + endif if get_option('b_sanitize') == 'none' compiler_flags += ['-fno-exceptions', '-fno-rtti', '-fno-unwind-tables' , '-fno-asynchronous-unwind-tables',