From 06b4b2c5864c97624143183b18ff94fcb51bfd8f Mon Sep 17 00:00:00 2001
From: rinechran
Date: Sun, 7 Apr 2024 14:43:23 +0900
Subject: [PATCH] sw_engine: Implement AVX and NEON optimizations for RasterGrayscale

rasterGrayscale8() now calls avxRasterGrayscale8() when
THORVG_AVX_VECTOR_SUPPORT is defined, neonRasterGrayscale8() when
THORVG_NEON_VECTOR_SUPPORT is defined, and keeps cRasterPixels() as the
fallback otherwise. Both SIMD versions store whole vectors first and
finish the remaining bytes with a scalar loop.
---
 src/renderer/sw_engine/tvgSwRaster.cpp   |  8 +++++++-
 src/renderer/sw_engine/tvgSwRasterAvx.h  | 20 ++++++++++++++++++++
 src/renderer/sw_engine/tvgSwRasterNeon.h | 20 ++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/renderer/sw_engine/tvgSwRaster.cpp b/src/renderer/sw_engine/tvgSwRaster.cpp
index 98374d3f..4e0020ee 100644
--- a/src/renderer/sw_engine/tvgSwRaster.cpp
+++ b/src/renderer/sw_engine/tvgSwRaster.cpp
@@ -1755,8 +1755,14 @@ static bool _rasterRadialGradientRle(SwSurface* surface, const SwRleData* rle, c
 
 void rasterGrayscale8(uint8_t *dst, uint8_t val, uint32_t offset, int32_t len)
 {
-    //OPTIMIZE_ME: Support SIMD
+    //Use the SIMD fill enabled at compile time, otherwise the plain C fallback
+#if defined(THORVG_AVX_VECTOR_SUPPORT)
+    avxRasterGrayscale8(dst, val, offset, len);
+#elif defined(THORVG_NEON_VECTOR_SUPPORT)
+    neonRasterGrayscale8(dst, val, offset, len);
+#else
     cRasterPixels(dst, val, offset, len);
+#endif
 }
 
 
diff --git a/src/renderer/sw_engine/tvgSwRasterAvx.h b/src/renderer/sw_engine/tvgSwRasterAvx.h
index 177c7b84..cbaec28f 100644
--- a/src/renderer/sw_engine/tvgSwRasterAvx.h
+++ b/src/renderer/sw_engine/tvgSwRasterAvx.h
@@ -62,6 +62,26 @@ static inline __m128i ALPHA_BLEND(__m128i c, __m128i a)
 }
 
 
+//Fill len bytes starting at dst + offset with val
+static void avxRasterGrayscale8(uint8_t* dst, uint8_t val, uint32_t offset, int32_t len)
+{
+    dst += offset;
+
+    __m256i vecVal = _mm256_set1_epi8(val);
+
+    //1. store 32 bytes per iteration; storeu does not require an aligned address
+    int32_t i = 0;
+    for (; i <= len - 32; i += 32) {
+        _mm256_storeu_si256((__m256i*)(dst + i), vecVal);
+    }
+
+    //2. write the leftover (fewer than 32) bytes one at a time
+    for (; i < len; ++i) {
+        dst[i] = val;
+    }
+}
+
+
 static void avxRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
 {
     //1. calculate how many iterations we need to cover the length
diff --git a/src/renderer/sw_engine/tvgSwRasterNeon.h b/src/renderer/sw_engine/tvgSwRasterNeon.h
index 2bb52837..30d8813c 100644
--- a/src/renderer/sw_engine/tvgSwRasterNeon.h
+++ b/src/renderer/sw_engine/tvgSwRasterNeon.h
@@ -31,6 +31,26 @@ static inline uint8x8_t ALPHA_BLEND(uint8x8_t c, uint8x8_t a)
 }
 
 
+//Fill len bytes starting at dst + offset with val
+static void neonRasterGrayscale8(uint8_t* dst, uint8_t val, uint32_t offset, int32_t len)
+{
+    dst += offset;
+
+    int32_t i = 0;
+    uint8x16_t valVec = vdupq_n_u8(val);
+
+    //1. store 16 bytes per iteration
+    for (; i <= len - 16; i += 16) {
+        vst1q_u8(dst + i, valVec);
+    }
+
+    //2. write the leftover (fewer than 16) bytes one at a time
+    for (; i < len; i++) {
+        dst[i] = val;
+    }
+}
+
+
 static void neonRasterPixel32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
 {
     uint32_t iterations = len / 4;