From 9e1ba8d2c0bd295513a18361e4c081d9802eba28 Mon Sep 17 00:00:00 2001 From: Hermet Park Date: Wed, 8 Jul 2020 16:05:58 +0900 Subject: [PATCH] sw_engine: introduce avx simd instruction AVX is the cutting-edge SIMD instruction set for Intel & AMD CPUs. We are going to support this feature for the desktop environment (instead of SSE). You can turn it on with a configuration like this: $meson . build -Dvectors=avx The current patch only supports solid-color rasterization. Change-Id: I068ba30a1f63d480415e2762f8021fc8d6d28a39 --- meson.build | 4 ++++ meson_options.txt | 8 ++++++- src/lib/meson.build | 2 ++ src/lib/sw_engine/tvgSwCommon.h | 37 +++++++++++++++++++++++++------ src/lib/sw_engine/tvgSwFill.cpp | 6 +++-- src/lib/sw_engine/tvgSwRaster.cpp | 29 +++++++++++------------- src/loaders/meson.build | 1 + src/meson.build | 9 ++++++++ test/testPath.cpp | 2 +- 9 files changed, 71 insertions(+), 27 deletions(-) diff --git a/meson.build b/meson.build index c23bcb16..d1803a30 100644 --- a/meson.build +++ b/meson.build @@ -18,6 +18,10 @@ if get_option('loaders').contains('svg') == true config_h.set10('THORVG_SVG_LOADER_SUPPORT', true) endif +if get_option('vectors').contains('avx') == true + config_h.set10('THORVG_AVX_VECTOR_SUPPORT', true) +endif + configure_file( output: 'config.h', configuration: config_h diff --git a/meson_options.txt b/meson_options.txt index daf3923f..5a440fa0 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -6,6 +6,12 @@ option('engines', option('loaders', type: 'array', - choices: ['svg'], + choices: ['', 'svg'], value: ['svg'], description: 'Enable Vector File Loader in thorvg') + +option('vectors', + type: 'array', + choices: ['', 'avx'], + value: [''], + description: 'Enable CPU Vectorization(SIMD) in thorvg') diff --git a/src/lib/meson.build b/src/lib/meson.build index 46d6a997..ad022618 100644 --- a/src/lib/meson.build +++ b/src/lib/meson.build @@ -2,10 +2,12 @@ engine_dep = [] if get_option('engines').contains('sw') == true 
subdir('sw_engine') + message('Enable SW Raster Engine') endif if get_option('engines').contains('gl') == true subdir('gl_engine') + message('Enable GL Raster Engine') endif source_file = [ diff --git a/src/lib/sw_engine/tvgSwCommon.h b/src/lib/sw_engine/tvgSwCommon.h index 8d903423..5e474838 100644 --- a/src/lib/sw_engine/tvgSwCommon.h +++ b/src/lib/sw_engine/tvgSwCommon.h @@ -19,6 +19,10 @@ #include "tvgCommon.h" +#ifdef THORVG_AVX_VECTOR_SUPPORT + #include <immintrin.h> +#endif + #if 0 #include static double timeStamp() @@ -242,12 +246,6 @@ static inline uint32_t COLOR_ARGB_JOIN(uint8_t r, uint8_t g, uint8_t b, uint8_t } -static inline void COLOR_SET(uint32_t *dst, uint32_t val, uint32_t len) -{ - while (len--) *dst++ = val; -} - - int64_t mathMultiply(int64_t a, int64_t b); int64_t mathDivide(int64_t a, int64_t b); int64_t mathMulDiv(int64_t a, int64_t b, int64_t c); @@ -283,7 +281,7 @@ void strokeFree(SwStroke* stroke); bool fillGenColorTable(SwFill* fill, const Fill* fdata, const Matrix* transform, bool ctable); void fillReset(SwFill* fill); void fillFree(SwFill* fill); -void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len); +void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t offset, uint32_t len); void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len); SwRleData* rleRender(const SwOutline* outline, const SwBBox& bbox, const SwSize& clip, bool antiAlias); @@ -294,4 +292,29 @@ bool rasterSolidShape(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, ui bool rasterStroke(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a); bool rasterClear(Surface& surface); +inline void rasterARGB32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len) +{ +#ifdef THORVG_AVX_VECTOR_SUPPORT + int32_t align = (8 - (offset % 8)) % 8; + //Vectorization: fill 8 pixels per store; unaligned store because the row base 'dst' is not guaranteed to be 32-byte aligned + auto avxDst = (__m256i*)(dst + offset + align); + int32_t i = (len - align); + for (;i > 7; i 
-= 8, ++avxDst) { + _mm256_storeu_si256(avxDst, _mm256_set1_epi32(val)); + } + //Alignment: head pixels in front of the first 8-pixel boundary (clamped to len for short spans) + if (align > 0) { + if (align > len) align -= (align - len); + auto tmp = dst + offset; + for (; align > 0; --align, ++tmp) *tmp = val; + } + //Pack Leftovers: tail pixels (fewer than 8) left after the last full 8-pixel store + dst += offset + (len - i); + while (i-- > 0) *(dst++) = val; +#else + dst += offset; + while (len--) *dst++ = val; +#endif +} + #endif /* _TVG_SW_COMMON_H_ */ diff --git a/src/lib/sw_engine/tvgSwFill.cpp b/src/lib/sw_engine/tvgSwFill.cpp index 5992bcaa..1f72ee93 100644 --- a/src/lib/sw_engine/tvgSwFill.cpp +++ b/src/lib/sw_engine/tvgSwFill.cpp @@ -208,7 +208,7 @@ void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, } -void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len) +void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t offset, uint32_t len) { if (fill->linear.len < FLT_EPSILON) return; @@ -220,10 +220,12 @@ void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, if (fabsf(inc) < FLT_EPSILON) { auto color = _fixedPixel(fill, static_cast(t * FIXPT_SIZE)); - COLOR_SET(dst, color, len); + rasterARGB32(dst, color, offset, len); return; } + dst += offset; + auto vMax = static_cast(INT32_MAX >> (FIXPT_BITS + 1)); auto vMin = -vMax; auto v = t + (inc * len); diff --git a/src/lib/sw_engine/tvgSwRaster.cpp b/src/lib/sw_engine/tvgSwRaster.cpp index 5a14822b..5819bb86 100644 --- a/src/lib/sw_engine/tvgSwRaster.cpp +++ b/src/lib/sw_engine/tvgSwRaster.cpp @@ -19,7 +19,6 @@ #include "tvgSwCommon.h" - /************************************************************************/ /* Internal Class Implementation */ /************************************************************************/ @@ -55,13 +54,12 @@ static bool _rasterTranslucentRect(Surface& surface, const SwBBox& region, uint3 static bool _rasterSolidRect(Surface& surface, const SwBBox& region, uint32_t color) { - auto buffer = surface.buffer + (region.min.y * 
surface.stride) + region.min.x; - auto h = static_cast(region.max.y - region.min.y); + auto buffer = surface.buffer + (region.min.y * surface.stride); auto w = static_cast(region.max.x - region.min.x); + auto h = static_cast(region.max.y - region.min.y); for (uint32_t y = 0; y < h; ++y) { - auto dst = &buffer[y * surface.stride]; - COLOR_SET(dst, color, w); + rasterARGB32(buffer + y * surface.stride, color, region.min.x, w); } return true; } @@ -95,10 +93,10 @@ static bool _rasterSolidRle(Surface& surface, SwRleData* rle, uint32_t color) auto span = rle->spans; for (uint32_t i = 0; i < rle->size; ++i) { - auto dst = &surface.buffer[span->y * surface.stride + span->x]; if (span->coverage == 255) { - COLOR_SET(dst, color, span->len); + rasterARGB32(surface.buffer + span->y * surface.stride, color, span->x, span->len); } else { + auto dst = &surface.buffer[span->y * surface.stride + span->x]; auto src = COLOR_ALPHA_BLEND(color, span->coverage); auto ialpha = 255 - span->coverage; for (uint32_t i = 0; i < span->len; ++i) { @@ -127,7 +125,7 @@ static bool _rasterLinearGradientRect(Surface& surface, const SwBBox& region, co for (uint32_t y = 0; y < h; ++y) { auto dst = &buffer[y * surface.stride]; - fillFetchLinear(fill, tmpBuf, region.min.y + y, region.min.x, w); + fillFetchLinear(fill, tmpBuf, region.min.y + y, region.min.x, 0, w); for (uint32_t x = 0; x < w; ++x) { dst[x] = tmpBuf[x] + COLOR_ALPHA_BLEND(dst[x], 255 - COLOR_ALPHA(tmpBuf[x])); } @@ -135,8 +133,7 @@ static bool _rasterLinearGradientRect(Surface& surface, const SwBBox& region, co //Opaque Gradient } else { for (uint32_t y = 0; y < h; ++y) { - auto dst = &buffer[y * surface.stride]; - fillFetchLinear(fill, dst, region.min.y + y, region.min.x, w); + fillFetchLinear(fill, buffer + y * surface.stride, region.min.y + y, region.min.x, 0, w); } } return true; @@ -188,7 +185,7 @@ static bool _rasterLinearGradientRle(Surface& surface, SwRleData* rle, const SwF if (fill->translucent) { for (uint32_t i = 0; i < 
rle->size; ++i) { auto dst = &surface.buffer[span->y * surface.stride + span->x]; - fillFetchLinear(fill, buf, span->y, span->x, span->len); + fillFetchLinear(fill, buf, span->y, span->x, 0, span->len); if (span->coverage == 255) { for (uint32_t i = 0; i < span->len; ++i) { dst[i] = buf[i] + COLOR_ALPHA_BLEND(dst[i], 255 - COLOR_ALPHA(buf[i])); @@ -204,11 +201,11 @@ static bool _rasterLinearGradientRle(Surface& surface, SwRleData* rle, const SwF //Opaque Gradient } else { for (uint32_t i = 0; i < rle->size; ++i) { - auto dst = &surface.buffer[span->y * surface.stride + span->x]; if (span->coverage == 255) { - fillFetchLinear(fill, dst, span->y, span->x, span->len); + fillFetchLinear(fill, surface.buffer + span->y * surface.stride, span->y, span->x, span->x, span->len); } else { - fillFetchLinear(fill, buf, span->y, span->x, span->len); + auto dst = &surface.buffer[span->y * surface.stride + span->x]; + fillFetchLinear(fill, buf, span->y, span->x, 0, span->len); auto ialpha = 255 - span->coverage; for (uint32_t i = 0; i < span->len; ++i) { dst[i] = COLOR_ALPHA_BLEND(buf[i], span->coverage) + COLOR_ALPHA_BLEND(dst[i], ialpha); @@ -313,10 +310,10 @@ bool rasterClear(Surface& surface) if (!surface.buffer || surface.stride <= 0 || surface.w <= 0 || surface.h <= 0) return false; if (surface.w == surface.stride) { - COLOR_SET(surface.buffer, 0xff000000, surface.w * surface.h); + rasterARGB32(surface.buffer, 0x00000000, 0, surface.w * surface.h); } else { for (uint32_t i = 0; i < surface.h; i++) { - COLOR_SET(surface.buffer + surface.stride * i, 0xff000000, surface.w); + rasterARGB32(surface.buffer + surface.stride * i, 0x00000000, 0, surface.w); } } return true; diff --git a/src/loaders/meson.build b/src/loaders/meson.build index aaf1324f..873cfa7f 100644 --- a/src/loaders/meson.build +++ b/src/loaders/meson.build @@ -2,6 +2,7 @@ subloader_dep = [] if get_option('loaders').contains('svg') == true subdir('svg_loader') + message('Enable SVG Loader') endif loader_dep = 
declare_dependency( diff --git a/src/meson.build b/src/meson.build index 23c4ad54..68738701 100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,5 +1,14 @@ compiler_flags = ['-DTVG_BUILD'] +cc = meson.get_compiler('cpp') +if (cc.get_id() != 'msvc') + if get_option('vectors').contains('avx') + compiler_flags += ['-mavx'] + message('Enable Advanced Vector Extension') + endif +endif + + subdir('lib') subdir('loaders') subdir('examples') diff --git a/test/testPath.cpp b/test/testPath.cpp index a6656a87..bcd5ab78 100644 --- a/test/testPath.cpp +++ b/test/testPath.cpp @@ -151,4 +151,4 @@ int main(int argc, char **argv) cout << "engine is not supported" << endl; } return 0; -} \ No newline at end of file +}