mirror of
https://github.com/thorvg/thorvg.git
synced 2025-06-14 12:04:29 +00:00
sw_engine: introduce avx simd instruction
AVX is the state-of-the-art SIMD instruction set for Intel & AMD CPUs. We are going to support this feature for the desktop environment (instead of SSE). You can turn it on with a configuration like this: $meson . build -Dvectors=avx. The current patch supports it only for solid-color rasterization. Change-Id: I068ba30a1f63d480415e2762f8021fc8d6d28a39
This commit is contained in:
parent
2deb6919c7
commit
9e1ba8d2c0
9 changed files with 71 additions and 27 deletions
|
@ -18,6 +18,10 @@ if get_option('loaders').contains('svg') == true
|
||||||
config_h.set10('THORVG_SVG_LOADER_SUPPORT', true)
|
config_h.set10('THORVG_SVG_LOADER_SUPPORT', true)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if get_option('vectors').contains('avx') == true
|
||||||
|
config_h.set10('THORVG_AVX_VECTOR_SUPPORT', true)
|
||||||
|
endif
|
||||||
|
|
||||||
configure_file(
|
configure_file(
|
||||||
output: 'config.h',
|
output: 'config.h',
|
||||||
configuration: config_h
|
configuration: config_h
|
||||||
|
|
|
@ -6,6 +6,12 @@ option('engines',
|
||||||
|
|
||||||
option('loaders',
|
option('loaders',
|
||||||
type: 'array',
|
type: 'array',
|
||||||
choices: ['svg'],
|
choices: ['', 'svg'],
|
||||||
value: ['svg'],
|
value: ['svg'],
|
||||||
description: 'Enable Vector File Loader in thorvg')
|
description: 'Enable Vector File Loader in thorvg')
|
||||||
|
|
||||||
|
option('vectors',
|
||||||
|
type: 'array',
|
||||||
|
choices: ['', 'avx'],
|
||||||
|
value: [''],
|
||||||
|
description: 'Enable CPU Vectorization(SIMD) in thorvg')
|
||||||
|
|
|
@ -2,10 +2,12 @@ engine_dep = []
|
||||||
|
|
||||||
if get_option('engines').contains('sw') == true
|
if get_option('engines').contains('sw') == true
|
||||||
subdir('sw_engine')
|
subdir('sw_engine')
|
||||||
|
message('Enable SW Raster Engine')
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if get_option('engines').contains('gl') == true
|
if get_option('engines').contains('gl') == true
|
||||||
subdir('gl_engine')
|
subdir('gl_engine')
|
||||||
|
message('Enable GL Raster Engine')
|
||||||
endif
|
endif
|
||||||
|
|
||||||
source_file = [
|
source_file = [
|
||||||
|
|
|
@ -19,6 +19,10 @@
|
||||||
|
|
||||||
#include "tvgCommon.h"
|
#include "tvgCommon.h"
|
||||||
|
|
||||||
|
#ifdef THORVG_AVX_VECTOR_SUPPORT
|
||||||
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
static double timeStamp()
|
static double timeStamp()
|
||||||
|
@ -242,12 +246,6 @@ static inline uint32_t COLOR_ARGB_JOIN(uint8_t r, uint8_t g, uint8_t b, uint8_t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline void COLOR_SET(uint32_t *dst, uint32_t val, uint32_t len)
|
|
||||||
{
|
|
||||||
while (len--) *dst++ = val;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int64_t mathMultiply(int64_t a, int64_t b);
|
int64_t mathMultiply(int64_t a, int64_t b);
|
||||||
int64_t mathDivide(int64_t a, int64_t b);
|
int64_t mathDivide(int64_t a, int64_t b);
|
||||||
int64_t mathMulDiv(int64_t a, int64_t b, int64_t c);
|
int64_t mathMulDiv(int64_t a, int64_t b, int64_t c);
|
||||||
|
@ -283,7 +281,7 @@ void strokeFree(SwStroke* stroke);
|
||||||
bool fillGenColorTable(SwFill* fill, const Fill* fdata, const Matrix* transform, bool ctable);
|
bool fillGenColorTable(SwFill* fill, const Fill* fdata, const Matrix* transform, bool ctable);
|
||||||
void fillReset(SwFill* fill);
|
void fillReset(SwFill* fill);
|
||||||
void fillFree(SwFill* fill);
|
void fillFree(SwFill* fill);
|
||||||
void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len);
|
void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t offset, uint32_t len);
|
||||||
void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len);
|
void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len);
|
||||||
|
|
||||||
SwRleData* rleRender(const SwOutline* outline, const SwBBox& bbox, const SwSize& clip, bool antiAlias);
|
SwRleData* rleRender(const SwOutline* outline, const SwBBox& bbox, const SwSize& clip, bool antiAlias);
|
||||||
|
@ -294,4 +292,29 @@ bool rasterSolidShape(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, ui
|
||||||
bool rasterStroke(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a);
|
bool rasterStroke(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a);
|
||||||
bool rasterClear(Surface& surface);
|
bool rasterClear(Surface& surface);
|
||||||
|
|
||||||
|
/*
 * Fill a run of 32-bit pixels with a single value.
 *
 * Writes `val` into dst[offset] .. dst[offset + len - 1]. Nothing is written
 * when `len` is zero or negative.
 *
 * dst    : base pointer of the pixel run (callers pass the start of a row).
 * val    : 32-bit pixel value to store.
 * offset : index of the first pixel to write, relative to dst.
 * len    : number of pixels to write.
 */
inline void rasterARGB32(uint32_t *dst, uint32_t val, uint32_t offset, int32_t len)
{
    dst += offset;

#ifdef THORVG_AVX_VECTOR_SUPPORT
    //Vectorization: 8 pixels per 256-bit store. The store is the unaligned
    //variant on purpose: the address depends on both the row base and the
    //offset, and neither is guaranteed to sit on a 32-byte boundary.
    const __m256i avxVal = _mm256_set1_epi32(static_cast<int>(val));
    for (; len > 7; len -= 8, dst += 8) {
        _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst), avxVal);
    }
#endif

    //Leftovers (or the whole run when AVX is disabled). The signed comparison
    //keeps a negative length from turning into a runaway loop.
    while (len-- > 0) *dst++ = val;
}
|
||||||
|
|
||||||
#endif /* _TVG_SW_COMMON_H_ */
|
#endif /* _TVG_SW_COMMON_H_ */
|
||||||
|
|
|
@ -208,7 +208,7 @@ void fillFetchRadial(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t len)
|
void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x, uint32_t offset, uint32_t len)
|
||||||
{
|
{
|
||||||
if (fill->linear.len < FLT_EPSILON) return;
|
if (fill->linear.len < FLT_EPSILON) return;
|
||||||
|
|
||||||
|
@ -220,10 +220,12 @@ void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x,
|
||||||
|
|
||||||
if (fabsf(inc) < FLT_EPSILON) {
|
if (fabsf(inc) < FLT_EPSILON) {
|
||||||
auto color = _fixedPixel(fill, static_cast<uint32_t>(t * FIXPT_SIZE));
|
auto color = _fixedPixel(fill, static_cast<uint32_t>(t * FIXPT_SIZE));
|
||||||
COLOR_SET(dst, color, len);
|
rasterARGB32(dst, color, offset, len);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dst += offset;
|
||||||
|
|
||||||
auto vMax = static_cast<float>(INT32_MAX >> (FIXPT_BITS + 1));
|
auto vMax = static_cast<float>(INT32_MAX >> (FIXPT_BITS + 1));
|
||||||
auto vMin = -vMax;
|
auto vMin = -vMax;
|
||||||
auto v = t + (inc * len);
|
auto v = t + (inc * len);
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
|
|
||||||
#include "tvgSwCommon.h"
|
#include "tvgSwCommon.h"
|
||||||
|
|
||||||
|
|
||||||
/************************************************************************/
|
/************************************************************************/
|
||||||
/* Internal Class Implementation */
|
/* Internal Class Implementation */
|
||||||
/************************************************************************/
|
/************************************************************************/
|
||||||
|
@ -55,13 +54,12 @@ static bool _rasterTranslucentRect(Surface& surface, const SwBBox& region, uint3
|
||||||
|
|
||||||
static bool _rasterSolidRect(Surface& surface, const SwBBox& region, uint32_t color)
|
static bool _rasterSolidRect(Surface& surface, const SwBBox& region, uint32_t color)
|
||||||
{
|
{
|
||||||
auto buffer = surface.buffer + (region.min.y * surface.stride) + region.min.x;
|
auto buffer = surface.buffer + (region.min.y * surface.stride);
|
||||||
auto h = static_cast<uint32_t>(region.max.y - region.min.y);
|
|
||||||
auto w = static_cast<uint32_t>(region.max.x - region.min.x);
|
auto w = static_cast<uint32_t>(region.max.x - region.min.x);
|
||||||
|
auto h = static_cast<uint32_t>(region.max.y - region.min.y);
|
||||||
|
|
||||||
for (uint32_t y = 0; y < h; ++y) {
|
for (uint32_t y = 0; y < h; ++y) {
|
||||||
auto dst = &buffer[y * surface.stride];
|
rasterARGB32(buffer + y * surface.stride, color, region.min.x, w);
|
||||||
COLOR_SET(dst, color, w);
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -95,10 +93,10 @@ static bool _rasterSolidRle(Surface& surface, SwRleData* rle, uint32_t color)
|
||||||
auto span = rle->spans;
|
auto span = rle->spans;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < rle->size; ++i) {
|
for (uint32_t i = 0; i < rle->size; ++i) {
|
||||||
auto dst = &surface.buffer[span->y * surface.stride + span->x];
|
|
||||||
if (span->coverage == 255) {
|
if (span->coverage == 255) {
|
||||||
COLOR_SET(dst, color, span->len);
|
rasterARGB32(surface.buffer + span->y * surface.stride, color, span->x, span->len);
|
||||||
} else {
|
} else {
|
||||||
|
auto dst = &surface.buffer[span->y * surface.stride + span->x];
|
||||||
auto src = COLOR_ALPHA_BLEND(color, span->coverage);
|
auto src = COLOR_ALPHA_BLEND(color, span->coverage);
|
||||||
auto ialpha = 255 - span->coverage;
|
auto ialpha = 255 - span->coverage;
|
||||||
for (uint32_t i = 0; i < span->len; ++i) {
|
for (uint32_t i = 0; i < span->len; ++i) {
|
||||||
|
@ -127,7 +125,7 @@ static bool _rasterLinearGradientRect(Surface& surface, const SwBBox& region, co
|
||||||
|
|
||||||
for (uint32_t y = 0; y < h; ++y) {
|
for (uint32_t y = 0; y < h; ++y) {
|
||||||
auto dst = &buffer[y * surface.stride];
|
auto dst = &buffer[y * surface.stride];
|
||||||
fillFetchLinear(fill, tmpBuf, region.min.y + y, region.min.x, w);
|
fillFetchLinear(fill, tmpBuf, region.min.y + y, region.min.x, 0, w);
|
||||||
for (uint32_t x = 0; x < w; ++x) {
|
for (uint32_t x = 0; x < w; ++x) {
|
||||||
dst[x] = tmpBuf[x] + COLOR_ALPHA_BLEND(dst[x], 255 - COLOR_ALPHA(tmpBuf[x]));
|
dst[x] = tmpBuf[x] + COLOR_ALPHA_BLEND(dst[x], 255 - COLOR_ALPHA(tmpBuf[x]));
|
||||||
}
|
}
|
||||||
|
@ -135,8 +133,7 @@ static bool _rasterLinearGradientRect(Surface& surface, const SwBBox& region, co
|
||||||
//Opaque Gradient
|
//Opaque Gradient
|
||||||
} else {
|
} else {
|
||||||
for (uint32_t y = 0; y < h; ++y) {
|
for (uint32_t y = 0; y < h; ++y) {
|
||||||
auto dst = &buffer[y * surface.stride];
|
fillFetchLinear(fill, buffer + y * surface.stride, region.min.y + y, region.min.x, 0, w);
|
||||||
fillFetchLinear(fill, dst, region.min.y + y, region.min.x, w);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -188,7 +185,7 @@ static bool _rasterLinearGradientRle(Surface& surface, SwRleData* rle, const SwF
|
||||||
if (fill->translucent) {
|
if (fill->translucent) {
|
||||||
for (uint32_t i = 0; i < rle->size; ++i) {
|
for (uint32_t i = 0; i < rle->size; ++i) {
|
||||||
auto dst = &surface.buffer[span->y * surface.stride + span->x];
|
auto dst = &surface.buffer[span->y * surface.stride + span->x];
|
||||||
fillFetchLinear(fill, buf, span->y, span->x, span->len);
|
fillFetchLinear(fill, buf, span->y, span->x, 0, span->len);
|
||||||
if (span->coverage == 255) {
|
if (span->coverage == 255) {
|
||||||
for (uint32_t i = 0; i < span->len; ++i) {
|
for (uint32_t i = 0; i < span->len; ++i) {
|
||||||
dst[i] = buf[i] + COLOR_ALPHA_BLEND(dst[i], 255 - COLOR_ALPHA(buf[i]));
|
dst[i] = buf[i] + COLOR_ALPHA_BLEND(dst[i], 255 - COLOR_ALPHA(buf[i]));
|
||||||
|
@ -204,11 +201,11 @@ static bool _rasterLinearGradientRle(Surface& surface, SwRleData* rle, const SwF
|
||||||
//Opaque Gradient
|
//Opaque Gradient
|
||||||
} else {
|
} else {
|
||||||
for (uint32_t i = 0; i < rle->size; ++i) {
|
for (uint32_t i = 0; i < rle->size; ++i) {
|
||||||
auto dst = &surface.buffer[span->y * surface.stride + span->x];
|
|
||||||
if (span->coverage == 255) {
|
if (span->coverage == 255) {
|
||||||
fillFetchLinear(fill, dst, span->y, span->x, span->len);
|
fillFetchLinear(fill, surface.buffer + span->y * surface.stride, span->y, span->x, span->x, span->len);
|
||||||
} else {
|
} else {
|
||||||
fillFetchLinear(fill, buf, span->y, span->x, span->len);
|
auto dst = &surface.buffer[span->y * surface.stride + span->x];
|
||||||
|
fillFetchLinear(fill, buf, span->y, span->x, 0, span->len);
|
||||||
auto ialpha = 255 - span->coverage;
|
auto ialpha = 255 - span->coverage;
|
||||||
for (uint32_t i = 0; i < span->len; ++i) {
|
for (uint32_t i = 0; i < span->len; ++i) {
|
||||||
dst[i] = COLOR_ALPHA_BLEND(buf[i], span->coverage) + COLOR_ALPHA_BLEND(dst[i], ialpha);
|
dst[i] = COLOR_ALPHA_BLEND(buf[i], span->coverage) + COLOR_ALPHA_BLEND(dst[i], ialpha);
|
||||||
|
@ -313,10 +310,10 @@ bool rasterClear(Surface& surface)
|
||||||
if (!surface.buffer || surface.stride <= 0 || surface.w <= 0 || surface.h <= 0) return false;
|
if (!surface.buffer || surface.stride <= 0 || surface.w <= 0 || surface.h <= 0) return false;
|
||||||
|
|
||||||
if (surface.w == surface.stride) {
|
if (surface.w == surface.stride) {
|
||||||
COLOR_SET(surface.buffer, 0xff000000, surface.w * surface.h);
|
rasterARGB32(surface.buffer, 0x00000000, 0, surface.w * surface.h);
|
||||||
} else {
|
} else {
|
||||||
for (uint32_t i = 0; i < surface.h; i++) {
|
for (uint32_t i = 0; i < surface.h; i++) {
|
||||||
COLOR_SET(surface.buffer + surface.stride * i, 0xff000000, surface.w);
|
rasterARGB32(surface.buffer + surface.stride * i, 0x00000000, 0, surface.w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -2,6 +2,7 @@ subloader_dep = []
|
||||||
|
|
||||||
if get_option('loaders').contains('svg') == true
|
if get_option('loaders').contains('svg') == true
|
||||||
subdir('svg_loader')
|
subdir('svg_loader')
|
||||||
|
message('Enable SVG Loader')
|
||||||
endif
|
endif
|
||||||
|
|
||||||
loader_dep = declare_dependency(
|
loader_dep = declare_dependency(
|
||||||
|
|
|
@ -1,5 +1,14 @@
|
||||||
compiler_flags = ['-DTVG_BUILD']
|
compiler_flags = ['-DTVG_BUILD']
|
||||||
|
|
||||||
|
cc = meson.get_compiler('cpp')
|
||||||
|
if (cc.get_id() != 'msvc')
|
||||||
|
if get_option('vectors').contains('avx')
|
||||||
|
compiler_flags += ['-mavx']
|
||||||
|
message('Enable Advanced Vector Extension')
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
subdir('lib')
|
subdir('lib')
|
||||||
subdir('loaders')
|
subdir('loaders')
|
||||||
subdir('examples')
|
subdir('examples')
|
||||||
|
|
|
@ -151,4 +151,4 @@ int main(int argc, char **argv)
|
||||||
cout << "engine is not supported" << endl;
|
cout << "engine is not supported" << endl;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue