sw_engine: optimize raster.

memset() is more than 10x faster than a manual loop.

Thus, we use it in place of manual loops to manipulate buffer pixels.

Change-Id: If0f255578f7d49ff6704c4f15e2eefe435cc3c15
This commit is contained in:
Hermet Park 2020-06-23 14:41:57 +09:00
parent 4f48c856f6
commit 86300c5fc0
6 changed files with 46 additions and 44 deletions

View file

@ -1,6 +1,6 @@
project('tizenvg', project('tizenvg',
'cpp', 'cpp',
default_options : ['buildtype=debug', 'werror=false', 'cpp_std=c++14'], default_options : ['buildtype=debugoptimized', 'werror=false', 'cpp_std=c++14', 'optimization=s'],
version : '0.1.0', version : '0.1.0',
license : 'Apache-2.0') license : 'Apache-2.0')

View file

@ -19,6 +19,16 @@
#include "tvgCommon.h" #include "tvgCommon.h"
#if 1
#include <sys/time.h>
/* Returns the current time reported by gettimeofday() as fractional seconds:
   the whole seconds plus the microsecond part scaled down by 1e6. */
static double timeStamp()
{
    struct timeval now;
    gettimeofday(&now, nullptr);

    const double wholeSeconds = static_cast<double>(now.tv_sec);
    const double fraction = static_cast<double>(now.tv_usec) / 1000000.0;

    return wholeSeconds + fraction;
}
#endif
using namespace tvg; using namespace tvg;
#define SW_CURVE_TYPE_POINT 0 #define SW_CURVE_TYPE_POINT 0
@ -230,6 +240,12 @@ static inline uint32_t COLOR_ARGB_JOIN(uint8_t r, uint8_t g, uint8_t b, uint8_t
} }
/* Stores `val` into each of the `len` consecutive 32-bit elements that start
   at `dst`. Nothing is written when `len` is 0. */
static inline void COLOR_SET(uint32_t *dst, uint32_t val, uint32_t len)
{
    for (uint32_t idx = 0; idx < len; ++idx) {
        dst[idx] = val;
    }
}
int64_t mathMultiply(int64_t a, int64_t b); int64_t mathMultiply(int64_t a, int64_t b);
int64_t mathDivide(int64_t a, int64_t b); int64_t mathDivide(int64_t a, int64_t b);
int64_t mathMulDiv(int64_t a, int64_t b, int64_t c); int64_t mathMulDiv(int64_t a, int64_t b, int64_t c);
@ -273,5 +289,6 @@ void rleFree(SwRleData* rle);
bool rasterGradientShape(Surface& surface, SwShape& shape, unsigned id); bool rasterGradientShape(Surface& surface, SwShape& shape, unsigned id);
bool rasterSolidShape(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a); bool rasterSolidShape(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a);
bool rasterStroke(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a); bool rasterStroke(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_t b, uint8_t a);
bool rasterClear(Surface& surface);
#endif /* _TVG_SW_COMMON_H_ */ #endif /* _TVG_SW_COMMON_H_ */

View file

@ -181,29 +181,6 @@ static inline uint32_t _pixel(const SwFill* fill, float pos)
} }
// Fills `len` consecutive 32-bit elements starting at `dst` with `val`.
//
// The loop is unrolled by 8. The switch jumps into the middle of the
// do/while body, so the first pass writes `len % 8` elements (a full 8 when
// the remainder is 0) and every later pass writes 8. The case labels fall
// through into one another on purpose.
static inline void _write(uint32_t *dst, uint32_t val, uint32_t len)
{
// `len` is unsigned, so this only rejects len == 0.
if (len <= 0) return;
// Cute hack to align future memcopy operation
// and do unroll the loop a bit. Not sure it is
// the most efficient, but will do for now.
// Number of passes through the do/while body: len / 8 rounded up.
auto n = (len + 7) / 8;
// Select the entry point of the first pass from the low 3 bits of `len`.
switch (len & 0x07) {
case 0: do { *dst++ = val;
case 7: *dst++ = val;
case 6: *dst++ = val;
case 5: *dst++ = val;
case 4: *dst++ = val;
case 3: *dst++ = val;
case 2: *dst++ = val;
case 1: *dst++ = val;
} while (--n > 0);
}
}
/************************************************************************/ /************************************************************************/
/* External Class Implementation */ /* External Class Implementation */
/************************************************************************/ /************************************************************************/
@ -244,7 +221,7 @@ void fillFetchLinear(const SwFill* fill, uint32_t* dst, uint32_t y, uint32_t x,
if (fabsf(inc) < FLT_EPSILON) { if (fabsf(inc) < FLT_EPSILON) {
auto color = _fixedPixel(fill, static_cast<uint32_t>(t * FIXPT_SIZE)); auto color = _fixedPixel(fill, static_cast<uint32_t>(t * FIXPT_SIZE));
_write(dst, color, len); COLOR_SET(dst, color, len);
return; return;
} }

View file

@ -24,20 +24,22 @@
/* Internal Class Implementation */ /* Internal Class Implementation */
/************************************************************************/ /************************************************************************/
static bool _rasterTranslucentRle(Surface& surface, SwRleData* rle, uint32_t color) static bool _rasterTranslucentRle(Surface& surface, SwRleData* rle, uint32_t color)
{ {
if (!rle) return false; if (!rle) return false;
auto span = rle->spans; auto span = rle->spans;
auto stride = surface.stride; auto stride = surface.stride;
uint32_t tmp; uint32_t src;
for (uint32_t i = 0; i < rle->size; ++i) { for (uint32_t i = 0; i < rle->size; ++i) {
auto dst = &surface.buffer[span->y * stride + span->x]; auto dst = &surface.buffer[span->y * stride + span->x];
if (span->coverage < 255) tmp = COLOR_ALPHA_BLEND(color, span->coverage); if (span->coverage < 255) src = COLOR_ALPHA_BLEND(color, span->coverage);
else tmp = color; else src = color;
auto ialpha = 255 - COLOR_ALPHA(src);
for (uint32_t i = 0; i < span->len; ++i) { for (uint32_t i = 0; i < span->len; ++i) {
dst[i] = tmp + COLOR_ALPHA_BLEND(dst[i], 255 - COLOR_ALPHA(tmp)); dst[i] = src + COLOR_ALPHA_BLEND(dst[i], ialpha);
} }
++span; ++span;
} }
@ -55,12 +57,12 @@ static bool _rasterSolidRle(Surface& surface, SwRleData* rle, uint32_t color)
for (uint32_t i = 0; i < rle->size; ++i) { for (uint32_t i = 0; i < rle->size; ++i) {
auto dst = &surface.buffer[span->y * stride + span->x]; auto dst = &surface.buffer[span->y * stride + span->x];
if (span->coverage == 255) { if (span->coverage == 255) {
for (uint32_t i = 0; i < span->len; ++i) { COLOR_SET(dst, color, span->len);
dst[i] = color;
}
} else { } else {
auto src = COLOR_ALPHA_BLEND(color, span->coverage);
auto ialpha = 255 - span->coverage;
for (uint32_t i = 0; i < span->len; ++i) { for (uint32_t i = 0; i < span->len; ++i) {
dst[i] = COLOR_ALPHA_BLEND(color, span->coverage) + COLOR_ALPHA_BLEND(dst[i], 255 - span->coverage); dst[i] = src + COLOR_ALPHA_BLEND(dst[i], ialpha);
} }
} }
++span; ++span;
@ -188,4 +190,19 @@ bool rasterStroke(Surface& surface, SwShape& shape, uint8_t r, uint8_t g, uint8_
} }
/* Fills the w x h pixel area of the surface with 0xff000000 (solid black).
   Returns false, touching nothing, when the surface has no buffer or when
   its stride, width or height is not positive; returns true otherwise. */
bool rasterClear(Surface& surface)
{
    if (!surface.buffer) return false;
    if (surface.stride <= 0 || surface.w <= 0 || surface.h <= 0) return false;

    if (surface.w != surface.stride) {
        //Width differs from the stride: fill the first w pixels of each row.
        uint32_t row = 0;
        while (row < surface.h) {
            COLOR_SET(surface.buffer + surface.stride * row, 0xff000000, surface.w);
            row++;
        }
        return true;
    }

    //Width equals the stride: fill all w * h pixels with a single call.
    COLOR_SET(surface.buffer, 0xff000000, surface.w * surface.h);
    return true;
}
#endif /* _TVG_SW_RASTER_CPP_ */ #endif /* _TVG_SW_RASTER_CPP_ */

View file

@ -20,6 +20,7 @@
#include "tvgSwCommon.h" #include "tvgSwCommon.h"
#include "tvgSwRenderer.h" #include "tvgSwRenderer.h"
/************************************************************************/ /************************************************************************/
/* Internal Class Implementation */ /* Internal Class Implementation */
/************************************************************************/ /************************************************************************/
@ -32,17 +33,7 @@ static RenderInitializer renderInit;
bool SwRenderer::clear() bool SwRenderer::clear()
{ {
if (!surface.buffer) return false; return rasterClear(surface);
assert(surface.stride > 0 && surface.w > 0 && surface.h > 0);
//OPTIMIZE ME: SIMD!
for (uint32_t i = 0; i < surface.h; i++) {
for (uint32_t j = 0; j < surface.w; j++)
surface.buffer[surface.stride * i + j] = 0xff000000; //Solid Black
}
return true;
} }
bool SwRenderer::target(uint32_t* buffer, uint32_t stride, uint32_t w, uint32_t h) bool SwRenderer::target(uint32_t* buffer, uint32_t stride, uint32_t w, uint32_t h)

0
test/makefile Executable file → Normal file
View file