sw_engine: applied OpenMP to improve post-processing performance.

- The number of OpenMP threads is set to TaskScheduler::threads(), so they are allocated alongside the task scheduler's threads.
- Performance improved by 2x in a specific animation.
- Disable the thread feature in the Android build test due to system issues with OpenMP compilation.
This commit is contained in:
Hermet Park 2024-09-26 13:40:36 +09:00
parent ab8b4ef73e
commit 2972a631bd
6 changed files with 27 additions and 6 deletions

View file

@ -34,7 +34,7 @@ jobs:
API: 21
run: |
sed -e "s|NDK|$NDK|g" -e "s|HOST_TAG|linux-x86_64|g" -e "s|API|$API|g" ./cross/android_x86_64.txt > /tmp/android_cross.txt
meson setup build -Dlog=true -Dengines=all -Dloaders=all -Dsavers=all -Dbindings=capi -Dstatic=true --cross-file /tmp/android_cross.txt
meson setup build -Dlog=true -Dengines=all -Dloaders=all -Dsavers=all -Dbindings=capi -Dstatic=true -Dthreads=false --cross-file /tmp/android_cross.txt
sudo ninja -C build install
- uses: actions/upload-artifact@v4
@ -64,7 +64,7 @@ jobs:
API: 21
run: |
sed -e "s|NDK|$NDK|g" -e "s|HOST_TAG|linux-x86_64|g" -e "s|API|$API|g" ./cross/android_aarch64.txt > /tmp/android_cross.txt
meson setup build -Dlog=true -Dengines=all -Dloaders=all -Dsavers=all -Dbindings=capi -Dstatic=true --cross-file /tmp/android_cross.txt
meson setup build -Dlog=true -Dengines=all -Dloaders=all -Dsavers=all -Dbindings=capi -Dstatic=true -Dthreads=false --cross-file /tmp/android_cross.txt
sudo ninja -C build install
- uses: actions/upload-artifact@v4

View file

@ -19,7 +19,7 @@ if cc.get_id() == 'clang-cl'
endif
if get_option('b_sanitize') == 'none'
override_options += ['cpp_eh=none','cpp_rtti=false']
compiler_flags += ['/clang:-fno-math-errno', '/clang:-Woverloaded-virtual',
compiler_flags += ['/clang:-fno-math-errno', '/clang:-Woverloaded-virtual', '/clang:-Wno-unknown-pragmas',
'/clang:-fno-stack-protector', '/clang:-fno-unwind-tables' ,
'/clang:-fno-asynchronous-unwind-tables']
endif
@ -33,7 +33,7 @@ elif (cc.get_id() != 'msvc')
if get_option('b_sanitize') == 'none'
compiler_flags += ['-fno-exceptions', '-fno-rtti', '-fno-stack-protector', '-fno-math-errno',
'-fno-unwind-tables' , '-fno-asynchronous-unwind-tables',
'-Woverloaded-virtual']
'-Woverloaded-virtual', '-Wno-unknown-pragmas']
endif
endif

View file

@ -17,7 +17,19 @@ source_file = [
'tvgSwStroke.cpp',
]
# OpenMP is optional: the defaults below leave the engine built without it.
omp_dep = []
sw_compiler_args = []
# Only probe for OpenMP when the 'threads' build option is enabled.
if (get_option('threads'))
# required: false -> a missing OpenMP does not fail configuration.
omp_dep = dependency('openmp', required: false)
if (omp_dep.found())
# Define the macro that guards the OpenMP-specific code in the sources.
sw_compiler_args = '-DTHORVG_SW_OPENMP_SUPPORT=1'
endif
endif
engine_dep += [declare_dependency(
compile_args : sw_compiler_args,
include_directories : include_directories('.'),
sources : source_file
)]
dependencies : omp_dep,
sources : source_file
)]

View file

@ -74,6 +74,7 @@ static void _gaussianBlur(uint8_t* src, uint8_t* dst, int32_t stride, int32_t w,
auto iarr = 1.0f / (dimension + dimension + 1);
#pragma omp parallel for
for (int x = 0; x < h; x++) {
auto p = x * stride;
auto i = p * 4; //current index

View file

@ -1832,6 +1832,7 @@ void rasterXYFlip(uint32_t* src, uint32_t* dst, int32_t stride, int32_t w, int32
dst += ((bbox.min.x * stride) + bbox.min.y);
}
#pragma omp parallel for
for (int x = 0; x < w; x += BLOCK) {
auto bx = std::min(w, x + BLOCK) - x;
auto in = &src[x];

View file

@ -20,6 +20,9 @@
* SOFTWARE.
*/
#ifdef THORVG_SW_OPENMP_SUPPORT
#include <omp.h>
#endif
#include <algorithm>
#include "tvgMath.h"
#include "tvgSwCommon.h"
@ -762,6 +765,10 @@ bool SwRenderer::init(uint32_t threads)
// Returns the engine initialization counter (initEngineCnt).
// When built with THORVG_SW_OPENMP_SUPPORT, the call also sets the OpenMP
// thread count to TaskScheduler::threads() as a side effect.
int32_t SwRenderer::init()
{
#ifdef THORVG_SW_OPENMP_SUPPORT
// NOTE(review): omp_set_num_threads() expects a positive value -- confirm
// that TaskScheduler::threads() can never be 0 at this point.
omp_set_num_threads(TaskScheduler::threads());
#endif
return initEngineCnt;
}