webgpu_gaussian_optimization

wg_engine: optimize gaussian blur effect for webgpu renderer

Issue: https://github.com/thorvg/thorvg/issues/3054
This commit is contained in:
Sergii Liebodkin 2025-02-10 21:35:27 +00:00 committed by Hermet Park
parent 3035501579
commit 37992b52c9
7 changed files with 103 additions and 89 deletions

View file

@ -747,41 +747,45 @@ void WgCompositor::gaussianBlur(WgContext& context, WgRenderStorage* dst, const
assert(compose->rdViewport); assert(compose->rdViewport);
assert(!renderPassEncoder); assert(!renderPassEncoder);
auto renderDataGaussian = (WgRenderDataGaussian*)params->rd; auto renderDataGaussian = (WgRenderDataGaussian*)params->rd;
auto aabb = compose->aabb;
auto viewport = compose->rdViewport; auto viewport = compose->rdViewport;
WgRenderStorage* sbuff = dst;
WgRenderStorage* dbuff = &storageDstCopy;
// begin compute pass
WGPUComputePassDescriptor computePassDesc{ .label = "Compute pass gaussian blur" };
WGPUComputePassEncoder computePassEncoder = wgpuCommandEncoderBeginComputePass(commandEncoder, &computePassDesc);
for (uint32_t level = 0; level < renderDataGaussian->level; level++) { for (uint32_t level = 0; level < renderDataGaussian->level; level++) {
// horizontal blur // horizontal blur
if (params->direction != 2) { if (params->direction != 2) {
const WGPUImageCopyTexture texSrc { .texture = dst->texture }; wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, sbuff->bindGroupRead, 0, nullptr);
const WGPUImageCopyTexture texDst { .texture = storageDstCopy.texture }; wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dbuff->bindGroupWrite, 0, nullptr);
const WGPUExtent3D copySize { .width = width, .height = height, .depthOrArrayLayers = 1 };
wgpuCommandEncoderCopyTextureToTexture(commandEncoder, &texSrc, &texDst, &copySize);
WGPUComputePassDescriptor computePassDesc{ .label = "Compute pass gaussian blur horizontal" };
WGPUComputePassEncoder computePassEncoder = wgpuCommandEncoderBeginComputePass(commandEncoder, &computePassDesc);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, storageDstCopy.bindGroupRead, 0, nullptr);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dst->bindGroupWrite, 0, nullptr);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 2, renderDataGaussian->bindGroupGaussian, 0, nullptr); wgpuComputePassEncoderSetBindGroup(computePassEncoder, 2, renderDataGaussian->bindGroupGaussian, 0, nullptr);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 3, viewport->bindGroupViewport, 0, nullptr); wgpuComputePassEncoderSetBindGroup(computePassEncoder, 3, viewport->bindGroupViewport, 0, nullptr);
wgpuComputePassEncoderSetPipeline(computePassEncoder, pipelines.gaussian_horz); wgpuComputePassEncoderSetPipeline(computePassEncoder, pipelines.gaussian_horz);
wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, width / 16, height / 16, 1); wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, (aabb.w - 1) / 128 + 1, aabb.h, 1);
wgpuComputePassEncoderEnd(computePassEncoder); std::swap(sbuff, dbuff);
wgpuComputePassEncoderRelease(computePassEncoder);
} }
// vertical blur // vertical blur
if (params->direction != 1) { if (params->direction != 1) {
const WGPUImageCopyTexture texSrc { .texture = dst->texture }; wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, sbuff->bindGroupRead, 0, nullptr);
const WGPUImageCopyTexture texDst { .texture = storageDstCopy.texture }; wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dbuff->bindGroupWrite, 0, nullptr);
const WGPUExtent3D copySize { .width = width, .height = height, .depthOrArrayLayers = 1 };
wgpuCommandEncoderCopyTextureToTexture(commandEncoder, &texSrc, &texDst, &copySize);
WGPUComputePassDescriptor computePassDesc{ .label = "Compute pass gaussian blur vertical" };
WGPUComputePassEncoder computePassEncoder = wgpuCommandEncoderBeginComputePass(commandEncoder, &computePassDesc);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, storageDstCopy.bindGroupRead, 0, nullptr);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dst->bindGroupWrite, 0, nullptr);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 2, renderDataGaussian->bindGroupGaussian, 0, nullptr); wgpuComputePassEncoderSetBindGroup(computePassEncoder, 2, renderDataGaussian->bindGroupGaussian, 0, nullptr);
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 3, viewport->bindGroupViewport, 0, nullptr); wgpuComputePassEncoderSetBindGroup(computePassEncoder, 3, viewport->bindGroupViewport, 0, nullptr);
wgpuComputePassEncoderSetPipeline(computePassEncoder, pipelines.gaussian_vert); wgpuComputePassEncoderSetPipeline(computePassEncoder, pipelines.gaussian_vert);
wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, width / 16, height / 16, 1); wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, aabb.w, (aabb.h - 1) / 128 + 1, 1);
std::swap(sbuff, dbuff);
}
}
// end compute pass
wgpuComputePassEncoderEnd(computePassEncoder); wgpuComputePassEncoderEnd(computePassEncoder);
wgpuComputePassEncoderRelease(computePassEncoder); wgpuComputePassEncoderRelease(computePassEncoder);
}
// if final result stored in intermidiate buffer we must copy result to destination buffer
if (sbuff == &storageDstCopy) {
const WGPUImageCopyTexture texSrc { .texture = sbuff->texture, .origin = { .x = (uint32_t)aabb.x, .y = (uint32_t)aabb.y } };
const WGPUImageCopyTexture texDst { .texture = dbuff->texture, .origin = { .x = (uint32_t)aabb.x, .y = (uint32_t)aabb.y } };
const WGPUExtent3D copySize { .width = (uint32_t)aabb.w, .height = (uint32_t)aabb.h, .depthOrArrayLayers = 1 };
wgpuCommandEncoderCopyTextureToTexture(commandEncoder, &texSrc, &texDst, &copySize);
} }
} }

View file

@ -221,8 +221,7 @@ void WgPipelines::initialize(WgContext& context)
// shader blit // shader blit
shader_blit = createShaderModule(context.device, "The shader blit", cShaderSrc_Blit); shader_blit = createShaderModule(context.device, "The shader blit", cShaderSrc_Blit);
// shader effects // shader effects
shader_gaussian_horz = createShaderModule(context.device, "The shader gaussian horizontal", cShaderSrc_GaussianBlur_Horz); shader_gaussian = createShaderModule(context.device, "The shader gaussian", cShaderSrc_GaussianBlur);
shader_gaussian_vert = createShaderModule(context.device, "The shader gaussian vertical", cShaderSrc_GaussianBlur_Vert);
// layouts // layouts
layout_stencil = createPipelineLayout(context.device, bindGroupLayoutsStencil, 2); layout_stencil = createPipelineLayout(context.device, bindGroupLayoutsStencil, 2);
@ -450,10 +449,10 @@ void WgPipelines::initialize(WgContext& context)
// compute pipeline gaussian blur // compute pipeline gaussian blur
gaussian_horz = createComputePipeline( gaussian_horz = createComputePipeline(
context.device, "The compute pipeline gaussian blur horizontal", context.device, "The compute pipeline gaussian blur horizontal",
shader_gaussian_horz, "cs_main", layout_gaussian); shader_gaussian, "cs_main_horz", layout_gaussian);
gaussian_vert = createComputePipeline( gaussian_vert = createComputePipeline(
context.device, "The compute pipeline gaussian blur vertical", context.device, "The compute pipeline gaussian blur vertical",
shader_gaussian_vert, "cs_main", layout_gaussian); shader_gaussian, "cs_main_vert", layout_gaussian);
} }
void WgPipelines::releaseGraphicHandles(WgContext& context) void WgPipelines::releaseGraphicHandles(WgContext& context)
@ -505,8 +504,7 @@ void WgPipelines::releaseGraphicHandles(WgContext& context)
releasePipelineLayout(layout_depth); releasePipelineLayout(layout_depth);
releasePipelineLayout(layout_stencil); releasePipelineLayout(layout_stencil);
// shaders // shaders
releaseShaderModule(shader_gaussian_horz); releaseShaderModule(shader_gaussian);
releaseShaderModule(shader_gaussian_vert);
releaseShaderModule(shader_blit); releaseShaderModule(shader_blit);
releaseShaderModule(shader_scene_compose); releaseShaderModule(shader_scene_compose);
releaseShaderModule(shader_scene_blend); releaseShaderModule(shader_scene_blend);

View file

@ -47,8 +47,7 @@ private:
// shader blit // shader blit
WGPUShaderModule shader_blit{}; WGPUShaderModule shader_blit{};
// shader effects // shader effects
WGPUShaderModule shader_gaussian_horz{}; WGPUShaderModule shader_gaussian;
WGPUShaderModule shader_gaussian_vert{};
// layouts helpers // layouts helpers
WGPUPipelineLayout layout_stencil{}; WGPUPipelineLayout layout_stencil{};

View file

@ -721,94 +721,106 @@ fn cs_main(@builtin(global_invocation_id) id: vec3u) {
} }
)"; )";
const char* cShaderSrc_GaussianBlur_Horz = R"( const char* cShaderSrc_GaussianBlur = R"(
@group(0) @binding(0) var imageSrc : texture_storage_2d<rgba8unorm, read>; @group(0) @binding(0) var imageSrc : texture_storage_2d<rgba8unorm, read>;
@group(1) @binding(0) var imageDst : texture_storage_2d<rgba8unorm, write>; @group(1) @binding(0) var imageDst : texture_storage_2d<rgba8unorm, write>;
@group(2) @binding(0) var<uniform> settings: vec4f; @group(2) @binding(0) var<uniform> settings: vec4f;
@group(3) @binding(0) var<uniform> viewport: vec4f; @group(3) @binding(0) var<uniform> viewport: vec4f;
const N: u32 = 128;
const M: u32 = N * 3;
var<workgroup> buff: array<vec4f, M>;
fn gaussian(x: f32, sigma: f32) -> f32 { fn gaussian(x: f32, sigma: f32) -> f32 {
let a = 0.39894f / sigma; let a = 0.39894f / sigma;
let b = -(x * x) / (2.0 * sigma * sigma); let b = -(x * x) / (2.0 * sigma * sigma);
return a * exp(b); return a * exp(b);
} }
@compute @workgroup_size(16, 16) @compute @workgroup_size(N, 1)
fn cs_main(@builtin(global_invocation_id) id: vec3u) { fn cs_main_horz(@builtin(global_invocation_id) gid: vec3u,
// id conversion @builtin(local_invocation_id) lid: vec3u) {
let iid = vec2i(id.xy);
// viewport decode
let xmin = i32(viewport.x);
let ymin = i32(viewport.y);
let xmax = i32(viewport.z);
let ymax = i32(viewport.w);
// settings decode // settings decode
let sigma = settings.x; let sigma = settings.x;
let scale = settings.y; let scale = settings.y;
let size = i32(settings.z); let size = i32(settings.z);
// draw borders points outside of viewport
if ((iid.x < xmin) || (iid.x > xmax) || (iid.y < ymin) || (iid.y > ymax)) { return; }
// apply filter
var weight = gaussian(0.0, sigma);
var color = weight * textureLoad(imageSrc, id.xy);
var sum = weight;
for (var i: i32 = 1; i < size; i++) {
let ii = i32(f32(i) * scale);
let idneg = vec2i(clamp(iid.x - ii, xmin, xmax), iid.y);
let idpos = vec2i(clamp(iid.x + ii, xmin, xmax), iid.y);
weight = gaussian(f32(i) * scale, sigma);
color += (weight * textureLoad(imageSrc, vec2u(idneg)));
color += (weight * textureLoad(imageSrc, vec2u(idpos)));
sum += (2.0 * weight);
}
textureStore(imageDst, id.xy, color / sum);
}
)";
const char* cShaderSrc_GaussianBlur_Vert = R"(
@group(0) @binding(0) var imageSrc : texture_storage_2d<rgba8unorm, read>;
@group(1) @binding(0) var imageDst : texture_storage_2d<rgba8unorm, write>;
@group(2) @binding(0) var<uniform> settings: vec4f;
@group(3) @binding(0) var<uniform> viewport: vec4f;
fn gaussian(x: f32, sigma: f32) -> f32 {
let a = 0.39894f / sigma;
let b = -(x * x) / (2.0 * sigma * sigma);
return a * exp(b);
}
@compute @workgroup_size(16, 16)
fn cs_main(@builtin(global_invocation_id) id: vec3u) {
// id conversion
let iid = vec2i(id.xy);
// viewport decode // viewport decode
let xmin = i32(viewport.x); let xmin = i32(viewport.x);
let ymin = i32(viewport.y); let ymin = i32(viewport.y);
let xmax = i32(viewport.z); let xmax = i32(viewport.z);
let ymax = i32(viewport.w); let ymax = i32(viewport.w);
// tex coord
let uid = vec2u(gid.x + u32(xmin), gid.y + u32(ymin));
let iid = vec2i(uid);
// load source to local workgroup memory
buff[lid.x + N*0] = textureLoad(imageSrc, uid - vec2u(N, 0));
buff[lid.x + N*1] = textureLoad(imageSrc, uid + vec2u(0, 0));
buff[lid.x + N*2] = textureLoad(imageSrc, uid + vec2u(N, 0));
workgroupBarrier();
// apply filter
var weight = gaussian(0.0, sigma);
var color = weight * buff[lid.x + N];
var sum = weight;
for (var i: i32 = 1; i < size; i++) {
let ii = i32(f32(i) * scale);
weight = gaussian(f32(i) * scale, sigma);
let poffset = min(iid.x + ii, xmax) - iid.x;
let noffset = max(iid.x - ii, xmin) - iid.x;
color += (weight * buff[i32(lid.x + N) + poffset]);
color += (weight * buff[i32(lid.x + N) + noffset]);
sum += (2.0 * weight);
}
// store result
textureStore(imageDst, uid, color / sum);
}
@compute @workgroup_size(1, N)
fn cs_main_vert(@builtin(global_invocation_id) gid: vec3u,
@builtin(local_invocation_id) lid: vec3u) {
// settings decode // settings decode
let sigma = settings.x; let sigma = settings.x;
let scale = settings.y; let scale = settings.y;
let size = i32(settings.z); let size = i32(settings.z);
// draw borders points outside of viewport // viewport decode
if ((iid.x < xmin) || (iid.x > xmax) || (iid.y < ymin) || (iid.y > ymax)) { return; } let xmin = i32(viewport.x);
let ymin = i32(viewport.y);
let xmax = i32(viewport.z);
let ymax = i32(viewport.w);
// tex coord
let uid = vec2u(gid.x + u32(xmin), gid.y + u32(ymin));
let iid = vec2i(uid);
// load source to local workgroup memory
buff[lid.y + N*0] = textureLoad(imageSrc, uid - vec2u(0, N));
buff[lid.y + N*1] = textureLoad(imageSrc, uid + vec2u(0, 0));
buff[lid.y + N*2] = textureLoad(imageSrc, uid + vec2u(0, N));
workgroupBarrier();
// apply filter // apply filter
var weight = gaussian(0.0, sigma); var weight = gaussian(0.0, sigma);
var color = weight * textureLoad(imageSrc, id.xy); var color = weight * buff[lid.y + N];
var sum = weight; var sum = weight;
for (var i: i32 = 1; i < size; i++) { for (var i: i32 = 1; i < size; i++) {
let ii = i32(f32(i) * scale); let ii = i32(f32(i) * scale);
let idneg = vec2i(iid.x, clamp(iid.y - ii, ymin, ymax));
let idpos = vec2i(iid.x, clamp(iid.y + ii, ymin, ymax));
weight = gaussian(f32(i) * scale, sigma); weight = gaussian(f32(i) * scale, sigma);
color += (weight * textureLoad(imageSrc, vec2u(idneg))); let poffset = min(iid.y + ii, ymax) - iid.y;
color += (weight * textureLoad(imageSrc, vec2u(idpos))); let noffset = max(iid.y - ii, ymin) - iid.y;
color += (weight * buff[i32(lid.y + N) + poffset]);
color += (weight * buff[i32(lid.y + N) + noffset]);
sum += (2.0 * weight); sum += (2.0 * weight);
} }
textureStore(imageDst, id.xy, color / sum);
// store result
textureStore(imageDst, uid, color / sum);
//textureStore(imageDst, uid, vec4f(1.0, 0.0, 0.0, 1.0));
} }
)"; )";

View file

@ -46,7 +46,6 @@ extern const char* cShaderSrc_Blit;
// compute shader sources: effects // compute shader sources: effects
extern const char* cShaderSrc_MergeMasks; extern const char* cShaderSrc_MergeMasks;
extern const char* cShaderSrc_GaussianBlur_Vert; extern const char* cShaderSrc_GaussianBlur;
extern const char* cShaderSrc_GaussianBlur_Horz;
#endif // _TVG_WG_SHEDER_SRC_H_ #endif // _TVG_WG_SHEDER_SRC_H_

View file

@ -205,8 +205,9 @@ void WgShaderTypeGaussianBlur::update(const RenderEffectGaussianBlur* gaussian,
assert(gaussian); assert(gaussian);
const float sigma = gaussian->sigma; const float sigma = gaussian->sigma;
const float scale = std::sqrt(transform.e11 * transform.e11 + transform.e12 * transform.e12); const float scale = std::sqrt(transform.e11 * transform.e11 + transform.e12 * transform.e12);
const float kernel = std::min(WG_GAUSSIAN_KERNEL_SIZE_MAX, 2 * sigma * scale); // kernel size
settings[0] = sigma; settings[0] = sigma;
settings[1] = scale; settings[1] = std::min(WG_GAUSSIAN_KERNEL_SIZE_MAX / kernel, scale);
settings[2] = 2 * sigma * scale; // kernel size settings[2] = kernel;
extend = settings[2] * 2; extend = settings[2] * 2;
} }

View file

@ -68,6 +68,7 @@ struct WgShaderTypeGradient
}; };
// gaussian settings: sigma, scale, extend // gaussian settings: sigma, scale, extend
#define WG_GAUSSIAN_KERNEL_SIZE_MAX (128.0f)
struct WgShaderTypeGaussianBlur struct WgShaderTypeGaussianBlur
{ {
float settings[4]{}; // [0]: sigma, [1]: scale, [2]: kernel size, [3]: unused float settings[4]{}; // [0]: sigma, [1]: scale, [2]: kernel size, [3]: unused