mirror of
https://github.com/thorvg/thorvg.git
synced 2025-06-07 21:23:32 +00:00
webgpu_gaussian_optimization
wg_engine: optimize gaussian blur effect for webgpu renderer Issue: https://github.com/thorvg/thorvg/issues/3054
This commit is contained in:
parent
3035501579
commit
37992b52c9
7 changed files with 103 additions and 89 deletions
|
@ -747,41 +747,45 @@ void WgCompositor::gaussianBlur(WgContext& context, WgRenderStorage* dst, const
|
|||
assert(compose->rdViewport);
|
||||
assert(!renderPassEncoder);
|
||||
auto renderDataGaussian = (WgRenderDataGaussian*)params->rd;
|
||||
auto aabb = compose->aabb;
|
||||
auto viewport = compose->rdViewport;
|
||||
WgRenderStorage* sbuff = dst;
|
||||
WgRenderStorage* dbuff = &storageDstCopy;
|
||||
|
||||
// begin compute pass
|
||||
WGPUComputePassDescriptor computePassDesc{ .label = "Compute pass gaussian blur" };
|
||||
WGPUComputePassEncoder computePassEncoder = wgpuCommandEncoderBeginComputePass(commandEncoder, &computePassDesc);
|
||||
for (uint32_t level = 0; level < renderDataGaussian->level; level++) {
|
||||
// horizontal blur
|
||||
if (params->direction != 2) {
|
||||
const WGPUImageCopyTexture texSrc { .texture = dst->texture };
|
||||
const WGPUImageCopyTexture texDst { .texture = storageDstCopy.texture };
|
||||
const WGPUExtent3D copySize { .width = width, .height = height, .depthOrArrayLayers = 1 };
|
||||
wgpuCommandEncoderCopyTextureToTexture(commandEncoder, &texSrc, &texDst, ©Size);
|
||||
WGPUComputePassDescriptor computePassDesc{ .label = "Compute pass gaussian blur horizontal" };
|
||||
WGPUComputePassEncoder computePassEncoder = wgpuCommandEncoderBeginComputePass(commandEncoder, &computePassDesc);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, storageDstCopy.bindGroupRead, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dst->bindGroupWrite, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, sbuff->bindGroupRead, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dbuff->bindGroupWrite, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 2, renderDataGaussian->bindGroupGaussian, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 3, viewport->bindGroupViewport, 0, nullptr);
|
||||
wgpuComputePassEncoderSetPipeline(computePassEncoder, pipelines.gaussian_horz);
|
||||
wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, width / 16, height / 16, 1);
|
||||
wgpuComputePassEncoderEnd(computePassEncoder);
|
||||
wgpuComputePassEncoderRelease(computePassEncoder);
|
||||
wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, (aabb.w - 1) / 128 + 1, aabb.h, 1);
|
||||
std::swap(sbuff, dbuff);
|
||||
}
|
||||
// vertical blur
|
||||
if (params->direction != 1) {
|
||||
const WGPUImageCopyTexture texSrc { .texture = dst->texture };
|
||||
const WGPUImageCopyTexture texDst { .texture = storageDstCopy.texture };
|
||||
const WGPUExtent3D copySize { .width = width, .height = height, .depthOrArrayLayers = 1 };
|
||||
wgpuCommandEncoderCopyTextureToTexture(commandEncoder, &texSrc, &texDst, ©Size);
|
||||
WGPUComputePassDescriptor computePassDesc{ .label = "Compute pass gaussian blur vertical" };
|
||||
WGPUComputePassEncoder computePassEncoder = wgpuCommandEncoderBeginComputePass(commandEncoder, &computePassDesc);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, storageDstCopy.bindGroupRead, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dst->bindGroupWrite, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 0, sbuff->bindGroupRead, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 1, dbuff->bindGroupWrite, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 2, renderDataGaussian->bindGroupGaussian, 0, nullptr);
|
||||
wgpuComputePassEncoderSetBindGroup(computePassEncoder, 3, viewport->bindGroupViewport, 0, nullptr);
|
||||
wgpuComputePassEncoderSetPipeline(computePassEncoder, pipelines.gaussian_vert);
|
||||
wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, width / 16, height / 16, 1);
|
||||
wgpuComputePassEncoderEnd(computePassEncoder);
|
||||
wgpuComputePassEncoderRelease(computePassEncoder);
|
||||
wgpuComputePassEncoderDispatchWorkgroups(computePassEncoder, aabb.w, (aabb.h - 1) / 128 + 1, 1);
|
||||
std::swap(sbuff, dbuff);
|
||||
}
|
||||
}
|
||||
// end compute pass
|
||||
wgpuComputePassEncoderEnd(computePassEncoder);
|
||||
wgpuComputePassEncoderRelease(computePassEncoder);
|
||||
|
||||
// if final result stored in intermidiate buffer we must copy result to destination buffer
|
||||
if (sbuff == &storageDstCopy) {
|
||||
const WGPUImageCopyTexture texSrc { .texture = sbuff->texture, .origin = { .x = (uint32_t)aabb.x, .y = (uint32_t)aabb.y } };
|
||||
const WGPUImageCopyTexture texDst { .texture = dbuff->texture, .origin = { .x = (uint32_t)aabb.x, .y = (uint32_t)aabb.y } };
|
||||
const WGPUExtent3D copySize { .width = (uint32_t)aabb.w, .height = (uint32_t)aabb.h, .depthOrArrayLayers = 1 };
|
||||
wgpuCommandEncoderCopyTextureToTexture(commandEncoder, &texSrc, &texDst, ©Size);
|
||||
}
|
||||
}
|
|
@ -221,8 +221,7 @@ void WgPipelines::initialize(WgContext& context)
|
|||
// shader blit
|
||||
shader_blit = createShaderModule(context.device, "The shader blit", cShaderSrc_Blit);
|
||||
// shader effects
|
||||
shader_gaussian_horz = createShaderModule(context.device, "The shader gaussian horizontal", cShaderSrc_GaussianBlur_Horz);
|
||||
shader_gaussian_vert = createShaderModule(context.device, "The shader gaussian vertical", cShaderSrc_GaussianBlur_Vert);
|
||||
shader_gaussian = createShaderModule(context.device, "The shader gaussian", cShaderSrc_GaussianBlur);
|
||||
|
||||
// layouts
|
||||
layout_stencil = createPipelineLayout(context.device, bindGroupLayoutsStencil, 2);
|
||||
|
@ -450,10 +449,10 @@ void WgPipelines::initialize(WgContext& context)
|
|||
// compute pipeline gaussian blur
|
||||
gaussian_horz = createComputePipeline(
|
||||
context.device, "The compute pipeline gaussian blur horizontal",
|
||||
shader_gaussian_horz, "cs_main", layout_gaussian);
|
||||
shader_gaussian, "cs_main_horz", layout_gaussian);
|
||||
gaussian_vert = createComputePipeline(
|
||||
context.device, "The compute pipeline gaussian blur vertical",
|
||||
shader_gaussian_vert, "cs_main", layout_gaussian);
|
||||
shader_gaussian, "cs_main_vert", layout_gaussian);
|
||||
}
|
||||
|
||||
void WgPipelines::releaseGraphicHandles(WgContext& context)
|
||||
|
@ -505,8 +504,7 @@ void WgPipelines::releaseGraphicHandles(WgContext& context)
|
|||
releasePipelineLayout(layout_depth);
|
||||
releasePipelineLayout(layout_stencil);
|
||||
// shaders
|
||||
releaseShaderModule(shader_gaussian_horz);
|
||||
releaseShaderModule(shader_gaussian_vert);
|
||||
releaseShaderModule(shader_gaussian);
|
||||
releaseShaderModule(shader_blit);
|
||||
releaseShaderModule(shader_scene_compose);
|
||||
releaseShaderModule(shader_scene_blend);
|
||||
|
|
|
@ -47,8 +47,7 @@ private:
|
|||
// shader blit
|
||||
WGPUShaderModule shader_blit{};
|
||||
// shader effects
|
||||
WGPUShaderModule shader_gaussian_horz{};
|
||||
WGPUShaderModule shader_gaussian_vert{};
|
||||
WGPUShaderModule shader_gaussian;
|
||||
|
||||
// layouts helpers
|
||||
WGPUPipelineLayout layout_stencil{};
|
||||
|
|
|
@ -721,94 +721,106 @@ fn cs_main(@builtin(global_invocation_id) id: vec3u) {
|
|||
}
|
||||
)";
|
||||
|
||||
const char* cShaderSrc_GaussianBlur_Horz = R"(
|
||||
const char* cShaderSrc_GaussianBlur = R"(
|
||||
@group(0) @binding(0) var imageSrc : texture_storage_2d<rgba8unorm, read>;
|
||||
@group(1) @binding(0) var imageDst : texture_storage_2d<rgba8unorm, write>;
|
||||
@group(2) @binding(0) var<uniform> settings: vec4f;
|
||||
@group(3) @binding(0) var<uniform> viewport: vec4f;
|
||||
|
||||
const N: u32 = 128;
|
||||
const M: u32 = N * 3;
|
||||
var<workgroup> buff: array<vec4f, M>;
|
||||
|
||||
fn gaussian(x: f32, sigma: f32) -> f32 {
|
||||
let a = 0.39894f / sigma;
|
||||
let b = -(x * x) / (2.0 * sigma * sigma);
|
||||
return a * exp(b);
|
||||
}
|
||||
|
||||
@compute @workgroup_size(16, 16)
|
||||
fn cs_main(@builtin(global_invocation_id) id: vec3u) {
|
||||
// id conversion
|
||||
let iid = vec2i(id.xy);
|
||||
// viewport decode
|
||||
let xmin = i32(viewport.x);
|
||||
let ymin = i32(viewport.y);
|
||||
let xmax = i32(viewport.z);
|
||||
let ymax = i32(viewport.w);
|
||||
@compute @workgroup_size(N, 1)
|
||||
fn cs_main_horz(@builtin(global_invocation_id) gid: vec3u,
|
||||
@builtin(local_invocation_id) lid: vec3u) {
|
||||
// settings decode
|
||||
let sigma = settings.x;
|
||||
let scale = settings.y;
|
||||
let size = i32(settings.z);
|
||||
|
||||
// draw borders points outside of viewport
|
||||
if ((iid.x < xmin) || (iid.x > xmax) || (iid.y < ymin) || (iid.y > ymax)) { return; }
|
||||
|
||||
// apply filter
|
||||
var weight = gaussian(0.0, sigma);
|
||||
var color = weight * textureLoad(imageSrc, id.xy);
|
||||
var sum = weight;
|
||||
for (var i: i32 = 1; i < size; i++) {
|
||||
let ii = i32(f32(i) * scale);
|
||||
let idneg = vec2i(clamp(iid.x - ii, xmin, xmax), iid.y);
|
||||
let idpos = vec2i(clamp(iid.x + ii, xmin, xmax), iid.y);
|
||||
weight = gaussian(f32(i) * scale, sigma);
|
||||
color += (weight * textureLoad(imageSrc, vec2u(idneg)));
|
||||
color += (weight * textureLoad(imageSrc, vec2u(idpos)));
|
||||
sum += (2.0 * weight);
|
||||
}
|
||||
textureStore(imageDst, id.xy, color / sum);
|
||||
}
|
||||
)";
|
||||
|
||||
const char* cShaderSrc_GaussianBlur_Vert = R"(
|
||||
@group(0) @binding(0) var imageSrc : texture_storage_2d<rgba8unorm, read>;
|
||||
@group(1) @binding(0) var imageDst : texture_storage_2d<rgba8unorm, write>;
|
||||
@group(2) @binding(0) var<uniform> settings: vec4f;
|
||||
@group(3) @binding(0) var<uniform> viewport: vec4f;
|
||||
|
||||
fn gaussian(x: f32, sigma: f32) -> f32 {
|
||||
let a = 0.39894f / sigma;
|
||||
let b = -(x * x) / (2.0 * sigma * sigma);
|
||||
return a * exp(b);
|
||||
}
|
||||
|
||||
@compute @workgroup_size(16, 16)
|
||||
fn cs_main(@builtin(global_invocation_id) id: vec3u) {
|
||||
// id conversion
|
||||
let iid = vec2i(id.xy);
|
||||
// viewport decode
|
||||
let xmin = i32(viewport.x);
|
||||
let ymin = i32(viewport.y);
|
||||
let xmax = i32(viewport.z);
|
||||
let ymax = i32(viewport.w);
|
||||
|
||||
// tex coord
|
||||
let uid = vec2u(gid.x + u32(xmin), gid.y + u32(ymin));
|
||||
let iid = vec2i(uid);
|
||||
|
||||
// load source to local workgroup memory
|
||||
buff[lid.x + N*0] = textureLoad(imageSrc, uid - vec2u(N, 0));
|
||||
buff[lid.x + N*1] = textureLoad(imageSrc, uid + vec2u(0, 0));
|
||||
buff[lid.x + N*2] = textureLoad(imageSrc, uid + vec2u(N, 0));
|
||||
workgroupBarrier();
|
||||
|
||||
// apply filter
|
||||
var weight = gaussian(0.0, sigma);
|
||||
var color = weight * buff[lid.x + N];
|
||||
var sum = weight;
|
||||
|
||||
for (var i: i32 = 1; i < size; i++) {
|
||||
let ii = i32(f32(i) * scale);
|
||||
weight = gaussian(f32(i) * scale, sigma);
|
||||
let poffset = min(iid.x + ii, xmax) - iid.x;
|
||||
let noffset = max(iid.x - ii, xmin) - iid.x;
|
||||
color += (weight * buff[i32(lid.x + N) + poffset]);
|
||||
color += (weight * buff[i32(lid.x + N) + noffset]);
|
||||
sum += (2.0 * weight);
|
||||
}
|
||||
|
||||
// store result
|
||||
textureStore(imageDst, uid, color / sum);
|
||||
}
|
||||
|
||||
@compute @workgroup_size(1, N)
|
||||
fn cs_main_vert(@builtin(global_invocation_id) gid: vec3u,
|
||||
@builtin(local_invocation_id) lid: vec3u) {
|
||||
// settings decode
|
||||
let sigma = settings.x;
|
||||
let scale = settings.y;
|
||||
let size = i32(settings.z);
|
||||
|
||||
// draw borders points outside of viewport
|
||||
if ((iid.x < xmin) || (iid.x > xmax) || (iid.y < ymin) || (iid.y > ymax)) { return; }
|
||||
// viewport decode
|
||||
let xmin = i32(viewport.x);
|
||||
let ymin = i32(viewport.y);
|
||||
let xmax = i32(viewport.z);
|
||||
let ymax = i32(viewport.w);
|
||||
|
||||
// tex coord
|
||||
let uid = vec2u(gid.x + u32(xmin), gid.y + u32(ymin));
|
||||
let iid = vec2i(uid);
|
||||
|
||||
// load source to local workgroup memory
|
||||
buff[lid.y + N*0] = textureLoad(imageSrc, uid - vec2u(0, N));
|
||||
buff[lid.y + N*1] = textureLoad(imageSrc, uid + vec2u(0, 0));
|
||||
buff[lid.y + N*2] = textureLoad(imageSrc, uid + vec2u(0, N));
|
||||
workgroupBarrier();
|
||||
|
||||
// apply filter
|
||||
var weight = gaussian(0.0, sigma);
|
||||
var color = weight * textureLoad(imageSrc, id.xy);
|
||||
var color = weight * buff[lid.y + N];
|
||||
var sum = weight;
|
||||
|
||||
for (var i: i32 = 1; i < size; i++) {
|
||||
let ii = i32(f32(i) * scale);
|
||||
let idneg = vec2i(iid.x, clamp(iid.y - ii, ymin, ymax));
|
||||
let idpos = vec2i(iid.x, clamp(iid.y + ii, ymin, ymax));
|
||||
weight = gaussian(f32(i) * scale, sigma);
|
||||
color += (weight * textureLoad(imageSrc, vec2u(idneg)));
|
||||
color += (weight * textureLoad(imageSrc, vec2u(idpos)));
|
||||
let poffset = min(iid.y + ii, ymax) - iid.y;
|
||||
let noffset = max(iid.y - ii, ymin) - iid.y;
|
||||
color += (weight * buff[i32(lid.y + N) + poffset]);
|
||||
color += (weight * buff[i32(lid.y + N) + noffset]);
|
||||
sum += (2.0 * weight);
|
||||
}
|
||||
textureStore(imageDst, id.xy, color / sum);
|
||||
|
||||
// store result
|
||||
textureStore(imageDst, uid, color / sum);
|
||||
//textureStore(imageDst, uid, vec4f(1.0, 0.0, 0.0, 1.0));
|
||||
}
|
||||
)";
|
||||
)";
|
|
@ -46,7 +46,6 @@ extern const char* cShaderSrc_Blit;
|
|||
|
||||
// compute shader sources: effects
|
||||
extern const char* cShaderSrc_MergeMasks;
|
||||
extern const char* cShaderSrc_GaussianBlur_Vert;
|
||||
extern const char* cShaderSrc_GaussianBlur_Horz;
|
||||
extern const char* cShaderSrc_GaussianBlur;
|
||||
|
||||
#endif // _TVG_WG_SHEDER_SRC_H_
|
||||
|
|
|
@ -205,8 +205,9 @@ void WgShaderTypeGaussianBlur::update(const RenderEffectGaussianBlur* gaussian,
|
|||
assert(gaussian);
|
||||
const float sigma = gaussian->sigma;
|
||||
const float scale = std::sqrt(transform.e11 * transform.e11 + transform.e12 * transform.e12);
|
||||
const float kernel = std::min(WG_GAUSSIAN_KERNEL_SIZE_MAX, 2 * sigma * scale); // kernel size
|
||||
settings[0] = sigma;
|
||||
settings[1] = scale;
|
||||
settings[2] = 2 * sigma * scale; // kernel size
|
||||
settings[1] = std::min(WG_GAUSSIAN_KERNEL_SIZE_MAX / kernel, scale);
|
||||
settings[2] = kernel;
|
||||
extend = settings[2] * 2;
|
||||
}
|
|
@ -68,6 +68,7 @@ struct WgShaderTypeGradient
|
|||
};
|
||||
|
||||
// gaussian settings: sigma, scale, extend
|
||||
#define WG_GAUSSIAN_KERNEL_SIZE_MAX (128.0f)
|
||||
struct WgShaderTypeGaussianBlur
|
||||
{
|
||||
float settings[4]{}; // [0]: sigma, [1]: scale, [2]: kernel size, [3]: unused
|
||||
|
|
Loading…
Add table
Reference in a new issue