bevy/crates/bevy_pbr/src/render/mesh.wgsl
Robert Swain 0a11af9375
Reduce the size of MeshUniform to improve performance (#9416)
# Objective

- Significantly reduce the size of MeshUniform by only including
necessary data.

## Solution

Local to world, model transforms are affine. This means they only need a
4x3 matrix to represent them.

`MeshUniform` stores the current, and previous model transforms, and the
inverse transpose of the current model transform, all as 4x4 matrices.
Instead we can store the current, and previous model transforms as 4x3
matrices, and we only need the upper-left 3x3 part of the inverse
transpose of the current model transform. This change allows us to
reduce the serialized MeshUniform size from 208 bytes to 144 bytes,
which is over a 30% saving in data to serialize, and VRAM bandwidth and
space.

## Benchmarks

On an M1 Max, running `many_cubes -- sphere`, main is in yellow, this PR
is in red:
<img width="1484" alt="Screenshot 2023-08-11 at 02 36 43"
src="https://github.com/bevyengine/bevy/assets/302146/7d99c7b3-f2bb-4004-a8d0-4c00f755cb0d">
A reduction in frame time of ~14%.

---

## Changelog

- Changed: Redefined `MeshUniform` to improve performance by using 4x3
affine transforms and reconstructing 4x4 matrices in the shader. Helper
functions were added to `bevy_pbr::mesh_functions` to unpack the data.
`affine_to_square` converts the packed 4x3 in 3x4 matrix data to a 4x4
matrix. `mat2x4_f32_to_mat3x3` converts the 3x3 in mat2x4 + f32 matrix
data back into a 3x3.

## Migration Guide

Shader code before:
```
var model = mesh[instance_index].model;
```

Shader code after:
```
#import bevy_pbr::mesh_functions affine_to_square

var model = affine_to_square(mesh[instance_index].model);
```
2023-08-15 06:00:23 +00:00

128 lines
3.7 KiB
WebGPU Shading Language

#import bevy_pbr::mesh_functions as mesh_functions
#import bevy_pbr::skinning
#import bevy_pbr::morph
#import bevy_pbr::mesh_bindings mesh
#import bevy_pbr::mesh_vertex_output MeshVertexOutput
#import bevy_render::instance_index get_instance_index
struct Vertex {
@builtin(instance_index) instance_index: u32,
#ifdef VERTEX_POSITIONS
@location(0) position: vec3<f32>,
#endif
#ifdef VERTEX_NORMALS
@location(1) normal: vec3<f32>,
#endif
#ifdef VERTEX_UVS
@location(2) uv: vec2<f32>,
#endif
#ifdef VERTEX_TANGENTS
@location(3) tangent: vec4<f32>,
#endif
#ifdef VERTEX_COLORS
@location(4) color: vec4<f32>,
#endif
#ifdef SKINNED
@location(5) joint_indices: vec4<u32>,
@location(6) joint_weights: vec4<f32>,
#endif
#ifdef MORPH_TARGETS
@builtin(vertex_index) index: u32,
#endif
};
#ifdef MORPH_TARGETS
fn morph_vertex(vertex_in: Vertex) -> Vertex {
var vertex = vertex_in;
let weight_count = bevy_pbr::morph::layer_count();
for (var i: u32 = 0u; i < weight_count; i ++) {
let weight = bevy_pbr::morph::weight_at(i);
if weight == 0.0 {
continue;
}
vertex.position += weight * bevy_pbr::morph::morph(vertex.index, bevy_pbr::morph::position_offset, i);
#ifdef VERTEX_NORMALS
vertex.normal += weight * bevy_pbr::morph::morph(vertex.index, bevy_pbr::morph::normal_offset, i);
#endif
#ifdef VERTEX_TANGENTS
vertex.tangent += vec4(weight * bevy_pbr::morph::morph(vertex.index, bevy_pbr::morph::tangent_offset, i), 0.0);
#endif
}
return vertex;
}
#endif
@vertex
fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput {
var out: MeshVertexOutput;
#ifdef MORPH_TARGETS
var vertex = morph_vertex(vertex_no_morph);
#else
var vertex = vertex_no_morph;
#endif
#ifdef SKINNED
var model = bevy_pbr::skinning::skin_model(vertex.joint_indices, vertex.joint_weights);
#else
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416 .
var model = mesh_functions::get_model_matrix(vertex_no_morph.instance_index);
#endif
#ifdef VERTEX_NORMALS
#ifdef SKINNED
out.world_normal = bevy_pbr::skinning::skin_normals(model, vertex.normal);
#else
out.world_normal = mesh_functions::mesh_normal_local_to_world(
vertex.normal,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
get_instance_index(vertex_no_morph.instance_index)
);
#endif
#endif
#ifdef VERTEX_POSITIONS
out.world_position = mesh_functions::mesh_position_local_to_world(model, vec4<f32>(vertex.position, 1.0));
out.position = mesh_functions::mesh_position_world_to_clip(out.world_position);
#endif
#ifdef VERTEX_UVS
out.uv = vertex.uv;
#endif
#ifdef VERTEX_TANGENTS
out.world_tangent = mesh_functions::mesh_tangent_local_to_world(
model,
vertex.tangent,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
get_instance_index(vertex_no_morph.instance_index)
);
#endif
#ifdef VERTEX_COLORS
out.color = vertex.color;
#endif
#ifdef VERTEX_OUTPUT_INSTANCE_INDEX
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
out.instance_index = get_instance_index(vertex_no_morph.instance_index);
#endif
return out;
}
@fragment
fn fragment(
mesh: MeshVertexOutput,
) -> @location(0) vec4<f32> {
#ifdef VERTEX_COLORS
return mesh.color;
#else
return vec4<f32>(1.0, 0.0, 1.0, 1.0);
#endif
}