# Objective

- Prepare for streaming by storing vertex data per-meshlet, rather than per-mesh (this means duplicating vertices per-meshlet)
- Compress vertex data to reduce the cost of this

## Solution

The important parts are in `from_mesh.rs`, the changes to the `Meshlet` type in `asset.rs`, and the changes in `meshlet_bindings.wgsl`. Everything else is secondary/boilerplate/straightforward changes.

- Positions are quantized in centimeters with a user-provided power-of-2 factor (ideally auto-determined, but that's a TODO for the future), encoded as an offset relative to the minimum value within the meshlet, and then stored as a packed list of bits using the minimum number of bits needed for each vertex position channel for that meshlet (see the decode sketch below)
  - E.g. quantize positions (lossily; throws away precision that's not needed, leading to fewer bits in the bitstream encoding)
  - Get the min/max quantized value of each X/Y/Z channel of the quantized positions within a meshlet
  - Encode values relative to the min value of the meshlet, i.e. convert from [min, max] to [0, max - min]
  - The new max value in the meshlet is (max - min), which only takes N bits, so we only need N bits to store each channel within the meshlet (lossless)
  - We can store the min value and the per-channel bit count N in the meshlet metadata, and reconstruct the position from the bitstream
- Normals are octahedral encoded and then snorm2x16 packed and stored as a single u32 (see the decode sketch below)
  - Would be better to implement the precise variant of octahedral encoding for extra precision (no extra decode cost), but decided to keep it simple for now and leave that as a followup
  - Tried doing a quantizing and bitstream encoding scheme like I did for positions, but struggled to get it smaller. Decided to go with this for simplicity for now
- UVs are uncompressed and take a full 64 bits per vertex, which is expensive
  - In the future this should be improved
- Tangents, as of the previous PR, are not explicitly stored and are instead derived from screen space gradients
- While I'm here, split up `MeshletMeshSaverLoader` into two separate types

Other future changes include implementing a smaller encoding of triangle data (3 u8 indices = 24 bits per triangle currently), and more disk-oriented compression schemes.
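To make the bitstream layout concrete, here is a minimal WGSL sketch of the per-channel position decode. The names and the exact quantization convention (a step of 1/2^factor centimeters) are illustrative assumptions, not the actual `meshlet_bindings.wgsl` code, and handling of channels that straddle a 32-bit word boundary is omitted:

```wgsl
// Illustrative sketch only; not the actual meshlet_bindings.wgsl code.
// Decodes one position channel stored as an N-bit offset from the meshlet's
// per-channel minimum, assuming a quantization step of 1 / 2^factor centimeters
// and that the channel does not straddle a 32-bit word boundary.
fn decode_position_channel(
    bitstream_word: u32,      // word of the packed bitstream containing this channel
    bit_offset: u32,          // start bit of the channel within that word
    bit_count: u32,           // N = bits needed for (max - min) in this meshlet
    channel_min: i32,         // per-meshlet minimum quantized value for this channel
    quantization_factor: u32, // user-provided power-of-2 exponent
) -> f32 {
    // Map the stored [0, max - min] offset back to the absolute quantized value.
    let quantized = channel_min + i32(extractBits(bitstream_word, bit_offset, bit_count));
    // Undo quantization: quantized / 2^factor centimeters, converted back to meters.
    return f32(quantized) / f32(1u << quantization_factor) * 0.01;
}
```

On the encoding side, the meshlet metadata would carry `channel_min` and `bit_count` for each of X/Y/Z, as described in the bullets above.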
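A sketch of the corresponding normal decode, assuming the octahedral coordinates are kept in [-1, 1] and packed with `pack2x16snorm` (names are illustrative; `octahedral_decode_signed` comes from the `bevy_pbr::utils` module reproduced further below):

```wgsl
// Illustrative sketch only. Unpacks a normal that was octahedral encoded and
// snorm2x16 packed into a single u32, assuming the octahedral coordinates were
// kept in [-1, 1] and packed with pack2x16snorm.
#import bevy_pbr::utils::octahedral_decode_signed

fn unpack_meshlet_normal(packed_normal: u32) -> vec3<f32> {
    // unpack2x16snorm: two 16-bit snorm values -> vec2<f32> in [-1, 1].
    let octahedral = unpack2x16snorm(packed_normal);
    return octahedral_decode_signed(octahedral);
}
```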
References:
* "A Deep Dive into UE5's Nanite Virtualized Geometry" https://advances.realtimerendering.com/s2021/Karis_Nanite_SIGGRAPH_Advances_2021_final.pdf#page=128 (also available on YouTube)
* "Towards Practical Meshlet Compression" https://arxiv.org/pdf/2404.06359
* "Vertex quantization in Omniforce Game Engine" https://daniilvinn.github.io/2024/05/04/omniforce-vertex-quantization.html

## Testing

- Did you test these changes? If so, how?
  - Converted the Stanford bunny and rendered it with a debug material showing normals, and confirmed that it's identical to what's on main. EDIT: See additional testing in the comments below.
- Are there any parts that need more testing?
  - Could use some more size comparisons on various meshes, and testing of different quantization factors. Not sure if 4 is a good default. EDIT: See additional testing in the comments below.
  - Also did not test runtime performance of the shaders. EDIT: See additional testing in the comments below.
- How can other people (reviewers) test your changes? Is there anything specific they need to know?
  - Use my unholy script, replacing the meshlet example: https://paste.rs/7xQHk.rs (you must make `MeshletMesh` fields `pub` instead of `pub(crate)`, add `lz4_flex` as a dev-dependency, and compile with the `meshlet` and `meshlet_processor` features; the mesh must have only positions, normals, and UVs, no vertex colors or tangents)

---

## Migration Guide

- TBD by JMS55 at the end of the release
`bevy_pbr::utils` shader module (WebGPU Shading Language, 89 lines, 3.7 KiB):
```wgsl
#define_import_path bevy_pbr::utils

#import bevy_pbr::rgb9e5

// Generates a random u32 in range [0, u32::MAX].
//
// `state` is a mutable reference to a u32 used as the seed.
//
// Values are generated via "white noise", with no correlation between values.
// In shaders, you often want spatial and/or temporal correlation. Use a different RNG method for these use cases.
//
// https://www.pcg-random.org
// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering
fn rand_u(state: ptr<function, u32>) -> u32 {
    *state = *state * 747796405u + 2891336453u;
    let word = ((*state >> ((*state >> 28u) + 4u)) ^ *state) * 277803737u;
    return (word >> 22u) ^ word;
}

// Generates a random f32 in range [0, 1.0].
fn rand_f(state: ptr<function, u32>) -> f32 {
    *state = *state * 747796405u + 2891336453u;
    let word = ((*state >> ((*state >> 28u) + 4u)) ^ *state) * 277803737u;
    return f32((word >> 22u) ^ word) * bitcast<f32>(0x2f800004u);
}

// Generates a random vec2<f32> where each value is in range [0, 1.0].
fn rand_vec2f(state: ptr<function, u32>) -> vec2<f32> {
    return vec2(rand_f(state), rand_f(state));
}

// Generates a random u32 in range [0, n).
fn rand_range_u(n: u32, state: ptr<function, u32>) -> u32 {
    return rand_u(state) % n;
}

// returns the (0-1, 0-1) position within the given viewport for the current buffer coords.
// buffer coords can be obtained from `@builtin(position).xy`.
// the view uniform struct contains the current camera viewport in `view.viewport`.
// topleft = 0,0
fn coords_to_viewport_uv(position: vec2<f32>, viewport: vec4<f32>) -> vec2<f32> {
    return (position - viewport.xy) / viewport.zw;
}

// https://jcgt.org/published/0003/02/01/paper.pdf

// For encoding normals or unit direction vectors as octahedral coordinates.
fn octahedral_encode(v: vec3<f32>) -> vec2<f32> {
    var n = v / (abs(v.x) + abs(v.y) + abs(v.z));
    let octahedral_wrap = (1.0 - abs(n.yx)) * select(vec2(-1.0), vec2(1.0), n.xy > vec2f(0.0));
    let n_xy = select(octahedral_wrap, n.xy, n.z >= 0.0);
    return n_xy * 0.5 + 0.5;
}

// For decoding normals or unit direction vectors from octahedral coordinates.
fn octahedral_decode(v: vec2<f32>) -> vec3<f32> {
    let f = v * 2.0 - 1.0;
    var n = octahedral_decode_signed(f);
    return normalize(n);
}

// Like octahedral_decode, but for input in [-1, 1] instead of [0, 1].
fn octahedral_decode_signed(v: vec2<f32>) -> vec3<f32> {
    var n = vec3(v.xy, 1.0 - abs(v.x) - abs(v.y));
    let t = saturate(-n.z);
    let w = select(vec2(t), vec2(-t), n.xy >= vec2(0.0));
    n = vec3(n.xy + w, n.z);
    return normalize(n);
}

// https://blog.demofox.org/2022/01/01/interleaved-gradient-noise-a-different-kind-of-low-discrepancy-sequence
fn interleaved_gradient_noise(pixel_coordinates: vec2<f32>, frame: u32) -> f32 {
    let xy = pixel_coordinates + 5.588238 * f32(frame % 64u);
    return fract(52.9829189 * fract(0.06711056 * xy.x + 0.00583715 * xy.y));
}

// https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare (slides 120-135)
// TODO: Use an array here instead of a bunch of constants, once arrays work properly under DX12.
// NOTE: The names have a final underscore to avoid the following error:
// `Composable module identifiers must not require substitution according to naga writeback rules`
const SPIRAL_OFFSET_0_ = vec2<f32>(-0.7071,  0.7071);
const SPIRAL_OFFSET_1_ = vec2<f32>(-0.0000, -0.8750);
const SPIRAL_OFFSET_2_ = vec2<f32>( 0.5303,  0.5303);
const SPIRAL_OFFSET_3_ = vec2<f32>(-0.6250, -0.0000);
const SPIRAL_OFFSET_4_ = vec2<f32>( 0.3536, -0.3536);
const SPIRAL_OFFSET_5_ = vec2<f32>(-0.0000,  0.3750);
const SPIRAL_OFFSET_6_ = vec2<f32>(-0.1768, -0.1768);
const SPIRAL_OFFSET_7_ = vec2<f32>( 0.1250,  0.0000);
```
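A usage note for the RNG helpers above (not part of the original file): the state is just a u32 seed, so any spatial or temporal correlation comes from how that seed is constructed. A minimal illustrative seeding scheme, with arbitrary constants:

```wgsl
// Illustrative only; assumes rand_vec2f from bevy_pbr::utils is in scope.
// Seeds the white-noise RNG per pixel and per frame, then draws a 2D jitter value.
fn example_jitter(pixel: vec2<u32>, frame: u32) -> vec2<f32> {
    // Arbitrary hash-style seed; any per-pixel/per-frame mix works for white noise.
    var rng_state = pixel.x + pixel.y * 52421u + frame * 13371337u;
    // Each call through the pointer advances the state, so successive values are independent.
    return rand_vec2f(&rng_state);
}
```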