bevy/examples/shader/shader_instancing.rs
Patrick Walton 37522fd0ae
Micro-optimize queue_material_meshes, primarily to remove bit manipulation. (#12791)
This commit makes the following optimizations:

## `MeshPipelineKey`/`BaseMeshPipelineKey` split

`MeshPipelineKey` has been split into `BaseMeshPipelineKey`, which lives
in `bevy_render` and `MeshPipelineKey`, which lives in `bevy_pbr`.
Conceptually, `BaseMeshPipelineKey` is a superclass of
`MeshPipelineKey`. For `BaseMeshPipelineKey`, the bits start at the
highest (most significant) bit and grow downward toward the lowest bit;
for `MeshPipelineKey`, the bits start at the lowest bit and grow upward
toward the highest bit. This prevents them from colliding.

The goal of this is to avoid having to reassemble bits of the pipeline
key for every mesh every frame. Instead, we can just use a bitwise or
operation to combine the pieces that make up a `MeshPipelineKey`.

## `specialize_slow`

Previously, all of `specialize()` was marked as `#[inline]`. This
bloated `queue_material_meshes` unnecessarily, as a large chunk of it
ended up being a slow path that was rarely hit. This commit refactors
the function to move the slow path to `specialize_slow()`.

Together, these two changes shave about 5% off `queue_material_meshes`:

![Screenshot 2024-03-29
130002](https://github.com/bevyengine/bevy/assets/157897/a7e5a994-a807-4328-b314-9003429dcdd2)

## Migration Guide

- The `primitive_topology` field on `GpuMesh` is now an accessor method:
`GpuMesh::primitive_topology()`.
- For performance reasons, `MeshPipelineKey` has been split into
`BaseMeshPipelineKey`, which lives in `bevy_render`, and
`MeshPipelineKey`, which lives in `bevy_pbr`. These two should be
combined with bitwise-or to produce the final `MeshPipelineKey`.
2024-04-01 21:58:53 +00:00

278 lines
9.3 KiB
Rust

//! A shader that renders a mesh multiple times in one draw call.
use bevy::{
core_pipeline::core_3d::Transparent3d,
ecs::{
query::QueryItem,
system::{lifetimeless::*, SystemParamItem},
},
pbr::{
MeshPipeline, MeshPipelineKey, RenderMeshInstances, SetMeshBindGroup, SetMeshViewBindGroup,
},
prelude::*,
render::{
extract_component::{ExtractComponent, ExtractComponentPlugin},
mesh::{GpuBufferInfo, MeshVertexBufferLayoutRef},
render_asset::RenderAssets,
render_phase::{
AddRenderCommand, DrawFunctions, PhaseItem, RenderCommand, RenderCommandResult,
SetItemPipeline, SortedRenderPhase, TrackedRenderPass,
},
render_resource::*,
renderer::RenderDevice,
view::{ExtractedView, NoFrustumCulling},
Render, RenderApp, RenderSet,
},
};
use bytemuck::{Pod, Zeroable};
fn main() {
App::new()
.add_plugins((DefaultPlugins, CustomMaterialPlugin))
.add_systems(Startup, setup)
.run();
}
fn setup(mut commands: Commands, mut meshes: ResMut<Assets<Mesh>>) {
commands.spawn((
meshes.add(Cuboid::new(0.5, 0.5, 0.5)),
SpatialBundle::INHERITED_IDENTITY,
InstanceMaterialData(
(1..=10)
.flat_map(|x| (1..=10).map(move |y| (x as f32 / 10.0, y as f32 / 10.0)))
.map(|(x, y)| InstanceData {
position: Vec3::new(x * 10.0 - 5.0, y * 10.0 - 5.0, 0.0),
scale: 1.0,
color: LinearRgba::from(Color::hsla(x * 360., y, 0.5, 1.0)).to_f32_array(),
})
.collect(),
),
// NOTE: Frustum culling is done based on the Aabb of the Mesh and the GlobalTransform.
// As the cube is at the origin, if its Aabb moves outside the view frustum, all the
// instanced cubes will be culled.
// The InstanceMaterialData contains the 'GlobalTransform' information for this custom
// instancing, and that is not taken into account with the built-in frustum culling.
// We must disable the built-in frustum culling by adding the `NoFrustumCulling` marker
// component to avoid incorrect culling.
NoFrustumCulling,
));
// camera
commands.spawn(Camera3dBundle {
transform: Transform::from_xyz(0.0, 0.0, 15.0).looking_at(Vec3::ZERO, Vec3::Y),
..default()
});
}
#[derive(Component, Deref)]
struct InstanceMaterialData(Vec<InstanceData>);
impl ExtractComponent for InstanceMaterialData {
type QueryData = &'static InstanceMaterialData;
type QueryFilter = ();
type Out = Self;
fn extract_component(item: QueryItem<'_, Self::QueryData>) -> Option<Self> {
Some(InstanceMaterialData(item.0.clone()))
}
}
struct CustomMaterialPlugin;
impl Plugin for CustomMaterialPlugin {
fn build(&self, app: &mut App) {
app.add_plugins(ExtractComponentPlugin::<InstanceMaterialData>::default());
app.sub_app_mut(RenderApp)
.add_render_command::<Transparent3d, DrawCustom>()
.init_resource::<SpecializedMeshPipelines<CustomPipeline>>()
.add_systems(
Render,
(
queue_custom.in_set(RenderSet::QueueMeshes),
prepare_instance_buffers.in_set(RenderSet::PrepareResources),
),
);
}
fn finish(&self, app: &mut App) {
app.sub_app_mut(RenderApp).init_resource::<CustomPipeline>();
}
}
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct InstanceData {
position: Vec3,
scale: f32,
color: [f32; 4],
}
#[allow(clippy::too_many_arguments)]
fn queue_custom(
transparent_3d_draw_functions: Res<DrawFunctions<Transparent3d>>,
custom_pipeline: Res<CustomPipeline>,
msaa: Res<Msaa>,
mut pipelines: ResMut<SpecializedMeshPipelines<CustomPipeline>>,
pipeline_cache: Res<PipelineCache>,
meshes: Res<RenderAssets<Mesh>>,
render_mesh_instances: Res<RenderMeshInstances>,
material_meshes: Query<Entity, With<InstanceMaterialData>>,
mut views: Query<(&ExtractedView, &mut SortedRenderPhase<Transparent3d>)>,
) {
let draw_custom = transparent_3d_draw_functions.read().id::<DrawCustom>();
let msaa_key = MeshPipelineKey::from_msaa_samples(msaa.samples());
for (view, mut transparent_phase) in &mut views {
let view_key = msaa_key | MeshPipelineKey::from_hdr(view.hdr);
let rangefinder = view.rangefinder3d();
for entity in &material_meshes {
let Some(mesh_instance) = render_mesh_instances.get(&entity) else {
continue;
};
let Some(mesh) = meshes.get(mesh_instance.mesh_asset_id) else {
continue;
};
let key =
view_key | MeshPipelineKey::from_primitive_topology(mesh.primitive_topology());
let pipeline = pipelines
.specialize(&pipeline_cache, &custom_pipeline, key, &mesh.layout)
.unwrap();
transparent_phase.add(Transparent3d {
entity,
pipeline,
draw_function: draw_custom,
distance: rangefinder
.distance_translation(&mesh_instance.transforms.transform.translation),
batch_range: 0..1,
dynamic_offset: None,
});
}
}
}
#[derive(Component)]
struct InstanceBuffer {
buffer: Buffer,
length: usize,
}
fn prepare_instance_buffers(
mut commands: Commands,
query: Query<(Entity, &InstanceMaterialData)>,
render_device: Res<RenderDevice>,
) {
for (entity, instance_data) in &query {
let buffer = render_device.create_buffer_with_data(&BufferInitDescriptor {
label: Some("instance data buffer"),
contents: bytemuck::cast_slice(instance_data.as_slice()),
usage: BufferUsages::VERTEX | BufferUsages::COPY_DST,
});
commands.entity(entity).insert(InstanceBuffer {
buffer,
length: instance_data.len(),
});
}
}
#[derive(Resource)]
struct CustomPipeline {
shader: Handle<Shader>,
mesh_pipeline: MeshPipeline,
}
impl FromWorld for CustomPipeline {
fn from_world(world: &mut World) -> Self {
let mesh_pipeline = world.resource::<MeshPipeline>();
CustomPipeline {
shader: world.load_asset("shaders/instancing.wgsl"),
mesh_pipeline: mesh_pipeline.clone(),
}
}
}
impl SpecializedMeshPipeline for CustomPipeline {
type Key = MeshPipelineKey;
fn specialize(
&self,
key: Self::Key,
layout: &MeshVertexBufferLayoutRef,
) -> Result<RenderPipelineDescriptor, SpecializedMeshPipelineError> {
let mut descriptor = self.mesh_pipeline.specialize(key, layout)?;
descriptor.vertex.shader = self.shader.clone();
descriptor.vertex.buffers.push(VertexBufferLayout {
array_stride: std::mem::size_of::<InstanceData>() as u64,
step_mode: VertexStepMode::Instance,
attributes: vec![
VertexAttribute {
format: VertexFormat::Float32x4,
offset: 0,
shader_location: 3, // shader locations 0-2 are taken up by Position, Normal and UV attributes
},
VertexAttribute {
format: VertexFormat::Float32x4,
offset: VertexFormat::Float32x4.size(),
shader_location: 4,
},
],
});
descriptor.fragment.as_mut().unwrap().shader = self.shader.clone();
Ok(descriptor)
}
}
type DrawCustom = (
SetItemPipeline,
SetMeshViewBindGroup<0>,
SetMeshBindGroup<1>,
DrawMeshInstanced,
);
struct DrawMeshInstanced;
impl<P: PhaseItem> RenderCommand<P> for DrawMeshInstanced {
type Param = (SRes<RenderAssets<Mesh>>, SRes<RenderMeshInstances>);
type ViewQuery = ();
type ItemQuery = Read<InstanceBuffer>;
#[inline]
fn render<'w>(
item: &P,
_view: (),
instance_buffer: Option<&'w InstanceBuffer>,
(meshes, render_mesh_instances): SystemParamItem<'w, '_, Self::Param>,
pass: &mut TrackedRenderPass<'w>,
) -> RenderCommandResult {
let Some(mesh_instance) = render_mesh_instances.get(&item.entity()) else {
return RenderCommandResult::Failure;
};
let Some(gpu_mesh) = meshes.into_inner().get(mesh_instance.mesh_asset_id) else {
return RenderCommandResult::Failure;
};
let Some(instance_buffer) = instance_buffer else {
return RenderCommandResult::Failure;
};
pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..));
pass.set_vertex_buffer(1, instance_buffer.buffer.slice(..));
match &gpu_mesh.buffer_info {
GpuBufferInfo::Indexed {
buffer,
index_format,
count,
} => {
pass.set_index_buffer(buffer.slice(..), 0, *index_format);
pass.draw_indexed(0..*count, 0, 0..instance_buffer.length as u32);
}
GpuBufferInfo::NonIndexed => {
pass.draw(0..gpu_mesh.vertex_count, 0..instance_buffer.length as u32);
}
}
RenderCommandResult::Success
}
}