Cache MeshInputUniform indices in each RenderBin. (#17772)

Currently, we look up each `MeshInputUniform` index in a hash table that maps the main entity ID to the index every frame. This is inefficient, cache unfriendly, and unnecessary, as the `MeshInputUniform` index for an entity remains the same from frame to frame (even if the input uniform changes). This commit changes the `IndexSet` in the `RenderBin` to an `IndexMap` that maps the `MainEntity` to `InputUniformIndex` (a new type that this patch adds for more type safety). On Caldera with parallel `batch_and_prepare_binned_render_phase`, this patch improves that function from 3.18 ms to 2.42 ms, a 31% speedup.
2025-02-11 14:38:52 -08:00 · 2025-02-11 14:38:52 -08:00 · 85b366a8a2
commit 85b366a8a2
parent ce433955e6
13 changed files with 99 additions and 61 deletions
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@ -1002,6 +1002,7 @@ pub fn queue_material_meshes<M: Material>(
                        batch_set_key,
                        bin_key,
                        (*render_entity, *visible_entity),
+                        mesh_instance.current_uniform_index,
                        BinnedRenderPhaseType::mesh(
                            mesh_instance.should_batch(),
                            &gpu_preprocessing_support,
@ -1025,6 +1026,7 @@ pub fn queue_material_meshes<M: Material>(
                        batch_set_key,
                        bin_key,
                        (*render_entity, *visible_entity),
+                        mesh_instance.current_uniform_index,
                        BinnedRenderPhaseType::mesh(
                            mesh_instance.should_batch(),
                            &gpu_preprocessing_support,
--- a/crates/bevy_pbr/src/prepass/mod.rs
+++ b/crates/bevy_pbr/src/prepass/mod.rs
@ -1145,6 +1145,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
                                asset_id: mesh_instance.mesh_asset_id.into(),
                            },
                            (*render_entity, *visible_entity),
+                            mesh_instance.current_uniform_index,
                            BinnedRenderPhaseType::mesh(
                                mesh_instance.should_batch(),
                                &gpu_preprocessing_support,
@ -1169,6 +1170,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
                                asset_id: mesh_instance.mesh_asset_id.into(),
                            },
                            (*render_entity, *visible_entity),
+                            mesh_instance.current_uniform_index,
                            BinnedRenderPhaseType::mesh(
                                mesh_instance.should_batch(),
                                &gpu_preprocessing_support,
@ -1195,6 +1197,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
                            batch_set_key,
                            bin_key,
                            (*render_entity, *visible_entity),
+                            mesh_instance.current_uniform_index,
                            BinnedRenderPhaseType::mesh(
                                mesh_instance.should_batch(),
                                &gpu_preprocessing_support,
@ -1218,6 +1221,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
                            batch_set_key,
                            bin_key,
                            (*render_entity, *visible_entity),
+                            mesh_instance.current_uniform_index,
                            BinnedRenderPhaseType::mesh(
                                mesh_instance.should_batch(),
                                &gpu_preprocessing_support,
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@ -1946,6 +1946,7 @@ pub fn queue_shadows<M: Material>(
                        asset_id: mesh_instance.mesh_asset_id.into(),
                    },
                    (entity, main_entity),
+                    mesh_instance.current_uniform_index,
                    BinnedRenderPhaseType::mesh(
                        mesh_instance.should_batch(),
                        &gpu_preprocessing_support,
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@ -30,7 +30,7 @@ use bevy_render::{
    primitives::Aabb,
    render_asset::RenderAssets,
    render_phase::{
-        BinnedRenderPhasePlugin, PhaseItem, PhaseItemExtraIndex, RenderCommand,
+        BinnedRenderPhasePlugin, InputUniformIndex, PhaseItem, PhaseItemExtraIndex, RenderCommand,
        RenderCommandResult, SortedRenderPhasePlugin, TrackedRenderPass,
    },
    render_resource::*,
@ -958,6 +958,7 @@ impl RenderMeshInstancesCpu {
            .map(|render_mesh_instance| RenderMeshQueueData {
                shared: &render_mesh_instance.shared,
                translation: render_mesh_instance.transforms.world_from_local.translation,
+                current_uniform_index: InputUniformIndex::default(),
            })
    }

@ -981,6 +982,9 @@ impl RenderMeshInstancesGpu {
            .map(|render_mesh_instance| RenderMeshQueueData {
                shared: &render_mesh_instance.shared,
                translation: render_mesh_instance.translation,
+                current_uniform_index: InputUniformIndex(
+                    render_mesh_instance.current_uniform_index.into(),
+                ),
            })
    }

@ -1281,6 +1285,9 @@ pub struct RenderMeshQueueData<'a> {
    pub shared: &'a RenderMeshInstanceShared,
    /// The translation of the mesh instance.
    pub translation: Vec3,
+    /// The index of the [`MeshInputUniform`] in the GPU buffer for this mesh
+    /// instance.
+    pub current_uniform_index: InputUniformIndex,
 }

 /// A [`SystemSet`] that encompasses both [`extract_meshes_for_cpu_building`]
@ -1945,7 +1952,7 @@ impl GetFullBatchData for MeshPipeline {
    }

    fn write_batch_indirect_parameters_metadata(
-        mesh_index: u32,
+        mesh_index: InputUniformIndex,
        indexed: bool,
        base_output_index: u32,
        batch_set_index: Option<NonMaxU32>,
@ -1953,7 +1960,7 @@ impl GetFullBatchData for MeshPipeline {
        indirect_parameters_offset: u32,
    ) {
        let indirect_parameters = IndirectParametersMetadata {
-            mesh_index,
+            mesh_index: *mesh_index,
            base_output_index,
            batch_set_index: match batch_set_index {
                Some(batch_set_index) => u32::from(batch_set_index),
--- a/crates/bevy_render/src/batching/gpu_preprocessing.rs
+++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs
@ -24,9 +24,9 @@ use crate::{
    experimental::occlusion_culling::OcclusionCulling,
    render_phase::{
        BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
-        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItemBatchSetKey as _,
-        PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
-        ViewBinnedRenderPhases, ViewSortedRenderPhases,
+        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, InputUniformIndex,
+        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase,
+        UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases,
    },
    render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
    renderer::{RenderAdapter, RenderDevice, RenderQueue},
@ -1271,7 +1271,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
                // Start a new batch.
                if let Some(indirect_parameters_index) = indirect_parameters_index {
                    GFBD::write_batch_indirect_parameters_metadata(
-                        current_input_index.into(),
+                        InputUniformIndex(current_input_index.into()),
                        item_is_indexed,
                        output_index,
                        None,
@ -1382,12 +1382,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                let first_output_index = data_buffer.len() as u32;
                let mut batch: Option<BinnedRenderPhaseBatch> = None;

-                for main_entity in bin.entities() {
-                    let Some(input_index) =
-                        GFBD::get_binned_index(&system_param_item, *main_entity)
-                    else {
-                        continue;
-                    };
+                for (&main_entity, &input_index) in bin.entities() {
                    let output_index = data_buffer.add() as u32;

                    match batch {
@ -1397,7 +1392,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                            work_item_buffer.push(
                                batch_set_key.indexed(),
                                PreprocessWorkItem {
-                                    input_index: input_index.into(),
+                                    input_index: *input_index,
                                    output_index: first_output_index,
                                    indirect_parameters_index: match batch.extra_index {
                                        PhaseItemExtraIndex::IndirectParametersIndex {
@ -1419,7 +1414,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                                .get_next_batch_set_index(batch_set_key.indexed());

                            GFBD::write_batch_indirect_parameters_metadata(
-                                input_index.into(),
+                                input_index,
                                batch_set_key.indexed(),
                                output_index,
                                batch_set_index,
@ -1429,13 +1424,13 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                            work_item_buffer.push(
                                batch_set_key.indexed(),
                                PreprocessWorkItem {
-                                    input_index: input_index.into(),
+                                    input_index: *input_index,
                                    output_index: first_output_index,
                                    indirect_parameters_index,
                                },
                            );
                            batch = Some(BinnedRenderPhaseBatch {
-                                representative_entity: (Entity::PLACEHOLDER, *main_entity),
+                                representative_entity: (Entity::PLACEHOLDER, main_entity),
                                instance_range: output_index..output_index + 1,
                                extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(
                                    NonMaxU32::new(indirect_parameters_index),
@ -1481,11 +1476,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
            let first_output_index = data_buffer.len() as u32;

            let mut batch: Option<BinnedRenderPhaseBatch> = None;
-            for main_entity in phase.batchable_mesh_values[key].entities() {
-                let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)
-                else {
-                    continue;
-                };
+            for (&main_entity, &input_index) in phase.batchable_mesh_values[key].entities() {
                let output_index = data_buffer.add() as u32;

                match batch {
@ -1502,7 +1493,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
-                                input_index: input_index.into(),
+                                input_index: *input_index,
                                output_index: if no_indirect_drawing {
                                    output_index
                                } else {
@ -1528,7 +1519,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                            indirect_parameters_buffers.get_next_batch_set_index(key.0.indexed());

                        GFBD::write_batch_indirect_parameters_metadata(
-                            input_index.into(),
+                            input_index,
                            key.0.indexed(),
                            output_index,
                            batch_set_index,
@ -1538,13 +1529,13 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
-                                input_index: input_index.into(),
+                                input_index: *input_index,
                                output_index: first_output_index,
                                indirect_parameters_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
-                            representative_entity: (Entity::PLACEHOLDER, *main_entity),
+                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: indirect_parameters_index..(indirect_parameters_index + 1),
@ -1558,13 +1549,13 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
-                                input_index: input_index.into(),
+                                input_index: *input_index,
                                output_index,
                                indirect_parameters_index: 0,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
-                            representative_entity: (Entity::PLACEHOLDER, *main_entity),
+                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::None,
                        });
@ -1627,7 +1618,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
                    // We're in indirect mode, so add an indirect parameters
                    // index.
                    GFBD::write_batch_indirect_parameters_metadata(
-                        input_index.into(),
+                        InputUniformIndex(input_index.into()),
                        key.0.indexed(),
                        output_index,
                        None,
--- a/crates/bevy_render/src/batching/mod.rs
+++ b/crates/bevy_render/src/batching/mod.rs
@ -7,7 +7,6 @@ use bytemuck::Pod;
 use nonmax::NonMaxU32;

 use self::gpu_preprocessing::IndirectParametersBuffers;
-use crate::{render_phase::PhaseItemExtraIndex, sync_world::MainEntity};
 use crate::{
    render_phase::{
        BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, SortedPhaseItem,
@ -15,6 +14,10 @@ use crate::{
    },
    render_resource::{CachedRenderPipelineId, GpuArrayBufferable},
 };
+use crate::{
+    render_phase::{InputUniformIndex, PhaseItemExtraIndex},
+    sync_world::MainEntity,
+};

 pub mod gpu_preprocessing;
 pub mod no_gpu_preprocessing;
@ -132,12 +135,17 @@ pub trait GetFullBatchData: GetBatchData {
    ) -> Option<(NonMaxU32, Option<Self::CompareData>)>;

    /// Returns the index of the [`GetFullBatchData::BufferInputData`] that the
-    /// GPU preprocessing phase will use, for the binning path.
+    /// GPU preprocessing phase will use.
    ///
    /// We already inserted the [`GetFullBatchData::BufferInputData`] during the
    /// extraction phase before we got here, so this function shouldn't need to
-    /// look up any render data. If CPU instance buffer building is in use, this
-    /// function will never be called.
+    /// look up any render data.
+    ///
+    /// This function is currently only called for unbatchable entities when GPU
+    /// instance buffer building is in use. For batchable entities, the uniform
+    /// index is written during queuing (e.g. in `queue_material_meshes`). In
+    /// the case of CPU instance buffer building, the CPU writes the uniforms,
+    /// so there's no index to return.
    fn get_binned_index(
        param: &SystemParamItem<Self::Param>,
        query_item: MainEntity,
@ -167,7 +175,7 @@ pub trait GetFullBatchData: GetBatchData {
    /// * `indirect_parameters_offset` is the index in that buffer at which to
    ///   write the metadata.
    fn write_batch_indirect_parameters_metadata(
-        mesh_index: u32,
+        mesh_index: InputUniformIndex,
        indexed: bool,
        base_output_index: u32,
        batch_set_index: Option<NonMaxU32>,
--- a/crates/bevy_render/src/batching/no_gpu_preprocessing.rs
+++ b/crates/bevy_render/src/batching/no_gpu_preprocessing.rs
@ -110,7 +110,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(

        for key in &phase.batchable_mesh_keys {
            let mut batch_set: SmallVec<[BinnedRenderPhaseBatch; 1]> = smallvec![];
-            for main_entity in phase.batchable_mesh_values[key].entities() {
+            for main_entity in phase.batchable_mesh_values[key].entities().keys() {
                let Some(buffer_data) =
                    GFBD::get_binned_batch_data(&system_param_item, *main_entity)
                else {
--- a/crates/bevy_render/src/render_phase/mod.rs
+++ b/crates/bevy_render/src/render_phase/mod.rs
@ -38,7 +38,7 @@ pub use draw::*;
 pub use draw_state::*;
 use encase::{internal::WriteInto, ShaderSize};
 use fixedbitset::{Block, FixedBitSet};
-use indexmap::{IndexMap, IndexSet};
+use indexmap::IndexMap;
 use nonmax::NonMaxU32;
 pub use rangefinder::*;
 use wgpu::Features;
@ -191,8 +191,9 @@ where
 /// a [`BinnedRenderPhase`].
 #[derive(Default)]
 pub struct RenderBin {
-    /// A list of the entities in each bin.
-    entities: IndexSet<MainEntity, EntityHash>,
+    /// A list of the entities in each bin, along with their cached
+    /// [`InputUniformIndex`].
+    entities: IndexMap<MainEntity, InputUniformIndex, EntityHash>,
 }

 /// Information that we track about an entity that was in one bin on the
@ -422,6 +423,19 @@ where
    }
 }

+/// The index of the uniform describing this object in the GPU buffer, when GPU
+/// preprocessing is enabled.
+///
+/// For example, for 3D meshes, this is the index of the `MeshInputUniform` in
+/// the buffer.
+///
+/// This field is ignored if GPU preprocessing isn't in use, such as (currently)
+/// in the case of 2D meshes. In that case, it can be safely set to
+/// [`core::default::Default::default`].
+#[derive(Clone, Copy, PartialEq, Default, Deref, DerefMut)]
+#[repr(transparent)]
+pub struct InputUniformIndex(pub u32);
+
 impl<BPI> BinnedRenderPhase<BPI>
 where
    BPI: BinnedPhaseItem,
@ -436,6 +450,7 @@ where
        batch_set_key: BPI::BatchSetKey,
        bin_key: BPI::BinKey,
        (entity, main_entity): (Entity, MainEntity),
+        input_uniform_index: InputUniformIndex,
        phase_type: BinnedRenderPhaseType,
        change_tick: Tick,
    ) {
@ -447,11 +462,14 @@ where
                            .get_mut()
                            .entry(bin_key.clone())
                            .or_default()
-                            .insert(main_entity);
+                            .insert(main_entity, input_uniform_index);
                    }
                    Entry::Vacant(entry) => {
                        let mut new_batch_set = HashMap::default();
-                        new_batch_set.insert(bin_key.clone(), RenderBin::from_entity(main_entity));
+                        new_batch_set.insert(
+                            bin_key.clone(),
+                            RenderBin::from_entity(main_entity, input_uniform_index),
+                        );
                        entry.insert(new_batch_set);
                    }
                }
@ -463,10 +481,10 @@ where
                    .entry((batch_set_key.clone(), bin_key.clone()).clone())
                {
                    Entry::Occupied(mut entry) => {
-                        entry.get_mut().insert(main_entity);
+                        entry.get_mut().insert(main_entity, input_uniform_index);
                    }
                    Entry::Vacant(entry) => {
-                        entry.insert(RenderBin::from_entity(main_entity));
+                        entry.insert(RenderBin::from_entity(main_entity, input_uniform_index));
                    }
                }
            }
@ -497,10 +515,10 @@ where
                    .entry((batch_set_key.clone(), bin_key.clone()).clone())
                {
                    Entry::Occupied(mut entry) => {
-                        entry.get_mut().insert(main_entity);
+                        entry.get_mut().insert(main_entity, input_uniform_index);
                    }
                    Entry::Vacant(entry) => {
-                        entry.insert(RenderBin::from_entity(main_entity));
+                        entry.insert(RenderBin::from_entity(main_entity, input_uniform_index));
                    }
                }
            }
@ -753,7 +771,7 @@ where
        let mut draw_functions = draw_functions.write();

        for ((batch_set_key, bin_key), bin) in &self.non_mesh_items {
-            for &entity in &bin.entities {
+            for &entity in bin.entities.keys() {
                // Come up with a fake batch range and extra index. The draw
                // function is expected to manage any sort of batching logic itself.
                let binned_phase_item = BPI::new(
@ -1631,15 +1649,15 @@ impl BinnedRenderPhaseType {

 impl RenderBin {
    /// Creates a [`RenderBin`] containing a single entity.
-    fn from_entity(entity: MainEntity) -> RenderBin {
-        let mut entities = IndexSet::default();
-        entities.insert(entity);
+    fn from_entity(entity: MainEntity, uniform_index: InputUniformIndex) -> RenderBin {
+        let mut entities = IndexMap::default();
+        entities.insert(entity, uniform_index);
        RenderBin { entities }
    }

    /// Inserts an entity into the bin.
-    fn insert(&mut self, entity: MainEntity) {
-        self.entities.insert(entity);
+    fn insert(&mut self, entity: MainEntity, uniform_index: InputUniformIndex) {
+        self.entities.insert(entity, uniform_index);
    }

    /// Removes an entity from the bin.
@ -1652,9 +1670,10 @@ impl RenderBin {
        self.entities.is_empty()
    }

-    /// Returns the [`IndexSet`] containing all the entities in the bin.
+    /// Returns the [`IndexMap`] containing all the entities in the bin, along
+    /// with the cached [`InputUniformIndex`] of each.
    #[inline]
-    pub fn entities(&self) -> &IndexSet<MainEntity, EntityHash> {
+    pub fn entities(&self) -> &IndexMap<MainEntity, InputUniformIndex, EntityHash> {
        &self.entities
    }
 }
--- a/crates/bevy_sprite/src/mesh2d/material.rs
+++ b/crates/bevy_sprite/src/mesh2d/material.rs
@ -22,7 +22,7 @@ use bevy_ecs::{
 use bevy_math::FloatOrd;
 use bevy_platform_support::collections::HashMap;
 use bevy_reflect::{prelude::ReflectDefault, Reflect};
-use bevy_render::render_phase::DrawFunctionId;
+use bevy_render::render_phase::{DrawFunctionId, InputUniformIndex};
 use bevy_render::render_resource::CachedRenderPipelineId;
 use bevy_render::view::RenderVisibleEntities;
 use bevy_render::{
@ -809,6 +809,7 @@ pub fn queue_material2d_meshes<M: Material2d>(
                        },
                        bin_key,
                        (*render_entity, *visible_entity),
+                        InputUniformIndex::default(),
                        binned_render_phase_type,
                        current_change_tick,
                    );
@ -826,6 +827,7 @@ pub fn queue_material2d_meshes<M: Material2d>(
                        },
                        bin_key,
                        (*render_entity, *visible_entity),
+                        InputUniformIndex::default(),
                        binned_render_phase_type,
                        current_change_tick,
                    );
--- a/crates/bevy_sprite/src/mesh2d/mesh.rs
+++ b/crates/bevy_sprite/src/mesh2d/mesh.rs
@ -1,5 +1,6 @@
 use bevy_app::Plugin;
 use bevy_asset::{load_internal_asset, weak_handle, AssetId, Handle};
+use bevy_render::render_phase::InputUniformIndex;

 use crate::{tonemapping_pipeline_key, Material2dBindGroupId};
 use bevy_core_pipeline::tonemapping::DebandDither;
@ -474,7 +475,7 @@ impl GetFullBatchData for Mesh2dPipeline {
    }

    fn write_batch_indirect_parameters_metadata(
-        input_index: u32,
+        input_index: InputUniformIndex,
        indexed: bool,
        base_output_index: u32,
        batch_set_index: Option<NonMaxU32>,
@ -485,7 +486,7 @@ impl GetFullBatchData for Mesh2dPipeline {
        // though they actually have distinct layouts. See the comment above that
        // type for more information.
        let indirect_parameters = IndirectParametersMetadata {
-            mesh_index: input_index,
+            mesh_index: *input_index,
            base_output_index,
            batch_set_index: match batch_set_index {
                None => !0,
--- a/examples/shader/custom_phase_item.rs
+++ b/examples/shader/custom_phase_item.rs
@ -19,8 +19,9 @@ use bevy::{
        extract_component::{ExtractComponent, ExtractComponentPlugin},
        primitives::Aabb,
        render_phase::{
-            AddRenderCommand, BinnedRenderPhaseType, DrawFunctions, PhaseItem, RenderCommand,
-            RenderCommandResult, SetItemPipeline, TrackedRenderPass, ViewBinnedRenderPhases,
+            AddRenderCommand, BinnedRenderPhaseType, DrawFunctions, InputUniformIndex, PhaseItem,
+            RenderCommand, RenderCommandResult, SetItemPipeline, TrackedRenderPass,
+            ViewBinnedRenderPhases,
        },
        render_resource::{
            BufferUsages, ColorTargetState, ColorWrites, CompareFunction, DepthStencilState,
@ -277,6 +278,7 @@ fn queue_custom_phase_item(
                    asset_id: AssetId::<Mesh>::invalid().untyped(),
                },
                entity,
+                InputUniformIndex::default(),
                BinnedRenderPhaseType::NonMesh,
                *next_tick,
            );
--- a/examples/shader/custom_render_phase.rs
+++ b/examples/shader/custom_render_phase.rs
@ -41,8 +41,8 @@ use bevy::{
        },
        render_phase::{
            sort_phase_system, AddRenderCommand, CachedRenderPipelinePhaseItem, DrawFunctionId,
-            DrawFunctions, PhaseItem, PhaseItemExtraIndex, SetItemPipeline, SortedPhaseItem,
-            ViewSortedRenderPhases,
+            DrawFunctions, InputUniformIndex, PhaseItem, PhaseItemExtraIndex, SetItemPipeline,
+            SortedPhaseItem, ViewSortedRenderPhases,
        },
        render_resource::{
            CachedRenderPipelineId, ColorTargetState, ColorWrites, Face, FragmentState, FrontFace,
@ -431,7 +431,7 @@ impl GetFullBatchData for StencilPipeline {
    }

    fn write_batch_indirect_parameters_metadata(
-        mesh_index: u32,
+        mesh_index: InputUniformIndex,
        indexed: bool,
        base_output_index: u32,
        batch_set_index: Option<NonMaxU32>,
@ -442,7 +442,7 @@ impl GetFullBatchData for StencilPipeline {
        // though they actually have distinct layouts. See the comment above that
        // type for more information.
        let indirect_parameters = IndirectParametersMetadata {
-            mesh_index,
+            mesh_index: *mesh_index,
            base_output_index,
            batch_set_index: match batch_set_index {
                None => !0,
--- a/examples/shader/specialized_mesh_pipeline.rs
+++ b/examples/shader/specialized_mesh_pipeline.rs
@ -427,6 +427,7 @@ fn queue_custom_mesh_pipeline(
                    asset_id: AssetId::<Mesh>::invalid().untyped(),
                },
                (render_entity, visible_entity),
+                mesh_instance.current_uniform_index,
                // This example supports batching, but if your pipeline doesn't
                // support it you can use `BinnedRenderPhaseType::UnbatchableMesh`
                BinnedRenderPhaseType::BatchableMesh,