Replace BufferVec<PreprocessWorkItem> with RawBufferVec<PreprocessWorkItem>. (#17862)

Appending to the `PreprocessWorkItem` buffers is performance-critical in `batch_and_prepare_binned_render_phase`, so `RawBufferVec`, which doesn't have the `encase` overhead that `BufferVec` incurs, is more appropriate.
2025-02-16 11:59:29 -08:00 · 2025-02-16 11:59:29 -08:00 · 137878ac35
commit 137878ac35
parent 7801ed315f
2 changed files with 17 additions and 17 deletions
--- a/crates/bevy_pbr/src/render/gpu_preprocess.rs
+++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs
@@ -43,7 +43,7 @@ use bevy_render::{
    render_resource::{
        binding_types::{storage_buffer, storage_buffer_read_only, texture_2d, uniform_buffer},
        BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, Buffer, BufferBinding,
-        BufferVec, CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor,
+        CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor,
        DynamicBindGroupLayoutEntries, PipelineCache, PushConstantRange, RawBufferVec, Shader,
        ShaderStages, ShaderType, SpecializedComputePipeline, SpecializedComputePipelines,
        TextureSampleType, UninitBufferVec,
@@ -1842,7 +1842,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
    /// and GPU occlusion culling are both disabled.
    fn create_direct_preprocess_bind_groups(
        &self,
-        work_item_buffer: &BufferVec<PreprocessWorkItem>,
+        work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
    ) -> Option<PhasePreprocessBindGroups> {
        // Don't use `as_entire_binding()` here; the shader reads the array
        // length and the underlying buffer may be longer than the actual size
@@ -1878,8 +1878,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
    fn create_indirect_occlusion_culling_preprocess_bind_groups(
        &self,
        view_depth_pyramids: &Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>,
-        indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
-        non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
+        indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
+        non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
        gpu_occlusion_culling_work_item_buffers: &GpuOcclusionCullingWorkItemBuffers,
    ) -> Option<PhasePreprocessBindGroups> {
        let GpuOcclusionCullingWorkItemBuffers {
@@ -1926,7 +1926,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
        &self,
        view_depth_pyramid: &ViewDepthPyramid,
        previous_view_uniform_offset: &PreviousViewUniformOffset,
-        indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
+        indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
        late_indexed_work_item_buffer: &UninitBufferVec<PreprocessWorkItem>,
    ) -> Option<BindGroup> {
        let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
@@ -2018,7 +2018,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
        &self,
        view_depth_pyramid: &ViewDepthPyramid,
        previous_view_uniform_offset: &PreviousViewUniformOffset,
-        non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
+        non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
        late_non_indexed_work_item_buffer: &UninitBufferVec<PreprocessWorkItem>,
    ) -> Option<BindGroup> {
        let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
@@ -2270,8 +2270,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
    /// is enabled, but GPU occlusion culling is disabled.
    fn create_indirect_frustum_culling_preprocess_bind_groups(
        &self,
-        indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
-        non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
+        indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
+        non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
    ) -> Option<PhasePreprocessBindGroups> {
        Some(PhasePreprocessBindGroups::IndirectFrustumCulling {
            indexed: self
@@ -2286,7 +2286,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
    /// frustum culling is enabled, but GPU occlusion culling is disabled.
    fn create_indirect_frustum_culling_indexed_bind_group(
        &self,
-        indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
+        indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
    ) -> Option<BindGroup> {
        let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
        let view_uniforms_binding = self.view_uniforms.uniforms.binding()?;
@@ -2340,7 +2340,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
    /// GPU frustum culling is enabled, but GPU occlusion culling is disabled.
    fn create_indirect_frustum_culling_non_indexed_bind_group(
        &self,
-        non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>,
+        non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
    ) -> Option<BindGroup> {
        let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
        let view_uniforms_binding = self.view_uniforms.uniforms.binding()?;
--- a/crates/bevy_render/src/batching/gpu_preprocessing.rs
+++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs
@@ -29,7 +29,7 @@ use crate::{
        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase,
        UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases,
    },
-    render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
+    render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
    renderer::{RenderAdapter, RenderDevice, RenderQueue},
    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
    Render, RenderApp, RenderDebugFlags, RenderSet,
@@ -388,7 +388,7 @@ pub enum PreprocessWorkItemBuffers {
    ///
    /// Because we don't have to separate indexed from non-indexed meshes in
    /// direct mode, we only have a single buffer here.
-    Direct(BufferVec<PreprocessWorkItem>),
+    Direct(RawBufferVec<PreprocessWorkItem>),

    /// The buffer of work items we use if we are using indirect drawing.
    ///
@@ -397,9 +397,9 @@ pub enum PreprocessWorkItemBuffers {
    /// different sizes.
    Indirect {
        /// The buffer of work items corresponding to indexed meshes.
-        indexed: BufferVec<PreprocessWorkItem>,
+        indexed: RawBufferVec<PreprocessWorkItem>,
        /// The buffer of work items corresponding to non-indexed meshes.
-        non_indexed: BufferVec<PreprocessWorkItem>,
+        non_indexed: RawBufferVec<PreprocessWorkItem>,
        /// The work item buffers we use when GPU occlusion culling is in use.
        gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
    },
@@ -482,13 +482,13 @@ where
        Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
        Entry::Vacant(vacant_entry) => {
            if no_indirect_drawing {
-                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(BufferVec::new(
+                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
                    BufferUsages::STORAGE,
                )))
            } else {
                vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
-                    indexed: BufferVec::new(BufferUsages::STORAGE),
-                    non_indexed: BufferVec::new(BufferUsages::STORAGE),
+                    indexed: RawBufferVec::new(BufferUsages::STORAGE),
+                    non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    // We fill this in below if `enable_gpu_occlusion_culling`
                    // is set.
                    gpu_occlusion_culling: None,