Replace BufferVec<PreprocessWorkItem> with RawBufferVec<PreprocessWorkItem>. (#17862)

Appending to these vectors is performance-critical in
`batch_and_prepare_binned_render_phase`, so `RawBufferVec`, which
doesn't have the overhead of `encase`, is more appropriate.
This commit is contained in:
Patrick Walton 2025-02-16 11:59:29 -08:00 committed by GitHub
parent 7801ed315f
commit 137878ac35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 17 deletions

View File

@ -43,7 +43,7 @@ use bevy_render::{
render_resource::{ render_resource::{
binding_types::{storage_buffer, storage_buffer_read_only, texture_2d, uniform_buffer}, binding_types::{storage_buffer, storage_buffer_read_only, texture_2d, uniform_buffer},
BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, Buffer, BufferBinding, BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, Buffer, BufferBinding,
BufferVec, CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor, CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor,
DynamicBindGroupLayoutEntries, PipelineCache, PushConstantRange, RawBufferVec, Shader, DynamicBindGroupLayoutEntries, PipelineCache, PushConstantRange, RawBufferVec, Shader,
ShaderStages, ShaderType, SpecializedComputePipeline, SpecializedComputePipelines, ShaderStages, ShaderType, SpecializedComputePipeline, SpecializedComputePipelines,
TextureSampleType, UninitBufferVec, TextureSampleType, UninitBufferVec,
@ -1842,7 +1842,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
/// and GPU occlusion culling are both disabled. /// and GPU occlusion culling are both disabled.
fn create_direct_preprocess_bind_groups( fn create_direct_preprocess_bind_groups(
&self, &self,
work_item_buffer: &BufferVec<PreprocessWorkItem>, work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
) -> Option<PhasePreprocessBindGroups> { ) -> Option<PhasePreprocessBindGroups> {
// Don't use `as_entire_binding()` here; the shader reads the array // Don't use `as_entire_binding()` here; the shader reads the array
// length and the underlying buffer may be longer than the actual size // length and the underlying buffer may be longer than the actual size
@ -1878,8 +1878,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
fn create_indirect_occlusion_culling_preprocess_bind_groups( fn create_indirect_occlusion_culling_preprocess_bind_groups(
&self, &self,
view_depth_pyramids: &Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>, view_depth_pyramids: &Query<(&ViewDepthPyramid, &PreviousViewUniformOffset)>,
indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
gpu_occlusion_culling_work_item_buffers: &GpuOcclusionCullingWorkItemBuffers, gpu_occlusion_culling_work_item_buffers: &GpuOcclusionCullingWorkItemBuffers,
) -> Option<PhasePreprocessBindGroups> { ) -> Option<PhasePreprocessBindGroups> {
let GpuOcclusionCullingWorkItemBuffers { let GpuOcclusionCullingWorkItemBuffers {
@ -1926,7 +1926,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
&self, &self,
view_depth_pyramid: &ViewDepthPyramid, view_depth_pyramid: &ViewDepthPyramid,
previous_view_uniform_offset: &PreviousViewUniformOffset, previous_view_uniform_offset: &PreviousViewUniformOffset,
indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
late_indexed_work_item_buffer: &UninitBufferVec<PreprocessWorkItem>, late_indexed_work_item_buffer: &UninitBufferVec<PreprocessWorkItem>,
) -> Option<BindGroup> { ) -> Option<BindGroup> {
let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
@ -2018,7 +2018,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
&self, &self,
view_depth_pyramid: &ViewDepthPyramid, view_depth_pyramid: &ViewDepthPyramid,
previous_view_uniform_offset: &PreviousViewUniformOffset, previous_view_uniform_offset: &PreviousViewUniformOffset,
non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
late_non_indexed_work_item_buffer: &UninitBufferVec<PreprocessWorkItem>, late_non_indexed_work_item_buffer: &UninitBufferVec<PreprocessWorkItem>,
) -> Option<BindGroup> { ) -> Option<BindGroup> {
let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
@ -2270,8 +2270,8 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
/// is enabled, but GPU occlusion culling is disabled. /// is enabled, but GPU occlusion culling is disabled.
fn create_indirect_frustum_culling_preprocess_bind_groups( fn create_indirect_frustum_culling_preprocess_bind_groups(
&self, &self,
indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
) -> Option<PhasePreprocessBindGroups> { ) -> Option<PhasePreprocessBindGroups> {
Some(PhasePreprocessBindGroups::IndirectFrustumCulling { Some(PhasePreprocessBindGroups::IndirectFrustumCulling {
indexed: self indexed: self
@ -2286,7 +2286,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
/// frustum culling is enabled, but GPU occlusion culling is disabled. /// frustum culling is enabled, but GPU occlusion culling is disabled.
fn create_indirect_frustum_culling_indexed_bind_group( fn create_indirect_frustum_culling_indexed_bind_group(
&self, &self,
indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
) -> Option<BindGroup> { ) -> Option<BindGroup> {
let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; let view_uniforms_binding = self.view_uniforms.uniforms.binding()?;
@ -2340,7 +2340,7 @@ impl<'a> PreprocessBindGroupBuilder<'a> {
/// GPU frustum culling is enabled, but GPU occlusion culling is disabled. /// GPU frustum culling is enabled, but GPU occlusion culling is disabled.
fn create_indirect_frustum_culling_non_indexed_bind_group( fn create_indirect_frustum_culling_non_indexed_bind_group(
&self, &self,
non_indexed_work_item_buffer: &BufferVec<PreprocessWorkItem>, non_indexed_work_item_buffer: &RawBufferVec<PreprocessWorkItem>,
) -> Option<BindGroup> { ) -> Option<BindGroup> {
let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?; let mesh_culling_data_buffer = self.mesh_culling_data_buffer.buffer()?;
let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; let view_uniforms_binding = self.view_uniforms.uniforms.binding()?;

View File

@ -29,7 +29,7 @@ use crate::{
PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase,
UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases,
}, },
render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec}, render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
renderer::{RenderAdapter, RenderDevice, RenderQueue}, renderer::{RenderAdapter, RenderDevice, RenderQueue},
view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity}, view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
Render, RenderApp, RenderDebugFlags, RenderSet, Render, RenderApp, RenderDebugFlags, RenderSet,
@ -388,7 +388,7 @@ pub enum PreprocessWorkItemBuffers {
/// ///
/// Because we don't have to separate indexed from non-indexed meshes in /// Because we don't have to separate indexed from non-indexed meshes in
/// direct mode, we only have a single buffer here. /// direct mode, we only have a single buffer here.
Direct(BufferVec<PreprocessWorkItem>), Direct(RawBufferVec<PreprocessWorkItem>),
/// The buffer of work items we use if we are using indirect drawing. /// The buffer of work items we use if we are using indirect drawing.
/// ///
@ -397,9 +397,9 @@ pub enum PreprocessWorkItemBuffers {
/// different sizes. /// different sizes.
Indirect { Indirect {
/// The buffer of work items corresponding to indexed meshes. /// The buffer of work items corresponding to indexed meshes.
indexed: BufferVec<PreprocessWorkItem>, indexed: RawBufferVec<PreprocessWorkItem>,
/// The buffer of work items corresponding to non-indexed meshes. /// The buffer of work items corresponding to non-indexed meshes.
non_indexed: BufferVec<PreprocessWorkItem>, non_indexed: RawBufferVec<PreprocessWorkItem>,
/// The work item buffers we use when GPU occlusion culling is in use. /// The work item buffers we use when GPU occlusion culling is in use.
gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>, gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
}, },
@ -482,13 +482,13 @@ where
Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
Entry::Vacant(vacant_entry) => { Entry::Vacant(vacant_entry) => {
if no_indirect_drawing { if no_indirect_drawing {
vacant_entry.insert(PreprocessWorkItemBuffers::Direct(BufferVec::new( vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
BufferUsages::STORAGE, BufferUsages::STORAGE,
))) )))
} else { } else {
vacant_entry.insert(PreprocessWorkItemBuffers::Indirect { vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
indexed: BufferVec::new(BufferUsages::STORAGE), indexed: RawBufferVec::new(BufferUsages::STORAGE),
non_indexed: BufferVec::new(BufferUsages::STORAGE), non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
// We fill this in below if `enable_gpu_occlusion_culling` // We fill this in below if `enable_gpu_occlusion_culling`
// is set. // is set.
gpu_occlusion_culling: None, gpu_occlusion_culling: None,