Parallelize prepare_assets::<T> systems (#17914)

# Objective

Because `prepare_assets::<T>` had a mutable reference to the
`RenderAssetBytesPerFrame` resource, no render asset preparation could
happen in parallel. This PR fixes this by using an `AtomicUsize` to
count bytes written (if there's a limit in place), so that the system
doesn't need mutable access.

- Related: https://github.com/bevyengine/bevy/pull/12622

**Before**
<img width="1049" alt="Screenshot 2025-02-17 at 11 40 53 AM"
src="https://github.com/user-attachments/assets/040e6184-1192-4368-9597-5ceda4b8251b"
/>

**After**
<img width="836" alt="Screenshot 2025-02-17 at 1 38 37 PM"
src="https://github.com/user-attachments/assets/95488796-3323-425c-b0a6-4cf17753512e"
/>

## Testing

- Tested on a local project (with and without limiting enabled)
- Someone with more knowledge of wgpu/underlying driver guts should
confirm that this doesn't actually bite us by introducing contention
(i.e. if buffer writing really *should be* serial).
This commit is contained in:
Brian Reavis 2025-03-09 22:01:12 -07:00 committed by GitHub
parent 2a2e0a8555
commit dacb77d745
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 91 additions and 44 deletions

View File

@ -628,7 +628,10 @@ pub fn prepare_volumetric_fog_pipelines(
>, >,
meshes: Res<RenderAssets<RenderMesh>>, meshes: Res<RenderAssets<RenderMesh>>,
) { ) {
let plane_mesh = meshes.get(&PLANE_MESH).expect("Plane mesh not found!"); let Some(plane_mesh) = meshes.get(&PLANE_MESH) else {
// There's an off chance that the mesh won't be prepared yet if `RenderAssetBytesPerFrame` limiting is in use.
return;
};
for ( for (
entity, entity,

View File

@ -78,9 +78,11 @@ pub use extract_param::Extract;
use bevy_window::{PrimaryWindow, RawHandleWrapperHolder}; use bevy_window::{PrimaryWindow, RawHandleWrapperHolder};
use experimental::occlusion_culling::OcclusionCullingPlugin; use experimental::occlusion_culling::OcclusionCullingPlugin;
use extract_resource::ExtractResourcePlugin;
use globals::GlobalsPlugin; use globals::GlobalsPlugin;
use render_asset::RenderAssetBytesPerFrame; use render_asset::{
extract_render_asset_bytes_per_frame, reset_render_asset_bytes_per_frame,
RenderAssetBytesPerFrame, RenderAssetBytesPerFrameLimiter,
};
use renderer::{RenderAdapter, RenderDevice, RenderQueue}; use renderer::{RenderAdapter, RenderDevice, RenderQueue};
use settings::RenderResources; use settings::RenderResources;
use sync_world::{ use sync_world::{
@ -408,8 +410,16 @@ impl Plugin for RenderPlugin {
OcclusionCullingPlugin, OcclusionCullingPlugin,
)); ));
app.init_resource::<RenderAssetBytesPerFrame>() app.init_resource::<RenderAssetBytesPerFrame>();
.add_plugins(ExtractResourcePlugin::<RenderAssetBytesPerFrame>::default()); if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
render_app.init_resource::<RenderAssetBytesPerFrameLimiter>();
render_app
.add_systems(ExtractSchedule, extract_render_asset_bytes_per_frame)
.add_systems(
Render,
reset_render_asset_bytes_per_frame.in_set(RenderSet::Cleanup),
);
}
app.register_type::<alpha::AlphaMode>() app.register_type::<alpha::AlphaMode>()
// These types cannot be registered in bevy_color, as it does not depend on the rest of Bevy // These types cannot be registered in bevy_color, as it does not depend on the rest of Bevy
@ -465,14 +475,7 @@ impl Plugin for RenderPlugin {
.insert_resource(device) .insert_resource(device)
.insert_resource(queue) .insert_resource(queue)
.insert_resource(render_adapter) .insert_resource(render_adapter)
.insert_resource(adapter_info) .insert_resource(adapter_info);
.add_systems(
Render,
(|mut bpf: ResMut<RenderAssetBytesPerFrame>| {
bpf.reset();
})
.in_set(RenderSet::Cleanup),
);
} }
} }
} }

View File

@ -1,18 +1,19 @@
use crate::{ use crate::{
render_resource::AsBindGroupError, ExtractSchedule, MainWorld, Render, RenderApp, RenderSet, render_resource::AsBindGroupError, Extract, ExtractSchedule, MainWorld, Render, RenderApp,
RenderSet,
}; };
use bevy_app::{App, Plugin, SubApp}; use bevy_app::{App, Plugin, SubApp};
pub use bevy_asset::RenderAssetUsages; pub use bevy_asset::RenderAssetUsages;
use bevy_asset::{Asset, AssetEvent, AssetId, Assets}; use bevy_asset::{Asset, AssetEvent, AssetId, Assets};
use bevy_ecs::{ use bevy_ecs::{
prelude::{Commands, EventReader, IntoSystemConfigs, ResMut, Resource}, prelude::{Commands, EventReader, IntoSystemConfigs, Res, ResMut, Resource},
schedule::{SystemConfigs, SystemSet}, schedule::{SystemConfigs, SystemSet},
system::{StaticSystemParam, SystemParam, SystemParamItem, SystemState}, system::{StaticSystemParam, SystemParam, SystemParamItem, SystemState},
world::{FromWorld, Mut}, world::{FromWorld, Mut},
}; };
use bevy_platform_support::collections::{HashMap, HashSet}; use bevy_platform_support::collections::{HashMap, HashSet};
use bevy_render_macros::ExtractResource;
use core::marker::PhantomData; use core::marker::PhantomData;
use core::sync::atomic::{AtomicUsize, Ordering};
use thiserror::Error; use thiserror::Error;
use tracing::{debug, error}; use tracing::{debug, error};
@ -308,7 +309,7 @@ pub fn prepare_assets<A: RenderAsset>(
mut render_assets: ResMut<RenderAssets<A>>, mut render_assets: ResMut<RenderAssets<A>>,
mut prepare_next_frame: ResMut<PrepareNextFrameAssets<A>>, mut prepare_next_frame: ResMut<PrepareNextFrameAssets<A>>,
param: StaticSystemParam<<A as RenderAsset>::Param>, param: StaticSystemParam<<A as RenderAsset>::Param>,
mut bpf: ResMut<RenderAssetBytesPerFrame>, bpf: Res<RenderAssetBytesPerFrameLimiter>,
) { ) {
let mut wrote_asset_count = 0; let mut wrote_asset_count = 0;
@ -401,54 +402,94 @@ pub fn prepare_assets<A: RenderAsset>(
} }
} }
/// A resource that attempts to limit the amount of data transferred from cpu to gpu pub fn reset_render_asset_bytes_per_frame(
/// each frame, preventing choppy frames at the cost of waiting longer for gpu assets mut bpf_limiter: ResMut<RenderAssetBytesPerFrameLimiter>,
/// to become available ) {
#[derive(Resource, Default, Debug, Clone, Copy, ExtractResource)] bpf_limiter.reset();
}
pub fn extract_render_asset_bytes_per_frame(
bpf: Extract<Res<RenderAssetBytesPerFrame>>,
mut bpf_limiter: ResMut<RenderAssetBytesPerFrameLimiter>,
) {
bpf_limiter.max_bytes = bpf.max_bytes;
}
/// A resource that defines the amount of data allowed to be transferred from CPU to GPU
/// each frame, preventing choppy frames at the cost of waiting longer for GPU assets
/// to become available.
#[derive(Resource, Default)]
pub struct RenderAssetBytesPerFrame { pub struct RenderAssetBytesPerFrame {
pub max_bytes: Option<usize>, pub max_bytes: Option<usize>,
pub available: usize,
} }
impl RenderAssetBytesPerFrame { impl RenderAssetBytesPerFrame {
/// `max_bytes`: the number of bytes to write per frame. /// `max_bytes`: the number of bytes to write per frame.
/// this is a soft limit: only full assets are written currently, uploading stops ///
/// This is a soft limit: only full assets are written currently, uploading stops
/// after the first asset that exceeds the limit. /// after the first asset that exceeds the limit.
///
/// To participate, assets should implement [`RenderAsset::byte_len`]. If the default /// To participate, assets should implement [`RenderAsset::byte_len`]. If the default
/// is not overridden, the assets are assumed to be small enough to upload without restriction. /// is not overridden, the assets are assumed to be small enough to upload without restriction.
pub fn new(max_bytes: usize) -> Self { pub fn new(max_bytes: usize) -> Self {
Self { Self {
max_bytes: Some(max_bytes), max_bytes: Some(max_bytes),
available: 0,
} }
} }
}
/// Reset the available bytes. Called once per frame by the [`crate::RenderPlugin`]. /// A render-world resource that facilitates limiting the data transferred from CPU to GPU
/// each frame, preventing choppy frames at the cost of waiting longer for GPU assets
/// to become available.
#[derive(Resource, Default)]
pub struct RenderAssetBytesPerFrameLimiter {
/// Populated by [`RenderAssetBytesPerFrame`] during extraction.
pub max_bytes: Option<usize>,
/// Bytes written this frame.
pub bytes_written: AtomicUsize,
}
impl RenderAssetBytesPerFrameLimiter {
/// Resets the count of bytes written this frame. Called once per frame in [`RenderSet::Cleanup`] by [`crate::RenderPlugin`].
pub fn reset(&mut self) { pub fn reset(&mut self) {
self.available = self.max_bytes.unwrap_or(usize::MAX);
}
/// check how many bytes are available since the last reset
pub fn available_bytes(&self, required_bytes: usize) -> usize {
if self.max_bytes.is_none() {
return required_bytes;
}
required_bytes.min(self.available)
}
/// decrease the available bytes for the current frame
fn write_bytes(&mut self, bytes: usize) {
if self.max_bytes.is_none() { if self.max_bytes.is_none() {
return; return;
} }
self.bytes_written.store(0, Ordering::Relaxed);
let write_bytes = bytes.min(self.available);
self.available -= write_bytes;
} }
// check if any bytes remain available for writing this frame /// Check how many bytes are available for writing.
pub fn available_bytes(&self, required_bytes: usize) -> usize {
if let Some(max_bytes) = self.max_bytes {
let total_bytes = self
.bytes_written
.fetch_add(required_bytes, Ordering::Relaxed);
// The available bytes are the requested bytes minus the amount by which we had already overshot `max_bytes`.
if total_bytes >= max_bytes {
required_bytes.saturating_sub(total_bytes - max_bytes)
} else {
required_bytes
}
} else {
required_bytes
}
}
/// Decreases the available bytes for the current frame.
fn write_bytes(&self, bytes: usize) {
if self.max_bytes.is_some() && bytes > 0 {
self.bytes_written.fetch_add(bytes, Ordering::Relaxed);
}
}
/// Returns `true` if there are no remaining bytes available for writing this frame.
fn exhausted(&self) -> bool { fn exhausted(&self) -> bool {
self.max_bytes.is_some() && self.available == 0 if let Some(max_bytes) = self.max_bytes {
let bytes_written = self.bytes_written.load(Ordering::Relaxed);
bytes_written >= max_bytes
} else {
false
}
} }
} }