Parallelize prepare_assets::<T> systems (#17914)
# Objective

Because `prepare_assets::<T>` had a mutable reference to the `RenderAssetBytesPerFrame` resource, no render asset preparation could happen in parallel. This PR fixes this by using an `AtomicUsize` to count bytes written (if there's a limit in place), so that the system doesn't need mutable access.

- Related: https://github.com/bevyengine/bevy/pull/12622

**Before**

<img width="1049" alt="Screenshot 2025-02-17 at 11 40 53 AM" src="https://github.com/user-attachments/assets/040e6184-1192-4368-9597-5ceda4b8251b" />

**After**

<img width="836" alt="Screenshot 2025-02-17 at 1 38 37 PM" src="https://github.com/user-attachments/assets/95488796-3323-425c-b0a6-4cf17753512e" />

## Testing

- Tested on a local project (with and without limiting enabled).
- Someone with more knowledge of wgpu/underlying driver guts should confirm that this doesn't actually bite us by introducing contention (i.e. if buffer writing really *should be* serial).
This commit is contained in:
parent
2a2e0a8555
commit
dacb77d745
@ -628,7 +628,10 @@ pub fn prepare_volumetric_fog_pipelines(
|
|||||||
>,
|
>,
|
||||||
meshes: Res<RenderAssets<RenderMesh>>,
|
meshes: Res<RenderAssets<RenderMesh>>,
|
||||||
) {
|
) {
|
||||||
let plane_mesh = meshes.get(&PLANE_MESH).expect("Plane mesh not found!");
|
let Some(plane_mesh) = meshes.get(&PLANE_MESH) else {
|
||||||
|
// There's an off chance that the mesh won't be prepared yet if `RenderAssetBytesPerFrame` limiting is in use.
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
for (
|
for (
|
||||||
entity,
|
entity,
|
||||||
|
@ -78,9 +78,11 @@ pub use extract_param::Extract;
|
|||||||
|
|
||||||
use bevy_window::{PrimaryWindow, RawHandleWrapperHolder};
|
use bevy_window::{PrimaryWindow, RawHandleWrapperHolder};
|
||||||
use experimental::occlusion_culling::OcclusionCullingPlugin;
|
use experimental::occlusion_culling::OcclusionCullingPlugin;
|
||||||
use extract_resource::ExtractResourcePlugin;
|
|
||||||
use globals::GlobalsPlugin;
|
use globals::GlobalsPlugin;
|
||||||
use render_asset::RenderAssetBytesPerFrame;
|
use render_asset::{
|
||||||
|
extract_render_asset_bytes_per_frame, reset_render_asset_bytes_per_frame,
|
||||||
|
RenderAssetBytesPerFrame, RenderAssetBytesPerFrameLimiter,
|
||||||
|
};
|
||||||
use renderer::{RenderAdapter, RenderDevice, RenderQueue};
|
use renderer::{RenderAdapter, RenderDevice, RenderQueue};
|
||||||
use settings::RenderResources;
|
use settings::RenderResources;
|
||||||
use sync_world::{
|
use sync_world::{
|
||||||
@ -408,8 +410,16 @@ impl Plugin for RenderPlugin {
|
|||||||
OcclusionCullingPlugin,
|
OcclusionCullingPlugin,
|
||||||
));
|
));
|
||||||
|
|
||||||
app.init_resource::<RenderAssetBytesPerFrame>()
|
app.init_resource::<RenderAssetBytesPerFrame>();
|
||||||
.add_plugins(ExtractResourcePlugin::<RenderAssetBytesPerFrame>::default());
|
if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
|
||||||
|
render_app.init_resource::<RenderAssetBytesPerFrameLimiter>();
|
||||||
|
render_app
|
||||||
|
.add_systems(ExtractSchedule, extract_render_asset_bytes_per_frame)
|
||||||
|
.add_systems(
|
||||||
|
Render,
|
||||||
|
reset_render_asset_bytes_per_frame.in_set(RenderSet::Cleanup),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
app.register_type::<alpha::AlphaMode>()
|
app.register_type::<alpha::AlphaMode>()
|
||||||
// These types cannot be registered in bevy_color, as it does not depend on the rest of Bevy
|
// These types cannot be registered in bevy_color, as it does not depend on the rest of Bevy
|
||||||
@ -465,14 +475,7 @@ impl Plugin for RenderPlugin {
|
|||||||
.insert_resource(device)
|
.insert_resource(device)
|
||||||
.insert_resource(queue)
|
.insert_resource(queue)
|
||||||
.insert_resource(render_adapter)
|
.insert_resource(render_adapter)
|
||||||
.insert_resource(adapter_info)
|
.insert_resource(adapter_info);
|
||||||
.add_systems(
|
|
||||||
Render,
|
|
||||||
(|mut bpf: ResMut<RenderAssetBytesPerFrame>| {
|
|
||||||
bpf.reset();
|
|
||||||
})
|
|
||||||
.in_set(RenderSet::Cleanup),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,18 +1,19 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
render_resource::AsBindGroupError, ExtractSchedule, MainWorld, Render, RenderApp, RenderSet,
|
render_resource::AsBindGroupError, Extract, ExtractSchedule, MainWorld, Render, RenderApp,
|
||||||
|
RenderSet,
|
||||||
};
|
};
|
||||||
use bevy_app::{App, Plugin, SubApp};
|
use bevy_app::{App, Plugin, SubApp};
|
||||||
pub use bevy_asset::RenderAssetUsages;
|
pub use bevy_asset::RenderAssetUsages;
|
||||||
use bevy_asset::{Asset, AssetEvent, AssetId, Assets};
|
use bevy_asset::{Asset, AssetEvent, AssetId, Assets};
|
||||||
use bevy_ecs::{
|
use bevy_ecs::{
|
||||||
prelude::{Commands, EventReader, IntoSystemConfigs, ResMut, Resource},
|
prelude::{Commands, EventReader, IntoSystemConfigs, Res, ResMut, Resource},
|
||||||
schedule::{SystemConfigs, SystemSet},
|
schedule::{SystemConfigs, SystemSet},
|
||||||
system::{StaticSystemParam, SystemParam, SystemParamItem, SystemState},
|
system::{StaticSystemParam, SystemParam, SystemParamItem, SystemState},
|
||||||
world::{FromWorld, Mut},
|
world::{FromWorld, Mut},
|
||||||
};
|
};
|
||||||
use bevy_platform_support::collections::{HashMap, HashSet};
|
use bevy_platform_support::collections::{HashMap, HashSet};
|
||||||
use bevy_render_macros::ExtractResource;
|
|
||||||
use core::marker::PhantomData;
|
use core::marker::PhantomData;
|
||||||
|
use core::sync::atomic::{AtomicUsize, Ordering};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tracing::{debug, error};
|
use tracing::{debug, error};
|
||||||
|
|
||||||
@ -308,7 +309,7 @@ pub fn prepare_assets<A: RenderAsset>(
|
|||||||
mut render_assets: ResMut<RenderAssets<A>>,
|
mut render_assets: ResMut<RenderAssets<A>>,
|
||||||
mut prepare_next_frame: ResMut<PrepareNextFrameAssets<A>>,
|
mut prepare_next_frame: ResMut<PrepareNextFrameAssets<A>>,
|
||||||
param: StaticSystemParam<<A as RenderAsset>::Param>,
|
param: StaticSystemParam<<A as RenderAsset>::Param>,
|
||||||
mut bpf: ResMut<RenderAssetBytesPerFrame>,
|
bpf: Res<RenderAssetBytesPerFrameLimiter>,
|
||||||
) {
|
) {
|
||||||
let mut wrote_asset_count = 0;
|
let mut wrote_asset_count = 0;
|
||||||
|
|
||||||
@ -401,54 +402,94 @@ pub fn prepare_assets<A: RenderAsset>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A resource that attempts to limit the amount of data transferred from cpu to gpu
|
pub fn reset_render_asset_bytes_per_frame(
|
||||||
/// each frame, preventing choppy frames at the cost of waiting longer for gpu assets
|
mut bpf_limiter: ResMut<RenderAssetBytesPerFrameLimiter>,
|
||||||
/// to become available
|
) {
|
||||||
#[derive(Resource, Default, Debug, Clone, Copy, ExtractResource)]
|
bpf_limiter.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extract_render_asset_bytes_per_frame(
|
||||||
|
bpf: Extract<Res<RenderAssetBytesPerFrame>>,
|
||||||
|
mut bpf_limiter: ResMut<RenderAssetBytesPerFrameLimiter>,
|
||||||
|
) {
|
||||||
|
bpf_limiter.max_bytes = bpf.max_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A resource that defines the amount of data allowed to be transferred from CPU to GPU
|
||||||
|
/// each frame, preventing choppy frames at the cost of waiting longer for GPU assets
|
||||||
|
/// to become available.
|
||||||
|
#[derive(Resource, Default)]
|
||||||
pub struct RenderAssetBytesPerFrame {
|
pub struct RenderAssetBytesPerFrame {
|
||||||
pub max_bytes: Option<usize>,
|
pub max_bytes: Option<usize>,
|
||||||
pub available: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RenderAssetBytesPerFrame {
|
impl RenderAssetBytesPerFrame {
|
||||||
/// `max_bytes`: the number of bytes to write per frame.
|
/// `max_bytes`: the number of bytes to write per frame.
|
||||||
/// this is a soft limit: only full assets are written currently, uploading stops
|
///
|
||||||
|
/// This is a soft limit: only full assets are written currently, uploading stops
|
||||||
/// after the first asset that exceeds the limit.
|
/// after the first asset that exceeds the limit.
|
||||||
|
///
|
||||||
/// To participate, assets should implement [`RenderAsset::byte_len`]. If the default
|
/// To participate, assets should implement [`RenderAsset::byte_len`]. If the default
|
||||||
/// is not overridden, the assets are assumed to be small enough to upload without restriction.
|
/// is not overridden, the assets are assumed to be small enough to upload without restriction.
|
||||||
pub fn new(max_bytes: usize) -> Self {
|
pub fn new(max_bytes: usize) -> Self {
|
||||||
Self {
|
Self {
|
||||||
max_bytes: Some(max_bytes),
|
max_bytes: Some(max_bytes),
|
||||||
available: 0,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Reset the available bytes. Called once per frame by the [`crate::RenderPlugin`].
|
/// A render-world resource that facilitates limiting the data transferred from CPU to GPU
|
||||||
|
/// each frame, preventing choppy frames at the cost of waiting longer for GPU assets
|
||||||
|
/// to become available.
|
||||||
|
#[derive(Resource, Default)]
|
||||||
|
pub struct RenderAssetBytesPerFrameLimiter {
|
||||||
|
/// Populated by [`RenderAssetBytesPerFrame`] during extraction.
|
||||||
|
pub max_bytes: Option<usize>,
|
||||||
|
/// Bytes written this frame.
|
||||||
|
pub bytes_written: AtomicUsize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RenderAssetBytesPerFrameLimiter {
|
||||||
|
/// Reset the bytes written this frame. Called once per frame in `RenderSet::Cleanup` by [`crate::RenderPlugin`].
|
||||||
pub fn reset(&mut self) {
|
pub fn reset(&mut self) {
|
||||||
self.available = self.max_bytes.unwrap_or(usize::MAX);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// check how many bytes are available since the last reset
|
|
||||||
pub fn available_bytes(&self, required_bytes: usize) -> usize {
|
|
||||||
if self.max_bytes.is_none() {
|
|
||||||
return required_bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
required_bytes.min(self.available)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// decrease the available bytes for the current frame
|
|
||||||
fn write_bytes(&mut self, bytes: usize) {
|
|
||||||
if self.max_bytes.is_none() {
|
if self.max_bytes.is_none() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
self.bytes_written.store(0, Ordering::Relaxed);
|
||||||
let write_bytes = bytes.min(self.available);
|
|
||||||
self.available -= write_bytes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if any bytes remain available for writing this frame
|
/// Check how many bytes are available for writing.
|
||||||
|
pub fn available_bytes(&self, required_bytes: usize) -> usize {
|
||||||
|
if let Some(max_bytes) = self.max_bytes {
|
||||||
|
let total_bytes = self
|
||||||
|
.bytes_written
|
||||||
|
.fetch_add(required_bytes, Ordering::Relaxed);
|
||||||
|
|
||||||
|
// The available bytes are the requested bytes minus the amount by which `max_bytes` was already overshot (saturating at zero)
|
||||||
|
if total_bytes >= max_bytes {
|
||||||
|
required_bytes.saturating_sub(total_bytes - max_bytes)
|
||||||
|
} else {
|
||||||
|
required_bytes
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
required_bytes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decreases the available bytes for the current frame.
|
||||||
|
fn write_bytes(&self, bytes: usize) {
|
||||||
|
if self.max_bytes.is_some() && bytes > 0 {
|
||||||
|
self.bytes_written.fetch_add(bytes, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if there are no remaining bytes available for writing this frame.
|
||||||
fn exhausted(&self) -> bool {
|
fn exhausted(&self) -> bool {
|
||||||
self.max_bytes.is_some() && self.available == 0
|
if let Some(max_bytes) = self.max_bytes {
|
||||||
|
let bytes_written = self.bytes_written.load(Ordering::Relaxed);
|
||||||
|
bytes_written >= max_bytes
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user