diff --git a/crates/bevy_core_pipeline/src/bloom/mod.rs b/crates/bevy_core_pipeline/src/bloom/mod.rs index 55d5a70719..382e58a486 100644 --- a/crates/bevy_core_pipeline/src/bloom/mod.rs +++ b/crates/bevy_core_pipeline/src/bloom/mod.rs @@ -15,6 +15,7 @@ use bevy_ecs::{prelude::*, query::QueryItem}; use bevy_math::UVec2; use bevy_render::{ camera::ExtractedCamera, + diagnostic::RecordDiagnostics, extract_component::{ ComponentUniforms, DynamicUniformIndex, ExtractComponentPlugin, UniformComponentPlugin, }, @@ -156,6 +157,9 @@ impl ViewNode for BloomNode { render_context.command_encoder().push_debug_group("bloom"); + let diagnostics = render_context.diagnostic_recorder(); + let time_span = diagnostics.time_span(render_context.command_encoder(), "bloom"); + // First downsample pass { let downsampling_first_bind_group = render_context.render_device().create_bind_group( @@ -275,6 +279,7 @@ impl ViewNode for BloomNode { upsampling_final_pass.draw(0..3, 0..1); } + time_span.end(render_context.command_encoder()); render_context.command_encoder().pop_debug_group(); Ok(()) diff --git a/crates/bevy_core_pipeline/src/core_2d/main_pass_2d_node.rs b/crates/bevy_core_pipeline/src/core_2d/main_pass_2d_node.rs index 4013db7b3b..5e61f475af 100644 --- a/crates/bevy_core_pipeline/src/core_2d/main_pass_2d_node.rs +++ b/crates/bevy_core_pipeline/src/core_2d/main_pass_2d_node.rs @@ -2,6 +2,7 @@ use crate::core_2d::Transparent2d; use bevy_ecs::prelude::*; use bevy_render::{ camera::ExtractedCamera, + diagnostic::RecordDiagnostics, render_graph::{Node, NodeRunError, RenderGraphContext}, render_phase::RenderPhase, render_resource::RenderPassDescriptor, @@ -52,6 +53,8 @@ impl Node for MainPass2dNode { #[cfg(feature = "trace")] let _main_pass_2d = info_span!("main_pass_2d").entered(); + let diagnostics = render_context.diagnostic_recorder(); + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { label: Some("main_pass_2d"), color_attachments: &[Some(target.get_color_attachment())], @@ -60,11 +63,15 @@ impl Node for MainPass2dNode { occlusion_query_set: None, }); + let pass_span = diagnostics.pass_span(&mut render_pass, "main_pass_2d"); + if let Some(viewport) = camera.viewport.as_ref() { render_pass.set_camera_viewport(viewport); } transparent_phase.render(&mut render_pass, world, view_entity); + + pass_span.end(&mut render_pass); } // WebGL2 quirk: if ending with a render pass with a custom viewport, the viewport isn't diff --git a/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs b/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs index 5b7d1315e8..53e732ca7d 100644 --- a/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs +++ b/crates/bevy_core_pipeline/src/core_3d/main_opaque_pass_3d_node.rs @@ -5,6 +5,7 @@ use crate::{ use bevy_ecs::{prelude::World, query::QueryItem}; use bevy_render::{ camera::ExtractedCamera, + diagnostic::RecordDiagnostics, render_graph::{NodeRunError, RenderGraphContext, ViewNode}, render_phase::{RenderPhase, TrackedRenderPass}, render_resource::{CommandEncoderDescriptor, PipelineCache, RenderPassDescriptor, StoreOp}, @@ -47,6 +48,8 @@ impl ViewNode for MainOpaquePass3dNode { ): QueryItem<'w, Self::ViewQuery>, world: &'w World, ) -> Result<(), NodeRunError> { + let diagnostics = render_context.diagnostic_recorder(); + let color_attachments = [Some(target.get_color_attachment())]; let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store)); @@ -70,6 +73,8 @@ impl ViewNode for MainOpaquePass3dNode { occlusion_query_set: None, }); let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = diagnostics.pass_span(&mut render_pass, "main_opaque_pass_3d"); + if let Some(viewport) = camera.viewport.as_ref() { render_pass.set_camera_viewport(viewport); } @@ -104,6 +109,7 @@ impl ViewNode for MainOpaquePass3dNode { } } + pass_span.end(&mut render_pass); drop(render_pass); command_encoder.finish() }); diff --git a/crates/bevy_core_pipeline/src/core_3d/main_transparent_pass_3d_node.rs b/crates/bevy_core_pipeline/src/core_3d/main_transparent_pass_3d_node.rs index 4057e40ee7..18a19cd2fe 100644 --- a/crates/bevy_core_pipeline/src/core_3d/main_transparent_pass_3d_node.rs +++ b/crates/bevy_core_pipeline/src/core_3d/main_transparent_pass_3d_node.rs @@ -2,6 +2,7 @@ use crate::core_3d::Transparent3d; use bevy_ecs::{prelude::*, query::QueryItem}; use bevy_render::{ camera::ExtractedCamera, + diagnostic::RecordDiagnostics, render_graph::{NodeRunError, RenderGraphContext, ViewNode}, render_phase::RenderPhase, render_resource::{RenderPassDescriptor, StoreOp}, @@ -37,6 +38,8 @@ impl ViewNode for MainTransparentPass3dNode { #[cfg(feature = "trace")] let _main_transparent_pass_3d_span = info_span!("main_transparent_pass_3d").entered(); + let diagnostics = render_context.diagnostic_recorder(); + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { label: Some("main_transparent_pass_3d"), color_attachments: &[Some(target.get_color_attachment())], @@ -51,11 +54,15 @@ impl ViewNode for MainTransparentPass3dNode { occlusion_query_set: None, }); + let pass_span = diagnostics.pass_span(&mut render_pass, "main_transparent_pass_3d"); + if let Some(viewport) = camera.viewport.as_ref() { render_pass.set_camera_viewport(viewport); } transparent_phase.render(&mut render_pass, world, view_entity); + + pass_span.end(&mut render_pass); } // WebGL2 quirk: if ending with a render pass with a custom viewport, the viewport isn't diff --git a/crates/bevy_core_pipeline/src/prepass/node.rs b/crates/bevy_core_pipeline/src/prepass/node.rs index 928183ad7d..b22fdf1fb6 100644 --- a/crates/bevy_core_pipeline/src/prepass/node.rs +++ b/crates/bevy_core_pipeline/src/prepass/node.rs @@ -2,6 +2,7 @@ use bevy_ecs::prelude::*; use bevy_ecs::query::QueryItem; use bevy_render::{ camera::ExtractedCamera, + diagnostic::RecordDiagnostics, render_graph::{NodeRunError, RenderGraphContext, ViewNode}, render_phase::{RenderPhase, TrackedRenderPass}, render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp}, @@ -43,6 +44,8 @@ impl ViewNode for PrepassNode { ): QueryItem<'w, Self::ViewQuery>, world: &'w World, ) -> Result<(), NodeRunError> { + let diagnostics = render_context.diagnostic_recorder(); + let mut color_attachments = vec![ view_prepass_textures .normal @@ -83,7 +86,10 @@ impl ViewNode for PrepassNode { timestamp_writes: None, occlusion_query_set: None, }); + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = diagnostics.pass_span(&mut render_pass, "prepass"); + if let Some(viewport) = camera.viewport.as_ref() { render_pass.set_camera_viewport(viewport); } @@ -102,6 +108,7 @@ impl ViewNode for PrepassNode { alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity); } + pass_span.end(&mut render_pass); drop(render_pass); // Copy prepass depth to the main depth texture if deferred isn't going to diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index 061c34a8b2..12d8961f98 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -4,6 +4,7 @@ use bevy_ecs::prelude::*; use bevy_math::{Mat4, UVec3, UVec4, Vec2, Vec3, Vec3Swizzles, Vec4, Vec4Swizzles}; use bevy_render::{ camera::Camera, + diagnostic::RecordDiagnostics, mesh::Mesh, primitives::{CascadesFrusta, CubemapFrusta, Frustum, HalfSpace}, render_asset::RenderAssets, @@ -1809,6 +1810,9 @@ impl Node for ShadowPassNode { render_context: &mut RenderContext<'w>, world: &'w World, ) -> Result<(), NodeRunError> { + let diagnostics = render_context.diagnostic_recorder(); + let time_span = diagnostics.time_span(render_context.command_encoder(), "shadows"); + let view_entity = graph.view_entity(); if let Ok(view_lights) = self.main_view_query.get_manual(world, view_entity) { for view_light_entity in view_lights.lights.iter().copied() { @@ -1820,6 +1824,7 @@ impl Node for ShadowPassNode { let depth_stencil_attachment = Some(view_light.depth_attachment.get_attachment(StoreOp::Store)); + let diagnostics = render_context.diagnostic_recorder(); render_context.add_command_buffer_generation_task(move |render_device| { #[cfg(feature = "trace")] let _shadow_pass_span = info_span!("shadow_pass").entered(); @@ -1836,16 +1841,22 @@ impl Node for ShadowPassNode { timestamp_writes: None, occlusion_query_set: None, }); + let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); + let pass_span = + diagnostics.pass_span(&mut render_pass, view_light.pass_name.clone()); shadow_phase.render(&mut render_pass, world, view_light_entity); + pass_span.end(&mut render_pass); drop(render_pass); command_encoder.finish() }); } } + time_span.end(render_context.command_encoder()); + Ok(()) } } diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml index e981506a9d..49079ed558 100644 --- a/crates/bevy_render/Cargo.toml +++ b/crates/bevy_render/Cargo.toml @@ -42,6 +42,7 @@ bevy_asset = { path = "../bevy_asset", version = "0.14.0-dev" } bevy_color = { path = "../bevy_color", version = "0.14.0-dev" } bevy_core = { path = "../bevy_core", version = "0.14.0-dev" } bevy_derive = { path = "../bevy_derive", version = "0.14.0-dev" } +bevy_diagnostic = { path = "../bevy_diagnostic", version = "0.14.0-dev" } bevy_ecs = { path = "../bevy_ecs", version = "0.14.0-dev" } bevy_encase_derive = { path = "../bevy_encase_derive", version = "0.14.0-dev" } bevy_hierarchy = { path = "../bevy_hierarchy", version = "0.14.0-dev" } diff --git a/crates/bevy_render/src/diagnostic/internal.rs b/crates/bevy_render/src/diagnostic/internal.rs new file mode 100644 index 0000000000..85cefccf5d --- /dev/null +++ b/crates/bevy_render/src/diagnostic/internal.rs @@ -0,0 +1,650 @@ +use std::{ + borrow::Cow, + ops::{DerefMut, Range}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread::{self, ThreadId}, +}; + +use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore}; +use bevy_ecs::system::{Res, ResMut, Resource}; +use bevy_utils::{tracing, Instant}; +use std::sync::Mutex; +use wgpu::{ + Buffer, BufferDescriptor, BufferUsages, CommandEncoder, ComputePass, Features, MapMode, + PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType, Queue, RenderPass, +}; + +use crate::renderer::RenderDevice; + +use super::RecordDiagnostics; + +// buffer offset must be divisible by 256, so this constant must be divisible by 32 (=256/8) +const MAX_TIMESTAMP_QUERIES: u32 = 256; +const MAX_PIPELINE_STATISTICS: u32 = 128; + +const TIMESTAMP_SIZE: u64 = 8; +const PIPELINE_STATISTICS_SIZE: u64 = 40; + +/// Records diagnostics into [`QuerySet`]'s keeping track of the mapping between +/// spans and indices to the corresponding entries in the [`QuerySet`]. +#[derive(Resource)] +pub struct DiagnosticsRecorder { + timestamp_period_ns: f32, + features: Features, + current_frame: Mutex, + submitted_frames: Vec, + finished_frames: Vec, +} + +impl DiagnosticsRecorder { + /// Creates the new `DiagnosticsRecorder`. + pub fn new(device: &RenderDevice, queue: &Queue) -> DiagnosticsRecorder { + let features = device.features(); + + let timestamp_period_ns = if features.contains(Features::TIMESTAMP_QUERY) { + queue.get_timestamp_period() + } else { + 0.0 + }; + + DiagnosticsRecorder { + timestamp_period_ns, + features, + current_frame: Mutex::new(FrameData::new(device, features)), + submitted_frames: Vec::new(), + finished_frames: Vec::new(), + } + } + + fn current_frame_mut(&mut self) -> &mut FrameData { + self.current_frame.get_mut().expect("lock poisoned") + } + + fn current_frame_lock(&self) -> impl DerefMut + '_ { + self.current_frame.lock().expect("lock poisoned") + } + + /// Begins recording diagnostics for a new frame. + pub fn begin_frame(&mut self) { + let mut idx = 0; + while idx < self.submitted_frames.len() { + if self.submitted_frames[idx].run_mapped_callback(self.timestamp_period_ns) { + self.finished_frames + .push(self.submitted_frames.swap_remove(idx)); + } else { + idx += 1; + } + } + + self.current_frame_mut().begin(); + } + + /// Copies data from [`QuerySet`]'s to a [`Buffer`], after which it can be downloaded to CPU. + /// + /// Should be called before [`DiagnosticsRecorder::finish_frame`] + pub fn resolve(&mut self, encoder: &mut CommandEncoder) { + self.current_frame_mut().resolve(encoder); + } + + /// Finishes recording diagnostics for the current frame. + /// + /// The specified `callback` will be invoked when diagnostics become available. + /// + /// Should be called after [`DiagnosticsRecorder::resolve`], + /// and **after** all commands buffers have been queued. + pub fn finish_frame( + &mut self, + device: &RenderDevice, + callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static, + ) { + self.current_frame_mut().finish(callback); + + // reuse one of the finished frames, if we can + let new_frame = match self.finished_frames.pop() { + Some(frame) => frame, + None => FrameData::new(device, self.features), + }; + + let old_frame = std::mem::replace(self.current_frame_mut(), new_frame); + self.submitted_frames.push(old_frame); + } +} + +impl RecordDiagnostics for DiagnosticsRecorder { + fn begin_time_span(&self, encoder: &mut E, span_name: Cow<'static, str>) { + self.current_frame_lock() + .begin_time_span(encoder, span_name); + } + + fn end_time_span(&self, encoder: &mut E) { + self.current_frame_lock().end_time_span(encoder); + } + + fn begin_pass_span(&self, pass: &mut P, span_name: Cow<'static, str>) { + self.current_frame_lock().begin_pass(pass, span_name); + } + + fn end_pass_span(&self, pass: &mut P) { + self.current_frame_lock().end_pass(pass); + } +} + +struct SpanRecord { + thread_id: ThreadId, + path_range: Range, + pass_kind: Option, + begin_timestamp_index: Option, + end_timestamp_index: Option, + begin_instant: Option, + end_instant: Option, + pipeline_statistics_index: Option, +} + +struct FrameData { + timestamps_query_set: Option, + num_timestamps: u32, + supports_timestamps_inside_passes: bool, + pipeline_statistics_query_set: Option, + num_pipeline_statistics: u32, + buffer_size: u64, + pipeline_statistics_buffer_offset: u64, + resolve_buffer: Option, + read_buffer: Option, + path_components: Vec>, + open_spans: Vec, + closed_spans: Vec, + is_mapped: Arc, + callback: Option>, +} + +impl FrameData { + fn new(device: &RenderDevice, features: Features) -> FrameData { + let wgpu_device = device.wgpu_device(); + let mut buffer_size = 0; + + let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) { + buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE; + Some(wgpu_device.create_query_set(&QuerySetDescriptor { + label: Some("timestamps_query_set"), + ty: QueryType::Timestamp, + count: MAX_TIMESTAMP_QUERIES, + })) + } else { + None + }; + + let pipeline_statistics_buffer_offset = buffer_size; + + let pipeline_statistics_query_set = + if features.contains(Features::PIPELINE_STATISTICS_QUERY) { + buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE; + Some(wgpu_device.create_query_set(&QuerySetDescriptor { + label: Some("pipeline_statistics_query_set"), + ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()), + count: MAX_PIPELINE_STATISTICS, + })) + } else { + None + }; + + let (resolve_buffer, read_buffer) = if buffer_size > 0 { + let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor { + label: Some("render_statistics_resolve_buffer"), + size: buffer_size, + usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + let read_buffer = wgpu_device.create_buffer(&BufferDescriptor { + label: Some("render_statistics_read_buffer"), + size: buffer_size, + usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + (Some(resolve_buffer), Some(read_buffer)) + } else { + (None, None) + }; + + FrameData { + timestamps_query_set, + num_timestamps: 0, + supports_timestamps_inside_passes: features + .contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES), + pipeline_statistics_query_set, + num_pipeline_statistics: 0, + buffer_size, + pipeline_statistics_buffer_offset, + resolve_buffer, + read_buffer, + path_components: Vec::new(), + open_spans: Vec::new(), + closed_spans: Vec::new(), + is_mapped: Arc::new(AtomicBool::new(false)), + callback: None, + } + } + + fn begin(&mut self) { + self.num_timestamps = 0; + self.num_pipeline_statistics = 0; + self.path_components.clear(); + self.open_spans.clear(); + self.closed_spans.clear(); + } + + fn write_timestamp( + &mut self, + encoder: &mut impl WriteTimestamp, + is_inside_pass: bool, + ) -> Option { + if is_inside_pass && !self.supports_timestamps_inside_passes { + return None; + } + + if self.num_timestamps >= MAX_TIMESTAMP_QUERIES { + return None; + } + + let set = self.timestamps_query_set.as_ref()?; + let index = self.num_timestamps; + encoder.write_timestamp(set, index); + self.num_timestamps += 1; + Some(index) + } + + fn write_pipeline_statistics( + &mut self, + encoder: &mut impl WritePipelineStatistics, + ) -> Option { + if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS { + return None; + } + + let set = self.pipeline_statistics_query_set.as_ref()?; + let index = self.num_pipeline_statistics; + encoder.begin_pipeline_statistics_query(set, index); + self.num_pipeline_statistics += 1; + Some(index) + } + + fn open_span( + &mut self, + pass_kind: Option, + name: Cow<'static, str>, + ) -> &mut SpanRecord { + let thread_id = thread::current().id(); + + let parent = self + .open_spans + .iter() + .filter(|v| v.thread_id == thread_id) + .last(); + + let path_range = match &parent { + Some(parent) if parent.path_range.end == self.path_components.len() => { + parent.path_range.start..parent.path_range.end + 1 + } + Some(parent) => { + self.path_components + .extend_from_within(parent.path_range.clone()); + self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1 + } + None => self.path_components.len()..self.path_components.len() + 1, + }; + + self.path_components.push(name); + + self.open_spans.push(SpanRecord { + thread_id, + path_range, + pass_kind, + begin_timestamp_index: None, + end_timestamp_index: None, + begin_instant: None, + end_instant: None, + pipeline_statistics_index: None, + }); + + self.open_spans.last_mut().unwrap() + } + + fn close_span(&mut self) -> &mut SpanRecord { + let thread_id = thread::current().id(); + + let iter = self.open_spans.iter(); + let (index, _) = iter + .enumerate() + .filter(|(_, v)| v.thread_id == thread_id) + .last() + .unwrap(); + + let span = self.open_spans.swap_remove(index); + self.closed_spans.push(span); + self.closed_spans.last_mut().unwrap() + } + + fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) { + let begin_instant = Instant::now(); + let begin_timestamp_index = self.write_timestamp(encoder, false); + + let span = self.open_span(None, name); + span.begin_instant = Some(begin_instant); + span.begin_timestamp_index = begin_timestamp_index; + } + + fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) { + let end_timestamp_index = self.write_timestamp(encoder, false); + + let span = self.close_span(); + span.end_timestamp_index = end_timestamp_index; + span.end_instant = Some(Instant::now()); + } + + fn begin_pass(&mut self, pass: &mut P, name: Cow<'static, str>) { + let begin_instant = Instant::now(); + + let begin_timestamp_index = self.write_timestamp(pass, true); + let pipeline_statistics_index = self.write_pipeline_statistics(pass); + + let span = self.open_span(Some(P::KIND), name); + span.begin_instant = Some(begin_instant); + span.begin_timestamp_index = begin_timestamp_index; + span.pipeline_statistics_index = pipeline_statistics_index; + } + + fn end_pass(&mut self, pass: &mut impl Pass) { + let end_timestamp_index = self.write_timestamp(pass, true); + + let span = self.close_span(); + span.end_timestamp_index = end_timestamp_index; + + if span.pipeline_statistics_index.is_some() { + pass.end_pipeline_statistics_query(); + } + + span.end_instant = Some(Instant::now()); + } + + fn resolve(&mut self, encoder: &mut CommandEncoder) { + let Some(resolve_buffer) = &self.resolve_buffer else { + return; + }; + + match &self.timestamps_query_set { + Some(set) if self.num_timestamps > 0 => { + encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0); + } + _ => {} + } + + match &self.pipeline_statistics_query_set { + Some(set) if self.num_pipeline_statistics > 0 => { + encoder.resolve_query_set( + set, + 0..self.num_pipeline_statistics, + resolve_buffer, + self.pipeline_statistics_buffer_offset, + ); + } + _ => {} + } + + let Some(read_buffer) = &self.read_buffer else { + return; + }; + + encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size); + } + + fn diagnostic_path(&self, range: &Range, field: &str) -> DiagnosticPath { + DiagnosticPath::from_components( + std::iter::once("render") + .chain(self.path_components[range.clone()].iter().map(|v| &**v)) + .chain(std::iter::once(field)), + ) + } + + fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) { + let Some(read_buffer) = &self.read_buffer else { + // we still have cpu timings, so let's use them + + let mut diagnostics = Vec::new(); + + for span in &self.closed_spans { + if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "elapsed_cpu"), + suffix: "ms", + value: (end - begin).as_secs_f64() * 1000.0, + }); + } + } + + callback(RenderDiagnostics(diagnostics)); + return; + }; + + self.callback = Some(Box::new(callback)); + + let is_mapped = self.is_mapped.clone(); + read_buffer.slice(..).map_async(MapMode::Read, move |res| { + if let Err(e) = res { + tracing::warn!("Failed to download render statistics buffer: {e}"); + return; + } + + is_mapped.store(true, Ordering::Release); + }); + } + + // returns true if the frame is considered finished, false otherwise + fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool { + let Some(read_buffer) = &self.read_buffer else { + return true; + }; + if !self.is_mapped.load(Ordering::Acquire) { + // need to wait more + return false; + } + let Some(callback) = self.callback.take() else { + return true; + }; + + let data = read_buffer.slice(..).get_mapped_range(); + + let timestamps = data[..(self.num_timestamps * 8) as usize] + .chunks(8) + .map(|v| u64::from_ne_bytes(v.try_into().unwrap())) + .collect::>(); + + let start = self.pipeline_statistics_buffer_offset as usize; + let len = (self.num_pipeline_statistics as usize) * 40; + let pipeline_statistics = data[start..start + len] + .chunks(8) + .map(|v| u64::from_ne_bytes(v.try_into().unwrap())) + .collect::>(); + + let mut diagnostics = Vec::new(); + + for span in &self.closed_spans { + if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "elapsed_cpu"), + suffix: "ms", + value: (end - begin).as_secs_f64() * 1000.0, + }); + } + + if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index) + { + let begin = timestamps[begin as usize] as f64; + let end = timestamps[end as usize] as f64; + let value = (end - begin) * (timestamp_period_ns as f64) / 1e6; + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "elapsed_gpu"), + suffix: "ms", + value, + }); + } + + if let Some(index) = span.pipeline_statistics_index { + let index = (index as usize) * 5; + + if span.pass_kind == Some(PassKind::Render) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"), + suffix: "", + value: pipeline_statistics[index] as f64, + }); + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "clipper_invocations"), + suffix: "", + value: pipeline_statistics[index + 1] as f64, + }); + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"), + suffix: "", + value: pipeline_statistics[index + 2] as f64, + }); + + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"), + suffix: "", + value: pipeline_statistics[index + 3] as f64, + }); + } + + if span.pass_kind == Some(PassKind::Compute) { + diagnostics.push(RenderDiagnostic { + path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"), + suffix: "", + value: pipeline_statistics[index + 4] as f64, + }); + } + } + } + + callback(RenderDiagnostics(diagnostics)); + + drop(data); + read_buffer.unmap(); + self.is_mapped.store(false, Ordering::Release); + + true + } +} + +/// Resource which stores render diagnostics of the most recent frame. +#[derive(Debug, Default, Clone, Resource)] +pub struct RenderDiagnostics(Vec); + +/// A render diagnostic which has been recorded, but not yet stored in [`DiagnosticsStore`]. +#[derive(Debug, Clone, Resource)] +pub struct RenderDiagnostic { + pub path: DiagnosticPath, + pub suffix: &'static str, + pub value: f64, +} + +/// Stores render diagnostics before they can be synced with the main app. +/// +/// This mutex is locked twice per frame: +/// 1. in `PreUpdate`, during [`sync_diagnostics`], +/// 2. after rendering has finished and statistics have been downloaded from GPU. +#[derive(Debug, Default, Clone, Resource)] +pub struct RenderDiagnosticsMutex(pub(crate) Arc>>); + +/// Updates render diagnostics measurements. +pub fn sync_diagnostics(mutex: Res, mut store: ResMut) { + let Some(diagnostics) = mutex.0.lock().ok().and_then(|mut v| v.take()) else { + return; + }; + + let time = Instant::now(); + + for diagnostic in &diagnostics.0 { + if store.get(&diagnostic.path).is_none() { + store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix)); + } + + store + .get_mut(&diagnostic.path) + .unwrap() + .add_measurement(DiagnosticMeasurement { + time, + value: diagnostic.value, + }); + } +} + +pub trait WriteTimestamp { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32); +} + +impl WriteTimestamp for CommandEncoder { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) { + CommandEncoder::write_timestamp(self, query_set, index); + } +} + +impl WriteTimestamp for RenderPass<'_> { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) { + RenderPass::write_timestamp(self, query_set, index); + } +} + +impl WriteTimestamp for ComputePass<'_> { + fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) { + ComputePass::write_timestamp(self, query_set, index); + } +} + +pub trait WritePipelineStatistics { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32); + + fn end_pipeline_statistics_query(&mut self); +} + +impl WritePipelineStatistics for RenderPass<'_> { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) { + RenderPass::begin_pipeline_statistics_query(self, query_set, index); + } + + fn end_pipeline_statistics_query(&mut self) { + RenderPass::end_pipeline_statistics_query(self); + } +} + +impl WritePipelineStatistics for ComputePass<'_> { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) { + ComputePass::begin_pipeline_statistics_query(self, query_set, index); + } + + fn end_pipeline_statistics_query(&mut self) { + ComputePass::end_pipeline_statistics_query(self); + } +} + +pub trait Pass: WritePipelineStatistics + WriteTimestamp { + const KIND: PassKind; +} + +impl Pass for RenderPass<'_> { + const KIND: PassKind = PassKind::Render; +} + +impl Pass for ComputePass<'_> { + const KIND: PassKind = PassKind::Compute; +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub enum PassKind { + Render, + Compute, +} diff --git a/crates/bevy_render/src/diagnostic/mod.rs b/crates/bevy_render/src/diagnostic/mod.rs new file mode 100644 index 0000000000..af6e4a1c37 --- /dev/null +++ b/crates/bevy_render/src/diagnostic/mod.rs @@ -0,0 +1,185 @@ +//! Infrastructure for recording render diagnostics. +//! +//! For more info, see [`RenderDiagnosticsPlugin`]. + +pub(crate) mod internal; + +use std::{borrow::Cow, marker::PhantomData, sync::Arc}; + +use bevy_app::{App, Plugin, PreUpdate}; + +use crate::RenderApp; + +use self::internal::{ + sync_diagnostics, DiagnosticsRecorder, Pass, RenderDiagnosticsMutex, WriteTimestamp, +}; + +use super::{RenderDevice, RenderQueue}; + +/// Enables collecting render diagnostics, such as CPU/GPU elapsed time per render pass, +/// as well as pipeline statistics (number of primitives, number of shader invocations, etc). +/// +/// To access the diagnostics, you can use [`DiagnosticsStore`](bevy_diagnostic::DiagnosticsStore) resource, +/// or add [`LogDiagnosticsPlugin`](bevy_diagnostic::LogDiagnosticsPlugin). +/// +/// To record diagnostics in your own passes: +/// 1. First, obtain the diagnostic recorder using [`RenderContext::diagnostic_recorder`](crate::renderer::RenderContext::diagnostic_recorder). +/// +/// It won't do anything unless [`RenderDiagnosticsPlugin`] is present, +/// so you're free to omit `#[cfg]` clauses. +/// ```ignore +/// let diagnostics = render_context.diagnostic_recorder(); +/// ``` +/// 2. Begin the span inside a command encoder, or a render/compute pass encoder. +/// ```ignore +/// let time_span = diagnostics.time_span(render_context.command_encoder(), "shadows"); +/// ``` +/// 3. End the span, providing the same encoder. +/// ```ignore +/// time_span.end(render_context.command_encoder()); +/// ``` +/// +/// # Supported platforms +/// Timestamp queries and pipeline statistics are currently supported only on Vulkan and DX12. +/// On other platforms (Metal, WebGPU, WebGL2) only CPU time will be recorded. +#[allow(clippy::doc_markdown)] +#[derive(Default)] +pub struct RenderDiagnosticsPlugin; + +impl Plugin for RenderDiagnosticsPlugin { + fn build(&self, app: &mut App) { + let render_diagnostics_mutex = RenderDiagnosticsMutex::default(); + app.insert_resource(render_diagnostics_mutex.clone()) + .add_systems(PreUpdate, sync_diagnostics); + + if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { + render_app.insert_resource(render_diagnostics_mutex); + } + } + + fn finish(&self, app: &mut App) { + let Ok(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + let device = render_app.world.resource::(); + let queue = render_app.world.resource::(); + render_app.insert_resource(DiagnosticsRecorder::new(device, queue)); + } +} + +/// Allows recording diagnostic spans. +pub trait RecordDiagnostics: Send + Sync { + /// Begin a time span, which will record elapsed CPU and GPU time. + /// + /// Returns a guard, which will panic on drop unless you end the span. + fn time_span(&self, encoder: &mut E, name: N) -> TimeSpanGuard<'_, Self, E> + where + E: WriteTimestamp, + N: Into>, + { + self.begin_time_span(encoder, name.into()); + TimeSpanGuard { + recorder: self, + marker: PhantomData, + } + } + + /// Begin a pass span, which will record elapsed CPU and GPU time, + /// as well as pipeline statistics on supported platforms. + /// + /// Returns a guard, which will panic on drop unless you end the span. + fn pass_span(&self, pass: &mut P, name: N) -> PassSpanGuard<'_, Self, P> + where + P: Pass, + N: Into>, + { + self.begin_pass_span(pass, name.into()); + PassSpanGuard { + recorder: self, + marker: PhantomData, + } + } + + #[doc(hidden)] + fn begin_time_span(&self, encoder: &mut E, name: Cow<'static, str>); + + #[doc(hidden)] + fn end_time_span(&self, encoder: &mut E); + + #[doc(hidden)] + fn begin_pass_span(&self, pass: &mut P, name: Cow<'static, str>); + + #[doc(hidden)] + fn end_pass_span(&self, pass: &mut P); +} + +/// Guard returned by [`RecordDiagnostics::time_span`]. +/// +/// Will panic on drop unless [`TimeSpanGuard::end`] is called. +pub struct TimeSpanGuard<'a, R: ?Sized, E> { + recorder: &'a R, + marker: PhantomData, +} + +impl TimeSpanGuard<'_, R, E> { + /// End the span. You have to provide the same encoder which was used to begin the span. + pub fn end(self, encoder: &mut E) { + self.recorder.end_time_span(encoder); + std::mem::forget(self); + } +} + +impl Drop for TimeSpanGuard<'_, R, E> { + fn drop(&mut self) { + panic!("TimeSpanScope::end was never called") + } +} + +/// Guard returned by [`RecordDiagnostics::pass_span`]. +/// +/// Will panic on drop unless [`PassSpanGuard::end`] is called. +pub struct PassSpanGuard<'a, R: ?Sized, P> { + recorder: &'a R, + marker: PhantomData

, +} + +impl PassSpanGuard<'_, R, P> { + /// End the span. You have to provide the same encoder which was used to begin the span. + pub fn end(self, pass: &mut P) { + self.recorder.end_pass_span(pass); + std::mem::forget(self); + } +} + +impl Drop for PassSpanGuard<'_, R, P> { + fn drop(&mut self) { + panic!("PassSpanScope::end was never called") + } +} + +impl RecordDiagnostics for Option> { + fn begin_time_span(&self, encoder: &mut E, name: Cow<'static, str>) { + if let Some(recorder) = &self { + recorder.begin_time_span(encoder, name); + } + } + + fn end_time_span(&self, encoder: &mut E) { + if let Some(recorder) = &self { + recorder.end_time_span(encoder); + } + } + + fn begin_pass_span(&self, pass: &mut P, name: Cow<'static, str>) { + if let Some(recorder) = &self { + recorder.begin_pass_span(pass, name); + } + } + + fn end_pass_span(&self, pass: &mut P) { + if let Some(recorder) = &self { + recorder.end_pass_span(pass); + } + } +} diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index dd0a04b088..290b2cabce 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -11,6 +11,7 @@ pub mod alpha; pub mod batching; pub mod camera; pub mod deterministic; +pub mod diagnostic; pub mod extract_component; pub mod extract_instances; mod extract_param; diff --git a/crates/bevy_render/src/render_phase/draw_state.rs b/crates/bevy_render/src/render_phase/draw_state.rs index 0c09b6aba6..41482de6a8 100644 --- a/crates/bevy_render/src/render_phase/draw_state.rs +++ b/crates/bevy_render/src/render_phase/draw_state.rs @@ -1,5 +1,6 @@ use crate::{ camera::Viewport, + diagnostic::internal::{Pass, PassKind, WritePipelineStatistics, WriteTimestamp}, render_resource::{ BindGroup, BindGroupId, Buffer, BufferId, BufferSlice, RenderPipeline, RenderPipelineId, ShaderStages, @@ -9,7 +10,7 @@ use crate::{ use bevy_color::LinearRgba; use bevy_utils::{default, detailed_trace}; use std::ops::Range; -use wgpu::{IndexFormat, RenderPass}; +use wgpu::{IndexFormat, QuerySet, RenderPass}; /// Tracks the state of a [`TrackedRenderPass`]. /// @@ -179,7 +180,7 @@ impl<'a> TrackedRenderPass<'a> { /// Assign a vertex buffer to a slot. /// /// Subsequent calls to [`draw`] and [`draw_indexed`] on this - /// [`RenderPass`] will use `buffer` as one of the source vertex buffers. + /// [`TrackedRenderPass`] will use `buffer` as one of the source vertex buffers. /// /// The `slot_index` refers to the index of the matching descriptor in /// [`VertexState::buffers`](crate::render_resource::VertexState::buffers). @@ -603,3 +604,23 @@ impl<'a> TrackedRenderPass<'a> { self.pass.set_blend_constant(wgpu::Color::from(color)); } } + +impl WriteTimestamp for TrackedRenderPass<'_> { + fn write_timestamp(&mut self, query_set: &wgpu::QuerySet, index: u32) { + self.pass.write_timestamp(query_set, index); + } +} + +impl WritePipelineStatistics for TrackedRenderPass<'_> { + fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) { + self.pass.begin_pipeline_statistics_query(query_set, index); + } + + fn end_pipeline_statistics_query(&mut self) { + self.pass.end_pipeline_statistics_query(); + } +} + +impl Pass for TrackedRenderPass<'_> { + const KIND: PassKind = PassKind::Render; +} diff --git a/crates/bevy_render/src/renderer/graph_runner.rs b/crates/bevy_render/src/renderer/graph_runner.rs index c155f4027d..2023ab326e 100644 --- a/crates/bevy_render/src/renderer/graph_runner.rs +++ b/crates/bevy_render/src/renderer/graph_runner.rs @@ -8,6 +8,7 @@ use std::{borrow::Cow, collections::VecDeque}; use thiserror::Error; use crate::{ + diagnostic::internal::{DiagnosticsRecorder, RenderDiagnosticsMutex}, render_graph::{ Edge, InternedRenderLabel, InternedRenderSubGraph, NodeRunError, NodeState, RenderGraph, RenderGraphContext, SlotLabel, SlotType, SlotValue, @@ -54,21 +55,39 @@ impl RenderGraphRunner { pub fn run( graph: &RenderGraph, render_device: RenderDevice, + mut diagnostics_recorder: Option, queue: &wgpu::Queue, adapter: &wgpu::Adapter, world: &World, finalizer: impl FnOnce(&mut wgpu::CommandEncoder), - ) -> Result<(), RenderGraphRunnerError> { - let mut render_context = RenderContext::new(render_device, adapter.get_info()); + ) -> Result, RenderGraphRunnerError> { + if let Some(recorder) = &mut diagnostics_recorder { + recorder.begin_frame(); + } + + let mut render_context = + RenderContext::new(render_device, adapter.get_info(), diagnostics_recorder); Self::run_graph(graph, None, &mut render_context, world, &[], None)?; finalizer(render_context.command_encoder()); - { + let (render_device, mut diagnostics_recorder) = { #[cfg(feature = "trace")] let _span = info_span!("submit_graph_commands").entered(); - queue.submit(render_context.finish()); + + let (commands, render_device, diagnostics_recorder) = render_context.finish(); + queue.submit(commands); + + (render_device, diagnostics_recorder) + }; + + if let Some(recorder) = &mut diagnostics_recorder { + let render_diagnostics_mutex = world.resource::().0.clone(); + recorder.finish_frame(&render_device, move |diagnostics| { + *render_diagnostics_mutex.lock().expect("lock poisoned") = Some(diagnostics); + }); } - Ok(()) + + Ok(diagnostics_recorder) } fn run_graph<'w>( diff --git a/crates/bevy_render/src/renderer/mod.rs b/crates/bevy_render/src/renderer/mod.rs index fa4377a405..92eada8f23 100644 --- a/crates/bevy_render/src/renderer/mod.rs +++ b/crates/bevy_render/src/renderer/mod.rs @@ -8,6 +8,7 @@ pub use graph_runner::*; pub use render_device::*; use crate::{ + diagnostic::{internal::DiagnosticsRecorder, RecordDiagnostics}, render_graph::RenderGraph, render_phase::TrackedRenderPass, render_resource::RenderPassDescriptor, @@ -27,34 +28,46 @@ pub fn render_system(world: &mut World, state: &mut SystemState| { graph.update(world); }); + + let diagnostics_recorder = world.remove_resource::(); + let graph = world.resource::(); let render_device = world.resource::(); let render_queue = world.resource::(); let render_adapter = world.resource::(); - if let Err(e) = RenderGraphRunner::run( + let res = RenderGraphRunner::run( graph, render_device.clone(), // TODO: is this clone really necessary? + diagnostics_recorder, &render_queue.0, &render_adapter.0, world, |encoder| { crate::view::screenshot::submit_screenshot_commands(world, encoder); }, - ) { - error!("Error running render graph:"); - { - let mut src: &dyn std::error::Error = &e; - loop { - error!("> {}", src); - match src.source() { - Some(s) => src = s, - None => break, + ); + + match res { + Ok(Some(diagnostics_recorder)) => { + world.insert_resource(diagnostics_recorder); + } + Ok(None) => {} + Err(e) => { + error!("Error running render graph:"); + { + let mut src: &dyn std::error::Error = &e; + loop { + error!("> {}", src); + match src.source() { + Some(s) => src = s, + None => break, + } } } - } - panic!("Error running render graph: {e}"); + panic!("Error running render graph: {e}"); + } } { @@ -306,11 +319,16 @@ pub struct RenderContext<'w> { command_encoder: Option, command_buffer_queue: Vec>, force_serial: bool, + diagnostics_recorder: Option>, } impl<'w> RenderContext<'w> { /// Creates a new [`RenderContext`] from a [`RenderDevice`]. - pub fn new(render_device: RenderDevice, adapter_info: AdapterInfo) -> Self { + pub fn new( + render_device: RenderDevice, + adapter_info: AdapterInfo, + diagnostics_recorder: Option, + ) -> Self { // HACK: Parallel command encoding is currently bugged on AMD + Windows + Vulkan with wgpu 0.19.1 #[cfg(target_os = "windows")] let force_serial = @@ -326,6 +344,7 @@ impl<'w> RenderContext<'w> { command_encoder: None, command_buffer_queue: Vec::new(), force_serial, + diagnostics_recorder: diagnostics_recorder.map(Arc::new), } } @@ -334,6 +353,12 @@ impl<'w> RenderContext<'w> { &self.render_device } + /// Gets the diagnostics recorder, used to track elapsed time and pipeline statistics + /// of various render and compute passes. + pub fn diagnostic_recorder(&self) -> impl RecordDiagnostics { + self.diagnostics_recorder.clone() + } + /// Gets the current [`CommandEncoder`]. pub fn command_encoder(&mut self) -> &mut CommandEncoder { self.command_encoder.get_or_insert_with(|| { @@ -353,6 +378,7 @@ impl<'w> RenderContext<'w> { self.render_device .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()) }); + let render_pass = command_encoder.begin_render_pass(&descriptor); TrackedRenderPass::new(&self.render_device, render_pass) } @@ -389,7 +415,13 @@ impl<'w> RenderContext<'w> { /// /// This function will wait until all command buffer generation tasks are complete /// by running them in parallel (where supported). - pub fn finish(mut self) -> Vec { + pub fn finish( + mut self, + ) -> ( + Vec, + RenderDevice, + Option, + ) { self.flush_encoder(); let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len()); @@ -413,9 +445,30 @@ impl<'w> RenderContext<'w> { } } }); + command_buffers.append(&mut task_based_command_buffers); command_buffers.sort_unstable_by_key(|(i, _)| *i); - command_buffers.into_iter().map(|(_, cb)| cb).collect() + + let mut command_buffers = command_buffers + .into_iter() + .map(|(_, cb)| cb) + .collect::>(); + + let mut diagnostics_recorder = self.diagnostics_recorder.take().map(|v| { + Arc::try_unwrap(v) + .ok() + .expect("diagnostic recorder shouldn't be held longer than necessary") + }); + + if let Some(recorder) = &mut diagnostics_recorder { + let mut command_encoder = self + .render_device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + recorder.resolve(&mut command_encoder); + command_buffers.push(command_encoder.finish()); + } + + (command_buffers, self.render_device, diagnostics_recorder) } fn flush_encoder(&mut self) {