Multithreaded render command encoding (#9172)
# Objective - Encoding many GPU commands (for example, in a render pass with many draws, such as the main opaque pass) onto a `wgpu::CommandEncoder` is very expensive, and takes a long time. - To improve performance, we want to perform the command encoding for these heavy passes in parallel. ## Solution - `RenderContext` can now queue up "command buffer generation tasks", which are closures that will generate a command buffer when called. - When finalizing the render context to produce the final list of command buffers, these tasks are run in parallel on the `ComputeTaskPool` to produce their corresponding command buffers. - The general idea is that the node graph will run in serial, but in a node, instead of doing rendering work, you can add tasks to do render work in parallel with other nodes' tasks that get run at the end of the graph execution. ## Nodes Parallelized - `MainOpaquePass3dNode` - `PrepassNode` - `DeferredGBufferPrepassNode` - `ShadowPassNode` (One task per view) ## Future Work - For a large number of draw calls, it might be worth further subdividing passes into 2+ tasks. - Extend this to UI, 2d, transparent, and transmissive nodes? - Needs testing - small command buffers are inefficient - it may be worth reverting to the serial command encoder usage for render phases with few items. - All "serial" (traditional) rendering work must finish before parallel rendering tasks (the new stuff) can start to run. - There is still only one submission to the graphics queue at the end of the graph execution. There is still no ability to submit work earlier. ## Performance Improvement Thanks to @Elabajaba for testing on Bistro. TLDR: Without shadow mapping, this PR has no impact. _With_ shadow mapping, this PR gives **~40 more fps** than main.
--- ## Changelog - `MainOpaquePass3dNode`, `PrepassNode`, `DeferredGBufferPrepassNode`, and each shadow map within `ShadowPassNode` are now encoded in parallel, giving _greatly_ increased CPU performance, mainly when shadow mapping is enabled. - Does not work on WASM or AMD+Windows+Vulkan. - Added `RenderContext::add_command_buffer_generation_task()`. - `RenderContext::new()` now takes adapter info. - Some render graph and `Node`-related types and methods now have additional lifetime constraints. ## Migration Guide - `RenderContext::new()` now takes adapter info. - Some render graph and `Node`-related types and methods now have additional lifetime constraints. --------- Co-authored-by: Elabajaba <Elabajaba@users.noreply.github.com> Co-authored-by: François <mockersf@gmail.com>
This commit is contained in:
parent
5313730534
commit
f4dab8a4e8
@ -6,8 +6,8 @@ use bevy_ecs::{prelude::World, query::QueryItem};
|
||||
use bevy_render::{
|
||||
camera::ExtractedCamera,
|
||||
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
|
||||
render_phase::RenderPhase,
|
||||
render_resource::{PipelineCache, RenderPassDescriptor, StoreOp},
|
||||
render_phase::{RenderPhase, TrackedRenderPass},
|
||||
render_resource::{CommandEncoderDescriptor, PipelineCache, RenderPassDescriptor, StoreOp},
|
||||
renderer::RenderContext,
|
||||
view::{ViewDepthTexture, ViewTarget, ViewUniformOffset},
|
||||
};
|
||||
@ -31,10 +31,10 @@ impl ViewNode for MainOpaquePass3dNode {
|
||||
&'static ViewUniformOffset,
|
||||
);
|
||||
|
||||
fn run(
|
||||
fn run<'w>(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
render_context: &mut RenderContext,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
(
|
||||
camera,
|
||||
opaque_phase,
|
||||
@ -44,52 +44,69 @@ impl ViewNode for MainOpaquePass3dNode {
|
||||
skybox_pipeline,
|
||||
skybox_bind_group,
|
||||
view_uniform_offset,
|
||||
): QueryItem<Self::ViewQuery>,
|
||||
world: &World,
|
||||
): QueryItem<'w, Self::ViewQuery>,
|
||||
world: &'w World,
|
||||
) -> Result<(), NodeRunError> {
|
||||
// Run the opaque pass, sorted by pipeline key and mesh id to greatly improve batching.
|
||||
// NOTE: Scoped to drop the mutable borrow of render_context
|
||||
#[cfg(feature = "trace")]
|
||||
let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered();
|
||||
|
||||
// Setup render pass
|
||||
let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
|
||||
label: Some("main_opaque_pass_3d"),
|
||||
color_attachments: &[Some(target.get_color_attachment())],
|
||||
depth_stencil_attachment: Some(depth.get_attachment(StoreOp::Store)),
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
});
|
||||
|
||||
if let Some(viewport) = camera.viewport.as_ref() {
|
||||
render_pass.set_camera_viewport(viewport);
|
||||
}
|
||||
let color_attachments = [Some(target.get_color_attachment())];
|
||||
let depth_stencil_attachment = Some(depth.get_attachment(StoreOp::Store));
|
||||
|
||||
let view_entity = graph.view_entity();
|
||||
render_context.add_command_buffer_generation_task(move |render_device| {
|
||||
#[cfg(feature = "trace")]
|
||||
let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered();
|
||||
|
||||
// Opaque draws
|
||||
opaque_phase.render(&mut render_pass, world, view_entity);
|
||||
// Command encoder setup
|
||||
let mut command_encoder =
|
||||
render_device.create_command_encoder(&CommandEncoderDescriptor {
|
||||
label: Some("main_opaque_pass_3d_command_encoder"),
|
||||
});
|
||||
|
||||
// Alpha draws
|
||||
if !alpha_mask_phase.items.is_empty() {
|
||||
alpha_mask_phase.render(&mut render_pass, world, view_entity);
|
||||
}
|
||||
|
||||
// Draw the skybox using a fullscreen triangle
|
||||
if let (Some(skybox_pipeline), Some(SkyboxBindGroup(skybox_bind_group))) =
|
||||
(skybox_pipeline, skybox_bind_group)
|
||||
{
|
||||
let pipeline_cache = world.resource::<PipelineCache>();
|
||||
if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_pipeline.0) {
|
||||
render_pass.set_render_pipeline(pipeline);
|
||||
render_pass.set_bind_group(
|
||||
0,
|
||||
&skybox_bind_group.0,
|
||||
&[view_uniform_offset.offset, skybox_bind_group.1],
|
||||
);
|
||||
render_pass.draw(0..3, 0..1);
|
||||
// Render pass setup
|
||||
let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
|
||||
label: Some("main_opaque_pass_3d"),
|
||||
color_attachments: &color_attachments,
|
||||
depth_stencil_attachment,
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
});
|
||||
let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
|
||||
if let Some(viewport) = camera.viewport.as_ref() {
|
||||
render_pass.set_camera_viewport(viewport);
|
||||
}
|
||||
}
|
||||
|
||||
// Opaque draws
|
||||
if !opaque_phase.items.is_empty() {
|
||||
#[cfg(feature = "trace")]
|
||||
let _opaque_main_pass_3d_span = info_span!("opaque_main_pass_3d").entered();
|
||||
opaque_phase.render(&mut render_pass, world, view_entity);
|
||||
}
|
||||
|
||||
// Alpha draws
|
||||
if !alpha_mask_phase.items.is_empty() {
|
||||
#[cfg(feature = "trace")]
|
||||
let _alpha_mask_main_pass_3d_span = info_span!("alpha_mask_main_pass_3d").entered();
|
||||
alpha_mask_phase.render(&mut render_pass, world, view_entity);
|
||||
}
|
||||
|
||||
// Skybox draw using a fullscreen triangle
|
||||
if let (Some(skybox_pipeline), Some(SkyboxBindGroup(skybox_bind_group))) =
|
||||
(skybox_pipeline, skybox_bind_group)
|
||||
{
|
||||
let pipeline_cache = world.resource::<PipelineCache>();
|
||||
if let Some(pipeline) = pipeline_cache.get_render_pipeline(skybox_pipeline.0) {
|
||||
render_pass.set_render_pipeline(pipeline);
|
||||
render_pass.set_bind_group(
|
||||
0,
|
||||
&skybox_bind_group.0,
|
||||
&[view_uniform_offset.offset, skybox_bind_group.1],
|
||||
);
|
||||
render_pass.draw(0..3, 0..1);
|
||||
}
|
||||
}
|
||||
|
||||
drop(render_pass);
|
||||
command_encoder.finish()
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -2,7 +2,8 @@ use bevy_ecs::prelude::*;
|
||||
use bevy_ecs::query::QueryItem;
|
||||
use bevy_render::render_graph::ViewNode;
|
||||
|
||||
use bevy_render::render_resource::StoreOp;
|
||||
use bevy_render::render_phase::TrackedRenderPass;
|
||||
use bevy_render::render_resource::{CommandEncoderDescriptor, StoreOp};
|
||||
use bevy_render::{
|
||||
camera::ExtractedCamera,
|
||||
render_graph::{NodeRunError, RenderGraphContext},
|
||||
@ -33,21 +34,19 @@ impl ViewNode for DeferredGBufferPrepassNode {
|
||||
&'static ViewPrepassTextures,
|
||||
);
|
||||
|
||||
fn run(
|
||||
fn run<'w>(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
render_context: &mut RenderContext,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
(
|
||||
camera,
|
||||
opaque_deferred_phase,
|
||||
alpha_mask_deferred_phase,
|
||||
view_depth_texture,
|
||||
view_prepass_textures,
|
||||
): QueryItem<Self::ViewQuery>,
|
||||
world: &World,
|
||||
): QueryItem<'w, Self::ViewQuery>,
|
||||
world: &'w World,
|
||||
) -> Result<(), NodeRunError> {
|
||||
let view_entity = graph.view_entity();
|
||||
|
||||
let mut color_attachments = vec![];
|
||||
color_attachments.push(
|
||||
view_prepass_textures
|
||||
@ -107,49 +106,64 @@ impl ViewNode for DeferredGBufferPrepassNode {
|
||||
.map(|deferred_lighting_pass_id| deferred_lighting_pass_id.get_attachment()),
|
||||
);
|
||||
|
||||
// If all color attachments are none: clear the color attachment list so that no fragment shader is required
|
||||
if color_attachments.iter().all(Option::is_none) {
|
||||
// All attachments are none: clear the attachment list so that no fragment shader is required.
|
||||
color_attachments.clear();
|
||||
}
|
||||
|
||||
{
|
||||
// Set up the pass descriptor with the depth attachment and optional color attachments.
|
||||
let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
|
||||
let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store));
|
||||
|
||||
let view_entity = graph.view_entity();
|
||||
render_context.add_command_buffer_generation_task(move |render_device| {
|
||||
#[cfg(feature = "trace")]
|
||||
let _deferred_span = info_span!("deferred").entered();
|
||||
|
||||
// Command encoder setup
|
||||
let mut command_encoder =
|
||||
render_device.create_command_encoder(&CommandEncoderDescriptor {
|
||||
label: Some("deferred_command_encoder"),
|
||||
});
|
||||
|
||||
// Render pass setup
|
||||
let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
|
||||
label: Some("deferred"),
|
||||
color_attachments: &color_attachments,
|
||||
depth_stencil_attachment: Some(view_depth_texture.get_attachment(StoreOp::Store)),
|
||||
depth_stencil_attachment,
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
});
|
||||
|
||||
let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
|
||||
if let Some(viewport) = camera.viewport.as_ref() {
|
||||
render_pass.set_camera_viewport(viewport);
|
||||
}
|
||||
|
||||
// Always run deferred pass to ensure the deferred gbuffer and deferred_lighting_pass_id are cleared.
|
||||
{
|
||||
// Run the prepass, sorted front-to-back.
|
||||
// Opaque draws
|
||||
if !opaque_deferred_phase.items.is_empty() {
|
||||
#[cfg(feature = "trace")]
|
||||
let _opaque_prepass_span = info_span!("opaque_deferred").entered();
|
||||
opaque_deferred_phase.render(&mut render_pass, world, view_entity);
|
||||
}
|
||||
|
||||
// Alpha masked draws
|
||||
if !alpha_mask_deferred_phase.items.is_empty() {
|
||||
// Run the deferred, sorted front-to-back.
|
||||
#[cfg(feature = "trace")]
|
||||
let _alpha_mask_deferred_span = info_span!("alpha_mask_deferred").entered();
|
||||
alpha_mask_deferred_phase.render(&mut render_pass, world, view_entity);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
|
||||
// Copy depth buffer to texture.
|
||||
render_context.command_encoder().copy_texture_to_texture(
|
||||
view_depth_texture.texture.as_image_copy(),
|
||||
prepass_depth_texture.texture.texture.as_image_copy(),
|
||||
view_prepass_textures.size,
|
||||
);
|
||||
}
|
||||
drop(render_pass);
|
||||
|
||||
// Copy the main depth texture into the prepass depth texture
|
||||
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
|
||||
command_encoder.copy_texture_to_texture(
|
||||
view_depth_texture.texture.as_image_copy(),
|
||||
prepass_depth_texture.texture.texture.as_image_copy(),
|
||||
view_prepass_textures.size,
|
||||
);
|
||||
}
|
||||
|
||||
command_encoder.finish()
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,12 +1,10 @@
|
||||
use bevy_ecs::prelude::*;
|
||||
use bevy_ecs::query::QueryItem;
|
||||
use bevy_render::render_graph::ViewNode;
|
||||
use bevy_render::render_resource::StoreOp;
|
||||
use bevy_render::{
|
||||
camera::ExtractedCamera,
|
||||
render_graph::{NodeRunError, RenderGraphContext},
|
||||
render_phase::RenderPhase,
|
||||
render_resource::RenderPassDescriptor,
|
||||
render_graph::{NodeRunError, RenderGraphContext, ViewNode},
|
||||
render_phase::{RenderPhase, TrackedRenderPass},
|
||||
render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp},
|
||||
renderer::RenderContext,
|
||||
view::ViewDepthTexture,
|
||||
};
|
||||
@ -31,10 +29,10 @@ impl ViewNode for PrepassNode {
|
||||
Option<&'static DeferredPrepass>,
|
||||
);
|
||||
|
||||
fn run(
|
||||
fn run<'w>(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
render_context: &mut RenderContext,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
(
|
||||
camera,
|
||||
opaque_prepass_phase,
|
||||
@ -42,11 +40,9 @@ impl ViewNode for PrepassNode {
|
||||
view_depth_texture,
|
||||
view_prepass_textures,
|
||||
deferred_prepass,
|
||||
): QueryItem<Self::ViewQuery>,
|
||||
world: &World,
|
||||
): QueryItem<'w, Self::ViewQuery>,
|
||||
world: &'w World,
|
||||
) -> Result<(), NodeRunError> {
|
||||
let view_entity = graph.view_entity();
|
||||
|
||||
let mut color_attachments = vec![
|
||||
view_prepass_textures
|
||||
.normal
|
||||
@ -56,55 +52,72 @@ impl ViewNode for PrepassNode {
|
||||
.motion_vectors
|
||||
.as_ref()
|
||||
.map(|motion_vectors_texture| motion_vectors_texture.get_attachment()),
|
||||
// Use None in place of Deferred attachments
|
||||
// Use None in place of deferred attachments
|
||||
None,
|
||||
None,
|
||||
];
|
||||
|
||||
// If all color attachments are none: clear the color attachment list so that no fragment shader is required
|
||||
if color_attachments.iter().all(Option::is_none) {
|
||||
// all attachments are none: clear the attachment list so that no fragment shader is required
|
||||
color_attachments.clear();
|
||||
}
|
||||
|
||||
{
|
||||
// Set up the pass descriptor with the depth attachment and optional color attachments
|
||||
let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
|
||||
let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store));
|
||||
|
||||
let view_entity = graph.view_entity();
|
||||
render_context.add_command_buffer_generation_task(move |render_device| {
|
||||
#[cfg(feature = "trace")]
|
||||
let _prepass_span = info_span!("prepass").entered();
|
||||
|
||||
// Command encoder setup
|
||||
let mut command_encoder =
|
||||
render_device.create_command_encoder(&CommandEncoderDescriptor {
|
||||
label: Some("prepass_command_encoder"),
|
||||
});
|
||||
|
||||
// Render pass setup
|
||||
let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
|
||||
label: Some("prepass"),
|
||||
color_attachments: &color_attachments,
|
||||
depth_stencil_attachment: Some(view_depth_texture.get_attachment(StoreOp::Store)),
|
||||
depth_stencil_attachment,
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
});
|
||||
let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
|
||||
if let Some(viewport) = camera.viewport.as_ref() {
|
||||
render_pass.set_camera_viewport(viewport);
|
||||
}
|
||||
|
||||
// Always run opaque pass to ensure screen is cleared
|
||||
{
|
||||
// Run the prepass, sorted front-to-back
|
||||
// Opaque draws
|
||||
if !opaque_prepass_phase.items.is_empty() {
|
||||
#[cfg(feature = "trace")]
|
||||
let _opaque_prepass_span = info_span!("opaque_prepass").entered();
|
||||
opaque_prepass_phase.render(&mut render_pass, world, view_entity);
|
||||
}
|
||||
|
||||
// Alpha masked draws
|
||||
if !alpha_mask_prepass_phase.items.is_empty() {
|
||||
// Run the prepass, sorted front-to-back
|
||||
#[cfg(feature = "trace")]
|
||||
let _alpha_mask_prepass_span = info_span!("alpha_mask_prepass").entered();
|
||||
alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity);
|
||||
}
|
||||
}
|
||||
if deferred_prepass.is_none() {
|
||||
// Copy if deferred isn't going to
|
||||
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
|
||||
// Copy depth buffer to texture
|
||||
render_context.command_encoder().copy_texture_to_texture(
|
||||
view_depth_texture.texture.as_image_copy(),
|
||||
prepass_depth_texture.texture.texture.as_image_copy(),
|
||||
view_prepass_textures.size,
|
||||
);
|
||||
|
||||
drop(render_pass);
|
||||
|
||||
// Copy the main depth texture into the prepass depth texture if deferred isn't going to
|
||||
if deferred_prepass.is_none() {
|
||||
if let Some(prepass_depth_texture) = &view_prepass_textures.depth {
|
||||
command_encoder.copy_texture_to_texture(
|
||||
view_depth_texture.texture.as_image_copy(),
|
||||
prepass_depth_texture.texture.texture.as_image_copy(),
|
||||
view_prepass_textures.size,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
command_encoder.finish()
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -16,6 +16,8 @@ use bevy_render::{
|
||||
Extract,
|
||||
};
|
||||
use bevy_transform::{components::GlobalTransform, prelude::Transform};
|
||||
#[cfg(feature = "trace")]
|
||||
use bevy_utils::tracing::info_span;
|
||||
use bevy_utils::{
|
||||
nonmax::NonMaxU32,
|
||||
tracing::{error, warn},
|
||||
@ -1780,11 +1782,11 @@ impl Node for ShadowPassNode {
|
||||
self.view_light_query.update_archetypes(world);
|
||||
}
|
||||
|
||||
fn run(
|
||||
fn run<'w>(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
render_context: &mut RenderContext,
|
||||
world: &World,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
world: &'w World,
|
||||
) -> Result<(), NodeRunError> {
|
||||
let view_entity = graph.view_entity();
|
||||
if let Ok(view_lights) = self.main_view_query.get_manual(world, view_entity) {
|
||||
@ -1794,22 +1796,32 @@ impl Node for ShadowPassNode {
|
||||
.get_manual(world, view_light_entity)
|
||||
.unwrap();
|
||||
|
||||
if shadow_phase.items.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let depth_stencil_attachment =
|
||||
Some(view_light.depth_attachment.get_attachment(StoreOp::Store));
|
||||
|
||||
let mut render_pass =
|
||||
render_context.begin_tracked_render_pass(RenderPassDescriptor {
|
||||
render_context.add_command_buffer_generation_task(move |render_device| {
|
||||
#[cfg(feature = "trace")]
|
||||
let _shadow_pass_span = info_span!("shadow_pass").entered();
|
||||
|
||||
let mut command_encoder =
|
||||
render_device.create_command_encoder(&CommandEncoderDescriptor {
|
||||
label: Some("shadow_pass_command_encoder"),
|
||||
});
|
||||
|
||||
let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
|
||||
label: Some(&view_light.pass_name),
|
||||
color_attachments: &[],
|
||||
depth_stencil_attachment: Some(
|
||||
view_light.depth_attachment.get_attachment(StoreOp::Store),
|
||||
),
|
||||
depth_stencil_attachment,
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
});
|
||||
let mut render_pass = TrackedRenderPass::new(&render_device, render_pass);
|
||||
|
||||
shadow_phase.render(&mut render_pass, world, view_light_entity);
|
||||
shadow_phase.render(&mut render_pass, world, view_light_entity);
|
||||
|
||||
drop(render_pass);
|
||||
command_encoder.finish()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,11 +77,11 @@ pub trait Node: Downcast + Send + Sync + 'static {
|
||||
/// Runs the graph node logic, issues draw calls, updates the output slots and
|
||||
/// optionally queues up subgraphs for execution. The graph data, input and output values are
|
||||
/// passed via the [`RenderGraphContext`].
|
||||
fn run(
|
||||
fn run<'w>(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
render_context: &mut RenderContext,
|
||||
world: &World,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
world: &'w World,
|
||||
) -> Result<(), NodeRunError>;
|
||||
}
|
||||
|
||||
@ -346,12 +346,12 @@ pub trait ViewNode {
|
||||
/// Runs the graph node logic, issues draw calls, updates the output slots and
|
||||
/// optionally queues up subgraphs for execution. The graph data, input and output values are
|
||||
/// passed via the [`RenderGraphContext`].
|
||||
fn run(
|
||||
fn run<'w>(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
render_context: &mut RenderContext,
|
||||
view_query: QueryItem<Self::ViewQuery>,
|
||||
world: &World,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
view_query: QueryItem<'w, Self::ViewQuery>,
|
||||
world: &'w World,
|
||||
) -> Result<(), NodeRunError>;
|
||||
}
|
||||
|
||||
@ -388,11 +388,11 @@ where
|
||||
self.node.update(world);
|
||||
}
|
||||
|
||||
fn run(
|
||||
fn run<'w>(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
render_context: &mut RenderContext,
|
||||
world: &World,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
world: &'w World,
|
||||
) -> Result<(), NodeRunError> {
|
||||
let Ok(view) = self.view_query.get_manual(world, graph.view_entity()) else {
|
||||
return Ok(());
|
||||
|
@ -57,10 +57,11 @@ impl RenderGraphRunner {
|
||||
graph: &RenderGraph,
|
||||
render_device: RenderDevice,
|
||||
queue: &wgpu::Queue,
|
||||
adapter: &wgpu::Adapter,
|
||||
world: &World,
|
||||
finalizer: impl FnOnce(&mut wgpu::CommandEncoder),
|
||||
) -> Result<(), RenderGraphRunnerError> {
|
||||
let mut render_context = RenderContext::new(render_device);
|
||||
let mut render_context = RenderContext::new(render_device, adapter.get_info());
|
||||
Self::run_graph(graph, None, &mut render_context, world, &[], None)?;
|
||||
finalizer(render_context.command_encoder());
|
||||
|
||||
@ -72,11 +73,11 @@ impl RenderGraphRunner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_graph(
|
||||
fn run_graph<'w>(
|
||||
graph: &RenderGraph,
|
||||
sub_graph: Option<InternedRenderSubGraph>,
|
||||
render_context: &mut RenderContext,
|
||||
world: &World,
|
||||
render_context: &mut RenderContext<'w>,
|
||||
world: &'w World,
|
||||
inputs: &[SlotValue],
|
||||
view_entity: Option<Entity>,
|
||||
) -> Result<(), RenderGraphRunnerError> {
|
||||
|
@ -2,6 +2,7 @@ mod graph_runner;
|
||||
mod render_device;
|
||||
|
||||
use bevy_derive::{Deref, DerefMut};
|
||||
use bevy_tasks::ComputeTaskPool;
|
||||
use bevy_utils::tracing::{error, info, info_span};
|
||||
pub use graph_runner::*;
|
||||
pub use render_device::*;
|
||||
@ -29,11 +30,13 @@ pub fn render_system(world: &mut World, state: &mut SystemState<Query<Entity, Wi
|
||||
let graph = world.resource::<RenderGraph>();
|
||||
let render_device = world.resource::<RenderDevice>();
|
||||
let render_queue = world.resource::<RenderQueue>();
|
||||
let render_adapter = world.resource::<RenderAdapter>();
|
||||
|
||||
if let Err(e) = RenderGraphRunner::run(
|
||||
graph,
|
||||
render_device.clone(), // TODO: is this clone really necessary?
|
||||
&render_queue.0,
|
||||
&render_adapter.0,
|
||||
world,
|
||||
|encoder| {
|
||||
crate::view::screenshot::submit_screenshot_commands(world, encoder);
|
||||
@ -298,19 +301,31 @@ pub async fn initialize_renderer(
|
||||
///
|
||||
/// The [`RenderDevice`] is used to create render resources and the
|
||||
/// [`CommandEncoder`] is used to record a series of GPU operations.
|
||||
pub struct RenderContext {
|
||||
pub struct RenderContext<'w> {
|
||||
render_device: RenderDevice,
|
||||
command_encoder: Option<CommandEncoder>,
|
||||
command_buffers: Vec<CommandBuffer>,
|
||||
command_buffer_queue: Vec<QueuedCommandBuffer<'w>>,
|
||||
force_serial: bool,
|
||||
}
|
||||
|
||||
impl RenderContext {
|
||||
impl<'w> RenderContext<'w> {
|
||||
/// Creates a new [`RenderContext`] from a [`RenderDevice`].
|
||||
pub fn new(render_device: RenderDevice) -> Self {
|
||||
pub fn new(render_device: RenderDevice, adapter_info: AdapterInfo) -> Self {
|
||||
// HACK: Parallel command encoding is currently bugged on AMD + Windows + Vulkan with wgpu 0.19.1
|
||||
#[cfg(target_os = "windows")]
|
||||
let force_serial =
|
||||
adapter_info.driver.contains("AMD") && adapter_info.backend == wgpu::Backend::Vulkan;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
let force_serial = {
|
||||
drop(adapter_info);
|
||||
false
|
||||
};
|
||||
|
||||
Self {
|
||||
render_device,
|
||||
command_encoder: None,
|
||||
command_buffers: Vec::new(),
|
||||
command_buffer_queue: Vec::new(),
|
||||
force_serial,
|
||||
}
|
||||
}
|
||||
|
||||
@ -342,25 +357,76 @@ impl RenderContext {
|
||||
TrackedRenderPass::new(&self.render_device, render_pass)
|
||||
}
|
||||
|
||||
/// Append a [`CommandBuffer`] to the queue.
|
||||
/// Append a [`CommandBuffer`] to the command buffer queue.
|
||||
///
|
||||
/// If present, this will flush the currently unflushed [`CommandEncoder`]
|
||||
/// into a [`CommandBuffer`] into the queue before append the provided
|
||||
/// into a [`CommandBuffer`] into the queue before appending the provided
|
||||
/// buffer.
|
||||
pub fn add_command_buffer(&mut self, command_buffer: CommandBuffer) {
|
||||
self.flush_encoder();
|
||||
self.command_buffers.push(command_buffer);
|
||||
|
||||
self.command_buffer_queue
|
||||
.push(QueuedCommandBuffer::Ready(command_buffer));
|
||||
}
|
||||
|
||||
/// Finalizes the queue and returns the queue of [`CommandBuffer`]s.
|
||||
/// Append a function that will generate a [`CommandBuffer`] to the
|
||||
/// command buffer queue, to be run later.
|
||||
///
|
||||
/// If present, this will flush the currently unflushed [`CommandEncoder`]
|
||||
/// into a [`CommandBuffer`] into the queue before appending the provided
|
||||
/// task.
|
||||
pub fn add_command_buffer_generation_task(
|
||||
&mut self,
|
||||
task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w + Send,
|
||||
) {
|
||||
self.flush_encoder();
|
||||
|
||||
self.command_buffer_queue
|
||||
.push(QueuedCommandBuffer::Task(Box::new(task)));
|
||||
}
|
||||
|
||||
/// Finalizes and returns the queue of [`CommandBuffer`]s.
|
||||
///
|
||||
/// This function will wait until all command buffer generation tasks are complete
|
||||
/// by running them in parallel (where supported).
|
||||
pub fn finish(mut self) -> Vec<CommandBuffer> {
|
||||
self.flush_encoder();
|
||||
self.command_buffers
|
||||
|
||||
let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len());
|
||||
let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
|
||||
for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
|
||||
match queued_command_buffer {
|
||||
QueuedCommandBuffer::Ready(command_buffer) => {
|
||||
command_buffers.push((i, command_buffer));
|
||||
}
|
||||
QueuedCommandBuffer::Task(command_buffer_generation_task) => {
|
||||
let render_device = self.render_device.clone();
|
||||
if self.force_serial {
|
||||
command_buffers
|
||||
.push((i, command_buffer_generation_task(render_device)));
|
||||
} else {
|
||||
task_pool.spawn(async move {
|
||||
(i, command_buffer_generation_task(render_device))
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
command_buffers.append(&mut task_based_command_buffers);
|
||||
command_buffers.sort_unstable_by_key(|(i, _)| *i);
|
||||
command_buffers.into_iter().map(|(_, cb)| cb).collect()
|
||||
}
|
||||
|
||||
fn flush_encoder(&mut self) {
|
||||
if let Some(encoder) = self.command_encoder.take() {
|
||||
self.command_buffers.push(encoder.finish());
|
||||
self.command_buffer_queue
|
||||
.push(QueuedCommandBuffer::Ready(encoder.finish()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An entry in the [`RenderContext`] command buffer queue.
enum QueuedCommandBuffer<'w> {
|
||||
/// A [`CommandBuffer`] that has already been recorded.
Ready(CommandBuffer),
|
||||
/// A deferred closure that is handed a [`RenderDevice`] and returns the
/// [`CommandBuffer`] it records; it is invoked from `RenderContext::finish`.
Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w + Send>),
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user