Sort by pipeline, then by mesh, for non-transparent passes for massively better batching (#11671)

# Objective

Bevy issues a ridiculous number of draw calls, and our batching isn't very
effective because we sort by distance and only batch when we get multiple
instances of the same object in a row. Sorting by distance can give us
slightly better GPU performance when not using the depth prepass (due to
less overdraw), but we end up massively CPU bottlenecked by thousands of
unnecessary draw calls.

## Solution

Change the sort functions to sort by pipeline key, then by mesh id. This gives
large performance gains in scenes that are more realistic than our stress tests.

Phase items changed:
- Opaque3d
- Opaque3dDeferred
- Opaque3dPrepass


![image](https://github.com/bevyengine/bevy/assets/177631/8c355256-ad86-4b47-81a0-f3906797fe7e)


---

## Changelog

- Opaque3d, Opaque3dDeferred, and Opaque3dPrepass drawing order is now sorted
by pipeline and mesh, rather than by distance. This trades a bit of GPU time
for massively better batching in scenes that aren't only drawing huge numbers
of a single object.
This commit is contained in:
Elabajaba 2024-02-05 17:12:22 -05:00 committed by GitHub
parent e927756d72
commit 2a1ebc4ac4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 37 additions and 28 deletions

View File

@ -47,7 +47,7 @@ impl ViewNode for MainOpaquePass3dNode {
): QueryItem<Self::ViewQuery>,
world: &World,
) -> Result<(), NodeRunError> {
// Run the opaque pass, sorted front-to-back
// Run the opaque pass, sorted by pipeline key and mesh id to greatly improve batching.
// NOTE: Scoped to drop the mutable borrow of render_context
#[cfg(feature = "trace")]
let _main_opaque_pass_3d_span = info_span!("main_opaque_pass_3d").entered();

View File

@ -40,6 +40,7 @@ pub const CORE_3D_DEPTH_FORMAT: TextureFormat = TextureFormat::Depth32Float;
use std::{cmp::Reverse, ops::Range};
use bevy_asset::AssetId;
pub use camera_3d::*;
pub use main_opaque_pass_3d_node::*;
pub use main_transparent_pass_3d_node::*;
@ -50,6 +51,7 @@ use bevy_render::{
camera::{Camera, ExtractedCamera},
color::Color,
extract_component::ExtractComponentPlugin,
mesh::Mesh,
prelude::Msaa,
render_graph::{EmptyNode, RenderGraphApp, ViewNodeRunner},
render_phase::{
@ -182,7 +184,7 @@ impl Plugin for Core3dPlugin {
}
pub struct Opaque3d {
pub distance: f32,
pub asset_id: AssetId<Mesh>,
pub pipeline: CachedRenderPipelineId,
pub entity: Entity,
pub draw_function: DrawFunctionId,
@ -191,8 +193,7 @@ pub struct Opaque3d {
}
impl PhaseItem for Opaque3d {
// NOTE: Values increase towards the camera. Front-to-back ordering for opaque means we need a descending sort.
type SortKey = Reverse<FloatOrd>;
type SortKey = (usize, AssetId<Mesh>);
#[inline]
fn entity(&self) -> Entity {
@ -201,7 +202,8 @@ impl PhaseItem for Opaque3d {
#[inline]
fn sort_key(&self) -> Self::SortKey {
Reverse(FloatOrd(self.distance))
// Sort by pipeline, then by mesh to massively decrease drawcall counts in real scenes.
(self.pipeline.id(), self.asset_id)
}
#[inline]
@ -211,8 +213,7 @@ impl PhaseItem for Opaque3d {
#[inline]
fn sort(items: &mut [Self]) {
// Key negated to match reversed SortKey ordering
radsort::sort_by_key(items, |item| -item.distance);
items.sort_unstable_by_key(Self::sort_key);
}
#[inline]

View File

@ -3,8 +3,10 @@ pub mod node;
use std::{cmp::Reverse, ops::Range};
use bevy_asset::AssetId;
use bevy_ecs::prelude::*;
use bevy_render::{
mesh::Mesh,
render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem},
render_resource::{CachedRenderPipelineId, TextureFormat},
};
@ -20,8 +22,8 @@ pub const DEFERRED_LIGHTING_PASS_ID_DEPTH_FORMAT: TextureFormat = TextureFormat:
///
/// Used to render all 3D meshes with materials that have no transparency.
pub struct Opaque3dDeferred {
pub distance: f32,
pub entity: Entity,
pub asset_id: AssetId<Mesh>,
pub pipeline_id: CachedRenderPipelineId,
pub draw_function: DrawFunctionId,
pub batch_range: Range<u32>,
@ -29,8 +31,7 @@ pub struct Opaque3dDeferred {
}
impl PhaseItem for Opaque3dDeferred {
// NOTE: Values increase towards the camera. Front-to-back ordering for opaque means we need a descending sort.
type SortKey = Reverse<FloatOrd>;
type SortKey = (usize, AssetId<Mesh>);
#[inline]
fn entity(&self) -> Entity {
@ -39,7 +40,8 @@ impl PhaseItem for Opaque3dDeferred {
#[inline]
fn sort_key(&self) -> Self::SortKey {
Reverse(FloatOrd(self.distance))
// Sort by pipeline, then by mesh to massively decrease drawcall counts in real scenes.
(self.pipeline_id.id(), self.asset_id)
}
#[inline]
@ -49,8 +51,7 @@ impl PhaseItem for Opaque3dDeferred {
#[inline]
fn sort(items: &mut [Self]) {
// Key negated to match reversed SortKey ordering
radsort::sort_by_key(items, |item| -item.distance);
items.sort_unstable_by_key(Self::sort_key);
}
#[inline]

View File

@ -29,9 +29,11 @@ pub mod node;
use std::{cmp::Reverse, ops::Range};
use bevy_asset::AssetId;
use bevy_ecs::prelude::*;
use bevy_reflect::Reflect;
use bevy_render::{
mesh::Mesh,
render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem},
render_resource::{CachedRenderPipelineId, Extent3d, TextureFormat, TextureView},
texture::ColorAttachment,
@ -109,8 +111,8 @@ impl ViewPrepassTextures {
///
/// Used to render all 3D meshes with materials that have no transparency.
pub struct Opaque3dPrepass {
pub distance: f32,
pub entity: Entity,
pub asset_id: AssetId<Mesh>,
pub pipeline_id: CachedRenderPipelineId,
pub draw_function: DrawFunctionId,
pub batch_range: Range<u32>,
@ -118,8 +120,7 @@ pub struct Opaque3dPrepass {
}
impl PhaseItem for Opaque3dPrepass {
// NOTE: Values increase towards the camera. Front-to-back ordering for opaque means we need a descending sort.
type SortKey = Reverse<FloatOrd>;
type SortKey = (usize, AssetId<Mesh>);
#[inline]
fn entity(&self) -> Entity {
@ -128,7 +129,8 @@ impl PhaseItem for Opaque3dPrepass {
#[inline]
fn sort_key(&self) -> Self::SortKey {
Reverse(FloatOrd(self.distance))
// Sort by pipeline, then by mesh to massively decrease drawcall counts in real scenes.
(self.pipeline_id.id(), self.asset_id)
}
#[inline]
@ -138,8 +140,7 @@ impl PhaseItem for Opaque3dPrepass {
#[inline]
fn sort(items: &mut [Self]) {
// Key negated to match reversed SortKey ordering
radsort::sort_by_key(items, |item| -item.distance);
items.sort_unstable_by_key(Self::sort_key);
}
#[inline]

View File

@ -639,12 +639,12 @@ pub fn queue_material_meshes<M: Material>(
mesh_instance.material_bind_group_id = material.get_bind_group_id();
let distance = rangefinder
.distance_translation(&mesh_instance.transforms.transform.translation)
+ material.properties.depth_bias;
match material.properties.alpha_mode {
AlphaMode::Opaque => {
if material.properties.reads_view_transmission_texture {
let distance = rangefinder
.distance_translation(&mesh_instance.transforms.transform.translation)
+ material.properties.depth_bias;
transmissive_phase.add(Transmissive3d {
entity: *visible_entity,
draw_function: draw_transmissive_pbr,
@ -658,13 +658,16 @@ pub fn queue_material_meshes<M: Material>(
entity: *visible_entity,
draw_function: draw_opaque_pbr,
pipeline: pipeline_id,
distance,
asset_id: mesh_instance.mesh_asset_id,
batch_range: 0..1,
dynamic_offset: None,
});
}
}
AlphaMode::Mask(_) => {
let distance = rangefinder
.distance_translation(&mesh_instance.transforms.transform.translation)
+ material.properties.depth_bias;
if material.properties.reads_view_transmission_texture {
transmissive_phase.add(Transmissive3d {
entity: *visible_entity,
@ -689,6 +692,9 @@ pub fn queue_material_meshes<M: Material>(
| AlphaMode::Premultiplied
| AlphaMode::Add
| AlphaMode::Multiply => {
let distance = rangefinder
.distance_translation(&mesh_instance.transforms.transform.translation)
+ material.properties.depth_bias;
transparent_phase.add(Transparent3d {
entity: *visible_entity,
draw_function: draw_transparent_pbr,

View File

@ -835,9 +835,6 @@ pub fn queue_prepass_material_meshes<M: Material>(
}
};
let distance = rangefinder
.distance_translation(&mesh_instance.transforms.transform.translation)
+ material.properties.depth_bias;
match alpha_mode {
AlphaMode::Opaque => {
if deferred {
@ -848,7 +845,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
entity: *visible_entity,
draw_function: opaque_draw_deferred,
pipeline_id,
distance,
asset_id: mesh_instance.mesh_asset_id,
batch_range: 0..1,
dynamic_offset: None,
});
@ -857,13 +854,16 @@ pub fn queue_prepass_material_meshes<M: Material>(
entity: *visible_entity,
draw_function: opaque_draw_prepass,
pipeline_id,
distance,
asset_id: mesh_instance.mesh_asset_id,
batch_range: 0..1,
dynamic_offset: None,
});
}
}
AlphaMode::Mask(_) => {
let distance = rangefinder
.distance_translation(&mesh_instance.transforms.transform.translation)
+ material.properties.depth_bias;
if deferred {
alpha_mask_deferred_phase
.as_mut()