
Currently, Bevy rebuilds the buffer containing all the transforms for joints every frame, during the extraction phase. This is inefficient in cases in which many skins are present in the scene and their joints don't move, such as the Caldera test scene. To address this problem, this commit switches skin extraction to use a set of retained GPU buffers with allocations managed by the offset allocator. I use fine-grained change detection in order to determine which skins need updating. Note that the granularity is on the level of an entire skin, not individual joints. Using the change detection at that level would yield poor performance in common cases in which an entire skin is animated at once. Also, this patch yields additional performance from the fact that changing joint transforms no longer requires the skinned mesh to be re-extracted. Note that this optimization can be a double-edged sword. In `many_foxes`, fine-grained change detection regressed the performance of `extract_skins` by 3.4x. This is because every joint is updated every frame in that example, so change detection is pointless and is pure overhead. Because the `many_foxes` workload is actually representative of animated scenes, this patch includes a heuristic that disables fine-grained change detection if the number of transformed entities in the frame exceeds a certain fraction of the total number of joints. Currently, this threshold is set to 25%. Note that this is a crude heuristic, because it doesn't distinguish between the number of transformed *joints* and the number of transformed *entities*; however, it should be good enough to yield the optimum code path most of the time. Finally, this patch fixes a bug whereby skinned meshes are actually being incorrectly retained if the buffer offsets of the joints of those skinned meshes change from frame to frame. To fix this without retaining skins, we would have to re-extract every skinned mesh every frame. 
Doing this was a significant regression on Caldera. With this PR, by contrast, mesh joints stay at the same buffer offset, so we don't have to update the `MeshInputUniform` containing the buffer offset every frame. This also makes PR #17717 easier to implement, because that PR uses the buffer offset from the previous frame, and the logic for calculating that is simplified if the previous frame's buffer offset is guaranteed to be identical to that of the current frame. On Caldera, this patch reduces the time spent in `extract_skins` from 1.79 ms to near zero. On `many_foxes`, this patch regresses the performance of `extract_skins` by approximately 10%-25%, depending on the number of foxes. This has only a small impact on frame rate.
84 lines
2.9 KiB
TOML
84 lines
2.9 KiB
TOML
# Package metadata for the `bevy_pbr` crate.
[package]
name = "bevy_pbr"
version = "0.16.0-dev"
edition = "2021"
description = "Adds PBR rendering to Bevy Engine"
homepage = "https://bevyengine.org"
repository = "https://github.com/bevyengine/bevy"
license = "MIT OR Apache-2.0"
keywords = ["bevy"]

[features]
# Flag-only features: none of these enables an optional dependency or another
# crate's feature from this manifest.
# NOTE(review): presumably consumed via `cfg(feature = "...")` in the crate
# source, which is not visible here -- confirm.
webgl = []
webgpu = []
pbr_transmission_textures = []
pbr_multi_layer_material_textures = []
pbr_anisotropy_texture = []
experimental_pbr_pcss = []
pbr_specular_textures = []
# Forwarding features: each one enables the same-named feature of `bevy_render`.
shader_format_glsl = ["bevy_render/shader_format_glsl"]
trace = ["bevy_render/trace"]
# Enables the meshlet renderer for dense high-poly scenes (experimental)
meshlet = ["dep:lz4_flex", "dep:range-alloc", "dep:half", "dep:bevy_tasks"]
# Enables processing meshes into meshlet meshes
# (implies `meshlet` and pulls in the optional processing dependencies).
meshlet_processor = [
  "meshlet",
  "dep:meshopt",
  "dep:metis",
  "dep:itertools",
  "dep:bitvec",
]

[dependencies]
# bevy
# In-workspace crates, referenced by relative path and pinned to the same
# version as this crate.
bevy_app = { path = "../bevy_app", version = "0.16.0-dev" }
bevy_asset = { path = "../bevy_asset", version = "0.16.0-dev" }
bevy_color = { path = "../bevy_color", version = "0.16.0-dev" }
bevy_core_pipeline = { path = "../bevy_core_pipeline", version = "0.16.0-dev" }
bevy_derive = { path = "../bevy_derive", version = "0.16.0-dev" }
bevy_ecs = { path = "../bevy_ecs", version = "0.16.0-dev" }
bevy_image = { path = "../bevy_image", version = "0.16.0-dev" }
bevy_math = { path = "../bevy_math", version = "0.16.0-dev" }
bevy_reflect = { path = "../bevy_reflect", version = "0.16.0-dev", features = [
  "bevy",
] }
bevy_render = { path = "../bevy_render", version = "0.16.0-dev" }
# Optional: only pulled in by the `meshlet` feature (`dep:bevy_tasks`).
bevy_tasks = { path = "../bevy_tasks", version = "0.16.0-dev", optional = true }
bevy_transform = { path = "../bevy_transform", version = "0.16.0-dev" }
bevy_utils = { path = "../bevy_utils", version = "0.16.0-dev" }
bevy_window = { path = "../bevy_window", version = "0.16.0-dev" }
bevy_platform_support = { path = "../bevy_platform_support", version = "0.16.0-dev", default-features = false, features = [
  "std",
] }

# other
bitflags = "2.3"
fixedbitset = "0.5"
thiserror = { version = "2", default-features = false }
derive_more = { version = "1", default-features = false, features = ["from"] }
# meshlet
# All optional: `lz4_flex`, `range-alloc` and `half` are enabled by the
# `meshlet` feature; `meshopt`, `metis`, `itertools` and `bitvec` by
# `meshlet_processor`.
lz4_flex = { version = "0.11", default-features = false, features = [
  "frame",
], optional = true }
range-alloc = { version = "0.1.3", optional = true }
half = { version = "2", features = ["bytemuck"], optional = true }
meshopt = { version = "0.4.1", optional = true }
metis = { version = "0.2", optional = true }
itertools = { version = "0.13", optional = true }
bitvec = { version = "1", optional = true }
# direct dependency required for derive macro
bytemuck = { version = "1", features = ["derive", "must_cast"] }
radsort = "0.1"
smallvec = "1.6"
nonmax = "0.5"
static_assertions = "1"
tracing = { version = "0.1", default-features = false, features = ["std"] }
offset-allocator = "0.2"

# Inherit the lint configuration declared by the workspace root manifest
# instead of declaring lints per crate.
[lints]
workspace = true

# docs.rs build settings: document the crate with every feature enabled and
# pass extra flags to rustdoc. `--generate-link-to-definition` is an unstable
# rustdoc option, hence `-Zunstable-options`.
[package.metadata.docs.rs]
rustdoc-args = ["-Zunstable-options", "--generate-link-to-definition"]
all-features = true