Allowlist mali drivers for gpu preprocessing support. (#18769)

Fixes #17591

Looking at the arm downloads page, "r48p0" is a version number that
increments, where rXX is the major version and pX seems to be a patch
version. Take the conservative approach here that we know gpu
preprocessing is working on at least version 48 and presumably higher.
The assumption here is that the driver_info string will be reported
similarly on non-pixel devices.
This commit is contained in:
charlotte 2025-04-10 17:03:54 -07:00 committed by François Mockers
parent 89c02ef296
commit 3c5eb4473f
3 changed files with 55 additions and 30 deletions

View File

@ -7,6 +7,10 @@
use core::array;
use crate::core_3d::{
graph::{Core3d, Node3d},
prepare_core_3d_depth_textures,
};
use bevy_app::{App, Plugin};
use bevy_asset::{load_internal_asset, weak_handle, Handle};
use bevy_derive::{Deref, DerefMut};
@ -21,6 +25,7 @@ use bevy_ecs::{
world::{FromWorld, World},
};
use bevy_math::{uvec2, UVec2, Vec4Swizzles as _};
use bevy_render::batching::gpu_preprocessing::GpuPreprocessingSupport;
use bevy_render::{
experimental::occlusion_culling::{
OcclusionCulling, OcclusionCullingSubview, OcclusionCullingSubviewEntities,
@ -30,23 +35,19 @@ use bevy_render::{
binding_types::{sampler, texture_2d, texture_2d_multisampled, texture_storage_2d},
BindGroup, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries,
CachedComputePipelineId, ComputePassDescriptor, ComputePipeline, ComputePipelineDescriptor,
DownlevelFlags, Extent3d, IntoBinding, PipelineCache, PushConstantRange, Sampler,
SamplerBindingType, SamplerDescriptor, Shader, ShaderStages, SpecializedComputePipeline,
Extent3d, IntoBinding, PipelineCache, PushConstantRange, Sampler, SamplerBindingType,
SamplerDescriptor, Shader, ShaderStages, SpecializedComputePipeline,
SpecializedComputePipelines, StorageTextureAccess, TextureAspect, TextureDescriptor,
TextureDimension, TextureFormat, TextureSampleType, TextureUsages, TextureView,
TextureViewDescriptor, TextureViewDimension,
},
renderer::{RenderAdapter, RenderContext, RenderDevice},
renderer::{RenderContext, RenderDevice},
texture::TextureCache,
view::{ExtractedView, NoIndirectDrawing, ViewDepthTexture},
Render, RenderApp, RenderSet,
};
use bitflags::bitflags;
use crate::core_3d::{
graph::{Core3d, Node3d},
prepare_core_3d_depth_textures,
};
use tracing::debug;
/// Identifies the `downsample_depth.wgsl` shader.
pub const DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle<Shader> =
@ -325,26 +326,14 @@ pub struct DownsampleDepthPipelines {
sampler: Sampler,
}
fn supports_compute_shaders(device: &RenderDevice, adapter: &RenderAdapter) -> bool {
adapter
.get_downlevel_capabilities()
.flags
.contains(DownlevelFlags::COMPUTE_SHADERS)
// Even if the adapter supports compute, we might be simulating a lack of
// compute via device limits (see `WgpuSettingsPriority::WebGL2` and
// `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
// `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
&& (device.limits().max_compute_workgroup_storage_size != 0)
}
/// Creates the [`DownsampleDepthPipelines`] if downsampling is supported on the
/// current platform.
fn create_downsample_depth_pipelines(
mut commands: Commands,
render_device: Res<RenderDevice>,
render_adapter: Res<RenderAdapter>,
pipeline_cache: Res<PipelineCache>,
mut specialized_compute_pipelines: ResMut<SpecializedComputePipelines<DownsampleDepthPipeline>>,
gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
mut has_run: Local<bool>,
) {
// Only run once.
@ -356,9 +345,8 @@ fn create_downsample_depth_pipelines(
}
*has_run = true;
// If we don't have compute shaders, we can't invoke the downsample depth
// compute shader.
if !supports_compute_shaders(&render_device, &render_adapter) {
if !gpu_preprocessing_support.is_culling_supported() {
debug!("Downsample depth is not supported on this platform.");
return;
}

View File

@ -20,7 +20,7 @@ use bytemuck::{Pod, Zeroable};
use encase::{internal::WriteInto, ShaderSize};
use indexmap::IndexMap;
use nonmax::NonMaxU32;
use tracing::error;
use tracing::{error, info};
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
use crate::{
@ -1093,12 +1093,13 @@ impl FromWorld for GpuPreprocessingSupport {
let adapter = world.resource::<RenderAdapter>();
let device = world.resource::<RenderDevice>();
// Filter some Qualcomm devices on Android as they crash when using GPU
// preprocessing.
// We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
// than 730).
// Filter Android drivers that are incompatible with GPU preprocessing:
// - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
// than 730).
// - We filter out Mali GPUs with driver versions lower than 48.
fn is_non_supported_android_device(adapter: &RenderAdapter) -> bool {
crate::get_adreno_model(adapter).is_some_and(|model| model != 720 && model <= 730)
|| crate::get_mali_driver_version(adapter).is_some_and(|version| version < 48)
}
let culling_feature_support = device.features().contains(
@ -1107,18 +1108,31 @@ impl FromWorld for GpuPreprocessingSupport {
| Features::PUSH_CONSTANTS,
);
// Depth downsampling for occlusion culling requires 12 textures
let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12;
let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
// Even if the adapter supports compute, we might be simulating a lack of
// compute via device limits (see `WgpuSettingsPriority::WebGL2` and
// `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
// `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
device.limits().max_compute_workgroup_storage_size != 0;
let downlevel_support = adapter.get_downlevel_capabilities().flags.contains(
DownlevelFlags::COMPUTE_SHADERS |
DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW
);
let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
|| is_non_supported_android_device(adapter)
{
info!(
"GPU preprocessing is not supported on this device. \
Falling back to CPU preprocessing.",
);
GpuPreprocessingMode::None
} else if !(culling_feature_support && limit_support && downlevel_support) {
info!("Some GPU preprocessing are limited on this device.");
GpuPreprocessingMode::PreprocessingOnly
} else {
info!("GPU preprocessing is fully supported on this device.");
GpuPreprocessingMode::Culling
};

View File

@ -586,3 +586,26 @@ pub fn get_adreno_model(adapter: &RenderAdapter) -> Option<u32> {
.fold(0, |acc, digit| acc * 10 + digit),
)
}
/// Get the Mali driver version if the adapter is a Mali GPU.
pub fn get_mali_driver_version(adapter: &RenderAdapter) -> Option<u32> {
if !cfg!(target_os = "android") {
return None;
}
let driver_name = adapter.get_info().name;
if !driver_name.contains("Mali") {
return None;
}
let driver_info = adapter.get_info().driver_info;
if let Some(start_pos) = driver_info.find("v1.r") {
if let Some(end_pos) = driver_info[start_pos..].find('p') {
let start_idx = start_pos + 4; // Skip "v1.r"
let end_idx = start_pos + end_pos;
return driver_info[start_idx..end_idx].parse::<u32>().ok();
}
}
None
}