Allowlist mali drivers for gpu preprocessing support. (#18769)

Fixes #17591 Looking at the arm downloads page, "r48p0" is a version number that increments, where rXX is the major version and pX seems to be a patch version. Take the conservative approach here that we know gpu preprocessing is working on at least version 48 and presumably higher. The assumption here is that the driver_info string will be reported similarly on non-pixel devices.
2025-04-10 17:03:54 -07:00 · 2025-04-10 17:03:54 -07:00 · 3c5eb4473f
commit 3c5eb4473f
parent 89c02ef296
3 changed files with 55 additions and 30 deletions
--- a/crates/bevy_core_pipeline/src/experimental/mip_generation/mod.rs
+++ b/crates/bevy_core_pipeline/src/experimental/mip_generation/mod.rs
@ -7,6 +7,10 @@

 use core::array;

+use crate::core_3d::{
+    graph::{Core3d, Node3d},
+    prepare_core_3d_depth_textures,
+};
 use bevy_app::{App, Plugin};
 use bevy_asset::{load_internal_asset, weak_handle, Handle};
 use bevy_derive::{Deref, DerefMut};
@ -21,6 +25,7 @@ use bevy_ecs::{
    world::{FromWorld, World},
 };
 use bevy_math::{uvec2, UVec2, Vec4Swizzles as _};
+use bevy_render::batching::gpu_preprocessing::GpuPreprocessingSupport;
 use bevy_render::{
    experimental::occlusion_culling::{
        OcclusionCulling, OcclusionCullingSubview, OcclusionCullingSubviewEntities,
@ -30,23 +35,19 @@ use bevy_render::{
        binding_types::{sampler, texture_2d, texture_2d_multisampled, texture_storage_2d},
        BindGroup, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries,
        CachedComputePipelineId, ComputePassDescriptor, ComputePipeline, ComputePipelineDescriptor,
-        DownlevelFlags, Extent3d, IntoBinding, PipelineCache, PushConstantRange, Sampler,
-        SamplerBindingType, SamplerDescriptor, Shader, ShaderStages, SpecializedComputePipeline,
+        Extent3d, IntoBinding, PipelineCache, PushConstantRange, Sampler, SamplerBindingType,
+        SamplerDescriptor, Shader, ShaderStages, SpecializedComputePipeline,
        SpecializedComputePipelines, StorageTextureAccess, TextureAspect, TextureDescriptor,
        TextureDimension, TextureFormat, TextureSampleType, TextureUsages, TextureView,
        TextureViewDescriptor, TextureViewDimension,
    },
-    renderer::{RenderAdapter, RenderContext, RenderDevice},
+    renderer::{RenderContext, RenderDevice},
    texture::TextureCache,
    view::{ExtractedView, NoIndirectDrawing, ViewDepthTexture},
    Render, RenderApp, RenderSet,
 };
 use bitflags::bitflags;
-
-use crate::core_3d::{
-    graph::{Core3d, Node3d},
-    prepare_core_3d_depth_textures,
-};
+use tracing::debug;

 /// Identifies the `downsample_depth.wgsl` shader.
 pub const DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle<Shader> =
@ -325,26 +326,14 @@ pub struct DownsampleDepthPipelines {
    sampler: Sampler,
 }

-fn supports_compute_shaders(device: &RenderDevice, adapter: &RenderAdapter) -> bool {
-    adapter
-        .get_downlevel_capabilities()
-        .flags
-        .contains(DownlevelFlags::COMPUTE_SHADERS)
-    // Even if the adapter supports compute, we might be simulating a lack of
-    // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
-    // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
-    // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
-    && (device.limits().max_compute_workgroup_storage_size != 0)
-}
-
 /// Creates the [`DownsampleDepthPipelines`] if downsampling is supported on the
 /// current platform.
 fn create_downsample_depth_pipelines(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
-    render_adapter: Res<RenderAdapter>,
    pipeline_cache: Res<PipelineCache>,
    mut specialized_compute_pipelines: ResMut<SpecializedComputePipelines<DownsampleDepthPipeline>>,
+    gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
    mut has_run: Local<bool>,
 ) {
    // Only run once.
@ -356,9 +345,8 @@ fn create_downsample_depth_pipelines(
    }
    *has_run = true;

-    // If we don't have compute shaders, we can't invoke the downsample depth
-    // compute shader.
-    if !supports_compute_shaders(&render_device, &render_adapter) {
+    if !gpu_preprocessing_support.is_culling_supported() {
+        debug!("Downsample depth is not supported on this platform.");
        return;
    }

--- a/crates/bevy_render/src/batching/gpu_preprocessing.rs
+++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs
@ -20,7 +20,7 @@ use bytemuck::{Pod, Zeroable};
 use encase::{internal::WriteInto, ShaderSize};
 use indexmap::IndexMap;
 use nonmax::NonMaxU32;
-use tracing::error;
+use tracing::{error, info};
 use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};

 use crate::{
@ -1093,12 +1093,13 @@ impl FromWorld for GpuPreprocessingSupport {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

-        // Filter some Qualcomm devices on Android as they crash when using GPU
-        // preprocessing.
-        // We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
-        // than 730).
+        // Filter Android drivers that are incompatible with GPU preprocessing:
+        // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
+        //   than 730).
+        // - We filter out Mali GPUs with driver versions lower than 48.
        fn is_non_supported_android_device(adapter: &RenderAdapter) -> bool {
            crate::get_adreno_model(adapter).is_some_and(|model| model != 720 && model <= 730)
+                || crate::get_mali_driver_version(adapter).is_some_and(|version| version < 48)
        }

        let culling_feature_support = device.features().contains(
@ -1107,18 +1108,31 @@ impl FromWorld for GpuPreprocessingSupport {
                | Features::PUSH_CONSTANTS,
        );
        // Depth downsampling for occlusion culling requires 12 textures
-        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12;
+        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
+            // Even if the adapter supports compute, we might be simulating a lack of
+            // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
+            // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
+            // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
+            device.limits().max_compute_workgroup_storage_size != 0;
+
        let downlevel_support = adapter.get_downlevel_capabilities().flags.contains(
+            DownlevelFlags::COMPUTE_SHADERS |
            DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW
        );

        let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
            || is_non_supported_android_device(adapter)
        {
+            info!(
+                "GPU preprocessing is not supported on this device. \
+                Falling back to CPU preprocessing.",
+            );
            GpuPreprocessingMode::None
        } else if !(culling_feature_support && limit_support && downlevel_support) {
+            info!("Some GPU preprocessing are limited on this device.");
            GpuPreprocessingMode::PreprocessingOnly
        } else {
+            info!("GPU preprocessing is fully supported on this device.");
            GpuPreprocessingMode::Culling
        };

--- a/crates/bevy_render/src/lib.rs
+++ b/crates/bevy_render/src/lib.rs
@ -586,3 +586,26 @@ pub fn get_adreno_model(adapter: &RenderAdapter) -> Option<u32> {
            .fold(0, |acc, digit| acc * 10 + digit),
    )
 }
+
+/// Get the Mali driver version if the adapter is a Mali GPU.
+pub fn get_mali_driver_version(adapter: &RenderAdapter) -> Option<u32> {
+    if !cfg!(target_os = "android") {
+        return None;
+    }
+
+    let driver_name = adapter.get_info().name;
+    if !driver_name.contains("Mali") {
+        return None;
+    }
+    let driver_info = adapter.get_info().driver_info;
+    if let Some(start_pos) = driver_info.find("v1.r") {
+        if let Some(end_pos) = driver_info[start_pos..].find('p') {
+            let start_idx = start_pos + 4; // Skip "v1.r"
+            let end_idx = start_pos + end_pos;
+
+            return driver_info[start_idx..end_idx].parse::<u32>().ok();
+        }
+    }
+
+    None
+}