Skip to content

Commit

Permalink
Make indirect drawing opt-out instead of opt-in, enabling multidraw by default.
Browse files Browse the repository at this point in the history

This patch replaces the undocumented `GpuCulling` component with a new
component, `NoIndirectDrawing`, effectively turning indirect drawing on
by default. Indirect mode is needed for the recently-landed multidraw
feature (#16427). Because multidraw is such a win for performance, the
small performance tax that indirect mode incurs is virtually always
worth paying when that feature is supported.

To ensure that custom drawing code such as that in the
`custom_shader_instancing` example continues to function, this commit
additionally makes GPU culling take the `NoFrustumCulling` component
into account.

This PR is an alternative to #16670 that doesn't break the
`custom_shader_instancing` example. **PR #16755 should land first in
order to avoid breaking deferred rendering, as multidraw currently
breaks it**.
  • Loading branch information
pcwalton committed Dec 10, 2024
1 parent c4a24d5 commit 92cce83
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 65 deletions.
14 changes: 7 additions & 7 deletions crates/bevy_core_pipeline/src/core_3d/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ use bevy_render::{
batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
mesh::allocator::SlabId,
render_phase::PhaseItemBinKey,
view::GpuCulling,
view::NoIndirectDrawing,
};
pub use camera_3d::*;
pub use main_opaque_pass_3d_node::*;
Expand Down Expand Up @@ -569,20 +569,20 @@ pub fn extract_core_3d_camera_phases(
mut alpha_mask_3d_phases: ResMut<ViewBinnedRenderPhases<AlphaMask3d>>,
mut transmissive_3d_phases: ResMut<ViewSortedRenderPhases<Transmissive3d>>,
mut transparent_3d_phases: ResMut<ViewSortedRenderPhases<Transparent3d>>,
cameras_3d: Extract<Query<(RenderEntity, &Camera, Has<GpuCulling>), With<Camera3d>>>,
cameras_3d: Extract<Query<(RenderEntity, &Camera, Has<NoIndirectDrawing>), With<Camera3d>>>,
mut live_entities: Local<EntityHashSet>,
gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
) {
live_entities.clear();

for (entity, camera, has_gpu_culling) in &cameras_3d {
for (entity, camera, no_indirect_drawing) in &cameras_3d {
if !camera.is_active {
continue;
}

// If GPU culling is in use, use it (and indirect mode); otherwise, just
// preprocess the meshes.
let gpu_preprocessing_mode = gpu_preprocessing_support.min(if has_gpu_culling {
let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
GpuPreprocessingMode::Culling
} else {
GpuPreprocessingMode::PreprocessingOnly
Expand Down Expand Up @@ -616,7 +616,7 @@ pub fn extract_camera_prepass_phase(
(
RenderEntity,
&Camera,
Has<GpuCulling>,
Has<NoIndirectDrawing>,
Has<DepthPrepass>,
Has<NormalPrepass>,
Has<MotionVectorPrepass>,
Expand All @@ -633,7 +633,7 @@ pub fn extract_camera_prepass_phase(
for (
entity,
camera,
gpu_culling,
no_indirect_drawing,
depth_prepass,
normal_prepass,
motion_vector_prepass,
Expand All @@ -646,7 +646,7 @@ pub fn extract_camera_prepass_phase(

// If GPU culling is in use, use it (and indirect mode); otherwise, just
// preprocess the meshes.
let gpu_preprocessing_mode = gpu_preprocessing_support.min(if gpu_culling {
let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
GpuPreprocessingMode::Culling
} else {
GpuPreprocessingMode::PreprocessingOnly
Expand Down
12 changes: 6 additions & 6 deletions crates/bevy_pbr/src/render/gpu_preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use bevy_render::{
SpecializedComputePipeline, SpecializedComputePipelines,
},
renderer::{RenderContext, RenderDevice, RenderQueue},
view::{GpuCulling, ViewUniform, ViewUniformOffset, ViewUniforms},
view::{NoIndirectDrawing, ViewUniform, ViewUniformOffset, ViewUniforms},
Render, RenderApp, RenderSet,
};
use bevy_utils::tracing::warn;
Expand Down Expand Up @@ -70,7 +70,7 @@ pub struct GpuPreprocessNode {
Entity,
Read<PreprocessBindGroup>,
Read<ViewUniformOffset>,
Has<GpuCulling>,
Has<NoIndirectDrawing>,
),
Without<SkipGpuPreprocess>,
>,
Expand Down Expand Up @@ -202,7 +202,7 @@ impl Node for GpuPreprocessNode {
});

// Run the compute passes.
for (view, bind_group, view_uniform_offset, gpu_culling) in
for (view, bind_group, view_uniform_offset, no_indirect_drawing) in
self.view_query.iter_manual(world)
{
// Grab the index buffer for this view.
Expand All @@ -213,7 +213,7 @@ impl Node for GpuPreprocessNode {

// Select the right pipeline, depending on whether GPU culling is in
// use.
let maybe_pipeline_id = if gpu_culling {
let maybe_pipeline_id = if !no_indirect_drawing {
preprocess_pipelines.gpu_culling.pipeline_id
} else {
preprocess_pipelines.direct.pipeline_id
Expand All @@ -235,7 +235,7 @@ impl Node for GpuPreprocessNode {
compute_pass.set_pipeline(preprocess_pipeline);

let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
if gpu_culling {
if !no_indirect_drawing {
dynamic_offsets.push(view_uniform_offset.offset);
}
compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets);
Expand Down Expand Up @@ -422,7 +422,7 @@ pub fn prepare_preprocess_bind_groups(
)
.ok();

let bind_group = if index_buffer_vec.gpu_culling {
let bind_group = if !index_buffer_vec.no_indirect_drawing {
let (
Some(indirect_parameters_buffer),
Some(mesh_culling_data_buffer),
Expand Down
20 changes: 10 additions & 10 deletions crates/bevy_pbr/src/render/light.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use bevy_render::{
batching::gpu_preprocessing::{GpuPreprocessingMode, GpuPreprocessingSupport},
camera::SortedCameras,
mesh::allocator::MeshAllocator,
view::GpuCulling,
view::NoIndirectDrawing,
};
use bevy_render::{
diagnostic::RecordDiagnostics,
Expand Down Expand Up @@ -687,7 +687,7 @@ pub fn prepare_lights(
&ExtractedView,
&ExtractedClusterConfig,
Option<&RenderLayers>,
Has<GpuCulling>,
Has<NoIndirectDrawing>,
),
With<Camera3d>,
>,
Expand Down Expand Up @@ -1096,15 +1096,15 @@ pub fn prepare_lights(
let mut live_views = EntityHashSet::with_capacity_and_hasher(views_count, EntityHash);

// set up light data for each view
for (entity, extracted_view, clusters, maybe_layers, has_gpu_culling) in sorted_cameras
for (entity, extracted_view, clusters, maybe_layers, no_indirect_drawing) in sorted_cameras
.0
.iter()
.filter_map(|sorted_camera| views.get(sorted_camera.entity).ok())
{
live_views.insert(entity);
let mut view_lights = Vec::new();

let gpu_preprocessing_mode = gpu_preprocessing_support.min(if has_gpu_culling {
let gpu_preprocessing_mode = gpu_preprocessing_support.min(if !no_indirect_drawing {
GpuPreprocessingMode::Culling
} else {
GpuPreprocessingMode::PreprocessingOnly
Expand Down Expand Up @@ -1237,8 +1237,8 @@ pub fn prepare_lights(
},
));

if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
commands.entity(view_light_entity).insert(GpuCulling);
if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
commands.entity(view_light_entity).insert(NoIndirectDrawing);
}

view_lights.push(view_light_entity);
Expand Down Expand Up @@ -1329,8 +1329,8 @@ pub fn prepare_lights(
LightEntity::Spot { light_entity },
));

if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
commands.entity(view_light_entity).insert(GpuCulling);
if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
commands.entity(view_light_entity).insert(NoIndirectDrawing);
}

view_lights.push(view_light_entity);
Expand Down Expand Up @@ -1464,8 +1464,8 @@ pub fn prepare_lights(
},
));

if matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
commands.entity(view_light_entity).insert(GpuCulling);
if !matches!(gpu_preprocessing_mode, GpuPreprocessingMode::Culling) {
commands.entity(view_light_entity).insert(NoIndirectDrawing);
}

view_lights.push(view_light_entity);
Expand Down
22 changes: 19 additions & 3 deletions crates/bevy_pbr/src/render/mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ use bevy_render::{
renderer::{RenderDevice, RenderQueue},
texture::DefaultImageSampler,
view::{
prepare_view_targets, GpuCulling, RenderVisibilityRanges, ViewTarget, ViewUniformOffset,
ViewVisibility, VisibilityRange,
prepare_view_targets, NoFrustumCulling, NoIndirectDrawing, RenderVisibilityRanges,
ViewTarget, ViewUniformOffset, ViewVisibility, VisibilityRange,
},
Extract,
};
Expand Down Expand Up @@ -421,6 +421,11 @@ bitflags::bitflags! {
///
/// This will be `u16::MAX` if this mesh has no LOD.
const LOD_INDEX_MASK = (1 << 16) - 1;
/// Disables frustum culling for this mesh.
///
/// This corresponds to the
/// [`bevy_render::view::visibility::NoFrustumCulling`] component.
const NO_FRUSTUM_CULLING = 1 << 28;
const SHADOW_RECEIVER = 1 << 29;
const TRANSMITTED_SHADOW_RECEIVER = 1 << 30;
// Indicates the sign of the determinant of the 3x3 model matrix. If the sign is positive,
Expand All @@ -435,6 +440,7 @@ impl MeshFlags {
fn from_components(
transform: &GlobalTransform,
lod_index: Option<NonMaxU16>,
no_frustum_culling: bool,
not_shadow_receiver: bool,
transmitted_receiver: bool,
) -> MeshFlags {
Expand All @@ -443,6 +449,9 @@ impl MeshFlags {
} else {
MeshFlags::SHADOW_RECEIVER
};
if no_frustum_culling {
mesh_flags |= MeshFlags::NO_FRUSTUM_CULLING;
}
if transmitted_receiver {
mesh_flags |= MeshFlags::TRANSMITTED_SHADOW_RECEIVER;
}
Expand Down Expand Up @@ -1046,6 +1055,7 @@ pub fn extract_meshes_for_cpu_building(
&GlobalTransform,
Option<&PreviousGlobalTransform>,
&Mesh3d,
Has<NoFrustumCulling>,
Has<NotShadowReceiver>,
Has<TransmittedShadowReceiver>,
Has<NotShadowCaster>,
Expand All @@ -1063,6 +1073,7 @@ pub fn extract_meshes_for_cpu_building(
transform,
previous_transform,
mesh,
no_frustum_culling,
not_shadow_receiver,
transmitted_receiver,
not_shadow_caster,
Expand All @@ -1084,6 +1095,7 @@ pub fn extract_meshes_for_cpu_building(
let mesh_flags = MeshFlags::from_components(
transform,
lod_index,
no_frustum_culling,
not_shadow_receiver,
transmitted_receiver,
);
Expand Down Expand Up @@ -1155,6 +1167,7 @@ pub fn extract_meshes_for_gpu_building(
Option<&Lightmap>,
Option<&Aabb>,
&Mesh3d,
Has<NoFrustumCulling>,
Has<NotShadowReceiver>,
Has<TransmittedShadowReceiver>,
Has<NotShadowCaster>,
Expand All @@ -1168,6 +1181,7 @@ pub fn extract_meshes_for_gpu_building(
Changed<Lightmap>,
Changed<Aabb>,
Changed<Mesh3d>,
Changed<NoFrustumCulling>,
Changed<NotShadowReceiver>,
Changed<TransmittedShadowReceiver>,
Changed<NotShadowCaster>,
Expand All @@ -1179,7 +1193,7 @@ pub fn extract_meshes_for_gpu_building(
mut removed_visibilities_query: Extract<RemovedComponents<ViewVisibility>>,
mut removed_global_transforms_query: Extract<RemovedComponents<GlobalTransform>>,
mut removed_meshes_query: Extract<RemovedComponents<Mesh3d>>,
cameras_query: Extract<Query<(), (With<Camera>, With<GpuCulling>)>>,
cameras_query: Extract<Query<(), (With<Camera>, Without<NoIndirectDrawing>)>>,
) {
let any_gpu_culling = !cameras_query.is_empty();
for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() {
Expand Down Expand Up @@ -1209,6 +1223,7 @@ pub fn extract_meshes_for_gpu_building(
lightmap,
aabb,
mesh,
no_frustum_culling,
not_shadow_receiver,
transmitted_receiver,
not_shadow_caster,
Expand All @@ -1231,6 +1246,7 @@ pub fn extract_meshes_for_gpu_building(
let mesh_flags = MeshFlags::from_components(
transform,
lod_index,
no_frustum_culling,
not_shadow_receiver,
transmitted_receiver,
);
Expand Down
18 changes: 10 additions & 8 deletions crates/bevy_pbr/src/render/mesh_preprocess.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// mesh's transform on the previous frame and writes it into the `MeshUniform`
// so that TAA works.

#import bevy_pbr::mesh_types::Mesh
#import bevy_pbr::mesh_types::{Mesh, MESH_FLAGS_NO_FRUSTUM_CULLING_BIT}
#import bevy_render::maths
#import bevy_render::view::View

Expand Down Expand Up @@ -145,13 +145,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {

// Cull if necessary.
#ifdef FRUSTUM_CULLING
let aabb_center = mesh_culling_data[input_index].aabb_center.xyz;
let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;

// Do an OBB-based frustum cull.
let model_center = world_from_local * vec4(aabb_center, 1.0);
if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) {
return;
if ((current_input[input_index].flags & MESH_FLAGS_NO_FRUSTUM_CULLING_BIT) == 0u) {
let aabb_center = mesh_culling_data[input_index].aabb_center.xyz;
let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;

// Do an OBB-based frustum cull.
let model_center = world_from_local * vec4(aabb_center, 1.0);
if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) {
return;
}
}
#endif

Expand Down
2 changes: 2 additions & 0 deletions crates/bevy_pbr/src/render/mesh_types.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ struct MorphWeights {

// [2^0, 2^16)
const MESH_FLAGS_VISIBILITY_RANGE_INDEX_BITS: u32 = 65535u;
// 2^28
const MESH_FLAGS_NO_FRUSTUM_CULLING_BIT: u32 = 268435456u;
// 2^29
const MESH_FLAGS_SHADOW_RECEIVER_BIT: u32 = 536870912u;
// 2^30
Expand Down
Loading

0 comments on commit 92cce83

Please sign in to comment.