From 356363cc21dd4bc1dafa825d1336d05c3e061cb6 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Sun, 1 Dec 2024 19:22:47 -0800 Subject: [PATCH] Batch skinned meshes on platforms where storage buffers are available. This commit makes skinned meshes batchable on platforms other than WebGL 2. On such platforms, it replaces the two uniform buffers used for joint matrices with a pair of storage buffers containing all matrices for all skinned meshes concatenated together. The indices into the buffer are stored in the mesh uniform and mesh input uniform. The GPU mesh preprocessing step copies the indices in if that step is enabled. On the `many_foxes` demo, I observed a frame time decrease from 15.470ms to 11.935ms. This is the result of reducing the `submit_graph_commands` time from an average of 5.45ms to 0.489ms, an 11x speedup in that portion of rendering. --- crates/bevy_pbr/src/prepass/mod.rs | 7 ++ crates/bevy_pbr/src/prepass/prepass.wgsl | 7 +- crates/bevy_pbr/src/render/mesh.rs | 111 +++++++++++++----- crates/bevy_pbr/src/render/mesh.wgsl | 6 +- crates/bevy_pbr/src/render/mesh_bindings.rs | 71 +++++++---- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 6 +- crates/bevy_pbr/src/render/mesh_types.wgsl | 4 +- crates/bevy_pbr/src/render/mod.rs | 2 +- crates/bevy_pbr/src/render/skin.rs | 55 +++++++-- crates/bevy_pbr/src/render/skinning.wgsl | 33 +++++- 10 files changed, 227 insertions(+), 75 deletions(-) diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 8bd10f9678984..230bab4311d77 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -251,6 +251,11 @@ pub struct PrepassPipeline { pub deferred_material_vertex_shader: Option>, pub deferred_material_fragment_shader: Option>, pub material_pipeline: MaterialPipeline, + + /// Whether skins will use uniform buffers on account of storage buffers + /// being unavailable on this platform. 
+ pub skins_use_uniform_buffers: bool, + _marker: PhantomData, } @@ -315,6 +320,7 @@ impl FromWorld for PrepassPipeline { }, material_layout: M::bind_group_layout(render_device), material_pipeline: world.resource::>().clone(), + skins_use_uniform_buffers: skin::skins_use_uniform_buffers(render_device), _marker: PhantomData, } } @@ -468,6 +474,7 @@ where &key.mesh_key, &mut shader_defs, &mut vertex_attributes, + self.skins_use_uniform_buffers, ); bind_group_layouts.insert(1, bind_group); diff --git a/crates/bevy_pbr/src/prepass/prepass.wgsl b/crates/bevy_pbr/src/prepass/prepass.wgsl index 7a0fcb89ed8f6..c56893976c7f7 100644 --- a/crates/bevy_pbr/src/prepass/prepass.wgsl +++ b/crates/bevy_pbr/src/prepass/prepass.wgsl @@ -67,7 +67,11 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput { #endif #ifdef SKINNED - var world_from_local = skinning::skin_model(vertex.joint_indices, vertex.joint_weights); + var world_from_local = skinning::skin_model( + vertex.joint_indices, + vertex.joint_weights, + vertex_no_morph.instance_index + ); #else // SKINNED // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. 
// See https://github.com/gfx-rs/naga/issues/2416 @@ -140,6 +144,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput { let prev_model = skinning::skin_prev_model( prev_vertex.joint_indices, prev_vertex.joint_weights, + vertex_no_morph.instance_index ); #else // HAS_PREVIOUS_SKIN let prev_model = mesh_functions::get_previous_world_from_local(prev_vertex.instance_index); diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index e8adff671b01e..ca4cdeafaf164 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -45,6 +45,7 @@ use bevy_utils::{ tracing::{error, warn}, Entry, HashMap, Parallel, }; +use render::skin::{self, SkinIndex}; use crate::{ render::{ @@ -148,7 +149,6 @@ impl Plugin for MeshRenderPlugin { if let Some(render_app) = app.get_sub_app_mut(RenderApp) { render_app .init_resource::() - .init_resource::() .init_resource::() .init_resource::() .init_resource::() @@ -184,7 +184,9 @@ impl Plugin for MeshRenderPlugin { let mut mesh_bindings_shader_defs = Vec::with_capacity(1); if let Some(render_app) = app.get_sub_app_mut(RenderApp) { - render_app.init_resource::(); + render_app + .init_resource::() + .init_resource::(); let gpu_preprocessing_support = render_app.world().resource::(); @@ -213,6 +215,7 @@ impl Plugin for MeshRenderPlugin { collect_meshes_for_gpu_building .in_set(RenderSet::PrepareAssets) .after(allocator::allocate_and_free_meshes) + .after(extract_skins) // This must be before // `set_mesh_motion_vector_flags` so it doesn't // overwrite those flags. @@ -298,12 +301,12 @@ pub struct MeshUniform { /// [`MeshAllocator`]). This value stores the offset of the first vertex in /// this mesh in that buffer. pub first_vertex_index: u32, + /// The current skin index, or `u32::MAX` if there's no skin. + pub current_skin_index: u32, + /// The previous skin index, or `u32::MAX` if there's no previous skin. + pub previous_skin_index: u32, /// Padding. pub pad_a: u32, - /// Padding. 
- pub pad_b: u32, - /// Padding. - pub pad_c: u32, } /// Information that has to be transferred from CPU to GPU in order to produce @@ -340,12 +343,12 @@ pub struct MeshInputUniform { /// [`MeshAllocator`]). This value stores the offset of the first vertex in /// this mesh in that buffer. pub first_vertex_index: u32, + /// The current skin index, or `u32::MAX` if there's no skin. + pub current_skin_index: u32, + /// The previous skin index, or `u32::MAX` if there's no previous skin. + pub previous_skin_index: u32, /// Padding. pub pad_a: u32, - /// Padding. - pub pad_b: u32, - /// Padding. - pub pad_c: u32, } /// Information about each mesh instance needed to cull it on GPU. @@ -376,6 +379,8 @@ impl MeshUniform { mesh_transforms: &MeshTransforms, first_vertex_index: u32, maybe_lightmap_uv_rect: Option, + current_skin_index: Option, + previous_skin_index: Option, ) -> Self { let (local_from_world_transpose_a, local_from_world_transpose_b) = mesh_transforms.world_from_local.inverse_transpose_3x3(); @@ -387,9 +392,9 @@ impl MeshUniform { local_from_world_transpose_b, flags: mesh_transforms.flags, first_vertex_index, + current_skin_index: current_skin_index.unwrap_or(u32::MAX), + previous_skin_index: previous_skin_index.unwrap_or(u32::MAX), pad_a: 0, - pad_b: 0, - pad_c: 0, } } } @@ -776,6 +781,7 @@ impl RenderMeshInstanceGpuBuilder { render_mesh_instances: &mut MainEntityHashMap, current_input_buffer: &mut RawBufferVec, mesh_allocator: &MeshAllocator, + skin_indices: &SkinIndices, ) -> usize { let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) { @@ -783,6 +789,15 @@ impl RenderMeshInstanceGpuBuilder { None => 0, }; + let current_skin_index = match skin_indices.current.get(&entity) { + Some(skin_indices) => skin_indices.index(), + None => u32::MAX, + }; + let previous_skin_index = match skin_indices.prev.get(&entity) { + Some(skin_indices) => skin_indices.index(), + None => u32::MAX, + }; + // Push the mesh input uniform. 
let current_uniform_index = current_input_buffer.push(MeshInputUniform { world_from_local: self.world_from_local.to_transpose(), @@ -793,9 +808,9 @@ impl RenderMeshInstanceGpuBuilder { None => u32::MAX, }, first_vertex_index, + current_skin_index, + previous_skin_index, pad_a: 0, - pad_b: 0, - pad_c: 0, }); // Record the [`RenderMeshInstance`]. @@ -1111,6 +1126,7 @@ pub fn collect_meshes_for_gpu_building( mut mesh_culling_data_buffer: ResMut, mut render_mesh_instance_queues: ResMut, mesh_allocator: Res, + skin_indices: Res, ) { let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = render_mesh_instances.into_inner() @@ -1144,6 +1160,7 @@ pub fn collect_meshes_for_gpu_building( &mut *render_mesh_instances, current_input_buffer, &mesh_allocator, + &skin_indices, ); } } @@ -1154,6 +1171,7 @@ pub fn collect_meshes_for_gpu_building( &mut *render_mesh_instances, current_input_buffer, &mesh_allocator, + &skin_indices, ); let culling_data_index = mesh_culling_builder.add_to(&mut mesh_culling_data_buffer); @@ -1191,6 +1209,10 @@ pub struct MeshPipeline { /// /// This affects whether reflection probes can be used. pub binding_arrays_are_usable: bool, + + /// Whether skins will use uniform buffers on account of storage buffers + /// being unavailable on this platform. + pub skins_use_uniform_buffers: bool, } impl FromWorld for MeshPipeline { @@ -1248,6 +1270,7 @@ impl FromWorld for MeshPipeline { mesh_layouts: MeshLayouts::new(&render_device), per_object_buffer_batch_size: GpuArrayBuffer::::batch_size(&render_device), binding_arrays_are_usable: binding_arrays_are_usable(&render_device), + skins_use_uniform_buffers: skin::skins_use_uniform_buffers(&render_device), } } } @@ -1280,6 +1303,7 @@ impl GetBatchData for MeshPipeline { SRes, SRes>, SRes, + SRes, ); // The material bind group ID, the mesh ID, and the lightmap ID, // respectively. 
@@ -1288,7 +1312,7 @@ impl GetBatchData for MeshPipeline { type BufferData = MeshUniform; fn get_batch_data( - (mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem, + (mesh_instances, lightmaps, _, mesh_allocator, skin_indices): &SystemParamItem, (_entity, main_entity): (Entity, MainEntity), ) -> Option<(Self::BufferData, Option)> { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1306,11 +1330,16 @@ impl GetBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); + let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); + let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); + Some(( MeshUniform::new( &mesh_instance.transforms, first_vertex_index, maybe_lightmap.map(|lightmap| lightmap.uv_rect), + current_skin_index, + previous_skin_index, ), mesh_instance.should_batch().then_some(( mesh_instance.material_bind_group_id.get(), @@ -1325,7 +1354,7 @@ impl GetFullBatchData for MeshPipeline { type BufferInputData = MeshInputUniform; fn get_index_and_compare_data( - (mesh_instances, lightmaps, _, _): &SystemParamItem, + (mesh_instances, lightmaps, _, _, _): &SystemParamItem, (_entity, main_entity): (Entity, MainEntity), ) -> Option<(NonMaxU32, Option)> { // This should only be called during GPU building. 
@@ -1351,7 +1380,7 @@ impl GetFullBatchData for MeshPipeline { } fn get_binned_batch_data( - (mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem, + (mesh_instances, lightmaps, _, mesh_allocator, skin_indices): &SystemParamItem, (_entity, main_entity): (Entity, MainEntity), ) -> Option { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1368,15 +1397,20 @@ impl GetFullBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); + let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); + let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); + Some(MeshUniform::new( &mesh_instance.transforms, first_vertex_index, maybe_lightmap.map(|lightmap| lightmap.uv_rect), + current_skin_index, + previous_skin_index, )) } fn get_binned_index( - (mesh_instances, _, _, _): &SystemParamItem, + (mesh_instances, _, _, _, _): &SystemParamItem, (_entity, main_entity): (Entity, MainEntity), ) -> Option { // This should only be called during GPU building. 
@@ -1394,7 +1428,7 @@ impl GetFullBatchData for MeshPipeline { } fn get_batch_indirect_parameters_index( - (mesh_instances, _, meshes, mesh_allocator): &SystemParamItem, + (mesh_instances, _, meshes, mesh_allocator, _): &SystemParamItem, indirect_parameters_buffer: &mut IndirectParametersBuffer, entity: (Entity, MainEntity), instance_index: u32, @@ -1632,15 +1666,22 @@ pub fn setup_morph_and_skinning_defs( key: &MeshPipelineKey, shader_defs: &mut Vec, vertex_attributes: &mut Vec, + skins_use_uniform_buffers: bool, ) -> BindGroupLayout { + let is_morphed = key.intersects(MeshPipelineKey::MORPH_TARGETS); + let is_lightmapped = key.intersects(MeshPipelineKey::LIGHTMAPPED); + let motion_vector_prepass = key.intersects(MeshPipelineKey::MOTION_VECTOR_PREPASS); + + if skins_use_uniform_buffers { + shader_defs.push("SKINS_USE_UNIFORM_BUFFERS".into()); + } + let mut add_skin_data = || { shader_defs.push("SKINNED".into()); vertex_attributes.push(Mesh::ATTRIBUTE_JOINT_INDEX.at_shader_location(offset)); vertex_attributes.push(Mesh::ATTRIBUTE_JOINT_WEIGHT.at_shader_location(offset + 1)); }; - let is_morphed = key.intersects(MeshPipelineKey::MORPH_TARGETS); - let is_lightmapped = key.intersects(MeshPipelineKey::LIGHTMAPPED); - let motion_vector_prepass = key.intersects(MeshPipelineKey::MOTION_VECTOR_PREPASS); + match ( is_skinned(layout), is_morphed, @@ -1749,6 +1790,7 @@ impl SpecializedMeshPipeline for MeshPipeline { &key, &mut shader_defs, &mut vertex_attributes, + self.skins_use_uniform_buffers, )); if key.contains(MeshPipelineKey::SCREEN_SPACE_AMBIENT_OCCLUSION) { @@ -2241,6 +2283,7 @@ impl RenderCommand

for SetMeshViewBindGroup pub struct SetMeshBindGroup; impl RenderCommand

for SetMeshBindGroup { type Param = ( + SRes, SRes, SRes, SRes, @@ -2255,11 +2298,14 @@ impl RenderCommand

for SetMeshBindGroup { item: &P, has_motion_vector_prepass: bool, _item_query: Option<()>, - (bind_groups, mesh_instances, skin_indices, morph_indices, lightmaps): SystemParamItem< - 'w, - '_, - Self::Param, - >, + ( + render_device, + bind_groups, + mesh_instances, + skin_indices, + morph_indices, + lightmaps, + ): SystemParamItem<'w, '_, Self::Param>, pass: &mut TrackedRenderPass<'w>, ) -> RenderCommandResult { let bind_groups = bind_groups.into_inner(); @@ -2272,6 +2318,7 @@ impl RenderCommand

for SetMeshBindGroup { let Some(mesh_asset_id) = mesh_instances.mesh_asset_id(*entity) else { return RenderCommandResult::Success; }; + let current_skin_index = skin_indices.current.get(entity); let prev_skin_index = skin_indices.prev.get(entity); let current_morph_index = morph_indices.current.get(entity); @@ -2306,8 +2353,10 @@ impl RenderCommand

for SetMeshBindGroup { offset_count += 1; } if let Some(current_skin_index) = current_skin_index { - dynamic_offsets[offset_count] = current_skin_index.index; - offset_count += 1; + if skin::skins_use_uniform_buffers(&render_device) { + dynamic_offsets[offset_count] = current_skin_index.byte_offset; + offset_count += 1; + } } if let Some(current_morph_index) = current_morph_index { dynamic_offsets[offset_count] = current_morph_index.index; @@ -2320,7 +2369,9 @@ impl RenderCommand

for SetMeshBindGroup { // there isn't one, just use zero as the shader will ignore it. - if current_skin_index.is_some() { + if current_skin_index.is_some() && skin::skins_use_uniform_buffers(&render_device) { match prev_skin_index { - Some(prev_skin_index) => dynamic_offsets[offset_count] = prev_skin_index.index, + Some(prev_skin_index) => { + dynamic_offsets[offset_count] = prev_skin_index.byte_offset; + } None => dynamic_offsets[offset_count] = 0, } offset_count += 1; diff --git a/crates/bevy_pbr/src/render/mesh.wgsl b/crates/bevy_pbr/src/render/mesh.wgsl index 7d617755adc55..4ae426cdf4214 100644 --- a/crates/bevy_pbr/src/render/mesh.wgsl +++ b/crates/bevy_pbr/src/render/mesh.wgsl @@ -42,7 +42,11 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput { #endif #ifdef SKINNED - var world_from_local = skinning::skin_model(vertex.joint_indices, vertex.joint_weights); + var world_from_local = skinning::skin_model( + vertex.joint_indices, + vertex.joint_weights, + vertex_no_morph.instance_index + ); #else // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. // See https://github.com/gfx-rs/naga/issues/2416 . diff --git a/crates/bevy_pbr/src/render/mesh_bindings.rs b/crates/bevy_pbr/src/render/mesh_bindings.rs index cda05314fb180..7595e88a676ed 100644 --- a/crates/bevy_pbr/src/render/mesh_bindings.rs +++ b/crates/bevy_pbr/src/render/mesh_bindings.rs @@ -16,10 +16,13 @@ pub(crate) const JOINT_BUFFER_SIZE: usize = MAX_JOINTS * JOINT_SIZE; /// Individual layout entries. 
mod layout_entry { use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE}; - use crate::MeshUniform; + use crate::{render::skin, MeshUniform}; use bevy_render::{ render_resource::{ - binding_types::{sampler, texture_2d, texture_3d, uniform_buffer_sized}, + binding_types::{ + sampler, storage_buffer_read_only_sized, texture_2d, texture_3d, + uniform_buffer_sized, + }, BindGroupLayoutEntryBuilder, BufferSize, GpuArrayBuffer, SamplerBindingType, ShaderStages, TextureSampleType, }, @@ -30,8 +33,15 @@ mod layout_entry { GpuArrayBuffer::::binding_layout(render_device) .visibility(ShaderStages::VERTEX_FRAGMENT) } - pub(super) fn skinning() -> BindGroupLayoutEntryBuilder { - uniform_buffer_sized(true, BufferSize::new(JOINT_BUFFER_SIZE as u64)) + pub(super) fn skinning(render_device: &RenderDevice) -> BindGroupLayoutEntryBuilder { + // If we can use storage buffers, do so. Otherwise, fall back to uniform + // buffers. + let size = BufferSize::new(JOINT_BUFFER_SIZE as u64); + if skin::skins_use_uniform_buffers(render_device) { + uniform_buffer_sized(true, size) + } else { + storage_buffer_read_only_sized(false, size) + } } pub(super) fn weights() -> BindGroupLayoutEntryBuilder { uniform_buffer_sized(true, BufferSize::new(MORPH_BUFFER_SIZE as u64)) @@ -50,29 +60,44 @@ mod layout_entry { /// Individual [`BindGroupEntry`] /// for bind groups. 
mod entry { + use crate::render::skin; + use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE}; - use bevy_render::render_resource::{ - BindGroupEntry, BindingResource, Buffer, BufferBinding, BufferSize, Sampler, TextureView, + use bevy_render::{ + render_resource::{ + BindGroupEntry, BindingResource, Buffer, BufferBinding, BufferSize, Sampler, + TextureView, + }, + renderer::RenderDevice, }; - fn entry(binding: u32, size: u64, buffer: &Buffer) -> BindGroupEntry { + fn entry(binding: u32, size: Option, buffer: &Buffer) -> BindGroupEntry { BindGroupEntry { binding, resource: BindingResource::Buffer(BufferBinding { buffer, offset: 0, - size: Some(BufferSize::new(size).unwrap()), + size: size.map(|size| BufferSize::new(size).unwrap()), }), } } pub(super) fn model(binding: u32, resource: BindingResource) -> BindGroupEntry { BindGroupEntry { binding, resource } } - pub(super) fn skinning(binding: u32, buffer: &Buffer) -> BindGroupEntry { - entry(binding, JOINT_BUFFER_SIZE as u64, buffer) + pub(super) fn skinning<'a>( + render_device: &RenderDevice, + binding: u32, + buffer: &'a Buffer, + ) -> BindGroupEntry<'a> { + let size = if skin::skins_use_uniform_buffers(render_device) { + Some(JOINT_BUFFER_SIZE as u64) + } else { + None + }; + entry(binding, size, buffer) } pub(super) fn weights(binding: u32, buffer: &Buffer) -> BindGroupEntry { - entry(binding, MORPH_BUFFER_SIZE as u64, buffer) + entry(binding, Some(MORPH_BUFFER_SIZE as u64), buffer) } pub(super) fn targets(binding: u32, texture: &TextureView) -> BindGroupEntry { BindGroupEntry { @@ -169,7 +194,7 @@ impl MeshLayouts { ( (0, layout_entry::model(render_device)), // The current frame's joint matrix buffer. - (1, layout_entry::skinning()), + (1, layout_entry::skinning(render_device)), ), ), ) @@ -185,9 +210,9 @@ impl MeshLayouts { ( (0, layout_entry::model(render_device)), // The current frame's joint matrix buffer. 
- (1, layout_entry::skinning()), + (1, layout_entry::skinning(render_device)), // The previous frame's joint matrix buffer. - (6, layout_entry::skinning()), + (6, layout_entry::skinning(render_device)), ), ), ) @@ -238,7 +263,7 @@ impl MeshLayouts { ( (0, layout_entry::model(render_device)), // The current frame's joint matrix buffer. - (1, layout_entry::skinning()), + (1, layout_entry::skinning(render_device)), // The current frame's morph weight buffer. (2, layout_entry::weights()), (3, layout_entry::targets()), @@ -257,12 +282,12 @@ impl MeshLayouts { ( (0, layout_entry::model(render_device)), // The current frame's joint matrix buffer. - (1, layout_entry::skinning()), + (1, layout_entry::skinning(render_device)), // The current frame's morph weight buffer. (2, layout_entry::weights()), (3, layout_entry::targets()), // The previous frame's joint matrix buffer. - (6, layout_entry::skinning()), + (6, layout_entry::skinning(render_device)), // The previous frame's morph weight buffer. (7, layout_entry::weights()), ), @@ -323,7 +348,7 @@ impl MeshLayouts { &self.skinned, &[ entry::model(0, model.clone()), - entry::skinning(1, current_skin), + entry::skinning(render_device, 1, current_skin), ], ) } @@ -347,8 +372,8 @@ impl MeshLayouts { &self.skinned_motion, &[ entry::model(0, model.clone()), - entry::skinning(1, current_skin), - entry::skinning(6, prev_skin), + entry::skinning(render_device, 1, current_skin), + entry::skinning(render_device, 6, prev_skin), ], ) } @@ -414,7 +439,7 @@ impl MeshLayouts { &self.morphed_skinned, &[ entry::model(0, model.clone()), - entry::skinning(1, current_skin), + entry::skinning(render_device, 1, current_skin), entry::weights(2, current_weights), entry::targets(3, targets), ], @@ -444,10 +469,10 @@ impl MeshLayouts { &self.morphed_skinned_motion, &[ entry::model(0, model.clone()), - entry::skinning(1, current_skin), + entry::skinning(render_device, 1, current_skin), entry::weights(2, current_weights), entry::targets(3, targets), - 
entry::skinning(6, prev_skin), + entry::skinning(render_device, 6, prev_skin), entry::weights(7, prev_weights), ], ) diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 6a5a1fcf06e33..73358bb76af1d 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -23,9 +23,9 @@ struct MeshInput { // applicable. If not present, this is `u32::MAX`. previous_input_index: u32, first_vertex_index: u32, + current_skin_index: u32, + previous_skin_index: u32, pad_a: u32, - pad_b: u32, - pad_c: u32, } // Information about each mesh instance needed to cull it on GPU. @@ -191,4 +191,6 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].flags = current_input[input_index].flags; output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; + output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; + output[mesh_output_index].previous_skin_index = current_input[input_index].previous_skin_index; } diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index 57576a3bb3805..d6958c59d090a 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -17,9 +17,9 @@ struct Mesh { lightmap_uv_rect: vec2, // The index of the mesh's first vertex in the vertex buffer. 
first_vertex_index: u32, + current_skin_index: u32, + previous_skin_index: u32, pad_a: u32, - pad_b: u32, - pad_c: u32, }; #ifdef SKINNED diff --git a/crates/bevy_pbr/src/render/mod.rs b/crates/bevy_pbr/src/render/mod.rs index 2a69e28bf3a44..8e26e869a1c96 100644 --- a/crates/bevy_pbr/src/render/mod.rs +++ b/crates/bevy_pbr/src/render/mod.rs @@ -5,7 +5,7 @@ pub(crate) mod mesh; mod mesh_bindings; mod mesh_view_bindings; mod morph; -mod skin; +pub(crate) mod skin; pub use fog::*; pub use gpu_preprocess::*; diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index b6f35fc0bf49d..e4599bf7e7f40 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -1,4 +1,5 @@ use core::mem::{self, size_of}; +use std::sync::OnceLock; use bevy_asset::Assets; use bevy_ecs::prelude::*; @@ -17,18 +18,27 @@ use bevy_transform::prelude::GlobalTransform; /// Maximum number of joints supported for skinned meshes. pub const MAX_JOINTS: usize = 256; +/// The location of the first joint matrix in the skin uniform buffer. #[derive(Component)] pub struct SkinIndex { - pub index: u32, + /// The byte offset of the first joint matrix. + pub byte_offset: u32, } impl SkinIndex { /// Index to be in address space based on the size of a skin uniform. const fn new(start: usize) -> Self { SkinIndex { - index: (start * size_of::()) as u32, + byte_offset: (start * size_of::()) as u32, } } + + /// Returns this skin index in elements (not bytes). + /// + /// Each element is a 4x4 matrix. 
+ pub fn index(&self) -> u32 { + self.byte_offset / size_of::() as u32 + } } /// Maps each skinned mesh to the applicable offset within the [`SkinUniforms`] @@ -64,15 +74,29 @@ pub struct SkinUniforms { pub prev_buffer: RawBufferVec, } -impl Default for SkinUniforms { - fn default() -> Self { +impl FromWorld for SkinUniforms { + fn from_world(world: &mut World) -> Self { + let device = world.resource::(); + let buffer_usages = if skins_use_uniform_buffers(device) { + BufferUsages::UNIFORM + } else { + BufferUsages::STORAGE + }; + Self { - current_buffer: RawBufferVec::new(BufferUsages::UNIFORM), - prev_buffer: RawBufferVec::new(BufferUsages::UNIFORM), + current_buffer: RawBufferVec::new(buffer_usages), + prev_buffer: RawBufferVec::new(buffer_usages), } } } +/// Returns true if skinning must use uniforms (and dynamic offsets) because +/// storage buffers aren't supported on the current platform. +pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool { + static SKINS_USE_UNIFORM_BUFFERS: OnceLock = OnceLock::new(); + *SKINS_USE_UNIFORM_BUFFERS.get_or_init(|| render_device.limits().max_storage_buffers_per_shader_stage == 0) +} + pub fn prepare_skins( render_device: Res, render_queue: Res, @@ -124,7 +148,10 @@ pub fn extract_skins( query: Extract>, inverse_bindposes: Extract>>, joints: Extract>, + render_device: Res, ) { + let skins_use_uniform_buffers = skins_use_uniform_buffers(&render_device); + // Borrow check workaround. let (skin_indices, uniform) = (skin_indices.into_inner(), uniform.into_inner()); @@ -164,9 +191,12 @@ pub fn extract_skins( } last_start = last_start.max(start); - // Pad to 256 byte alignment - while buffer.len() % 4 != 0 { - buffer.push(Mat4::ZERO); + // Pad to 256 byte alignment if we're using a uniform buffer. + // There's no need to do this if we're using storage buffers, though. 
+ if skins_use_uniform_buffers { + while buffer.len() % 4 != 0 { + buffer.push(Mat4::ZERO); + } } skin_indices @@ -181,11 +211,16 @@ pub fn extract_skins( } // NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per -// entity and so cannot currently be batched. +// entity and so cannot currently be batched on WebGL 2. pub fn no_automatic_skin_batching( mut commands: Commands, query: Query, Without)>, + render_device: Res, ) { + if !skins_use_uniform_buffers(&render_device) { + return; + } + for entity in &query { commands.entity(entity).try_insert(NoAutomaticBatching); } diff --git a/crates/bevy_pbr/src/render/skinning.wgsl b/crates/bevy_pbr/src/render/skinning.wgsl index 1ed9393308995..a7ad7abd5c4ae 100644 --- a/crates/bevy_pbr/src/render/skinning.wgsl +++ b/crates/bevy_pbr/src/render/skinning.wgsl @@ -1,10 +1,15 @@ #define_import_path bevy_pbr::skinning #import bevy_pbr::mesh_types::SkinnedMesh +#import bevy_pbr::mesh_bindings::mesh #ifdef SKINNED +#ifdef SKINS_USE_UNIFORM_BUFFERS @group(1) @binding(1) var joint_matrices: SkinnedMesh; +#else // SKINS_USE_UNIFORM_BUFFERS +@group(1) @binding(1) var joint_matrices: array>; +#endif // SKINS_USE_UNIFORM_BUFFERS // An array of matrices specifying the joint positions from the previous frame. // @@ -12,16 +17,25 @@ // // If this is the first frame, or we're otherwise prevented from using data from // the previous frame, this is simply the same as `joint_matrices` above. 
-@group(1) @binding(6) var prev_joint_matrices: SkinnedMesh; +@group(1) @binding(6) var prev_joint_matrices: array; fn skin_model( indexes: vec4, weights: vec4, + instance_index: u32, ) -> mat4x4 { +#ifdef SKINS_USE_UNIFORM_BUFFERS return weights.x * joint_matrices.data[indexes.x] + weights.y * joint_matrices.data[indexes.y] + weights.z * joint_matrices.data[indexes.z] + weights.w * joint_matrices.data[indexes.w]; +#else // SKINS_USE_UNIFORM_BUFFERS + let skin_index = mesh[instance_index].current_skin_index; + return weights.x * joint_matrices[skin_index + indexes.x] + + weights.y * joint_matrices[skin_index + indexes.y] + + weights.z * joint_matrices[skin_index + indexes.z] + + weights.w * joint_matrices[skin_index + indexes.w]; +#endif // SKINS_USE_UNIFORM_BUFFERS } // Returns the skinned position of a vertex with the given weights from the // @@ -31,11 +45,20 @@ fn skin_model( fn skin_prev_model( indexes: vec4, weights: vec4, + instance_index: u32, ) -> mat4x4 { - return weights.x * prev_joint_matrices.data[indexes.x] - + weights.y * prev_joint_matrices.data[indexes.y] - + weights.z * prev_joint_matrices.data[indexes.z] - + weights.w * prev_joint_matrices.data[indexes.w]; +#ifdef SKINS_USE_UNIFORM_BUFFERS + return weights.x * prev_joint_matrices.data[indexes.x] + + weights.y * prev_joint_matrices.data[indexes.y] + + weights.z * prev_joint_matrices.data[indexes.z] + + weights.w * prev_joint_matrices.data[indexes.w]; +#else // SKINS_USE_UNIFORM_BUFFERS + let skin_index = mesh[instance_index].previous_skin_index; + return weights.x * prev_joint_matrices[skin_index + indexes.x] + + weights.y * prev_joint_matrices[skin_index + indexes.y] + + weights.z * prev_joint_matrices[skin_index + indexes.z] + + weights.w * prev_joint_matrices[skin_index + indexes.w]; +#endif // SKINS_USE_UNIFORM_BUFFERS } fn inverse_transpose_3x3m(in: mat3x3) -> mat3x3 {