Skip to content

Commit

Permalink
Allow holes in the MeshInputUniform buffer. (#16695)
Browse files Browse the repository at this point in the history
This commit removes the logic that attempted to keep the
`MeshInputUniform` buffer contiguous. Not only was it slow and complex,
but it was also incorrect, which caused #16686 and #16690. I changed the
logic to simply maintain a free list of unused slots in the buffer and
preferentially fill them when pushing new mesh input uniforms.

Closes #16686.
Closes #16690.
  • Loading branch information
pcwalton authored Dec 9, 2024
1 parent 61b98ec commit 1e7b89c
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 112 deletions.
4 changes: 2 additions & 2 deletions crates/bevy_pbr/src/render/gpu_preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,8 @@ pub fn prepare_preprocess_bind_groups(
} = batched_instance_buffers.into_inner();

let (Some(current_input_buffer), Some(previous_input_buffer), Some(data_buffer)) = (
current_input_buffer_vec.buffer.buffer(),
previous_input_buffer_vec.buffer.buffer(),
current_input_buffer_vec.buffer().buffer(),
previous_input_buffer_vec.buffer().buffer(),
data_buffer_vec.buffer(),
) else {
return;
Expand Down
109 changes: 19 additions & 90 deletions crates/bevy_pbr/src/render/mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -652,22 +652,6 @@ pub struct RenderMeshInstancesCpu(MainEntityHashMap<RenderMeshInstanceCpu>);
#[derive(Default, Deref, DerefMut)]
pub struct RenderMeshInstancesGpu(MainEntityHashMap<RenderMeshInstanceGpu>);

/// The result of an entity removal.
///
/// We want the mesh input uniform array to have no gaps in it in order to
/// simplify the GPU logic. Therefore, whenever entity data is removed from the
/// [`MeshInputUniform`] buffer, the last entity is swapped in to fill it. If
/// culling is enabled, the mesh culling data corresponding to an entity must
/// have the same indices as the mesh input uniforms for that entity. Thus we
/// need to pass this information to [`MeshCullingDataBuffer::remove`] so that
/// it can update its buffer to match the [`MeshInputUniform`] buffer.
struct RemovedMeshInputUniformIndices {
/// The index of the mesh input that was removed.
removed_index: usize,
/// The index of the mesh input that was moved to fill its place.
moved_index: usize,
}

/// Maps each mesh instance to the material ID, and allocated binding ID,
/// associated with that mesh instance.
#[derive(Resource, Default)]
Expand Down Expand Up @@ -896,7 +880,7 @@ impl RenderMeshInstanceGpuBuilder {
current_input_buffer: &mut InstanceInputUniformBuffer<MeshInputUniform>,
previous_input_buffer: &mut InstanceInputUniformBuffer<MeshInputUniform>,
mesh_allocator: &MeshAllocator,
) -> usize {
) -> u32 {
let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id)
{
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
Expand All @@ -922,39 +906,33 @@ impl RenderMeshInstanceGpuBuilder {
// Yes, it did. Replace its entry with the new one.

// Reserve a slot.
current_uniform_index =
u32::from(occupied_entry.get_mut().current_uniform_index) as usize;
current_uniform_index = u32::from(occupied_entry.get_mut().current_uniform_index);

// Save the old mesh input uniform. The mesh preprocessing
// shader will need it to compute motion vectors.
let previous_mesh_input_uniform =
current_input_buffer.buffer.values()[current_uniform_index];
let previous_input_index = previous_input_buffer
.buffer
.push(previous_mesh_input_uniform);
previous_input_buffer.main_entities.push(entity);
mesh_input_uniform.previous_input_index = previous_input_index as u32;
let previous_mesh_input_uniform = current_input_buffer.get(current_uniform_index);
let previous_input_index = previous_input_buffer.add(previous_mesh_input_uniform);
mesh_input_uniform.previous_input_index = previous_input_index;

// Write in the new mesh input uniform.
current_input_buffer.buffer.values_mut()[current_uniform_index] =
mesh_input_uniform;
current_input_buffer.set(current_uniform_index, mesh_input_uniform);

occupied_entry.replace_entry(RenderMeshInstanceGpu {
translation: self.world_from_local.translation,
shared: self.shared,
current_uniform_index: NonMaxU32::new(current_uniform_index as u32)
current_uniform_index: NonMaxU32::new(current_uniform_index)
.unwrap_or_default(),
});
}

Entry::Vacant(vacant_entry) => {
// No, this is a new entity. Push its data on to the buffer.
current_uniform_index = current_input_buffer.buffer.push(mesh_input_uniform);
current_input_buffer.main_entities.push(entity);
current_uniform_index = current_input_buffer.add(mesh_input_uniform);

vacant_entry.insert(RenderMeshInstanceGpu {
translation: self.world_from_local.translation,
shared: self.shared,
current_uniform_index: NonMaxU32::new(current_uniform_index as u32)
current_uniform_index: NonMaxU32::new(current_uniform_index)
.unwrap_or_default(),
});
}
Expand All @@ -970,34 +948,13 @@ fn remove_mesh_input_uniform(
entity: MainEntity,
render_mesh_instances: &mut MainEntityHashMap<RenderMeshInstanceGpu>,
current_input_buffer: &mut InstanceInputUniformBuffer<MeshInputUniform>,
) -> Option<RemovedMeshInputUniformIndices> {
) -> Option<u32> {
// Remove the uniform data.
let removed_render_mesh_instance = render_mesh_instances.remove(&entity)?;
let removed_uniform_index = removed_render_mesh_instance.current_uniform_index.get() as usize;

// Now *move* the final mesh uniform to fill its place in the buffer, so
// that the buffer remains contiguous.
let moved_uniform_index = current_input_buffer.buffer.len() - 1;
if moved_uniform_index != removed_uniform_index {
let moved_uniform = current_input_buffer.buffer.pop().unwrap();
let moved_entity = current_input_buffer.main_entities.pop().unwrap();

current_input_buffer.buffer.values_mut()[removed_uniform_index] = moved_uniform;

if let Some(moved_render_mesh_instance) = render_mesh_instances.get_mut(&moved_entity) {
moved_render_mesh_instance.current_uniform_index =
NonMaxU32::new(removed_uniform_index as u32).unwrap_or_default();
}
} else {
current_input_buffer.buffer.pop();
current_input_buffer.main_entities.pop();
}

// Tell our caller what we did.
Some(RemovedMeshInputUniformIndices {
removed_index: removed_uniform_index,
moved_index: moved_uniform_index,
})
let removed_uniform_index = removed_render_mesh_instance.current_uniform_index.get();
current_input_buffer.remove(removed_uniform_index);
Some(removed_uniform_index)
}

impl MeshCullingData {
Expand Down Expand Up @@ -1039,26 +996,6 @@ impl Default for MeshCullingDataBuffer {
}
}

impl MeshCullingDataBuffer {
/// Updates the culling data buffer after a mesh has been removed from the scene.
///
/// [`remove_mesh_input_uniform`] removes a mesh input uniform from the
/// buffer, swapping in the last uniform to fill the space if necessary. The
/// index of each mesh in the mesh culling data and that index of the mesh
/// in the mesh input uniform buffer must match. Thus we have to perform the
/// corresponding operation here too. [`remove_mesh_input_uniform`] returns
/// a [`RemovedMeshInputUniformIndices`] value to tell us what to do, and
/// this function is where we process that value.
fn remove(&mut self, removed_indices: RemovedMeshInputUniformIndices) {
if removed_indices.moved_index != removed_indices.removed_index {
let moved_culling_data = self.pop().unwrap();
self.values_mut()[removed_indices.removed_index] = moved_culling_data;
} else {
self.pop();
}
}
}

/// Data that [`crate::material::queue_material_meshes`] and similar systems
/// need in order to place entities that contain meshes in the right batch.
#[derive(Deref)]
Expand Down Expand Up @@ -1434,31 +1371,23 @@ pub fn collect_meshes_for_gpu_building(
previous_input_buffer,
&mesh_allocator,
);
mesh_culling_builder.update(&mut mesh_culling_data_buffer, instance_data_index);
mesh_culling_builder
.update(&mut mesh_culling_data_buffer, instance_data_index as usize);
}

for entity in removed.drain(..) {
if let Some(removed_indices) = remove_mesh_input_uniform(
remove_mesh_input_uniform(
entity,
&mut *render_mesh_instances,
current_input_buffer,
) {
mesh_culling_data_buffer.remove(removed_indices);
}
);
}
}
}
}

// Buffers can't be empty. Make sure there's something in the previous input buffer.
if previous_input_buffer.buffer.is_empty() {
previous_input_buffer
.buffer
.push(MeshInputUniform::default());
previous_input_buffer
.main_entities
.push(Entity::PLACEHOLDER.into());
}
previous_input_buffer.ensure_nonempty();
}

/// All data needed to construct a pipeline for rendering 3D meshes.
Expand Down
76 changes: 57 additions & 19 deletions crates/bevy_render/src/batching/gpu_preprocessing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use bevy_ecs::{
world::{FromWorld, World},
};
use bevy_encase_derive::ShaderType;
use bevy_utils::tracing::error;
use bevy_utils::{default, tracing::error};
use bytemuck::{Pod, Zeroable};
use nonmax::NonMaxU32;
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
Expand All @@ -24,7 +24,6 @@ use crate::{
},
render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
renderer::{RenderAdapter, RenderDevice, RenderQueue},
sync_world::MainEntity,
view::{ExtractedView, GpuCulling, ViewTarget},
Render, RenderApp, RenderSet,
};
Expand Down Expand Up @@ -125,7 +124,7 @@ pub enum GpuPreprocessingMode {
pub struct BatchedInstanceBuffers<BD, BDI>
where
BD: GpuArrayBufferable + Sync + Send + 'static,
BDI: Pod,
BDI: Pod + Default,
{
/// A storage area for the buffer data that the GPU compute shader is
/// expected to write to.
Expand Down Expand Up @@ -161,44 +160,83 @@ where
/// shader is expected to expand to the full *buffer data* type.
pub struct InstanceInputUniformBuffer<BDI>
where
BDI: Pod,
BDI: Pod + Default,
{
/// The buffer containing the data that will be uploaded to the GPU.
pub buffer: RawBufferVec<BDI>,
buffer: RawBufferVec<BDI>,

/// The main-world entity that each item in
/// [`InstanceInputUniformBuffer::buffer`] corresponds to.
/// Indices of slots that are free within the buffer.
///
/// This array must have the same length as
/// [`InstanceInputUniformBuffer::buffer`]. This bookkeeping is needed when
/// moving the data for an entity in the buffer (which happens when an
/// entity is removed), so that we can update other data structures with the
/// new buffer index of that entity.
pub main_entities: Vec<MainEntity>,
/// When adding data, we preferentially overwrite these slots first before
/// growing the buffer itself.
free_uniform_indices: Vec<u32>,
}

impl<BDI> InstanceInputUniformBuffer<BDI>
where
BDI: Pod,
BDI: Pod + Default,
{
/// Creates a new, empty buffer.
pub fn new() -> InstanceInputUniformBuffer<BDI> {
InstanceInputUniformBuffer {
buffer: RawBufferVec::new(BufferUsages::STORAGE),
main_entities: vec![],
free_uniform_indices: vec![],
}
}

/// Clears the buffer and entity list out.
pub fn clear(&mut self) {
self.buffer.clear();
self.main_entities.clear();
self.free_uniform_indices.clear();
}

/// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
#[inline]
pub fn buffer(&self) -> &RawBufferVec<BDI> {
&self.buffer
}

/// Adds a new piece of buffered data to the uniform buffer and returns its
/// index.
pub fn add(&mut self, element: BDI) -> u32 {
match self.free_uniform_indices.pop() {
Some(uniform_index) => {
self.buffer.values_mut()[uniform_index as usize] = element;
uniform_index
}
None => self.buffer.push(element) as u32,
}
}

/// Removes a piece of buffered data from the uniform buffer.
///
/// This simply marks the data as free.
pub fn remove(&mut self, uniform_index: u32) {
self.free_uniform_indices.push(uniform_index);
}

/// Returns the piece of buffered data at the given index.
pub fn get(&self, uniform_index: u32) -> BDI {
self.buffer.values()[uniform_index as usize]
}

/// Stores a piece of buffered data at the given index.
pub fn set(&mut self, uniform_index: u32, element: BDI) {
self.buffer.values_mut()[uniform_index as usize] = element;
}

// Ensures that the buffers are nonempty, which the GPU requires before an
// upload can take place.
pub fn ensure_nonempty(&mut self) {
if self.buffer.is_empty() {
self.buffer.push(default());
}
}
}

impl<BDI> Default for InstanceInputUniformBuffer<BDI>
where
BDI: Pod,
BDI: Pod + Default,
{
fn default() -> Self {
Self::new()
Expand Down Expand Up @@ -360,7 +398,7 @@ impl FromWorld for GpuPreprocessingSupport {
impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
where
BD: GpuArrayBufferable + Sync + Send + 'static,
BDI: Pod,
BDI: Pod + Default,
{
/// Creates new buffers.
pub fn new() -> Self {
Expand Down Expand Up @@ -393,7 +431,7 @@ where
impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
where
BD: GpuArrayBufferable + Sync + Send + 'static,
BDI: Pod,
BDI: Pod + Default,
{
fn default() -> Self {
Self::new()
Expand Down
2 changes: 1 addition & 1 deletion crates/bevy_render/src/batching/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ pub trait GetBatchData {
pub trait GetFullBatchData: GetBatchData {
/// The per-instance data that was inserted into the
/// [`crate::render_resource::BufferVec`] during extraction.
type BufferInputData: Pod + Sync + Send;
type BufferInputData: Pod + Default + Sync + Send;

/// Get the per-instance data to be inserted into the
/// [`crate::render_resource::GpuArrayBuffer`].
Expand Down

0 comments on commit 1e7b89c

Please sign in to comment.