Cleanup extract_meshes (#13026)
# Objective

- Clean up `extract_meshes_for_(gpu/cpu)_building`.

## Solution

- `extract_meshes_for_gpu_building` no longer needs to hold `prev_render_mesh_instances`.
- Use `insert_unique_unchecked` instead of a plain `insert`, since we know all entities are unique.
- Directly compute `previous_input_index` in the parallel loop (see the sketch below).
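
The last two points boil down to the pattern in this minimal, self-contained sketch. It is not code from this PR: `Uniform`, the plain `u64` entity ids, and the local `Vec` queue are hypothetical stand-ins for `MeshInputUniform`, `Entity`, and the thread-local `Parallel<Vec<..>>`. `insert_unique_unchecked` is hashbrown's `HashMap` method that skips the duplicate-key probe; it is safe to call in the hashbrown release used here (the diff calls it without `unsafe`), and it is only correct because each entity is extracted at most once per frame.

```rust
use hashbrown::HashMap;

/// Hypothetical stand-in for `MeshInputUniform`.
struct Uniform {
    previous_input_index: u32,
}

fn main() {
    // Last frame's map: entity id -> index of that entity's uniform in last
    // frame's input buffer.
    let previous: HashMap<u64, u32> = HashMap::from_iter([(7, 3)]);

    // Parallel extraction (shown serially here): resolve `previous_input_index`
    // right away instead of deferring it to a second, serial pass.
    let entity: u64 = 7;
    let has_previous_transform = true;
    let previous_input_index = has_previous_transform
        .then(|| previous.get(&entity).copied().unwrap_or(u32::MAX))
        .unwrap_or(u32::MAX);
    let queue = vec![(entity, Uniform { previous_input_index })];

    // Serial drain: each entity was pushed at most once, so the duplicate-key
    // check of a normal `insert` is redundant and hashbrown's
    // `insert_unique_unchecked` can skip it.
    let mut current: HashMap<u64, Uniform> = HashMap::new();
    for (entity, uniform) in queue {
        current.insert_unique_unchecked(entity, uniform);
    }
    assert_eq!(current[&7].previous_input_index, 3);
}
```

Dropping the intermediate builder struct means the serial drain only pushes the finished uniform and records the instance, which is presumably where the small win in the Tracy capture below comes from.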


## Performance
This should also bring a slight performance win.

`cargo run --release --example many_cubes --features bevy/trace_tracy -- --no-frustum-culling`
`extract_meshes_for_gpu_building`


![image](https://github.com/bevyengine/bevy/assets/45868716/a5425e8a-258b-482d-afda-170363ee6479)

---------

Co-authored-by: Patrick Walton <[email protected]>
re0312 and pcwalton authored Apr 26, 2024
1 parent 91a393a commit 92928f1
Showing 1 changed file with 56 additions and 102 deletions.
crates/bevy_pbr/src/render/mesh.rs
@@ -12,7 +12,7 @@ use bevy_ecs::{
query::ROQueryItem,
system::{lifetimeless::*, SystemParamItem, SystemState},
};
use bevy_math::{Affine3, Rect, UVec2, Vec3, Vec4};
use bevy_math::{vec3, Affine3, Rect, UVec2, Vec3, Vec4};
use bevy_render::{
batching::{
gpu_preprocessing, no_gpu_preprocessing, GetBatchData, GetFullBatchData,
@@ -403,32 +403,6 @@ pub struct RenderMeshInstanceShared {
pub flags: RenderMeshInstanceFlags,
}

/// Information that is gathered during the parallel portion of mesh extraction
/// when GPU mesh uniform building is enabled.
///
/// From this, the [`MeshInputUniform`] and [`RenderMeshInstanceGpu`] are
/// prepared.
pub struct RenderMeshInstanceGpuBuilder {
/// Data that will be placed on the [`RenderMeshInstanceGpu`].
pub shared: RenderMeshInstanceShared,
/// The current transform.
pub transform: Affine3,
/// Four 16-bit unsigned normalized UV values packed into a [`UVec2`]:
///
/// ```text
/// <--- MSB LSB --->
/// +---- min v ----+ +---- min u ----+
/// lightmap_uv_rect.x: vvvvvvvv vvvvvvvv uuuuuuuu uuuuuuuu,
/// +---- max v ----+ +---- max u ----+
/// lightmap_uv_rect.y: VVVVVVVV VVVVVVVV UUUUUUUU UUUUUUUU,
///
/// (MSB: most significant bit; LSB: least significant bit.)
/// ```
pub lightmap_uv_rect: UVec2,
/// Various flags.
pub mesh_flags: MeshFlags,
}

impl RenderMeshInstanceShared {
fn from_components(
previous_transform: Option<&PreviousGlobalTransform>,
@@ -457,6 +431,7 @@ impl RenderMeshInstanceShared {

/// Returns true if this entity is eligible to participate in automatic
/// batching.
#[inline]
pub fn should_batch(&self) -> bool {
self.flags
.contains(RenderMeshInstanceFlags::AUTOMATIC_BATCHING)
@@ -650,7 +625,9 @@ pub fn extract_meshes_for_cpu_building(

render_mesh_instances.clear();
for queue in render_mesh_instance_queues.iter_mut() {
render_mesh_instances.extend(queue.drain(..));
for (k, v) in queue.drain(..) {
render_mesh_instances.insert_unique_unchecked(k, v);
}
}
}

@@ -664,8 +641,9 @@ pub fn extract_meshes_for_gpu_building(
mut batched_instance_buffers: ResMut<
gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>,
>,
mut render_mesh_instance_queues: Local<Parallel<Vec<(Entity, RenderMeshInstanceGpuBuilder)>>>,
mut prev_render_mesh_instances: Local<RenderMeshInstancesGpu>,
mut render_mesh_instance_queues: Local<
Parallel<Vec<(Entity, RenderMeshInstanceShared, MeshInputUniform)>>,
>,
meshes_query: Extract<
Query<(
Entity,
@@ -681,6 +659,24 @@ )>,
)>,
>,
) {
// Collect render mesh instances. Build up the uniform buffer.
let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = *render_mesh_instances
else {
panic!(
"`collect_render_mesh_instances_for_gpu_building` should only be called if we're \
using GPU `MeshUniform` building"
);
};

let gpu_preprocessing::BatchedInstanceBuffers {
ref mut current_input_buffer,
ref mut previous_input_buffer,
..
} = *batched_instance_buffers;

// Swap buffers.
mem::swap(current_input_buffer, previous_input_buffer);

meshes_query.par_iter().for_each_init(
|| render_mesh_instance_queues.borrow_local_mut(),
|queue,
@@ -710,94 +706,52 @@ pub fn extract_meshes_for_gpu_building(
no_automatic_batching,
);

let previous_input_index = shared
.flags
.contains(RenderMeshInstanceFlags::HAVE_PREVIOUS_TRANSFORM)
.then(|| {
render_mesh_instances
.get(&entity)
.map(|render_mesh_instance| {
render_mesh_instance.current_uniform_index.into()
})
.unwrap_or(u32::MAX)
})
.unwrap_or(u32::MAX);

let lightmap_uv_rect =
lightmap::pack_lightmap_uv_rect(lightmap.map(|lightmap| lightmap.uv_rect));
let affine3: Affine3 = (&transform.affine()).into();

queue.push((
entity,
RenderMeshInstanceGpuBuilder {
shared,
transform: (&transform.affine()).into(),
shared,
MeshInputUniform {
flags: mesh_flags.bits(),
lightmap_uv_rect,
mesh_flags,
transform: affine3.to_transpose(),
previous_input_index,
},
));
},
);

collect_meshes_for_gpu_building(
&mut render_mesh_instances,
&mut batched_instance_buffers,
&mut render_mesh_instance_queues,
&mut prev_render_mesh_instances,
);
}

/// Creates the [`RenderMeshInstanceGpu`]s and [`MeshInputUniform`]s when GPU
/// mesh uniforms are built.
fn collect_meshes_for_gpu_building(
render_mesh_instances: &mut RenderMeshInstances,
batched_instance_buffers: &mut gpu_preprocessing::BatchedInstanceBuffers<
MeshUniform,
MeshInputUniform,
>,
render_mesh_instance_queues: &mut Parallel<Vec<(Entity, RenderMeshInstanceGpuBuilder)>>,
prev_render_mesh_instances: &mut RenderMeshInstancesGpu,
) {
// Collect render mesh instances. Build up the uniform buffer.
let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = *render_mesh_instances
else {
panic!(
"`collect_render_mesh_instances_for_gpu_building` should only be called if we're \
using GPU `MeshUniform` building"
);
};

let gpu_preprocessing::BatchedInstanceBuffers {
ref mut current_input_buffer,
ref mut previous_input_buffer,
..
} = batched_instance_buffers;

// Swap buffers.
mem::swap(current_input_buffer, previous_input_buffer);
mem::swap(render_mesh_instances, prev_render_mesh_instances);

// Build the [`RenderMeshInstance`]s and [`MeshInputUniform`]s.
render_mesh_instances.clear();
for queue in render_mesh_instance_queues.iter_mut() {
for (entity, builder) in queue.drain(..) {
let previous_input_index = if builder
.shared
.flags
.contains(RenderMeshInstanceFlags::HAVE_PREVIOUS_TRANSFORM)
{
prev_render_mesh_instances
.get(&entity)
.map(|render_mesh_instance| render_mesh_instance.current_uniform_index)
} else {
None
};

// Push the mesh input uniform.
let current_uniform_index = current_input_buffer.push(MeshInputUniform {
transform: builder.transform.to_transpose(),
lightmap_uv_rect: builder.lightmap_uv_rect,
flags: builder.mesh_flags.bits(),
previous_input_index: match previous_input_index {
Some(previous_input_index) => previous_input_index.into(),
None => u32::MAX,
},
}) as u32;

// Record the [`RenderMeshInstance`].
render_mesh_instances.insert(
for (entity, shared, mesh_uniform) in queue.drain(..) {
let buffer_index = current_input_buffer.push(mesh_uniform);
let translation = vec3(
mesh_uniform.transform[0].w,
mesh_uniform.transform[1].w,
mesh_uniform.transform[2].w,
);
render_mesh_instances.insert_unique_unchecked(
entity,
RenderMeshInstanceGpu {
translation: builder.transform.translation,
shared: builder.shared,
current_uniform_index: NonMaxU32::try_from(current_uniform_index)
.unwrap_or_default(),
shared,
translation,
current_uniform_index: NonMaxU32::new(buffer_index as u32).unwrap_or_default(),
},
);
}
