Skip to content

Commit

Permalink
Add Order Independent Transparency (#14876)
Browse files Browse the repository at this point in the history
# Objective

- Alpha blending can easily fail in many situations and requires sorting
on the cpu

## Solution

- Implement order independent transparency (OIT) as an alternative to
alpha blending
- The implementation uses 2 passes
- The first pass records the color and position of every fragment to a
buffer that is the size of N layers * the render target resolution.
- The second pass sorts the fragments, blends them and draws them to the
screen. It also currently does manual depth testing because early-z
fails in too many cases in the first pass.

## Testing

- We've been using this implementation at foresight in production for
many months now and we haven't had any issues related to OIT.

---

## Showcase


![image](https://github.com/user-attachments/assets/157f3e32-adaf-4782-b25b-c10313b9bc43)

![image](https://github.com/user-attachments/assets/bef23258-0c22-4b67-a0b8-48a9f571c44f)

## Future work

- Add an example showing how to use OIT for a custom material
- Next step would be to implement a per-pixel linked list to reduce
memory use
- I'd also like to investigate using a BinnedRenderPhase instead of a
SortedRenderPhase. If it works, it would make the transparent pass
significantly faster.

---------

Co-authored-by: Kristoffer Søholm <[email protected]>
Co-authored-by: JMS55 <[email protected]>
Co-authored-by: Charlotte McElwain <[email protected]>
  • Loading branch information
4 people authored Oct 7, 2024
1 parent e7b83ac commit 4bf647f
Show file tree
Hide file tree
Showing 15 changed files with 1,090 additions and 26 deletions.
11 changes: 11 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,17 @@ description = "Demonstrates per-pixel motion blur"
category = "3D Rendering"
wasm = false

[[example]]
name = "order_independent_transparency"
path = "examples/3d/order_independent_transparency.rs"
doc-scrape-examples = true

[package.metadata.example.order_independent_transparency]
name = "Order Independent Transparency"
description = "Demonstrates how to use OIT"
category = "3D Rendering"
wasm = false

[[example]]
name = "tonemapping"
path = "examples/3d/tonemapping.rs"
Expand Down
1 change: 1 addition & 0 deletions crates/bevy_core_pipeline/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ bevy_render = { path = "../bevy_render", version = "0.15.0-dev" }
bevy_transform = { path = "../bevy_transform", version = "0.15.0-dev" }
bevy_math = { path = "../bevy_math", version = "0.15.0-dev" }
bevy_utils = { path = "../bevy_utils", version = "0.15.0-dev" }
bevy_window = { path = "../bevy_window", version = "0.15.0-dev" }

serde = { version = "1", features = ["derive"] }
bitflags = "2.3"
Expand Down
6 changes: 6 additions & 0 deletions crates/bevy_core_pipeline/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub mod fullscreen_vertex_shader;
pub mod fxaa;
pub mod motion_blur;
pub mod msaa_writeback;
pub mod oit;
pub mod post_process;
pub mod prepass;
mod skybox;
Expand Down Expand Up @@ -75,6 +76,8 @@ use crate::{
use bevy_app::{App, Plugin};
use bevy_asset::load_internal_asset;
use bevy_render::prelude::Shader;
#[cfg(not(feature = "webgl"))]
use oit::OrderIndependentTransparencyPlugin;

#[derive(Default)]
pub struct CorePipelinePlugin;
Expand Down Expand Up @@ -107,6 +110,9 @@ impl Plugin for CorePipelinePlugin {
DepthOfFieldPlugin,
SmaaPlugin,
PostProcessingPlugin,
// DownlevelFlags::FRAGMENT_WRITABLE_STORAGE is required for OIT
#[cfg(not(feature = "webgl"))]
OrderIndependentTransparencyPlugin,
));
}
}
283 changes: 283 additions & 0 deletions crates/bevy_core_pipeline/src/oit/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
//! Order Independent Transparency (OIT) for 3d rendering. See [`OrderIndependentTransparencyPlugin`] for more details.
use bevy_app::prelude::*;
use bevy_asset::{load_internal_asset, Handle};
use bevy_ecs::prelude::*;
use bevy_math::UVec2;
use bevy_render::{
camera::{Camera, ExtractedCamera},
extract_component::{ExtractComponent, ExtractComponentPlugin},
render_graph::{RenderGraphApp, ViewNodeRunner},
render_resource::{BufferUsages, BufferVec, DynamicUniformBuffer, Shader, TextureUsages},
renderer::{RenderDevice, RenderQueue},
view::Msaa,
Render, RenderApp, RenderSet,
};
use bevy_utils::{tracing::trace, HashSet, Instant};
use bevy_window::PrimaryWindow;
use resolve::{
node::{OitResolveNode, OitResolvePass},
OitResolvePlugin,
};

use crate::core_3d::{
graph::{Core3d, Node3d},
Camera3d,
};

/// Module that defines the necessary systems to resolve the OIT buffer and render it to the screen.
pub mod resolve;

/// Shader handle for the shader that draws the transparent meshes to the OIT layers buffer.
///
/// [`OrderIndependentTransparencyPlugin`] loads `oit_draw.wgsl` as an internal asset under this handle.
pub const OIT_DRAW_SHADER_HANDLE: Handle<Shader> = Handle::weak_from_u128(4042527984320512);

/// Used to identify which camera will use OIT to render transparent meshes
/// and to configure OIT.
///
/// The [`Default`] implementation uses 8 layers.
///
/// Cameras carrying this component must not use MSAA: `check_msaa` panics when
/// such a camera has more than one sample.
// TODO consider supporting multiple OIT techniques like WBOIT, Moment Based OIT,
// depth peeling, stochastic transparency, ray tracing etc.
// This should probably be done by adding an enum to this component.
#[derive(Component, Clone, Copy, ExtractComponent)]
pub struct OrderIndependentTransparencySettings {
/// Controls how many layers will be used to compute the blending.
/// The more layers you use the more memory it will use but it will also give better results.
/// 8 is generally recommended, going above 16 is probably not worth it in the vast majority of cases
///
/// The layers buffer is sized from `width * height * layer_count` of the camera's
/// physical target size (see `prepare_oit_buffers`).
pub layer_count: u8,
}

impl Default for OrderIndependentTransparencySettings {
    /// Returns settings that blend with the generally recommended 8 layers.
    fn default() -> Self {
        let layer_count = 8;
        Self { layer_count }
    }
}

/// A plugin that adds support for Order Independent Transparency (OIT).
/// This can correctly render some scenes that would otherwise have artifacts due to alpha blending, but uses more memory.
///
/// To enable OIT for a camera you need to add the [`OrderIndependentTransparencySettings`] component to it.
///
/// If you want to use OIT for your custom material you need to call `oit_draw(position, color)` in your fragment shader.
/// You also need to make sure that your fragment shader doesn't output any colors.
///
/// # Limitations
/// MSAA is not supported: the app panics if a camera with
/// [`OrderIndependentTransparencySettings`] uses more than one MSAA sample.
///
/// # Implementation details
/// This implementation uses 2 passes.
///
/// The first pass writes the depth and color of all the fragments to a big buffer.
/// The buffer contains N layers for each pixel, where N can be set with [`OrderIndependentTransparencySettings::layer_count`].
/// This pass is essentially a forward pass.
///
/// The second pass is a single fullscreen triangle pass that sorts all the fragments then blends them together
/// and outputs the result to the screen.
pub struct OrderIndependentTransparencyPlugin;
impl Plugin for OrderIndependentTransparencyPlugin {
    /// Registers the draw shader, the main-world systems and, when a render
    /// sub-app exists, the buffer preparation system and the resolve node.
    fn build(&self, app: &mut App) {
        load_internal_asset!(
            app,
            OIT_DRAW_SHADER_HANDLE,
            "oit_draw.wgsl",
            Shader::from_wgsl
        );

        // Main-world setup: component extraction, the resolve plugin and the
        // systems that validate/configure OIT cameras.
        app.add_plugins((
            ExtractComponentPlugin::<OrderIndependentTransparencySettings>::default(),
            OitResolvePlugin,
        ));
        app.add_systems(Update, check_msaa);
        app.add_systems(Last, configure_depth_texture_usages);

        // Render-world setup is skipped entirely when there is no render sub-app.
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.add_systems(
                Render,
                prepare_oit_buffers.in_set(RenderSet::PrepareResources),
            );

            // The resolve pass runs between the transparent pass and the end of the main pass.
            render_app
                .add_render_graph_node::<ViewNodeRunner<OitResolveNode>>(Core3d, OitResolvePass);
            render_app.add_render_graph_edges(
                Core3d,
                (
                    Node3d::MainTransparentPass,
                    OitResolvePass,
                    Node3d::EndMainPass,
                ),
            );
        }
    }

    /// Initializes the [`OitBuffers`] resource in the render sub-app, if there is one.
    fn finish(&self, app: &mut App) {
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.init_resource::<OitBuffers>();
        }
    }
}

// Adds the depth texture usages needed by OIT to newly added 3d cameras.
//
// WARN This should only happen for cameras with the [`OrderIndependentTransparencySettings`] component
// but when multiple cameras are present on the same window
// bevy reuses the same depth texture so we need to set this on all cameras with the same render target.
fn configure_depth_texture_usages(
    p: Query<Entity, With<PrimaryWindow>>,
    cameras: Query<(&Camera, Has<OrderIndependentTransparencySettings>)>,
    mut new_cameras: Query<(&mut Camera3d, &Camera), Added<Camera3d>>,
) {
    // Nothing to configure unless a `Camera3d` was added.
    if new_cameras.is_empty() {
        return;
    }

    let primary_window = p.get_single().ok();

    // Collect the normalized render target of every camera that has OIT enabled.
    let mut render_target_has_oit = HashSet::new();
    render_target_has_oit.extend(
        cameras
            .iter()
            .filter(|(_, has_oit)| *has_oit)
            .map(|(camera, _)| camera.target.normalize(primary_window)),
    );

    // Any new camera sharing a render target with an OIT camera gets the extra usages.
    for (mut camera_3d, camera) in &mut new_cameras {
        let target = camera.target.normalize(primary_window);
        if !render_target_has_oit.contains(&target) {
            continue;
        }

        let usages = TextureUsages::from(camera_3d.depth_texture_usages)
            | TextureUsages::RENDER_ATTACHMENT
            | TextureUsages::TEXTURE_BINDING;
        camera_3d.depth_texture_usages = usages.into();
    }
}

/// Panics if any camera with [`OrderIndependentTransparencySettings`] uses more than one MSAA sample.
fn check_msaa(cameras: Query<&Msaa, With<OrderIndependentTransparencySettings>>) {
    let msaa_enabled = cameras.iter().any(|msaa| msaa.samples() > 1);
    if msaa_enabled {
        panic!("MSAA is not supported when using OrderIndependentTransparency");
    }
}

/// Holds the buffers that contain the data of all OIT layers.
/// We use one big buffer for the entire app. Each camera will reuse it so it will
/// always be the size of the biggest OIT enabled camera.
#[derive(Resource)]
pub struct OitBuffers {
/// The OIT layers containing depth and color for each fragment.
/// This is essentially used as a 3d array where xy is the screen coordinate and z is
/// the list of fragments rendered with OIT.
pub layers: BufferVec<UVec2>,
/// Buffer containing the index of the last layer that was written for each fragment.
pub layer_ids: BufferVec<i32>,
/// Dynamic uniform buffer holding the `layer_count` (as an `i32`) of every entity with
/// [`OrderIndependentTransparencySettings`]. The offset of each entry is stored on the
/// entity as an [`OitLayersCountOffset`].
pub layers_count_uniforms: DynamicUniformBuffer<i32>,
}

impl FromWorld for OitBuffers {
fn from_world(world: &mut World) -> Self {
let render_device = world.resource::<RenderDevice>();
let render_queue = world.resource::<RenderQueue>();

// initialize buffers with something so there's a valid binding

let mut layers = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
layers.set_label(Some("oit_layers"));
layers.reserve(1, render_device);
layers.write_buffer(render_device, render_queue);

let mut layer_ids = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
layer_ids.set_label(Some("oit_layer_ids"));
layer_ids.reserve(1, render_device);
layer_ids.write_buffer(render_device, render_queue);

let mut layers_count_uniforms = DynamicUniformBuffer::default();
layers_count_uniforms.set_label(Some("oit_layers_count"));

Self {
layers,
layer_ids,
layers_count_uniforms,
}
}
}

/// Offset of an entity's layer count inside [`OitBuffers::layers_count_uniforms`].
///
/// Inserted by [`prepare_oit_buffers`] on every entity that has an
/// [`OrderIndependentTransparencySettings`] component.
#[derive(Component)]
pub struct OitLayersCountOffset {
/// Offset returned by the uniform buffer writer when this entity's layer count was written.
pub offset: u32,
}

/// This creates or resizes the oit buffers for each camera.
/// It will always create one big buffer that's as big as the biggest buffer needed.
/// Cameras with smaller viewports or less layers will simply use the big buffer and ignore the rest.
///
/// It also writes the layer count of every entity with
/// [`OrderIndependentTransparencySettings`] to [`OitBuffers::layers_count_uniforms`] and
/// inserts the resulting [`OitLayersCountOffset`] on that entity.
#[allow(clippy::type_complexity)]
pub fn prepare_oit_buffers(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    // NOTE(review): a tuple of filters matches only when BOTH components changed.
    // Confirm that this is intended rather than `Or<(..)>`.
    cameras: Query<
        (&ExtractedCamera, &OrderIndependentTransparencySettings),
        (
            Changed<ExtractedCamera>,
            Changed<OrderIndependentTransparencySettings>,
        ),
    >,
    camera_oit_uniforms: Query<(Entity, &OrderIndependentTransparencySettings)>,
    mut buffers: ResMut<OitBuffers>,
) {
    // Get the max buffer size for any OIT enabled camera
    let mut max_layer_ids_size = usize::MIN;
    let mut max_layers_size = usize::MIN;
    for (camera, settings) in &cameras {
        let Some(size) = camera.physical_target_size else {
            continue;
        };

        let layer_count = settings.layer_count as usize;
        // Widen before multiplying so the pixel count cannot overflow `u32`.
        let size = size.x as usize * size.y as usize;
        max_layer_ids_size = max_layer_ids_size.max(size);
        max_layers_size = max_layers_size.max(size * layer_count);
    }

    // Create or update the layers buffer based on the max size
    if buffers.layers.capacity() < max_layers_size {
        let start = Instant::now();
        buffers.layers.reserve(max_layers_size, &render_device);
        // NOTE(review): this is computed after `reserve`; if `reserve` already raises the
        // capacity to `max_layers_size` nothing is pushed here. Confirm against `BufferVec::reserve`.
        let remaining = max_layers_size.saturating_sub(buffers.layers.capacity());
        for _ in 0..remaining {
            buffers.layers.push(UVec2::ZERO);
        }
        buffers.layers.write_buffer(&render_device, &render_queue);
        trace!(
            "OIT layers buffer updated in {:.01}ms with total size {} MiB",
            // Use fractional milliseconds so the `.01` precision is actually applied.
            start.elapsed().as_secs_f32() * 1000.0,
            buffers.layers.capacity() * size_of::<UVec2>() / 1024 / 1024,
        );
    }

    // Create or update the layer_ids buffer based on the max size
    if buffers.layer_ids.capacity() < max_layer_ids_size {
        let start = Instant::now();
        buffers
            .layer_ids
            .reserve(max_layer_ids_size, &render_device);
        let remaining = max_layer_ids_size.saturating_sub(buffers.layer_ids.capacity());
        for _ in 0..remaining {
            buffers.layer_ids.push(0);
        }
        buffers
            .layer_ids
            .write_buffer(&render_device, &render_queue);
        trace!(
            "OIT layer ids buffer updated in {:.01}ms with total size {} MiB",
            start.elapsed().as_secs_f32() * 1000.0,
            // `layer_ids` stores `i32`, not `UVec2`.
            buffers.layer_ids.capacity() * size_of::<i32>() / 1024 / 1024,
        );
    }

    // Write one layer count per OIT entity and remember where it landed.
    if let Some(mut writer) = buffers.layers_count_uniforms.get_writer(
        camera_oit_uniforms.iter().len(),
        &render_device,
        &render_queue,
    ) {
        for (entity, settings) in &camera_oit_uniforms {
            let offset = writer.write(&(settings.layer_count as i32));
            commands
                .entity(entity)
                .insert(OitLayersCountOffset { offset });
        }
    }
}
44 changes: 44 additions & 0 deletions crates/bevy_core_pipeline/src/oit/oit_draw.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#define_import_path bevy_core_pipeline::oit

#import bevy_pbr::mesh_view_bindings::{view, oit_layers, oit_layer_ids, oit_layers_count}

#ifdef OIT_ENABLED
// Add the fragment to the oit buffer.
//
// `position.xy` selects the pixel, `position.z` is stored as the fragment depth and
// `color` is stored as rgb9e5 color plus 8-bit alpha.
// Every path through this function ends in `discard`, so no color is written by
// the calling fragment shader invocation.
fn oit_draw(position: vec4f, color: vec4f) -> vec4f {
// get the index of the current fragment relative to the screen size
let screen_index = i32(floor(position.x) + floor(position.y) * view.viewport.z);
// get the size of the buffer.
// It's always the size of the screen
let buffer_size = i32(view.viewport.z * view.viewport.w);

// gets the layer index of the current fragment
// (atomicAdd returns the value stored before the increment)
var layer_id = atomicAdd(&oit_layer_ids[screen_index], 1);
// exit early if this pixel already holds the maximum number of layers
if layer_id >= oit_layers_count {
// force to store the oit_layers_count to make sure we don't
// accidentally increase the index above the maximum value
atomicStore(&oit_layer_ids[screen_index], oit_layers_count);
// TODO for tail blending we should return the color here
discard;
}

// get the layer_index from the screen
// (layers are stored one full screen after another)
let layer_index = screen_index + layer_id * buffer_size;
let rgb9e5_color = bevy_pbr::rgb9e5::vec3_to_rgb9e5_(color.rgb);
let depth_alpha = pack_24bit_depth_8bit_alpha(position.z, color.a);
oit_layers[layer_index] = vec2(rgb9e5_color, depth_alpha);
discard;
}
#endif // OIT_ENABLED

// Packs `depth` and `alpha` into one u32: depth in the low 24 bits, alpha in the high 8 bits.
// Both inputs are saturated to [0, 1] before being quantized with round-to-nearest.
fn pack_24bit_depth_8bit_alpha(depth: f32, alpha: f32) -> u32 {
    // In f32, 16777215.0 + 0.5 is not representable and can round up to 16777216.0
    // (0x1000000). Masking that value would wrap a depth of ~1.0 to 0, so clamp
    // to the largest 24-bit value instead.
    let depth_bits = min(u32(saturate(depth) * f32(0xFFFFFFu) + 0.5), 0xFFFFFFu);
    let alpha_bits = min(u32(saturate(alpha) * f32(0xFFu) + 0.5), 0xFFu);
    return depth_bits | (alpha_bits << 24u);
}

// Inverse of `pack_24bit_depth_8bit_alpha`.
// Returns vec2(depth, alpha), each normalized back to [0, 1].
fn unpack_24bit_depth_8bit_alpha(packed: u32) -> vec2<f32> {
    let depth = f32(packed & 0xFFFFFFu) / f32(0xFFFFFFu);
    let alpha = f32((packed >> 24u) & 0xFFu) / f32(0xFFu);
    return vec2(depth, alpha);
}
Loading

0 comments on commit 4bf647f

Please sign in to comment.