Add Order Independent Transparency (#14876)

# Objective - Alpha blending can easily fail in many situations and requires sorting on the cpu ## Solution - Implement order independent transparency (OIT) as an alternative to alpha blending - The implementation uses 2 passes - The first pass records all the fragments colors and position to a buffer that is the size of N layers * the render target resolution. - The second pass sorts the fragments, blends them and draws them to the screen. It also currently does manual depth testing because early-z fails in too many cases in the first pass. ## Testing - We've been using this implementation at foresight in production for many months now and we haven't had any issues related to OIT. --- ## Showcase ![image](https://github.com/user-attachments/assets/157f3e32-adaf-4782-b25b-c10313b9bc43) ![image](https://github.com/user-attachments/assets/bef23258-0c22-4b67-a0b8-48a9f571c44f) ## Future work - Add an example showing how to use OIT for a custom material - Next step would be to implement a per-pixel linked list to reduce memory use - I'd also like to investigate using a BinnedRenderPhase instead of a SortedRenderPhase. If it works, it would make the transparent pass significantly faster. --------- Co-authored-by: Kristoffer Søholm <[email protected]> Co-authored-by: JMS55 <[email protected]> Co-authored-by: Charlotte McElwain <[email protected]>
bevyengine · Oct 7, 2024 · 4bf647f · 4bf647f
1 parent e7b83ac
commit 4bf647f
Show file tree

Hide file tree

Showing 15 changed files with 1,090 additions and 26 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -973,6 +973,17 @@ description = "Demonstrates per-pixel motion blur"
 category = "3D Rendering"
 wasm = false
 
+[[example]]
+name = "order_independent_transparency"
+path = "examples/3d/order_independent_transparency.rs"
+doc-scrape-examples = true
+
+[package.metadata.example.order_independent_transparency]
+name = "Order Independent Transparency"
+description = "Demonstrates how to use OIT"
+category = "3D Rendering"
+wasm = false
+
 [[example]]
 name = "tonemapping"
 path = "examples/3d/tonemapping.rs"

diff --git a/crates/bevy_core_pipeline/Cargo.toml b/crates/bevy_core_pipeline/Cargo.toml
@@ -34,6 +34,7 @@ bevy_render = { path = "../bevy_render", version = "0.15.0-dev" }
 bevy_transform = { path = "../bevy_transform", version = "0.15.0-dev" }
 bevy_math = { path = "../bevy_math", version = "0.15.0-dev" }
 bevy_utils = { path = "../bevy_utils", version = "0.15.0-dev" }
+bevy_window = { path = "../bevy_window", version = "0.15.0-dev" }
 
 serde = { version = "1", features = ["derive"] }
 bitflags = "2.3"

diff --git a/crates/bevy_core_pipeline/src/lib.rs b/crates/bevy_core_pipeline/src/lib.rs
@@ -19,6 +19,7 @@ pub mod fullscreen_vertex_shader;
 pub mod fxaa;
 pub mod motion_blur;
 pub mod msaa_writeback;
+pub mod oit;
 pub mod post_process;
 pub mod prepass;
 mod skybox;
@@ -75,6 +76,8 @@ use crate::{
 use bevy_app::{App, Plugin};
 use bevy_asset::load_internal_asset;
 use bevy_render::prelude::Shader;
+#[cfg(not(feature = "webgl"))]
+use oit::OrderIndependentTransparencyPlugin;
 
 #[derive(Default)]
 pub struct CorePipelinePlugin;
@@ -107,6 +110,9 @@ impl Plugin for CorePipelinePlugin {
                 DepthOfFieldPlugin,
                 SmaaPlugin,
                 PostProcessingPlugin,
+                // DownlevelFlags::FRAGMENT_WRITABLE_STORAGE is required for OIT
+                #[cfg(not(feature = "webgl"))]
+                OrderIndependentTransparencyPlugin,
             ));
     }
 }
diff --git a/crates/bevy_core_pipeline/src/oit/mod.rs b/crates/bevy_core_pipeline/src/oit/mod.rs
@@ -0,0 +1,283 @@
+//! Order Independent Transparency (OIT) for 3d rendering. See [`OrderIndependentTransparencyPlugin`] for more details.
+
+use bevy_app::prelude::*;
+use bevy_asset::{load_internal_asset, Handle};
+use bevy_ecs::prelude::*;
+use bevy_math::UVec2;
+use bevy_render::{
+    camera::{Camera, ExtractedCamera},
+    extract_component::{ExtractComponent, ExtractComponentPlugin},
+    render_graph::{RenderGraphApp, ViewNodeRunner},
+    render_resource::{BufferUsages, BufferVec, DynamicUniformBuffer, Shader, TextureUsages},
+    renderer::{RenderDevice, RenderQueue},
+    view::Msaa,
+    Render, RenderApp, RenderSet,
+};
+use bevy_utils::{tracing::trace, HashSet, Instant};
+use bevy_window::PrimaryWindow;
+use resolve::{
+    node::{OitResolveNode, OitResolvePass},
+    OitResolvePlugin,
+};
+
+use crate::core_3d::{
+    graph::{Core3d, Node3d},
+    Camera3d,
+};
+
+/// Module that defines the necessary systems to resolve the OIT buffer and render it to the screen.
+pub mod resolve;
+
+/// Shader handle for the shader that draws the transparent meshes to the OIT layers buffer.
+pub const OIT_DRAW_SHADER_HANDLE: Handle<Shader> = Handle::weak_from_u128(4042527984320512);
+
+/// Marks a camera as using Order Independent Transparency (OIT) to render transparent meshes
+/// and holds the OIT configuration for that camera.
+// TODO consider supporting multiple OIT techniques like WBOIT, Moment Based OIT,
+// depth peeling, stochastic transparency, ray tracing etc.
+// This should probably be done by adding an enum to this component.
+#[derive(Component, Clone, Copy, ExtractComponent)]
+pub struct OrderIndependentTransparencySettings {
+    /// Controls how many layers (fragments per pixel) will be used to compute the blending.
+    /// The layers buffer is sized as `layer_count * width * height` of the render target,
+    /// so more layers use more memory but also give better results.
+    /// 8 is generally recommended; going above 16 is probably not worth it in the vast majority of cases.
+    pub layer_count: u8,
+}
+
+impl Default for OrderIndependentTransparencySettings {
+    /// Returns the recommended configuration of 8 layers.
+    fn default() -> Self {
+        const RECOMMENDED_LAYER_COUNT: u8 = 8;
+        Self {
+            layer_count: RECOMMENDED_LAYER_COUNT,
+        }
+    }
+}
+
+/// A plugin that adds support for Order Independent Transparency (OIT).
+/// This can correctly render some scenes that would otherwise have artifacts due to alpha blending, but uses more memory.
+///
+/// To enable OIT for a camera you need to add the [`OrderIndependentTransparencySettings`] component to it.
+///
+/// If you want to use OIT for your custom material you need to call `oit_draw(position, color)` in your fragment shader.
+/// You also need to make sure that your fragment shader doesn't output any colors.
+///
+/// # Limitations
+/// MSAA is not supported: the plugin panics if a camera with
+/// [`OrderIndependentTransparencySettings`] uses more than one MSAA sample.
+/// The plugin is not added by the core pipeline when the `webgl` feature is enabled.
+///
+/// # Implementation details
+/// This implementation uses 2 passes.
+///
+/// The first pass writes the depth and color of all the fragments to a big buffer.
+/// The buffer contains N layers for each pixel, where N can be set with [`OrderIndependentTransparencySettings::layer_count`].
+/// This pass is essentially a forward pass.
+///
+/// The second pass is a single fullscreen triangle pass that sorts all the fragments then blends them together
+/// and outputs the result to the screen.
+pub struct OrderIndependentTransparencyPlugin;
+impl Plugin for OrderIndependentTransparencyPlugin {
+    /// Registers the OIT draw shader, the main-world systems, and the render-world
+    /// system and render graph node used by OIT.
+    fn build(&self, app: &mut App) {
+        load_internal_asset!(
+            app,
+            OIT_DRAW_SHADER_HANDLE,
+            "oit_draw.wgsl",
+            Shader::from_wgsl
+        );
+
+        // Main world: settings extraction, the resolve plugin and the validation systems.
+        app.add_plugins((
+            ExtractComponentPlugin::<OrderIndependentTransparencySettings>::default(),
+            OitResolvePlugin,
+        ));
+        app.add_systems(Update, check_msaa);
+        app.add_systems(Last, configure_depth_texture_usages);
+
+        // Render world: nothing more to set up when there is no render sub-app.
+        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
+            render_app
+                .add_systems(
+                    Render,
+                    prepare_oit_buffers.in_set(RenderSet::PrepareResources),
+                )
+                // The resolve pass runs after the transparent pass and before the end of the main pass.
+                .add_render_graph_node::<ViewNodeRunner<OitResolveNode>>(Core3d, OitResolvePass)
+                .add_render_graph_edges(
+                    Core3d,
+                    (
+                        Node3d::MainTransparentPass,
+                        OitResolvePass,
+                        Node3d::EndMainPass,
+                    ),
+                );
+        }
+    }
+
+    /// Creates the [`OitBuffers`] resource in the render sub-app, if there is one.
+    fn finish(&self, app: &mut App) {
+        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
+            render_app.init_resource::<OitBuffers>();
+        }
+    }
+}
+
+// WARN Ideally this would only touch cameras with the [`OrderIndependentTransparencySettings`]
+// component, but when multiple cameras are present on the same window bevy reuses the same
+// depth texture, so the usages must be set on all cameras sharing a render target with an OIT camera.
+fn configure_depth_texture_usages(
+    p: Query<Entity, With<PrimaryWindow>>,
+    cameras: Query<(&Camera, Has<OrderIndependentTransparencySettings>)>,
+    mut new_cameras: Query<(&mut Camera3d, &Camera), Added<Camera3d>>,
+) {
+    // Only newly added `Camera3d` components are configured; bail out when there are none.
+    if new_cameras.is_empty() {
+        return;
+    }
+
+    let primary_window = p.get_single().ok();
+
+    // Every normalized render target that has at least one OIT-enabled camera.
+    let oit_targets: HashSet<_> = cameras
+        .iter()
+        .filter(|(_, has_oit)| *has_oit)
+        .map(|(camera, _)| camera.target.normalize(primary_window))
+        .collect();
+
+    // Extend the depth texture usages of new cameras that render to one of those targets.
+    for (mut camera_3d, camera) in &mut new_cameras {
+        let target = camera.target.normalize(primary_window);
+        if !oit_targets.contains(&target) {
+            continue;
+        }
+
+        let usages = TextureUsages::from(camera_3d.depth_texture_usages)
+            | TextureUsages::RENDER_ATTACHMENT
+            | TextureUsages::TEXTURE_BINDING;
+        camera_3d.depth_texture_usages = usages.into();
+    }
+}
+
+/// Panics if any camera with [`OrderIndependentTransparencySettings`] uses more than one MSAA sample.
+fn check_msaa(cameras: Query<&Msaa, With<OrderIndependentTransparencySettings>>) {
+    let msaa_enabled = cameras.iter().any(|msaa| msaa.samples() > 1);
+    if msaa_enabled {
+        panic!("MSAA is not supported when using OrderIndependentTransparency");
+    }
+}
+
+/// Holds the buffers that contain the data of all OIT layers.
+/// We use one big buffer for the entire app. Each camera will reuse it so it will
+/// always be the size of the biggest OIT enabled camera.
+#[derive(Resource)]
+pub struct OitBuffers {
+    /// The OIT layers containing depth and color for each fragment.
+    /// This is essentially used as a 3d array where xy is the screen coordinate and z is
+    /// the list of fragments rendered with OIT.
+    /// Each element is written by the draw shader as `(rgb9e5 color, packed depth + alpha)`.
+    pub layers: BufferVec<UVec2>,
+    /// Buffer containing the index of the last layer that was written for each fragment.
+    pub layer_ids: BufferVec<i32>,
+    /// Uniform buffer holding one layer count per OIT camera, written by [`prepare_oit_buffers`].
+    /// The offset of each camera's entry is stored in its [`OitLayersCountOffset`] component.
+    pub layers_count_uniforms: DynamicUniformBuffer<i32>,
+}
+
+impl FromWorld for OitBuffers {
+    /// Creates the OIT buffers with a placeholder capacity of one element each.
+    fn from_world(world: &mut World) -> Self {
+        let device = world.resource::<RenderDevice>();
+        let queue = world.resource::<RenderQueue>();
+        let storage_usages = BufferUsages::COPY_DST | BufferUsages::STORAGE;
+
+        let mut layers_count_uniforms = DynamicUniformBuffer::default();
+        layers_count_uniforms.set_label(Some("oit_layers_count"));
+
+        // The storage buffers are initialized with something so there's a valid binding
+        // before the first camera sizes them.
+        let mut layers = BufferVec::new(storage_usages);
+        layers.set_label(Some("oit_layers"));
+        layers.reserve(1, device);
+        layers.write_buffer(device, queue);
+
+        let mut layer_ids = BufferVec::new(storage_usages);
+        layer_ids.set_label(Some("oit_layer_ids"));
+        layer_ids.reserve(1, device);
+        layer_ids.write_buffer(device, queue);
+
+        Self {
+            layers,
+            layer_ids,
+            layers_count_uniforms,
+        }
+    }
+}
+
+/// Locates a camera's layer count inside [`OitBuffers::layers_count_uniforms`].
+///
+/// Inserted by [`prepare_oit_buffers`] on every entity that has
+/// [`OrderIndependentTransparencySettings`].
+#[derive(Component)]
+pub struct OitLayersCountOffset {
+    /// Offset returned by the uniform buffer writer when this camera's layer count was written.
+    pub offset: u32,
+}
+
+/// This creates or resizes the OIT buffers for each camera.
+/// It will always create one big buffer that's as big as the biggest buffer needed.
+/// Cameras with smaller viewports or fewer layers will simply use the big buffer and ignore the rest.
+///
+/// It also writes the layer count of every OIT camera to
+/// [`OitBuffers::layers_count_uniforms`] and inserts the matching
+/// [`OitLayersCountOffset`] on the camera entity.
+#[allow(clippy::type_complexity)]
+pub fn prepare_oit_buffers(
+    mut commands: Commands,
+    render_device: Res<RenderDevice>,
+    render_queue: Res<RenderQueue>,
+    cameras: Query<
+        (&ExtractedCamera, &OrderIndependentTransparencySettings),
+        (
+            Changed<ExtractedCamera>,
+            Changed<OrderIndependentTransparencySettings>,
+        ),
+    >,
+    camera_oit_uniforms: Query<(Entity, &OrderIndependentTransparencySettings)>,
+    mut buffers: ResMut<OitBuffers>,
+) {
+    // Get the max buffer size for any OIT enabled camera
+    let mut max_layer_ids_size = 0usize;
+    let mut max_layers_size = 0usize;
+    for (camera, settings) in &cameras {
+        let Some(size) = camera.physical_target_size else {
+            continue;
+        };
+
+        let layer_count = settings.layer_count as usize;
+        // Widen before multiplying so the pixel count is not computed in `u32`.
+        let pixel_count = size.x as usize * size.y as usize;
+        max_layer_ids_size = max_layer_ids_size.max(pixel_count);
+        max_layers_size = max_layers_size.max(pixel_count * layer_count);
+    }
+
+    // Create or update the layers buffer based on the max size
+    if buffers.layers.capacity() < max_layers_size {
+        let start = Instant::now();
+        buffers.layers.reserve(max_layers_size, &render_device);
+        // NOTE(review): this is computed after `reserve`, so if `reserve` already raised the
+        // capacity to the requested size nothing is pushed here — confirm that is intended.
+        // `saturating_sub` keeps this from underflowing if the capacity exceeds the request.
+        let remaining = max_layers_size.saturating_sub(buffers.layers.capacity());
+        for _ in 0..remaining {
+            buffers.layers.push(UVec2::ZERO);
+        }
+        buffers.layers.write_buffer(&render_device, &render_queue);
+        trace!(
+            "OIT layers buffer updated in {:.01}ms with total size {} MiB",
+            // Float milliseconds so the `.01` precision actually applies.
+            start.elapsed().as_secs_f32() * 1000.0,
+            buffers.layers.capacity() * size_of::<UVec2>() / 1024 / 1024,
+        );
+    }
+
+    // Create or update the layer_ids buffer based on the max size
+    if buffers.layer_ids.capacity() < max_layer_ids_size {
+        let start = Instant::now();
+        buffers
+            .layer_ids
+            .reserve(max_layer_ids_size, &render_device);
+        // NOTE(review): same ordering as for the layers buffer above — confirm intent.
+        let remaining = max_layer_ids_size.saturating_sub(buffers.layer_ids.capacity());
+        for _ in 0..remaining {
+            buffers.layer_ids.push(0);
+        }
+        buffers
+            .layer_ids
+            .write_buffer(&render_device, &render_queue);
+        trace!(
+            "OIT layer ids buffer updated in {:.01}ms with total size {} MiB",
+            start.elapsed().as_secs_f32() * 1000.0,
+            // The layer ids buffer stores `i32`, not `UVec2`.
+            buffers.layer_ids.capacity() * size_of::<i32>() / 1024 / 1024,
+        );
+    }
+
+    // Write one layer count per OIT camera and remember where each one was written.
+    if let Some(mut writer) = buffers.layers_count_uniforms.get_writer(
+        camera_oit_uniforms.iter().len(),
+        &render_device,
+        &render_queue,
+    ) {
+        for (entity, settings) in &camera_oit_uniforms {
+            let offset = writer.write(&(settings.layer_count as i32));
+            commands
+                .entity(entity)
+                .insert(OitLayersCountOffset { offset });
+        }
+    }
+}
diff --git a/crates/bevy_core_pipeline/src/oit/oit_draw.wgsl b/crates/bevy_core_pipeline/src/oit/oit_draw.wgsl
@@ -0,0 +1,44 @@
+#define_import_path bevy_core_pipeline::oit
+
+#import bevy_pbr::mesh_view_bindings::{view, oit_layers, oit_layer_ids, oit_layers_count}
+
+#ifdef OIT_ENABLED
+// Add the fragment to the oit buffer.
+// `position` is the fragment position (xy are used as pixel coordinates, z as depth)
+// and `color` is the fragment color, whose alpha is stored alongside the depth.
+// NOTE(review): the function is declared to return vec4f but every path ends in `discard`.
+fn oit_draw(position: vec4f, color: vec4f) -> vec4f {
+    // get the index of the current fragment relative to the screen size
+    // (row-major: x + y * width, with the width taken from view.viewport.z)
+    let screen_index = i32(floor(position.x) + floor(position.y) * view.viewport.z);
+    // get the size of the buffer.
+    // It's always the size of the screen
+    let buffer_size = i32(view.viewport.z * view.viewport.w);
+
+    // gets the layer index of the current fragment:
+    // atomically take the next free layer slot of this pixel
+    var layer_id = atomicAdd(&oit_layer_ids[screen_index], 1);
+    // exit early if we've reached the maximum amount of layers for this pixel
+    if layer_id >= oit_layers_count {
+        // force to store the oit_layers_count to make sure we don't
+        // accidentally increase the index above the maximum value
+        atomicStore(&oit_layer_ids[screen_index], oit_layers_count);
+        // TODO for tail blending we should return the color here
+        discard;
+    }
+
+    // get the layer_index from the screen:
+    // layer N of a pixel is stored at screen_index + N * buffer_size
+    let layer_index = screen_index + layer_id * buffer_size;
+    let rgb9e5_color = bevy_pbr::rgb9e5::vec3_to_rgb9e5_(color.rgb);
+    let depth_alpha = pack_24bit_depth_8bit_alpha(position.z, color.a);
+    oit_layers[layer_index] = vec2(rgb9e5_color, depth_alpha);
+    // the fragment is only recorded in the buffer; no color is output by this pass
+    discard;
+}
+#endif // OIT_ENABLED
+
+// Packs a depth and an alpha value, both clamped to [0, 1], into a single u32:
+// the low 24 bits hold the quantized depth and the high 8 bits hold the quantized alpha.
+fn pack_24bit_depth_8bit_alpha(depth: f32, alpha: f32) -> u32 {
+    // For depth == 1.0 the f32 sum 16777215.0 + 0.5 is not representable and can round up
+    // to 2^24, which a plain 24-bit mask would wrap to 0. Clamp to the largest 24-bit
+    // value instead so the maximum depth stays the maximum after packing.
+    let depth_bits = min(u32(saturate(depth) * f32(0xFFFFFFu) + 0.5), 0xFFFFFFu);
+    let alpha_bits = min(u32(saturate(alpha) * f32(0xFFu) + 0.5), 0xFFu);
+    return depth_bits | (alpha_bits << 24u);
+}
+
+// Inverse of the packing above: returns vec2(depth, alpha), each normalized to [0, 1].
+fn unpack_24bit_depth_8bit_alpha(packed: u32) -> vec2<f32> {
+    // low 24 bits: depth, high 8 bits: alpha
+    let depth = f32(packed & 0xFFFFFFu) / f32(0xFFFFFFu);
+    let alpha = f32((packed >> 24u) & 0xFFu) / f32(0xFFu);
+    return vec2(depth, alpha);
+}