diff --git a/src/cublaslt/safe.rs b/src/cublaslt/safe.rs
index d1de3ca..df0b184 100644
--- a/src/cublaslt/safe.rs
+++ b/src/cublaslt/safe.rs
@@ -12,7 +12,7 @@ use std::sync::Arc;
 ///
 /// 1. Create with [CudaBlasLT::new()]
 /// 2. Execute matmul kernel with matmul. f32 is supported. f16 and bf16 are supported
-/// if feature `half` is activated
+///    if feature `half` is activated
 ///
 /// Note: This maintains a instance of [`Arc<CudaDevice>`], so will prevent the device
 /// from being dropped. Kernels will be launched on the device device default stream.
diff --git a/src/driver/mod.rs b/src/driver/mod.rs
index a02ba4c..8bf6847 100644
--- a/src/driver/mod.rs
+++ b/src/driver/mod.rs
@@ -11,7 +11,7 @@
 //! ```
 //!
 //! 2. Allocate device memory with host data with [CudaDevice::htod_copy()], [CudaDevice::alloc_zeros()],
-//! or [CudaDevice::htod_sync_copy()].
+//!    or [CudaDevice::htod_sync_copy()].
 //!
 //! You can also copy data to CudaSlice using [CudaDevice::htod_sync_copy_into()]
 //!
@@ -24,7 +24,7 @@
 //! ```
 //!
 //! 3. Transfer to host memory with [CudaDevice::sync_reclaim()], [CudaDevice::dtoh_sync_copy()],
-//! or [CudaDevice::dtoh_sync_copy_into()]
+//!    or [CudaDevice::dtoh_sync_copy_into()]
 //!
 //! ```rust
 //! # use cudarc::driver::*;
diff --git a/src/driver/result.rs b/src/driver/result.rs
index d55149c..49fcf25 100644
--- a/src/driver/result.rs
+++ b/src/driver/result.rs
@@ -981,10 +981,10 @@ pub mod event {
 ///
 /// 1. The cuda function must be a valid handle returned from a non-unloaded module.
 /// 2. This is asynchronous, so the results of calling this function happen
-/// at a later point after this function returns.
+///    at a later point after this function returns.
 /// 3. All parameters used for this kernel should have been allocated by stream (I think?)
 /// 4. The cuda kernel has mutable access to every parameter, that means every parameter
-/// can change at a later point after callign this function. *Even non-mutable references*.
+///    can change at a later point after calling this function. *Even non-mutable references*.
 #[inline]
 pub unsafe fn launch_kernel(
     f: sys::CUfunction,
diff --git a/src/driver/safe/core.rs b/src/driver/safe/core.rs
index 919d36f..1146373 100644
--- a/src/driver/safe/core.rs
+++ b/src/driver/safe/core.rs
@@ -29,9 +29,9 @@ use std::{collections::BTreeMap, marker::Unpin, pin::Pin, sync::Arc, vec::Vec};
 /// # Safety
 /// 1. impl [Drop] to call all the corresponding resource cleanup methods
 /// 2. Doesn't impl clone, so you can't have multiple device pointers
-/// hanging around.
+///    hanging around.
 /// 3. Any allocations enforce that self is an [Arc], meaning no allocation
-/// can outlive the [CudaDevice]
+///    can outlive the [CudaDevice]
 #[derive(Debug)]
 pub struct CudaDevice {
     pub(crate) cu_device: sys::CUdevice,
diff --git a/src/driver/safe/launch.rs b/src/driver/safe/launch.rs
index b6d1343..1a488ba 100644
--- a/src/driver/safe/launch.rs
+++ b/src/driver/safe/launch.rs
@@ -157,11 +157,11 @@ pub unsafe trait LaunchAsync<Params> {
     ///
     /// 1. `params` can be changed regardless of `&` or `&mut` usage.
     /// 2. `params` will be changed at some later point after the
-    /// function returns because the kernel is executed async.
+    ///    function returns because the kernel is executed async.
     /// 3. There are no guaruntees that the `params`
-    /// are the correct number/types/order for `func`.
+    ///    are the correct number/types/order for `func`.
     /// 4. Specifying the wrong values for [LaunchConfig] can result
-    /// in accessing/modifying values past memory limits.
+    ///    in accessing/modifying values past memory limits.
     ///
     /// ## Asynchronous mutation
     ///