diff --git a/examples/io_test.rs b/examples/io_test.rs
new file mode 100644
index 0000000..adbaf31
--- /dev/null
+++ b/examples/io_test.rs
@@ -0,0 +1,247 @@
+use rand::{thread_rng, Rng};
+use std::error::Error;
+use std::sync::{Arc, Mutex};
+use std::time::{Duration, Instant};
+use std::{env, process, thread};
+use vroom::memory::*;
+use vroom::{NvmeDevice, QUEUE_LENGTH};
+
+#[allow(unused_variables, unused_mut)]
+pub fn main() -> Result<(), Box<dyn Error>> {
+    let mut args = env::args();
+    args.next();
+
+    let pci_addr = match args.next() {
+        Some(arg) => arg,
+        None => {
+            eprintln!("Usage: cargo run --example io_test <pci bus id> <duration in seconds>");
+            process::exit(1);
+        }
+    };
+
+    let duration = match args.next() {
+        Some(secs) => Some(Duration::from_secs(secs.parse().expect(
+            "Usage: cargo run --example io_test <pci bus id> <duration in seconds>",
+        ))),
+        None => None,
+    };
+
+    let mut nvme = vroom::init(&pci_addr)?;
+
+    let nvme = qd_n(nvme, 1, 0, false, 128, duration)?;
+    let _ = qd_n(nvme, 1, 0, false, 256, duration)?;
+
+    // let _ = qd1(nvme, 0, false, true, duration)?;
+
+    Ok(())
+}
+
+fn qd1(
+    mut nvme: NvmeDevice,
+    n: u64,
+    write: bool,
+    random: bool,
+    time: Option<Duration>,
+) -> Result<NvmeDevice, Box<dyn Error>> {
+    let mut buffer: Dma<u8> = Dma::allocate(HUGE_PAGE_SIZE, true)?;
+
+    let blocks = 8;
+    let bytes = 512 * blocks;
+    let ns_blocks = nvme.namespaces.get(&1).unwrap().blocks / blocks - 1; // - blocks - 1;
+
+    let mut rng = thread_rng();
+    let seq = if random {
+        (0..n)
+            .map(|_| rng.gen_range(0..ns_blocks as u64))
+            .collect::<Vec<u64>>()
+    } else {
+        (0..n).map(|i| (i * 8) % ns_blocks).collect::<Vec<u64>>()
+    };
+
+    let rand_block = &(0..bytes).map(|_| rand::random::<u8>()).collect::<Vec<_>>()[..];
+    buffer[..rand_block.len()].copy_from_slice(rand_block);
+
+    let mut total = Duration::ZERO;
+
+    if let Some(time) = time {
+        let mut ios = 0;
+        let mut lba = 0;
+        while total < time {
+            lba = if random {
+                rng.gen_range(0..ns_blocks)
+            } else {
+                (lba + 1) % ns_blocks
+            };
+
+            let before = Instant::now();
+            if write {
+                nvme.write(&buffer.slice(0..bytes as usize), lba * blocks)?;
+            } else {
+                nvme.read(&buffer.slice(0..bytes as usize), lba * blocks)?;
+            }
+            let elapsed = before.elapsed();
+            total += elapsed;
+            ios += 1;
+        }
+        println!(
+            "IOs: {ios}, total {} iops: {:?}",
+            if write { "write" } else { "read" },
+            ios as f64 / total.as_secs_f64()
+        );
+    } else {
+        for lba in seq {
+            let before = Instant::now();
+            if write {
+                nvme.write(&buffer.slice(0..bytes as usize), lba * blocks)?;
+            } else {
+                nvme.read(&buffer.slice(0..bytes as usize), lba * blocks)?;
+            }
+            total += before.elapsed();
+        }
+        println!(
+            "n: {n}, total {} iops: {:?}",
+            if write { "write" } else { "read" },
+            n as f64 / total.as_secs_f64()
+        );
+    }
+    Ok(nvme)
+}
+
+#[allow(unused)]
+fn qd_n(
+    nvme: NvmeDevice,
+    n_threads: u64,
+    n: u64,
+    write: bool,
+    batch_size: usize,
+    time: Option<Duration>,
+) -> Result<NvmeDevice, Box<dyn Error>> {
+    let blocks = 8;
+    let ns_blocks = nvme.namespaces.get(&1).unwrap().blocks / blocks;
+
+    let nvme = Arc::new(Mutex::new(nvme));
+    let mut threads = Vec::new();
+
+    for i in 0..n_threads {
+        let nvme = Arc::clone(&nvme);
+        let range = (0, ns_blocks);
+
+        let handle = thread::spawn(move || -> (u64, f64) {
+            let mut rng = rand::thread_rng();
+            let bytes = 512 * blocks as usize;
+            let mut total = std::time::Duration::ZERO;
+            let mut buffer: Dma<u8> = Dma::allocate(HUGE_PAGE_SIZE, true).unwrap();
+
+            let mut qpair = nvme
+                .lock()
+                .unwrap()
+                .create_io_queue_pair(QUEUE_LENGTH)
+                .unwrap();
+
+            let rand_block = &(0..(32 * bytes))
+                .map(|_| rand::random::<u8>())
+                .collect::<Vec<_>>()[..];
+            buffer[0..32 * bytes].copy_from_slice(rand_block);
+
+            let mut ctr = 0;
+            if let Some(time) = time {
+                let mut ios = 0;
+                while total < time {
+                    let lba = rng.gen_range(range.0..range.1);
+                    let before = Instant::now();
+                    while let Some(_) = qpair.quick_poll() {
+                        ctr -= 1;
+                        ios += 1;
+                    }
+                    if ctr == batch_size {
+                        qpair.complete_io(1);
+                        ctr -= 1;
+                        ios += 1;
+                    }
+                    qpair.submit_io(
+                        &buffer.slice((ctr * bytes)..(ctr + 1) * bytes),
+                        lba * blocks,
+                        write,
+                    );
+                    total += before.elapsed();
+                    ctr += 1;
+                }
+
+                if ctr != 0 {
+                    let before = Instant::now();
+                    qpair.complete_io(ctr);
+                    total += before.elapsed();
+                }
+                ios += ctr as u64;
+                assert!(qpair.sub_queue.is_empty());
+                nvme.lock().unwrap().delete_io_queue_pair(qpair).unwrap();
+
+                (ios, ios as f64 / total.as_secs_f64())
+            } else {
+                let seq = &(0..n)
+                    .map(|_| rng.gen_range(range.0..range.1))
+                    .collect::<Vec<u64>>()[..];
+                for &lba in seq {
+                    let before = Instant::now();
+                    while let Some(_) = qpair.quick_poll() {
+                        ctr -= 1;
+                    }
+                    if ctr == 32 {
+                        qpair.complete_io(1);
+                        ctr -= 1;
+                    }
+                    qpair.submit_io(
+                        &buffer.slice((ctr * bytes)..(ctr + 1) * bytes),
+                        lba * blocks,
+                        write,
+                    );
+                    total += before.elapsed();
+                    ctr += 1;
+                }
+                if ctr != 0 {
+                    let before = Instant::now();
+                    qpair.complete_io(ctr);
+                    total += before.elapsed();
+                }
+                assert!(qpair.sub_queue.is_empty());
+                nvme.lock().unwrap().delete_io_queue_pair(qpair).unwrap();
+                (n, n as f64 / total.as_secs_f64())
+            }
+        });
+        threads.push(handle);
+    }
+
+    let total = threads
+        .into_iter()
+        .fold((0, 0.), |acc, thread| {
+            let res = thread
+                .join()
+                .expect("The thread creation or execution failed!");
+            (acc.0 + res.0, acc.1 + res.1)
+        });
+    println!(
+        "n: {}, total {} iops: {:?}",
+        total.0,
+        if write { "write" } else { "read" },
+        total.1
+    );
+
+    match Arc::try_unwrap(nvme) {
+        Ok(mutex) => match mutex.into_inner() {
+            Ok(t) => Ok(t),
+            Err(e) => Err(e.into()),
+        },
+        Err(_) => Err("Arc::try_unwrap failed, not the last reference.".into()),
+    }
+}
+
+fn fill_ns(nvme: &mut NvmeDevice) {
+    let buffer: Dma<u8> = Dma::allocate(HUGE_PAGE_SIZE, true).unwrap();
+    let max_lba = nvme.namespaces.get(&1).unwrap().blocks - buffer.size as u64 / 512 - 1;
+    let blocks = buffer.size as u64 / 512;
+    let mut lba = 0;
+    while lba < max_lba - 512 {
+        nvme.write(&buffer, lba).unwrap();
+        lba += blocks;
+    }
+}
diff --git a/rust-toolchain b/rust-toolchain
new file mode 100644
index 0000000..bf867e0
--- /dev/null
+++ b/rust-toolchain
@@ -0,0 +1 @@
+nightly
diff --git a/src/cmd.rs b/src/cmd.rs
index bf766ab..6aa2e6a 100644
--- a/src/cmd.rs
+++ b/src/cmd.rs
@@ -75,6 +75,24 @@ impl NvmeCommand {
         }
     }
 
+    pub fn delete_io_submission_queue(c_id: u16, q_id: u16) -> Self {
+        Self {
+            opcode: 0,
+            c_id,
+            cdw10: q_id as u32,
+            ..Default::default()
+        }
+    }
+
+    pub fn delete_io_completion_queue(c_id: u16, q_id: u16) -> Self {
+        Self {
+            opcode: 4,
+            c_id,
+            cdw10: q_id as u32,
+            ..Default::default()
+        }
+    }
+
     pub fn identify_namespace(c_id: u16, ptr: usize, ns_id: u32) -> Self {
         Self {
             opcode: 6,
@@ -129,7 +147,6 @@ impl NvmeCommand {
         }
     }
 
-    #[allow(unused_variables)]
     pub fn get_features(c_id: u16, ptr: usize, fid: u8) -> Self {
         Self {
             opcode: 0xA,
@@ -151,7 +168,7 @@ impl NvmeCommand {
             cdw10: lba as u32,
             cdw11: (lba >> 32) as u32,
             cdw12: blocks_1 as u32,
-            cdw13: 0, // TODO?
+            cdw13: 0,
             cdw14: 0,
             cdw15: 0,
         }
@@ -196,6 +213,34 @@ impl NvmeCommand {
         }
     }
 
+    pub(crate) fn async_event_req(c_id: u16) -> Self {
+        Self {
+            opcode: 0xC,
+            flags: 0,
+            c_id,
+            ns_id: 0,
+            _rsvd: 0,
+            md_ptr: 0,
+            d_ptr: [0, 0],
+            cdw10: 0,
+            cdw11: 0,
+            cdw12: 0,
+            cdw13: 0,
+            cdw14: 0,
+            cdw15: 0,
+        }
+    }
+
+    pub(crate) fn get_log_page(c_id: u16, numd: u32, ptr0: u64, ptr1: u64, lid: u8, lpid: u16) -> Self {
+        Self {
+            c_id,
+            d_ptr: [ptr0, ptr1],
+            cdw10: (numd << 16) | lid as u32,
+            cdw11: ((lpid as u32) << 16) | numd >> 16,
+            ..Self::default()
+        }
+    }
+
     // not supported by samsung
     pub fn write_zeroes(c_id: u16, ns_id: u32, slba: u64, nlb: u16, deac: bool) -> Self {
         Self {
diff --git a/src/lib.rs b/src/lib.rs
index 015041c..56736bb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -16,31 +16,6 @@ use pci::*;
 pub use queues::QUEUE_LENGTH;
 use std::error::Error;
 
-// TODO: remove in place of std::hint::spin_loop
-#[cfg(target_arch = "aarch64")]
-#[inline(always)]
-pub(crate) fn pause() {
-    unsafe {
-        std::arch::aarch64::__yield();
-    }
-}
-
-#[cfg(target_arch = "x86")]
-#[inline(always)]
-pub(crate) fn pause() {
-    unsafe {
-        std::arch::x86::_mm_pause();
-    }
-}
-
-#[cfg(target_arch = "x86_64")]
-#[inline(always)]
-pub(crate) fn pause() {
-    unsafe {
-        std::arch::x86_64::_mm_pause();
-    }
-}
-
 pub fn init(pci_addr: &str) -> Result<NvmeDevice, Box<dyn Error>> {
     let mut vendor_file = pci_open_resource_ro(pci_addr, "vendor").expect("wrong pci address");
     let mut device_file = pci_open_resource_ro(pci_addr, "device").expect("wrong pci address");
diff --git a/src/nvme.rs b/src/nvme.rs
index 61f0bf8..e706575 100644
--- a/src/nvme.rs
+++ b/src/nvme.rs
@@ -367,7 +367,6 @@ impl NvmeDevice {
     }
 
     // 1 to 1 Submission/Completion Queue Mapping
-    // TODO: return qpair instead?
     pub fn create_io_queue_pair(&mut self, len: usize) -> Result<NvmeQueuePair, Box<dyn Error>> {
         let q_id = self.q_id;
         println!("Requesting i/o queue pair with id {q_id}");
@@ -403,6 +402,23 @@ impl NvmeDevice {
         })
     }
 
+    pub fn delete_io_queue_pair(&mut self, qpair: NvmeQueuePair) -> Result<(), Box<dyn Error>> {
+        println!("Deleting i/o queue pair with id {}", qpair.id);
+        self.submit_and_complete_admin(|c_id, _| {
+            NvmeCommand::delete_io_submission_queue(
+                c_id,
+                qpair.id,
+            )
+        })?;
+        self.submit_and_complete_admin(|c_id, _| {
+            NvmeCommand::delete_io_completion_queue(
+                c_id,
+                qpair.id,
+            )
+        })?;
+        Ok(())
+    }
+
     pub fn identify_namespace_list(&mut self, base: u32) -> Vec<u32> {
         self.submit_and_complete_admin(|c_id, addr| {
             NvmeCommand::identify_namespace_list(c_id, addr, base)
@@ -452,13 +468,10 @@
         namespace
     }
 
-    // pass prp list?
     pub fn write(&mut self, data: &impl DmaSlice, mut lba: u64) -> Result<(), Box<dyn Error>> {
-        let ns = *self.namespaces.get(&1).unwrap();
-
         for chunk in data.chunks(2 * 4096) {
-            let blocks = (chunk.slice.len() as u64 + ns.block_size - 1) / ns.block_size;
-            self.namespace_io(&ns, blocks, lba, chunk.phys_addr as u64, true)?;
+            let blocks = (chunk.slice.len() as u64 + 512 - 1) / 512;
+            self.namespace_io(1, blocks, lba, chunk.phys_addr as u64, true)?;
             lba += blocks;
         }
 
@@ -466,11 +479,10 @@
     }
 
     pub fn read(&mut self, dest: &impl DmaSlice, mut lba: u64) -> Result<(), Box<dyn Error>> {
-        let ns = *self.namespaces.get(&1).unwrap();
-
+        // let ns = *self.namespaces.get(&1).unwrap();
         for chunk in dest.chunks(2 * 4096) {
-            let blocks = (chunk.slice.len() as u64 + ns.block_size - 1) / ns.block_size;
-            self.namespace_io(&ns, blocks, lba, chunk.phys_addr as u64, false)?;
+            let blocks = (chunk.slice.len() as u64 + 512 - 1) / 512;
+            self.namespace_io(1, blocks, lba, chunk.phys_addr as u64, false)?;
             lba += blocks;
         }
         Ok(())
@@ -478,12 +490,10 @@
     pub fn write_copied(&mut self, data: &[u8], mut lba: u64) -> Result<(), Box<dyn Error>> {
         let ns = *self.namespaces.get(&1).unwrap();
-
-        // for chunk in data.chunks(128 * 4096) {
-        for chunk in data.chunks(2 * 4096) {
+        for chunk in data.chunks(128 * 4096) {
             self.buffer[..chunk.len()].copy_from_slice(chunk);
             let blocks = (chunk.len() as u64 + ns.block_size - 1) / ns.block_size;
-            self.namespace_io(&ns, blocks, lba, self.buffer.phys as u64, true)?;
+            self.namespace_io(1, blocks, lba, self.buffer.phys as u64, true)?;
             lba += blocks;
         }
 
@@ -497,19 +507,16 @@
         mut lba: u64,
     ) -> Result<(), Box<dyn Error>> {
         let ns = *self.namespaces.get(&ns_id).unwrap();
-
-        // for chunk in dest.chunks_mut(128 * 4096) {
-        for chunk in dest.chunks_mut(2 * 4096) {
+        for chunk in dest.chunks_mut(128 * 4096) {
             let blocks = (chunk.len() as u64 + ns.block_size - 1) / ns.block_size;
-            self.namespace_io(&ns, blocks, lba, self.buffer.phys as u64, false)?;
-
+            self.namespace_io(1, blocks, lba, self.buffer.phys as u64, false)?;
             lba += blocks;
             chunk.copy_from_slice(&self.buffer[..chunk.len()]);
         }
         Ok(())
     }
 
-    pub fn submit_io(
+    fn submit_io(
         &mut self,
         ns: &NvmeNamespace,
         addr: u64,
@@ -527,8 +534,8 @@
         } else if bytes <= 8192 {
             addr + 4096 // self.page_size
         } else {
-            // TODOo: idk if correct
-            let offset = (addr - self.prp_list.phys as u64) / 8;
+            // idk if this works
+            let offset = (addr - self.buffer.phys as u64) / 8;
             self.prp_list.phys as u64 + offset
         };
 
@@ -554,7 +561,7 @@
         self.io_sq.submit_checked(entry)
     }
 
-    pub fn complete_io(&mut self, step: u64) -> Option<u16> {
+    fn complete_io(&mut self, step: u64) -> Option<u16> {
         let q_id = 1;
 
         let (tail, c_entry, _) = self.io_cq.complete_n(step as usize);
@@ -654,9 +661,10 @@
         Ok(())
     }
 
-    pub fn namespace_io(
+    #[inline(always)]
+    fn namespace_io(
         &mut self,
-        ns: &NvmeNamespace,
+        ns_id: u32,
         blocks: u64,
         lba: u64,
         addr: u64,
@@ -667,22 +675,20 @@
 
         let q_id = 1;
 
-        let bytes = blocks * ns.block_size;
+        let bytes = blocks * 512;
         let ptr1 = if bytes <= 4096 {
             0
         } else if bytes <= 8192 {
             // self.buffer.phys as u64 + 4096 // self.page_size
             addr + 4096 // self.page_size
         } else {
-            // self.prp_list.phys as u64
-            eprintln!("tough luck");
-            addr + 4096
+            self.prp_list.phys as u64
         };
 
         let entry = if write {
             NvmeCommand::io_write(
                 self.io_sq.tail as u16,
-                ns.id,
+                ns_id,
                 lba,
                 blocks as u16 - 1,
                 addr,
@@ -691,7 +697,7 @@
         } else {
             NvmeCommand::io_read(
                 self.io_sq.tail as u16,
-                ns.id,
+                ns_id,
                 lba,
                 blocks as u16 - 1,
                 addr,
@@ -707,7 +713,7 @@
         Ok(())
     }
 
-    pub fn submit_and_complete_admin<F: FnOnce(u16, usize) -> NvmeCommand>(
+    fn submit_and_complete_admin<F: FnOnce(u16, usize) -> NvmeCommand>(
        &mut self,
        cmd_init: F,
    ) -> Result<NvmeCompletion, Box<dyn Error>> {
diff --git a/src/queues.rs b/src/queues.rs
index 722757f..6d45cd6 100644
--- a/src/queues.rs
+++ b/src/queues.rs
@@ -115,6 +115,7 @@ impl NvmeCompQueue {
     }
 
     ///
+    #[inline(always)]
     pub fn complete_n(&mut self, commands: usize) -> (usize, NvmeCompletion, usize) {
         let prev = self.head;
         self.head += commands - 1;
@@ -127,6 +128,7 @@ impl NvmeCompQueue {
         (head, entry, prev)
     }
 
+    #[inline(always)]
     pub fn complete_spin(&mut self) -> (usize, NvmeCompletion, usize) {
         loop {
             if let Some(val) = self.complete() {
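
Usage sketch (illustrative, not part of the patch): the queue-pair lifecycle that examples/io_test.rs drives per thread boils down to the calls below. It assumes only the signatures visible in this diff (vroom::init, NvmeDevice::create_io_queue_pair / delete_io_queue_pair, NvmeQueuePair::submit_io / complete_io, Dma::allocate, and the buffer.slice() helper used throughout the example); the function name and the single 4 KiB read at LBA 0 are hypothetical choices for the example.

use std::error::Error;
use vroom::memory::*;
use vroom::QUEUE_LENGTH;

fn queue_pair_roundtrip(pci_addr: &str) -> Result<(), Box<dyn Error>> {
    // Sketch only: call sequence mirrors examples/io_test.rs above.
    // Bring up the controller and allocate a hugepage-backed DMA buffer.
    let mut nvme = vroom::init(pci_addr)?;
    let buffer: Dma<u8> = Dma::allocate(HUGE_PAGE_SIZE, true)?;

    // Create a dedicated I/O submission/completion queue pair.
    let mut qpair = nvme.create_io_queue_pair(QUEUE_LENGTH)?;

    // Submit one 4 KiB read of LBA 0 and wait for its completion.
    qpair.submit_io(&buffer.slice(0..4096), 0, false);
    qpair.complete_io(1);

    // Tear the queue pair down again; this issues the new
    // delete-submission-queue and delete-completion-queue admin commands.
    nvme.delete_io_queue_pair(qpair)?;
    Ok(())
}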
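
Worked example for the new crate-internal get_log_page helper (illustrative, not part of the patch): it packs CDW10 = NUMDL << 16 | LID and CDW11 = LPID << 16 | NUMDU, matching the NVMe Get Log Page layout in which NUMD is the 0-based dword count split across the two dwords. The SMART/Health log id 0x02, the 4 KiB transfer size, and the function name below are assumptions made for the example.

fn log_page_dwords() -> (u32, u32) {
    // Reading 4 KiB of the SMART / Health Information log (LID 0x02).
    let numd: u32 = 4096 / 4 - 1;            // 0-based dword count = 1023 = 0x3FF
    let (lid, lpid): (u32, u32) = (0x02, 0);
    let cdw10 = (numd << 16) | lid;          // NUMDL in bits 31:16, LID in bits 7:0 -> 0x03FF_0002
    let cdw11 = (lpid << 16) | (numd >> 16); // LPID in bits 31:16, NUMDU in bits 15:0 -> 0
    (cdw10, cdw11)
}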