From f109ed3df93c908c29deb51a2fb580d9c92318f7 Mon Sep 17 00:00:00 2001
From: Tuomas Pirhonen <ty.pirhonen@tum.de>
Date: Wed, 28 Feb 2024 22:39:09 +0100
Subject: [PATCH] impl deref for dma, now pass phys addr to read/write_raw,
 update init example

---
 examples/init.rs |  33 +++++++++------
 src/lib.rs       |   6 +--
 src/memory.rs    |  47 +++++++++++++++++++--
 src/nvme.rs      | 103 +++++++++++++++++++++--------------------------
 src/queues.rs    |  11 +++--
 5 files changed, 119 insertions(+), 81 deletions(-)
diff --git a/examples/init.rs b/examples/init.rs
index 98f9168..ac010db 100644
--- a/examples/init.rs
+++ b/examples/init.rs
@@ -1,5 +1,8 @@
 use std::env;
 use std::process;
+use vroom::QUEUE_LENGTH;
+use vroom::HUGE_PAGE_SIZE;
+use vroom::memory::Dma;
 
 pub fn main() -> Result<(), Box<dyn std::error::Error>> {
     let mut args = env::args();
@@ -12,20 +15,21 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
             process::exit(1);
         }
     };
+
     let mut nvme = vroom::init(&pci_addr)?;
+    nvme.create_io_queue_pair(QUEUE_LENGTH)?;
 
     // Testing stuff
     let n = 10;
-    let n2 = 100_000;
+    let n2 = 1000;
     let blocks = 8;
+    let mut buffer: Dma<[u8; HUGE_PAGE_SIZE]> = Dma::allocate(HUGE_PAGE_SIZE, true)?;
 
     let mut read = std::time::Duration::new(0, 0);
     let mut write = std::time::Duration::new(0, 0);
-    let mut read_buf = vec![0; blocks * 512];
-
     //  let mut write_batched = std::time::Duration::new(0, 0);
     //  let mut read_batched = std::time::Duration::new(0, 0);
-    //  let mut read_bbuf = vec![0; blocks * 512];
+
     let mut rng = rand::thread_rng();
     use rand::seq::SliceRandom;
 
@@ -38,12 +42,13 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
             let rand_block = &(0..(512 * blocks))
                 .map(|_| rand::random::<u8>())
                 .collect::<Vec<_>>()[..];
-            unsafe { (*nvme.buffer.virt)[..rand_block.len()].copy_from_slice(rand_block) };
+
+            buffer[..rand_block.len()].copy_from_slice(rand_block);
 
             // write
-             let before = std::time::Instant::now();
-             nvme.write_raw(rand_block, lba + (*i * blocks as u64))?;
-             write += before.elapsed();
+            let before = std::time::Instant::now();
+            nvme.write_raw(rand_block, lba + (*i * blocks as u64), buffer.phys as u64)?;
+            write += before.elapsed();
 
             //  let before = Instant::now();
             //  nvme.batched_write(1, rand_block, lba, 256)?;
@@ -54,13 +59,17 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
             //  nvme.batched_read(1, &mut read_bbuf[..], lba, 256)?;
             //  read_batched += before.elapsed();
 
+            buffer[..rand_block.len()].fill_with(Default::default);
             let before = std::time::Instant::now();
-            nvme.read(1, &mut read_buf[..], lba + (*i * blocks as u64))?;
+            nvme.read(
+                1,
+                &buffer[..rand_block.len()],
+                lba + (*i * blocks as u64),
+                buffer.phys as u64,
+            )?;
             read += before.elapsed();
 
-            // assert_eq!(read_buf, rand_block);
-            // assert_eq!(read_buf, read_bbuf);
-
+            assert_eq!(&buffer[..rand_block.len()], rand_block);
             //  lba += blocks as u64;
         }
     }
diff --git a/src/lib.rs b/src/lib.rs
index 09ef285..a409be4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,15 +2,16 @@
 #[allow(unused)]
 mod cmd;
 #[allow(dead_code)]
-mod memory;
+pub mod memory;
 mod nvme;
 #[allow(dead_code)]
 mod pci;
 #[allow(dead_code)]
 mod queues;
 
+pub use queues::QUEUE_LENGTH;
+pub use memory::HUGE_PAGE_SIZE;
 use pci::*;
-use queues::QUEUE_LENGTH;
 use nvme::NvmeDevice;
 use std::error::Error;
 
@@ -55,7 +56,6 @@ pub fn init(pci_addr: &str) -> Result<NvmeDevice, Box<dyn Error>> {
 
     let mut nvme = NvmeDevice::init(pci_addr)?;
     nvme.identify_controller()?;
-    nvme.create_io_queue_pair(QUEUE_LENGTH)?;
     let ns = nvme.identify_namespace_list(0);
     for n in ns {
         println!("ns_id: {n}");
diff --git a/src/memory.rs b/src/memory.rs
index 6f82c37..aa763fe 100644
--- a/src/memory.rs
+++ b/src/memory.rs
@@ -1,14 +1,17 @@
 use lazy_static::lazy_static;
-use libc::{c_void, memset};
+use libc::munmap;
+use libc::c_void;
+// use std::rc::Rc;
+// use std::ptr::NonNull;
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::error::Error;
 use std::io::{self, Read, Seek};
 use std::os::fd::{AsRawFd, RawFd};
-use std::rc::Rc;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Mutex;
 use std::{fs, mem, process, ptr};
+use std::ops::{Deref, DerefMut};
 
 // from https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt
 const X86_VA_WIDTH: u8 = 47;
@@ -28,14 +31,36 @@ lazy_static! {
 }
 
 pub struct Dma<T> {
+    // pub virt: NonNull<T>,
     pub virt: *mut T,
     pub phys: usize,
+    size: usize,
+}
+
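+// SAFETY (unverified): sound only while `virt` points at the live mapping made by `allocate` and that mapping covers a full `T`.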
+impl<T> Deref for Dma<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe {
+            &*self.virt
+            // self.virt.as_ref()
+        }
+    }
+}
+
+impl<T> DerefMut for Dma<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe {
+            &mut *self.virt
+            // self.virt.as_mut()
+        }
+    }
 }
 
 impl<T> Dma<T> {
     /// Allocates DMA Memory on a huge page
     // TODO: vfio support?
-    #[allow(arithmetic_overflow)]
     pub fn allocate(size: usize, require_contiguous: bool) -> Result<Dma<T>, Box<dyn Error>> {
         let size = if size % HUGE_PAGE_SIZE != 0 {
             ((size >> HUGE_PAGE_BITS) + 1) << HUGE_PAGE_BITS
@@ -63,7 +88,7 @@ impl<T> Dma<T> {
                         size,
                         libc::PROT_READ | libc::PROT_WRITE,
                         libc::MAP_SHARED | libc::MAP_HUGETLB,
-                        // libc::MAP_SHARED, // cuz MAP_HUGETLB doesn't exist on macOS (for lsp lol)
+                        // libc::MAP_SHARED,
                         f.as_raw_fd(),
                         0,
                     )
@@ -72,8 +97,10 @@ impl<T> Dma<T> {
                     Err("failed to mmap huge page - are huge pages enabled and free?".into())
                 } else if unsafe { libc::mlock(ptr, size) } == 0 {
                     let memory = Dma {
+                        // virt: NonNull::new(ptr as *mut T).expect("oops"),
                         virt: ptr as *mut T,
                         phys: virt_to_phys(ptr as usize)?,
+                        size
                     };
                     Ok(memory)
                 } else {
@@ -92,6 +119,16 @@ impl<T> Dma<T> {
     }
 }
 
+// Unmaps the `size`-byte mapping behind `virt` when the `Dma` is dropped; the munmap return value is not checked.
+impl<T> Drop for Dma<T> {
+    fn drop(&mut self) {
+        unsafe {
+            // munmap(self.virt.as_ptr() as *mut c_void, self.size);
+            munmap(self.virt as *mut c_void, self.size);
+        }
+    }
+}
+
 pub struct Mempool {
     base_addr: *mut u8,
     num_entries: usize,
@@ -100,6 +137,7 @@ pub struct Mempool {
     pub(crate) free_stack: RefCell<Vec<usize>>,
 }
 
+/*
 impl Mempool {
     /// Allocates a new `Mempool`.
     ///
@@ -145,6 +183,7 @@ impl Mempool {
         Ok(pool)
     }
 }
+*/
 
 /// Translates a virtual address to its physical counterpart
 pub(crate) fn virt_to_phys(addr: usize) -> Result<usize, Box<dyn Error>> {
diff --git a/src/nvme.rs b/src/nvme.rs
index 74d8299..37041d7 100644
--- a/src/nvme.rs
+++ b/src/nvme.rs
@@ -139,9 +139,7 @@ impl NvmeDevice {
         };
 
         for i in 1..512 {
-            unsafe {
-                (*dev.prp_list.virt)[i - 1] = (dev.buffer.phys + i * 4096) as u64;
-            }
+            dev.prp_list[i - 1] = (dev.buffer.phys + i * 4096) as u64;
         }
 
         println!("CAP: 0x{:x}", dev.get_reg64(NvmeRegs64::CAP as u64));
@@ -210,7 +208,7 @@ impl NvmeDevice {
 
         println!("Dumping identify controller");
         let mut serial = String::new();
-        let data = unsafe { *self.buffer.virt };
+        let data = &self.buffer;
 
         for &b in &data[4..24] {
             if b == 0 {
@@ -256,18 +254,7 @@ impl NvmeDevice {
                 queue.get_addr(),
                 (len - 1) as u16,
             )
-        });
-        let status = comp.status >> 1;
-        if status != 0 {
-            eprintln!(
-                "Status: 0x{:x}, Status Code 0x{:x}, Status Code Type: 0x{:x}",
-                status,
-                status & 0xFF,
-                (status >> 8) & 0x7
-            );
-            return Err("Requesting i/o completion queue failed".into());
-        }
-
+        })?;
         self.comp_queues.push(queue);
 
         println!("Requesting i/o submission queue");
@@ -281,20 +268,9 @@ impl NvmeDevice {
                 (len - 1) as u16,
                 cq_id as u16,
             )
-        });
-        let status = comp.status >> 1;
-        if status != 0 {
-            eprintln!(
-                "Status: 0x{:x}, Status Code 0x{:x}, Status Code Type: 0x{:x}",
-                status,
-                status & 0xFF,
-                (status >> 8) & 0x7
-            );
-            return Err("Requesting i/o submission queue failed".into());
-        }
+        })?;
 
         self.sub_queues.push(queue);
-
         Ok(())
     }
 
@@ -305,6 +281,7 @@ impl NvmeDevice {
 
         // TODO: idk bout this/don't hardcode len
         let data: &[u32] =
+            // unsafe { std::slice::from_raw_parts(self.buffer.virt.as_ptr() as *const u32, 1024) };
             unsafe { std::slice::from_raw_parts(self.buffer.virt as *const u32, 1024) };
 
         data.iter()
@@ -346,41 +323,39 @@ impl NvmeDevice {
         namespace
     }
 
-    pub fn write_string(&mut self, data: String, lba: u64) -> Result<(), Box<dyn Error>> {
-        self.write_raw(data.as_bytes(), lba)
-    }
-
-    pub fn write_raw(&mut self, data: &[u8], mut lba: u64) -> Result<(), Box<dyn Error>> {
+    // TODO: swap data with len? `data` is only used for its length here; `addr` is what gets passed to `namespace_io`.
+    pub fn write_raw(&mut self, data: &[u8], mut lba: u64, mut addr: u64) -> Result<(), Box<dyn Error>> {
         let ns = *self.namespaces.get(&1).unwrap();
-        // println!("data len: {}", data.len());
-
-        // for chunk in data.chunks(HUGE_PAGE_SIZE) {
-        for chunk in data.chunks(128 * 4096) {
-            // unsafe {
-            //     (*self.buffer.virt)[..chunk.len()].copy_from_slice(chunk);
-            // }
-            let blocks = (chunk.len() + ns.block_size as usize - 1) / ns.block_size as usize;
-            self.namespace_io(&ns, blocks as u64, lba, true)?;
-            lba += blocks as u64;
+
+        // for chunk in data.chunks(128 * 4096) {
+        for chunk in data.chunks(2 * 4096) {
+            let blocks = (chunk.len() as u64 + ns.block_size - 1) / ns.block_size;
+            self.namespace_io(&ns, blocks as u64, lba, addr, true)?;
+
+            addr += blocks * ns.block_size;
+            lba += blocks;
         }
 
         Ok(())
     }
 
+    // TODO: swap data with len? `dest` is only used for its length here; `addr` is what gets passed to `namespace_io`.
     pub fn read(
         &mut self,
         ns_id: u32,
-        dest: &mut [u8],
+        dest: &[u8],
         mut lba: u64,
+        mut addr: u64,
     ) -> Result<(), Box<dyn Error>> {
         let ns = *self.namespaces.get(&ns_id).unwrap();
 
-        // for chunk in dest.chunks_mut(HUGE_PAGE_SIZE) {
-        for chunk in dest.chunks_mut(128 * 4096) {
-            let blocks = (chunk.len() + ns.block_size as usize - 1) / ns.block_size as usize;
-            self.namespace_io(&ns, blocks as u64, lba, false)?;
-            // chunk.copy_from_slice(&unsafe { (*self.buffer.virt) }[..chunk.len()]);
-            lba += blocks as u64;
+        // for chunk in dest.chunks(128 * 4096) {
+        for chunk in dest.chunks(2 * 4096) {
+            let blocks = (chunk.len() as u64 + ns.block_size - 1) / ns.block_size;
+            self.namespace_io(&ns, blocks as u64, lba, addr, false)?;
+
+            addr += blocks * ns.block_size;
+            lba += blocks;
         }
         Ok(())
     }
@@ -524,6 +499,7 @@ impl NvmeDevice {
         ns: &NvmeNamespace,
         blocks: u64,
         lba: u64,
+        addr: u64,
         write: bool,
     ) -> Result<(), Box<dyn Error>> {
         assert!(blocks > 0);
@@ -536,9 +512,12 @@ impl NvmeDevice {
         let ptr1 = if bytes <= 4096 {
             0
         } else if bytes <= 8192 {
-            self.buffer.phys as u64 + 4096 // self.page_size
+            // self.buffer.phys as u64 + 4096 // self.page_size
+            addr + 4096 // self.page_size
         } else {
-            self.prp_list.phys as u64
+            // self.prp_list.phys as u64
+            eprintln!("tough luck");
+            addr + 4096
         };
 
         let entry = if write {
@@ -547,7 +526,8 @@ impl NvmeDevice {
                 ns.id,
                 lba,
                 blocks as u16 - 1,
-                self.buffer.phys as u64,
+                // self.buffer.phys as u64,
+                addr as u64,
                 ptr1,
             )
         } else {
@@ -556,7 +536,8 @@ impl NvmeDevice {
                 ns.id,
                 lba,
                 blocks as u16 - 1,
-                self.buffer.phys as u64,
+                // self.buffer.phys as u64,
+                addr as u64,
                 ptr1,
             )
         };
@@ -573,14 +554,24 @@ impl NvmeDevice {
     pub fn submit_and_complete_admin<F: FnOnce(u16, usize) -> NvmeCommand>(
         &mut self,
         cmd_init: F,
-    ) -> NvmeCompletion {
+    ) -> Result<NvmeCompletion, Box<dyn Error>> {
         let cid = self.admin_sq.tail;
         let tail = self.admin_sq.submit(cmd_init(cid as u16, self.buffer.phys));
         self.write_reg_idx(NvmeArrayRegs::SQyTDBL, 0, tail as u32);
 
         let (head, entry, _) = self.admin_cq.complete_spin();
         self.write_reg_idx(NvmeArrayRegs::CQyHDBL, 0, head as u32);
-        entry
+        let status = entry.status >> 1; // bit 0 is the phase tag, not part of the status
+        if status != 0 {
+            eprintln!(
+                "Status: 0x{:x}, Status Code 0x{:x}, Status Code Type: 0x{:x}",
+                status,
+                status & 0xFF,
+                (status >> 8) & 0x7
+            );
+            return Err(format!("admin command failed with status 0x{status:x}").into());
+        }
+        Ok(entry)
     }
 
     /// Sets Queue `qid` Tail Doorbell to `val`
diff --git a/src/queues.rs b/src/queues.rs
index 37e78eb..5deddd5 100644
--- a/src/queues.rs
+++ b/src/queues.rs
@@ -22,6 +22,7 @@ pub struct NvmeCompletion {
     pub status: u16,
 }
 
+/// Maximum number of submission entries on a 2MiB huge page (NOTE(review): confirm against the entry size).
 pub const QUEUE_LENGTH: usize = 1024;
 
 /// Submission queue
@@ -62,9 +63,8 @@ impl NvmeSubQueue {
     #[inline(always)]
     pub fn submit(&mut self, entry: NvmeCommand) -> usize {
         // println!("SUBMISSION ENTRY: {:?}", entry);
-        unsafe {
-            (*self.commands.virt)[self.tail] = entry;
-        }
+        self.commands[self.tail] = entry;
+
         self.tail = (self.tail + 1) % self.len;
         self.tail
     }
@@ -94,7 +94,7 @@ impl NvmeCompQueue {
     }
 
     pub fn complete(&mut self) -> Option<(usize, NvmeCompletion, usize)> {
-        let entry: NvmeCompletion = unsafe { (*self.commands.virt)[self.head] };
+        let entry: NvmeCompletion = self.commands[self.head];
 
         if ((entry.status & 1) == 1) == self.phase {
             let prev = self.head;
@@ -125,9 +125,8 @@ impl NvmeCompQueue {
         loop {
             if let Some(val) = self.complete() {
                 return val;
-            } else {
-                super::pause();
             }
+            super::pause();
         }
     }