Skip to content

Commit

Permalink
Merge pull request #2 from earth-mover/seba/manifest
Browse files Browse the repository at this point in the history
Manifest arrow datastructure
  • Loading branch information
paraseba authored Aug 8, 2024
2 parents 5fe3e75 + 398e6f7 commit f5d3876
Show file tree
Hide file tree
Showing 6 changed files with 564 additions and 14 deletions.
149 changes: 148 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ debug = true

[dev-dependencies]
pretty_assertions = "1.4.0"
proptest = "1.0.0"
2 changes: 1 addition & 1 deletion Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ check-deps *args='':
cargo deny --all-features check {{args}}

# run all checks that CI actions will run
pre-commit: (compile-tests "--locked") build (format "--check") lint test check-deps
pre-commit $RUSTFLAGS="-D warnings -W unreachable-pub -W bare-trait-objects": (compile-tests "--locked") build (format "--check") lint test check-deps
7 changes: 7 additions & 0 deletions proptest-regressions/manifest.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc c056e4188055e9eacbeebae5e0a1caf8ff2fde6a877784ec42c9c32af4b4a4ff # shrinks to v = []
42 changes: 30 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@
/// - A `Dataset` concrete type, implements the high level interface, using a Storage
/// implementation and the data tables.
pub mod dataset;
pub mod manifest;
pub mod storage;
pub mod structure;

use async_trait::async_trait;
use manifest::ManifestsTable;
use std::{
collections::{HashMap, HashSet},
fmt::Display,
Expand All @@ -35,8 +37,9 @@ use std::{
};
use structure::StructureTable;

#[derive(Clone, Debug, Hash, PartialEq, Eq)]
/// An ND index to an element in an array.
pub type ArrayIndices = Vec<u64>;
pub struct ArrayIndices(pub Vec<u64>);

/// The shape of an array.
/// 0 is a valid shape member
Expand Down Expand Up @@ -136,9 +139,9 @@ impl TryFrom<u8> for ChunkKeyEncoding {

fn try_from(value: u8) -> Result<Self, Self::Error> {
match value {
c if { c == '/' as u8 } => Ok(ChunkKeyEncoding::Slash),
c if { c == '.' as u8 } => Ok(ChunkKeyEncoding::Dot),
c if { c == 'x' as u8 } => Ok(ChunkKeyEncoding::Default),
b'/' => Ok(ChunkKeyEncoding::Slash),
b'.' => Ok(ChunkKeyEncoding::Dot),
b'x' => Ok(ChunkKeyEncoding::Default),
_ => Err("Invalid chunk key encoding character"),
}
}
Expand All @@ -147,9 +150,9 @@ impl TryFrom<u8> for ChunkKeyEncoding {
impl From<ChunkKeyEncoding> for u8 {
fn from(value: ChunkKeyEncoding) -> Self {
match value {
ChunkKeyEncoding::Slash => '/' as u8,
ChunkKeyEncoding::Dot => '.' as u8,
ChunkKeyEncoding::Default => 'x' as u8,
ChunkKeyEncoding::Slash => b'/',
ChunkKeyEncoding::Dot => b'.',
ChunkKeyEncoding::Default => b'x',
}
}
}
Expand Down Expand Up @@ -236,7 +239,7 @@ enum UserAttributesStructure {
struct ManifestExtents(Vec<ArrayIndices>);

#[derive(Debug, Clone, PartialEq, Eq)]
struct ManifestRef {
pub struct ManifestRef {
object_id: ObjectId,
location: TableRegion,
flags: Flags,
Expand Down Expand Up @@ -276,24 +279,28 @@ pub struct NodeStructure {
node_data: NodeData,
}

/// Describes a chunk through a `location` string together with an
/// `offset`/`length` pair.
///
/// NOTE(review): presumably `offset` and `length` delimit a byte range inside
/// whatever `location` points to — confirm against the code that resolves
/// these references.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VirtualChunkRef {
    /// Where the chunk lives.
    location: String, // FIXME: better type
    /// Start position of the chunk.
    offset: u64,
    /// Size of the chunk.
    length: u64,
}

/// Describes a chunk through an [`ObjectId`] together with an
/// `offset`/`length` pair.
///
/// NOTE(review): presumably `offset` and `length` delimit a byte range inside
/// the object identified by `id` — confirm against the storage code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkRef {
    /// Identifier of the object holding the chunk.
    id: ObjectId, // FIXME: better type
    /// Start position of the chunk.
    offset: u64,
    /// Size of the chunk.
    length: u64,
}

/// The three ways a chunk's content can be represented.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChunkPayload {
    /// The chunk bytes are carried directly in this value.
    Inline(Vec<u8>), // FIXME: optimize copies
    /// The chunk is described by a [`VirtualChunkRef`].
    Virtual(VirtualChunkRef),
    /// The chunk is described by a [`ChunkRef`].
    Ref(ChunkRef),
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ChunkInfo {
node: NodeId,
coord: ArrayIndices,
Expand All @@ -303,9 +310,6 @@ pub struct ChunkInfo {
// FIXME: this will hold the arrow file
pub struct AttributesTable();

// FIXME: this will hold the arrow file
pub struct ManifestsTable();

pub enum AddNodeError {
AlreadyExists,
}
Expand All @@ -325,7 +329,7 @@ pub enum StorageError {
/// Different implementations can cache the files differently, or not at all.
/// Implementations are free to assume files are never overwritten.
#[async_trait]
trait Storage {
pub trait Storage {
async fn fetch_structure(
&self,
id: &ObjectId,
Expand Down Expand Up @@ -367,3 +371,17 @@ pub struct Dataset {
// FIXME: issue with too many inline chunks kept in mem
set_chunks: HashMap<(Path, ArrayIndices), ChunkPayload>,
}

impl Dataset {
    /// Builds a `Dataset` from a storage implementation and a structure id.
    ///
    /// The `new_groups`, `new_arrays`, `updated_arrays`, `updated_attributes`
    /// and `set_chunks` collections all start out empty.
    pub fn new(storage: Box<dyn Storage>, structure_id: ObjectId) -> Self {
        Self {
            storage,
            structure_id,
            set_chunks: HashMap::new(),
            updated_attributes: HashMap::new(),
            updated_arrays: HashMap::new(),
            new_arrays: HashMap::new(),
            new_groups: HashSet::new(),
        }
    }
}
Loading

0 comments on commit f5d3876

Please sign in to comment.