Skip to content

Commit

Permalink
Add consolidated metadata support
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin committed Oct 3, 2024
1 parent 10d3b80 commit fc50898
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 4 deletions.
25 changes: 25 additions & 0 deletions zarrs/src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use std::sync::Arc;

use derive_more::Display;
use thiserror::Error;
use zarrs_metadata::v3::group::ConsolidatedMetadata;

use crate::{
config::{
Expand Down Expand Up @@ -150,6 +151,30 @@ impl<TStorage: ?Sized> Group<TStorage> {
(GM::V2(metadata), V::V3) => GM::V3(group_metadata_v2_to_v3(&metadata)),
}
}

/// Borrow the consolidated metadata stored in this group's metadata.
///
/// Returns [`None`] if the `consolidated_metadata` field is absent.
///
/// Consolidated metadata is not currently supported for Zarr V2 groups, so [`None`] is
/// also returned whenever the group metadata is not Zarr V3.
#[must_use]
pub fn consolidated_metadata(&self) -> Option<&ConsolidatedMetadata> {
    let GroupMetadata::V3(v3_metadata) = &self.metadata else {
        // Non-V3 group metadata has no consolidated metadata to hand out.
        // (Codecov reported this branch as not covered by tests.)
        return None;
    };
    v3_metadata.consolidated_metadata.as_ref()
}

/// Replace the consolidated metadata held in this group's metadata.
///
/// The supplied value overwrites whatever was stored before; passing [`None`] clears it.
///
/// Consolidated metadata is not currently supported for Zarr V2 groups, and this function is a no-op
/// for any group whose metadata is not Zarr V3.
pub fn set_consolidated_metadata(
    &mut self,
    consolidated_metadata: Option<ConsolidatedMetadata>,
) {
    let GroupMetadata::V3(v3_metadata) = &mut self.metadata else {
        // Nothing to update for non-V3 metadata.
        return;
    };
    v3_metadata.consolidated_metadata = consolidated_metadata;
}
}

impl<TStorage: ?Sized + ReadableStorageTraits> Group<TStorage> {
Expand Down
40 changes: 39 additions & 1 deletion zarrs/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ pub use key::{
mod node_async;
#[cfg(feature = "async")]
pub use node_async::{async_get_child_nodes, async_node_exists, async_node_exists_listable};
use zarrs_metadata::v3::group::ConsolidatedMetadataMetadata;

use std::sync::Arc;
use std::{collections::HashMap, sync::Arc};

pub use crate::metadata::NodeMetadata;
use thiserror::Error;
Expand Down Expand Up @@ -389,6 +390,43 @@ impl Node {
update_tree(&mut string, &self.children, 1);
string
}

/// Consolidate metadata. Returns [`None`] for an array.
///
/// [`ConsolidatedMetadataMetadata`] can be converted into [`ConsolidatedMetadata`](crate::metadata::v3::group::ConsolidatedMetadata) in [`GroupMetadataV3`](crate::metadata::v3::group::GroupMetadataV3).
#[must_use]
#[allow(clippy::items_after_statements)]
pub fn consolidate_metadata(&self) -> Option<ConsolidatedMetadataMetadata> {
if let NodeMetadata::Array(_) = self.metadata {
// Arrays cannot have consolidated metadata
return None;

Check warning on line 402 in zarrs/src/node.rs

View check run for this annotation

Codecov / codecov/patch

zarrs/src/node.rs#L402

Added line #L402 was not covered by tests
}

fn update_consolidated_metadata(
node_path: &str,
consolidated_metadata: &mut ConsolidatedMetadataMetadata,
children: &[Node],
) {
for child in children {
let relative_path = child
.path()
.as_str()
.strip_prefix(node_path)
.expect("child path should always include the node path");
let relative_path = relative_path.strip_prefix('/').unwrap_or(relative_path);
let relative_path = relative_path.to_string();
consolidated_metadata.insert(relative_path, child.metadata.clone());
update_consolidated_metadata(node_path, consolidated_metadata, &child.children);
}
}
let mut consolidated_metadata = HashMap::default();
update_consolidated_metadata(
self.path().as_str(),
&mut consolidated_metadata,
&self.children,
);
Some(consolidated_metadata)
}
}

#[cfg(test)]
Expand Down
45 changes: 43 additions & 2 deletions zarrs/tests/hierarchy.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#![cfg(feature = "filesystem")]

use std::sync::Arc;

use zarrs::node::Node;
use zarrs_filesystem::FilesystemStore;
use zarrs::{
filesystem::FilesystemStore, group::Group, metadata::v3::group::ConsolidatedMetadata,
node::Node,
};

#[test]
fn hierarchy_tree() {
Expand All @@ -23,3 +27,40 @@ fn hierarchy_tree() {
"
);
}

/// Consolidating from the root and from `/a` must yield, for every listed relative path,
/// the same metadata as opening that node directly; a group opened from the store starts
/// without consolidated metadata and reports it once set.
#[test]
fn consolidated_metadata() {
    let store = Arc::new(
        FilesystemStore::new("./tests/data/hierarchy.zarr")
            .unwrap()
            .sorted(),
    );
    // Open a node straight from the store, panicking if that fails.
    let open_node = |path: &str| Node::open(&store, path).unwrap();

    // Consolidate from the root of the hierarchy.
    let root_consolidated = open_node("/").consolidate_metadata().unwrap();
    println!("{:#?}", root_consolidated);
    for relative_path in ["a", "a/baz", "a/foo", "b"] {
        let from_consolidated = root_consolidated.get(relative_path).unwrap();
        let opened = open_node(&format!("/{}", relative_path));
        assert_eq!(from_consolidated, opened.metadata());
    }

    // Attach the consolidated metadata to the (in-memory) root group.
    let mut root_group = Group::open(store.clone(), "/").unwrap();
    assert!(root_group.consolidated_metadata().is_none());
    let inline = ConsolidatedMetadata {
        metadata: root_consolidated,
        ..Default::default()
    };
    root_group.set_consolidated_metadata(Some(inline));
    assert!(root_group.consolidated_metadata().is_some());

    // Consolidate from a subgroup: keys are relative to `/a`.
    let subgroup_consolidated = open_node("/a").consolidate_metadata().unwrap();
    println!("{:#?}", subgroup_consolidated);
    for relative_path in ["baz", "foo"] {
        let from_consolidated = subgroup_consolidated.get(relative_path).unwrap();
        let opened = open_node(&format!("/a/{}", relative_path));
        assert_eq!(from_consolidated, opened.metadata());
    }
}
105 changes: 104 additions & 1 deletion zarrs_metadata/src/v3/group.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::collections::HashMap;

use derive_more::Display;
use serde::{Deserialize, Serialize};

use crate::NodeMetadata;

use super::AdditionalFields;

/// Zarr group metadata (storage specification v3).
Expand All @@ -18,7 +22,7 @@ use super::AdditionalFields;
/// }
/// }
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
#[derive(Serialize, Deserialize, Clone, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct GroupMetadataV3 {
/// An integer defining the version of the storage specification to which the group adheres. Must be `3`.
Expand All @@ -28,11 +32,24 @@ pub struct GroupMetadataV3 {
/// Optional user metadata.
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub attributes: serde_json::Map<String, serde_json::Value>,
/// Consolidated metadata.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub consolidated_metadata: Option<ConsolidatedMetadata>,
/// Additional fields.
#[serde(flatten)]
pub additional_fields: AdditionalFields,
}

impl std::cmp::PartialEq for GroupMetadataV3 {
    /// Two group metadata values are equal when their `attributes` and
    /// `additional_fields` are equal.
    ///
    /// NOTE(review): `consolidated_metadata` does not take part in the comparison
    /// (the original implementation had that comparison commented out) — presumably
    /// intentional; confirm before relying on it.
    fn eq(&self, rhs: &Self) -> bool {
        let lhs_fields = (&self.attributes, &self.additional_fields);
        let rhs_fields = (&rhs.attributes, &rhs.additional_fields);
        lhs_fields == rhs_fields
    }
}

// `Eq` is implemented by hand alongside the manual `PartialEq` above.
impl Eq for GroupMetadataV3 {}

impl Default for GroupMetadataV3 {
fn default() -> Self {
Self::new(serde_json::Map::new(), AdditionalFields::default())
Expand All @@ -51,6 +68,92 @@ impl GroupMetadataV3 {
node_type: monostate::MustBe!("group"),
attributes,
additional_fields,
consolidated_metadata: None,
}
}
}

/// Consolidated metadata of a Zarr hierarchy.
///
/// This is the value of the optional `consolidated_metadata` field of [`GroupMetadataV3`].
///
/// The `Display` implementation writes the JSON serialisation of the value, or an empty
/// string if serialisation fails. The [`Default`] value has an empty `metadata` map and
/// the `inline` kind.
#[derive(Serialize, Deserialize, Clone, PartialEq, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct ConsolidatedMetadata {
/// A mapping from node path to Group or Array [`NodeMetadata`] object.
pub metadata: ConsolidatedMetadataMetadata,
/// The kind of the consolidated metadata. Must be `'inline'`. Reserved for future use.
pub kind: ConsolidatedMetadataKind,
/// The boolean literal `false`. Indicates that the field is not required to load the Zarr hierarchy.
///
/// The type only admits the value `false`.
pub must_understand: monostate::MustBe!(false),
}

/// The `metadata` field of `consolidated_metadata` in [`GroupMetadataV3`].
///
/// Keys are node paths (as strings) and values are the [`NodeMetadata`] of the node at that path.
pub type ConsolidatedMetadataMetadata = HashMap<String, NodeMetadata>;

impl Default for ConsolidatedMetadata {
    /// Create consolidated metadata with no entries, of the `inline` kind, and with
    /// `must_understand` set to `false`.
    fn default() -> Self {
        let metadata = HashMap::new();
        let kind = ConsolidatedMetadataKind::Inline;
        Self {
            metadata,
            kind,
            must_understand: monostate::MustBe!(false),
        }
    }
}

/// The "kind" of consolidated metadata.
///
/// `Inline` is currently the only variant. The enum is marked `#[non_exhaustive]`, so
/// matches outside this crate need a wildcard arm.
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
pub enum ConsolidatedMetadataKind {
/// Indicates that consolidated metadata is stored inline in the root `zarr.json` object.
///
/// Serialised to and deserialised from the string `"inline"`.
#[serde(rename = "inline")]
Inline,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Group metadata carrying an inline `consolidated_metadata` object must deserialise,
    /// and the consolidated entry for `/subgroup` must equal the same JSON parsed
    /// directly as [`NodeMetadata`].
    #[test]
    fn group_metadata_consolidated() {
        let group_metadata: GroupMetadataV3 = serde_json::from_str(
            r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"spam": "ham",
"eggs": 42
},
"consolidated_metadata": {
"metadata": {
"/subgroup": {
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}
},
"kind": "inline",
"must_understand": false
}
}"#,
        )
        .unwrap();

        // Pull the consolidated entry out step by step.
        let consolidated = group_metadata.consolidated_metadata.unwrap();
        let subgroup = consolidated.metadata.get("/subgroup").unwrap();

        // The same node metadata, parsed on its own.
        let expected: NodeMetadata = serde_json::from_str(
            r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}"#,
        )
        .unwrap();

        assert_eq!(subgroup, &expected);
    }
}

0 comments on commit fc50898

Please sign in to comment.