Skip to content

Commit

Permalink
Add Xarray Backends tests; fix list_prefix bug (#423)
Browse files Browse the repository at this point in the history
* Add Xarray Backends tests.

This is somewhat hacky, since these tests aren't intended to run
externally. But it works! and it's finding bugs!

* Rename to fix default python action

* fix

* cleanup

* fix bug.

* Add fs,mem,minio testing

* try again

* skip pickling test

* try again
  • Loading branch information
dcherian authored Dec 2, 2024
1 parent b2ad660 commit d3bd3c1
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 22 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/python-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,56 @@ jobs:
with:
path: icechunk-python/.hypothesis/
key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}

xarray-backends:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
path: "icechunk"

- name: Stand up MinIO
working-directory: icechunk
run: |
docker compose up -d minio
- name: Wait for MinIO to be ready
working-directory: icechunk
run: |
for _ in {1..10}; do
if curl --silent --fail http://minio:9000/minio/health/live; then
break
fi
sleep 3
done
docker compose exec -T minio mc alias set minio http://minio:9000 minio123 minio123
- uses: actions/checkout@v4
with:
repository: "pydata/xarray"
path: "xarray"
fetch-depth: 0 # Fetch all history for all branches and tags.

- uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Build wheels
uses: PyO3/maturin-action@v1
with:
working-directory: icechunk/icechunk-python
args: --release --out dist --find-interpreter
sccache: 'true'

- name: pytest
shell: bash
working-directory: icechunk/icechunk-python
env:
ICECHUNK_XARRAY_BACKENDS_TESTS: 1
run: |
set -e
python3 -m venv .venv
source .venv/bin/activate
pip install icechunk['test'] --find-links dist --force-reinstall
# pass xarray's pyproject.toml so that pytest can find the `flaky` fixture
pytest -c=../../xarray/pyproject.toml -W ignore tests/run_xarray_backends_tests.py
79 changes: 79 additions & 0 deletions icechunk-python/tests/run_xarray_backends_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import contextlib
import os
import tempfile
import time

import pytest

import zarr
from icechunk import IcechunkStore, S3Credentials, StorageConfig
from xarray.tests.test_backends import (
ZarrBase,
default_zarr_version, # noqa: F401; needed otherwise not discovered
)


@pytest.mark.skipif(
not os.environ.get("ICECHUNK_XARRAY_BACKENDS_TESTS", None),
reason="skipping Xarray backends tests",
)
class IcechunkStoreBase(ZarrBase):
@pytest.mark.parametrize("consolidated", [False, True, None])
@pytest.mark.parametrize("compute", [False, True])
@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize("write_empty", [False, True, None])
def test_write_region(self, consolidated, compute, use_dask, write_empty) -> None:
if consolidated is not False:
pytest.skip("consolidated not supported.")
super().test_write_region(consolidated, compute, use_dask, write_empty)

@pytest.mark.parametrize("consolidated", [False, True, None])
def test_roundtrip_consolidated(self, consolidated) -> None:
if consolidated is not False:
pytest.skip("consolidated not supported.")
super().test_roundtrip_consolidated(consolidated)


class TestIcechunkStoreFilesystem(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self):
if zarr.config.config["default_zarr_version"] == 2:
pytest.skip("v2 not supported")
with tempfile.TemporaryDirectory() as tmpdir:
yield IcechunkStore.create(StorageConfig.filesystem(tmpdir))


class TestIcechunkStoreMemory(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self):
if zarr.config.config["default_zarr_version"] == 2:
pytest.skip("v2 not supported")
yield IcechunkStore.create(StorageConfig.memory())

def test_pickle(self):
pytest.skip(reason="memory icechunk stores cannot be pickled.")

def test_pickle_dataarray(self):
pytest.skip(reason="memory icechunk stores cannot be pickled.")


class TestIcechunkStoreMinio(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self):
if zarr.config.config["default_zarr_version"] == 2:
pytest.skip("v2 not supported")
storage_config = {
"bucket": "testbucket",
"prefix": "python-xarray-test__" + str(time.time()),
"endpoint_url": "http://localhost:9000",
"region": "us-east-1",
"allow_http": True,
}
storage_config = StorageConfig.s3_from_config(
**storage_config,
credentials=S3Credentials(
access_key_id="minio123", secret_access_key="minio123"
),
)
store = IcechunkStore.create(storage=storage_config, read_only=False)
yield store
43 changes: 21 additions & 22 deletions icechunk/src/zarr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,20 @@ pub enum StoreError {
Unknown(Box<dyn std::error::Error + Send + Sync>),
}

fn is_prefix_match(key: &str, prefix: &str) -> bool {
match key.strip_prefix(prefix) {
None => false,
Some(rest) => {
// we have a few cases
prefix.is_empty() // if prefix was empty anything matches
|| rest.is_empty() // if stripping prefix left empty we have a match
|| rest.starts_with('/') // next component so we match
// what we don't include is other matches,
// we want to catch prefix/foo but not prefix-foo
}
}
}

#[derive(Debug)]
pub struct Store {
repository: Arc<RwLock<Repository>>,
Expand Down Expand Up @@ -894,20 +908,8 @@ impl Store {
for node in repository.list_nodes().await? {
// TODO: handle non-utf8?
let meta_key = Key::Metadata { node_path: node.path }.to_string();
match meta_key.strip_prefix(prefix) {
None => {}
Some(rest) => {
// we have a few cases
if prefix.is_empty() // if prefix was empty anything matches
|| rest.is_empty() // if stripping prefix left empty we have a match
|| rest.starts_with('/') // next component so we match
// what we don't include is other matches,
// we want to catch prefix/foo but not prefix-foo
{
yield meta_key;
}

}
if is_prefix_match(&meta_key, prefix) {
yield meta_key;
}
}
};
Expand All @@ -928,7 +930,7 @@ impl Store {
match maybe_path_chunk {
Ok((path,chunk)) => {
let chunk_key = Key::Chunk { node_path: path, coords: chunk.coord }.to_string();
if chunk_key.starts_with(prefix) {
if is_prefix_match(&chunk_key, prefix) {
yield chunk_key;
}
}
Expand Down Expand Up @@ -2239,13 +2241,10 @@ mod tests {
)
.await?;

store
.borrow_mut()
.set(
"group-suffix/zarr.json",
Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group"}"#),
)
.await?;
let zarr_meta = Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"array","attributes":{"foo":42},"shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"#);
store.borrow_mut().set("group-suffix/zarr.json", zarr_meta).await.unwrap();
let data = Bytes::copy_from_slice(b"hello");
store.set_if_not_exists("group-suffix/c/0/1/0", data.clone()).await.unwrap();

assert_eq!(
store.list_dir("group/").await?.try_collect::<Vec<_>>().await?,
Expand Down

0 comments on commit d3bd3c1

Please sign in to comment.