Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify Python binding of StorageConfig #371

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 13 additions & 28 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -105,27 +105,7 @@ class StorageConfig:
storage_config = StorageConfig.s3_from_config("bucket", "prefix", ...)
```
"""
class Memory:
"""Config for an in-memory storage backend"""

prefix: str

class Filesystem:
"""Config for a local filesystem storage backend"""

root: str

class S3:
"""Config for an S3 Object Storage compatible storage backend"""

bucket: str
prefix: str
credentials: S3Credentials | None
endpoint_url: str | None
allow_http: bool | None
region: str | None

def __init__(self, storage: Memory | Filesystem | S3): ...
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no #[new] method defined on StorageConfig, so it wasn't possible to directly construct this anyways.

@classmethod
def memory(cls, prefix: str) -> StorageConfig:
"""Create a StorageConfig object for an in-memory storage backend with the given prefix"""
Expand All @@ -137,7 +117,14 @@ class StorageConfig:
...

@classmethod
def s3_from_env(cls, bucket: str, prefix: str) -> StorageConfig:
def s3_from_env(
cls,
bucket: str,
prefix: str,
endpoint_url: str | None,
allow_http: bool = False,
region: str | None = None,
) -> StorageConfig:
"""Create a StorageConfig object for an S3 Object Storage compatible storage backend
with the given bucket and prefix

Expand All @@ -158,7 +145,7 @@ class StorageConfig:
prefix: str,
credentials: S3Credentials,
endpoint_url: str | None,
allow_http: bool | None = None,
allow_http: bool = False,
region: str | None = None,
) -> StorageConfig:
"""Create a StorageConfig object for an S3 Object Storage compatible storage
Expand All @@ -175,7 +162,7 @@ class StorageConfig:
bucket: str,
prefix: str,
endpoint_url: str | None,
allow_http: bool | None = None,
allow_http: bool = False,
region: str | None = None,
) -> StorageConfig:
"""Create a StorageConfig object for an S3 Object Storage compatible storage
Expand Down Expand Up @@ -269,7 +256,7 @@ class StoreConfig:
inline_chunk_threshold_bytes: int | None = None,
unsafe_overwrite_refs: bool | None = None,
virtual_ref_config: VirtualRefConfig | None = None,
):
):
"""Create a StoreConfig object with the given configuration options

Parameters
Expand All @@ -284,24 +271,22 @@ class StoreConfig:
Whether to allow overwriting refs in the store. Default is False. Experimental.
virtual_ref_config: VirtualRefConfig | None
Configurations for virtual references such as credentials and endpoints

Returns
-------
StoreConfig
A StoreConfig object with the given configuration options
"""
"""
...

async def async_pyicechunk_store_exists(storage: StorageConfig) -> bool: ...
def pyicechunk_store_exists(storage: StorageConfig) -> bool: ...

async def async_pyicechunk_store_create(
storage: StorageConfig, config: StoreConfig | None
) -> PyIcechunkStore: ...
def pyicechunk_store_create(
storage: StorageConfig, config: StoreConfig | None
) -> PyIcechunkStore: ...

async def async_pyicechunk_store_open_existing(
storage: StorageConfig, read_only: bool, config: StoreConfig | None
) -> PyIcechunkStore: ...
Expand Down
138 changes: 56 additions & 82 deletions icechunk-python/src/storage.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#![allow(clippy::too_many_arguments)]
// TODO: we only need that allow for PyStorageConfig, but i don't know how to set it

use std::path::PathBuf;

use icechunk::{
Expand Down Expand Up @@ -33,6 +30,22 @@ impl From<&PyS3Credentials> for StaticS3Credentials {
}
}

impl From<PyS3Credentials> for StaticS3Credentials {
fn from(credentials: PyS3Credentials) -> Self {
StaticS3Credentials {
access_key_id: credentials.access_key_id,
secret_access_key: credentials.secret_access_key,
session_token: credentials.session_token,
}
}
}

Comment on lines +33 to +41
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's useful to have a value-to-value From impl instead of just an impl on &PyS3Credentials so that you can avoid the clones when not needed.

impl From<PyS3Credentials> for S3Credentials {
fn from(value: PyS3Credentials) -> Self {
S3Credentials::Static(value.into())
}
}

#[pymethods]
impl PyS3Credentials {
#[new]
Expand All @@ -51,62 +64,44 @@ impl PyS3Credentials {
}

#[pyclass(name = "StorageConfig")]
pub enum PyStorageConfig {
Memory {
prefix: Option<String>,
},
Filesystem {
root: String,
},
S3 {
bucket: String,
prefix: String,
anon: bool,
credentials: Option<PyS3Credentials>,
endpoint_url: Option<String>,
allow_http: Option<bool>,
region: Option<String>,
},
}
pub struct PyStorageConfig(StorageConfig);

#[pymethods]
impl PyStorageConfig {
#[classmethod]
#[pyo3(signature = (prefix = None))]
fn memory(_cls: &Bound<'_, PyType>, prefix: Option<String>) -> Self {
PyStorageConfig::Memory { prefix }
Self(StorageConfig::InMemory { prefix })
}

#[classmethod]
fn filesystem(_cls: &Bound<'_, PyType>, root: String) -> Self {
PyStorageConfig::Filesystem { root }
fn filesystem(_cls: &Bound<'_, PyType>, root: PathBuf) -> Self {
Self(StorageConfig::LocalFileSystem { root })
}

#[classmethod]
#[pyo3(signature = (
bucket,
prefix,
endpoint_url = None,
allow_http = None,
allow_http = false,
region = None,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

allow_http can only be a bool, so why not default to false instead of defaulting to None and then unwrapping?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no reason, just moving fast

))]
fn s3_from_env(
_cls: &Bound<'_, PyType>,
bucket: String,
prefix: String,
endpoint_url: Option<String>,
allow_http: Option<bool>,
allow_http: bool,
region: Option<String>,
) -> Self {
PyStorageConfig::S3 {
bucket,
prefix,
anon: false,
credentials: None,
endpoint_url,
allow_http,
let config = S3Config {
region,
}
endpoint: endpoint_url,
allow_http,
credentials: mk_credentials(None, false),
};
Self(StorageConfig::S3ObjectStore { bucket, prefix, config: Some(config) })
}

#[classmethod]
Expand All @@ -115,7 +110,7 @@ impl PyStorageConfig {
prefix,
credentials,
endpoint_url = None,
allow_http = None,
allow_http = false,
region = None,
))]
fn s3_from_config(
Expand All @@ -124,45 +119,41 @@ impl PyStorageConfig {
prefix: String,
credentials: PyS3Credentials,
endpoint_url: Option<String>,
allow_http: Option<bool>,
allow_http: bool,
region: Option<String>,
) -> Self {
PyStorageConfig::S3 {
bucket,
prefix,
anon: false,
credentials: Some(credentials),
endpoint_url,
allow_http,
let config = S3Config {
region,
}
endpoint: endpoint_url,
allow_http,
credentials: credentials.into(),
};
Self(StorageConfig::S3ObjectStore { bucket, prefix, config: Some(config) })
}

#[classmethod]
#[pyo3(signature = (
bucket,
prefix,
endpoint_url = None,
allow_http = None,
allow_http = false,
region = None,
))]
fn s3_anonymous(
_cls: &Bound<'_, PyType>,
bucket: String,
prefix: String,
endpoint_url: Option<String>,
allow_http: Option<bool>,
allow_http: bool,
region: Option<String>,
) -> Self {
PyStorageConfig::S3 {
bucket,
prefix,
anon: true,
credentials: None,
endpoint_url,
allow_http,
let config = S3Config {
region,
}
endpoint: endpoint_url,
allow_http,
credentials: mk_credentials(None, true),
};
Self(StorageConfig::S3ObjectStore { bucket, prefix, config: Some(config) })
}
}

Expand All @@ -177,38 +168,21 @@ fn mk_credentials(config: Option<&PyS3Credentials>, anon: bool) -> S3Credentials
}
}

impl From<PyStorageConfig> for StorageConfig {
fn from(storage: PyStorageConfig) -> Self {
storage.0
}
}

impl From<&PyStorageConfig> for StorageConfig {
fn from(storage: &PyStorageConfig) -> Self {
match storage {
PyStorageConfig::Memory { prefix } => {
StorageConfig::InMemory { prefix: prefix.clone() }
}
PyStorageConfig::Filesystem { root } => {
StorageConfig::LocalFileSystem { root: PathBuf::from(root.clone()) }
}
PyStorageConfig::S3 {
bucket,
prefix,
anon,
credentials,
endpoint_url,
allow_http,
region,
} => {
let s3_config = S3Config {
region: region.clone(),
credentials: mk_credentials(credentials.as_ref(), *anon),
endpoint: endpoint_url.clone(),
allow_http: allow_http.unwrap_or(false),
};
storage.0.clone()
}
}

StorageConfig::S3ObjectStore {
bucket: bucket.clone(),
prefix: prefix.clone(),
config: Some(s3_config),
}
}
}
impl AsRef<StorageConfig> for PyStorageConfig {
fn as_ref(&self) -> &StorageConfig {
&self.0
}
Comment on lines +183 to +185
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This AsRef impl means that if you have a PyStorageConfig and you want to access an &StorageConfig, you can just call .as_ref().

}

Expand Down
Loading