diff --git a/src/daft-io/src/http.rs b/src/daft-io/src/http.rs index b31f46060b..b234477c4d 100644 --- a/src/daft-io/src/http.rs +++ b/src/daft-io/src/http.rs @@ -152,6 +152,7 @@ impl From for super::Error { source: source.into(), }, }, + UnableToDetermineSize { path } => super::Error::UnableToDetermineSize { path }, _ => super::Error::Generic { store: super::SourceType::Http, source: error.into(), diff --git a/src/daft-io/src/lib.rs b/src/daft-io/src/lib.rs index 42bf0c46a2..f5d8133e26 100644 --- a/src/daft-io/src/lib.rs +++ b/src/daft-io/src/lib.rs @@ -72,6 +72,9 @@ pub enum Error { #[snafu(display("Not a File: \"{}\"", path))] NotAFile { path: String }, + #[snafu(display("Unable to determine size of {}", path))] + UnableToDetermineSize { path: String }, + #[snafu(display("Unable to load Credentials for store: {store}\nDetails:\n{source:?}"))] UnableToLoadCredentials { store: SourceType, source: DynError }, diff --git a/src/daft-io/src/object_store_glob.rs b/src/daft-io/src/object_store_glob.rs index 665f5129bb..d7ad04a2d1 100644 --- a/src/daft-io/src/object_store_glob.rs +++ b/src/daft-io/src/object_store_glob.rs @@ -355,7 +355,7 @@ pub(crate) async fn glob( let maybe_size = source.get_size(&glob, io_stats.clone()).await; match maybe_size { Ok(size_bytes) => yield Ok(FileMetadata{filepath: glob.clone(), size: Some(size_bytes as u64), filetype: FileType::File }), - Err(crate::Error::NotAFile {..} | crate::Error::NotFound { .. }) => {attempt_as_dir = true;}, + Err(crate::Error::NotAFile {..} | crate::Error::NotFound { .. } | crate::Error::UnableToDetermineSize { .. }) => {attempt_as_dir = true;}, Err(err) => yield Err(err), } } diff --git a/tests/integration/io/test_list_files_http.py b/tests/integration/io/test_list_files_http.py index a8cf410e22..0893736698 100644 --- a/tests/integration/io/test_list_files_http.py +++ b/tests/integration/io/test_list_files_http.py @@ -103,7 +103,7 @@ def test_http_listing_absolute_urls(nginx_config, tmpdir): ) with mount_data_nginx(nginx_config, tmpdir): - http_path = f"{nginx_http_url}/index.html" + http_path = f"{nginx_http_url}/" daft_ls_result = io_glob(http_path) # NOTE: Cannot use fsspec here because they do not correctly find the links @@ -129,7 +129,7 @@ def test_http_listing_absolute_base_urls(nginx_config, tmpdir): ) with mount_data_nginx(nginx_config, tmpdir): - http_path = f"{nginx_http_url}/index.html" + http_path = f"{nginx_http_url}/" daft_ls_result = io_glob(http_path) # NOTE: Cannot use fsspec here because they do not correctly find the links