Skip to content

Commit

Permalink
dont create dirs if non local fs
Browse files Browse the repository at this point in the history
  • Loading branch information
samster25 committed Feb 15, 2024
1 parent 74591c1 commit d48843d
Showing 1 changed file with 18 additions and 4 deletions.
22 changes: 18 additions & 4 deletions daft/table/table_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@
)
from daft.datatype import DataType
from daft.expressions import ExpressionsProjection
from daft.filesystem import _resolve_paths_and_filesystem
from daft.filesystem import (
_resolve_paths_and_filesystem,
canonicalize_protocol,
get_protocol_from_path,
)
from daft.logical.schema import Schema
from daft.runners.partitioning import (
TableParseCSVOptions,
Expand Down Expand Up @@ -359,6 +363,16 @@ def write_tabular(
from daft.utils import ARROW_VERSION

[resolved_path], fs = _resolve_paths_and_filesystem(path, io_config=io_config)
if isinstance(path, pathlib.Path):
path_str = str(path)
else:
path_str = path

protocol = get_protocol_from_path(path_str)
canonicalized_protocol = canonicalize_protocol(protocol)

is_local_fs = canonicalized_protocol == "file"
print("is_local:", is_local_fs)

tables_to_write: list[MicroPartition]
part_keys_postfix_per_table: list[str | None]
Expand Down Expand Up @@ -413,9 +427,6 @@ def write_tabular(
if pf is not None and len(pf) > 0:
full_path = f"{full_path}/{pf}"

# TODO: For overwriting behavior, check here if dir exists to determine to delete files
# fs.create_dir(full_path)

arrow_table = tab.to_arrow()

size_bytes = arrow_table.nbytes
Expand All @@ -439,6 +450,9 @@ def file_visitor(written_file, i=i):
kwargs["min_rows_per_group"] = rows_per_row_group
kwargs["max_rows_per_group"] = rows_per_row_group

if ARROW_VERSION >= (8, 0, 0) and not is_local_fs:
kwargs["create_dir"] = False

pads.write_dataset(
arrow_table,
base_dir=full_path,
Expand Down

0 comments on commit d48843d

Please sign in to comment.