From 62dbbffa398322052aeabb189a3be770af1d51bf Mon Sep 17 00:00:00 2001
From: Michael Leslie
Date: Wed, 4 Oct 2023 12:04:37 -0700
Subject: [PATCH] Allow passing index_col=False in dd.read_csv (#9961)

read_pandas used to raise ValueError whenever 'index_col' was present in
kwargs. Only raise when 'index' is passed, or when 'index_col' is passed
with a value other than False.

The "use set_index instead" hint stays an f-string so that {reader_name}
is interpolated into the error message rather than emitted literally.

test_index_col now also checks that
dd.read_csv(fn, blocksize=30, index_col=False) matches
pd.read_csv(fn, index_col=False) under assert_eq(..., check_index=False).
---
 dask/dataframe/io/csv.py            | 8 +++++---
 dask/dataframe/io/tests/test_csv.py | 4 ++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/dask/dataframe/io/csv.py b/dask/dataframe/io/csv.py
index 1d5189e8bf6..5a847789c2c 100644
--- a/dask/dataframe/io/csv.py
+++ b/dask/dataframe/io/csv.py
@@ -486,10 +486,12 @@ def read_pandas(
         lineterminator = "\n"
     if include_path_column and isinstance(include_path_column, bool):
         include_path_column = "path"
-    if "index" in kwargs or "index_col" in kwargs:
+    if "index" in kwargs or (
+        "index_col" in kwargs and kwargs.get("index_col") is not False
+    ):
         raise ValueError(
-            "Keywords 'index' and 'index_col' not supported. "
-            f"Use dd.{reader_name}(...).set_index('my-index') instead"
+            "Keywords 'index' and 'index_col' not supported, except for "
+            f"'index_col=False'. Use dd.{reader_name}(...).set_index('my-index') instead"
         )
     for kw in ["iterator", "chunksize"]:
         if kw in kwargs:
diff --git a/dask/dataframe/io/tests/test_csv.py b/dask/dataframe/io/tests/test_csv.py
index 67df4c9df36..1df7202f2d3 100644
--- a/dask/dataframe/io/tests/test_csv.py
+++ b/dask/dataframe/io/tests/test_csv.py
@@ -1117,6 +1117,10 @@ def test_index_col():
         except ValueError as e:
             assert "set_index" in str(e)
 
+        df = pd.read_csv(fn, index_col=False)
+        ddf = dd.read_csv(fn, blocksize=30, index_col=False)
+        assert_eq(df, ddf, check_index=False)
+
 
 def test_read_csv_with_datetime_index_partitions_one():
     with filetext(timeseries) as fn: