[DOCS] Update schema hints documentation (#1935)

Schema hint documentation was out of date after #1636. This PR fixes our docs.

Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
Eventual-Inc · Feb 21, 2024 · d115e19 · d115e19
1 parent b702e4d
commit d115e19
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 6 deletions.
diff --git a/daft/io/_csv.py b/daft/io/_csv.py
@@ -43,8 +43,8 @@ def read_csv(
 
     Args:
         path (str): Path to CSV (allows for wildcards)
-        schema_hints (dict[str, DataType]): A mapping between column names and datatypes - passing this option will
-            disable all schema inference on data being read, and throw an error if data being read is incompatible.
+        schema_hints (dict[str, DataType]): A mapping between column names and datatypes - passing this option
+            will override the specified columns on the inferred schema with the specified DataTypes
         has_headers (bool): Whether the CSV has a header or not, defaults to True
         delimiter (str): Delimiter used in the CSV, defaults to ","
         doubled_quote (bool): Whether to support double quote escapes, defaults to True

diff --git a/daft/io/_json.py b/daft/io/_json.py
@@ -36,8 +36,8 @@ def read_json(
 
     Args:
         path (str): Path to JSON files (allows for wildcards)
-        schema_hints (dict[str, DataType]): A mapping between column names and datatypes - passing this option will
-            disable all schema inference on data being read, and throw an error if data being read is incompatible.
+        schema_hints (dict[str, DataType]): A mapping between column names and datatypes - passing this option
+            will override the specified columns on the inferred schema with the specified DataTypes
         io_config (IOConfig): Config to be used with the native downloader
         use_native_downloader: Whether to use the native downloader instead of PyArrow for reading JSON. This
             is currently experimental.

diff --git a/daft/io/_parquet.py b/daft/io/_parquet.py
@@ -35,8 +35,8 @@ def read_parquet(
 
     Args:
         path (str): Path to Parquet file (allows for wildcards)
-        schema_hints (dict[str, DataType]): A mapping between column names and datatypes - passing this option will
-            disable all schema inference on data being read, and throw an error if data being read is incompatible.
+        schema_hints (dict[str, DataType]): A mapping between column names and datatypes - passing this option
+            will override the specified columns on the inferred schema with the specified DataTypes
         io_config (IOConfig): Config to be used with the native downloader
         use_native_downloader: Whether to use the native downloader instead of PyArrow for reading Parquet.
         _multithreaded_io: Whether to use multithreading for IO threads. Setting this to False can be helpful in reducing