[CHORE] Run doctests in CI (#2362)

Add tests for our docs to CI. Currently, the tests don't affect the CI checks because of the extra `|| true` after the test command, but I'll remove it once all the examples have been fixed. See the run here: https://github.com/Eventual-Inc/Daft/actions/runs/9490371825/job/26153717399?pr=2362 I also fixed our `daft.col` example so that it works, to show that it actually runs and is tested. <img width="520" alt="Screenshot 2024-06-12 at 3 47 16 PM" src="https://github.com/Eventual-Inc/Daft/assets/77712970/e08790f4-8cc2-418b-bb16-f731e55f8075"> <img width="543" alt="Screenshot 2024-06-12 at 3 47 25 PM" src="https://github.com/Eventual-Inc/Daft/assets/77712970/edbb76ef-f167-4596-8462-0be42d125749">
Eventual-Inc · Jun 13, 2024 · 647ec43 · 647ec43
1 parent cd775f5
commit 647ec43
Show file tree

Hide file tree

Showing 7 changed files with 101 additions and 20 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -675,6 +675,43 @@ jobs:
       if: ${{ contains(needs.*.result, 'failure') }}
       run: exit 1
 
+  doctests:
+    runs-on: ubuntu-latest
+    continue-on-error: true
+    env:
+      python-version: '3.10'
+      DAFT_BOLD_TABLE_HEADERS: '0'
+    steps:
+    - uses: actions/checkout@v4
+    - uses: moonrepo/setup-rust@v1
+      with:
+        cache: false
+    - uses: Swatinem/rust-cache@v2
+      with:
+        key: ${{ runner.os }}-build
+        cache-all-crates: 'true'
+    - name: Set up Python ${{ env.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ env.python-version }}
+        cache: pip
+        cache-dependency-path: |
+          pyproject.toml
+          requirements-dev.txt
+    - name: Setup Virtual Env
+      run: |
+        python -m venv venv
+        echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH
+    - name: Install dependencies
+      run: |
+        pip install --upgrade pip
+        pip install -r requirements-dev.txt
+    - name: Run doctests
+      run: |
+        source activate
+        maturin develop
+        pytest --doctest-modules --continue-on-collection-errors daft/ || true
+
 
   publish-coverage-reports:
     name: Publish coverage reports to CodeCov

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py
@@ -1203,8 +1203,9 @@ def concat(self, other: "DataFrame") -> "DataFrame":
 
     @DataframePublicAPI
     def drop_nan(self, *cols: ColumnInputType):
-        """drops rows that contains NaNs. If cols is None it will drop rows with any NaN value.
+        """Drops rows that contains NaNs. If cols is None it will drop rows with any NaN value.
         If column names are supplied, it will drop only those rows that contains NaNs in one of these columns.
+
         Example:
             >>> df = daft.from_pydict({"a": [1.0, 2.2, 3.5, float("nan")]})
             >>> df.drop_nan()  # drops rows where any column contains NaN values
@@ -1240,13 +1241,15 @@ def drop_nan(self, *cols: ColumnInputType):
 
     @DataframePublicAPI
     def drop_null(self, *cols: ColumnInputType):
-        """drops rows that contains NaNs or NULLs. If cols is None it will drop rows with any NULL value.
+        """Drops rows that contains NaNs or NULLs. If cols is None it will drop rows with any NULL value.
         If column names are supplied, it will drop only those rows that contains NULLs in one of these columns.
+
         Example:
             >>> df = daft.from_pydict({"a": [1.0, 2.2, 3.5, float("NaN")]})
             >>> df.drop_null()  # drops rows where any column contains Null/NaN values
             >>> df = daft.from_pydict({"a": [1.6, 2.5, None, float("NaN")]})
             >>> df.drop_null("a")  # drops rows where column a contains Null/NaN values
+
         Args:
             *cols (str): column names by which rows containing nans should be filtered
 

diff --git a/daft/expressions/expressions.py b/daft/expressions/expressions.py
@@ -96,7 +96,24 @@ def col(name: str) -> Expression:
     """Creates an Expression referring to the column with the provided name
 
     Example:
-        >>> col("x")
+        >>> import daft
+        >>> df = daft.from_pydict({"x": [1, 2, 3], "y": [4, 5, 6]})
+        >>> df = df.select(daft.col("x"))
+        >>> df.show()
+        ╭───────╮
+        │ x     │
+        │ ---   │
+        │ Int64 │
+        ╞═══════╡
+        │ 1     │
+        ├╌╌╌╌╌╌╌┤
+        │ 2     │
+        ├╌╌╌╌╌╌╌┤
+        │ 3     │
+        ╰───────╯
+        <BLANKLINE>
+        (Showing first 3 of 3 rows)
+
 
     Args:
         name: Name of column

diff --git a/src/common/daft-config/src/lib.rs b/src/common/daft-config/src/lib.rs
@@ -2,6 +2,9 @@
 use common_io_config::IOConfig;
 use serde::{Deserialize, Serialize};
 
+/// Environment variables for Daft to use when formatting displays.
+pub const BOLD_TABLE_HEADERS_IN_DISPLAY: &str = "DAFT_BOLD_TABLE_HEADERS";
+
 /// Configurations for Daft to use during the building of a Dataframe's plan.
 ///
 /// 1. Creation of a Dataframe including any file listing and schema inference that needs to happen. Note

diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml
@@ -19,6 +19,7 @@ bincode = {workspace = true}
 chrono = {workspace = true}
 chrono-tz = {workspace = true}
 comfy-table = {workspace = true}
+common-daft-config = {path = "../common/daft-config", default-features = false}
 common-error = {path = "../common/error", default-features = false}
 daft-sketch = {path = "../daft-sketch", default-features = false}
 dyn-clone = "1.0.17"

diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs
@@ -2,9 +2,26 @@ use crate::{
     datatypes::{Field, TimeUnit},
     Series,
 };
-
+use common_daft_config::BOLD_TABLE_HEADERS_IN_DISPLAY;
 use itertools::Itertools;
 
+/// Builds a table header cell containing `value`.
+///
+/// The cell is rendered bold unless the environment variable named by
+/// `BOLD_TABLE_HEADERS_IN_DISPLAY` holds a value other than "1"; if the
+/// variable cannot be read (e.g. it is unset), bold stays the default.
+fn create_table_cell(value: &str) -> comfy_table::Cell {
+    let bold_enabled = match std::env::var(BOLD_TABLE_HEADERS_IN_DISPLAY) {
+        Ok(setting) => setting == "1",
+        Err(_) => true,
+    };
+    let cell = comfy_table::Cell::new(value);
+    if !bold_enabled {
+        return cell;
+    }
+    cell.add_attributes(vec![comfy_table::Attribute::Bold])
+}
+
 pub fn display_date32(val: i32) -> String {
     let epoch_date = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
     let date = if val.is_positive() {
@@ -98,10 +115,7 @@ pub fn make_schema_vertical_table<F: AsRef<Field>>(fields: &[F]) -> comfy_table:
         table.set_width(default_width_if_no_tty as u16);
     }
 
-    let header = vec![
-        comfy_table::Cell::new("Column Name").add_attribute(comfy_table::Attribute::Bold),
-        comfy_table::Cell::new("Type").add_attribute(comfy_table::Attribute::Bold),
-    ];
+    let header = vec![create_table_cell("Column Name"), create_table_cell("Type")];
     table.set_header(header);
 
     for f in fields.iter() {
@@ -155,25 +169,30 @@ pub fn make_comfy_table<F: AsRef<Field>>(
         .iter()
         .take(head_cols)
         .map(|field| {
-            comfy_table::Cell::new(
-                format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(),
-            )
-            .add_attribute(comfy_table::Attribute::Bold)
+            create_table_cell(&format!(
+                "{}\n---\n{}",
+                field.as_ref().name,
+                field.as_ref().dtype
+            ))
         })
         .collect::<Vec<_>>();
     if tail_cols > 0 {
         let unseen_cols = num_columns - (head_cols + tail_cols);
         header.push(
-            comfy_table::Cell::new(format!("{DOTS}\n\n({unseen_cols} hidden)"))
-                .add_attribute(comfy_table::Attribute::Bold)
-                .set_alignment(comfy_table::CellAlignment::Center),
+            create_table_cell(&format!(
+                "{DOTS}\n\n({unseen_cols} hidden)",
+                DOTS = DOTS,
+                unseen_cols = unseen_cols
+            ))
+            .set_alignment(comfy_table::CellAlignment::Center),
         );
         header.extend(fields.iter().skip(num_columns - tail_cols).map(|field| {
-            comfy_table::Cell::new(
-                format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(),
-            )
-            .add_attribute(comfy_table::Attribute::Bold)
-        }))
+            create_table_cell(&format!(
+                "{}\n---\n{}",
+                field.as_ref().name,
+                field.as_ref().dtype
+            ))
+        }));
     }
 
     if let Some(columns) = columns