From 647ec43708c1065d2e6b633aa2e9c4edde55dbc0 Mon Sep 17 00:00:00 2001 From: Colin Ho Date: Thu, 13 Jun 2024 08:52:36 -0700 Subject: [PATCH] [CHORE] Run doctests in CI (#2362) Add tests for our docs to CI Currently the tests don't affect the CI checks because of the extra `|| true` after the test command, but once all the examples have been fixed I'll remove it. See the run here: https://github.com/Eventual-Inc/Daft/actions/runs/9490371825/job/26153717399?pr=2362 I also changed our `daft.col` example to make it work and show that it actually runs and is tested. Screenshot 2024-06-12 at 3 47 16 PM Screenshot 2024-06-12 at 3 47 25 PM --- .github/workflows/python-package.yml | 37 +++++++++++++++++ Cargo.lock | 1 + daft/dataframe/dataframe.py | 7 +++- daft/expressions/expressions.py | 19 ++++++++- src/common/daft-config/src/lib.rs | 3 ++ src/daft-core/Cargo.toml | 1 + src/daft-core/src/utils/display_table.rs | 53 ++++++++++++++++-------- 7 files changed, 101 insertions(+), 20 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index dc192aa5cf..894b54ef0c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -675,6 +675,43 @@ jobs: if: ${{ contains(needs.*.result, 'failure') }} run: exit 1 + doctests: + runs-on: ubuntu-latest + continue-on-error: true + env: + python-version: '3.10' + DAFT_BOLD_TABLE_HEADERS: '0' + steps: + - uses: actions/checkout@v4 + - uses: moonrepo/setup-rust@v1 + with: + cache: false + - uses: Swatinem/rust-cache@v2 + with: + key: ${{ runner.os }}-build + cache-all-crates: 'true' + - name: Set up Python ${{ env.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.python-version }} + cache: pip + cache-dependency-path: | + pyproject.toml + requirements-dev.txt + - name: Setup Virtual Env + run: | + python -m venv venv + echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -r requirements-dev.txt + - name: Run doctests + run: | + source activate + maturin develop + pytest --doctest-modules --continue-on-collection-errors daft/ || true + publish-coverage-reports: name: Publish coverage reports to CodeCov diff --git a/Cargo.lock b/Cargo.lock index fe1c30be97..de619a24b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1574,6 +1574,7 @@ dependencies = [ "chrono", "chrono-tz", "comfy-table 7.1.1", + "common-daft-config", "common-error", "daft-sketch", "dyn-clone", diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index 62ace8cd61..8821f31f4f 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -1203,8 +1203,9 @@ def concat(self, other: "DataFrame") -> "DataFrame": @DataframePublicAPI def drop_nan(self, *cols: ColumnInputType): - """drops rows that contains NaNs. If cols is None it will drop rows with any NaN value. + """Drops rows that contains NaNs. If cols is None it will drop rows with any NaN value. If column names are supplied, it will drop only those rows that contains NaNs in one of these columns. + Example: >>> df = daft.from_pydict({"a": [1.0, 2.2, 3.5, float("nan")]}) >>> df.drop_na() # drops rows where any column contains NaN values @@ -1240,13 +1241,15 @@ def drop_nan(self, *cols: ColumnInputType): @DataframePublicAPI def drop_null(self, *cols: ColumnInputType): - """drops rows that contains NaNs or NULLs. If cols is None it will drop rows with any NULL value. + """Drops rows that contains NaNs or NULLs. If cols is None it will drop rows with any NULL value. If column names are supplied, it will drop only those rows that contains NULLs in one of these columns. + Example: >>> df = daft.from_pydict({"a": [1.0, 2.2, 3.5, float("NaN")]}) >>> df.drop_null() # drops rows where any column contains Null/NaN values >>> df = daft.from_pydict({"a": [1.6, 2.5, None, float("NaN")]}) >>> df.drop_null("a") # drops rows where column a contains Null/NaN values + Args: *cols (str): column names by which rows containing nans should be filtered diff --git a/daft/expressions/expressions.py b/daft/expressions/expressions.py index d6417c2f1b..00bd8594ff 100644 --- a/daft/expressions/expressions.py +++ b/daft/expressions/expressions.py @@ -96,7 +96,24 @@ def col(name: str) -> Expression: """Creates an Expression referring to the column with the provided name Example: - >>> col("x") + >>> import daft + >>> df = daft.from_pydict({"x": [1, 2, 3], "y": [4, 5, 6]}) + >>> df = df.select(daft.col("x")) + >>> df.show() + ╭───────╮ + │ x │ + │ --- │ + │ Int64 │ + ╞═══════╡ + │ 1 │ + ├╌╌╌╌╌╌╌┤ + │ 2 │ + ├╌╌╌╌╌╌╌┤ + │ 3 │ + ╰───────╯ + + (Showing first 3 of 3 rows) + Args: name: Name of column diff --git a/src/common/daft-config/src/lib.rs b/src/common/daft-config/src/lib.rs index 2f95d58c71..c1f39fd7f5 100644 --- a/src/common/daft-config/src/lib.rs +++ b/src/common/daft-config/src/lib.rs @@ -2,6 +2,9 @@ use common_io_config::IOConfig; use serde::{Deserialize, Serialize}; +/// Environment variables for Daft to use when formatting displays. +pub const BOLD_TABLE_HEADERS_IN_DISPLAY: &str = "DAFT_BOLD_TABLE_HEADERS"; + /// Configurations for Daft to use during the building of a Dataframe's plan. /// /// 1. Creation of a Dataframe including any file listing and schema inference that needs to happen. Note diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml index 4f13f83541..47f719aa8d 100644 --- a/src/daft-core/Cargo.toml +++ b/src/daft-core/Cargo.toml @@ -19,6 +19,7 @@ bincode = {workspace = true} chrono = {workspace = true} chrono-tz = {workspace = true} comfy-table = {workspace = true} +common-daft-config = {path = "../common/daft-config", default-features = false} common-error = {path = "../common/error", default-features = false} daft-sketch = {path = "../daft-sketch", default-features = false} dyn-clone = "1.0.17" diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs index be456d15a6..0f9116d7d0 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/daft-core/src/utils/display_table.rs @@ -2,9 +2,26 @@ use crate::{ datatypes::{Field, TimeUnit}, Series, }; - +use common_daft_config::BOLD_TABLE_HEADERS_IN_DISPLAY; use itertools::Itertools; +fn create_table_cell(value: &str) -> comfy_table::Cell { + let mut attributes = vec![]; + if std::env::var(BOLD_TABLE_HEADERS_IN_DISPLAY) + .as_deref() + .unwrap_or("1") + == "1" + { + attributes.push(comfy_table::Attribute::Bold); + } + + let mut cell = comfy_table::Cell::new(value); + if !attributes.is_empty() { + cell = cell.add_attributes(attributes); + } + cell +} + pub fn display_date32(val: i32) -> String { let epoch_date = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); let date = if val.is_positive() { @@ -98,10 +115,7 @@ pub fn make_schema_vertical_table>(fields: &[F]) -> comfy_table: table.set_width(default_width_if_no_tty as u16); } - let header = vec![ - comfy_table::Cell::new("Column Name").add_attribute(comfy_table::Attribute::Bold), - comfy_table::Cell::new("Type").add_attribute(comfy_table::Attribute::Bold), - ]; + let header = vec![create_table_cell("Column Name"), create_table_cell("Type")]; table.set_header(header); for f in fields.iter() { @@ -155,25 +169,30 @@ pub fn make_comfy_table>( .iter() .take(head_cols) .map(|field| { - comfy_table::Cell::new( - format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(), - ) - .add_attribute(comfy_table::Attribute::Bold) + create_table_cell(&format!( + "{}\n---\n{}", + field.as_ref().name, + field.as_ref().dtype + )) }) .collect::>(); if tail_cols > 0 { let unseen_cols = num_columns - (head_cols + tail_cols); header.push( - comfy_table::Cell::new(format!("{DOTS}\n\n({unseen_cols} hidden)")) - .add_attribute(comfy_table::Attribute::Bold) - .set_alignment(comfy_table::CellAlignment::Center), + create_table_cell(&format!( + "{DOTS}\n\n({unseen_cols} hidden)", + DOTS = DOTS, + unseen_cols = unseen_cols + )) + .set_alignment(comfy_table::CellAlignment::Center), ); header.extend(fields.iter().skip(num_columns - tail_cols).map(|field| { - comfy_table::Cell::new( - format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(), - ) - .add_attribute(comfy_table::Attribute::Bold) - })) + create_table_cell(&format!( + "{}\n---\n{}", + field.as_ref().name, + field.as_ref().dtype + )) + })); } if let Some(columns) = columns