Skip to content

Commit

Permalink
[CHORE] Run doctests in CI (#2362)
Browse files Browse the repository at this point in the history
Add tests for our docs to CI

Currently the tests don't affect the CI checks because of the extra
`|| true` after the test command; I'll remove it once all the examples
have been fixed.

See the run here:
https://github.com/Eventual-Inc/Daft/actions/runs/9490371825/job/26153717399?pr=2362

I also changed our `daft.col` example to make it work and show that it
actually runs and is tested.
<img width="520" alt="Screenshot 2024-06-12 at 3 47 16 PM"
src="https://github.com/Eventual-Inc/Daft/assets/77712970/e08790f4-8cc2-418b-bb16-f731e55f8075">
<img width="543" alt="Screenshot 2024-06-12 at 3 47 25 PM"
src="https://github.com/Eventual-Inc/Daft/assets/77712970/edbb76ef-f167-4596-8462-0be42d125749">
  • Loading branch information
colin-ho authored Jun 13, 2024
1 parent cd775f5 commit 647ec43
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 20 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,43 @@ jobs:
if: ${{ contains(needs.*.result, 'failure') }}
run: exit 1

# Runs pytest's doctest collection over the `daft/` package.
doctests:
runs-on: ubuntu-latest
# Job-level continue-on-error: a failure of this job does not fail the workflow run.
continue-on-error: true
env:
python-version: '3.10'
# Any value other than '1' disables bold table header cells.
# Presumably set so printed tables match the plain-text expected output in docstrings.
DAFT_BOLD_TABLE_HEADERS: '0'
steps:
- uses: actions/checkout@v4
# Rust toolchain setup with its built-in cache turned off; caching is handled by rust-cache below.
- uses: moonrepo/setup-rust@v1
with:
cache: false
- uses: Swatinem/rust-cache@v2
with:
key: ${{ runner.os }}-build
cache-all-crates: 'true'
- name: Set up Python ${{ env.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.python-version }}
cache: pip
cache-dependency-path: |
pyproject.toml
requirements-dev.txt
# Create a virtualenv and put its bin directory on PATH for all later steps.
- name: Setup Virtual Env
run: |
python -m venv venv
echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -r requirements-dev.txt
- name: Run doctests
run: |
# NOTE(review): `activate` has no path here; it is presumably resolved through PATH
# (venv/bin was appended to GITHUB_PATH above) - confirm.
source activate
maturin develop
# `|| true` forces a zero exit status, so doctest failures currently do not fail this step.
pytest --doctest-modules --continue-on-collection-errors daft/ || true
publish-coverage-reports:
name: Publish coverage reports to CodeCov
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions daft/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,8 +1203,9 @@ def concat(self, other: "DataFrame") -> "DataFrame":

@DataframePublicAPI
def drop_nan(self, *cols: ColumnInputType):
"""drops rows that contains NaNs. If cols is None it will drop rows with any NaN value.
"""Drops rows that contains NaNs. If cols is None it will drop rows with any NaN value.
If column names are supplied, it will drop only those rows that contains NaNs in one of these columns.
Example:
>>> df = daft.from_pydict({"a": [1.0, 2.2, 3.5, float("nan")]})
>>> df.drop_na() # drops rows where any column contains NaN values
Expand Down Expand Up @@ -1240,13 +1241,15 @@ def drop_nan(self, *cols: ColumnInputType):

@DataframePublicAPI
def drop_null(self, *cols: ColumnInputType):
"""drops rows that contains NaNs or NULLs. If cols is None it will drop rows with any NULL value.
"""Drops rows that contains NaNs or NULLs. If cols is None it will drop rows with any NULL value.
If column names are supplied, it will drop only those rows that contains NULLs in one of these columns.
Example:
>>> df = daft.from_pydict({"a": [1.0, 2.2, 3.5, float("NaN")]})
>>> df.drop_null() # drops rows where any column contains Null/NaN values
>>> df = daft.from_pydict({"a": [1.6, 2.5, None, float("NaN")]})
>>> df.drop_null("a") # drops rows where column a contains Null/NaN values
Args:
*cols (str): column names by which rows containing nans should be filtered
Expand Down
19 changes: 18 additions & 1 deletion daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,24 @@ def col(name: str) -> Expression:
"""Creates an Expression referring to the column with the provided name
Example:
>>> col("x")
>>> import daft
>>> df = daft.from_pydict({"x": [1, 2, 3], "y": [4, 5, 6]})
>>> df = df.select(daft.col("x"))
>>> df.show()
╭───────╮
│ x │
│ --- │
│ Int64 │
╞═══════╡
│ 1 │
├╌╌╌╌╌╌╌┤
│ 2 │
├╌╌╌╌╌╌╌┤
│ 3 │
╰───────╯
<BLANKLINE>
(Showing first 3 of 3 rows)
Args:
name: Name of column
Expand Down
3 changes: 3 additions & 0 deletions src/common/daft-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
use common_io_config::IOConfig;
use serde::{Deserialize, Serialize};

/// Name of the environment variable that controls whether table header cells are rendered
/// in bold when Daft formats tables for display.
pub const BOLD_TABLE_HEADERS_IN_DISPLAY: &str = "DAFT_BOLD_TABLE_HEADERS";

/// Configurations for Daft to use during the building of a Dataframe's plan.
///
/// 1. Creation of a Dataframe including any file listing and schema inference that needs to happen. Note
Expand Down
1 change: 1 addition & 0 deletions src/daft-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ bincode = {workspace = true}
chrono = {workspace = true}
chrono-tz = {workspace = true}
comfy-table = {workspace = true}
common-daft-config = {path = "../common/daft-config", default-features = false}
common-error = {path = "../common/error", default-features = false}
daft-sketch = {path = "../daft-sketch", default-features = false}
dyn-clone = "1.0.17"
Expand Down
53 changes: 36 additions & 17 deletions src/daft-core/src/utils/display_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,26 @@ use crate::{
datatypes::{Field, TimeUnit},
Series,
};

use common_daft_config::BOLD_TABLE_HEADERS_IN_DISPLAY;
use itertools::Itertools;

/// Builds a table cell holding `value`, bolded unless bold headers have been switched off.
///
/// The cell is bold when the environment variable named by `BOLD_TABLE_HEADERS_IN_DISPLAY`
/// is either not readable (unset or not valid unicode) or set to exactly `"1"`. Any other
/// value yields a cell with no attributes applied.
fn create_table_cell(value: &str) -> comfy_table::Cell {
    let bold_enabled = match std::env::var(BOLD_TABLE_HEADERS_IN_DISPLAY) {
        Ok(setting) => setting == "1",
        // A missing or unreadable variable falls back to the default of bold headers.
        Err(_) => true,
    };

    let cell = comfy_table::Cell::new(value);
    if bold_enabled {
        cell.add_attribute(comfy_table::Attribute::Bold)
    } else {
        cell
    }
}

pub fn display_date32(val: i32) -> String {
let epoch_date = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
let date = if val.is_positive() {
Expand Down Expand Up @@ -98,10 +115,7 @@ pub fn make_schema_vertical_table<F: AsRef<Field>>(fields: &[F]) -> comfy_table:
table.set_width(default_width_if_no_tty as u16);
}

let header = vec![
comfy_table::Cell::new("Column Name").add_attribute(comfy_table::Attribute::Bold),
comfy_table::Cell::new("Type").add_attribute(comfy_table::Attribute::Bold),
];
let header = vec![create_table_cell("Column Name"), create_table_cell("Type")];
table.set_header(header);

for f in fields.iter() {
Expand Down Expand Up @@ -155,25 +169,30 @@ pub fn make_comfy_table<F: AsRef<Field>>(
.iter()
.take(head_cols)
.map(|field| {
comfy_table::Cell::new(
format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(),
)
.add_attribute(comfy_table::Attribute::Bold)
create_table_cell(&format!(
"{}\n---\n{}",
field.as_ref().name,
field.as_ref().dtype
))
})
.collect::<Vec<_>>();
if tail_cols > 0 {
let unseen_cols = num_columns - (head_cols + tail_cols);
header.push(
comfy_table::Cell::new(format!("{DOTS}\n\n({unseen_cols} hidden)"))
.add_attribute(comfy_table::Attribute::Bold)
.set_alignment(comfy_table::CellAlignment::Center),
create_table_cell(&format!(
"{DOTS}\n\n({unseen_cols} hidden)",
DOTS = DOTS,
unseen_cols = unseen_cols
))
.set_alignment(comfy_table::CellAlignment::Center),
);
header.extend(fields.iter().skip(num_columns - tail_cols).map(|field| {
comfy_table::Cell::new(
format!("{}\n---\n{}", field.as_ref().name, field.as_ref().dtype).as_str(),
)
.add_attribute(comfy_table::Attribute::Bold)
}))
create_table_cell(&format!(
"{}\n---\n{}",
field.as_ref().name,
field.as_ref().dtype
))
}));
}

if let Some(columns) = columns
Expand Down

0 comments on commit 647ec43

Please sign in to comment.