Merge branch 'main' into gpuci-query-planning
rjzamora authored May 2, 2024
2 parents 394a00b + 814ed3b commit 83c8cb4
Showing 31 changed files with 153 additions and 47 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/additional.yml
@@ -13,10 +13,10 @@ jobs:
timeout-minutes: 90
steps:
- name: Checkout source
-        uses: actions/checkout@v4.1.3
+        uses: actions/checkout@v4.1.4

- name: Setup Conda Environment
-        uses: conda-incubator/setup-miniconda@v3.0.3
+        uses: conda-incubator/setup-miniconda@v3.0.4
with:
miniforge-variant: Mambaforge
miniforge-version: latest
@@ -44,10 +44,10 @@ jobs:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- name: Checkout source
-        uses: actions/checkout@v4.1.3
+        uses: actions/checkout@v4.1.4

- name: Setup Conda
-        uses: conda-incubator/setup-miniconda@v3.0.3
+        uses: conda-incubator/setup-miniconda@v3.0.4
with:
miniforge-variant: Mambaforge
miniforge-version: latest
4 changes: 2 additions & 2 deletions .github/workflows/conda.yml
@@ -28,11 +28,11 @@ jobs:
name: Build (and upload)
runs-on: ubuntu-latest
steps:
-      - uses: actions/checkout@v4.1.3
+      - uses: actions/checkout@v4.1.4
with:
fetch-depth: 0
- name: Set up Python
-        uses: conda-incubator/setup-miniconda@v3.0.3
+        uses: conda-incubator/setup-miniconda@v3.0.4
with:
miniforge-variant: Mambaforge
use-mamba: true
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
@@ -11,7 +11,7 @@ jobs:
name: pre-commit hooks
runs-on: ubuntu-latest
steps:
-      - uses: actions/checkout@v4.1.3
+      - uses: actions/checkout@v4.1.4
- uses: actions/setup-python@v5
with:
python-version: '3.9'
4 changes: 2 additions & 2 deletions .github/workflows/test-report.yaml
@@ -21,12 +21,12 @@ jobs:
run:
shell: bash -l {0}
steps:
-      - uses: actions/checkout@v4.1.3
+      - uses: actions/checkout@v4.1.4
with:
repository: dask/distributed

- name: Setup Conda Environment
-        uses: conda-incubator/setup-miniconda@v3.0.3
+        uses: conda-incubator/setup-miniconda@v3.0.4
with:
miniforge-variant: Mambaforge
miniforge-version: latest
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
@@ -59,7 +59,7 @@ jobs:

steps:
- name: Checkout source
-        uses: actions/checkout@v4.1.3
+        uses: actions/checkout@v4.1.4
with:
fetch-depth: 0 # Needed by codecov.io

@@ -71,7 +71,7 @@ jobs:
java-version: "11"

- name: Setup Conda Environment
-        uses: conda-incubator/setup-miniconda@v3.0.3
+        uses: conda-incubator/setup-miniconda@v3.0.4
with:
miniforge-variant: Mambaforge
miniforge-version: latest
2 changes: 1 addition & 1 deletion .github/workflows/update-gpuci.yml
@@ -11,7 +11,7 @@ jobs:
if: github.repository == 'dask/dask'

steps:
-      - uses: actions/checkout@v4.1.3
+      - uses: actions/checkout@v4.1.4

- name: Parse current axis YAML
id: rapids_current
6 changes: 3 additions & 3 deletions .github/workflows/upstream.yml
@@ -20,7 +20,7 @@ jobs:
outputs:
test-upstream: ${{ steps.detect-trigger.outputs.trigger-found }}
steps:
-      - uses: actions/checkout@v4.1.3
+      - uses: actions/checkout@v4.1.4
with:
fetch-depth: 2
- uses: xarray-contrib/ci-trigger@v1
@@ -47,10 +47,10 @@ jobs:

steps:
- name: Checkout source
-        uses: actions/checkout@v4.1.3
+        uses: actions/checkout@v4.1.4

- name: Setup Conda Environment
-        uses: conda-incubator/setup-miniconda@v3.0.3
+        uses: conda-incubator/setup-miniconda@v3.0.4
with:
miniforge-variant: Mambaforge
miniforge-version: latest
7 changes: 4 additions & 3 deletions dask/array/tests/test_array_core.py
@@ -430,8 +430,8 @@ def test_stack_rechunk():


def test_stack_unknown_chunksizes():
-    dd = pytest.importorskip("dask.dataframe")
    pd = pytest.importorskip("pandas")
+    dd = pytest.importorskip("dask.dataframe")

a_df = pd.DataFrame({"x": np.arange(12)})
b_df = pd.DataFrame({"y": np.arange(12) * 10})
@@ -546,8 +546,8 @@ def test_concatenate_types(dtypes):


def test_concatenate_unknown_axes():
-    dd = pytest.importorskip("dask.dataframe")
    pd = pytest.importorskip("pandas")
+    dd = pytest.importorskip("dask.dataframe")

a_df = pd.DataFrame({"x": np.arange(12)})
b_df = pd.DataFrame({"y": np.arange(12) * 10})
@@ -2211,6 +2211,7 @@ def test_to_hdf5():


def test_to_dask_dataframe():
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
a = da.ones((4,), chunks=(2,))
d = a.to_dask_dataframe()
@@ -2770,8 +2771,8 @@ def test_asarray(asarray):
@pytest.mark.parametrize("asarray", [da.asarray, da.asanyarray])
def test_asarray_dask_dataframe(asarray):
# https://github.com/dask/dask/issues/3885
+    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")
-    import pandas as pd

s = dd.from_pandas(pd.Series([1, 2, 3, 4]), 2)
result = asarray(s)
11 changes: 10 additions & 1 deletion dask/array/tests/test_rechunk.py
@@ -286,6 +286,7 @@ def test_rechunk_same():


def test_rechunk_same_fully_unknown():
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
x = da.ones(shape=(10, 10), chunks=(5, 10))
y = dd.from_array(x).values
@@ -299,6 +300,7 @@ def test_rechunk_same_fully_unknown_floats():
"""Similar to test_rechunk_same_fully_unknown but testing the behavior if
``float("nan")`` is used instead of the recommended ``np.nan``
"""
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
x = da.ones(shape=(10, 10), chunks=(5, 10))
y = dd.from_array(x).values
@@ -308,6 +310,7 @@ def test_rechunk_same_partially_unknown():


def test_rechunk_same_partially_unknown():
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
x = da.ones(shape=(10, 10), chunks=(5, 10))
y = dd.from_array(x).values
@@ -592,8 +595,8 @@ def test_intersect_nan_long():


def test_rechunk_unknown_from_pandas():
-    dd = pytest.importorskip("dask.dataframe")
    pd = pytest.importorskip("pandas")
+    dd = pytest.importorskip("dask.dataframe")

arr = np.random.default_rng().standard_normal((50, 10))
x = dd.from_pandas(pd.DataFrame(arr), 2).values
@@ -606,6 +609,7 @@ def test_rechunk_unknown_from_pandas():


def test_rechunk_unknown_from_array():
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
# pd = pytest.importorskip('pandas')
x = dd.from_array(da.ones(shape=(4, 4), chunks=(2, 2))).values
@@ -635,6 +639,7 @@ def test_rechunk_unknown_from_array():
],
)
def test_rechunk_with_fully_unknown_dimension(x, chunks):
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
y = dd.from_array(x).values
result = y.rechunk(chunks)
@@ -661,6 +666,7 @@ def test_rechunk_with_fully_unknown_dimension(x, chunks):
],
)
def test_rechunk_with_partially_unknown_dimension(x, chunks):
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
y = dd.from_array(x).values
z = da.concatenate([x, y])
@@ -680,6 +686,7 @@ def test_rechunk_with_partially_unknown_dimension(x, chunks):
],
)
def test_rechunk_with_fully_unknown_dimension_explicit(new_chunks):
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
x = da.ones(shape=(10, 10), chunks=(5, 2))
y = dd.from_array(x).values
@@ -698,6 +705,7 @@ def test_rechunk_with_fully_unknown_dimension_explicit(new_chunks):
],
)
def test_rechunk_with_partially_unknown_dimension_explicit(new_chunks):
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")
x = da.ones(shape=(10, 10), chunks=(5, 2))
y = dd.from_array(x).values
@@ -715,6 +723,7 @@ def assert_chunks_match(left, right):


def test_rechunk_unknown_raises():
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")

x = da.ones(shape=(10, 10), chunks=(5, 5))
6 changes: 5 additions & 1 deletion dask/bag/tests/test_bag.py
@@ -775,6 +775,7 @@ def test_from_long_sequence():


def test_from_empty_sequence():
+    pytest.importorskip("pandas")
pytest.importorskip("dask.dataframe")
b = db.from_sequence([])
assert b.npartitions == 1
@@ -878,8 +879,8 @@ def test_args():


def test_to_dataframe():
-    dd = pytest.importorskip("dask.dataframe")
    pd = pytest.importorskip("pandas")
+    dd = pytest.importorskip("dask.dataframe")

def check_parts(df, sol):
assert all(
@@ -1608,6 +1609,7 @@ def f_drop(o):


def test_bagged_array_delayed():
+    pytest.importorskip("numpy")
da = pytest.importorskip("dask.array")

obj = da.ones(10, chunks=5).to_delayed()[0]
@@ -1631,6 +1633,7 @@ def test_dask_layers():
def test_dask_layers_to_delayed(optimize):
# `da.Array.to_delayed` causes the layer name to not match the key.
# Ensure the layer name is propagated between `Delayed` and `Item`.
+    pytest.importorskip("numpy")
da = pytest.importorskip("dask.array")
i = db.Item.from_delayed(da.ones(1).to_delayed()[0])
name = i.key[0]
@@ -1660,6 +1663,7 @@ def test_dask_layers_to_delayed(optimize):


def test_to_dataframe_optimize_graph():
+    pytest.importorskip("pandas")
dd = pytest.importorskip("dask.dataframe")

from dask.dataframe.utils import assert_eq as assert_eq_df
6 changes: 4 additions & 2 deletions dask/dataframe/io/tests/test_io.py
@@ -844,15 +844,17 @@ def test_to_delayed_optimize_graph():
d = ddf2.to_delayed()[0]
assert len(d.dask) < 20
d2 = ddf2.to_delayed(optimize_graph=False)[0]
-    assert sorted(d2.dask) == sorted(ddf2.dask)
+    if not dd._dask_expr_enabled():
+        assert sorted(d2.dask) == sorted(ddf2.dask)
assert_eq(ddf2.get_partition(0), d.compute())
assert_eq(ddf2.get_partition(0), d2.compute())

# Scalar
x = ddf2.x.sum()
dx = x.to_delayed()
dx2 = x.to_delayed(optimize_graph=False)
-    assert len(dx.dask) < len(dx2.dask)
+    if not dd._dask_expr_enabled():
+        assert len(dx.dask) < len(dx2.dask)
assert_eq(dx.compute(), dx2.compute())


3 changes: 2 additions & 1 deletion dask/dataframe/io/tests/test_orc.py
@@ -94,6 +94,7 @@ def test_orc_roundtrip(tmpdir, index, columns):
),
}
)
+    data.iloc[0, 0] = 100
if index:
data = data.set_index(index)
df = dd.from_pandas(data, chunksize=500)
@@ -105,7 +106,7 @@

# Read
df2 = dd.read_orc(tmp, index=index, columns=columns)
-    assert_eq(data, df2, check_index=bool(index))
+    assert_eq(data, df2, check_index=False)


@pytest.mark.parametrize("split_stripes", [True, False, 2, 4])
8 changes: 1 addition & 7 deletions dask/dataframe/io/tests/test_parquet.py
@@ -20,12 +20,7 @@
import dask.multiprocessing
from dask.array.numpy_compat import NUMPY_GE_124
from dask.blockwise import Blockwise, optimize_blockwise
-from dask.dataframe._compat import (
-    PANDAS_GE_150,
-    PANDAS_GE_200,
-    PANDAS_GE_202,
-    PANDAS_GE_300,
-)
+from dask.dataframe._compat import PANDAS_GE_150, PANDAS_GE_200, PANDAS_GE_202
from dask.dataframe.io.parquet.core import get_engine
from dask.dataframe.io.parquet.utils import _parse_pandas_metadata
from dask.dataframe.optimize import optimize_dataframe_getitem
@@ -480,7 +475,6 @@ def test_calculate_divisions_no_index(tmpdir, write_engine, read_engine):
assert not df.known_divisions


-@pytest.mark.xfail(PANDAS_GE_300, reason="KeyError")
def test_columns_index_with_multi_index(tmpdir, engine):
fn = os.path.join(str(tmpdir), "test.parquet")
index = pd.MultiIndex.from_arrays(
2 changes: 2 additions & 0 deletions dask/dataframe/io/tests/test_sql.py
@@ -6,6 +6,7 @@

import pytest

+from dask.dataframe._compat import PANDAS_GE_300
from dask.dataframe.io.sql import read_sql, read_sql_query, read_sql_table
from dask.dataframe.utils import assert_eq, get_string_dtype
from dask.utils import tmpfile
@@ -276,6 +277,7 @@ def test_divisions(db):
assert_eq(data, df[["name"]][df.index <= 4])


+@pytest.mark.xfail(PANDAS_GE_300, reason="memory doesn't match")
def test_division_or_partition(db):
with pytest.raises(TypeError, match="either 'divisions' or 'npartitions'"):
read_sql_table(
4 changes: 3 additions & 1 deletion dask/dataframe/tests/test_dataframe.py
@@ -4613,7 +4613,9 @@ def test_idxmaxmin_empty_partitions():
)

if PANDAS_GE_300:
-        ctx = pytest.raises(ValueError, match="Encountered all NA values")
+        ctx = pytest.raises(
+            ValueError, match="Encountered all NA values|Encountered an NA value with"
+        )
elif PANDAS_GE_210:
ctx = pytest.warns(FutureWarning, match="all-NA values")
else:
1 change: 1 addition & 0 deletions dask/diagnostics/tests/test_progress.py
@@ -22,6 +22,7 @@ def check_bar_completed(capsys, width=40):


def test_array_compute(capsys):
+    pytest.importorskip("numpy")
da = pytest.importorskip("dask.array")

data = da.ones((100, 100), dtype="f4", chunks=(100, 100))