diff --git a/.github/workflows/additional.yml b/.github/workflows/additional.yml index 8968b3184e6..ab8f27bc0d9 100644 --- a/.github/workflows/additional.yml +++ b/.github/workflows/additional.yml @@ -13,7 +13,7 @@ jobs: timeout-minutes: 90 steps: - name: Checkout source - uses: actions/checkout@v4.1.2 + uses: actions/checkout@v4.1.3 - name: Setup Conda Environment uses: conda-incubator/setup-miniconda@v3.0.3 @@ -44,7 +44,7 @@ jobs: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source - uses: actions/checkout@v4.1.2 + uses: actions/checkout@v4.1.3 - name: Setup Conda uses: conda-incubator/setup-miniconda@v3.0.3 diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 967e17e5ee7..187611b56a1 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -28,7 +28,7 @@ jobs: name: Build (and upload) runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.2 + - uses: actions/checkout@v4.1.3 with: fetch-depth: 0 - name: Set up Python diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 222229c7c65..d19a45eba5c 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -11,7 +11,7 @@ jobs: name: pre-commit hooks runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.2 + - uses: actions/checkout@v4.1.3 - uses: actions/setup-python@v5 with: python-version: '3.9' diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml index fc2e703a3d9..615ac53764d 100644 --- a/.github/workflows/release-drafter.yml +++ b/.github/workflows/release-drafter.yml @@ -17,7 +17,7 @@ jobs: pull-requests: read runs-on: ubuntu-latest steps: - - uses: release-drafter/release-drafter@v5 + - uses: release-drafter/release-drafter@v6 with: disable-autolabeler: true env: diff --git a/.github/workflows/release-publish.yml b/.github/workflows/release-publish.yml index 540b4177085..b21beddf97d 100644 --- a/.github/workflows/release-publish.yml +++ b/.github/workflows/release-publish.yml @@ -20,7 +20,7 @@ jobs: - name: Set version env # Use a little bit of bash to extract the tag name from the GitHub ref run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV - - uses: release-drafter/release-drafter@v5 + - uses: release-drafter/release-drafter@v6 with: disable-autolabeler: true # Override the Release name/tag/version with the actual tag name diff --git a/.github/workflows/test-report.yaml b/.github/workflows/test-report.yaml index fd5554d8d3b..44bbd12fdd6 100644 --- a/.github/workflows/test-report.yaml +++ b/.github/workflows/test-report.yaml @@ -21,7 +21,7 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v4.1.2 + - uses: actions/checkout@v4.1.3 with: repository: dask/distributed @@ -61,7 +61,7 @@ jobs: mv test_report.html test_short_report.html deploy/ - name: Deploy 🚀 - uses: JamesIves/github-pages-deploy-action@v4.5.0 + uses: JamesIves/github-pages-deploy-action@v4.6.0 with: branch: gh-pages folder: deploy diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4f6c5a809bd..d8dd08dc482 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,7 +59,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v4.1.2 + uses: actions/checkout@v4.1.3 with: fetch-depth: 0 # Needed by codecov.io diff --git a/.github/workflows/update-gpuci.yml b/.github/workflows/update-gpuci.yml index 21ec9292ffb..3085966c2c8 100644 --- a/.github/workflows/update-gpuci.yml +++ b/.github/workflows/update-gpuci.yml @@ -11,7 +11,7 @@ jobs: if: github.repository == 'dask/dask' steps: - - uses: actions/checkout@v4.1.2 + - uses: actions/checkout@v4.1.3 - name: Parse current axis YAML id: rapids_current diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml index 4bc004fcebb..cd9b386d459 100644 --- a/.github/workflows/upstream.yml +++ b/.github/workflows/upstream.yml @@ -20,7 +20,7 @@ jobs: outputs: test-upstream: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - - uses: actions/checkout@v4.1.2 + - uses: actions/checkout@v4.1.3 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1 @@ -47,7 +47,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v4.1.2 + uses: actions/checkout@v4.1.3 - name: Setup Conda Environment uses: conda-incubator/setup-miniconda@v3.0.3 diff --git a/dask/dataframe/tests/test_categorical.py b/dask/dataframe/tests/test_categorical.py index 1e0d1a9a3af..33b0b82e1af 100644 --- a/dask/dataframe/tests/test_categorical.py +++ b/dask/dataframe/tests/test_categorical.py @@ -128,6 +128,18 @@ def test_concat_unions_categoricals(): tm.assert_frame_equal(_concat(frames5), pd.concat(frames6)) +@pytest.mark.gpu +def test_unknown_categories_cudf(): + # We should always start with unknown categories + # if `clear_known_categories` is working. + pytest.importorskip("dask_cudf") + + with dask.config.set({"dataframe.backend": "cudf"}): + ddf = dd.from_dict({"a": [0, 1, 0]}, npartitions=1) + ddf["a"] = ddf["a"].astype("category") + assert not ddf["a"].cat.known + + # TODO: Remove the filterwarnings below @pytest.mark.parametrize( "numeric_only", diff --git a/dask/dataframe/utils.py b/dask/dataframe/utils.py index 284003ead3f..38eca427597 100644 --- a/dask/dataframe/utils.py +++ b/dask/dataframe/utils.py @@ -23,6 +23,7 @@ ) from dask.dataframe._compat import PANDAS_GE_150, tm # noqa: F401 from dask.dataframe.dispatch import ( # noqa : F401 + is_categorical_dtype_dispatch, make_meta, make_meta_obj, meta_nonempty, @@ -283,9 +284,9 @@ def clear_known_categories(x, cols=None, index=True, dtype_backend=None): # categorical accessor is not yet available return x - if isinstance(x, (pd.Series, pd.DataFrame)): + if not is_index_like(x): x = x.copy() - if isinstance(x, pd.DataFrame): + if is_dataframe_like(x): mask = x.dtypes == "category" if cols is None: cols = mask[mask].index @@ -293,12 +294,12 @@ def clear_known_categories(x, cols=None, index=True, dtype_backend=None): raise ValueError("Not all columns are categoricals") for c in cols: x[c] = x[c].cat.set_categories([UNKNOWN_CATEGORIES]) - elif isinstance(x, pd.Series): - if isinstance(x.dtype, pd.CategoricalDtype): + elif is_series_like(x): + if is_categorical_dtype_dispatch(x.dtype): x = x.cat.set_categories([UNKNOWN_CATEGORIES]) - if index and isinstance(x.index, pd.CategoricalIndex): + if index and is_categorical_dtype_dispatch(x.index.dtype): x.index = x.index.set_categories([UNKNOWN_CATEGORIES]) - elif isinstance(x, pd.CategoricalIndex): + elif is_categorical_dtype_dispatch(x.dtype): x = x.set_categories([UNKNOWN_CATEGORIES]) return x