Merge remote-tracking branch 'origin/main'
colin-rogers-dbt committed Jul 10, 2024
2 parents 81a7cf2 + 034cb61 commit 92f4a0b
Showing 4 changed files with 73 additions and 24 deletions.
24 changes: 11 additions & 13 deletions .github/workflows/docs-issues.yml
@@ -1,43 +1,41 @@
# **what?**
# Open an issue in docs.getdbt.com when a PR is labeled `user docs`
# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed

# **why?**
# To reduce barriers for keeping docs up to date

# **when?**
# When a PR is labeled `user docs` and is merged. Runs on pull_request_target to run off the workflow already merged,
# not the workflow that existed on the PR branch. This allows old PRs to get comments.
# When an issue is labeled `user docs` and is closed as completed. Can be labeled before or after the issue is closed.


name: Open issues in docs.getdbt.com repo when a PR is labeled
run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}"
name: Open issues in docs.getdbt.com repo when an issue is labeled
run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}"

on:
pull_request_target:
issues:
types: [labeled, closed]

defaults:
run:
shell: bash

permissions:
issues: write # opens new issues
pull-requests: write # comments on PRs

issues: write # comments on issues

jobs:
open_issues:
# we only want to run this when the PR has been merged or the label in the labeled event is `user docs`. Otherwise it runs the
# we only want to run this when the issue is closed as completed and the label `user docs` has been assigned.
# If this logic does not exist in this workflow, it runs the
# risk of duplication of issues being created due to merge and label both triggering this workflow to run and neither having
# generated the comment before the other runs. This lives here instead of the shared workflow because this is where we
# decide if it should run or not.
if: |
(github.event.pull_request.merged == true) &&
((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) ||
(github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && (
(github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) ||
(github.event.action == 'labeled' && github.event.label.name == 'user docs'))
uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
with:
issue_repository: "dbt-labs/docs.getdbt.com"
issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}"
issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
secrets: inherit
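
For readers less familiar with GitHub Actions expressions, the new `if:` condition above can be read as the following Python sketch of the same predicate over the `issues` webhook payload. The helper name is illustrative and not part of the workflow; field names follow the standard GitHub webhook schema.

# Illustrative sketch only: the workflow's `if:` condition as a plain Python predicate.
def should_open_docs_issue(event: dict) -> bool:
    issue = event["issue"]
    closed_as_completed = (
        issue.get("state") == "closed" and issue.get("state_reason") == "completed"
    )
    if not closed_as_completed:
        return False
    if event["action"] == "closed":
        # Issue closed: fire only if `user docs` was already applied.
        return any(label["name"] == "user docs" for label in issue.get("labels", []))
    if event["action"] == "labeled":
        # Label added after close: fire only for the `user docs` label itself.
        return event["label"]["name"] == "user docs"
    return False
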
17 changes: 9 additions & 8 deletions .github/workflows/release-internal.yml
@@ -10,15 +10,12 @@
#
# Manual trigger.

name: "Release internal patch"
name: "Release to Cloud"
run-name: "Release to Cloud off of ${{ inputs.ref }}"

on:
workflow_dispatch:
inputs:
version_number:
description: "The release version number (i.e. 1.0.0b1)"
type: string
required: true
ref:
description: "The ref (sha or branch name) to use"
type: string
@@ -29,6 +26,11 @@ on:
type: string
default: "python -c \"import dbt.adapters.spark\""
required: true
skip_tests:
description: "Should the tests be skipped? (defaults to false)"
type: boolean
required: true
default: false

defaults:
run:
@@ -129,15 +131,14 @@ jobs:
run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}

invoke-reusable-workflow:
name: "Build and Release Internally"
name: "Create cloud release"
needs: [run-integration-tests]

uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main"

with:
version_number: "${{ inputs.version_number }}"
package_test_command: "${{ inputs.package_test_command }}"
dbms_name: "spark"
ref: "${{ inputs.ref }}"
skip_tests: "${{ inputs.skip_tests }}"

secrets: "inherit"
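
Because this workflow is manual-trigger only, the inputs above are supplied at dispatch time. As an illustrative sketch (assuming this file lives in the dbt-labs/dbt-spark repository and a token with Actions write access is available; all values shown are placeholders), a dispatch through the GitHub REST API looks roughly like this:

# Illustrative only: manually dispatch the "Release to Cloud" workflow via the
# GitHub REST API (the same thing the "Run workflow" button does in the Actions UI).
import os

import requests

response = requests.post(
    "https://api.github.com/repos/dbt-labs/dbt-spark/actions/workflows/release-internal.yml/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    },
    json={
        "ref": "main",  # branch whose copy of the workflow file is run
        "inputs": {
            # workflow_dispatch inputs defined above; the API expects string values
            "ref": "main",
            "package_test_command": 'python -c "import dbt.adapters.spark"',
            "skip_tests": "false",
        },
    },
    timeout=30,
)
response.raise_for_status()  # 204 No Content on success
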
22 changes: 22 additions & 0 deletions tests/functional/adapter/dbt_show/test_dbt_show.py
@@ -0,0 +1,22 @@
import pytest

from dbt.tests.adapter.dbt_show.test_dbt_show import (
BaseShowSqlHeader,
BaseShowLimit,
BaseShowDoesNotHandleDoubleLimit,
)


class TestSparkShowLimit(BaseShowLimit):
pass


class TestSparkShowSqlHeader(BaseShowSqlHeader):
pass


@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_http_cluster")
class TestSparkShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit):
"""The syntax message is quite variable across clusters, but this hits two at once."""

DATABASE_ERROR_MESSAGE = "limit"
34 changes: 31 additions & 3 deletions tests/functional/adapter/test_python_model.py
@@ -15,7 +15,22 @@ class TestPythonModelSpark(BasePythonModelTests):

@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
class TestPySpark(BasePySparkTests):
pass
def test_different_dataframes(self, project):
"""
Test that python models are supported using dataframes from:
- pandas
- pyspark
- pyspark.pandas (formerly databricks.koalas)
Note:
The CI environment is on Apache Spark >3.1, which includes koalas as pyspark.pandas.
The only Databricks runtime that supports Apache Spark <=3.1 is 9.1 LTS, which is EOL 2024-09-23.
For more information, see:
- https://github.com/databricks/koalas
- https://docs.databricks.com/en/release-notes/runtime/index.html
"""
results = run_dbt(["run", "--exclude", "koalas_df"])
assert len(results) == 3
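
The fixture models this test runs come from the shared BasePySparkTests base class and are not shown in this diff. As a rough sketch of the pandas variant (assumed column names, not the actual fixture), a dbt Python model can simply return a pandas DataFrame and the adapter converts it to a Spark DataFrame when materializing the table:

# Illustrative sketch only; the real fixture models live in the shared base tests.
import pandas as pd

def model(dbt, spark):
    dbt.config(materialized="table")
    # Returning a pandas DataFrame is supported; dbt-spark converts it
    # (roughly via spark.createDataFrame) before writing the table.
    return pd.DataFrame({"test": [1] * 10, "test2": [2] * 10})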


@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
@@ -34,7 +49,7 @@ def model(dbt, spark):
materialized='table',
submission_method='job_cluster',
job_cluster_config={
"spark_version": "7.3.x-scala2.12",
"spark_version": "12.2.x-scala2.12",
"node_type_id": "i3.xlarge",
"num_workers": 0,
"spark_conf": {
@@ -45,7 +60,7 @@
"ResourceClass": "SingleNode"
}
},
packages=['spacy', 'torch', 'pydantic<1.10.3']
packages=['spacy', 'torch', 'pydantic>=1.10.8']
)
data = [[1,2]] * 10
return spark.createDataFrame(data, schema=['test', 'test2'])
@@ -64,6 +79,19 @@ def model(dbt, spark):

@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
class TestChangingSchemaSpark:
"""
Confirm that we can set up a spot instance and parse required packages into the Databricks job.
Notes:
- This test generates a spot instance on demand using the settings from `job_cluster_config`
in `models__simple_python_model` above. It takes several minutes to run due to creating the cluster.
The job can be monitored via "Data Engineering > Job Runs" or "Workflows > Job Runs"
in the Databricks UI (instead of via the normal cluster).
- The `spark_version` argument will need to be updated periodically. It will eventually become
unsupported and start causing failures.
- See https://github.com/explosion/spaCy/issues/12659 for why we're pinning pydantic
"""

@pytest.fixture(scope="class")
def models(self):
return {"simple_python_model.py": models__simple_python_model}
