diff --git a/.changes/unreleased/Dependencies-20240124-120321.yaml b/.changes/unreleased/Dependencies-20240124-120321.yaml new file mode 100644 index 000000000..ef725de67 --- /dev/null +++ b/.changes/unreleased/Dependencies-20240124-120321.yaml @@ -0,0 +1,6 @@ +kind: Dependencies +body: get dbt-tests-adapters from dbt-adapters repo +time: 2024-01-24T12:03:21.523295-08:00 +custom: + Author: colin-rogers-dbt + PR: "1077" diff --git a/.changes/unreleased/Features-20231218-155409.yaml b/.changes/unreleased/Features-20231218-155409.yaml new file mode 100644 index 000000000..bc965b06f --- /dev/null +++ b/.changes/unreleased/Features-20231218-155409.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add support for checking table-last-modified by metadata +time: 2023-12-18T15:54:09.69635-05:00 +custom: + Author: mikealfare + Issue: "938" diff --git a/.changes/unreleased/Features-20231219-201203.yaml b/.changes/unreleased/Features-20231219-201203.yaml new file mode 100644 index 000000000..eee3f1026 --- /dev/null +++ b/.changes/unreleased/Features-20231219-201203.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Support limiting get_catalog by object name +time: 2023-12-19T20:12:03.990725-05:00 +custom: + Author: mikealfare + Issue: "950" diff --git a/.changes/unreleased/Features-20240205-174614.yaml b/.changes/unreleased/Features-20240205-174614.yaml new file mode 100644 index 000000000..192273d3d --- /dev/null +++ b/.changes/unreleased/Features-20240205-174614.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Support all types for unit testing in dbt-bigquery, expand coverage of + safe_cast macro +time: 2024-02-05T17:46:14.505597-05:00 +custom: + Author: michelleark + Issue: "1090" diff --git a/.changes/unreleased/Under the Hood-20231116-062142.yaml b/.changes/unreleased/Under the Hood-20231116-062142.yaml new file mode 100644 index 000000000..c28270898 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20231116-062142.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Primary and foreign key constraints are not enforced in BigQuery +time: 2023-11-16T06:21:42.935367-08:00 +custom: + Author: dbeatty10 + Issue: "1018" diff --git a/.changes/unreleased/Under the Hood-20240116-154305.yaml b/.changes/unreleased/Under the Hood-20240116-154305.yaml new file mode 100644 index 000000000..bb115abd6 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240116-154305.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Migrate to dbt-common and dbt-adapters package +time: 2024-01-16T15:43:05.046735-08:00 +custom: + Author: colin-rogers-dbt + Issue: "1071" diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml deleted file mode 100644 index 2611a8bdd..000000000 --- a/.github/workflows/jira-creation.yml +++ /dev/null @@ -1,28 +0,0 @@ -# **what?** -# Mirrors issues into Jira. 
Includes the information: title, -# GitHub Issue ID and URL - -# **why?** -# Jira is our tool for tracking and we need to see these issues in there - -# **when?** -# On issue creation or when an issue is labeled `Jira` - -name: Jira Issue Creation - -on: - issues: - types: [opened, labeled] - -permissions: - issues: write - -jobs: - call-label-action: - uses: dbt-labs/actions/.github/workflows/jira-creation.yml@main - with: - project_key: ADAP - secrets: - JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} - JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml deleted file mode 100644 index 1637cbe38..000000000 --- a/.github/workflows/jira-label.yml +++ /dev/null @@ -1,28 +0,0 @@ -# **what?** -# Calls mirroring Jira label Action. Includes adding a new label -# to an existing issue or removing a label as well - -# **why?** -# Jira is our tool for tracking and we need to see these labels in there - -# **when?** -# On labels being added or removed from issues - -name: Jira Label Mirroring - -on: - issues: - types: [labeled, unlabeled] - -permissions: - issues: read - -jobs: - call-label-action: - uses: dbt-labs/actions/.github/workflows/jira-label.yml@main - with: - project_key: ADAP - secrets: - JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} - JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml deleted file mode 100644 index 99158a15f..000000000 --- a/.github/workflows/jira-transition.yml +++ /dev/null @@ -1,29 +0,0 @@ -# **what?** -# Transition a Jira issue to a new state -# Only supports these GitHub Issue transitions: -# closed, deleted, reopened - -# **why?** -# Jira needs to be kept up-to-date - -# **when?** -# On issue closing, deletion, reopened - -name: Jira Issue Transition - -on: - issues: - types: [closed, deleted, reopened] - -# no special access is needed -permissions: read-all - -jobs: - call-label-action: - uses: dbt-labs/actions/.github/workflows/jira-transition.yml@main - with: - project_key: ADAP - secrets: - JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} - JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index cb933baed..c74effcdc 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -5,9 +5,9 @@ from contextlib import contextmanager from dataclasses import dataclass, field -from dbt.common.invocation import get_invocation_id +from dbt_common.invocation import get_invocation_id -from dbt.common.events.contextvars import get_node_info +from dbt_common.events.contextvars import get_node_info from mashumaro.helper import pass_through from functools import lru_cache @@ -27,21 +27,21 @@ ) from dbt.adapters.bigquery import gcloud -from dbt.common.clients import agate_helper -from dbt.adapters.contracts.connection import ConnectionState, AdapterResponse -from dbt.common.exceptions import ( +from dbt_common.clients import agate_helper +from dbt.adapters.contracts.connection import ConnectionState, AdapterResponse, Credentials +from dbt_common.exceptions import ( DbtRuntimeError, DbtConfigError, ) -from dbt.common.exceptions import DbtDatabaseError +from dbt_common.exceptions import DbtDatabaseError from dbt.adapters.exceptions.connection import FailedToConnectError 
-from dbt.adapters.base import BaseConnectionManager, Credentials +from dbt.adapters.base import BaseConnectionManager from dbt.adapters.events.logging import AdapterLogger from dbt.adapters.events.types import SQLQuery -from dbt.common.events.functions import fire_event +from dbt_common.events.functions import fire_event from dbt.adapters.bigquery import __version__ as dbt_version -from dbt.common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum +from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum logger = AdapterLogger("BigQuery") diff --git a/dbt/adapters/bigquery/gcloud.py b/dbt/adapters/bigquery/gcloud.py index 0a08f734d..ea1f644ba 100644 --- a/dbt/adapters/bigquery/gcloud.py +++ b/dbt/adapters/bigquery/gcloud.py @@ -1,6 +1,7 @@ +from dbt_common.exceptions import DbtRuntimeError + from dbt.adapters.events.logging import AdapterLogger -import dbt.common.exceptions -from dbt.common.clients.system import run_cmd +from dbt_common.clients.system import run_cmd NOT_INSTALLED_MSG = """ dbt requires the gcloud SDK to be installed to authenticate with BigQuery. @@ -25,4 +26,4 @@ def setup_default_credentials(): if gcloud_installed(): run_cmd(".", ["gcloud", "auth", "application-default", "login"]) else: - raise dbt.common.exceptions.DbtRuntimeError(NOT_INSTALLED_MSG) + raise DbtRuntimeError(NOT_INSTALLED_MSG) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 03cfd3561..2df35bc65 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from datetime import datetime import json import threading from multiprocessing.context import SpawnContext @@ -9,7 +10,7 @@ import agate from dbt.adapters.contracts.relation import RelationConfig -import dbt.common.exceptions.base +import dbt_common.exceptions.base from dbt.adapters.base import ( # type: ignore AdapterConfig, BaseAdapter, @@ -20,22 +21,26 @@ SchemaSearchMap, available, ) +from dbt.adapters.base.impl import FreshnessResponse from dbt.adapters.cache import _make_ref_key_dict # type: ignore -import dbt.common.clients.agate_helper +from dbt.adapters.capability import Capability, CapabilityDict, CapabilitySupport, Support +import dbt_common.clients.agate_helper from dbt.adapters.contracts.connection import AdapterResponse -from dbt.common.contracts.constraints import ColumnLevelConstraint, ConstraintType, ModelLevelConstraint # type: ignore -from dbt.common.dataclass_schema import dbtClassMixin +from dbt.adapters.contracts.macros import MacroResolverProtocol +from dbt_common.contracts.constraints import ColumnLevelConstraint, ConstraintType, ModelLevelConstraint # type: ignore +from dbt_common.dataclass_schema import dbtClassMixin from dbt.adapters.events.logging import AdapterLogger -from dbt.common.events.functions import fire_event +from dbt_common.events.functions import fire_event from dbt.adapters.events.types import SchemaCreation, SchemaDrop -import dbt.common.exceptions -from dbt.common.utils import filter_null_values +import dbt_common.exceptions +from dbt_common.utils import filter_null_values import google.api_core import google.auth import google.oauth2 import google.cloud.bigquery from google.cloud.bigquery import AccessEntry, SchemaField, Table as BigQueryTable import google.cloud.exceptions +import pytz from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager from dbt.adapters.bigquery.column import get_nested_column_data_types @@ -114,10 +119,17 @@ class BigQueryAdapter(BaseAdapter): 
ConstraintType.check: ConstraintSupport.NOT_SUPPORTED, ConstraintType.not_null: ConstraintSupport.ENFORCED, ConstraintType.unique: ConstraintSupport.NOT_SUPPORTED, - ConstraintType.primary_key: ConstraintSupport.ENFORCED, - ConstraintType.foreign_key: ConstraintSupport.ENFORCED, + ConstraintType.primary_key: ConstraintSupport.NOT_ENFORCED, + ConstraintType.foreign_key: ConstraintSupport.NOT_ENFORCED, } + _capabilities: CapabilityDict = CapabilityDict( + { + Capability.TableLastModifiedMetadata: CapabilitySupport(support=Support.Full), + Capability.SchemaMetadataByRelations: CapabilitySupport(support=Support.Full), + } + ) + def __init__(self, config, mp_context: SpawnContext) -> None: super().__init__(config, mp_context) self.connections: BigQueryConnectionManager = self.connections @@ -147,7 +159,7 @@ def drop_relation(self, relation: BigQueryRelation) -> None: conn.handle.delete_table(table_ref, not_found_ok=True) def truncate_relation(self, relation: BigQueryRelation) -> None: - raise dbt.common.exceptions.base.NotImplementedError( + raise dbt_common.exceptions.base.NotImplementedError( "`truncate` is not implemented for this adapter!" ) @@ -164,7 +176,7 @@ def rename_relation( or from_relation.type == RelationType.View or to_relation.type == RelationType.View ): - raise dbt.common.exceptions.DbtRuntimeError( + raise dbt_common.exceptions.DbtRuntimeError( "Renaming of views is not currently supported in BigQuery" ) @@ -390,7 +402,7 @@ def copy_table(self, source, destination, materialization): elif materialization == "table": write_disposition = WRITE_TRUNCATE else: - raise dbt.common.exceptions.CompilationError( + raise dbt_common.exceptions.CompilationError( 'Copy table materialization must be "copy" or "table", but ' f"config.get('copy_materialization', 'table') was " f"{materialization}" @@ -437,11 +449,11 @@ def poll_until_job_completes(cls, job, timeout): job.reload() if job.state != "DONE": - raise dbt.common.exceptions.DbtRuntimeError("BigQuery Timeout Exceeded") + raise dbt_common.exceptions.DbtRuntimeError("BigQuery Timeout Exceeded") elif job.error_result: message = "\n".join(error["message"].strip() for error in job.errors) - raise dbt.common.exceptions.DbtRuntimeError(message) + raise dbt_common.exceptions.DbtRuntimeError(message) def _bq_table_to_relation(self, bq_table) -> Union[BigQueryRelation, None]: if bq_table is None: @@ -465,7 +477,7 @@ def add_query(self, sql, auto_begin=True, bindings=None, abridge_sql_log=False): if self.nice_connection_name() in ["on-run-start", "on-run-end"]: self.warning_on_hooks(self.nice_connection_name()) else: - raise dbt.common.exceptions.base.NotImplementedError( + raise dbt_common.exceptions.base.NotImplementedError( "`add_query` is not implemented for this adapter!" 
) @@ -709,6 +721,26 @@ def _get_catalog_schemas(self, relation_config: Iterable[RelationConfig]) -> Sch ) return result + def calculate_freshness_from_metadata( + self, + source: BaseRelation, + macro_resolver: Optional[MacroResolverProtocol] = None, + ) -> Tuple[Optional[AdapterResponse], FreshnessResponse]: + conn = self.connections.get_thread_connection() + client: google.cloud.bigquery.Client = conn.handle + + table_ref = self.get_table_ref_from_relation(source) + table = client.get_table(table_ref) + snapshot = datetime.now(tz=pytz.UTC) + + freshness = FreshnessResponse( + max_loaded_at=table.modified, + snapshotted_at=snapshot, + age=(snapshot - table.modified).total_seconds(), + ) + + return None, freshness + @available.parse(lambda *a, **k: {}) def get_common_options( self, config: Dict[str, Any], node: Dict[str, Any], temporary: bool = False @@ -777,7 +809,7 @@ def describe_relation( bq_table = self.get_bq_table(relation) parser = BigQueryMaterializedViewConfig else: - raise dbt.common.exceptions.DbtRuntimeError( + raise dbt_common.exceptions.DbtRuntimeError( f"The method `BigQueryAdapter.describe_relation` is not implemented " f"for the relation type: {relation.type}" ) @@ -843,7 +875,7 @@ def string_add_sql( elif location == "prepend": return f"concat('{value}', {add_to})" else: - raise dbt.common.exceptions.DbtRuntimeError( + raise dbt_common.exceptions.DbtRuntimeError( f'Got an unexpected location value of "{location}"' ) diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index c25ef0a67..8abda577b 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -12,8 +12,8 @@ BigQueryPartitionConfigChange, ) from dbt.adapters.contracts.relation import RelationType, RelationConfig -from dbt.common.exceptions import CompilationError -from dbt.common.utils.dict import filter_null_values +from dbt_common.exceptions import CompilationError +from dbt_common.utils.dict import filter_null_values Self = TypeVar("Self", bound="BigQueryRelation") diff --git a/dbt/adapters/bigquery/relation_configs/_materialized_view.py b/dbt/adapters/bigquery/relation_configs/_materialized_view.py index fd0c191c3..81ca6b3de 100644 --- a/dbt/adapters/bigquery/relation_configs/_materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/_materialized_view.py @@ -75,10 +75,10 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any } # optional - if "partition_by" in relation_config.config: + if relation_config.config and "partition_by" in relation_config.config: config_dict.update({"partition": PartitionConfig.parse_model_node(relation_config)}) - if "cluster_by" in relation_config.config: + if relation_config.config and "cluster_by" in relation_config.config: config_dict.update( {"cluster": BigQueryClusterConfig.parse_relation_config(relation_config)} ) diff --git a/dbt/adapters/bigquery/relation_configs/_partition.py b/dbt/adapters/bigquery/relation_configs/_partition.py index 0fe816359..555aa3664 100644 --- a/dbt/adapters/bigquery/relation_configs/_partition.py +++ b/dbt/adapters/bigquery/relation_configs/_partition.py @@ -1,10 +1,10 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -import dbt.common.exceptions +import dbt_common.exceptions from dbt.adapters.relation_configs import RelationConfigChange from dbt.adapters.contracts.relation import RelationConfig -from dbt.common.dataclass_schema import dbtClassMixin, ValidationError +from dbt_common.dataclass_schema import 
dbtClassMixin, ValidationError from google.cloud.bigquery.table import Table as BigQueryTable @@ -92,11 +92,11 @@ def parse(cls, raw_partition_by) -> Optional["PartitionConfig"]: } ) except ValidationError as exc: - raise dbt.common.exceptions.base.DbtValidationError( + raise dbt_common.exceptions.base.DbtValidationError( "Could not parse partition config" ) from exc except TypeError: - raise dbt.common.exceptions.CompilationError( + raise dbt_common.exceptions.CompilationError( f"Invalid partition_by config:\n" f" Got: {raw_partition_by}\n" f' Expected a dictionary with "field" and "data_type" keys' @@ -111,7 +111,7 @@ def parse_model_node(cls, relation_config: RelationConfig) -> Dict[str, Any]: This doesn't currently collect `time_ingestion_partitioning` and `copy_partitions` because this was built for materialized views, which do not support those settings. """ - config_dict = relation_config.config.extra.get("partition_by") # type: ignore + config_dict: Dict[str, Any] = relation_config.config.extra.get("partition_by") # type: ignore if "time_ingestion_partitioning" in config_dict: del config_dict["time_ingestion_partitioning"] if "copy_partitions" in config_dict: diff --git a/dbt/adapters/bigquery/utility.py b/dbt/adapters/bigquery/utility.py index 5d9c3de12..5914280a3 100644 --- a/dbt/adapters/bigquery/utility.py +++ b/dbt/adapters/bigquery/utility.py @@ -1,7 +1,7 @@ import json from typing import Any, Optional -import dbt.common.exceptions +import dbt_common.exceptions def bool_setting(value: Optional[Any] = None) -> Optional[bool]: @@ -41,5 +41,5 @@ def float_setting(value: Optional[Any] = None) -> Optional[float]: def sql_escape(string): if not isinstance(string, str): - raise dbt.common.exceptions.CompilationError(f"cannot escape a non-string: {string}") + raise dbt_common.exceptions.CompilationError(f"cannot escape a non-string: {string}") return json.dumps(string)[1:-1] diff --git a/dbt/include/bigquery/macros/catalog.sql b/dbt/include/bigquery/macros/catalog.sql deleted file mode 100644 index 25166c7b4..000000000 --- a/dbt/include/bigquery/macros/catalog.sql +++ /dev/null @@ -1,231 +0,0 @@ - -{% macro bigquery__get_catalog(information_schema, schemas) -%} - - {%- if (schemas | length) == 0 -%} - {# Hopefully nothing cares about the columns we return when there are no rows #} - {%- set query = "select 1 as id limit 0" -%} - {%- else -%} - - {%- set query -%} - with materialized_views as ( - select - table_catalog as project_id, - table_schema as dataset_id, - table_name as table_id - from {{ information_schema.replace(information_schema_view='MATERIALIZED_VIEWS') }} - ), - tables as ( - select - tables.project_id as table_database, - tables.dataset_id as table_schema, - tables.table_id as original_table_name, - - concat(tables.project_id, '.', tables.dataset_id, '.', tables.table_id) as relation_id, - - tables.row_count, - tables.size_bytes as size_bytes, - case - when materialized_views.table_id is not null then 'materialized view' - when tables.type = 1 then 'table' - when tables.type = 2 then 'view' - else 'external' - end as table_type, - - REGEXP_CONTAINS(tables.table_id, '^.+[0-9]{8}$') and coalesce(type, 0) = 1 as is_date_shard, - REGEXP_EXTRACT(tables.table_id, '^(.+)[0-9]{8}$') as shard_base_name, - REGEXP_EXTRACT(tables.table_id, '^.+([0-9]{8})$') as shard_name - - from {{ information_schema.replace(information_schema_view='__TABLES__') }} tables - left join materialized_views - on materialized_views.project_id = tables.project_id - and materialized_views.dataset_id = 
tables.dataset_id - and materialized_views.table_id = tables.table_id - where ( - {%- for schema in schemas -%} - upper(tables.dataset_id) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - ), - - table_options as ( - select - concat(table_catalog, '.', table_schema, '.', table_name) as relation_id, - JSON_VALUE(option_value) as table_comment - - from {{ information_schema.replace(information_schema_view='TABLE_OPTIONS') }} - where option_name = 'description' - ), - extracted as ( - - select *, - case - when is_date_shard then shard_base_name - else original_table_name - end as table_name - - from tables - - ), - - unsharded_tables as ( - - select - table_database, - table_schema, - table_name, - coalesce(table_type, 'external') as table_type, - is_date_shard, - - struct( - min(shard_name) as shard_min, - max(shard_name) as shard_max, - count(*) as shard_count - ) as table_shards, - - sum(size_bytes) as size_bytes, - sum(row_count) as row_count, - - max(relation_id) as relation_id - - from extracted - group by 1,2,3,4,5 - - ), - - info_schema_columns as ( - - select - concat(table_catalog, '.', table_schema, '.', table_name) as relation_id, - table_catalog as table_database, - table_schema, - table_name, - - -- use the "real" column name from the paths query below - column_name as base_column_name, - ordinal_position as column_index, - - is_partitioning_column, - clustering_ordinal_position - - from {{ information_schema.replace(information_schema_view='COLUMNS') }} - where ordinal_position is not null - - ), - - info_schema_column_paths as ( - - select - concat(table_catalog, '.', table_schema, '.', table_name) as relation_id, - field_path as column_name, - data_type as column_type, - column_name as base_column_name, - description as column_comment - - from {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }} - - ), - - columns as ( - - select * except (base_column_name) - from info_schema_columns - join info_schema_column_paths using (relation_id, base_column_name) - - ), - - column_stats as ( - - select - table_database, - table_schema, - table_name, - max(relation_id) as relation_id, - max(case when is_partitioning_column = 'YES' then 1 else 0 end) = 1 as is_partitioned, - max(case when is_partitioning_column = 'YES' then column_name else null end) as partition_column, - max(case when clustering_ordinal_position is not null then 1 else 0 end) = 1 as is_clustered, - array_to_string( - array_agg( - case - when clustering_ordinal_position is not null then column_name - else null - end ignore nulls - order by clustering_ordinal_position - ), ', ' - ) as clustering_columns - - from columns - group by 1,2,3 - - ) - - select - unsharded_tables.table_database, - unsharded_tables.table_schema, - case - when is_date_shard then concat(unsharded_tables.table_name, '*') - else unsharded_tables.table_name - end as table_name, - unsharded_tables.table_type, - table_options.table_comment, - - -- coalesce name and type for External tables - these columns are not - -- present in the COLUMN_FIELD_PATHS resultset - coalesce(columns.column_name, '') as column_name, - -- invent a row number to account for nested fields -- BQ does - -- not treat these nested properties as independent fields - row_number() over ( - partition by relation_id - order by columns.column_index, columns.column_name - ) as column_index, - coalesce(columns.column_type, '') as column_type, - columns.column_comment, - - 'Shard count' as `stats__date_shards__label`, - 
table_shards.shard_count as `stats__date_shards__value`, - 'The number of date shards in this table' as `stats__date_shards__description`, - is_date_shard as `stats__date_shards__include`, - - 'Shard (min)' as `stats__date_shard_min__label`, - table_shards.shard_min as `stats__date_shard_min__value`, - 'The first date shard in this table' as `stats__date_shard_min__description`, - is_date_shard as `stats__date_shard_min__include`, - - 'Shard (max)' as `stats__date_shard_max__label`, - table_shards.shard_max as `stats__date_shard_max__value`, - 'The last date shard in this table' as `stats__date_shard_max__description`, - is_date_shard as `stats__date_shard_max__include`, - - '# Rows' as `stats__num_rows__label`, - row_count as `stats__num_rows__value`, - 'Approximate count of rows in this table' as `stats__num_rows__description`, - (unsharded_tables.table_type = 'table') as `stats__num_rows__include`, - - 'Approximate Size' as `stats__num_bytes__label`, - size_bytes as `stats__num_bytes__value`, - 'Approximate size of table as reported by BigQuery' as `stats__num_bytes__description`, - (unsharded_tables.table_type = 'table') as `stats__num_bytes__include`, - - 'Partitioned By' as `stats__partitioning_type__label`, - partition_column as `stats__partitioning_type__value`, - 'The partitioning column for this table' as `stats__partitioning_type__description`, - is_partitioned as `stats__partitioning_type__include`, - - 'Clustered By' as `stats__clustering_fields__label`, - clustering_columns as `stats__clustering_fields__value`, - 'The clustering columns for this table' as `stats__clustering_fields__description`, - is_clustered as `stats__clustering_fields__include` - - -- join using relation_id (an actual relation, not a shard prefix) to make - -- sure that column metadata is picked up through the join. 
This will only - -- return the column information for the "max" table in a date-sharded table set - from unsharded_tables - left join table_options using (relation_id) - left join columns using (relation_id) - left join column_stats using (relation_id) - {%- endset -%} - - {%- endif -%} - - {{ return(run_query(query)) }} - -{%- endmacro %} diff --git a/dbt/include/bigquery/macros/catalog/by_relation.sql b/dbt/include/bigquery/macros/catalog/by_relation.sql new file mode 100644 index 000000000..adaa740f6 --- /dev/null +++ b/dbt/include/bigquery/macros/catalog/by_relation.sql @@ -0,0 +1,36 @@ +{% macro bigquery__get_catalog_relations(information_schema, relations) -%} + + {%- if (relations | length) == 0 -%} + {# Hopefully nothing cares about the columns we return when there are no rows #} + {%- set query = "select 1 as id limit 0" -%} + + {%- else -%} + {%- set query -%} + with + table_shards_stage as ({{ _bigquery__get_table_shards_sql(information_schema) }}), + table_shards as ( + select * from table_shards_stage + where ( + {%- for relation in relations -%} + ( + upper(table_schema) = upper('{{ relation.schema }}') + and upper(table_name) = upper('{{ relation.identifier }}') + ) + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) + ), + tables as ({{ _bigquery__get_tables_sql() }}), + table_stats as ({{ _bigquery__get_table_stats_sql() }}), + + columns as ({{ _bigquery__get_columns_sql(information_schema) }}), + column_stats as ({{ _bigquery__get_column_stats_sql() }}) + + {{ _bigquery__get_extended_catalog_sql() }} + {%- endset -%} + + {%- endif -%} + + {{ return(run_query(query)) }} + +{%- endmacro %} diff --git a/dbt/include/bigquery/macros/catalog/by_schema.sql b/dbt/include/bigquery/macros/catalog/by_schema.sql new file mode 100644 index 000000000..0d36f2b84 --- /dev/null +++ b/dbt/include/bigquery/macros/catalog/by_schema.sql @@ -0,0 +1,32 @@ +{% macro bigquery__get_catalog(information_schema, schemas) -%} + + {%- if (schemas | length) == 0 -%} + {# Hopefully nothing cares about the columns we return when there are no rows #} + {%- set query = "select 1 as id limit 0" -%} + + {%- else -%} + {%- set query -%} + with + table_shards as ( + {{ _bigquery__get_table_shards_sql(information_schema) }} + where ( + {%- for schema in schemas -%} + upper(tables.dataset_id) = upper('{{ schema }}') + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) + ), + tables as ({{ _bigquery__get_tables_sql() }}), + table_stats as ({{ _bigquery__get_table_stats_sql() }}), + + columns as ({{ _bigquery__get_columns_sql(information_schema) }}), + column_stats as ({{ _bigquery__get_column_stats_sql() }}) + + {{ _bigquery__get_extended_catalog_sql() }} + {%- endset -%} + + {%- endif -%} + + {{ return(run_query(query)) }} + +{%- endmacro %} diff --git a/dbt/include/bigquery/macros/catalog/catalog.sql b/dbt/include/bigquery/macros/catalog/catalog.sql new file mode 100644 index 000000000..de16f82bf --- /dev/null +++ b/dbt/include/bigquery/macros/catalog/catalog.sql @@ -0,0 +1,177 @@ +{% macro _bigquery__get_table_shards_sql(information_schema) %} + select + tables.project_id as table_catalog, + tables.dataset_id as table_schema, + coalesce(REGEXP_EXTRACT(tables.table_id, '^(.+)[0-9]{8}$'), tables.table_id) as table_name, + tables.table_id as shard_name, + REGEXP_EXTRACT(tables.table_id, '^.+([0-9]{8})$') as shard_index, + REGEXP_CONTAINS(tables.table_id, '^.+[0-9]{8}$') and tables.type = 1 as is_date_shard, + case + when materialized_views.table_name is not null then 'materialized view' + when 
tables.type = 1 then 'table' + when tables.type = 2 then 'view' + else 'external' + end as table_type, + tables.type = 1 as is_table, + JSON_VALUE(table_description.option_value) as table_comment, + tables.size_bytes, + tables.row_count + from {{ information_schema.replace(information_schema_view='__TABLES__') }} tables + left join {{ information_schema.replace(information_schema_view='MATERIALIZED_VIEWS') }} materialized_views + on materialized_views.table_catalog = tables.project_id + and materialized_views.table_schema = tables.dataset_id + and materialized_views.table_name = tables.table_id + left join {{ information_schema.replace(information_schema_view='TABLE_OPTIONS') }} table_description + on table_description.table_catalog = tables.project_id + and table_description.table_schema = tables.dataset_id + and table_description.table_name = tables.table_id + and table_description.option_name = 'description' +{% endmacro %} + + +{% macro _bigquery__get_tables_sql() %} + select distinct + table_catalog, + table_schema, + table_name, + is_date_shard, + table_type, + is_table, + table_comment + from table_shards +{% endmacro %} + + +{% macro _bigquery__get_table_stats_sql() %} + select + table_catalog, + table_schema, + table_name, + max(shard_name) as latest_shard_name, + min(shard_index) as shard_min, + max(shard_index) as shard_max, + count(shard_index) as shard_count, + sum(size_bytes) as size_bytes, + sum(row_count) as row_count + from table_shards + group by 1, 2, 3 +{% endmacro %} + + +{% macro _bigquery__get_columns_sql(information_schema) %} + select + columns.table_catalog, + columns.table_schema, + columns.table_name as shard_name, + coalesce(paths.field_path, '') as column_name, + -- invent a row number to account for nested fields + -- BQ does not treat these nested properties as independent fields + row_number() over ( + partition by + columns.table_catalog, + columns.table_schema, + columns.table_name + order by + columns.ordinal_position, + paths.field_path + ) as column_index, + coalesce(paths.data_type, '') as column_type, + paths.description as column_comment, + case when columns.is_partitioning_column = 'YES' then 1 else 0 end as is_partitioning_column, + case when columns.is_partitioning_column = 'YES' then paths.field_path end as partition_column, + case when columns.clustering_ordinal_position is not null then 1 else 0 end as is_clustering_column, + case when columns.clustering_ordinal_position is not null then paths.field_path end as cluster_column, + columns.clustering_ordinal_position + from {{ information_schema.replace(information_schema_view='COLUMNS') }} columns + join {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }} paths + on paths.table_catalog = columns.table_catalog + and paths.table_schema = columns.table_schema + and paths.table_name = columns.table_name + and paths.column_name = columns.column_name + where columns.ordinal_position is not null +{% endmacro %} + + +{% macro _bigquery__get_column_stats_sql() %} + select + table_catalog, + table_schema, + shard_name, + max(is_partitioning_column) = 1 as is_partitioned, + max(partition_column) as partition_column, + max(is_clustering_column) = 1 as is_clustered, + array_to_string( + array_agg( + cluster_column ignore nulls + order by clustering_ordinal_position + ), ', ' + ) as clustering_columns + from columns + group by 1, 2, 3 +{% endmacro %} + + +{% macro _bigquery__get_extended_catalog_sql() %} + select + tables.table_catalog as table_database, + tables.table_schema, + 
case + when tables.is_date_shard then concat(tables.table_name, '*') + else tables.table_name + end as table_name, + tables.table_type, + tables.table_comment, + columns.column_name, + columns.column_index, + columns.column_type, + columns.column_comment, + + 'Shard count' as `stats__date_shards__label`, + table_stats.shard_count as `stats__date_shards__value`, + 'The number of date shards in this table' as `stats__date_shards__description`, + tables.is_date_shard as `stats__date_shards__include`, + + 'Shard (min)' as `stats__date_shard_min__label`, + table_stats.shard_min as `stats__date_shard_min__value`, + 'The first date shard in this table' as `stats__date_shard_min__description`, + tables.is_date_shard as `stats__date_shard_min__include`, + + 'Shard (max)' as `stats__date_shard_max__label`, + table_stats.shard_max as `stats__date_shard_max__value`, + 'The last date shard in this table' as `stats__date_shard_max__description`, + tables.is_date_shard as `stats__date_shard_max__include`, + + '# Rows' as `stats__num_rows__label`, + table_stats.row_count as `stats__num_rows__value`, + 'Approximate count of rows in this table' as `stats__num_rows__description`, + tables.is_table as `stats__num_rows__include`, + + 'Approximate Size' as `stats__num_bytes__label`, + table_stats.size_bytes as `stats__num_bytes__value`, + 'Approximate size of table as reported by BigQuery' as `stats__num_bytes__description`, + tables.is_table as `stats__num_bytes__include`, + + 'Partitioned By' as `stats__partitioning_type__label`, + column_stats.partition_column as `stats__partitioning_type__value`, + 'The partitioning column for this table' as `stats__partitioning_type__description`, + column_stats.is_partitioned as `stats__partitioning_type__include`, + + 'Clustered By' as `stats__clustering_fields__label`, + column_stats.clustering_columns as `stats__clustering_fields__value`, + 'The clustering columns for this table' as `stats__clustering_fields__description`, + column_stats.is_clustered as `stats__clustering_fields__include` + + from tables + join table_stats + on table_stats.table_catalog = tables.table_catalog + and table_stats.table_schema = tables.table_schema + and table_stats.table_name = tables.table_name + left join column_stats + on column_stats.table_catalog = tables.table_catalog + and column_stats.table_schema = tables.table_schema + and column_stats.shard_name = table_stats.latest_shard_name + left join columns + on columns.table_catalog = tables.table_catalog + and columns.table_schema = tables.table_schema + and columns.shard_name = table_stats.latest_shard_name +{% endmacro %} diff --git a/dbt/include/bigquery/macros/utils/safe_cast.sql b/dbt/include/bigquery/macros/utils/safe_cast.sql index ac62bb050..ec312af11 100644 --- a/dbt/include/bigquery/macros/utils/safe_cast.sql +++ b/dbt/include/bigquery/macros/utils/safe_cast.sql @@ -1,3 +1,27 @@ {% macro bigquery__safe_cast(field, type) %} +{%- if type.lower().startswith('array') and field is iterable and (field is not string and field is not mapping) and field | length > 0 -%} + {#-- Extract nested type from 'array' --#} + {% set nested_type = type.lower()[6:-1] %} + {#-- BigQuery does not support direct casts to arrays. 
instead, each element must be cast individually + reaggregated into an array --#} + {%- if cast_from_string_unsupported_for(nested_type) %} + (select array_agg(safe_cast(i as {{ nested_type }})) from unnest([ + {%- for nested_field in field %} + {{ nested_field.strip('"').strip("'") }}{{ ',' if not loop.last }} + {%- endfor %} + ]) i) + {%- else -%} + (select array_agg(safe_cast(i as {{nested_type}})) from unnest({{field}}) i) + {%- endif -%} + +{%- elif type.lower() == 'json' and field is mapping -%} + safe_cast(json {{ dbt.string_literal(tojson(field)) }} as json) +{%- elif cast_from_string_unsupported_for(type) and field is string -%} + safe_cast({{field.strip('"').strip("'")}} as {{type}}) +{%- else -%} safe_cast({{field}} as {{type}}) +{%- endif -%} +{% endmacro %} + +{% macro cast_from_string_unsupported_for(type) %} + {{ return(type.lower().startswith('struct') or type.lower() == 'geography') }} {% endmacro %} diff --git a/dev-requirements.txt b/dev-requirements.txt index bd45c99a1..0af563a7d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,9 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-common.git +git+https://github.com/dbt-labs/dbt-adapters.git +git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor black~=23.12 diff --git a/setup.py b/setup.py index 56c271fec..2e969e246 100644 --- a/setup.py +++ b/setup.py @@ -35,31 +35,8 @@ def _dbt_bigquery_version() -> str: return attributes["version"] -# require a compatible minor version (~=) and prerelease if this is a prerelease -def _dbt_core_version(plugin_version: str) -> str: - """ - Determine the compatible version of dbt-core using this package's version - """ - try: - # *_ may indicate a dev release which won't affect the core version needed - major, minor, plugin_patch, *_ = plugin_version.split(".", maxsplit=3) - except ValueError: - raise ValueError(f"Invalid version: {plugin_version}") - - pre_release_phase = "".join([i for i in plugin_patch if not i.isdigit()]) - if pre_release_phase: - if pre_release_phase not in ["a", "b", "rc"]: - raise ValueError(f"Invalid version: {plugin_version}") - core_patch = f"0{pre_release_phase}1" - else: - core_patch = "0" - - return f"{major}.{minor}.{core_patch}" - - package_name = "dbt-bigquery" package_version = "1.8.0a1" -dbt_core_version = _dbt_core_version(_dbt_bigquery_version()) description = """The BigQuery adapter plugin for dbt""" setup( @@ -74,7 +51,8 @@ def _dbt_core_version(plugin_version: str) -> str: packages=find_namespace_packages(include=["dbt", "dbt.*"]), include_package_data=True, install_requires=[ - f"dbt-core~={_dbt_core_version(_dbt_bigquery_version())}", + "dbt-common<1.0", + "dbt-adapters~=0.1.0a1", "google-cloud-bigquery~=3.0", "google-cloud-storage~=2.4", "google-cloud-dataproc~=5.0", diff --git a/tests/boundary/test_bigquery_sdk.py b/tests/boundary/test_bigquery_sdk.py new file mode 100644 index 000000000..b8e6c9995 --- /dev/null +++ b/tests/boundary/test_bigquery_sdk.py @@ -0,0 +1,18 @@ +import pytest + +from dbt.tests.util import get_connection +from google.cloud.bigquery import Client, DatasetReference, TableReference +from google.api_core.exceptions import NotFound + 
+ +@pytest.mark.parametrize("table_name", ["this_table_does_not_exist"]) +def test_get_table_does_not_exist(project, table_name): + """ + TODO: replace dbt project methods with direct connection instantiation + """ + with get_connection(project.adapter) as conn: + client: Client = conn.handle + dataset_ref = DatasetReference(project.database, project.test_schema) + table_ref = TableReference(dataset_ref, table_name) + with pytest.raises(NotFound): + client.get_table(table_ref) diff --git a/tests/functional/adapter/column_types/fixtures.py b/tests/functional/adapter/column_types/fixtures.py index b7be1e646..88175a88b 100644 --- a/tests/functional/adapter/column_types/fixtures.py +++ b/tests/functional/adapter/column_types/fixtures.py @@ -26,7 +26,7 @@ version: 2 models: - name: model - tests: + data_tests: - is_type: column_map: int64_col: ['integer', 'number'] @@ -39,7 +39,7 @@ version: 2 models: - name: model - tests: + data_tests: - is_type: column_map: int64_col: ['string', 'not number'] diff --git a/tests/functional/adapter/sources_freshness_tests/files.py b/tests/functional/adapter/sources_freshness_tests/files.py new file mode 100644 index 000000000..eaca96648 --- /dev/null +++ b/tests/functional/adapter/sources_freshness_tests/files.py @@ -0,0 +1,23 @@ +SCHEMA_YML = """version: 2 +sources: + - name: test_source + freshness: + warn_after: {count: 10, period: hour} + error_after: {count: 1, period: day} + schema: "{{ env_var('DBT_GET_LAST_RELATION_TEST_SCHEMA') }}" + tables: + - name: test_source +""" + +SEED_TEST_SOURCE_CSV = """ +id,name +1,Martin +2,Jeter +3,Ruth +4,Gehrig +5,DiMaggio +6,Torre +7,Mantle +8,Berra +9,Maris +""".strip() diff --git a/tests/functional/adapter/sources_freshness_tests/test_get_relation_last_modified.py b/tests/functional/adapter/sources_freshness_tests/test_get_relation_last_modified.py new file mode 100644 index 000000000..08e263edb --- /dev/null +++ b/tests/functional/adapter/sources_freshness_tests/test_get_relation_last_modified.py @@ -0,0 +1,30 @@ +import os +import pytest + +from dbt.tests.util import run_dbt + +from tests.functional.adapter.sources_freshness_tests import files + + +class TestGetLastRelationModified: + @pytest.fixture(scope="class") + def seeds(self): + return {"test_source.csv": files.SEED_TEST_SOURCE_CSV} + + @pytest.fixture(scope="class") + def models(self): + return {"schema.yml": files.SCHEMA_YML} + + @pytest.fixture(scope="class", autouse=True) + def setup(self, project): + # we need the schema name for the sources section + os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] = project.test_schema + run_dbt(["seed"]) + yield + del os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] + + def test_get_last_relation_modified(self, project): + results = run_dbt(["source", "freshness"]) + assert len(results) == 1 + result = results[0] + assert result.status == "pass" diff --git a/tests/functional/adapter/test_aliases.py b/tests/functional/adapter/test_aliases.py index fa28ce5d9..5ce13c8bc 100644 --- a/tests/functional/adapter/test_aliases.py +++ b/tests/functional/adapter/test_aliases.py @@ -32,12 +32,12 @@ version: 2 models: - name: model_a - tests: + data_tests: - expect_value: field: tablename value: duped_alias - name: model_b - tests: + data_tests: - expect_value: field: tablename value: duped_alias diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py new file mode 100644 index 000000000..f4d4ef1e2 --- /dev/null +++ 
b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -0,0 +1,64 @@ +import pytest +from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes +from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity +from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput + + +class TestBigQueryUnitTestingTypes(BaseUnitTestingTypes): + @pytest.fixture + def data_types(self): + # sql_value, yaml_value + return [ + ["1", "1"], + ["'1'", "1"], + ["cast('true' as boolean)", "true"], + ["1.0", "1.0"], + ["'string value'", "string value"], + ["cast(1.0 as numeric)", "1.0"], + ["cast(1 as bigint)", 1], + ["cast('2019-01-01' as date)", "2019-01-01"], + ["cast('2013-11-03 00:00:00-07' as timestamp)", "2013-11-03 00:00:00-07"], + ["st_geogpoint(75, 45)", "'st_geogpoint(75, 45)'"], + # arrays + ["cast(['a','b','c'] as array)", "['a','b','c']"], + ["cast([1,2,3] as array)", "[1,2,3]"], + ["cast([true,true,false] as array)", "[true,true,false]"], + # array of date + ["[date '2019-01-01']", "['2020-01-01']"], + ["[date '2019-01-01']", "[]"], + ["[date '2019-01-01']", "null"], + # array of timestamp + ["[timestamp '2019-01-01']", "['2020-01-01']"], + ["[timestamp '2019-01-01']", "[]"], + ["[timestamp '2019-01-01']", "null"], + # json + [ + """json '{"name": "Cooper", "forname": "Alice"}'""", + """{"name": "Cooper", "forname": "Alice"}""", + ], + ["""json '{"name": "Cooper", "forname": "Alice"}'""", "{}"], + # structs + ["struct('Isha' as name, 22 as age)", """'struct("Isha" as name, 22 as age)'"""], + [ + "struct('Kipketer' AS name, [23.2, 26.1, 27.3, 29.4] AS laps)", + """'struct("Kipketer" AS name, [23.2, 26.1, 27.3, 29.4] AS laps)'""", + ], + # struct of struct + [ + "struct(struct(1 as id, 'blue' as color) as my_struct)", + """'struct(struct(1 as id, "blue" as color) as my_struct)'""", + ], + # array of struct + [ + "[struct(st_geogpoint(75, 45) as my_point), struct(st_geogpoint(75, 35) as my_point)]", + "['struct(st_geogpoint(75, 45) as my_point)', 'struct(st_geogpoint(75, 35) as my_point)']", + ], + ] + + +class TestBigQueryUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): + pass + + +class TestBigQueryUnitTestInvalidInput(BaseUnitTestInvalidInput): + pass diff --git a/tests/unit/test_bigquery_adapter.py b/tests/unit/test_bigquery_adapter.py index a324c431a..34abd0caf 100644 --- a/tests/unit/test_bigquery_adapter.py +++ b/tests/unit/test_bigquery_adapter.py @@ -10,14 +10,16 @@ import unittest from unittest.mock import patch, MagicMock, create_autospec -import dbt.common.dataclass_schema -import dbt.common.exceptions.base +import dbt_common.dataclass_schema +import dbt_common.exceptions.base + +import dbt.adapters from dbt.adapters.bigquery.relation_configs import PartitionConfig from dbt.adapters.bigquery import BigQueryAdapter, BigQueryRelation from google.cloud.bigquery.table import Table from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT -from dbt.common.clients import agate_helper -import dbt.common.exceptions +from dbt_common.clients import agate_helper +import dbt_common.exceptions from dbt.context.manifest import generate_query_header_context from dbt.contracts.files import FileHash from dbt.contracts.graph.manifest import ManifestStateCheck @@ -214,7 +216,7 @@ def test_acquire_connection_oauth_no_project_validations( connection = adapter.acquire_connection("dummy") self.assertEqual(connection.type, "bigquery") - except dbt.common.exceptions.base.DbtValidationError 
as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) except BaseException: @@ -231,7 +233,7 @@ def test_acquire_connection_oauth_validations(self, mock_open_connection): connection = adapter.acquire_connection("dummy") self.assertEqual(connection.type, "bigquery") - except dbt.common.exceptions.base.DbtValidationError as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) except BaseException: @@ -255,7 +257,7 @@ def test_acquire_connection_dataproc_serverless( connection = adapter.acquire_connection("dummy") self.assertEqual(connection.type, "bigquery") - except dbt.common.exceptions.ValidationException as e: + except dbt_common.exceptions.ValidationException as e: self.fail("got ValidationException: {}".format(str(e))) except BaseException: @@ -272,7 +274,7 @@ def test_acquire_connection_service_account_validations(self, mock_open_connecti connection = adapter.acquire_connection("dummy") self.assertEqual(connection.type, "bigquery") - except dbt.common.exceptions.base.DbtValidationError as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) except BaseException: @@ -289,7 +291,7 @@ def test_acquire_connection_oauth_token_validations(self, mock_open_connection): connection = adapter.acquire_connection("dummy") self.assertEqual(connection.type, "bigquery") - except dbt.common.exceptions.base.DbtValidationError as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) except BaseException: @@ -306,7 +308,7 @@ def test_acquire_connection_oauth_credentials_validations(self, mock_open_connec connection = adapter.acquire_connection("dummy") self.assertEqual(connection.type, "bigquery") - except dbt.common.exceptions.base.DbtValidationError as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) except BaseException: @@ -325,7 +327,7 @@ def test_acquire_connection_impersonated_service_account_validations( connection = adapter.acquire_connection("dummy") self.assertEqual(connection.type, "bigquery") - except dbt.common.exceptions.base.DbtValidationError as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) except BaseException: @@ -343,7 +345,7 @@ def test_acquire_connection_priority(self, mock_open_connection): self.assertEqual(connection.type, "bigquery") self.assertEqual(connection.credentials.priority, "batch") - except dbt.common.exceptions.base.DbtValidationError as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) mock_open_connection.assert_not_called() @@ -358,7 +360,7 @@ def test_acquire_connection_maximum_bytes_billed(self, mock_open_connection): self.assertEqual(connection.type, "bigquery") self.assertEqual(connection.credentials.maximum_bytes_billed, 0) - except dbt.common.exceptions.base.DbtValidationError as e: + except dbt_common.exceptions.base.DbtValidationError as e: self.fail("got DbtValidationError: {}".format(str(e))) mock_open_connection.assert_not_called() @@ -509,7 +511,7 @@ def test_invalid_relation(self): }, "quote_policy": {"identifier": False, "schema": True}, } - with self.assertRaises(dbt.common.dataclass_schema.ValidationError): + with self.assertRaises(dbt_common.dataclass_schema.ValidationError): 
BigQueryRelation.validate(kwargs) @@ -581,10 +583,10 @@ def test_copy_table_materialization_incremental(self): def test_parse_partition_by(self): adapter = self.get_adapter("oauth") - with self.assertRaises(dbt.common.exceptions.base.DbtValidationError): + with self.assertRaises(dbt_common.exceptions.base.DbtValidationError): adapter.parse_partition_by("date(ts)") - with self.assertRaises(dbt.common.exceptions.base.DbtValidationError): + with self.assertRaises(dbt_common.exceptions.base.DbtValidationError): adapter.parse_partition_by("ts") self.assertEqual( @@ -736,7 +738,7 @@ def test_parse_partition_by(self): ) # Invalid, should raise an error - with self.assertRaises(dbt.common.exceptions.base.DbtValidationError): + with self.assertRaises(dbt_common.exceptions.base.DbtValidationError): adapter.parse_partition_by({}) # passthrough diff --git a/tests/unit/test_bigquery_connection_manager.py b/tests/unit/test_bigquery_connection_manager.py index 04e6d1352..6bb89ed36 100644 --- a/tests/unit/test_bigquery_connection_manager.py +++ b/tests/unit/test_bigquery_connection_manager.py @@ -6,12 +6,13 @@ from requests.exceptions import ConnectionError from unittest.mock import patch, MagicMock, Mock, ANY -import dbt.common.dataclass_schema +import dbt.adapters +import dbt_common.dataclass_schema from dbt.adapters.bigquery import BigQueryCredentials from dbt.adapters.bigquery import BigQueryRelation from dbt.adapters.bigquery.connections import BigQueryConnectionManager -import dbt.common.exceptions +import dbt_common.exceptions from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -127,7 +128,7 @@ def test_query_and_results_timeout(self, mock_bq): self.mock_client.query = Mock( return_value=Mock(result=lambda *args, **kwargs: time.sleep(4)) ) - with pytest.raises(dbt.common.exceptions.DbtRuntimeError) as exc: + with pytest.raises(dbt_common.exceptions.DbtRuntimeError) as exc: self.connections._query_and_results( self.mock_client, "sql", diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 6d21828b3..88b09ce60 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -10,7 +10,7 @@ import agate import pytest -from dbt.common.dataclass_schema import ValidationError +from dbt_common.dataclass_schema import ValidationError from dbt.config.project import PartialProject @@ -256,7 +256,7 @@ def generate_name_macros(package): class TestAdapterConversions(TestCase): def _get_tester_for(self, column_type): - from dbt.common.clients import agate_helper + from dbt_common.clients import agate_helper if column_type is agate.TimeDelta: # dbt never makes this! return agate.TimeDelta()
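
Note on the `_capabilities` mapping added to `BigQueryAdapter` in `impl.py`: it is what lets dbt-core route `dbt source freshness` and catalog generation through metadata-only code paths. A minimal sketch of checking such a declaration follows; the standalone `capabilities` dict here is an illustration built from the same classes, not the adapter instance from the diff.

from dbt.adapters.capability import Capability, CapabilityDict, CapabilitySupport, Support

# Mirrors the declaration added to BigQueryAdapter above.
capabilities = CapabilityDict(
    {
        Capability.TableLastModifiedMetadata: CapabilitySupport(support=Support.Full),
        Capability.SchemaMetadataByRelations: CapabilitySupport(support=Support.Full),
    }
)

# dbt-core consults flags like these to decide whether a source's freshness can be
# answered from table metadata instead of a loaded_at_field query.
assert capabilities[Capability.TableLastModifiedMetadata].support == Support.Full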
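
For reviewers tracing the new `calculate_freshness_from_metadata` override: it answers `dbt source freshness` without querying a `loaded_at_field` by reading the table's last-modified timestamp from the BigQuery API. A standalone sketch of the same calculation, assuming application-default credentials; the project, dataset, and table names are placeholders, not values from this diff.

from datetime import datetime

import pytz
from google.cloud.bigquery import Client, DatasetReference, TableReference

# Placeholders -- substitute a real project, dataset, and table.
PROJECT, DATASET, TABLE = "my-gcp-project", "my_dataset", "my_source_table"

client = Client(project=PROJECT)
table = client.get_table(TableReference(DatasetReference(PROJECT, DATASET), TABLE))

# BigQuery tracks a last-modified timestamp per table; freshness age is simply
# "now minus last modified", in seconds, which is what the adapter computes.
snapshot = datetime.now(tz=pytz.UTC)
age_seconds = (snapshot - table.modified).total_seconds()
print(f"max_loaded_at={table.modified.isoformat()} age={age_seconds:.0f}s")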
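
The expanded `bigquery__safe_cast` macro works around the fact that BigQuery cannot `safe_cast` directly to an ARRAY type: each element is cast individually and re-aggregated with `array_agg` over `unnest`. A rough Python paraphrase of the SQL shape produced by the plain array branch (an illustration only, not the macro itself, and it ignores the struct/geography string-literal handling):

def safe_cast_array_sql(field_sql: str, nested_type: str) -> str:
    # Approximates the non-struct/non-geography array branch of the macro:
    # cast each element, then re-aggregate the results into an array.
    return f"(select array_agg(safe_cast(i as {nested_type})) from unnest({field_sql}) i)"

print(safe_cast_array_sql("['2019-01-01', '2019-01-02']", "date"))
# -> (select array_agg(safe_cast(i as date)) from unnest(['2019-01-01', '2019-01-02']) i)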