From a46a6c893a5e4ee72ca3aa40b39f25f52a462c58 Mon Sep 17 00:00:00 2001 From: Christophe Oudar Date: Tue, 19 Nov 2024 01:23:48 +0100 Subject: [PATCH] Update Google base models based on latest documentation parsing --- .../Under the Hood-20241119-012330.yaml | 6 ++ macros/inputs/jobs_with_cost_base.sql | 93 +------------------ .../information_schema_schemata_replicas.sql | 16 ++-- .../information_schema_schemata_replicas.yml | 4 + .../google/jobs/information_schema_jobs.sql | 10 +- .../google/jobs/information_schema_jobs.yml | 38 ++++---- .../information_schema_jobs_by_folder.sql | 8 +- .../information_schema_jobs_by_folder.yml | 30 +++--- ...nformation_schema_jobs_by_organization.sql | 8 +- ...nformation_schema_jobs_by_organization.yml | 30 +++--- .../information_schema_jobs_by_project.sql | 10 +- .../information_schema_jobs_by_project.yml | 38 ++++---- .../jobs/information_schema_jobs_by_user.sql | 8 +- .../jobs/information_schema_jobs_by_user.yml | 30 +++--- ...information_schema_reservation_changes.yml | 16 ++-- .../information_schema_reservations.yml | 16 ++-- ...formation_schema_reservations_timeline.yml | 16 ++-- .../routines/information_schema_routines.sql | 6 +- .../routines/information_schema_routines.yml | 3 + ...information_schema_sessions_by_project.sql | 4 +- ...information_schema_sessions_by_project.yml | 4 - ...rmation_schema_constraint_column_usage.sql | 20 ++-- .../information_schema_key_column_usage.sql | 20 ++-- .../tables/information_schema_partitions.sql | 12 +-- .../information_schema_table_storage.sql | 6 +- .../information_schema_table_storage.yml | 3 + ...n_schema_table_storage_by_organization.sql | 6 +- ...n_schema_table_storage_by_organization.yml | 3 + ...on_schema_table_storage_usage_timeline.yml | 12 +-- ...storage_usage_timeline_by_organization.yml | 12 +-- .../tables/information_schema_tables.sql | 4 +- .../tables/information_schema_tables.yml | 3 + .../information_schema_materialized_views.sql | 10 +- 
models/base/jobs_by_project_with_cost.sql | 3 +- models/base/jobs_with_cost.sql | 83 ++++++++++++++++- 35 files changed, 299 insertions(+), 292 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20241119-012330.yaml diff --git a/.changes/unreleased/Under the Hood-20241119-012330.yaml b/.changes/unreleased/Under the Hood-20241119-012330.yaml new file mode 100644 index 0000000..1a61bd2 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20241119-012330.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Update Google base models based on latest documentation parsing +time: 2024-11-19T01:23:30.413941+01:00 +custom: + Author: Kayrnt + Issue: "71" diff --git a/macros/inputs/jobs_with_cost_base.sql b/macros/inputs/jobs_with_cost_base.sql index 30a3f1b..e10f943 100644 --- a/macros/inputs/jobs_with_cost_base.sql +++ b/macros/inputs/jobs_with_cost_base.sql @@ -1,95 +1,4 @@ {#- macro to add cost related formula to base jobs table -#} {% macro jobs_with_cost_base(table_name, contains_query) -%} -{# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs -#} -WITH base AS ( -SELECT - bi_engine_statistics, - cache_hit, - creation_time, - TIMESTAMP_TRUNC(creation_time, HOUR) hour, - destination_table, - {% if contains_query -%} - dml_statistics, - {% endif -%} - end_time, - error_result, - job_id, - job_stages, - job_type, - labels, - parent_job_id, - priority, - project_id, - project_number, - {% if contains_query -%} - query, - -- extract the dbt info from the query comment generated by dbt - replace(replace(regexp_extract(query, r'^(\/\* \{+?[\w\W]+?\} \*\/)'), '/', ''), '*', '') dbt_info, - {% endif -%} - referenced_tables, - reservation_id, - start_time, - state, - statement_type, - timeline, - total_bytes_billed, - total_bytes_processed, - total_modified_partitions, - total_slot_ms, - transaction_id, - user_email, - query_info, - transferred_bytes, - materialized_view_statistics -FROM - {{ ref(table_name) }} -{#- 
Prevent to duplicate costs as script contains query #} -WHERE statement_type != 'SCRIPT' -), -base_with_enriched_fields AS ( -SELECT - *, - total_slot_ms / (1000 * 60 * 60 * 24) AS avg_slots, - total_bytes_billed / POW(1024, 4) AS total_tb_billed, - TIMESTAMP_DIFF(COALESCE(end_time, CURRENT_TIMESTAMP()), start_time, SECOND) AS total_time_seconds, - {% if contains_query -%} - IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.dbt_version'), NULL) AS dbt_version, - IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.profile_name'), NULL) AS dbt_profile_name, - IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.target_name'), NULL) AS dbt_target_name, - IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.node_id'), NULL) AS dbt_model_name, - IF(LENGTH(dbt_info) > 0, - ARRAY( - SELECT JSON_VALUE(string_element, '$') - FROM UNNEST(JSON_QUERY_ARRAY(dbt_info, '$.node_tags')) AS string_element - ), NULL) AS node_tags, - {% endif -%} - FROM base -), -base_with_all_pricing AS ( -SELECT - {% if contains_query -%} - * EXCEPT(dbt_model_name), - {%- else -%} - *, - {% endif -%} - total_slot_ms / (1000 * 60 * 60) * {{ var('hourly_slot_price') }} AS flat_pricing_query_cost, - total_tb_billed * {{ var('per_billed_tb_price') }} AS ondemand_query_cost, - {% if contains_query -%} - case - when dbt_model_name like 'model.%' then 'model' - when dbt_model_name like 'snapshot.%' then 'snapshot' - when dbt_model_name like 'test.%' then 'test' - end as dbt_execution_type, - concat(split(dbt_model_name, '.')[safe_offset(1)], '.',split(dbt_model_name, '.')[safe_offset(2)]) as dbt_model_name - {% endif -%} -FROM base_with_enriched_fields -) -SELECT - *, -{% if var('use_flat_pricing') -%} - flat_pricing_query_cost AS query_cost -{%- else -%} - ondemand_query_cost AS query_cost -{%- endif %} -FROM base_with_all_pricing + {%- endmacro %} diff --git a/models/base/google/datasets/information_schema_schemata_replicas.sql 
b/models/base/google/datasets/information_schema_schemata_replicas.sql index 7f64e8a..d03a607 100644 --- a/models/base/google/datasets/information_schema_schemata_replicas.sql +++ b/models/base/google/datasets/information_schema_schemata_replicas.sql @@ -14,16 +14,16 @@ For more information about granting roles, see Manage access to projects, folders, and organizations. - You might also be able to get - the required permissions through custom - roles or other predefined - roles. - -#} + You might also be able to get + the required permissions through custom + roles or other predefined + roles. + -#} WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT catalog_name, schema_name, replica_name, location, replica_primary_assigned, replica_primary_assignment_complete, creation_time, creation_complete, replication_time + SELECT catalog_name, schema_name, replica_name, location, replica_primary_assigned, replica_primary_assignment_complete, creation_time, creation_complete, replication_time, sync_status FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`SCHEMATA_REPLICAS` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -37,7 +37,8 @@ replica_primary_assigned, replica_primary_assignment_complete, creation_time, creation_complete, -replication_time +replication_time, +sync_status FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`SCHEMATA_REPLICAS` {%- endif %} ) @@ -52,5 +53,6 @@ replica_primary_assignment_complete, creation_time, creation_complete, replication_time, +sync_status, FROM base diff --git a/models/base/google/datasets/information_schema_schemata_replicas.yml b/models/base/google/datasets/information_schema_schemata_replicas.yml index 34e3b45..ed97ea9 100644 --- a/models/base/google/datasets/information_schema_schemata_replicas.yml +++ b/models/base/google/datasets/information_schema_schemata_replicas.yml @@ -39,3 +39,7 @@ models: \ This value is only visible in the 
secondary region.\nIf the dataset contains\ \ a table with streaming data, the value of replication_time will not be accurate." type: TIMESTAMP + - name: sync_status + description: "The status of the sync\n between the primary and secondary\ + \ replica. Returns NULL if the replica is a\n primary replica." + type: JSON diff --git a/models/base/google/jobs/information_schema_jobs.sql b/models/base/google/jobs/information_schema_jobs.sql index 9d39d1f..cb3d3ab 100644 --- a/models/base/google/jobs/information_schema_jobs.sql +++ b/models/base/google/jobs/information_schema_jobs.sql @@ -18,7 +18,7 @@ WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, dml_statistics, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, job_creation_reason, query_info + SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, query_info, job_creation_reason FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -28,7 +28,6 @@ bi_engine_statistics, cache_hit, creation_time, destination_table, -dml_statistics, end_time, error_result, job_id, @@ -56,8 +55,8 @@ 
transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, -query_info +query_info, +job_creation_reason FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS` {%- endif %} ) @@ -67,7 +66,6 @@ SELECT cache_hit, creation_time, destination_table, -dml_statistics, end_time, error_result, job_id, @@ -95,7 +93,7 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, query_info, +job_creation_reason, FROM base diff --git a/models/base/google/jobs/information_schema_jobs.yml b/models/base/google/jobs/information_schema_jobs.yml index f8ba5f5..35e56a4 100644 --- a/models/base/google/jobs/information_schema_jobs.yml +++ b/models/base/google/jobs/information_schema_jobs.yml @@ -19,14 +19,6 @@ models: - name: destination_table description: "Destination table\n for results, if any." type: RECORD - - name: dml_statistics - description: "If the job is a query with a DML statement, the value is a record\ - \ with the\n following fields:\n\ninserted_row_count: The number of rows\ - \ that were inserted.\ndeleted_row_count: The number of rows that were deleted.\n\ - updated_row_count: The number of rows that were updated.\n\n For all\ - \ other jobs, the value is NULL.\n This column is present in the INFORMATION_SCHEMA.JOBS_BY_USER\ - \ and\n INFORMATION_SCHEMA.JOBS_BY_PROJECT views." - type: RECORD - name: end_time description: "The end time of this job, in milliseconds since the epoch. This\ \ field represents the\n time when the job enters the DONE state." @@ -142,6 +134,22 @@ models: description: "Statistics of\n materialized views considered in a query\ \ job. (Preview)" type: RECORD + - name: query_info + description: "query_info.resource_warning : The warning message that appears if\ + \ the resource usage during query processing is above the internal threshold\ + \ of the system. A successful query job can have the resource_warning field\ + \ populated. 
With resource_warning, you get additional data points to optimize\ + \ your queries and to set up monitoring for performance trends of an equivalent\ + \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ + \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ + \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ + \ The hash value will differ when underlying views change, or if the\ + \ query implicitly\n references columns, such as SELECT *, and the table\ + \ schema changes.\n \n This field appears for successful GoogleSQL\ + \ queries that are not cache hits.\nquery_info.performance_insights : Performance\ + \ insights for the job.\nquery_info.optimization_details : The history-based\ + \ optimizations\n for the job." + type: RECORD - name: job_creation_reason description: "job_creation_reason.code : Specifies the high level reason why a\ \ job was created.\n Possible values are:\n \nREQUESTED: job creation\ @@ -153,17 +161,3 @@ models: \ system has determined that the query needs to be executed as a\n \ \ job." type: RECORD - - name: query_info - description: "query_info.resource_warning : The warning message that appears if\ - \ the resource usage during query processing is above the internal threshold\ - \ of the system. A successful query job can have the resource_warning field\ - \ populated. With resource_warning, you get additional data points to optimize\ - \ your queries and to set up monitoring for performance trends of an equivalent\ - \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ - \ : Contains the hashes of the query. 
normalized_literals is a hexadecimal\n\ - \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ - \ \n This field appears for successful GoogleSQL queries that\ - \ are not cache hits.\nquery_info.performance_insights : Performance insights\ - \ for the job.\nquery_info.optimization_details : The history-based optimizations\n\ - \ for the job." - type: RECORD diff --git a/models/base/google/jobs/information_schema_jobs_by_folder.sql b/models/base/google/jobs/information_schema_jobs_by_folder.sql index c9d4747..7a3e668 100644 --- a/models/base/google/jobs/information_schema_jobs_by_folder.sql +++ b/models/base/google/jobs/information_schema_jobs_by_folder.sql @@ -18,7 +18,7 @@ WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, job_creation_reason, query_info + SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, query_info, job_creation_reason FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_FOLDER` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -55,8 +55,8 @@ transaction_id, user_email, 
transferred_bytes, materialized_view_statistics, -job_creation_reason, -query_info +query_info, +job_creation_reason FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_FOLDER` {%- endif %} ) @@ -93,7 +93,7 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, query_info, +job_creation_reason, FROM base diff --git a/models/base/google/jobs/information_schema_jobs_by_folder.yml b/models/base/google/jobs/information_schema_jobs_by_folder.yml index 02191c6..a876b3c 100644 --- a/models/base/google/jobs/information_schema_jobs_by_folder.yml +++ b/models/base/google/jobs/information_schema_jobs_by_folder.yml @@ -134,6 +134,22 @@ models: description: "Statistics of\n materialized views considered in a query\ \ job. (Preview)" type: RECORD + - name: query_info + description: "query_info.resource_warning : The warning message that appears if\ + \ the resource usage during query processing is above the internal threshold\ + \ of the system. A successful query job can have the resource_warning field\ + \ populated. With resource_warning, you get additional data points to optimize\ + \ your queries and to set up monitoring for performance trends of an equivalent\ + \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ + \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ + \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ + \ The hash value will differ when underlying views change, or if the\ + \ query implicitly\n references columns, such as SELECT *, and the table\ + \ schema changes.\n \n This field appears for successful GoogleSQL\ + \ queries that are not cache hits.\nquery_info.performance_insights : Performance\ + \ insights for the job.\nquery_info.optimization_details : The history-based\ + \ optimizations\n for the job." 
+ type: RECORD - name: job_creation_reason description: "job_creation_reason.code : Specifies the high level reason why a\ \ job was created.\n Possible values are:\n \nREQUESTED: job creation\ @@ -145,17 +161,3 @@ models: \ system has determined that the query needs to be executed as a\n \ \ job." type: RECORD - - name: query_info - description: "query_info.resource_warning : The warning message that appears if\ - \ the resource usage during query processing is above the internal threshold\ - \ of the system. A successful query job can have the resource_warning field\ - \ populated. With resource_warning, you get additional data points to optimize\ - \ your queries and to set up monitoring for performance trends of an equivalent\ - \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ - \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ - \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ - \ \n This field appears for successful GoogleSQL queries that\ - \ are not cache hits.\nquery_info.performance_insights : Performance insights\ - \ for the job.\nquery_info.optimization_details : The history-based optimizations\n\ - \ for the job." 
- type: RECORD diff --git a/models/base/google/jobs/information_schema_jobs_by_organization.sql b/models/base/google/jobs/information_schema_jobs_by_organization.sql index 229e0a4..7a75a6f 100644 --- a/models/base/google/jobs/information_schema_jobs_by_organization.sql +++ b/models/base/google/jobs/information_schema_jobs_by_organization.sql @@ -18,7 +18,7 @@ WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, job_creation_reason, query_info + SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, query_info, job_creation_reason FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_ORGANIZATION` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -55,8 +55,8 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, -query_info +query_info, +job_creation_reason FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_ORGANIZATION` {%- endif %} ) @@ -93,7 +93,7 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, query_info, 
+job_creation_reason, FROM base diff --git a/models/base/google/jobs/information_schema_jobs_by_organization.yml b/models/base/google/jobs/information_schema_jobs_by_organization.yml index 8a6d0d4..c6fa7c5 100644 --- a/models/base/google/jobs/information_schema_jobs_by_organization.yml +++ b/models/base/google/jobs/information_schema_jobs_by_organization.yml @@ -134,6 +134,22 @@ models: description: "Statistics of\n materialized views considered in a query\ \ job. (Preview)" type: RECORD + - name: query_info + description: "query_info.resource_warning : The warning message that appears if\ + \ the resource usage during query processing is above the internal threshold\ + \ of the system. A successful query job can have the resource_warning field\ + \ populated. With resource_warning, you get additional data points to optimize\ + \ your queries and to set up monitoring for performance trends of an equivalent\ + \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ + \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ + \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ + \ The hash value will differ when underlying views change, or if the\ + \ query implicitly\n references columns, such as SELECT *, and the table\ + \ schema changes.\n \n This field appears for successful GoogleSQL\ + \ queries that are not cache hits.\nquery_info.performance_insights : Performance\ + \ insights for the job.\nquery_info.optimization_details : The history-based\ + \ optimizations\n for the job." + type: RECORD - name: job_creation_reason description: "job_creation_reason.code : Specifies the high level reason why a\ \ job was created.\n Possible values are:\n \nREQUESTED: job creation\ @@ -145,17 +161,3 @@ models: \ system has determined that the query needs to be executed as a\n \ \ job." 
type: RECORD - - name: query_info - description: "query_info.resource_warning : The warning message that appears if\ - \ the resource usage during query processing is above the internal threshold\ - \ of the system. A successful query job can have the resource_warning field\ - \ populated. With resource_warning, you get additional data points to optimize\ - \ your queries and to set up monitoring for performance trends of an equivalent\ - \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ - \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ - \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ - \ \n This field appears for successful GoogleSQL queries that\ - \ are not cache hits.\nquery_info.performance_insights : Performance insights\ - \ for the job.\nquery_info.optimization_details : The history-based optimizations\n\ - \ for the job." - type: RECORD diff --git a/models/base/google/jobs/information_schema_jobs_by_project.sql b/models/base/google/jobs/information_schema_jobs_by_project.sql index 0022ffa..11d008e 100644 --- a/models/base/google/jobs/information_schema_jobs_by_project.sql +++ b/models/base/google/jobs/information_schema_jobs_by_project.sql @@ -18,7 +18,7 @@ WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, dml_statistics, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, job_creation_reason, query_info + SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, end_time, error_result, job_id, 
job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, query_info, job_creation_reason FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_PROJECT` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -28,7 +28,6 @@ bi_engine_statistics, cache_hit, creation_time, destination_table, -dml_statistics, end_time, error_result, job_id, @@ -56,8 +55,8 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, -query_info +query_info, +job_creation_reason FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_PROJECT` {%- endif %} ) @@ -67,7 +66,6 @@ SELECT cache_hit, creation_time, destination_table, -dml_statistics, end_time, error_result, job_id, @@ -95,7 +93,7 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, query_info, +job_creation_reason, FROM base diff --git a/models/base/google/jobs/information_schema_jobs_by_project.yml b/models/base/google/jobs/information_schema_jobs_by_project.yml index ecda258..f96925d 100644 --- a/models/base/google/jobs/information_schema_jobs_by_project.yml +++ b/models/base/google/jobs/information_schema_jobs_by_project.yml @@ -19,14 +19,6 @@ models: - name: destination_table description: "Destination table\n for results, if any." 
type: RECORD - - name: dml_statistics - description: "If the job is a query with a DML statement, the value is a record\ - \ with the\n following fields:\n\ninserted_row_count: The number of rows\ - \ that were inserted.\ndeleted_row_count: The number of rows that were deleted.\n\ - updated_row_count: The number of rows that were updated.\n\n For all\ - \ other jobs, the value is NULL.\n This column is present in the INFORMATION_SCHEMA.JOBS_BY_USER\ - \ and\n INFORMATION_SCHEMA.JOBS_BY_PROJECT views." - type: RECORD - name: end_time description: "The end time of this job, in milliseconds since the epoch. This\ \ field represents the\n time when the job enters the DONE state." @@ -142,6 +134,22 @@ models: description: "Statistics of\n materialized views considered in a query\ \ job. (Preview)" type: RECORD + - name: query_info + description: "query_info.resource_warning : The warning message that appears if\ + \ the resource usage during query processing is above the internal threshold\ + \ of the system. A successful query job can have the resource_warning field\ + \ populated. With resource_warning, you get additional data points to optimize\ + \ your queries and to set up monitoring for performance trends of an equivalent\ + \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ + \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ + \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ + \ The hash value will differ when underlying views change, or if the\ + \ query implicitly\n references columns, such as SELECT *, and the table\ + \ schema changes.\n \n This field appears for successful GoogleSQL\ + \ queries that are not cache hits.\nquery_info.performance_insights : Performance\ + \ insights for the job.\nquery_info.optimization_details : The history-based\ + \ optimizations\n for the job." 
+ type: RECORD - name: job_creation_reason description: "job_creation_reason.code : Specifies the high level reason why a\ \ job was created.\n Possible values are:\n \nREQUESTED: job creation\ @@ -153,17 +161,3 @@ models: \ system has determined that the query needs to be executed as a\n \ \ job." type: RECORD - - name: query_info - description: "query_info.resource_warning : The warning message that appears if\ - \ the resource usage during query processing is above the internal threshold\ - \ of the system. A successful query job can have the resource_warning field\ - \ populated. With resource_warning, you get additional data points to optimize\ - \ your queries and to set up monitoring for performance trends of an equivalent\ - \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ - \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ - \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ - \ \n This field appears for successful GoogleSQL queries that\ - \ are not cache hits.\nquery_info.performance_insights : Performance insights\ - \ for the job.\nquery_info.optimization_details : The history-based optimizations\n\ - \ for the job." 
- type: RECORD diff --git a/models/base/google/jobs/information_schema_jobs_by_user.sql b/models/base/google/jobs/information_schema_jobs_by_user.sql index 449534d..b94cf14 100644 --- a/models/base/google/jobs/information_schema_jobs_by_user.sql +++ b/models/base/google/jobs/information_schema_jobs_by_user.sql @@ -18,7 +18,7 @@ WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, dml_statistics, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, job_creation_reason, query_info + SELECT bi_engine_statistics, cache_hit, creation_time, destination_table, dml_statistics, end_time, error_result, job_id, job_stages, job_type, labels, parent_job_id, priority, project_id, project_number, query, referenced_tables, reservation_id, edition, session_info, start_time, state, statement_type, timeline, total_bytes_billed, total_bytes_processed, total_modified_partitions, total_slot_ms, transaction_id, user_email, transferred_bytes, materialized_view_statistics, query_info, job_creation_reason FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_USER` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -56,8 +56,8 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, -query_info +query_info, +job_creation_reason FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`JOBS_BY_USER` {%- endif %} ) @@ -95,7 +95,7 @@ transaction_id, user_email, transferred_bytes, materialized_view_statistics, -job_creation_reason, query_info, +job_creation_reason, FROM 
base diff --git a/models/base/google/jobs/information_schema_jobs_by_user.yml b/models/base/google/jobs/information_schema_jobs_by_user.yml index 2de68e4..d523a5e 100644 --- a/models/base/google/jobs/information_schema_jobs_by_user.yml +++ b/models/base/google/jobs/information_schema_jobs_by_user.yml @@ -142,6 +142,22 @@ models: description: "Statistics of\n materialized views considered in a query\ \ job. (Preview)" type: RECORD + - name: query_info + description: "query_info.resource_warning : The warning message that appears if\ + \ the resource usage during query processing is above the internal threshold\ + \ of the system. A successful query job can have the resource_warning field\ + \ populated. With resource_warning, you get additional data points to optimize\ + \ your queries and to set up monitoring for performance trends of an equivalent\ + \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ + \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ + \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ + \ The hash value will differ when underlying views change, or if the\ + \ query implicitly\n references columns, such as SELECT *, and the table\ + \ schema changes.\n \n This field appears for successful GoogleSQL\ + \ queries that are not cache hits.\nquery_info.performance_insights : Performance\ + \ insights for the job.\nquery_info.optimization_details : The history-based\ + \ optimizations\n for the job." + type: RECORD - name: job_creation_reason description: "job_creation_reason.code : Specifies the high level reason why a\ \ job was created.\n Possible values are:\n \nREQUESTED: job creation\ @@ -153,17 +169,3 @@ models: \ system has determined that the query needs to be executed as a\n \ \ job." 
type: RECORD - - name: query_info - description: "query_info.resource_warning : The warning message that appears if\ - \ the resource usage during query processing is above the internal threshold\ - \ of the system. A successful query job can have the resource_warning field\ - \ populated. With resource_warning, you get additional data points to optimize\ - \ your queries and to set up monitoring for performance trends of an equivalent\ - \ set of queries by using query_hashes.\nquery_info.query_hashes.normalized_literals\ - \ : Contains the hashes of the query. normalized_literals is a hexadecimal\n\ - \ STRING hash that ignores comments, parameter values, UDFs, and literals.\n\ - \ \n This field appears for successful GoogleSQL queries that\ - \ are not cache hits.\nquery_info.performance_insights : Performance insights\ - \ for the job.\nquery_info.optimization_details : The history-based optimizations\n\ - \ for the job." - type: RECORD diff --git a/models/base/google/reservations/information_schema_reservation_changes.yml b/models/base/google/reservations/information_schema_reservation_changes.yml index 966b965..5e2b16e 100644 --- a/models/base/google/reservations/information_schema_reservation_changes.yml +++ b/models/base/google/reservations/information_schema_reservation_changes.yml @@ -37,14 +37,14 @@ models: \ is computed automatically based on available\n resources." type: INTEGER - name: autoscale - description: 'Information about the autoscale capacity of the reservation. Fields - include the following: - - - current_slots: the number of slots added to the reservation by autoscaling. - - max_slots: the maximum number of slots that could be added to the reservation - by autoscaling.' + description: "Information about the autoscale capacity of the reservation. 
Fields\ + \ include the following:\n\ncurrent_slots: the number of slots added to the\ + \ reservation by autoscaling.\n \nNote: After users reduce max_slots,\ + \ it may take a while before it can be propagated,\n so\ + \ current_slots may stay in the original value and could be larger than max_slots\n\ + \ for that brief period (less than one minute).\n \ + \ \n\nmax_slots: the maximum number of slots that could be added to the\ + \ reservation by autoscaling." type: STRUCT - name: edition description: The edition associated with this reservation. For more information diff --git a/models/base/google/reservations/information_schema_reservations.yml b/models/base/google/reservations/information_schema_reservations.yml index b8bc54f..0c3f4bb 100644 --- a/models/base/google/reservations/information_schema_reservations.yml +++ b/models/base/google/reservations/information_schema_reservations.yml @@ -28,14 +28,14 @@ models: \ is computed automatically based on available\n resources." type: INTEGER - name: autoscale - description: 'Information about the autoscale capacity of the reservation. Fields - include the following: - - - current_slots: the number of slots added to the reservation by autoscaling. - - max_slots: the maximum number of slots that could be added to the reservation - by autoscaling.' + description: "Information about the autoscale capacity of the reservation. Fields\ + \ include the following:\n\ncurrent_slots: the number of slots added to the\ + \ reservation by autoscaling.\n \nNote: After users reduce max_slots,\ + \ it may take a while before it can be propagated,\n so\ + \ current_slots may stay in the original value and could be larger than max_slots\n\ + \ for that brief period (less than one minute).\n \ + \ \n\nmax_slots: the maximum number of slots that could be added to the\ + \ reservation by autoscaling." type: STRUCT - name: edition description: The edition associated with this reservation. 
For more information diff --git a/models/base/google/reservations/information_schema_reservations_timeline.yml b/models/base/google/reservations/information_schema_reservations_timeline.yml index 6984401..004f8ed 100644 --- a/models/base/google/reservations/information_schema_reservations_timeline.yml +++ b/models/base/google/reservations/information_schema_reservations_timeline.yml @@ -28,14 +28,14 @@ models: \ the admin project." type: INTEGER - name: autoscale - description: 'Information about the autoscale capacity of the reservation. Fields - include the following: - - - current_slots: the number of slots added to the reservation by autoscaling. - - max_slots: the maximum number of slots that could be added to the reservation - by autoscaling.' + description: "Information about the autoscale capacity of the reservation. Fields\ + \ include the following:\n\ncurrent_slots: the number of slots added to the\ + \ reservation by autoscaling.\n \nNote: After users reduce max_slots,\ + \ it may take a while before it can be propagated,\n so\ + \ current_slots may stay in the original value and could be larger than max_slots\n\ + \ for that brief period (less than one minute).\n \ + \ \n\nmax_slots: the maximum number of slots that could be added to the\ + \ reservation by autoscaling." type: STRUCT - name: reservation_id description: For joining with the jobs_timeline table. This is of the form project_id:location.reservation_name. diff --git a/models/base/google/routines/information_schema_routines.sql b/models/base/google/routines/information_schema_routines.sql index 4ced60c..c39173d 100644 --- a/models/base/google/routines/information_schema_routines.sql +++ b/models/base/google/routines/information_schema_routines.sql @@ -14,7 +14,7 @@ Access control with IAM. 
-#} WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT specific_catalog, specific_schema, specific_name, routine_catalog, routine_schema, routine_name, routine_type, data_type, routine_body, routine_definition, external_language, is_deterministic, security_type, created, last_altered, ddl + SELECT specific_catalog, specific_schema, specific_name, routine_catalog, routine_schema, routine_name, routine_type, data_type, routine_body, routine_definition, external_language, is_deterministic, security_type, created, last_altered, ddl, connection FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`ROUTINES` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -35,7 +35,8 @@ is_deterministic, security_type, created, last_altered, -ddl +ddl, +connection FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`ROUTINES` {%- endif %} ) @@ -57,5 +58,6 @@ security_type, created, last_altered, ddl, +connection, FROM base diff --git a/models/base/google/routines/information_schema_routines.yml b/models/base/google/routines/information_schema_routines.yml index b4720a5..96fd8a8 100644 --- a/models/base/google/routines/information_schema_routines.yml +++ b/models/base/google/routines/information_schema_routines.yml @@ -60,3 +60,6 @@ models: description: "The DDL statement\n that can be used to create the routine,\ \ such as\n CREATE FUNCTION\n or CREATE PROCEDURE" type: STRING + - name: CONNECTION + description: "The connection name, if the routine has one. Otherwise\n NULL" + type: STRING diff --git a/models/base/google/sessions/information_schema_sessions_by_project.sql b/models/base/google/sessions/information_schema_sessions_by_project.sql index 8d99b01..a2aacdd 100644 --- a/models/base/google/sessions/information_schema_sessions_by_project.sql +++ b/models/base/google/sessions/information_schema_sessions_by_project.sql @@ -11,7 +11,7 @@ Access control with IAM. 
-#} WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT creation_time, expiration_time, is_active, last_modified_time, principal_subject, project_id, project_number, session_id, user_email + SELECT creation_time, expiration_time, is_active, last_modified_time, project_id, project_number, session_id, user_email FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`SESSIONS_BY_PROJECT` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -21,7 +21,6 @@ creation_time, expiration_time, is_active, last_modified_time, -principal_subject, project_id, project_number, session_id, @@ -35,7 +34,6 @@ SELECT expiration_time, is_active, last_modified_time, -principal_subject, project_id, project_number, session_id, diff --git a/models/base/google/sessions/information_schema_sessions_by_project.yml b/models/base/google/sessions/information_schema_sessions_by_project.yml index abf4fc2..9c6c8be 100644 --- a/models/base/google/sessions/information_schema_sessions_by_project.yml +++ b/models/base/google/sessions/information_schema_sessions_by_project.yml @@ -19,10 +19,6 @@ models: description: "(Partitioning column) Time when the session was last modified.\n\ \ Partitioning is based on the UTC time of this timestamp." type: TIMESTAMP - - name: principal_subject - description: "(Clustering column) Principal\n identifier of the user who\ - \ ran the job." - type: STRING - name: project_id description: (Clustering column) ID of the project. 
type: STRING diff --git a/models/base/google/tables/information_schema_constraint_column_usage.sql b/models/base/google/tables/information_schema_constraint_column_usage.sql index 2d09531..21a8d81 100644 --- a/models/base/google/tables/information_schema_constraint_column_usage.sql +++ b/models/base/google/tables/information_schema_constraint_column_usage.sql @@ -1,23 +1,26 @@ {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-constraint-column-usage -#} - - + + {% set preflight_sql -%} {% if project_list()|length > 0 -%} {% for project in project_list() -%} SELECT - SCHEMA_NAME + CONCAT('`', CATALOG_NAME, '`.`', SCHEMA_NAME, '`') AS SCHEMA_NAME FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`SCHEMATA` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} {%- else %} SELECT - SCHEMA_NAME + CONCAT('`', CATALOG_NAME, '`.`', SCHEMA_NAME, '`') AS SCHEMA_NAME FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`SCHEMATA` {%- endif %} {%- endset %} {% set results = run_query(preflight_sql) %} {% set dataset_list = results | map(attribute='SCHEMA_NAME') | list %} - + {%- if dataset_list | length == 0 -%} + {{ log("No datasets found in the project list", info=True) }} + {%- endif -%} + WITH base AS ( {%- if dataset_list | length == 0 -%} SELECT CAST(NULL AS STRING) AS table_catalog, CAST(NULL AS STRING) AS table_schema, CAST(NULL AS STRING) AS table_name, CAST(NULL AS STRING) AS column_name, CAST(NULL AS STRING) AS constraint_catalog, CAST(NULL AS STRING) AS constraint_schema, CAST(NULL AS STRING) AS constraint_name @@ -32,14 +35,11 @@ column_name, constraint_catalog, constraint_schema, constraint_name - FROM `{{ dataset | trim }}`.`INFORMATION_SCHEMA`.`CONSTRAINT_COLUMN_USAGE` + FROM {{ dataset | trim }}.`INFORMATION_SCHEMA`.`CONSTRAINT_COLUMN_USAGE` {# dataset is already quoted as `project`.`dataset` by the preflight query #} {% if not loop.last %}UNION ALL{% endif %} {% endfor %} {%- endif -%} - - - -) + ) SELECT table_catalog, diff --git
a/models/base/google/tables/information_schema_key_column_usage.sql b/models/base/google/tables/information_schema_key_column_usage.sql index 00794e1..5eecaca 100644 --- a/models/base/google/tables/information_schema_key_column_usage.sql +++ b/models/base/google/tables/information_schema_key_column_usage.sql @@ -1,23 +1,26 @@ {# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-key-column-usage -#} - - + + {% set preflight_sql -%} {% if project_list()|length > 0 -%} {% for project in project_list() -%} SELECT - SCHEMA_NAME + CONCAT('`', CATALOG_NAME, '`.`', SCHEMA_NAME, '`') AS SCHEMA_NAME FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`SCHEMATA` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} {%- else %} SELECT - SCHEMA_NAME + CONCAT('`', CATALOG_NAME, '`.`', SCHEMA_NAME, '`') AS SCHEMA_NAME FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`SCHEMATA` {%- endif %} {%- endset %} {% set results = run_query(preflight_sql) %} {% set dataset_list = results | map(attribute='SCHEMA_NAME') | list %} - + {%- if dataset_list | length == 0 -%} + {{ log("No datasets found in the project list", info=True) }} + {%- endif -%} + WITH base AS ( {%- if dataset_list | length == 0 -%} SELECT CAST(NULL AS STRING) AS constraint_catalog, CAST(NULL AS STRING) AS constraint_schema, CAST(NULL AS STRING) AS constraint_name, CAST(NULL AS STRING) AS table_catalog, CAST(NULL AS STRING) AS table_schema, CAST(NULL AS STRING) AS table_name, CAST(NULL AS STRING) AS column_name, CAST(NULL AS INT64) AS ordinal_position, CAST(NULL AS INT64) AS position_in_unique_constraint @@ -34,14 +37,11 @@ table_name, column_name, ordinal_position, position_in_unique_constraint - FROM `{{ dataset | trim }}`.`INFORMATION_SCHEMA`.`KEY_COLUMN_USAGE` + FROM {{ dataset | trim }}.`INFORMATION_SCHEMA`.`KEY_COLUMN_USAGE` {# dataset is already quoted as `project`.`dataset` by the preflight query #} {% if not loop.last %}UNION ALL{% endif %} {% endfor %} {%- endif -%} - - - -) + ) SELECT constraint_catalog, diff
--git a/models/base/google/tables/information_schema_partitions.sql b/models/base/google/tables/information_schema_partitions.sql index 6e02f5b..ded90ab 100644 --- a/models/base/google/tables/information_schema_partitions.sql +++ b/models/base/google/tables/information_schema_partitions.sql @@ -12,7 +12,7 @@ roles/bigquery.dataViewer For more information about BigQuery permissions, see Access control with IAM. -#} - + {% set preflight_sql -%} {% if project_list()|length > 0 -%} {% for project in project_list() -%} @@ -29,7 +29,10 @@ Access control with IAM. -#} {%- endset %} {% set results = run_query(preflight_sql) %} {% set dataset_list = results | map(attribute='SCHEMA_NAME') | list %} - + {%- if dataset_list | length == 0 -%} + {{ log("No datasets found in the project list", info=True) }} + {%- endif -%} + WITH base AS ( {%- if dataset_list | length == 0 -%} SELECT CAST(NULL AS STRING) AS table_catalog, CAST(NULL AS STRING) AS table_schema, CAST(NULL AS STRING) AS table_name, CAST(NULL AS STRING) AS partition_id, CAST(NULL AS INTEGER) AS total_rows, CAST(NULL AS INTEGER) AS total_logical_bytes, CAST(NULL AS TIMESTAMP) AS last_modified_time, CAST(NULL AS STRING) AS storage_tier @@ -49,10 +52,7 @@ storage_tier {% if not loop.last %}UNION ALL{% endif %} {% endfor %} {%- endif -%} - - - -) + ) SELECT table_catalog, diff --git a/models/base/google/tables/information_schema_table_storage.sql b/models/base/google/tables/information_schema_table_storage.sql index e7755c5..0bbbf9d 100644 --- a/models/base/google/tables/information_schema_table_storage.sql +++ b/models/base/google/tables/information_schema_table_storage.sql @@ -3,7 +3,7 @@ WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT project_id, project_number, table_catalog, table_schema, table_name, creation_time, total_rows, total_partitions, total_logical_bytes, active_logical_bytes, long_term_logical_bytes, current_physical_bytes, total_physical_bytes, 
active_physical_bytes, long_term_physical_bytes, time_travel_physical_bytes, storage_last_modified_time, deleted, table_type, fail_safe_physical_bytes + SELECT project_id, project_number, table_catalog, table_schema, table_name, creation_time, total_rows, total_partitions, total_logical_bytes, active_logical_bytes, long_term_logical_bytes, current_physical_bytes, total_physical_bytes, active_physical_bytes, long_term_physical_bytes, time_travel_physical_bytes, storage_last_modified_time, deleted, table_type, fail_safe_physical_bytes, last_metadata_index_refresh_time FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -28,7 +28,8 @@ time_travel_physical_bytes, storage_last_modified_time, deleted, table_type, -fail_safe_physical_bytes +fail_safe_physical_bytes, +last_metadata_index_refresh_time FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE` {%- endif %} ) @@ -54,5 +55,6 @@ storage_last_modified_time, deleted, table_type, fail_safe_physical_bytes, +last_metadata_index_refresh_time, FROM base diff --git a/models/base/google/tables/information_schema_table_storage.yml b/models/base/google/tables/information_schema_table_storage.yml index 5c280e6..9faf641 100644 --- a/models/base/google/tables/information_schema_table_storage.yml +++ b/models/base/google/tables/information_schema_table_storage.yml @@ -74,3 +74,6 @@ models: description: "Number of physical (compressed) bytes used by the fail-safe storage\n\ \ (deleted or changed data)." type: INT64 + - name: LAST_METADATA_INDEX_REFRESH_TIME + description: The last metadata index refresh time of the table. 
+ type: TIMESTAMP diff --git a/models/base/google/tables/information_schema_table_storage_by_organization.sql b/models/base/google/tables/information_schema_table_storage_by_organization.sql index 0c147fb..dd224ef 100644 --- a/models/base/google/tables/information_schema_table_storage_by_organization.sql +++ b/models/base/google/tables/information_schema_table_storage_by_organization.sql @@ -16,7 +16,7 @@ Access control with IAM. -#} WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT project_id, project_number, table_catalog, table_schema, table_name, creation_time, total_rows, total_partitions, total_logical_bytes, active_logical_bytes, long_term_logical_bytes, current_physical_bytes, total_physical_bytes, active_physical_bytes, long_term_physical_bytes, time_travel_physical_bytes, storage_last_modified_time, deleted, table_type, fail_safe_physical_bytes + SELECT project_id, project_number, table_catalog, table_schema, table_name, creation_time, total_rows, total_partitions, total_logical_bytes, active_logical_bytes, long_term_logical_bytes, current_physical_bytes, total_physical_bytes, active_physical_bytes, long_term_physical_bytes, time_travel_physical_bytes, storage_last_modified_time, deleted, table_type, fail_safe_physical_bytes, last_metadata_index_refresh_time FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE_BY_ORGANIZATION` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -41,7 +41,8 @@ time_travel_physical_bytes, storage_last_modified_time, deleted, table_type, -fail_safe_physical_bytes +fail_safe_physical_bytes, +last_metadata_index_refresh_time FROM `region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`TABLE_STORAGE_BY_ORGANIZATION` {%- endif %} ) @@ -67,5 +68,6 @@ storage_last_modified_time, deleted, table_type, fail_safe_physical_bytes, +last_metadata_index_refresh_time, FROM base diff --git 
a/models/base/google/tables/information_schema_table_storage_by_organization.yml b/models/base/google/tables/information_schema_table_storage_by_organization.yml index 6d49931..ea7bf41 100644 --- a/models/base/google/tables/information_schema_table_storage_by_organization.yml +++ b/models/base/google/tables/information_schema_table_storage_by_organization.yml @@ -74,3 +74,6 @@ models: description: "Number of physical (compressed) bytes used by the fail-safe storage\n\ \ (deleted or changed data)." type: INT64 + - name: LAST_METADATA_INDEX_REFRESH_TIME + description: The last metadata index refresh time of the table. + type: TIMESTAMP diff --git a/models/base/google/tables/information_schema_table_storage_usage_timeline.yml b/models/base/google/tables/information_schema_table_storage_usage_timeline.yml index 69a4ab1..2aa3130 100644 --- a/models/base/google/tables/information_schema_table_storage_usage_timeline.yml +++ b/models/base/google/tables/information_schema_table_storage_usage_timeline.yml @@ -21,29 +21,29 @@ models: \ the\n tableId" type: STRING - name: BILLABLE_TOTAL_LOGICAL_USAGE - description: "The total logical usage, in MB second.\nReturns 0 if the dataset\ + description: "The total logical usage, in MiB second.\nReturns 0 if the dataset\ \ uses the physical storage\n billing model." type: INT64 - name: BILLABLE_ACTIVE_LOGICAL_USAGE - description: "The logical usage that is less than 90 days old, in MB\n second.\n\ + description: "The logical usage that is less than 90 days old, in MiB\n second.\n\ Returns 0 if the dataset uses the physical storage\n billing model." type: INT64 - name: BILLABLE_LONG_TERM_LOGICAL_USAGE - description: "The logical usage that is more than 90 days old, in MB\n second.\n\ + description: "The logical usage that is more than 90 days old, in MiB\n second.\n\ Returns 0 if the dataset uses the physical storage\n billing model." type: INT64 - name: BILLABLE_TOTAL_PHYSICAL_USAGE - description: "The total usage in MB second. 
This includes\n physical bytes\ + description: "The total usage in MiB second. This includes\n physical bytes\ \ used for fail-safe and\n time travel storage.\nReturns 0 if the dataset\ \ uses the logical storage\n billing model." type: INT64 - name: BILLABLE_ACTIVE_PHYSICAL_USAGE - description: "The physical usage that is less than 90 days old,\n in MB second.\ + description: "The physical usage that is less than 90 days old,\n in MiB second.\ \ This\n includes physical bytes used for fail-safe and\n time travel\ \ storage.\nReturns 0 if the dataset uses the logical storage\n billing\ \ model." type: INT64 - name: BILLABLE_LONG_TERM_PHYSICAL_USAGE - description: "The physical usage that is more than 90 days old,\n in MB second.\n\ + description: "The physical usage that is more than 90 days old,\n in MiB second.\n\ Returns 0 if the dataset uses the logical storage\n billing model." type: INT64 diff --git a/models/base/google/tables/information_schema_table_storage_usage_timeline_by_organization.yml b/models/base/google/tables/information_schema_table_storage_usage_timeline_by_organization.yml index 798f0e4..2df6823 100644 --- a/models/base/google/tables/information_schema_table_storage_usage_timeline_by_organization.yml +++ b/models/base/google/tables/information_schema_table_storage_usage_timeline_by_organization.yml @@ -21,29 +21,29 @@ models: \ the\n tableId" type: STRING - name: BILLABLE_TOTAL_LOGICAL_USAGE - description: "The total logical usage, in MB second.\nReturns 0 if the dataset\ + description: "The total logical usage, in MiB second.\nReturns 0 if the dataset\ \ uses the physical storage\n billing model." type: INT64 - name: BILLABLE_ACTIVE_LOGICAL_USAGE - description: "The logical usage that is less than 90 days old, in MB\n second.\n\ + description: "The logical usage that is less than 90 days old, in MiB\n second.\n\ Returns 0 if the dataset uses the physical storage\n billing model." 
type: INT64 - name: BILLABLE_LONG_TERM_LOGICAL_USAGE - description: "The logical usage that is more than 90 days old, in MB\n second.\n\ + description: "The logical usage that is more than 90 days old, in MiB\n second.\n\ Returns 0 if the dataset uses the physical storage\n billing model." type: INT64 - name: BILLABLE_TOTAL_PHYSICAL_USAGE - description: "The total usage in MB second. This includes\n physical bytes\ + description: "The total usage in MiB second. This includes\n physical bytes\ \ used for fail-safe and\n time travel storage.\nReturns 0 if the dataset\ \ uses the logical storage\n billing model." type: INT64 - name: BILLABLE_ACTIVE_PHYSICAL_USAGE - description: "The physical usage that is less than 90 days old,\n in MB second.\ + description: "The physical usage that is less than 90 days old,\n in MiB second.\ \ This\n includes physical bytes used for fail-safe and\n time travel\ \ storage.\nReturns 0 if the dataset uses the logical storage\n billing\ \ model." type: INT64 - name: BILLABLE_LONG_TERM_PHYSICAL_USAGE - description: "The physical usage that is more than 90 days old,\n in MB second.\n\ + description: "The physical usage that is more than 90 days old,\n in MiB second.\n\ Returns 0 if the dataset uses the logical storage\n billing model." type: INT64 diff --git a/models/base/google/tables/information_schema_tables.sql b/models/base/google/tables/information_schema_tables.sql index 32dab94..59ebb61 100644 --- a/models/base/google/tables/information_schema_tables.sql +++ b/models/base/google/tables/information_schema_tables.sql @@ -16,7 +16,7 @@ Access control with IAM. 
-#} WITH base AS ( {% if project_list()|length > 0 -%} {% for project in project_list() -%} - SELECT table_catalog, table_schema, table_name, table_type, is_insertable_into, is_typed, creation_time, base_table_catalog, base_table_schema, base_table_name, snapshot_time_ms, replica_source_catalog, replica_source_schema, replica_source_name, replication_status, replication_error, ddl, default_collation_name, upsert_stream_apply_watermark + SELECT table_catalog, table_schema, table_name, table_type, is_insertable_into, is_typed, is_change_history_enabled, creation_time, base_table_catalog, base_table_schema, base_table_name, snapshot_time_ms, replica_source_catalog, replica_source_schema, replica_source_name, replication_status, replication_error, ddl, default_collation_name, upsert_stream_apply_watermark FROM `{{ project | trim }}`.`region-{{ var('bq_region') }}`.`INFORMATION_SCHEMA`.`TABLES` {% if not loop.last %}UNION ALL{% endif %} {% endfor %} @@ -28,6 +28,7 @@ table_name, table_type, is_insertable_into, is_typed, +is_change_history_enabled, creation_time, base_table_catalog, base_table_schema, @@ -52,6 +53,7 @@ table_name, table_type, is_insertable_into, is_typed, +is_change_history_enabled, creation_time, base_table_catalog, base_table_schema, diff --git a/models/base/google/tables/information_schema_tables.yml b/models/base/google/tables/information_schema_tables.yml index 0824335..a3a12c2 100644 --- a/models/base/google/tables/information_schema_tables.yml +++ b/models/base/google/tables/information_schema_tables.yml @@ -28,6 +28,9 @@ models: - name: is_typed description: The value is always NO type: STRING + - name: is_change_history_enabled + description: "YES or NO depending on whether\n change history\n is enabled" + type: STRING - name: creation_time description: The table's creation time type: TIMESTAMP diff --git a/models/base/google/views/information_schema_materialized_views.sql b/models/base/google/views/information_schema_materialized_views.sql 
index 7db3d9e..bef7085 100644 --- a/models/base/google/views/information_schema_materialized_views.sql +++ b/models/base/google/views/information_schema_materialized_views.sql @@ -34,11 +34,11 @@ The following permissions are required to query the INFORMATION_SCHEMA.MATERIALI - You might also be able to get - these permissions - with custom roles or - other predefined roles. - Access control with IAM -#} + You might also be able to get + these permissions + with custom roles or + other predefined roles. + Access control with IAM -#} WITH base AS ( {% if project_list()|length > 0 -%} diff --git a/models/base/jobs_by_project_with_cost.sql b/models/base/jobs_by_project_with_cost.sql index 1d56c0e..3962807 100644 --- a/models/base/jobs_by_project_with_cost.sql +++ b/models/base/jobs_by_project_with_cost.sql @@ -3,4 +3,5 @@ materialized=materialized_as_view_if_explicit_projects() ) }} -{{ jobs_with_cost_base("information_schema_jobs_by_project", contains_query = True) }} +SELECT * +FROM {{ ref('jobs_with_cost') }} diff --git a/models/base/jobs_with_cost.sql b/models/base/jobs_with_cost.sql index 9c9e1dc..d21ede2 100644 --- a/models/base/jobs_with_cost.sql +++ b/models/base/jobs_with_cost.sql @@ -3,4 +3,85 @@ materialized=materialized_as_view_if_explicit_projects() ) }} -{{ jobs_with_cost_base("information_schema_jobs", contains_query = False) }} +{# More details about base table in https://cloud.google.com/bigquery/docs/information-schema-jobs -#} +WITH base AS ( +SELECT + bi_engine_statistics, + cache_hit, + creation_time, + TIMESTAMP_TRUNC(creation_time, HOUR) AS hour, + destination_table, + end_time, + error_result, + job_id, + job_stages, + job_type, + labels, + parent_job_id, + priority, + project_id, + project_number, + query, + -- extract the dbt info from the query comment generated by dbt + REPLACE(REPLACE(REGEXP_EXTRACT(query, r'^(\/\* \{+?[\w\W]+?\} \*\/)'), '/', ''), '*', '') AS dbt_info, + referenced_tables, + reservation_id, + start_time, + state, + 
statement_type, + timeline, + total_bytes_billed, + total_bytes_processed, + total_modified_partitions, + total_slot_ms, + transaction_id, + user_email, + query_info, + transferred_bytes, + materialized_view_statistics +FROM + {{ ref('information_schema_jobs') }} +{#- Prevent to duplicate costs as script contains query #} +WHERE statement_type != 'SCRIPT' +), + +base_with_enriched_fields AS ( +SELECT + *, + total_slot_ms / (1000 * 60 * 60 * 24) AS avg_slots, + total_bytes_billed / POW(1024, 4) AS total_tb_billed, + TIMESTAMP_DIFF(COALESCE(end_time, CURRENT_TIMESTAMP()), start_time, SECOND) AS total_time_seconds, + IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.dbt_version'), NULL) AS dbt_version, + IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.profile_name'), NULL) AS dbt_profile_name, + IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.target_name'), NULL) AS dbt_target_name, + IF(LENGTH(dbt_info) > 0, JSON_EXTRACT_SCALAR(dbt_info, '$.node_id'), NULL) AS dbt_model_name, + IF(LENGTH(dbt_info) > 0, + ARRAY( + SELECT JSON_VALUE(string_element, '$') + FROM UNNEST(JSON_QUERY_ARRAY(dbt_info, '$.node_tags')) AS string_element + ), NULL) AS node_tags, + FROM base +), + +base_with_all_pricing AS ( +SELECT + * EXCEPT (dbt_model_name), + total_slot_ms / (1000 * 60 * 60) * {{ var('hourly_slot_price') }} AS flat_pricing_query_cost, + total_tb_billed * {{ var('per_billed_tb_price') }} AS ondemand_query_cost, + CASE + WHEN dbt_model_name LIKE 'model.%' THEN 'model' + WHEN dbt_model_name LIKE 'snapshot.%' THEN 'snapshot' + WHEN dbt_model_name LIKE 'test.%' THEN 'test' + END AS dbt_execution_type, + CONCAT(SPLIT(dbt_model_name, '.')[SAFE_OFFSET(1)], '.', SPLIT(dbt_model_name, '.')[SAFE_OFFSET(2)]) AS dbt_model_name +FROM base_with_enriched_fields +) + +SELECT + *, +{% if var('use_flat_pricing') -%} + flat_pricing_query_cost AS query_cost +{%- else -%} + ondemand_query_cost AS query_cost +{%- endif %} +FROM base_with_all_pricing