From e241babec0afbc67bdb3c14dac6641f56a16c54f Mon Sep 17 00:00:00 2001 From: Christophe Oudar Date: Sun, 17 Nov 2024 13:54:21 +0100 Subject: [PATCH] Add BI engine usage monitoring --- .../bi_engine/bi_engine_usage_per_hour.sql | 7 ++++ .../bi_engine/bi_engine_usage_per_hour.yml | 39 +++++++++++++++++++ .../bi_engine/bi_engine_usage_per_minute.sql | 7 ++++ .../bi_engine/bi_engine_usage_per_minute.yml | 39 +++++++++++++++++++ .../intermediate/compute_rollup_per_hour.sql | 35 +++++++++++++++++ .../intermediate/compute_rollup_per_hour.yml | 39 +++++++++++++++++++ .../compute_rollup_per_minute.sql | 35 +++++++++++++++++ .../compute_rollup_per_minute.yml | 39 +++++++++++++++++++ .../cost/compute_cost_per_minute.sql | 14 +++---- .../cost/compute_cost_per_minute.yml | 4 +- 10 files changed, 248 insertions(+), 10 deletions(-) create mode 100644 models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.sql create mode 100644 models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.yml create mode 100644 models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.sql create mode 100644 models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.yml create mode 100644 models/compute/intermediate/compute_rollup_per_hour.sql create mode 100644 models/compute/intermediate/compute_rollup_per_hour.yml create mode 100644 models/compute/intermediate/compute_rollup_per_minute.sql create mode 100644 models/compute/intermediate/compute_rollup_per_minute.yml diff --git a/models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.sql b/models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.sql new file mode 100644 index 0000000..6899051 --- /dev/null +++ b/models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.sql @@ -0,0 +1,7 @@ +{{ + config( + materialized='view', + ) +}} +SELECT * +FROM {{ ref('compute_rollup_per_hour') }} diff --git a/models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.yml b/models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.yml new file mode 100644 index 0000000..c1e471f --- /dev/null +++ b/models/compute/intermediate/bi_engine/bi_engine_usage_per_hour.yml @@ -0,0 +1,39 @@ +version: 2 + +models: + - name: bi_engine_usage_per_hour + description: > + A model that stores BI engine usage per hour. + meta: + label: "BI engine usage per hour" + order_fields_by: "label" + group_label: "BI engine usage" + columns: + - name: hour + description: The hour of the compute cost. + - name: project_id + description: The project id of the job. + - name: reservation_id + description: The reservation id of the job. + - name: bi_engine_mode + description: The BI engine mode of the job. + - name: total_query_cost + description: The total cost of all queries run during the hour. + - name: failing_query_cost + description: The total cost of all queries that failed during the hour. + - name: total_slot_ms + description: The total number of slot time milliseconds used by all queries during the hour. + - name: total_slot_time + description: The total number of slot time in human readable format used by all queries during the hour. + - name: query_count + description: The total number of queries run during the hour. + - name: job_state + description: A struct containing the statistics per state. + fields: + - name: done + description: Indicates if the job is done. + - name: pending + description: Indicates if the job is pending. + - name: running + description: Indicates if the job is running. + diff --git a/models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.sql b/models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.sql new file mode 100644 index 0000000..2bb2f80 --- /dev/null +++ b/models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.sql @@ -0,0 +1,7 @@ +{{ + config( + materialized='view', + ) +}} +SELECT * +FROM {{ ref('compute_rollup_per_minute') }} diff --git a/models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.yml b/models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.yml new file mode 100644 index 0000000..d6b4b40 --- /dev/null +++ b/models/compute/intermediate/bi_engine/bi_engine_usage_per_minute.yml @@ -0,0 +1,39 @@ +version: 2 + +models: + - name: bi_engine_usage_per_minute + description: > + A model that stores BI engine usage per minute. + meta: + label: "BI engine usage per minute" + order_fields_by: "label" + group_label: "BI engine usage" + columns: + - name: minute + description: The minute of the compute cost. + - name: project_id + description: The project id of the job. + - name: reservation_id + description: The reservation id of the job. + - name: bi_engine_mode + description: The BI engine mode of the job. + - name: total_query_cost + description: The total cost of all queries run during the minute. + - name: failing_query_cost + description: The total cost of all queries that failed during the minute. + - name: total_slot_ms + description: The total number of slot time milliseconds used by all queries during the minute. + - name: total_slot_time + description: The total number of slot time in human readable format used by all queries during the hour. + - name: query_count + description: The total number of queries run during the minute. + - name: job_state + description: A struct containing the statistics per state. + fields: + - name: done + description: Indicates if the job is done. + - name: pending + description: Indicates if the job is pending. + - name: running + description: Indicates if the job is running. + diff --git a/models/compute/intermediate/compute_rollup_per_hour.sql b/models/compute/intermediate/compute_rollup_per_hour.sql new file mode 100644 index 0000000..0dc1419 --- /dev/null +++ b/models/compute/intermediate/compute_rollup_per_hour.sql @@ -0,0 +1,35 @@ +{{ + config( + materialized='incremental', + incremental_strategy = 'insert_overwrite', + on_schema_change='append_new_columns', + partition_by={ + "field": "hour", + "granularity": "day", + "data_type": "timestamp", + "copy_partitions": should_use_copy_partitions() + }, + cluster_by = ['hour', 'project_id'], + partition_expiration_days = var('output_partition_expiration_days') + ) +}} +{%- call set_sql_header(config) %} + {{ milliseconds_to_readable_time_udf() }} +{%- endcall %} +SELECT + TIMESTAMP_TRUNC(MINUTE, HOUR) AS hour, + project_id, + reservation_id, + bi_engine_statistics.biEngineMode AS bi_engine_mode, + SUM(ROUND(total_query_cost, 2)) AS total_query_cost, + SUM(ROUND(failing_query_cost, 2)) AS failing_query_cost, + SUM(total_slot_ms) AS total_slot_ms, + MILLISECONDS_TO_READABLE_TIME_UDF(total_slot_ms, 2) AS total_slot_time, + SUM(query_count) AS query_count, + STRUCT( + SUM(job_state.done) AS done, + SUM(job_state.running) AS running, + SUM(job_state.pending) AS pending + ) AS job_state +FROM {{ ref("compute_rollup_per_minute") }} +GROUP BY ALL diff --git a/models/compute/intermediate/compute_rollup_per_hour.yml b/models/compute/intermediate/compute_rollup_per_hour.yml new file mode 100644 index 0000000..649e09b --- /dev/null +++ b/models/compute/intermediate/compute_rollup_per_hour.yml @@ -0,0 +1,39 @@ +version: 2 + +models: + - name: compute_rollup_per_hour + description: > + A model that stores the compute cost per hour. + meta: + label: "Compute cost per hour" + order_fields_by: "label" + group_label: "Compute cost" + columns: + - name: hour + description: The hour of the compute cost. + - name: project_id + description: The project id of the job. + - name: reservation_id + description: The reservation id of the job. + - name: bi_engine_mode + description: The BI engine mode of the job. + - name: total_query_cost + description: The total cost of all queries run during the hour. + - name: failing_query_cost + description: The total cost of all queries that failed during the hour. + - name: total_slot_ms + description: The total number of slot time milliseconds used by all queries during the hour. + - name: total_slot_time + description: The total number of slot time in human readable format used by all queries during the hour. + - name: query_count + description: The total number of queries run during the hour. + - name: job_state + description: A struct containing the state of the job. + fields: + - name: done + description: Indicates if the job is done. + - name: pending + description: Indicates if the job is pending. + - name: running + description: Indicates if the job is running. + diff --git a/models/compute/intermediate/compute_rollup_per_minute.sql b/models/compute/intermediate/compute_rollup_per_minute.sql new file mode 100644 index 0000000..eb8fd6d --- /dev/null +++ b/models/compute/intermediate/compute_rollup_per_minute.sql @@ -0,0 +1,35 @@ +{{ + config( + materialized='incremental', + incremental_strategy = 'insert_overwrite', + on_schema_change='append_new_columns', + partition_by={ + "field": "minute", + "granularity": "hour", + "data_type": "timestamp", + "copy_partitions": should_use_copy_partitions() + }, + cluster_by = ['minute', 'project_id'], + partition_expiration_days = var('output_partition_expiration_days') + ) +}} +{%- call set_sql_header(config) %} + {{ milliseconds_to_readable_time_udf() }} +{%- endcall %} +SELECT + TIMESTAMP_TRUNC(creation_time, MINUTE) AS minute, + project_id, + reservation_id, + bi_engine_statistics.biEngineMode AS bi_engine_mode, + SUM(ROUND(query_cost, 2)) AS total_query_cost, + SUM(IF(error_result IS NOT NULL, ROUND(query_cost, 2), 0)) AS failing_query_cost, + SUM(total_slot_ms) AS total_slot_ms, + MILLISECONDS_TO_READABLE_TIME_UDF(total_slot_ms, 2) AS total_slot_time, + COUNT(*) AS query_count, + STRUCT( + COUNTIF(state = 'DONE') AS done, + COUNTIF(state = 'RUNNING') AS running, + COUNTIF(state = 'PENDING') AS pending + ) AS job_state +FROM {{ jobs_done_incremental_hourly() }} +GROUP BY ALL diff --git a/models/compute/intermediate/compute_rollup_per_minute.yml b/models/compute/intermediate/compute_rollup_per_minute.yml new file mode 100644 index 0000000..2c0765e --- /dev/null +++ b/models/compute/intermediate/compute_rollup_per_minute.yml @@ -0,0 +1,39 @@ +version: 2 + +models: + - name: compute_rollup_per_minute + description: > + A model that stores an intermediate rollup used for compute cost per minute. + meta: + label: "Compute rollup per minute" + order_fields_by: "label" + group_label: "Compute rollup" + columns: + - name: minute + description: The minute of the compute cost. + - name: project_id + description: The project id of the job. + - name: reservation_id + description: The reservation id of the job. + - name: bi_engine_mode + description: The BI engine mode of the job. + - name: total_query_cost + description: The total cost of all queries run during the minute. + - name: failing_query_cost + description: The total cost of all queries that failed during the minute. + - name: total_slot_ms + description: The total number of slot time milliseconds used by all queries during the minute. + - name: total_slot_time + description: The total number of slot time in human readable format used by all queries during the hour. + - name: query_count + description: The total number of queries run during the minute. + - name: job_state + description: A struct containing the statistics per state. + fields: + - name: done + description: Indicates if the job is done. + - name: pending + description: Indicates if the job is pending. + - name: running + description: Indicates if the job is running. + diff --git a/models/compute/intermediate/cost/compute_cost_per_minute.sql b/models/compute/intermediate/cost/compute_cost_per_minute.sql index 4980d3b..753b575 100644 --- a/models/compute/intermediate/cost/compute_cost_per_minute.sql +++ b/models/compute/intermediate/cost/compute_cost_per_minute.sql @@ -20,15 +20,15 @@ SELECT TIMESTAMP_TRUNC(creation_time, MINUTE) AS minute, project_id, reservation_id, - SUM(ROUND(query_cost, 2)) AS total_query_cost, - SUM(IF(error_result IS NOT NULL, ROUND(query_cost, 2), 0)) AS failing_query_cost, + SUM(ROUND(total_query_cost, 2)) AS total_query_cost, + SUM(ROUND(failing_query_cost, 2)) AS failing_query_cost, SUM(total_slot_ms) AS total_slot_ms, MILLISECONDS_TO_READABLE_TIME_UDF(total_slot_ms, 2) AS total_slot_time, - COUNT(*) AS query_count, + SUM(query_count) AS query_count, STRUCT( - COUNTIF(state = 'DONE') AS done, - COUNTIF(state = 'RUNNING') AS running, - COUNTIF(state = 'PENDING') AS pending + SUM(job_state.done) AS done, + SUM(job_state.running) AS running, + SUM(job_state.pending) AS pending ) AS job_state -FROM {{ jobs_done_incremental_hourly() }} +FROM {{ ref('compute_rollup_per_minute') }} GROUP BY ALL diff --git a/models/compute/intermediate/cost/compute_cost_per_minute.yml b/models/compute/intermediate/cost/compute_cost_per_minute.yml index f52784c..a6b049d 100644 --- a/models/compute/intermediate/cost/compute_cost_per_minute.yml +++ b/models/compute/intermediate/cost/compute_cost_per_minute.yml @@ -3,14 +3,12 @@ version: 2 models: - name: compute_cost_per_minute description: > - A model that stores the compute cost per hour. + A model that stores the compute cost per minute. meta: label: "Compute cost per minute" order_fields_by: "label" group_label: "Compute cost" columns: - - name: hour - description: The hour of the compute cost. - name: minute description: The minute of the compute cost. - name: project_id