From 06b02a09fd17722feca58313451c3e54dd696ddc Mon Sep 17 00:00:00 2001 From: Bill Date: Wed, 19 Jun 2024 20:47:00 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20docs:=20add=20combining=20dimension?= =?UTF-8?q?s=20questions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../challenging-sql-problems.md | 7 +- .../combining-dimensions--sample-input.sql | 23 +++++ .../combining-dimensions--sample-output.sql | 10 ++ .../problems/bronze/combining-dimensions.md | 94 +++++++++++++++++++ .../problems/bronze/combining-dimensions.sql | 63 +++++++++++++ .../bronze/combining-dimensions--duckdb.sql | 40 ++++++++ .../solutions/bronze/combining-dimensions.md | 45 +++++++++ .../solutions/bronze/combining-dimensions.sql | 22 +++++ mkdocs.yml | 2 + 9 files changed, 303 insertions(+), 3 deletions(-) create mode 100644 docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-input.sql create mode 100644 docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-output.sql create mode 100644 docs/challenging-sql-problems/problems/bronze/combining-dimensions.md create mode 100644 docs/challenging-sql-problems/problems/bronze/combining-dimensions.sql create mode 100644 docs/challenging-sql-problems/solutions/bronze/combining-dimensions--duckdb.sql create mode 100644 docs/challenging-sql-problems/solutions/bronze/combining-dimensions.md create mode 100644 docs/challenging-sql-problems/solutions/bronze/combining-dimensions.sql diff --git a/docs/challenging-sql-problems/challenging-sql-problems.md b/docs/challenging-sql-problems/challenging-sql-problems.md index be7e5b0..0cd59ff 100644 --- a/docs/challenging-sql-problems/challenging-sql-problems.md +++ b/docs/challenging-sql-problems/challenging-sql-problems.md @@ -19,9 +19,10 @@ As long as you know your database features and query patterns, these are fairly 1. [Temperature anomaly detection](problems/bronze/temperature-anomaly-detection.md) 2. [Personalised customer emails](problems/bronze/personalised-customer-emails.md) 3. [Suspicious login activity](problems/bronze/suspicious-login-activity.md) -4. [Customer churn](problems/bronze/customer-churn.md) -5. [Fibonacci sequence](problems/bronze/fibonacci-sequence.md) -6. [UK bank holidays](problems/bronze/uk-bank-holidays.md) +4. [Combining dimensions](problems/bronze/combining-dimensions.md) +5. [Customer churn](problems/bronze/customer-churn.md) +6. [Fibonacci sequence](problems/bronze/fibonacci-sequence.md) +7. [UK bank holidays](problems/bronze/uk-bank-holidays.md) ### ⚪ Silver Tier diff --git a/docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-input.sql b/docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-input.sql new file mode 100644 index 0000000..6d670a8 --- /dev/null +++ b/docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-input.sql @@ -0,0 +1,23 @@ +```sql +with + +dim_employee_demographics(employee_id, valid_from, valid_until, date_of_birth, gender, ethnicity) as ( + values + (1, '2021-07-12', '9999-12-31', '1995-02-24', 'Female', 'White'), + (2, '2023-12-07', '9999-12-31', '1999-12-12', 'Male', 'Asian') +), + +dim_employee_career(employee_id, valid_from, valid_until, job_title, salary) as ( + values + (1, '2021-07-12', '2023-02-18', 'Student', 0.00), + (1, '2023-02-19', '9999-12-31', 'Pianist', 2000.00), + (2, '2023-12-08', '9999-12-31', 'Paramedic', 4000.00) +), + +dim_employee_contact(employee_id, valid_from, valid_until, email, phone) as ( + values + (1, '2021-07-12', '9999-12-31', 'abcde@gmail.com', '123-456-789'), + (2, '2023-12-08', '2023-12-31', null, '01234 567890'), + (2, '2024-01-01', '9999-12-31', 'something@mail.net', '0300 123 456') +) +``` diff --git a/docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-output.sql b/docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-output.sql new file mode 100644 index 0000000..757606d --- /dev/null +++ b/docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-output.sql @@ -0,0 +1,10 @@ +```sql +solution(employee_id, valid_from, valid_until, date_of_birth, gender, ethnicity, job_title, salary, email, phone) as ( + values + (1, '2021-07-12', '2023-02-18', '1995-02-24', 'Female', 'Black', 'Student', 0.00, 'abcde@gmail.com', '123-456-789'), + (1, '2023-02-19', '9999-12-31', '1995-02-24', 'Female', 'Black', 'Pianist', 2000.00, 'abcde@gmail.com', '123-456-789'), + (2, '2023-12-07', '2023-12-07', '1999-12-12', 'Male', 'Asian', null, null, null, null), + (2, '2023-12-08', '2023-12-31', '1999-12-12', 'Male', 'Asian', 'Paramedic', 4000.00, null, '01234 567890'), + (2, '2024-01-01', '9999-12-31', '1999-12-12', 'Male', 'Asian', 'Paramedic', 4000.00, 'something@mail.net', '0300 123 456') +) +``` diff --git a/docs/challenging-sql-problems/problems/bronze/combining-dimensions.md b/docs/challenging-sql-problems/problems/bronze/combining-dimensions.md new file mode 100644 index 0000000..09c0ddd --- /dev/null +++ b/docs/challenging-sql-problems/problems/bronze/combining-dimensions.md @@ -0,0 +1,94 @@ +# Combining dimensions 🔗 + +> [!SUCCESS] Scenario +> +> A human resources department has several dimension tables with information about their employees. +> +> For reporting purposes, they need to combine these dimensions into a single dimension table which shows all the information about each employee. + +> [!QUESTION] +> +> Write a query which combines the three dimension tables below into a single dimension table. +> +> The output should have the columns from the three tables combined: +> +> - `employee_id` +> - `valid_from` +> - `valid_until` +> - `date_of_birth` +> - `gender` +> - `ethnicity` +> - `job_title` +> - `salary` +> - `email` +> - `phone` +> +> The number of rows in the output depends on the number of changes that need to be accounted for, and the `valid_from` and `valid_until` columns should be recalculated as necessary to account for these changes. +> +> Order the output by `employee_id` and `valid_from`. + +
+Expand for the DDL +--8<-- "docs/challenging-sql-problems/problems/bronze/combining-dimensions.sql" +
+ +The "dimensions" in this context are [Slowly Changing Dimensions](https://en.wikipedia.org/wiki/Slowly_changing_dimension) of [type 2](https://en.wikipedia.org/wiki/Slowly_changing_dimension#Type_2:_add_new_row) from the [Star Schema modelling framework](https://en.wikipedia.org/wiki/Star_schema). + +The solution can be found at: + +- [combining-dimensions.md](../../solutions/bronze/combining-dimensions.md) + +--- + + +>? INFO: **Sample input** +> +> **Employee Demographics Dimension** +> +> | employee_id | valid_from | valid_until | date_of_birth | gender | ethnicity | +> |------------:|:-----------|:------------|:--------------|:-------|:----------| +> | 1 | 2021-07-12 | 9999-12-31 | 1995-02-24 | Female | Black | +> | 2 | 2023-12-07 | 9999-12-31 | 1999-12-12 | Male | Asian | +> +> **Employee Career Dimension** +> +> | employee_id | valid_from | valid_until | job_title | salary | +> |------------:|:-----------|:------------|:----------|--------:| +> | 1 | 2021-07-12 | 2023-02-18 | Student | 0.00 | +> | 1 | 2023-02-19 | 9999-12-31 | Pianist | 2000.00 | +> | 2 | 2023-12-08 | 9999-12-31 | Paramedic | 4000.00 | +> +> **Employee Contact Dimension** +> +> | employee_id | valid_from | valid_until | email | phone | +> |------------:|:-----------|:------------|:-------------------|:-------------| +> | 1 | 2021-07-12 | 9999-12-31 | abcde@gmail.com | 123-456-789 | +> | 2 | 2023-12-08 | 2023-12-31 | _null_ | 01234 567890 | +> | 2 | 2024-01-01 | 9999-12-31 | something@mail.net | 0300 123 456 | +> +--8<-- "docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-input.sql" + + +>? INFO: **Sample output** +> +> | employee_id | valid_from | valid_until | date_of_birth | gender | ethnicity | job_title | salary | email | phone | +> |------------:|:-----------|:------------|:--------------|:-------|:----------|:----------|--------:|:-------------------|:-------------| +> | 1 | 2021-07-12 | 2023-02-18 | 1995-02-24 | Female | Black | Student | 0.00 | abcde@gmail.com | 123-456-789 | +> | 1 | 2023-02-19 | 9999-12-31 | 1995-02-24 | Female | Black | Pianist | 2000.00 | abcde@gmail.com | 123-456-789 | +> | 2 | 2023-12-07 | 2023-12-07 | 1999-12-12 | Male | Asian | null | _null_ | _null_ | _null_ | +> | 2 | 2023-12-08 | 2023-12-31 | 1999-12-12 | Male | Asian | Paramedic | 4000.00 | _null_ | 01234 567890 | +> | 2 | 2024-01-01 | 9999-12-31 | 1999-12-12 | Male | Asian | Paramedic | 4000.00 | something@mail.net | 0300 123 456 | +> +--8<-- "docs/challenging-sql-problems/problems/bronze/combining-dimensions--sample-output.sql" + + +>? TIP: **Hint 1** +> +> The `valid_from` values in the output will be each of the distinct `valid_from` values from the three tables for the given employee, so start by constructing an "axis" which is the distinct combination of all `employee_id` and `valid_from` values across the three tables. + + +>? TIP: **Hint 2** +> +> For databases that support it, use [an `ASOF` join](../../../everything-about-joins/syntax/timestamp-joins.md) to add the latest information from each dimension to the "axis" for each `employee_id` and `valid_from` combination. +> +> For databases that don't support `ASOF` joins, calculate the `valid_until` values for the `valid_from` values in the "axis" and then join the dimension tables to the axis by where the axis `valid_from`-`valid_until` range in contained within the dimension `valid_from`-`valid_until` range. diff --git a/docs/challenging-sql-problems/problems/bronze/combining-dimensions.sql b/docs/challenging-sql-problems/problems/bronze/combining-dimensions.sql new file mode 100644 index 0000000..8e771bc --- /dev/null +++ b/docs/challenging-sql-problems/problems/bronze/combining-dimensions.sql @@ -0,0 +1,63 @@ +```sql +create table dim_employee_demographics ( + employee_id int, + valid_from date, + valid_until date, + date_of_birth date, + gender varchar, + ethnicity varchar, + primary key (employee_id, valid_from) +); +insert into dim_employee_demographics +values + (1, '2021-06-13', '2022-06-08', '2004-02-18', 'Female', 'Malaysian'), + (2, '2021-10-19', '9999-12-31', '1963-12-12', 'Female', 'Navajo'), + (3, '2022-01-29', '9999-12-31', '2000-10-17', 'Genderqueer', 'White'), + (4, '2022-04-28', '2024-03-12', '1987-12-13', 'Male', 'Black'), + (1, '2022-06-09', '9999-12-31', '2004-02-18', 'Non-binary', 'Malaysian'), + (5, '2022-08-31', '9999-12-31', '1999-09-10', 'Female', 'Asian'), + (4, '2024-03-13', '9999-12-31', '1987-12-13', 'Female', 'Black') +; + + +create table dim_employee_career ( + employee_id int, + valid_from date, + valid_until date, + job_title varchar, + salary decimal(10, 2), + primary key (employee_id, valid_from) +); +insert into dim_employee_career +values + (1, '2021-06-13', '2023-05-27', 'Teacher', 5000.00), + (1, '2023-05-28', '9999-12-31', 'Teacher', 6000.00), + (2, '2021-10-19', '2023-11-26', 'Data Analyst', 4000.00), + (2, '2023-11-27', '2024-03-01', 'Data Analyst', 6500.00), + (2, '2024-03-02', '9999-12-31', 'Engineering Manager', 7000.00), + (3, '2022-01-29', '2023-04-02', 'Software Engineer', 6000.00), + (3, '2023-04-03', '9999-12-31', 'Software Engineer', 8000.00), + (4, '2022-06-12', '9999-12-31', 'Founder', null) +; + + +create table dim_employee_contact ( + employee_id int, + valid_from date, + valid_until date, + email varchar, + phone varchar, + primary key (employee_id, valid_from) +); +insert into dim_employee_contact +values + (1, '2021-06-13', '2024-01-29', 'c.perot0@gmail.com', null), + (1, '2024-01-30', '9999-12-31', 'c.perot0@gmail.com', '1986474151'), + (2, '2021-10-19', '2024-04-04', null, null), + (2, '2024-04-05', '9999-12-31', 'hpicard1@bing.com', null), + (4, '2022-06-12', '2022-12-01', 'tbayford3@hotmail.co.uk', '01246 209863'), + (4, '2022-12-02', '2023-11-11', 'tbayford3@hotmail.co.uk', '01752 492269'), + (4, '2023-11-12', '9999-12-31', 'tmacalinden@hotmail.co.uk', '01270 530950'), + (5, '2023-02-17', '9999-12-31', null, null) +; +``` diff --git a/docs/challenging-sql-problems/solutions/bronze/combining-dimensions--duckdb.sql b/docs/challenging-sql-problems/solutions/bronze/combining-dimensions--duckdb.sql new file mode 100644 index 0000000..f7451f4 --- /dev/null +++ b/docs/challenging-sql-problems/solutions/bronze/combining-dimensions--duckdb.sql @@ -0,0 +1,40 @@ +```sql +with axis as ( + select employee_id, valid_from + from dim_employee_demographics + union + select employee_id, valid_from + from dim_employee_career + union + select employee_id, valid_from + from dim_employee_contact +) + +select + employee_id, + valid_from, + lead(valid_from - 1, 1, '9999-12-31') over ( + partition by employee_id + order by valid_from + ) as valid_until, + + dim_employee_demographics.date_of_birth, + dim_employee_demographics.gender, + dim_employee_demographics.ethnicity, + + dim_employee_career.job_title, + dim_employee_career.salary, + + dim_employee_contact.email, + dim_employee_contact.phone +from axis + asof left join dim_employee_demographics + using (employee_id, valid_from) + asof left join dim_employee_career + using (employee_id, valid_from) + asof left join dim_employee_contact + using (employee_id, valid_from) +order by + employee_id, + valid_from +``` diff --git a/docs/challenging-sql-problems/solutions/bronze/combining-dimensions.md b/docs/challenging-sql-problems/solutions/bronze/combining-dimensions.md new file mode 100644 index 0000000..86ca1d2 --- /dev/null +++ b/docs/challenging-sql-problems/solutions/bronze/combining-dimensions.md @@ -0,0 +1,45 @@ +# Combining dimensions 🔗 + +> [!TIP] +> +> Solution to the following problem: +> +> - [combining-dimensions.md](../../problems/bronze/combining-dimensions.md) + +## Result Set + +Regardless of the database, the result set should look like: + +| employee_id | valid_from | valid_until | date_of_birth | gender | ethnicity | job_title | salary | email | phone | +| ----------: | :--------- | :---------- | :------------ | :---------- | :-------- | :------------------ | ------: | :------------------------ | :----------- | +| 1 | 2021-06-13 | 2022-06-08 | 2004-02-18 | Female | Malaysian | Teacher | 5000.00 | c.perot0@gmail.com | _null_ | +| 1 | 2022-06-09 | 2023-05-27 | 2004-02-18 | Non-binary | Malaysian | Teacher | 5000.00 | c.perot0@gmail.com | _null_ | +| 1 | 2023-05-28 | 2024-01-29 | 2004-02-18 | Non-binary | Malaysian | Teacher | 6000.00 | c.perot0@gmail.com | _null_ | +| 1 | 2024-01-30 | 9999-12-31 | 2004-02-18 | Non-binary | Malaysian | Teacher | 6000.00 | c.perot0@gmail.com | 1986474151 | +| 2 | 2021-10-19 | 2023-11-26 | 1963-12-12 | Female | Navajo | Data Analyst | 4000.00 | _null_ | _null_ | +| 2 | 2023-11-27 | 2024-03-01 | 1963-12-12 | Female | Navajo | Data Analyst | 6500.00 | _null_ | _null_ | +| 2 | 2024-03-02 | 2024-04-04 | 1963-12-12 | Female | Navajo | Engineering Manager | 7000.00 | _null_ | _null_ | +| 2 | 2024-04-05 | 9999-12-31 | 1963-12-12 | Female | Navajo | Engineering Manager | 7000.00 | hpicard1@bing.com | _null_ | +| 3 | 2022-01-29 | 2023-04-02 | 2000-10-17 | Genderqueer | White | Software Engineer | 6000.00 | _null_ | _null_ | +| 3 | 2023-04-03 | 9999-12-31 | 2000-10-17 | Genderqueer | White | Software Engineer | 8000.00 | _null_ | _null_ | +| 4 | 2022-04-28 | 2022-06-11 | 1987-12-13 | Male | Black | _null_ | _null_ | _null_ | _null_ | +| 4 | 2022-06-12 | 2022-12-01 | 1987-12-13 | Male | Black | Founder | _null_ | tbayford3@hotmail.co.uk | 01246 209863 | +| 4 | 2022-12-02 | 2023-11-11 | 1987-12-13 | Male | Black | Founder | _null_ | tbayford3@hotmail.co.uk | 01752 492269 | +| 4 | 2023-11-12 | 2024-03-12 | 1987-12-13 | Male | Black | Founder | _null_ | tmacalinden@hotmail.co.uk | 01270 530950 | +| 4 | 2024-03-13 | 9999-12-31 | 1987-12-13 | Female | Black | Founder | _null_ | tmacalinden@hotmail.co.uk | 01270 530950 | +| 5 | 2022-08-31 | 2023-02-16 | 1999-09-10 | Female | Asian | _null_ | _null_ | _null_ | _null_ | +| 5 | 2023-02-17 | 9999-12-31 | 1999-09-10 | Female | Asian | _null_ | _null_ | _null_ | _null_ | + +
+Expand for the DDL +--8<-- "docs/challenging-sql-problems/solutions/bronze/combining-dimensions.sql" +
+ +## Solution + +Some SQL solutions per database are provided below. + + +> SUCCESS: **DuckDB** +> +--8<-- "docs/challenging-sql-problems/solutions/bronze/combining-dimensions--duckdb.sql" diff --git a/docs/challenging-sql-problems/solutions/bronze/combining-dimensions.sql b/docs/challenging-sql-problems/solutions/bronze/combining-dimensions.sql new file mode 100644 index 0000000..ced8d8b --- /dev/null +++ b/docs/challenging-sql-problems/solutions/bronze/combining-dimensions.sql @@ -0,0 +1,22 @@ +```sql +solution(employee_id, valid_from, valid_until, date_of_birth, gender, ethnicity, job_title, salary, email, phone) as ( + values + (1, '2021-06-13', '2022-06-08', '2004-02-18', 'Female', 'Malaysian', 'Teacher', 5000.00, 'c.perot0@gmail.com', null), + (1, '2022-06-09', '2023-05-27', '2004-02-18', 'Non-binary', 'Malaysian', 'Teacher', 5000.00, 'c.perot0@gmail.com', null), + (1, '2023-05-28', '2024-01-29', '2004-02-18', 'Non-binary', 'Malaysian', 'Teacher', 6000.00, 'c.perot0@gmail.com', null), + (1, '2024-01-30', '9999-12-31', '2004-02-18', 'Non-binary', 'Malaysian', 'Teacher', 6000.00, 'c.perot0@gmail.com', '1986474151'), + (2, '2021-10-19', '2023-11-26', '1963-12-12', 'Female', 'Navajo', 'Data Analyst', 4000.00, null, null), + (2, '2023-11-27', '2024-03-01', '1963-12-12', 'Female', 'Navajo', 'Data Analyst', 6500.00, null, null), + (2, '2024-03-02', '2024-04-04', '1963-12-12', 'Female', 'Navajo', 'Engineering Manager', 7000.00, null, null), + (2, '2024-04-05', '9999-12-31', '1963-12-12', 'Female', 'Navajo', 'Engineering Manager', 7000.00, 'hpicard1@bing.com', null), + (3, '2022-01-29', '2023-04-02', '2000-10-17', 'Genderqueer', 'White', 'Software Engineer', 6000.00, null, null), + (3, '2023-04-03', '9999-12-31', '2000-10-17', 'Genderqueer', 'White', 'Software Engineer', 8000.00, null, null), + (4, '2022-04-28', '2022-06-11', '1987-12-13', 'Male', 'Black', null, null, null, null), + (4, '2022-06-12', '2022-12-01', '1987-12-13', 'Male', 'Black', 'Founder', null, 'tbayford3@hotmail.co.uk', '01246 209863'), + (4, '2022-12-02', '2023-11-11', '1987-12-13', 'Male', 'Black', 'Founder', null, 'tbayford3@hotmail.co.uk', '01752 492269'), + (4, '2023-11-12', '2024-03-12', '1987-12-13', 'Male', 'Black', 'Founder', null, 'tmacalinden@hotmail.co.uk', '01270 530950'), + (4, '2024-03-13', '9999-12-31', '1987-12-13', 'Female', 'Black', 'Founder', null, 'tmacalinden@hotmail.co.uk', '01270 530950'), + (5, '2022-08-31', '2023-02-16', '1999-09-10', 'Female', 'Asian', null, null, null, null), + (5, '2023-02-17', '9999-12-31', '1999-09-10', 'Female', 'Asian', null, null, null, null) +) +``` diff --git a/mkdocs.yml b/mkdocs.yml index 938c2e6..d5f642d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -79,6 +79,7 @@ nav: - challenging-sql-problems/problems/bronze/temperature-anomaly-detection.md # window functions - challenging-sql-problems/problems/bronze/personalised-customer-emails.md # string similarity - challenging-sql-problems/problems/bronze/suspicious-login-activity.md # window functions (gaps and islands) + - challenging-sql-problems/problems/bronze/combining-dimensions.md # asof join - challenging-sql-problems/problems/bronze/customer-churn.md # bit-shifting - challenging-sql-problems/problems/bronze/fibonacci-sequence.md # recursive CTE - challenging-sql-problems/problems/bronze/uk-bank-holidays.md # unpivot and unnest @@ -99,6 +100,7 @@ nav: - challenging-sql-problems/solutions/bronze/temperature-anomaly-detection.md - challenging-sql-problems/solutions/bronze/personalised-customer-emails.md - challenging-sql-problems/solutions/bronze/suspicious-login-activity.md + - challenging-sql-problems/solutions/bronze/combining-dimensions.md - challenging-sql-problems/solutions/bronze/customer-churn.md - challenging-sql-problems/solutions/bronze/fibonacci-sequence.md - challenging-sql-problems/solutions/bronze/uk-bank-holidays.md