Skip to content

Commit

Permalink
Support quoting columns in inferred schemas
Browse files Browse the repository at this point in the history
If `infer_schema` is set to `quote`, the inferred columns will be quoted.
This is useful, for example, when the schemas are generated from
nested JSON objects.
  • Loading branch information
shaug committed Dec 23, 2024
1 parent 0e778b3 commit ff99933
Show file tree
Hide file tree
Showing 8 changed files with 283 additions and 3 deletions.
60 changes: 60 additions & 0 deletions integration_tests/models/plugins/snowflake/snowflake_external.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,28 @@ sources:
- last_name
- email

# CSV-backed external table whose column names contain dots (user.*).
# Defines the anchors reused by the other users_* tables:
# csv-users, cols-of-the-users and equal-to-the-users.
- name: users_csv_unpartitioned
  external: &csv-users
    location: '@{{ target.schema }}.dbt_external_tables_testing/csv'
    file_format: '( type = csv skip_header = 1 )'
  columns: &cols-of-the-users
    - name: user.id
      data_type: int
    - name: user.first_name
      data_type: varchar(64)
    - name: user.last_name
      data_type: varchar(64)
    - name: user.email
      data_type: varchar(64)
  tests: &equal-to-the-users
    # Compare the external table with ref('users') on the dotted columns.
    - dbt_utils.equality:
        compare_model: ref('users')
        compare_columns:
          - user.id
          - user.first_name
          - user.last_name
          - user.email

- name: people_csv_partitioned
external:
<<: *csv-people
Expand All @@ -40,13 +62,31 @@ sources:
columns: *cols-of-the-people
tests: *equal-to-the-people

# Partitioned variant of the CSV users table: merges csv-users and adds a
# `section` partition derived from the staged file name.
- name: users_csv_partitioned
  external:
    <<: *csv-users
    auto_refresh: false  # make sure this templates right
    # Anchored so other partitioned users_* tables can reuse the definition.
    partitions: &parts-of-the-users
      - name: section
        data_type: varchar
        expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
  columns: *cols-of-the-users
  tests: *equal-to-the-users

# JSON-backed people table; json-people anchors the JSON stage settings so
# other JSON people tables can merge them.
- name: people_json_unpartitioned
  external: &json-people
    location: '@{{ target.schema }}.dbt_external_tables_testing/json'
    file_format: '( type = json )'
  columns: *cols-of-the-people
  tests: *equal-to-the-people

# JSON-backed users table (dotted column names); json-users anchors the
# JSON stage settings for reuse.
- name: users_json_unpartitioned
  external: &json-users
    location: '@{{ target.schema }}.dbt_external_tables_testing/json'
    file_format: '( type = json )'
  columns: *cols-of-the-users
  tests: *equal-to-the-users

- name: people_json_partitioned
external:
<<: *json-people
Expand Down Expand Up @@ -123,6 +163,13 @@ sources:
columns: *cols-of-the-people
tests: *equal-to-the-people

# Parquet-backed users table with an explicit column list; parquet-users
# anchors the stage location and named file format for the inferred-schema
# variants that merge it.
- name: users_parquet_column_list_unpartitioned
  external: &parquet-users
    location: '@{{ target.schema }}.dbt_external_tables_testing/parquet/'
    file_format: '{{ target.schema }}.dbt_external_tables_testing_parquet'
  columns: *cols-of-the-users
  tests: *equal-to-the-users

- name: people_parquet_column_list_partitioned
external:
<<: *parquet-people
Expand All @@ -136,13 +183,26 @@ sources:
infer_schema: true
tests: *equal-to-the-people

# Parquet users table with no column list: the schema is inferred, and
# infer_schema: 'quote' requests that the inferred column names be quoted.
- name: users_parquet_infer_schema_unpartitioned_quoted
  external:
    <<: *parquet-users
    infer_schema: 'quote'
  tests: *equal-to-the-users

# Parquet people table with an inferred (unquoted) schema plus the shared
# people partitions.
- name: people_parquet_infer_schema_partitioned
  external:
    <<: *parquet-people
    partitions: *parts-of-the-people
    infer_schema: true
  tests: *equal-to-the-people

# Parquet users table with an inferred, quoted schema plus the shared users
# partitions.
# NOTE(review): the unpartitioned sibling is named with a `_quoted` suffix
# while this table is not, although both set infer_schema: 'quote' — confirm
# whether the name should be aligned.
- name: users_parquet_infer_schema_partitioned
  external:
    <<: *parquet-users
    partitions: *parts-of-the-users
    infer_schema: 'quote'
  tests: *equal-to-the-users

- name: people_parquet_infer_schema_partitioned_and_column_desc
external:
<<: *parquet-people
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
201 changes: 201 additions & 0 deletions integration_tests/seeds/users.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
user.id,user.first_name,user.last_name,user.email
1,Jack,Hunter,[email protected]
2,Kathryn,Walker,[email protected]
3,Gerald,Ryan,[email protected]
4,Bonnie,Spencer,[email protected]
5,Harold,Taylor,[email protected]
6,Jacqueline,Griffin,[email protected]
7,Wanda,Arnold,[email protected]
8,Craig,Ortiz,[email protected]
9,Gary,Day,[email protected]
10,Rose,Wright,[email protected]
11,Raymond,Kelley,[email protected]
12,Gerald,Robinson,[email protected]
13,Mildred,Martinez,[email protected]
14,Dennis,Arnold,[email protected]
15,Judy,Gray,[email protected]
16,Theresa,Garza,[email protected]
17,Gerald,Robertson,[email protected]
18,Philip,Hernandez,[email protected]
19,Julia,Gonzalez,[email protected]
20,Andrew,Davis,[email protected]
21,Kimberly,Harper,[email protected]
22,Mark,Martin,[email protected]
23,Cynthia,Ruiz,[email protected]
24,Samuel,Carroll,[email protected]
25,Jennifer,Larson,[email protected]
26,Ashley,Perry,[email protected]
27,Howard,Rodriguez,[email protected]
28,Amy,Brooks,[email protected]
29,Louise,Warren,[email protected]
30,Tina,Watson,[email protected]
31,Janice,Kelley,[email protected]
32,Terry,Mccoy,[email protected]
33,Jeffrey,Morgan,[email protected]
34,Louis,Harvey,[email protected]
35,Philip,Miller,[email protected]
36,Willie,Marshall,[email protected]
37,Patrick,Lopez,[email protected]
38,Adam,Jenkins,[email protected]
39,Benjamin,Cruz,[email protected]
40,Ruby,Hawkins,[email protected]
41,Carlos,Barnes,[email protected]
42,Ruby,Griffin,[email protected]
43,Sean,Mason,[email protected]
44,Anthony,Payne,[email protected]
45,Steve,Cruz,[email protected]
46,Anthony,Garcia,[email protected]
47,Doris,Lopez,[email protected]
48,Susan,Nichols,[email protected]
49,Wanda,Ferguson,[email protected]
50,Andrea,Pierce,[email protected]
51,Lawrence,Phillips,[email protected]
52,Judy,Gilbert,[email protected]
53,Eric,Williams,[email protected]
54,Ralph,Romero,[email protected]
55,Jean,Wilson,[email protected]
56,Lori,Reynolds,[email protected]
57,Donald,Moreno,[email protected]
58,Steven,Berry,[email protected]
59,Theresa,Shaw,[email protected]
60,John,Stephens,[email protected]
61,Richard,Jacobs,[email protected]
62,Andrew,Lawson,[email protected]
63,Peter,Morgan,[email protected]
64,Nicole,Garrett,[email protected]
65,Joshua,Kim,[email protected]
66,Ralph,Roberts,[email protected]
67,George,Montgomery,[email protected]
68,Gerald,Alvarez,[email protected]
69,Donald,Olson,[email protected]
70,Carlos,Morgan,[email protected]
71,Aaron,Stanley,[email protected]
72,Virginia,Long,[email protected]
73,Robert,Berry,[email protected]
74,Antonio,Brooks,[email protected]
75,Ruby,Garcia,[email protected]
76,Jack,Hanson,[email protected]
77,Kathryn,Nelson,[email protected]
78,Jason,Reed,[email protected]
79,George,Coleman,[email protected]
80,Rose,King,[email protected]
81,Johnny,Holmes,[email protected]
82,Katherine,Gilbert,[email protected]
83,Joshua,Thomas,[email protected]
84,Julie,Perry,[email protected]
85,Richard,Perry,[email protected]
86,Kenneth,Ruiz,[email protected]
87,Jose,Morgan,[email protected]
88,Donald,Campbell,[email protected]
89,Debra,Collins,[email protected]
90,Jesse,Johnson,[email protected]
91,Elizabeth,Stone,[email protected]
92,Angela,Rogers,[email protected]
93,Emily,Dixon,[email protected]
94,Albert,Scott,[email protected]
95,Barbara,Peterson,[email protected]
96,Adam,Greene,[email protected]
97,Earl,Sanders,[email protected]
98,Angela,Brooks,[email protected]
99,Harold,Foster,[email protected]
100,Carl,Meyer,[email protected]
101,Michael,Perez,[email protected]
102,Shawn,Mccoy,[email protected]
103,Kathleen,Payne,[email protected]
104,Jimmy,Cooper,[email protected]
105,Katherine,Rice,[email protected]
106,Sarah,Ryan,[email protected]
107,Martin,Mcdonald,[email protected]
108,Frank,Robinson,[email protected]
109,Jennifer,Franklin,[email protected]
110,Henry,Welch,[email protected]
111,Fred,Snyder,[email protected]
112,Amy,Dunn,[email protected]
113,Kathleen,Meyer,[email protected]
114,Steve,Ferguson,[email protected]
115,Teresa,Hill,[email protected]
116,Amanda,Harper,[email protected]
117,Kimberly,Ray,[email protected]
118,Johnny,Knight,[email protected]
119,Virginia,Freeman,[email protected]
120,Anna,Austin,[email protected]
121,Willie,Hill,[email protected]
122,Sean,Harris,[email protected]
123,Mildred,Adams,[email protected]
124,David,Graham,[email protected]
125,Victor,Hunter,[email protected]
126,Aaron,Ruiz,[email protected]
127,Benjamin,Brooks,[email protected]
128,Lisa,Wilson,[email protected]
129,Benjamin,King,[email protected]
130,Christina,Williamson,[email protected]
131,Jane,Gonzalez,[email protected]
132,Thomas,Owens,[email protected]
133,Katherine,Moore,[email protected]
134,Jennifer,Stewart,[email protected]
135,Sara,Tucker,[email protected]
136,Harold,Ortiz,[email protected]
137,Shirley,James,[email protected]
138,Dennis,Johnson,[email protected]
139,Louise,Weaver,[email protected]
140,Maria,Armstrong,[email protected]
141,Gloria,Cruz,[email protected]
142,Diana,Spencer,[email protected]
143,Kelly,Nguyen,[email protected]
144,Jane,Rodriguez,[email protected]
145,Scott,Brown,[email protected]
146,Norma,Cruz,[email protected]
147,Marie,Peters,[email protected]
148,Lillian,Carr,[email protected]
149,Judy,Nichols,[email protected]
150,Billy,Long,[email protected]
151,Howard,Reid,[email protected]
152,Laura,Ferguson,[email protected]
153,Anne,Bailey,[email protected]
154,Rose,Morgan,[email protected]
155,Nicholas,Reyes,[email protected]
156,Joshua,Kennedy,[email protected]
157,Paul,Watkins,[email protected]
158,Kathryn,Kelly,[email protected]
159,Adam,Armstrong,[email protected]
160,Norma,Wallace,[email protected]
161,Timothy,Reyes,[email protected]
162,Elizabeth,Patterson,[email protected]
163,Edward,Gomez,[email protected]
164,David,Cox,[email protected]
165,Brenda,Wood,[email protected]
166,Adam,Walker,[email protected]
167,Michael,Hart,[email protected]
168,Jesse,Ellis,[email protected]
169,Janet,Powell,[email protected]
170,Helen,Ford,[email protected]
171,Gerald,Carpenter,[email protected]
172,Kathryn,Oliver,[email protected]
173,Alan,Berry,[email protected]
174,Harry,Andrews,[email protected]
175,Andrea,Hall,[email protected]
176,Barbara,Wells,[email protected]
177,Anne,Wells,[email protected]
178,Harry,Harper,[email protected]
179,Jack,Ray,[email protected]
180,Phillip,Hamilton,[email protected]
181,Shirley,Hunter,[email protected]
182,Arthur,Daniels,[email protected]
183,Virginia,Rodriguez,[email protected]
184,Christina,Ryan,[email protected]
185,Theresa,Mendoza,[email protected]
186,Jason,Cole,[email protected]
187,Phillip,Bryant,[email protected]
188,Adam,Torres,[email protected]
189,Margaret,Johnston,[email protected]
190,Paul,Payne,[email protected]
191,Todd,Willis,[email protected]
192,Willie,Oliver,[email protected]
193,Frances,Robertson,[email protected]
194,Gregory,Hawkins,[email protected]
195,Lisa,Perkins,[email protected]
196,Jacqueline,Anderson,[email protected]
197,Shirley,Diaz,[email protected]
198,Nicole,Meyer,[email protected]
199,Mary,Gray,[email protected]
200,Jean,Mcdonald,[email protected]
7 changes: 4 additions & 3 deletions macros/plugins/snowflake/create_external_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,16 @@
{% endfor %}
{% else %}
{#-
    Emit one virtual column per inferred (name, type) pair in columns_infer.

    column[0] is the inferred column name and column[1] its data type.
    When infer_schema == 'quote' the column identifier is passed through
    adapter.quote so names that are not plain identifiers (for example
    "user.id") can be used as column names and as `value:` path elements.

    GET_IGNORE_CASE takes the field name as a string literal, so it must
    receive the raw name: wrapping the already-quoted identifier in single
    quotes would look up a key that literally contains the quote characters.
#}
{%- for column in columns_infer %}
    {%- set column_quoted = adapter.quote(column[0]) if infer_schema == 'quote' else column[0] %}
    {%- set col_expression -%}
        {%- if ignore_case -%}
            {%- set col_id = 'GET_IGNORE_CASE($1, ' ~ "'"~ column[0] ~"'"~ ')' -%}
        {%- else -%}
            {%- set col_id = 'value:' ~ column_quoted -%}
        {%- endif -%}
        (case when is_null_value({{col_id}}) or lower({{col_id}}) = 'null' then null else {{col_id}} end)
    {%- endset %}
    {{column_quoted}} {{column[1]}} as ({{col_expression}}::{{column[1]}})
    {{- ',' if not loop.last -}}
{% endfor %}
{%- endif -%}
Expand Down
18 changes: 18 additions & 0 deletions sample_sources/snowflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ sources:
- name: name
description: and this is a name

# Sample: Parquet external table whose schema is inferred and whose inferred
# column names are quoted (infer_schema: "quote").
- name: parquet_with_inferred_schema_and_quoted_columns
  description: "External table using Parquet, inferring the schema, quoting the columns"
  external:
    location: "@stage"  # reference an existing external stage
    file_format: "my_file_format"  # we need a named file format for infer to work
    infer_schema: "quote"  # infer the table schema and quote the columns
    partitions:
      - name: section  # we can define partitions on top of the schema columns
        data_type: varchar(64)
        expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
  columns:  # columns can still be listed for documentation/testing purposes
    - name: user.id
      description: this is a user id
      quote: true
    - name: user.name
      description: and this is a user name
      quote: true

- name: aws_sns_refresh_tbl
description: "External table using AWS SNS for auto-refresh"
external:
Expand Down

0 comments on commit ff99933

Please sign in to comment.