Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support quoting columns in inferred schemas #337

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,28 @@ sources:
- last_name
- email

# External-table fixture over the `users` CSV seed. Column names contain
# dots (e.g. `user.id`), which is what exercises identifier quoting in the
# Snowflake create_external_table macro.
- name: users_csv_unpartitioned
# &csv-users is merged (via <<:) into the partitioned CSV variant below.
external: &csv-users
# `{{ target.schema }}` is rendered by dbt at runtime; single-quoted so
# YAML treats the leading `@` stage reference as a plain string.
location: '@{{ target.schema }}.dbt_external_tables_testing/csv'
file_format: '( type = csv skip_header = 1 )'
# &cols-of-the-users: shared by every users_* source in this file.
columns: &cols-of-the-users
- name: user.id
data_type: int
- name: user.first_name
data_type: varchar(64)
- name: user.last_name
data_type: varchar(64)
- name: user.email
data_type: varchar(64)
# &equal-to-the-users: row-for-row equality against the seeded `users` table.
tests: &equal-to-the-users
- dbt_utils.equality:
compare_model: ref('users')
compare_columns:
- user.id
- user.first_name
- user.last_name
- user.email

- name: people_csv_partitioned
external:
<<: *csv-people
Expand All @@ -40,13 +62,31 @@ sources:
columns: *cols-of-the-people
tests: *equal-to-the-people

# Partitioned variant of users_csv_unpartitioned: merges &csv-users and adds
# a `section` partition derived from the staged file name.
- name: users_csv_partitioned
external:
# shallow YAML merge of the unpartitioned CSV config
<<: *csv-users
auto_refresh: false # make sure this templates right
# &parts-of-the-users: re-used by the partitioned parquet variant below.
partitions: &parts-of-the-users
- name: section
data_type: varchar
# first character after 'section=' in metadata$filename
expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
columns: *cols-of-the-users
tests: *equal-to-the-users

# JSON counterpart of the people CSV fixture; &json-people is merged into
# the partitioned JSON variant below.
- name: people_json_unpartitioned
external: &json-people
location: '@{{ target.schema }}.dbt_external_tables_testing/json'
file_format: '( type = json )'
columns: *cols-of-the-people
tests: *equal-to-the-people

# JSON variant of the users fixture: the same dotted column names exercise
# quoting in the `value:` path extraction branch of the macro.
# NOTE(review): the &json-users anchor is never aliased in this diff — a
# users_json_partitioned variant may be missing; confirm intent.
- name: users_json_unpartitioned
external: &json-users
location: '@{{ target.schema }}.dbt_external_tables_testing/json'
file_format: '( type = json )'
columns: *cols-of-the-users
tests: *equal-to-the-users

- name: people_json_partitioned
external:
<<: *json-people
Expand Down Expand Up @@ -123,6 +163,13 @@ sources:
columns: *cols-of-the-people
tests: *equal-to-the-people

# Parquet variant with an explicit column list (no schema inference).
- name: users_parquet_column_list_unpartitioned
# &parquet-users is merged into the infer_schema variants below.
external: &parquet-users
location: '@{{ target.schema }}.dbt_external_tables_testing/parquet/'
# named file-format object, as required for the infer_schema variants
file_format: '{{ target.schema }}.dbt_external_tables_testing_parquet'
columns: *cols-of-the-users
tests: *equal-to-the-users

- name: people_parquet_column_list_partitioned
external:
<<: *parquet-people
Expand All @@ -136,13 +183,26 @@ sources:
infer_schema: true
tests: *equal-to-the-people

# Schema inference with quoting: infer_schema takes the string 'quote'
# (rather than boolean true) to request adapter.quote() on each inferred
# column identifier — needed because the column names contain dots.
- name: users_parquet_infer_schema_unpartitioned_quoted
external:
<<: *parquet-users
infer_schema: 'quote'
tests: *equal-to-the-users

# Boolean infer_schema: columns are inferred from the parquet files but the
# inferred identifiers are not quoted.
- name: people_parquet_infer_schema_partitioned
external:
<<: *parquet-people
partitions: *parts-of-the-people
infer_schema: true
tests: *equal-to-the-people

# Partitioned + quoted schema inference.
# NOTE(review): the name lacks the `_quoted` suffix used by the
# unpartitioned variant (users_parquet_infer_schema_unpartitioned_quoted);
# consider renaming for consistency before merge.
- name: users_parquet_infer_schema_partitioned
external:
<<: *parquet-users
partitions: *parts-of-the-users
infer_schema: 'quote'
tests: *equal-to-the-users

- name: people_parquet_infer_schema_partitioned_and_column_desc
external:
<<: *parquet-people
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
201 changes: 201 additions & 0 deletions integration_tests/seeds/users.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
user.id,user.first_name,user.last_name,user.email
1,Jack,Hunter,[email protected]
2,Kathryn,Walker,[email protected]
3,Gerald,Ryan,[email protected]
4,Bonnie,Spencer,[email protected]
5,Harold,Taylor,[email protected]
6,Jacqueline,Griffin,[email protected]
7,Wanda,Arnold,[email protected]
8,Craig,Ortiz,[email protected]
9,Gary,Day,[email protected]
10,Rose,Wright,[email protected]
11,Raymond,Kelley,[email protected]
12,Gerald,Robinson,[email protected]
13,Mildred,Martinez,[email protected]
14,Dennis,Arnold,[email protected]
15,Judy,Gray,[email protected]
16,Theresa,Garza,[email protected]
17,Gerald,Robertson,[email protected]
18,Philip,Hernandez,[email protected]
19,Julia,Gonzalez,[email protected]
20,Andrew,Davis,[email protected]
21,Kimberly,Harper,[email protected]
22,Mark,Martin,[email protected]
23,Cynthia,Ruiz,[email protected]
24,Samuel,Carroll,[email protected]
25,Jennifer,Larson,[email protected]
26,Ashley,Perry,[email protected]
27,Howard,Rodriguez,[email protected]
28,Amy,Brooks,[email protected]
29,Louise,Warren,[email protected]
30,Tina,Watson,[email protected]
31,Janice,Kelley,[email protected]
32,Terry,Mccoy,[email protected]
33,Jeffrey,Morgan,[email protected]
34,Louis,Harvey,[email protected]
35,Philip,Miller,[email protected]
36,Willie,Marshall,[email protected]
37,Patrick,Lopez,[email protected]
38,Adam,Jenkins,[email protected]
39,Benjamin,Cruz,[email protected]
40,Ruby,Hawkins,[email protected]
41,Carlos,Barnes,[email protected]
42,Ruby,Griffin,[email protected]
43,Sean,Mason,[email protected]
44,Anthony,Payne,[email protected]
45,Steve,Cruz,[email protected]
46,Anthony,Garcia,[email protected]
47,Doris,Lopez,[email protected]
48,Susan,Nichols,[email protected]
49,Wanda,Ferguson,[email protected]
50,Andrea,Pierce,[email protected]
51,Lawrence,Phillips,[email protected]
52,Judy,Gilbert,[email protected]
53,Eric,Williams,[email protected]
54,Ralph,Romero,[email protected]
55,Jean,Wilson,[email protected]
56,Lori,Reynolds,[email protected]
57,Donald,Moreno,[email protected]
58,Steven,Berry,[email protected]
59,Theresa,Shaw,[email protected]
60,John,Stephens,[email protected]
61,Richard,Jacobs,[email protected]
62,Andrew,Lawson,[email protected]
63,Peter,Morgan,[email protected]
64,Nicole,Garrett,[email protected]
65,Joshua,Kim,[email protected]
66,Ralph,Roberts,[email protected]
67,George,Montgomery,[email protected]
68,Gerald,Alvarez,[email protected]
69,Donald,Olson,[email protected]
70,Carlos,Morgan,[email protected]
71,Aaron,Stanley,[email protected]
72,Virginia,Long,[email protected]
73,Robert,Berry,[email protected]
74,Antonio,Brooks,[email protected]
75,Ruby,Garcia,[email protected]
76,Jack,Hanson,[email protected]
77,Kathryn,Nelson,[email protected]
78,Jason,Reed,[email protected]
79,George,Coleman,[email protected]
80,Rose,King,[email protected]
81,Johnny,Holmes,[email protected]
82,Katherine,Gilbert,[email protected]
83,Joshua,Thomas,[email protected]
84,Julie,Perry,[email protected]
85,Richard,Perry,[email protected]
86,Kenneth,Ruiz,[email protected]
87,Jose,Morgan,[email protected]
88,Donald,Campbell,[email protected]
89,Debra,Collins,[email protected]
90,Jesse,Johnson,[email protected]
91,Elizabeth,Stone,[email protected]
92,Angela,Rogers,[email protected]
93,Emily,Dixon,[email protected]
94,Albert,Scott,[email protected]
95,Barbara,Peterson,[email protected]
96,Adam,Greene,[email protected]
97,Earl,Sanders,[email protected]
98,Angela,Brooks,[email protected]
99,Harold,Foster,[email protected]
100,Carl,Meyer,[email protected]
101,Michael,Perez,[email protected]
102,Shawn,Mccoy,[email protected]
103,Kathleen,Payne,[email protected]
104,Jimmy,Cooper,[email protected]
105,Katherine,Rice,[email protected]
106,Sarah,Ryan,[email protected]
107,Martin,Mcdonald,[email protected]
108,Frank,Robinson,[email protected]
109,Jennifer,Franklin,[email protected]
110,Henry,Welch,[email protected]
111,Fred,Snyder,[email protected]
112,Amy,Dunn,[email protected]
113,Kathleen,Meyer,[email protected]
114,Steve,Ferguson,[email protected]
115,Teresa,Hill,[email protected]
116,Amanda,Harper,[email protected]
117,Kimberly,Ray,[email protected]
118,Johnny,Knight,[email protected]
119,Virginia,Freeman,[email protected]
120,Anna,Austin,[email protected]
121,Willie,Hill,[email protected]
122,Sean,Harris,[email protected]
123,Mildred,Adams,[email protected]
124,David,Graham,[email protected]
125,Victor,Hunter,[email protected]
126,Aaron,Ruiz,[email protected]
127,Benjamin,Brooks,[email protected]
128,Lisa,Wilson,[email protected]
129,Benjamin,King,[email protected]
130,Christina,Williamson,[email protected]
131,Jane,Gonzalez,[email protected]
132,Thomas,Owens,[email protected]
133,Katherine,Moore,[email protected]
134,Jennifer,Stewart,[email protected]
135,Sara,Tucker,[email protected]
136,Harold,Ortiz,[email protected]
137,Shirley,James,[email protected]
138,Dennis,Johnson,[email protected]
139,Louise,Weaver,[email protected]
140,Maria,Armstrong,[email protected]
141,Gloria,Cruz,[email protected]
142,Diana,Spencer,[email protected]
143,Kelly,Nguyen,[email protected]
144,Jane,Rodriguez,[email protected]
145,Scott,Brown,[email protected]
146,Norma,Cruz,[email protected]
147,Marie,Peters,[email protected]
148,Lillian,Carr,[email protected]
149,Judy,Nichols,[email protected]
150,Billy,Long,[email protected]
151,Howard,Reid,[email protected]
152,Laura,Ferguson,[email protected]
153,Anne,Bailey,[email protected]
154,Rose,Morgan,[email protected]
155,Nicholas,Reyes,[email protected]
156,Joshua,Kennedy,[email protected]
157,Paul,Watkins,[email protected]
158,Kathryn,Kelly,[email protected]
159,Adam,Armstrong,[email protected]
160,Norma,Wallace,[email protected]
161,Timothy,Reyes,[email protected]
162,Elizabeth,Patterson,[email protected]
163,Edward,Gomez,[email protected]
164,David,Cox,[email protected]
165,Brenda,Wood,[email protected]
166,Adam,Walker,[email protected]
167,Michael,Hart,[email protected]
168,Jesse,Ellis,[email protected]
169,Janet,Powell,[email protected]
170,Helen,Ford,[email protected]
171,Gerald,Carpenter,[email protected]
172,Kathryn,Oliver,[email protected]
173,Alan,Berry,[email protected]
174,Harry,Andrews,[email protected]
175,Andrea,Hall,[email protected]
176,Barbara,Wells,[email protected]
177,Anne,Wells,[email protected]
178,Harry,Harper,[email protected]
179,Jack,Ray,[email protected]
180,Phillip,Hamilton,[email protected]
181,Shirley,Hunter,[email protected]
182,Arthur,Daniels,[email protected]
183,Virginia,Rodriguez,[email protected]
184,Christina,Ryan,[email protected]
185,Theresa,Mendoza,[email protected]
186,Jason,Cole,[email protected]
187,Phillip,Bryant,[email protected]
188,Adam,Torres,[email protected]
189,Margaret,Johnston,[email protected]
190,Paul,Payne,[email protected]
191,Todd,Willis,[email protected]
192,Willie,Oliver,[email protected]
193,Frances,Robertson,[email protected]
194,Gregory,Hawkins,[email protected]
195,Lisa,Perkins,[email protected]
196,Jacqueline,Anderson,[email protected]
197,Shirley,Diaz,[email protected]
198,Nicole,Meyer,[email protected]
199,Mary,Gray,[email protected]
200,Jean,Mcdonald,[email protected]
7 changes: 4 additions & 3 deletions macros/plugins/snowflake/create_external_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,16 @@
{% endfor %}
{% else %}
{%- for column in columns_infer %}
{%- set column_quoted = adapter.quote(column[0]) if infer_schema == 'quote' else column[0] %}
{%- set col_expression -%}
{%- if ignore_case -%}
{%- set col_id = 'GET_IGNORE_CASE($1, ' ~ "'"~ column[0] ~"'"~ ')' -%}
{%- set col_id = 'GET_IGNORE_CASE($1, ' ~ "'"~ column_quoted ~"'"~ ')' -%}
{%- else -%}
{%- set col_id = 'value:' ~ column[0] -%}
{%- set col_id = 'value:' ~ column_quoted -%}
{%- endif -%}
(case when is_null_value({{col_id}}) or lower({{col_id}}) = 'null' then null else {{col_id}} end)
{%- endset %}
{{column[0]}} {{column[1]}} as ({{col_expression}}::{{column[1]}})
{{column_quoted}} {{column[1]}} as ({{col_expression}}::{{column[1]}})
{{- ',' if not loop.last -}}
{% endfor %}
{%- endif -%}
Expand Down
18 changes: 18 additions & 0 deletions sample_sources/snowflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ sources:
- name: name
description: and this is a name

# Example: inferred schema with quoted column identifiers, for column names
# that contain special characters (e.g. dots) and so are not valid unquoted
# Snowflake identifiers.
- name: parquet_with_inferred_schema_and_quoted_columns
description: "External table using Parquet, inferring the schema, quoting the columns"
external:
location: "@stage" # reference an existing external stage
file_format: "my_file_format" # we need a named file format for infer to work
# the string 'quote' both infers the schema and quotes the columns;
# boolean true would infer without quoting
infer_schema: "quote" # infer the table schema and quote the columns
partitions:
- name: section # we can define partitions on top of the schema columns
data_type: varchar(64)
expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
columns: # columns can still be listed for documentation/testing purposes
- name: user.id
description: this is a user id
quote: true
- name: user.name
description: and this is a user name
quote: true

- name: aws_sns_refresh_tbl
description: "External table using AWS SNS for auto-refresh"
external:
Expand Down
Loading