Skip to content

Commit

Permalink
Merge pull request #62 from rlsalcido24/updatedmappings
Browse files (browse the repository at this point in the history)
updatedmappings
  • Loading branch information
techvaquero authored May 21, 2024
2 parents 964de90 + 34a83c9 commit 804352e
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 31 deletions.
5 changes: 0 additions & 5 deletions helper/_resources/config/snowflake/function_mappings.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@
"strtok_to_array": {"source_name": "strtok_to_array", "macro_name": "strtok_to_array"},
"to_number": {"source_name": "to_number", "macro_name": "to_number"},
"array_size": {"source_name": "array_size", "macro_name": "array_size"},
"dateadd": {"source_name": "dateadd", "macro_name": "dateadd"},
"dayname": {"source_name": "dayname", "macro_name": "dayname"},
"timestampadd": {"source_name": "timestampadd", "macro_name": "timestampadd"},
"week": {"source_name": "week", "macro_name": "week"},
"timediff": {"source_name": "timediff", "macro_name": "timediff"},
"date_from_parts": {"source_name": "date_from_parts", "macro_name": "date_from_parts"},
"monthname": {"source_name": "monthname", "macro_name": "monthname"},
"timestampdiff": {"source_name": "timestampdiff", "macro_name": "timestampdiff"},
"to_time": {"source_name": "to_time", "macro_name": "to_time"},
"timeadd": {"source_name": "timeadd", "macro_name": "timeadd"},
"try_to_number": {"source_name": "try_to_number", "macro_name": "try_to_number"},
"try_to_numeric": {"source_name": "try_to_numeric", "macro_name": "try_to_numeric"},
"to_decimal": {"source_name": "to_decimal", "macro_name": "to_decimal"},
Expand All @@ -25,7 +21,6 @@
"to_boolean": {"source_name": "to_boolean", "macro_name": "to_boolean"},
"to_array": {"source_name": "to_array", "macro_name": "to_array"},
"listagg": {"source_name": "listagg", "macro_name": "listagg"},
"getdate": {"source_name": "getdate", "macro_name": "snowflake_getdate"},
"array_to_string": {"source_name": "array_to_string", "macro_name": "array_to_string"},
"hll_estimate": {"source_name": "hll_estimate", "macro_name": "hll_estimate"},
"uniform": {"source_name": "uniform", "macro_name": "uniform"},
Expand Down
9 changes: 0 additions & 9 deletions helper/_resources/config/snowflake/syntax_mappings.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@
"from_utc_timestamp" : {"source_pattern": "from_utc_timestamp\\([^)]*?\\)",
"target_pattern": "from_utc_timestamp(to_utc_timestamp(#arg2, #arg0), #arg1)"
},
"getdate" : {"source_pattern": "getdate\\([^)]*?\\)",
"target_pattern": "current_timestamp"
},
"hex_decode_string" : {"source_pattern": "hex_decode_string\\([^)]*?\\)",
"target_pattern": "decode(unhex(#arg0), 'UTF-8')"
},
Expand Down Expand Up @@ -116,15 +113,9 @@
"timestamp_ntz_from_parts" : {"source_pattern": "timestamp_ntz_from_parts\\([^)]*?\\)",
"target_pattern": "cast((make_timestamp(#arg0, #arg1, #arg2, #arg3, #arg4, #arg5)as timestamp_ntz)"
},
"timediff" : {"source_pattern": "timediff\\([^)]*?\\)",
"target_pattern": "CASE WHEN lower(#arg0) = 'year' THEN EXTRACT(YEAR FROM #arg2) - EXTRACT(YEAR FROM #arg1) WHEN lower(#arg0) = 'month' THEN (EXTRACT(YEAR FROM #arg2) * 12 + EXTRACT(MONTH FROM #arg2)) - (EXTRACT(YEAR FROM #arg1) * 12 + EXTRACT(MONTH FROM #arg1)) WHEN lower(#arg0) = 'day' THEN datediff(CAST(#arg2 AS DATE), CAST(#arg1 AS DATE)) WHEN lower(#arg0) = 'hour' THEN EXTRACT(HOUR FROM #arg2) - EXTRACT(HOUR FROM #arg1) WHEN lower(#arg0) = 'minute' THEN (EXTRACT(HOUR FROM #arg2) * 60 + EXTRACT(MINUTE FROM #arg2)) - (EXTRACT(HOURs FROM #arg1) * 60 + EXTRACT(MINUTE FROM #arg1)) WHEN lower(#arg0) = 'second' THEN (EXTRACT(HOUR FROM #arg2) * 3600 + EXTRACT(MINUTE FROM #arg2) * 60 + EXTRACT(SECOND FROM #arg2)) - (EXTRACT(HOUR FROM #arg2) * 3600 + EXTRACT(MINUTE FROM #arg2) * 60 + EXTRACT(SECOND FROM #arg2)) END"
},
"timestampadd" : {"source_pattern": "timestampadd\\([^)]*?\\)",
"target_pattern": "CASE WHEN lower(#arg0) = 'year' THEN #arg2 + make_interval(#arg1) WHEN lower(#arg0) = 'month' THEN #arg2 + make_interval(0, #arg1) WHEN lower(#arg0) = 'day' THEN #arg2 + make_interval(0, 0, 0, #arg1) WHEN lower(#arg0) = 'hour' THEN #arg2 + make_interval(0, 0, 0, 0, #arg1) WHEN lower(#arg0) = 'minute' THEN #arg2 + make_interval(0, 0, 0, 0, 0, #arg1) WHEN lower(#arg0) = 'second' THEN #arg2 + make_interval(0, 0, 0, 0, 0, 0, #arg1) END"
},
"timestampdiff" : {"source_pattern": "timestampdiff\\([^)]*?\\)",
"target_pattern": "CASE WHEN lower(#arg0) = 'year' THEN EXTRACT(YEAR FROM #arg2) - EXTRACT(YEAR FROM #arg1) WHEN lower(#arg0) = 'month' THEN (EXTRACT(YEAR FROM #arg2) * 12 + EXTRACT(MONTH FROM #arg2)) - (EXTRACT(YEAR FROM #arg1) * 12 + EXTRACT(MONTH FROM #arg1)) WHEN lower(#arg0) = 'day' THEN datediff(CAST(#arg2 AS DATE), CAST(#arg1 AS DATE)) WHEN lower(#arg0) = 'hour' THEN EXTRACT(HOUR FROM #arg2) - EXTRACT(HOUR FROM #arg1) WHEN lower(#arg0) = 'minute' THEN (EXTRACT(HOUR FROM #arg2) * 60 + EXTRACT(MINUTE FROM #arg2)) - (EXTRACT(HOUR FROM #arg1) * 60 + EXTRACT(MINUTE FROM #arg1)) WHEN lower(#arg0) = 'second' THEN (EXTRACT(HOUR FROM #arg2) * 3600 + EXTRACT(MINUTE FROM #arg2) * 60 + EXTRACT(SECOND FROM #arg2)) - (EXTRACT(HOUR FROM #arg2) * 3600 + EXTRACT(MINUTE FROM #arg2) * 60 + EXTRACT(SECOND FROM #arg2)) END"
},
"to_array" : {"source_pattern": "to_array\\([^)]*?\\)",
"target_pattern": "array(#arg0)"
},
Expand Down
35 changes: 20 additions & 15 deletions helper/convert_to_databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def findargs (contentstring, sourcepatterninit):
initlistgold = []
findfunction = re.findall(source_patterninit, content, flags= re.IGNORECASE)
funlength = len(findfunction)
if tmplogs == 'true':
print(f"findfunc prior to error: {findfunction}")
print(f"funlength patter prior to error: {funlength}")
if funlength > 0:
for i in range(funlength):
leftparen = findfunction[i].count("(")
Expand Down Expand Up @@ -263,8 +266,6 @@ def splitargstuple(finalparsedstrings, goldenargs, flag, sourcepattern):
platinumreplace = platinumreplace.replace(timeargslice, timeargnoquotes)
if platinumreplace.find("xmlget") > -1:
platinumreplace = '"xmlgetplaceholder"'
if noisylogs == 'true':
print(f'the val of platniumreplace is {platinumreplace}')
platinumreplace = platinumreplace.replace("\n", "")
platinumreplace = platinumreplace.replace(" ", "")
platinumtuple = eval(platinumreplace)
Expand Down Expand Up @@ -325,20 +326,22 @@ def finalcountdown(finaldf, contentstring, targetstring):
def finalcountdowndbt(finaldf, contentstring, targetmacroname):

updated_content = contentstring
enrichedargs = 'zzzplaceholderzzz'
for sourcesting, args in zip(finaldf["funcstring"], finaldf["args"]):
#findfirstparen = sourcesting.find("(")
substring = targetmacroname + "("
lowersubstring = substring.lower()
lowerargs = args.lower()
enrichedargs = "{{lakehouse_utils." + lowersubstring + lowerargs + ")}}"
lenarg = len(lowerargs)
lastcomma = lowerargs.rfind(",")
if lowerargs == "'',":
enrichedargs = "{{lakehouse_utils." + lowersubstring + ")}}"
elif lenarg == lastcomma + 1:
nocommarg = lowerargs[0:lenarg - 1]
enrichedargs = "{{lakehouse_utils." + lowersubstring + nocommarg + ")}}"
updated_content = updated_content.replace(sourcesting, enrichedargs)
alreadyenriched = updated_content.find(enrichedargs)
if alreadyenriched == -1:
substring = targetmacroname + "("
lowersubstring = substring.lower()
lowerargs = args.lower()
enrichedargs = "{{lakehouse_utils." + lowersubstring + lowerargs + ")}}"
lenarg = len(lowerargs)
lastcomma = lowerargs.rfind(",")
if lowerargs == "'',":
enrichedargs = "{{lakehouse_utils." + lowersubstring + ")}}"
elif lenarg == lastcomma + 1:
nocommarg = lowerargs[0:lenarg - 1]
enrichedargs = "{{lakehouse_utils." + lowersubstring + nocommarg + ")}}"
updated_content = updated_content.replace(sourcesting, enrichedargs)
return(updated_content)

## Function to find all sql files within a given directory
Expand Down Expand Up @@ -554,6 +557,8 @@ def convert_syntax_expressions(content: str, source_pattern: str, target_pattern
# todo eliminate some of this custom logic
source_patternuno = "json_extract_path_text\([^)]*\)"
inputsearchinit = re.findall(source_patternuno, content, flags= re.DOTALL | re.IGNORECASE)
if noisylogs == 'true':
print(f"inputsearchinit = {inputsearchinit}")
num_matches = len(inputsearchinit)
updated_content = content
for i in inputsearchinit:
Expand Down
2 changes: 1 addition & 1 deletion helper/pierunner.py.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# COMMAND ----------

# MAGIC %sh
# MAGIC python3 ./convert_to_databricks.py --sourcedb "snowflake" --dir_path "snowflake/" --parse_mode 'all' --parse_first 'functions' --onlypublishagg "true"
# MAGIC python3 ./convert_to_databricks.py --sourcedb "snowflake" --dir_path "snowflake/" --parse_mode 'functions' --onlypublishagg "true"

# COMMAND ----------

Expand Down
2 changes: 1 addition & 1 deletion models/snowflake/lineitem.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ select
l_comment,
current_version() as cv,
get_ddl('table', 'snowflake_sample_data.tpch_sf1.lineitem') as ddl,
GETDATE()
timestampdiff(getdate(), getdate(), getdate())


from
Expand Down

0 comments on commit 804352e

Please sign in to comment.