diff --git a/helper/_resources/config/redshift/syntax_mappings.json b/helper/_resources/config/redshift/syntax_mappings.json index 0deed5b..4067111 100644 --- a/helper/_resources/config/redshift/syntax_mappings.json +++ b/helper/_resources/config/redshift/syntax_mappings.json @@ -59,14 +59,20 @@ "target_pattern": "date_part(#arg0, #arg1)" }, "customdatetrunc" : {"source_pattern": "date_trunc\\([^)]*?\\)", - "target_pattern": "CASE WHEN lower( #arg0) IN ('year', 'years','y', 'yr', 'yrs') THEN FLOOR(timestampdiff(YEAR, #arg1::timestamp, #arg2::timestamp) / 1) WHEN lower( #arg0) IN ('month', 'months', 'mon', 'mons') THEN timestampdiff(MONTH, #arg1::timestamp, #arg2::timestamp) WHEN lower( #arg0) IN ('week', 'weeks', 'w') THEN timestampdiff(WEEK, #arg1::timestamp, #arg2::timestamp) WHEN lower( #arg0) IN ('day', 'days', 'd') THEN timestampdiff(DAY, #arg1::timestamp, #arg2::timestamp) ELSE NULL END" + "target_pattern": "CASE WHEN lower( #arg0) IN ('year', 'years','y', 'yr', 'yrs') THEN FLOOR(date_part('YEAR', #arg1::timestamp) / 1) WHEN lower( #arg0) IN ('month', 'months', 'mon', 'mons') THEN date_part('MONTH', #arg1::timestamp) WHEN lower( #arg0) IN ('week', 'weeks', 'w') THEN date_part('WEEK', #arg1::timestamp) WHEN lower( #arg0) IN ('day', 'days', 'd') THEN date_part('DAY', #arg1::timestamp) ELSE NULL END" }, "customdatediff" : {"source_pattern": "datediff\\([^)]*?\\)", - "target_pattern": "CASE WHEN lower( #arg0) IN ('year', 'years','y', 'yr', 'yrs') THEN FLOOR(timestampdiff(YEAR, #arg1::timestamp, #arg2::timestamp) / 1) WHEN lower( #arg0) IN ('month', 'months', 'mon', 'mons') THEN timestampdiff(MONTH, #arg1::timestamp, #arg2::timestamp) WHEN lower( #arg0) IN ('week', 'weeks', 'w') THEN timestampdiff(WEEK, #arg1::timestamp, #arg2::timestamp) WHEN lower( #arg0) IN ('day', 'days', 'd') THEN timestampdiff(DAY, #arg1::timestamp, #arg2::timestamp)" + "target_pattern": "CASE WHEN lower( '#arg0') IN ('year', 'years','y', 'yr', 'yrs') THEN FLOOR(timestampdiff(YEAR, #arg1::timestamp, #arg2::timestamp) / 1) WHEN lower( '#arg0') IN ('month', 'months', 'mon', 'mons') THEN timestampdiff(MONTH, #arg1::timestamp, #arg2::timestamp) WHEN lower( '#arg0') IN ('week', 'weeks', 'w') THEN timestampdiff(WEEK, #arg1::timestamp, #arg2::timestamp) WHEN lower( '#arg0') IN ('day', 'days', 'd') THEN timestampdiff(DAY, #arg1::timestamp, #arg2::timestamp) ELSE NULL END" }, "customdateadd" : {"source_pattern": "dateadd\\([^)]*?\\)", "target_pattern": "dateadd(#arg0, #arg1, #arg2)" - }, + }, +"customposix" : {"source_pattern": "[^!]~", + "target_pattern": " rlike" + }, +"customnoposix" : {"source_pattern": "!~", + "target_pattern": "not rlike" + }, "getdate_to_df" : {"source_pattern": "getdate\\(\\)", "target_pattern": "date_format(date_trunc('second', current_timestamp()), 'yyyy-MM-dd HH:mm:ss')" } diff --git a/helper/convert_to_databricks.py b/helper/convert_to_databricks.py index c535237..dfbe33b 100644 --- a/helper/convert_to_databricks.py +++ b/helper/convert_to_databricks.py @@ -554,7 +554,17 @@ def convert_syntax_expressions(content: str, source_pattern: str, target_pattern elif target_pattern == "\\1\\2\\4 \\3 \\5": encontrar = re.findall(source_pattern, content, flags= re.DOTALL | re.IGNORECASE) num_matches = len(encontrar) - updated_content = re.sub(source_pattern, target_pattern, content, flags= re.DOTALL | re.IGNORECASE) + updated_content = re.sub(source_pattern, target_pattern, content, flags= re.DOTALL | re.IGNORECASE) + + elif target_pattern == "rlike": + encontrar = re.findall(source_pattern, content, flags= re.DOTALL | re.IGNORECASE) + num_matches = len(encontrar) + updated_content = re.sub(source_pattern, target_pattern, content, flags= re.DOTALL | re.IGNORECASE) + + elif target_pattern == "not rlike": + encontrar = re.findall(source_pattern, content, flags= re.DOTALL | re.IGNORECASE) + num_matches = len(encontrar) + updated_content = re.sub(source_pattern, target_pattern, content, flags= re.DOTALL | re.IGNORECASE) else: initargs = findargs(content, source_pattern) @@ -851,6 +861,7 @@ def get_syntax_map(sourcedb, customdp): syntax_map.pop("datepart_to_casewhen") syntax_map.pop("datetrunc_to_casewhen") syntax_map.pop("datediff_to_casewhen") + syntax_map.pop("getdate_to_df") else: syntax_map.pop("customdatepart") syntax_map.pop("customdatetrunc") diff --git a/helper/pierunner.py.py b/helper/pierunner.py.py index 24a6f1c..9ab779c 100644 --- a/helper/pierunner.py.py +++ b/helper/pierunner.py.py @@ -20,7 +20,7 @@ # COMMAND ---------- # MAGIC %sh -# MAGIC python3 ./convert_to_databricks.py --sourcedb "redshift" --dir_path "redshift/" --parse_mode 'syntax' --parse_first 'syntax' --customdp 'true' +# MAGIC python3 ./convert_to_databricks.py --sourcedb "redshift" --dir_path "redshift/" --parse_mode 'syntax' --parse_first 'syntax' --customdp "true" # COMMAND ---------- diff --git a/models/redshift/customerrs.sql b/models/redshift/customerrs.sql index 4ff34e1..0af6490 100644 --- a/models/redshift/customerrs.sql +++ b/models/redshift/customerrs.sql @@ -15,6 +15,8 @@ select datediff(days, getdate(), getdate()) as days_since_oldest_unpaid_due_date, date_trunc('months', getdate()), dateadd('day', -1, getdate()), + case when 'organictest' ~ 'organic|email' then 'match' else 'no match' end as regexmatch, + case when 'organictest' !~ 'organic|email' then 'antimatch' else 'antino match' end as antiregexmatch dlog10(c_acctbal) as actbalbaseten, dlog10(c_acctbal) as actbalbaseten, JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}','f4', 'f6'),