diff --git a/helper/convert_to_databricks.py b/helper/convert_to_databricks.py
index 8d3e90f..8d4312d 100644
--- a/helper/convert_to_databricks.py
+++ b/helper/convert_to_databricks.py
@@ -55,7 +55,8 @@ def findargs (contentstring, sourcepatterninit):
         updatedregex = initfunc + sourceappend
     else:
         updatedregex = findfunction[0] + sourceappend
-    udpatedregexescapeuno = updatedregex.replace("(", "\(")
+    udpatedregexescapepre = updatedregex.replace("\\", "\\\\")
+    udpatedregexescapeuno = udpatedregexescapepre.replace("(", "\(")
     udpatedregexescapedos = udpatedregexescapeuno.replace(")", "\)")
     udpatedregexescapedos = udpatedregexescapedos.replace("[", "\[")
     udpatedregexescapedos = udpatedregexescapedos.replace("]", "\]")
@@ -157,18 +158,13 @@ def parseparens(parsedstrings):
         for start, end in zip(indexdf["startindex"], indexdf["endindex"]):
             substring = silverstring[start:end + 1]
             indexdflist.append(substring)
-        tmpindexlist = []
-        for stringarg in indexdflist:
-            commaph = "#tmpcommaplaceholder"
-            substringargreplace = stringarg.replace(",", "#tmpcommaplaceholder")
-            removecomma = silverstring.replace(stringarg, substringargreplace)
-            silverstring = removecomma
-            tmpindexlist.append(removecomma)
         llave = silver["uniquekey"]
-        listlengthindex = len(tmpindexlist)
+        listlengthindex = len(indexdflist)
         if listlengthindex > 0:
-            lastelementindex = tmpindexlist[listlengthindex - 1]
-            platinumdict = {"target_string": lastelementindex, "uniquekey": llave }
+            stringarg = indexdflist[listlengthindex - 1]
+            substringargreplace = stringarg.replace(",", "#tmpcommaplaceholder")
+            removecomma = silverstring.replace(stringarg, substringargreplace)
+            platinumdict = {"target_string": removecomma, "uniquekey": llave }
             initlistplatiunum.append(platinumdict)
         else:
             platinumdict = {"target_string": silverstring, "uniquekey": llave }
diff --git a/helper/pierunner.py.py b/helper/pierunner.py.py
index 3a3427c..7991c5a 100644
--- a/helper/pierunner.py.py
+++ b/helper/pierunner.py.py
@@ -20,7 +20,7 @@
 # COMMAND ----------

 # MAGIC %sh
-# MAGIC python3 ./convert_to_databricks.py --sourcedb "redshift" --dir_path "redshift/" --parse_mode 'syntax' --parse_first 'syntax' --customdp "true" --onlypublishagg "true"
+# MAGIC python3 ./convert_to_databricks.py --sourcedb "redshift" --dir_path "redshift/" --parse_mode 'syntax' --parse_first 'syntax' --customdp "true" --onlypublishagg "true" --tmplogs 'true'

 # COMMAND ----------

diff --git a/models/redshift/customerrs.sql b/models/redshift/customerrs.sql
index 33878d6..5a5665b 100644
--- a/models/redshift/customerrs.sql
+++ b/models/redshift/customerrs.sql
@@ -22,6 +22,7 @@ select
     dlog10(c_acctbal) as actbalbaseten,
     dlog10(c_acctbal) as actbalbaseten,
     JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}}','f4', 'f6'),
+    JSON_EXTRACT_PATH_TEXT(NULLIF(REPLACE(REPLACE( REPLACE(related_videos, '\\', ''), '"{', ''), '}"', ''), ''), 'id'),
     dexp(100),
     date_part(dow, 2008-01-05 14:00:00),
     hll_cardinality(expr),
@@ -37,7 +38,7 @@ select
     isnull(test, 'test_is_null') AS null_test_col,
     date_part(year, date(origination_date)) || '-' || 'Q' || floor( (date_part(month, date(origination_date)) - 1) / 3) + 1 as origination_quarter,
-    date_part(SECONDS, '2019-10-01 00:00:01.000001'::timestamp)
+    date_part(SECONDS, '2019-10-01 00:00:01.000001'::timestamp),
     first_value( case when colA = 2 then id2 end ignore nulls