Commit
✏️ Fix typos
malgorzatagwinner committed Oct 9, 2023
1 parent 3496a94 commit 9377dc0
Showing 2 changed files with 9 additions and 7 deletions.
14 changes: 7 additions & 7 deletions tests/integration/flows/test_adls_to_azure_sql.py
@@ -8,7 +8,7 @@
 from viadot.flows import ADLSToAzureSQL
 from viadot.flows.adls_to_azure_sql import check_dtypes_sort, df_to_csv_task, len_from_dtypes, check_hardcoded_dtypes_len, get_real_sql_dtypes_from_df
 
-test_df = pd.DataFrame(
+TEST_DF = pd.DataFrame(
     {
         "Date": ["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-05"],
         "User ID": ["1a34", "1d34$56", "1a3456&8", "1d3456789!", "1s3"],  # max length = 10
@@ -20,7 +20,7 @@
         "Last varchar": ["Last", " ", "varchar", "of this ", "df"],  # max length =8
     }
 )
-Real_Sql_Dtypes = {
+REAL_SQL_DTYPES = {
     "Date": "DATE",
     "User ID": "VARCHAR(10)",
     "Web ID": "VARCHAR(7)",
@@ -126,7 +126,7 @@ def test_check_dtypes_sort():
 
 
 def test_get_real_sql_dtypes_from_df():
-    assert get_real_sql_dtypes_from_df(test_df) == Real_Sql_Dtypes
+    assert get_real_sql_dtypes_from_df(TEST_DF) == REAL_SQL_DTYPES
 
 
 def test_len_from_dtypes():
@@ -140,7 +140,7 @@ def test_len_from_dtypes():
         "Age": "INT",
         "Last varchar": 8,
     }
-    assert len_from_dtypes(Real_Sql_Dtypes) == real_df_lengths
+    assert len_from_dtypes(REAL_SQL_DTYPES) == real_df_lengths
 
 
 def test_check_hardcoded_dtypes_len_userid(caplog):
@@ -155,7 +155,7 @@ def test_check_hardcoded_dtypes_len_userid(caplog):
         "Last varchar": "varchar(10)",
     }
     with pytest.raises(ValueError):
-        check_hardcoded_dtypes_len(test_df, smaller_dtype_userid)
+        check_hardcoded_dtypes_len(TEST_DF, smaller_dtype_userid)
     assert (
         "The length of the column User ID is too big, some data could be lost. Please change the length of the provided dtypes to 10"
         in caplog.text
@@ -174,7 +174,7 @@ def test_check_hardcoded_dtypes_len_usercountry(caplog):
         "Last varchar": "varchar(10)",
     }
     with pytest.raises(ValueError):
-        check_hardcoded_dtypes_len(test_df, smaller_dtype_usercountry)
+        check_hardcoded_dtypes_len(TEST_DF, smaller_dtype_usercountry)
     assert (
         "The length of the column User country is too big, some data could be lost. Please change the length of the provided dtypes to 6"
         in caplog.text
@@ -192,4 +192,4 @@ def test_check_hardcoded_dtypes_len():
         "Age": "int",
         "Last varchar": "varchar(10)",
     }
-    assert check_hardcoded_dtypes_len(test_df, good_dtypes) == None
+    assert check_hardcoded_dtypes_len(TEST_DF, good_dtypes) == None
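Note: a minimal sketch of the contract these tests pin down, using the helpers imported at the top of the file. The single-column frame and the varchar(5) value below are illustrative stand-ins (the real fixtures are collapsed in this view), and it is assumed the check accepts a frame with a single varchar column.

import pandas as pd

from viadot.flows.adls_to_azure_sql import check_hardcoded_dtypes_len

df = pd.DataFrame({"User ID": ["1a34", "1d3456789!"]})  # real max length = 10

# Wide enough: the check passes and returns None, as in the last test above.
check_hardcoded_dtypes_len(df, {"User ID": "varchar(10)"})

# Too narrow: raises ValueError and logs which column would lose data.
try:
    check_hardcoded_dtypes_len(df, {"User ID": "varchar(5)"})
except ValueError:
    pass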
2 changes: 2 additions & 0 deletions viadot/flows/adls_to_azure_sql.py
@@ -31,8 +31,10 @@ def union_dfs_task(dfs: List[pd.DataFrame]):
 def get_real_sql_dtypes_from_df(df: pd.DataFrame) -> Dict[str, Any]:
     """Obtain SQL data types from a pandas DataFrame
     and the lengths of the columns based on the real maximum lengths of the data in them.
+
     Args:
         df (pd.DataFrame): Data Frame from original ADLS file.
+
     Returns:
         Dict[str, Any]: Dictionary with data types of columns and their real maximum length.
     """
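For context, a minimal sketch of how the documented helpers compose, based on the TEST_DF/REAL_SQL_DTYPES assertions in the test file above. The two-column frame is trimmed for brevity and assumes the type detection does not depend on row count.

import pandas as pd

from viadot.flows.adls_to_azure_sql import get_real_sql_dtypes_from_df, len_from_dtypes

df = pd.DataFrame(
    {
        "Date": ["2023-01-01", "2023-01-02"],
        "User ID": ["1a34", "1d3456789!"],  # longest value: 10 characters
    }
)

# Expected per the tests above: {"Date": "DATE", "User ID": "VARCHAR(10)"}
real_dtypes = get_real_sql_dtypes_from_df(df)

# Expected: {"Date": "DATE", "User ID": 10} - VARCHAR(n) collapses to its length,
# non-varchar types pass through unchanged.
lengths = len_from_dtypes(real_dtypes)

print(real_dtypes)
print(lengths)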
