From 3d842cd098e0e9c675dc73dba340b2d5e312164c Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 5 Oct 2023 08:08:15 +0200 Subject: [PATCH 1/4] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Changed=20=5Fand=5Fadd?= =?UTF-8?q?=5Fviadot=5Fmetadata=5Fcolumns=5Fdecorator=20and=20aplied=20it?= =?UTF-8?q?=20to=20sharepoint=20task?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/sharepoint.py | 2 ++ viadot/utils.py | 26 +++++++++++++++----------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/viadot/tasks/sharepoint.py b/viadot/tasks/sharepoint.py index fcceb1dbf..7ba9c4d41 100644 --- a/viadot/tasks/sharepoint.py +++ b/viadot/tasks/sharepoint.py @@ -12,6 +12,7 @@ from ..exceptions import ValidationError from ..sources import Sharepoint from .azure_key_vault import AzureKeyVaultSecret +from ..utils import add_viadot_metadata_columns logger = logging.get_logger() @@ -147,6 +148,7 @@ def split_sheet( "sheet_number", "validate_excel_file", ) + @add_viadot_metadata_columns(source_name="Sharepoint") def run( self, path_to_file: str = None, diff --git a/viadot/utils.py b/viadot/utils.py index 14659c702..91c20961a 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -408,21 +408,25 @@ def check_if_empty_file( handle_if_empty_file(if_empty, message=f"Input file - '{path}' is empty.") -def add_viadot_metadata_columns(func: Callable) -> Callable: +def add_viadot_metadata_columns(source_name=None): """ Decorator that adds metadata columns to df in 'to_df' method. For now only _viadot_source is available because _viadot_downloaded_at_utc is added on the Flow level. 
""" - @functools.wraps(func) - def wrapper(*args, **kwargs) -> pd.DataFrame: - df = func(*args, **kwargs) + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + df = func(*args, **kwargs) - # Accessing instance - instance = args[0] - _viadot_source = kwargs.get("source_name") or instance.__class__.__name__ - df["_viadot_source"] = _viadot_source - # df["_viadot_downloaded_at_utc"] = datetime.now(timezone.utc).replace(microsecond=0) - return df + if source_name is not None: + df["_viadot_source"] = source_name + else: + instance = args[0] + df["_viadot_source"] = instance.__class__.__name__ - return wrapper + return df + + return wrapper + + return decorator From cd07f222af3ce9153ecccb60ff242b947d4e3819 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 5 Oct 2023 08:30:36 +0200 Subject: [PATCH 2/4] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Added=20changes=20to?= =?UTF-8?q?=20CHANGELOG?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe6a083b3..de17d50be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Changed +- Changed `add_viadot_metadata_columns` decorator that will be used as a decorator for `to_df` class methods (in source or task). +- Changed `sharepoint` task in order to implement add_viadot_metadata_columns after changes. 
## [0.4.19] - 2023-08-31 ### Added From 75dc7f02958fe69950f8e2430b6321416e3ab9ad Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Thu, 5 Oct 2023 09:21:16 +0200 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=8E=A8=20Improve=20docstring=20and=20?= =?UTF-8?q?logic=20of=20decorator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 +- viadot/utils.py | 35 +++++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de17d50be..623890232 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Changed -- Changed `add_viadot_metadata_columns` decorator that will be used as a decorator for `to_df` class methods (in source or task). +- Modified `add_viadot_metadata_columns` to be able to apply a parameter source_name to the decorator for to_df function or function where the DataFrame is generated. - Changed `sharepoint` task in order to implement add_viadot_metadata_columns after changes. ## [0.4.19] - 2023-08-31 diff --git a/viadot/utils.py b/viadot/utils.py index 91c20961a..2b4c80538 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -408,22 +408,41 @@ def check_if_empty_file( handle_if_empty_file(if_empty, message=f"Input file - '{path}' is empty.") -def add_viadot_metadata_columns(source_name=None): +def add_viadot_metadata_columns(source_name: str = None) -> Callable: """ Decorator that adds metadata columns to df in 'to_df' method. For now only _viadot_source is available because _viadot_downloaded_at_utc is added on the Flow level. + + Args: + source_name (str, optional): The name of the source to be included in the DataFrame. + This should be provided when creating a DataFrame in a Task, rather than in a Source. + Defaults to None. 
+ Warning: Please remember to include brackets when applying a decorator, even if you are not passing the 'source_name' parameter. + + Example: + + In task: + + @add_viadot_metadata_columns(source_name="Sharepoint") + def to_df(self): + ... + + In source: + + @add_viadot_metadata_columns() + def to_df(self): + ... """ - def decorator(func): + def decorator(func) -> Callable: @functools.wraps(func) - def wrapper(*args, **kwargs): + def wrapper(*args, **kwargs) -> pd.DataFrame: df = func(*args, **kwargs) - if source_name is not None: - df["_viadot_source"] = source_name - else: - instance = args[0] - df["_viadot_source"] = instance.__class__.__name__ + df["_viadot_source"] = ( + source_name if source_name is not None else args[0].__class__.__name__ + ) return df From c5a1ed6be7de366a2d6e49d63c6fdb289211f9ea Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Tue, 10 Oct 2023 12:34:08 +0200 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=93=9D=20Changed=20task=20name=20in?= =?UTF-8?q?=20changelog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 623890232..966994905 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Modified `add_viadot_metadata_columns` to be able to apply a parameter source_name to the decorator for to_df function or function where the DataFrame is generated. -- Changed `sharepoint` task in order to implement add_viadot_metadata_columns after changes. +- Changed `SharepointToDF` task in order to implement add_viadot_metadata_columns with value `source_name="Sharepoint"` after changes. ## [0.4.19] - 2023-08-31 ### Added