diff --git a/CHANGELOG.md b/CHANGELOG.md index fe6a083b3..966994905 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Changed +- Modified `add_viadot_metadata_columns` to accept a `source_name` parameter, so the decorator can be applied to a `to_df` function or to any other function where the DataFrame is generated. +- Changed the `SharepointToDF` task to use `add_viadot_metadata_columns` with `source_name="Sharepoint"` after these changes. ## [0.4.19] - 2023-08-31 ### Added diff --git a/viadot/tasks/sharepoint.py b/viadot/tasks/sharepoint.py index fcceb1dbf..7ba9c4d41 100644 --- a/viadot/tasks/sharepoint.py +++ b/viadot/tasks/sharepoint.py @@ -12,6 +12,7 @@ from ..exceptions import ValidationError from ..sources import Sharepoint from .azure_key_vault import AzureKeyVaultSecret +from ..utils import add_viadot_metadata_columns logger = logging.get_logger() @@ -147,6 +148,7 @@ def split_sheet( "sheet_number", "validate_excel_file", ) + @add_viadot_metadata_columns(source_name="Sharepoint") def run( self, path_to_file: str = None, diff --git a/viadot/utils.py b/viadot/utils.py index 14659c702..2b4c80538 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -408,21 +408,44 @@ def check_if_empty_file( handle_if_empty_file(if_empty, message=f"Input file - '{path}' is empty.") -def add_viadot_metadata_columns(func: Callable) -> Callable: +def add_viadot_metadata_columns(source_name: str = None) -> Callable: """ Decorator that adds metadata columns to df in 'to_df' method. For now only _viadot_source is available because _viadot_downloaded_at_utc is added on the Flow level. + + Args: + source_name (str, optional): The name of the source to be included in the DataFrame. + This should be provided when creating a DataFrame in a Task, rather than in a Source. + Defaults to None. 
+ + Warning: Please remember to include brackets when applying a decorator, even if you are not passing the 'source_name' parameter. + + Example: + + In task: + + @add_viadot_metadata_columns(source_name="Sharepoint") + def to_df(self): + ... + + In source: + + @add_viadot_metadata_columns() + def to_df(self): + ... """ - @functools.wraps(func) - def wrapper(*args, **kwargs) -> pd.DataFrame: - df = func(*args, **kwargs) + def decorator(func) -> Callable: + @functools.wraps(func) + def wrapper(*args, **kwargs) -> pd.DataFrame: + df = func(*args, **kwargs) + + df["_viadot_source"] = ( + source_name if source_name is not None else args[0].__class__.__name__ + ) + + return df - # Accessing instance - instance = args[0] - _viadot_source = kwargs.get("source_name") or instance.__class__.__name__ - df["_viadot_source"] = _viadot_source - # df["_viadot_downloaded_at_utc"] = datetime.now(timezone.utc).replace(microsecond=0) - return df + return wrapper - return wrapper + return decorator