From fe34abe661ab0be2cf5272071f6ac83ada6fbd38 Mon Sep 17 00:00:00 2001 From: Guillaume GOYON <100585515+ggoyon@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:49:20 +0100 Subject: [PATCH] Improve dataset_manager.py Fixes #17 Add missing docstrings, update existing docstrings to Google format, and add type annotations to function signatures in `sostrades_core/datasets/dataset_manager.py`. * **Docstrings**: - Add missing docstrings to all methods. - Update existing docstrings to Google format. * **Type Annotations**: - Add type annotations to all function signatures. * **Imports**: - Add `Dict` to imports from `typing`. --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/ggoyon/sostrades-core/issues/17?shareId=XXXX-XXXX-XXXX-XXXX). --- sostrades_core/datasets/dataset_manager.py | 86 +++++++++++----------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/sostrades_core/datasets/dataset_manager.py b/sostrades_core/datasets/dataset_manager.py index 0aa3c8cd..55f243fb 100644 --- a/sostrades_core/datasets/dataset_manager.py +++ b/sostrades_core/datasets/dataset_manager.py @@ -14,7 +14,7 @@ limitations under the License. ''' import logging -from typing import Any +from typing import Any, Dict from sostrades_core.datasets.dataset import Dataset from sostrades_core.datasets.dataset_info.abstract_dataset_info import AbstractDatasetInfo @@ -35,21 +35,26 @@ class DatasetsManager: DATASET_INFO = 'dataset_info' def __init__(self, logger: logging.Logger): + """ + Initializes the DatasetsManager. + + Args: + logger (logging.Logger): Logger instance for logging. 
+ """ self.datasets = {} self.__logger = logger - def fetch_data_from_datasets(self, datasets_info: dict[AbstractDatasetInfo:dict[str:str]], - data_dict: dict[str:str]) -> dict[str:dict[str:Any]]: + def fetch_data_from_datasets(self, datasets_info: Dict[AbstractDatasetInfo, Dict[str, str]], + data_dict: Dict[str, str]) -> Dict[str, Dict[str, Any]]: """ - get data from datasets and fill data_dict + Fetches data from datasets and fills the data_dict. - :param datasets_info: list of datasets associated to a namespace - :type datasets_info: List[DatasetInfo] + Args: + datasets_info (Dict[AbstractDatasetInfo, Dict[str, str]]): Datasets associated to a namespace, keyed by their dataset info. + data_dict (Dict[str, str]): Dict of data to be fetched in datasets with their types. - :param data_dict: dict of data to be fetch in datasets with their types - :type data_dict: dict[name str: type str] - - :return: data_dict of data names and retrieved values plus a DATASET_INFO field with DatasetInfo object + Returns: + Dict[str, Dict[str, Any]]: Data dict of data names and retrieved values plus a DATASET_INFO field with DatasetInfo object. 
""" self.__logger.debug(f"Fetching data {data_dict.keys()} from datasets {datasets_info}") data_retrieved = {} @@ -75,7 +80,7 @@ def fetch_data_from_datasets(self, datasets_info: dict[AbstractDatasetInfo:dict[ # Retrieve values dataset_values = dataset.get_values(data_dict=data_to_fetch) - # Update internal dictionnary adding provenance (DatasetInfo object) for tracking parameter changes + # Update internal dictionary adding provenance (DatasetInfo object) for tracking parameter changes dataset_data = {dataset_data_reverse_mapping[key]: {self.VALUE: value, self.DATASET_INFO: dataset_info} for key, value in dataset_values.items()} data_retrieved.update(dataset_data) @@ -85,33 +90,31 @@ def fetch_data_from_datasets(self, datasets_info: dict[AbstractDatasetInfo:dict[ def get_dataset(self, dataset_info: AbstractDatasetInfo) -> Dataset: """ - Gets a dataset, creates it if it does not exist + Gets a dataset, creates it if it does not exist. - :param dataset_info: Dataset info - :type dataset_info: DatasetInfo + Args: + dataset_info (AbstractDatasetInfo): Dataset info. - :return: Dataset + Returns: + Dataset: Dataset instance. """ if dataset_info not in self.datasets: self.datasets[dataset_info] = self.__create_dataset(dataset_info=dataset_info) return self.datasets[dataset_info] def write_data_in_dataset(self, dataset_info: AbstractDatasetInfo, - data_dict: dict[str:str], - data_type_dict: dict[str:str]) -> dict: + data_dict: Dict[str, Any], + data_type_dict: Dict[str, str]) -> Dict[str, Any]: """ - get data from data_dict and fill dataset - - :param dataset_info: dataset associated to namespaces - :type dataset_info: DatasetInfo + Writes data from data_dict into the dataset. - :param data_dict: dict of data to be written in datasets with their types - :type data_dict: dict[name str: value] + Args: + dataset_info (AbstractDatasetInfo): Dataset associated to namespaces. + data_dict (Dict[str, Any]): Dict of data to be written in datasets with their values. 
+ data_type_dict (Dict[str, str]): Dict of data to be written in datasets with their types. - :param data_type_dict: dict of data to be written in datasets with their types - :type data_type_dict: dict[name str: type str] - - :return: data_dict of data names plus a DATASET_INFO field with DatasetInfo object + Returns: + Dict[str, Any]: Data dict of data names plus a DATASET_INFO field with DatasetInfo object. """ self.__logger.debug(f"exporting data {data_dict.keys()} into dataset {dataset_info}") @@ -129,20 +132,17 @@ def write_data_in_dataset(self, dataset_info: AbstractDatasetInfo, raise DatasetGenericException(f'Error exporting dataset "{dataset_info.dataset_id}" of datasets connector "{dataset_info.connector_id}": {exception}') return dataset_values - def get_path_to_dataset_data(self, dataset_info: AbstractDatasetInfo, data_name:str, data_type:str)-> str: + def get_path_to_dataset_data(self, dataset_info: AbstractDatasetInfo, data_name: str, data_type: str) -> str: """ - get path/link/uri to retrieve the dataset data - - :param dataset_info: dataset in witch the data is - :type dataset_info: DatasetInfo + Gets the path/link/URI to retrieve the dataset data. - :param data_name: data name to build the path - :type data_name: str + Args: + dataset_info (AbstractDatasetInfo): Dataset in which the data is. + data_name (str): Data name to build the path. + data_type (str): Type of the data in dataset. - :param data_type: type of the data in dataset - :type data_type: str - - :return: path/link/uri (str) to dataset data + Returns: + str: Path/link/URI to dataset data. """ path_to_dataset_data = "" try: @@ -160,11 +160,13 @@ def get_path_to_dataset_data(self, dataset_info: AbstractDatasetInfo, data_name: def __create_dataset(self, dataset_info: AbstractDatasetInfo) -> Dataset: """ - Private method - Get the connector associated to the dataset and create a Dataset object + Private method to get the connector associated to the dataset and create a Dataset object. 
+ + Args: + dataset_info (AbstractDatasetInfo): Dataset info. - :param dataset_info: Dataset info - :type dataset_info: DatasetInfo + Returns: + Dataset: Dataset instance. """ # Gets connector connector = DatasetsConnectorManager.get_connector(connector_identifier=dataset_info.connector_id)