Skip to content

Commit

Permalink
Improve dataset_manager.py
Browse files Browse the repository at this point in the history
Fixes #17

Add missing docstrings, update existing docstrings to Google format, and add type annotations to function signatures in `sostrades_core/datasets/dataset_manager.py`.

* **Docstrings**:
  - Add missing docstrings to all methods.
  - Update existing docstrings to Google format.

* **Type Annotations**:
  - Correct the type annotations in function signatures (e.g. replace the slice-style `dict[str:str]` with `Dict[str, str]`).

* **Imports**:
  - Add `Dict` to imports from `typing`.

---

For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/ggoyon/sostrades-core/issues/17?shareId=XXXX-XXXX-XXXX-XXXX).
  • Loading branch information
ggoyon committed Nov 5, 2024
1 parent 0ed1831 commit fe34abe
Showing 1 changed file with 44 additions and 42 deletions.
86 changes: 44 additions & 42 deletions sostrades_core/datasets/dataset_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
limitations under the License.
'''
import logging
from typing import Any
from typing import Any, Dict

from sostrades_core.datasets.dataset import Dataset
from sostrades_core.datasets.dataset_info.abstract_dataset_info import AbstractDatasetInfo
Expand All @@ -35,21 +35,26 @@ class DatasetsManager:
DATASET_INFO = 'dataset_info'

def __init__(self, logger: logging.Logger):
"""
Initializes the DatasetsManager.
Args:
logger (logging.Logger): Logger instance for logging.
"""
self.datasets = {}
self.__logger = logger

def fetch_data_from_datasets(self, datasets_info: dict[AbstractDatasetInfo:dict[str:str]],
data_dict: dict[str:str]) -> dict[str:dict[str:Any]]:
def fetch_data_from_datasets(self, datasets_info: Dict[AbstractDatasetInfo, Dict[str, str]],
data_dict: Dict[str, str]) -> Dict[str, Dict[str, Any]]:
"""
get data from datasets and fill data_dict
Fetches data from datasets and fills the data_dict.
:param datasets_info: list of datasets associated to a namespace
:type datasets_info: List[DatasetInfo]
Args:
datasets_info (Dict[AbstractDatasetInfo, Dict[str, str]]): Datasets associated to a namespace, keyed by their dataset info object.
data_dict (Dict[str, str]): Dict of data to be fetched in datasets with their types.
:param data_dict: dict of data to be fetch in datasets with their types
:type data_dict: dict[name str: type str]
:return: data_dict of data names and retrieved values plus a DATASET_INFO field with DatasetInfo object
Returns:
Dict[str, Dict[str, Any]]: Data dict of data names and retrieved values plus a DATASET_INFO field with DatasetInfo object.
"""
self.__logger.debug(f"Fetching data {data_dict.keys()} from datasets {datasets_info}")
data_retrieved = {}
Expand All @@ -75,7 +80,7 @@ def fetch_data_from_datasets(self, datasets_info: dict[AbstractDatasetInfo:dict[

# Retrieve values
dataset_values = dataset.get_values(data_dict=data_to_fetch)
# Update internal dictionnary adding provenance (DatasetInfo object) for tracking parameter changes
# Update internal dictionary adding provenance (DatasetInfo object) for tracking parameter changes
dataset_data = {dataset_data_reverse_mapping[key]: {self.VALUE: value,
self.DATASET_INFO: dataset_info} for key, value in dataset_values.items()}
data_retrieved.update(dataset_data)
Expand All @@ -85,33 +90,31 @@ def fetch_data_from_datasets(self, datasets_info: dict[AbstractDatasetInfo:dict[

def get_dataset(self, dataset_info: AbstractDatasetInfo) -> Dataset:
"""
Gets a dataset, creates it if it does not exist
Gets a dataset, creates it if it does not exist.
:param dataset_info: Dataset info
:type dataset_info: DatasetInfo
Args:
dataset_info (AbstractDatasetInfo): Dataset info.
:return: Dataset
Returns:
Dataset: Dataset instance.
"""
if dataset_info not in self.datasets:
self.datasets[dataset_info] = self.__create_dataset(dataset_info=dataset_info)
return self.datasets[dataset_info]

def write_data_in_dataset(self, dataset_info: AbstractDatasetInfo,
data_dict: dict[str:str],
data_type_dict: dict[str:str]) -> dict:
data_dict: Dict[str, Any],
data_type_dict: Dict[str, str]) -> Dict[str, Any]:
"""
get data from data_dict and fill dataset
:param dataset_info: dataset associated to namespaces
:type dataset_info: DatasetInfo
Writes data from data_dict into the dataset.
:param data_dict: dict of data to be written in datasets with their types
:type data_dict: dict[name str: value]
Args:
dataset_info (AbstractDatasetInfo): Dataset associated to namespaces.
data_dict (Dict[str, Any]): Dict of data to be written in datasets with their values.
data_type_dict (Dict[str, str]): Dict of data to be written in datasets with their types.
:param data_type_dict: dict of data to be written in datasets with their types
:type data_type_dict: dict[name str: type str]
:return: data_dict of data names plus a DATASET_INFO field with DatasetInfo object
Returns:
Dict[str, Any]: Data dict of data names plus a DATASET_INFO field with DatasetInfo object.
"""
self.__logger.debug(f"exporting data {data_dict.keys()} into dataset {dataset_info}")

Expand All @@ -129,20 +132,17 @@ def write_data_in_dataset(self, dataset_info: AbstractDatasetInfo,
raise DatasetGenericException(f'Error exporting dataset "{dataset_info.dataset_id}" of datasets connector "{dataset_info.connector_id}": {exception}')
return dataset_values

def get_path_to_dataset_data(self, dataset_info: AbstractDatasetInfo, data_name:str, data_type:str)-> str:
def get_path_to_dataset_data(self, dataset_info: AbstractDatasetInfo, data_name: str, data_type: str) -> str:
"""
get path/link/uri to retrieve the dataset data
:param dataset_info: dataset in witch the data is
:type dataset_info: DatasetInfo
Gets the path/link/URI to retrieve the dataset data.
:param data_name: data name to build the path
:type data_name: str
Args:
dataset_info (AbstractDatasetInfo): Dataset in which the data is.
data_name (str): Data name to build the path.
data_type (str): Type of the data in dataset.
:param data_type: type of the data in dataset
:type data_type: str
:return: path/link/uri (str) to dataset data
Returns:
str: Path/link/URI to dataset data.
"""
path_to_dataset_data = ""
try:
Expand All @@ -160,11 +160,13 @@ def get_path_to_dataset_data(self, dataset_info: AbstractDatasetInfo, data_name:

def __create_dataset(self, dataset_info: AbstractDatasetInfo) -> Dataset:
"""
Private method
Get the connector associated to the dataset and create a Dataset object
Private method to get the connector associated to the dataset and create a Dataset object.
Args:
dataset_info (AbstractDatasetInfo): Dataset info.
:param dataset_info: Dataset info
:type dataset_info: DatasetInfo
Returns:
Dataset: Dataset instance.
"""
# Gets connector
connector = DatasetsConnectorManager.get_connector(connector_identifier=dataset_info.connector_id)
Expand Down

0 comments on commit fe34abe

Please sign in to comment.