From cf80ce87979c600adc7bc342f85b1f7612d28ea2 Mon Sep 17 00:00:00 2001 From: Jostein Solaas Date: Thu, 19 Sep 2024 09:39:36 +0200 Subject: [PATCH] refactor: time series collection, resource handling Create domain objects for TimeSeriesResource, TimeSeriesCollection, and related classes. This should make the behavior clearer and provide more flexibility in the future. Previously, time_series_collection.py and time_series_collection_mapper.py together handled several responsibilities at once: in addition to reading and validating the resource data, they also validated the yaml data. Because these steps were bundled together, it was difficult to reuse parts of the logic. Each step should now be available separately by using the corresponding class. --- examples/simple_yaml_model.ipynb | 6 +- src/ecalc_cli/commands/run.py | 6 +- .../infrastructure/file_resource_service.py | 4 +- src/ecalc_cli/main.py | 2 +- src/libecalc/common/errors/exceptions.py | 28 +- src/libecalc/common/string/string_utils.py | 6 +- .../ltp_export/ltp_power_from_shore_yaml.py | 54 +- src/libecalc/infrastructure/file_io.py | 4 +- .../presentation/yaml/domain/time_series.py | 106 +++ .../yaml/domain/time_series_collection.py | 99 +++ .../yaml/domain/time_series_collections.py | 74 +++ .../yaml/domain/time_series_exceptions.py | 9 + .../yaml/domain/time_series_provider.py | 9 + .../yaml/domain/time_series_resource.py | 148 +++++ .../yaml/mappers/facility_input.py | 6 +- .../presentation/yaml/mappers/utils.py | 4 +- .../get_global_time_vector.py | 89 +++ .../mappers/variables_mapper/time_series.py | 8 - .../time_series_collection.py | 161 ----- .../time_series_collection_mapper.py | 100 +-- .../variables_mapper/timeseries_utils.py | 160 ----- .../variables_mapper/variables_mapper.py | 46 +- src/libecalc/presentation/yaml/model.py | 88 ++- .../yaml/model_validation_exception.py | 23 + .../presentation/yaml/yaml_entities.py | 6 +- .../time_series/yaml_time_series.py | 7 +- src/tests/ecalc_cli/test_app.py | 2 +- .../variables_mapper/test_timeseries.py | 603 
++++++++++-------- .../variables_mapper/test_timeseries_utils.py | 400 +++++------- src/tests/libecalc/input/test_file_io.py | 10 +- .../test_venting_emitter_validation_errors.py | 2 +- 31 files changed, 1219 insertions(+), 1051 deletions(-) create mode 100644 src/libecalc/presentation/yaml/domain/time_series.py create mode 100644 src/libecalc/presentation/yaml/domain/time_series_collection.py create mode 100644 src/libecalc/presentation/yaml/domain/time_series_collections.py create mode 100644 src/libecalc/presentation/yaml/domain/time_series_exceptions.py create mode 100644 src/libecalc/presentation/yaml/domain/time_series_provider.py create mode 100644 src/libecalc/presentation/yaml/domain/time_series_resource.py create mode 100644 src/libecalc/presentation/yaml/mappers/variables_mapper/get_global_time_vector.py delete mode 100644 src/libecalc/presentation/yaml/mappers/variables_mapper/time_series.py delete mode 100644 src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection.py delete mode 100644 src/libecalc/presentation/yaml/mappers/variables_mapper/timeseries_utils.py create mode 100644 src/libecalc/presentation/yaml/model_validation_exception.py diff --git a/examples/simple_yaml_model.ipynb b/examples/simple_yaml_model.ipynb index b8a13f8650..8b2597ca10 100644 --- a/examples/simple_yaml_model.ipynb +++ b/examples/simple_yaml_model.ipynb @@ -81,14 +81,14 @@ " output_frequency=Frequency.NONE,\n", ")\n", "\n", - "model = EnergyCalculator(graph=yaml_model.graph)\n", + "model = EnergyCalculator(graph=yaml_model.get_graph())\n", "consumer_results = model.evaluate_energy_usage(yaml_model.variables)\n", "emission_results = model.evaluate_emissions(\n", " variables_map=yaml_model.variables,\n", " consumer_results=consumer_results,\n", ")\n", "result = GraphResult(\n", - " graph=yaml_model.graph,\n", + " graph=yaml_model.get_graph(),\n", " consumer_results=consumer_results,\n", " variables_map=yaml_model.variables,\n", " 
emission_results=emission_results,\n", @@ -114,7 +114,7 @@ "import matplotlib.pyplot as plt\n", "\n", "print(\"Iterating the model components to plot results: \\n\")\n", - "for identity, component in yaml_model.graph.nodes.items():\n", + "for identity, component in yaml_model.get_graph().nodes.items():\n", " if identity in result.consumer_results:\n", " component_result = result.consumer_results[identity].component_result\n", " ds = pd.Series(component_result.energy_usage.values, index=component_result.energy_usage.timesteps)\n", diff --git a/src/ecalc_cli/commands/run.py b/src/ecalc_cli/commands/run.py index cc5b01467f..01faadd41e 100644 --- a/src/ecalc_cli/commands/run.py +++ b/src/ecalc_cli/commands/run.py @@ -117,7 +117,7 @@ def run( configuration_service=configuration_service, resource_service=resource_service, output_frequency=frequency, - ) + ).validate_for_run() if (flow_diagram or ltp_export) and (model.start is None or model.end is None): logger.warning( @@ -132,7 +132,7 @@ def run( name_prefix=name_prefix, ) - energy_calculator = EnergyCalculator(graph=model.graph) + energy_calculator = EnergyCalculator(graph=model.get_graph()) precision = 6 consumer_results = energy_calculator.evaluate_energy_usage(model.variables) emission_results = energy_calculator.evaluate_emissions( @@ -140,7 +140,7 @@ def run( consumer_results=consumer_results, ) results_core = GraphResult( - graph=model.graph, + graph=model.get_graph(), consumer_results=consumer_results, variables_map=model.variables, emission_results=emission_results, diff --git a/src/ecalc_cli/infrastructure/file_resource_service.py b/src/ecalc_cli/infrastructure/file_resource_service.py index e4922fe288..72714150e0 100644 --- a/src/ecalc_cli/infrastructure/file_resource_service.py +++ b/src/ecalc_cli/infrastructure/file_resource_service.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Callable, Dict -from libecalc.common.errors.exceptions import EcalcError, HeaderNotFound +from 
libecalc.common.errors.exceptions import EcalcError, InvalidHeaderException from libecalc.common.logger import logger from libecalc.infrastructure.file_io import read_facility_resource, read_timeseries_resource from libecalc.presentation.yaml.resource import Resource @@ -18,7 +18,7 @@ def __init__(self, working_directory: Path): def _read_resource(resource_name: Path, *args, read_func: Callable[..., MemoryResource]) -> MemoryResource: try: return read_func(resource_name, *args) - except (HeaderNotFound, ValueError) as exc: + except (InvalidHeaderException, ValueError) as exc: logger.error(str(exc)) raise EcalcError("Failed to read resource", f"Failed to read {resource_name.name}: {str(exc)}") from exc diff --git a/src/ecalc_cli/main.py b/src/ecalc_cli/main.py index 04022742cd..e810769822 100755 --- a/src/ecalc_cli/main.py +++ b/src/ecalc_cli/main.py @@ -7,7 +7,7 @@ from ecalc_cli.commands.run import run from ecalc_cli.commands.selftest import selftest from ecalc_cli.logger import CLILogConfigurator, LogLevel, logger -from libecalc.presentation.yaml.model import ModelValidationException +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.validation_errors import DataValidationError app = typer.Typer(name="ecalc") diff --git a/src/libecalc/common/errors/exceptions.py b/src/libecalc/common/errors/exceptions.py index b282144efe..08b13cc3cd 100644 --- a/src/libecalc/common/errors/exceptions.py +++ b/src/libecalc/common/errors/exceptions.py @@ -74,7 +74,7 @@ def __init__(self, message: str): class InvalidDateException(EcalcError): ... 
-class InvalidResource(EcalcError): +class InvalidResourceException(EcalcError): """ Base exception for resource """ @@ -82,7 +82,12 @@ class InvalidResource(EcalcError): pass -class HeaderNotFound(InvalidResource): +class InvalidHeaderException(InvalidResourceException): + def __init__(self, message: str): + super().__init__("Invalid header", message, error_type=EcalcErrorType.CLIENT_ERROR) + + +class HeaderNotFoundException(InvalidResourceException): """Resource is missing header.""" def __init__(self, header: str): @@ -90,7 +95,7 @@ def __init__(self, header: str): super().__init__("Missing header(s)", f"Header '{header}' not found", error_type=EcalcErrorType.CLIENT_ERROR) -class ColumnNotFound(InvalidResource): +class ColumnNotFoundException(InvalidResourceException): """Resource is missing column""" def __init__(self, header: str): @@ -98,3 +103,20 @@ def __init__(self, header: str): super().__init__( "Missing column", f"Column matching header '{header}' is missing.", error_type=EcalcErrorType.CLIENT_ERROR ) + + +class InvalidColumnException(InvalidResourceException): + def __init__(self, header: str, message: str, row: int = None): + self.header = header + self.row = row + super().__init__( + "Invalid column", + message.format(header=header, row=row), + ) + + +class NoColumnsException(InvalidResourceException): + """Resource contains no columns""" + + def __init__(self): + super().__init__("No columns", "The resource contains no columns, it should have at least one.") diff --git a/src/libecalc/common/string/string_utils.py b/src/libecalc/common/string/string_utils.py index 0f6b7bcf95..9940110cf8 100644 --- a/src/libecalc/common/string/string_utils.py +++ b/src/libecalc/common/string/string_utils.py @@ -1,7 +1,9 @@ -from typing import Iterable, Set +from typing import Hashable, Iterable, Set, TypeVar +TItem = TypeVar("TItem", bound=Hashable) -def get_duplicates(names: Iterable[str]) -> Set[str]: + +def get_duplicates(names: Iterable[TItem]) -> Set[TItem]: seen 
= set() duplicates = set() for name in names: diff --git a/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py b/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py index 80456de088..c8d1591d17 100644 --- a/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py +++ b/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py @@ -1,16 +1,33 @@ +from io import StringIO from pathlib import Path +from typing import Dict, Optional, cast import pytest -import yaml from ecalc_cli.infrastructure.file_resource_service import FileResourceService from libecalc.common.time_utils import Frequency -from libecalc.dto import ResultOptions from libecalc.expression.expression import ExpressionType from libecalc.fixtures.case_types import DTOCase -from libecalc.presentation.yaml.mappers.variables_mapper import map_yaml_to_variables -from libecalc.presentation.yaml.parse_input import map_yaml_to_dto -from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel +from libecalc.presentation.yaml.configuration_service import ConfigurationService +from libecalc.presentation.yaml.model import YamlModel +from libecalc.presentation.yaml.yaml_entities import ResourceStream +from libecalc.presentation.yaml.yaml_models.yaml_model import ReaderType, YamlConfiguration, YamlValidator + + +class OverridableStreamConfigurationService(ConfigurationService): + def __init__(self, stream: ResourceStream, overrides: Optional[Dict] = None): + self._overrides = overrides + self._stream = stream + + def get_configuration(self) -> YamlValidator: + main_yaml_model = YamlConfiguration.Builder.get_yaml_reader(ReaderType.PYYAML).read( + main_yaml=self._stream, + enable_include=True, + ) + + if self._overrides is not None: + main_yaml_model._internal_datamodel.update(self._overrides) + return cast(YamlValidator, main_yaml_model) @pytest.fixture @@ -78,26 +95,17 @@ def _ltp_pfs_yaml_factory( """ - yaml_text = yaml.safe_load(input_text) - 
configuration = PyYamlYamlModel( - internal_datamodel=yaml_text, - name="ltp_export", - instantiated_through_read=True, + configuration_service = OverridableStreamConfigurationService( + stream=ResourceStream(name="ltp_export", stream=StringIO(input_text)) ) + resource_service = FileResourceService(working_directory=path) - path = path - - resources = FileResourceService._read_resources(configuration=configuration, working_directory=path) - variables = map_yaml_to_variables( - configuration, - resources=resources, - result_options=ResultOptions( - start=configuration.start, - end=configuration.end, - output_frequency=Frequency.YEAR, - ), + model = YamlModel( + configuration_service=configuration_service, + resource_service=resource_service, + output_frequency=Frequency.YEAR, ) - yaml_model = map_yaml_to_dto(configuration=configuration, resources=resources) - return DTOCase(ecalc_model=yaml_model, variables=variables) + + return DTOCase(ecalc_model=model.dto, variables=model.variables) return _ltp_pfs_yaml_factory diff --git a/src/libecalc/infrastructure/file_io.py b/src/libecalc/infrastructure/file_io.py index d2cd7bf5cc..c7011e477a 100644 --- a/src/libecalc/infrastructure/file_io.py +++ b/src/libecalc/infrastructure/file_io.py @@ -17,7 +17,7 @@ from libecalc.common.errors.exceptions import ( EcalcError, EcalcErrorType, - HeaderNotFound, + InvalidHeaderException, ) from libecalc.common.logger import logger from libecalc.presentation.yaml.yaml_entities import MemoryResource, YamlTimeseriesType @@ -399,7 +399,7 @@ def _validate_headers(headers: List[str]): "[ _ - # + : . 
, /] " ) elif re.match(r"^Unnamed: \d+$", header): - raise HeaderNotFound(header=header) + raise InvalidHeaderException(message="One or more headers are missing in resource") def _validate_not_nan(columns: List[List]): diff --git a/src/libecalc/presentation/yaml/domain/time_series.py b/src/libecalc/presentation/yaml/domain/time_series.py new file mode 100644 index 0000000000..d976895b3a --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series.py @@ -0,0 +1,106 @@ +from datetime import datetime +from operator import itemgetter +from typing import List, Tuple + +from scipy.interpolate import interp1d +from typing_extensions import Self + +from libecalc.common.list.list_utils import transpose +from libecalc.dto.types import InterpolationType + + +class TimeSeries: + def __init__( + self, + reference_id: str, + time_vector: List[datetime], + series: List[float], + extrapolate: bool, + interpolation_type: InterpolationType, + ): + self.reference_id = reference_id + self.time_vector = time_vector + self.series = series + self._extrapolate = extrapolate + self._interpolation_type = interpolation_type + + @staticmethod + def _get_interpolation_kind(rate_interpolation_type: InterpolationType) -> str: + if rate_interpolation_type == InterpolationType.LINEAR: + return "linear" + elif rate_interpolation_type == InterpolationType.RIGHT: + return "previous" + elif rate_interpolation_type == InterpolationType.LEFT: + return "next" + else: + raise ValueError(f"Invalid interpolation type, got {rate_interpolation_type}.") + + def _interpolate(self, time_vector: List[datetime], rate_interpolation_type: InterpolationType) -> List[float]: + interpolation_kind = self._get_interpolation_kind( + rate_interpolation_type=rate_interpolation_type, + ) + + start_time = self.time_vector[0] + + setup_times: List[float] + if len(self.time_vector) == 1: + # add dummy time 1 second later + setup_times = [0, 1] + setup_y = 2 * self.series + else: + # Interpolator x variable is number of 
seconds from first date time + setup_times = [(time - start_time).total_seconds() for time in self.time_vector] + setup_y = self.series + + interpolator = interp1d(x=setup_times, y=setup_y, kind=interpolation_kind) + target_times = [(time - start_time).total_seconds() for time in time_vector] + return list(interpolator(target_times)) + + def fit_to_time_vector( + self, + time_vector: List[datetime], + ) -> Self: + start, end = self.time_vector[0], self.time_vector[-1] + number_of_entries_before, entries_between, number_of_entries_after = split_time_vector( + time_vector, start=start, end=end + ) + + if self._extrapolate: + extrapolation_after_value = self.series[-1] + else: + extrapolation_after_value = 0.0 + + before_values = [0.0] * number_of_entries_before + between_values = self._interpolate( + time_vector=entries_between, rate_interpolation_type=self._interpolation_type + ) + after_values = [extrapolation_after_value] * number_of_entries_after + + return self.__class__( + reference_id=self.reference_id, + time_vector=time_vector, + series=[*before_values, *between_values, *after_values], + extrapolate=self._extrapolate, + interpolation_type=self._interpolation_type, + ) + + def sort(self) -> Self: + sort_columns = [self.time_vector, self.series] + sort_rows = transpose(sort_columns) + sorted_rows = sorted(sort_rows, key=itemgetter(0)) + sorted_columns = transpose(sorted_rows) + self.time_vector = sorted_columns[0] + self.series = sorted_columns[1] + return self + + +def split_time_vector( + time_vector: List[datetime], + start: datetime, + end: datetime, +) -> Tuple[int, List[datetime], int]: + """Find the entries between start and end, also counting the number of entries before start and after end.""" + number_of_entries_before = len([date for date in time_vector if date < start]) + number_of_entries_after = len([date for date in time_vector if date > end]) + entries_between = [date for date in time_vector if start <= date <= end] + return 
number_of_entries_before, entries_between, number_of_entries_after diff --git a/src/libecalc/presentation/yaml/domain/time_series_collection.py b/src/libecalc/presentation/yaml/domain/time_series_collection.py new file mode 100644 index 0000000000..78876bd28a --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_collection.py @@ -0,0 +1,99 @@ +from datetime import datetime +from typing import List + +from typing_extensions import Self, assert_never + +from libecalc.common.errors.exceptions import InvalidResourceException +from libecalc.dto.types import InterpolationType +from libecalc.presentation.yaml.domain.time_series import TimeSeries +from libecalc.presentation.yaml.domain.time_series_exceptions import TimeSeriesNotFound +from libecalc.presentation.yaml.domain.time_series_provider import TimeSeriesProvider +from libecalc.presentation.yaml.domain.time_series_resource import TimeSeriesResource +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException +from libecalc.presentation.yaml.resource import Resource +from libecalc.presentation.yaml.validation_errors import Location, ModelValidationError +from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords +from libecalc.presentation.yaml.yaml_types.time_series.yaml_time_series import ( + YamlDefaultTimeSeriesCollection, + YamlMiscellaneousTimeSeriesCollection, + YamlTimeSeriesCollection, +) + + +class TimeSeriesCollection(TimeSeriesProvider): + """ + TimeSeriesCollection is a collection of time series (TimeSeriesResource) and common properties for all the time + series in the collection. 
+ """ + + def __init__( + self, + name: str, + resource: TimeSeriesResource, + interpolation: InterpolationType, + extrapolation: bool, + influence_time_vector: bool, + ): + self.name = name + self._resource = resource + self._interpolation = interpolation + self._extrapolation = extrapolation + self._influence_time_vector = influence_time_vector + + def should_influence_time_vector(self) -> bool: + return self._influence_time_vector + + def get_time_vector(self) -> List[datetime]: + return self._resource.get_time_vector() + + def get_time_series_references(self) -> List[str]: + return self._resource.get_headers() + + def get_time_series(self, time_series_id: str) -> TimeSeries: + try: + return TimeSeries( + reference_id=f"{self.name};{time_series_id}", + time_vector=self.get_time_vector(), + series=self._resource.get_column(time_series_id), + extrapolate=self._extrapolation, + interpolation_type=self._interpolation, + ).sort() + except InvalidResourceException as e: + raise TimeSeriesNotFound( + f"Unable to find time series with reference '{time_series_id}' in collection '{self.name}'" + ) from e + + @classmethod + def from_yaml(cls, resource: Resource, yaml_collection: YamlTimeSeriesCollection) -> Self: + try: + time_series_resource = TimeSeriesResource(resource) + time_series_resource.validate() + + if isinstance(yaml_collection, YamlDefaultTimeSeriesCollection): + interpolation = InterpolationType.RIGHT + extrapolation = False + elif isinstance(yaml_collection, YamlMiscellaneousTimeSeriesCollection): + interpolation = InterpolationType[yaml_collection.interpolation_type] + extrapolation = yaml_collection.extrapolation if yaml_collection.extrapolation is not None else False + else: + assert_never(yaml_collection) + return cls( + name=yaml_collection.name, + resource=time_series_resource, + interpolation=interpolation, + extrapolation=extrapolation, + influence_time_vector=yaml_collection.influence_time_vector, + ) + + except InvalidResourceException as e: + # 
Catch validation when initializing TimeSeriesResource + raise ModelValidationException( + errors=[ + ModelValidationError( + data=yaml_collection.model_dump(), + location=Location(keys=[EcalcYamlKeywords.file]), + message=str(e), + file_context=None, + ) + ], + ) from e diff --git a/src/libecalc/presentation/yaml/domain/time_series_collections.py b/src/libecalc/presentation/yaml/domain/time_series_collections.py new file mode 100644 index 0000000000..dc058a8c28 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_collections.py @@ -0,0 +1,74 @@ +from datetime import datetime +from typing import Dict, List, Set + +from libecalc.presentation.yaml.domain.time_series import TimeSeries +from libecalc.presentation.yaml.domain.time_series_collection import TimeSeriesCollection +from libecalc.presentation.yaml.domain.time_series_exceptions import TimeSeriesNotFound +from libecalc.presentation.yaml.domain.time_series_provider import TimeSeriesProvider +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException +from libecalc.presentation.yaml.resource import Resource +from libecalc.presentation.yaml.validation_errors import Location, ModelValidationError +from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords +from libecalc.presentation.yaml.yaml_types.time_series.yaml_time_series import YamlTimeSeriesCollection + + +class TimeSeriesCollections(TimeSeriesProvider): + """ + TimeSeriesCollections keeps several TimeSeriesCollection classes and can provide info about those, such as all time + steps in all collections. 
+ """ + + def __init__(self, time_series: List[YamlTimeSeriesCollection], resources: Dict[str, Resource]): + time_series_collections: Dict[str, TimeSeriesCollection] = {} + errors: List[ModelValidationError] = [] + for time_series_collection in time_series: + resource = resources[time_series_collection.file] + try: + time_series_collections[time_series_collection.name] = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=time_series_collection, + ) + except ModelValidationException as e: + # Catch validation when initializing TimeSeriesResource + errors.extend( + [ + ModelValidationError( + data=error.data, + location=Location( + keys=[EcalcYamlKeywords.time_series, time_series_collection.name, *error.location.keys] + ), + message=error.message, + file_context=error.file_context, + ) + for error in e.errors() + ] + ) + if len(errors) != 0: + raise ModelValidationException(errors=errors) + + self._time_series_collections = time_series_collections + + def get_time_series_references(self) -> List[str]: + time_series_references = [] + for collection in self._time_series_collections.values(): + for time_series_reference in collection.get_time_series_references(): + time_series_references.append(f"{collection.name};{time_series_reference}") + return time_series_references + + def get_time_series(self, time_series_id: str) -> TimeSeries: + reference_id_parts = time_series_id.split(";") + if len(reference_id_parts) != 2: + raise TimeSeriesNotFound(time_series_id) + [collection_id, time_series_id] = reference_id_parts + + if collection_id not in self._time_series_collections: + raise TimeSeriesNotFound(time_series_id) + + return self._time_series_collections[collection_id].get_time_series(time_series_id) + + def get_time_vector(self) -> Set[datetime]: + time_vector: Set[datetime] = set() + for time_series_collection in self._time_series_collections.values(): + if time_series_collection.should_influence_time_vector(): + time_vector = 
time_vector.union(time_series_collection.get_time_vector()) + return time_vector diff --git a/src/libecalc/presentation/yaml/domain/time_series_exceptions.py b/src/libecalc/presentation/yaml/domain/time_series_exceptions.py new file mode 100644 index 0000000000..3f172de155 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_exceptions.py @@ -0,0 +1,9 @@ +from libecalc.common.errors.exceptions import EcalcError + + +class TimeSeriesNotFound(EcalcError): + def __init__(self, time_series_reference: str, message: str = None): + if message is None: + message = f"Unable to find time series with reference '{time_series_reference}'" + + super().__init__("Time series not found", message) diff --git a/src/libecalc/presentation/yaml/domain/time_series_provider.py b/src/libecalc/presentation/yaml/domain/time_series_provider.py new file mode 100644 index 0000000000..ba98e45120 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_provider.py @@ -0,0 +1,9 @@ +from typing import List, Protocol + +from libecalc.presentation.yaml.domain.time_series import TimeSeries + + +class TimeSeriesProvider(Protocol): + def get_time_series(self, time_series_id: str) -> TimeSeries: ... + + def get_time_series_references(self) -> List[str]: ... 
diff --git a/src/libecalc/presentation/yaml/domain/time_series_resource.py b/src/libecalc/presentation/yaml/domain/time_series_resource.py new file mode 100644 index 0000000000..6d90af19e6 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_resource.py @@ -0,0 +1,148 @@ +import re +from datetime import datetime +from math import isnan +from typing import Iterable, List, Union + +from pandas.errors import ParserError +from typing_extensions import Self + +from libecalc.common.errors.exceptions import ( + InvalidColumnException, + InvalidHeaderException, + InvalidResourceException, + NoColumnsException, +) +from libecalc.common.string.string_utils import get_duplicates +from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection_mapper import parse_time_vector +from libecalc.presentation.yaml.resource import Resource +from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords + + +class InvalidTimeSeriesResourceException(InvalidResourceException): + def __init__(self, message): + super().__init__("Invalid time series resource", message) + + +class EmptyTimeVectorException(InvalidTimeSeriesResourceException): + def __init__(self): + super().__init__("The time vector is empty") + + +class DuplicateDatesException(InvalidTimeSeriesResourceException): + def __init__(self, duplicates: Iterable[datetime]): + self.duplicates = duplicates + super().__init__(f"The time series resource contains duplicate dates: {','.join(map(str, duplicates))}") + + +def _is_header_valid(header: str) -> bool: + return bool(re.match(r"^[A-Za-z][A-Za-z0-9_.,\-\s#+:\/]*$", header)) + + +class TimeSeriesResource(Resource): + """ + A time series resource containing time series + """ + + def __init__(self, resource: Resource): + self._resource = resource + headers = resource.get_headers() + + if len(headers) == 0: + raise InvalidResourceException("Invalid resource", "Resource must at least have one column") + + for header in headers: + if not 
_is_header_valid(header): + raise InvalidHeaderException( + "The time series resource header contains illegal characters. " + "Allowed characters are: ^[A-Za-z][A-Za-z0-9_.,\\-\\s#+:\\/]*$" + ) + + if EcalcYamlKeywords.date in headers: + # Find the column named "DATE" and use that as time vector + time_vector = resource.get_column(EcalcYamlKeywords.date) + headers = [header for header in headers if header != EcalcYamlKeywords.date] + else: + # Legacy: support random names for time vector as long as it is the first column + time_vector = resource.get_column(headers[0]) + headers = headers[1:] + + try: + if not all(isinstance(time, (int, str)) for time in time_vector): + # time_vector may be a list of floats for example. + # This might happen if the resource contains an extra comma only in a single row. + raise InvalidTimeSeriesResourceException("could not parse time vector.") + self._time_vector = parse_time_vector(time_vector) + except (ParserError, ValueError) as e: + # pandas.to_datetime might raise these two exceptions. 
+ # See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html + raise InvalidTimeSeriesResourceException("could not parse time vector.") from e + + self._headers = headers + + def _validate_time_vector(self) -> None: + if len(self._time_vector) == 0: + raise EmptyTimeVectorException() + duplicates = get_duplicates(self._time_vector) + if len(duplicates) != 0: + raise DuplicateDatesException(duplicates=duplicates) + + def _validate_columns(self): + headers = self.get_headers() + columns = [self.get_column(header) for header in headers] + time_vector = self.get_time_vector() + + time_vector_length = len(time_vector) + headers_length = len(headers) + + if headers_length == 0: + raise NoColumnsException() + + number_of_rows = len(columns[0]) + + if number_of_rows == 0: + raise InvalidResourceException("No rows in resource", "The resource should have at least one row.") + + if not (number_of_rows == time_vector_length): + raise InvalidResourceException( + "Rows mismatch", + f"The number of records for times and data do not match: data: {number_of_rows}, time_vector: {time_vector_length}", + ) + + for column, header in zip(columns, headers): + if len(column) != time_vector_length: + raise InvalidColumnException( + header=header, + message="Column '{header}' does not match the length of the time vector.", + ) + + for index, value in enumerate(column): + row = index + 1 + if not isinstance(value, (float, int)): + raise InvalidColumnException( + header=header, + row=row, + message="The timeseries column '{header}' contains non-numeric values in row {row}.", + ) + if isnan(value): + raise InvalidColumnException( + header=header, + row=row, + message="The timeseries column '{header}' contains empty values in row {row}.", + ) + + def validate(self) -> Self: + self._validate_time_vector() + + self._validate_columns() + + return self + + def get_time_vector(self) -> List[datetime]: + return self._time_vector + + def get_headers(self) -> List[str]: + 
return self._headers + + def get_column(self, header: str) -> List[Union[float, int, str]]: + # TODO: Add validation on column so that we can remove 'str' from return type + return self._resource.get_column(header) diff --git a/src/libecalc/presentation/yaml/mappers/facility_input.py b/src/libecalc/presentation/yaml/mappers/facility_input.py index ee2248c99e..35cb2e9b07 100644 --- a/src/libecalc/presentation/yaml/mappers/facility_input.py +++ b/src/libecalc/presentation/yaml/mappers/facility_input.py @@ -3,7 +3,7 @@ from pydantic import TypeAdapter, ValidationError from libecalc import dto -from libecalc.common.errors.exceptions import InvalidResource +from libecalc.common.errors.exceptions import InvalidResourceException from libecalc.dto import CompressorSampled as CompressorTrainSampledDTO from libecalc.dto import GeneratorSetSampled, TabulatedData from libecalc.dto.types import ChartType, EnergyModelType, EnergyUsageType @@ -49,7 +49,7 @@ def _get_adjustment_factor(data: Dict) -> float: def _get_column_or_none(resource: Resource, header: str) -> Optional[List[Union[float, int, str]]]: try: return resource.get_column(header) - except InvalidResource: + except InvalidResourceException: return None @@ -221,7 +221,7 @@ def from_yaml_to_dto(self, data: Dict) -> dto.EnergyModel: error_key=vve.key, dump_flow_style=DumpFlowStyle.BLOCK, ) from vve - except InvalidResource as e: + except InvalidResourceException as e: message = f"Invalid resource '{resource_name}'. 
Reason: {str(e)}" raise DataValidationError( diff --git a/src/libecalc/presentation/yaml/mappers/utils.py b/src/libecalc/presentation/yaml/mappers/utils.py index cee9f2fa27..efdb65dfc2 100644 --- a/src/libecalc/presentation/yaml/mappers/utils.py +++ b/src/libecalc/presentation/yaml/mappers/utils.py @@ -3,7 +3,7 @@ import pandas as pd -from libecalc.common.errors.exceptions import HeaderNotFound, InvalidReferenceException +from libecalc.common.errors.exceptions import HeaderNotFoundException, InvalidReferenceException from libecalc.common.logger import logger from libecalc.common.units import Unit from libecalc.dto.types import ( @@ -269,7 +269,7 @@ def get_single_speed_chart_data(resource: Resource, resource_name: str) -> Chart ) # Get first speed, all are equal. speed = speed_values[0] - except HeaderNotFound: + except HeaderNotFoundException: logger.debug(f"Speed not specified for single speed chart {resource_name}, setting speed to 1.") speed = 1 diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/get_global_time_vector.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/get_global_time_vector.py new file mode 100644 index 0000000000..0e2ec681ba --- /dev/null +++ b/src/libecalc/presentation/yaml/mappers/variables_mapper/get_global_time_vector.py @@ -0,0 +1,89 @@ +from datetime import datetime, timedelta +from typing import Iterable, List, Optional, Set + +import pandas as pd + +import libecalc.common.time_utils +from libecalc.presentation.yaml.validation_errors import ValidationError + + +def _get_date_range(start: datetime, end: datetime, frequency: libecalc.common.time_utils.Frequency) -> Set[datetime]: + if frequency == libecalc.common.time_utils.Frequency.NONE: + return set() + + date_range = pd.date_range(start=start, end=end, freq=frequency.value) + return set(date_range.to_pydatetime()) + + +def _get_end_boundary(frequency: libecalc.common.time_utils.Frequency, time_vector_set: Set[datetime]) -> datetime: + """If end boundary has 
not been specified explicitly, we attempt to make an educated guess for the + user, based on output frequency provided and assuming data is forward filled. + + It is however recommended that the user specified END explicitly + """ + time_vector: List[datetime] = sorted(time_vector_set) + + if frequency == libecalc.common.time_utils.Frequency.YEAR: + return datetime(year=time_vector[-1].year + 1, month=1, day=1) + elif frequency == libecalc.common.time_utils.Frequency.MONTH: + return (time_vector[-1].replace(day=1) + timedelta(days=31)).replace(day=1) + elif frequency == libecalc.common.time_utils.Frequency.DAY: + return time_vector[-1] + timedelta(days=1) + else: + return max( + time_vector + ) # Frequency.NONE . We are clueless and user does not help us, just fallback to last time given + + +def get_global_time_vector( + time_series_time_vector: Iterable[datetime], + start: Optional[datetime] = None, + end: Optional[datetime] = None, + additional_dates: Optional[Set[datetime]] = None, + frequency: libecalc.common.time_utils.Frequency = libecalc.common.time_utils.Frequency.NONE, +) -> List[datetime]: + """ + + Args: + time_series_time_vector: all dates from time series that should influence time vector + start: user specified start + end: user specified end + additional_dates: dates from the model configuration + frequency: user specified frequency + + Returns: the actual set of dates that should be computed + """ + time_vector: Set[datetime] = set(time_series_time_vector) + + has_time_vector = len(time_vector) > 0 + has_start = start is not None + has_end = end is not None + has_frequency = frequency != libecalc.common.time_utils.Frequency.NONE + if not (has_time_vector or (has_start and has_end) or (has_start and has_frequency)): + raise ValidationError("No time series found, please provide one or specify a start and end (or frequency).") + + # Store start, end before adding dates from yaml. This is to make sure dates in yaml are trimmed. 
+ start = start or min(time_vector) + + # Add start + time_vector.add(start) + + if not end: + end = _get_end_boundary(frequency=frequency, time_vector_set=time_vector) + + # Add end + time_vector.add(end) + + # Add all dates specified in yaml + time_vector = time_vector.union(additional_dates or set()) + + # Trim time vector based on start + time_vector = {date for date in time_vector if date >= start} + + # Trim time vector based on end + time_vector = {date for date in time_vector if date <= end} + + # Add all dates for frequency + time_vector = time_vector.union(_get_date_range(start=start, end=end, frequency=frequency)) + + return sorted(time_vector) diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series.py deleted file mode 100644 index dd86152c7e..0000000000 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series.py +++ /dev/null @@ -1,8 +0,0 @@ -from datetime import datetime -from typing import List, NamedTuple - - -class TimeSeries(NamedTuple): - reference_id: str - time_vector: List[datetime] - series: List[float] diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection.py deleted file mode 100644 index 5bd4498909..0000000000 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection.py +++ /dev/null @@ -1,161 +0,0 @@ -from datetime import datetime -from math import isnan -from typing import List, Literal, Optional, Tuple, Union - -from pydantic import ConfigDict, Field, field_validator, model_validator -from typing_extensions import Annotated - -from libecalc.dto.base import EcalcBaseModel -from libecalc.dto.types import InterpolationType, TimeSeriesType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series import TimeSeries - - -def transpose(data: List[List]) -> List[List]: - 
return list(map(list, zip(*data))) - - -def _sort_time_series_data( - time_vector: List[Union[datetime]], - columns: List[List], -) -> Tuple[List[Union[datetime]], List[List]]: - timeseries_columns = [time_vector, *columns] - timeseries_rows = transpose(timeseries_columns) - sorted_timeseries_rows = sorted(timeseries_rows, key=lambda row: row[0]) - sorted_timeseries_columns = transpose(sorted_timeseries_rows) - return sorted_timeseries_columns[0], sorted_timeseries_columns[1:] - - -class TimeSeriesCollection(EcalcBaseModel): - typ: TimeSeriesType - name: str = Field(pattern=r"^[A-Za-z][A-Za-z0-9_]*$") - - headers: List[Annotated[str, Field(pattern=r"^[A-Za-z][A-Za-z0-9_.,\-\s#+:\/]*$")]] = Field( - default_factory=list - ) # Does not include date header - columns: List[List[float]] = Field(default_factory=list) - time_vector: List[datetime] = Field(default_factory=list) - - influence_time_vector: Optional[bool] = True - extrapolate_outside_defined_time_interval: Optional[bool] = None - interpolation_type: InterpolationType = None - model_config = ConfigDict(validate_default=True) - - @field_validator("influence_time_vector") - @classmethod - def set_influence_time_vector_default(cls, value): - return value if value is not None else True - - @field_validator("extrapolate_outside_defined_time_interval") - @classmethod - def set_extrapolate_outside_defined_time_interval_default(cls, value): - return value if value is not None else False - - @field_validator("time_vector") - @classmethod - def check_that_dates_are_ok(cls, dates): - if len(dates) == 0: - raise ValueError("Time vectors must have at least one record") - if not (len(dates) == len(set(dates))): - raise ValueError("The list of dates have duplicates. 
Duplicated dates are currently not supported.") - return dates - - @model_validator(mode="after") - def check_that_lists_match(self): - headers = self.headers - columns = self.columns - time_vector = self.time_vector - - time_vector_length = len(time_vector) - headers_length = len(headers) - - if headers_length == 0: - raise ValueError("Headers must at least have one column") - - number_of_columns = len(columns) - - if number_of_columns == 0: - raise ValueError("Data vector must at least have one column") - - if not (headers_length == number_of_columns): - raise ValueError( - f"The number of columns provided do not match for header and data: data: {number_of_columns}, headers: {headers_length}" - ) - - number_of_rows = len(columns[0]) - - if number_of_rows == 0: - raise ValueError("Data must have at least one record") - - if not (number_of_rows == time_vector_length): - raise ValueError( - f"The number of records for times and data do not match: data: {number_of_rows}, time_vector: {time_vector_length}" - ) - - sorted_time_vector, sorted_columns = _sort_time_series_data(time_vector, columns) - self.time_vector = sorted_time_vector - self.columns = sorted_columns - return self - - @model_validator(mode="after") - def check_that_columns_are_ok(self): - headers = self.headers - - if headers is None or self.columns is None: - return self.columns - - for column, header in zip(self.columns, headers): - for value in column: - if isnan(value): - reference_id = f"{self.name};{header}" - raise ValueError( - f"The timeseries column '{reference_id}' contains empty values. 
" - f"Please check your file for missing data, each column should define values for all timesteps.", - ) - - return self - - @property - def time_series(self): - return [ - TimeSeries( - reference_id=f"{self.name};{header}", - time_vector=self.time_vector, - series=column, - ) - for header, column in zip(self.headers, self.columns) - ] - - -class MiscellaneousTimeSeriesCollection(TimeSeriesCollection): - typ: Literal[TimeSeriesType.MISCELLANEOUS] = TimeSeriesType.MISCELLANEOUS.value - - @field_validator("interpolation_type", mode="before") - @classmethod - def interpolation_is_required(cls, value): - if value is None: - raise ValueError("interpolation_type must be specified for the MISCELLANEOUS time series type.") - return value - - -class DefaultTimeSeriesCollection(TimeSeriesCollection): - typ: Literal[TimeSeriesType.DEFAULT] = TimeSeriesType.DEFAULT.value - - @field_validator("extrapolate_outside_defined_time_interval", mode="before") - @classmethod - def extrapolate_outside_defined_time_interval_cannot_be_set(cls, value): - if value is not None: - raise ValueError( - "extrapolate_outside_defined_time_interval cannot be set on " - "DEFAULT-type (since DEFAULT-models should not be possible to extrapolate)." - ) - - return value - - @field_validator("interpolation_type", mode="before") - def set_default_interpolation_type(cls, value): - if value is not None: - raise ValueError( - "interpolation_type cannot be set on DEFAULT-type " - "(since DEFAULT-models can only have RIGHT interpolation)." 
- ) - return InterpolationType.RIGHT diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py index 188709608e..dc0f30affa 100644 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py +++ b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py @@ -1,39 +1,13 @@ import re from datetime import datetime -from typing import Dict, List, Union +from typing import List, Union import pandas -from pydantic import Field, TypeAdapter, ValidationError -from typing_extensions import Annotated -from libecalc.common.errors.exceptions import InvalidResource -from libecalc.dto import TimeSeriesType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - DefaultTimeSeriesCollection, - MiscellaneousTimeSeriesCollection, -) -from libecalc.presentation.yaml.resource import Resource, Resources -from libecalc.presentation.yaml.validation_errors import ( - DataValidationError, - DtoValidationError, - DumpFlowStyle, -) -from libecalc.presentation.yaml.yaml_entities import ( - YamlTimeseriesType, -) +from libecalc.common.errors.exceptions import InvalidResourceException +from libecalc.presentation.yaml.resource import Resource from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords -# Used here to make pydantic understand which object to instantiate. 
-TimeSeriesUnionType = Annotated[ - Union[MiscellaneousTimeSeriesCollection, DefaultTimeSeriesCollection], - Field(discriminator="typ"), -] - -time_series_type_map = { - YamlTimeseriesType.MISCELLANEOUS.value: TimeSeriesType.MISCELLANEOUS, - YamlTimeseriesType.DEFAULT.value: TimeSeriesType.DEFAULT, -} - def _parse_date(date_input: Union[int, str]) -> datetime: """ @@ -54,7 +28,7 @@ def _parse_date(date_input: Union[int, str]) -> datetime: return pandas.to_datetime(date_input, dayfirst=True).to_pydatetime() -def parse_time_vector(time_vector: List[str]) -> List[datetime]: +def parse_time_vector(time_vector: List[Union[int, str]]) -> List[datetime]: return [_parse_date(date_input) for date_input in time_vector] @@ -62,7 +36,7 @@ def parse_time_series_from_resource(resource: Resource): time_series_resource_headers = resource.get_headers() if len(time_series_resource_headers) == 0: - raise InvalidResource("Invalid resource", "Resource must at least have one column") + raise InvalidResourceException("Invalid resource", "Resource must at least have one column") if EcalcYamlKeywords.date in time_series_resource_headers: # Find the column named "DATE" and use that as time vector @@ -74,67 +48,3 @@ def parse_time_series_from_resource(resource: Resource): headers = time_series_resource_headers[1:] return parse_time_vector(time_vector), headers - - -class TimeSeriesCollectionMapper: - def __init__(self, resources: Resources): - self.__resources = resources - - def from_yaml_to_dto(self, data: Dict) -> TimeSeriesUnionType: - """ - Fixme: we do not use the input date format when reading Time Series Collections. 
- """ - - time_series = { - "typ": data.get(EcalcYamlKeywords.type), - "name": data.get(EcalcYamlKeywords.name), - "influence_time_vector": data.get(EcalcYamlKeywords.time_series_influence_time_vector), - "extrapolate_outside_defined_time_interval": data.get( - EcalcYamlKeywords.time_series_extrapolate_outside_defined - ), - "interpolation_type": data.get(EcalcYamlKeywords.time_series_interpolation_type), - } - - resource_name = data.get(EcalcYamlKeywords.file) - time_series_resource = self.__resources.get( - resource_name, - ) - - if time_series_resource is None: - resource_name_context = "." - if resource_name is not None: - resource_name_context = f" with name '{resource_name}'" - raise DataValidationError( - data, - message=f"Could not find resource{resource_name_context}", - error_key=EcalcYamlKeywords.file, - dump_flow_style=DumpFlowStyle.BLOCK, - ) - - try: - time_vector, headers = parse_time_series_from_resource(time_series_resource) - except InvalidResource as e: - raise DataValidationError( - data, - message=str(e), - error_key=EcalcYamlKeywords.file, - dump_flow_style=DumpFlowStyle.BLOCK, - ) from e - - columns = [] - - for header in headers: - try: - columns.append(time_series_resource.get_column(header)) - except InvalidResource: - # Validation handled below when creating TimeSeries class - pass - - time_series["headers"] = headers - time_series["time_vector"] = time_vector - time_series["columns"] = columns - - try: - return TypeAdapter(TimeSeriesUnionType).validate_python(time_series) - except ValidationError as e: - raise DtoValidationError(data=data, validation_error=e, dump_flow_style=DumpFlowStyle.BLOCK) from e diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/timeseries_utils.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/timeseries_utils.py deleted file mode 100644 index f7013747c5..0000000000 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/timeseries_utils.py +++ /dev/null @@ -1,160 +0,0 @@ -from 
datetime import datetime, timedelta -from typing import List, Optional, Set, Tuple - -import pandas as pd -from scipy.interpolate import interp1d - -import libecalc.common.time_utils -from libecalc.dto.types import InterpolationType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series import TimeSeries -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - TimeSeriesCollection, -) -from libecalc.presentation.yaml.validation_errors import ValidationError - - -def _split_time_vector( - time_vector: List[datetime], - start: datetime, - end: datetime, -) -> Tuple[int, List[datetime], int]: - """Find the entries between start and end, also counting the number of entries before start and after end.""" - number_of_entries_before = len([date for date in time_vector if date < start]) - number_of_entries_after = len([date for date in time_vector if date > end]) - entries_between = [date for date in time_vector if start <= date <= end] - return number_of_entries_before, entries_between, number_of_entries_after - - -def _get_interpolation_kind(rate_interpolation_type: InterpolationType) -> str: - if rate_interpolation_type == InterpolationType.LINEAR: - return "linear" - elif rate_interpolation_type == InterpolationType.RIGHT: - return "previous" - elif rate_interpolation_type == InterpolationType.LEFT: - return "next" - else: - raise ValueError(f"Invalid interpolation typem, got {rate_interpolation_type}.") - - -def _interpolate( - time_series: TimeSeries, time_vector: List[datetime], rate_interpolation_type: InterpolationType -) -> List[float]: - interpolation_kind = _get_interpolation_kind( - rate_interpolation_type=rate_interpolation_type, - ) - - start_time = time_series.time_vector[0] - - if len(time_series.time_vector) == 1: - # add dummy time 1 second later - setup_times = [0, 1] - setup_y = 2 * time_series.series - else: - # Interpolator x variable is number of seconds from first date time - setup_times = [(time - 
start_time).total_seconds() for time in time_series.time_vector] - setup_y = time_series.series - - interpolator = interp1d(x=setup_times, y=setup_y, kind=interpolation_kind) - target_times = [(time - start_time).total_seconds() for time in time_vector] - return list(interpolator(target_times)) - - -def fit_time_series_to_time_vector( - time_series: TimeSeries, - time_vector: List[datetime], - extrapolate_outside_defined_time_interval: bool, - interpolation_type: InterpolationType, -) -> List[float]: - start, end = time_series.time_vector[0], time_series.time_vector[-1] - number_of_entries_before, entries_between, number_of_entries_after = _split_time_vector( - time_vector, start=start, end=end - ) - - if extrapolate_outside_defined_time_interval: - extrapolation_after_value = time_series.series[-1] - else: - extrapolation_after_value = 0.0 - - before_values = [0.0] * number_of_entries_before - between_values = _interpolate( - time_series=time_series, time_vector=entries_between, rate_interpolation_type=interpolation_type - ) - after_values = [extrapolation_after_value] * number_of_entries_after - - return [*before_values, *between_values, *after_values] - - -def _get_date_range(start: datetime, end: datetime, frequency: libecalc.common.time_utils.Frequency) -> Set[datetime]: - if frequency == libecalc.common.time_utils.Frequency.NONE: - return set() - - date_range = pd.date_range(start=start, end=end, freq=frequency.value) - return set(date_range.to_pydatetime()) - - -def _get_end_boundary(frequency: libecalc.common.time_utils.Frequency, time_vector_set: Set[datetime]) -> datetime: - """If end boundary has not been specified explicitly, we attempt to make an educated guess for the - user, based on output frequency provided and assuming data is forward filled. 
- - It is however recommended that the user specified END explicitly - """ - time_vector: List[datetime] = sorted(time_vector_set) - - if frequency == libecalc.common.time_utils.Frequency.YEAR: - return datetime(year=time_vector[-1].year + 1, month=1, day=1) - elif frequency == libecalc.common.time_utils.Frequency.MONTH: - return (time_vector[-1].replace(day=1) + timedelta(days=31)).replace(day=1) - elif frequency == libecalc.common.time_utils.Frequency.DAY: - return time_vector[-1] + timedelta(days=1) - else: - return max( - time_vector - ) # Frequency.NONE . We are clueless and user does not help us, just fallback to last time given - - -def get_global_time_vector( - time_series_collections: List[TimeSeriesCollection], - start: Optional[datetime] = None, - end: Optional[datetime] = None, - additional_dates: Optional[Set[datetime]] = None, - frequency: libecalc.common.time_utils.Frequency = libecalc.common.time_utils.Frequency.NONE, -) -> List[datetime]: - time_vector: Set[datetime] = set() - - # Add all dates from time series that should influence time vector - for time_series_collection in time_series_collections: - if time_series_collection.influence_time_vector: - time_vector = time_vector.union(time_series_collection.time_vector) - - has_time_vector = len(time_vector) > 0 - has_start = start is not None - has_end = end is not None - has_frequency = frequency != libecalc.common.time_utils.Frequency.NONE - if not (has_time_vector or (has_start and has_end) or (has_start and has_frequency)): - raise ValidationError("No time series found, please provide one or specify a start and end (or frequency).") - - # Store start, end before adding dates from yaml. This is to make sure dates in yaml are trimmed. 
- start = start or min(time_vector) - - # Add start - time_vector.add(start) - - if not end: - end = _get_end_boundary(frequency=frequency, time_vector_set=time_vector) - - # Add end - time_vector.add(end) - - # Add all dates specified in yaml - time_vector = time_vector.union(additional_dates or set()) - - # Trim time vector based on start - time_vector = {date for date in time_vector if date >= start} - - # Trim time vector based on end - time_vector = {date for date in time_vector if date <= end} - - # Add all dates for frequency - time_vector = time_vector.union(_get_date_range(start=start, end=end, frequency=frequency)) - - return sorted(time_vector) diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py index bc0cd4a523..6e725457ba 100644 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py +++ b/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py @@ -6,15 +6,8 @@ from libecalc.common.logger import logger from libecalc.common.time_utils import Periods from libecalc.dto import VariablesMap -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection_mapper import ( - TimeSeriesCollectionMapper, -) -from libecalc.presentation.yaml.mappers.variables_mapper.timeseries_utils import ( - fit_time_series_to_time_vector, - get_global_time_vector, -) -from libecalc.presentation.yaml.resource import Resources -from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel +from libecalc.presentation.yaml.domain.time_series_provider import TimeSeriesProvider +from libecalc.presentation.yaml.yaml_models.yaml_model import YamlValidator from libecalc.presentation.yaml.yaml_types.yaml_variable import ( YamlSingleVariable, YamlVariable, @@ -107,36 +100,17 @@ def _evaluate_variables(variables: Dict[str, YamlVariable], variables_map: Varia def map_yaml_to_variables( - 
configuration: PyYamlYamlModel, - resources: Resources, - result_options: dto.ResultOptions, + configuration: YamlValidator, time_series_provider: TimeSeriesProvider, global_time_vector: List[datetime] ) -> dto.VariablesMap: - # TODO: Replace configuration type with YamlValidator - timeseries_collections = [ - TimeSeriesCollectionMapper(resources).from_yaml_to_dto(timeseries.model_dump(by_alias=True)) - for timeseries in configuration.time_series_raise_if_invalid - ] - - global_time_vector = get_global_time_vector( - time_series_collections=timeseries_collections, - start=configuration.start, - end=configuration.end, - frequency=result_options.output_frequency, - additional_dates=configuration.dates, - ) - variables = {} - for timeseries_collection in timeseries_collections: - timeseries_list = timeseries_collection.time_series - for timeseries in timeseries_list: - variables[timeseries.reference_id] = fit_time_series_to_time_vector( - time_series=timeseries, - time_vector=global_time_vector, - extrapolate_outside_defined_time_interval=timeseries_collection.extrapolate_outside_defined_time_interval, - interpolation_type=timeseries_collection.interpolation_type, - ) + time_series_list = [ + time_series_provider.get_time_series(time_series_reference) + for time_series_reference in time_series_provider.get_time_series_references() + ] + for time_series in time_series_list: + variables[time_series.reference_id] = time_series.fit_to_time_vector(global_time_vector).series return _evaluate_variables( - configuration.variables_raise_if_invalid, + configuration.variables, variables_map=VariablesMap(variables=variables, time_vector=global_time_vector), ) diff --git a/src/libecalc/presentation/yaml/model.py b/src/libecalc/presentation/yaml/model.py index b3d6b59855..8c772cf036 100644 --- a/src/libecalc/presentation/yaml/model.py +++ b/src/libecalc/presentation/yaml/model.py @@ -1,17 +1,20 @@ from datetime import datetime -from textwrap import indent +from functools import 
cached_property from typing import Dict, List, Optional -from libecalc.common.errors.exceptions import InvalidResource +from typing_extensions import Self, deprecated + from libecalc.common.time_utils import Frequency from libecalc.dto import ResultOptions, VariablesMap from libecalc.dto.component_graph import ComponentGraph from libecalc.presentation.yaml.configuration_service import ConfigurationService +from libecalc.presentation.yaml.domain.time_series_collections import TimeSeriesCollections from libecalc.presentation.yaml.mappers.variables_mapper import map_yaml_to_variables +from libecalc.presentation.yaml.mappers.variables_mapper.get_global_time_vector import get_global_time_vector +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.parse_input import map_yaml_to_dto -from libecalc.presentation.yaml.resource import Resource from libecalc.presentation.yaml.resource_service import ResourceService -from libecalc.presentation.yaml.validation_errors import DtoValidationError, ModelValidationError +from libecalc.presentation.yaml.validation_errors import DtoValidationError from libecalc.presentation.yaml.yaml_models.yaml_model import YamlValidator from libecalc.presentation.yaml.yaml_validation_context import ( ModelContext, @@ -21,25 +24,6 @@ ) -class ModelValidationException(Exception): - def __init__(self, errors: List[ModelValidationError]): - self._errors = errors - super().__init__("Model is not valid") - - def error_count(self) -> int: - return len(self._errors) - - def errors(self) -> List[ModelValidationError]: - return self._errors - - def __str__(self): - msg = "Validation error\n\n" - errors = "\n\n".join(map(str, self._errors)) - errors = indent(errors, "\t") - msg += errors - return msg - - class YamlModel: """ Class representing both the yaml and the resources. 
@@ -64,8 +48,16 @@ def __init__( self._output_frequency = output_frequency self._configuration = configuration_service.get_configuration() self.resources = resource_service.get_resources(self._configuration) - self.is_valid_for_run() - self.dto = map_yaml_to_dto(configuration=self._configuration, resources=self.resources) + + self._is_validated = False + + @cached_property + @deprecated( + "Avoid using the dto objects directly, we want to remove them. get_graph() might be useful instead, although the nodes will change." + ) + def dto(self): + self.validate_for_run() + return map_yaml_to_dto(configuration=self._configuration, resources=self.resources) @property def start(self) -> Optional[datetime]: @@ -75,10 +67,24 @@ def start(self) -> Optional[datetime]: def end(self) -> Optional[datetime]: return self._configuration.end + def _get_time_series_collections(self) -> TimeSeriesCollections: + return TimeSeriesCollections(time_series=self._configuration.time_series, resources=self.resources) + + def _get_time_vector(self): + return get_global_time_vector( + time_series_time_vector=self._get_time_series_collections().get_time_vector(), + start=self.start, + end=self.end, + frequency=self._output_frequency, + additional_dates=self._configuration.dates, + ) + @property def variables(self) -> VariablesMap: return map_yaml_to_variables( - configuration=self._configuration, resources=self.resources, result_options=self.result_options + configuration=self._configuration, + time_series_provider=self._get_time_series_collections(), + global_time_vector=self._get_time_vector(), ) @property @@ -89,29 +95,11 @@ def result_options(self) -> ResultOptions: output_frequency=self._output_frequency, ) - @property - def graph(self) -> ComponentGraph: + def get_graph(self) -> ComponentGraph: return self.dto.get_graph() - def _find_resource_from_name(self, filename: str) -> Optional[Resource]: - return self.resources.get(filename) - def _get_token_references(self, yaml_model: 
YamlValidator) -> List[str]: - token_references = [] - for time_series in yaml_model.time_series: - resource = self._find_resource_from_name(time_series.file) - - if resource is None: - # Don't add any tokens if the resource is not found - continue - - try: - headers = resource.get_headers() - for header in headers: - token_references.append(f"{time_series.name};{header}") - except InvalidResource: - # Don't add any tokens if resource is invalid (unable to read header) - continue + token_references = self._get_time_series_collections().get_time_series_references() for reference in yaml_model.variables: token_references.append(f"$var.{reference}") @@ -134,11 +122,15 @@ def _get_validation_context(self, yaml_model: YamlValidator) -> YamlModelValidat YamlModelValidationContextNames.model_types: self._get_model_types(yaml_model=yaml_model), } - def is_valid_for_run(self) -> bool: + def validate_for_run(self) -> Self: + if self._is_validated: + return self + try: # Validate model validation_context = self._get_validation_context(yaml_model=self._configuration) self._configuration.validate(validation_context) - return True + self._is_validated = True + return self except DtoValidationError as e: raise ModelValidationException(errors=e.errors()) from e diff --git a/src/libecalc/presentation/yaml/model_validation_exception.py b/src/libecalc/presentation/yaml/model_validation_exception.py new file mode 100644 index 0000000000..27f9823ac0 --- /dev/null +++ b/src/libecalc/presentation/yaml/model_validation_exception.py @@ -0,0 +1,23 @@ +from textwrap import indent +from typing import List + +from libecalc.presentation.yaml.validation_errors import ModelValidationError, ValidationError + + +class ModelValidationException(ValidationError): + def __init__(self, errors: List[ModelValidationError]): + self._errors = errors + super().__init__("Model is not valid") + + def error_count(self) -> int: + return len(self._errors) + + def errors(self) -> List[ModelValidationError]: + 
return self._errors + + def __str__(self): + msg = "Validation error\n\n" + errors = "\n\n".join(map(str, self._errors)) + errors = indent(errors, "\t") + msg += errors + return msg diff --git a/src/libecalc/presentation/yaml/yaml_entities.py b/src/libecalc/presentation/yaml/yaml_entities.py index 9a5a4f210a..a2cc0df727 100644 --- a/src/libecalc/presentation/yaml/yaml_entities.py +++ b/src/libecalc/presentation/yaml/yaml_entities.py @@ -3,7 +3,7 @@ from typing import Dict, List, TextIO, Union from libecalc import dto -from libecalc.common.errors.exceptions import ColumnNotFound, HeaderNotFound +from libecalc.common.errors.exceptions import ColumnNotFoundException, HeaderNotFoundException from libecalc.presentation.yaml.resource import Resource @@ -24,10 +24,10 @@ def get_column(self, header: str) -> List[Union[float, int, str]]: header_index = self.headers.index(header) return self.data[header_index] except ValueError as e: - raise HeaderNotFound(header=header) from e + raise HeaderNotFoundException(header=header) from e except IndexError as e: # Should validate that header and columns are of equal length, but that is currently done elsewhere. 
- raise ColumnNotFound(header=header) from e + raise ColumnNotFoundException(header=header) from e @dataclass diff --git a/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py b/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py index 51c6753ea3..6666d8a616 100644 --- a/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py +++ b/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py @@ -15,6 +15,7 @@ class YamlTimeSeriesCollectionBase(YamlBase): name: str = Field( ..., title="NAME", + pattern=r"^[A-Za-z][A-Za-z0-9_]*$", description="Name of the time series.\n\n$ECALC_DOCS_KEYWORDS_URL/NAME", ) file: str = Field( @@ -43,12 +44,6 @@ class YamlDefaultTimeSeriesCollection(YamlTimeSeriesCollectionBase): description="Defines the type of time series input file.\n\n$ECALC_DOCS_KEYWORDS_URL/TYPE", ) - interpolation_type: Literal["RIGHT"] = Field( - None, - title="INTERPOLATION_TYPE", - description="Defines how the time series are interpolated between input time steps.\n\n$ECALC_DOCS_KEYWORDS_URL/INTERPOLATION_TYPE", - ) - class YamlMiscellaneousTimeSeriesCollection(YamlTimeSeriesCollectionBase): type: Literal["MISCELLANEOUS"] = Field( diff --git a/src/tests/ecalc_cli/test_app.py b/src/tests/ecalc_cli/test_app.py index 6a0c497b90..b394a6e16a 100644 --- a/src/tests/ecalc_cli/test_app.py +++ b/src/tests/ecalc_cli/test_app.py @@ -16,7 +16,7 @@ from libecalc.common.errors.exceptions import EcalcError from libecalc.common.run_info import RunInfo from libecalc.dto.utils.validators import COMPONENT_NAME_ALLOWED_CHARS -from libecalc.presentation.yaml.model import ModelValidationException +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.yaml_entities import ResourceStream from libecalc.presentation.yaml.yaml_models.exceptions import YamlError from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel 
diff --git a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py index ee031ae13e..b1203d76f7 100644 --- a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py +++ b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py @@ -2,20 +2,18 @@ from datetime import datetime from typing import Dict, Optional +import pydantic import pytest from inline_snapshot import snapshot +from pydantic import TypeAdapter from libecalc.dto import TimeSeriesType from libecalc.dto.types import InterpolationType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - MiscellaneousTimeSeriesCollection, -) -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection_mapper import ( - TimeSeriesCollectionMapper, -) -from libecalc.presentation.yaml.validation_errors import DtoValidationError, ValidationError +from libecalc.presentation.yaml.domain.time_series_collection import TimeSeriesCollection +from libecalc.presentation.yaml.validation_errors import ValidationError from libecalc.presentation.yaml.yaml_entities import MemoryResource from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords +from libecalc.presentation.yaml.yaml_types.time_series.yaml_time_series import YamlTimeSeriesCollection def _create_timeseries_data( @@ -39,7 +37,7 @@ def _create_timeseries_data( timeseries_dict[EcalcYamlKeywords.time_series_extrapolate_outside_defined] = extrapolate_outside if interpolation_type is not None: - timeseries_dict[EcalcYamlKeywords.time_series_interpolation_type] = interpolation_type + timeseries_dict[EcalcYamlKeywords.time_series_interpolation_type] = interpolation_type.value return timeseries_dict @@ -47,8 +45,6 @@ def _create_timeseries_data( class TestTimeSeries: parameterized_valid_timeseries_data = [ ( - TimeSeriesType.MISCELLANEOUS, - MiscellaneousTimeSeriesCollection, TimeSeriesType.MISCELLANEOUS, 
True, True, @@ -58,13 +54,11 @@ class TestTimeSeries: ] @pytest.mark.parametrize( - "typ_string, typ_class, typ_enum, extrapolate, influence_time_vector, interpolation_type, extrapolate_result", + "typ_enum, extrapolate, influence_time_vector, interpolation_type, extrapolate_result", parameterized_valid_timeseries_data, ) def test_valid_minimal_timeserie_different_types( self, - typ_string, - typ_class, typ_enum, extrapolate, influence_time_vector, @@ -72,81 +66,113 @@ def test_valid_minimal_timeserie_different_types( extrapolate_result, ): filename = "test.csv" - resources = {filename: MemoryResource(headers=["DATE", "OIL_PROD"], data=[["01.01.2017"], [5016]])} - - timeseries_mapper = TimeSeriesCollectionMapper(resources=resources) - timeseries_model = timeseries_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=typ_string, - name="OIL_PROD", - file=filename, - extrapolate_outside=extrapolate, - interpolation_type=interpolation_type, - influence_time_vector=influence_time_vector, - ) + resource = MemoryResource(headers=["DATE", "OIL_PROD"], data=[["01.01.2017"], [5016]]) + + timeseries_model = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=typ_enum, + name="SIM1", + file=filename, + extrapolate_outside=extrapolate, + interpolation_type=interpolation_type, + influence_time_vector=influence_time_vector, + ) + ), ) - assert isinstance(timeseries_model, typ_class) - assert timeseries_model.typ == typ_enum - assert timeseries_model.headers == ["OIL_PROD"] - assert timeseries_model.time_vector == [datetime(2017, 1, 1)] - assert timeseries_model.columns == [[5016]] - assert timeseries_model.extrapolate_outside_defined_time_interval is extrapolate_result - assert timeseries_model.influence_time_vector is True - assert timeseries_model.interpolation_type == InterpolationType.LEFT + assert timeseries_model.name == "SIM1" + assert 
timeseries_model.get_time_series_references() == ["OIL_PROD"] + assert timeseries_model.get_time_vector() == [datetime(2017, 1, 1)] + assert timeseries_model.should_influence_time_vector() is True + + time_series = timeseries_model.get_time_series("OIL_PROD") + assert time_series.series == [5016] + assert time_series.time_vector == [datetime(2017, 1, 1)] + assert time_series._extrapolate is extrapolate_result + assert timeseries_model._interpolation == InterpolationType.LEFT def test_valid_time_series_multiple_columns(self): + """ + Test TimeSeriesCollection.type 'DEFAULT' defaults + """ filename = "test_multiple_columns.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN1", "COLUMN2", "COLUMN3"], - data=[["01.01.2015", "01.01.2016"], [1, 2], [3, 4], [5, 6]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - time_series_dto = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", "COLUMN1", "COLUMN2", "COLUMN3"], + data=[["01.01.2015", "01.01.2016"], [1, 2], [3, 4], [5, 6]], ) - assert time_series_dto.columns == [[1, 2], [3, 4], [5, 6]] - assert time_series_dto.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] - assert time_series_dto.headers == ["COLUMN1", "COLUMN2", "COLUMN3"] - assert time_series_dto.typ == TimeSeriesType.DEFAULT + timeseries_model = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + interpolation_type=None, + influence_time_vector=True, + ) + ), + ) + + assert timeseries_model.name == "SIM1" + assert timeseries_model.get_time_series_references() == ["COLUMN1", "COLUMN2", "COLUMN3"] + 
assert timeseries_model.get_time_vector() == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert timeseries_model.should_influence_time_vector() is True + + time_series = timeseries_model.get_time_series("COLUMN1") + assert time_series.series == [1, 2] + assert time_series.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert time_series._extrapolate is False + assert timeseries_model._interpolation == InterpolationType.RIGHT + + time_series = timeseries_model.get_time_series("COLUMN2") + assert time_series.series == [3, 4] + assert time_series.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert time_series._extrapolate is False + assert timeseries_model._interpolation == InterpolationType.RIGHT + + time_series = timeseries_model.get_time_series("COLUMN3") + assert time_series.series == [5, 6] + assert time_series.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert time_series._extrapolate is False + assert timeseries_model._interpolation == InterpolationType.RIGHT def test_valid_time_series_unsorted(self): filename = "test_unsorted.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN1", "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - time_series_dto = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", "COLUMN1", "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], ) - assert time_series_dto.columns == [ - [3, 1, 2], - [1, 2, 3], - ] - assert time_series_dto.time_vector == [datetime(1900, 1, 1), datetime(2015, 1, 1), datetime(2016, 1, 1)] - assert time_series_dto.headers == ["COLUMN1", "COLUMN2"] - assert time_series_dto.typ == 
TimeSeriesType.DEFAULT + timeseries_model = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), + ) + + assert timeseries_model.name == "SIM1" + assert timeseries_model.get_time_series_references() == ["COLUMN1", "COLUMN2"] + assert timeseries_model.get_time_vector() == [datetime(2015, 1, 1), datetime(2016, 1, 1), datetime(1900, 1, 1)] + assert timeseries_model.should_influence_time_vector() is True + + time_series = timeseries_model.get_time_series("COLUMN1") + assert time_series.series == [3, 1, 2] + assert time_series.time_vector == [datetime(1900, 1, 1), datetime(2015, 1, 1), datetime(2016, 1, 1)] + + time_series = timeseries_model.get_time_series("COLUMN2") + assert time_series.series == [1, 2, 3] + assert time_series.time_vector == [datetime(1900, 1, 1), datetime(2015, 1, 1), datetime(2016, 1, 1)] parameterized_invalid_timeseries_data = [ # headers, data mismatch (+1) @@ -154,8 +180,19 @@ def test_valid_time_series_unsorted(self): ["DATE", "OIL_PROD", "BVBV"], [["01.01.2017"], [5016]], snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, The number of columns provided do not match for header and data: data: 1, headers: 2 +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Missing column: Column matching header 'BVBV' is missing. """), ), # no data @@ -163,8 +200,19 @@ def test_valid_time_series_unsorted(self): ["DATE", "DUMMY"], [["01.01.2017"]], snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, Data vector must at least have one column +Validation error + + ... 
+ name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Missing column: Column matching header 'DUMMY' is missing. """), ), # no time @@ -172,51 +220,148 @@ def test_valid_time_series_unsorted(self): ["DATE", "OIL_PROD"], [[], [5016]], snapshot("""\ -Location: MISCELLANEOUS.time_vector -Message: Value error, Time vectors must have at least one record +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid time series resource: The time vector is empty """), ), # no headers ( [], [["01.01.2017"], [5016]], - snapshot("Invalid resource: Resource must at least have one column"), + snapshot("""\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid resource: Resource must at least have one column +"""), ), # mismatch data, time ( ["DATE", "OIL_PROD"], [["01.01.2017", "01.01.2018"], [5016]], - snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, The number of records for times and data do not match: data: 1, time_vector: 2 -"""), + snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Rows mismatch: The number of records for times and data do not match: data: 1, time_vector: 2 +""" + ), ), # mismatch data, time ( ["DATE", "OIL_PROD"], [["01.01.2017"], [5016, 5026]], - snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, The number of records for times and data do not match: data: 2, time_vector: 1 -"""), + snapshot( + """\ +Validation error + + ... 
+ name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Rows mismatch: The number of records for times and data do not match: data: 2, time_vector: 1 +""" + ), ), # no data cols ( ["DATE", "HEADER"], [["01.01.2017"]], snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, Data vector must at least have one column +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Missing column: Column matching header 'HEADER' is missing. """), ), # duplicate dates ( ["DATE", "HEADER"], [["01.01.2015", "01.01.2016", "01.01.2017", "01.01.2017"], [5016, 5036, 5026, 5216]], - snapshot("""\ -Location: MISCELLANEOUS.time_vector -Message: Value error, The list of dates have duplicates. Duplicated dates are currently not supported. -"""), + snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid time series resource: The time series resource contains duplicate dates: 2017-01-01 00:00:00 +""" + ), + ), + # string values + ( + ["DATE", "HEADER"], + [["01.01.2015", "01.01.2016", "01.01.2017"], [5016, 5036, "invalid"]], + snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid column: The timeseries column 'HEADER' contains non-numeric values in row 3. 
+""" + ), ), ] @@ -228,35 +373,37 @@ def test_valid_time_series_unsorted(self): ) def test_invalid_timeseries(self, headers, columns, error_message): filename = "test.csv" - resources = { - filename: MemoryResource( - headers=headers, - data=columns, - ) - } + resource = MemoryResource( + headers=headers, + data=columns, + ) - timeseries_mapper = TimeSeriesCollectionMapper(resources=resources) with pytest.raises(ValidationError) as ve: - timeseries_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.MISCELLANEOUS, - name="OIL_PROD", - file=filename, - extrapolate_outside=True, - interpolation_type=InterpolationType.LINEAR, - ) + TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.MISCELLANEOUS, + name="SIM1", + file=filename, + extrapolate_outside=True, + interpolation_type=InterpolationType.LINEAR, + ) + ), ) assert str(ve.value) == error_message def test_timeseries_with_int_as_date(self): filename = "sim1.csv" - resources = {filename: MemoryResource(headers=["DATE", "HEADER1"], data=[[2012, 2013, 2014], [1, 2, 3]])} - timeseries_mapper = TimeSeriesCollectionMapper(resources=resources) - timeseries_dto = timeseries_mapper.from_yaml_to_dto( - _create_timeseries_data(typ=TimeSeriesType.DEFAULT, name="SIM1", file=filename) + resource = MemoryResource(headers=["DATE", "HEADER1"], data=[[2012, 2013, 2014], [1, 2, 3]]) + time_series_collection = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data(typ=TimeSeriesType.DEFAULT, name="SIM1", file=filename), + ), ) - assert timeseries_dto.time_vector == [ + assert time_series_collection.get_time_vector() == [ datetime(2012, 1, 1), datetime(2013, 1, 1), datetime(2014, 1, 1), @@ -268,33 +415,31 @@ def test_timeseries_with_int_as_date(self): ) def test_invalid_time_series_headers(self, 
header): filename = "test_invalid_headers.csv" - resources = { - filename: MemoryResource( - headers=["DATE", header, "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as ve: - time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", header, "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], + ) + + with pytest.raises(ValidationError) as ve: + TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), ) - error_message = str(ve.value.extended_message) + error_message = str(ve.value) assert "SIM1" in error_message assert ( - "DEFAULT.headers[0]" in error_message - ) # This should probably not be required, does not make sense to user as it isn't related to the yaml path/location. - assert ( - "The string/name contains illegal characters. Allowed characters are: ^[A-Za-z][A-Za-z0-9_.,\\-\\s#+:\\/]*$" + "The time series resource header contains illegal characters. 
Allowed characters are: ^[A-Za-z][A-Za-z0-9_.,\\-\\s#+:\\/]*$" in error_message ) @@ -304,25 +449,26 @@ def test_invalid_time_series_headers(self, header): ) def test_valid_time_series_headers(self, header): filename = "test_valid_headers.csv" - resources = { - filename: MemoryResource( - headers=["DATE", header, "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - time_series_dto = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", header, "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], ) - assert time_series_dto.headers == [header, "COLUMN2"] + time_series_collection = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), + ) + + assert time_series_collection.get_time_series_references() == [header, "COLUMN2"] @pytest.mark.parametrize( "resource_name", @@ -330,16 +476,9 @@ def test_valid_time_series_headers(self, header): ) def test_invalid_resource_names(self, resource_name): filename = "test_invalid_resource_names.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN1", "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as ve: - time_series_mapper.from_yaml_to_dto( + with pytest.raises(pydantic.ValidationError) as ve: + TypeAdapter(YamlTimeSeriesCollection).validate_python( 
_create_timeseries_data( typ=TimeSeriesType.DEFAULT, name=resource_name, @@ -350,114 +489,60 @@ def test_invalid_resource_names(self, resource_name): ) ) - error_message = str(ve.value.extended_message) + error_message = str(ve.value) assert resource_name in error_message - assert ( - "The string/name contains illegal characters. Allowed characters are: ^[A-Za-z][A-Za-z0-9_]*$" - in error_message - ) - - def test_interpretation_of_interpolation_type_for_default_resource(self): - """Check default interpolation for DEFAULT time series.""" - filename = "test_interpretation_of_rate_interpolation_type_for_reservoir_resource.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "GAS_PROD"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - - time_series_explicit_none = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) - ) - assert time_series_explicit_none.interpolation_type == InterpolationType.RIGHT - - time_series_implicit_none = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - ) - ) - - assert time_series_implicit_none.interpolation_type == InterpolationType.RIGHT + assert "String should match pattern '^[A-Za-z][A-Za-z0-9_]*$' " in error_message def test_undefined_type_for_miscellaneous_resource(self): """Check that MISCELLANEOUS fails if interpolation not defined.""" - filename = "test_interpretation_of_rate_interpolation_type_for_reservoir_resource.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "GAS_PROD"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3]], - ) - } - time_series_mapper = 
TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as ve: - time_series_mapper.from_yaml_to_dto( + with pytest.raises(pydantic.ValidationError) as ve: + TypeAdapter(YamlTimeSeriesCollection).validate_python( _create_timeseries_data( typ=TimeSeriesType.MISCELLANEOUS, name="SIM1", - file=filename, + file="test.csv", extrapolate_outside=None, influence_time_vector=True, ) ) - assert isinstance(ve.value, DtoValidationError) - - def test_left_interpolation_type_for_miscellaneous_resource(self): - """Check that LEFT is used when specified for MISCELLANEOUS.""" - filename = "test_interpretation_of_rate_interpolation_type_for_reservoir_resource.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "GAS_PROD"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - - time_series = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.MISCELLANEOUS, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=InterpolationType.LEFT, - ) - ) - assert time_series.interpolation_type == InterpolationType.LEFT + + assert isinstance(ve.value, pydantic.ValidationError) def test_error_if_nan_data(self): filename = "test_invalid_data.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, math.nan]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as exc_info: - time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, math.nan]], + ) + with 
pytest.raises(ValidationError) as exc_info: + TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), ) - - assert "The timeseries column 'SIM1;COLUMN2' contains empty values." in str(exc_info.value) + message = str(exc_info.value) + assert message == snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test_invalid_data.csv + type: DEFAULT + influence_time_vector: true + ... + + Location: FILE + Message: Invalid column: The timeseries column 'COLUMN2' contains empty values in row 3. +""" + ) diff --git a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py index b6e552a264..7a088b41f4 100644 --- a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py +++ b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py @@ -5,43 +5,33 @@ import libecalc.common.time_utils from libecalc.common.time_utils import Frequency from libecalc.dto.types import InterpolationType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - MiscellaneousTimeSeriesCollection, -) -from libecalc.presentation.yaml.mappers.variables_mapper.timeseries_utils import ( +from libecalc.presentation.yaml.domain.time_series import TimeSeries +from libecalc.presentation.yaml.mappers.variables_mapper.get_global_time_vector import ( _get_end_boundary, - fit_time_series_to_time_vector, get_global_time_vector, ) from libecalc.presentation.yaml.validation_errors import ValidationError -@pytest.fixture -def miscellaneous_time_series_collection_yearly(): - return MiscellaneousTimeSeriesCollection( - name="test", - headers=["COL1_RATE", "COL2"], - columns=[[1, 2, 3, 4], [2, 4, 6, 
8]], - time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], - interpolation_type=InterpolationType.RIGHT, - ) - - -@pytest.fixture -def miscellaneous_time_series_collection_single_date(): - return MiscellaneousTimeSeriesCollection( - name="test", - headers=["COL1_RATE", "COL2"], - columns=[[3], [6]], +def create_single_date_time_series(interpolation_type: InterpolationType, extrapolate: bool) -> TimeSeries: + return TimeSeries( + reference_id="COL1_RATE", time_vector=[datetime(2012, 1, 1)], - interpolation_type=InterpolationType.RIGHT, + series=[3], + interpolation_type=interpolation_type, + extrapolate=extrapolate, ) class TestGetGlobalTimeVector: - def test_single_collection(self, miscellaneous_time_series_collection_yearly): + def test_single_collection(self): global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], ) assert global_time_vector == [ @@ -51,9 +41,14 @@ def test_single_collection(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - def test_single_collection_with_monthly_frequency(self, miscellaneous_time_series_collection_yearly): + def test_single_collection_with_monthly_frequency(self): global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], frequency=libecalc.common.time_utils.Frequency.MONTH, ) @@ -125,51 +120,73 @@ def test_single_collection_with_yearly_frequency(self): datetime(2011, 11, 1), datetime(2011, 12, 1), ] - time_series_collection = MiscellaneousTimeSeriesCollection( - name="test", - headers=["COL1", "COL2"], - columns=[[1.0] * len(time_vector), [2.0] * len(time_vector)], - 
time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - ) global_time_vector = get_global_time_vector( - time_series_collections=[time_series_collection], frequency=libecalc.common.time_utils.Frequency.YEAR + time_series_time_vector=time_vector, frequency=libecalc.common.time_utils.Frequency.YEAR ) # Time vector is not filtered based on frequency, only there to make sure all frequencies are present. time_vector.append(datetime(2012, 1, 1)) assert global_time_vector == time_vector - def test_trim_start(self, miscellaneous_time_series_collection_yearly): + def test_trim_start(self): # trim with date already present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], start=datetime(2011, 1, 1) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + start=datetime(2011, 1, 1), ) assert global_time_vector == [datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)] # trim with date not present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], start=datetime(2011, 1, 2) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + start=datetime(2011, 1, 2), ) assert global_time_vector == [datetime(2011, 1, 2), datetime(2012, 1, 1), datetime(2013, 1, 1)] - def test_trim_end(self, miscellaneous_time_series_collection_yearly): + def test_trim_end(self): # trim with date already present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], end=datetime(2011, 1, 1) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + end=datetime(2011, 1, 1), ) assert global_time_vector == [datetime(2010, 1, 1), datetime(2011, 1, 1)] # trim with date not 
present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], end=datetime(2011, 2, 2) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + end=datetime(2011, 2, 2), ) assert global_time_vector == [datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2011, 2, 2)] - def test_additional_dates(self, miscellaneous_time_series_collection_yearly): + def test_additional_dates(self): global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], additional_dates={datetime(2011, 6, 1), datetime(2013, 2, 1)}, ) @@ -185,56 +202,64 @@ def test_additional_dates(self, miscellaneous_time_series_collection_yearly): def test_only_start_and_frequency(self): assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), frequency=Frequency.YEAR + time_series_time_vector=[], start=datetime(2020, 1, 1), frequency=Frequency.YEAR ) == [datetime(2020, 1, 1), datetime(2021, 1, 1)] assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), frequency=Frequency.MONTH + time_series_time_vector=[], start=datetime(2020, 1, 1), frequency=Frequency.MONTH ) == [datetime(2020, 1, 1), datetime(2020, 2, 1)] assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), frequency=Frequency.DAY + time_series_time_vector=[], start=datetime(2020, 1, 1), frequency=Frequency.DAY ) == [datetime(2020, 1, 1), datetime(2020, 1, 2)] def test_only_start_and_end(self): assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), end=datetime(2021, 1, 1) + time_series_time_vector=[], start=datetime(2020, 1, 1), end=datetime(2021, 1, 1) ) == [datetime(2020, 1, 1), datetime(2021, 1, 1)] 
assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), end=datetime(2020, 2, 1) + time_series_time_vector=[], start=datetime(2020, 1, 1), end=datetime(2020, 2, 1) ) == [datetime(2020, 1, 1), datetime(2020, 2, 1)] assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), end=datetime(2020, 1, 2) + time_series_time_vector=[], start=datetime(2020, 1, 1), end=datetime(2020, 1, 2) ) == [datetime(2020, 1, 1), datetime(2020, 1, 2)] def test_only_start(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], start=datetime(2020, 1, 1)) + get_global_time_vector(time_series_time_vector=[], start=datetime(2020, 1, 1)) assert "No time series found" in str(exc_info.value) def test_only_end(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], end=datetime(2020, 1, 1)) + get_global_time_vector(time_series_time_vector=[], end=datetime(2020, 1, 1)) assert "No time series found" in str(exc_info.value) def test_only_freq(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], frequency=Frequency.YEAR) + get_global_time_vector(time_series_time_vector=[], frequency=Frequency.YEAR) assert "No time series found" in str(exc_info.value) def test_only_freq_and_end(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], frequency=Frequency.YEAR, end=datetime(2020, 1, 1)) + get_global_time_vector(time_series_time_vector=[], frequency=Frequency.YEAR, end=datetime(2020, 1, 1)) assert "No time series found" in str(exc_info.value) def test_only_empty_time_series(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[]) + get_global_time_vector(time_series_time_vector=[]) assert "No time series found" in str(exc_info.value) -class TestFitTimeSeriesToTimeVector: - def 
test_interpolate_linear(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series +def create_time_series(interpolation_type: InterpolationType, extrapolate: bool): + return TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], + interpolation_type=interpolation_type, + extrapolate=extrapolate, + ) + +class TestFitTimeSeriesToTimeVector: + def test_interpolate_linear(self): time_vector = [ datetime(2010, 1, 1), datetime(2011, 1, 1), @@ -243,31 +268,19 @@ def test_interpolate_linear(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LINEAR, - extrapolate_outside_defined_time_interval=False, + extrapolate=False, ) + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) # Interpolate based on interpolation type - assert fitted_rate_time_series == [1, 2, 2.4136986301369863, 3, 4] - - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, - ) - - # Interpolate based on interpolation type - assert fitted_time_series == [2.0, 4.0, 4.0, 6.0, 8.0] - - def test_interpolate_left(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + assert fitted_rate_time_series.series == [1, 2, 2.4136986301369863, 3, 4] + def 
test_interpolate_left(self): time_vector = [ datetime(2010, 1, 1), datetime(2011, 1, 1), @@ -276,32 +289,21 @@ def test_interpolate_left(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - rate_time_series = time_series[0] - - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LEFT, - extrapolate_outside_defined_time_interval=False, + extrapolate=False, ) - # Interpolate based on interpolation type - assert fitted_rate_time_series == [1, 2, 3, 3, 4] - - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.LEFT, - extrapolate_outside_defined_time_interval=False, - ) + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) # Interpolate based on interpolation type - assert fitted_time_series == [2.0, 4.0, 6.0, 6.0, 8.0] - - def test_interpolate_right(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + assert fitted_rate_time_series.series == [1, 2, 3, 3, 4] + assert fitted_rate_time_series.time_vector == time_vector + def test_interpolate_right(self): time_vector = [ datetime(2010, 1, 1), datetime(2011, 1, 1), @@ -310,30 +312,20 @@ def test_interpolate_right(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, - ) - - # Interpolate based on interpolation 
type - assert fitted_rate_time_series == [1, 2, 2, 3, 4] - - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, + extrapolate=False, ) + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) # Interpolate based on interpolation type - assert fitted_time_series == [2.0, 4.0, 4.0, 6.0, 8.0] + assert fitted_rate_time_series.series == [1, 2, 2, 3, 4] + assert fitted_rate_time_series.time_vector == time_vector - def test_extrapolate_outside_true(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + def test_extrapolate_outside_true(self): time_vector = [ datetime(2009, 1, 1), datetime(2010, 1, 1), @@ -343,28 +335,19 @@ def test_extrapolate_outside_true(self, miscellaneous_time_series_collection_yea datetime(2014, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, - extrapolate_outside_defined_time_interval=True, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LINEAR, + extrapolate=True, ) - # Rate should use extrapolate_outside_defined_time_interval to decide extrapolation - assert fitted_rate_time_series == [0, 1, 2, 3, 4, 4] - - # Check that Non-rate behaves like rate - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - 
time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=True, - ) - assert fitted_time_series == [0, 2.0, 4.0, 6.0, 8.0, 8.0] + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) + + assert fitted_rate_time_series.series == [0, 1, 2, 3, 4, 4] + assert fitted_rate_time_series.time_vector == time_vector - def test_extrapolate_outside_false(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + def test_extrapolate_outside_false(self): time_vector = [ datetime(2009, 1, 1), datetime(2010, 1, 1), @@ -374,31 +357,19 @@ def test_extrapolate_outside_false(self, miscellaneous_time_series_collection_ye datetime(2014, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, - extrapolate_outside_defined_time_interval=False, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LINEAR, + extrapolate=False, ) - # Rate should use extrapolate_outside_defined_time_interval to decide extrapolation - assert fitted_rate_time_series == [0, 1.0, 2.0, 3.0, 4.0, 0.0] - - # Check that Non-rate behaves like rate - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, - ) - assert fitted_time_series == [0.0, 2.0, 4.0, 6.0, 8.0, 0.0] + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) + assert fitted_rate_time_series.series == [0, 1.0, 2.0, 3.0, 4.0, 0.0] + assert fitted_rate_time_series.time_vector == time_vector - def 
test_interpolate_to_shorter_global_time_vector(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series - rate_time_series = time_series[0] - - time_vector = [ + def test_interpolate_to_shorter_global_time_vector(self): + all_times = [ datetime(2009, 1, 1), datetime(2011, 1, 1), datetime(2012, 7, 1), @@ -406,44 +377,39 @@ def test_interpolate_to_shorter_global_time_vector(self, miscellaneous_time_seri ] for i in range(1, 5): - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector[0:i], - extrapolate_outside_defined_time_interval=True, + current_time_vector = all_times[0:i] + fitted_rate_time_series = create_time_series( interpolation_type=InterpolationType.RIGHT, - ) - assert fitted_rate_time_series == [0, 2, 3, 4][0:i] - fitted_rate_time_series_shifted_left = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector[0:i], - extrapolate_outside_defined_time_interval=True, + extrapolate=True, + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series.series == [0, 2, 3, 4][0:i] + assert fitted_rate_time_series.time_vector == current_time_vector + + fitted_rate_time_series_shifted_left = create_time_series( interpolation_type=InterpolationType.LEFT, - ) - assert fitted_rate_time_series_shifted_left == [0, 2, 4, 4][0:i] - fitted_rate_time_series_shifted_linear = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector[0:i], - extrapolate_outside_defined_time_interval=True, + extrapolate=True, + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series_shifted_left.series == [0, 2, 4, 4][0:i] + assert fitted_rate_time_series_shifted_left.time_vector == current_time_vector + + fitted_rate_time_series_shifted_linear = create_time_series( interpolation_type=InterpolationType.LINEAR, - ) - assert fitted_rate_time_series_shifted_linear == [0, 2, 
3.4972677595628414, 4][0:i] + extrapolate=True, + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series_shifted_linear.series == [0, 2, 3.4972677595628414, 4][0:i] + assert fitted_rate_time_series_shifted_linear.time_vector == current_time_vector for rate_interp_type in [InterpolationType.LEFT, InterpolationType.RIGHT, InterpolationType.LINEAR]: - fitted_rate_time_series_outside_interval_no_extrapolation = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[-1]], - extrapolate_outside_defined_time_interval=False, + current_time_vector = [all_times[-1]] + fitted_rate_time_series_outside_interval_no_extrapolation = create_time_series( + extrapolate=False, interpolation_type=rate_interp_type, - ) - assert fitted_rate_time_series_outside_interval_no_extrapolation == [0] - - def test_interpolate_single_date_to_single_date_global_time_vector( - self, miscellaneous_time_series_collection_single_date - ): - time_series = miscellaneous_time_series_collection_single_date.time_series - rate_time_series = time_series[0] + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series_outside_interval_no_extrapolation.series == [0] + assert fitted_rate_time_series_outside_interval_no_extrapolation.time_vector == current_time_vector - time_vector = [ + def test_interpolate_single_date_to_single_date_global_time_vector(self): + all_times = [ datetime(2011, 7, 1), datetime(2012, 1, 1), datetime(2012, 1, 2), @@ -456,29 +422,21 @@ def test_interpolate_single_date_to_single_date_global_time_vector( fitted_rate_time_series_left_with_extrapolation = [] fitted_rate_time_series_linear_with_extrapolation = [] for i in range(3): + current_time_vector = [all_times[i]] fitted_rate_time_series_right_with_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=True, - interpolation_type=InterpolationType.RIGHT, - 
)[0] + create_single_date_time_series(interpolation_type=InterpolationType.RIGHT, extrapolate=True) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_left_with_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=True, - interpolation_type=InterpolationType.LEFT, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.LEFT, extrapolate=True) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_linear_with_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=True, - interpolation_type=InterpolationType.LINEAR, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.LINEAR, extrapolate=True) + .fit_to_time_vector(current_time_vector) + .series[0] ) assert fitted_rate_time_series_right_with_extrapolation == expected_with_extrapolation @@ -489,35 +447,29 @@ def test_interpolate_single_date_to_single_date_global_time_vector( fitted_rate_time_series_left_without_extrapolation = [] fitted_rate_time_series_linear_without_extrapolation = [] for i in range(3): + current_time_vector = [all_times[i]] fitted_rate_time_series_right_without_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=False, - interpolation_type=InterpolationType.RIGHT, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.RIGHT, extrapolate=False) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_left_without_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=False, - interpolation_type=InterpolationType.LEFT, - 
)[0] + create_single_date_time_series(interpolation_type=InterpolationType.LEFT, extrapolate=False) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_linear_without_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=False, - interpolation_type=InterpolationType.LINEAR, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.LINEAR, extrapolate=False) + .fit_to_time_vector(current_time_vector) + .series[0] ) assert fitted_rate_time_series_right_without_extrapolation == expected_without_extrapolation assert fitted_rate_time_series_left_without_extrapolation == expected_without_extrapolation assert fitted_rate_time_series_linear_without_extrapolation == expected_without_extrapolation + +class TestGetEndBoundary: @pytest.mark.parametrize( "what, end_date, dates", [ diff --git a/src/tests/libecalc/input/test_file_io.py b/src/tests/libecalc/input/test_file_io.py index 67ae10d28f..ea2fbbd73c 100644 --- a/src/tests/libecalc/input/test_file_io.py +++ b/src/tests/libecalc/input/test_file_io.py @@ -8,7 +8,7 @@ from inline_snapshot import snapshot from ecalc_cli.infrastructure.file_resource_service import FileResourceService -from libecalc.common.errors.exceptions import EcalcError, HeaderNotFound +from libecalc.common.errors.exceptions import EcalcError, InvalidHeaderException from libecalc.fixtures.cases import input_file_examples from libecalc.infrastructure import file_io from libecalc.presentation.yaml import yaml_entities @@ -165,9 +165,9 @@ def test_valid_characters(self, tmp_path_fixture, csv_line: str, is_valid_charac @pytest.mark.snapshot @pytest.mark.inlinesnapshot def test_missing_headers(self, tmp_path_fixture): - with pytest.raises(HeaderNotFound) as e: + with pytest.raises(InvalidHeaderException) as e: file_io.read_facility_resource(create_csv_from_line(tmp_path_fixture, "HEADER1 ,,HEADER3")) - 
assert str(e.value) == snapshot("Missing header(s): Header 'Unnamed: 1' not found") + assert str(e.value) == snapshot("Invalid header: One or more headers are missing in resource") @pytest.fixture @@ -319,7 +319,7 @@ def test_time_series_missing_headers(self): ) assert str(e.value) == snapshot( - "Failed to read resource: Failed to read base_profile_missing_header_oil_prod.csv: Missing header(s): Header 'Unnamed: 1' not found" + "Failed to read resource: Failed to read base_profile_missing_header_oil_prod.csv: Invalid header: One or more headers are missing in resource" ) @pytest.mark.snapshot @@ -346,7 +346,7 @@ def test_facility_input_missing_headers(self): ) assert str(e.value) == snapshot( - "Failed to read resource: Failed to read tabular_missing_header_fuel.csv: Missing header(s): Header 'Unnamed: 1' not found" + "Failed to read resource: Failed to read tabular_missing_header_fuel.csv: Invalid header: One or more headers are missing in resource" ) diff --git a/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py b/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py index 30295de9cc..80f324e51c 100644 --- a/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py +++ b/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py @@ -7,7 +7,7 @@ from libecalc.fixtures.cases.venting_emitters.venting_emitter_yaml import ( venting_emitter_yaml_factory, ) -from libecalc.presentation.yaml.model import ModelValidationException +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.yaml_types.yaml_stream_conditions import ( YamlEmissionRateUnits, YamlOilRateUnits,