diff --git a/examples/simple_yaml_model.ipynb b/examples/simple_yaml_model.ipynb index b8a13f8650..8b2597ca10 100644 --- a/examples/simple_yaml_model.ipynb +++ b/examples/simple_yaml_model.ipynb @@ -81,14 +81,14 @@ " output_frequency=Frequency.NONE,\n", ")\n", "\n", - "model = EnergyCalculator(graph=yaml_model.graph)\n", + "model = EnergyCalculator(graph=yaml_model.get_graph())\n", "consumer_results = model.evaluate_energy_usage(yaml_model.variables)\n", "emission_results = model.evaluate_emissions(\n", " variables_map=yaml_model.variables,\n", " consumer_results=consumer_results,\n", ")\n", "result = GraphResult(\n", - " graph=yaml_model.graph,\n", + " graph=yaml_model.get_graph(),\n", " consumer_results=consumer_results,\n", " variables_map=yaml_model.variables,\n", " emission_results=emission_results,\n", @@ -114,7 +114,7 @@ "import matplotlib.pyplot as plt\n", "\n", "print(\"Iterating the model components to plot results: \\n\")\n", - "for identity, component in yaml_model.graph.nodes.items():\n", + "for identity, component in yaml_model.get_graph().nodes.items():\n", " if identity in result.consumer_results:\n", " component_result = result.consumer_results[identity].component_result\n", " ds = pd.Series(component_result.energy_usage.values, index=component_result.energy_usage.timesteps)\n", diff --git a/src/ecalc_cli/commands/run.py b/src/ecalc_cli/commands/run.py index cc5b01467f..01faadd41e 100644 --- a/src/ecalc_cli/commands/run.py +++ b/src/ecalc_cli/commands/run.py @@ -117,7 +117,7 @@ def run( configuration_service=configuration_service, resource_service=resource_service, output_frequency=frequency, - ) + ).validate_for_run() if (flow_diagram or ltp_export) and (model.start is None or model.end is None): logger.warning( @@ -132,7 +132,7 @@ def run( name_prefix=name_prefix, ) - energy_calculator = EnergyCalculator(graph=model.graph) + energy_calculator = EnergyCalculator(graph=model.get_graph()) precision = 6 consumer_results = energy_calculator.evaluate_energy_usage(model.variables) emission_results = energy_calculator.evaluate_emissions( @@ -140,7 +140,7 @@ def run( consumer_results=consumer_results, ) results_core = GraphResult( - graph=model.graph, + graph=model.get_graph(), consumer_results=consumer_results, variables_map=model.variables, emission_results=emission_results, diff --git a/src/ecalc_cli/infrastructure/file_resource_service.py b/src/ecalc_cli/infrastructure/file_resource_service.py index e4922fe288..72714150e0 100644 --- a/src/ecalc_cli/infrastructure/file_resource_service.py +++ b/src/ecalc_cli/infrastructure/file_resource_service.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Callable, Dict -from libecalc.common.errors.exceptions import EcalcError, HeaderNotFound +from libecalc.common.errors.exceptions import EcalcError, InvalidHeaderException from libecalc.common.logger import logger from libecalc.infrastructure.file_io import read_facility_resource, read_timeseries_resource from libecalc.presentation.yaml.resource import Resource @@ -18,7 +18,7 @@ def __init__(self, working_directory: Path): def _read_resource(resource_name: Path, *args, read_func: Callable[..., MemoryResource]) -> MemoryResource: try: return read_func(resource_name, *args) - except (HeaderNotFound, ValueError) as exc: + except (InvalidHeaderException, ValueError) as exc: logger.error(str(exc)) raise EcalcError("Failed to read resource", f"Failed to read {resource_name.name}: {str(exc)}") from exc diff --git a/src/ecalc_cli/main.py b/src/ecalc_cli/main.py index 04022742cd..e810769822 100755 --- a/src/ecalc_cli/main.py +++ b/src/ecalc_cli/main.py @@ -7,7 +7,7 @@ from ecalc_cli.commands.run import run from ecalc_cli.commands.selftest import selftest from ecalc_cli.logger import CLILogConfigurator, LogLevel, logger -from libecalc.presentation.yaml.model import ModelValidationException +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.validation_errors import DataValidationError app = typer.Typer(name="ecalc") diff --git a/src/libecalc/common/errors/exceptions.py b/src/libecalc/common/errors/exceptions.py index b282144efe..08b13cc3cd 100644 --- a/src/libecalc/common/errors/exceptions.py +++ b/src/libecalc/common/errors/exceptions.py @@ -74,7 +74,7 @@ def __init__(self, message: str): class InvalidDateException(EcalcError): ... -class InvalidResource(EcalcError): +class InvalidResourceException(EcalcError): """ Base exception for resource """ @@ -82,7 +82,12 @@ class InvalidResource(EcalcError): pass -class HeaderNotFound(InvalidResource): +class InvalidHeaderException(InvalidResourceException): + def __init__(self, message: str): + super().__init__("Invalid header", message, error_type=EcalcErrorType.CLIENT_ERROR) + + +class HeaderNotFoundException(InvalidResourceException): """Resource is missing header.""" def __init__(self, header: str): @@ -90,7 +95,7 @@ def __init__(self, header: str): super().__init__("Missing header(s)", f"Header '{header}' not found", error_type=EcalcErrorType.CLIENT_ERROR) -class ColumnNotFound(InvalidResource): +class ColumnNotFoundException(InvalidResourceException): """Resource is missing column""" def __init__(self, header: str): @@ -98,3 +103,20 @@ def __init__(self, header: str): super().__init__( "Missing column", f"Column matching header '{header}' is missing.", error_type=EcalcErrorType.CLIENT_ERROR ) + + +class InvalidColumnException(InvalidResourceException): + def __init__(self, header: str, message: str, row: int = None): + self.header = header + self.row = row + super().__init__( + "Invalid column", + message.format(header=header, row=row), + ) + + +class NoColumnsException(InvalidResourceException): + """Resource contains no columns""" + + def __init__(self): + super().__init__("No columns", "The resource contains no columns, it should have at least one.") diff --git a/src/libecalc/common/string/string_utils.py b/src/libecalc/common/string/string_utils.py index 0f6b7bcf95..9940110cf8 100644 --- a/src/libecalc/common/string/string_utils.py +++ b/src/libecalc/common/string/string_utils.py @@ -1,7 +1,9 @@ -from typing import Iterable, Set +from typing import Hashable, Iterable, Set, TypeVar +TItem = TypeVar("TItem", bound=Hashable) -def get_duplicates(names: Iterable[str]) -> Set[str]: + +def get_duplicates(names: Iterable[TItem]) -> Set[TItem]: seen = set() duplicates = set() for name in names: diff --git a/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py b/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py index 80456de088..c8d1591d17 100644 --- a/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py +++ b/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py @@ -1,16 +1,33 @@ +from io import StringIO from pathlib import Path +from typing import Dict, Optional, cast import pytest -import yaml from ecalc_cli.infrastructure.file_resource_service import FileResourceService from libecalc.common.time_utils import Frequency -from libecalc.dto import ResultOptions from libecalc.expression.expression import ExpressionType from libecalc.fixtures.case_types import DTOCase -from libecalc.presentation.yaml.mappers.variables_mapper import map_yaml_to_variables -from libecalc.presentation.yaml.parse_input import map_yaml_to_dto -from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel +from libecalc.presentation.yaml.configuration_service import ConfigurationService +from libecalc.presentation.yaml.model import YamlModel +from libecalc.presentation.yaml.yaml_entities import ResourceStream +from libecalc.presentation.yaml.yaml_models.yaml_model import ReaderType, YamlConfiguration, YamlValidator + + +class OverridableStreamConfigurationService(ConfigurationService): + def __init__(self, stream: ResourceStream, overrides: Optional[Dict] = None): + self._overrides = overrides + self._stream = stream + + def get_configuration(self) -> YamlValidator: + main_yaml_model = YamlConfiguration.Builder.get_yaml_reader(ReaderType.PYYAML).read( + main_yaml=self._stream, + enable_include=True, + ) + + if self._overrides is not None: + main_yaml_model._internal_datamodel.update(self._overrides) + return cast(YamlValidator, main_yaml_model) @pytest.fixture @@ -78,26 +95,17 @@ def _ltp_pfs_yaml_factory( """ - yaml_text = yaml.safe_load(input_text) - configuration = PyYamlYamlModel( - internal_datamodel=yaml_text, - name="ltp_export", - instantiated_through_read=True, + configuration_service = OverridableStreamConfigurationService( + stream=ResourceStream(name="ltp_export", stream=StringIO(input_text)) ) + resource_service = FileResourceService(working_directory=path) - path = path - - resources = FileResourceService._read_resources(configuration=configuration, working_directory=path) - variables = map_yaml_to_variables( - configuration, - resources=resources, - result_options=ResultOptions( - start=configuration.start, - end=configuration.end, - output_frequency=Frequency.YEAR, - ), + model = YamlModel( + configuration_service=configuration_service, + resource_service=resource_service, + output_frequency=Frequency.YEAR, ) - yaml_model = map_yaml_to_dto(configuration=configuration, resources=resources) - return DTOCase(ecalc_model=yaml_model, variables=variables) + + return DTOCase(ecalc_model=model.dto, variables=model.variables) return _ltp_pfs_yaml_factory diff --git a/src/libecalc/infrastructure/file_io.py b/src/libecalc/infrastructure/file_io.py index d2cd7bf5cc..c7011e477a 100644 --- a/src/libecalc/infrastructure/file_io.py +++ b/src/libecalc/infrastructure/file_io.py @@ -17,7 +17,7 @@ from libecalc.common.errors.exceptions import ( EcalcError, EcalcErrorType, - HeaderNotFound, + InvalidHeaderException, ) from libecalc.common.logger import logger from libecalc.presentation.yaml.yaml_entities import MemoryResource, YamlTimeseriesType @@ -399,7 +399,7 @@ def _validate_headers(headers: List[str]): "[ _ - # + : . , /] " ) elif re.match(r"^Unnamed: \d+$", header): - raise HeaderNotFound(header=header) + raise InvalidHeaderException(message="One or more headers are missing in resource") def _validate_not_nan(columns: List[List]): diff --git a/src/libecalc/presentation/yaml/domain/time_series.py b/src/libecalc/presentation/yaml/domain/time_series.py new file mode 100644 index 0000000000..d976895b3a --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series.py @@ -0,0 +1,106 @@ +from datetime import datetime +from operator import itemgetter +from typing import List, Tuple + +from scipy.interpolate import interp1d +from typing_extensions import Self + +from libecalc.common.list.list_utils import transpose +from libecalc.dto.types import InterpolationType + + +class TimeSeries: + def __init__( + self, + reference_id: str, + time_vector: List[datetime], + series: List[float], + extrapolate: bool, + interpolation_type: InterpolationType, + ): + self.reference_id = reference_id + self.time_vector = time_vector + self.series = series + self._extrapolate = extrapolate + self._interpolation_type = interpolation_type + + @staticmethod + def _get_interpolation_kind(rate_interpolation_type: InterpolationType) -> str: + if rate_interpolation_type == InterpolationType.LINEAR: + return "linear" + elif rate_interpolation_type == InterpolationType.RIGHT: + return "previous" + elif rate_interpolation_type == InterpolationType.LEFT: + return "next" + else: + raise ValueError(f"Invalid interpolation type, got {rate_interpolation_type}.") + + def _interpolate(self, time_vector: List[datetime], rate_interpolation_type: InterpolationType) -> List[float]: + interpolation_kind = self._get_interpolation_kind( + rate_interpolation_type=rate_interpolation_type, + ) + + start_time = self.time_vector[0] + + setup_times: List[float] + if len(self.time_vector) == 1: + # add dummy time 1 second later + setup_times = [0, 1] + setup_y = 2 * self.series + else: + # Interpolator x variable is number of seconds from first date time + setup_times = [(time - start_time).total_seconds() for time in self.time_vector] + setup_y = self.series + + interpolator = interp1d(x=setup_times, y=setup_y, kind=interpolation_kind) + target_times = [(time - start_time).total_seconds() for time in time_vector] + return list(interpolator(target_times)) + + def fit_to_time_vector( + self, + time_vector: List[datetime], + ) -> Self: + start, end = self.time_vector[0], self.time_vector[-1] + number_of_entries_before, entries_between, number_of_entries_after = split_time_vector( + time_vector, start=start, end=end + ) + + if self._extrapolate: + extrapolation_after_value = self.series[-1] + else: + extrapolation_after_value = 0.0 + + before_values = [0.0] * number_of_entries_before + between_values = self._interpolate( + time_vector=entries_between, rate_interpolation_type=self._interpolation_type + ) + after_values = [extrapolation_after_value] * number_of_entries_after + + return self.__class__( + reference_id=self.reference_id, + time_vector=time_vector, + series=[*before_values, *between_values, *after_values], + extrapolate=self._extrapolate, + interpolation_type=self._interpolation_type, + ) + + def sort(self) -> Self: + sort_columns = [self.time_vector, self.series] + sort_rows = transpose(sort_columns) + sorted_rows = sorted(sort_rows, key=itemgetter(0)) + sorted_columns = transpose(sorted_rows) + self.time_vector = sorted_columns[0] + self.series = sorted_columns[1] + return self + + +def split_time_vector( + time_vector: List[datetime], + start: datetime, + end: datetime, +) -> Tuple[int, List[datetime], int]: + """Find the entries between start and end, also counting the number of entries before start and after end.""" + number_of_entries_before = len([date for date in time_vector if date < start]) + number_of_entries_after = len([date for date in time_vector if date > end]) + entries_between = [date for date in time_vector if start <= date <= end] + return number_of_entries_before, entries_between, number_of_entries_after diff --git a/src/libecalc/presentation/yaml/domain/time_series_collection.py b/src/libecalc/presentation/yaml/domain/time_series_collection.py new file mode 100644 index 0000000000..78876bd28a --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_collection.py @@ -0,0 +1,99 @@ +from datetime import datetime +from typing import List + +from typing_extensions import Self, assert_never + +from libecalc.common.errors.exceptions import InvalidResourceException +from libecalc.dto.types import InterpolationType +from libecalc.presentation.yaml.domain.time_series import TimeSeries +from libecalc.presentation.yaml.domain.time_series_exceptions import TimeSeriesNotFound +from libecalc.presentation.yaml.domain.time_series_provider import TimeSeriesProvider +from libecalc.presentation.yaml.domain.time_series_resource import TimeSeriesResource +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException +from libecalc.presentation.yaml.resource import Resource +from libecalc.presentation.yaml.validation_errors import Location, ModelValidationError +from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords +from libecalc.presentation.yaml.yaml_types.time_series.yaml_time_series import ( + YamlDefaultTimeSeriesCollection, + YamlMiscellaneousTimeSeriesCollection, + YamlTimeSeriesCollection, +) + + +class TimeSeriesCollection(TimeSeriesProvider): + """ + TimeSeriesCollection is a collection of time series (TimeSeriesResource) and common properties for all the time + series in the collection. + """ + + def __init__( + self, + name: str, + resource: TimeSeriesResource, + interpolation: InterpolationType, + extrapolation: bool, + influence_time_vector: bool, + ): + self.name = name + self._resource = resource + self._interpolation = interpolation + self._extrapolation = extrapolation + self._influence_time_vector = influence_time_vector + + def should_influence_time_vector(self) -> bool: + return self._influence_time_vector + + def get_time_vector(self) -> List[datetime]: + return self._resource.get_time_vector() + + def get_time_series_references(self) -> List[str]: + return self._resource.get_headers() + + def get_time_series(self, time_series_id: str) -> TimeSeries: + try: + return TimeSeries( + reference_id=f"{self.name};{time_series_id}", + time_vector=self.get_time_vector(), + series=self._resource.get_column(time_series_id), + extrapolate=self._extrapolation, + interpolation_type=self._interpolation, + ).sort() + except InvalidResourceException as e: + raise TimeSeriesNotFound( + f"Unable to find time series with reference '{time_series_id}' in collection '{self.name}'" + ) from e + + @classmethod + def from_yaml(cls, resource: Resource, yaml_collection: YamlTimeSeriesCollection) -> Self: + try: + time_series_resource = TimeSeriesResource(resource) + time_series_resource.validate() + + if isinstance(yaml_collection, YamlDefaultTimeSeriesCollection): + interpolation = InterpolationType.RIGHT + extrapolation = False + elif isinstance(yaml_collection, YamlMiscellaneousTimeSeriesCollection): + interpolation = InterpolationType[yaml_collection.interpolation_type] + extrapolation = yaml_collection.extrapolation if yaml_collection.extrapolation is not None else False + else: + assert_never(yaml_collection) + return cls( + name=yaml_collection.name, + resource=time_series_resource, + interpolation=interpolation, + extrapolation=extrapolation, + influence_time_vector=yaml_collection.influence_time_vector, + ) + + except InvalidResourceException as e: + # Catch validation when initializing TimeSeriesResource + raise ModelValidationException( + errors=[ + ModelValidationError( + data=yaml_collection.model_dump(), + location=Location(keys=[EcalcYamlKeywords.file]), + message=str(e), + file_context=None, + ) + ], + ) from e diff --git a/src/libecalc/presentation/yaml/domain/time_series_collections.py b/src/libecalc/presentation/yaml/domain/time_series_collections.py new file mode 100644 index 0000000000..dc058a8c28 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_collections.py @@ -0,0 +1,74 @@ +from datetime import datetime +from typing import Dict, List, Set + +from libecalc.presentation.yaml.domain.time_series import TimeSeries +from libecalc.presentation.yaml.domain.time_series_collection import TimeSeriesCollection +from libecalc.presentation.yaml.domain.time_series_exceptions import TimeSeriesNotFound +from libecalc.presentation.yaml.domain.time_series_provider import TimeSeriesProvider +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException +from libecalc.presentation.yaml.resource import Resource +from libecalc.presentation.yaml.validation_errors import Location, ModelValidationError +from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords +from libecalc.presentation.yaml.yaml_types.time_series.yaml_time_series import YamlTimeSeriesCollection + + +class TimeSeriesCollections(TimeSeriesProvider): + """ + TimeSeriesCollections keeps several TimeSeriesCollection classes and can provide info about those, such as all time + steps in all collections. + """ + + def __init__(self, time_series: List[YamlTimeSeriesCollection], resources: Dict[str, Resource]): + time_series_collections: Dict[str, TimeSeriesCollection] = {} + errors: List[ModelValidationError] = [] + for time_series_collection in time_series: + resource = resources[time_series_collection.file] + try: + time_series_collections[time_series_collection.name] = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=time_series_collection, + ) + except ModelValidationException as e: + # Catch validation when initializing TimeSeriesResource + errors.extend( + [ + ModelValidationError( + data=error.data, + location=Location( + keys=[EcalcYamlKeywords.time_series, time_series_collection.name, *error.location.keys] + ), + message=error.message, + file_context=error.file_context, + ) + for error in e.errors() + ] + ) + if len(errors) != 0: + raise ModelValidationException(errors=errors) + + self._time_series_collections = time_series_collections + + def get_time_series_references(self) -> List[str]: + time_series_references = [] + for collection in self._time_series_collections.values(): + for time_series_reference in collection.get_time_series_references(): + time_series_references.append(f"{collection.name};{time_series_reference}") + return time_series_references + + def get_time_series(self, time_series_id: str) -> TimeSeries: + reference_id_parts = time_series_id.split(";") + if len(reference_id_parts) != 2: + raise TimeSeriesNotFound(time_series_id) + [collection_id, time_series_id] = reference_id_parts + + if collection_id not in self._time_series_collections: + raise TimeSeriesNotFound(time_series_id) + + return self._time_series_collections[collection_id].get_time_series(time_series_id) + + def get_time_vector(self) -> Set[datetime]: + time_vector: Set[datetime] = set() + for time_series_collection in self._time_series_collections.values(): + if time_series_collection.should_influence_time_vector(): + time_vector = time_vector.union(time_series_collection.get_time_vector()) + return time_vector diff --git a/src/libecalc/presentation/yaml/domain/time_series_exceptions.py b/src/libecalc/presentation/yaml/domain/time_series_exceptions.py new file mode 100644 index 0000000000..3f172de155 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_exceptions.py @@ -0,0 +1,9 @@ +from libecalc.common.errors.exceptions import EcalcError + + +class TimeSeriesNotFound(EcalcError): + def __init__(self, time_series_reference: str, message: str = None): + if message is None: + message = f"Unable to find time series with reference '{time_series_reference}'" + + super().__init__("Time series not found", message) diff --git a/src/libecalc/presentation/yaml/domain/time_series_provider.py b/src/libecalc/presentation/yaml/domain/time_series_provider.py new file mode 100644 index 0000000000..ba98e45120 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_provider.py @@ -0,0 +1,9 @@ +from typing import List, Protocol + +from libecalc.presentation.yaml.domain.time_series import TimeSeries + + +class TimeSeriesProvider(Protocol): + def get_time_series(self, time_series_id: str) -> TimeSeries: ... + + def get_time_series_references(self) -> List[str]: ... diff --git a/src/libecalc/presentation/yaml/domain/time_series_resource.py b/src/libecalc/presentation/yaml/domain/time_series_resource.py new file mode 100644 index 0000000000..6d90af19e6 --- /dev/null +++ b/src/libecalc/presentation/yaml/domain/time_series_resource.py @@ -0,0 +1,148 @@ +import re +from datetime import datetime +from math import isnan +from typing import Iterable, List, Union + +from pandas.errors import ParserError +from typing_extensions import Self + +from libecalc.common.errors.exceptions import ( + InvalidColumnException, + InvalidHeaderException, + InvalidResourceException, + NoColumnsException, +) +from libecalc.common.string.string_utils import get_duplicates +from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection_mapper import parse_time_vector +from libecalc.presentation.yaml.resource import Resource +from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords + + +class InvalidTimeSeriesResourceException(InvalidResourceException): + def __init__(self, message): + super().__init__("Invalid time series resource", message) + + +class EmptyTimeVectorException(InvalidTimeSeriesResourceException): + def __init__(self): + super().__init__("The time vector is empty") + + +class DuplicateDatesException(InvalidTimeSeriesResourceException): + def __init__(self, duplicates: Iterable[datetime]): + self.duplicates = duplicates + super().__init__(f"The time series resource contains duplicate dates: {','.join(map(str, duplicates))}") + + +def _is_header_valid(header: str) -> bool: + return bool(re.match(r"^[A-Za-z][A-Za-z0-9_.,\-\s#+:\/]*$", header)) + + +class TimeSeriesResource(Resource): + """ + A time series resource containing time series + """ + + def __init__(self, resource: Resource): + self._resource = resource + headers = resource.get_headers() + + if len(headers) == 0: + raise InvalidResourceException("Invalid resource", "Resource must at least have one column") + + for header in headers: + if not _is_header_valid(header): + raise InvalidHeaderException( + "The time series resource header contains illegal characters. " + "Allowed characters are: ^[A-Za-z][A-Za-z0-9_.,\\-\\s#+:\\/]*$" + ) + + if EcalcYamlKeywords.date in headers: + # Find the column named "DATE" and use that as time vector + time_vector = resource.get_column(EcalcYamlKeywords.date) + headers = [header for header in headers if header != EcalcYamlKeywords.date] + else: + # Legacy: support random names for time vector as long as it is the first column + time_vector = resource.get_column(headers[0]) + headers = headers[1:] + + try: + if not all(isinstance(time, (int, str)) for time in time_vector): + # time_vector may be a list of floats for example. + # This might happen if the resource contains an extra comma only in a single row. + raise InvalidTimeSeriesResourceException("could not parse time vector.") + self._time_vector = parse_time_vector(time_vector) + except (ParserError, ValueError) as e: + # pandas.to_datetime might raise these two exceptions. + # See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html + raise InvalidTimeSeriesResourceException("could not parse time vector.") from e + + self._headers = headers + + def _validate_time_vector(self) -> None: + if len(self._time_vector) == 0: + raise EmptyTimeVectorException() + duplicates = get_duplicates(self._time_vector) + if len(duplicates) != 0: + raise DuplicateDatesException(duplicates=duplicates) + + def _validate_columns(self): + headers = self.get_headers() + columns = [self.get_column(header) for header in headers] + time_vector = self.get_time_vector() + + time_vector_length = len(time_vector) + headers_length = len(headers) + + if headers_length == 0: + raise NoColumnsException() + + number_of_rows = len(columns[0]) + + if number_of_rows == 0: + raise InvalidResourceException("No rows in resource", "The resource should have at least one row.") + + if not (number_of_rows == time_vector_length): + raise InvalidResourceException( + "Rows mismatch", + f"The number of records for times and data do not match: data: {number_of_rows}, time_vector: {time_vector_length}", + ) + + for column, header in zip(columns, headers): + if len(column) != time_vector_length: + raise InvalidColumnException( + header=header, + message="Column '{header}' does not match the length of the time vector.", + ) + + for index, value in enumerate(column): + row = index + 1 + if not isinstance(value, (float, int)): + raise InvalidColumnException( + header=header, + row=row, + message="The timeseries column '{header}' contains non-numeric values in row {row}.", + ) + if isnan(value): + raise InvalidColumnException( + header=header, + row=row, + message="The timeseries column '{header}' contains empty values in row {row}.", + ) + + def validate(self) -> Self: + self._validate_time_vector() + + self._validate_columns() + + return self + + def get_time_vector(self) -> List[datetime]: + return self._time_vector + + def get_headers(self) -> List[str]: + return self._headers + + def get_column(self, header: str) -> List[Union[float, int, str]]: + # TODO: Add validation on column so that we can remove 'str' from return type + return self._resource.get_column(header) diff --git a/src/libecalc/presentation/yaml/mappers/facility_input.py b/src/libecalc/presentation/yaml/mappers/facility_input.py index ee2248c99e..35cb2e9b07 100644 --- a/src/libecalc/presentation/yaml/mappers/facility_input.py +++ b/src/libecalc/presentation/yaml/mappers/facility_input.py @@ -3,7 +3,7 @@ from pydantic import TypeAdapter, ValidationError from libecalc import dto -from libecalc.common.errors.exceptions import InvalidResource +from libecalc.common.errors.exceptions import InvalidResourceException from libecalc.dto import CompressorSampled as CompressorTrainSampledDTO from libecalc.dto import GeneratorSetSampled, TabulatedData from libecalc.dto.types import ChartType, EnergyModelType, EnergyUsageType @@ -49,7 +49,7 @@ def _get_adjustment_factor(data: Dict) -> float: def _get_column_or_none(resource: Resource, header: str) -> Optional[List[Union[float, int, str]]]: try: return resource.get_column(header) - except InvalidResource: + except InvalidResourceException: return None @@ -221,7 +221,7 @@ def from_yaml_to_dto(self, data: Dict) -> dto.EnergyModel: error_key=vve.key, dump_flow_style=DumpFlowStyle.BLOCK, ) from vve - except InvalidResource as e: + except InvalidResourceException as e: message = f"Invalid resource '{resource_name}'. Reason: {str(e)}" raise DataValidationError( diff --git a/src/libecalc/presentation/yaml/mappers/utils.py b/src/libecalc/presentation/yaml/mappers/utils.py index cee9f2fa27..efdb65dfc2 100644 --- a/src/libecalc/presentation/yaml/mappers/utils.py +++ b/src/libecalc/presentation/yaml/mappers/utils.py @@ -3,7 +3,7 @@ import pandas as pd -from libecalc.common.errors.exceptions import HeaderNotFound, InvalidReferenceException +from libecalc.common.errors.exceptions import HeaderNotFoundException, InvalidReferenceException from libecalc.common.logger import logger from libecalc.common.units import Unit from libecalc.dto.types import ( @@ -269,7 +269,7 @@ def get_single_speed_chart_data(resource: Resource, resource_name: str) -> Chart ) # Get first speed, all are equal. speed = speed_values[0] - except HeaderNotFound: + except HeaderNotFoundException: logger.debug(f"Speed not specified for single speed chart {resource_name}, setting speed to 1.") speed = 1 diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/get_global_time_vector.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/get_global_time_vector.py new file mode 100644 index 0000000000..0e2ec681ba --- /dev/null +++ b/src/libecalc/presentation/yaml/mappers/variables_mapper/get_global_time_vector.py @@ -0,0 +1,89 @@ +from datetime import datetime, timedelta +from typing import Iterable, List, Optional, Set + +import pandas as pd + +import libecalc.common.time_utils +from libecalc.presentation.yaml.validation_errors import ValidationError + + +def _get_date_range(start: datetime, end: datetime, frequency: libecalc.common.time_utils.Frequency) -> Set[datetime]: + if frequency == libecalc.common.time_utils.Frequency.NONE: + return set() + + date_range = pd.date_range(start=start, end=end, freq=frequency.value) + return set(date_range.to_pydatetime()) + + +def _get_end_boundary(frequency: libecalc.common.time_utils.Frequency, time_vector_set: Set[datetime]) -> datetime: + """If end boundary has not been specified explicitly, we attempt to make an educated guess for the + user, based on output frequency provided and assuming data is forward filled. + + It is however recommended that the user specified END explicitly + """ + time_vector: List[datetime] = sorted(time_vector_set) + + if frequency == libecalc.common.time_utils.Frequency.YEAR: + return datetime(year=time_vector[-1].year + 1, month=1, day=1) + elif frequency == libecalc.common.time_utils.Frequency.MONTH: + return (time_vector[-1].replace(day=1) + timedelta(days=31)).replace(day=1) + elif frequency == libecalc.common.time_utils.Frequency.DAY: + return time_vector[-1] + timedelta(days=1) + else: + return max( + time_vector + ) # Frequency.NONE . We are clueless and user does not help us, just fallback to last time given + + +def get_global_time_vector( + time_series_time_vector: Iterable[datetime], + start: Optional[datetime] = None, + end: Optional[datetime] = None, + additional_dates: Optional[Set[datetime]] = None, + frequency: libecalc.common.time_utils.Frequency = libecalc.common.time_utils.Frequency.NONE, +) -> List[datetime]: + """ + + Args: + time_series_time_vector: all dates from time series that should influence time vector + start: user specified start + end: user specified end + additional_dates: dates from the model configuration + frequency: user specified frequency + + Returns: the actual set of dates that should be computed + """ + time_vector: Set[datetime] = set(time_series_time_vector) + + has_time_vector = len(time_vector) > 0 + has_start = start is not None + has_end = end is not None + has_frequency = frequency != libecalc.common.time_utils.Frequency.NONE + if not (has_time_vector or (has_start and has_end) or (has_start and has_frequency)): + raise ValidationError("No time series found, please provide one or specify a start and end (or frequency).") + + # Store start, end before adding dates from yaml. This is to make sure dates in yaml are trimmed. + start = start or min(time_vector) + + # Add start + time_vector.add(start) + + if not end: + end = _get_end_boundary(frequency=frequency, time_vector_set=time_vector) + + # Add end + time_vector.add(end) + + # Add all dates specified in yaml + time_vector = time_vector.union(additional_dates or set()) + + # Trim time vector based on start + time_vector = {date for date in time_vector if date >= start} + + # Trim time vector based on end + time_vector = {date for date in time_vector if date <= end} + + # Add all dates for frequency + time_vector = time_vector.union(_get_date_range(start=start, end=end, frequency=frequency)) + + return sorted(time_vector) diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series.py deleted file mode 100644 index dd86152c7e..0000000000 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series.py +++ /dev/null @@ -1,8 +0,0 @@ -from datetime import datetime -from typing import List, NamedTuple - - -class TimeSeries(NamedTuple): - reference_id: str - time_vector: List[datetime] - series: List[float] diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection.py deleted file mode 100644 index 5bd4498909..0000000000 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection.py +++ /dev/null @@ -1,161 +0,0 @@ -from datetime import datetime -from math import isnan -from typing import List, Literal, Optional, Tuple, Union - -from pydantic import ConfigDict, Field, field_validator, model_validator -from typing_extensions import Annotated - -from libecalc.dto.base import EcalcBaseModel -from libecalc.dto.types import InterpolationType, TimeSeriesType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series import TimeSeries - - -def transpose(data: List[List]) -> List[List]: - return list(map(list, zip(*data))) - - -def _sort_time_series_data( - time_vector: List[Union[datetime]], - columns: List[List], -) -> Tuple[List[Union[datetime]], List[List]]: - timeseries_columns = [time_vector, *columns] - timeseries_rows = transpose(timeseries_columns) - sorted_timeseries_rows = sorted(timeseries_rows, key=lambda row: row[0]) - sorted_timeseries_columns = transpose(sorted_timeseries_rows) - return sorted_timeseries_columns[0], sorted_timeseries_columns[1:] - - -class TimeSeriesCollection(EcalcBaseModel): - typ: TimeSeriesType - name: str = Field(pattern=r"^[A-Za-z][A-Za-z0-9_]*$") - - headers: List[Annotated[str, Field(pattern=r"^[A-Za-z][A-Za-z0-9_.,\-\s#+:\/]*$")]] = Field( - default_factory=list - ) # Does not include date header - columns: List[List[float]] = Field(default_factory=list) - time_vector: List[datetime] = Field(default_factory=list) - - influence_time_vector: Optional[bool] = True - extrapolate_outside_defined_time_interval: Optional[bool] = None - interpolation_type: InterpolationType = None - model_config = ConfigDict(validate_default=True) - - @field_validator("influence_time_vector") - @classmethod - def set_influence_time_vector_default(cls, value): - return value if value is not None else True - - @field_validator("extrapolate_outside_defined_time_interval") - @classmethod - def set_extrapolate_outside_defined_time_interval_default(cls, value): - return value if value is not None else False - - @field_validator("time_vector") - @classmethod - def check_that_dates_are_ok(cls, dates): - if len(dates) == 0: - raise ValueError("Time vectors must have at least one record") - if not (len(dates) == len(set(dates))): - raise ValueError("The list of dates have duplicates. Duplicated dates are currently not supported.") - return dates - - @model_validator(mode="after") - def check_that_lists_match(self): - headers = self.headers - columns = self.columns - time_vector = self.time_vector - - time_vector_length = len(time_vector) - headers_length = len(headers) - - if headers_length == 0: - raise ValueError("Headers must at least have one column") - - number_of_columns = len(columns) - - if number_of_columns == 0: - raise ValueError("Data vector must at least have one column") - - if not (headers_length == number_of_columns): - raise ValueError( - f"The number of columns provided do not match for header and data: data: {number_of_columns}, headers: {headers_length}" - ) - - number_of_rows = len(columns[0]) - - if number_of_rows == 0: - raise ValueError("Data must have at least one record") - - if not (number_of_rows == time_vector_length): - raise ValueError( - f"The number of records for times and data do not match: data: {number_of_rows}, time_vector: {time_vector_length}" - ) - - sorted_time_vector, sorted_columns = _sort_time_series_data(time_vector, columns) - self.time_vector = sorted_time_vector - self.columns = sorted_columns - return self - - @model_validator(mode="after") - def check_that_columns_are_ok(self): - headers = self.headers - - if headers is None or self.columns is None: - return self.columns - - for column, header in zip(self.columns, headers): - for value in column: - if isnan(value): - reference_id = f"{self.name};{header}" - raise ValueError( - f"The timeseries column '{reference_id}' contains empty values. " - f"Please check your file for missing data, each column should define values for all timesteps.", - ) - - return self - - @property - def time_series(self): - return [ - TimeSeries( - reference_id=f"{self.name};{header}", - time_vector=self.time_vector, - series=column, - ) - for header, column in zip(self.headers, self.columns) - ] - - -class MiscellaneousTimeSeriesCollection(TimeSeriesCollection): - typ: Literal[TimeSeriesType.MISCELLANEOUS] = TimeSeriesType.MISCELLANEOUS.value - - @field_validator("interpolation_type", mode="before") - @classmethod - def interpolation_is_required(cls, value): - if value is None: - raise ValueError("interpolation_type must be specified for the MISCELLANEOUS time series type.") - return value - - -class DefaultTimeSeriesCollection(TimeSeriesCollection): - typ: Literal[TimeSeriesType.DEFAULT] = TimeSeriesType.DEFAULT.value - - @field_validator("extrapolate_outside_defined_time_interval", mode="before") - @classmethod - def extrapolate_outside_defined_time_interval_cannot_be_set(cls, value): - if value is not None: - raise ValueError( - "extrapolate_outside_defined_time_interval cannot be set on " - "DEFAULT-type (since DEFAULT-models should not be possible to extrapolate)." - ) - - return value - - @field_validator("interpolation_type", mode="before") - def set_default_interpolation_type(cls, value): - if value is not None: - raise ValueError( - "interpolation_type cannot be set on DEFAULT-type " - "(since DEFAULT-models can only have RIGHT interpolation)." - ) - return InterpolationType.RIGHT diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py index 188709608e..dc0f30affa 100644 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py +++ b/src/libecalc/presentation/yaml/mappers/variables_mapper/time_series_collection_mapper.py @@ -1,39 +1,13 @@ import re from datetime import datetime -from typing import Dict, List, Union +from typing import List, Union import pandas -from pydantic import Field, TypeAdapter, ValidationError -from typing_extensions import Annotated -from libecalc.common.errors.exceptions import InvalidResource -from libecalc.dto import TimeSeriesType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - DefaultTimeSeriesCollection, - MiscellaneousTimeSeriesCollection, -) -from libecalc.presentation.yaml.resource import Resource, Resources -from libecalc.presentation.yaml.validation_errors import ( - DataValidationError, - DtoValidationError, - DumpFlowStyle, -) -from libecalc.presentation.yaml.yaml_entities import ( - YamlTimeseriesType, -) +from libecalc.common.errors.exceptions import InvalidResourceException +from libecalc.presentation.yaml.resource import Resource from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords -# Used here to make pydantic understand which object to instantiate. -TimeSeriesUnionType = Annotated[ - Union[MiscellaneousTimeSeriesCollection, DefaultTimeSeriesCollection], - Field(discriminator="typ"), -] - -time_series_type_map = { - YamlTimeseriesType.MISCELLANEOUS.value: TimeSeriesType.MISCELLANEOUS, - YamlTimeseriesType.DEFAULT.value: TimeSeriesType.DEFAULT, -} - def _parse_date(date_input: Union[int, str]) -> datetime: """ @@ -54,7 +28,7 @@ def _parse_date(date_input: Union[int, str]) -> datetime: return pandas.to_datetime(date_input, dayfirst=True).to_pydatetime() -def parse_time_vector(time_vector: List[str]) -> List[datetime]: +def parse_time_vector(time_vector: List[Union[int, str]]) -> List[datetime]: return [_parse_date(date_input) for date_input in time_vector] @@ -62,7 +36,7 @@ def parse_time_series_from_resource(resource: Resource): time_series_resource_headers = resource.get_headers() if len(time_series_resource_headers) == 0: - raise InvalidResource("Invalid resource", "Resource must at least have one column") + raise InvalidResourceException("Invalid resource", "Resource must at least have one column") if EcalcYamlKeywords.date in time_series_resource_headers: # Find the column named "DATE" and use that as time vector @@ -74,67 +48,3 @@ def parse_time_series_from_resource(resource: Resource): headers = time_series_resource_headers[1:] return parse_time_vector(time_vector), headers - - -class TimeSeriesCollectionMapper: - def __init__(self, resources: Resources): - self.__resources = resources - - def from_yaml_to_dto(self, data: Dict) -> TimeSeriesUnionType: - """ - Fixme: we do not use the input date format when reading Time Series Collections. - """ - - time_series = { - "typ": data.get(EcalcYamlKeywords.type), - "name": data.get(EcalcYamlKeywords.name), - "influence_time_vector": data.get(EcalcYamlKeywords.time_series_influence_time_vector), - "extrapolate_outside_defined_time_interval": data.get( - EcalcYamlKeywords.time_series_extrapolate_outside_defined - ), - "interpolation_type": data.get(EcalcYamlKeywords.time_series_interpolation_type), - } - - resource_name = data.get(EcalcYamlKeywords.file) - time_series_resource = self.__resources.get( - resource_name, - ) - - if time_series_resource is None: - resource_name_context = "." - if resource_name is not None: - resource_name_context = f" with name '{resource_name}'" - raise DataValidationError( - data, - message=f"Could not find resource{resource_name_context}", - error_key=EcalcYamlKeywords.file, - dump_flow_style=DumpFlowStyle.BLOCK, - ) - - try: - time_vector, headers = parse_time_series_from_resource(time_series_resource) - except InvalidResource as e: - raise DataValidationError( - data, - message=str(e), - error_key=EcalcYamlKeywords.file, - dump_flow_style=DumpFlowStyle.BLOCK, - ) from e - - columns = [] - - for header in headers: - try: - columns.append(time_series_resource.get_column(header)) - except InvalidResource: - # Validation handled below when creating TimeSeries class - pass - - time_series["headers"] = headers - time_series["time_vector"] = time_vector - time_series["columns"] = columns - - try: - return TypeAdapter(TimeSeriesUnionType).validate_python(time_series) - except ValidationError as e: - raise DtoValidationError(data=data, validation_error=e, dump_flow_style=DumpFlowStyle.BLOCK) from e diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/timeseries_utils.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/timeseries_utils.py deleted file mode 100644 index f7013747c5..0000000000 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/timeseries_utils.py +++ /dev/null @@ -1,160 +0,0 @@ -from datetime import datetime, timedelta -from typing import List, Optional, Set, Tuple - -import pandas as pd -from scipy.interpolate import interp1d - -import libecalc.common.time_utils -from libecalc.dto.types import InterpolationType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series import TimeSeries -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - TimeSeriesCollection, -) -from libecalc.presentation.yaml.validation_errors import ValidationError - - -def _split_time_vector( - time_vector: List[datetime], - start: datetime, - end: datetime, -) -> Tuple[int, List[datetime], int]: - """Find the entries between start and end, also counting the number of entries before start and after end.""" - number_of_entries_before = len([date for date in time_vector if date < start]) - number_of_entries_after = len([date for date in time_vector if date > end]) - entries_between = [date for date in time_vector if start <= date <= end] - return number_of_entries_before, entries_between, number_of_entries_after - - -def _get_interpolation_kind(rate_interpolation_type: InterpolationType) -> str: - if rate_interpolation_type == InterpolationType.LINEAR: - return "linear" - elif rate_interpolation_type == InterpolationType.RIGHT: - return "previous" - elif rate_interpolation_type == InterpolationType.LEFT: - return "next" - else: - raise ValueError(f"Invalid interpolation typem, got {rate_interpolation_type}.") - - -def _interpolate( - time_series: TimeSeries, time_vector: List[datetime], rate_interpolation_type: InterpolationType -) -> List[float]: - interpolation_kind = _get_interpolation_kind( - rate_interpolation_type=rate_interpolation_type, - ) - - start_time = time_series.time_vector[0] - - if len(time_series.time_vector) == 1: - # add dummy time 1 second later - setup_times = [0, 1] - setup_y = 2 * time_series.series - else: - # Interpolator x variable is number of seconds from first date time - setup_times = [(time - start_time).total_seconds() for time in time_series.time_vector] - setup_y = time_series.series - - interpolator = interp1d(x=setup_times, y=setup_y, kind=interpolation_kind) - target_times = [(time - start_time).total_seconds() for time in time_vector] - return list(interpolator(target_times)) - - -def fit_time_series_to_time_vector( - time_series: TimeSeries, - time_vector: List[datetime], - extrapolate_outside_defined_time_interval: bool, - interpolation_type: InterpolationType, -) -> List[float]: - start, end = time_series.time_vector[0], time_series.time_vector[-1] - number_of_entries_before, entries_between, number_of_entries_after = _split_time_vector( - time_vector, start=start, end=end - ) - - if extrapolate_outside_defined_time_interval: - extrapolation_after_value = time_series.series[-1] - else: - extrapolation_after_value = 0.0 - - before_values = [0.0] * number_of_entries_before - between_values = _interpolate( - time_series=time_series, time_vector=entries_between, rate_interpolation_type=interpolation_type - ) - after_values = [extrapolation_after_value] * number_of_entries_after - - return [*before_values, *between_values, *after_values] - - -def _get_date_range(start: datetime, end: datetime, frequency: libecalc.common.time_utils.Frequency) -> Set[datetime]: - if frequency == libecalc.common.time_utils.Frequency.NONE: - return set() - - date_range = pd.date_range(start=start, end=end, freq=frequency.value) - return set(date_range.to_pydatetime()) - - -def _get_end_boundary(frequency: libecalc.common.time_utils.Frequency, time_vector_set: Set[datetime]) -> datetime: - """If end boundary has not been specified explicitly, we attempt to make an educated guess for the - user, based on output frequency provided and assuming data is forward filled. - - It is however recommended that the user specified END explicitly - """ - time_vector: List[datetime] = sorted(time_vector_set) - - if frequency == libecalc.common.time_utils.Frequency.YEAR: - return datetime(year=time_vector[-1].year + 1, month=1, day=1) - elif frequency == libecalc.common.time_utils.Frequency.MONTH: - return (time_vector[-1].replace(day=1) + timedelta(days=31)).replace(day=1) - elif frequency == libecalc.common.time_utils.Frequency.DAY: - return time_vector[-1] + timedelta(days=1) - else: - return max( - time_vector - ) # Frequency.NONE . We are clueless and user does not help us, just fallback to last time given - - -def get_global_time_vector( - time_series_collections: List[TimeSeriesCollection], - start: Optional[datetime] = None, - end: Optional[datetime] = None, - additional_dates: Optional[Set[datetime]] = None, - frequency: libecalc.common.time_utils.Frequency = libecalc.common.time_utils.Frequency.NONE, -) -> List[datetime]: - time_vector: Set[datetime] = set() - - # Add all dates from time series that should influence time vector - for time_series_collection in time_series_collections: - if time_series_collection.influence_time_vector: - time_vector = time_vector.union(time_series_collection.time_vector) - - has_time_vector = len(time_vector) > 0 - has_start = start is not None - has_end = end is not None - has_frequency = frequency != libecalc.common.time_utils.Frequency.NONE - if not (has_time_vector or (has_start and has_end) or (has_start and has_frequency)): - raise ValidationError("No time series found, please provide one or specify a start and end (or frequency).") - - # Store start, end before adding dates from yaml. This is to make sure dates in yaml are trimmed. - start = start or min(time_vector) - - # Add start - time_vector.add(start) - - if not end: - end = _get_end_boundary(frequency=frequency, time_vector_set=time_vector) - - # Add end - time_vector.add(end) - - # Add all dates specified in yaml - time_vector = time_vector.union(additional_dates or set()) - - # Trim time vector based on start - time_vector = {date for date in time_vector if date >= start} - - # Trim time vector based on end - time_vector = {date for date in time_vector if date <= end} - - # Add all dates for frequency - time_vector = time_vector.union(_get_date_range(start=start, end=end, frequency=frequency)) - - return sorted(time_vector) diff --git a/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py b/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py index bc0cd4a523..6e725457ba 100644 --- a/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py +++ b/src/libecalc/presentation/yaml/mappers/variables_mapper/variables_mapper.py @@ -6,15 +6,8 @@ from libecalc.common.logger import logger from libecalc.common.time_utils import Periods from libecalc.dto import VariablesMap -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection_mapper import ( - TimeSeriesCollectionMapper, -) -from libecalc.presentation.yaml.mappers.variables_mapper.timeseries_utils import ( - fit_time_series_to_time_vector, - get_global_time_vector, -) -from libecalc.presentation.yaml.resource import Resources -from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel +from libecalc.presentation.yaml.domain.time_series_provider import TimeSeriesProvider +from libecalc.presentation.yaml.yaml_models.yaml_model import YamlValidator from libecalc.presentation.yaml.yaml_types.yaml_variable import ( YamlSingleVariable, YamlVariable, @@ -107,36 +100,17 @@ def _evaluate_variables(variables: Dict[str, YamlVariable], variables_map: Varia def map_yaml_to_variables( - configuration: PyYamlYamlModel, - resources: Resources, - result_options: dto.ResultOptions, + configuration: YamlValidator, time_series_provider: TimeSeriesProvider, global_time_vector: List[datetime] ) -> dto.VariablesMap: - # TODO: Replace configuration type with YamlValidator - timeseries_collections = [ - TimeSeriesCollectionMapper(resources).from_yaml_to_dto(timeseries.model_dump(by_alias=True)) - for timeseries in configuration.time_series_raise_if_invalid - ] - - global_time_vector = get_global_time_vector( - time_series_collections=timeseries_collections, - start=configuration.start, - end=configuration.end, - frequency=result_options.output_frequency, - additional_dates=configuration.dates, - ) - variables = {} - for timeseries_collection in timeseries_collections: - timeseries_list = timeseries_collection.time_series - for timeseries in timeseries_list: - variables[timeseries.reference_id] = fit_time_series_to_time_vector( - time_series=timeseries, - time_vector=global_time_vector, - extrapolate_outside_defined_time_interval=timeseries_collection.extrapolate_outside_defined_time_interval, - interpolation_type=timeseries_collection.interpolation_type, - ) + time_series_list = [ + time_series_provider.get_time_series(time_series_reference) + for time_series_reference in time_series_provider.get_time_series_references() + ] + for time_series in time_series_list: + variables[time_series.reference_id] = time_series.fit_to_time_vector(global_time_vector).series return _evaluate_variables( - configuration.variables_raise_if_invalid, + configuration.variables, variables_map=VariablesMap(variables=variables, time_vector=global_time_vector), ) diff --git a/src/libecalc/presentation/yaml/model.py b/src/libecalc/presentation/yaml/model.py index b3d6b59855..8c772cf036 100644 --- a/src/libecalc/presentation/yaml/model.py +++ b/src/libecalc/presentation/yaml/model.py @@ -1,17 +1,20 @@ from datetime import datetime -from textwrap import indent +from functools import cached_property from typing import Dict, List, Optional -from libecalc.common.errors.exceptions import InvalidResource +from typing_extensions import Self, deprecated + from libecalc.common.time_utils import Frequency from libecalc.dto import ResultOptions, VariablesMap from libecalc.dto.component_graph import ComponentGraph from libecalc.presentation.yaml.configuration_service import ConfigurationService +from libecalc.presentation.yaml.domain.time_series_collections import TimeSeriesCollections from libecalc.presentation.yaml.mappers.variables_mapper import map_yaml_to_variables +from libecalc.presentation.yaml.mappers.variables_mapper.get_global_time_vector import get_global_time_vector +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.parse_input import map_yaml_to_dto -from libecalc.presentation.yaml.resource import Resource from libecalc.presentation.yaml.resource_service import ResourceService -from libecalc.presentation.yaml.validation_errors import DtoValidationError, ModelValidationError +from libecalc.presentation.yaml.validation_errors import DtoValidationError from libecalc.presentation.yaml.yaml_models.yaml_model import YamlValidator from libecalc.presentation.yaml.yaml_validation_context import ( ModelContext, @@ -21,25 +24,6 @@ ) -class ModelValidationException(Exception): - def __init__(self, errors: List[ModelValidationError]): - self._errors = errors - super().__init__("Model is not valid") - - def error_count(self) -> int: - return len(self._errors) - - def errors(self) -> List[ModelValidationError]: - return self._errors - - def __str__(self): - msg = "Validation error\n\n" - errors = "\n\n".join(map(str, self._errors)) - errors = indent(errors, "\t") - msg += errors - return msg - - class YamlModel: """ Class representing both the yaml and the resources. @@ -64,8 +48,16 @@ def __init__( self._output_frequency = output_frequency self._configuration = configuration_service.get_configuration() self.resources = resource_service.get_resources(self._configuration) - self.is_valid_for_run() - self.dto = map_yaml_to_dto(configuration=self._configuration, resources=self.resources) + + self._is_validated = False + + @cached_property + @deprecated( + "Avoid using the dto objects directly, we want to remove them. get_graph() might be useful instead, although the nodes will change." + ) + def dto(self): + self.validate_for_run() + return map_yaml_to_dto(configuration=self._configuration, resources=self.resources) @property def start(self) -> Optional[datetime]: @@ -75,10 +67,24 @@ def start(self) -> Optional[datetime]: def end(self) -> Optional[datetime]: return self._configuration.end + def _get_time_series_collections(self) -> TimeSeriesCollections: + return TimeSeriesCollections(time_series=self._configuration.time_series, resources=self.resources) + + def _get_time_vector(self): + return get_global_time_vector( + time_series_time_vector=self._get_time_series_collections().get_time_vector(), + start=self.start, + end=self.end, + frequency=self._output_frequency, + additional_dates=self._configuration.dates, + ) + @property def variables(self) -> VariablesMap: return map_yaml_to_variables( - configuration=self._configuration, resources=self.resources, result_options=self.result_options + configuration=self._configuration, + time_series_provider=self._get_time_series_collections(), + global_time_vector=self._get_time_vector(), ) @property @@ -89,29 +95,11 @@ def result_options(self) -> ResultOptions: output_frequency=self._output_frequency, ) - @property - def graph(self) -> ComponentGraph: + def get_graph(self) -> ComponentGraph: return self.dto.get_graph() - def _find_resource_from_name(self, filename: str) -> Optional[Resource]: - return self.resources.get(filename) - def _get_token_references(self, yaml_model: YamlValidator) -> List[str]: - token_references = [] - for time_series in yaml_model.time_series: - resource = self._find_resource_from_name(time_series.file) - - if resource is None: - # Don't add any tokens if the resource is not found - continue - - try: - headers = resource.get_headers() - for header in headers: - token_references.append(f"{time_series.name};{header}") - except InvalidResource: - # Don't add any tokens if resource is invalid (unable to read header) - continue + token_references = self._get_time_series_collections().get_time_series_references() for reference in yaml_model.variables: token_references.append(f"$var.{reference}") @@ -134,11 +122,15 @@ def _get_validation_context(self, yaml_model: YamlValidator) -> YamlModelValidat YamlModelValidationContextNames.model_types: self._get_model_types(yaml_model=yaml_model), } - def is_valid_for_run(self) -> bool: + def validate_for_run(self) -> Self: + if self._is_validated: + return self + try: # Validate model validation_context = self._get_validation_context(yaml_model=self._configuration) self._configuration.validate(validation_context) - return True + self._is_validated = True + return self except DtoValidationError as e: raise ModelValidationException(errors=e.errors()) from e diff --git a/src/libecalc/presentation/yaml/model_validation_exception.py b/src/libecalc/presentation/yaml/model_validation_exception.py new file mode 100644 index 0000000000..27f9823ac0 --- /dev/null +++ b/src/libecalc/presentation/yaml/model_validation_exception.py @@ -0,0 +1,23 @@ +from textwrap import indent +from typing import List + +from libecalc.presentation.yaml.validation_errors import ModelValidationError, ValidationError + + +class ModelValidationException(ValidationError): + def __init__(self, errors: List[ModelValidationError]): + self._errors = errors + super().__init__("Model is not valid") + + def error_count(self) -> int: + return len(self._errors) + + def errors(self) -> List[ModelValidationError]: + return self._errors + + def __str__(self): + msg = "Validation error\n\n" + errors = "\n\n".join(map(str, self._errors)) + errors = indent(errors, "\t") + msg += errors + return msg diff --git a/src/libecalc/presentation/yaml/yaml_entities.py b/src/libecalc/presentation/yaml/yaml_entities.py index 9a5a4f210a..a2cc0df727 100644 --- a/src/libecalc/presentation/yaml/yaml_entities.py +++ b/src/libecalc/presentation/yaml/yaml_entities.py @@ -3,7 +3,7 @@ from typing import Dict, List, TextIO, Union from libecalc import dto -from libecalc.common.errors.exceptions import ColumnNotFound, HeaderNotFound +from libecalc.common.errors.exceptions import ColumnNotFoundException, HeaderNotFoundException from libecalc.presentation.yaml.resource import Resource @@ -24,10 +24,10 @@ def get_column(self, header: str) -> List[Union[float, int, str]]: header_index = self.headers.index(header) return self.data[header_index] except ValueError as e: - raise HeaderNotFound(header=header) from e + raise HeaderNotFoundException(header=header) from e except IndexError as e: # Should validate that header and columns are of equal length, but that is currently done elsewhere. - raise ColumnNotFound(header=header) from e + raise ColumnNotFoundException(header=header) from e @dataclass diff --git a/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py b/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py index 51c6753ea3..6666d8a616 100644 --- a/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py +++ b/src/libecalc/presentation/yaml/yaml_types/time_series/yaml_time_series.py @@ -15,6 +15,7 @@ class YamlTimeSeriesCollectionBase(YamlBase): name: str = Field( ..., title="NAME", + pattern=r"^[A-Za-z][A-Za-z0-9_]*$", description="Name of the time series.\n\n$ECALC_DOCS_KEYWORDS_URL/NAME", ) file: str = Field( @@ -43,12 +44,6 @@ class YamlDefaultTimeSeriesCollection(YamlTimeSeriesCollectionBase): description="Defines the type of time series input file.\n\n$ECALC_DOCS_KEYWORDS_URL/TYPE", ) - interpolation_type: Literal["RIGHT"] = Field( - None, - title="INTERPOLATION_TYPE", - description="Defines how the time series are interpolated between input time steps.\n\n$ECALC_DOCS_KEYWORDS_URL/INTERPOLATION_TYPE", - ) - class YamlMiscellaneousTimeSeriesCollection(YamlTimeSeriesCollectionBase): type: Literal["MISCELLANEOUS"] = Field( diff --git a/src/tests/ecalc_cli/test_app.py b/src/tests/ecalc_cli/test_app.py index 6a0c497b90..b394a6e16a 100644 --- a/src/tests/ecalc_cli/test_app.py +++ b/src/tests/ecalc_cli/test_app.py @@ -16,7 +16,7 @@ from libecalc.common.errors.exceptions import EcalcError from libecalc.common.run_info import RunInfo from libecalc.dto.utils.validators import COMPONENT_NAME_ALLOWED_CHARS -from libecalc.presentation.yaml.model import ModelValidationException +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.yaml_entities import ResourceStream from libecalc.presentation.yaml.yaml_models.exceptions import YamlError from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel diff --git a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py index ee031ae13e..b1203d76f7 100644 --- a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py +++ b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries.py @@ -2,20 +2,18 @@ from datetime import datetime from typing import Dict, Optional +import pydantic import pytest from inline_snapshot import snapshot +from pydantic import TypeAdapter from libecalc.dto import TimeSeriesType from libecalc.dto.types import InterpolationType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - MiscellaneousTimeSeriesCollection, -) -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection_mapper import ( - TimeSeriesCollectionMapper, -) -from libecalc.presentation.yaml.validation_errors import DtoValidationError, ValidationError +from libecalc.presentation.yaml.domain.time_series_collection import TimeSeriesCollection +from libecalc.presentation.yaml.validation_errors import ValidationError from libecalc.presentation.yaml.yaml_entities import MemoryResource from libecalc.presentation.yaml.yaml_keywords import EcalcYamlKeywords +from libecalc.presentation.yaml.yaml_types.time_series.yaml_time_series import YamlTimeSeriesCollection def _create_timeseries_data( @@ -39,7 +37,7 @@ def _create_timeseries_data( timeseries_dict[EcalcYamlKeywords.time_series_extrapolate_outside_defined] = extrapolate_outside if interpolation_type is not None: - timeseries_dict[EcalcYamlKeywords.time_series_interpolation_type] = interpolation_type + timeseries_dict[EcalcYamlKeywords.time_series_interpolation_type] = interpolation_type.value return timeseries_dict @@ -47,8 +45,6 @@ def _create_timeseries_data( class TestTimeSeries: parameterized_valid_timeseries_data = [ ( - TimeSeriesType.MISCELLANEOUS, - MiscellaneousTimeSeriesCollection, TimeSeriesType.MISCELLANEOUS, True, True, @@ -58,13 +54,11 @@ class TestTimeSeries: ] @pytest.mark.parametrize( - "typ_string, typ_class, typ_enum, extrapolate, influence_time_vector, interpolation_type, extrapolate_result", + "typ_enum, extrapolate, influence_time_vector, interpolation_type, extrapolate_result", parameterized_valid_timeseries_data, ) def test_valid_minimal_timeserie_different_types( self, - typ_string, - typ_class, typ_enum, extrapolate, influence_time_vector, @@ -72,81 +66,113 @@ def test_valid_minimal_timeserie_different_types( extrapolate_result, ): filename = "test.csv" - resources = {filename: MemoryResource(headers=["DATE", "OIL_PROD"], data=[["01.01.2017"], [5016]])} - - timeseries_mapper = TimeSeriesCollectionMapper(resources=resources) - timeseries_model = timeseries_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=typ_string, - name="OIL_PROD", - file=filename, - extrapolate_outside=extrapolate, - interpolation_type=interpolation_type, - influence_time_vector=influence_time_vector, - ) + resource = MemoryResource(headers=["DATE", "OIL_PROD"], data=[["01.01.2017"], [5016]]) + + timeseries_model = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=typ_enum, + name="SIM1", + file=filename, + extrapolate_outside=extrapolate, + interpolation_type=interpolation_type, + influence_time_vector=influence_time_vector, + ) + ), ) - assert isinstance(timeseries_model, typ_class) - assert timeseries_model.typ == typ_enum - assert timeseries_model.headers == ["OIL_PROD"] - assert timeseries_model.time_vector == [datetime(2017, 1, 1)] - assert timeseries_model.columns == [[5016]] - assert timeseries_model.extrapolate_outside_defined_time_interval is extrapolate_result - assert timeseries_model.influence_time_vector is True - assert timeseries_model.interpolation_type == InterpolationType.LEFT + assert timeseries_model.name == "SIM1" + assert timeseries_model.get_time_series_references() == ["OIL_PROD"] + assert timeseries_model.get_time_vector() == [datetime(2017, 1, 1)] + assert timeseries_model.should_influence_time_vector() is True + + time_series = timeseries_model.get_time_series("OIL_PROD") + assert time_series.series == [5016] + assert time_series.time_vector == [datetime(2017, 1, 1)] + assert time_series._extrapolate is extrapolate_result + assert timeseries_model._interpolation == InterpolationType.LEFT def test_valid_time_series_multiple_columns(self): + """ + Test TimeSeriesCollection.type 'DEFAULT' defaults + """ filename = "test_multiple_columns.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN1", "COLUMN2", "COLUMN3"], - data=[["01.01.2015", "01.01.2016"], [1, 2], [3, 4], [5, 6]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - time_series_dto = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", "COLUMN1", "COLUMN2", "COLUMN3"], + data=[["01.01.2015", "01.01.2016"], [1, 2], [3, 4], [5, 6]], ) - assert time_series_dto.columns == [[1, 2], [3, 4], [5, 6]] - assert time_series_dto.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] - assert time_series_dto.headers == ["COLUMN1", "COLUMN2", "COLUMN3"] - assert time_series_dto.typ == TimeSeriesType.DEFAULT + timeseries_model = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + interpolation_type=None, + influence_time_vector=True, + ) + ), + ) + + assert timeseries_model.name == "SIM1" + assert timeseries_model.get_time_series_references() == ["COLUMN1", "COLUMN2", "COLUMN3"] + assert timeseries_model.get_time_vector() == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert timeseries_model.should_influence_time_vector() is True + + time_series = timeseries_model.get_time_series("COLUMN1") + assert time_series.series == [1, 2] + assert time_series.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert time_series._extrapolate is False + assert timeseries_model._interpolation == InterpolationType.RIGHT + + time_series = timeseries_model.get_time_series("COLUMN2") + assert time_series.series == [3, 4] + assert time_series.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert time_series._extrapolate is False + assert timeseries_model._interpolation == InterpolationType.RIGHT + + time_series = timeseries_model.get_time_series("COLUMN3") + assert time_series.series == [5, 6] + assert time_series.time_vector == [datetime(2015, 1, 1), datetime(2016, 1, 1)] + assert time_series._extrapolate is False + assert timeseries_model._interpolation == InterpolationType.RIGHT def test_valid_time_series_unsorted(self): filename = "test_unsorted.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN1", "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - time_series_dto = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", "COLUMN1", "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], ) - assert time_series_dto.columns == [ - [3, 1, 2], - [1, 2, 3], - ] - assert time_series_dto.time_vector == [datetime(1900, 1, 1), datetime(2015, 1, 1), datetime(2016, 1, 1)] - assert time_series_dto.headers == ["COLUMN1", "COLUMN2"] - assert time_series_dto.typ == TimeSeriesType.DEFAULT + timeseries_model = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), + ) + + assert timeseries_model.name == "SIM1" + assert timeseries_model.get_time_series_references() == ["COLUMN1", "COLUMN2"] + assert timeseries_model.get_time_vector() == [datetime(2015, 1, 1), datetime(2016, 1, 1), datetime(1900, 1, 1)] + assert timeseries_model.should_influence_time_vector() is True + + time_series = timeseries_model.get_time_series("COLUMN1") + assert time_series.series == [3, 1, 2] + assert time_series.time_vector == [datetime(1900, 1, 1), datetime(2015, 1, 1), datetime(2016, 1, 1)] + + time_series = timeseries_model.get_time_series("COLUMN2") + assert time_series.series == [1, 2, 3] + assert time_series.time_vector == [datetime(1900, 1, 1), datetime(2015, 1, 1), datetime(2016, 1, 1)] parameterized_invalid_timeseries_data = [ # headers, data mismatch (+1) @@ -154,8 +180,19 @@ def test_valid_time_series_unsorted(self): ["DATE", "OIL_PROD", "BVBV"], [["01.01.2017"], [5016]], snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, The number of columns provided do not match for header and data: data: 1, headers: 2 +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Missing column: Column matching header 'BVBV' is missing. """), ), # no data @@ -163,8 +200,19 @@ def test_valid_time_series_unsorted(self): ["DATE", "DUMMY"], [["01.01.2017"]], snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, Data vector must at least have one column +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Missing column: Column matching header 'DUMMY' is missing. """), ), # no time @@ -172,51 +220,148 @@ def test_valid_time_series_unsorted(self): ["DATE", "OIL_PROD"], [[], [5016]], snapshot("""\ -Location: MISCELLANEOUS.time_vector -Message: Value error, Time vectors must have at least one record +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid time series resource: The time vector is empty """), ), # no headers ( [], [["01.01.2017"], [5016]], - snapshot("Invalid resource: Resource must at least have one column"), + snapshot("""\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid resource: Resource must at least have one column +"""), ), # mismatch data, time ( ["DATE", "OIL_PROD"], [["01.01.2017", "01.01.2018"], [5016]], - snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, The number of records for times and data do not match: data: 1, time_vector: 2 -"""), + snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Rows mismatch: The number of records for times and data do not match: data: 1, time_vector: 2 +""" + ), ), # mismatch data, time ( ["DATE", "OIL_PROD"], [["01.01.2017"], [5016, 5026]], - snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, The number of records for times and data do not match: data: 2, time_vector: 1 -"""), + snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Rows mismatch: The number of records for times and data do not match: data: 2, time_vector: 1 +""" + ), ), # no data cols ( ["DATE", "HEADER"], [["01.01.2017"]], snapshot("""\ -Location: MISCELLANEOUS -Message: Value error, Data vector must at least have one column +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Missing column: Column matching header 'HEADER' is missing. """), ), # duplicate dates ( ["DATE", "HEADER"], [["01.01.2015", "01.01.2016", "01.01.2017", "01.01.2017"], [5016, 5036, 5026, 5216]], - snapshot("""\ -Location: MISCELLANEOUS.time_vector -Message: Value error, The list of dates have duplicates. Duplicated dates are currently not supported. -"""), + snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid time series resource: The time series resource contains duplicate dates: 2017-01-01 00:00:00 +""" + ), + ), + # string values + ( + ["DATE", "HEADER"], + [["01.01.2015", "01.01.2016", "01.01.2017"], [5016, 5036, "invalid"]], + snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test.csv + type: MISCELLANEOUS + influence_time_vector: true + extrapolation: true + interpolation_type: LINEAR + ... + + Location: FILE + Message: Invalid column: The timeseries column 'HEADER' contains non-numeric values in row 3. +""" + ), ), ] @@ -228,35 +373,37 @@ def test_valid_time_series_unsorted(self): ) def test_invalid_timeseries(self, headers, columns, error_message): filename = "test.csv" - resources = { - filename: MemoryResource( - headers=headers, - data=columns, - ) - } + resource = MemoryResource( + headers=headers, + data=columns, + ) - timeseries_mapper = TimeSeriesCollectionMapper(resources=resources) with pytest.raises(ValidationError) as ve: - timeseries_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.MISCELLANEOUS, - name="OIL_PROD", - file=filename, - extrapolate_outside=True, - interpolation_type=InterpolationType.LINEAR, - ) + TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.MISCELLANEOUS, + name="SIM1", + file=filename, + extrapolate_outside=True, + interpolation_type=InterpolationType.LINEAR, + ) + ), ) assert str(ve.value) == error_message def test_timeseries_with_int_as_date(self): filename = "sim1.csv" - resources = {filename: MemoryResource(headers=["DATE", "HEADER1"], data=[[2012, 2013, 2014], [1, 2, 3]])} - timeseries_mapper = TimeSeriesCollectionMapper(resources=resources) - timeseries_dto = timeseries_mapper.from_yaml_to_dto( - _create_timeseries_data(typ=TimeSeriesType.DEFAULT, name="SIM1", file=filename) + resource = MemoryResource(headers=["DATE", "HEADER1"], data=[[2012, 2013, 2014], [1, 2, 3]]) + time_series_collection = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data(typ=TimeSeriesType.DEFAULT, name="SIM1", file=filename), + ), ) - assert timeseries_dto.time_vector == [ + assert time_series_collection.get_time_vector() == [ datetime(2012, 1, 1), datetime(2013, 1, 1), datetime(2014, 1, 1), @@ -268,33 +415,31 @@ def test_timeseries_with_int_as_date(self): ) def test_invalid_time_series_headers(self, header): filename = "test_invalid_headers.csv" - resources = { - filename: MemoryResource( - headers=["DATE", header, "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as ve: - time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", header, "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], + ) + + with pytest.raises(ValidationError) as ve: + TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), ) - error_message = str(ve.value.extended_message) + error_message = str(ve.value) assert "SIM1" in error_message assert ( - "DEFAULT.headers[0]" in error_message - ) # This should probably not be required, does not make sense to user as it isn't related to the yaml path/location. - assert ( - "The string/name contains illegal characters. Allowed characters are: ^[A-Za-z][A-Za-z0-9_.,\\-\\s#+:\\/]*$" + "The time series resource header contains illegal characters. Allowed characters are: ^[A-Za-z][A-Za-z0-9_.,\\-\\s#+:\\/]*$" in error_message ) @@ -304,25 +449,26 @@ def test_invalid_time_series_headers(self, header): ) def test_valid_time_series_headers(self, header): filename = "test_valid_headers.csv" - resources = { - filename: MemoryResource( - headers=["DATE", header, "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - time_series_dto = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", header, "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], ) - assert time_series_dto.headers == [header, "COLUMN2"] + time_series_collection = TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), + ) + + assert time_series_collection.get_time_series_references() == [header, "COLUMN2"] @pytest.mark.parametrize( "resource_name", @@ -330,16 +476,9 @@ def test_valid_time_series_headers(self, header): ) def test_invalid_resource_names(self, resource_name): filename = "test_invalid_resource_names.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN1", "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3], [2, 3, 1]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as ve: - time_series_mapper.from_yaml_to_dto( + with pytest.raises(pydantic.ValidationError) as ve: + TypeAdapter(YamlTimeSeriesCollection).validate_python( _create_timeseries_data( typ=TimeSeriesType.DEFAULT, name=resource_name, @@ -350,114 +489,60 @@ def test_invalid_resource_names(self, resource_name): ) ) - error_message = str(ve.value.extended_message) + error_message = str(ve.value) assert resource_name in error_message - assert ( - "The string/name contains illegal characters. Allowed characters are: ^[A-Za-z][A-Za-z0-9_]*$" - in error_message - ) - - def test_interpretation_of_interpolation_type_for_default_resource(self): - """Check default interpolation for DEFAULT time series.""" - filename = "test_interpretation_of_rate_interpolation_type_for_reservoir_resource.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "GAS_PROD"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - - time_series_explicit_none = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) - ) - assert time_series_explicit_none.interpolation_type == InterpolationType.RIGHT - - time_series_implicit_none = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - ) - ) - - assert time_series_implicit_none.interpolation_type == InterpolationType.RIGHT + assert "String should match pattern '^[A-Za-z][A-Za-z0-9_]*$' " in error_message def test_undefined_type_for_miscellaneous_resource(self): """Check that MISCELLANEOUS fails if interpolation not defined.""" - filename = "test_interpretation_of_rate_interpolation_type_for_reservoir_resource.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "GAS_PROD"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as ve: - time_series_mapper.from_yaml_to_dto( + with pytest.raises(pydantic.ValidationError) as ve: + TypeAdapter(YamlTimeSeriesCollection).validate_python( _create_timeseries_data( typ=TimeSeriesType.MISCELLANEOUS, name="SIM1", - file=filename, + file="test.csv", extrapolate_outside=None, influence_time_vector=True, ) ) - assert isinstance(ve.value, DtoValidationError) - - def test_left_interpolation_type_for_miscellaneous_resource(self): - """Check that LEFT is used when specified for MISCELLANEOUS.""" - filename = "test_interpretation_of_rate_interpolation_type_for_reservoir_resource.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "GAS_PROD"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, 3]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - - time_series = time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.MISCELLANEOUS, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=InterpolationType.LEFT, - ) - ) - assert time_series.interpolation_type == InterpolationType.LEFT + + assert isinstance(ve.value, pydantic.ValidationError) def test_error_if_nan_data(self): filename = "test_invalid_data.csv" - resources = { - filename: MemoryResource( - headers=["DATE", "COLUMN2"], - data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, math.nan]], - ) - } - time_series_mapper = TimeSeriesCollectionMapper(resources=resources) - with pytest.raises(DtoValidationError) as exc_info: - time_series_mapper.from_yaml_to_dto( - _create_timeseries_data( - typ=TimeSeriesType.DEFAULT, - name="SIM1", - file=filename, - extrapolate_outside=None, - influence_time_vector=True, - interpolation_type=None, - ) + resource = MemoryResource( + headers=["DATE", "COLUMN2"], + data=[["01.01.2015", "01.01.2016", "01.01.1900"], [1, 2, math.nan]], + ) + with pytest.raises(ValidationError) as exc_info: + TimeSeriesCollection.from_yaml( + resource=resource, + yaml_collection=TypeAdapter(YamlTimeSeriesCollection).validate_python( + _create_timeseries_data( + typ=TimeSeriesType.DEFAULT, + name="SIM1", + file=filename, + extrapolate_outside=None, + influence_time_vector=True, + interpolation_type=None, + ) + ), ) - - assert "The timeseries column 'SIM1;COLUMN2' contains empty values." in str(exc_info.value) + message = str(exc_info.value) + assert message == snapshot( + """\ +Validation error + + ... + name: SIM1 + file: test_invalid_data.csv + type: DEFAULT + influence_time_vector: true + ... + + Location: FILE + Message: Invalid column: The timeseries column 'COLUMN2' contains empty values in row 3. +""" + ) diff --git a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py index b6e552a264..7a088b41f4 100644 --- a/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py +++ b/src/tests/libecalc/input/mappers/variables_mapper/test_timeseries_utils.py @@ -5,43 +5,33 @@ import libecalc.common.time_utils from libecalc.common.time_utils import Frequency from libecalc.dto.types import InterpolationType -from libecalc.presentation.yaml.mappers.variables_mapper.time_series_collection import ( - MiscellaneousTimeSeriesCollection, -) -from libecalc.presentation.yaml.mappers.variables_mapper.timeseries_utils import ( +from libecalc.presentation.yaml.domain.time_series import TimeSeries +from libecalc.presentation.yaml.mappers.variables_mapper.get_global_time_vector import ( _get_end_boundary, - fit_time_series_to_time_vector, get_global_time_vector, ) from libecalc.presentation.yaml.validation_errors import ValidationError -@pytest.fixture -def miscellaneous_time_series_collection_yearly(): - return MiscellaneousTimeSeriesCollection( - name="test", - headers=["COL1_RATE", "COL2"], - columns=[[1, 2, 3, 4], [2, 4, 6, 8]], - time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], - interpolation_type=InterpolationType.RIGHT, - ) - - -@pytest.fixture -def miscellaneous_time_series_collection_single_date(): - return MiscellaneousTimeSeriesCollection( - name="test", - headers=["COL1_RATE", "COL2"], - columns=[[3], [6]], +def create_single_date_time_series(interpolation_type: InterpolationType, extrapolate: bool) -> TimeSeries: + return TimeSeries( + reference_id="COL1_RATE", time_vector=[datetime(2012, 1, 1)], - interpolation_type=InterpolationType.RIGHT, + series=[3], + interpolation_type=interpolation_type, + extrapolate=extrapolate, ) class TestGetGlobalTimeVector: - def test_single_collection(self, miscellaneous_time_series_collection_yearly): + def test_single_collection(self): global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], ) assert global_time_vector == [ @@ -51,9 +41,14 @@ def test_single_collection(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - def test_single_collection_with_monthly_frequency(self, miscellaneous_time_series_collection_yearly): + def test_single_collection_with_monthly_frequency(self): global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], frequency=libecalc.common.time_utils.Frequency.MONTH, ) @@ -125,51 +120,73 @@ def test_single_collection_with_yearly_frequency(self): datetime(2011, 11, 1), datetime(2011, 12, 1), ] - time_series_collection = MiscellaneousTimeSeriesCollection( - name="test", - headers=["COL1", "COL2"], - columns=[[1.0] * len(time_vector), [2.0] * len(time_vector)], - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - ) global_time_vector = get_global_time_vector( - time_series_collections=[time_series_collection], frequency=libecalc.common.time_utils.Frequency.YEAR + time_series_time_vector=time_vector, frequency=libecalc.common.time_utils.Frequency.YEAR ) # Time vector is not filtered based on frequency, only there to make sure all frequencies are present. time_vector.append(datetime(2012, 1, 1)) assert global_time_vector == time_vector - def test_trim_start(self, miscellaneous_time_series_collection_yearly): + def test_trim_start(self): # trim with date already present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], start=datetime(2011, 1, 1) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + start=datetime(2011, 1, 1), ) assert global_time_vector == [datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)] # trim with date not present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], start=datetime(2011, 1, 2) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + start=datetime(2011, 1, 2), ) assert global_time_vector == [datetime(2011, 1, 2), datetime(2012, 1, 1), datetime(2013, 1, 1)] - def test_trim_end(self, miscellaneous_time_series_collection_yearly): + def test_trim_end(self): # trim with date already present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], end=datetime(2011, 1, 1) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + end=datetime(2011, 1, 1), ) assert global_time_vector == [datetime(2010, 1, 1), datetime(2011, 1, 1)] # trim with date not present global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], end=datetime(2011, 2, 2) + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], + end=datetime(2011, 2, 2), ) assert global_time_vector == [datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2011, 2, 2)] - def test_additional_dates(self, miscellaneous_time_series_collection_yearly): + def test_additional_dates(self): global_time_vector = get_global_time_vector( - time_series_collections=[miscellaneous_time_series_collection_yearly], + time_series_time_vector=[ + datetime(2010, 1, 1), + datetime(2011, 1, 1), + datetime(2012, 1, 1), + datetime(2013, 1, 1), + ], additional_dates={datetime(2011, 6, 1), datetime(2013, 2, 1)}, ) @@ -185,56 +202,64 @@ def test_additional_dates(self, miscellaneous_time_series_collection_yearly): def test_only_start_and_frequency(self): assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), frequency=Frequency.YEAR + time_series_time_vector=[], start=datetime(2020, 1, 1), frequency=Frequency.YEAR ) == [datetime(2020, 1, 1), datetime(2021, 1, 1)] assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), frequency=Frequency.MONTH + time_series_time_vector=[], start=datetime(2020, 1, 1), frequency=Frequency.MONTH ) == [datetime(2020, 1, 1), datetime(2020, 2, 1)] assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), frequency=Frequency.DAY + time_series_time_vector=[], start=datetime(2020, 1, 1), frequency=Frequency.DAY ) == [datetime(2020, 1, 1), datetime(2020, 1, 2)] def test_only_start_and_end(self): assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), end=datetime(2021, 1, 1) + time_series_time_vector=[], start=datetime(2020, 1, 1), end=datetime(2021, 1, 1) ) == [datetime(2020, 1, 1), datetime(2021, 1, 1)] assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), end=datetime(2020, 2, 1) + time_series_time_vector=[], start=datetime(2020, 1, 1), end=datetime(2020, 2, 1) ) == [datetime(2020, 1, 1), datetime(2020, 2, 1)] assert get_global_time_vector( - time_series_collections=[], start=datetime(2020, 1, 1), end=datetime(2020, 1, 2) + time_series_time_vector=[], start=datetime(2020, 1, 1), end=datetime(2020, 1, 2) ) == [datetime(2020, 1, 1), datetime(2020, 1, 2)] def test_only_start(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], start=datetime(2020, 1, 1)) + get_global_time_vector(time_series_time_vector=[], start=datetime(2020, 1, 1)) assert "No time series found" in str(exc_info.value) def test_only_end(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], end=datetime(2020, 1, 1)) + get_global_time_vector(time_series_time_vector=[], end=datetime(2020, 1, 1)) assert "No time series found" in str(exc_info.value) def test_only_freq(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], frequency=Frequency.YEAR) + get_global_time_vector(time_series_time_vector=[], frequency=Frequency.YEAR) assert "No time series found" in str(exc_info.value) def test_only_freq_and_end(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[], frequency=Frequency.YEAR, end=datetime(2020, 1, 1)) + get_global_time_vector(time_series_time_vector=[], frequency=Frequency.YEAR, end=datetime(2020, 1, 1)) assert "No time series found" in str(exc_info.value) def test_only_empty_time_series(self): with pytest.raises(ValidationError) as exc_info: - get_global_time_vector(time_series_collections=[]) + get_global_time_vector(time_series_time_vector=[]) assert "No time series found" in str(exc_info.value) -class TestFitTimeSeriesToTimeVector: - def test_interpolate_linear(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series +def create_time_series(interpolation_type: InterpolationType, extrapolate: bool): + return TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], + interpolation_type=interpolation_type, + extrapolate=extrapolate, + ) + +class TestFitTimeSeriesToTimeVector: + def test_interpolate_linear(self): time_vector = [ datetime(2010, 1, 1), datetime(2011, 1, 1), @@ -243,31 +268,19 @@ def test_interpolate_linear(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LINEAR, - extrapolate_outside_defined_time_interval=False, + extrapolate=False, ) + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) # Interpolate based on interpolation type - assert fitted_rate_time_series == [1, 2, 2.4136986301369863, 3, 4] - - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, - ) - - # Interpolate based on interpolation type - assert fitted_time_series == [2.0, 4.0, 4.0, 6.0, 8.0] - - def test_interpolate_left(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + assert fitted_rate_time_series.series == [1, 2, 2.4136986301369863, 3, 4] + def test_interpolate_left(self): time_vector = [ datetime(2010, 1, 1), datetime(2011, 1, 1), @@ -276,32 +289,21 @@ def test_interpolate_left(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - rate_time_series = time_series[0] - - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LEFT, - extrapolate_outside_defined_time_interval=False, + extrapolate=False, ) - # Interpolate based on interpolation type - assert fitted_rate_time_series == [1, 2, 3, 3, 4] - - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.LEFT, - extrapolate_outside_defined_time_interval=False, - ) + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) # Interpolate based on interpolation type - assert fitted_time_series == [2.0, 4.0, 6.0, 6.0, 8.0] - - def test_interpolate_right(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + assert fitted_rate_time_series.series == [1, 2, 3, 3, 4] + assert fitted_rate_time_series.time_vector == time_vector + def test_interpolate_right(self): time_vector = [ datetime(2010, 1, 1), datetime(2011, 1, 1), @@ -310,30 +312,20 @@ def test_interpolate_right(self, miscellaneous_time_series_collection_yearly): datetime(2013, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, - ) - - # Interpolate based on interpolation type - assert fitted_rate_time_series == [1, 2, 2, 3, 4] - - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, + extrapolate=False, ) + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) # Interpolate based on interpolation type - assert fitted_time_series == [2.0, 4.0, 4.0, 6.0, 8.0] + assert fitted_rate_time_series.series == [1, 2, 2, 3, 4] + assert fitted_rate_time_series.time_vector == time_vector - def test_extrapolate_outside_true(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + def test_extrapolate_outside_true(self): time_vector = [ datetime(2009, 1, 1), datetime(2010, 1, 1), @@ -343,28 +335,19 @@ def test_extrapolate_outside_true(self, miscellaneous_time_series_collection_yea datetime(2014, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, - extrapolate_outside_defined_time_interval=True, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LINEAR, + extrapolate=True, ) - # Rate should use extrapolate_outside_defined_time_interval to decide extrapolation - assert fitted_rate_time_series == [0, 1, 2, 3, 4, 4] - - # Check that Non-rate behaves like rate - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=True, - ) - assert fitted_time_series == [0, 2.0, 4.0, 6.0, 8.0, 8.0] + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) + + assert fitted_rate_time_series.series == [0, 1, 2, 3, 4, 4] + assert fitted_rate_time_series.time_vector == time_vector - def test_extrapolate_outside_false(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series + def test_extrapolate_outside_false(self): time_vector = [ datetime(2009, 1, 1), datetime(2010, 1, 1), @@ -374,31 +357,19 @@ def test_extrapolate_outside_false(self, miscellaneous_time_series_collection_ye datetime(2014, 1, 1), ] - rate_time_series = time_series[0] - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector, - extrapolate_outside_defined_time_interval=False, + rate_time_series = TimeSeries( + reference_id="COL1_RATE", + time_vector=[datetime(2010, 1, 1), datetime(2011, 1, 1), datetime(2012, 1, 1), datetime(2013, 1, 1)], + series=[1, 2, 3, 4], interpolation_type=InterpolationType.LINEAR, + extrapolate=False, ) - # Rate should use extrapolate_outside_defined_time_interval to decide extrapolation - assert fitted_rate_time_series == [0, 1.0, 2.0, 3.0, 4.0, 0.0] - - # Check that Non-rate behaves like rate - non_rate_time_series = time_series[1] - fitted_time_series = fit_time_series_to_time_vector( - time_series=non_rate_time_series, - time_vector=time_vector, - interpolation_type=InterpolationType.RIGHT, - extrapolate_outside_defined_time_interval=False, - ) - assert fitted_time_series == [0.0, 2.0, 4.0, 6.0, 8.0, 0.0] + fitted_rate_time_series = rate_time_series.fit_to_time_vector(time_vector) + assert fitted_rate_time_series.series == [0, 1.0, 2.0, 3.0, 4.0, 0.0] + assert fitted_rate_time_series.time_vector == time_vector - def test_interpolate_to_shorter_global_time_vector(self, miscellaneous_time_series_collection_yearly): - time_series = miscellaneous_time_series_collection_yearly.time_series - rate_time_series = time_series[0] - - time_vector = [ + def test_interpolate_to_shorter_global_time_vector(self): + all_times = [ datetime(2009, 1, 1), datetime(2011, 1, 1), datetime(2012, 7, 1), @@ -406,44 +377,39 @@ def test_interpolate_to_shorter_global_time_vector(self, miscellaneous_time_seri ] for i in range(1, 5): - fitted_rate_time_series = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector[0:i], - extrapolate_outside_defined_time_interval=True, + current_time_vector = all_times[0:i] + fitted_rate_time_series = create_time_series( interpolation_type=InterpolationType.RIGHT, - ) - assert fitted_rate_time_series == [0, 2, 3, 4][0:i] - fitted_rate_time_series_shifted_left = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector[0:i], - extrapolate_outside_defined_time_interval=True, + extrapolate=True, + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series.series == [0, 2, 3, 4][0:i] + assert fitted_rate_time_series.time_vector == current_time_vector + + fitted_rate_time_series_shifted_left = create_time_series( interpolation_type=InterpolationType.LEFT, - ) - assert fitted_rate_time_series_shifted_left == [0, 2, 4, 4][0:i] - fitted_rate_time_series_shifted_linear = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=time_vector[0:i], - extrapolate_outside_defined_time_interval=True, + extrapolate=True, + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series_shifted_left.series == [0, 2, 4, 4][0:i] + assert fitted_rate_time_series_shifted_left.time_vector == current_time_vector + + fitted_rate_time_series_shifted_linear = create_time_series( interpolation_type=InterpolationType.LINEAR, - ) - assert fitted_rate_time_series_shifted_linear == [0, 2, 3.4972677595628414, 4][0:i] + extrapolate=True, + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series_shifted_linear.series == [0, 2, 3.4972677595628414, 4][0:i] + assert fitted_rate_time_series_shifted_linear.time_vector == current_time_vector for rate_interp_type in [InterpolationType.LEFT, InterpolationType.RIGHT, InterpolationType.LINEAR]: - fitted_rate_time_series_outside_interval_no_extrapolation = fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[-1]], - extrapolate_outside_defined_time_interval=False, + current_time_vector = [all_times[-1]] + fitted_rate_time_series_outside_interval_no_extrapolation = create_time_series( + extrapolate=False, interpolation_type=rate_interp_type, - ) - assert fitted_rate_time_series_outside_interval_no_extrapolation == [0] - - def test_interpolate_single_date_to_single_date_global_time_vector( - self, miscellaneous_time_series_collection_single_date - ): - time_series = miscellaneous_time_series_collection_single_date.time_series - rate_time_series = time_series[0] + ).fit_to_time_vector(current_time_vector) + assert fitted_rate_time_series_outside_interval_no_extrapolation.series == [0] + assert fitted_rate_time_series_outside_interval_no_extrapolation.time_vector == current_time_vector - time_vector = [ + def test_interpolate_single_date_to_single_date_global_time_vector(self): + all_times = [ datetime(2011, 7, 1), datetime(2012, 1, 1), datetime(2012, 1, 2), @@ -456,29 +422,21 @@ def test_interpolate_single_date_to_single_date_global_time_vector( fitted_rate_time_series_left_with_extrapolation = [] fitted_rate_time_series_linear_with_extrapolation = [] for i in range(3): + current_time_vector = [all_times[i]] fitted_rate_time_series_right_with_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=True, - interpolation_type=InterpolationType.RIGHT, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.RIGHT, extrapolate=True) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_left_with_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=True, - interpolation_type=InterpolationType.LEFT, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.LEFT, extrapolate=True) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_linear_with_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=True, - interpolation_type=InterpolationType.LINEAR, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.LINEAR, extrapolate=True) + .fit_to_time_vector(current_time_vector) + .series[0] ) assert fitted_rate_time_series_right_with_extrapolation == expected_with_extrapolation @@ -489,35 +447,29 @@ def test_interpolate_single_date_to_single_date_global_time_vector( fitted_rate_time_series_left_without_extrapolation = [] fitted_rate_time_series_linear_without_extrapolation = [] for i in range(3): + current_time_vector = [all_times[i]] fitted_rate_time_series_right_without_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=False, - interpolation_type=InterpolationType.RIGHT, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.RIGHT, extrapolate=False) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_left_without_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=False, - interpolation_type=InterpolationType.LEFT, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.LEFT, extrapolate=False) + .fit_to_time_vector(current_time_vector) + .series[0] ) fitted_rate_time_series_linear_without_extrapolation.append( - fit_time_series_to_time_vector( - time_series=rate_time_series, - time_vector=[time_vector[i]], - extrapolate_outside_defined_time_interval=False, - interpolation_type=InterpolationType.LINEAR, - )[0] + create_single_date_time_series(interpolation_type=InterpolationType.LINEAR, extrapolate=False) + .fit_to_time_vector(current_time_vector) + .series[0] ) assert fitted_rate_time_series_right_without_extrapolation == expected_without_extrapolation assert fitted_rate_time_series_left_without_extrapolation == expected_without_extrapolation assert fitted_rate_time_series_linear_without_extrapolation == expected_without_extrapolation + +class TestGetEndBoundary: @pytest.mark.parametrize( "what, end_date, dates", [ diff --git a/src/tests/libecalc/input/test_file_io.py b/src/tests/libecalc/input/test_file_io.py index 67ae10d28f..ea2fbbd73c 100644 --- a/src/tests/libecalc/input/test_file_io.py +++ b/src/tests/libecalc/input/test_file_io.py @@ -8,7 +8,7 @@ from inline_snapshot import snapshot from ecalc_cli.infrastructure.file_resource_service import FileResourceService -from libecalc.common.errors.exceptions import EcalcError, HeaderNotFound +from libecalc.common.errors.exceptions import EcalcError, InvalidHeaderException from libecalc.fixtures.cases import input_file_examples from libecalc.infrastructure import file_io from libecalc.presentation.yaml import yaml_entities @@ -165,9 +165,9 @@ def test_valid_characters(self, tmp_path_fixture, csv_line: str, is_valid_charac @pytest.mark.snapshot @pytest.mark.inlinesnapshot def test_missing_headers(self, tmp_path_fixture): - with pytest.raises(HeaderNotFound) as e: + with pytest.raises(InvalidHeaderException) as e: file_io.read_facility_resource(create_csv_from_line(tmp_path_fixture, "HEADER1 ,,HEADER3")) - assert str(e.value) == snapshot("Missing header(s): Header 'Unnamed: 1' not found") + assert str(e.value) == snapshot("Invalid header: One or more headers are missing in resource") @pytest.fixture @@ -319,7 +319,7 @@ def test_time_series_missing_headers(self): ) assert str(e.value) == snapshot( - "Failed to read resource: Failed to read base_profile_missing_header_oil_prod.csv: Missing header(s): Header 'Unnamed: 1' not found" + "Failed to read resource: Failed to read base_profile_missing_header_oil_prod.csv: Invalid header: One or more headers are missing in resource" ) @pytest.mark.snapshot @@ -346,7 +346,7 @@ def test_facility_input_missing_headers(self): ) assert str(e.value) == snapshot( - "Failed to read resource: Failed to read tabular_missing_header_fuel.csv: Missing header(s): Header 'Unnamed: 1' not found" + "Failed to read resource: Failed to read tabular_missing_header_fuel.csv: Invalid header: One or more headers are missing in resource" ) diff --git a/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py b/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py index 30295de9cc..80f324e51c 100644 --- a/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py +++ b/src/tests/libecalc/presentation/yaml/test_venting_emitter_validation_errors.py @@ -7,7 +7,7 @@ from libecalc.fixtures.cases.venting_emitters.venting_emitter_yaml import ( venting_emitter_yaml_factory, ) -from libecalc.presentation.yaml.model import ModelValidationException +from libecalc.presentation.yaml.model_validation_exception import ModelValidationException from libecalc.presentation.yaml.yaml_types.yaml_stream_conditions import ( YamlEmissionRateUnits, YamlOilRateUnits,