refactor: time series collection, resource handling

Create domain objects for TimeSeriesResource, TimeSeriesCollection++. This should make the behavior more clear, and provide more flexibility in the future. Previously, time_series_collection.py and time_series_collection_mapper.py did a lot of stuff. In addition to dealing with the resource data and validating that, the yaml data was also validated. As this was bundled it was difficult to reuse some of the logic. Each separate step should now be available by using the correct class.
equinor · Sep 23, 2024 · cf80ce8 · cf80ce8
1 parent 2190f99
commit cf80ce8
Show file tree

Hide file tree

Showing 31 changed files with 1,219 additions and 1,051 deletions.
diff --git a/examples/simple_yaml_model.ipynb b/examples/simple_yaml_model.ipynb
@@ -81,14 +81,14 @@
     "    output_frequency=Frequency.NONE,\n",
     ")\n",
     "\n",
-    "model = EnergyCalculator(graph=yaml_model.graph)\n",
+    "model = EnergyCalculator(graph=yaml_model.get_graph())\n",
     "consumer_results = model.evaluate_energy_usage(yaml_model.variables)\n",
     "emission_results = model.evaluate_emissions(\n",
     "    variables_map=yaml_model.variables,\n",
     "    consumer_results=consumer_results,\n",
     ")\n",
     "result = GraphResult(\n",
-    "    graph=yaml_model.graph,\n",
+    "    graph=yaml_model.get_graph(),\n",
     "    consumer_results=consumer_results,\n",
     "    variables_map=yaml_model.variables,\n",
     "    emission_results=emission_results,\n",
@@ -114,7 +114,7 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "print(\"Iterating the model components to plot results: \\n\")\n",
-    "for identity, component in yaml_model.graph.nodes.items():\n",
+    "for identity, component in yaml_model.get_graph().nodes.items():\n",
     "    if identity in result.consumer_results:\n",
     "        component_result = result.consumer_results[identity].component_result\n",
     "        ds = pd.Series(component_result.energy_usage.values, index=component_result.energy_usage.timesteps)\n",

diff --git a/src/ecalc_cli/commands/run.py b/src/ecalc_cli/commands/run.py
@@ -117,7 +117,7 @@ def run(
         configuration_service=configuration_service,
         resource_service=resource_service,
         output_frequency=frequency,
-    )
+    ).validate_for_run()
 
     if (flow_diagram or ltp_export) and (model.start is None or model.end is None):
         logger.warning(
@@ -132,15 +132,15 @@ def run(
             name_prefix=name_prefix,
         )
 
-    energy_calculator = EnergyCalculator(graph=model.graph)
+    energy_calculator = EnergyCalculator(graph=model.get_graph())
     precision = 6
     consumer_results = energy_calculator.evaluate_energy_usage(model.variables)
     emission_results = energy_calculator.evaluate_emissions(
         variables_map=model.variables,
         consumer_results=consumer_results,
     )
     results_core = GraphResult(
-        graph=model.graph,
+        graph=model.get_graph(),
         consumer_results=consumer_results,
         variables_map=model.variables,
         emission_results=emission_results,

diff --git a/src/ecalc_cli/infrastructure/file_resource_service.py b/src/ecalc_cli/infrastructure/file_resource_service.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 from typing import Callable, Dict
 
-from libecalc.common.errors.exceptions import EcalcError, HeaderNotFound
+from libecalc.common.errors.exceptions import EcalcError, InvalidHeaderException
 from libecalc.common.logger import logger
 from libecalc.infrastructure.file_io import read_facility_resource, read_timeseries_resource
 from libecalc.presentation.yaml.resource import Resource
@@ -18,7 +18,7 @@ def __init__(self, working_directory: Path):
     def _read_resource(resource_name: Path, *args, read_func: Callable[..., MemoryResource]) -> MemoryResource:
         try:
             return read_func(resource_name, *args)
-        except (HeaderNotFound, ValueError) as exc:
+        except (InvalidHeaderException, ValueError) as exc:
             logger.error(str(exc))
             raise EcalcError("Failed to read resource", f"Failed to read {resource_name.name}: {str(exc)}") from exc
 

diff --git a/src/ecalc_cli/main.py b/src/ecalc_cli/main.py
@@ -7,7 +7,7 @@
 from ecalc_cli.commands.run import run
 from ecalc_cli.commands.selftest import selftest
 from ecalc_cli.logger import CLILogConfigurator, LogLevel, logger
-from libecalc.presentation.yaml.model import ModelValidationException
+from libecalc.presentation.yaml.model_validation_exception import ModelValidationException
 from libecalc.presentation.yaml.validation_errors import DataValidationError
 
 app = typer.Typer(name="ecalc")

diff --git a/src/libecalc/common/errors/exceptions.py b/src/libecalc/common/errors/exceptions.py
@@ -74,27 +74,49 @@ def __init__(self, message: str):
 class InvalidDateException(EcalcError): ...
 
 
-class InvalidResource(EcalcError):
+class InvalidResourceException(EcalcError):
     """
     Base exception for resource
     """
 
     pass
 
 
-class HeaderNotFound(InvalidResource):
+class InvalidHeaderException(InvalidResourceException):
+    def __init__(self, message: str):
+        super().__init__("Invalid header", message, error_type=EcalcErrorType.CLIENT_ERROR)
+
+
+class HeaderNotFoundException(InvalidResourceException):
     """Resource is missing header."""
 
     def __init__(self, header: str):
         self.header = header
         super().__init__("Missing header(s)", f"Header '{header}' not found", error_type=EcalcErrorType.CLIENT_ERROR)
 
 
-class ColumnNotFound(InvalidResource):
+class ColumnNotFoundException(InvalidResourceException):
     """Resource is missing column"""
 
     def __init__(self, header: str):
         self.header = header
         super().__init__(
             "Missing column", f"Column matching header '{header}' is missing.", error_type=EcalcErrorType.CLIENT_ERROR
         )
+
+
+class InvalidColumnException(InvalidResourceException):
+    def __init__(self, header: str, message: str, row: int = None):
+        self.header = header
+        self.row = row
+        super().__init__(
+            "Invalid column",
+            message.format(header=header, row=row),
+        )
+
+
+class NoColumnsException(InvalidResourceException):
+    """Resource contains no columns"""
+
+    def __init__(self):
+        super().__init__("No columns", "The resource contains no columns, it should have at least one.")
diff --git a/src/libecalc/common/string/string_utils.py b/src/libecalc/common/string/string_utils.py
@@ -1,7 +1,9 @@
-from typing import Iterable, Set
+from typing import Hashable, Iterable, Set, TypeVar
 
+TItem = TypeVar("TItem", bound=Hashable)
 
-def get_duplicates(names: Iterable[str]) -> Set[str]:
+
+def get_duplicates(names: Iterable[TItem]) -> Set[TItem]:
     seen = set()
     duplicates = set()
     for name in names:

diff --git a/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py b/src/libecalc/fixtures/cases/ltp_export/ltp_power_from_shore_yaml.py
@@ -1,16 +1,33 @@
+from io import StringIO
 from pathlib import Path
+from typing import Dict, Optional, cast
 
 import pytest
-import yaml
 
 from ecalc_cli.infrastructure.file_resource_service import FileResourceService
 from libecalc.common.time_utils import Frequency
-from libecalc.dto import ResultOptions
 from libecalc.expression.expression import ExpressionType
 from libecalc.fixtures.case_types import DTOCase
-from libecalc.presentation.yaml.mappers.variables_mapper import map_yaml_to_variables
-from libecalc.presentation.yaml.parse_input import map_yaml_to_dto
-from libecalc.presentation.yaml.yaml_models.pyyaml_yaml_model import PyYamlYamlModel
+from libecalc.presentation.yaml.configuration_service import ConfigurationService
+from libecalc.presentation.yaml.model import YamlModel
+from libecalc.presentation.yaml.yaml_entities import ResourceStream
+from libecalc.presentation.yaml.yaml_models.yaml_model import ReaderType, YamlConfiguration, YamlValidator
+
+
+class OverridableStreamConfigurationService(ConfigurationService):
+    def __init__(self, stream: ResourceStream, overrides: Optional[Dict] = None):
+        self._overrides = overrides
+        self._stream = stream
+
+    def get_configuration(self) -> YamlValidator:
+        main_yaml_model = YamlConfiguration.Builder.get_yaml_reader(ReaderType.PYYAML).read(
+            main_yaml=self._stream,
+            enable_include=True,
+        )
+
+        if self._overrides is not None:
+            main_yaml_model._internal_datamodel.update(self._overrides)
+        return cast(YamlValidator, main_yaml_model)
 
 
 @pytest.fixture
@@ -78,26 +95,17 @@ def _ltp_pfs_yaml_factory(
 
         """
 
-        yaml_text = yaml.safe_load(input_text)
-        configuration = PyYamlYamlModel(
-            internal_datamodel=yaml_text,
-            name="ltp_export",
-            instantiated_through_read=True,
+        configuration_service = OverridableStreamConfigurationService(
+            stream=ResourceStream(name="ltp_export", stream=StringIO(input_text))
         )
+        resource_service = FileResourceService(working_directory=path)
 
-        path = path
-
-        resources = FileResourceService._read_resources(configuration=configuration, working_directory=path)
-        variables = map_yaml_to_variables(
-            configuration,
-            resources=resources,
-            result_options=ResultOptions(
-                start=configuration.start,
-                end=configuration.end,
-                output_frequency=Frequency.YEAR,
-            ),
+        model = YamlModel(
+            configuration_service=configuration_service,
+            resource_service=resource_service,
+            output_frequency=Frequency.YEAR,
         )
-        yaml_model = map_yaml_to_dto(configuration=configuration, resources=resources)
-        return DTOCase(ecalc_model=yaml_model, variables=variables)
+
+        return DTOCase(ecalc_model=model.dto, variables=model.variables)
 
     return _ltp_pfs_yaml_factory
diff --git a/src/libecalc/infrastructure/file_io.py b/src/libecalc/infrastructure/file_io.py
@@ -17,7 +17,7 @@
 from libecalc.common.errors.exceptions import (
     EcalcError,
     EcalcErrorType,
-    HeaderNotFound,
+    InvalidHeaderException,
 )
 from libecalc.common.logger import logger
 from libecalc.presentation.yaml.yaml_entities import MemoryResource, YamlTimeseriesType
@@ -399,7 +399,7 @@ def _validate_headers(headers: List[str]):
                 "[ _ - # + : . , /] "
             )
         elif re.match(r"^Unnamed: \d+$", header):
-            raise HeaderNotFound(header=header)
+            raise InvalidHeaderException(message="One or more headers are missing in resource")
 
 
 def _validate_not_nan(columns: List[List]):

diff --git a/src/libecalc/presentation/yaml/domain/time_series.py b/src/libecalc/presentation/yaml/domain/time_series.py
@@ -0,0 +1,106 @@
+from datetime import datetime
+from operator import itemgetter
+from typing import List, Tuple
+
+from scipy.interpolate import interp1d
+from typing_extensions import Self
+
+from libecalc.common.list.list_utils import transpose
+from libecalc.dto.types import InterpolationType
+
+
+class TimeSeries:
+    def __init__(
+        self,
+        reference_id: str,
+        time_vector: List[datetime],
+        series: List[float],
+        extrapolate: bool,
+        interpolation_type: InterpolationType,
+    ):
+        self.reference_id = reference_id
+        self.time_vector = time_vector
+        self.series = series
+        self._extrapolate = extrapolate
+        self._interpolation_type = interpolation_type
+
+    @staticmethod
+    def _get_interpolation_kind(rate_interpolation_type: InterpolationType) -> str:
+        if rate_interpolation_type == InterpolationType.LINEAR:
+            return "linear"
+        elif rate_interpolation_type == InterpolationType.RIGHT:
+            return "previous"
+        elif rate_interpolation_type == InterpolationType.LEFT:
+            return "next"
+        else:
+            raise ValueError(f"Invalid interpolation type, got {rate_interpolation_type}.")
+
+    def _interpolate(self, time_vector: List[datetime], rate_interpolation_type: InterpolationType) -> List[float]:
+        interpolation_kind = self._get_interpolation_kind(
+            rate_interpolation_type=rate_interpolation_type,
+        )
+
+        start_time = self.time_vector[0]
+
+        setup_times: List[float]
+        if len(self.time_vector) == 1:
+            # add dummy time 1 second later
+            setup_times = [0, 1]
+            setup_y = 2 * self.series
+        else:
+            # Interpolator x variable is number of seconds from first date time
+            setup_times = [(time - start_time).total_seconds() for time in self.time_vector]
+            setup_y = self.series
+
+        interpolator = interp1d(x=setup_times, y=setup_y, kind=interpolation_kind)
+        target_times = [(time - start_time).total_seconds() for time in time_vector]
+        return list(interpolator(target_times))
+
+    def fit_to_time_vector(
+        self,
+        time_vector: List[datetime],
+    ) -> Self:
+        start, end = self.time_vector[0], self.time_vector[-1]
+        number_of_entries_before, entries_between, number_of_entries_after = split_time_vector(
+            time_vector, start=start, end=end
+        )
+
+        if self._extrapolate:
+            extrapolation_after_value = self.series[-1]
+        else:
+            extrapolation_after_value = 0.0
+
+        before_values = [0.0] * number_of_entries_before
+        between_values = self._interpolate(
+            time_vector=entries_between, rate_interpolation_type=self._interpolation_type
+        )
+        after_values = [extrapolation_after_value] * number_of_entries_after
+
+        return self.__class__(
+            reference_id=self.reference_id,
+            time_vector=time_vector,
+            series=[*before_values, *between_values, *after_values],
+            extrapolate=self._extrapolate,
+            interpolation_type=self._interpolation_type,
+        )
+
+    def sort(self) -> Self:
+        sort_columns = [self.time_vector, self.series]
+        sort_rows = transpose(sort_columns)
+        sorted_rows = sorted(sort_rows, key=itemgetter(0))
+        sorted_columns = transpose(sorted_rows)
+        self.time_vector = sorted_columns[0]
+        self.series = sorted_columns[1]
+        return self
+
+
+def split_time_vector(
+    time_vector: List[datetime],
+    start: datetime,
+    end: datetime,
+) -> Tuple[int, List[datetime], int]:
+    """Find the entries between start and end, also counting the number of entries before start and after end."""
+    number_of_entries_before = len([date for date in time_vector if date < start])
+    number_of_entries_after = len([date for date in time_vector if date > end])
+    entries_between = [date for date in time_vector if start <= date <= end]
+    return number_of_entries_before, entries_between, number_of_entries_after