qiskit-community · woodsp-ibm · Jun 13, 2024 · Feb 24, 2024 · Feb 24, 2024 · Feb 24, 2024
@@ -29,10 +29,12 @@ covariances
 currentmodule
 cvar
 dataondemand
+datetime
 dicts
 discretization
 discretize
 discretized
+DTW
 egger
 eigendecomposition
 eigensolver

@@ -11,9 +11,9 @@
 # that they have been altered from the originals.
 
 """This module implements the abstract base class for data_provider modules in the finance module."""
-
+from __future__ import annotations
 from abc import ABC, abstractmethod
-from typing import Tuple, Optional, List, cast
+from typing import cast
 import logging
 from enum import Enum
 
@@ -27,11 +27,11 @@
 
 
 class StockMarket(Enum):
-    """Stock Market enum"""
+    """Enum representing various stock markets."""
 
-    LONDON = "XLON"
-    EURONEXT = "XPAR"
-    SINGAPORE = "XSES"
+    LONDON: str = "XLON"
+    EURONEXT: str = "XPAR"
+    SINGAPORE: str = "XSES"
 
 
 class BaseDataProvider(ABC):
@@ -42,42 +42,60 @@ class BaseDataProvider(ABC):
 
     To use the subclasses, please see
     https://qiskit-community.github.io/qiskit-finance/tutorials/11_time_series.html
-
     """
 
     @abstractmethod
     def __init__(self) -> None:
-        self._data: Optional[List] = None
-        self._n = 0  # pylint: disable=invalid-name
-        self.period_return_mean: Optional[np.ndarray] = None
-        self.cov: Optional[np.ndarray] = None
-        self.period_return_cov: Optional[np.ndarray] = None
-        self.rho: Optional[np.ndarray] = None
-        self.mean: Optional[np.ndarray] = None
+        self._data: list | None = None
+        self._n: int = 0  # pylint: disable=invalid-name
+        self.period_return_mean: np.ndarray | None = None
+        self.cov: np.ndarray | None = None
+        self.period_return_cov: np.ndarray | None = None
+        self.rho: np.ndarray | None = None
+        self.mean: np.ndarray | None = None
 
     @abstractmethod
     def run(self) -> None:
-        """Loads data."""
+        """
+        Abstract method to load data.
+
+        Method responsible for loading data. Subclasses of `BaseDataProvider`
+        must implement this method to load data from a specific data source.
+
+        Note:
+            The base implementation does nothing. Because the method is abstract,
+            a subclass that does not override it cannot be instantiated.
+        """
         pass
 
+    def _check_data_loaded(self) -> None:
+        """
+        Checks if data is loaded.
+
+        Raises:
+            QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
+                first to load the data.
+        """
+        if not hasattr(self, "_data") or not self._data:
+            raise QiskitFinanceError(
+                "No data loaded yet. Please run the method :code:`run()` first to load the data."
+            )
+
     # it does not have to be overridden in non-abstract derived classes.
     def get_mean_vector(self) -> np.ndarray:
-        """Returns a vector containing the mean value of each asset.
+        """Returns the mean value vector of each asset.
+
+        Calculates the mean value for each asset based on the loaded data,
+        assuming each row holds the time series of a single asset.
 
         Returns:
-            a per-asset mean vector.
+            np.ndarray: A vector containing the mean value of each asset.
+
         Raises:
-            QiskitFinanceError: no data loaded
+            QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
+                first to load the data.
         """
-        try:
-            if not self._data:
-                raise QiskitFinanceError(
-                    "No data loaded, yet. Please run the method run() first to load the data."
-                )
-        except AttributeError as ex:
-            raise QiskitFinanceError(
-                "No data loaded, yet. Please run the method run() first to load the data."
-            ) from ex
+        self._check_data_loaded()
+
         self.mean = cast(np.ndarray, np.mean(self._data, axis=1))
         return self.mean
 
@@ -93,22 +111,21 @@ def _divide(val_1, val_2):
     # it does not have to be overridden in non-abstract derived classes.
     def get_period_return_mean_vector(self) -> np.ndarray:
         """
-        Returns a vector containing the mean value of each asset.
+        Calculates the mean period return vector for each asset.
+
+        Returns the mean period return value for each asset based on the loaded data.
+        Period return is calculated as the ratio of the current period's value to
+        the previous period's value minus one.
 
         Returns:
-            a per-asset mean vector.
+            np.ndarray: A vector containing the mean period return value of each asset.
+
         Raises:
-            QiskitFinanceError: no data loaded
+            QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
+                first to load the data.
         """
-        try:
-            if not self._data:
-                raise QiskitFinanceError(
-                    "No data loaded, yet. Please run the method run() first to load the data."
-                )
-        except AttributeError as ex:
-            raise QiskitFinanceError(
-                "No data loaded, yet. Please run the method run() first to load the data."
-            ) from ex
+        self._check_data_loaded()
+
         _div_func = np.vectorize(BaseDataProvider._divide)
         period_returns = _div_func(np.array(self._data)[:, 1:], np.array(self._data)[:, :-1]) - 1
         self.period_return_mean = cast(np.ndarray, np.mean(period_returns, axis=1))
@@ -117,44 +134,45 @@ def get_period_return_mean_vector(self) -> np.ndarray:
     # it does not have to be overridden in non-abstract derived classes.
     def get_covariance_matrix(self) -> np.ndarray:
         """
-        Returns the covariance matrix.
+        Calculates the covariance matrix of asset returns.
+
+        Returns the covariance matrix of asset returns based on the loaded data.
+        Each row in the data is assumed to hold the time series of a single asset.
+        Covariance measures the relationship between two assets, indicating how they move in relation
+        to each other.
 
         Returns:
-            an asset-to-asset covariance matrix.
+            np.ndarray: An asset-to-asset covariance matrix.
+
         Raises:
-            QiskitFinanceError: no data loaded
+            QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
+                first to load the data.
         """
-        try:
-            if not self._data:
-                raise QiskitFinanceError(
-                    "No data loaded, yet. Please run the method run() first to load the data."
-                )
-        except AttributeError as ex:
-            raise QiskitFinanceError(
-                "No data loaded, yet. Please run the method run() first to load the data."
-            ) from ex
+        self._check_data_loaded()
+
         self.cov = np.cov(self._data, rowvar=True)
         return self.cov
 
     # it does not have to be overridden in non-abstract derived classes.
     def get_period_return_covariance_matrix(self) -> np.ndarray:
         """
-        Returns a vector containing the mean value of each asset.
+        Calculates the covariance matrix of period returns for each asset.
+
+        Returns the covariance matrix of period returns for each asset based
+        on the loaded data. Period return is calculated as the ratio of the
+        current period's value to the previous period's value minus one.
+        Covariance measures the relationship between two assets' period
+        returns, indicating how they move in relation to each other.
 
         Returns:
-            a per-asset mean vector.
+            np.ndarray: A covariance matrix between period returns of assets.
+
         Raises:
-            QiskitFinanceError: no data loaded
+            QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
+                first to load the data.
         """
-        try:
-            if not self._data:
-                raise QiskitFinanceError(
-                    "No data loaded, yet. Please run the method run() first to load the data."
-                )
-        except AttributeError as ex:
-            raise QiskitFinanceError(
-                "No data loaded, yet. Please run the method run() first to load the data."
-            ) from ex
+        self._check_data_loaded()
+
         _div_func = np.vectorize(BaseDataProvider._divide)
         period_returns = _div_func(np.array(self._data)[:, 1:], np.array(self._data)[:, :-1]) - 1
         self.period_return_cov = np.cov(period_returns)
@@ -163,22 +181,26 @@ def get_period_return_covariance_matrix(self) -> np.ndarray:
     # it does not have to be overridden in non-abstract derived classes.
     def get_similarity_matrix(self) -> np.ndarray:
         """
-        Returns time-series similarity matrix computed using dynamic time warping.
+        Calculates the similarity matrix based on time-series using dynamic
+        time warping.
+
+        Returns the similarity matrix based on time-series using the
+        approximate Dynamic Time Warping (DTW) algorithm that provides
+        optimal or near-optimal alignments with an :math:`O(N)` time and memory
+        complexity. DTW is a technique to measure the
+        similarity between two sequences that may vary in time or speed.
+        The resulting similarity matrix indicates the similarity between
+        different assets' time-series data.
 
         Returns:
-            an asset-to-asset similarity matrix.
+            np.ndarray: An asset-to-asset similarity matrix.
+
         Raises:
-            QiskitFinanceError: no data loaded
+            QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
+                first to load the data.
         """
-        try:
-            if not self._data:
-                raise QiskitFinanceError(
-                    "No data loaded, yet. Please run the method run() first to load the data."
-                )
-        except AttributeError as ex:
-            raise QiskitFinanceError(
-                "No data loaded, yet. Please run the method run() first to load the data."
-            ) from ex
+        self._check_data_loaded()
+
         self.rho = np.zeros((self._n, self._n))
         for i_i in range(0, self._n):
             self.rho[i_i, i_i] = 1.0
@@ -191,9 +213,21 @@ def get_similarity_matrix(self) -> np.ndarray:
 
     # gets coordinates suitable for plotting
     # it does not have to be overridden in non-abstract derived classes.
-    def get_coordinates(self) -> Tuple[np.ndarray, np.ndarray]:
-        """Returns random coordinates for visualisation purposes."""
-        # Coordinates for visualisation purposes
+    def get_coordinates(self) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Generates random coordinates for visualization purposes.
+
+        Returns random coordinates for visualization purposes. These coordinates
+        can be used to plot assets in a two-dimensional space, facilitating visualization
+        of relationships between assets.
+
+        Returns:
+            tuple[np.ndarray, np.ndarray]: :math:`x` and :math:`y` coordinates of each asset.
+
+        Note:
+            The generated coordinates are random and may not reflect any meaningful relationship
+            between assets.
+        """
         x_c = np.zeros([self._n, 1])
         y_c = np.zeros([self._n, 1])
         x_c = (algorithm_globals.random.random(self._n) - 0.5) * 1

@@ -1,6 +1,6 @@
 # This code is part of a Qiskit project.
 #
-# (C) Copyright IBM 2019, 2023.
+# (C) Copyright IBM 2019, 2024.
 #
 # This code is licensed under the Apache License, Version 2.0. You may
 # obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -11,11 +11,9 @@
 # that they have been altered from the originals.
 
 """ Pseudo-randomly generated mock stock-market data provider """
-
-from typing import Optional, Union, List
+from __future__ import annotations
 import datetime
 import logging
-import pandas as pd
 import numpy as np
 
 from ._base_data_provider import BaseDataProvider
@@ -28,50 +26,57 @@ class RandomDataProvider(BaseDataProvider):
 
     def __init__(
         self,
-        tickers: Optional[Union[str, List[str]]] = None,
+        tickers: list[str] | None = None,
         start: datetime.datetime = datetime.datetime(2016, 1, 1),
         end: datetime.datetime = datetime.datetime(2016, 1, 30),
-        seed: Optional[int] = None,
+        seed: int | None = None,
     ) -> None:
         """
+        Initialize RandomDataProvider.
+
         Args:
-            tickers: tickers
-            start: first data point
-            end: last data point precedes this date
-            seed: optional random seed
+            tickers (list[str] | None): Tickers for the data provider.
+                Default is None, using ["TICKER1", "TICKER2"] if not provided.
+            start (datetime.datetime): Start date of the data.
+                Defaults to January 1st, 2016.
+            end (datetime.datetime): End date of the data (exclusive); the last
+                data point precedes this date. Defaults to January 30th, 2016.
+            seed (int | None): Random seed for reproducibility.
         """
         super().__init__()
         tickers = tickers if tickers is not None else ["TICKER1", "TICKER2"]
+
         if isinstance(tickers, list):
             self._tickers = tickers
         else:
             self._tickers = tickers.replace("\n", ";").split(";")
-        self._n = len(self._tickers)
 
+        self._n = len(self._tickers)
         self._start = start
         self._end = end
         self._seed = seed
 
     def run(self) -> None:
         """
-        Generates data pseudo-randomly, thus enabling get_similarity_matrix
-        and get_covariance_matrix methods in the base class.
-        """
+        Generate pseudo-random stock market data.
 
+        Generates, for each ticker, a random walk (the cumulative sum of
+        standard normal draws plus a random integer offset), clips negative
+        values to zero, and sets every value after the first zero to zero.
+        """
         length = (self._end - self._start).days
         generator = np.random.default_rng(self._seed)
         self._data = []
+
         for _ in self._tickers:
-            d_f = pd.DataFrame(generator.standard_normal(length)).cumsum() + generator.integers(
-                1, 101
-            )
-            trimmed = np.maximum(d_f[0].values, np.zeros(len(d_f[0].values)))
-            trimmed_list = trimmed.tolist()
-            # find index of first 0 element
-            zero_idx = next((idx for idx, val in enumerate(trimmed_list) if val == 0), -1)
-            if zero_idx >= 0:
-                # set to 0 all values after first 0
-                trimmed_list = [
-                    val if idx < zero_idx else 0 for idx, val in enumerate(trimmed_list)
-                ]
-            self._data.append(trimmed_list)
+            random_numbers = generator.standard_normal(length)
+            cumsum = np.cumsum(random_numbers)
+            d_f = cumsum + generator.integers(1, 101)
+            trimmed = np.maximum(d_f, np.zeros(length))
+
+            # Set all values after the first 0 to 0
+            for idx, val in enumerate(trimmed):
+                if val == 0:
+                    trimmed[idx + 1 :] = 0
+                    break
+
+            self._data.append(trimmed.tolist())