Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplifies code in Data Providers and enhances API reference #335

Merged
merged 25 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
95eef34
Simplify pandas usage, update type hinting and improve API docs
edoaltamura Feb 24, 2024
78b8786
Trigger workflow
edoaltamura Feb 24, 2024
330517a
Add more documentation
edoaltamura Feb 24, 2024
c235ee9
Whitelist datetime in pylint
edoaltamura Feb 24, 2024
7c92647
Whitelist `datetime` in pylint
edoaltamura Feb 24, 2024
891b47d
Whitelist `datetime` in pylint
edoaltamura Feb 24, 2024
f5594ff
Whitelist `DTW` in pylint (wasn't flagged locally)
edoaltamura Feb 25, 2024
56e14b8
Merge branch 'qiskit-community:main' into main
edoaltamura Feb 29, 2024
40a00fa
Merge branch 'qiskit-community:main' into main
edoaltamura May 24, 2024
e1ca6ef
Add `from __future__ import annotations`
edoaltamura May 24, 2024
d080169
Remove Return None statements
edoaltamura May 24, 2024
daa0985
Remove ticker str type
edoaltamura May 28, 2024
e65c76f
Remove ticker str type
edoaltamura Jun 7, 2024
5718ef2
Remove ticker str type
edoaltamura Jun 7, 2024
d7530e1
Enhance documentation for str | list[str] | None and restore str type
edoaltamura Jun 10, 2024
ef53f9e
Resolve Incompatible types in assignment in linter
edoaltamura Jun 10, 2024
4ad61fe
Add to dictionary
edoaltamura Jun 10, 2024
7c92d97
Replace f-string with lazy formatting
edoaltamura Jun 10, 2024
25cd223
Remove redundant strings
edoaltamura Jun 10, 2024
f71d98f
Fix html compilation
edoaltamura Jun 10, 2024
4e5ce49
Whitelist Euronext in lint
edoaltamura Jun 11, 2024
dfd8f8a
Whitelist singapore and paris in lint
edoaltamura Jun 11, 2024
1837fe3
Correct the docstrings and fix scattered typos
edoaltamura Jun 12, 2024
0ca7b4a
Correct method links in API docs
edoaltamura Jun 12, 2024
ed6d09d
Spell check
edoaltamura Jun 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pylintdict
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ covariances
currentmodule
cvar
dataondemand
datetime
dicts
discretization
discretize
discretized
DTW
egger
eigendecomposition
eigensolver
Expand Down
190 changes: 112 additions & 78 deletions qiskit_finance/data_providers/_base_data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
# that they have been altered from the originals.

"""This module implements the abstract base class for data_provider modules the finance module."""

from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Tuple, Optional, List, cast
from typing import cast
import logging
from enum import Enum

Expand All @@ -27,11 +27,11 @@


class StockMarket(Enum):
"""Stock Market enum"""
"""Enum representing various stock markets."""

LONDON = "XLON"
EURONEXT = "XPAR"
SINGAPORE = "XSES"
LONDON: str = "XLON"
EURONEXT: str = "XPAR"
SINGAPORE: str = "XSES"


class BaseDataProvider(ABC):
Expand All @@ -42,42 +42,60 @@ class BaseDataProvider(ABC):

To use the subclasses, please see
https://qiskit-community.github.io/qiskit-finance/tutorials/11_time_series.html

"""

@abstractmethod
def __init__(self) -> None:
self._data: Optional[List] = None
self._n = 0 # pylint: disable=invalid-name
self.period_return_mean: Optional[np.ndarray] = None
self.cov: Optional[np.ndarray] = None
self.period_return_cov: Optional[np.ndarray] = None
self.rho: Optional[np.ndarray] = None
self.mean: Optional[np.ndarray] = None
self._data: list | None = None
self._n: int = 0 # pylint: disable=invalid-name
self.period_return_mean: np.ndarray | None = None
self.cov: np.ndarray | None = None
self.period_return_cov: np.ndarray | None = None
self.rho: np.ndarray | None = None
self.mean: np.ndarray | None = None

@abstractmethod
def run(self) -> None:
"""Loads data."""
"""
Abstract method to load data.

Method responsible for loading data. Subclasses of `BaseDataProvider`
must implement this method to load data from a specific data source.

Note:
This method is abstract: the base class provides no implementation, so every concrete subclass must override it.
edoaltamura marked this conversation as resolved.
Show resolved Hide resolved
"""
pass

def _check_data_loaded(self) -> None:
"""
Checks if data is loaded.

Raises:
QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
first to load the data.
"""
if not hasattr(self, "_data") or not self._data:
raise QiskitFinanceError(
"No data loaded yet. Please run the method :code:`run()` first to load the data."
)

# it does not have to be overridden in non-abstract derived classes.
def get_mean_vector(self) -> np.ndarray:
"""Returns a vector containing the mean value of each asset.
"""Returns the mean value vector of each asset.

Calculates the mean value for each asset based on the loaded data,
assuming each row of the data holds the time series of one asset.

Returns:
a per-asset mean vector.
np.ndarray: A vector containing the mean value of each asset.

Raises:
QiskitFinanceError: no data loaded
QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
first to load the data.
"""
try:
if not self._data:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
)
except AttributeError as ex:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
) from ex
self._check_data_loaded()

self.mean = cast(np.ndarray, np.mean(self._data, axis=1))
return self.mean

Expand All @@ -93,22 +111,21 @@ def _divide(val_1, val_2):
# it does not have to be overridden in non-abstract derived classes.
def get_period_return_mean_vector(self) -> np.ndarray:
"""
Returns a vector containing the mean value of each asset.
Calculates the mean period return vector for each asset.

Returns the mean period return value for each asset based on the loaded data.
Period return is calculated as the ratio of the current period's value to
the previous period's value, minus one (that is, the ratio is computed first and one is then subtracted).

Returns:
a per-asset mean vector.
np.ndarray: A vector containing the mean period return value of each asset.

Raises:
QiskitFinanceError: no data loaded
QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
first to load the data.
"""
try:
if not self._data:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
)
except AttributeError as ex:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
) from ex
self._check_data_loaded()

_div_func = np.vectorize(BaseDataProvider._divide)
period_returns = _div_func(np.array(self._data)[:, 1:], np.array(self._data)[:, :-1]) - 1
self.period_return_mean = cast(np.ndarray, np.mean(period_returns, axis=1))
Expand All @@ -117,44 +134,45 @@ def get_period_return_mean_vector(self) -> np.ndarray:
# it does not have to be overridden in non-abstract derived classes.
def get_covariance_matrix(self) -> np.ndarray:
"""
Returns the covariance matrix.
Calculates the covariance matrix of asset returns.

Returns the covariance matrix of asset returns based on the loaded data.
Each row in the data is assumed to hold the time series of one asset.
Covariance measures the relationship between two assets, indicating how they move in relation
to each other.

Returns:
an asset-to-asset covariance matrix.
np.ndarray: An asset-to-asset covariance matrix.

Raises:
QiskitFinanceError: no data loaded
QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
first to load the data.
"""
try:
if not self._data:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
)
except AttributeError as ex:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
) from ex
self._check_data_loaded()

self.cov = np.cov(self._data, rowvar=True)
return self.cov

# it does not have to be overridden in non-abstract derived classes.
def get_period_return_covariance_matrix(self) -> np.ndarray:
"""
Returns a vector containing the mean value of each asset.
Calculates the covariance matrix of period returns for each asset.

Returns the covariance matrix of period returns for each asset based
on the loaded data. Period return is calculated as the ratio of the
current period's value to the previous period's value, minus one.
Covariance measures the relationship between two assets' period
returns, indicating how they move in relation to each other.

Returns:
a per-asset mean vector.
np.ndarray: A covariance matrix between period returns of assets.

Raises:
QiskitFinanceError: no data loaded
QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
first to load the data.
"""
try:
if not self._data:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
)
except AttributeError as ex:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
) from ex
self._check_data_loaded()

_div_func = np.vectorize(BaseDataProvider._divide)
period_returns = _div_func(np.array(self._data)[:, 1:], np.array(self._data)[:, :-1]) - 1
self.period_return_cov = np.cov(period_returns)
Expand All @@ -163,22 +181,26 @@ def get_period_return_covariance_matrix(self) -> np.ndarray:
# it does not have to be overridden in non-abstract derived classes.
def get_similarity_matrix(self) -> np.ndarray:
"""
Returns time-series similarity matrix computed using dynamic time warping.
Calculates the similarity matrix based on time-series using dynamic
time warping.

Returns the similarity matrix based on time-series using the
approximate Dynamic Time Warping (DTW) algorithm that provides
optimal or near-optimal alignments with an :math:`O(N)` time and memory
complexity. DTW is a technique to measure the
similarity between two sequences that may vary in time or speed.
The resulting similarity matrix indicates the similarity between
different assets' time-series data.

Returns:
an asset-to-asset similarity matrix.
np.ndarray: An asset-to-asset similarity matrix.

Raises:
QiskitFinanceError: no data loaded
QiskitFinanceError: If no data is loaded. Please run the method :code:`run()`
first to load the data.
"""
try:
if not self._data:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
)
except AttributeError as ex:
raise QiskitFinanceError(
"No data loaded, yet. Please run the method run() first to load the data."
) from ex
self._check_data_loaded()

self.rho = np.zeros((self._n, self._n))
for i_i in range(0, self._n):
self.rho[i_i, i_i] = 1.0
Expand All @@ -191,9 +213,21 @@ def get_similarity_matrix(self) -> np.ndarray:

# gets coordinates suitable for plotting
# it does not have to be overridden in non-abstract derived classes.
def get_coordinates(self) -> Tuple[np.ndarray, np.ndarray]:
"""Returns random coordinates for visualisation purposes."""
# Coordinates for visualisation purposes
def get_coordinates(self) -> tuple[np.ndarray, np.ndarray]:
"""
Generates random coordinates for visualisation purposes.

Returns random coordinates for visualization purposes. These coordinates
can be used to plot assets in a two-dimensional space, facilitating visualization
of relationships between assets.

Returns:
tuple[np.ndarray, np.ndarray]: :math:`x` and :math:`y` coordinates of each asset.

Note:
The generated coordinates are random and may not reflect any meaningful relationship
between assets.
"""
x_c = np.zeros([self._n, 1])
y_c = np.zeros([self._n, 1])
x_c = (algorithm_globals.random.random(self._n) - 0.5) * 1
Expand Down
59 changes: 32 additions & 27 deletions qiskit_finance/data_providers/random_data_provider.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This code is part of a Qiskit project.
#
# (C) Copyright IBM 2019, 2023.
# (C) Copyright IBM 2019, 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
Expand All @@ -11,11 +11,9 @@
# that they have been altered from the originals.

""" Pseudo-randomly generated mock stock-market data provider """

from typing import Optional, Union, List
from __future__ import annotations
import datetime
import logging
import pandas as pd
import numpy as np

from ._base_data_provider import BaseDataProvider
Expand All @@ -28,50 +26,57 @@ class RandomDataProvider(BaseDataProvider):

def __init__(
self,
tickers: Optional[Union[str, List[str]]] = None,
tickers: list[str] | None = None,
start: datetime.datetime = datetime.datetime(2016, 1, 1),
end: datetime.datetime = datetime.datetime(2016, 1, 30),
seed: Optional[int] = None,
seed: int | None = None,
) -> None:
"""
Initialize RandomDataProvider.

Args:
tickers: tickers
start: first data point
end: last data point precedes this date
seed: optional random seed
tickers (list[str] | None): Tickers for the data provider.
edoaltamura marked this conversation as resolved.
Show resolved Hide resolved
Default is None, using ["TICKER1", "TICKER2"] if not provided.
start (datetime.datetime): Start date of the data.
Defaults to January 1st, 2016.
end (datetime.datetime): End date of the data.
Defaults to January 30th, 2016.
seed (int | None): Random seed for reproducibility.
"""
super().__init__()
tickers = tickers if tickers is not None else ["TICKER1", "TICKER2"]

if isinstance(tickers, list):
self._tickers = tickers
else:
self._tickers = tickers.replace("\n", ";").split(";")
self._n = len(self._tickers)

self._n = len(self._tickers)
self._start = start
self._end = end
self._seed = seed

def run(self) -> None:
"""
Generates data pseudo-randomly, thus enabling get_similarity_matrix
and get_covariance_matrix methods in the base class.
"""
Generate pseudo-random stock market data.

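For each ticker, generates a pseudo-random series as the cumulative sum of
standard-normal draws plus a random integer offset between 1 and 100. Negative
values are clipped to zero, and every value after the first zero is set to zero.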
"""
length = (self._end - self._start).days
generator = np.random.default_rng(self._seed)
self._data = []

for _ in self._tickers:
d_f = pd.DataFrame(generator.standard_normal(length)).cumsum() + generator.integers(
1, 101
)
trimmed = np.maximum(d_f[0].values, np.zeros(len(d_f[0].values)))
trimmed_list = trimmed.tolist()
# find index of first 0 element
zero_idx = next((idx for idx, val in enumerate(trimmed_list) if val == 0), -1)
if zero_idx >= 0:
# set to 0 all values after first 0
trimmed_list = [
val if idx < zero_idx else 0 for idx, val in enumerate(trimmed_list)
]
self._data.append(trimmed_list)
random_numbers = generator.standard_normal(length)
cumsum = np.cumsum(random_numbers)
d_f = cumsum + generator.integers(1, 101)
trimmed = np.maximum(d_f, np.zeros(length))

# Set all values after the first 0 to 0
for idx, val in enumerate(trimmed):
if val == 0:
trimmed[idx + 1 :] = 0
break

self._data.append(trimmed.tolist())
Loading
Loading