From 37b904da58fa848a5511a71fefda177f52c1fbc7 Mon Sep 17 00:00:00 2001 From: Edoardo Altamura <38359901+edoaltamura@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:35:30 +0200 Subject: [PATCH] Simplifies code in Data Providers and enhances API reference (#335) * Simplify pandas usage, update type hinting and improve API docs * Trigger workflow * Add more documentation * Whitelist datetime in pylint * Whitelist `datetime` in pylint * Whitelist `datetime` in pylint * Whitelist `DTW` in pylint (wasn't flagged locally) * Add `from __future__ import annotations` * Remove Return None statements * Remove ticker str type * Remove ticker str type * Remove ticker str type * Enhance documentation for str | list[str] | None and restore str type * Resolve Incompatible types in assignment in linter * Add to dictionary * Replace f-string with lazy formatting * Remove redundant strings * Fix html compilation * Whitelist Euronext in lint * Whitelist singapore and paris in lint * Correct the docstrings and fix scattered typos * Correct method links in API docs * Spell check --- .pylintdict | 7 + .../data_providers/_base_data_provider.py | 207 +++++++++++------- .../data_providers/data_on_demand_provider.py | 70 +++--- .../data_providers/exchange_data_provider.py | 58 +++-- .../data_providers/random_data_provider.py | 73 +++--- .../data_providers/wikipedia_data_provider.py | 60 +++-- .../data_providers/yahoo_data_provider.py | 61 ++++-- 7 files changed, 331 insertions(+), 205 deletions(-) diff --git a/.pylintdict b/.pylintdict index e7ba887c..8612cde3 100644 --- a/.pylintdict +++ b/.pylintdict @@ -29,15 +29,18 @@ covariances currentmodule cvar dataondemand +datetime dicts discretization discretize discretized +DTW egger eigendecomposition eigensolver enum et +euronext european farhi formatter @@ -59,6 +62,7 @@ integrality ints ising isometry +january kwargs latin linesep @@ -87,6 +91,7 @@ ok os param parametrization +paris paulis pca pdf @@ -116,6 +121,7 @@ selectable scholes scipy silvio +singapore stamatopoulos statevector stdout @@ -142,6 +148,7 @@ vec visualisation wikipedia woerner +www yamamoto yfinance york diff --git a/qiskit_finance/data_providers/_base_data_provider.py b/qiskit_finance/data_providers/_base_data_provider.py index c965abfd..9707728e 100644 --- a/qiskit_finance/data_providers/_base_data_provider.py +++ b/qiskit_finance/data_providers/_base_data_provider.py @@ -10,10 +10,16 @@ # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. -"""This module implements the abstract base class for data_provider modules the finance module.""" +"""This module implements the abstract base class for data provider modules in the finance module. +The module defines the :code:`BaseDataProvider` abstract class which should be inherited by any data +provider class within the finance module. It also includes the :code:`StockMarket` :code:`Enum` +representing supported stock markets. +""" + +from __future__ import annotations from abc import ABC, abstractmethod -from typing import Tuple, Optional, List, cast +from typing import cast import logging from enum import Enum @@ -27,57 +33,82 @@ class StockMarket(Enum): - """Stock Market enum""" + """:code:`Enum` representing various stock markets. + + This :code:`Enum` contains identifiers for the following stock markets, + represented by their respective codes: + + * :code:`"XLON"`: The London Stock Exchange. + * :code:`"XPAR"`: The Euronext Paris. + * :code:`"XSES"`: The Singapore Exchange. - LONDON = "XLON" - EURONEXT = "XPAR" - SINGAPORE = "XSES" + """ + + LONDON: str = "XLON" + EURONEXT: str = "XPAR" + SINGAPORE: str = "XSES" class BaseDataProvider(ABC): - """The abstract base class for data_provider modules within Qiskit Finance module. + """The abstract base class for :code:`data_provider` modules within Qiskit Finance. - To create add-on data_provider module subclass the BaseDataProvider class in this module. + Creates :code:`data_provider` module subclasses based on the :code:`BaseDataProvider` + abstract class in this module. Doing so requires that the required driver interface is implemented. To use the subclasses, please see https://qiskit-community.github.io/qiskit-finance/tutorials/11_time_series.html - """ @abstractmethod def __init__(self) -> None: - self._data: Optional[List] = None - self._n = 0 # pylint: disable=invalid-name - self.period_return_mean: Optional[np.ndarray] = None - self.cov: Optional[np.ndarray] = None - self.period_return_cov: Optional[np.ndarray] = None - self.rho: Optional[np.ndarray] = None - self.mean: Optional[np.ndarray] = None + self._data: list | None = None + self._n: int = 0 # pylint: disable=invalid-name + self.period_return_mean: np.ndarray | None = None + self.cov: np.ndarray | None = None + self.period_return_cov: np.ndarray | None = None + self.rho: np.ndarray | None = None + self.mean: np.ndarray | None = None @abstractmethod def run(self) -> None: - """Loads data.""" + """ + Abstract method to load data. + + Method responsible for loading data. Subclasses of :code:`BaseDataProvider` + must implement this method to load data from a specific data source. + """ pass + def _check_data_loaded(self) -> None: + """ + Checks if data is loaded. + + Raises: + QiskitFinanceError: If no data is loaded. Please run the method :code:`run()` + first to load the data. + """ + if not hasattr(self, "_data") or not self._data: + raise QiskitFinanceError( + "No data loaded yet. Please run the method `run()` first to load the data." + ) + # it does not have to be overridden in non-abstract derived classes. def get_mean_vector(self) -> np.ndarray: - """Returns a vector containing the mean value of each asset. + """Returns the mean value vector of each asset. + + Calculates the mean value for each asset based on the loaded data, + assuming each row represents a time-series observation for an asset. Returns: - a per-asset mean vector. + np.ndarray: A vector containing the mean value of each asset. + Raises: - QiskitFinanceError: no data loaded + QiskitFinanceError: If no data is loaded. Please run the method :code:`run()` + first to load the data. """ - try: - if not self._data: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) - except AttributeError as ex: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) from ex + self._check_data_loaded() + self.mean = cast(np.ndarray, np.mean(self._data, axis=1)) return self.mean @@ -93,22 +124,21 @@ def _divide(val_1, val_2): # it does not have to be overridden in non-abstract derived classes. def get_period_return_mean_vector(self) -> np.ndarray: """ - Returns a vector containing the mean value of each asset. + Calculates the mean period return vector for each asset. + + Returns the mean period return value for each asset based on the loaded data. + Period return is calculated as the ratio of the current period's value to + the previous period's value minus one. Returns: - a per-asset mean vector. + np.ndarray: A vector containing the mean period return value of each asset. + Raises: - QiskitFinanceError: no data loaded + QiskitFinanceError: If no data is loaded. Please run the method :code:`run()` + first to load the data. """ - try: - if not self._data: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) - except AttributeError as ex: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) from ex + self._check_data_loaded() + _div_func = np.vectorize(BaseDataProvider._divide) period_returns = _div_func(np.array(self._data)[:, 1:], np.array(self._data)[:, :-1]) - 1 self.period_return_mean = cast(np.ndarray, np.mean(period_returns, axis=1)) @@ -117,44 +147,45 @@ def get_period_return_mean_vector(self) -> np.ndarray: # it does not have to be overridden in non-abstract derived classes. def get_covariance_matrix(self) -> np.ndarray: """ - Returns the covariance matrix. + Calculates the covariance matrix of asset returns. + + Returns the covariance matrix of asset returns based on the loaded data. + Each row in the data is assumed to represent a time-series observation for an asset. + Covariance measures the relationship between two assets, indicating how they move in relation + to each other. Returns: - an asset-to-asset covariance matrix. + np.ndarray: An asset-to-asset covariance matrix. + Raises: - QiskitFinanceError: no data loaded + QiskitFinanceError: If no data is loaded. Please run the method :code:`run()` + first to load the data. """ - try: - if not self._data: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) - except AttributeError as ex: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) from ex + self._check_data_loaded() + self.cov = np.cov(self._data, rowvar=True) return self.cov # it does not have to be overridden in non-abstract derived classes. def get_period_return_covariance_matrix(self) -> np.ndarray: """ - Returns a vector containing the mean value of each asset. + Calculates the covariance matrix of period returns for each asset. + + Returns the covariance matrix of period returns for each asset based + on the loaded data. Period return is calculated as the ratio of the + current period's value to the previous period's value minus one. + Covariance measures the relationship between two assets' period + returns, indicating how they move in relation to each other. Returns: - a per-asset mean vector. + np.ndarray: A covariance matrix between period returns of assets. + Raises: - QiskitFinanceError: no data loaded + QiskitFinanceError: If no data is loaded. Please run the method :meth:`run()` + first to load the data. """ - try: - if not self._data: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) - except AttributeError as ex: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) from ex + self._check_data_loaded() + _div_func = np.vectorize(BaseDataProvider._divide) period_returns = _div_func(np.array(self._data)[:, 1:], np.array(self._data)[:, :-1]) - 1 self.period_return_cov = np.cov(period_returns) @@ -163,22 +194,26 @@ def get_period_return_covariance_matrix(self) -> np.ndarray: # it does not have to be overridden in non-abstract derived classes. def get_similarity_matrix(self) -> np.ndarray: """ - Returns time-series similarity matrix computed using dynamic time warping. + Calculates the similarity matrix based on time-series using dynamic + time warping. + + Returns the similarity matrix based on time-series using the + approximate Dynamic Time Warping (DTW) algorithm that provides + optimal or near-optimal alignments with an :math:`O(N)` time and memory + complexity. DTW is a technique to measure the + similarity between two sequences that may vary in time or speed. + The resulting similarity matrix indicates the similarity between + different assets' time-series data. Returns: - an asset-to-asset similarity matrix. + np.ndarray: An asset-to-asset similarity matrix. + Raises: - QiskitFinanceError: no data loaded + QiskitFinanceError: If no data is loaded. Please run the method :meth:`run()` + first to load the data. """ - try: - if not self._data: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) - except AttributeError as ex: - raise QiskitFinanceError( - "No data loaded, yet. Please run the method run() first to load the data." - ) from ex + self._check_data_loaded() + self.rho = np.zeros((self._n, self._n)) for i_i in range(0, self._n): self.rho[i_i, i_i] = 1.0 @@ -191,9 +226,21 @@ def get_similarity_matrix(self) -> np.ndarray: # gets coordinates suitable for plotting # it does not have to be overridden in non-abstract derived classes. - def get_coordinates(self) -> Tuple[np.ndarray, np.ndarray]: - """Returns random coordinates for visualisation purposes.""" - # Coordinates for visualisation purposes + def get_coordinates(self) -> tuple[np.ndarray, np.ndarray]: + """ + Generates random coordinates for visualization purposes. + + Returns random coordinates for visualization purposes. These coordinates + can be used to plot assets in a two-dimensional space, facilitating visualization + of relationships between assets. + + Returns: + tuple[np.ndarray, np.ndarray]: :math:`x` and :math:`y` coordinates of each asset. + + Note: + The generated coordinates are random and may not reflect any meaningful relationship + between assets. + """ x_c = np.zeros([self._n, 1]) y_c = np.zeros([self._n, 1]) x_c = (algorithm_globals.random.random(self._n) - 0.5) * 1 diff --git a/qiskit_finance/data_providers/data_on_demand_provider.py b/qiskit_finance/data_providers/data_on_demand_provider.py index 4c5f5e9c..f0041b7a 100644 --- a/qiskit_finance/data_providers/data_on_demand_provider.py +++ b/qiskit_finance/data_providers/data_on_demand_provider.py @@ -10,9 +10,9 @@ # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. -""" NASDAQ Data on demand data provider. """ +"""NASDAQ Data on demand data provider.""" -from typing import Optional, Union, List +from __future__ import annotations import datetime from urllib.parse import urlencode import logging @@ -37,33 +37,40 @@ class DataOnDemandProvider(BaseDataProvider): def __init__( self, token: str, - tickers: Union[str, List[str]], + tickers: str | list[str] | None = None, start: datetime.datetime = datetime.datetime(2016, 1, 1), end: datetime.datetime = datetime.datetime(2016, 1, 30), - verify: Optional[Union[str, bool]] = None, + verify: str | bool | None = None, ) -> None: """ Args: - token: data on demand access token - tickers: tickers - start: first data point - end: last data point precedes this date - verify: if verify is None, certify certificates - will be used (default); - if this is False, no certificates will be checked; if this is a string, + token (str): Nasdaq Data Link access token. + tickers (str | list[str] | None): Tickers for the data provider. + + * If a string is provided, it can be a single ticker symbol or multiple symbols + separated by semicolons or newlines. + * If a list of strings is provided, each string should be a single ticker symbol. + + Default is :code:`None`, which corresponds to no tickers provided. + start (datetime.datetime): Start date of the data. + Defaults to January 1st, 2016. + end (datetime.datetime): End date of the data. + Defaults to January 30th, 2016. + verify (str | bool | None): If verify is `None`, runs the certificate verification (default); + if this is :code:`False`, no certificates will be checked; if this is a :code:`str`, it should be pointing - to a certificate for the HTTPS connection to NASDAQ (dataondemand.nasdaq.com), - either in the - form of a CA_BUNDLE file or a directory wherein to look. + to a certificate for the HTTPS connection to NASDAQ (www.dataondemand.nasdaq.com), + either in the form of a :code:`CA_BUNDLE` file or a directory wherein to look. """ super().__init__() - if isinstance(tickers, list): - self._tickers = tickers - else: - self._tickers = tickers.replace("\n", ";").split(";") - self._n = len(self._tickers) + if tickers is None: + tickers = [] + if isinstance(tickers, str): + tickers = tickers.replace("\n", ";").split(";") + self._tickers = tickers + self._n = len(tickers) self._token = token self._start = start self._end = end @@ -71,10 +78,9 @@ def __init__( def run(self) -> None: """ - Loads data, thus enabling get_similarity_matrix and get_covariance_matrix + Loads data, thus enabling :code:`get_similarity_matrix` and :code:`get_covariance_matrix` methods in the base class. """ - http = urllib3.PoolManager(cert_reqs="CERT_REQUIRED", ca_certs=certifi.where()) url = "https://dataondemand.nasdaq.com/api/v1/quotes?" self._data = [] @@ -88,23 +94,23 @@ def run(self) -> None: "end": self._end.strftime("%Y-%m-%d'T'%H:%M:%S.%f'Z'"), "next_cursor": 0, } - encoded = url + urlencode(values) + encoded_url = f"{url}{urlencode(values)}" if self._verify is None: - response = http.request( - "POST", encoded - ) # this runs certificate verification, as per the set-up of the urllib3 + # Runs certificate verification, as per the set-up of the urllib3 + response = http.request("POST", encoded_url) else: - # this disables certificate verification (False) + # Disables certificate verification (False) # or forces the certificate path (str) - response = http.request("POST", encoded, verify=self._verify) + response = http.request("POST", encoded_url, verify=self._verify) + if response.status != 200: - logger.debug(response.data.decode("utf-8")) + error_message = response.data.decode("utf-8") + logger.debug("Error fetching data for %s: %s", ticker, error_message) stocks_error.append(ticker) continue - quotes = json.loads(response.data.decode("utf-8"))["quotes"] - price_evolution = [] - for q in quotes: - price_evolution.append(q["ask_price"]) + + quotes = json.loads(response.data.decode("utf-8")).get("quotes", []) + price_evolution = [q["ask_price"] for q in quotes] self._data.append(price_evolution) finally: http.clear() diff --git a/qiskit_finance/data_providers/exchange_data_provider.py b/qiskit_finance/data_providers/exchange_data_provider.py index 2cd275fe..3263a202 100644 --- a/qiskit_finance/data_providers/exchange_data_provider.py +++ b/qiskit_finance/data_providers/exchange_data_provider.py @@ -12,7 +12,7 @@ """ Exchange data provider. """ -from typing import Union, List +from __future__ import annotations import logging import datetime import nasdaqdatalink @@ -22,6 +22,12 @@ logger = logging.getLogger(__name__) +VALID_STOCKMARKETS = [ + StockMarket.LONDON, + StockMarket.EURONEXT, + StockMarket.SINGAPORE, +] + class ExchangeDataProvider(BaseDataProvider): """Exchange data provider. @@ -34,38 +40,42 @@ class ExchangeDataProvider(BaseDataProvider): def __init__( self, token: str, - tickers: Union[str, List[str]], + tickers: str | list[str] | None = None, stockmarket: StockMarket = StockMarket.LONDON, start: datetime.datetime = datetime.datetime(2016, 1, 1), end: datetime.datetime = datetime.datetime(2016, 1, 30), ) -> None: """ Args: - token: Nasdaq Data Link access token - tickers: tickers - stockmarket: LONDON, EURONEXT, or SINGAPORE - start: first data point - end: last data point precedes this date + token (str): Nasdaq Data Link access token. + tickers (str | list[str] | None): Tickers for the data provider. + + * If a string is provided, it can be a single ticker symbol or multiple symbols + separated by semicolons or newlines. + * If a list of strings is provided, each string should be a single ticker symbol. + + Default is :code:`None`, which corresponds to no tickers provided. + stockmarket (StockMarket): LONDON (default), EURONEXT, or SINGAPORE + start (datetime.datetime): Start date of the data. + Defaults to January 1st, 2016. + end (datetime.datetime): End date of the data. + Defaults to January 30th, 2016. Raises: - QiskitFinanceError: provider doesn't support given stock market + QiskitFinanceError: provider doesn't support given stock market. """ super().__init__() - self._tickers = [] # type: Union[str, List[str]] - if isinstance(tickers, list): - self._tickers = tickers - else: - self._tickers = tickers.replace("\n", ";").split(";") - self._n = len(self._tickers) - - if stockmarket not in [ - StockMarket.LONDON, - StockMarket.EURONEXT, - StockMarket.SINGAPORE, - ]: - msg = "ExchangeDataProvider does not support " - msg += stockmarket.value - msg += " as a stock market." + + if tickers is None: + tickers = [] + if isinstance(tickers, str): + tickers = tickers.replace("\n", ";").split(";") + + self._tickers = tickers + self._n = len(tickers) + + if stockmarket not in VALID_STOCKMARKETS: + msg = f"ExchangeDataProvider does not support {stockmarket.value} as a stock market." raise QiskitFinanceError(msg) # This is to aid serialization; string is ok to serialize @@ -78,7 +88,7 @@ def __init__( def run(self) -> None: """ - Loads data, thus enabling get_similarity_matrix and get_covariance_matrix + Loads data, thus enabling :meth:`get_similarity_matrix` and :meth:`get_covariance_matrix` methods in the base class. """ nasdaqdatalink.ApiConfig.api_key = self._token diff --git a/qiskit_finance/data_providers/random_data_provider.py b/qiskit_finance/data_providers/random_data_provider.py index ebd641a7..f25576a1 100644 --- a/qiskit_finance/data_providers/random_data_provider.py +++ b/qiskit_finance/data_providers/random_data_provider.py @@ -1,6 +1,6 @@ # This code is part of a Qiskit project. # -# (C) Copyright IBM 2019, 2023. +# (C) Copyright IBM 2019, 2024. # # This code is licensed under the Apache License, Version 2.0. You may # obtain a copy of this license in the LICENSE.txt file in the root directory @@ -12,10 +12,9 @@ """ Pseudo-randomly generated mock stock-market data provider """ -from typing import Optional, Union, List +from __future__ import annotations import datetime import logging -import pandas as pd import numpy as np from ._base_data_provider import BaseDataProvider @@ -28,50 +27,62 @@ class RandomDataProvider(BaseDataProvider): def __init__( self, - tickers: Optional[Union[str, List[str]]] = None, + tickers: str | list[str] | None = None, start: datetime.datetime = datetime.datetime(2016, 1, 1), end: datetime.datetime = datetime.datetime(2016, 1, 30), - seed: Optional[int] = None, + seed: int | None = None, ) -> None: """ + Initialize an instance of pseudo-randomly generated mock stock-market data provider. + Args: - tickers: tickers - start: first data point - end: last data point precedes this date - seed: optional random seed + tickers (str | list[str] | None): Tickers for the data provider. + + * If a string is provided, it can be a single ticker symbol or multiple symbols + separated by semicolons or newlines. + * If a list of strings is provided, each string should be a single ticker symbol. + + Default is :code:`None`, using :code:`["TICKER1", "TICKER2"]` if not provided. + start (datetime.datetime): Start date of the data. + Defaults to January 1st, 2016. + end (datetime.datetime): End date of the data. + Defaults to January 30th, 2016. + seed (int | None): Random seed for reproducibility. """ super().__init__() - tickers = tickers if tickers is not None else ["TICKER1", "TICKER2"] - if isinstance(tickers, list): - self._tickers = tickers - else: - self._tickers = tickers.replace("\n", ";").split(";") - self._n = len(self._tickers) + if tickers is None: + tickers = ["TICKER1", "TICKER2"] + if isinstance(tickers, str): + tickers = tickers.replace("\n", ";").split(";") + + self._tickers = tickers + self._n = len(tickers) self._start = start self._end = end self._seed = seed def run(self) -> None: """ - Generates data pseudo-randomly, thus enabling get_similarity_matrix - and get_covariance_matrix methods in the base class. - """ + Generate pseudo-random stock market data. + Generates pseudo-random stock market data using normal distribution + and truncates values to zero after the first occurrence of zero. + """ length = (self._end - self._start).days generator = np.random.default_rng(self._seed) self._data = [] + for _ in self._tickers: - d_f = pd.DataFrame(generator.standard_normal(length)).cumsum() + generator.integers( - 1, 101 - ) - trimmed = np.maximum(d_f[0].values, np.zeros(len(d_f[0].values))) - trimmed_list = trimmed.tolist() - # find index of first 0 element - zero_idx = next((idx for idx, val in enumerate(trimmed_list) if val == 0), -1) - if zero_idx >= 0: - # set to 0 all values after first 0 - trimmed_list = [ - val if idx < zero_idx else 0 for idx, val in enumerate(trimmed_list) - ] - self._data.append(trimmed_list) + random_numbers = generator.standard_normal(length) + cumsum = np.cumsum(random_numbers) + d_f = cumsum + generator.integers(1, 101) + trimmed = np.maximum(d_f, np.zeros(length)) + + # Set all values after the first 0 to 0 + for idx, val in enumerate(trimmed): + if val == 0: + trimmed[idx + 1 :] = 0 + break + + self._data.append(trimmed.tolist()) diff --git a/qiskit_finance/data_providers/wikipedia_data_provider.py b/qiskit_finance/data_providers/wikipedia_data_provider.py index 0844ecb5..7c57c605 100644 --- a/qiskit_finance/data_providers/wikipedia_data_provider.py +++ b/qiskit_finance/data_providers/wikipedia_data_provider.py @@ -10,9 +10,9 @@ # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. -""" Wikipedia data provider. """ +"""Wikipedia data provider.""" -from typing import Optional, Union, List +from __future__ import annotations import logging import datetime import nasdaqdatalink @@ -26,44 +26,64 @@ class WikipediaDataProvider(BaseDataProvider): """Wikipedia data provider. - Please see: + This data provider retrieves stock market data from the Wikipedia dataset + using Nasdaq Data Link API. For more details on usage, please refer to the + official documentation: https://qiskit-community.github.io/qiskit-finance/tutorials/11_time_series.html - for instructions on use. """ def __init__( self, - token: Optional[str] = None, - tickers: Optional[Union[str, List[str]]] = None, + token: str | None = None, + tickers: str | list[str] | None = None, start: datetime.datetime = datetime.datetime(2016, 1, 1), end: datetime.datetime = datetime.datetime(2016, 1, 30), ) -> None: """ + Initialize the Wikipedia Data Provider. + Args: - token: Nasdaq Data Link access token, which is not needed, strictly speaking - tickers: tickers - start: start time - end: end time + token (str | None): Nasdaq Data Link access token. + Default is None. + tickers (str | list[str] | None): Tickers for the data provider. + + * If a string is provided, it can be a single ticker symbol or multiple symbols + separated by semicolons or newlines. + * If a list of strings is provided, each string should be a single ticker symbol. + + Default is :code:`None`, which corresponds to no tickers provided. + start (datetime.datetime): Start date of the data. + Default is January 1st, 2016. + end (datetime.datetime): End date of the data. + Default is January 30th, 2016. """ super().__init__() - self._tickers = None # type: Optional[Union[str, List[str]]] - tickers = tickers if tickers is not None else [] - if isinstance(tickers, list): - self._tickers = tickers - else: - self._tickers = tickers.replace("\n", ";").split(";") - self._n = len(self._tickers) - self._token = token + if tickers is None: + tickers = [] + if isinstance(tickers, str): + tickers = tickers.replace("\n", ";").split(";") + self._tickers = tickers + self._n = len(tickers) + self._token = token self._start = start.strftime("%Y-%m-%d") self._end = end.strftime("%Y-%m-%d") self._data = [] def run(self) -> None: """ - Loads data, thus enabling get_similarity_matrix and - get_covariance_matrix methods in the base class. + Loads data from Wikipedia using Nasdaq Data Link API. + Retrieves stock market data from the Wikipedia dataset + using Nasdaq Data Link API, and populates the data attribute of the + base class, enabling further calculations like similarity and covariance + matrices. + + Raises: + QiskitFinanceError: If there is an invalid Nasdaq Data Link token, + if the Nasdaq Data Link limit is exceeded, if data is not found + for the specified tickers, or if there is an error accessing + Nasdaq Data Link. """ nasdaqdatalink.ApiConfig.api_key = self._token self._data = [] diff --git a/qiskit_finance/data_providers/yahoo_data_provider.py b/qiskit_finance/data_providers/yahoo_data_provider.py index 4d24f4b3..d690434c 100644 --- a/qiskit_finance/data_providers/yahoo_data_provider.py +++ b/qiskit_finance/data_providers/yahoo_data_provider.py @@ -12,7 +12,7 @@ """ Yahoo data provider. """ -from typing import Optional, Union, List +from __future__ import annotations import datetime import logging import tempfile @@ -35,47 +35,65 @@ class YahooDataProvider(BaseDataProvider): """Yahoo data provider. - Please see: + This data provider retrieves stock market data from Yahoo Finance using the yfinance library. + For more details on usage, please refer to the official documentation: https://qiskit-community.github.io/qiskit-finance/tutorials/11_time_series.html - for instructions on use. """ def __init__( self, - tickers: Optional[Union[str, List[str]]] = None, + tickers: str | list[str] | None = None, start: datetime.datetime = datetime.datetime(2016, 1, 1), end: datetime.datetime = datetime.datetime(2016, 1, 30), ) -> None: """ + Initialize the Yahoo Data Provider. + Args: - tickers: tickers - start: start time - end: end time + tickers (str | list[str] | None): Tickers for the data provider. + + * If a string is provided, it can be a single ticker symbol or multiple symbols + separated by semicolons or newlines. + * If a list of strings is provided, each string should be a single ticker symbol. + + Default is :code:`None`, which corresponds to no tickers provided. + start (datetime.datetime): Start date of the data. + Default is January 1st, 2016. + end (datetime.datetime): End date of the data. + Default is January 30th, 2016. """ super().__init__() - self._tickers = [] - tickers = tickers if tickers is not None else [] - if isinstance(tickers, list): - self._tickers = tickers - else: - self._tickers = tickers.replace("\n", ";").split(";") - self._n = len(self._tickers) + if tickers is None: + tickers = [] + if isinstance(tickers, str): + tickers = tickers.replace("\n", ";").split(";") + + self._tickers = tickers + self._n = len(tickers) self._start = start.strftime("%Y-%m-%d") self._end = end.strftime("%Y-%m-%d") self._data = [] def run(self) -> None: """ - Loads data, thus enabling get_similarity_matrix and - get_covariance_matrix methods in the base class. + Loads data from Yahoo Finance. + + This method retrieves stock market data from Yahoo Finance using the :code:`yfinance` library, + and populates the data attribute of the base class, enabling further calculations like + similarity and covariance matrices. + + Raises: + QiskitFinanceError: If there are missing tickers in download, + if accessing Yahoo Data fails, or if no data is found for + the specified date range, possibly due to de-listed symbols. """ if len(self._tickers) == 0: raise QiskitFinanceError("Missing tickers to download.") self._data = [] stocks_notfound = [] try: - # download multiple tickers in single thread to avoid + # Download multiple tickers in single thread to avoid # race condition stock_data = yf.download( self._tickers, @@ -85,23 +103,30 @@ def run(self) -> None: threads=False, progress=logger.isEnabledFor(logging.DEBUG), ) + if len(self._tickers) == 1: ticker_name = self._tickers[0] stock_value = stock_data["Adj Close"] + if stock_value.dropna().empty: stocks_notfound.append(ticker_name) + self._data.append(stock_value) + else: for ticker_name in self._tickers: stock_value = stock_data[ticker_name]["Adj Close"] + if stock_value.dropna().empty: stocks_notfound.append(ticker_name) + self._data.append(stock_value) + except Exception as ex: # pylint: disable=broad-except logger.debug(ex, exc_info=True) raise QiskitFinanceError("Accessing Yahoo Data failed.") from ex if stocks_notfound: raise QiskitFinanceError( - f"No data found for this date range, symbols may be delisted: {stocks_notfound}." + f"No data found for this date range, symbols may be de-listed: {stocks_notfound}." )