Add and revise tests to cover fixed right limit

Principally adds and revises tests to cover new properties and methods that provide for implementing a fixed right limit. To `test_base`: - Adds mock Prices fixtures for FixedLimits, including classes defined with IntradayOnly and DailyOnly intervals. - revises `test_limits` to include tests for `limit_right_intraday` and `limit_right_daily` properties. - adds `test_limits_fixed` to test right limit properties and setup. - adds `test_live_prices`. - adds `test_latest_requestable_minute`. - adds `test__minute_to_earliest_previous_trading_minute`. - revises `test__bis_available` to include `bis_available_any`. - adds `test__bis_available_fixed_right` to test `bis_available` methods when right limit defined. - revises `test_get_prices_params_cls` to test `gpp.request_all_available_data`. To `test_daterange`, for both `TestGetterDaily` and `TestGetterIntraday` test classes: - revises `test_constructor_properties` to cover new `limit_right` and `end_limit` properties. - revises `test_get_start` to verify effect of new `limit` property and defining a fixed right limit. - revises `test_get_end` to verify effect of new `limit` and `strict` properties and defining a fixed right limit. - For `TestGetterIntraday`, specifically revises `test_get_start_too_late`, `test_get_end_ool` and `test_end_now_and_get_end_none`. To `test_data` revises `test_pre_requests` to `verify_available_any`. Also: To `prices.base` module: - Fixes `set_base_limits_right` to consider BASE_LIMITS_RIGHT attr. - Revises `_update_base_limits` to provide for not setting BASE_LIMITS attr and relying on update method at runtime. - Revises `PricesBase` class doc to cover changes to optionally provide BaseInterval, BASE_LIMITS and BASE_LIMITS_RIGHT dynamically via update methods. - Further changes to provide for defining only intraday or daily intervals. - Refactors `prices_for_symbols` from `PricesYahoo` to base. Moves test to `test_base_prices` and revises. 
To prices.csv module: - implements `prices_for_symbols`. Includes test to `test_csv`.
maread99 · Jan 30, 2024 · d9703e1 · d9703e1
1 parent aefe424
commit d9703e1
Show file tree

Hide file tree

Showing 11 changed files with 1,239 additions and 331 deletions.
diff --git a/src/market_prices/data.py b/src/market_prices/data.py
@@ -13,7 +13,7 @@
 
 from market_prices import errors, helpers, intervals
 from market_prices.utils import calendar_utils as calutils
-from market_prices.mptypes import DateRange, DateRangeReq
+from market_prices.mptypes import DateRangeReq
 
 from .utils.pandas_utils import (
     interval_contains,
@@ -286,7 +286,6 @@ def available_range(
 
         return avail
 
-    # TODO ADD TEST
     def available_any(self, daterange: DateRangeReq) -> bool | None:
         """Query if data is available for any timestamp within a range.
 

diff --git a/src/market_prices/daterange.py b/src/market_prices/daterange.py
@@ -137,7 +137,6 @@ def limit(self) -> pd.Timestamp:
             return self._limit(interval)
         return self._limit
 
-    # TODO one way or another tests will need to verify that can pass through as a callable
     @property
     def limit_right(self) -> pd.Timestamp | None:
         """Right limit."""
@@ -275,12 +274,6 @@ def get_end(  # pylint: disable=missing-param-doc
         if ts is None:
             return self.end_limit
 
-        # TODO will require a test to ensure limiting when limit is True
-        # Currently revised at least one existing test (test_get_end_non_trading_minutes)
-        # to pass limit as False (NB all daterange tests were passing)
-        # TODO will require a test to ensure treating strict as `strict` if
-        # `strict` passed to override `self.strict`
-
         end, end_acc = self._get_end(ts)
         if end < self.limit:
             raise errors.EndTooEarlyError(end, self.limit)

diff --git a/src/market_prices/prices/base.py b/src/market_prices/prices/base.py
diff --git a/src/market_prices/prices/csv.py b/src/market_prices/prices/csv.py
@@ -12,7 +12,6 @@
 from collections import defaultdict
 from datetime import timedelta
 from pathlib import Path
-from typing import Any
 
 from exchange_calendars import ExchangeCalendar
 import numpy as np
@@ -796,8 +795,8 @@ class PricesCsv(base.PricesBase):
     ----------
     path : str | pathlib.Path
         Path to directory containing .csv files and/or a hierarchy of
-        subdirectories containing .csv files that comply with the
-        requirements detailed to the 'Notes' section.
+        subdirectories containing .csv files. Files and folders should
+        conform with requirements detailed here and to the 'Notes' section.
 
         The constructor will search for .csv files in this directory and
         all directories under it. All files without the .csv extension will
@@ -806,7 +805,7 @@ class PricesCsv(base.PricesBase):
         Each csv file must contain data for a single symbol and for a
         single interval. The symbol and interval should be included within
         the filename and separated from each other and/or any other parts
-        of the filename with the '_' separator. The following are examples
+        of the filename with a '_' separator. The following are examples
         of valid filenames:
             MSFT_5T.csv
             5T_MSFT.csv
@@ -815,10 +814,9 @@ class PricesCsv(base.PricesBase):
             whatever_MSFT_5T_whatever.csv
             whatever_MSFT_whatever_5T_whatever.csv
             whatever_whatever_5T_whatever_MSFT_whatever.csv
-            ...
 
         The interval part expresses the duration of the period corresonding
-        with each row of data. The interval comprises two parts a unit and
+        with each row of data. The interval comprises two parts, a unit and
         a value. Valid units are:
             MIN - to describe mintues
             T - to describe mintues
@@ -853,9 +851,9 @@ class PricesCsv(base.PricesBase):
         the file being ignored:
             MSFT_p5T_else.csv (malformed interval)
             MSFT_5T_15T,csv (ambiguous interval)
-            MSFT_5T_TSLA.csv (two symbols included to `symbols` parameter)
-            MSFT_2D.csv (if interval unit day value cannot be greater than
-                one)
+            MSFT_5T_TSLA.csv (two symbols)
+            MSFT_2D.csv (if interval unit is day then value cannot be
+                greater than one)
             MSFT.txt (not a .csv file)
 
         The `csv_paths` property shows all the csv files that have been
@@ -883,7 +881,8 @@ class PricesCsv(base.PricesBase):
             `str` of ISO Code of an exchange for which the
             `exchange_calendars` package maintains a calendar. See
             https://github.com/gerrymanoim/exchange_calendars#calendars
-            or call market_prices.get_exchange_info`.
+            or call `market_prices.get_exchange_info`. For example:
+                calendars="XLON",
 
             `str` of any other calendar name supported by
             `exchange_calendars`, as returned by
@@ -895,32 +894,34 @@ class PricesCsv(base.PricesBase):
             calendar). List should have same length as `symbols` with each
             element relating to the symbol at the corresponding index.
 
-            Dictionary with items representing only those symbols for which
-            wish to define a calendar. Any symbol not included to keys will
-            be assigned, if possible, the default calendar assigned for the
-            symbol.
+            Dictionary mapping each symbol with a calendar.
                 key: str
                     symbol.
                 value: mptypes.Calendar (i.e. as for a single calendar)
                     Calendar corresponding with symbol.
 
+                For example:
+                    calendars = {"MSFT": "XNYS", "AZN.L": "XLON"}
+
         Each Calendar should have a first session no later than the first
         session from which prices are available for any symbol
         corresponding with that calendar.
 
     lead_symbol : str
         Symbol with calendar that should be used as the default calendar to
         evaluate period from period parameters. If not passed default
-        calendar will be defined as the most common calendar.
+        calendar will be defined as the most common calendar (and if more
+        than one calendar is the most common then of those the calendar
+        that's defined 'first' in the list or dictionary).
 
     read_csv_kwargs : Optional[dict[str, Any]]
         Keyword argumnets to pass to `pandas.read_csv` to parse a csv file
-        to a pandas DataFrame. The 'Notes' section covers how a csv file
-        can be formatted such that it parses with the default
-        `read_csv_kwargs`.
+        to a pandas DataFrame. See the 'Notes' section for how a csv file
+        can be formatted such that it parses under the default
+        implementation.
 
-        market-prices requires that the DataFrame parses with:
-            index as a pd.DatetimeIndex named 'date'.
+        market_prices requires that the DataFrame parses with:
+            index as a `pd.DatetimeIndex` named 'date'.
 
             columns labelled 'open', 'high', 'low', 'close' and optionally
             'volume', each with dtype "float64".
@@ -938,8 +939,8 @@ class PricesCsv(base.PricesBase):
         See help(pandas.read_csv) for all available kwargs.
 
         Note that the following arguments will always be passed by
-        market_prices to `pandas.read_csv` with the following values which
-        cannot be overriden by `read_csv_kwargs`:
+        market_prices to `pandas.read_csv` with the following values (these
+        values cannot be overriden by `read_csv_kwargs`):
             "filepath_or_buffer": <csv file path>
             "dtype": {
                 'open': "float64",
@@ -962,14 +963,14 @@ class PricesCsv(base.PricesBase):
             }
         This would override the names as defined in the csv file's first
         row with the required values. Note that all references to column
-        names in other kwargs, such as 'usecols' and 'dtype', will now look
-        at the overridden names (as required), not the names as defined in
-        the csv files.
+        names in other kwargs, such as 'usecols' and 'dtype', will now
+        refer to the overridden names (as required), not the names as
+        defined in the csv files.
 
     ohlc_thres : float, default: 0.08
-        Threshold to reject incongrument ohlc data, in terms of percentage
-        of incongrument rows. For example, pass as 0.1 to reject data if
-        more than 10% of rows exhibit incongruent data.
+        Threshold to reject incongruent ohlc data, in terms of maximum
+        percentage of incongruent rows to permit. For example, pass as 0.1
+        to reject data if more than 10% of rows exhibit incongruent data.
 
         If the number of incongruent rows are below the threshold then
         adjustements will be made to force congruence.
@@ -984,9 +985,9 @@ class PricesCsv(base.PricesBase):
             open is higher than high
                 within threshold, open will be forced to high
 
-        NOTE Data will always be rejected if any row has a high value
-        lower than the low value. No provision is made for setting a
-        threshold in this circumstance.
+        Note: Data will always be rejected if any row has a high value
+        lower than the low value. No provision is made to permit this
+        circumstance.
 
     pm_subsession_origin : Literal["open", "break_end"], default: "open"
         How to evaluate indices of sessions that include a break. (The
@@ -1019,9 +1020,7 @@ class PricesCsv(base.PricesBase):
     represented (it's common for data sources to exclude intraday data for
     periods during which a symbol did not register a trade). The price data
     will be reindexed against expected indices as evaluated from the
-    corresponding calendar of `calendars`. This complies with the
-    `base.PricesBase` implementation's requirement that all indices are
-    included over all periods of regular trading.
+    corresponding calendar of `calendars`.
 
     For daily price data values in the 'date' column should represent a
     date, for example '2023-11-16'.
@@ -1044,7 +1043,7 @@ class PricesCsv(base.PricesBase):
     is aligned with the interval, based on the (sub)session open, and which
     falls before the corresonding (sub)session close. See the
     `pm_subsession_origin` parameter for how to determine how indices are
-    evaluated for for sessions that include a break.
+    evaluated for sessions that include a break.
 
     Examples
     If a session opens at 10:00 and the interval is 15T then
@@ -1059,12 +1058,6 @@ class PricesCsv(base.PricesBase):
     regular trading hours will be ignored.
     """
 
-    # TODO HAVE A READ THROUGH / final revision of DOC
-
-    # These are defined dynamically by constructor
-    BASE_LIMITS: dict[BI, pd.Timedelta | pd.Timestamp | None] = {}
-    BASE_LIMITS_RIGHT: dict[BI, pd.Timestamp | None] = {}
-
     @parse
     def __init__(
         self,
@@ -1085,6 +1078,14 @@ def __init__(
                 "'path' must represent an existing local directory, although"
                 f" received {path}."
             )
+
+        self._receieved_kwargs = dict(
+            path=path,
+            read_csv_kwargs=read_csv_kwargs,
+            ohlc_thres=ohlc_thres,
+            pm_subsession_origin=pm_subsession_origin,
+        )  # for `prices_for_symbols`
+
         root = path
         self.PM_SUBSESSION_ORIGIN = pm_subsession_origin  # override class attr
         symbols_ = helpers.symbols_to_list(symbols)
@@ -1113,11 +1114,9 @@ def __init__(
 
         super().__init__(symbols_, calendars, lead_symbol, delays)
 
-        # TODO following attrs are temporary affairs
-        # for _tables don't duplicate any data - needs to go through to the pdata or what/wherever
         self._tables, reindexing_warnings = self._compile_tables(parsed_data)
         all_errors_warnings.extend(reindexing_warnings)
-        self.all_errors_warnings = all_errors_warnings
+
         if not self._tables:
             raise CsvNoDataError(symbols_, all_errors_warnings, verbose)
 
@@ -1263,6 +1262,44 @@ def _request_data(
             )
         return self._tables[interval]
 
-    def prices_for_symbols():
-        # TODO
-        pass
+    def _get_class_instance(self, symbols: list[str], **kwargs) -> "PricesCsv":
+        """Return an instance of PricesCsv with same arguments as self.
+
+        Notes
+        -----
+        If required, subclass should override or extend this method.
+        """
+        cals = {s: self.calendars[s] for s in symbols}
+        if self.lead_symbol_default in symbols:
+            kwargs.setdefault("lead_symbol", self.lead_symbol_default)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            rtrn = type(self)(
+                symbols=symbols, calendars=cals, **self._receieved_kwargs, **kwargs
+            )
+
+        return rtrn
+
+    def prices_for_symbols(self, symbols: mptypes.Symbols) -> "PricesCsv":
+        """Return instance of prices class for one or more symbols.
+
+        Creates new instance for `symbols` with freshly retrieved price data.
+
+        Parameters
+        ----------
+        symbols
+            Symbols to include to the new instance. Passed as class'
+            'symbols' parameter.
+        """
+        # pylint: disable=protected-access
+        symbols = helpers.symbols_to_list(symbols)
+        difference = set(symbols).difference(set(self.symbols))
+        if difference:
+            msg = (
+                "symbols must be a subset of Prices' symbols although"
+                f" received the following symbols which are not:"
+                f" {difference}.\nPrices symbols are {self.symbols}."
+            )
+            raise ValueError(msg)
+        return self._get_class_instance(symbols)
diff --git a/src/market_prices/prices/yahoo.py b/src/market_prices/prices/yahoo.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import copy
 import datetime
 import functools
 import warnings
@@ -18,7 +17,7 @@
 from market_prices.helpers import UTC
 from market_prices.prices import base
 
-from ..mptypes import Calendar, Symbols
+from ..mptypes import Calendar
 from .config import config_yahoo
 
 
@@ -872,51 +871,3 @@ def _request_data(
                 end_ += pd.Timedelta(22, "H")
             prices = self._request_yahoo(interval=interval, start=start, end=end_)
         return self._tidy_yahoo(prices, interval, start, end)
-
-    @staticmethod
-    def _remove_non_trading_indices(
-        df: pd.DataFrame, cals: list[xcals.ExchangeCalendar]
-    ) -> pd.DataFrame:
-        """Remove indices that include no minutes of any of `cals`."""
-        non_trading = df.pt.indices_non_trading(cals[0])
-        for cal in cals[1:]:
-            non_trading = non_trading.intersection(df.pt.indices_non_trading(cal))
-        return df.drop(labels=non_trading)
-
-    def prices_for_symbols(self, symbols: Symbols) -> base.PricesBase:
-        """Return PricesYahoo instance for one or more symbols.
-
-        Populates instance with any pre-existing price data.
-
-        Parameters
-        ----------
-        symbols
-            Symbols to include to the new instance. Passed as class'
-            'symbols' parameter.
-        """
-        # pylint: disable=protected-access
-        symbols = helpers.symbols_to_list(symbols)
-        difference = set(symbols).difference(set(self.symbols))
-        if difference:
-            msg = (
-                "symbols must be a subset of Prices' symbols although"
-                f" received the following symbols which are not:"
-                f" {difference}.\nPrices symbols are {self.symbols}."
-            )
-            raise ValueError(msg)
-
-        cals_all = {s: self.calendars[s] for s in symbols}
-        delays_all = {s: self.delays[s].components.minutes for s in symbols}
-        prices_obj = type(self)(symbols=symbols, calendars=cals_all, delays=delays_all)
-
-        cals = list(prices_obj.calendars_unique)
-        fewer_cals = len(cals) < len(self.calendars_unique)
-        for bi in self.bis:
-            new_pdata = copy.deepcopy(self._pdata[bi])
-            if new_pdata._table is not None:
-                table = new_pdata._table[symbols].copy()
-                if fewer_cals:
-                    table = self._remove_non_trading_indices(table, cals)
-                new_pdata._table = table
-            prices_obj._pdata[bi] = new_pdata
-        return prices_obj