diff --git a/src/market_prices/data.py b/src/market_prices/data.py index 03f67db..c950c4c 100644 --- a/src/market_prices/data.py +++ b/src/market_prices/data.py @@ -13,7 +13,7 @@ from market_prices import errors, helpers, intervals from market_prices.utils import calendar_utils as calutils -from market_prices.mptypes import DateRange, DateRangeReq +from market_prices.mptypes import DateRangeReq from .utils.pandas_utils import ( interval_contains, @@ -286,7 +286,6 @@ def available_range( return avail - # TODO ADD TEST def available_any(self, daterange: DateRangeReq) -> bool | None: """Query if data is available for any timestamp within a range. diff --git a/src/market_prices/daterange.py b/src/market_prices/daterange.py index 88fee6d..80fc3cd 100644 --- a/src/market_prices/daterange.py +++ b/src/market_prices/daterange.py @@ -137,7 +137,6 @@ def limit(self) -> pd.Timestamp: return self._limit(interval) return self._limit - # TODO one way or another tests will need to verify that can pass through as a callable @property def limit_right(self) -> pd.Timestamp | None: """Right limit.""" @@ -275,12 +274,6 @@ def get_end( # pylint: disable=missing-param-doc if ts is None: return self.end_limit - # TODO will require a test to ensure limiting when limit is True - # Currently revised at least one existing test (test_get_end_non_trading_minutes) - # to pass limit as False (NB all daterange tests were passing) - # TODO will require a test to ensure treating strict as `strict` if - # `strict` passed to override `self.strict` - end, end_acc = self._get_end(ts) if end < self.limit: raise errors.EndTooEarlyError(end, self.limit) diff --git a/src/market_prices/prices/base.py b/src/market_prices/prices/base.py index 33b5729..1f473b6 100644 --- a/src/market_prices/prices/base.py +++ b/src/market_prices/prices/base.py @@ -28,7 +28,7 @@ from market_prices import daterange as dr from market_prices import errors, helpers, intervals, mptypes, parsing, pt from market_prices.helpers import UTC -from market_prices.mptypes import Anchor, OpenEnd, Alignment, Priority +from market_prices.mptypes import Anchor, OpenEnd, Alignment, Priority, Symbols from market_prices.intervals import BI, TDInterval, _BaseIntervalMeta from market_prices.utils import calendar_utils as calutils from market_prices.utils import pandas_utils as pdutils @@ -366,18 +366,32 @@ class PricesBase(metaclass=abc.ABCMeta): Note: On base class BaseInterval is implemented as a type only. This type is not enforced at runtime. + Note: Alternatively, if available base intervals can only be + ascertained at runtime, the `_define_base_intervals` can be + called from the subclasses constructor to define the base + intervals for the specific instance. If base intervals will be + defined in this way for all instances then it is not necessary + to separately define a `BaseInterval` class attribute. + BASE_LIMITS : dict[BI, pd.Timestamp | pd.Timedelta | None] key: BaseInterval Every base interval should be represented. value : pd.Timedelta | pd.Timestamp, optional - Limit of availability of historal data for the base - interval, as either timedelta to now or absolute timestamp - (utc). If interval is daily then timedelta / timestamp - should be day accurate (no time component) or None if limit - is unknown. Note: A limit must be defined for all intraday - base intervals. + Limit of earliest availability of historal data for the + base interval, as either timedelta to now or absolute + timestamp. + + For a daily interval: + The timedelta / timestamp must be day accurate (no + time component). + If defined as a timestamp then must be timezone naive. + If limit is unknown then value can take None. + + For intraday intervals: + Timestamps must have timezone as UTC. + Limits must be defined for all intraday base intervals. Example, if only 60 days of data are available for data at the 5 minute base interval, although there is no limit on daily @@ -385,14 +399,53 @@ class PricesBase(metaclass=abc.ABCMeta): {BaseInterval.T5: pd.Timedelta(days=60), BaseInterval.D1: None} + Note: Subclass instances can call `_update_base_limits` to + override BASE_LIMITS with instance-specific limits for one, + many or all intervals. This can be used when the limit for an + interval can only be ascertained at runtime, for example if + data availability for the daily interval is dependent on the + specific set of symbols. It can alternatively be used to define + the limits for all intervals. If used to define the limits for + all intervals for all instances then it is not necessary to + define the BASE_LIMITS class attribute. + If used, `_update_base_limits` must be called from the + subclass constructor before executing the constructor as + defined on the base class. + Note: Instance specific base limits are exposed via the - `base_limits` property. By default the `base_limits` property - returns a copy of BASE_LIMITS. Subclass instances can use - `_update_base_limits` to define instance-specific limits, for - example if for a particular interval data availability differs - for specific symbols. `_update_base_limits` should be called - from the subclass constructor before executing the constructor - as defined on the base class. + `base_limits` property. + + BASE_LIMITS_RIGHT : + dict[BI, pd.Timestamp | None] + Note: If price data is available through to 'now' for all base + intervals then it is NOT necessary to define BASE_LIMITS_RIGHT. + + key: BaseInterval + If BASE_LIMITS_RIGHT is defined then every base interval + must be represented. + + value : pd.Timestamp, optional + Limit of most recent availability of historal data for the + base interval, as either an absolute timestamp or None + if price data is available through to 'now'. If the + interval is daily then the timestamp should be day accurate + (no time component) and be timezone naive, otherwise the + timestamp should have timezone as UTC. + + Note: Subclass instances can call `_update_base_limits_right` + to override BASE_LIMITS_RIGHT with instance-specific limits for + one, many or all intervals. This can be used when the limit for + an interval can only be ascertained at runtime, for example if + the data source are local .csv files. If the method is used to + define the right limits for all intervals for all instances + then it is not necessary to define the BASE_LIMITS_RIGHT class + attribute. + If used, `_update_base_limits_right` must be called from + the subclass constructor before executing the constructor + as defined on the base class. + + Note: Instance specific base limits are exposed via the + `base_limits` property. - Abstract Methods - @@ -414,9 +467,6 @@ class PricesBase(metaclass=abc.ABCMeta): Parameters as abstract method doc. - prices_for_symbols(self, symbols: str): Type[PricesBase] - Instance of subclass itself for one or more symbols. - Subclasses can optionally override the following class attributes: PM_SUBSESSION_ORIGIN : Literal["open", "break_end"]: default "open" @@ -433,9 +483,26 @@ class PricesBase(metaclass=abc.ABCMeta): All attributes and properties created by the base constructor should be preserved. - All private methods (prefixed with single underscore) are internal to - the base class. It is not intended that private methods are overriden - or extended by subclasses. + Subclasses can optionally override or extend the following methods: + + prices_for_symbols(self, symbols: str) -> Type[PricesBase] + Returns an instance of the prices class for a received subset + of symbols. + + Can be overriden by subclass if the default implementation is + not vaiable. (Also, see `_get_class_instance`.) + + _get_class_instance(self, symbols: list[str], **kwargs) + Called by `prices_for_symbols` to create instance of subclass. + + If subclass needs to make changes to implement + `prices_for_symbols` then it may be possible to simply extend + this method to pass through any additional arguments that the + constructor requires. + + Other than any noted above, it is not intended that private methods + (prefixed with single underscore) are overriden or extended by + subclasses. The base class also defines a host of public properties and methods. Use `help(PricesBase)` for a listing. @@ -674,7 +741,7 @@ def __init__( self._base_limits: dict[BI, pd.Timedelta | pd.Timestamp | None] self._verify_base_limits() self._base_limits_right: dict[BI, pd.Timestamp | None] - self._set_dflt_base_limits_right() + self._set_base_limits_right() self._verify_base_limits_right() self._verify_lead_symbol(lead_symbol) self._calendars: dict[str, xcals.ExchangeCalendar] @@ -850,11 +917,14 @@ def _verify_base_limits(self): f" {base_limits_keys}." ) - def _set_dflt_base_limits_right(self): - """Set default `_base_limits_right` if not otherwise defined.""" + def _set_base_limits_right(self): + """Set `_base_limits_right` to default if not otherwise defined.""" if getattr(self, "_base_limits_right", None) is not None: return - self._base_limits_right = {bi: None for bi in self.bis} + if getattr(self, "BASE_LIMITS_RIGHT", None) is not None: + self._base_limits_right = self.BASE_LIMITS_RIGHT.copy() + else: + self._base_limits_right = {bi: None for bi in self.bis} def _verify_base_limits_right(self): """Verify type of right base limits values.""" @@ -896,7 +966,10 @@ def _update_base_limits(self, update: dict[BI, pd.Timedelta | pd.Timestamp | Non on the base class. """ if getattr(self, "_base_limits", None) is None: - self._base_limits = self.BASE_LIMITS.copy() + if getattr(self, "BASE_LIMITS", None) is None: + self._base_limits = {} + else: + self._base_limits = self.BASE_LIMITS.copy() prev_limits = self._base_limits.copy() self._base_limits.update(update) try: @@ -920,7 +993,7 @@ def _update_base_limits_right(self, update: dict[BI, pd.Timestamp | None]): on the base class. """ if getattr(self, "_base_limits_right", None) is None: - self._set_dflt_base_limits_right() + self._set_base_limits_right() prev_limits = self._base_limits_right.copy() self._base_limits_right.update(update) try: @@ -1109,7 +1182,7 @@ def bi_daily(self) -> BI | None: """Daily base interval, or None if all base intervals intraday.""" return self.bis.daily_bi() - @property + @functools.cached_property def bis_intraday(self) -> list[BI]: """Intraday base intervals.""" return self.bis.intraday_bis() @@ -1144,7 +1217,6 @@ def _set_pdata(self): ) self._pdata = d - # TODO would need simple test @property def live_prices(self) -> bool: """Query if live prices are avaiable (as opposed to only historic). @@ -1256,10 +1328,14 @@ def limit_daily(self) -> pd.Timestamp | None: return None return self._pdata[self.bi_daily].ll - # TODO INCLUDE SIMPLE TEST based on any for limit_daily @property - def limit_right_daily(self) -> pd.Timestamp: - """Latest date for which daily prices can be requested.""" + def limit_right_daily(self) -> pd.Timestamp | None: + """Latest date for which daily prices can be requested. + + None if no daily interval. + """ + if not self.bi_daily: + return None return self._pdata[self.bi_daily].rl def limit_intraday( @@ -1279,6 +1355,11 @@ def limit_intraday( By default (None) intraday prices will be available for all calendars from the returned timestamp. """ + if not self.bis_intraday: + raise NotImplementedError( + "`limit_intraday` is not implemented when no intraday interval is" + " defined." + ) limits_raw = [] for bi in self.bis_intraday: limit_minute = self.limits[bi][0] @@ -1292,7 +1373,6 @@ def limit_intraday( return calendar.minute_to_trading_minute(limit_raw, "next") return self._minute_to_latest_next_trading_minute(limit_raw) - # TODO will need a test, perhaps using limit_intraday as template @property def limit_right_intraday(self) -> pd.Timestamp: """Latest minute that intraday prices can be requested. @@ -1303,6 +1383,12 @@ def limit_right_intraday(self) -> pd.Timestamp: base interval for which indices are not aligned over the full period for which intraday data is available at that interval. """ + if not self.bis_intraday: + raise NotImplementedError( + "`limit_right_intraday` is not implemented when no intraday interval" + " is defined." + ) + if self.live_prices: return helpers.now() limits_raw = [] @@ -1478,8 +1564,6 @@ def last_requestable_session_any(self) -> pd.Timestamp: minute = self._minute_for_last_requestable_session return self._minute_to_session(minute, "latest", "previous") - # TODO currently no test, possibly base one on test for earliest_requestable_minute? - # ... although they are implemented notably differently @property def latest_requestable_minute(self) -> pd.Timestamp: """Latest minute for which prices can be requested. @@ -1958,7 +2042,7 @@ def _bis_available_all(self) -> list[BI]: period as defined by the current gpp parameters. """ bis_valid = self._bis_valid - if not self.gpp.request_all_available_data: + if self.gpp.request_all_available_data: return bis_valid # Why use the 'no_limit' drg? Becuase do not want dateranges to be @@ -1985,13 +2069,16 @@ def _bis_available_all(self) -> list[BI]: elif end < self.limits[bi][0]: ends_too_early += 1 - if rtrn or not interval_period_errors: + if rtrn: return rtrn len_bis = len(bis_valid) if starts_too_late == len_bis or ends_too_early == len_bis: raise errors.PricesIntradayUnavailableError(self) + if not interval_period_errors: + return rtrn # return empty list + # no rtrn and interval_period_errors. Should an interval error # be raised or data availability error? If a no_limit drg with T1 # interval doesn't raise a period / interval error then its AT @@ -2014,7 +2101,7 @@ def _bis_available_end(self) -> list[BI]: returned bis. """ bis_valid = self._bis_valid - if not self.gpp.request_all_available_data: + if self.gpp.request_all_available_data: return bis_valid # Why use the 'no_limit' drg? See comment to `_bis_available_all` @@ -2061,7 +2148,7 @@ def _bis_available_any(self) -> list[BI]: some point during the period). """ bis_valid = self._bis_valid - if not self.gpp.request_all_available_data: + if self.gpp.request_all_available_data: return bis_valid # Why use the 'no_limit' drg? See comment to `_bis_available_all` @@ -3119,12 +3206,10 @@ def request_earliest_available_data(self) -> bool: """Query if params represent request for earliest available data.""" return self.pp_raw["start"] is None and not self.duration - # TODO INCLUDE TEST FOR ADDED PROPERTY, probably alongside the test that includes - # effect of request_earliest_available_data @property def request_all_available_data(self) -> bool: """Query if params represent request for all available data.""" - return not ( + return ( self.pp_raw["start"] is None and self.pp_raw["end"] is None and not self.duration @@ -4887,15 +4972,61 @@ def price_range( return res, df return res - @abc.abstractmethod - def prices_for_symbols(self, symbols: str) -> "PricesBase": - """Instance of subclass itself for one or more symbols. + @staticmethod + def _remove_non_trading_indices( + df: pd.DataFrame, cals: list[xcals.ExchangeCalendar] + ) -> pd.DataFrame: + """Remove indices that include no minutes of any of `cals`.""" + non_trading = df.pt.indices_non_trading(cals[0]) + for cal in cals[1:]: + non_trading = non_trading.intersection(df.pt.indices_non_trading(cal)) + return df.drop(labels=non_trading) + + def _get_class_instance(self, symbols: list[str], **kwargs) -> "PricesBase": + """Return an instance of the prices class with the same parameters as self. - Subclass should implement to populate instance with any pre-existing - price data. + Notes + ----- + If required, subclass should override or extend this method. + """ + cals_all = {s: self.calendars[s] for s in symbols} + delays_all = {s: self.delays[s].components.minutes for s in symbols} + if self.lead_symbol_default in symbols: + kwargs.setdefault("lead_symbol", self.lead_symbol_default) + return type(self)( + symbols=symbols, calendars=cals_all, delays=delays_all, **kwargs + ) + + def prices_for_symbols(self, symbols: Symbols) -> "PricesBase": + """Return instance of prices class for one or more symbols. + + Populates instance with any pre-existing price data. Parameters ---------- - symbols: str - Symbols to be carried into new instance. + symbols + Symbols to include to the new instance. Passed as class' + 'symbols' parameter. """ + # pylint: disable=protected-access + symbols = helpers.symbols_to_list(symbols) + difference = set(symbols).difference(set(self.symbols)) + if difference: + msg = ( + "symbols must be a subset of Prices' symbols although" + f" received the following symbols which are not:" + f" {difference}.\nPrices symbols are {self.symbols}." + ) + raise ValueError(msg) + prices_obj = self._get_class_instance(symbols) + cals = list(prices_obj.calendars_unique) + fewer_cals = len(cals) < len(self.calendars_unique) + for bi in self.bis: + new_pdata = copy.deepcopy(self._pdata[bi]) + if new_pdata._table is not None: + table = new_pdata._table[symbols].copy() + if fewer_cals: + table = self._remove_non_trading_indices(table, cals) + new_pdata._table = table + prices_obj._pdata[bi] = new_pdata + return prices_obj diff --git a/src/market_prices/prices/csv.py b/src/market_prices/prices/csv.py index 583a8a4..10763ce 100644 --- a/src/market_prices/prices/csv.py +++ b/src/market_prices/prices/csv.py @@ -12,7 +12,6 @@ from collections import defaultdict from datetime import timedelta from pathlib import Path -from typing import Any from exchange_calendars import ExchangeCalendar import numpy as np @@ -796,8 +795,8 @@ class PricesCsv(base.PricesBase): ---------- path : str | pathlib.Path Path to directory containing .csv files and/or a hierarchy of - subdirectories containing .csv files that comply with the - requirements detailed to the 'Notes' section. + subdirectories containing .csv files. Files and folders should + conform with requirements detailed here and to the 'Notes' section. The constructor will search for .csv files in this directory and all directories under it. All files without the .csv extension will @@ -806,7 +805,7 @@ class PricesCsv(base.PricesBase): Each csv file must contain data for a single symbol and for a single interval. The symbol and interval should be included within the filename and separated from each other and/or any other parts - of the filename with the '_' separator. The following are examples + of the filename with a '_' separator. The following are examples of valid filenames: MSFT_5T.csv 5T_MSFT.csv @@ -815,10 +814,9 @@ class PricesCsv(base.PricesBase): whatever_MSFT_5T_whatever.csv whatever_MSFT_whatever_5T_whatever.csv whatever_whatever_5T_whatever_MSFT_whatever.csv - ... The interval part expresses the duration of the period corresonding - with each row of data. The interval comprises two parts a unit and + with each row of data. The interval comprises two parts, a unit and a value. Valid units are: MIN - to describe mintues T - to describe mintues @@ -853,9 +851,9 @@ class PricesCsv(base.PricesBase): the file being ignored: MSFT_p5T_else.csv (malformed interval) MSFT_5T_15T,csv (ambiguous interval) - MSFT_5T_TSLA.csv (two symbols included to `symbols` parameter) - MSFT_2D.csv (if interval unit day value cannot be greater than - one) + MSFT_5T_TSLA.csv (two symbols) + MSFT_2D.csv (if interval unit is day then value cannot be + greater than one) MSFT.txt (not a .csv file) The `csv_paths` property shows all the csv files that have been @@ -883,7 +881,8 @@ class PricesCsv(base.PricesBase): `str` of ISO Code of an exchange for which the `exchange_calendars` package maintains a calendar. See https://github.com/gerrymanoim/exchange_calendars#calendars - or call market_prices.get_exchange_info`. + or call market_prices.get_exchange_info`. For example: + calendars="XLON", `str` of any other calendar name supported by `exchange_calendars`, as returned by @@ -895,15 +894,15 @@ class PricesCsv(base.PricesBase): calendar). List should have same length as `symbols` with each element relating to the symbol at the corresponding index. - Dictionary with items representing only those symbols for which - wish to define a calendar. Any symbol not included to keys will - be assigned, if possible, the default calendar assigned for the - symbol. + Dictionary mapping each symbol with a calendar. key: str symbol. value: mptypes.Calendar (i.e. as for a single calendar) Calendar corresponding with symbol. + For example: + calendars = {"MSFT": "XNYS", "AZN.L": "XLON"} + Each Calendar should have a first session no later than the first session from which prices are available for any symbol corresponding with that calendar. @@ -911,16 +910,18 @@ class PricesCsv(base.PricesBase): lead_symbol : str Symbol with calendar that should be used as the default calendar to evaluate period from period parameters. If not passed default - calendar will be defined as the most common calendar. + calendar will be defined as the most common calendar (and if more + than one calendar is the most common then of those the the calendar + that's defined 'first' in the list or dictionary). read_csv_kwargs : Optional[dict[str, Any]] Keyword argumnets to pass to `pandas.read_csv` to parse a csv file - to a pandas DataFrame. The 'Notes' section covers how a csv file - can be formatted such that it parses with the default - `read_csv_kwargs`. + to a pandas DataFrame. See the 'Notes' section for how a csv file + can be formatted such that it parses under the default + implementation. - market-prices requires that the DataFrame parses with: - index as a pd.DatetimeIndex named 'date'. + market_prices requires that the DataFrame parses with: + index as a `pd.DatetimeIndex` named 'date'. columns labelled 'open', 'high', 'low', 'close' and optionally 'volume', each with dtype "float64". @@ -938,8 +939,8 @@ class PricesCsv(base.PricesBase): See help(pandas.read_csv) for all available kwargs. Note that the following arguments will always be passed by - market_prices to `pandas.read_csv` with the following values which - cannot be overriden by `read_csv_kwargs`: + market_prices to `pandas.read_csv` with the following values (these + values cannot be overriden by `read_csv_kwargs`): "filepath_or_buffer": "dtype": { 'open': "float64", @@ -962,14 +963,14 @@ class PricesCsv(base.PricesBase): } This would override the names as defined in the csv file's first row with the required values. Note that all references to column - names in other kwargs, such as 'usecols' and 'dtype', will now look - at the overridden names (as required), not the names as defined in - the csv files. + names in other kwargs, such as 'usecols' and 'dtype', will now + refer to the overridden names (as required), not the names as + defined in the csv files. ohlc_thres : float, default: 0.08 - Threshold to reject incongrument ohlc data, in terms of percentage - of incongrument rows. For example, pass as 0.1 to reject data if - more than 10% of rows exhibit incongruent data. + Threshold to reject incongruent ohlc data, in terms of maximum + percentage of incongrument rows to permit. For example, pass as 0.1 + to reject data if more than 10% of rows exhibit incongruent data. If the number of incongruent rows are below the threshold then adjustements will be made to force congruence. @@ -984,9 +985,9 @@ class PricesCsv(base.PricesBase): open is higher than high within threshold, open will be forced to high - NOTE Data will always be rejected if any row has a high value - lower than the low value. No provision is made for setting a - threshold in this circumstance. + Note: Data will always be rejected if any row has a high value + lower than the low value. No provision is made to permit this + circumstance. pm_subsession_origin : Literal["open", "break_end"], default: "open" How to evaluate indices of sessions that include a break. (The @@ -1019,9 +1020,7 @@ class PricesCsv(base.PricesBase): represented (it's common for data sources to exclude intraday data for periods during which a symbol did not register a trade). The price data will be reindexed against expected indices as evaluated from the - corresponding calendar of `calendars`. This complies with the - `base.PricesBase` implementation's requirement that all indices are - included over all periods of regular trading. + corresponding calendar of `calendars`. For daily price data values in the 'date' column should represent a date, for example '2023-11-16'. @@ -1044,7 +1043,7 @@ class PricesCsv(base.PricesBase): is aligned with the interval, based on the (sub)session open, and which falls before the corresonding (sub)session close. See the `pm_subsession_origin` parameter for how to determine how indices are - evaluated for for sessions that include a break. + evaluated for sessions that include a break. Examples If a session opens at 10:00 and the interval is 15T then @@ -1059,12 +1058,6 @@ class PricesCsv(base.PricesBase): regular trading hours will be ignored. """ - # TODO HAVE A READ THROUGH / final revision of DOC - - # These are defined dynamically by constructor - BASE_LIMITS: dict[BI, pd.Timedelta | pd.Timestamp | None] = {} - BASE_LIMITS_RIGHT: dict[BI, pd.Timestamp | None] = {} - @parse def __init__( self, @@ -1085,6 +1078,14 @@ def __init__( "'path' must represent an existing local directory, although" f" received {path}." ) + + self._receieved_kwargs = dict( + path=path, + read_csv_kwargs=read_csv_kwargs, + ohlc_thres=ohlc_thres, + pm_subsession_origin=pm_subsession_origin, + ) # for `prices_for_symbols` + root = path self.PM_SUBSESSION_ORIGIN = pm_subsession_origin # override class attr symbols_ = helpers.symbols_to_list(symbols) @@ -1113,11 +1114,9 @@ def __init__( super().__init__(symbols_, calendars, lead_symbol, delays) - # TODO following attrs are temporary affairs - # for _tables don't duplicate any data - needs to go through to the pdata or what/wherever self._tables, reindexing_warnings = self._compile_tables(parsed_data) all_errors_warnings.extend(reindexing_warnings) - self.all_errors_warnings = all_errors_warnings + if not self._tables: raise CsvNoDataError(symbols_, all_errors_warnings, verbose) @@ -1263,6 +1262,44 @@ def _request_data( ) return self._tables[interval] - def prices_for_symbols(): - # TODO - pass + def _get_class_instance(self, symbols: list[str], **kwargs) -> "PricesCsv": + """Return an instance of PricesCsv with same arguments as self. + + Notes + ----- + If required, subclass should override or extend this method. + """ + cals = {s: self.calendars[s] for s in symbols} + if self.lead_symbol_default in symbols: + kwargs.setdefault("lead_symbol", self.lead_symbol_default) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + rtrn = type(self)( + symbols=symbols, calendars=cals, **self._receieved_kwargs, **kwargs + ) + + return rtrn + + def prices_for_symbols(self, symbols: mptypes.Symbols) -> "PricesCsv": + """Return instance of prices class for one or more symbols. + + Creates new instance for `symbols` with freshly retrieved price data. + + Parameters + ---------- + symbols + Symbols to include to the new instance. Passed as class' + 'symbols' parameter. + """ + # pylint: disable=protected-access + symbols = helpers.symbols_to_list(symbols) + difference = set(symbols).difference(set(self.symbols)) + if difference: + msg = ( + "symbols must be a subset of Prices' symbols although" + f" received the following symbols which are not:" + f" {difference}.\nPrices symbols are {self.symbols}." + ) + raise ValueError(msg) + return self._get_class_instance(symbols) diff --git a/src/market_prices/prices/yahoo.py b/src/market_prices/prices/yahoo.py index a20bc9a..840c602 100644 --- a/src/market_prices/prices/yahoo.py +++ b/src/market_prices/prices/yahoo.py @@ -2,7 +2,6 @@ from __future__ import annotations -import copy import datetime import functools import warnings @@ -18,7 +17,7 @@ from market_prices.helpers import UTC from market_prices.prices import base -from ..mptypes import Calendar, Symbols +from ..mptypes import Calendar from .config import config_yahoo @@ -872,51 +871,3 @@ def _request_data( end_ += pd.Timedelta(22, "H") prices = self._request_yahoo(interval=interval, start=start, end=end_) return self._tidy_yahoo(prices, interval, start, end) - - @staticmethod - def _remove_non_trading_indices( - df: pd.DataFrame, cals: list[xcals.ExchangeCalendar] - ) -> pd.DataFrame: - """Remove indices that include no minutes of any of `cals`.""" - non_trading = df.pt.indices_non_trading(cals[0]) - for cal in cals[1:]: - non_trading = non_trading.intersection(df.pt.indices_non_trading(cal)) - return df.drop(labels=non_trading) - - def prices_for_symbols(self, symbols: Symbols) -> base.PricesBase: - """Return PricesYahoo instance for one or more symbols. - - Populates instance with any pre-existing price data. - - Parameters - ---------- - symbols - Symbols to include to the new instance. Passed as class' - 'symbols' parameter. - """ - # pylint: disable=protected-access - symbols = helpers.symbols_to_list(symbols) - difference = set(symbols).difference(set(self.symbols)) - if difference: - msg = ( - "symbols must be a subset of Prices' symbols although" - f" received the following symbols which are not:" - f" {difference}.\nPrices symbols are {self.symbols}." - ) - raise ValueError(msg) - - cals_all = {s: self.calendars[s] for s in symbols} - delays_all = {s: self.delays[s].components.minutes for s in symbols} - prices_obj = type(self)(symbols=symbols, calendars=cals_all, delays=delays_all) - - cals = list(prices_obj.calendars_unique) - fewer_cals = len(cals) < len(self.calendars_unique) - for bi in self.bis: - new_pdata = copy.deepcopy(self._pdata[bi]) - if new_pdata._table is not None: - table = new_pdata._table[symbols].copy() - if fewer_cals: - table = self._remove_non_trading_indices(table, cals) - new_pdata._table = table - prices_obj._pdata[bi] = new_pdata - return prices_obj diff --git a/tests/test_base.py b/tests/test_base.py index f80826b..3f0f097 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -892,18 +892,39 @@ def prices_for_symbols(self, *_, **__): yield PricesMockEmpty_ +@pytest.fixture +def right_limits() -> abc.Iterator[tuple[pd.Timestamp, pd.Timestamp]]: + """Use for an intrday / daily interval with a fixed right limit.""" + yield pd.Timestamp("2021-10-20 18:22", tz=UTC), pd.Timestamp("2021-10-20") + + +@pytest.fixture +def left_limits() -> abc.Iterator[dict[intervals.TDInterval, pd.Timestamp]]: + """Use to define an intrday interval left limits as a timestamp.""" + yield { + intervals.TDInterval.T1: pd.Timestamp("2021-09-20 18:23:00+0000", tz="UTC"), + intervals.TDInterval.T5: pd.Timestamp("2021-08-21 18:23:00+0000", tz="UTC"), + intervals.TDInterval.H1: pd.Timestamp("2020-10-20 18:23:00+0000", tz="UTC"), + } + + @pytest.fixture def prices_mock_base_intervals( - daily_limit, + daily_limit, right_limits, left_limits ) -> abc.Iterator[ tuple[ type[intervals._BaseInterval], dict[intervals._BaseInterval, pd.Timedelta | pd.Timestamp], + dict[intervals._BaseInterval, pd.Timestamp], + dict[intervals._BaseInterval, pd.Timestamp], ] ]: - """BaseInterval and corresponding BASE_LIMITS for a PricesMock class. + """BaseInterval and corresponding limits for a PricesMock class. + + Defines left and right limits for both intraday and daily intervals. - Defines intraday and daily intervals. + Left intervals defined as pd.Timedelta and pd.Timestamp, use that + which required. """ BaseInterval = intervals._BaseInterval( "BaseInterval", @@ -921,21 +942,39 @@ def prices_mock_base_intervals( BaseInterval.H1: pd.Timedelta(365, "D"), BaseInterval.D1: daily_limit, } - yield BaseInterval, LIMITS + + LIMITS_FIXED = { + BaseInterval.T1: left_limits[intervals.TDInterval.T1], + BaseInterval.T5: left_limits[intervals.TDInterval.T5], + BaseInterval.H1: left_limits[intervals.TDInterval.H1], + BaseInterval.D1: daily_limit, + } + + RIGHT_LIMITS = { + BaseInterval.T1: right_limits[0], + BaseInterval.T5: right_limits[0], + BaseInterval.H1: right_limits[0], + BaseInterval.D1: right_limits[1], + } + + yield BaseInterval, LIMITS, LIMITS_FIXED, RIGHT_LIMITS @pytest.fixture -def prices_mock_base_intervals_intraday_only() -> ( - abc.Iterator[ - tuple[ - type[intervals._BaseInterval], - dict[intervals._BaseInterval, pd.Timedelta], - ] +def prices_mock_base_intervals_intraday_only( + left_limits, right_limits +) -> abc.Iterator[ + tuple[ + type[intervals._BaseInterval], + dict[intervals._BaseInterval, pd.Timedelta], + dict[intervals._BaseInterval, pd.Timestamp], + dict[intervals._BaseInterval, pd.Timestamp], ] -): - """BaseInterval and corresponding BASE_LIMITS for a PricesMock class. +]: + """BaseInterval and corresponding left and right limits for a PricesMock class. - Defines only intraday intervals. + Defines only intraday intervals. Defines left limits as both timedelta + and fixed timestamp. """ BaseInterval = intervals._BaseInterval( "BaseInterval", @@ -951,19 +990,33 @@ def prices_mock_base_intervals_intraday_only() -> ( BaseInterval.T5: pd.Timedelta(60, "D"), BaseInterval.H1: pd.Timedelta(365, "D"), } - yield BaseInterval, LIMITS + + LIMITS_FIXED = { + BaseInterval.T1: left_limits[intervals.TDInterval.T1], + BaseInterval.T5: left_limits[intervals.TDInterval.T5], + BaseInterval.H1: left_limits[intervals.TDInterval.H1], + } + + RIGHT_LIMITS = { + BaseInterval.T1: right_limits[0], + BaseInterval.T5: right_limits[0], + BaseInterval.H1: right_limits[0], + } + + yield BaseInterval, LIMITS, LIMITS_FIXED, RIGHT_LIMITS @pytest.fixture def prices_mock_base_intervals_daily_only( - daily_limit, + daily_limit, right_limits ) -> abc.Iterator[ tuple[ type[intervals._BaseInterval], dict[intervals._BaseInterval, pd.Timestamp], + dict[intervals._BaseInterval, pd.Timestamp], ] ]: - """BaseInterval and corresponding BASE_LIMITS for a PricesMock class. + """BaseInterval and corresponding left and right limits for a PricesMock class. Defines only daily interval. """ @@ -971,7 +1024,12 @@ def prices_mock_base_intervals_daily_only( "BaseInterval", dict(D1=intervals.TIMEDELTA_ARGS["D1"]) ) LIMITS = {BaseInterval.D1: daily_limit} - yield BaseInterval, LIMITS + + RIGHT_LIMITS = { + BaseInterval.D1: right_limits[1], + } + + yield BaseInterval, LIMITS, RIGHT_LIMITS @pytest.fixture @@ -979,7 +1037,7 @@ def PricesMock( PricesMockEmpty, prices_mock_base_intervals ) -> abc.Iterator[type[m.PricesBase]]: """Mock PricesBase class with both intraday and daily intervals.""" - base_interval, limits = prices_mock_base_intervals + base_interval, limits, _, _ = prices_mock_base_intervals class PricesMock_(PricesMockEmpty): """Mock PricesBase class with both intraday and daily intervals.""" @@ -996,7 +1054,7 @@ def PricesMockIntradayOnly( PricesMockEmpty, prices_mock_base_intervals_intraday_only ) -> abc.Iterator[type[m.PricesBase]]: """Mock PricesBase class with only intraday intervals.""" - base_interval, limits = prices_mock_base_intervals_intraday_only + base_interval, limits, _, _ = prices_mock_base_intervals_intraday_only class PricesMockIntradayOnly_(PricesMockEmpty): # type: ignore[valid-type, misc] """Mock PricesBase class with only intraday intervals.""" @@ -1013,7 +1071,7 @@ def PricesMockDailyOnly( PricesMockEmpty, prices_mock_base_intervals_daily_only ) -> abc.Iterator[type[m.PricesBase]]: """Mock PricesBase with only a daily interval.""" - base_interval, limits = prices_mock_base_intervals_daily_only + base_interval, limits, _ = prices_mock_base_intervals_daily_only class PricesMockDailyOnly_(PricesMockEmpty): # type: ignore[valid-type, misc] """Mock PricesBase with only a daily interval.""" @@ -1036,6 +1094,60 @@ class PricesMockBreakendPmOrigin_(PricesMock): # type: ignore[valid-type, misc] yield PricesMockBreakendPmOrigin_ +@pytest.fixture +def PricesMockFixedLimits( + PricesMockEmpty, prices_mock_base_intervals +) -> abc.Iterator[type[m.PricesBase]]: + """Mock PricesBase class with fixed limits and both intraday and daily intervals.""" + base_interval, _, limits, limits_right = prices_mock_base_intervals + + class PricesMockFixedLimits_(PricesMockEmpty): + """Mock PricesBase class with both intraday and daily intervals.""" + + # pylint: disable=too-few-public-methods + BaseInterval = base_interval + BASE_LIMITS = limits + BASE_LIMITS_RIGHT = limits_right + + yield PricesMockFixedLimits_ + + +@pytest.fixture +def PricesMockIntradayOnlyFixedLimits( + PricesMockEmpty, prices_mock_base_intervals_intraday_only +) -> abc.Iterator[type[m.PricesBase]]: + """Mock PricesBase class with fixed limits and only intraday intervals.""" + base_interval, _, limits, limits_right = prices_mock_base_intervals_intraday_only + + class PricesMockIntradayOnlyFixedLimits_(PricesMockEmpty): # type: ignore[valid-type, misc] + """Mock PricesBase class with only intraday intervals.""" + + # pylint: disable=too-few-public-methods + BaseInterval = base_interval + BASE_LIMITS = limits + BASE_LIMITS_RIGHT = limits_right + + yield PricesMockIntradayOnlyFixedLimits_ + + +@pytest.fixture +def PricesMockDailyOnlyFixedLimits( + PricesMockEmpty, prices_mock_base_intervals_daily_only +) -> abc.Iterator[type[m.PricesBase]]: + """Mock PricesBase with fixed limits and only a daily interval.""" + base_interval, limits, limits_right = prices_mock_base_intervals_daily_only + + class PricesMockDailyOnlyFixedLimits_(PricesMockEmpty): # type: ignore[valid-type, misc] + """Mock PricesBase with only a daily interval.""" + + # pylint: disable=too-few-public-methods + BaseInterval = base_interval + BASE_LIMITS = limits + BASE_LIMITS_RIGHT = limits_right + + yield PricesMockDailyOnlyFixedLimits_ + + @pytest.fixture(scope="class") def symbols() -> abc.Iterator[list[str]]: """Fictitious symbols for a mock prices class.""" @@ -1083,6 +1195,7 @@ def __init__( delay: pd.Timedelta = pd.Timedelta(0), limit: pd.Timestamp | None = None, ignore_breaks: bool | dict[intervals.BI, bool] = False, + limit_right: pd.Timestamp | None = None, ): """Constructor. @@ -1109,6 +1222,9 @@ def __init__( limit : default: xnys.first_minute Passed to base class constructor `limit` parameter. + limit_right: default: None + Passed to base class constructor `limit_right` parameter. + ignore_breaks : default: False for all intervals Passed to base class constructor `ignore_breaks` parameter. """ @@ -1127,6 +1243,7 @@ def __init__( limit=xnys.first_minute, interval=interval, ignore_breaks=ignore_breaks, + limit_right=limit_right, ) @property @@ -1157,7 +1274,7 @@ def test_base_intervals_and_limits_defined( with pytest.raises(AttributeError, match=match): PricesMockEmpty(symbols, xnys) - base_intervals, limits = prices_mock_base_intervals + base_intervals, limits, _, _ = prices_mock_base_intervals class PricesMockNoLimits(PricesMockEmpty): BaseInterval = base_intervals @@ -1642,7 +1759,10 @@ def test_limits( one_min, monkeypatch, ): - """Test limits properties.""" + """Test limits properties. + + Test default right intrday / daily limit is 'now' / 'today'. + """ def mock_now(tz=None) -> pd.Timestamp: return pd.Timestamp("2022-02-14 21:21:05", tz=tz) @@ -1668,6 +1788,8 @@ def mock_now(tz=None) -> pd.Timestamp: # verify `limit_daily` assert prices.limit_daily == daily_limit + # verify `limit_right_daily` defaults to today + assert prices.limit_right_daily == today # verify `limit_intraday` delta = PricesMock.BASE_LIMITS[PricesMock.BaseInterval.T5] # unaligned at H1 @@ -1680,6 +1802,8 @@ def mock_now(tz=None) -> pd.Timestamp: expected_latest_intraday_limit = max(limits_intraday) assert prices.limit_intraday() == expected_latest_intraday_limit assert prices.limit_intraday(None) == expected_latest_intraday_limit + # verify `limit_right_intraday` + assert prices.limit_right_intraday == now.floor("T") # verify 'limit_sessions' assert len(prices.limits_sessions) == len(PricesMock.BaseInterval) @@ -1705,6 +1829,7 @@ def mock_now(tz=None) -> pd.Timestamp: assert prices.limits[bi_daily] == (limit_daily, today) assert prices.limit_daily == limit_daily + assert prices.limit_right_daily == today for cal in calendars: expected_limit_intraday = cal.minute_to_trading_minute(limit_raw, "next") assert prices.limit_intraday(cal) == expected_limit_intraday @@ -1722,6 +1847,7 @@ def mock_now(tz=None) -> pd.Timestamp: assert not pd.Timedelta(1, "D") in prices.bis assert prices.limit_daily is None + assert prices.limit_right_daily is None # verify None when no daily interval for cal in calendars: expected_limit_intraday = cal.minute_to_trading_minute(limit_raw, "next") assert prices.limit_intraday(cal) == expected_limit_intraday @@ -1729,6 +1855,152 @@ def mock_now(tz=None) -> pd.Timestamp: assert prices.limit_intraday(None) == expected_latest_intraday_limit assert len(prices.limits_sessions) == len(PricesMockIntradayOnly.BaseInterval) + def test_limits_fixed( + self, + PricesMock, + PricesMockFixedLimits, + PricesMockIntradayOnlyFixedLimits, + PricesMockDailyOnlyFixedLimits, + daily_limit, + right_limits, + left_limits, + symbols, + xnys, + xlon, + xhkg, + ): + """Test limit properties when class has fixed left and right limits.""" + right_limit, right_limit_daily = right_limits + + calendars = [xnys, xhkg, xlon] + prices = PricesMockFixedLimits(symbols, calendars) + + # verify `limits`. + assert set(prices.limits.keys()) == set(PricesMockFixedLimits.BaseInterval) + assert len(prices.limits) == len(PricesMockFixedLimits.BaseInterval) + + for bi in PricesMockFixedLimits.BaseInterval: + if bi.is_daily: + assert prices.limits[bi] == (daily_limit, right_limit_daily) + else: + assert prices.limits[bi] == (left_limits[bi], right_limit) + + # verify `limit_daily` and `limit_right_daily` + assert prices.limit_daily == daily_limit + assert prices.limit_right_daily == right_limit_daily + + # verify `limit_intraday` + limit_raw = left_limits[intervals.TDInterval.T5] # unaligned at H1 + limits_intraday = [] + for cal in calendars: + expected_limit_intraday = cal.minute_to_trading_minute(limit_raw, "next") + limits_intraday.append(expected_limit_intraday) + assert prices.limit_intraday(cal) == expected_limit_intraday + + expected_latest_intraday_limit = max(limits_intraday) + assert prices.limit_intraday() == expected_latest_intraday_limit + assert prices.limit_intraday(None) == expected_latest_intraday_limit + + # verify `limit_right_intraday` + assert prices.limit_right_intraday == right_limits[0] + + # verify 'limit_sessions' + assert len(prices.limits_sessions) == len(PricesMock.BaseInterval) + + # from manual inspection: + lefts = { + PricesMock.BaseInterval.T1: pd.Timestamp("2021-09-20"), + PricesMock.BaseInterval.T5: pd.Timestamp("2021-08-23"), # 21 is a Saturday + PricesMock.BaseInterval.H1: pd.Timestamp("2020-10-20"), + PricesMock.BaseInterval.D1: daily_limit, + } + + for bi in PricesMock.BaseInterval: + assert prices.limits_sessions[bi] == (lefts[bi], right_limits[1]) + + prices = PricesMockDailyOnlyFixedLimits(symbols, calendars) + assert set(prices.limits.keys()) == set( + PricesMockDailyOnlyFixedLimits.BaseInterval + ) + assert len(prices.limits) == len(PricesMockDailyOnlyFixedLimits.BaseInterval) + bi_daily = PricesMockDailyOnlyFixedLimits.BaseInterval.D1 + assert prices.limits[bi_daily] == (daily_limit, right_limits[1]) + + assert prices.limit_daily == daily_limit + assert prices.limit_right_daily == right_limits[1] + + match = re.escape( + "`limit_intraday` is not implemented when no intraday interval is defined." + ) + with pytest.raises(NotImplementedError, match=match): + prices.limit_intraday() + + # verify `limit_right_intraday` + match = re.escape( + "`limit_right_intraday` is not implemented when no intraday interval" + " is defined." + ) + with pytest.raises(NotImplementedError, match=match): + prices.limit_right_intraday() + + assert len(prices.limits_sessions) == len( + PricesMockDailyOnlyFixedLimits.BaseInterval + ) + assert prices.limits_sessions[bi_daily] == (daily_limit, right_limits[1]) + + prices = PricesMockIntradayOnlyFixedLimits(symbols, calendars) + + assert set(prices.limits.keys()) == set( + PricesMockIntradayOnlyFixedLimits.BaseInterval + ) + assert len(prices.limits) == len(PricesMockIntradayOnlyFixedLimits.BaseInterval) + assert pd.Timedelta(1, "T") in prices.bis + assert not pd.Timedelta(1, "D") in prices.bis + + assert prices.limit_daily is None + assert prices.limit_right_daily is None # verify None when no daily interval + for cal in calendars: + expected_limit_intraday = cal.minute_to_trading_minute(limit_raw, "next") + assert prices.limit_intraday(cal) == expected_limit_intraday + assert prices.limit_intraday() == expected_latest_intraday_limit + assert prices.limit_intraday(None) == expected_latest_intraday_limit + assert len(prices.limits_sessions) == len( + PricesMockIntradayOnlyFixedLimits.BaseInterval + ) + + def test_live_prices( + self, + PricesMock, + PricesMockDailyOnly, + PricesMockIntradayOnly, + PricesMockFixedLimits, + PricesMockIntradayOnlyFixedLimits, + PricesMockDailyOnlyFixedLimits, + symbols, + xnys, + xlon, + xhkg, + ): + """Test `live_prices` property.""" + + calendars = [xnys, xhkg, xlon] + + # verifications against manual inspection of calendars' schedules. + + prices = PricesMock(symbols, calendars) + assert prices.live_prices + prices = PricesMockDailyOnly(symbols, calendars) + assert prices.live_prices + prices = PricesMockIntradayOnly(symbols, calendars) + assert prices.live_prices + + prices = PricesMockFixedLimits(symbols, calendars) + assert not prices.live_prices + prices = PricesMockDailyOnlyFixedLimits(symbols, calendars) + assert not prices.live_prices + prices = PricesMockIntradayOnlyFixedLimits(symbols, calendars) + assert not prices.live_prices + def test_earliest( self, PricesMock, @@ -1799,7 +2071,7 @@ def mock_now(tz=None) -> pd.Timestamp: expected_minute = xnys.session_open("2021-12-17") assert prices.earliest_requestable_minute == expected_minute - def test_last_requestable( + def test_last_requestable_session_( self, PricesMock, symbols, xnys, xlon, xhkg, one_min, monkeypatch ): """Test `last_requestable_session*` methods. @@ -1831,6 +2103,43 @@ def patch_now(ts: pd.Timestamp): patch_now(xhkg_open) assert prices.last_requestable_session_any == prev_session + def test_latest_requestable_minute( + self, + PricesMock, + PricesMockFixedLimits, + PricesMockIntradayOnlyFixedLimits, + PricesMockDailyOnlyFixedLimits, + symbols, + right_limits, + xnys, + xlon, + xhkg, + monkeypatch, + ): + """Test `latest_requestable_minute` property.""" + + def mock_now(tz=None) -> pd.Timestamp: + return pd.Timestamp("2022-02-14 21:21:05", tz=tz) + + monkeypatch.setattr("pandas.Timestamp.now", mock_now) + calendars = [xnys, xhkg, xlon] + + now = mock_now(tz="UTC") + + # verifications against manual inspection of calendars' schedules. + + prices = PricesMock(symbols, calendars) + assert prices.latest_requestable_minute == now.floor("min") + + prices = PricesMockDailyOnlyFixedLimits(symbols, calendars) + assert prices.latest_requestable_minute == xnys.closes[right_limits[1]] + + prices = PricesMockIntradayOnlyFixedLimits(symbols, calendars) + assert prices.latest_requestable_minute == right_limits[0] + + prices = PricesMockFixedLimits(symbols, calendars) + assert prices.latest_requestable_minute == xnys.closes[right_limits[1]] + def test__indices_aligned( self, PricesMock, @@ -2375,6 +2684,60 @@ def test__minute_to_latest_next_trading_minute(PricesMock, cal_start, side, one_ assert f(minute) == xnys_next_session_open +def test__minute_to_earliest_previous_trading_minute( + PricesMock, cal_start, side, one_min +): + """Test `_minute_to_latest_next_trading_minute`.""" + xnys = xcals.get_calendar("XNYS", start=cal_start, side=side) + xlon = xcals.get_calendar("XLON", start=cal_start, side=side) + xhkg = xcals.get_calendar("XHKG", start=cal_start, side=side) + + symbols = "LON, NY, HK" + prices = PricesMock(symbols, [xlon, xnys, xhkg]) + f = prices._minute_to_earliest_previous_trading_minute + + # two consecutive sessions for all calendars (from knowledge of schedule) + session = pd.Timestamp("2021-12-22") + next_session = pd.Timestamp("2021-12-23") + + # verify from xnys close to xnys open of next session + xhkg_next_session_last_min = xhkg.last_minutes[next_session] + for minute in ( + xnys.closes[next_session], + xlon.closes[next_session], + xnys.opens[next_session], + ): + assert f(minute) == xhkg_next_session_last_min + assert f(xnys.opens[next_session] - one_min) != xhkg_next_session_last_min + + # verify from xnys open to xlon open of next session + xnys_session_last_min = xnys.last_minutes[session] + for minute in ( + xnys.opens[next_session] - one_min, + xlon.opens[next_session], + ): + assert f(minute) == xnys_session_last_min + assert f(xlon.opens[next_session] - one_min) != xhkg_next_session_last_min + + # verify from xlon open to xhkg open of next session + xlon_session_last_min = xlon.last_minutes[session] + for minute in ( + xlon.opens[next_session] - one_min, + xhkg.opens[next_session], + ): + assert f(minute) == xlon_session_last_min + assert f(xhkg.opens[next_session] - one_min) != xlon_session_last_min + + # verify from xhkg open of next session to xnys open of session + xhkg_session_last_min = xhkg.last_minutes[session] + for minute in ( + xhkg.opens[next_session] - one_min, + xnys.opens[session], + ): + assert f(minute) == xhkg_session_last_min + assert f(xnys.opens[session] - one_min) != xhkg_session_last_min + + def test__get_trading_index( PricesMock, PricesMockBreakendPmOrigin, @@ -2433,7 +2796,7 @@ def expected_index( class TestBis: - """Tests methods and properties that return base interval/s.""" + """Test methods and properties that return base interval/s.""" _now = pd.Timestamp("2022", tz=UTC) @@ -2527,12 +2890,53 @@ def pp_raw(self) -> str: @property def request_all_available_data(self) -> bool: - return True + return False self._gpp = GetPricesParamsMock(drg, drg, ds_interval, anchor) yield PricesMockBis_ + @pytest.fixture + def right_limits(self) -> abc.Iterator[tuple[pd.Timestamp, pd.Timestamp]]: + """Use for an intrday / daily interval with a fixed right limit.""" + yield pd.Timestamp("2021-12-20 18:22", tz=UTC), pd.Timestamp("2021-12-20") + + @pytest.fixture + def PricesRightLimitMockBis( + self, PricesMockBis, right_limits + ) -> abc.Iterator[type[m.PricesBase]]: + right_limit_intraday, right_limit_daily = right_limits + + class PricesRightLimitMockBis_(PricesMockBis): # type: ignore[valid-type, misc] + """As PricesMockBis with right limit defined earlier than now.""" + + BaseInterval = intervals._BaseInterval( + "BaseInterval", + dict( + T1=intervals.TIMEDELTA_ARGS["T1"], + T2=intervals.TIMEDELTA_ARGS["T2"], + T5=intervals.TIMEDELTA_ARGS["T5"], + T10=intervals.TIMEDELTA_ARGS["T10"], + T15=intervals.TIMEDELTA_ARGS["T15"], + T30=intervals.TIMEDELTA_ARGS["T30"], + H1=intervals.TIMEDELTA_ARGS["H1"], + D1=intervals.TIMEDELTA_ARGS["D1"], + ), + ) + + BASE_LIMITS_RIGHT = { + BaseInterval.T1: right_limit_intraday, + BaseInterval.T2: right_limit_intraday, + BaseInterval.T5: right_limit_intraday, + BaseInterval.T10: right_limit_intraday, + BaseInterval.T15: right_limit_intraday, + BaseInterval.T30: right_limit_intraday, + BaseInterval.H1: right_limit_intraday, + BaseInterval.D1: right_limit_daily, + } + + yield PricesRightLimitMockBis_ + @staticmethod def get_start_end_sessions( cc: calutils.CompositeCalendar, start: pd.Timestamp, end: pd.Timestamp @@ -2587,7 +2991,7 @@ def get_mock_drg_limit_available( Frame: set end of datearange to now. delta_end - start of daterange will be displaced by `delta_end` mintues. + end of daterange will be displaced by `delta_end` mintues. """ start = prices.BASE_LIMITS[bi] assert isinstance(start, pd.Timestamp) @@ -2595,6 +2999,40 @@ def get_mock_drg_limit_available( start += pd.Timedelta(delta, "T") return self.get_mock_drg(GetterMock, prices.cc, start, end) + def get_mock_drg_limit_right_available( + self, + prices: m.PricesBase, + GetterMock: type[daterange.GetterIntraday], + bi: intervals.BI, + delta: int = 0, + limit_start: bool = False, + delta_start: int = 0, + ) -> daterange.GetterIntraday: + """Return drg representing right limit of availability at `bi`. + + Parameters + ---------- + bi + drg will be set to reflect a daterange that covers period that + data is availabile at `bi`. + + delta + start of daterange will be displaced by `delta` mintues. + + limit_start + True: set start of daterange as end (unadjusted for `delta`) + Frame: set start of datearange to left limit for `bi`. + + delta_end + end of daterange will be displaced by `delta_end` mintues. + """ + end = prices.BASE_LIMITS_RIGHT[bi] + start = end if limit_start else prices.BASE_LIMITS[bi] + assert isinstance(start, pd.Timestamp) + start += pd.Timedelta(delta_start, "T") + end += pd.Timedelta(delta, "T") + return self.get_mock_drg(GetterMock, prices.cc, start, end) + def get_drg( self, calendar: xcals.ExchangeCalendar, @@ -2640,7 +3078,7 @@ def set_prices_gpp_drg_properties( prices.gpp.drg_intraday = drg prices.gpp.drg_intraday_no_limit = drg - def test__bis_valid(self, PricesMockBis, GetterMock, symbols, xlon, xnys): + def test__bis_valid(self, PricesMockBis, GetterMock, symbols, xlon, xnys, one_min): """Test `_bis_valid`.""" # pylint: disable=too-complex prices = PricesMockBis(symbols, [xlon, xnys]) @@ -2733,8 +3171,15 @@ def get_drg(pp: dict) -> daterange.GetterIntraday: prices.gpp.drg_intraday = drg assert prices._bis_valid == prices.bis_intraday[:-3] - def test__bis_available(self, PricesMockBis, GetterMock, symbols, xlon, xnys): - """Test `_bis_available_all` and `_bis_available_end`.""" + def test__bis_available( + self, + PricesMockBis, + GetterMock, + symbols, + xlon, + xnys, + ): + """Test `_bis_available_all`, `_bis_available_end` and `_bis_available_any`.""" prices = PricesMockBis(symbols, [xlon, xnys]) get_drg_args = (prices, GetterMock) @@ -2748,11 +3193,17 @@ def bis_available_end(interval: int, drg) -> list[intervals.BI]: self.set_prices_gpp_drg_properties(prices, drg) return prices._bis_available_end + def bis_available_any(interval: int, drg) -> list[intervals.BI]: + prices.gpp.ds_interval = intervals.to_ptinterval(str(interval) + "T") + self.set_prices_gpp_drg_properties(prices, drg) + return prices._bis_available_any + for i, bi in enumerate(prices.bis_intraday[:-1]): # start at limit for bi, end now drg = self.get_mock_drg_limit_available(*get_drg_args, bi) assert bis_available_all(30, drg) == prices.bis_intraday[i:-1] assert bis_available_end(30, drg) == prices.bis_intraday[:-1] + assert bis_available_any(30, drg) == prices.bis_intraday[:-1] assert bis_available_all(bi.as_minutes, drg) == [bi] @@ -2760,6 +3211,7 @@ def bis_available_end(interval: int, drg) -> list[intervals.BI]: drg = self.get_mock_drg_limit_available(*get_drg_args, bi, -1) assert bis_available_all(30, drg) == prices.bis_intraday[i + 1 : -1] assert bis_available_end(30, drg) == prices.bis_intraday[:-1] + assert bis_available_any(30, drg) == prices.bis_intraday[:-1] assert bis_available_all(bi.as_minutes, drg) == [] @@ -2767,8 +3219,11 @@ def bis_available_end(interval: int, drg) -> list[intervals.BI]: drg = self.get_mock_drg_limit_available(*get_drg_args, bi, limit_end=True) assert bis_available_all(30, drg) == prices.bis_intraday[i:-1] assert bis_available_end(30, drg) == prices.bis_intraday[i:-1] + assert bis_available_any(30, drg) == prices.bis_intraday[i:-1] + assert bis_available_all(bi.as_minutes, drg) == [bi] assert bis_available_end(bi.as_minutes, drg) == [bi] + assert bis_available_any(bi.as_minutes, drg) == [bi] # start and end beyond limit for bi match = re.escape( # start of message only @@ -2778,18 +3233,116 @@ def bis_available_end(interval: int, drg) -> list[intervals.BI]: drg = self.get_mock_drg_limit_available( *get_drg_args, bi, -1, limit_end=True, delta_end=-1 ) - assert bis_available_all(30, drg) == prices.bis_intraday[i + 1 : -1] + if bi.as_minutes == 30: # Not even T30 data available to meet + with pytest.raises(errors.PricesIntradayUnavailableError, match=match): + bis_available_all(30, drg) with pytest.raises(errors.PricesIntradayUnavailableError, match=match): bis_available_end(30, drg) + with pytest.raises(errors.PricesIntradayUnavailableError, match=match): + bis_available_any(30, drg) else: assert bis_available_end(30, drg) == prices.bis_intraday[i + 1 : -1] + assert bis_available_any(30, drg) == prices.bis_intraday[i + 1 : -1] + assert bis_available_all(30, drg) == prices.bis_intraday[i + 1 : -1] # Can only be met by this interval or a lower interval, i.e. can only be # met by intervals for which data not available with pytest.raises(errors.PricesIntradayUnavailableError, match=match): bis_available_end(bi.as_minutes, drg) + with pytest.raises(errors.PricesIntradayUnavailableError, match=match): + bis_available_any(bi.as_minutes, drg) + + def test__bis_available_fixed_right( + self, + PricesRightLimitMockBis, + GetterMock, + symbols, + xlon, + xnys, + ): + """Test `_bis_available_all`, `_bis_available_end` and `_bis_available_any`. + + Tests with Prices class that has a fixed right limit. + """ + prices = PricesRightLimitMockBis(symbols, [xlon, xnys]) + get_drg_args = (prices, GetterMock) + + def bis_available_all(interval: int, drg) -> list[intervals.BI]: + prices.gpp.ds_interval = intervals.to_ptinterval(str(interval) + "T") + self.set_prices_gpp_drg_properties(prices, drg) + return prices._bis_available_all + + def bis_available_end(interval: int, drg) -> list[intervals.BI]: + prices.gpp.ds_interval = intervals.to_ptinterval(str(interval) + "T") + self.set_prices_gpp_drg_properties(prices, drg) + return prices._bis_available_end + + def bis_available_any(interval: int, drg) -> list[intervals.BI]: + prices.gpp.ds_interval = intervals.to_ptinterval(str(interval) + "T") + self.set_prices_gpp_drg_properties(prices, drg) + return prices._bis_available_any + + for i, bi in enumerate(prices.bis_intraday[:-1]): + # start at left limit, end at right limit for bi + drg = self.get_mock_drg_limit_right_available(*get_drg_args, bi) + assert bis_available_all(30, drg) == prices.bis_intraday[i:-1] + assert bis_available_end(30, drg) == prices.bis_intraday[:-1] + assert bis_available_any(30, drg) == prices.bis_intraday[:-1] + + assert bis_available_all(bi.as_minutes, drg) == [bi] + + # start and end at right limit for bi + drg = self.get_mock_drg_limit_right_available( + *get_drg_args, bi, limit_start=True + ) + assert bis_available_all(30, drg) == prices.bis_intraday[:-1] + assert bis_available_end(30, drg) == prices.bis_intraday[:-1] + assert bis_available_any(30, drg) == prices.bis_intraday[:-1] + + assert bis_available_all(bi.as_minutes, drg) + assert bis_available_end(bi.as_minutes, drg) + assert bis_available_any(bi.as_minutes, drg) + + # start before left limit for bi, end at right_limit + drg = self.get_mock_drg_limit_right_available( + *get_drg_args, bi, delta_start=-1 + ) + assert bis_available_all(30, drg) == prices.bis_intraday[i + 1 : -1] + assert bis_available_end(30, drg) == prices.bis_intraday[:-1] + assert bis_available_any(30, drg) == prices.bis_intraday[:-1] + + assert bis_available_all(bi.as_minutes, drg) == [] + + # start at left limit for bi, end after right_limit + drg = self.get_mock_drg_limit_right_available(*get_drg_args, bi, 1) + assert bis_available_all(30, drg) == [] + assert bis_available_end(30, drg) == [] + assert bis_available_any(30, drg) == prices.bis_intraday[:-1] + + # start before left limit for bi, end after right_limit + drg = self.get_mock_drg_limit_right_available( + *get_drg_args, bi, 1, delta_start=-1 + ) + assert bis_available_all(30, drg) == [] + assert bis_available_end(30, drg) == [] + assert bis_available_any(30, drg) == prices.bis_intraday[:-1] + + # start and end beyond limit for bi + match = re.escape( # start of message only + "The start of the requested period is later than the latest" + " timestamp at which intraday data is available for any base interval." + ) + drg = self.get_mock_drg_limit_right_available( + *get_drg_args, bi, 1, limit_start=True, delta_start=1 + ) + with pytest.raises(errors.PricesIntradayUnavailableError, match=match): + bis_available_all(30, drg) + with pytest.raises(errors.PricesIntradayUnavailableError, match=match): + bis_available_end(30, drg) + with pytest.raises(errors.PricesIntradayUnavailableError, match=match): + bis_available_any(30, drg) def test_bis_stored_methods(self, PricesMockBis, GetterMock, symbols, xlon, xnys): """Tests `_bis_stored` and `_get_stored_bi_from_bis`.""" @@ -3296,6 +3849,7 @@ def assert_drg_daily_properties( assert not gpp.intraday_duration assert gpp.duration assert not gpp.request_earliest_available_data + assert not gpp.request_all_available_data # assert parameters being passed through to drg. drg = gpp.drg_intraday @@ -3353,6 +3907,7 @@ def assert_drg_daily_properties( assert gpp.intraday_duration assert gpp.duration assert not gpp.request_earliest_available_data + assert not gpp.request_all_available_data drg = gpp.drg_intraday assert_drg_intraday_properties(drg, gpp, strict, ds_interval) @@ -3377,3 +3932,11 @@ def assert_drg_daily_properties( assert not gpp.intraday_duration assert not gpp.duration assert gpp.request_earliest_available_data + assert not gpp.request_all_available_data + + # alternative parameters just to verify request_all_available_data True + pp = get_pp() + gpp = f(prices, pp, ds_interval, lead_symbol, anchor, openend, strict, priority) + assert not gpp.intraday_duration + assert not gpp.duration + assert gpp.request_all_available_data diff --git a/tests/test_base_prices.py b/tests/test_base_prices.py index efd9631..4c9c22e 100644 --- a/tests/test_base_prices.py +++ b/tests/test_base_prices.py @@ -29,7 +29,7 @@ import valimp import market_prices.prices.base as m -from market_prices import errors, helpers, intervals, mptypes, pt +from market_prices import errors, helpers, intervals, mptypes, pt, data from market_prices.helpers import UTC from market_prices.intervals import TDInterval, DOInterval from market_prices.mptypes import Anchor, OpenEnd, Priority @@ -143,18 +143,23 @@ def __init__( prices_tables: dict[str, pd.DataFrame], lead_symbol: str | None = None, recon_symbols: bool = True, + calendars: list[xcals.ExchangeCalendar] + | dict[str, xcals.ExchangeCalendar] + | None = None, + delays: list[int] | dict[str, int] | None = None, ): self._prices_tables = prices_tables symbols = helpers.symbols_to_list(symbols) if recon_symbols: # verify that prices_tables are for symbols assert set(prices_tables["T1"].pt.symbols) == set(symbols) - if getattr(self.BaseInterval, "D1", False): earliest = min(TST_SYMBOLS[symbol].earliest_date for symbol in symbols) self._update_base_limits({self.BaseInterval.D1: earliest}) - calendars = [TST_SYMBOLS[symbol].calendar for symbol in symbols] - delays = [TST_SYMBOLS[symbol].delay for symbol in symbols] + if calendars is None: + calendars = [TST_SYMBOLS[symbol].calendar for symbol in symbols] + if delays is None: + delays = [TST_SYMBOLS[symbol].delay for symbol in symbols] super().__init__(symbols, calendars, lead_symbol, delays) def _request_data( @@ -177,8 +182,13 @@ def _request_data( end -= helpers.ONE_SEC return df.loc[start:end].copy() - def prices_for_symbols(self, symbols: str): - raise NotImplementedError() + def _get_class_instance(self, symbols: list[str], **kwargs) -> "PricesBaseTst": + """Return an instance of the prices class with the same parameters as self.""" + diff = list(set(self.symbols) - set(symbols)) + tables = { + bi: df.drop(columns=diff, level=0) for bi, df in self._prices_tables.items() + } + return super()._get_class_instance(symbols, prices_tables=tables, **kwargs) class PricesBaseIntradayTst(PricesBaseTst): @@ -6156,3 +6166,106 @@ def test_it(kwargs: dict, pt_type: pt._PT | None = None, to_now: bool = False): # verify for no arguments test_it({}, to_now=True) + + +def test_prices_for_symbols(prices_us_lon): + """Verify `prices_for_symbols`. + + Notes + ----- + H1 interval not tested as not synchronised for xnys/xlon calendars. + """ + prices = prices_us_lon + symbols = prices.symbols + f = prices.prices_for_symbols + + for s, cal in prices.calendars.items(): + if cal.name == "XNYS": + symb_us = s + elif cal.name == "XLON": + symb_lon = s + + _ = prices.get("1d", start="2021-12-31", end="2022-01-05") + + # set up inraday data as period within a single session during which + # us and lon calendars overlap (from inspection of calendar schedules). + cal_us = prices.calendars[symb_us] + cal_lon = prices.calendars[symb_lon] + + session = pd.Timestamp("2022-06-08") + us_open = cal_us.opens[session] + lon_close = cal_lon.closes[session] + assert us_open + pd.Timedelta(1, "H") < lon_close # verify overlap > one hour + start = us_open - pd.Timedelta(2, "H") + end = lon_close + pd.Timedelta(2, "H") + + _ = prices.get("5T", start, us_open, lead_symbol="AZN.L") + _ = prices.get("2T", start, us_open, lead_symbol="AZN.L") + _ = prices.get("1T", start, us_open, lead_symbol="AZN.L") + _ = prices.get("5T", us_open, end) + _ = prices.get("2T", us_open, end) + _ = prices.get("1T", us_open, end) + + def assertions( + pdata: data.Data, + symb: str, + interval: intervals.BI, + expect_missing: bool = True, + ): + orig = prices._pdata[interval] + assert pdata.ranges == orig.ranges + + orig_table = orig._table[symb] + if expect_missing: + # Assert that at least one row of original data should be missing + # from new table + assert orig_table.isna().all(axis=1).any() + + table = pdata._table + assert table is not None + assert table.pt.symbols == [symb] + assert table.notna().all(axis=1).all() + assert_frame_equal(table.droplevel(0, axis=1), orig_table.dropna()) + + # Verify prices for us symb only + us = f(symb_us) + assert us.symbols == [symb_us] + assert us.calendars_unique == [prices.calendars[symb_us]] + + interval = us.BaseInterval.D1 + pdata = us._pdata[interval] + assertions(pdata, symb_us, interval, expect_missing=False) + for interval in us.BaseInterval[:-2]: + pdata = us._pdata[interval] + assert pdata._table.pt.first_ts == us_open + assertions(pdata, symb_us, interval) + + # Verify prices for lon symb only + lon = f(symb_lon) + assert lon.symbols == [symb_lon] + assert lon.calendars_unique == [prices.calendars[symb_lon]] + + for interval in lon.BaseInterval[:-2]: + if interval == intervals.TDInterval.H1: + continue + pdata = lon._pdata[interval] + assert pdata._table.pt.last_ts == lon_close + assertions(pdata, symb_lon, interval) + + # Verify prices when symbols as original + both = f(prices.symbols) + assert both.symbols == prices.symbols + assert both.calendars_unique == prices.calendars_unique + + for interval in both.BaseInterval: + if interval == intervals.TDInterval.H1: + continue + pdata = both._pdata[interval] + table = pdata._table + orig = prices._pdata[interval] + assert pdata.ranges == orig.ranges + assert table.pt.symbols == symbols + assert not table.isna().all(axis=1).any() + # verify columns same length an order identically to compare + assert len(table.columns) == len(orig._table.columns) + assert_frame_equal(table[orig._table.columns], orig._table) diff --git a/tests/test_csv.py b/tests/test_csv.py index b7a0be1..79ba558 100644 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -22,7 +22,7 @@ RESOURCES_PATH, get_resource_pbt, ) - + @pytest.fixture def csv_dir() -> abc.Iterator[Path]: @@ -751,12 +751,10 @@ def res_us_lon_hk() -> abc.Iterator[tuple[dict[str, pd.DataFrame], pd.Timestamp] yield get_resource_pbt("us_lon_hk") -# TODO revise test to look at wherever the tables data ends up def test_tables(csv_dir, symbols, calendars, res_us_lon_hk): with pytest.warns(m.PricesCsvParsingConsolidatedWarning): prices = m.PricesCsv(csv_dir, symbols, calendars) - # TODO revise test to look at wherever the tables data ends up for interval, pdata in prices._pdata.items(): table = pdata._table res = res_us_lon_hk[0][interval.as_pdfreq[-1::-1]] # just reversed freq str @@ -768,3 +766,26 @@ def test_tables(csv_dir, symbols, calendars, res_us_lon_hk): expected = res.loc[table.index.left[0] : table.index.left[-1]] for symbol in symbols: pd.testing.assert_frame_equal(table[symbol], expected[symbol]) + + +def test_prices_for_symbol(csv_dir, symbols, calendars): + """Simple verification.""" + with pytest.warns(m.PricesCsvParsingConsolidatedWarning): + prices = m.PricesCsv(csv_dir, symbols, calendars) + + assert_frame_equal = pd.testing.assert_frame_equal + + kwargs_daily = dict(days=20) + daily_df = prices.get(**kwargs_daily) + kwargs_intraday = dict(minutes=1111, end=prices.limit_right_intraday) + intraday_df = prices.get(**kwargs_intraday) + + new = prices.prices_for_symbols("MSFT AZN.L") + + daily_df_new = new.get(**kwargs_daily) + intraday_df_new = new.get(**kwargs_intraday) + + assert_frame_equal(daily_df.drop(columns="9988.HK", level=0), daily_df_new) + expected = intraday_df.drop(columns="9988.HK", level=0) + expected = expected.dropna(axis=0, how="all") + assert_frame_equal(expected, intraday_df_new) diff --git a/tests/test_data.py b/tests/test_data.py index 7dfbbc4..9145aeb 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -154,7 +154,7 @@ def assert_ts_not_available(data: m.Data, tss: pd.Timestamp | list[pd.Timestamp] def assert_rng_available_unknown(data: m.Data, start: pd.Timestamp, end: pd.Timestamp): - """Test that unkonwn if data is available over a range.""" + """Test that unknown if data is available over a range.""" delta = get_delta(start) tss = [start, start + delta, end - delta, end] for ts in tss: @@ -164,6 +164,7 @@ def assert_rng_available_unknown(data: m.Data, start: pd.Timestamp, end: pd.Time rng = (tss[0], tss[-1]) assert data.available_range(rng) is None + assert data.available_any(rng) is None assert not data.requested_range(rng) @@ -180,6 +181,7 @@ def assert_rng_available_not_requested( rng = (tss[0], tss[-1]) assert data.available_range(rng) + assert data.available_any(rng) assert not data.requested_range(rng) @@ -199,9 +201,34 @@ def assert_rng_available_requested( rng = (tss[0], tss[-1]) assert data.available_range(rng) + assert data.available_any(rng) assert data.requested_range(rng) +def verify_available_any( + data: m.Data, + left_limit: pd.Timestamp, + right_limit: pd.Timestamp, +): + """Tests `available_any`.""" + delta = helpers.ONE_MIN + assert data.available_any((left_limit, right_limit)) + loll = left_limit - delta + rorl = right_limit + delta + assert data.available_any((loll, left_limit)) + assert data.available_any((right_limit, rorl)) + assert not data.available_any((loll - delta, loll)) + assert not data.available_any((rorl, rorl + delta)) + + delta = helpers.ONE_DAY + loll = left_limit - delta + rorl = right_limit + delta + assert data.available_any((loll, left_limit)) + assert data.available_any((right_limit, rorl)) + assert not data.available_any((loll - delta, loll)) + assert not data.available_any((rorl, rorl + delta)) + + @pytest.mark.parametrize("bi", [TDInterval.T1, TDInterval.H1, TDInterval.D1]) def test_pre_requests( mr_admin, @@ -278,6 +305,7 @@ def get_pool_value(idx: int) -> pd.Timestamp: assert_rng_available_not_requested(data, left_limit, right_limit) assert_ts_not_available(data, [left_limit - delta, right_limit + delta]) + verify_available_any(data, left_limit, right_limit) def assert_table_matches( diff --git a/tests/test_daterange.py b/tests/test_daterange.py index 2a4c1a5..327fc8d 100644 --- a/tests/test_daterange.py +++ b/tests/test_daterange.py @@ -110,36 +110,41 @@ def get_drg( limit = calendar.first_session return m.GetterDaily(calendar, limit, pp, ds_interval, strict, limit_right) - def test_constructor_properties(self, xlon_calendar, pp_default): + def test_constructor_properties(self, xlon_calendar_extended, pp_default, today): """Test properties that expose constructor parameters.""" - cal = xlon_calendar + cal = xlon_calendar_extended limit = cal.first_session # required arguments only, options as default for drg in ( m.GetterDaily(cal, limit, pp_default), # options as default # explicitly pass arguments as default values - m.GetterDaily(cal, limit, pp_default, None, True), + m.GetterDaily(cal, limit, pp_default, None, True, None), ): assert drg.cal == cal assert drg.pp == pp_default assert drg.limit == limit + assert drg.limit_right is None assert drg.interval is TDInterval.D1 assert drg.ds_interval is None assert drg.strict + assert drg.end_limit == (today, today) - pp = pp_default["start"] = cal.minutes[333] - limit = cal.minutes[222] + pp = pp_default["start"] = cal.sessions[33] + limit = cal.sessions[22] + limit_right = cal.sessions[44] ds_interval = TDInterval.D5 strict = False - drg = self.get_drg(cal, pp, limit, ds_interval, strict) + drg = self.get_drg(cal, pp, limit, ds_interval, strict, limit_right) assert drg.pp == pp assert drg.limit == limit + assert drg.limit_right == limit_right assert drg.interval is TDInterval.D1 assert drg.ds_interval is ds_interval assert drg.strict == strict + assert drg.end_limit == (limit_right, limit_right) def test_intervals(self, xlon_calendar, pp_default): """Test interval, ds_interval and final_interval properties.""" @@ -177,7 +182,7 @@ def test_end_now(self, calendars_with_answers, monkeypatch, pp_default): session = answers.date_to_session(non_session, "previous") assert drg.end_now == (session, session) - def test_get_start(self, calendars_with_answers, pp_default): + def test_get_start(self, calendars_with_answers, pp_default, one_day): """Test `get_start`. Notes @@ -201,10 +206,26 @@ def test_get_start(self, calendars_with_answers, pp_default): limit = ans.sessions[len(ans.sessions) // 2] too_early = ans.get_prev_session(limit) - drg = self.get_drg(cal, pp_default, limit=limit, strict=False) + limit_right = ans.sessions[int(len(ans.sessions) * (3 / 4))] + too_late = ans.get_next_session(limit_right) + drg = self.get_drg( + cal, pp_default, limit=limit, strict=False, limit_right=limit_right + ) assert drg.get_start(too_early) == limit + assert drg.get_start(too_early, limit=True) == limit + assert drg.get_start(too_early, limit=False) == too_early + oob = cal.first_session - one_day + assert drg.get_start(oob, limit=True) == limit - drg = self.get_drg(cal, pp_default, limit=limit, strict=True) + # should raise regardless of strict + with pytest.raises(errors.StartTooLateError): + drg.get_start(too_late, limit=True) + with pytest.raises(errors.StartTooLateError): + drg.get_start(too_late, limit=False) # limit should have no effect + + drg = self.get_drg( + cal, pp_default, limit=limit, strict=True, limit_right=limit_right + ) match = re.escape( f"Prices unavailable as start evaluates to {helpers.fts(too_early)} which" " is earlier than the earliest session for which price data is available." @@ -213,6 +234,20 @@ def test_get_start(self, calendars_with_answers, pp_default): ) with pytest.raises(errors.StartTooEarlyError, match=match): _ = drg.get_start(too_early) + with pytest.raises(errors.StartTooEarlyError, match=match): + _ = drg.get_start(too_early, limit=True) + with pytest.raises(errors.StartTooEarlyError, match=match): + drg.get_start(too_early, limit=False) # NB limit ignored if strict is True + + with pytest.raises(errors.StartOutOfBoundsError): + _ = drg.get_start(oob, limit=False) + with pytest.raises(errors.StartOutOfBoundsError): + _ = drg.get_start(oob, limit=True) # limit should have no effect as strict + + with pytest.raises(errors.StartTooLateError): + drg.get_start(too_late, limit=True) + with pytest.raises(errors.StartTooLateError): + drg.get_start(too_late, limit=False) # limit should have no effect def test_get_end(self, calendars_with_answers, pp_default, one_day): """Test `get_end`. @@ -238,6 +273,7 @@ def test_get_end(self, calendars_with_answers, pp_default, one_day): limit = ans.sessions[len(ans.sessions) // 2] too_early = cal.date_to_session(limit - one_day, "previous") + match = re.escape( f"Prices unavailable as end evaluates to {helpers.fts(too_early)} which is" " earlier than the earliest session for which price data is available. The" @@ -245,8 +281,52 @@ def test_get_end(self, calendars_with_answers, pp_default, one_day): ) for strict in [True, False]: drg = self.get_drg(cal, pp_default, limit=limit, strict=strict) - with pytest.raises(errors.EndTooEarlyError, match=match): - _ = drg.get_end(too_early) + for limit_, strict_ in itertools.product([True, False], [True, False]): + with pytest.raises(errors.EndTooEarlyError, match=match): + _ = drg.get_end(too_early, limit=limit_, strict=strict_) + + limit_right = session = ans.sessions[int(len(ans.sessions) * (3 / 4))] + too_late = ans.get_next_session(limit_right) + + drg = self.get_drg( + cal, pp_default, limit=limit, strict=False, limit_right=limit_right + ) + for limit_, strict_ in itertools.product([True, False], [True, False]): + assert drg.get_end(session, limit=limit_, strict=strict_) == ( + session, + session, + ) + + assert drg.get_end(too_late, limit=True) == (limit_right, limit_right) + assert drg.get_end(too_late, limit=False) == (too_late, too_late) + assert drg.get_end(too_late, limit=True, strict=False) == ( + limit_right, + limit_right, + ) + assert drg.get_end(too_late, limit=False, strict=False) == (too_late, too_late) + for limit_ in (True, False): + with pytest.raises(errors.EndTooLateError): + drg.get_end(too_late, limit=limit_, strict=True) # limit has no effect + + # strict True + drg = self.get_drg( + cal, pp_default, limit=limit, strict=True, limit_right=limit_right + ) + for limit_, strict_ in itertools.product([True, False], [True, False]): + assert drg.get_end(session, limit=limit_, strict=strict_) == ( # on limit + session, + session, + ) + + for limit_ in [True, False]: + with pytest.raises(errors.EndTooLateError): + drg.get_end(too_late, limit=limit_) + with pytest.raises(errors.EndTooLateError): + drg.get_end(too_late, limit=limit_, strict=True) # limit has no effect + + expected = (limit_right, limit_right) + assert drg.get_end(too_late, limit=True, strict=False) == expected + assert drg.get_end(too_late, limit=False, strict=False) == (too_late, too_late) def test_get_end_none(self, calendars_extended, pp_default): """Test `get_end` with None input.""" @@ -254,6 +334,9 @@ def test_get_end_none(self, calendars_extended, pp_default): drg = self.get_drg(cal, pp_default) today = get_today(cal) assert drg.get_end(None) == (today, today) + limit_right = today - cal.day * 10 + drg = self.get_drg(cal, pp_default, limit_right=limit_right) + assert drg.get_end(None) == (limit_right, limit_right) def verify_add_a_row(self, cal, ans, ds_interval, start, end, pp): """Test 'add_a_row' as True.""" @@ -975,9 +1058,11 @@ def get_drg( limit_right, ) - def test_constructor_properties(self, xlon_calendar, pp_default, one_min): + def test_constructor_properties( + self, xlon_calendar_extended, pp_default, one_min, today + ): """Test properties that expose constructor parameters.""" - cal = xlon_calendar + cal = xlon_calendar_extended cc = calutils.CompositeCalendar([cal]) delay = pd.Timedelta(0) limit = cal.first_minute @@ -1000,6 +1085,7 @@ def test_constructor_properties(self, xlon_calendar, pp_default, one_min): Anchor.OPEN, Alignment.BI, True, + None, ), ): assert drg.cal == cal @@ -1010,6 +1096,7 @@ def test_constructor_properties(self, xlon_calendar, pp_default, one_min): assert drg.anchor is Anchor.OPEN assert drg.alignment is Alignment.FINAL assert drg.end_alignment is Alignment.BI + assert drg.limit_right is None assert not drg.ignore_breaks with pytest.raises(ValueError, match="`interval` has not been set."): _ = drg.interval is None @@ -1017,6 +1104,8 @@ def test_constructor_properties(self, xlon_calendar, pp_default, one_min): drg.interval = interval assert drg.interval is interval assert drg.ds_factor == 1 + end_limit = cal.closes[today] + assert drg.end_limit == (end_limit, end_limit) pp = pp_default["start"] = cal.minutes[333] cc = calutils.CompositeCalendar([cal]) @@ -1031,6 +1120,8 @@ def test_constructor_properties(self, xlon_calendar, pp_default, one_min): anchor = Anchor.WORKBACK end_alignment = Alignment.FINAL strict = False + # trading minute by inspection... + limit_right = pd.Timestamp("2020-04-29 11:22", tz=helpers.UTC) drg = self.get_drg( cal, @@ -1044,6 +1135,7 @@ def test_constructor_properties(self, xlon_calendar, pp_default, one_min): anchor, end_alignment, strict, + limit_right, ) assert drg.pp == pp @@ -1054,12 +1146,15 @@ def test_constructor_properties(self, xlon_calendar, pp_default, one_min): assert drg.anchor == anchor assert drg.alignment == Alignment.BI # because anchor workback assert drg.end_alignment == end_alignment + assert drg.limit_right == limit_right assert drg.ds_factor == ds_interval // interval assert not drg.ignore_breaks drg.interval = intervals.TDInterval.H1 assert drg.ignore_breaks + exp = pd.Timestamp("2020-04-29 11:00", tz=helpers.UTC) # for H1 data + assert drg.end_limit == (exp, exp) - # verify can pass limit as callable + # verify can pass limits as callables limit_T1 = limit + one_min def mock_limit(interval: intervals.TDInterval) -> pd.Timestamp: @@ -1067,11 +1162,22 @@ def mock_limit(interval: intervals.TDInterval) -> pd.Timestamp: return limit_T1 return limit - drg = m.GetterIntraday(cal, cc, delay, mock_limit, False, pp_default) + limit_T1_right = limit_right - one_min + + def mock_limit_right(interval: intervals.TDInterval) -> pd.Timestamp: + if interval is TDInterval.T1: + return limit_T1_right + return limit_right + + drg = m.GetterIntraday( + cal, cc, delay, mock_limit, False, pp_default, limit_right=mock_limit_right + ) drg.interval = TDInterval.T1 assert drg.limit == limit_T1 + assert drg.limit_right == limit_T1_right drg.interval = TDInterval.T2 assert drg.limit == limit + assert drg.limit_right == limit_right def test_intervals(self, xlon_calendar, pp_default): """Test interval, ds_interval and final_interval properties.""" @@ -1217,6 +1323,10 @@ def test_get_start( ): """Test `get_start`. + Also tests raises when start later than a right limit. NB test to + verify raises when start is too late when end is 'now' is covered + by separate `test_get_start_too_late` test. + Notes ----- Test assumes value passed to `get_start` will be a time that @@ -1314,21 +1424,52 @@ def verify_starts(open_, close, next_open): session = ans.sessions[len(ans.sessions) // 2] limit = ans.opens[session] too_early = ans.closes[ans.get_prev_session(session)] - one_min - drg = self.get_drg(cal, pp, limit=limit, strict=False) + session_limit_right = ans.sessions[int(len(ans.sessions) * (3 / 4))] + limit_right = ans.opens[session_limit_right] + pd.Timedelta(95, "min") + drg = self.get_drg(cal, pp, limit=limit, strict=False, limit_right=limit_right) drg.interval = intervals.BI_ONE_MIN assert drg.get_start(too_early) == limit + assert drg.get_start(too_early, limit=True) == limit + assert drg.get_start(too_early, limit=False) == too_early + + bound = cal.first_minute + assert drg.get_start(bound, limit=False) == bound + oob = cal.first_minute - one_min + assert drg.get_start(oob, limit=True) == limit + assert drg.get_start(oob, limit=False) == limit # limit should have no effect + + assert drg.get_start(limit_right - interval) + with pytest.raises(errors.StartTooLateError): + drg.get_start(limit_right, limit=True) + with pytest.raises(errors.StartTooLateError): + drg.get_start(limit_right, limit=False) # limit should have no effect match = re.escape( f"Prices unavailable as start evaluates to {helpers.fts(too_early)} which is" " earlier than the earliest minute for which price data is available. The" f" earliest minute for which prices are available is {helpers.fts(limit)}." ) - drg = self.get_drg(cal, pp, limit=limit, strict=True) + drg = self.get_drg(cal, pp, limit=limit, strict=True, limit_right=limit_right) drg.interval = intervals.BI_ONE_MIN # assert returns on true limit (earliest value that will evaluate to limit) assert drg.get_start(too_early + one_min) == limit with pytest.raises(errors.StartTooEarlyError, match=match): _ = drg.get_start(too_early) + with pytest.raises(errors.StartTooEarlyError, match=match): + _ = drg.get_start(too_early, limit=True) + with pytest.raises(errors.StartTooEarlyError, match=match): + _ = drg.get_start(too_early, limit=False) # limit will have no effect + + with pytest.raises(errors.StartOutOfBoundsError): + drg.get_start(oob, limit=True) + with pytest.raises(errors.StartOutOfBoundsError): + drg.get_start(oob, limit=False) # limit should have no effect + + assert drg.get_start(limit_right - interval) + with pytest.raises(errors.StartTooLateError): + drg.get_start(limit_right, limit=True) + with pytest.raises(errors.StartTooLateError): + drg.get_start(limit_right, limit=False) # limit should have no effect @hyp.given(ds_interval=stmp.intervals_intraday()) @hyp.example(conftest.base_ds_intervals_dict[TDInterval.T1][0]) @@ -1349,6 +1490,9 @@ def test_get_start_too_late( ): """Test `get_start` raises error when evaluates later than now. + NB does not test that raises when start later than a defined right + limit (which is covered within the `test_get_start` test). + Notes ----- Test assumes value passed to `get_start` will be a time that @@ -1872,8 +2016,49 @@ def test_get_end_ool(self, calendars_with_answers_extended, pp_default, one_min) for strict in [True, False]: drg = self.get_drg(cal, pp_default, limit=limit, strict=strict) drg.interval = intervals.BI_ONE_MIN - with pytest.raises(errors.EndTooEarlyError, match=match): - _ = drg.get_end(too_early) + for limit_, strict_ in itertools.product((True, False), (True, False)): + with pytest.raises(errors.EndTooEarlyError, match=match): + drg.get_end(too_early, limit=limit_, strict=strict_) + + limit_right = ans.opens.iloc[int(len(ans.sessions) * (3 / 4))] + one_min + too_late = limit_right + one_min + + # strict False at drg level + drg = self.get_drg( + cal, pp_default, limit=limit, strict=False, limit_right=limit_right + ) + drg.interval = intervals.BI_ONE_MIN + expected = (limit_right, limit_right) + # on limit + for limit_, strict_ in itertools.product((True, False), (True, False)): + assert drg.get_end(limit_right, limit_, strict_) == expected + # too late + assert drg.get_end(too_late, limit=True) == expected + assert drg.get_end(too_late, limit=False) == (too_late, too_late) + assert drg.get_end(too_late, limit=True, strict=False) == expected + assert drg.get_end(too_late, limit=False, strict=False) == (too_late, too_late) + for limit_ in (True, False): + # check raises when strict Ture even when strict False at drg level + with pytest.raises(errors.EndTooLateError): + drg.get_end(too_late, limit=limit_, strict=True) + + # strict True at drg level + drg = self.get_drg( + cal, pp_default, limit=limit, strict=True, limit_right=limit_right + ) + drg.interval = intervals.BI_ONE_MIN + # on limit + for limit_, strict_ in itertools.product((True, False), (True, False)): + assert drg.get_end(limit_right, limit_, strict_) == expected + # too late + for limit_ in (True, False): + with pytest.raises(errors.EndTooLateError): + drg.get_end(too_late, limit=limit_) + with pytest.raises(errors.EndTooLateError): + drg.get_end(too_late, limit=limit_, strict=True) + # verify does not raise when strict False at method level + assert drg.get_end(too_late, limit=True, strict=False) == expected + assert drg.get_end(too_late, limit=False, strict=False) == (too_late, too_late) def test_end_now_and_get_end_none( self, calendars_with_answers, monkeypatch, pp_default, one_min @@ -1913,6 +2098,11 @@ def test_end_now_and_get_end_none( end = close - pd.Timedelta(5, "T") + one_min # returns right of live indice assert drg.end_now == drg.get_end(None) == (end, now + one_min - delay) + # verify None input to `get_end` returns any fixed right limit + limit_right = now - pd.Timedelta(84, "min") + drg = self.get_drg(cal, pp, interval=TDInterval.T1, limit_right=limit_right) + assert drg.get_end(None) == (limit_right, limit_right) + def test_get_start_get_end_anchor_effect( self, calendars_with_answers_extended, pp_default ): diff --git a/tests/test_yahoo.py b/tests/test_yahoo.py index a5f71d8..9c570ad 100644 --- a/tests/test_yahoo.py +++ b/tests/test_yahoo.py @@ -1438,124 +1438,6 @@ def assertions( assertions(prices, start, end, prev_close) -def test_prices_for_symbols(): - """Verify implementation of abstract `prices_for_symbols`. - - Notes - ----- - H1 interval not tested as not synchronised for xnys/xlon calendars. - """ - # pylint: disable=too-complex - symb_us = "MSFT" - symb_lon = "AZN.L" - symbols = [symb_us, symb_lon] - calendars, delays = ["XNYS", "XLON"], [0, 15] - prices = m.PricesYahoo([symb_us, symb_lon], calendars=calendars, delays=delays) - f = prices.prices_for_symbols - - _ = prices.get("1d", start="2021-12-31", end="2022-01-05") - - # set up inraday data as period within a single session during which - # us and lon calendars overlap. - cal_us = prices.calendars[symb_us] - cal_lon = prices.calendars[symb_lon] - now = pd.Timestamp.now(tz=UTC) - end_session = cal_us.minute_to_past_session(now, 2) - start_session = cal_us.minute_to_past_session(now, 12) - - sessions_us = cal_us.opens[start_session:end_session].index - sessions_lon = cal_lon.opens[start_session:end_session].index - - common_sessions = sessions_us.intersection(sessions_lon) - for session in reversed(common_sessions): - if session in _flakylist: - continue - lon_close = cal_lon.closes[session] - us_open = cal_us.opens[session] - # ensure overlap - if us_open < lon_close - pd.Timedelta(1, "H"): - start = us_open - pd.Timedelta(2, "H") - end = lon_close + pd.Timedelta(2, "H") - # xcals 4.0 del clause - if start.tz is not UTC: - start = start.tz_localize(UTC) - end = end.tz_localize(UTC) - us_open = us_open.tz_localize(UTC) - lon_close = lon_close.tz_localize(UTC) - break - - _ = prices.get("5T", start, us_open, lead_symbol="AZN.L") - _ = prices.get("2T", start, us_open, lead_symbol="AZN.L") - _ = prices.get("1T", start, us_open, lead_symbol="AZN.L") - _ = prices.get("5T", us_open, end) - _ = prices.get("2T", us_open, end) - _ = prices.get("1T", us_open, end) - - def assertions( - pdata: data.Data, - symb: str, - interval: intervals.BI, - expect_missing: bool = True, - ): - orig = prices._pdata[interval] - assert pdata.ranges == orig.ranges - - orig_table = orig._table[symb] - if expect_missing: - # Assert that at least one row of original data should be missing - # from new table - assert orig_table.isna().all(axis=1).any() - - table = pdata._table - assert table is not None - assert table.pt.symbols == [symb] - assert table.notna().all(axis=1).all() - assert_frame_equal(table.droplevel(0, axis=1), orig_table.dropna()) - - # Verify prices for us symb only - us = f(symb_us) - assert us.symbols == [symb_us] - assert us.calendars_unique == [prices.calendars[symb_us]] - - interval = us.BaseInterval.D1 - pdata = us._pdata[interval] - assertions(pdata, symb_us, interval, expect_missing=False) - for interval in us.BaseInterval[:-2]: - pdata = us._pdata[interval] - assert pdata._table.pt.first_ts == us_open - assertions(pdata, symb_us, interval) - - # Verify prices for lon symb only - lon = f(symb_lon) - assert lon.symbols == [symb_lon] - assert lon.calendars_unique == [prices.calendars[symb_lon]] - - for interval in lon.BaseInterval[:-2]: - if interval == intervals.TDInterval.H1: - continue - pdata = lon._pdata[interval] - assert pdata._table.pt.last_ts == lon_close - assertions(pdata, symb_lon, interval) - - # Verify prices when symbols as original - both = f(prices.symbols) - assert both.symbols == prices.symbols - assert both.calendars_unique == prices.calendars_unique - - for interval in both.BaseInterval: - if interval == intervals.TDInterval.H1: - continue - pdata = both._pdata[interval] - table = pdata._table - orig = prices._pdata[interval] - assert pdata.ranges == orig.ranges - assert table.pt.symbols == symbols - assert not table.isna().all(axis=1).any() - # verify columns same length an order identically to compare - assert len(table.columns) == len(orig._table.columns) - assert_frame_equal(table[orig._table.columns], orig._table) - - # ========================================================================= # Following are PricesBase methods that are tested here only for # convenience of using fixtures and helpers defined for tests here.