Skip to content

Commit

Permalink
Merge pull request #289 from curveresearch/data-sources
Browse files Browse the repository at this point in the history
Implement Data Sources
  • Loading branch information
allt0ld authored Apr 4, 2024
2 parents 4f2cf88 + 9a889c9 commit 8363194
Show file tree
Hide file tree
Showing 46 changed files with 1,255 additions and 699 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ on:
pull_request:
branches: [ main, develop ]

env:
ALCHEMY_API_KEY: ${{ secrets.ALCHEMY_API_KEY }}

jobs:
generate:
name: Generate comparison results
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/make_test_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Generate comparison results for CI test

on: workflow_dispatch

env:
ALCHEMY_API_KEY: ${{ secrets.ALCHEMY_API_KEY }}

jobs:
build:
runs-on: ubuntu-latest
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/volume_limited_arb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ name: Run volume limited arbitrage pipeline
# events but only for the main and develop branches.
on: workflow_dispatch

env:
ALCHEMY_API_KEY: ${{ secrets.ALCHEMY_API_KEY }}

jobs:
build:
runs-on: ubuntu-latest
Expand Down
34 changes: 34 additions & 0 deletions changelog.d/20240221_175629_nagakingg_data_sources.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
Removed
-------
- Removed SimAssets type and SimPool.assets property
- Removed coin_names property from PricingMetrics
- Removed CoinGecko pool_prices and coin_ids_from_addresses


Added
-----
- Added DataSource, SimAsset, and TimeSequence template classes
- Added OnChainAsset and OnChainAssetPair as common SimAsset types
- Added DateTimeSequence for TimeSequences of datetimes
- Added get_asset_data() and get_pool_data() convenience functions to pipelines.common
- Added pool_data.get_pool_assets()
- Added CoinGeckoPriceVolumeSource and CsvDataSource in price_data.data_sources

Changed
-------
- Moved price/volume data retrieval outside of PriceVolume iterator
- Made explicit price and volume properties for PriceVolume iterator
- Changed CoinGecko price data resampling to hourly samples with 10 minute tolerance
- Moved CoinGecko resampling and DataFrame processing into CoinGeckoPriceVolumeSource
- Unified simple and volume-limited arbitrage pipeline interfaces
- Replaced pipeline arguments 'end_ts' & 'days' with 'time_sequence' & 'pool_ts'
- Renamed price_data.get() to price_data.get_price_data()
- Changed get_price_data() interface to use SimAsset, TimeSequence, and DataSource
- Replaced get_pool_volume() 'days' and 'end' arguments with 'start' and 'end'

Fixed
-----
- Fixed error in unit conversion for CoinGecko volume data.
Bug was introduced in commit df79810.


15 changes: 15 additions & 0 deletions curvesim/exceptions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""Contains various exceptions used in curvesim."""


class CurvesimException(Exception):
"""Base exception class"""

Expand Down Expand Up @@ -27,6 +30,10 @@ def __repr__(self):
return f"HttpClientError({self.status}, {self.message}, url={self.url})"


class CurvesimTypeError(CurvesimException, TypeError):
"""Raised when an argument is the wrong type."""


class CurvesimValueError(CurvesimException, ValueError):
"""Raised when an argument has an inappropriate value (but the right type)."""

Expand Down Expand Up @@ -77,3 +84,11 @@ class StateLogError(CurvesimException):

class UnregisteredPoolError(StateLogError):
"""Error raised when a pool type is not recognized by the metrics framework."""


class TimeSequenceError(CurvesimException):
"""Error using a TimeSequence object."""


class DataSourceError(CurvesimException):
"""Error using a DataSource object."""
93 changes: 44 additions & 49 deletions curvesim/iterators/price_samplers/price_volume.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""
Contains PriceVolume price sampler and PriceVolumeSample dataclass.
"""

from typing import Iterator

from pandas import DataFrame

from curvesim.logging import get_logger
from curvesim.price_data import get
from curvesim.templates.price_samplers import PriceSample, PriceSampler
from curvesim.utils import dataclass, override

Expand All @@ -26,48 +31,22 @@ class PriceVolumeSample(PriceSample):

class PriceVolume(PriceSampler):
"""
An iterator that retrieves price/volume and iterates over timepoints in the data.
Iterates over price and volume data in the provided DataFrame.
"""

def __init__(
self,
assets,
*,
days=60,
data_dir="data",
src="coingecko",
end=None,
):
def __init__(self, data: DataFrame):
"""
Retrieves price/volume data and prepares it for iteration.
Parameters
----------
assets: SimAssets
Object giving the properties of the assets for simulation
(e.g., symbols, addresses, chain)
days: int, defaults to 60
Number of days to pull data for.
data_dir: str, defaults to "data"
Relative path to saved data folder.
src: str, defaults to "coingecko"
Identifies pricing source: coingecko or local.
data: DataFrame
DataFrame with prices and volumes for each asset pair.
Format should match output of :func:`curvesim.price_data.get_price_data`.
Row indices: datetime.datetime or pandas.Timestamp.
Column indices: MultiIndex with "price" and "volume" in level 1 for each tuple
of symbols in level 2.
"""
prices, volumes, _ = get(
assets.addresses,
chain=assets.chain,
days=days,
data_dir=data_dir,
src=src,
end=end,
)

self.prices = prices.set_axis(assets.symbol_pairs, axis="columns")
self.volumes = volumes.set_axis(assets.symbol_pairs, axis="columns")
self.data = data

@override
def __iter__(self) -> Iterator[PriceVolumeSample]:
Expand All @@ -76,16 +55,32 @@ def __iter__(self) -> Iterator[PriceVolumeSample]:
-------
:class:`PriceVolumeSample`
"""
for price_row, volume_row in zip(
self.prices.iterrows(), self.volumes.iterrows()
):
price_timestamp, prices = price_row
volume_timestamp, volumes = volume_row
assert (
price_timestamp == volume_timestamp
), "Price/volume timestamps don't match"

prices = prices.to_dict()
volumes = volumes.to_dict()

yield PriceVolumeSample(price_timestamp, prices, volumes) # type:ignore
for row in self.data.iterrows():
timestamp, row_data = row

prices = row_data["price"].to_dict()
volumes = row_data["volume"].to_dict()

yield PriceVolumeSample(timestamp, prices, volumes) # type:ignore

@property
def prices(self):
"""
Returns price data for all asset pairs.
Returns
-------
pandas.DataFrame
"""
return self.data["price"]

@property
def volumes(self):
"""
Returns volume data for all asset pairs.
Returns
-------
pandas.DataFrame
"""
return self.data["volume"]
7 changes: 1 addition & 6 deletions curvesim/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,6 @@ def __init__(self, coin_names, **kwargs):
Symbols for the coins used in a simulation. A numeraire is selected from
the specified coins.
"""

self.coin_names = coin_names
self.numeraire = get_numeraire(coin_names)
super().__init__(**kwargs)

Expand Down Expand Up @@ -348,9 +346,6 @@ def get_market_price(self, base, quote, prices):
return prices[(base, quote)]


pandas_coin_pair_attr = {DataFrame: "columns", Series: "index"}


def get_coin_pairs(prices):
"""
Returns the coin pairs available in the price data.
Expand Down Expand Up @@ -418,4 +413,4 @@ def __init__(self, pool, **kwargs):
:func:`pool_config` and stored as :python:`self._pool` for access during
metric computations. Number and names of coins derived from pool metadata.
"""
super().__init__(pool.assets.symbols, pool=pool)
super().__init__(pool.asset_names, pool=pool)
4 changes: 2 additions & 2 deletions curvesim/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def config(self):
}

def __init__(self, pool, **kwargs):
super().__init__(pool.assets.symbols)
super().__init__(pool.asset_names)

def compute_arb_metrics(self, **kwargs):
"""Computes all metrics for each timestamp in an individual run."""
Expand Down Expand Up @@ -218,7 +218,7 @@ def get_stableswap_metapool_volume(self, **kwargs):
"""
trade_data = kwargs["trade_data"]

meta_asset = self._pool.assets.symbols[0]
meta_asset = self._pool.asset_names[0]

def per_timestamp_function(trade_data):
volume = 0
Expand Down
Loading

0 comments on commit 8363194

Please sign in to comment.