# Cryptostore sample config file
# Either Redis or Kafka is required. It is used to batch updates from cryptofeed before they are written to the storage medium of choice
#
# del_after_read: (redis only) toggles the removal of data from redis after it has been processed by cryptostore.
# retention_time: (redis only) if data removal is enabled (via del_after_read), data will still be retained in redis for N seconds.
# socket: (redis only) allows redis connections via a unix domain socket
# start_flush: toggles whether redis/kafka should be flushed at startup. Primarily for debugging; it will flush ALL of redis/kafka
cache: redis
kafka:
    # ip/port are for the bootstrap server
    ip: '127.0.0.1'
    port: 9092
    start_flush: true
redis:
    ip: '127.0.0.1'
    port: 6379
    socket: null
    del_after_read: true
    retention_time: null
    start_flush: true
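    # For illustration only: to connect over a unix domain socket instead of TCP, socket could be
    # pointed at a (hypothetical) path such as '/var/run/redis/redis.sock'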
# Data sources and data types are configured under exchanges. Exchange names follow the naming scheme in cryptofeed (they
# must be capitalized) and only exchanges supported by cryptofeed are supported.
# Data types follow cryptofeed definitions, see defines.py in cryptofeed for more details; common ones are
# trades, l2_book, l3_book, funding, ticker, and open_interest
# Trading pairs for all exchanges (except BitMEX, Deribit and other derivatives) follow the currency-quote format (e.g. BTC-USD).
# Information about what an exchange supports (pairs, data types, etc) can be found via the info() method available on
# all cryptofeed exchange classes.
#
# max_depth controls the size of the book to return. The top N levels will be returned, only when those N levels
# have changed.
# book_delta enables book deltas (snapshot, then only deltas are delivered). Snapshots are delivered
# every book_interval updates. book_interval defaults to 1000 if not specified
#
# Retries are the number of times the connection to the exchange will be retried before exiting. -1 is infinity.
# Using a retry of -1 with a bug in your config can lead to bans by exchanges
#
# Channel timeouts are channel specific and control how long a channel can go with no messages before declaring the connection
# dead. Cryptofeed will restart the connection if no message is received in this time period. Default is 120 seconds. -1 means no timeout.
#
# snapshot_interval controls how often to deliver snapshots on NON delta feeds. Book updates between the snapshots are discarded.
# In other words, enabling snapshot_interval and setting it to a value (say 1000) will cause every 1000th book update to be stored (the full book). Other
# updates will be dropped. max_depth can also be set with snapshot_interval enabled (see the illustration below).
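# For illustration only (not part of the original sample): a non-delta book feed that stores
# every 1000th snapshot of the top 25 levels might be configured roughly like this:
#    l2_book:
#        symbols: [BTC-USD]
#        max_depth: 25
#        snapshot_interval: 1000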
exchanges:
    BITMEX:
        channel_timeouts:
            l2_book: 30
            trades: 120
            ticker: 120
            funding: -1
        retries: -1
        l2_book:
            symbols: [XBTUSD]
            max_depth: 10
            book_delta: true
            book_interval: 100000
        trades: [XBTUSD]
        ticker: [XBTUSD]
        funding: [XBTUSD]
    COINBASE:
        retries: -1
        l3_book:
            symbols: [BTC-USD]
            book_delta: true
            book_interval: 100000
        trades: [BTC-USD, ETH-USD, ETH-BTC]
        ticker: [BTC-USD]
# Where to store the data. Currently arctic, influx, elastic, and parquet are supported. More than one can be enabled
storage: [arctic, influx]
# These two values configure how many times to retry a write (before dying)
# and how long to wait between attempts to write after a failure is encountered.
storage_retries: 5
storage_retry_wait: 30
# Configurable passthrough for data - data will be sent in realtime (no aggregation in redis).
# To disable, remove this section.
pass_through:
    type: zmq
    host: '127.0.0.1'
    port: 5678
elastic:
    host: 'http://127.0.0.1:9200'
    user: null
    token: null
    shards: 10
    replicas: 0
    refresh_interval: '30s'
influx:
    host: 'http://127.0.0.1:8086'
    db: example
    create: true
    username: null
    password: null
# Parquet specific options. Parquet will default to storing the data on local disk unless one of the external destinations below is configured
parquet:
    # If storing the data to an external source (like S3), toggle this to enable the removal of the local file after
    # writing to the external store
    del_file: true
    # Append to a parquet file X times before creating a new one to store data.
    # This allows data to be dumped frequently from Redis (as defined by 'storage_interval') while new parquet files are written less frequently.
    # Remove this parameter or set it to 0 to write a new file every 'storage_interval' (appending is then disabled).
    # Beware that a parquet file that has not been properly closed cannot be read back; the file is closed on the last append iteration.
    append_counter: 4
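    # For example, with the values used in this file (storage_interval: 60 and append_counter: 4),
    # each parquet file would receive one initial write plus four appends, i.e. roughly five minutes of data per file.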
    # File name format, as a list of arguments. Default is <exchange>-<data_type>-<pair>-<timestamp>.parquet, i.e. exchange, data_type, pair, timestamp
    # Can specify any combination of exchange, data_type, pair, and timestamp
    # timestamp MUST be present.
    # examples:
    #   pair,timestamp
    #   timestamp
    #   timestamp,exchange
    #   etc
    file_format: [exchange, pair, data_type, timestamp]
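    # As a rough illustration (exact naming and timestamp formatting may differ), the format above
    # would yield file names along the lines of COINBASE-BTC-USD-trades-<timestamp>.parquet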
    # A compression codec (and level, if relevant) can be defined to compress data when writing the parquet file.
    # Compression is managed via the `pyarrow.parquet.write_table()` interface.
    # Compression codecs (and levels in parentheses) are: NONE, SNAPPY, GZIP (1 to 9), BROTLI (1 to 11), LZ4 (1 is forced) & ZSTD (1 to 22).
    # You can remove these keywords if you don't want to compress the data. BROTLI in particular appears especially efficient.
    compression:
        codec: 'BROTLI'
        level: 6
    # path controls where the file is written (if not using S3/GCS). If null, will write to CWD. Must be an absolute path.
    path: null
    # Automatically groups all parquet files into a YYYY-MM-DD folder, both on the local filesystem and on the cloud storage engine.
    # Setting prefix on the cloud storage engine will override this setting.
    prefix_date: false
    S3:
        # endpoint allows you to override the write endpoint and write to other providers that have the same API interface
        # If key_id/secret are null, boto will default to using ENV vars or the credentials file
        # prefix: a prefix to append to the default data path.
        endpoint: null
        key_id: null
        secret: null
        bucket: null
        prefix: null
    GCS:
        # path to the service account key; if null, will default to using env vars or auth tokens
        # on a GCE node
        # prefix: a prefix to append to the default data path
        service_account: null
        bucket: null
        prefix: null
    GD:
        # Google Drive connector.
        # `service_account` is the path to the service account key file. If null,
        # will default to using the ENV var.
        # `prefix` is the name of the folder in which to store files. This folder has to
        # exist; it will not be created.
        service_account: null
        prefix: null
        # Separator between `exchange`, `data_type`, and `pair` in the Google Drive folder name.
        folder_name_sep: '.'
# arctic specific configuration options - the mongo URL
arctic: mongodb://127.0.0.1
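# For illustration only: assuming the value is passed through as a standard MongoDB connection string,
# hypothetical credentials and a non-default port could be supplied as mongodb://user:password@127.0.0.1:27017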
# Data batching window, in seconds, or optionally a time interval: M(inutely), H(ourly), D(aily).
# These intervals will require that you have enough memory available to redis (and python) to hold this amount of data.
# String intervals can be combined with numbers, eg 2H is 2 hours, 5M is 5 minutes, etc.
# Note that if a time interval is selected and kafka is used, the timestamps used to aggregate the data
# for the time intervals come from Kafka's internal timestamp offsets - these will differ from the exchange
# provided timestamps - the difference may be anywhere from 0.001 ms to seconds depending on your kafka cluster and
# other hardware and setup configurations.
storage_interval: 60
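# For illustration only: a string interval could be used instead of seconds, e.g.
#    storage_interval: 2H
# which would batch roughly two hours of data per write (memory permitting).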
#
# The backfill plugin is deprecated and unsupported. A new one is being developed and will be released in the coming weeks.
#
# Cryptostore Plugin Interface
#plugins:
#    backfill:
#        # The import path, and class name
#        # from cryptostore.plugin.backfill import Backfill
#        module: [cryptostore.plugin.backfill, Backfill]
#        # Config path, config can be added to this file, or in a separate config
#        # backfill does not use a dynamic config, so it's fine to put its config here
#        config: config.yaml
# If configured, backfill will run in a separate process and
# backfill trade data from earliest data (in storage) up to `start` (inclusive).
#backfill:
#    COINBASE:
#        BTC-USD:
#            start: '2017-01-01'