diff --git a/Cargo.lock b/Cargo.lock index 4c1cb47e..12a008de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -110,12 +110,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - [[package]] name = "android-tzdata" version = "0.1.1" @@ -185,12 +179,6 @@ version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" -[[package]] -name = "arc-swap" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" - [[package]] name = "ark-bn254" version = "0.4.0" @@ -1080,39 +1068,6 @@ dependencies = [ "syn 2.0.60", ] -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if", - "hashbrown 0.14.3", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "deepsize" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cdb987ec36f6bf7bfbea3f928b75590b736fc42af8e54d97592481351b2b96c" -dependencies = [ - "deepsize_derive", -] - -[[package]] -name = "deepsize_derive" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990101d41f3bc8c1a45641024377ee284ecc338e5ecf3ea0f0e236d897c72796" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "deranged" version = "0.3.11" @@ -1120,7 +1075,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" dependencies = [ "powerfmt", - "serde", ] [[package]] @@ -1290,22 +1244,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "etcd-client" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae697f3928e8c89ae6f4dcf788059f49fd01a76dc53e63628f5a33881f5715e" -dependencies = [ - "http", - "prost 0.12.3", - "tokio", - "tokio-stream", - "tonic 0.10.2", - "tonic-build", - "tower", - "tower-service", -] - [[package]] name = "event-listener" version = "2.5.3" @@ -1688,10 +1626,6 @@ name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" -dependencies = [ - "ahash 0.8.11", - "allocator-api2", -] [[package]] name = "heck" @@ -1720,12 +1654,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "histogram" -version = "0.6.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cb882ccb290b8646e554b157ab0b71e64e8d5bef775cd66b6531e52d302669" - [[package]] name = "hmac" version = "0.8.1" @@ -1942,7 +1870,6 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", - "serde", ] [[package]] @@ -1953,7 +1880,6 @@ checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ "equivalent", "hashbrown 0.14.3", - "serde", ] [[package]] @@ -2155,18 +2081,6 @@ version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" -[[package]] -name = "local-ip-address" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136ef34e18462b17bf39a7826f8f3bbc223341f8e83822beb8b77db9a3d49696" -dependencies = [ - "libc", - "neli", - "thiserror", - "windows-sys 0.48.0", -] - [[package]] name = "lock_api" version = "0.4.11" @@ -2183,15 +2097,6 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" -[[package]] -name = "lz4_flex" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" -dependencies = [ - "twox-hash", -] - [[package]] name = "maplit" version = "1.0.2" @@ -2305,31 +2210,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "neli" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1100229e06604150b3becd61a4965d5c70f3be1759544ea7274166f4be41ef43" -dependencies = [ - "byteorder", - "libc", - "log", - "neli-proc-macros", -] - -[[package]] -name = "neli-proc-macros" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c168194d373b1e134786274020dae7fc5513d565ea2ebb9bc9ff17ffb69106d4" -dependencies = [ - "either", - "proc-macro2", - "quote", - "serde", - "syn 1.0.109", -] - [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -3132,15 +3012,6 @@ dependencies = [ "rand_core 0.5.1", ] -[[package]] -name = "rand_pcg" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e" -dependencies = [ - "rand_core 0.6.4", -] - [[package]] name = "rand_xorshift" version = "0.3.0" @@ -3496,67 +3367,6 @@ dependencies = [ "untrusted 0.9.0", ] -[[package]] -name = "scylla" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9439d92eea9f86c07175c819c3a129ca28b02477b47df26db354a1f4ea7ee276" -dependencies = [ - "arc-swap", - "async-trait", - "byteorder", - "bytes", - "chrono", - "dashmap", - "futures", - "hashbrown 0.14.3", - "histogram", - "itertools 0.11.0", - "lazy_static", - "lz4_flex", - "rand 0.8.5", - "rand_pcg", - "scylla-cql", - "scylla-macros", - "smallvec", - "snap", - "socket2", - "thiserror", - "tokio", - "tracing", - "uuid", -] - -[[package]] -name = "scylla-cql" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64037fb9d9c59ae15137fff9a56c4d528908dfd38d09e75b5f8e56e3894966dd" -dependencies = [ - "async-trait", - "byteorder", - "bytes", - "chrono", - "lz4_flex", - "scylla-macros", - "snap", - "thiserror", - "tokio", - "uuid", -] - -[[package]] -name = "scylla-macros" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e5fe1d389adebe6a1a27bce18b81a65ff18c25d58a795de490e18b0e7a27b9f" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 2.0.60", -] - [[package]] name = "security-framework" version = "2.9.2" @@ -3657,25 +3467,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" dependencies = [ "serde", - "serde_with_macros 2.3.3", -] - -[[package]] -name = "serde_with" -version = "3.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee80b0e361bbf88fd2f6e242ccd19cfda072cb0faa6ae694ecee08199938569a" -dependencies = [ - "base64 0.21.7", - "chrono", - "hex", - "indexmap 1.9.3", - "indexmap 2.2.5", - "serde", - "serde_derive", - "serde_json", - "serde_with_macros 3.7.0", - "time", + "serde_with_macros", ] [[package]] @@ -3690,18 +3482,6 @@ dependencies = [ "syn 2.0.60", ] -[[package]] -name = "serde_with_macros" -version = "3.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6561dc161a9224638a31d876ccdfefbc1df91d3f3a8342eddb35f055d48c7655" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 2.0.60", -] - [[package]] name = "serde_yaml" version = "0.9.32" @@ -3838,12 +3618,6 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - [[package]] name = "socket2" version = "0.5.6" @@ -4102,7 +3876,7 @@ dependencies = [ "serde_bytes", "serde_derive", "serde_json", - "serde_with 2.3.3", + "serde_with", "sha2 0.10.8", "sha3 0.10.8", "siphasher", @@ -4479,12 +4253,6 @@ dependencies = [ "spl-program-error", ] -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "strsim" version = "0.10.0" @@ -5017,16 +4785,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - [[package]] name = "typenum" version = "1.17.0" @@ -5141,9 +4899,6 @@ name = "uuid" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" -dependencies = [ - "getrandom 0.2.12", -] [[package]] name = "valuable" @@ -5588,11 +5343,8 @@ dependencies = [ "async-trait", "atty", "cargo-lock", - "chrono", "clap", "const-hex", - "deepsize", - "etcd-client", "futures", "git-version", "google-cloud-googleapis", @@ -5600,17 +5352,13 @@ dependencies = [ "hyper", "json5", "lazy_static", - "local-ip-address", "project-root", "prometheus", "rdkafka", - "scylla", "serde", "serde_json", - "serde_with 3.7.0", "serde_yaml", "sha2 0.10.8", - "thiserror", "tokio", "tokio-stream", "tonic 0.10.2", diff --git a/Cargo.toml b/Cargo.toml index 93ec6125..f08394f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,9 +30,7 @@ chrono = "0.4.26" clap = "4.3.0" const-hex = "1.6.2" crossbeam-channel = "0.5.8" -deepsize = "0.2.0" env_logger = "0.10.0" -etcd-client = "0.12.4" futures = "0.3.24" git-version = "0.3.5" google-cloud-googleapis = "0.11.0" @@ -53,7 +51,6 @@ rdkafka = "0.34.0" scylla = "0.13.0" serde = "1.0.145" serde_json = "1.0.86" -serde_with = "3.7.0" serde_yaml = "0.9.25" sha2 = "0.10.7" solana-account-decoder = "=1.18.17" diff --git a/yellowstone-grpc-proto/build.rs b/yellowstone-grpc-proto/build.rs index a2a02f48..66db00ba 100644 --- a/yellowstone-grpc-proto/build.rs +++ b/yellowstone-grpc-proto/build.rs @@ -1,5 +1,5 @@ fn main() -> anyhow::Result<()> { std::env::set_var("PROTOC", protobuf_src::protoc()); - tonic_build::compile_protos("proto/yellowstone-log.proto")?; + tonic_build::compile_protos("proto/geyser.proto")?; Ok(()) } diff --git a/yellowstone-grpc-proto/proto/yellowstone-log.proto b/yellowstone-grpc-proto/proto/yellowstone-log.proto deleted file mode 100644 index c453f123..00000000 --- a/yellowstone-grpc-proto/proto/yellowstone-log.proto +++ /dev/null @@ -1,149 +0,0 @@ -syntax = "proto3"; - -import public "geyser.proto"; - - -option go_package = "github.com/rpcpool/solana-geyser-grpc/golang/proto"; - -package yellowstone.log; - -service YellowstoneLog { - rpc CreateStaticConsumerGroup(CreateStaticConsumerGroupRequest) returns (CreateStaticConsumerGroupResponse) {} - rpc Consume(ConsumeRequest) returns (stream geyser.SubscribeUpdate) {} -} - -message CreateStaticConsumerGroupResponse { - string group_id = 1; -} - -message CreateStaticConsumerGroupRequest { - repeated string instance_id_list = 2; - repeated string redundancy_instance_id_list = 3; -} - -enum PartitionAssignmentStrategy { - STATIC = 0; -} - - -/// The InitialOffsetPolicy enum determines the initial offset used when subscribing to events or messages. It provides three options: -/// -/// EARLIEST (0) -/// This policy subscribes to events or messages starting from the earliest available offset in the data stream. It ensures that all historical data is consumed from the beginning. -/// -/// LATEST (1) -/// Subscribes to events or messages starting from the latest available offset in the data stream. It only consumes new events or messages generated after the subscription is initiated. -/// -/// SLOT (2) -/// This policy subscribes to events or messages starting from a specific slot number in the data stream. It allows for precise control over where consumption begins based on slot numbers. -enum InitialOffsetPolicy { - EARLIEST = 0; - LATEST = 1; - SLOT = 2; -} - - -/// The EventSubscriptionPolicy enum defines the types of events to subscribe to. It offers three options: -/// -/// ACCOUNT_UPDATE_ONLY (0) -/// Subscribes to account update events exclusively. It filters out other types of events, focusing solely on account-related updates. -/// -/// TRANSACTION_ONLY (1) -/// Subscribes to transaction events exclusively. It filters out non-transactional events, ensuring that only transaction-related data is consumed. -/// -/// BOTH (2) -/// This policy subscribes to both account update and transaction events. It enables consumption of a wider range of event types, encompassing both account-related updates and transactions. -enum EventSubscriptionPolicy { - ACCOUNT_UPDATE_ONLY = 0; - TRANSACTION_ONLY = 1; - BOTH = 2; -} - -/// Timeline Translation Policy -/// The TimelineTranslationPolicy enum defines different strategies for translating users to timelines based on their last seen slot number or a specified lag. This is particularly useful for managing data consumption in conjunction with the ConsumeRequest.ttp_maximum_slot_lag parameter. -/// -/// ALLOW_LAG -/// This policy allows users to be translated to a timeline that contains the last seen slot number or up to a certain lag. It extends the allowable lag for eligible timelines. -/// -/// Example: -/// -/// Suppose we have three timelines with the latest available slot numbers: -/// -/// timeline1 : 10 -/// timeline2 : 5 -/// timeline3 : 8 -/// If a consumer is assigned to timeline1 with ttp_maximum_slot_lag set to 2, then the only eligible destination timeline would be timeline3. -/// -/// STRICT_SLOT -/// Under this policy, eligible destination timelines must contain the last seen slot number in the current consumer timeline; otherwise, the translation fails. This ensures strict adherence to slot numbers when translating users to timelines. -enum TimelineTranslationPolicy { - ALLOW_LAG = 0; - STRICT_SLOT = 1; -} - -/// The ConsumeRequest message defines parameters for consuming events or messages from a data stream. It includes the following fields: -/// -/// consumer_id (1) -/// An optional string representing the consumer's unique identifier. -/// -/// initial_offset_policy (2) -/// Specifies the initial offset policy for subscribing to events. It uses values from the InitialOffsetPolicy enum. -/// -/// at_slot (3) -/// An optional int64 indicating the specific slot number from which consumption should start. This is relevant when initial_offset_policy is set to SLOT. -/// -/// event_subscription_policy (4) -/// Defines the event subscription policy using values from the EventSubscriptionPolicy enum. -/// -/// account_update_event_filter (5) -/// An optional AccountUpdateEventFilter message specifying filters for account update events. -/// -/// tx_event_filter (6) -/// An optional TransactionEventFilter message defining filters for transaction events. -/// -/// commitment_level (7) -/// Specifies the commitment level for consuming events. It uses values from the geyser.CommitmentLevel enum. -/// -/// timelineTranslationPolicy (8) -/// An optional TimelineTranslationPolicy describing the policy for triggering timeline translation when an ingester is out of service and consumers need to be translated to a different ingestion timeline. -/// -/// ttp_maximum_slot_lag (9) -/// An optional uint32 indicating the maximum slot lag allowed for timeline translation. -message ConsumeRequest { - optional string consumer_group_id = 1; - optional string consumer_id = 2; - optional string instance_id = 3; - - InitialOffsetPolicy initial_offset_policy = 4; - geyser.CommitmentLevel commitment_level = 5; - EventSubscriptionPolicy event_subscription_policy = 6; - - optional int64 at_slot = 7; - optional AccountUpdateEventFilter account_update_event_filter = 8; - optional TransactionEventFilter tx_event_filter = 9; - - // timelineTranslationPolicy is used when an ingester is out of service and we need to translate a set - // of consumer to a different ingestion timeline. The policy describe what to do when we need to trigger timeline translation. - optional TimelineTranslationPolicy timelineTranslationPolicy = 10; - optional uint32 ttp_maximum_slot_lag = 11; -} - -/// The AccountUpdateEventFilter message defines filters for account update events. It includes the following fields: -/// -/// pubkeys (1) -/// A repeated field of bytes representing public keys. Events matching any of these public keys will be included in the filtered results. -/// -/// owners (2) -/// A repeated field of bytes representing account owners. Events matching any of these account owners will be included in the filtered results. -message AccountUpdateEventFilter { - repeated bytes pubkeys = 1; - repeated bytes owners = 2; -} - -/// The TransactionEventFilter message specifies filters for transaction events. It contains the following field: - -/// account_keys (1) -/// A repeated field of bytes representing account keys. Events associated with any of these account keys will be included in the filtered results. -message TransactionEventFilter { - repeated bytes account_keys = 1; -} \ No newline at end of file diff --git a/yellowstone-grpc-proto/src/lib.rs b/yellowstone-grpc-proto/src/lib.rs index 50a77277..8760c7cd 100644 --- a/yellowstone-grpc-proto/src/lib.rs +++ b/yellowstone-grpc-proto/src/lib.rs @@ -1,11 +1,5 @@ #![allow(clippy::large_enum_variant)] -pub mod yellowstone { - pub mod log { - tonic::include_proto!("yellowstone.log"); - } -} - pub mod geyser { tonic::include_proto!("geyser"); } diff --git a/yellowstone-grpc-tools/Cargo.toml b/yellowstone-grpc-tools/Cargo.toml index 1b86632e..e693327e 100644 --- a/yellowstone-grpc-tools/Cargo.toml +++ b/yellowstone-grpc-tools/Cargo.toml @@ -18,34 +18,23 @@ required-features = ["google-pubsub"] name = "grpc-kafka" required-features = ["kafka"] -[[bin]] -name = "grpc-scylladb" -required-features = ["scylladb"] - [dependencies] anyhow = { workspace = true } async-trait = { workspace = true } atty = { workspace = true } -chrono = { workspace = true, optional = true } clap = { workspace = true, features = ["derive"] } const-hex = { workspace = true, optional = true } -deepsize = { workspace = true, optional = true } -etcd-client = { workspace = true, optional = true } futures = { workspace = true } google-cloud-googleapis = { workspace = true, optional = true } google-cloud-pubsub = { workspace = true, optional = true } hyper = { workspace = true } json5 = { workspace = true } lazy_static = { workspace = true } -local-ip-address = { workspace = true, optional = true } prometheus = { workspace = true } -scylla = { workspace = true, optional = true, features = ["chrono"] } serde = { workspace = true } serde_json = { workspace = true } -serde_with = { workspace = true, optional = true } serde_yaml = { workspace = true } sha2 = { workspace = true, optional = true } -thiserror = { workspace = true, optional = true } tokio = { workspace = true, features = ["signal", "time"] } tokio-stream = { workspace = true } tonic = { workspace = true, features = ["gzip"] } @@ -75,5 +64,4 @@ vergen = { workspace = true, features = ["build", "rustc"] } [features] default = ["google-pubsub", "kafka"] google-pubsub = ["google-cloud-googleapis", "google-cloud-pubsub"] -kafka = ["const-hex", "rdkafka", "sha2"] -scylladb = ["scylla", "serde_with", "deepsize", "uuid", "local-ip-address", "chrono", "thiserror", "etcd-client"] +kafka = ["const-hex", "rdkafka", "sha2"] \ No newline at end of file diff --git a/yellowstone-grpc-tools/config-scylladb.json b/yellowstone-grpc-tools/config-scylladb.json deleted file mode 100644 index a0e38197..00000000 --- a/yellowstone-grpc-tools/config-scylladb.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "prometheus": "127.0.0.1:8873", - "scylladb": { - "hostname": "localhost:9042", - "username": "cassandra", - "password": "cassandra" - }, - "grpc2scylladb": { - "endpoint": "localhost:10000", - "x_token": "", - "request": { - "accounts": { - "my_filter": { - "owner": ["TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"] - } - } - }, - "batch_size_limit": 1000, - "linger": 10, - "keyspace": "solana", - "max_inflight_batch_delivery": 80 - } -} diff --git a/yellowstone-grpc-tools/config-ys-log-server.json b/yellowstone-grpc-tools/config-ys-log-server.json deleted file mode 100644 index a64004c4..00000000 --- a/yellowstone-grpc-tools/config-ys-log-server.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "prometheus": "127.0.0.1:8873", - "scylladb": { - "hostname": "localhost:9042", - "username": "cassandra", - "password": "cassandra" - }, - "yellowstone_log_server": { - "listen": "localhost:10001", - "keyspace": "solana" - } -} diff --git a/yellowstone-grpc-tools/grpcurl_yellowstone_log_server_example.sh b/yellowstone-grpc-tools/grpcurl_yellowstone_log_server_example.sh deleted file mode 100644 index c7d0bd4b..00000000 --- a/yellowstone-grpc-tools/grpcurl_yellowstone_log_server_example.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/bash - - -# Subscripte to account update only -grpcurl \ - -max-msg-sz 10000000 \ - -plaintext -import-path . \ - -proto yellowstone-log.proto \ - -d '{"initial_offset_policy": 0, "event_subscription_policy": 0 }' \ - '127.0.0.1:10001' yellowstone.log.YellowstoneLog.Consume - - -# Create a static consumer group -grpcurl \ - -plaintext -import-path . \ - -proto yellowstone-log.proto \ - -d '{"instance_id_list": ["a", "b"], "redundancy_instance_id_list": ["c", "d"] }' \ - '127.0.0.1:10001' yellowstone.log.YellowstoneLog.CreateStaticConsumerGroup \ No newline at end of file diff --git a/yellowstone-grpc-tools/solana.cql b/yellowstone-grpc-tools/solana.cql deleted file mode 100644 index a0afdcd8..00000000 --- a/yellowstone-grpc-tools/solana.cql +++ /dev/null @@ -1,255 +0,0 @@ -CREATE KEYSPACE solana WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': '3'} AND durable_writes = true; -CREATE KEYSPACE solana2 WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': '3'} AND durable_writes = true; - - -drop materialized view if exists producer_consumer_mapping_mv; -drop materialized view if exists slot_map; - -drop table if exists producer_slot_seen; -drop table if exists shard_statistics; -drop table if exists producer_info; -drop table if exists consumer_shard_offset; -drop table if exists consumer_producer_mapping; -drop table if exists log; -drop type if exists transaction_meta; -drop type if exists message_addr_table_lookup; -drop type if exists compiled_instr; -drop type if exists tx_token_balance; -drop type if exists reward; -drop type if exists inner_instrs; -drop type if exists inner_instr; -drop type if exists return_data; - - -create table if not exists consumer_shard_offset ( - consumer_id text, - producer_id blob, - shard_id smallint, - event_type smallint, - offset bigint, - slot bigint, - created_at timestamp, - updated_at timestamp, - PRIMARY KEY ((consumer_id, producer_id), shard_id, event_type) -) -with default_time_to_live = 3600; - - -create table if not exists consumer_groups ( - consumer_group_id blob, - group_type smallint, - - last_access_ip_address text, - - -- fields for static consumer group only - instance_id_shard_assignments frozen>>, - redundant_id_shard_assignments frozen>>, - - created_at timestamp, - updated_at timestamp, - - primary key (consumer_group_id) -); - -create table if not exists consumer_info ( - consumer_id text, - producer_id blob, - consumer_ip: text, - subscribed_event_types frozen>, - last_connection timestamp, - created_at timestamp, - updated_at timestamp, - PRIMARY KEY (consumer_id) -); - -create materialized view if not exists producer_consumer_mapping_mv -as -select - producer_id, - consumer_id -from consumer_info -where - consumer_id is not null - and producer_id is not null -primary key (producer_id, consumer_id); - -create table if not exists producer_info ( - producer_id blob, - commitment smallint, - num_shards smallint, - created_at timestamp, - updated_at timestamp, - PRIMARY KEY (producer_id) -); - -create table if not exists producer_lock ( - producer_id blob, - lock_id text, - ifname text, - ipv4 text, - is_ready boolean, - minimum_shard_offset frozen>>, - created_at timestamp, - primary key (producer_id) -); - - -create type if not exists message_addr_table_lookup ( - account_key blob, - writable_indexes blob, - readonly_indexes blob -); - -create type if not exists compiled_instr ( - program_id_index bigint, - accounts blob, - data blob -); - -create type if not exists inner_instr ( - program_id_index bigint, - accounts blob, - data blob, - stack_height bigint -); - -create type if not exists inner_instrs ( - "index" bigint, - instructions frozen> -); - -create type if not exists ui_token_amount ( - ui_amount double, - decimals bigint, - amount text, - ui_amount_string text -); - -create type if not exists tx_token_balance ( - account_index bigint, - mint text, --varchar(44) - ui_token_amount frozen, - owner text, --varchar(44) - program_id text, -); - -create type if not exists reward ( - pubkey text, -- varchar(44) - lamports bigint, - post_balance bigint, - reward_type int, --Fee, Rent, Staking, Voting - commission text -); - -create type if not exists return_data ( - program_id blob, - data blob -); - -create type if not exists transaction_meta ( - error blob, - fee bigint, - pre_balances frozen>, - post_balances frozen>, - inner_instructions frozen>, - log_messages frozen>, - pre_token_balances frozen>, - post_token_balances frozen>, - rewards frozen>, - loaded_writable_addresses frozen>, - loaded_readonly_addresses frozen>, - return_data frozen, - compute_units_consumed bigint -); - - --- ScyllaDB table can hold different kind of entities at the same time. --- There is not performance advantage to have separate tables since ScyllaDB is wide-column family database. --- ScyllaDB is built to have sparse columns (alot of unused columns) --- On each query, the storage engine only retrieves what matters to the query. -create table if not exists log ( - - -- commun columns - shard_id smallint, - period bigint, - producer_id blob, - offset bigint, - slot bigint, - event_type smallint, - -- 0 = account update - -- 1 = new transaction - - -- account columns - pubkey blob, - lamports bigint, - owner blob, - executable boolean, - rent_epoch bigint, - write_version bigint, - data blob, - txn_signature blob, - - - -- transaction columns - signature blob, - signatures frozen>, - num_required_signatures int, - num_readonly_signed_accounts int, - num_readonly_unsigned_accounts int, - account_keys frozen>, - recent_blockhash blob, - instructions frozen>, - versioned boolean, - address_table_lookups frozen>, - meta transaction_meta, - is_vote boolean, - tx_index bigint, - - - -- meta data field for debugging purposes - created_at timestamp, - - primary key ((shard_id, period, producer_id), offset) -) -WITH CLUSTERING ORDER BY (offset desc) - AND default_time_to_live = 86400 - and compaction = {'class': 'TimeWindowCompactionStrategy', 'compaction_window_unit': 'MINUTES', 'compaction_window_size' : 10}; - -create table if not exists producer_slot_seen ( - producer_id blob, - slot bigint, - shard_offset_map frozen>>, - created_at timestamp, - primary key (producer_id, slot) -) -with clustering order by (slot DESC) - AND default_time_to_live = 82800; -- 23 hours - -CREATE materialized VIEW if not exists slot_producer_seen_mv -AS -SELECT slot, producer_id FROM producer_slot_seen -WHERE - producer_id is not null - and slot is not null -PRIMARY KEY (slot, producer_id); - - - -create table if not exists producer_period_commit_log ( - producer_id blob, - shard_id smallint, - period bigint, - created_at timestamp, - PRIMARY KEY((producer_id, shard_id), period) -) with clustering order by (period desc) - AND default_time_to_live = 82800; -- 23 hours - --- clear all table - -truncate log; -truncate producer_period_commit_log; -truncate producer_slot_seen; -truncate producer_lock; - -truncate consumer_info; -truncate consumer_shard_offset; \ No newline at end of file diff --git a/yellowstone-grpc-tools/solana_keyspace_setup.sh b/yellowstone-grpc-tools/solana_keyspace_setup.sh deleted file mode 100644 index f2c1a554..00000000 --- a/yellowstone-grpc-tools/solana_keyspace_setup.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/bash - -nodetool disableautocompaction solana log producer_period_commit_log producer_slot_seen \ No newline at end of file diff --git a/yellowstone-grpc-tools/src/bin/grpc-scylladb.rs b/yellowstone-grpc-tools/src/bin/grpc-scylladb.rs deleted file mode 100644 index a9004584..00000000 --- a/yellowstone-grpc-tools/src/bin/grpc-scylladb.rs +++ /dev/null @@ -1,277 +0,0 @@ -use { - anyhow::Ok, - clap::{Parser, Subcommand}, - futures::{future::BoxFuture, stream::StreamExt, TryFutureExt}, - scylla::{frame::Compression, Session, SessionBuilder}, - std::{net::SocketAddr, sync::Arc, time::Duration}, - tokio::time::Instant, - tonic::transport::Server, - tracing::{error, info, warn}, - yellowstone_grpc_client::GeyserGrpcClient, - yellowstone_grpc_proto::{ - prelude::subscribe_update::UpdateOneof, - yellowstone::log::{ - yellowstone_log_server::YellowstoneLogServer, EventSubscriptionPolicy, - TimelineTranslationPolicy, - }, - }, - yellowstone_grpc_tools::{ - config::{load as config_load, GrpcRequestToProto}, - create_shutdown, - prom::run_server as prometheus_run_server, - scylladb::{ - config::{ - Config, ConfigGrpc2ScyllaDB, ConfigYellowstoneLogServer, ScyllaDbConnectionInfo, - }, - sink::ScyllaSink, - types::{CommitmentLevel, Transaction}, - yellowstone_log::{ - common::InitialOffset, - grpc::{spawn_grpc_consumer, ScyllaYsLog, SpawnGrpcConsumerReq}, - }, - }, - setup_tracing, - }, -}; - -// 512MB -const MAX_DECODING_MESSAGE_SIZE_BYTES: usize = 512_000_000; - -#[derive(Debug, Clone, Parser)] -#[clap(author, version, about = "Yellowstone gRPC ScyllaDB Tool")] -struct Args { - /// Path to config file - #[clap(short, long)] - config: String, - - /// Prometheus listen address - #[clap(long)] - prometheus: Option, - - #[command(subcommand)] - action: ArgsAction, -} - -#[derive(Debug, Clone, Subcommand)] -enum ArgsAction { - /// Receive data from gRPC and send them to the Kafka - #[command(name = "grpc2scylla")] - Grpc2Scylla, - - /// Receive data from Kafka and send them over gRPC - #[command(name = "yellowstone-log-server")] - YellowstoneLogServer, - - #[command(name = "test")] - Test, -} - -impl ArgsAction { - async fn run(self, config: Config) -> anyhow::Result<()> { - let shutdown = create_shutdown()?; - match self { - ArgsAction::Grpc2Scylla => { - let config2 = config.grpc2scylladb.ok_or_else(|| { - anyhow::anyhow!("`grpc2scylladb` section in config should be defined") - })?; - Self::grpc2scylladb(config2, config.scylladb, shutdown).await - } - ArgsAction::YellowstoneLogServer => { - let config2 = config.yellowstone_log_server.ok_or_else(|| { - anyhow::anyhow!("`grpc2scylladb` section in config should be defined") - })?; - Self::yellowstone_log_server(config2, config.scylladb, shutdown).await - } - ArgsAction::Test => { - let config2 = config.grpc2scylladb.ok_or_else(|| { - anyhow::anyhow!("`grpc2scylladb` section in config should be defined") - })?; - Self::test(config2, config.scylladb, shutdown).await - } - } - } - - async fn yellowstone_log_server( - config: ConfigYellowstoneLogServer, - scylladb_conn_config: ScyllaDbConnectionInfo, - mut shutdown: BoxFuture<'static, ()>, - ) -> anyhow::Result<()> { - let addr = config.listen.parse().unwrap(); - - let session: Session = SessionBuilder::new() - .known_node(scylladb_conn_config.hostname) - .user(scylladb_conn_config.username, scylladb_conn_config.password) - .compression(Some(Compression::Lz4)) - .use_keyspace(config.keyspace.clone(), false) - .build() - .await?; - - let session = Arc::new(session); - let scylla_ys_log = ScyllaYsLog::new(session).await?; - let ys_log_server = YellowstoneLogServer::new(scylla_ys_log); - - println!("YellowstoneLogServer listening on {}", addr); - - let server_fut = Server::builder() - // GrpcWeb is over http1 so we must enable it. - .add_service(ys_log_server) - .serve(addr) - .map_err(anyhow::Error::new); - - tokio::select! { - _ = &mut shutdown => Ok(()), - result = server_fut => result, - } - } - - async fn test( - config: ConfigGrpc2ScyllaDB, - scylladb_conn_config: ScyllaDbConnectionInfo, - mut shutdown: BoxFuture<'static, ()>, - ) -> anyhow::Result<()> { - let session: Session = SessionBuilder::new() - .known_node(scylladb_conn_config.hostname) - .user(scylladb_conn_config.username, scylladb_conn_config.password) - .compression(Some(Compression::Lz4)) - .use_keyspace(config.keyspace.clone(), false) - .build() - .await?; - let session = Arc::new(session); - let req = SpawnGrpcConsumerReq { - consumer_id: String::from("test"), - consumer_ip: None, - account_update_event_filter: None, - tx_event_filter: None, - buffer_capacity: None, - offset_commit_interval: None, - event_subscription_policy: EventSubscriptionPolicy::Both, - commitment_level: CommitmentLevel::Processed, - timeline_translation_policy: TimelineTranslationPolicy::AllowLag, - timeline_translation_allowed_lag: None, - }; - let mut rx = spawn_grpc_consumer(session, req, InitialOffset::Earliest).await?; - - let mut print_tx_secs = Instant::now() + Duration::from_secs(1); - let mut num_events = 0; - loop { - if print_tx_secs.elapsed() > Duration::ZERO { - println!("event/second {}", num_events); - num_events = 0; - print_tx_secs = Instant::now() + Duration::from_secs(1); - } - tokio::select! { - _ = &mut shutdown => return Ok(()), - Some(result) = rx.recv() => { - if result.is_err() { - anyhow::bail!("fail!!!") - } - let _x = result?.update_oneof.expect("got none"); - num_events += 1; - }, - _ = tokio::time::sleep_until(Instant::now() + Duration::from_secs(1)) => { - warn!("received no event") - } - } - } - } - - async fn grpc2scylladb( - config: ConfigGrpc2ScyllaDB, - scylladb_conn_config: ScyllaDbConnectionInfo, - mut shutdown: BoxFuture<'static, ()>, - ) -> anyhow::Result<()> { - let sink_config = config.get_scylladb_sink_config(); - info!("sink configuration {:?}", sink_config); - - // Create gRPC client & subscribe - let mut client = GeyserGrpcClient::build_from_shared(config.endpoint)? - .x_token(config.x_token)? - .max_decoding_message_size(MAX_DECODING_MESSAGE_SIZE_BYTES) - .connect_timeout(Duration::from_secs(10)) - .timeout(Duration::from_secs(5)) - .connect() - .await?; - - let mut geyser = client.subscribe_once(config.request.to_proto()).await?; - info!("Grpc subscription is successful ."); - - let mut sink = ScyllaSink::new( - sink_config, - scylladb_conn_config.hostname, - scylladb_conn_config.username, - scylladb_conn_config.password, - ) - .await?; - - info!("ScyllaSink is ready."); - // Receive-send loop - loop { - let message = tokio::select! { - _ = &mut shutdown => break, - message = geyser.next() => message, - } - .transpose(); - - if let Err(error) = &message { - error!("geyser plugin disconnected: {error:?}"); - break; - } - - if let Some(message) = message? { - let message = match message.update_oneof { - Some(value) => value, - None => unreachable!("Expect valid message"), - }; - - let result = match message { - UpdateOneof::Account(msg) => { - let acc_update = msg.clone().try_into(); - if acc_update.is_err() { - // Drop the message if invalid - warn!( - "failed to parse account update: {:?}", - acc_update.err().unwrap() - ); - continue; - } - // If the sink is close, let it crash... - sink.log_account_update(acc_update.unwrap()).await - } - UpdateOneof::Transaction(msg) => { - let tx: Result = msg.try_into(); - if tx.is_err() { - warn!("failed to convert update tx: {:?}", tx.err().unwrap()); - continue; - } - sink.log_transaction(tx.unwrap()).await - } - _ => continue, - }; - - if let Err(e) = result { - error!("error detected in sink: {e}"); - break; - } - } - } - sink.shutdown().await - } -} - -#[tokio::main] -async fn main() -> anyhow::Result<()> { - setup_tracing()?; - - // Parse args - let args = Args::parse(); - let config = config_load::(&args.config).await?; - - // Run prometheus server - if let Some(address) = args.prometheus.or(config.prometheus) { - prometheus_run_server(address)?; - } - - args.action.run(config).await.unwrap(); - - Ok(()) -} diff --git a/yellowstone-grpc-tools/src/lib.rs b/yellowstone-grpc-tools/src/lib.rs index dbc0e95f..55651ec5 100644 --- a/yellowstone-grpc-tools/src/lib.rs +++ b/yellowstone-grpc-tools/src/lib.rs @@ -8,8 +8,7 @@ pub mod google_pubsub; #[cfg(feature = "kafka")] pub mod kafka; pub mod prom; -#[cfg(feature = "scylla")] -pub mod scylladb; + pub mod version; use { diff --git a/yellowstone-grpc-tools/src/prom.rs b/yellowstone-grpc-tools/src/prom.rs index 2e27422c..a5c2b77d 100644 --- a/yellowstone-grpc-tools/src/prom.rs +++ b/yellowstone-grpc-tools/src/prom.rs @@ -6,11 +6,7 @@ use crate::google_pubsub::prom::{ }; #[cfg(feature = "kafka")] use crate::kafka::prom::{KAFKA_DEDUP_TOTAL, KAFKA_RECV_TOTAL, KAFKA_SENT_TOTAL, KAFKA_STATS}; -#[cfg(feature = "scylla")] -use crate::scylladb::prom::{ - SCYLLADB_BATCHITEM_DELIVERED, SCYLLADB_BATCH_DELIVERED, SCYLLADB_BATCH_QUEUE, - SCYLLADB_BATCH_REQUEST_LAG, SCYLLADB_BATCH_SIZE, SCYLLADB_PEAK_BATCH_LINGER_SECONDS, -}; + use { crate::version::VERSION as VERSION_INFO, hyper::{ @@ -62,16 +58,6 @@ pub fn run_server(address: SocketAddr) -> anyhow::Result<()> { register!(KAFKA_SENT_TOTAL); } - #[cfg(feature = "scylla")] - { - register!(SCYLLADB_PEAK_BATCH_LINGER_SECONDS); - register!(SCYLLADB_BATCH_DELIVERED); - register!(SCYLLADB_BATCHITEM_DELIVERED); - register!(SCYLLADB_BATCH_SIZE); - register!(SCYLLADB_BATCH_QUEUE); - register!(SCYLLADB_BATCH_REQUEST_LAG); - } - VERSION .with_label_values(&[ VERSION_INFO.buildts, diff --git a/yellowstone-grpc-tools/src/scylladb/config.rs b/yellowstone-grpc-tools/src/scylladb/config.rs deleted file mode 100644 index a52787a8..00000000 --- a/yellowstone-grpc-tools/src/scylladb/config.rs +++ /dev/null @@ -1,107 +0,0 @@ -use { - super::{sink::ScyllaSinkConfig, types::CommitmentLevel}, - crate::config::ConfigGrpcRequest, - serde::Deserialize, - serde_with::{serde_as, DurationMilliSeconds}, - std::{net::SocketAddr, time::Duration}, -}; - -const fn default_batch_len_limit() -> usize { - 10 -} - -const fn default_batch_size_kb() -> usize { - 131585 -} - -const fn default_linger() -> Duration { - Duration::from_millis(10) -} - -fn default_scylla_username() -> String { - "cassandra".into() -} - -fn default_scylla_password() -> String { - "cassandra".into() -} - -fn default_keyspace() -> String { - "default".into() -} - -fn default_hostname() -> String { - String::from("127.0.0.1:9144") -} - -#[derive(Debug, Default, Deserialize)] -#[serde(default)] -pub struct Config { - pub prometheus: Option, - pub scylladb: ScyllaDbConnectionInfo, - pub grpc2scylladb: Option, - pub yellowstone_log_server: Option, -} - -#[derive(Debug, Default, Deserialize)] -#[serde(default)] -pub struct ScyllaDbConnectionInfo { - #[serde(default = "default_hostname")] - pub hostname: String, - #[serde(default = "default_scylla_username")] - pub username: String, - #[serde(default = "default_scylla_password")] - pub password: String, -} - -#[serde_as] -#[derive(Debug, Deserialize)] -pub struct ConfigYellowstoneLogServer { - pub listen: String, - #[serde(default = "default_keyspace")] - pub keyspace: String, -} - -#[serde_as] -#[derive(Debug, Deserialize)] -pub struct ConfigGrpc2ScyllaDB { - pub endpoint: String, - pub x_token: Option, - pub request: ConfigGrpcRequest, - - pub producer_id: u8, - - // Optional network interface name used to write in the producer lock table. - pub ifname: Option, - - #[serde(default = "default_batch_len_limit")] - pub batch_len_limit: usize, - - #[serde(default = "default_batch_size_kb")] - pub batch_size_kb_limit: usize, - - #[serde(default = "default_linger")] - #[serde_as(as = "DurationMilliSeconds")] - pub linger: Duration, - - #[serde(default = "default_keyspace")] - pub keyspace: String, -} - -impl ConfigGrpc2ScyllaDB { - pub fn get_scylladb_sink_config(&self) -> ScyllaSinkConfig { - ScyllaSinkConfig { - producer_id: self.producer_id, - batch_len_limit: self.batch_len_limit, - batch_size_kb_limit: self.batch_size_kb_limit, - linger: self.linger, - keyspace: self.keyspace.clone(), - ifname: self.ifname.to_owned(), - commitment_level: match self.request.commitment.expect("Missing commitment level") { - crate::config::ConfigGrpcRequestCommitment::Processed => CommitmentLevel::Processed, - crate::config::ConfigGrpcRequestCommitment::Confirmed => CommitmentLevel::Confirmed, - crate::config::ConfigGrpcRequestCommitment::Finalized => CommitmentLevel::Finalized, - }, - } - } -} diff --git a/yellowstone-grpc-tools/src/scylladb/mod.rs b/yellowstone-grpc-tools/src/scylladb/mod.rs deleted file mode 100644 index 6f321506..00000000 --- a/yellowstone-grpc-tools/src/scylladb/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod config; -pub mod prom; -pub mod sink; -pub mod types; -pub mod yellowstone_log; diff --git a/yellowstone-grpc-tools/src/scylladb/prom.rs b/yellowstone-grpc-tools/src/scylladb/prom.rs deleted file mode 100644 index a694d0ae..00000000 --- a/yellowstone-grpc-tools/src/scylladb/prom.rs +++ /dev/null @@ -1,63 +0,0 @@ -use { - prometheus::{Histogram, HistogramOpts, IntCounter, IntGauge}, - std::time::Duration, -}; - -lazy_static::lazy_static! { - pub(crate) static ref SCYLLADB_BATCH_DELIVERED: IntCounter = IntCounter::new( - "scylladb_batch_sent_total", "Total number of batch delivered" - ).unwrap(); - - pub(crate) static ref SCYLLADB_BATCH_SIZE: Histogram = Histogram::with_opts( - HistogramOpts::new("scylladb_batch_size", "The batch size sent to Scylladb"), - ).unwrap(); - - pub(crate) static ref SCYLLADB_BATCH_REQUEST_LAG: IntGauge = IntGauge::new( - "scylladb_batch_request_lag", "The amount of batch request not being handle by a batching task" - ).unwrap(); - - pub(crate) static ref SCYLLADB_BATCHITEM_DELIVERED: IntCounter = IntCounter::new( - "scylladb_batchitem_sent_total", "Total number of batch items delivered" - ).unwrap(); - - pub(crate) static ref SCYLLADB_PEAK_BATCH_LINGER_SECONDS: Histogram = Histogram::with_opts( - HistogramOpts::new("scylladb_peak_batch_linger_seconds", "The actual batch linger of the next batch to sent"), - ).unwrap(); - - pub(crate) static ref SCYLLADB_BATCH_QUEUE: IntGauge = IntGauge::new( - "scylladb_batch_queue_size", "The amount of batch concurrently being linger." - ).unwrap(); - -} - -pub fn scylladb_batch_sent_inc() { - SCYLLADB_BATCH_DELIVERED.inc() -} - -pub fn scylladb_batchitem_sent_inc_by(amount: u64) { - SCYLLADB_BATCHITEM_DELIVERED.inc_by(amount) -} - -pub fn scylladb_batch_size_observe(batch_size: usize) { - SCYLLADB_BATCH_SIZE.observe(batch_size as f64) -} - -pub fn scylladb_peak_batch_linger_observe(batch_linger: Duration) { - SCYLLADB_PEAK_BATCH_LINGER_SECONDS.observe(batch_linger.as_secs_f64()) -} - -pub fn scylladb_batch_queue_inc() { - SCYLLADB_BATCH_QUEUE.inc() -} - -pub fn scylladb_batch_queue_dec() { - SCYLLADB_BATCH_QUEUE.dec() -} - -pub fn scylladb_batch_request_lag_inc() { - SCYLLADB_BATCH_REQUEST_LAG.inc() -} - -pub fn scylladb_batch_request_lag_sub(amount: i64) { - SCYLLADB_BATCH_REQUEST_LAG.sub(amount) -} diff --git a/yellowstone-grpc-tools/src/scylladb/sink.rs b/yellowstone-grpc-tools/src/scylladb/sink.rs deleted file mode 100644 index b7041c3d..00000000 --- a/yellowstone-grpc-tools/src/scylladb/sink.rs +++ /dev/null @@ -1,819 +0,0 @@ -use { - super::{ - prom::{ - scylladb_batch_request_lag_inc, scylladb_batch_request_lag_sub, - scylladb_batch_sent_inc, scylladb_batch_size_observe, scylladb_batchitem_sent_inc_by, - }, - types::{ - AccountUpdate, BlockchainEvent, CommitmentLevel, ProducerId, ProducerInfo, ShardId, - ShardOffset, ShardPeriod, Slot, Transaction, SHARD_OFFSET_MODULO, UNDEFINED_SLOT, - }, - }, - deepsize::DeepSizeOf, - futures::{ - future::{self, try_join_all}, - Future, - }, - local_ip_address::{list_afinet_netifas, local_ip}, - scylla::{ - batch::{Batch, BatchType}, - cql_to_rust::{FromCqlVal, FromCqlValError, FromRowError}, - frame::Compression, - FromRow, Session, SessionBuilder, - }, - std::{ - collections::{BTreeMap, BTreeSet}, - net::IpAddr, - sync::Arc, - time::Duration, - }, - tokio::{ - sync::mpsc::{error::SendError, Permit}, - task::{JoinError, JoinHandle}, - time::Instant, - }, - tracing::{error, info, warn}, - uuid::Uuid, -}; - -const WARNING_SCYLLADB_LATENCY_THRESHOLD: Duration = Duration::from_millis(1000); - -const DEFAULT_SHARD_MAX_BUFFER_CAPACITY: usize = 15; - -/// Untyped API in scylla will soon be deprecated, this is why we need to implement our own deser logic to -/// only read the first column returned by a light weight transaction. -struct LwtSuccess(bool); - -impl FromRow for LwtSuccess { - fn from_row( - row: scylla::frame::response::result::Row, - ) -> Result { - row.columns - .first() - .ok_or(FromRowError::BadCqlVal { - err: FromCqlValError::ValIsNull, - column: 0, - }) - .and_then(|cqlval| { - bool::from_cql(cqlval.to_owned()).map_err(|_err| FromRowError::BadCqlVal { - err: FromCqlValError::BadCqlType, - column: 0, - }) - }) - .map(LwtSuccess) - } -} - -const INSERT_PRODUCER_SLOT: &str = r###" - INSERT INTO producer_slot_seen (producer_id, slot, shard_offset_map, created_at) - VALUES (?, ?, ?, currentTimestamp()) -"###; - -const DROP_PRODUCER_LOCK: &str = r###" - DELETE FROM producer_lock - WHERE producer_id = ? - IF lock_id = ? -"###; - -const TRY_ACQUIRE_PRODUCER_LOCK: &str = r###" - INSERT INTO producer_lock (producer_id, lock_id, ifname, ipv4, is_ready, minimum_shard_offset, created_at) - VALUES (?, ?, ?, ?, false, null, currentTimestamp()) - IF NOT EXISTS -"###; - -const GET_PRODUCER_INFO_BY_ID: &str = r###" - SELECT - producer_id, - num_shards, - commitment_level - FROM producer_info - WHERE producer_id = ? -"###; - -const COMMIT_SHARD_PERIOD: &str = r###" - INSERT INTO producer_period_commit_log (producer_id, shard_id, period, created_at) - VALUES (?, ?, ?, currentTimestamp()) -"###; - -const INSERT_BLOCKCHAIN_EVENT: &str = r###" - INSERT INTO log ( - shard_id, - period, - producer_id, - offset, - slot, - event_type, - pubkey, - lamports, - owner, - executable, - rent_epoch, - write_version, - data, - txn_signature, - signature, - signatures, - num_readonly_signed_accounts, - num_readonly_unsigned_accounts, - num_required_signatures, - account_keys, - recent_blockhash, - instructions, - versioned, - address_table_lookups, - meta, - is_vote, - tx_index, - created_at - ) - VALUES (?,?,?, ?,?,?, ?,?,?, ?,?,?, ?,?,?, ?,?,?, ?,?,?, ?,?,?, ?,?,?, currentTimestamp()) -"###; - -#[derive(Clone, PartialEq, Debug)] -pub struct ScyllaSinkConfig { - pub producer_id: u8, - pub batch_len_limit: usize, - pub batch_size_kb_limit: usize, - pub linger: Duration, - pub keyspace: String, - pub ifname: Option, - pub commitment_level: CommitmentLevel, -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug, PartialEq)] -enum ShardCommand { - Shutdown, - // Add other action if necessary... - InsertAccountUpdate(AccountUpdate), - InsertTransaction(Transaction), -} - -/// Represents a shard responsible for processing and batching `ShardCommand` messages -/// before committing them to the database in a background daemon. -/// -/// This struct encapsulates the state and behavior required to manage message buffering, -/// batching, and period-based commitment for a specific shard within a distributed system. -struct Shard { - /// Arc-wrapped database session for executing queries. - session: Arc, - - /// Unique identifier for the shard. - shard_id: ShardId, - - /// Unique identifier for the producer associated with this shard. - producer_id: ProducerId, - - /// The next offset to be assigned for incoming client commands. - next_offset: ShardOffset, - - /// Buffer to store sharded client commands before batching. - buffer: Vec, - - /// Maximum capacity of the buffer (number of commands it can hold). - max_buffer_capacity: usize, - - /// Maximum byte size of the buffer (sum of sizes of commands it can hold). - max_buffer_byte_size: usize, - - /// Batch for executing database statements in bulk. - scylla_batch: Batch, - - /// Current byte size of the batch being constructed. - curr_batch_byte_size: usize, - - /// Duration to linger before flushing the buffer. - buffer_linger: Duration, - - last_committed_period: ShardPeriod, -} - -impl Shard { - fn new( - session: Arc, - shard_id: ShardId, - producer_id: ProducerId, - next_offset: ShardOffset, - max_buffer_capacity: usize, - max_buffer_byte_size: usize, - buffer_linger: Duration, - ) -> Self { - if next_offset < 0 { - panic!("next offset can not be negative"); - } - Shard { - session, - shard_id, - producer_id, - next_offset, - buffer: Vec::with_capacity(max_buffer_capacity), - max_buffer_capacity, - max_buffer_byte_size, - // Since each shard will only batch into a single partition at a time, we can safely disable batch logging - // without losing atomicity guarantee provided by scylla. - scylla_batch: Batch::new(BatchType::Unlogged), - buffer_linger, - curr_batch_byte_size: 0, - last_committed_period: -1, - } - } - - fn clear_buffer(&mut self) { - self.buffer.clear(); - self.curr_batch_byte_size = 0; - self.scylla_batch.statements.clear(); - } - - async fn flush(&mut self) -> anyhow::Result<()> { - let buffer_len = self.buffer.len(); - if buffer_len > 0 { - let before = Instant::now(); - // We must wait for the batch success to guarantee monotonicity in the shard's timeline. - self.session.batch(&self.scylla_batch, &self.buffer).await?; - scylladb_batch_request_lag_sub(buffer_len as i64); - scylladb_batch_sent_inc(); - scylladb_batch_size_observe(buffer_len); - scylladb_batchitem_sent_inc_by(buffer_len as u64); - if before.elapsed() >= WARNING_SCYLLADB_LATENCY_THRESHOLD { - warn!("sent {} elements in {:?}", buffer_len, before.elapsed()); - } - } - self.clear_buffer(); - Ok(()) - } - - /// Converts the current `Shard` instance into a background daemon for processing and batching `ShardCommand` messages. - /// - /// This method spawns an asynchronous task (`tokio::spawn`) to continuously receive messages from a channel (`receiver`), - /// batch process them, and commit periods to the database. It handles message buffering - /// and period commitment based on the configured buffer settings and period boundaries. - /// - /// # Returns - /// Returns a `Sender` channel (`tokio::sync::mpsc::Sender`) that can be used to send `ShardCommand` messages - /// to the background daemon for processing and batching. - fn into_daemon(mut self) -> ShardHandle { - let (sender, mut receiver) = tokio::sync::mpsc::channel::(16); - let shard_id = self.shard_id; - let (wsender, wreceiver) = tokio::sync::watch::channel(self.next_offset - 1); - - let handle: JoinHandle> = tokio::spawn(async move { - let insert_event_ps = self.session.prepare(INSERT_BLOCKCHAIN_EVENT).await?; - let commit_period_ps = self.session.prepare(COMMIT_SHARD_PERIOD).await?; - - let mut buffering_timeout = Instant::now() + self.buffer_linger; - loop { - let shard_id = self.shard_id; - let producer_id = self.producer_id; - let offset = self.next_offset; - let curr_period = offset / SHARD_OFFSET_MODULO; - let prev_period = curr_period - 1; - - // If we started a new period - if offset % SHARD_OFFSET_MODULO == 0 - && offset > 0 - && self.last_committed_period != prev_period - { - // Make sure the last period is committed - let t = Instant::now(); - self.session - .execute(&commit_period_ps, (producer_id, shard_id, prev_period)) - .await?; - info!( - shard = shard_id, - producer_id = ?self.producer_id, - committed_period = curr_period, - time_to_commit = ?t.elapsed() - ); - self.last_committed_period = prev_period; - } - - let msg = receiver - .recv() - .await - .ok_or(anyhow::anyhow!("Shard mailbox closed"))?; - - let maybe_blockchain_event = match msg { - ShardCommand::Shutdown => { - warn!("Shard {} received shutdown command.", shard_id); - self.flush().await?; - warn!("shard {} finished shutdown procedure", shard_id); - return Ok(()); - } - ShardCommand::InsertAccountUpdate(acc_update) => { - Some(acc_update.as_blockchain_event(shard_id, producer_id, offset)) - } - ShardCommand::InsertTransaction(new_tx) => { - Some(new_tx.as_blockchain_event(shard_id, producer_id, offset)) - } - }; - - if let Some(blockchain_event) = maybe_blockchain_event { - let msg_byte_size = blockchain_event.deep_size_of(); - - let need_flush = self.buffer.len() >= self.max_buffer_capacity - || self.curr_batch_byte_size + msg_byte_size >= self.max_buffer_byte_size - || buffering_timeout.elapsed() > Duration::ZERO; - - if need_flush { - self.flush().await?; - buffering_timeout = Instant::now() + self.buffer_linger; - } - - self.buffer.push(blockchain_event); - self.scylla_batch.append_statement(insert_event_ps.clone()); - self.curr_batch_byte_size += msg_byte_size; - wsender - .send(offset) - .map_err(|_offset| anyhow::anyhow!("failed to notify committed offset"))?; - self.next_offset += 1; - } - } - }); - - ShardHandle { - shard_id, - sender, - tokio_handle: handle, - shard_offset_watch: wreceiver, - } - } -} - -struct ShardHandle { - shard_id: ShardId, - sender: tokio::sync::mpsc::Sender, - tokio_handle: JoinHandle>, - shard_offset_watch: tokio::sync::watch::Receiver, -} - -impl ShardHandle { - async fn reserve(&self) -> Result, SendError<()>> { - self.sender.reserve().await - } - - async fn send(&self, value: ShardCommand) -> Result<(), SendError> { - self.sender.send(value).await - } - - fn get_last_committed_offset(&self) -> ShardOffset { - self.shard_offset_watch.borrow().to_owned() - } -} - -impl Future for ShardHandle { - type Output = Result, JoinError>; - - fn poll( - mut self: std::pin::Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> std::task::Poll { - let handle = &mut self.tokio_handle; - tokio::pin!(handle); - handle.poll(cx) - } -} - -pub struct ScyllaSink { - router_sender: tokio::sync::mpsc::Sender, - router_handle: JoinHandle>, - producer_lock: ProducerLock, -} - -#[derive(Debug)] -pub enum ScyllaSinkError { - SinkClose, -} - -/// Retrieves the latest shard offsets for a specific producer from the `shard_max_offset_mv` materialized view. -/// -/// This asynchronous function queries the database session to fetch the latest shard offsets associated with -/// a given `producer_id` from the `shard_max_offset_mv` materialized view. It constructs and executes a SELECT -/// query to retrieve the shard IDs and corresponding offsets ordered by offset and period. -/// -/// # Parameters -/// - `session`: An Arc-wrapped database session (`Arc`) for executing database queries. -/// - `producer_id`: The unique identifier (`ProducerId`) of the producer whose shard offsets are being retrieved. -/// - `num_shards` : number of shard assigned to producer. -/// -/// # Returns -/// - `Ok(None)`: If no shard offsets are found for the specified producer. -/// - `Ok(Some(rows))`: If shard offsets are found, returns a vector of tuples containing shard IDs and offsets. -/// Each tuple represents a shard's latest offset for the producer. -/// - `Err`: If an error occurs during database query execution or result parsing, returns an `anyhow::Result`. -pub(crate) async fn get_max_shard_offsets_for_producer( - session: Arc, - producer_id: ProducerId, - num_shards: usize, -) -> anyhow::Result> { - let cql_shard_list = (0..num_shards) - .map(|shard_id| format!("{shard_id}")) - .collect::>() - .join(", "); - - let query_last_period_commit = format!( - r###" - SELECT - shard_id, - period - FROM producer_period_commit_log - where producer_id = ? - AND shard_id IN ({cql_shard_list}) - ORDER BY period DESC - PER PARTITION LIMIT 1 - "### - ); - - let mut current_period_foreach_shard = session - .query(query_last_period_commit, (producer_id,)) - .await? - .rows_typed_or_empty::<(ShardId, ShardPeriod)>() - .map(|result| result.map(|(shard_id, period)| (shard_id, period + 1))) - .collect::, _>>()?; - - for shard_id in 0..num_shards { - // Put period 0 by default for each missing shard. - current_period_foreach_shard - .entry(shard_id as ShardId) - .or_insert(0); - } - - let query_max_offset_for_shard_period = r###" - SELECT - offset, - slot - FROM log - WHERE - producer_id = ? - AND shard_id = ? - and period = ? - ORDER BY offset desc - PER PARTITION LIMIT 1 - "###; - let max_offset_for_shard_period_ps = session.prepare(query_max_offset_for_shard_period).await?; - - //let mut js: JoinSet> = JoinSet::new(); - let mut shard_max_offset_pairs = - futures::future::try_join_all(current_period_foreach_shard.iter().map( - |(shard_id, curr_period)| { - let ps = max_offset_for_shard_period_ps.clone(); - let session = Arc::clone(&session); - async move { - let (max_offset, slot) = session - .execute(&ps, (producer_id, shard_id, curr_period)) - .await? - .maybe_first_row_typed::<(ShardOffset, Slot)>()? - // If row is None, it means no period has started since the last period commit. - // So we seek at the end of the previous period. - .unwrap_or(((curr_period * SHARD_OFFSET_MODULO) - 1, UNDEFINED_SLOT)); - Ok::<_, anyhow::Error>((*shard_id, max_offset, slot)) - } - }, - )) - .await?; - - if shard_max_offset_pairs.len() != num_shards { - panic!("missing shard period commit information, make sure the period commit is initialize before computing shard offsets"); - } - - shard_max_offset_pairs.sort_by_key(|pair| pair.0); - - Ok(shard_max_offset_pairs) -} - -/// Spawns a round-robin dispatcher for sending `ShardCommand` messages to a list of shard mailboxes. -/// -/// This function takes a vector of shard mailboxes (`tokio::sync::mpsc::Sender`) and returns -/// a new `Sender` that can be used to dispatch messages in a round-robin fashion to the provided shard mailboxes. -/// -/// The dispatcher cycles through the shard mailboxes indefinitely, ensuring each message is sent to the next -/// available shard without waiting, or falling back to the original shard if all are busy. It increments the -/// ScyllaDB batch request lag for monitoring purposes. -/// -/// # Parameters -/// - `shard_mailboxes`: A vector of `Sender` channels representing shard mailboxes to dispatch messages to. -/// -/// # Returns -/// A `Sender` channel that can be used to send `ShardCommand` messages to the shard mailboxes in a round-robin manner. -fn spawn_round_robin( - session: Arc, - producer_id: ProducerId, - shard_handles: Vec, -) -> ( - tokio::sync::mpsc::Sender, - JoinHandle>, -) { - let (sender, mut receiver) = tokio::sync::mpsc::channel(DEFAULT_SHARD_MAX_BUFFER_CAPACITY); - - let h: JoinHandle> = tokio::spawn(async move { - let insert_slot_ps = session.prepare(INSERT_PRODUCER_SLOT).await?; - - // One hour worth of slots - const SLOT_SEEN_RETENTION: usize = 9000; - //session.execute(&insert_slot_ps, (producer_id,)).await?; - - let iterator = shard_handles.iter().enumerate().cycle(); - info!("Started round robin router"); - let mut msg_between_slot = 0; - let mut max_slot_seen = -1; - let mut time_since_new_max_slot = Instant::now(); - let mut background_commit_slot_seen = - tokio::spawn(future::ready(Ok::<(), anyhow::Error>(()))); - - let mut slots_seen = BTreeSet::::new(); - - for (i, shard_sender) in iterator { - let msg = receiver.recv().await.unwrap_or(ShardCommand::Shutdown); - - if msg == ShardCommand::Shutdown { - warn!("round robin router's mailbox closed unexpectly."); - break; - } - let slot = match &msg { - ShardCommand::Shutdown => -1, - ShardCommand::InsertAccountUpdate(x) => x.slot, - ShardCommand::InsertTransaction(x) => x.slot, - }; - - if slots_seen.insert(slot) { - while slots_seen.len() >= SLOT_SEEN_RETENTION { - slots_seen.pop_first(); - } - - if max_slot_seen > slot { - warn!("Slot {slot} arrived late after seeing {max_slot_seen}"); - } else { - max_slot_seen = slot; - } - let time_elapsed_between_last_max_slot = time_since_new_max_slot.elapsed(); - // We only commit every 3 slot number - - let t = Instant::now(); - background_commit_slot_seen.await??; - - let session = Arc::clone(&session); - let insert_slot_ps = insert_slot_ps.clone(); - let shard_offset_pairs = shard_handles - .iter() - .map(|sh| (sh.shard_id, sh.get_last_committed_offset())) - .collect::>(); - - background_commit_slot_seen = tokio::spawn(async move { - session - .execute(&insert_slot_ps, (producer_id, slot, shard_offset_pairs)) - .await?; - - let time_to_commit_slot = t.elapsed(); - info!( - "New slot: {} after {time_elapsed_between_last_max_slot:?}, events in between: {}, max_slot_approx committed in {time_to_commit_slot:?}", - slot, msg_between_slot - ); - Ok(()) - }); - time_since_new_max_slot = Instant::now(); - msg_between_slot = 0; - } - msg_between_slot += 1; - let result = shard_sender.reserve().await; - if let Ok(permit) = result { - permit.send(msg); - scylladb_batch_request_lag_inc(); - } else { - error!("shard {} seems to be closed: {:?}", i, result); - break; - } - } - // Send shutdown to all shards - for (i, shard_sender) in shard_handles.iter().enumerate() { - warn!("Shutting down shard: {}", i); - shard_sender.send(ShardCommand::Shutdown).await?; - } - - try_join_all(shard_handles.into_iter()).await?; - - warn!("End of round robin router"); - Ok(()) - }); - (sender, h) -} - -async fn get_producer_info_by_id( - session: Arc, - producer_id: ProducerId, -) -> anyhow::Result> { - session - .query(GET_PRODUCER_INFO_BY_ID, (producer_id,)) - .await? - .maybe_first_row_typed::() - .map_err(anyhow::Error::new) -} - -struct ProducerLock { - session: Arc, - lock_id: String, - producer_id: ProducerId, -} - -impl ProducerLock { - async fn release(self) -> anyhow::Result<()> { - self.session - .query(DROP_PRODUCER_LOCK, (self.producer_id, self.lock_id)) - .await - .map(|_query_result| ()) - .map_err(anyhow::Error::new) - } -} - -async fn try_acquire_lock( - session: Arc, - producer_id: ProducerId, - ifname: Option, -) -> anyhow::Result { - let network_interfaces = list_afinet_netifas()?; - - let (ifname, ipaddr) = if let Some(ifname) = ifname { - if let Some((_, ipaddr)) = network_interfaces - .iter() - .find(|(name, ipaddr)| *name == ifname && matches!(ipaddr, IpAddr::V4(_))) - { - (ifname, ipaddr.to_string()) - } else { - anyhow::bail!("Found not interface named {}", ifname); - } - } else { - let ipaddr = local_ip()?; - if !ipaddr.is_ipv4() { - anyhow::bail!("ipv6 not support for producer lock info."); - } - if let Some((ifname, _)) = network_interfaces - .iter() - .find(|(_, ipaddr2)| ipaddr == *ipaddr2) - { - (ifname.to_owned(), ipaddr.to_string()) - } else { - anyhow::bail!("Found not interface matching ip {}", ipaddr); - } - }; - - let lock_id = Uuid::new_v4().to_string(); - let qr = session - .query( - TRY_ACQUIRE_PRODUCER_LOCK, - (producer_id, lock_id.clone(), ifname, ipaddr), - ) - .await?; - let lwt_success = qr.single_row_typed::()?; - - if let LwtSuccess(true) = lwt_success { - let lock = ProducerLock { - session: Arc::clone(&session), - lock_id, - producer_id, - }; - Ok(lock) - } else { - anyhow::bail!( - "Failed to lock producer {:?}, you may need to release it manually", - producer_id - ); - } -} - -async fn set_minimum_producer_offsets( - session: Arc, - producer_lock: &ProducerLock, - minimum_shard_offsets: &[(ShardId, ShardOffset, Slot)], -) -> anyhow::Result<()> { - let ps = session - .prepare( - r###" - UPDATE producer_lock - SET minimum_shard_offset = ?, is_ready = true - WHERE - producer_id = ? - IF EXISTS - "###, - ) - .await?; - - let lwt = session - .execute(&ps, (minimum_shard_offsets, producer_lock.producer_id)) - .await? - .first_row_typed::()?; - - if let LwtSuccess(false) = lwt { - anyhow::bail!("Producer lock is corrupted, it may be cause by concurrent lock acquisition"); - } - - Ok(()) -} - -impl ScyllaSink { - pub async fn new( - config: ScyllaSinkConfig, - hostname: impl AsRef, - username: impl Into, - password: impl Into, - ) -> anyhow::Result { - let producer_id = [config.producer_id]; - - let session: Session = SessionBuilder::new() - .known_node(hostname) - .user(username, password) - .compression(Some(Compression::Lz4)) - .use_keyspace(config.keyspace.clone(), false) - .build() - .await?; - info!("connection pool to scylladb ready."); - let session = Arc::new(session); - - let producer_info = get_producer_info_by_id(Arc::clone(&session), producer_id) - .await? - .unwrap_or_else(|| panic!("producer {:?} has not yet been registered", producer_id)); - - if producer_info.commitment_level != config.commitment_level { - anyhow::bail!("Commitment level in configuration ({:?}) don't match producer info in database ({:?})", config.commitment_level, producer_info.commitment_level); - } - - info!("Producer {producer_id:?} is registered"); - - let producer_lock = - try_acquire_lock(Arc::clone(&session), producer_id, config.ifname.to_owned()).await?; - - info!("Producer {producer_id:?} lock acquired!"); - - let shard_count = producer_info.num_shards as usize; - - // On init, we collect where the producer left = max shard offsets - // Where we left of, it becomes new earliest offset available. - // This is to prevent - let shard_offsets = - get_max_shard_offsets_for_producer(Arc::clone(&session), producer_id, shard_count) - .await?; - - let result = - set_minimum_producer_offsets(Arc::clone(&session), &producer_lock, &shard_offsets) - .await; - if let Err(e) = result { - let result2 = producer_lock.release().await; - if let Err(e2) = result2 { - error!("Releasing lock failed during error handling: {e2:?}"); - } - anyhow::bail!(e); - } - - info!("Got back last offsets of all {shard_count} shards"); - let mut shard_handles = Vec::with_capacity(shard_count); - for (shard_id, last_offset, _slot) in shard_offsets.into_iter() { - let session = Arc::clone(&session); - let shard = Shard::new( - session, - shard_id, - producer_id, - last_offset + 1, - DEFAULT_SHARD_MAX_BUFFER_CAPACITY, - config.batch_size_kb_limit * 1024, - config.linger, - ); - let shard_handle = shard.into_daemon(); - shard_handles.push(shard_handle); - } - - let (sender, router_handle) = - spawn_round_robin(Arc::clone(&session), producer_id, shard_handles); - - Ok(ScyllaSink { - router_sender: sender, - router_handle, - producer_lock, - }) - } - - pub async fn shutdown(self) -> anyhow::Result<()> { - warn!("Shutthing down scylla sink..."); - let router_result = self.router_sender.send(ShardCommand::Shutdown).await; - if router_result.is_err() { - error!("router was closed before we could gracefully shutdown all sharders."); - } - if let Err(e) = self.router_handle.await? { - error!("router error {e}"); - } - self.producer_lock.release().await - } - - async fn inner_log(&mut self, cmd: ShardCommand) -> anyhow::Result<()> { - self.router_sender - .send(cmd) - .await - .map_err(|_e| anyhow::anyhow!("failed to route")) - } - - pub async fn log_account_update(&mut self, update: AccountUpdate) -> anyhow::Result<()> { - let cmd = ShardCommand::InsertAccountUpdate(update); - self.inner_log(cmd).await - } - - pub async fn log_transaction(&mut self, tx: Transaction) -> anyhow::Result<()> { - let cmd = ShardCommand::InsertTransaction(tx); - self.inner_log(cmd).await - } -} diff --git a/yellowstone-grpc-tools/src/scylladb/types.rs b/yellowstone-grpc-tools/src/scylladb/types.rs deleted file mode 100644 index 05c6c132..00000000 --- a/yellowstone-grpc-tools/src/scylladb/types.rs +++ /dev/null @@ -1,1095 +0,0 @@ -use { - anyhow::{anyhow, Ok}, - core::fmt, - deepsize::DeepSizeOf, - scylla::{ - cql_to_rust::{FromCqlVal, FromCqlValError}, - frame::response::result::CqlValue, - serialize::value::SerializeCql, - FromRow, FromUserType, SerializeCql, SerializeRow, - }, - std::iter::repeat, - yellowstone_grpc_proto::{ - geyser::{ - SubscribeUpdateAccount, SubscribeUpdateTransaction, SubscribeUpdateTransactionInfo, - }, - solana::storage::confirmed_block::{self, CompiledInstruction}, - }, -}; - -pub type ProgramId = [u8; 32]; -pub type Pubkey = [u8; 32]; -pub type Slot = i64; -pub type ShardId = i16; -pub type ShardPeriod = i64; -pub type ShardOffset = i64; -pub type ProducerId = [u8; 1]; // one byte is enough to assign an id to a machine -pub type ConsumerId = String; -pub const SHARD_OFFSET_MODULO: i64 = 10000; -pub const MIN_PROCUDER: ProducerId = [0x00]; -pub const MAX_PRODUCER: ProducerId = [0xFF]; -pub const UNDEFINED_SLOT: Slot = -1; - -#[derive(Clone, Debug, PartialEq, Eq, FromRow)] -pub struct ConsumerInfo { - pub consumer_id: ConsumerId, - pub producer_id: ProducerId, - //pub initital_shard_offsets: Vec, - pub subscribed_blockchain_event_types: Vec, -} - -#[derive(Clone, Debug, PartialEq, Eq, FromRow)] -pub struct ConsumerShardOffset { - pub consumer_id: ConsumerId, - pub producer_id: ProducerId, - pub shard_id: ShardId, - pub event_type: BlockchainEventType, - pub offset: ShardOffset, - pub slot: Slot, -} - -#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Copy, DeepSizeOf)] -pub enum BlockchainEventType { - AccountUpdate = 0, - NewTransaction = 1, -} - -impl TryFrom for BlockchainEventType { - type Error = anyhow::Error; - - fn try_from(value: i16) -> Result { - match value { - 0 => Ok(BlockchainEventType::AccountUpdate), - 1 => Ok(BlockchainEventType::NewTransaction), - x => Err(anyhow!("Unknown LogEntryType equivalent for {:?}", x)), - } - } -} - -impl From for i16 { - fn from(val: BlockchainEventType) -> Self { - match val { - BlockchainEventType::AccountUpdate => 0, - BlockchainEventType::NewTransaction => 1, - } - } -} - -impl SerializeCql for BlockchainEventType { - fn serialize<'b>( - &self, - typ: &scylla::frame::response::result::ColumnType, - writer: scylla::serialize::CellWriter<'b>, - ) -> Result< - scylla::serialize::writers::WrittenCellProof<'b>, - scylla::serialize::SerializationError, - > { - let x: i16 = (*self).into(); - SerializeCql::serialize(&x, typ, writer) - } -} - -impl FromCqlVal for BlockchainEventType { - fn from_cql(cql_val: CqlValue) -> Result { - match cql_val { - CqlValue::SmallInt(x) => x.try_into().map_err(|_| FromCqlValError::BadVal), - _ => Err(FromCqlValError::BadCqlType), - } - } -} - -#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Copy)] -pub enum CommitmentLevel { - Processed = 0, - Confirmed = 1, - Finalized = 2, -} - -impl fmt::Display for CommitmentLevel { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - CommitmentLevel::Processed => f.write_str("Processed"), - CommitmentLevel::Confirmed => f.write_str("Confirmed"), - CommitmentLevel::Finalized => f.write_str("Finalized"), - } - } -} - -impl From for i16 { - fn from(val: CommitmentLevel) -> Self { - match val { - CommitmentLevel::Processed => 0, - CommitmentLevel::Confirmed => 1, - CommitmentLevel::Finalized => 2, - } - } -} - -impl TryFrom for CommitmentLevel { - type Error = anyhow::Error; - - fn try_from(value: i16) -> Result { - match value { - 0 => Ok(CommitmentLevel::Processed), - 1 => Ok(CommitmentLevel::Confirmed), - 2 => Ok(CommitmentLevel::Finalized), - x => Err(anyhow!( - "Unknown CommitmentLevel equivalent for code {:?}", - x - )), - } - } -} - -impl SerializeCql for CommitmentLevel { - fn serialize<'b>( - &self, - typ: &scylla::frame::response::result::ColumnType, - writer: scylla::serialize::CellWriter<'b>, - ) -> Result< - scylla::serialize::writers::WrittenCellProof<'b>, - scylla::serialize::SerializationError, - > { - let x: i16 = (*self).into(); - SerializeCql::serialize(&x, typ, writer) - } -} - -impl FromCqlVal for CommitmentLevel { - fn from_cql(cql_val: CqlValue) -> Result { - match cql_val { - CqlValue::SmallInt(x) => x.try_into().map_err(|_| FromCqlValError::BadVal), - _ => Err(FromCqlValError::BadCqlType), - } - } -} - -#[derive(SerializeRow, Clone, Debug, FromRow, DeepSizeOf, PartialEq)] -pub struct BlockchainEvent { - // Common - pub shard_id: ShardId, - pub period: ShardPeriod, - pub producer_id: ProducerId, - pub offset: ShardOffset, - pub slot: i64, - pub event_type: BlockchainEventType, - - // AccountUpdate - pub pubkey: Option, - pub lamports: Option, - pub owner: Option, - pub executable: Option, - pub rent_epoch: Option, - pub write_version: Option, - pub data: Option>, - pub txn_signature: Option>, - - // Transaction - pub signature: Option>, - pub signatures: Option>>, - pub num_required_signatures: Option, - pub num_readonly_signed_accounts: Option, - pub num_readonly_unsigned_accounts: Option, - pub account_keys: Option>>, - pub recent_blockhash: Option>, - pub instructions: Option>, - pub versioned: Option, - pub address_table_lookups: Option>, - pub meta: Option, - pub is_vote: Option, - pub tx_index: Option, -} - -#[derive(SerializeRow, Clone, Debug, DeepSizeOf, PartialEq, Eq)] -pub struct AccountUpdate { - pub slot: i64, - pub pubkey: Pubkey, - pub lamports: i64, - pub owner: Pubkey, - pub executable: bool, - pub rent_epoch: i64, - pub write_version: i64, - pub data: Vec, - pub txn_signature: Option>, -} - -fn try_collect(it: I) -> Result, >::Error> -where - I::Item: TryInto, -{ - it.into_iter().map(|item| item.try_into()).collect() -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, Eq, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct MessageAddrTableLookup { - pub account_key: Vec, - pub writable_indexes: Vec, - pub readonly_indexes: Vec, -} - -impl From for MessageAddrTableLookup { - fn from(msg: confirmed_block::MessageAddressTableLookup) -> Self { - // Extract fields from MessageAddressLookup - let account_key = msg.account_key; - let writable_indexes = msg.writable_indexes; - let readonly_indexes = msg.readonly_indexes; - - // Create a new instance of AddressLookup - MessageAddrTableLookup { - account_key, - writable_indexes, - readonly_indexes, - } - } -} - -impl From for confirmed_block::MessageAddressTableLookup { - fn from(msg: MessageAddrTableLookup) -> Self { - // Create a new instance of AddressLookup - confirmed_block::MessageAddressTableLookup { - account_key: msg.account_key, - writable_indexes: msg.writable_indexes, - readonly_indexes: msg.readonly_indexes, - } - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, Eq, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct CompiledInstr { - pub program_id_index: i64, - pub accounts: Vec, - pub data: Vec, -} - -impl From for CompiledInstr { - fn from(compiled_instr: confirmed_block::CompiledInstruction) -> Self { - // Extract fields from CompiledInstruction - let program_id_index = compiled_instr.program_id_index.into(); - let accounts = compiled_instr.accounts; - let data = compiled_instr.data; - - // Create a new instance of CompileInstr - CompiledInstr { - program_id_index, - accounts, - data, - } - - // Return the new CompileInstr instance - } -} - -impl TryFrom for confirmed_block::CompiledInstruction { - type Error = anyhow::Error; - - fn try_from(value: CompiledInstr) -> Result { - Ok(CompiledInstruction { - program_id_index: value.program_id_index.try_into()?, - accounts: value.accounts, - data: value.data, - }) - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, Eq, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct InnerInstr { - pub program_id_index: i64, - pub accounts: Vec, - pub data: Vec, - pub stack_height: Option, -} - -impl From for InnerInstr { - fn from(value: confirmed_block::InnerInstruction) -> Self { - InnerInstr { - program_id_index: value.program_id_index.into(), - accounts: value.accounts, - data: value.data, - stack_height: value.stack_height.map(|x| x.into()), - } - } -} - -impl TryFrom for confirmed_block::InnerInstruction { - type Error = anyhow::Error; - - fn try_from(value: InnerInstr) -> Result { - Ok(confirmed_block::InnerInstruction { - program_id_index: value.program_id_index.try_into()?, - accounts: value.accounts, - data: value.data, - stack_height: value.stack_height.map(|x| x.try_into()).transpose()?, - }) - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, Eq, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct InnerInstrs { - pub index: i64, - pub instructions: Vec, -} - -impl TryFrom for InnerInstrs { - type Error = anyhow::Error; - - fn try_from(value: confirmed_block::InnerInstructions) -> Result { - let instructions: Vec = try_collect(value.instructions)?; - - let index = value.index.into(); - Ok(InnerInstrs { - index, - instructions, - }) - } -} - -impl TryFrom for confirmed_block::InnerInstructions { - type Error = anyhow::Error; - - fn try_from(value: InnerInstrs) -> Result { - Ok(confirmed_block::InnerInstructions { - index: value.index.try_into()?, - instructions: try_collect(value.instructions)?, - }) - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct UiTokenAmount { - pub ui_amount: f64, - pub decimals: i64, - pub amount: String, - pub ui_amount_string: String, -} - -impl From for UiTokenAmount { - fn from(value: confirmed_block::UiTokenAmount) -> Self { - UiTokenAmount { - ui_amount: value.ui_amount, - decimals: value.decimals.into(), - amount: value.amount, - ui_amount_string: value.ui_amount_string, - } - } -} - -impl TryFrom for confirmed_block::UiTokenAmount { - type Error = anyhow::Error; - - fn try_from(value: UiTokenAmount) -> Result { - Ok(confirmed_block::UiTokenAmount { - ui_amount: value.ui_amount, - decimals: value.decimals.try_into()?, - amount: value.amount, - ui_amount_string: value.ui_amount_string, - }) - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct TxTokenBalance { - pub account_index: i64, - pub mint: String, - pub ui_token_amount: Option, - pub owner: String, - pub program_id: String, -} - -impl From for TxTokenBalance { - fn from(value: confirmed_block::TokenBalance) -> Self { - TxTokenBalance { - account_index: value.account_index.into(), - mint: value.mint, - ui_token_amount: value.ui_token_amount.map(Into::into), - owner: value.owner, - program_id: value.program_id, - } - } -} - -impl TryFrom for confirmed_block::TokenBalance { - type Error = anyhow::Error; - - fn try_from(value: TxTokenBalance) -> Result { - Ok(confirmed_block::TokenBalance { - account_index: value.account_index.try_into()?, - mint: value.mint, - ui_token_amount: value.ui_token_amount.map(TryInto::try_into).transpose()?, - owner: value.owner, - program_id: value.program_id, - }) - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, Eq, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct Reward { - pub pubkey: String, - pub lamports: i64, - pub post_balance: i64, - pub reward_type: i32, - pub commission: String, -} - -impl TryFrom for Reward { - type Error = anyhow::Error; - fn try_from(value: confirmed_block::Reward) -> Result { - Ok(Reward { - pubkey: value.pubkey, - lamports: value.lamports, - post_balance: value.post_balance.try_into()?, - reward_type: value.reward_type, - commission: value.commission, - }) - } -} - -impl TryFrom for confirmed_block::Reward { - type Error = anyhow::Error; - - fn try_from(value: Reward) -> Result { - Ok(confirmed_block::Reward { - pubkey: value.pubkey, - lamports: value.lamports, - post_balance: value.post_balance.try_into()?, - reward_type: value.reward_type, - commission: value.commission, - }) - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, PartialEq, Eq)] -#[scylla(flavor = "match_by_name")] -pub struct ReturnData { - pub program_id: ProgramId, - pub data: Vec, -} - -impl TryFrom for ReturnData { - type Error = anyhow::Error; - fn try_from(value: confirmed_block::ReturnData) -> Result { - Ok(ReturnData { - program_id: value - .program_id - .try_into() - .map_err(|e| anyhow::anyhow!("Inavlid readonly address, got: {:?}", e))?, - data: value.data, - }) - } -} - -impl From for confirmed_block::ReturnData { - fn from(value: ReturnData) -> Self { - confirmed_block::ReturnData { - program_id: value.program_id.into(), - data: value.data, - } - } -} - -#[derive(Debug, SerializeCql, Clone, DeepSizeOf, FromUserType, Default, PartialEq)] -#[scylla(flavor = "match_by_name")] -pub struct TransactionMeta { - pub error: Option>, - pub fee: i64, - pub pre_balances: Vec, - pub post_balances: Vec, - pub inner_instructions: Option>, - pub log_messages: Option>, - pub pre_token_balances: Vec, - pub post_token_balances: Vec, - pub rewards: Vec, - pub loaded_writable_addresses: Vec, - pub loaded_readonly_addresses: Vec, - pub return_data: Option, - pub compute_units_consumed: Option, -} - -impl TryFrom for TransactionMeta { - type Error = anyhow::Error; - - fn try_from(status_meta: confirmed_block::TransactionStatusMeta) -> Result { - let error = status_meta.err.map(|err| err.err); - let fee = status_meta.fee.try_into()?; - let pre_balances: Vec = try_collect(status_meta.pre_balances)?; - let post_balances = try_collect(status_meta.post_balances)?; - let inner_instructions: Vec = try_collect(status_meta.inner_instructions)?; - let log_messages = status_meta.log_messages; - - let pre_token_balances: Vec = status_meta - .pre_token_balances - .into_iter() - .map(|pre_tb| pre_tb.into()) - .collect(); - - let post_token_balances: Vec = status_meta - .post_token_balances - .into_iter() - .map(|pre_tb| pre_tb.into()) - .collect(); - - let rewards: Vec = try_collect(status_meta.rewards)?; - - let loaded_readonly_addresses: Vec = - try_collect(status_meta.loaded_readonly_addresses) - .map_err(|e| anyhow::anyhow!("Inavlid readonly address, got: {:?}", e))?; - let loaded_writable_addresses = try_collect(status_meta.loaded_writable_addresses) - .map_err(|e| anyhow::anyhow!("Inavlid readonly address, got: {:?}", e))?; - - let return_data = status_meta - .return_data - .map(|rd| rd.try_into()) - .transpose()?; - let compute_units_consumed = status_meta - .compute_units_consumed - .map(|cu| cu.try_into()) - .transpose()?; - - // Create a new TransactionMeta instance - let transaction_meta = TransactionMeta { - error, - fee, - pre_balances, - post_balances, - inner_instructions: if status_meta.inner_instructions_none { - Some(inner_instructions) - } else { - None - }, - log_messages: if status_meta.log_messages_none { - Some(log_messages) - } else { - None - }, - pre_token_balances, - post_token_balances, - rewards, - loaded_readonly_addresses, - loaded_writable_addresses, - return_data, - compute_units_consumed, - }; - - // Return the new TransactionMeta instance - Ok(transaction_meta) - } -} - -impl TryFrom for confirmed_block::TransactionStatusMeta { - type Error = anyhow::Error; - - fn try_from(value: TransactionMeta) -> Result { - let inner_instructions_none = value.inner_instructions.is_none(); - let log_messages_none = value.log_messages.is_none(); - let return_data_none = value.return_data.is_none(); - Ok(confirmed_block::TransactionStatusMeta { - err: value - .error - .map(|bindata| confirmed_block::TransactionError { err: bindata }), - fee: value.fee.try_into()?, - pre_balances: try_collect(value.pre_balances)?, - post_balances: try_collect(value.post_balances)?, - inner_instructions: value - .inner_instructions - .map(try_collect) - .transpose()? - .unwrap_or(Vec::new()), - inner_instructions_none, - log_messages: value - .log_messages - .map(try_collect) - .transpose()? - .unwrap_or(Vec::new()), - log_messages_none, - pre_token_balances: try_collect(value.pre_token_balances)?, - post_token_balances: try_collect(value.post_token_balances)?, - rewards: try_collect(value.rewards)?, - loaded_writable_addresses: try_collect(value.loaded_writable_addresses)?, - loaded_readonly_addresses: try_collect(value.loaded_readonly_addresses)?, - return_data: value.return_data.map(Into::into), - return_data_none, - compute_units_consumed: value - .compute_units_consumed - .map(TryInto::try_into) - .transpose()?, - }) - } -} - -#[derive(Debug, SerializeRow, Clone, DeepSizeOf, PartialEq)] -pub struct Transaction { - pub slot: i64, - pub signature: Vec, - pub signatures: Vec>, - pub num_required_signatures: i32, - pub num_readonly_signed_accounts: i32, - pub num_readonly_unsigned_accounts: i32, - pub account_keys: Vec>, - pub recent_blockhash: Vec, - pub instructions: Vec, - pub versioned: bool, - pub address_table_lookups: Vec, - pub meta: TransactionMeta, - pub is_vote: bool, - pub tx_index: i64, -} - -impl TryFrom for Transaction { - type Error = anyhow::Error; - - fn try_from(value: SubscribeUpdateTransaction) -> Result { - let slot: i64 = value.slot as i64; - - let val_tx = value - .transaction - .ok_or(anyhow!("missing transaction info object"))?; - - let signature = val_tx.signature; - let meta = val_tx - .meta - .ok_or(anyhow!("missing transaction status meta"))?; - let tx = val_tx - .transaction - .ok_or(anyhow!("missing transaction object from transaction info"))?; - let message = tx - .message - .ok_or(anyhow!("missing message object from transaction"))?; - let message_header = message.header.ok_or(anyhow!("missing message header"))?; - - let res = Transaction { - slot, - signature, - signatures: tx.signatures, - num_readonly_signed_accounts: message_header.num_readonly_signed_accounts as i32, - num_readonly_unsigned_accounts: message_header.num_readonly_unsigned_accounts as i32, - num_required_signatures: message_header.num_required_signatures as i32, - account_keys: message.account_keys, - recent_blockhash: message.recent_blockhash, - instructions: message - .instructions - .into_iter() - .map(|ci| ci.into()) - .collect(), - versioned: message.versioned, - address_table_lookups: message - .address_table_lookups - .into_iter() - .map(|atl| atl.into()) - .collect(), - meta: meta.try_into()?, - is_vote: val_tx.is_vote, - tx_index: val_tx.index as i64, - }; - - Ok(res) - } -} - -impl TryFrom for SubscribeUpdateTransaction { - type Error = anyhow::Error; - - fn try_from(value: Transaction) -> Result { - let ret = SubscribeUpdateTransaction { - transaction: Some(SubscribeUpdateTransactionInfo { - signature: value.signature, - is_vote: value.is_vote, - transaction: Some(confirmed_block::Transaction { - signatures: value.signatures, - message: Some(confirmed_block::Message { - header: Some(confirmed_block::MessageHeader { - num_required_signatures: value.num_required_signatures.try_into()?, - num_readonly_signed_accounts: value - .num_readonly_signed_accounts - .try_into()?, - num_readonly_unsigned_accounts: value - .num_readonly_unsigned_accounts - .try_into()?, - }), - account_keys: value.account_keys, - recent_blockhash: value.recent_blockhash, - instructions: try_collect(value.instructions)?, - versioned: value.versioned, - address_table_lookups: try_collect(value.address_table_lookups)?, - }), - }), - meta: Some(value.meta.try_into()).transpose()?, - index: value.tx_index.try_into()?, - }), - slot: value.slot.try_into()?, - }; - Ok(ret) - } -} - -impl From - for ( - i64, - Pubkey, - i64, - Pubkey, - bool, - i64, - i64, - Vec, - Option>, - ) -{ - fn from(acc: AccountUpdate) -> Self { - ( - acc.slot, - acc.pubkey, - acc.lamports, - acc.owner, - acc.executable, - acc.rent_epoch, - acc.write_version, - acc.data, - acc.txn_signature, - ) - } -} - -impl AccountUpdate { - pub fn zero_account() -> Self { - let bytes_vec: Vec = repeat(0).take(32).collect(); - let bytes_arr: [u8; 32] = bytes_vec.try_into().unwrap(); - AccountUpdate { - slot: 0, - pubkey: bytes_arr, - lamports: 0, - owner: bytes_arr, - executable: false, - rent_epoch: 0, - write_version: 0, - data: vec![], - txn_signature: None, - } - } - - pub fn as_blockchain_event( - self, - shard_id: ShardId, - producer_id: ProducerId, - offset: ShardOffset, - ) -> BlockchainEvent { - BlockchainEvent { - shard_id, - period: offset / SHARD_OFFSET_MODULO, - producer_id, - offset, - slot: self.slot, - event_type: BlockchainEventType::AccountUpdate, - pubkey: Some(self.pubkey), - lamports: Some(self.lamports), - owner: Some(self.owner), - executable: Some(self.executable), - rent_epoch: Some(self.rent_epoch), - write_version: Some(self.write_version), - data: Some(self.data), - txn_signature: self.txn_signature, - signature: Default::default(), - signatures: Default::default(), - num_required_signatures: Default::default(), - num_readonly_signed_accounts: Default::default(), - num_readonly_unsigned_accounts: Default::default(), - account_keys: Default::default(), - recent_blockhash: Default::default(), - instructions: Default::default(), - versioned: Default::default(), - address_table_lookups: Default::default(), - meta: Default::default(), - is_vote: Default::default(), - tx_index: Default::default(), - } - } -} - -impl TryFrom for AccountUpdate { - type Error = anyhow::Error; - fn try_from(value: SubscribeUpdateAccount) -> Result { - let slot = value.slot; - if value.account.is_none() { - Err(anyhow!("Missing account update.")) - } else { - let acc: yellowstone_grpc_proto::prelude::SubscribeUpdateAccountInfo = - value.account.unwrap(); - let pubkey: Pubkey = acc - .pubkey - .try_into() - .map_err(|err| anyhow!("Invalid pubkey: {:?}", err))?; - let owner: Pubkey = acc - .owner - .try_into() - .map_err(|err| anyhow!("Invalid owner: {:?}", err))?; - - let ret = AccountUpdate { - slot: slot as i64, - pubkey, - lamports: acc.lamports as i64, - owner, - executable: acc.executable, - rent_epoch: acc.rent_epoch as i64, - write_version: acc.write_version as i64, - data: acc.data, - txn_signature: acc.txn_signature, - }; - Ok(ret) - } - } -} - -impl Transaction { - pub fn as_blockchain_event( - self, - shard_id: ShardId, - producer_id: ProducerId, - offset: ShardOffset, - ) -> BlockchainEvent { - BlockchainEvent { - shard_id, - period: offset / SHARD_OFFSET_MODULO, - producer_id, - offset, - slot: self.slot, - event_type: BlockchainEventType::NewTransaction, - - pubkey: Default::default(), - lamports: Default::default(), - owner: Default::default(), - executable: Default::default(), - rent_epoch: Default::default(), - write_version: Default::default(), - data: Default::default(), - txn_signature: Default::default(), - - signature: Some(self.signature), - signatures: Some(self.signatures), - num_required_signatures: Some(self.num_required_signatures), - num_readonly_signed_accounts: Some(self.num_readonly_signed_accounts), - num_readonly_unsigned_accounts: Some(self.num_readonly_unsigned_accounts), - account_keys: Some(self.account_keys), - recent_blockhash: Some(self.recent_blockhash), - instructions: Some(self.instructions), - versioned: Some(self.versioned), - address_table_lookups: Some(self.address_table_lookups), - meta: Some(self.meta), - is_vote: Some(self.is_vote), - tx_index: Some(self.tx_index), - } - } -} - -#[derive(SerializeRow, Debug, Clone, DeepSizeOf)] -pub struct ShardedAccountUpdate { - // Common - pub shard_id: ShardId, - pub period: ShardPeriod, - pub producer_id: ProducerId, - pub offset: ShardOffset, - pub slot: i64, - pub event_type: BlockchainEventType, - - // AccountUpdate - pub pubkey: Pubkey, - pub lamports: i64, - pub owner: Pubkey, - pub executable: bool, - pub rent_epoch: i64, - pub write_version: i64, - pub data: Vec, - pub txn_signature: Option>, -} - -#[derive(SerializeRow, Debug, Clone, DeepSizeOf)] -pub struct ShardedTransaction { - // Common - pub shard_id: ShardId, - pub period: ShardPeriod, - pub producer_id: ProducerId, - pub offset: ShardOffset, - pub slot: i64, - pub event_type: BlockchainEventType, - - // Transaction - pub signature: Vec, - pub signatures: Vec>, - pub num_required_signatures: i32, - pub num_readonly_signed_accounts: i32, - pub num_readonly_unsigned_accounts: i32, - pub account_keys: Vec>, - pub recent_blockhash: Vec, - pub instructions: Vec, - pub versioned: bool, - pub address_table_lookups: Vec, - pub meta: TransactionMeta, - pub is_vote: bool, - pub tx_index: i64, -} - -// Implement Into for BlockchainEvent -impl From for ShardedAccountUpdate { - fn from(val: BlockchainEvent) -> Self { - ShardedAccountUpdate { - shard_id: val.shard_id, - period: val.period, - producer_id: val.producer_id, - offset: val.offset, - event_type: val.event_type, - slot: val.slot, - pubkey: val.pubkey.expect("pubkey is none"), - lamports: val.lamports.expect("lamports is none"), - owner: val.owner.expect("owner is none"), - executable: val.executable.expect("executable is none"), - rent_epoch: val.rent_epoch.expect("rent_epch is none"), - write_version: val.write_version.expect("write_version is none"), - data: val.data.expect("data is none"), - txn_signature: val.txn_signature, - } - } -} - -// Implement Into for BlockchainEvent -impl From for ShardedTransaction { - fn from(val: BlockchainEvent) -> Self { - ShardedTransaction { - shard_id: val.shard_id, - period: val.period, - producer_id: val.producer_id, - offset: val.offset, - event_type: val.event_type, - slot: val.slot, - signature: val.signature.expect("signature is none"), - signatures: val.signatures.expect("signatures is none"), - num_required_signatures: val - .num_required_signatures - .expect("num_required_signature is none"), - num_readonly_signed_accounts: val - .num_readonly_signed_accounts - .expect("num_readonly_signed_accounts is none"), - num_readonly_unsigned_accounts: val - .num_readonly_unsigned_accounts - .expect("num_readonly_unsigned_accounts is none"), - account_keys: val.account_keys.expect("account_keys is none"), - recent_blockhash: val.recent_blockhash.expect("recent_blockhash is none"), - instructions: val.instructions.expect("instructions is none"), - versioned: val.versioned.expect("versioned is none"), - address_table_lookups: val - .address_table_lookups - .expect("address_table_lookups is none"), - meta: val.meta.expect("meta is none"), - is_vote: val.is_vote.expect("is_vote is none"), - tx_index: val.tx_index.expect("tx_index is none"), - } - } -} - -impl From for Transaction { - fn from(val: BlockchainEvent) -> Self { - Transaction { - slot: val.slot, - signature: val.signature.expect("signature is none"), - signatures: val.signatures.expect("signatures is none"), - num_required_signatures: val - .num_required_signatures - .expect("num_required_signature is none"), - num_readonly_signed_accounts: val - .num_readonly_signed_accounts - .expect("num_readonly_signed_accounts is none"), - num_readonly_unsigned_accounts: val - .num_readonly_unsigned_accounts - .expect("num_readonly_unsigned_accounts is none"), - account_keys: val.account_keys.expect("account_keys is none"), - recent_blockhash: val.recent_blockhash.expect("recent_blockhash is none"), - instructions: val.instructions.expect("instructions is none"), - versioned: val.versioned.expect("versioned is none"), - address_table_lookups: val - .address_table_lookups - .expect("address_table_lookups is none"), - meta: val.meta.expect("meta is none"), - is_vote: val.is_vote.expect("is_vote is none"), - tx_index: val.tx_index.expect("tx_index is none"), - } - } -} - -impl From for AccountUpdate { - fn from(val: BlockchainEvent) -> Self { - AccountUpdate { - slot: val.slot, - pubkey: val.pubkey.expect("pubkey is none"), - lamports: val.lamports.expect("lamports is none"), - owner: val.owner.expect("owner is none"), - executable: val.executable.expect("executable is none"), - rent_epoch: val.rent_epoch.expect("rent_epch is none"), - write_version: val.write_version.expect("write_version is none"), - data: val.data.expect("data is none"), - txn_signature: val.txn_signature, - } - } -} - -#[derive(FromRow, Debug, Clone)] -pub struct ProducerInfo { - pub producer_id: ProducerId, - pub num_shards: ShardId, - pub commitment_level: CommitmentLevel, -} - -impl TryFrom for SubscribeUpdateAccount { - type Error = anyhow::Error; - - fn try_from(acc_update: AccountUpdate) -> anyhow::Result { - let _pubkey_bytes: [u8; 32] = acc_update.pubkey; - let _owner_bytes: [u8; 32] = acc_update.owner; - - // Create the SubscribeUpdateAccount instance - let subscribe_update_account = SubscribeUpdateAccount { - slot: acc_update.slot as u64, - account: Some( - yellowstone_grpc_proto::prelude::SubscribeUpdateAccountInfo { - pubkey: Vec::from(acc_update.pubkey), - lamports: acc_update.lamports as u64, - owner: Vec::from(acc_update.owner), - executable: acc_update.executable, - rent_epoch: acc_update.rent_epoch as u64, - write_version: acc_update.write_version as u64, - data: acc_update.data, - txn_signature: acc_update.txn_signature, - }, - ), - is_startup: false, - }; - - Ok(subscribe_update_account) - } -} - -impl TryFrom for SubscribeUpdateAccount { - type Error = anyhow::Error; - fn try_from(value: BlockchainEvent) -> Result { - anyhow::ensure!( - value.event_type == BlockchainEventType::AccountUpdate, - "BlockchainEvent is not an AccountUpdate" - ); - let ret: AccountUpdate = value.into(); - ret.try_into() - } -} - -impl TryFrom for SubscribeUpdateTransaction { - type Error = anyhow::Error; - fn try_from(value: BlockchainEvent) -> Result { - anyhow::ensure!( - value.event_type == BlockchainEventType::NewTransaction, - "BlockchainEvent is not a Transaction" - ); - let ret: Transaction = value.into(); - ret.try_into() - } -} diff --git a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/common.rs b/yellowstone-grpc-tools/src/scylladb/yellowstone_log/common.rs deleted file mode 100644 index 4c990746..00000000 --- a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/common.rs +++ /dev/null @@ -1,24 +0,0 @@ -use crate::scylladb::types::{BlockchainEventType, ConsumerId, ProducerId, ShardOffset, Slot}; - -pub type OldShardOffset = ShardOffset; - -/// -/// Initial position in the log when creating a new consumer. -/// -#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] -pub enum InitialOffset { - Earliest, - #[default] - Latest, - SlotApprox { - desired_slot: Slot, - min_slot: Slot, - }, -} - -pub struct ConsumerInfo { - pub consumer_id: ConsumerId, - pub producer_id: ProducerId, - //pub initital_shard_offsets: Vec, - pub subscribed_blockchain_event_types: Vec, -} diff --git a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_group/mod.rs b/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_group/mod.rs deleted file mode 100644 index c426b23e..00000000 --- a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_group/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod repo; diff --git a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_group/repo.rs b/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_group/repo.rs deleted file mode 100644 index a638ce11..00000000 --- a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_group/repo.rs +++ /dev/null @@ -1,153 +0,0 @@ -use { - crate::scylladb::types::ShardId, - scylla::{ - cql_to_rust::{FromCqlVal, FromCqlValError}, - frame::response::result::CqlValue, - prepared_statement::PreparedStatement, - serialize::value::SerializeCql, - Session, - }, - std::{collections::BTreeMap, net::IpAddr, sync::Arc}, - uuid::Uuid, -}; - -const NUM_SHARDS: usize = 64; - -type ConsumerGroupId = Uuid; -type InstanceId = String; - -const CREATE_STATIC_CONSUMER_GROUP: &str = r###" - INSERT INTO consumer_groups ( - consumer_group_id, - group_type, - last_access_ip_address, - instance_id_shard_assignments, - redundant_id_shard_assignments, - created_at, - updated_at - ) - VALUES (?, ?, ?, ?, ?, currentTimestamp(), currentTimestamp()) -"###; - -#[derive(Clone, Debug, PartialEq, Eq, Copy)] -enum ConsumerGroupType { - Static = 0, -} - -impl TryFrom for ConsumerGroupType { - type Error = anyhow::Error; - - fn try_from(value: i16) -> Result { - match value { - 0 => Ok(ConsumerGroupType::Static), - x => Err(anyhow::anyhow!( - "Unknown ConsumerGroupType equivalent for {:?}", - x - )), - } - } -} - -impl From for i16 { - fn from(val: ConsumerGroupType) -> Self { - match val { - ConsumerGroupType::Static => 0, - } - } -} - -impl SerializeCql for ConsumerGroupType { - fn serialize<'b>( - &self, - typ: &scylla::frame::response::result::ColumnType, - writer: scylla::serialize::CellWriter<'b>, - ) -> Result< - scylla::serialize::writers::WrittenCellProof<'b>, - scylla::serialize::SerializationError, - > { - let x: i16 = (*self).into(); - SerializeCql::serialize(&x, typ, writer) - } -} - -impl FromCqlVal for ConsumerGroupType { - fn from_cql(cql_val: CqlValue) -> Result { - match cql_val { - CqlValue::SmallInt(x) => x.try_into().map_err(|_| FromCqlValError::BadVal), - _ => Err(FromCqlValError::BadCqlType), - } - } -} - -pub(crate) struct ConsumerGroupRepo { - session: Arc, - create_static_consumer_group_ps: PreparedStatement, -} - -fn assign_shards(ids: &[InstanceId], num_shards: usize) -> BTreeMap> { - let mut ids = ids.to_vec(); - ids.sort(); - - let num_parts_per_id = num_shards / ids.len(); - let shard_vec = (0..num_shards).map(|x| x as ShardId).collect::>(); - let chunk_it = shard_vec - .chunks(num_parts_per_id) - .into_iter() - .map(|chunk| chunk.iter().cloned().collect()); - - ids.into_iter().zip(chunk_it).collect() -} - -pub(crate) struct StaticConsumerGroupInfo { - pub(crate) consumer_group_id: ConsumerGroupId, - pub(crate) instance_id_assignments: BTreeMap>, - pub(crate) redundant_instance_id_assignments: BTreeMap>, -} - -impl ConsumerGroupRepo { - pub async fn new(session: Arc) -> anyhow::Result { - let create_static_consumer_group_ps = session.prepare(CREATE_STATIC_CONSUMER_GROUP).await?; - - let this = ConsumerGroupRepo { - session, - create_static_consumer_group_ps, - }; - - Ok(this) - } - - pub async fn create_static_consumer_group( - &self, - instance_ids: &[InstanceId], - redundant_instance_ids: &[InstanceId], - remote_ip_addr: Option, - ) -> anyhow::Result { - let consumer_group_id = Uuid::new_v4(); - anyhow::ensure!( - instance_ids.len() == redundant_instance_ids.len(), - "mismatch number if instance/redundant ids" - ); - let shard_assignments = assign_shards(&instance_ids, NUM_SHARDS); - let shard_assignments2 = assign_shards(&redundant_instance_ids, NUM_SHARDS); - self.session - .execute( - &self.create_static_consumer_group_ps, - ( - consumer_group_id.as_bytes(), - ConsumerGroupType::Static, - remote_ip_addr.map(|ipaddr| ipaddr.to_string()), - &shard_assignments, - &shard_assignments2, - ), - ) - .await?; - - let ret = StaticConsumerGroupInfo { - consumer_group_id, - instance_id_assignments: shard_assignments, - redundant_instance_id_assignments: shard_assignments2, - }; - - Ok(ret) - } -} diff --git a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_source.rs b/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_source.rs deleted file mode 100644 index 4a2d00cf..00000000 --- a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/consumer_source.rs +++ /dev/null @@ -1,187 +0,0 @@ -use { - crate::scylladb::{ - types::{BlockchainEvent, ConsumerInfo, ProducerId, ShardId, Slot, UNDEFINED_SLOT}, - yellowstone_log::shard_iterator::ShardIterator, - }, - core::fmt, - futures::future::try_join_all, - scylla::{ - batch::{Batch, BatchType}, - prepared_statement::PreparedStatement, - Session, - }, - std::{collections::BTreeMap, sync::Arc, time::Duration}, - thiserror::Error, - tokio::{ - sync::{ - mpsc, - oneshot::{self, error::TryRecvError}, - }, - time::Instant, - }, - tracing::{info, warn}, -}; - -const CLIENT_LAG_WARN_THRESHOLD: Duration = Duration::from_millis(250); - -const FETCH_MICRO_BATCH_LATENCY_WARN_THRESHOLD: Duration = Duration::from_millis(500); - -const UPDATE_CONSUMER_SHARD_OFFSET: &str = r###" - UPDATE consumer_shard_offset - SET offset = ?, slot = ?, updated_at = currentTimestamp() - WHERE - consumer_id = ? - AND producer_id = ? - AND shard_id = ? - AND event_type = ? -"###; - -pub(crate) struct ConsumerSource { - session: Arc, - consumer_info: ConsumerInfo, - sender: mpsc::Sender, - // The interval at which we want to commit our Offset progression to Scylla - offset_commit_interval: Duration, - shard_iterators: BTreeMap, - pub(crate) shard_iterators_slot: BTreeMap, - update_consumer_shard_offset_prepared_stmt: PreparedStatement, -} - -pub type InterruptSignal = oneshot::Receiver<()>; - -#[derive(Clone, Debug, PartialEq, Error, Eq, Copy)] -pub(crate) struct Interrupted; - -impl fmt::Display for Interrupted { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("Interrupted") - } -} - -pub(crate) trait FromBlockchainEvent { - type Output; - - fn from(blockchain_event: BlockchainEvent) -> Self::Output; -} - -impl> ConsumerSource { - pub(crate) async fn new( - session: Arc, - consumer_info: ConsumerInfo, - sender: mpsc::Sender, - offset_commit_interval: Duration, - mut shard_iterators: Vec, - ) -> anyhow::Result { - let update_consumer_shard_offset_prepared_stmt = - session.prepare(UPDATE_CONSUMER_SHARD_OFFSET).await?; - // Prewarm every shard iterator - try_join_all(shard_iterators.iter_mut().map(|shard_it| shard_it.warm())).await?; - let shard_iterators_slot = shard_iterators - .iter() - .map(|shard_it| (shard_it.shard_id, UNDEFINED_SLOT)) - .collect(); - Ok(ConsumerSource { - session, - consumer_info, - sender, - offset_commit_interval, - shard_iterators: shard_iterators - .into_iter() - .map(|shard_it| (shard_it.shard_id, shard_it)) - .collect(), - shard_iterators_slot, - update_consumer_shard_offset_prepared_stmt, - }) - } - - pub(crate) fn producer_id(&self) -> ProducerId { - self.consumer_info.producer_id - } - - async fn update_consumer_shard_offsets(&self) -> anyhow::Result<()> { - let mut batch = Batch::new(BatchType::Unlogged); - let mut values = Vec::with_capacity(self.shard_iterators_slot.len()); - for (shard_id, shard_it) in self.shard_iterators.iter() { - values.push(( - shard_it.last_offset(), - self.shard_iterators_slot - .get(shard_id) - .expect("missing shard slot info"), - self.consumer_info.consumer_id.to_owned(), - self.consumer_info.producer_id, - shard_it.shard_id, - shard_it.event_type, - )); - batch.append_statement(self.update_consumer_shard_offset_prepared_stmt.clone()); - } - - self.session.batch(&batch, values).await?; - Ok(()) - } - - pub async fn run(&mut self, mut interrupt: InterruptSignal) -> anyhow::Result<()> { - let consumer_id = self.consumer_info.consumer_id.to_owned(); - let mut commit_offset_deadline = Instant::now() + self.offset_commit_interval; - const PRINT_CONSUMER_SLOT_REACH_DELAY: Duration = Duration::from_secs(5); - info!("Serving consumer: {:?}", consumer_id); - - let mut max_seen_slot = UNDEFINED_SLOT; - let mut num_event_between_two_slots = 0; - - let mut next_trace_schedule = Instant::now() + PRINT_CONSUMER_SLOT_REACH_DELAY; - let mut t = Instant::now(); - loop { - for (shard_id, shard_it) in self.shard_iterators.iter_mut() { - match interrupt.try_recv() { - Ok(_) => { - warn!("consumer {consumer_id} received an interrupted signal"); - self.update_consumer_shard_offsets().await?; - anyhow::bail!(Interrupted) - } - Err(TryRecvError::Closed) => anyhow::bail!("detected orphan consumer source"), - Err(TryRecvError::Empty) => (), - } - - let maybe = shard_it.try_next().await?; - - if let Some(block_chain_event) = maybe { - self.shard_iterators_slot - .insert(*shard_id, block_chain_event.slot); - if t.elapsed() >= FETCH_MICRO_BATCH_LATENCY_WARN_THRESHOLD { - warn!( - "consumer {consumer_id} micro batch took {:?} to fetch.", - t.elapsed() - ); - } - if max_seen_slot < block_chain_event.slot { - if next_trace_schedule.elapsed() > Duration::ZERO { - info!("Consumer {consumer_id} reach slot {max_seen_slot} after {num_event_between_two_slots} blockchain event(s)"); - next_trace_schedule = Instant::now() + PRINT_CONSUMER_SLOT_REACH_DELAY; - } - max_seen_slot = block_chain_event.slot; - num_event_between_two_slots = 0; - } - let t_send = Instant::now(); - - if self.sender.send(T::from(block_chain_event)).await.is_err() { - warn!("Consumer {consumer_id} closed its streaming half"); - return Ok(()); - } - let send_latency = t_send.elapsed(); - if send_latency >= CLIENT_LAG_WARN_THRESHOLD { - warn!("Slow read from consumer {consumer_id}, recorded latency: {send_latency:?}") - } - num_event_between_two_slots += 1; - t = Instant::now(); - } - } - // Every now and then, we commit where the consumer is loc - if commit_offset_deadline.elapsed() > Duration::ZERO { - let t = Instant::now(); - self.update_consumer_shard_offsets().await?; - info!("updated consumer shard offset in {:?}", t.elapsed()); - commit_offset_deadline = Instant::now() + self.offset_commit_interval; - } - } - } -} diff --git a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/grpc.rs b/yellowstone-grpc-tools/src/scylladb/yellowstone_log/grpc.rs deleted file mode 100644 index fd60db57..00000000 --- a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/grpc.rs +++ /dev/null @@ -1,1051 +0,0 @@ -use { - super::{ - common::InitialOffset, - consumer_group::repo::ConsumerGroupRepo, - consumer_source::{ConsumerSource, FromBlockchainEvent}, - shard_iterator::{ShardFilter, ShardIterator}, - }, - crate::scylladb::{ - sink, - types::{ - BlockchainEventType, CommitmentLevel, ConsumerId, ConsumerInfo, ConsumerShardOffset, - ProducerId, ProducerInfo, ShardId, ShardOffset, Slot, - }, - yellowstone_log::consumer_source::Interrupted, - }, - chrono::{DateTime, TimeDelta, Utc}, - core::fmt, - futures::{ - future::{try_join, try_join_all}, - Stream, - }, - scylla::{ - batch::{Batch, BatchType}, - prepared_statement::PreparedStatement, - transport::query_result::SingleRowTypedError, - Session, - }, - std::{ - collections::{BTreeMap, BTreeSet}, - net::IpAddr, - ops::RangeInclusive, - pin::Pin, - sync::Arc, - time::Duration, - }, - thiserror::Error, - tokio::sync::{mpsc, oneshot}, - tokio_stream::wrappers::ReceiverStream, - tonic::Response, - tracing::{error, info, warn}, - uuid::Uuid, - yellowstone_grpc_proto::{ - geyser::{subscribe_update::UpdateOneof, SubscribeUpdate}, - yellowstone::log::{ - yellowstone_log_server::YellowstoneLog, ConsumeRequest, - CreateStaticConsumerGroupRequest, CreateStaticConsumerGroupResponse, - EventSubscriptionPolicy, TimelineTranslationPolicy, - }, - }, -}; - -const DEFAULT_LAST_HEARTBEAT_TIME_DELTA: Duration = Duration::from_secs(10); - -const DEFAULT_OFFSET_COMMIT_INTERVAL: Duration = Duration::from_secs(10); - -const DEFAULT_CONSUMER_STREAM_BUFFER_CAPACITY: usize = 100; - -const UPDATE_CONSUMER_SHARD_OFFSET: &str = r###" - UPDATE consumer_shard_offset - SET offset = ?, slot = ?, updated_at = currentTimestamp() - WHERE - consumer_id = ? - AND producer_id = ? - AND shard_id = ? - AND event_type = ? -"###; - -const LIST_PRODUCER_WITH_COMMITMENT_LEVEL: &str = r###" - SELECT - producer_id - FROM producer_info - WHERE commitment_level = ? - ALLOW FILTERING -"###; - -/// -/// This query leverage the fact that partition data are always sorted by the clustering key and that scylla -/// always iterator or scan data in cluster order. In leyman terms that mean per partition limit will always return -/// the most recent entry for each producer_id. -const LIST_PRODUCER_LAST_HEARBEAT: &str = r###" - SELECT - producer_id, - created_at - FROM producer_slot_seen - PER PARTITION LIMIT 1 -"###; - -const GET_SHARD_OFFSET_AT_SLOT_APPROX: &str = r###" - SELECT - shard_offset_map, - slot - FROM producer_slot_seen - where - producer_id = ? - AND slot <= ? - AND slot >= ? - ORDER BY slot desc - LIMIT 1; -"###; - -const INSERT_CONSUMER_OFFSET: &str = r###" - INSERT INTO consumer_shard_offset ( - consumer_id, - producer_id, - shard_id, - event_type, - offset, - slot, - created_at, - updated_at - ) - VALUES - (?,?,?,?,?,?, currentTimestamp(), currentTimestamp()) -"###; - -const GET_CONSUMER_INFO_BY_ID: &str = r###" - SELECT - consumer_id, - producer_id, - subscribed_event_types - FROM consumer_info - where consumer_id = ? -"###; - -const LIST_PRODUCERS_WITH_LOCK: &str = r###" - SELECT - producer_id - FROM producer_lock - WHERE is_ready = true - ALLOW FILTERING -"###; - -const GET_PRODUCERS_CONSUMER_COUNT: &str = r###" - SELECT - producer_id, - count(1) - FROM producer_consumer_mapping_mv - GROUP BY producer_id -"###; - -const INSERT_CONSUMER_INFO: &str = r###" - INSERT INTO consumer_info (consumer_id, producer_id, consumer_ip, subscribed_event_types, created_at, updated_at) - VALUES (?, ?, ?, ?, currentTimestamp(), currentTimestamp()) -"###; - -const UPSERT_CONSUMER_INFO: &str = r###" - UPDATE consumer_info - SET producer_id = ?, - subscribed_event_types = ?, - updated_at = currentTimestamp() - WHERE consumer_id = ? -"###; - -const GET_PRODUCER_INFO_BY_ID: &str = r###" - SELECT - producer_id, - num_shards, - commitment_level - FROM producer_info - WHERE producer_id = ? -"###; - -/// -/// This error is raised when no lock is held by any producer. -/// -#[derive(Error, PartialEq, Eq, Debug)] -struct NoActiveProducer; - -impl fmt::Display for NoActiveProducer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("NoActiveProducer") - } -} - -/// -/// This error is raised when there is no active producer for the desired commitment level. -/// -#[derive(Copy, Error, PartialEq, Eq, Debug, Clone)] -struct ImpossibleCommitmentLevel(CommitmentLevel); - -impl fmt::Display for ImpossibleCommitmentLevel { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let cl = self.0; - f.write_fmt(format_args!("ImpossibleCommitmentLevel({})", cl)) - } -} - -/// -/// This error is raised when the combination of consumer critera result in an empty set of elligible producer timeline. -/// -#[derive(Error, PartialEq, Eq, Debug)] -struct ImpossibleTimelineSelection; - -impl fmt::Display for ImpossibleTimelineSelection { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("ImpossibleTimelineSelection") - } -} -/// -/// This error is raised when no producer as seen the desired `slot`. -/// -#[derive(Clone, Debug, Error, PartialEq, Eq, Copy)] -struct ImpossibleSlotOffset(Slot); - -impl fmt::Display for ImpossibleSlotOffset { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let slot = self.0; - f.write_fmt(format_args!("ImpossbielInititalOffset({})", slot)) - } -} - -#[derive(Clone, Debug, PartialEq, Error, Eq, Copy)] -struct DeadProducerErr(ProducerId); - -impl fmt::Display for DeadProducerErr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let producer_id = self.0[0]; - f.write_fmt(format_args!("ProducerStale({})", producer_id)) - } -} - -/// -/// Returns the assigned producer id to specific consumer if any. -/// -pub async fn get_consumer_info_by_id( - session: Arc, - consumer_id: ConsumerId, -) -> anyhow::Result> { - session - .query(GET_CONSUMER_INFO_BY_ID, (consumer_id,)) - .await? - .maybe_first_row_typed::() - .map_err(anyhow::Error::new) -} - -/// -/// Returns a list of producer that has a lock -/// -async fn list_producers_with_lock_held(session: Arc) -> anyhow::Result> { - session - .query(LIST_PRODUCERS_WITH_LOCK, &[]) - .await? - .rows_typed_or_empty::<(ProducerId,)>() - .map(|result| result.map(|row| row.0)) - .collect::, _>>() - .map_err(anyhow::Error::new) -} - -async fn list_producer_with_slot( - session: Arc, - slot_range: RangeInclusive, -) -> anyhow::Result> { - let slot_values = slot_range - .map(|slot| format!("{slot}")) - .collect::>() - .join(", "); - - let query_template = format!( - r###" - SELECT - producer_id, - slot - FROM slot_producer_seen_mv - WHERE slot IN ({slot_values}) - "### - ); - info!("query {query_template}"); - - session - .query(query_template, &[]) - .await? - .rows_typed_or_empty::<(ProducerId, Slot)>() - .map(|result| result.map(|(producer_id, _slot)| producer_id)) - .collect::, _>>() - .map_err(anyhow::Error::new) - .map(|btree_set| btree_set.into_iter().collect()) -} - -async fn list_producer_with_commitment_level( - session: Arc, - commitment_level: CommitmentLevel, -) -> anyhow::Result> { - session - .query(LIST_PRODUCER_WITH_COMMITMENT_LEVEL, (commitment_level,)) - .await? - .rows_typed_or_empty::<(ProducerId,)>() - .map(|result| result.map(|row| row.0)) - .collect::, _>>() - .map_err(anyhow::Error::new) -} - -async fn list_producers_heartbeat( - session: Arc, - heartbeat_time_dt: Duration, -) -> anyhow::Result> { - let utc_now = Utc::now(); - let heartbeat_lower_bound = utc_now - .checked_sub_signed(TimeDelta::seconds(heartbeat_time_dt.as_secs().try_into()?)) - .ok_or(anyhow::anyhow!("Invalid heartbeat time delta"))?; - println!("heartbeat lower bound: {heartbeat_lower_bound}"); - let producer_id_with_last_hb_datetime_pairs = session - .query(LIST_PRODUCER_LAST_HEARBEAT, &[]) - .await? - .rows_typed::<(ProducerId, DateTime)>()? - //.map(|result| result.map(|row| row.0)) - .collect::, _>>()?; - - println!("{producer_id_with_last_hb_datetime_pairs:?}"); - //.map_err(anyhow::Error::new) - - Ok(producer_id_with_last_hb_datetime_pairs - .into_iter() - .filter(|(_, last_hb)| last_hb >= &heartbeat_lower_bound) - .map(|(pid, _)| pid) - .collect::>()) -} - -async fn is_producer_still_alive( - session: Arc, - producer_id: ProducerId, -) -> anyhow::Result { - let check_last_slot_seen = r###" - SELECT - slot, - created_at - FROM producer_slot_seen - WHERE - producer_id = ? - ORDER BY slot DESC - PER PARTITION LIMIT 1 - "###; - let heartbeat_lower_bound = - Utc::now() - TimeDelta::seconds(DEFAULT_LAST_HEARTBEAT_TIME_DELTA.as_secs() as i64); - let check_if_lock_held = "SELECT producer_id FROM producer_lock WHERE producer_id = ?"; - let fut1 = session.query(check_last_slot_seen, (producer_id,)); - let fut2 = session.query(check_if_lock_held, (producer_id,)); - let (qr1, qr2) = try_join(fut1, fut2).await?; - if let Some((_slot, created_at)) = qr1.maybe_first_row_typed::<(Slot, DateTime)>()? { - if created_at < heartbeat_lower_bound { - return Ok(false); - } - } - - Ok(qr2.rows.is_some()) -} - -fn wait_for_producer_is_dead( - session: Arc, - producer_id: ProducerId, -) -> oneshot::Receiver<()> { - let (sender, receiver) = oneshot::channel(); - - tokio::spawn(async move { - let session = session; - loop { - let is_alive = is_producer_still_alive(Arc::clone(&session), producer_id) - .await - .expect("checking producer is alive failed"); - if !is_alive { - info!("producer {producer_id:?} is dead"); - sender - .send(()) - .expect(format!("the receiveing half closed while waiting for producer({producer_id:?}) liveness status").as_str()); - break; - } - tokio::time::sleep(Duration::from_secs(5)).await; - } - }); - - receiver -} - -/// -/// Returns the producer id with least consumer assignment. -/// -async fn get_producer_id_with_least_assigned_consumer( - session: Arc, - opt_slot_range: Option>, - commitment_level: CommitmentLevel, -) -> anyhow::Result { - let locked_producers = list_producers_with_lock_held(Arc::clone(&session)).await?; - info!("{} producer lock(s) detected", locked_producers.len()); - - anyhow::ensure!(!locked_producers.is_empty(), NoActiveProducer); - - let recently_active_producers = BTreeSet::from_iter( - list_producers_heartbeat(Arc::clone(&session), DEFAULT_LAST_HEARTBEAT_TIME_DELTA).await?, - ); - info!( - "{} living producer(s) detected", - recently_active_producers.len() - ); - - anyhow::ensure!(!recently_active_producers.is_empty(), NoActiveProducer); - - let producers_with_commitment_level = - list_producer_with_commitment_level(Arc::clone(&session), commitment_level).await?; - info!( - "{} producer(s) with {commitment_level:?} commitment level", - producers_with_commitment_level.len() - ); - - if producers_with_commitment_level.is_empty() { - anyhow::bail!(ImpossibleCommitmentLevel(commitment_level)) - } - - let mut elligible_producers = locked_producers - .into_iter() - .filter(|producer_id| recently_active_producers.contains(producer_id)) - .collect::>(); - - anyhow::ensure!(!elligible_producers.is_empty(), ImpossibleTimelineSelection); - - if let Some(slot_range) = opt_slot_range { - info!("Producer needs slot in {slot_range:?}"); - let producers_with_slot = BTreeSet::from_iter( - list_producer_with_slot( - Arc::clone(&session), - *slot_range.start()..=*slot_range.end(), - ) - .await?, - ); - info!( - "{} producer(s) with required slot range: {slot_range:?}", - producers_with_slot.len() - ); - - elligible_producers.retain(|k| producers_with_slot.contains(k)); - - anyhow::ensure!( - !elligible_producers.is_empty(), - ImpossibleSlotOffset(*slot_range.end()) - ); - }; - - info!("{} elligible producer(s)", recently_active_producers.len()); - - let mut producer_count_pairs = session - .query(GET_PRODUCERS_CONSUMER_COUNT, &[]) - .await? - .rows_typed::<(ProducerId, i64)>()? - .collect::, _>>()?; - - elligible_producers.iter().for_each(|producer_id| { - producer_count_pairs - .entry(producer_id.to_owned()) - .or_insert(0); - }); - - producer_count_pairs - .into_iter() - .filter(|(producer_id, _)| elligible_producers.contains(producer_id)) - .min_by_key(|(_, count)| *count) - .map(|(producer_id, _)| producer_id) - .ok_or(anyhow::anyhow!("No producer is available right now")) -} - -/// -/// Returns a specific producer information by id or return a random producer_info if `producer_id` is None. -pub async fn get_producer_info_by_id( - session: Arc, - producer_id: ProducerId, -) -> anyhow::Result> { - let qr = session - .query(GET_PRODUCER_INFO_BY_ID, (producer_id,)) - .await?; - - match qr.single_row_typed::() { - Ok(row) => Ok(Some(row)), - Err(SingleRowTypedError::BadNumberOfRows(_)) => Ok(None), - Err(e) => Err(anyhow::Error::new(e)), - } -} - -fn get_blockchain_event_types( - event_sub_policy: EventSubscriptionPolicy, -) -> Vec { - match event_sub_policy { - EventSubscriptionPolicy::AccountUpdateOnly => vec![BlockchainEventType::AccountUpdate], - EventSubscriptionPolicy::TransactionOnly => vec![BlockchainEventType::NewTransaction], - EventSubscriptionPolicy::Both => vec![ - BlockchainEventType::AccountUpdate, - BlockchainEventType::NewTransaction, - ], - } -} - -async fn assign_producer_to_consumer( - session: Arc, - consumer_id: ConsumerId, - consumer_ip: Option, - initial_offset: InitialOffset, - event_sub_policy: EventSubscriptionPolicy, - commitment_level: CommitmentLevel, - is_new: bool, -) -> anyhow::Result<(ConsumerInfo, Vec)> { - let maybe_slot_range = if let InitialOffset::SlotApprox { - desired_slot, - min_slot, - } = initial_offset - { - Some(min_slot..=desired_slot) - } else { - None - }; - - let producer_id = get_producer_id_with_least_assigned_consumer( - Arc::clone(&session), - maybe_slot_range, - commitment_level, - ) - .await?; - if is_new { - session - .query( - INSERT_CONSUMER_INFO, - ( - consumer_id.as_str(), - producer_id, - consumer_ip.map(|ipaddr| ipaddr.to_string()), - get_blockchain_event_types(event_sub_policy), - ), - ) - .await?; - } else { - session - .query( - UPSERT_CONSUMER_INFO, - ( - producer_id, - get_blockchain_event_types(event_sub_policy), - consumer_id.as_str(), - ), - ) - .await?; - } - - info!( - "consumer {:?} successfully assigned producer {:?}", - consumer_id.as_str(), - producer_id - ); - let initital_shard_offsets = set_initial_consumer_shard_offsets( - Arc::clone(&session), - consumer_id.as_str(), - producer_id, - initial_offset, - event_sub_policy, - ) - .await?; - info!("Successfully set consumer shard offsets following {initial_offset:?} policy"); - let cs = ConsumerInfo { - consumer_id: consumer_id.clone(), - producer_id, - subscribed_blockchain_event_types: get_blockchain_event_types(event_sub_policy), - }; - - Ok((cs, initital_shard_offsets)) -} - -async fn get_min_offset_for_producer( - session: Arc, - producer_id: ProducerId, -) -> anyhow::Result> { - session - .query( - "SELECT minimum_shard_offset FROM producer_lock WHERE producer_id = ?", - (producer_id,), - ) - .await? - .first_row_typed::<(Option>,)>()? - .0 - .ok_or(anyhow::anyhow!( - "Producer lock exists, but its minimum shard offset is not set." - )) -} - -async fn get_slot_shard_offsets( - session: Arc, - slot: Slot, - min_slot: Slot, - producer_id: ProducerId, - _num_shards: ShardId, -) -> anyhow::Result>> { - let maybe = session - .query( - GET_SHARD_OFFSET_AT_SLOT_APPROX, - (producer_id, slot, min_slot), - ) - .await? - .maybe_first_row_typed::<(Vec<(ShardId, ShardOffset)>, Slot)>()?; - - if let Some((offsets, slot_approx)) = maybe { - info!( - "found producer({producer_id:?}) shard offsets within slot range: {min_slot}..={slot}" - ); - Ok(Some( - offsets - .into_iter() - .map(|(shard_id, shard_offset)| (shard_id, shard_offset, slot_approx)) - .collect(), - )) - } else { - Ok(None) - } -} - -/// Sets the initial shard offsets for a newly created consumer based on [[`InitialOffsetPolicy`]]. -/// -/// Similar to seeking in a file, we can seek right at the beginning of the log, completly at the end or at first -/// log event containg a specific slot number. -async fn set_initial_consumer_shard_offsets( - session: Arc, - new_consumer_id: impl AsRef, - producer_id: ProducerId, - initial_offset_policy: InitialOffset, - event_sub_policy: EventSubscriptionPolicy, -) -> anyhow::Result> { - // Create all the shards counter - let producer_info = get_producer_info_by_id(Arc::clone(&session), producer_id) - .await? - .unwrap_or_else(|| panic!("Producer Info `{:?}` must exists", producer_id)); - - let new_consumer_id = new_consumer_id.as_ref(); - info!("consumer {new_consumer_id} will be assigned to producer {producer_id:?}"); - let num_shards = producer_info.num_shards; - - let shard_offset_pairs = match initial_offset_policy { - InitialOffset::Latest => { - sink::get_max_shard_offsets_for_producer( - Arc::clone(&session), - producer_id, - num_shards as usize, - ) - .await? - } - InitialOffset::Earliest => { - get_min_offset_for_producer(Arc::clone(&session), producer_id).await? - } - InitialOffset::SlotApprox { - desired_slot, - min_slot, - } => { - let minium_producer_offsets = - get_min_offset_for_producer(Arc::clone(&session), producer_id) - .await? - .into_iter() - .map(|(shard_id, shard_offset, slot)| (shard_id, (shard_offset, slot))) - .collect::>(); - info!("(consumer-id={new_consumer_id}) SlotApprox step 1: retrieved minimum producer({producer_id:?}) offset."); - - let shard_offsets_contain_slot = get_slot_shard_offsets( - Arc::clone(&session), - desired_slot, - min_slot, - producer_id, - num_shards, - ) - .await? - .ok_or(ImpossibleSlotOffset(desired_slot))?; - - info!("(consumer-id={new_consumer_id}) SlotApprox step 2: producer({producer_id:?}) shard offsets containing slot range."); - - let are_shard_offset_reachable = - shard_offsets_contain_slot - .iter() - .all(|(shard_id, offset1, _)| { - minium_producer_offsets - .get(shard_id) - .filter(|(offset2, _)| offset1 > offset2) - .is_some() - }); - - info!("(consumer-id={new_consumer_id}) SlotApprox step 3: producer({producer_id:?}) shard offset reachability: {are_shard_offset_reachable}"); - if !are_shard_offset_reachable { - anyhow::bail!(ImpossibleSlotOffset(desired_slot)) - } - - shard_offsets_contain_slot - } - }; - - if shard_offset_pairs.len() != (num_shards as usize) { - anyhow::bail!("Producer {producer_id:?} shard offsets is incomplete {new_consumer_id}"); - } - info!("Shard offset has been computed successfully"); - let adjustment = match initial_offset_policy { - InitialOffset::Earliest - | InitialOffset::SlotApprox { - desired_slot: _, - min_slot: _, - } => -1, - InitialOffset::Latest => 0, - }; - - let insert_consumer_offset_ps: PreparedStatement = - session.prepare(INSERT_CONSUMER_OFFSET).await?; - - let mut batch = Batch::new(BatchType::Unlogged); - let mut buffer = Vec::with_capacity(shard_offset_pairs.len()); - - let ev_types = get_blockchain_event_types(event_sub_policy); - - ev_types - .into_iter() - .flat_map(|ev_type| { - shard_offset_pairs - .iter() - .cloned() - .map(move |(shard_id, offset, slot)| (ev_type, shard_id, offset, slot)) - }) - .for_each(|(ev_type, shard_id, offset, slot)| { - let offset = offset + adjustment; - batch.append_statement(insert_consumer_offset_ps.clone()); - buffer.push(( - new_consumer_id.to_owned(), - producer_id, - shard_id, - ev_type, - offset, - slot, - )); - }); - - session.batch(&batch, &buffer).await?; - - let shard_offsets = buffer - .drain(..) - .map( - |(consumer_id, producer_id, shard_id, event_type, offset, slot)| ConsumerShardOffset { - consumer_id, - producer_id, - shard_id, - event_type, - offset, - slot, - }, - ) - .collect::>(); - - Ok(shard_offsets) -} - -pub struct ScyllaYsLog { - session: Arc, - consumer_group_repo: ConsumerGroupRepo, -} - -impl ScyllaYsLog { - pub async fn new(session: Arc) -> anyhow::Result { - let consumer_group_repo = ConsumerGroupRepo::new(Arc::clone(&session)).await?; - Ok(ScyllaYsLog { - session, - consumer_group_repo, - }) - } -} - -pub type LogStream = Pin> + Send>>; - -#[tonic::async_trait] -impl YellowstoneLog for ScyllaYsLog { - #[doc = r" Server streaming response type for the consume method."] - type ConsumeStream = LogStream; - - async fn create_static_consumer_group( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - let remote_ip_addr = request.remote_addr().map(|addr| addr.ip()); - let request = request.into_inner(); - - let instance_ids = request.instance_id_list; - let redundant_instance_ids = request.redundancy_instance_id_list; - - let consumer_group_info = self - .consumer_group_repo - .create_static_consumer_group(&instance_ids, &redundant_instance_ids, remote_ip_addr) - .await - .map_err(|e| { - error!("create_static_consumer_group: {e:?}"); - tonic::Status::internal("failed to create consumer group") - })?; - Ok(Response::new(CreateStaticConsumerGroupResponse { - group_id: consumer_group_info.consumer_group_id.to_string(), - })) - } - - async fn consume( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - let consumer_ip = request.remote_addr().map(|addr| addr.ip()); - let cr = request.into_inner(); - - let consumer_id = cr.consumer_id.clone().unwrap_or(Uuid::new_v4().to_string()); - let initial_offset_policy = match cr.initial_offset_policy() { - yellowstone_grpc_proto::yellowstone::log::InitialOffsetPolicy::Earliest => { - InitialOffset::Earliest - } - yellowstone_grpc_proto::yellowstone::log::InitialOffsetPolicy::Latest => { - InitialOffset::Latest - } - yellowstone_grpc_proto::yellowstone::log::InitialOffsetPolicy::Slot => { - let slot = cr.at_slot.ok_or(tonic::Status::invalid_argument( - "Expected at_lot when initital_offset_policy is to `Slot`", - ))?; - InitialOffset::SlotApprox { - desired_slot: slot, - min_slot: slot, - } - } - }; - - let timeline_translation_policy = cr.timeline_translation_policy(); - - let event_subscription_policy = cr.event_subscription_policy(); - let account_update_event_filter = cr.account_update_event_filter; - let tx_event_filter = cr.tx_event_filter; - let commitment_level: CommitmentLevel = (cr.commitment_level as i16) - .try_into() - .map_err(|_| tonic::Status::invalid_argument("commitment level is invalid"))?; - - info!( - consumer_id = consumer_id, - initital_offset_policy = ?initial_offset_policy, - event_subscription_policy = ?event_subscription_policy, - commitment_level = ?commitment_level, - ); - - let req: SpawnGrpcConsumerReq = SpawnGrpcConsumerReq { - consumer_id: consumer_id.clone(), - consumer_ip, - account_update_event_filter, - tx_event_filter, - buffer_capacity: None, - offset_commit_interval: None, - timeline_translation_policy, - timeline_translation_allowed_lag: cr.ttp_maximum_slot_lag, - event_subscription_policy, - commitment_level, - }; - - let result = - spawn_grpc_consumer(Arc::clone(&self.session), req, initial_offset_policy).await; - - match result { - Ok(rx) => { - let ret = ReceiverStream::new(rx); - let res = Response::new(Box::pin(ret) as Self::ConsumeStream); - Ok(res) - } - Err(e) => { - error!(consumer_id=consumer_id, error = %e); - Err(tonic::Status::internal(format!( - "({consumer_id}) fail to spawn consumer" - ))) - } - } - } -} - -#[derive(Clone)] -pub struct SpawnGrpcConsumerReq { - pub consumer_id: ConsumerId, - pub consumer_ip: Option, - pub account_update_event_filter: - Option, - pub tx_event_filter: Option, - pub buffer_capacity: Option, - pub offset_commit_interval: Option, - pub timeline_translation_policy: TimelineTranslationPolicy, - pub timeline_translation_allowed_lag: Option, - pub event_subscription_policy: EventSubscriptionPolicy, - pub commitment_level: CommitmentLevel, -} - -type GrpcConsumerSender = mpsc::Sender>; -type GrpcConsumerReceiver = mpsc::Receiver>; -type GrpcEvent = Result; - -impl FromBlockchainEvent for GrpcEvent { - type Output = Self; - fn from(blockchain_event: crate::scylladb::types::BlockchainEvent) -> Self::Output { - let geyser_event = match blockchain_event.event_type { - BlockchainEventType::AccountUpdate => { - UpdateOneof::Account(blockchain_event.try_into().map_err(|e| { - error!(error=?e); - tonic::Status::internal("corrupted account update event in the stream") - })?) - } - BlockchainEventType::NewTransaction => { - UpdateOneof::Transaction(blockchain_event.try_into().map_err(|e| { - error!(error=?e); - tonic::Status::internal("corrupted new transaction event in the stream") - })?) - } - }; - let subscribe_update = SubscribeUpdate { - filters: Default::default(), - update_oneof: Some(geyser_event), - }; - - Ok(subscribe_update) - } -} - -async fn build_grpc_consumer_source( - sender: GrpcConsumerSender, - session: Arc, - req: SpawnGrpcConsumerReq, - initial_offset_policy: InitialOffset, - is_new: bool, -) -> anyhow::Result> { - let (consumer_info, initial_shard_offsets) = assign_producer_to_consumer( - Arc::clone(&session), - req.consumer_id.clone(), - req.consumer_ip, - initial_offset_policy, - req.event_subscription_policy, - req.commitment_level, - is_new, - ) - .await?; - - //let last_committed_offsets = state.shard_offsets.clone(); - let consumer_session = Arc::clone(&session); - - let shard_filter = ShardFilter { - tx_account_keys: req - .tx_event_filter - .map(|f| f.account_keys) - .unwrap_or_default(), - account_pubkyes: req - .account_update_event_filter - .as_ref() - .map(|f| f.pubkeys.to_owned()) - .unwrap_or_default(), - account_owners: req - .account_update_event_filter - .as_ref() - .map(|f| f.owners.to_owned()) - .unwrap_or_default(), - }; - - let shard_iterators = try_join_all(initial_shard_offsets.iter().cloned().map( - |consumer_shard_offset| { - let session = Arc::clone(&session); - let producer_id = consumer_info.producer_id; - let shard_filter = shard_filter.clone(); - ShardIterator::new( - session, - producer_id, - consumer_shard_offset.shard_id, - consumer_shard_offset.offset, - // The ev_type will dictate if shard iterator streams account update or transaction. - consumer_shard_offset.event_type, - Some(shard_filter), - ) - }, - )) - .await?; - - let consumer = ConsumerSource::new( - consumer_session, - consumer_info, - sender, - req.offset_commit_interval - .unwrap_or(DEFAULT_OFFSET_COMMIT_INTERVAL), - shard_iterators, - ) - .await?; - Ok(consumer) -} - -pub async fn spawn_grpc_consumer( - session: Arc, - req: SpawnGrpcConsumerReq, - initial_offset_policy: InitialOffset, -) -> anyhow::Result { - let original_req = req.clone(); - let buffer_capacity = req - .buffer_capacity - .unwrap_or(DEFAULT_CONSUMER_STREAM_BUFFER_CAPACITY); - let (sender, receiver) = mpsc::channel(buffer_capacity); - const DEFAULT_ALLOWED_LAG: u32 = 10; - let mut grpc_consumer_source = build_grpc_consumer_source( - sender.clone(), - Arc::clone(&session), - req, - initial_offset_policy, - true, - ) - .await?; - let consumer_id = original_req.consumer_id.to_owned(); - - info!("Spawning consumer {consumer_id} thread"); - tokio::spawn(async move { - let consumer_id = original_req.consumer_id.to_owned(); - let sender = sender; - let session = session; - while !sender.is_closed() { - let current_producer_id = grpc_consumer_source.producer_id(); - let interrupt_signal = - wait_for_producer_is_dead(Arc::clone(&session), current_producer_id); - - match grpc_consumer_source.run(interrupt_signal).await { - Ok(_) => break, - Err(e) => { - warn!("Consumer {consumer_id} source has stop with {e:?}"); - if let Some(Interrupted) = e.downcast_ref::() { - let forged_offset_policy = grpc_consumer_source - .shard_iterators_slot - .into_iter() - .min() - .map(|(_shard_id, slot)| { - let min_slot = match &original_req.timeline_translation_policy { - TimelineTranslationPolicy::AllowLag => { - let lag = original_req - .timeline_translation_allowed_lag - .unwrap_or(DEFAULT_ALLOWED_LAG); - slot - (lag as Slot) - } - TimelineTranslationPolicy::StrictSlot => slot, - }; - - InitialOffset::SlotApprox { - desired_slot: slot, - min_slot, - } - }) - .unwrap_or(initial_offset_policy); - - grpc_consumer_source = build_grpc_consumer_source( - sender.clone(), - Arc::clone(&session), - original_req.clone(), - forged_offset_policy, - false, - ) - .await - .unwrap_or_else(|_| panic!("cannot translate consumer {consumer_id}")); - } else { - panic!("{e:?}") - } - } - } - } - }); - Ok(receiver) -} diff --git a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/mod.rs b/yellowstone-grpc-tools/src/scylladb/yellowstone_log/mod.rs deleted file mode 100644 index 6689e781..00000000 --- a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod common; -mod consumer_group; -mod consumer_source; -pub mod grpc; -pub mod shard_iterator; diff --git a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/shard_iterator.rs b/yellowstone-grpc-tools/src/scylladb/yellowstone_log/shard_iterator.rs deleted file mode 100644 index f73868a4..00000000 --- a/yellowstone-grpc-tools/src/scylladb/yellowstone_log/shard_iterator.rs +++ /dev/null @@ -1,476 +0,0 @@ -use { - crate::scylladb::types::{ - BlockchainEvent, BlockchainEventType, ProducerId, ShardId, ShardOffset, ShardPeriod, - SHARD_OFFSET_MODULO, - }, - core::fmt, - scylla::{prepared_statement::PreparedStatement, Session}, - std::{collections::VecDeque, sync::Arc}, - tokio::sync::oneshot::{self, error::TryRecvError}, - tracing::warn, -}; - -const MICRO_BATCH_SIZE: usize = 40; - -pub const GET_NEW_TRANSACTION_EVENT: &str = r###" - SELECT - shard_id, - period, - producer_id, - offset, - slot, - event_type, - - pubkey, - lamports, - owner, - executable, - rent_epoch, - write_version, - data, - txn_signature, - - signature, - signatures, - num_required_signatures, - num_readonly_signed_accounts, - num_readonly_unsigned_accounts, - account_keys, - recent_blockhash, - instructions, - versioned, - address_table_lookups, - meta, - is_vote, - tx_index - FROM log - WHERE producer_id = ? and shard_id = ? and offset > ? and period = ? - and event_type = 1 - ORDER BY offset ASC - ALLOW FILTERING -"###; - -const GET_LAST_SHARD_PERIOD_COMMIT: &str = r###" - SELECT - period - FROM producer_period_commit_log - WHERE - producer_id = ? - AND shard_id = ? - ORDER BY period DESC - PER PARTITION LIMIT 1 -"###; - -/// Represents the state of a shard iterator, which is used to manage the iteration -/// and retrieval of blockchain events from a shard. -/// -/// The `ShardIteratorState` enum encapsulates different states that the iterator -/// can be in during its lifecycle. -enum ShardIteratorState { - /// The iterator is initialized and empty. - Empty(ShardOffset), - - /// The iterator is in the process of loading blockchain events from the shard. - Loading(ShardOffset, oneshot::Receiver>), - - /// The iterator has loaded blockchain events and is ready for retrieval. - Loaded(ShardOffset, VecDeque), - - /// The iterator is confirming the end of a period in the shard. - ConfirmingPeriod(ShardOffset, oneshot::Receiver), - - /// The iterator is actively streaming blockchain events. - AvailableData(ShardOffset, VecDeque), - - /// The iterator is waiting for the end of a period in the shard. - WaitingEndOfPeriod(ShardOffset, oneshot::Receiver), -} - -impl fmt::Debug for ShardIteratorState { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Empty(arg0) => f.debug_tuple("Empty").field(arg0).finish(), - Self::Loading(arg0, _) => f.debug_tuple("Loading").field(arg0).finish(), - Self::Loaded(arg0, micro_batch) => f - .debug_tuple("Loaded") - .field(arg0) - .field(&format!("micro_batch({})", micro_batch.len())) - .finish(), - Self::ConfirmingPeriod(arg0, _) => { - f.debug_tuple("ConfirmingPeriod").field(arg0).finish() - } - Self::AvailableData(arg0, micro_batch) => f - .debug_tuple("Available") - .field(arg0) - .field(&format!("micro_batch({})", micro_batch.len())) - .finish(), - Self::WaitingEndOfPeriod(arg0, _) => f.debug_tuple("EndOfPeriod").field(arg0).finish(), - } - } -} - -impl ShardIteratorState { - const fn last_offset(&self) -> ShardOffset { - match self { - Self::Empty(offset) => *offset, - Self::Loading(offset, _) => *offset, - Self::Loaded(offset, _) => *offset, - Self::ConfirmingPeriod(offset, _) => *offset, - Self::AvailableData(offset, _) => *offset, - Self::WaitingEndOfPeriod(offset, _) => *offset, - } - } - - const fn is_empty(&self) -> bool { - matches!(self, ShardIteratorState::Empty(_)) - } -} - -#[derive(Clone, Default)] -pub(crate) struct ShardFilter { - pub(crate) tx_account_keys: Vec>, - pub(crate) account_owners: Vec>, - pub(crate) account_pubkyes: Vec>, -} - -pub(crate) struct ShardIterator { - session: Arc, - pub(crate) producer_id: ProducerId, - pub(crate) shard_id: ShardId, - inner: ShardIteratorState, - pub(crate) event_type: BlockchainEventType, - get_events_prepared_stmt: PreparedStatement, - get_last_shard_period_commit_prepared_stmt: PreparedStatement, - last_period_confirmed: ShardPeriod, - filter: ShardFilter, -} - -/// Represents an iterator for fetching and processing blockchain events from a specific shard. -/// The iterator fetch "micro batch" at a time. -impl ShardIterator { - pub(crate) async fn new( - session: Arc, - producer_id: ProducerId, - shard_id: ShardId, - offset: ShardOffset, - event_type: BlockchainEventType, - filter: Option, - ) -> anyhow::Result { - let get_events_ps = if event_type == BlockchainEventType::AccountUpdate { - let query_str = forge_account_upadate_event_query(filter.clone().unwrap_or_default()); - session.prepare(query_str).await? - } else { - session.prepare(GET_NEW_TRANSACTION_EVENT).await? - }; - - let get_last_shard_period_commit = session.prepare(GET_LAST_SHARD_PERIOD_COMMIT).await?; - - Ok(ShardIterator { - session, - producer_id, - shard_id, - inner: ShardIteratorState::Empty(offset), - event_type, - get_events_prepared_stmt: get_events_ps, - get_last_shard_period_commit_prepared_stmt: get_last_shard_period_commit, - last_period_confirmed: (offset / SHARD_OFFSET_MODULO) - 1, - filter: filter.unwrap_or_default(), - }) - } - - pub(crate) const fn last_offset(&self) -> ShardOffset { - self.inner.last_offset() - } - - /// Warms up the shard iterator by loading the initial micro batch if in the `Empty` state. - pub(crate) async fn warm(&mut self) -> anyhow::Result<()> { - if !self.inner.is_empty() { - return Ok(()); - } - let last_offset = self.inner.last_offset(); - - let micro_batch = self.fetch_micro_batch(last_offset).await?; - let new_state = ShardIteratorState::AvailableData(last_offset, micro_batch); - self.inner = new_state; - Ok(()) - } - - /// Checks if a period is committed based on the given last offset. - fn is_period_committed(&self, last_offset: ShardOffset) -> oneshot::Receiver { - let session = Arc::clone(&self.session); - let producer_id = self.producer_id; - let ps = self.get_last_shard_period_commit_prepared_stmt.clone(); - let shard_id = self.shard_id; - let period = last_offset / SHARD_OFFSET_MODULO; - let (sender, receiver) = oneshot::channel(); - tokio::spawn(async move { - let result = session - .execute(&ps, (producer_id, shard_id)) - .await - .expect("failed to query period commit state") - .maybe_first_row_typed::<(ShardPeriod,)>() - .expect("query not elligible to return rows") - .map(|row| row.0 >= period) - .unwrap_or(false); - sender.send(result).map_err(|_| ()).unwrap_or_else(|_| { - panic!( - "failed to send back period commit status to shard iterator {}", - shard_id - ) - }); - }); - receiver - } - - /// Fetches a micro batch of blockchain events starting from the given last offset. - fn fetch_micro_batch( - &self, - last_offset: ShardOffset, - ) -> oneshot::Receiver> { - let period = (last_offset + 1) / SHARD_OFFSET_MODULO; - let producer_id = self.producer_id; - let ps = self.get_events_prepared_stmt.clone(); - let shard_id = self.shard_id; - let session = Arc::clone(&self.session); - let (sender, receiver) = oneshot::channel(); - tokio::spawn(async move { - let micro_batch = session - .execute(&ps, (producer_id, shard_id, last_offset, period)) - .await - .expect("failed to fetch micro batch from scylladb") - .rows_typed_or_empty::() - .collect::, _>>() - .expect("failed to typed scylladb rows"); - if sender.send(micro_batch).is_err() { - warn!("Shard iterator {shard_id} was fetching micro batch, but client closed its stream half.") - } - }); - receiver - } - - /// - /// Apply any filter that cannot be pushed down to the database - /// - fn filter_row(&self, row: BlockchainEvent) -> Option { - if row.event_type == BlockchainEventType::NewTransaction { - // Apply transaction filter here - let elligible_acc_keys = &self.filter.tx_account_keys; - if !elligible_acc_keys.is_empty() { - let is_row_elligible = row - .account_keys - .as_ref() - .filter(|actual_keys| { - actual_keys - .iter() - .any(|account_key| elligible_acc_keys.contains(account_key)) - }) - .map(|_| true) - .unwrap_or(false); - if !is_row_elligible { - return None; - } - } - } - - Some(row) - } - - /// Attempts to retrieve the next blockchain event from the shard iterator. - /// - /// This method asynchronously advances the iterator's state and fetches the next blockchain event - /// based on its current state. - /// - /// It handles different states of the iterator and performs - /// appropriate actions such as loading, streaming, and period confirmation. - /// - /// Returns `Ok(None)` if no event is available or the iterator is waiting for period confirmation. - pub(crate) async fn try_next(&mut self) -> anyhow::Result> { - let last_offset = self.inner.last_offset(); - let current_state = - std::mem::replace(&mut self.inner, ShardIteratorState::Empty(last_offset)); - - let (next_state, maybe_to_return) = match current_state { - ShardIteratorState::Empty(last_offset) => { - let receiver = self.fetch_micro_batch(last_offset); - (ShardIteratorState::Loading(last_offset, receiver), None) - } - ShardIteratorState::Loading(last_offset, mut receiver) => { - let result = receiver.try_recv(); - match result { - Err(TryRecvError::Empty) => { - (ShardIteratorState::Loading(last_offset, receiver), None) - } - Err(TryRecvError::Closed) => anyhow::bail!("failed to receive micro batch"), - Ok(micro_batch) => (ShardIteratorState::Loaded(last_offset, micro_batch), None), - } - } - ShardIteratorState::Loaded(last_offset, mut micro_batch) => { - let maybe_row = micro_batch.pop_front(); - if let Some(row) = maybe_row { - ( - ShardIteratorState::AvailableData(row.offset, micro_batch), - Some(row), - ) - } else { - let curr_period = last_offset / SHARD_OFFSET_MODULO; - if curr_period <= self.last_period_confirmed { - let last_offset_for_curr_period = - ((curr_period + 1) * SHARD_OFFSET_MODULO) - 1; - (ShardIteratorState::Empty(last_offset_for_curr_period), None) - } else { - // If a newly loaded row stream is already empty, we must figure out if - // its because there no more data in the period or is it because we consume too fast and we should try again later. - let receiver = self.is_period_committed(last_offset); - ( - ShardIteratorState::ConfirmingPeriod(last_offset, receiver), - None, - ) - } - } - } - ShardIteratorState::ConfirmingPeriod(last_offset, mut rx) => match rx.try_recv() { - Err(TryRecvError::Empty) => { - (ShardIteratorState::ConfirmingPeriod(last_offset, rx), None) - } - Err(TryRecvError::Closed) => anyhow::bail!("fail"), - Ok(period_committed) => { - if period_committed { - self.last_period_confirmed = last_offset / SHARD_OFFSET_MODULO; - } - (ShardIteratorState::Empty(last_offset), None) - } - }, - ShardIteratorState::AvailableData(last_offset, mut micro_batch) => { - let maybe_row = micro_batch.pop_front(); - if let Some(row) = maybe_row { - ( - ShardIteratorState::AvailableData(row.offset, micro_batch), - Some(row), - ) - } else if (last_offset + 1) % SHARD_OFFSET_MODULO == 0 { - let receiver = self.is_period_committed(last_offset); - ( - ShardIteratorState::WaitingEndOfPeriod(last_offset, receiver), - None, - ) - } else { - (ShardIteratorState::Empty(last_offset), None) - } - } - ShardIteratorState::WaitingEndOfPeriod(last_offset, mut rx) => { - match rx.try_recv() { - Err(TryRecvError::Empty) => ( - ShardIteratorState::WaitingEndOfPeriod(last_offset, rx), - None, - ), - Err(TryRecvError::Closed) => anyhow::bail!("fail"), - Ok(period_committed) => { - if period_committed { - self.last_period_confirmed = last_offset / SHARD_OFFSET_MODULO; - (ShardIteratorState::Empty(last_offset), None) - } else { - // Renew the background task - let rx2 = self.is_period_committed(last_offset); - ( - ShardIteratorState::WaitingEndOfPeriod(last_offset, rx2), - None, - ) - } - } - } - } - }; - let _ = std::mem::replace(&mut self.inner, next_state); - Ok(maybe_to_return.and_then(|row| self.filter_row(row))) - } -} - -const LOG_PRIMARY_KEY_CONDITION: &str = r###" - producer_id = ? and shard_id = ? and offset > ? and period = ? -"###; - -const LOG_PROJECTION: &str = r###" - shard_id, - period, - producer_id, - offset, - slot, - event_type, - pubkey, - lamports, - owner, - executable, - rent_epoch, - write_version, - data, - txn_signature, - signature, - signatures, - num_required_signatures, - num_readonly_signed_accounts, - num_readonly_unsigned_accounts, - account_keys, - recent_blockhash, - instructions, - versioned, - address_table_lookups, - meta, - is_vote, - tx_index -"###; - -fn format_as_scylla_hexstring(bytes: &[u8]) -> String { - if bytes.is_empty() { - panic!("byte slice is empty") - } - let hex = bytes - .iter() - .map(|b| format!("{:02x}", b)) - .collect::>() - .join(""); - format!("0x{}", hex) -} - -fn forge_account_upadate_event_query(filter: ShardFilter) -> String { - let mut conds = vec![]; - - let pubkeys = filter - .account_pubkyes - .iter() - .map(|pubkey| format_as_scylla_hexstring(pubkey.as_slice())) - .collect::>(); - - let owners = filter - .account_owners - .iter() - .map(|owner| format_as_scylla_hexstring(owner.as_slice())) - .collect::>(); - - if !pubkeys.is_empty() { - let cond = format!("AND pubkey IN ({})", pubkeys.join(", ")); - conds.push(cond); - } - if !owners.is_empty() { - let cond = format!("AND owner IN ({})", owners.join(", ")); - conds.push(cond) - } - let conds_string = conds.join(" "); - - format!( - r###" - SELECT - {projection} - FROM log - WHERE {primary_key_cond} - AND event_type = 0 - {other_conds} - ORDER BY offset ASC - LIMIT {batch_size} - ALLOW FILTERING - "###, - projection = LOG_PROJECTION, - primary_key_cond = LOG_PRIMARY_KEY_CONDITION, - other_conds = conds_string, - batch_size = MICRO_BATCH_SIZE, - ) -}