From 72f658a57707c1b51203b20e7e326f8356e9f8f3 Mon Sep 17 00:00:00 2001 From: Tim Sawicki <136370015+tim-quix@users.noreply.github.com> Date: Thu, 4 Jul 2024 06:52:47 -0400 Subject: [PATCH] remove column name deserialization (#392) * remove everything related to generating column_names during deserialization * remove doc references to column_name deserialization args --- docs/api-reference/application.md | 20 +- docs/api-reference/context.md | 4 +- docs/api-reference/dataframe.md | 52 +- docs/api-reference/kafka.md | 52 +- docs/api-reference/quixstreams.md | 8501 ++++++++--------- docs/api-reference/serialization.md | 39 +- docs/api-reference/state.md | 14 +- docs/api-reference/topics.md | 48 +- quixstreams/models/serializers/base.py | 11 +- quixstreams/models/serializers/json.py | 8 +- quixstreams/models/serializers/quix.py | 11 +- .../models/serializers/simple_types.py | 27 +- tests/test_quixstreams/test_app.py | 9 +- .../test_models/test_quix_serializers.py | 99 - .../test_models/test_serializers.py | 26 - .../test_models/test_topics/test_topics.py | 14 +- 16 files changed, 4385 insertions(+), 4550 deletions(-) diff --git a/docs/api-reference/application.md b/docs/api-reference/application.md index 7557ee4b9..0f6bf5326 100644 --- a/docs/api-reference/application.md +++ b/docs/api-reference/application.md @@ -10,7 +10,7 @@ class Application() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L55) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L55) The main Application class. 
@@ -82,7 +82,7 @@ def __init__(broker_address: Optional[Union[str, ConnectionConfig]] = None, topic_create_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L93) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L93)
@@ -180,7 +180,7 @@ def Quix(cls, topic_create_timeout: float = 60) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L313) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L313) >***NOTE:*** DEPRECATED: use Application with `quix_sdk_token` argument instead. @@ -290,7 +290,7 @@ def topic(name: str, timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L451) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L451) Create a topic definition. @@ -371,7 +371,7 @@ topic = app.topic("input-topic", timestamp_extractor=custom_ts_extractor) def dataframe(topic: Topic) -> StreamingDataFrame ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L531) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L531) A simple helper method that generates a `StreamingDataFrame`, which is used @@ -421,7 +421,7 @@ to be used as an input topic. def stop(fail: bool = False) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L570) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L570) Stop the internal poll loop and the message processing. @@ -448,7 +448,7 @@ to unhandled exception, and it shouldn't commit the current checkpoint. 
def get_producer() -> Producer ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L593) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L593) Create and return a pre-configured Producer instance. The Producer is initialized with params passed to Application. @@ -483,7 +483,7 @@ with app.get_producer() as producer: def get_consumer(auto_commit_enable: bool = True) -> Consumer ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L623) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L623) Create and return a pre-configured Consumer instance. The Consumer is initialized with params passed to Application. @@ -528,7 +528,7 @@ with app.get_consumer() as consumer: def clear_state() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L666) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L666) Clear the state of the application. @@ -542,7 +542,7 @@ Clear the state of the application. 
def run(dataframe: StreamingDataFrame) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L672) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L672) Start processing data from Kafka using provided `StreamingDataFrame` diff --git a/docs/api-reference/context.md b/docs/api-reference/context.md index d7d0ec1e9..f291a5c1f 100644 --- a/docs/api-reference/context.md +++ b/docs/api-reference/context.md @@ -12,7 +12,7 @@ def set_message_context(context: Optional[MessageContext]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L20) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L20) Set a MessageContext for the current message in the given `contextvars.Context` @@ -55,7 +55,7 @@ sdf = sdf.update(lambda value: alter_context(value)) def message_context() -> MessageContext ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L51) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L51) Get a MessageContext for the current message, which houses most of the message diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md index 142c04c98..44415f69d 100644 --- a/docs/api-reference/dataframe.md +++ b/docs/api-reference/dataframe.md @@ -10,7 +10,7 @@ class StreamingDataFrame(BaseStreaming) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L62) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L62) `StreamingDataFrame` is the main object 
you will use for ETL work. @@ -81,7 +81,7 @@ def apply(func: Union[ metadata: bool = False) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L177) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L177) Apply a function to transform the value and return a new value. @@ -139,7 +139,7 @@ def update(func: Union[ metadata: bool = False) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L266) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L266) Apply a function to mutate value in-place or to perform a side effect @@ -197,7 +197,7 @@ def filter(func: Union[ metadata: bool = False) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L354) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L354) Filter value using provided function. @@ -249,7 +249,7 @@ def group_by(key: Union[str, Callable[[Any], Any]], key_serializer: Optional[SerializerType] = "json") -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L440) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L440) "Groups" messages by re-keying them via the provided group_by operation @@ -314,7 +314,7 @@ a clone with this operation added (assign to keep its effect). 
def contains(key: str) -> StreamingSeries ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L518) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L518) Check if the key is present in the Row value. @@ -353,7 +353,7 @@ or False otherwise. def to_topic(topic: Topic, key: Optional[Callable[[Any], Any]] = None) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L543) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L543) Produce current value to a topic. You can optionally specify a new key. @@ -396,7 +396,7 @@ By default, the current message key will be used. def set_timestamp(func: Callable[[Any, Any, int, Any], int]) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L584) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L584) Set a new timestamp based on the current message value and its metadata. @@ -449,7 +449,7 @@ def set_headers( ) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L625) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L625) Set new message headers based on the current message value and metadata. 
@@ -500,7 +500,7 @@ def compose( ) -> Dict[str, VoidExecutor] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L676) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L676) Compose all functions of this StreamingDataFrame into one big closure. @@ -554,7 +554,7 @@ def test(value: Any, topic: Optional[Topic] = None) -> List[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L713) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L713) A shorthand to test `StreamingDataFrame` with provided value @@ -591,7 +591,7 @@ def tumbling_window(duration_ms: Union[int, timedelta], name: Optional[str] = None) -> TumblingWindowDefinition ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L750) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L750) Create a tumbling window transformation on this StreamingDataFrame. @@ -677,7 +677,7 @@ def hopping_window(duration_ms: Union[int, timedelta], name: Optional[str] = None) -> HoppingWindowDefinition ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L826) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L826) Create a hopping window transformation on this StreamingDataFrame. 
@@ -771,7 +771,7 @@ sdf = ( class StreamingSeries(BaseStreaming) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L47) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L47) `StreamingSeries` are typically generated by `StreamingDataframes` when getting elements from, or performing certain operations on, a `StreamingDataframe`, @@ -837,7 +837,7 @@ sdf = sdf[["column_a"] & (sdf["new_sum_field"] >= 10)] def from_apply_callback(cls, func: ApplyWithMetadataCallback) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L107) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L107) Create a StreamingSeries from a function. @@ -865,7 +865,7 @@ instance of `StreamingSeries` def apply(func: ApplyCallback) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L121) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L121) Add a callable to the execution list for this series. 
@@ -917,7 +917,7 @@ a new `StreamingSeries` with the new callable added def compose_returning() -> ReturningExecutor ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L155) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L155) Compose a list of functions from this StreamingSeries and its parents into one @@ -948,7 +948,7 @@ def compose( None]] = None) -> VoidExecutor ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L170) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L170) Compose all functions of this StreamingSeries into one big closure. @@ -1006,7 +1006,7 @@ def test(value: Any, ctx: Optional[MessageContext] = None) -> Any ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L214) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L214) A shorthand to test `StreamingSeries` with provided value @@ -1038,7 +1038,7 @@ result of `StreamingSeries` def isin(other: Container) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L269) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L269) Check if series value is in "other". 
@@ -1083,7 +1083,7 @@ new StreamingSeries def contains(other: Union[Self, object]) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L296) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L296) Check if series value contains "other" @@ -1128,7 +1128,7 @@ new StreamingSeries def is_(other: Union[Self, object]) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L321) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L321) Check if series value refers to the same object as `other` @@ -1170,7 +1170,7 @@ new StreamingSeries def isnot(other: Union[Self, object]) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L344) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L344) Check if series value does not refer to the same object as `other` @@ -1213,7 +1213,7 @@ new StreamingSeries def isnull() -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L368) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L368) Check if series value is None. 
@@ -1250,7 +1250,7 @@ new StreamingSeries def notnull() -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L391) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L391) Check if series value is not None. @@ -1287,7 +1287,7 @@ new StreamingSeries def abs() -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L414) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L414) Get absolute value of the series value. diff --git a/docs/api-reference/kafka.md b/docs/api-reference/kafka.md index 8234a7041..750a7f280 100644 --- a/docs/api-reference/kafka.md +++ b/docs/api-reference/kafka.md @@ -10,7 +10,7 @@ class Producer() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L44) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L44) @@ -26,7 +26,7 @@ def __init__(broker_address: Union[str, ConnectionConfig], flush_timeout: Optional[int] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L45) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L45) A wrapper around `confluent_kafka.Producer`. 
@@ -66,7 +66,7 @@ def produce(topic: str, on_delivery: Optional[DeliveryCallback] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L83) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L83) Produce a message to a topic. @@ -101,7 +101,7 @@ for the produced message. def poll(timeout: float = 0) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L144) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L144) Polls the producer for events and calls `on_delivery` callbacks. @@ -122,7 +122,7 @@ Polls the producer for events and calls `on_delivery` callbacks. def flush(timeout: Optional[float] = None) -> int ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L152) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L152) Wait for all messages in the Producer queue to be delivered. 
@@ -151,7 +151,7 @@ number of messages remaining to flush class Consumer() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L64) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L64) @@ -171,7 +171,7 @@ def __init__(broker_address: Union[str, ConnectionConfig], extra_config: Optional[dict] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L65) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L65) A wrapper around `confluent_kafka.Consumer`. @@ -214,7 +214,7 @@ Note: values passed as arguments override values in `extra_config`. def poll(timeout: Optional[float] = None) -> Optional[Message] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L128) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L128) Consumes a single message, calls callbacks and returns events. @@ -255,7 +255,7 @@ def subscribe(topics: List[str], on_lost: Optional[RebalancingCallback] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L146) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L146) Set subscription to supplied list of topics @@ -298,7 +298,7 @@ for example, may fail. 
def unsubscribe() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L240) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L240) Remove current subscription. @@ -318,7 +318,7 @@ def store_offsets(message: Optional[Message] = None, offsets: Optional[List[TopicPartition]] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L248) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L248) .. py:function:: store_offsets([message=None], [offsets=None]) @@ -353,7 +353,7 @@ def commit(message: Optional[Message] = None, asynchronous: bool = True) -> Optional[List[TopicPartition]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L282) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L282) Commit a message or a list of offsets. @@ -391,7 +391,7 @@ def committed(partitions: List[TopicPartition], timeout: Optional[float] = None) -> List[TopicPartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L322) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L322) .. 
py:function:: committed(partitions, [timeout=None]) @@ -428,7 +428,7 @@ def get_watermark_offsets(partition: TopicPartition, cached: bool = False) -> Tuple[int, int] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L342) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L342) Retrieve low and high offsets for the specified partition. @@ -467,7 +467,7 @@ def list_topics(topic: Optional[str] = None, timeout: Optional[float] = None) -> ClusterMetadata ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L368) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L368) .. py:function:: list_topics([topic=None], [timeout=-1]) @@ -500,7 +500,7 @@ None or -1 is infinite. Default: None def memberid() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L391) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L391) Return this client's broker-assigned group member id. @@ -523,7 +523,7 @@ def offsets_for_times(partitions: List[TopicPartition], timeout: Optional[float] = None) -> List[TopicPartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L404) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L404) Look up offsets by timestamp for the specified partitions. @@ -552,7 +552,7 @@ last message in the partition, a value of -1 will be returned. 
def pause(partitions: List[TopicPartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L430) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L430) Pause consumption for the provided list of partitions. @@ -580,7 +580,7 @@ Does NOT affect the result of Consumer.assignment(). def resume(partitions: List[TopicPartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L444) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L444) .. py:function:: resume(partitions) @@ -606,7 +606,7 @@ Resume consumption for the provided list of partitions. def position(partitions: List[TopicPartition]) -> List[TopicPartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L456) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L456) Retrieve current positions (offsets) for the specified partitions. @@ -639,7 +639,7 @@ the last consumed message + 1. def seek(partition: TopicPartition) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L470) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L470) Set consume position for partition to offset. @@ -671,7 +671,7 @@ pass the offset in an `assign()` call. 
def assignment() -> List[TopicPartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L487) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L487) Returns the current partition assignment. @@ -696,7 +696,7 @@ Returns the current partition assignment. def set_sasl_credentials(username: str, password: str) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L500) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L500) Sets the SASL credentials used for this client. These credentials will overwrite the old ones, and will be used the next @@ -715,7 +715,7 @@ This method is applicable only to SASL PLAIN and SCRAM mechanisms. def incremental_assign(partitions: List[TopicPartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L512) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L512) Assign new partitions. @@ -735,7 +735,7 @@ Any additional partitions besides the ones passed during the `Consumer` def incremental_unassign(partitions: List[TopicPartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L524) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L524) Revoke partitions. @@ -751,7 +751,7 @@ Can be called outside an on_revoke callback. 
def close() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L532) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L532) Close down and terminate the Kafka Consumer. diff --git a/docs/api-reference/quixstreams.md b/docs/api-reference/quixstreams.md index ecd844866..4fe8c1ab6 100644 --- a/docs/api-reference/quixstreams.md +++ b/docs/api-reference/quixstreams.md @@ -2,1447 +2,1672 @@ ## quixstreams - - -## quixstreams.logging + - +## quixstreams.core -#### configure\_logging + -```python -def configure_logging(loglevel: Optional[LogLevel]) -> bool -``` +## quixstreams.core.stream -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/logging.py#L26) + -Configure "quixstreams" logger. +## quixstreams.core.stream.functions ->***NOTE:*** If "quixstreams" logger already has pre-defined handlers -(e.g. logging has already been configured via `logging`, or the function -is called twice), it will skip configuration and return `False`. + -**Arguments**: +### StreamFunction -- `loglevel`: a valid log level as a string or None. -If None passed, this function is no-op and no logging will be configured. +```python +class StreamFunction(abc.ABC) +``` -**Returns**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L65) -True if logging config has been updated, otherwise False. +A base class for all the streaming operations in Quix Streams. - +It provides a `get_executor` method to return a closure to be called with the input +values. 
-## quixstreams.error\_callbacks + - +#### StreamFunction.get\_executor -## quixstreams.platforms +```python +@abc.abstractmethod +def get_executor(child_executor: VoidExecutor) -> VoidExecutor +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L79) -## quixstreams.platforms.quix.config +Returns a wrapper to be called on a value, key and timestamp. - + -#### strip\_workspace\_id\_prefix +### ApplyFunction ```python -def strip_workspace_id_prefix(workspace_id: str, s: str) -> str +class ApplyFunction(StreamFunction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L31) - -Remove the workspace ID from a given string if it starts with it, - -typically a topic or consumer group id - -**Arguments**: - -- `workspace_id`: the workspace id -- `s`: the string to append to +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L85) -**Returns**: +Wrap a function into "Apply" function. -the string with workspace_id prefix removed +The provided callback is expected to return a new value based on input, +and its result will always be passed downstream. 
- + -#### prepend\_workspace\_id +### ApplyWithMetadataFunction ```python -def prepend_workspace_id(workspace_id: str, s: str) -> str +class ApplyWithMetadataFunction(StreamFunction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L43) - -Add the workspace ID as a prefix to a given string if it does not have it, - -typically a topic or consumer group it - -**Arguments**: - -- `workspace_id`: the workspace id -- `s`: the string to append to +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L125) -**Returns**: +Wrap a function into "Apply" function. -the string with workspace_id prepended +The provided function is expected to accept value, and timestamp and return +a new value based on input, +and its result will always be passed downstream. - + -### QuixApplicationConfig +### FilterFunction ```python -@dataclasses.dataclass -class QuixApplicationConfig() +class FilterFunction(StreamFunction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L56) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L166) -A convenience container class for Quix Application configs. +Wraps a function into a "Filter" function. +The result of a Filter function is interpreted as boolean. +If it's `True`, the input will be return downstream. +If it's `False`, the `Filtered` exception will be raised to signal that the +value is filtered out. 
- + -### QuixKafkaConfigsBuilder +### FilterWithMetadataFunction ```python -class QuixKafkaConfigsBuilder() +class FilterWithMetadataFunction(StreamFunction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L66) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L187) -Retrieves all the necessary information from the Quix API and builds all the -objects required to connect a confluent-kafka client to the Quix Platform. - -If not executed within the Quix platform directly, you must provide a Quix -"streaming" (aka "sdk") token, or Personal Access Token. +Wraps a function into a "Filter" function. -Ideally you also know your workspace name or id. If not, you can search for it -using a known topic name, but note the search space is limited to the access level -of your token. +The passed callback must accept value, key, and timestamp, and it's expected to +return a boolean-like result. -It also currently handles the app_auto_create_topics setting for Application.Quix. +If the result is `True`, the input will be passed downstream. +Otherwise, the value will be filtered out. - + -#### QuixKafkaConfigsBuilder.\_\_init\_\_ +### UpdateFunction ```python -def __init__(quix_sdk_token: Optional[str] = None, - workspace_id: Optional[str] = None, - quix_portal_api_service: Optional[QuixPortalApiService] = None, - timeout: float = 30, - topic_create_timeout: float = 60) +class UpdateFunction(StreamFunction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L82) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L210) -**Arguments**: +Wrap a function into an "Update" function. 
-- `quix_portal_api_service`: A QuixPortalApiService instance (else generated) -- `workspace_id`: A valid Quix Workspace ID (else searched for) +The provided function must accept a value, and it's expected to mutate it +or to perform some side effect. - +The result of the callback is always ignored, and the original input is passed +downstream. -#### QuixKafkaConfigsBuilder.strip\_workspace\_id\_prefix + + +### UpdateWithMetadataFunction ```python -def strip_workspace_id_prefix(s: str) -> str +class UpdateWithMetadataFunction(StreamFunction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L160) - -Remove the workspace ID from a given string if it starts with it, - -typically a topic or consumer group id - -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L233) -- `s`: the string to append to +Wrap a function into an "Update" function. -**Returns**: +The provided function must accept a value, a key, and a timestamp. +The callback is expected to mutate the value or to perform some side effect with it. -the string with workspace_id prefix removed +The result of the callback is always ignored, and the original input is passed +downstream. - + -#### QuixKafkaConfigsBuilder.prepend\_workspace\_id +### TransformFunction ```python -def prepend_workspace_id(s: str) -> str +class TransformFunction(StreamFunction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L170) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L256) -Add the workspace ID as a prefix to a given string if it does not have it, +Wrap a function into a "Transform" function. 
-typically a topic or consumer group it +The provided callback must accept a value, a key and a timestamp. +It's expected to return a new value, new key and new timestamp. -**Arguments**: +This function must be used with caution, because it can technically change the +key. +It's supposed to be used by the library internals and not be a part of the public +API. -- `s`: the string to append to +The result of the callback will always be passed downstream. -**Returns**: + -the string with workspace_id prepended +## quixstreams.core.stream.stream - + -#### QuixKafkaConfigsBuilder.search\_for\_workspace +### Stream ```python -def search_for_workspace(workspace_name_or_id: Optional[str] = None, - timeout: Optional[float] = None) -> Optional[dict] +class Stream() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L180) - -Search for a workspace given an expected workspace name or id. - -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L34) -- `workspace_name_or_id`: the expected name or id of a workspace -- `timeout`: response timeout (seconds); Default 30 + -**Returns**: +#### Stream.\_\_init\_\_ -the workspace data dict if search success, else None +```python +def __init__(func: Optional[StreamFunction] = None, + parent: Optional[Self] = None) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L35) -#### QuixKafkaConfigsBuilder.get\_workspace\_info +A base class for all streaming operations. -```python -def get_workspace_info(known_workspace_topic: Optional[str] = None, - timeout: Optional[float] = None) -> dict -``` +`Stream` is an abstraction of a function pipeline. +Each Stream has a function and a parent (None by default). 
+When adding new function to the stream, it creates a new `Stream` object and +sets "parent" to the previous `Stream` to maintain an order of execution. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L222) +Streams supports four types of functions: -Queries for workspace data from the Quix API, regardless of instance cache, +- "Apply" - generate new values based on a previous one. + The result of an Apply function is passed downstream to the next functions. + If "expand=True" is passed and the function returns an `Iterable`, + each item of it will be treated as a separate value downstream. +- "Update" - update values in-place. + The result of an Update function is always ignored, and its input is passed + downstream. +- "Filter" - to filter values from the Stream. + The result of a Filter function is interpreted as boolean. + If it's `True`, the input will be passed downstream. + If it's `False`, the record will be filtered from the stream. +- "Transform" - to transform keys and timestamps along with the values. + "Transform" functions may change the keys and should be used with caution. + The result of the Transform function is passed downstream to the next + functions. + If "expand=True" is passed and the function returns an `Iterable`, + each item of it will be treated as a separate value downstream. -and updates instance attributes from query result. +To execute the functions on the `Stream`, call `.compose()` method, and +it will return a closure to execute all the functions accumulated in the Stream +and its parents. **Arguments**: -- `known_workspace_topic`: a topic you know to exist in some workspace -- `timeout`: response timeout (seconds); Default 30 +- `func`: a function to be called on the stream. +It is expected to be wrapped into one of "Apply", "Filter", "Update" or +"Trasform" from `quixstreams.core.stream.functions` package. 
+Default - "ApplyFunction(lambda value: value)". +- `parent`: a parent `Stream` - + -#### QuixKafkaConfigsBuilder.search\_workspace\_for\_topic +#### Stream.add\_filter ```python -def search_workspace_for_topic( - workspace_id: str, - topic: str, - timeout: Optional[float] = None) -> Optional[str] +def add_filter(func: Union[FilterCallback, FilterWithMetadataCallback], + *, + metadata: bool = False) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L250) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L97) -Search through all the topics in the given workspace id to see if there is a - -match with the provided topic. +Add a function to filter values from the Stream. + +The return value of the function will be interpreted as `bool`. +If the function returns `False`-like result, the Stream will raise `Filtered` +exception during execution. **Arguments**: -- `workspace_id`: the workspace to search in -- `topic`: the topic to search for -- `timeout`: response timeout (seconds); Default 30 +- `func`: a function to filter values from the stream +- `metadata`: if True, the callback will receive key and timestamp along with +the value. +Default - `False`. 
**Returns**: -the workspace_id if success, else None +a new `Stream` derived from the current one - + -#### QuixKafkaConfigsBuilder.search\_for\_topic\_workspace +#### Stream.add\_apply ```python -def search_for_topic_workspace(topic: str, - timeout: Optional[float] = None - ) -> Optional[dict] +def add_apply(func: Union[ + ApplyCallback, + ApplyExpandedCallback, + ApplyWithMetadataCallback, + ApplyWithMetadataExpandedCallback, +], + *, + expand: bool = False, + metadata: bool = False) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L271) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L122) -Find what workspace a topic belongs to. +Add an "apply" function to the Stream. -If there is only one workspace altogether, it is assumed to be the workspace. -More than one means each workspace will be searched until the first hit. +The function is supposed to return a new value, which will be passed +further during execution. **Arguments**: -- `topic`: the topic to search for -- `timeout`: response timeout (seconds); Default 30 +- `func`: a function to generate a new value +- `expand`: if True, expand the returned iterable into individual values +downstream. If returned value is not iterable, `TypeError` will be raised. +Default - `False`. +- `metadata`: if True, the callback will receive key and timestamp along with +the value. +Default - `False`. 
**Returns**: -workspace data dict if topic search success, else None +a new `Stream` derived from the current one - + -#### QuixKafkaConfigsBuilder.create\_topics +#### Stream.add\_update ```python -def create_topics(topics: List[Topic], - timeout: Optional[float] = None, - finalize_timeout: Optional[float] = None) +def add_update(func: Union[UpdateCallback, UpdateWithMetadataCallback], + *, + metadata: bool = False) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L369) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L155) -Create topics in a Quix cluster. +Add an "update" function to the Stream, that will mutate the input value. + +The return of this function will be ignored and its input +will be passed downstream. **Arguments**: -- `topics`: a list of `Topic` objects -- `timeout`: response timeout (seconds); Default 30 -- `finalize_timeout`: topic finalization timeout (seconds); Default 60 -marked as "Ready" (and thus ready to produce to/consume from). +- `func`: a function to mutate the value +- `metadata`: if True, the callback will receive key and timestamp along with +the value. +Default - `False`. 
- +**Returns**: -#### QuixKafkaConfigsBuilder.get\_topic +a new Stream derived from the current one + + + +#### Stream.add\_transform ```python -def get_topic(topic_name: str, - timeout: Optional[float] = None) -> Optional[dict] +def add_transform(func: Union[TransformCallback, TransformExpandedCallback], + *, + expand: bool = False) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L419) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L179) -return the topic ID (the actual cluster topic name) if it exists, else None +Add a "transform" function to the Stream, that will mutate the input value. ->***NOTE***: if the name registered in Quix is instead the workspace-prefixed -version, this returns None unless that exact name was created WITHOUT the -Quix API. +The callback must accept a value, a key, and a timestamp. +It's expected to return a new value, new key and new timestamp. + +The result of the callback which will be passed downstream +during execution. **Arguments**: -- `topic_name`: name of the topic -- `timeout`: response timeout (seconds); Default 30 +- `func`: a function to mutate the value +- `expand`: if True, expand the returned iterable into individual items +downstream. If returned value is not iterable, `TypeError` will be raised. +Default - `False`. 
**Returns**: -response dict of the topic info if topic found, else None +a new Stream derived from the current one - + -#### QuixKafkaConfigsBuilder.confirm\_topics\_exist +#### Stream.diff ```python -def confirm_topics_exist(topics: Union[List[Topic], List[str]], - timeout: Optional[float] = None) +def diff(other: "Stream") -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L451) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L204) -Confirm whether the desired set of topics exists in the Quix workspace. +Takes the difference between Streams `self` and `other` based on their last + +common parent, and returns a new `Stream` that includes only this difference. + +It's impossible to calculate a diff when: + - Streams don't have a common parent. + - When the `self` Stream already includes all the nodes from + the `other` Stream, and the resulting diff is empty. **Arguments**: -- `topics`: a list of `Topic` or topic names -- `timeout`: response timeout (seconds); Default 30 +- `other`: a `Stream` to take a diff from. - +**Raises**: -#### QuixKafkaConfigsBuilder.get\_application\_config +- `ValueError`: if Streams don't have a common parent +or if the diff is empty. -```python -def get_application_config(consumer_group_id: str) -> QuixApplicationConfig -``` +**Returns**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L483) +new `Stream` instance including all the Streams from the diff -Get all the necessary attributes for an Application to run on Quix Cloud. 
+ -**Arguments**: +#### Stream.tree -- `consumer_group_id`: consumer group id, if needed +```python +def tree() -> List[Self] +``` -**Returns**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L233) -a QuixApplicationConfig instance +Return a list of all parent Streams including the node itself. - +The tree is ordered from child to parent (current node comes first). -## quixstreams.platforms.quix.env +**Returns**: - +a list of `Stream` objects -### QuixEnvironment + + +#### Stream.compose\_returning ```python -class QuixEnvironment() +def compose_returning() -> ReturningExecutor ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L7) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L247) -Class to access various Quix platform environment settings +Compose a list of functions from this `Stream` and its parents into one +big closure that always returns the transformed record. - +This closure is to be used to execute the functions in the stream and to get +the result of the transformations. -#### QuixEnvironment.state\_management\_enabled +Stream may only contain simple "apply" functions to be able to compose itself +into a returning function. 
+ + + +#### Stream.compose ```python -@property -def state_management_enabled() -> bool +def compose( + allow_filters: bool = True, + allow_updates: bool = True, + allow_expands: bool = True, + allow_transforms: bool = True, + sink: Optional[Callable[[Any, Any, int, Any], + None]] = None) -> VoidExecutor ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L19) - -Check whether "State management" is enabled for the current deployment +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L284) -**Returns**: +Compose a list of functions from this `Stream` and its parents into one -True if state management is enabled, otherwise False +big closure using a "composer" function. - +This "executor" closure is to be used to execute all functions in the stream for the given +key, value and timestamps. -#### QuixEnvironment.deployment\_id +By default, executor doesn't return the result of the execution. +To accumulate the results, pass the `sink` parameter. -```python -@property -def deployment_id() -> Optional[str] -``` +**Arguments**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L27) +- `allow_filters`: If False, this function will fail with `ValueError` if +the stream has filter functions in the tree. Default - True. +- `allow_updates`: If False, this function will fail with `ValueError` if +the stream has update functions in the tree. Default - True. +- `allow_expands`: If False, this function will fail with `ValueError` if +the stream has functions with "expand=True" in the tree. Default - True. +- `allow_transforms`: If False, this function will fail with `ValueError` if +the stream has transform functions in the tree. Default - True. +- `sink`: callable to accumulate the results of the execution, optional. 
-Return current Quix deployment id. +**Raises**: -This variable is meant to be set only by Quix Platform and only -when the application is deployed. +- `ValueError`: if disallowed functions are present in the stream tree. -**Returns**: + -deployment id or None +## quixstreams.dataframe.utils - + -#### QuixEnvironment.workspace\_id +#### ensure\_milliseconds ```python -@property -def workspace_id() -> Optional[str] +def ensure_milliseconds(delta: Union[int, timedelta]) -> int ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L39) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/utils.py#L5) -Return Quix workspace id if set +Convert timedelta to milliseconds. -**Returns**: +If the `delta` is not +This function will also round the value to the closest milliseconds in case of +higher precision. -workspace id or None +**Arguments**: - +- `delta`: `timedelta` object -#### QuixEnvironment.portal\_api +**Returns**: -```python -@property -def portal_api() -> Optional[str] -``` +timedelta value in milliseconds as `int` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L47) + -Return Quix Portal API url if set +## quixstreams.dataframe.windows -**Returns**: + -portal API URL or None +## quixstreams.dataframe.windows.definitions - + -#### QuixEnvironment.state\_dir +### FixedTimeWindowDefinition ```python -@property -def state_dir() -> str +class FixedTimeWindowDefinition(abc.ABC) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L56) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L20) -Return application state directory on Quix. 
+ -**Returns**: +#### FixedTimeWindowDefinition.sum -path to state dir +```python +def sum() -> "FixedTimeWindow" +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L67) -## quixstreams.platforms.quix.checks - - - -#### check\_state\_management\_enabled +Configure the window to aggregate data by summing up values within -```python -def check_state_management_enabled() -``` +each window period. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/checks.py#L11) +**Returns**: -Check if State Management feature is enabled for the current deployment on -Quix platform. -If it's disabled, the exception will be raised. +an instance of `FixedTimeWindow` configured to perform sum aggregation. - + -#### check\_state\_dir +#### FixedTimeWindowDefinition.count ```python -def check_state_dir(state_dir: str) +def count() -> "FixedTimeWindow" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/checks.py#L28) - -Check if Application "state_dir" matches the state dir on Quix platform. - -If it doesn't match, the warning will be logged. - -**Arguments**: - -- `state_dir`: application state_dir path +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L94) - +Configure the window to aggregate data by counting the number of values -## quixstreams.platforms.quix +within each window period. - +**Returns**: -## quixstreams.platforms.quix.api +an instance of `FixedTimeWindow` configured to perform record count. 
- + -### QuixPortalApiService +#### FixedTimeWindowDefinition.mean ```python -class QuixPortalApiService() +def mean() -> "FixedTimeWindow" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/api.py#L19) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L121) -A light wrapper around the Quix Portal Api. If used in the Quix Platform, it will -use that workspaces auth token and portal endpoint, else you must provide it. +Configure the window to aggregate data by calculating the mean of the values -Function names closely reflect the respective API endpoint, -each starting with the method [GET, POST, etc.] followed by the endpoint path. +within each window period. -Results will be returned in the form of request's Response.json(), unless something -else is required. Non-200's will raise exceptions. +**Returns**: -See the swagger documentation for more info about the endpoints. +an instance of `FixedTimeWindow` configured to calculate the mean +of the values. - + -#### QuixPortalApiService.get\_workspace\_certificate +#### FixedTimeWindowDefinition.reduce ```python -def get_workspace_certificate(workspace_id: Optional[str] = None, - timeout: float = 30) -> Optional[bytes] +def reduce(reducer: Callable[[Any, Any], Any], + initializer: Callable[[Any], Any]) -> "FixedTimeWindow" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/api.py#L119) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L152) -Get a workspace TLS certificate if available. +Configure the window to perform a custom aggregation using `reducer` -Returns `None` if certificate is not specified. +and `initializer` functions. 
-**Arguments**: +Example Snippet: +```python +sdf = StreamingDataFrame(...) -- `workspace_id`: workspace id, optional -- `timeout`: request timeout; Default 30 +# Using "reduce()" to calculate multiple aggregates at once +def reducer(agg: dict, current: int): + aggregated = { + 'min': min(agg['min'], current), + 'max': max(agg['max'], current) + 'count': agg['count'] + 1 + } + return aggregated -**Returns**: +def initializer(current) -> dict: + return {'min': current, 'max': current, 'count': 1} -certificate as bytes if present, or None +window = ( + sdf.tumbling_window(duration_ms=1000) + .reduce(reducer=reducer, initializer=initializer) + .final() +) +``` - +**Arguments**: -## quixstreams.platforms.quix.exceptions +- `reducer`: A function that takes two arguments +(the accumulated value and a new value) and returns a single value. +The returned value will be saved to the state store and sent downstream. +- `initializer`: A function to call for every first element of the window. +This function is used to initialize the aggregation within a window. - +**Returns**: -## quixstreams.platforms.quix.topic\_manager +A window configured to perform custom reduce aggregation on the data. - + -### QuixTopicManager +#### FixedTimeWindowDefinition.max ```python -class QuixTopicManager(TopicManager) +def max() -> "FixedTimeWindow" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/topic_manager.py#L9) - -The source of all topic management with quixstreams. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L212) -This is specifically for Applications using the Quix platform. +Configure a window to aggregate the maximum value within each window period. 
-Generally initialized and managed automatically by an `Application.Quix`, -but allows a user to work with it directly when needed, such as using it alongside -a plain `Producer` to create its topics. +**Returns**: -See methods for details. +an instance of `FixedTimeWindow` configured to calculate the maximum +value within each window period. - + -#### QuixTopicManager.\_\_init\_\_ +#### FixedTimeWindowDefinition.min ```python -def __init__(topic_admin: TopicAdmin, - consumer_group: str, - quix_config_builder: QuixKafkaConfigsBuilder, - timeout: float = 30, - create_timeout: float = 60) +def min() -> "FixedTimeWindow" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/topic_manager.py#L30) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L241) -**Arguments**: +Configure a window to aggregate the minimum value within each window period. -- `topic_admin`: an `Admin` instance -- `quix_config_builder`: A QuixKafkaConfigsBuilder instance, else one is -generated for you. -- `timeout`: response timeout (seconds) -- `create_timeout`: timeout for topic creation +**Returns**: - +an instance of `FixedTimeWindow` configured to calculate the maximum +value within each window period. -## quixstreams.dataframe.dataframe + - +## quixstreams.dataframe.windows.base -### StreamingDataFrame + + +#### get\_window\_ranges ```python -class StreamingDataFrame(BaseStreaming) +def get_window_ranges(timestamp_ms: int, + duration_ms: int, + step_ms: Optional[int] = None) -> List[Tuple[int, int]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L62) - -`StreamingDataFrame` is the main object you will use for ETL work. 
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/base.py#L18) -Typically created with an `app = quixstreams.app.Application()` instance, -via `sdf = app.dataframe()`. +Get a list of window ranges for the given timestamp. +**Arguments**: -What it Does: +- `timestamp_ms`: timestamp in milliseconds +- `duration_ms`: window duration in milliseconds +- `step_ms`: window step in milliseconds for hopping windows, optional. -- Builds a data processing pipeline, declaratively (not executed immediately) - - Executes this pipeline on inputs at runtime (Kafka message values) -- Provides functions/interface similar to Pandas Dataframes/Series -- Enables stateful processing (and manages everything related to it) +**Returns**: +a list of (, ) tuples -How to Use: + -Define various operations while continuously reassigning to itself (or new fields). +## quixstreams.dataframe.windows.time\_based -These operations will generally transform your data, access/update state, or produce -to kafka topics. + -We recommend your data structure to be "columnar" (aka a dict/JSON) in nature so -that it works with the entire interface, but simple types like `ints`, `str`, etc. -are also supported. +### FixedTimeWindow -See the various methods and classes for more specifics, or for a deep dive into -usage, see `streamingdataframe.md` under the `docs/` folder. +```python +class FixedTimeWindow() +``` ->***NOTE:*** column referencing like `sdf["a_column"]` and various methods often - create other object types (typically `quixstreams.dataframe.StreamingSeries`), - which is expected; type hinting should alert you to any issues should you - attempt invalid operations with said objects (however, we cannot infer whether - an operation is valid with respect to your data!). 
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/time_based.py#L32) + -Example Snippet: +#### FixedTimeWindow.final ```python -sdf = StreamingDataframe() -sdf = sdf.apply(a_func) -sdf = sdf.filter(another_func) -sdf = sdf.to_topic(topic_obj) +def final() -> "StreamingDataFrame" ``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/time_based.py#L107) -#### StreamingDataFrame.apply +Apply the window aggregation and return results only when the windows are +closed. +The format of returned windows: ```python -def apply(func: Union[ - ApplyCallback, - ApplyCallbackStateful, - ApplyWithMetadataCallback, - ApplyWithMetadataCallbackStateful, -], - *, - stateful: bool = False, - expand: bool = False, - metadata: bool = False) -> Self +{ + "start": , + "end": , + "value: , +} ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L177) - -Apply a function to transform the value and return a new value. +The individual window is closed when the event time +(the maximum observed timestamp across the partition) passes +its end timestamp + grace period. +The closed windows cannot receive updates anymore and are considered final. -The result will be passed downstream as an input value. +>***NOTE:*** Windows can be closed only within the same message key. +If some message keys appear irregularly in the stream, the latest windows +can remain unprocessed until the message the same key is received. + -Example Snippet: +#### FixedTimeWindow.current ```python -# This stores a string in state and capitalizes every column with a string value. -# A second apply then keeps only the string value columns (shows non-stateful). 
-def func(d: dict, state: State): - value = d["store_field"] - if value != state.get("my_store_key"): - state.set("my_store_key") = value - return {k: v.upper() if isinstance(v, str) else v for k, v in d.items()} +def current() -> "StreamingDataFrame" +``` -sdf = StreamingDataframe() -sdf = sdf.apply(func, stateful=True) -sdf = sdf.apply(lambda d: {k: v for k,v in d.items() if isinstance(v, str)}) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/time_based.py#L145) + +Apply the window transformation to the StreamingDataFrame to return results +for each updated window. +The format of returned windows: +```python +{ + "start": , + "end": , + "value: , +} ``` -**Arguments**: +This method processes streaming data and returns results as they come, +regardless of whether the window is closed or not. -- `func`: a function to apply -- `stateful`: if `True`, the function will be provided with a second argument -of type `State` to perform stateful operations. -- `expand`: if True, expand the returned iterable into individual values -downstream. If returned value is not iterable, `TypeError` will be raised. -Default - `False`. -- `metadata`: if True, the callback will receive key, timestamp and headers -along with the value. -Default - `False`. + - +## quixstreams.dataframe -#### StreamingDataFrame.update + -```python -def update(func: Union[ - UpdateCallback, - UpdateCallbackStateful, - UpdateWithMetadataCallback, - UpdateWithMetadataCallbackStateful, -], - *, - stateful: bool = False, - metadata: bool = False) -> Self -``` +## quixstreams.dataframe.exceptions -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L266) + -Apply a function to mutate value in-place or to perform a side effect +## quixstreams.dataframe.base -(e.g., printing a value to the console). 
+ -The result of the function will be ignored, and the original value will be -passed downstream. +## quixstreams.dataframe.series + -Example Snippet: +### StreamingSeries ```python -# Stores a value and mutates a list by appending a new item to it. -# Also prints to console. +class StreamingSeries(BaseStreaming) +``` -def func(values: list, state: State): - value = values[0] - if value != state.get("my_store_key"): - state.set("my_store_key") = value - values.append("new_item") +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L47) -sdf = StreamingDataframe() -sdf = sdf.update(func, stateful=True) -sdf = sdf.update(lambda value: print("Received value: ", value)) -``` +`StreamingSeries` are typically generated by `StreamingDataframes` when getting +elements from, or performing certain operations on, a `StreamingDataframe`, +thus acting as a representation of "column" value. -**Arguments**: +They share some operations with the `StreamingDataframe`, but also provide some +additional functionality. -- `func`: function to update value -- `stateful`: if `True`, the function will be provided with a second argument -of type `State` to perform stateful operations. -- `metadata`: if True, the callback will receive key, timestamp and headers -along with the value. -Default - `False`. +Most column value operations are handled by this class, and `StreamingSeries` can +generate other `StreamingSeries` as a result of said operations. - -#### StreamingDataFrame.filter +What it Does: -```python -def filter(func: Union[ - FilterCallback, - FilterCallbackStateful, - FilterWithMetadataCallback, - FilterWithMetadataCallbackStateful, -], - *, - stateful: bool = False, - metadata: bool = False) -> Self -``` +- Allows ways to do simple operations with dataframe "column"/dictionary values: + - Basic ops like add, subtract, modulo, etc. +- Enables comparisons/inequalities: + - Greater than, equals, etc. 
+ - and/or, is/not operations +- Can check for existence of columns in `StreamingDataFrames` +- Enables chaining of various operations together -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L354) -Filter value using provided function. +How to Use: + +For the most part, you may not even notice this class exists! +They will naturally be created as a result of typical `StreamingDataFrame` use. + +Auto-complete should help you with valid methods and type-checking should alert +you to invalid operations between `StreamingSeries`. + +In general, any typical Pands dataframe operation between columns should be valid +with `StreamingSeries`, and you shouldn't have to think about them explicitly. -If the function returns True-like value, the original value will be -passed downstream. Example Snippet: ```python -# Stores a value and allows further processing only if the value is greater than -# what was previously stored. - -def func(d: dict, state: State): - value = d["my_value"] - if value > state.get("my_store_key"): - state.set("my_store_key") = value - return True - return False +# Random methods for example purposes. More detailed explanations found under +# various methods or in the docs folder. sdf = StreamingDataframe() -sdf = sdf.filter(func, stateful=True) +sdf = sdf["column_a"].apply(a_func).apply(diff_func, stateful=True) +sdf["my_new_bool_field"] = sdf["column_b"].contains("this_string") +sdf["new_sum_field"] = sdf["column_c"] + sdf["column_d"] + 2 +sdf = sdf[["column_a"] & (sdf["new_sum_field"] >= 10)] +``` + + + +#### StreamingSeries.from\_apply\_callback + +```python +@classmethod +def from_apply_callback(cls, func: ApplyWithMetadataCallback) -> Self ``` +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L107) + +Create a StreamingSeries from a function. 
+ +The provided function will be wrapped into `Apply` + **Arguments**: -- `func`: function to filter value -- `stateful`: if `True`, the function will be provided with second argument -of type `State` to perform stateful operations. -- `metadata`: if True, the callback will receive key, timestamp and headers -along with the value. -Default - `False`. +- `func`: a function to apply - +**Returns**: -#### StreamingDataFrame.group\_by +instance of `StreamingSeries` -```python -def group_by(key: Union[str, Callable[[Any], Any]], - name: Optional[str] = None, - value_deserializer: Optional[DeserializerType] = "json", - key_deserializer: Optional[DeserializerType] = "json", - value_serializer: Optional[SerializerType] = "json", - key_serializer: Optional[SerializerType] = "json") -> Self -``` + -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L440) +#### StreamingSeries.apply -"Groups" messages by re-keying them via the provided group_by operation +```python +def apply(func: ApplyCallback) -> Self +``` -on their message values. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L121) -This enables things like aggregations on messages with non-matching keys. +Add a callable to the execution list for this series. -You can provide a column name (uses the column's value) or a custom function -to generate this new key. +The provided callable should accept a single argument, which will be its input. +The provided callable should similarly return one output, or None -`.groupby()` can only be performed once per `StreamingDataFrame` instance. +They can be chained together or included with other operations. ->**NOTE:** group_by generates a topic that copies the original topic's settings. 
Example Snippet: ```python -# We have customer purchase events where the message key is the "store_id", -# but we want to calculate sales per customer (by "customer_account_id"). +# The `StreamingSeries` are generated when `sdf["COLUMN_NAME"]` is called. +# This stores a string in state and capitalizes the column value; the result is +# assigned to a new column. +# Another apply converts a str column to an int, assigning it to a new column. -def func(d: dict, state: State): - current_total = state.get("customer_sum", 0) - new_total = current_total + d["customer_spent"] - state.set("customer_sum", new_total) - d["customer_total"] = new_total - return d +def func(value: str, state: State): + if value != state.get("my_store_key"): + state.set("my_store_key") = value + return v.upper() sdf = StreamingDataframe() -sdf = sdf.group_by("customer_account_id") -sdf = sdf.apply(func, stateful=True) +sdf["new_col"] = sdf["a_column"]["nested_dict_key"].apply(func, stateful=True) +sdf["new_col_2"] = sdf["str_col"].apply(lambda v: int(v)) + sdf["str_col2"] + 2 ``` **Arguments**: -- `key`: how the new key should be generated from the message value; -requires a column name (string) or a callable that takes the message value. -- `name`: a name for the op (must be unique per group-by), required if `key` -is a custom callable. -- `value_deserializer`: a deserializer type for values; default - JSON -- `key_deserializer`: a deserializer type for keys; default - JSON -- `value_serializer`: a serializer type for values; default - JSON -- `key_serializer`: a serializer type for keys; default - JSON +- `func`: a callable with one argument and one output **Returns**: -a clone with this operation added (assign to keep its effect). 
+a new `StreamingSeries` with the new callable added - + -#### StreamingDataFrame.contains +#### StreamingSeries.compose\_returning ```python -@staticmethod -def contains(key: str) -> StreamingSeries +def compose_returning() -> ReturningExecutor ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L518) - -Check if the key is present in the Row value. - -Example Snippet: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L155) -```python -# Add new column 'has_column' which contains a boolean indicating -# the presence of 'column_x' +Compose a list of functions from this StreamingSeries and its parents into one -sdf = StreamingDataframe() -sdf['has_column'] = sdf.contains('column_x') -``` +big closure that always returns the transformed record. -**Arguments**: +This closure is to be used to execute the functions in the stream and to get +the result of the transformations. -- `key`: a column name to check. +Stream may only contain simple "apply" functions to be able to compose itself +into a returning function. **Returns**: -a Column object that evaluates to True if the key is present -or False otherwise. +a callable accepting value, key and timestamp and +returning a tuple "(value, key, timestamp) - + -#### StreamingDataFrame.to\_topic +#### StreamingSeries.compose ```python -def to_topic(topic: Topic, key: Optional[Callable[[Any], Any]] = None) -> Self +def compose( + sink: Optional[Callable[[Any, Any, int, Any], + None]] = None) -> VoidExecutor ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L543) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L170) + +Compose all functions of this StreamingSeries into one big closure. 
+ +Generally not required by users; the `quixstreams.app.Application` class will +do this automatically. -Produce current value to a topic. You can optionally specify a new key. Example Snippet: ```python from quixstreams import Application -# Produce to two different topics, changing the key for one of them. +app = Application(...) -app = Application() -input_topic = app.topic("input_x") -output_topic_0 = app.topic("output_a") -output_topic_1 = app.topic("output_b") +sdf = app.dataframe() +sdf = sdf["column_a"].apply(apply_func) +sdf = sdf["column_b"].contains(filter_func) +sdf = sdf.compose() -sdf = app.dataframe(input_topic) -sdf = sdf.to_topic(output_topic_0) -sdf = sdf.to_topic(output_topic_1, key=lambda data: data["a_field"]) +result_0 = sdf({"my": "record"}) +result_1 = sdf({"other": "record"}) ``` **Arguments**: -- `topic`: instance of `Topic` -- `key`: a callable to generate a new message key, optional. -If passed, the return type of this callable must be serializable -by `key_serializer` defined for this Topic object. -By default, the current message key will be used. - - - -#### StreamingDataFrame.set\_timestamp +- `sink`: callable to accumulate the results of the execution. -```python -def set_timestamp(func: Callable[[Any, Any, int, Any], int]) -> Self -``` +**Raises**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L584) +- `ValueError`: if disallowed functions are present in the tree of +underlying `Stream`. -Set a new timestamp based on the current message value and its metadata. +**Returns**: -The new timestamp will be used in windowed aggregations and when producing -messages to the output topics. +a callable accepting value, key and timestamp and +returning None -The new timestamp must be in milliseconds to conform Kafka requirements. 
+ -Example Snippet: +#### StreamingSeries.test ```python -from quixstreams import Application +def test(value: Any, + key: Any, + timestamp: int, + headers: Optional[Any] = None, + ctx: Optional[MessageContext] = None) -> Any +``` +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L214) -app = Application() -input_topic = app.topic("data") +A shorthand to test `StreamingSeries` with provided value -sdf = app.dataframe(input_topic) -# Updating the record's timestamp based on the value -sdf = sdf.set_timestamp(lambda value, key, timestamp, headers: value['new_timestamp']) -``` +and `MessageContext`. **Arguments**: -- `func`: callable accepting the current value, key, timestamp, and headers. -It's expected to return a new timestamp as integer in milliseconds. +- `value`: value to pass through `StreamingSeries` +- `ctx`: instance of `MessageContext`, optional. +Provide it if the StreamingSeries instance has +functions calling `get_current_key()`. +Default - `None`. **Returns**: -a new StreamingDataFrame instance +result of `StreamingSeries` - + -#### StreamingDataFrame.set\_headers +#### StreamingSeries.isin ```python -def set_headers( - func: Callable[ - [Any, Any, int, List[Tuple[str, HeaderValue]]], - Collection[Tuple[str, HeaderValue]], - ] -) -> Self +def isin(other: Container) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L625) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L269) -Set new message headers based on the current message value and metadata. +Check if series value is in "other". -The new headers will be used when producing messages to the output topics. +Same as "StreamingSeries in other". + +Runtime result will be a `bool`. 
-The provided callback must accept value, key, timestamp, and headers, -and return a new collection of (header, value) tuples. Example Snippet: ```python from quixstreams import Application +# Check if "str_column" is contained in a column with a list of strings and +# assign the resulting `bool` to a new column: "has_my_str". -app = Application() -input_topic = app.topic("data") - -sdf = app.dataframe(input_topic) -# Updating the record's headers based on the value and metadata -sdf = sdf.set_headers(lambda value, key, timestamp, headers: [('id', value['id'])]) +sdf = app.dataframe() +sdf["has_my_str"] = sdf["str_column"].isin(sdf["column_with_list_of_strs"]) ``` **Arguments**: -- `func`: callable accepting the current value, key, timestamp, and headers. -It's expected to return a new set of headers -as a collection of (header, value) tuples. +- `other`: a container to check **Returns**: -a new StreamingDataFrame instance +new StreamingSeries - + -#### StreamingDataFrame.compose +#### StreamingSeries.contains ```python -def compose( - sink: Optional[Callable[[Any, Any, int, Any], None]] = None -) -> Dict[str, VoidExecutor] +def contains(other: Union[Self, object]) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L676) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L296) -Compose all functions of this StreamingDataFrame into one big closure. +Check if series value contains "other" -Closures are more performant than calling all the functions in the -`StreamingDataFrame` one-by-one. +Same as "other in StreamingSeries". -Generally not required by users; the `quixstreams.app.Application` class will -do this automatically. +Runtime result will be a `bool`. 
Example Snippet: ```python from quixstreams import Application -sdf = app.dataframe() -sdf = sdf.apply(apply_func) -sdf = sdf.filter(filter_func) -sdf = sdf.compose() -result_0 = sdf({"my": "record"}) -result_1 = sdf({"other": "record"}) +# Check if "column_a" contains "my_substring" and assign the resulting +# `bool` to a new column: "has_my_substr" + +sdf = app.dataframe() +sdf["has_my_substr"] = sdf["column_a"].contains("my_substring") ``` **Arguments**: -- `sink`: callable to accumulate the results of the execution, optional. +- `other`: object to check **Returns**: -a function that accepts "value" -and returns a result of StreamingDataFrame +new StreamingSeries - + -#### StreamingDataFrame.test +#### StreamingSeries.is\_ ```python -def test(value: Any, - key: Any, - timestamp: int, - headers: Optional[Any] = None, - ctx: Optional[MessageContext] = None, - topic: Optional[Topic] = None) -> List[Any] +def is_(other: Union[Self, object]) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L713) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L321) -A shorthand to test `StreamingDataFrame` with provided value +Check if series value refers to the same object as `other` -and `MessageContext`. +Runtime result will be a `bool`. -**Arguments**: -- `value`: value to pass through `StreamingDataFrame` -- `key`: key to pass through `StreamingDataFrame` -- `timestamp`: timestamp to pass through `StreamingDataFrame` -- `ctx`: instance of `MessageContext`, optional. -Provide it if the StreamingDataFrame instance calls `to_topic()`, -has stateful functions or windows. -Default - `None`. 
-- `topic`: optionally, a topic branch to test with +Example Snippet: -**Returns**: +```python +# Check if "column_a" is the same as "column_b" and assign the resulting `bool` +# to a new column: "is_same" -result of `StreamingDataFrame` +from quixstreams import Application +sdf = app.dataframe() +sdf["is_same"] = sdf["column_a"].is_(sdf["column_b"]) +``` - +**Arguments**: -#### StreamingDataFrame.tumbling\_window +- `other`: object to check for "is" -```python -def tumbling_window(duration_ms: Union[int, timedelta], - grace_ms: Union[int, timedelta] = 0, - name: Optional[str] = None) -> TumblingWindowDefinition -``` +**Returns**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L750) +new StreamingSeries -Create a tumbling window transformation on this StreamingDataFrame. + -Tumbling windows divide time into fixed-sized, non-overlapping windows. +#### StreamingSeries.isnot -They allow performing stateful aggregations like `sum`, `reduce`, etc. -on top of the data and emit results downstream. +```python +def isnot(other: Union[Self, object]) -> Self +``` -Notes: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L344) -- The timestamp of the aggregation result is set to the window start timestamp. -- Every window is grouped by the current Kafka message key. -- Messages with `None` key will be ignored. -- The time windows always use the current event time. +Check if series value does not refer to the same object as `other` +Runtime result will be a `bool`. Example Snippet: ```python -app = Application() -sdf = app.dataframe(...) 
- -sdf = ( - # Define a tumbling window of 60s and grace period of 10s - sdf.tumbling_window( - duration_ms=timedelta(seconds=60), grace_ms=timedelta(seconds=10.0) - ) +from quixstreams import Application - # Specify the aggregation function - .sum() +# Check if "column_a" is the same as "column_b" and assign the resulting `bool` +# to a new column: "is_not_same" - # Specify how the results should be emitted downstream. - # "all()" will emit results as they come for each updated window, - # possibly producing multiple messages per key-window pair - # "final()" will emit windows only when they are closed and cannot - # receive any updates anymore. - .all() -) +sdf = app.dataframe() +sdf["is_not_same"] = sdf["column_a"].isnot(sdf["column_b"]) ``` **Arguments**: -- `duration_ms`: The length of each window. -Can be specified as either an `int` representing milliseconds or a -`timedelta` object. ->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond -value. -- `grace_ms`: The grace period for data arrival. -It allows late-arriving data (data arriving after the window -has theoretically closed) to be included in the window. -Can be specified as either an `int` representing milliseconds -or as a `timedelta` object. ->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond -value. -- `name`: The unique identifier for the window. If not provided, it will be -automatically generated based on the window's properties. +- `other`: object to check for "is_not" **Returns**: -`TumblingWindowDefinition` instance representing the tumbling window -configuration. -This object can be further configured with aggregation functions -like `sum`, `count`, etc. applied to the StreamingDataFrame. 
+new StreamingSeries - + -#### StreamingDataFrame.hopping\_window +#### StreamingSeries.isnull ```python -def hopping_window(duration_ms: Union[int, timedelta], - step_ms: Union[int, timedelta], - grace_ms: Union[int, timedelta] = 0, - name: Optional[str] = None) -> HoppingWindowDefinition +def isnull() -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L826) - -Create a hopping window transformation on this StreamingDataFrame. - -Hopping windows divide the data stream into overlapping windows based on time. -The overlap is controlled by the `step_ms` parameter. - -They allow performing stateful aggregations like `sum`, `reduce`, etc. -on top of the data and emit results downstream. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L368) -Notes: +Check if series value is None. -- The timestamp of the aggregation result is set to the window start timestamp. -- Every window is grouped by the current Kafka message key. -- Messages with `None` key will be ignored. -- The time windows always use the current event time. +Runtime result will be a `bool`. Example Snippet: ```python -app = Application() -sdf = app.dataframe(...) - -sdf = ( - # Define a hopping window of 60s with step 30s and grace period of 10s - sdf.hopping_window( - duration_ms=timedelta(seconds=60), - step_ms=timedelta(seconds=30), - grace_ms=timedelta(seconds=10) - ) +from quixstreams import Application - # Specify the aggregation function - .sum() +# Check if "column_a" is null and assign the resulting `bool` to a new column: +# "is_null" - # Specify how the results should be emitted downstream. - # "all()" will emit results as they come for each updated window, - # possibly producing multiple messages per key-window pair - # "final()" will emit windows only when they are closed and cannot - # receive any updates anymore. 
- .all() -) +sdf = app.dataframe() +sdf["is_null"] = sdf["column_a"].isnull() ``` -**Arguments**: +**Returns**: -- `duration_ms`: The length of each window. It defines the time span for -which each window aggregates data. -Can be specified as either an `int` representing milliseconds -or a `timedelta` object. ->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond -value. -- `step_ms`: The step size for the window. -It determines how much each successive window moves forward in time. -Can be specified as either an `int` representing milliseconds -or a `timedelta` object. ->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond -value. -- `grace_ms`: The grace period for data arrival. -It allows late-arriving data to be included in the window, -even if it arrives after the window has theoretically moved forward. -Can be specified as either an `int` representing milliseconds -or a `timedelta` object. ->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond -value. -- `name`: The unique identifier for the window. If not provided, it will be -automatically generated based on the window's properties. +new StreamingSeries + + + +#### StreamingSeries.notnull + +```python +def notnull() -> Self +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L391) + +Check if series value is not None. + +Runtime result will be a `bool`. + + +Example Snippet: + +```python +from quixstreams import Application + +# Check if "column_a" is not null and assign the resulting `bool` to a new column: +# "is_not_null" + +sdf = app.dataframe() +sdf["is_not_null"] = sdf["column_a"].notnull() +``` **Returns**: -`HoppingWindowDefinition` instance representing the hopping -window configuration. -This object can be further configured with aggregation functions -like `sum`, `count`, etc. and applied to the StreamingDataFrame. 
+new StreamingSeries - + -## quixstreams.dataframe.series +#### StreamingSeries.abs + +```python +def abs() -> Self +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L414) + +Get absolute value of the series value. + +Example Snippet: + +```python +from quixstreams import Application + +# Get absolute value of "int_col" and add it to "other_int_col". +# Finally, assign the result to a new column: "abs_col_sum". + +sdf = app.dataframe() +sdf["abs_col_sum"] = sdf["int_col"].abs() + sdf["other_int_col"] +``` + +**Returns**: + +new StreamingSeries + + + +## quixstreams.dataframe.dataframe + + + +### StreamingDataFrame + +```python +class StreamingDataFrame(BaseStreaming) +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L62) + +`StreamingDataFrame` is the main object you will use for ETL work. + +Typically created with an `app = quixstreams.app.Application()` instance, +via `sdf = app.dataframe()`. + + +What it Does: + +- Builds a data processing pipeline, declaratively (not executed immediately) + - Executes this pipeline on inputs at runtime (Kafka message values) +- Provides functions/interface similar to Pandas Dataframes/Series +- Enables stateful processing (and manages everything related to it) + + +How to Use: + +Define various operations while continuously reassigning to itself (or new fields). + +These operations will generally transform your data, access/update state, or produce +to kafka topics. + +We recommend your data structure to be "columnar" (aka a dict/JSON) in nature so +that it works with the entire interface, but simple types like `ints`, `str`, etc. +are also supported. + +See the various methods and classes for more specifics, or for a deep dive into +usage, see `streamingdataframe.md` under the `docs/` folder. 
+ +>***NOTE:*** column referencing like `sdf["a_column"]` and various methods often + create other object types (typically `quixstreams.dataframe.StreamingSeries`), + which is expected; type hinting should alert you to any issues should you + attempt invalid operations with said objects (however, we cannot infer whether + an operation is valid with respect to your data!). + + +Example Snippet: + +```python +sdf = StreamingDataframe() +sdf = sdf.apply(a_func) +sdf = sdf.filter(another_func) +sdf = sdf.to_topic(topic_obj) +``` + + + +#### StreamingDataFrame.apply + +```python +def apply(func: Union[ + ApplyCallback, + ApplyCallbackStateful, + ApplyWithMetadataCallback, + ApplyWithMetadataCallbackStateful, +], + *, + stateful: bool = False, + expand: bool = False, + metadata: bool = False) -> Self +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L177) + +Apply a function to transform the value and return a new value. + +The result will be passed downstream as an input value. + + +Example Snippet: + +```python +# This stores a string in state and capitalizes every column with a string value. +# A second apply then keeps only the string value columns (shows non-stateful). +def func(d: dict, state: State): + value = d["store_field"] + if value != state.get("my_store_key"): + state.set("my_store_key") = value + return {k: v.upper() if isinstance(v, str) else v for k, v in d.items()} + +sdf = StreamingDataframe() +sdf = sdf.apply(func, stateful=True) +sdf = sdf.apply(lambda d: {k: v for k,v in d.items() if isinstance(v, str)}) + +``` + +**Arguments**: + +- `func`: a function to apply +- `stateful`: if `True`, the function will be provided with a second argument +of type `State` to perform stateful operations. +- `expand`: if True, expand the returned iterable into individual values +downstream. If returned value is not iterable, `TypeError` will be raised. +Default - `False`. 
+- `metadata`: if True, the callback will receive key, timestamp and headers +along with the value. +Default - `False`. + + + +#### StreamingDataFrame.update + +```python +def update(func: Union[ + UpdateCallback, + UpdateCallbackStateful, + UpdateWithMetadataCallback, + UpdateWithMetadataCallbackStateful, +], + *, + stateful: bool = False, + metadata: bool = False) -> Self +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L266) + +Apply a function to mutate value in-place or to perform a side effect + +(e.g., printing a value to the console). + +The result of the function will be ignored, and the original value will be +passed downstream. + + +Example Snippet: + +```python +# Stores a value and mutates a list by appending a new item to it. +# Also prints to console. + +def func(values: list, state: State): + value = values[0] + if value != state.get("my_store_key"): + state.set("my_store_key") = value + values.append("new_item") + +sdf = StreamingDataframe() +sdf = sdf.update(func, stateful=True) +sdf = sdf.update(lambda value: print("Received value: ", value)) +``` + +**Arguments**: + +- `func`: function to update value +- `stateful`: if `True`, the function will be provided with a second argument +of type `State` to perform stateful operations. +- `metadata`: if True, the callback will receive key, timestamp and headers +along with the value. +Default - `False`. + + + +#### StreamingDataFrame.filter + +```python +def filter(func: Union[ + FilterCallback, + FilterCallbackStateful, + FilterWithMetadataCallback, + FilterWithMetadataCallbackStateful, +], + *, + stateful: bool = False, + metadata: bool = False) -> Self +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L354) + +Filter value using provided function. 
+ +If the function returns True-like value, the original value will be +passed downstream. + +Example Snippet: + +```python +# Stores a value and allows further processing only if the value is greater than +# what was previously stored. + +def func(d: dict, state: State): + value = d["my_value"] + if value > state.get("my_store_key"): + state.set("my_store_key") = value + return True + return False + +sdf = StreamingDataframe() +sdf = sdf.filter(func, stateful=True) +``` + +**Arguments**: + +- `func`: function to filter value +- `stateful`: if `True`, the function will be provided with second argument +of type `State` to perform stateful operations. +- `metadata`: if True, the callback will receive key, timestamp and headers +along with the value. +Default - `False`. + + + +#### StreamingDataFrame.group\_by + +```python +def group_by(key: Union[str, Callable[[Any], Any]], + name: Optional[str] = None, + value_deserializer: Optional[DeserializerType] = "json", + key_deserializer: Optional[DeserializerType] = "json", + value_serializer: Optional[SerializerType] = "json", + key_serializer: Optional[SerializerType] = "json") -> Self +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L440) + +"Groups" messages by re-keying them via the provided group_by operation + +on their message values. + +This enables things like aggregations on messages with non-matching keys. + +You can provide a column name (uses the column's value) or a custom function +to generate this new key. + +`.groupby()` can only be performed once per `StreamingDataFrame` instance. + +>**NOTE:** group_by generates a topic that copies the original topic's settings. + +Example Snippet: + +```python +# We have customer purchase events where the message key is the "store_id", +# but we want to calculate sales per customer (by "customer_account_id"). 
+ +def func(d: dict, state: State): + current_total = state.get("customer_sum", 0) + new_total = current_total + d["customer_spent"] + state.set("customer_sum", new_total) + d["customer_total"] = new_total + return d + +sdf = StreamingDataframe() +sdf = sdf.group_by("customer_account_id") +sdf = sdf.apply(func, stateful=True) +``` + +**Arguments**: + +- `key`: how the new key should be generated from the message value; +requires a column name (string) or a callable that takes the message value. +- `name`: a name for the op (must be unique per group-by), required if `key` +is a custom callable. +- `value_deserializer`: a deserializer type for values; default - JSON +- `key_deserializer`: a deserializer type for keys; default - JSON +- `value_serializer`: a serializer type for values; default - JSON +- `key_serializer`: a serializer type for keys; default - JSON + +**Returns**: + +a clone with this operation added (assign to keep its effect). - + -### StreamingSeries +#### StreamingDataFrame.contains ```python -class StreamingSeries(BaseStreaming) +@staticmethod +def contains(key: str) -> StreamingSeries ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L47) - -`StreamingSeries` are typically generated by `StreamingDataframes` when getting -elements from, or performing certain operations on, a `StreamingDataframe`, -thus acting as a representation of "column" value. - -They share some operations with the `StreamingDataframe`, but also provide some -additional functionality. - -Most column value operations are handled by this class, and `StreamingSeries` can -generate other `StreamingSeries` as a result of said operations. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L518) +Check if the key is present in the Row value. 
-What it Does: +Example Snippet: -- Allows ways to do simple operations with dataframe "column"/dictionary values: - - Basic ops like add, subtract, modulo, etc. -- Enables comparisons/inequalities: - - Greater than, equals, etc. - - and/or, is/not operations -- Can check for existence of columns in `StreamingDataFrames` -- Enables chaining of various operations together +```python +# Add new column 'has_column' which contains a boolean indicating +# the presence of 'column_x' +sdf = StreamingDataframe() +sdf['has_column'] = sdf.contains('column_x') +``` -How to Use: +**Arguments**: -For the most part, you may not even notice this class exists! -They will naturally be created as a result of typical `StreamingDataFrame` use. +- `key`: a column name to check. -Auto-complete should help you with valid methods and type-checking should alert -you to invalid operations between `StreamingSeries`. +**Returns**: -In general, any typical Pands dataframe operation between columns should be valid -with `StreamingSeries`, and you shouldn't have to think about them explicitly. +a Column object that evaluates to True if the key is present +or False otherwise. + -Example Snippet: +#### StreamingDataFrame.to\_topic ```python -# Random methods for example purposes. More detailed explanations found under -# various methods or in the docs folder. - -sdf = StreamingDataframe() -sdf = sdf["column_a"].apply(a_func).apply(diff_func, stateful=True) -sdf["my_new_bool_field"] = sdf["column_b"].contains("this_string") -sdf["new_sum_field"] = sdf["column_c"] + sdf["column_d"] + 2 -sdf = sdf[["column_a"] & (sdf["new_sum_field"] >= 10)] +def to_topic(topic: Topic, key: Optional[Callable[[Any], Any]] = None) -> Self ``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L543) -#### StreamingSeries.from\_apply\_callback +Produce current value to a topic. You can optionally specify a new key. 
+ +Example Snippet: ```python -@classmethod -def from_apply_callback(cls, func: ApplyWithMetadataCallback) -> Self -``` +from quixstreams import Application -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L107) +# Produce to two different topics, changing the key for one of them. -Create a StreamingSeries from a function. +app = Application() +input_topic = app.topic("input_x") +output_topic_0 = app.topic("output_a") +output_topic_1 = app.topic("output_b") -The provided function will be wrapped into `Apply` +sdf = app.dataframe(input_topic) +sdf = sdf.to_topic(output_topic_0) +sdf = sdf.to_topic(output_topic_1, key=lambda data: data["a_field"]) +``` **Arguments**: -- `func`: a function to apply - -**Returns**: - -instance of `StreamingSeries` +- `topic`: instance of `Topic` +- `key`: a callable to generate a new message key, optional. +If passed, the return type of this callable must be serializable +by `key_serializer` defined for this Topic object. +By default, the current message key will be used. - + -#### StreamingSeries.apply +#### StreamingDataFrame.set\_timestamp ```python -def apply(func: ApplyCallback) -> Self +def set_timestamp(func: Callable[[Any, Any, int, Any], int]) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L121) - -Add a callable to the execution list for this series. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L584) -The provided callable should accept a single argument, which will be its input. -The provided callable should similarly return one output, or None +Set a new timestamp based on the current message value and its metadata. -They can be chained together or included with other operations. 
+The new timestamp will be used in windowed aggregations and when producing
+messages to the output topics.
+The new timestamp must be in milliseconds to conform to Kafka requirements.

Example Snippet:

```python
-# The `StreamingSeries` are generated when `sdf["COLUMN_NAME"]` is called.
-# This stores a string in state and capitalizes the column value; the result is
-# assigned to a new column.
-# Another apply converts a str column to an int, assigning it to a new column.
+from quixstreams import Application

-def func(value: str, state: State):
-    if value != state.get("my_store_key"):
-        state.set("my_store_key") = value
-    return v.upper()
-sdf = StreamingDataframe()
-sdf["new_col"] = sdf["a_column"]["nested_dict_key"].apply(func, stateful=True)
-sdf["new_col_2"] = sdf["str_col"].apply(lambda v: int(v)) + sdf["str_col2"] + 2
+
+app = Application()
+input_topic = app.topic("data")
+
+sdf = app.dataframe(input_topic)
+# Updating the record's timestamp based on the value
+sdf = sdf.set_timestamp(lambda value, key, timestamp, headers: value['new_timestamp'])
```

**Arguments**:

-- `func`: a callable with one argument and one output
+- `func`: callable accepting the current value, key, timestamp, and headers.
+It's expected to return a new timestamp as integer in milliseconds.
**Returns**: -a new `StreamingSeries` with the new callable added +a new StreamingDataFrame instance - + -#### StreamingSeries.compose\_returning +#### StreamingDataFrame.set\_headers ```python -def compose_returning() -> ReturningExecutor +def set_headers( + func: Callable[ + [Any, Any, int, List[Tuple[str, HeaderValue]]], + Collection[Tuple[str, HeaderValue]], + ] +) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L155) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L625) -Compose a list of functions from this StreamingSeries and its parents into one +Set new message headers based on the current message value and metadata. -big closure that always returns the transformed record. +The new headers will be used when producing messages to the output topics. -This closure is to be used to execute the functions in the stream and to get -the result of the transformations. +The provided callback must accept value, key, timestamp, and headers, +and return a new collection of (header, value) tuples. -Stream may only contain simple "apply" functions to be able to compose itself -into a returning function. +Example Snippet: + +```python +from quixstreams import Application + + +app = Application() +input_topic = app.topic("data") + +sdf = app.dataframe(input_topic) +# Updating the record's headers based on the value and metadata +sdf = sdf.set_headers(lambda value, key, timestamp, headers: [('id', value['id'])]) +``` + +**Arguments**: + +- `func`: callable accepting the current value, key, timestamp, and headers. +It's expected to return a new set of headers +as a collection of (header, value) tuples. 
**Returns**: -a callable accepting value, key and timestamp and -returning a tuple "(value, key, timestamp) +a new StreamingDataFrame instance - + -#### StreamingSeries.compose +#### StreamingDataFrame.compose ```python def compose( - sink: Optional[Callable[[Any, Any, int, Any], - None]] = None) -> VoidExecutor + sink: Optional[Callable[[Any, Any, int, Any], None]] = None +) -> Dict[str, VoidExecutor] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L170) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L676) -Compose all functions of this StreamingSeries into one big closure. +Compose all functions of this StreamingDataFrame into one big closure. + +Closures are more performant than calling all the functions in the +`StreamingDataFrame` one-by-one. Generally not required by users; the `quixstreams.app.Application` class will do this automatically. @@ -1452,12 +1677,9 @@ Example Snippet: ```python from quixstreams import Application - -app = Application(...) - sdf = app.dataframe() -sdf = sdf["column_a"].apply(apply_func) -sdf = sdf["column_b"].contains(filter_func) +sdf = sdf.apply(apply_func) +sdf = sdf.filter(filter_func) sdf = sdf.compose() result_0 = sdf({"my": "record"}) @@ -1466,2577 +1688,2760 @@ result_1 = sdf({"other": "record"}) **Arguments**: -- `sink`: callable to accumulate the results of the execution. +- `sink`: callable to accumulate the results of the execution, optional. -**Raises**: +**Returns**: -- `ValueError`: if disallowed functions are present in the tree of -underlying `Stream`. 
+a function that accepts "value" +and returns a result of StreamingDataFrame + + + +#### StreamingDataFrame.test + +```python +def test(value: Any, + key: Any, + timestamp: int, + headers: Optional[Any] = None, + ctx: Optional[MessageContext] = None, + topic: Optional[Topic] = None) -> List[Any] +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L713) + +A shorthand to test `StreamingDataFrame` with provided value + +and `MessageContext`. + +**Arguments**: + +- `value`: value to pass through `StreamingDataFrame` +- `key`: key to pass through `StreamingDataFrame` +- `timestamp`: timestamp to pass through `StreamingDataFrame` +- `ctx`: instance of `MessageContext`, optional. +Provide it if the StreamingDataFrame instance calls `to_topic()`, +has stateful functions or windows. +Default - `None`. +- `topic`: optionally, a topic branch to test with **Returns**: -a callable accepting value, key and timestamp and -returning None +result of `StreamingDataFrame` - + -#### StreamingSeries.test +#### StreamingDataFrame.tumbling\_window + +```python +def tumbling_window(duration_ms: Union[int, timedelta], + grace_ms: Union[int, timedelta] = 0, + name: Optional[str] = None) -> TumblingWindowDefinition +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L750) + +Create a tumbling window transformation on this StreamingDataFrame. + +Tumbling windows divide time into fixed-sized, non-overlapping windows. + +They allow performing stateful aggregations like `sum`, `reduce`, etc. +on top of the data and emit results downstream. + +Notes: + +- The timestamp of the aggregation result is set to the window start timestamp. +- Every window is grouped by the current Kafka message key. +- Messages with `None` key will be ignored. +- The time windows always use the current event time. 
+ + + +Example Snippet: ```python -def test(value: Any, - key: Any, - timestamp: int, - headers: Optional[Any] = None, - ctx: Optional[MessageContext] = None) -> Any -``` +app = Application() +sdf = app.dataframe(...) -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L214) +sdf = ( + # Define a tumbling window of 60s and grace period of 10s + sdf.tumbling_window( + duration_ms=timedelta(seconds=60), grace_ms=timedelta(seconds=10.0) + ) -A shorthand to test `StreamingSeries` with provided value + # Specify the aggregation function + .sum() -and `MessageContext`. + # Specify how the results should be emitted downstream. + # "all()" will emit results as they come for each updated window, + # possibly producing multiple messages per key-window pair + # "final()" will emit windows only when they are closed and cannot + # receive any updates anymore. + .all() +) +``` **Arguments**: -- `value`: value to pass through `StreamingSeries` -- `ctx`: instance of `MessageContext`, optional. -Provide it if the StreamingSeries instance has -functions calling `get_current_key()`. -Default - `None`. +- `duration_ms`: The length of each window. +Can be specified as either an `int` representing milliseconds or a +`timedelta` object. +>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond +value. +- `grace_ms`: The grace period for data arrival. +It allows late-arriving data (data arriving after the window +has theoretically closed) to be included in the window. +Can be specified as either an `int` representing milliseconds +or as a `timedelta` object. +>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond +value. +- `name`: The unique identifier for the window. If not provided, it will be +automatically generated based on the window's properties. 
**Returns**: -result of `StreamingSeries` +`TumblingWindowDefinition` instance representing the tumbling window +configuration. +This object can be further configured with aggregation functions +like `sum`, `count`, etc. applied to the StreamingDataFrame. - + -#### StreamingSeries.isin +#### StreamingDataFrame.hopping\_window ```python -def isin(other: Container) -> Self +def hopping_window(duration_ms: Union[int, timedelta], + step_ms: Union[int, timedelta], + grace_ms: Union[int, timedelta] = 0, + name: Optional[str] = None) -> HoppingWindowDefinition ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L269) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L826) -Check if series value is in "other". +Create a hopping window transformation on this StreamingDataFrame. -Same as "StreamingSeries in other". +Hopping windows divide the data stream into overlapping windows based on time. +The overlap is controlled by the `step_ms` parameter. -Runtime result will be a `bool`. +They allow performing stateful aggregations like `sum`, `reduce`, etc. +on top of the data and emit results downstream. + +Notes: + +- The timestamp of the aggregation result is set to the window start timestamp. +- Every window is grouped by the current Kafka message key. +- Messages with `None` key will be ignored. +- The time windows always use the current event time. Example Snippet: ```python -from quixstreams import Application +app = Application() +sdf = app.dataframe(...) -# Check if "str_column" is contained in a column with a list of strings and -# assign the resulting `bool` to a new column: "has_my_str". 
+sdf = ( + # Define a hopping window of 60s with step 30s and grace period of 10s + sdf.hopping_window( + duration_ms=timedelta(seconds=60), + step_ms=timedelta(seconds=30), + grace_ms=timedelta(seconds=10) + ) -sdf = app.dataframe() -sdf["has_my_str"] = sdf["str_column"].isin(sdf["column_with_list_of_strs"]) + # Specify the aggregation function + .sum() + + # Specify how the results should be emitted downstream. + # "all()" will emit results as they come for each updated window, + # possibly producing multiple messages per key-window pair + # "final()" will emit windows only when they are closed and cannot + # receive any updates anymore. + .all() +) ``` **Arguments**: -- `other`: a container to check +- `duration_ms`: The length of each window. It defines the time span for +which each window aggregates data. +Can be specified as either an `int` representing milliseconds +or a `timedelta` object. +>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond +value. +- `step_ms`: The step size for the window. +It determines how much each successive window moves forward in time. +Can be specified as either an `int` representing milliseconds +or a `timedelta` object. +>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond +value. +- `grace_ms`: The grace period for data arrival. +It allows late-arriving data to be included in the window, +even if it arrives after the window has theoretically moved forward. +Can be specified as either an `int` representing milliseconds +or a `timedelta` object. +>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond +value. +- `name`: The unique identifier for the window. If not provided, it will be +automatically generated based on the window's properties. **Returns**: -new StreamingSeries - - +`HoppingWindowDefinition` instance representing the hopping +window configuration. +This object can be further configured with aggregation functions +like `sum`, `count`, etc. 
and applied to the StreamingDataFrame. -#### StreamingSeries.contains + -```python -def contains(other: Union[Self, object]) -> Self -``` +## quixstreams.error\_callbacks -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L296) + -Check if series value contains "other" +## quixstreams.exceptions -Same as "other in StreamingSeries". + -Runtime result will be a `bool`. +## quixstreams.exceptions.base + -Example Snippet: +## quixstreams.exceptions.assignment -```python -from quixstreams import Application + -# Check if "column_a" contains "my_substring" and assign the resulting -# `bool` to a new column: "has_my_substr" +### PartitionAssignmentError -sdf = app.dataframe() -sdf["has_my_substr"] = sdf["column_a"].contains("my_substring") +```python +class PartitionAssignmentError(QuixException) ``` -**Arguments**: - -- `other`: object to check - -**Returns**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/exceptions/assignment.py#L6) -new StreamingSeries +Error happened during partition rebalancing. +Raised from `on_assign`, `on_revoke` and `on_lost` callbacks - + -#### StreamingSeries.is\_ +## quixstreams.kafka.exceptions -```python -def is_(other: Union[Self, object]) -> Self -``` + -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L321) +## quixstreams.kafka -Check if series value refers to the same object as `other` + -Runtime result will be a `bool`. 
+## quixstreams.kafka.configuration + -Example Snippet: +### ConnectionConfig ```python -# Check if "column_a" is the same as "column_b" and assign the resulting `bool` -# to a new column: "is_same" - -from quixstreams import Application -sdf = app.dataframe() -sdf["is_same"] = sdf["column_a"].is_(sdf["column_b"]) +class ConnectionConfig(BaseSettings) ``` -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L17) -- `other`: object to check for "is" +Provides an interface for all librdkafka connection-based configs. -**Returns**: +Allows converting to or from a librdkafka dictionary. -new StreamingSeries +Also obscures secrets and handles any case sensitivity issues. - + -#### StreamingSeries.isnot +#### ConnectionConfig.settings\_customise\_sources ```python -def isnot(other: Union[Self, object]) -> Self +@classmethod +def settings_customise_sources( + cls, settings_cls: Type[BaseSettings], + init_settings: PydanticBaseSettingsSource, + env_settings: PydanticBaseSettingsSource, + dotenv_settings: PydanticBaseSettingsSource, + file_secret_settings: PydanticBaseSettingsSource +) -> Tuple[PydanticBaseSettingsSource, ...] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L344) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L96) -Check if series value does not refer to the same object as `other` - -Runtime result will be a `bool`. 
+Included to ignore reading/setting values from the environment + -Example Snippet: +#### ConnectionConfig.from\_librdkafka\_dict ```python -from quixstreams import Application +@classmethod +def from_librdkafka_dict(cls, + config: dict, + ignore_extras: bool = False) -> Self +``` -# Check if "column_a" is the same as "column_b" and assign the resulting `bool` -# to a new column: "is_not_same" +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L110) -sdf = app.dataframe() -sdf["is_not_same"] = sdf["column_a"].isnot(sdf["column_b"]) -``` +Create a `ConnectionConfig` from a librdkafka config dictionary. **Arguments**: -- `other`: object to check for "is_not" +- `config`: a dict of configs (like {"bootstrap.servers": "url"}) +- `ignore_extras`: Ignore non-connection settings (else raise exception) **Returns**: -new StreamingSeries +a ConnectionConfig - + -#### StreamingSeries.isnull +#### ConnectionConfig.as\_librdkafka\_dict ```python -def isnull() -> Self +def as_librdkafka_dict(plaintext_secrets=True) -> dict ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L368) - -Check if series value is None. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L125) -Runtime result will be a `bool`. +Dump any non-empty config values as a librdkafka dictionary. +>***NOTE***: All secret values will be dumped in PLAINTEXT by default. 
-Example Snippet: +**Arguments**: -```python -from quixstreams import Application +- `plaintext_secrets`: whether secret values are plaintext or obscured (***) -# Check if "column_a" is null and assign the resulting `bool` to a new column: -# "is_null" +**Returns**: -sdf = app.dataframe() -sdf["is_null"] = sdf["column_a"].isnull() -``` +a librdkafka-compatible dictionary -**Returns**: + -new StreamingSeries +## quixstreams.kafka.producer - + -#### StreamingSeries.notnull +### Producer ```python -def notnull() -> Self +class Producer() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L391) - -Check if series value is not None. - -Runtime result will be a `bool`. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L44) + -Example Snippet: +#### Producer.\_\_init\_\_ ```python -from quixstreams import Application +def __init__(broker_address: Union[str, ConnectionConfig], + logger: logging.Logger = logger, + error_callback: Callable[[KafkaError], None] = _default_error_cb, + extra_config: Optional[dict] = None, + flush_timeout: Optional[int] = None) +``` -# Check if "column_a" is not null and assign the resulting `bool` to a new column: -# "is_not_null" +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L45) -sdf = app.dataframe() -sdf["is_not_null"] = sdf["column_a"].notnull() -``` +A wrapper around `confluent_kafka.Producer`. -**Returns**: +It initializes `confluent_kafka.Producer` on demand +avoiding network calls during `__init__`, provides typing info for methods +and some reasonable defaults. -new StreamingSeries +**Arguments**: - +- `broker_address`: Connection settings for Kafka. +Accepts string with Kafka broker host and port formatted as `:`, +or a ConnectionConfig object if authentication is required. 
+- `logger`: a Logger instance to attach librdkafka logging to +- `error_callback`: callback used for producer errors +- `extra_config`: A dictionary with additional options that +will be passed to `confluent_kafka.Producer` as is. +Note: values passed as arguments override values in `extra_config`. +- `flush_timeout`: The time the producer is waiting for all messages to be delivered. -#### StreamingSeries.abs + + +#### Producer.produce ```python -def abs() -> Self +def produce(topic: str, + value: Optional[Union[str, bytes]] = None, + key: Optional[Union[str, bytes]] = None, + headers: Optional[Headers] = None, + partition: Optional[int] = None, + timestamp: Optional[int] = None, + poll_timeout: float = 5.0, + buffer_error_max_tries: int = 3, + on_delivery: Optional[DeliveryCallback] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L414) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L83) -Get absolute value of the series value. +Produce a message to a topic. -Example Snippet: +It also polls Kafka for callbacks before producing to minimize +the probability of `BufferError`. +If `BufferError` still happens, the method will poll Kafka with timeout +to free up the buffer and try again. -```python -from quixstreams import Application +**Arguments**: -# Get absolute value of "int_col" and add it to "other_int_col". -# Finally, assign the result to a new column: "abs_col_sum". +- `topic`: topic name +- `value`: message value +- `key`: message key +- `headers`: message headers +- `partition`: topic partition +- `timestamp`: message timestamp +- `poll_timeout`: timeout for `poll()` call in case of `BufferError` +- `buffer_error_max_tries`: max retries for `BufferError`. +Pass `0` to not retry after `BufferError`. 
+- `on_delivery`: the delivery callback to be triggered on `poll()` +for the produced message. -sdf = app.dataframe() -sdf["abs_col_sum"] = sdf["int_col"].abs() + sdf["other_int_col"] -``` + -**Returns**: +#### Producer.poll -new StreamingSeries +```python +def poll(timeout: float = 0) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L144) -## quixstreams.dataframe +Polls the producer for events and calls `on_delivery` callbacks. - +**Arguments**: -## quixstreams.dataframe.utils +- `timeout`: poll timeout seconds; Default: 0 (unlike others) +> NOTE: -1 will hang indefinitely if there are no messages to acknowledge - + -#### ensure\_milliseconds +#### Producer.flush ```python -def ensure_milliseconds(delta: Union[int, timedelta]) -> int +def flush(timeout: Optional[float] = None) -> int ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/utils.py#L5) - -Convert timedelta to milliseconds. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L152) -If the `delta` is not -This function will also round the value to the closest milliseconds in case of -higher precision. +Wait for all messages in the Producer queue to be delivered. **Arguments**: -- `delta`: `timedelta` object +- `timeout` (`float`): time to attempt flushing (seconds). +None use producer default or -1 is infinite. 
Default: None **Returns**: -timedelta value in milliseconds as `int` - - - -## quixstreams.dataframe.exceptions +number of messages remaining to flush - + -## quixstreams.dataframe.windows.definitions +## quixstreams.kafka.consumer - + -### FixedTimeWindowDefinition +### Consumer ```python -class FixedTimeWindowDefinition(abc.ABC) +class Consumer() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L20) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L64) - + -#### FixedTimeWindowDefinition.sum +#### Consumer.\_\_init\_\_ ```python -def sum() -> "FixedTimeWindow" +def __init__(broker_address: Union[str, ConnectionConfig], + consumer_group: Optional[str], + auto_offset_reset: AutoOffsetReset, + auto_commit_enable: bool = True, + logger: logging.Logger = logger, + error_callback: Callable[[KafkaError], None] = _default_error_cb, + on_commit: Optional[Callable[ + [Optional[KafkaError], List[TopicPartition]], None]] = None, + extra_config: Optional[dict] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L67) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L65) -Configure the window to aggregate data by summing up values within +A wrapper around `confluent_kafka.Consumer`. -each window period. +It initializes `confluent_kafka.Consumer` on demand +avoiding network calls during `__init__`, provides typing info for methods +and some reasonable defaults. -**Returns**: +**Arguments**: -an instance of `FixedTimeWindow` configured to perform sum aggregation. +- `broker_address`: Connection settings for Kafka. 
+Accepts string with Kafka broker host and port formatted as `:`, +or a ConnectionConfig object if authentication is required. +- `consumer_group`: Kafka consumer group. +Passed as `group.id` to `confluent_kafka.Consumer` +- `auto_offset_reset`: Consumer `auto.offset.reset` setting. +Available values: +- "earliest" - automatically reset the offset to the smallest offset +- "latest" - automatically reset the offset to the largest offset +- "error" - trigger an error (ERR__AUTO_OFFSET_RESET) which is retrieved + by consuming messages (used for testing) +- `auto_commit_enable`: If true, periodically commit offset of +the last message handed to the application. Default - `True`. +- `logger`: a Logger instance to attach librdkafka logging to +- `error_callback`: callback used for consumer errors +- `on_commit`: Offset commit result propagation callback. +Passed as "offset_commit_cb" to `confluent_kafka.Consumer`. +- `extra_config`: A dictionary with additional options that +will be passed to `confluent_kafka.Consumer` as is. +Note: values passed as arguments override values in `extra_config`. - + -#### FixedTimeWindowDefinition.count +#### Consumer.poll ```python -def count() -> "FixedTimeWindow" +def poll(timeout: Optional[float] = None) -> Optional[Message] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L94) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L128) -Configure the window to aggregate data by counting the number of values - -within each window period. - -**Returns**: - -an instance of `FixedTimeWindow` configured to perform record count. +Consumes a single message, calls callbacks and returns events. 
- +The application must check the returned :py:class:`Message` +object's :py:func:`Message.error()` method to distinguish between proper +messages (error() returns None), or an event or error. -#### FixedTimeWindowDefinition.mean +Note: Callbacks may be called from this method, such as +``on_assign``, ``on_revoke``, et al. -```python -def mean() -> "FixedTimeWindow" -``` +**Arguments**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L121) +- `timeout` (`float`): Maximum time in seconds to block waiting for message, +event or callback. None or -1 is infinite. Default: None. -Configure the window to aggregate data by calculating the mean of the values +**Raises**: -within each window period. +- `None`: RuntimeError if called on a closed consumer **Returns**: -an instance of `FixedTimeWindow` configured to calculate the mean -of the values. +A Message object or None on timeout - + -#### FixedTimeWindowDefinition.reduce +#### Consumer.subscribe ```python -def reduce(reducer: Callable[[Any, Any], Any], - initializer: Callable[[Any], Any]) -> "FixedTimeWindow" +def subscribe(topics: List[str], + on_assign: Optional[RebalancingCallback] = None, + on_revoke: Optional[RebalancingCallback] = None, + on_lost: Optional[RebalancingCallback] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L152) - -Configure the window to perform a custom aggregation using `reducer` - -and `initializer` functions. - -Example Snippet: -```python -sdf = StreamingDataFrame(...) 
- -# Using "reduce()" to calculate multiple aggregates at once -def reducer(agg: dict, current: int): - aggregated = { - 'min': min(agg['min'], current), - 'max': max(agg['max'], current) - 'count': agg['count'] + 1 - } - return aggregated +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L146) -def initializer(current) -> dict: - return {'min': current, 'max': current, 'count': 1} +Set subscription to supplied list of topics -window = ( - sdf.tumbling_window(duration_ms=1000) - .reduce(reducer=reducer, initializer=initializer) - .final() -) -``` +This replaces a previous subscription. **Arguments**: -- `reducer`: A function that takes two arguments -(the accumulated value and a new value) and returns a single value. -The returned value will be saved to the state store and sent downstream. -- `initializer`: A function to call for every first element of the window. -This function is used to initialize the aggregation within a window. +- `topics` (`list(str)`): List of topics (strings) to subscribe to. +- `on_assign` (`callable`): callback to provide handling of customized offsets +on completion of a successful partition re-assignment. +- `on_revoke` (`callable`): callback to provide handling of offset commits to +a customized store on the start of a rebalance operation. +- `on_lost` (`callable`): callback to provide handling in the case the partition +assignment has been lost. Partitions that have been lost may already be +owned by other members in the group and therefore committing offsets, +for example, may fail. + +**Raises**: -**Returns**: +- `KafkaException`: +- `None`: RuntimeError if called on a closed consumer +.. py:function:: on_assign(consumer, partitions) +.. py:function:: on_revoke(consumer, partitions) +.. py:function:: on_lost(consumer, partitions) -A window configured to perform custom reduce aggregation on the data. + :param Consumer consumer: Consumer instance. 
+ :param list(TopicPartition) partitions: Absolute list of partitions being + assigned or revoked. - + -#### FixedTimeWindowDefinition.max +#### Consumer.unsubscribe ```python -def max() -> "FixedTimeWindow" +def unsubscribe() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L212) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L240) -Configure a window to aggregate the maximum value within each window period. +Remove current subscription. -**Returns**: +**Raises**: -an instance of `FixedTimeWindow` configured to calculate the maximum -value within each window period. +- `None`: KafkaException +- `None`: RuntimeError if called on a closed consumer - + -#### FixedTimeWindowDefinition.min +#### Consumer.store\_offsets ```python -def min() -> "FixedTimeWindow" +def store_offsets(message: Optional[Message] = None, + offsets: Optional[List[TopicPartition]] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L241) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L248) -Configure a window to aggregate the minimum value within each window period. +.. py:function:: store_offsets([message=None], [offsets=None]) -**Returns**: +Store offsets for a message or a list of offsets. -an instance of `FixedTimeWindow` configured to calculate the maximum -value within each window period. +``message`` and ``offsets`` are mutually exclusive. The stored offsets +will be committed according to 'auto.commit.interval.ms' or manual +offset-less `commit`. +Note that 'enable.auto.offset.store' must be set to False when using this API. 
- +**Arguments**: -## quixstreams.dataframe.windows +- `message` (`confluent_kafka.Message`): Store message's offset+1. +- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to store. - +**Raises**: -## quixstreams.dataframe.windows.time\_based +- `None`: KafkaException +- `None`: RuntimeError if called on a closed consumer - + -### FixedTimeWindow +#### Consumer.commit ```python -class FixedTimeWindow() +def commit(message: Optional[Message] = None, + offsets: Optional[List[TopicPartition]] = None, + asynchronous: bool = True) -> Optional[List[TopicPartition]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/time_based.py#L32) - - - -#### FixedTimeWindow.final +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L282) -```python -def final() -> "StreamingDataFrame" -``` +Commit a message or a list of offsets. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/time_based.py#L107) +The ``message`` and ``offsets`` parameters are mutually exclusive. +If neither is set, the current partition assignment's offsets are used instead. +Use this method to commit offsets if you have 'enable.auto.commit' set to False. -Apply the window aggregation and return results only when the windows are -closed. +**Arguments**: -The format of returned windows: -```python -{ - "start": , - "end": , - "value: , -} -``` +- `message` (`confluent_kafka.Message`): Commit the message's offset+1. +Note: By convention, committed offsets reflect the next message +to be consumed, **not** the last message consumed. +- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to commit. +- `asynchronous` (`bool`): If true, asynchronously commit, returning None +immediately. 
If False, the commit() call will block until the commit +succeeds or fails and the committed offsets will be returned (on success). +Note that specific partitions may have failed and the .err field of +each partition should be checked for success. -The individual window is closed when the event time -(the maximum observed timestamp across the partition) passes -its end timestamp + grace period. -The closed windows cannot receive updates anymore and are considered final. +**Raises**: ->***NOTE:*** Windows can be closed only within the same message key. -If some message keys appear irregularly in the stream, the latest windows -can remain unprocessed until the message the same key is received. +- `None`: KafkaException +- `None`: RuntimeError if called on a closed consumer - + -#### FixedTimeWindow.current +#### Consumer.committed ```python -def current() -> "StreamingDataFrame" +def committed(partitions: List[TopicPartition], + timeout: Optional[float] = None) -> List[TopicPartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/time_based.py#L145) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L322) -Apply the window transformation to the StreamingDataFrame to return results -for each updated window. +.. py:function:: committed(partitions, [timeout=None]) -The format of returned windows: -```python -{ - "start": , - "end": , - "value: , -} -``` +Retrieve committed offsets for the specified partitions. -This method processes streaming data and returns results as they come, -regardless of whether the window is closed or not. +**Arguments**: - +- `partitions` (`list(TopicPartition)`): List of topic+partitions to query for stored offsets. +- `timeout` (`float`): Request timeout (seconds). +None or -1 is infinite. 
Default: None -## quixstreams.dataframe.windows.base +**Raises**: - +- `None`: KafkaException +- `None`: RuntimeError if called on a closed consumer -#### get\_window\_ranges +**Returns**: + +`list(TopicPartition)`: List of topic+partitions with offset and possibly error set. + + + +#### Consumer.get\_watermark\_offsets ```python -def get_window_ranges(timestamp_ms: int, - duration_ms: int, - step_ms: Optional[int] = None) -> List[Tuple[int, int]] +def get_watermark_offsets(partition: TopicPartition, + timeout: Optional[float] = None, + cached: bool = False) -> Tuple[int, int] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/base.py#L18) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L342) -Get a list of window ranges for the given timestamp. +Retrieve low and high offsets for the specified partition. **Arguments**: -- `timestamp_ms`: timestamp in milliseconds -- `duration_ms`: window duration in milliseconds -- `step_ms`: window step in milliseconds for hopping windows, optional. - -**Returns**: - -a list of (, ) tuples +- `partition` (`TopicPartition`): Topic+partition to return offsets for. +- `timeout` (`float`): Request timeout (seconds). None or -1 is infinite. +Ignored if cached=True. Default: None +- `cached` (`bool`): Instead of querying the broker, use cached information. +Cached values: The low offset is updated periodically +(if statistics.interval.ms is set) while the high offset is updated on each +message fetched from the broker for this partition. - +**Raises**: -## quixstreams.dataframe.base +- `None`: KafkaException +- `None`: RuntimeError if called on a closed consumer - +**Returns**: -## quixstreams.rowproducer +`tuple(int,int)`: Tuple of (low,high) on success or None on timeout. +The high offset is the offset of the last message + 1. 
- + -### RowProducer +#### Consumer.list\_topics ```python -class RowProducer() +def list_topics(topic: Optional[str] = None, + timeout: Optional[float] = None) -> ClusterMetadata ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowproducer.py#L18) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L368) -A producer class that is capable of serializing Rows to bytes and send them to Kafka. +.. py:function:: list_topics([topic=None], [timeout=-1]) -The serialization is performed according to the Topic serialization settings. +Request metadata from the cluster. +This method provides the same information as +listTopics(), describeTopics() and describeCluster() in the Java Admin client. **Arguments**: -- `broker_address`: Connection settings for Kafka. -Accepts string with Kafka broker host and port formatted as `:`, -or a ConnectionConfig object if authentication is required. -- `extra_config`: A dictionary with additional options that -will be passed to `confluent_kafka.Producer` as is. -Note: values passed as arguments override values in `extra_config`. -- `on_error`: a callback triggered when `RowProducer.produce_row()` -or `RowProducer.poll()` fail`. -If producer fails and the callback returns `True`, the exception -will be logged but not propagated. -The default callback logs an exception and returns `False`. -- `flush_timeout`: The time the producer is waiting for all messages to be delivered. +- `topic` (`str`): If specified, only request information about this topic, +else return results for all topics in cluster. +Warning: If auto.create.topics.enable is set to true on the broker and +an unknown topic is specified, it will be created. +- `timeout` (`float`): The maximum response time before timing out +None or -1 is infinite. 
Default: None - +**Raises**: -#### RowProducer.produce\_row +- `None`: KafkaException + + + +#### Consumer.memberid ```python -def produce_row(row: Row, - topic: Topic, - key: Optional[Any] = _KEY_UNSET, - partition: Optional[int] = None, - timestamp: Optional[int] = None) +def memberid() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowproducer.py#L56) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L391) -Serialize Row to bytes according to the Topic serialization settings +Return this client's broker-assigned group member id. -and produce it to Kafka +The member id is assigned by the group coordinator and is propagated to +the consumer during rebalance. -If this method fails, it will trigger the provided "on_error" callback. + :returns: Member id string or None + :rtype: string + :raises: RuntimeError if called on a closed consumer -**Arguments**: -- `row`: Row object -- `topic`: Topic object -- `key`: message key, optional -- `partition`: partition number, optional -- `timestamp`: timestamp in milliseconds, optional + + +#### Consumer.offsets\_for\_times + +```python +def offsets_for_times(partitions: List[TopicPartition], + timeout: Optional[float] = None) -> List[TopicPartition] +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L404) + +Look up offsets by timestamp for the specified partitions. + +The returned offset for each partition is the earliest offset whose +timestamp is greater than or equal to the given timestamp in the +corresponding partition. If the provided timestamp exceeds that of the +last message in the partition, a value of -1 will be returned. + + :param list(TopicPartition) partitions: topic+partitions with timestamps + in the TopicPartition.offset field. 
+ :param float timeout: The maximum response time before timing out. + None or -1 is infinite. Default: None + :returns: List of topic+partition with offset field set and possibly error set + :rtype: list(TopicPartition) + :raises: KafkaException + :raises: RuntimeError if called on a closed consumer - -#### RowProducer.poll + + +#### Consumer.pause ```python -def poll(timeout: float = None) +def pause(partitions: List[TopicPartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowproducer.py#L96) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L430) -Polls the producer for events and calls `on_delivery` callbacks. +Pause consumption for the provided list of partitions. -If `poll()` fails, it will trigger the provided "on_error" callback +Paused partitions must be tracked manually. + +Does NOT affect the result of Consumer.assignment(). **Arguments**: -- `timeout`: timeout in seconds +- `partitions` (`list(TopicPartition)`): List of topic+partitions to pause. - +**Raises**: -## quixstreams.core.stream.functions +- `None`: KafkaException - + -### StreamFunction +#### Consumer.resume ```python -class StreamFunction(abc.ABC) +def resume(partitions: List[TopicPartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L65) - -A base class for all the streaming operations in Quix Streams. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L444) -It provides a `get_executor` method to return a closure to be called with the input -values. +.. py:function:: resume(partitions) - +Resume consumption for the provided list of partitions. 
-#### StreamFunction.get\_executor +**Arguments**: -```python -@abc.abstractmethod -def get_executor(child_executor: VoidExecutor) -> VoidExecutor -``` +- `partitions` (`list(TopicPartition)`): List of topic+partitions to resume. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L79) +**Raises**: -Returns a wrapper to be called on a value, key and timestamp. +- `None`: KafkaException - + -### ApplyFunction +#### Consumer.position ```python -class ApplyFunction(StreamFunction) +def position(partitions: List[TopicPartition]) -> List[TopicPartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L85) - -Wrap a function into "Apply" function. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L456) -The provided callback is expected to return a new value based on input, -and its result will always be passed downstream. +Retrieve current positions (offsets) for the specified partitions. - +**Arguments**: -### ApplyWithMetadataFunction +- `partitions` (`list(TopicPartition)`): List of topic+partitions to return +current offsets for. The current offset is the offset of +the last consumed message + 1. -```python -class ApplyWithMetadataFunction(StreamFunction) -``` +**Raises**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L125) +- `None`: KafkaException +- `None`: RuntimeError if called on a closed consumer -Wrap a function into "Apply" function. +**Returns**: -The provided function is expected to accept value, and timestamp and return -a new value based on input, -and its result will always be passed downstream. +`list(TopicPartition)`: List of topic+partitions with offset and possibly error set. 
- + -### FilterFunction +#### Consumer.seek ```python -class FilterFunction(StreamFunction) +def seek(partition: TopicPartition) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L166) - -Wraps a function into a "Filter" function. -The result of a Filter function is interpreted as boolean. -If it's `True`, the input will be return downstream. -If it's `False`, the `Filtered` exception will be raised to signal that the -value is filtered out. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L470) - +Set consume position for partition to offset. -### FilterWithMetadataFunction +The offset may be an absolute (>=0) or a +logical offset (:py:const:`OFFSET_BEGINNING` et.al). -```python -class FilterWithMetadataFunction(StreamFunction) -``` +seek() may only be used to update the consume offset of an +actively consumed partition (i.e., after :py:const:`assign()`), +to set the starting offset of partition not being consumed instead +pass the offset in an `assign()` call. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L187) +**Arguments**: -Wraps a function into a "Filter" function. +- `partition` (`TopicPartition`): Topic+partition+offset to seek to. -The passed callback must accept value, key, and timestamp, and it's expected to -return a boolean-like result. +**Raises**: -If the result is `True`, the input will be passed downstream. -Otherwise, the value will be filtered out. 
+- `None`: KafkaException - + -### UpdateFunction +#### Consumer.assignment ```python -class UpdateFunction(StreamFunction) +def assignment() -> List[TopicPartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L210) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L487) -Wrap a function into an "Update" function. +Returns the current partition assignment. -The provided function must accept a value, and it's expected to mutate it -or to perform some side effect. +**Raises**: -The result of the callback is always ignored, and the original input is passed -downstream. +- `None`: KafkaException +- `None`: RuntimeError if called on a closed consumer - +**Returns**: -### UpdateWithMetadataFunction +`list(TopicPartition)`: List of assigned topic+partitions. -```python -class UpdateWithMetadataFunction(StreamFunction) -``` + -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L233) +#### Consumer.set\_sasl\_credentials -Wrap a function into an "Update" function. +```python +def set_sasl_credentials(username: str, password: str) +``` -The provided function must accept a value, a key, and a timestamp. -The callback is expected to mutate the value or to perform some side effect with it. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L500) -The result of the callback is always ignored, and the original input is passed -downstream. +Sets the SASL credentials used for this client. +These credentials will overwrite the old ones, and will be used the next +time the client needs to authenticate. +This method will not disconnect existing broker connections that have been +established with the old credentials. 
+This method is applicable only to SASL PLAIN and SCRAM mechanisms. - + -### TransformFunction +#### Consumer.incremental\_assign ```python -class TransformFunction(StreamFunction) +def incremental_assign(partitions: List[TopicPartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L256) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L512) -Wrap a function into a "Transform" function. +Assign new partitions. -The provided callback must accept a value, a key and a timestamp. -It's expected to return a new value, new key and new timestamp. +Can be called outside the `Consumer` `on_assign` callback (multiple times). +Partitions immediately show on `Consumer.assignment()`. -This function must be used with caution, because it can technically change the -key. -It's supposed to be used by the library internals and not be a part of the public -API. +Any additional partitions besides the ones passed during the `Consumer` +`on_assign` callback will NOT be associated with the consumer group. -The result of the callback will always be passed downstream. + - +#### Consumer.incremental\_unassign -## quixstreams.core.stream +```python +def incremental_unassign(partitions: List[TopicPartition]) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L524) -## quixstreams.core.stream.stream +Revoke partitions. - +Can be called outside an on_revoke callback. 
-### Stream + + +#### Consumer.close ```python -class Stream() +def close() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L34) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L532) - +Close down and terminate the Kafka Consumer. -#### Stream.\_\_init\_\_ +Actions performed: -```python -def __init__(func: Optional[StreamFunction] = None, - parent: Optional[Self] = None) -``` +- Stops consuming. +- Commits offsets, unless the consumer property 'enable.auto.commit' is set to False. +- Leaves the consumer group. + +Registered callbacks may be called from this method, +see `poll()` for more info. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L35) -A base class for all streaming operations. + -`Stream` is an abstraction of a function pipeline. -Each Stream has a function and a parent (None by default). -When adding new function to the stream, it creates a new `Stream` object and -sets "parent" to the previous `Stream` to maintain an order of execution. +## quixstreams.models.serializers -Streams supports four types of functions: + -- "Apply" - generate new values based on a previous one. - The result of an Apply function is passed downstream to the next functions. - If "expand=True" is passed and the function returns an `Iterable`, - each item of it will be treated as a separate value downstream. -- "Update" - update values in-place. - The result of an Update function is always ignored, and its input is passed - downstream. -- "Filter" - to filter values from the Stream. - The result of a Filter function is interpreted as boolean. - If it's `True`, the input will be passed downstream. - If it's `False`, the record will be filtered from the stream. 
-- "Transform" - to transform keys and timestamps along with the values. - "Transform" functions may change the keys and should be used with caution. - The result of the Transform function is passed downstream to the next - functions. - If "expand=True" is passed and the function returns an `Iterable`, - each item of it will be treated as a separate value downstream. +## quixstreams.models.serializers.json -To execute the functions on the `Stream`, call `.compose()` method, and -it will return a closure to execute all the functions accumulated in the Stream -and its parents. + -**Arguments**: +### JSONSerializer -- `func`: a function to be called on the stream. -It is expected to be wrapped into one of "Apply", "Filter", "Update" or -"Trasform" from `quixstreams.core.stream.functions` package. -Default - "ApplyFunction(lambda value: value)". -- `parent`: a parent `Stream` +```python +class JSONSerializer(Serializer) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L13) -#### Stream.add\_filter + + +#### JSONSerializer.\_\_init\_\_ ```python -def add_filter(func: Union[FilterCallback, FilterWithMetadataCallback], - *, - metadata: bool = False) -> Self +def __init__(dumps: Callable[[Any], Union[str, bytes]] = default_dumps) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L97) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L14) -Add a function to filter values from the Stream. - -The return value of the function will be interpreted as `bool`. -If the function returns `False`-like result, the Stream will raise `Filtered` -exception during execution. +Serializer that returns data in json format. 
**Arguments**: -- `func`: a function to filter values from the stream -- `metadata`: if True, the callback will receive key and timestamp along with -the value. -Default - `False`. +- `dumps`: a function to serialize objects to json. +Default - :py:func:`quixstreams.utils.json.dumps` -**Returns**: + -a new `Stream` derived from the current one +### JSONDeserializer - +```python +class JSONDeserializer(Deserializer) +``` -#### Stream.add\_apply +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L35) + + + +#### JSONDeserializer.\_\_init\_\_ ```python -def add_apply(func: Union[ - ApplyCallback, - ApplyExpandedCallback, - ApplyWithMetadataCallback, - ApplyWithMetadataExpandedCallback, -], - *, - expand: bool = False, - metadata: bool = False) -> Self +def __init__(loads: Callable[[Union[bytes, bytearray]], Any] = default_loads) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L122) - -Add an "apply" function to the Stream. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L36) -The function is supposed to return a new value, which will be passed -further during execution. +Deserializer that parses data from JSON **Arguments**: -- `func`: a function to generate a new value -- `expand`: if True, expand the returned iterable into individual values -downstream. If returned value is not iterable, `TypeError` will be raised. -Default - `False`. -- `metadata`: if True, the callback will receive key and timestamp along with -the value. -Default - `False`. +- `loads`: function to parse json from bytes. +Default - :py:func:`quixstreams.utils.json.loads`. 
-**Returns**: + -a new `Stream` derived from the current one +## quixstreams.models.serializers.simple\_types - + -#### Stream.add\_update +### BytesDeserializer ```python -def add_update(func: Union[UpdateCallback, UpdateWithMetadataCallback], - *, - metadata: bool = False) -> Self +class BytesDeserializer(Deserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L155) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L44) -Add an "update" function to the Stream, that will mutate the input value. +A deserializer to bypass bytes without any changes -The return of this function will be ignored and its input -will be passed downstream. + -**Arguments**: +### BytesSerializer -- `func`: a function to mutate the value -- `metadata`: if True, the callback will receive key and timestamp along with -the value. -Default - `False`. +```python +class BytesSerializer(Serializer) +``` -**Returns**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L53) -a new Stream derived from the current one +A serializer to bypass bytes without any changes - + -#### Stream.add\_transform +### StringDeserializer ```python -def add_transform(func: Union[TransformCallback, TransformExpandedCallback], - *, - expand: bool = False) -> Self +class StringDeserializer(Deserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L179) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L62) -Add a "transform" function to the Stream, that will mutate the input value. + -The callback must accept a value, a key, and a timestamp. 
-It's expected to return a new value, new key and new timestamp. +#### StringDeserializer.\_\_init\_\_ -The result of the callback which will be passed downstream -during execution. +```python +def __init__(codec: str = "utf_8") +``` -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L63) -- `func`: a function to mutate the value -- `expand`: if True, expand the returned iterable into individual items -downstream. If returned value is not iterable, `TypeError` will be raised. -Default - `False`. +Deserializes bytes to strings using the specified encoding. -**Returns**: +**Arguments**: -a new Stream derived from the current one +- `codec`: string encoding +A wrapper around `confluent_kafka.serialization.StringDeserializer`. - + -#### Stream.diff +### IntegerDeserializer ```python -def diff(other: "Stream") -> Self +class IntegerDeserializer(Deserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L204) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L81) -Takes the difference between Streams `self` and `other` based on their last - -common parent, and returns a new `Stream` that includes only this difference. +Deserializes bytes to integers. -It's impossible to calculate a diff when: - - Streams don't have a common parent. - - When the `self` Stream already includes all the nodes from - the `other` Stream, and the resulting diff is empty. +A wrapper around `confluent_kafka.serialization.IntegerDeserializer`. -**Arguments**: + -- `other`: a `Stream` to take a diff from. +### DoubleDeserializer -**Raises**: +```python +class DoubleDeserializer(Deserializer) +``` -- `ValueError`: if Streams don't have a common parent -or if the diff is empty. 
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L99) -**Returns**: +Deserializes float to IEEE 764 binary64. -new `Stream` instance including all the Streams from the diff +A wrapper around `confluent_kafka.serialization.DoubleDeserializer`. - + -#### Stream.tree +### StringSerializer ```python -def tree() -> List[Self] +class StringSerializer(Serializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L233) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L117) -Return a list of all parent Streams including the node itself. + -The tree is ordered from child to parent (current node comes first). +#### StringSerializer.\_\_init\_\_ -**Returns**: +```python +def __init__(codec: str = "utf_8") +``` -a list of `Stream` objects +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L118) - +Serializes strings to bytes using the specified encoding. -#### Stream.compose\_returning +**Arguments**: -```python -def compose_returning() -> ReturningExecutor -``` +- `codec`: string encoding -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L247) + -Compose a list of functions from this `Stream` and its parents into one -big closure that always returns the transformed record. +### IntegerSerializer -This closure is to be used to execute the functions in the stream and to get -the result of the transformations. +```python +class IntegerSerializer(Serializer) +``` -Stream may only contain simple "apply" functions to be able to compose itself -into a returning function. 
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L130) - +Serializes integers to bytes -#### Stream.compose + + +### DoubleSerializer ```python -def compose( - allow_filters: bool = True, - allow_updates: bool = True, - allow_expands: bool = True, - allow_transforms: bool = True, - sink: Optional[Callable[[Any, Any, int, Any], - None]] = None) -> VoidExecutor +class DoubleSerializer(Serializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L284) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L143) -Compose a list of functions from this `Stream` and its parents into one +Serializes floats to bytes -big closure using a "composer" function. + -This "executor" closure is to be used to execute all functions in the stream for the given -key, value and timestamps. +## quixstreams.models.serializers.quix -By default, executor doesn't return the result of the execution. -To accumulate the results, pass the `sink` parameter. + -**Arguments**: +### QuixDeserializer -- `allow_filters`: If False, this function will fail with `ValueError` if -the stream has filter functions in the tree. Default - True. -- `allow_updates`: If False, this function will fail with `ValueError` if -the stream has update functions in the tree. Default - True. -- `allow_expands`: If False, this function will fail with `ValueError` if -the stream has functions with "expand=True" in the tree. Default - True. -- `allow_transforms`: If False, this function will fail with `ValueError` if -the stream has transform functions in the tree. Default - True. -- `sink`: callable to accumulate the results of the execution, optional. 
+```python +class QuixDeserializer(JSONDeserializer) +``` -**Raises**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L73) -- `ValueError`: if disallowed functions are present in the stream tree. +Handles Deserialization for any Quix-formatted topic. - +Parses JSON data from either `TimeseriesData` and `EventData` (ignores the rest). -## quixstreams.core + - +#### QuixDeserializer.\_\_init\_\_ -## quixstreams.processing\_context +```python +def __init__(loads: Callable[[Union[bytes, bytearray]], Any] = default_loads) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L80) -### ProcessingContext +**Arguments**: + +- `loads`: function to parse json from bytes. +Default - :py:func:`quixstreams.utils.json.loads`. + + + +#### QuixDeserializer.split\_values ```python -@dataclasses.dataclass -class ProcessingContext() +@property +def split_values() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L21) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L97) -A class to share processing-related objects -between `Application` and `StreamingDataFrame` instances. +Each Quix message might contain data for multiple Rows. +This property informs the downstream processors about that, so they can +expect an Iterable instead of Mapping. 
- + -#### ProcessingContext.store\_offset +#### QuixDeserializer.deserialize ```python -def store_offset(topic: str, partition: int, offset: int) +def deserialize(model_key: str, value: Union[List[Mapping], + Mapping]) -> Iterable[Mapping] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L41) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L150) -Store the offset of the processed message to the checkpoint. +Deserialization function for particular data types (Timeseries or EventData). **Arguments**: -- `topic`: topic name -- `partition`: partition number -- `offset`: message offset +- `model_key`: value of "__Q_ModelKey" message header +- `value`: deserialized JSON value of the message, list or dict - +**Returns**: -#### ProcessingContext.init\_checkpoint +Iterable of dicts + + + +### QuixSerializer ```python -def init_checkpoint() +class QuixSerializer(JSONSerializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L51) - -Initialize a new checkpoint +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L271) - + -#### ProcessingContext.commit\_checkpoint +#### QuixSerializer.\_\_init\_\_ ```python -def commit_checkpoint(force: bool = False) +def __init__(as_legacy: bool = True, + dumps: Callable[[Any], Union[str, bytes]] = default_dumps) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L62) - -Commit the current checkpoint. - -The actual commit will happen only when: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L275) -1. 
The checkpoint has at least one stored offset -2. The checkpoint is expired or `force=True` is passed +Serializer that returns data in json format. **Arguments**: -- `force`: if `True`, commit the checkpoint before its expiration deadline. +- `as_legacy`: parse as the legacy format; Default = True +- `dumps`: a function to serialize objects to json. +Default - :py:func:`quixstreams.utils.json.dumps` - + -## quixstreams.utils +### QuixTimeseriesSerializer - +```python +class QuixTimeseriesSerializer(QuixSerializer) +``` -## quixstreams.utils.dicts +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L318) - +Serialize data to JSON formatted according to Quix Timeseries format. -#### dict\_values +The serializable object must be dictionary, and each item must be of `str`, `int`, +`float`, `bytes` or `bytearray` type. +Otherwise, the `SerializationError` will be raised. +Input: ```python -def dict_values(d: object) -> List +{'a': 1, 'b': 1.1, 'c': "string", 'd': b'bytes', 'Tags': {'tag1': 'tag'}} ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/utils/dicts.py#L4) +Output: +```json +{ + "Timestamps": [123123123], + "NumericValues": {"a": [1], "b": [1.1]}, + "StringValues": {"c": ["string"]}, + "BinaryValues": {"d": ["Ynl0ZXM="]}, + "TagValues": {"tag1": ["tag"]} +} +``` -Recursively unpacks a set of nested dicts to get a flattened list of leaves, + -where "leaves" are the first non-dict item. 
+### QuixEventsSerializer -i.e {"a": {"b": {"c": 1}, "d": 2}, "e": 3} becomes [1, 2, 3] +```python +class QuixEventsSerializer(QuixSerializer) +``` -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L406) -- `d`: initially, a dict (with potentially nested dicts) +Serialize data to JSON formatted according to Quix EventData format. +The input value is expected to be a dictionary with the following keys: + - "Id" (type `str`, default - "") + - "Value" (type `str`, default - ""), + - "Tags" (type `dict`, default - {}) -**Returns**: +>***NOTE:*** All the other fields will be ignored. -a list with all the leaves of the various contained dicts +Input: +```python +{ + "Id": "an_event", + "Value": "any_string", + "Tags": {"tag1": "tag"}} +} +``` - +Output: +```json +{ + "Id": "an_event", + "Value": "any_string", + "Tags": {"tag1": "tag"}}, + "Timestamp":1692703362840389000 +} +``` -## quixstreams.utils.json + - +## quixstreams.models.serializers.base -#### dumps + + +### SerializationContext ```python -def dumps(value: Any) -> bytes +class SerializationContext() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/utils/json.py#L8) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L22) -Serialize to JSON using `orjson` package. - -**Arguments**: - -- `value`: value to serialize to JSON - -**Returns**: +Provides additional context for message serialization/deserialization. 
-bytes +Every `Serializer` and `Deserializer` receives an instance of `SerializationContext` - + -#### loads +#### SerializationContext.to\_confluent\_ctx ```python -def loads(value: bytes) -> Any +def to_confluent_ctx(field: MessageField) -> _SerializationContext ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/utils/json.py#L18) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L35) -Deserialize from JSON using `orjson` package. +Convert `SerializationContext` to `confluent_kafka.SerializationContext` -Main differences: -- It returns `bytes` -- It doesn't allow non-str keys in dictionaries +in order to re-use serialization already provided by `confluent_kafka` library. **Arguments**: -- `value`: value to deserialize from +- `field`: instance of `confluent_kafka.serialization.MessageField` **Returns**: -object +instance of `confluent_kafka.serialization.SerializationContext` - + -## quixstreams.types +### Deserializer - +```python +class Deserializer(abc.ABC) +``` -## quixstreams.models.timestamps +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L47) - + -### TimestampType +#### Deserializer.\_\_init\_\_ ```python -class TimestampType(enum.IntEnum) +def __init__(*args, **kwargs) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/timestamps.py#L8) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L48) - - -#### TIMESTAMP\_NOT\_AVAILABLE - -timestamps not supported by broker - - +A base class for all Deserializers -#### TIMESTAMP\_CREATE\_TIME + -message creation time (or source / producer time) +#### Deserializer.split\_values - +```python +@property +def 
split_values() -> bool +``` -#### TIMESTAMP\_LOG\_APPEND\_TIME +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L54) -broker receive time +Return True if the deserialized message should be considered as Iterable +and each item in it should be processed as a separate message. - + -### MessageTimestamp +### Serializer ```python -class MessageTimestamp() +class Serializer(abc.ABC) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/timestamps.py#L14) - -Represents a timestamp of incoming Kafka message. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L65) -It is made pseudo-immutable (i.e. public attributes don't have setters), and -it should not be mutated during message processing. +A base class for all Serializers - + -#### MessageTimestamp.create +#### Serializer.extra\_headers ```python -@classmethod -def create(cls, timestamp_type: int, milliseconds: int) -> Self +@property +def extra_headers() -> MessageHeadersMapping ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/timestamps.py#L41) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L71) -Create a Timestamp object based on data - -from `confluent_kafka.Message.timestamp()`. - -If timestamp type is "TIMESTAMP_NOT_AVAILABLE", the milliseconds are set to None +Informs producer to set additional headers -**Arguments**: +for the message it will be serializing -- `timestamp_type`: a timestamp type represented as a number -Can be one of: -- "0" - TIMESTAMP_NOT_AVAILABLE, timestamps not supported by broker. -- "1" - TIMESTAMP_CREATE_TIME, message creation time (or source / producer time). 
-- "2" - TIMESTAMP_LOG_APPEND_TIME, broker receive time. -- `milliseconds`: the number of milliseconds since the epoch (UTC). +Must return a dictionary with headers. +Keys must be strings, and values must be strings, bytes or None. **Returns**: -Timestamp object - - - -## quixstreams.models +dict with headers - + -## quixstreams.models.messagecontext +## quixstreams.models.serializers.exceptions - + -### MessageContext +### IgnoreMessage ```python -class MessageContext() +class IgnoreMessage(exceptions.QuixException) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/messagecontext.py#L4) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/exceptions.py#L46) -An object with Kafka message properties. - -It is made pseudo-immutable (i.e. public attributes don't have setters), and -it should not be mutated during message processing. +Raise this exception from Deserializer.__call__ in order to ignore the processing +of the particular message. - + -## quixstreams.models.types +## quixstreams.models.topics - + -### ConfluentKafkaMessageProto +## quixstreams.models.topics.exceptions -```python -class ConfluentKafkaMessageProto(Protocol) -``` + -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/types.py#L13) +## quixstreams.models.topics.manager -An interface of `confluent_kafka.Message`. + -Use it to not depend on exact implementation and simplify testing. +#### affirm\_ready\_for\_create -Instances of `confluent_kafka.Message` cannot be directly created from Python, -see https://github.com/confluentinc/confluent-kafka-python/issues/1535. 
+```python +def affirm_ready_for_create(topics: List[Topic]) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L20) -## quixstreams.models.serializers +Validate a list of topics is ready for creation attempt - +**Arguments**: -## quixstreams.models.serializers.exceptions +- `topics`: list of `Topic`s - + -### IgnoreMessage +### TopicManager ```python -class IgnoreMessage(exceptions.QuixException) +class TopicManager() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/exceptions.py#L46) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L30) -Raise this exception from Deserializer.__call__ in order to ignore the processing -of the particular message. +The source of all topic management with quixstreams. - +Generally initialized and managed automatically by an `Application`, +but allows a user to work with it directly when needed, such as using it alongside +a plain `Producer` to create its topics. -## quixstreams.models.serializers.quix +See methods for details. - + -### QuixDeserializer +#### TopicManager.\_\_init\_\_ ```python -class QuixDeserializer(JSONDeserializer) +def __init__(topic_admin: TopicAdmin, + consumer_group: str, + timeout: float = 30, + create_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L73) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L53) -Handles Deserialization for any Quix-formatted topic. +**Arguments**: -Parses JSON data from either `TimeseriesData` and `EventData` (ignores the rest). 
+- `topic_admin`: an `Admin` instance (required for some functionality) +- `consumer_group`: the consumer group (of the `Application`) +- `timeout`: response timeout (seconds) +- `create_timeout`: timeout for topic creation - + -#### QuixDeserializer.\_\_init\_\_ +#### TopicManager.changelog\_topics ```python -def __init__(column_name: Optional[str] = None, - loads: Callable[[Union[bytes, bytearray]], Any] = default_loads) +@property +def changelog_topics() -> Dict[str, Dict[str, Topic]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L80) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L103) -**Arguments**: +Note: `Topic`s are the changelogs. -- `column_name`: if provided, the deserialized value will be wrapped into -dictionary with `column_name` as a key. -- `loads`: function to parse json from bytes. -Default - :py:func:`quixstreams.utils.json.loads`. +returns: the changelog topic dict, {topic_name: {suffix: Topic}} - + -#### QuixDeserializer.split\_values +#### TopicManager.all\_topics ```python @property -def split_values() -> bool +def all_topics() -> Dict[str, Topic] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L100) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L112) -Each Quix message might contain data for multiple Rows. -This property informs the downstream processors about that, so they can -expect an Iterable instead of Mapping. +Every registered topic name mapped to its respective `Topic`. 
- +returns: full topic dict, {topic_name: Topic} -#### QuixDeserializer.deserialize + + +#### TopicManager.topic\_config ```python -def deserialize(model_key: str, value: Union[List[Mapping], - Mapping]) -> Iterable[Mapping] +def topic_config(num_partitions: Optional[int] = None, + replication_factor: Optional[int] = None, + extra_config: Optional[dict] = None) -> TopicConfig ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L153) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L220) -Deserialization function for particular data types (Timeseries or EventData). +Convenience method for generating a `TopicConfig` with default settings **Arguments**: -- `model_key`: value of "__Q_ModelKey" message header -- `value`: deserialized JSON value of the message, list or dict +- `num_partitions`: the number of topic partitions +- `replication_factor`: the topic replication factor +- `extra_config`: other optional configuration settings **Returns**: -Iterable of dicts +a TopicConfig object - + -### QuixSerializer +#### TopicManager.topic ```python -class QuixSerializer(JSONSerializer) +def topic(name: str, + value_deserializer: Optional[DeserializerType] = None, + key_deserializer: Optional[DeserializerType] = "bytes", + value_serializer: Optional[SerializerType] = None, + key_serializer: Optional[SerializerType] = "bytes", + config: Optional[TopicConfig] = None, + timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L274) - - - -#### QuixSerializer.\_\_init\_\_ - -```python -def __init__(as_legacy: bool = True, - dumps: Callable[[Any], Union[str, bytes]] = default_dumps) -``` +[[VIEW 
SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L241) -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L278) +A convenience method for generating a `Topic`. Will use default config options -Serializer that returns data in json format. +as dictated by the TopicManager. **Arguments**: -- `as_legacy`: parse as the legacy format; Default = True -- `dumps`: a function to serialize objects to json. -Default - :py:func:`quixstreams.utils.json.dumps` - - - -### QuixTimeseriesSerializer - -```python -class QuixTimeseriesSerializer(QuixSerializer) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L321) - -Serialize data to JSON formatted according to Quix Timeseries format. - -The serializable object must be dictionary, and each item must be of `str`, `int`, -`float`, `bytes` or `bytearray` type. -Otherwise, the `SerializationError` will be raised. +- `name`: topic name +- `value_deserializer`: a deserializer type for values +- `key_deserializer`: a deserializer type for keys +- `value_serializer`: a serializer type for values +- `key_serializer`: a serializer type for keys +- `config`: optional topic configurations (for creation/validation) +- `timestamp_extractor`: a callable that returns a timestamp in +milliseconds from a deserialized message. 
-Input: -```python -{'a': 1, 'b': 1.1, 'c': "string", 'd': b'bytes', 'Tags': {'tag1': 'tag'}} -``` +**Returns**: -Output: -```json -{ - "Timestamps": [123123123], - "NumericValues": {"a": [1], "b": [1.1]}, - "StringValues": {"c": ["string"]}, - "BinaryValues": {"d": ["Ynl0ZXM="]}, - "TagValues": {"tag1": ["tag"]} -} -``` +Topic object with creation configs - + -### QuixEventsSerializer +#### TopicManager.repartition\_topic ```python -class QuixEventsSerializer(QuixSerializer) +def repartition_topic(operation: str, + topic_name: str, + value_deserializer: Optional[DeserializerType] = "json", + key_deserializer: Optional[DeserializerType] = "json", + value_serializer: Optional[SerializerType] = "json", + key_serializer: Optional[SerializerType] = "json", + timeout: Optional[float] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L409) - -Serialize data to JSON formatted according to Quix EventData format. -The input value is expected to be a dictionary with the following keys: - - "Id" (type `str`, default - "") - - "Value" (type `str`, default - ""), - - "Tags" (type `dict`, default - {}) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L286) ->***NOTE:*** All the other fields will be ignored. +Create an internal repartition topic. -Input: -```python -{ - "Id": "an_event", - "Value": "any_string", - "Tags": {"tag1": "tag"}} -} -``` +**Arguments**: -Output: -```json -{ - "Id": "an_event", - "Value": "any_string", - "Tags": {"tag1": "tag"}}, - "Timestamp":1692703362840389000 -} -``` +- `operation`: name of the GroupBy operation (column name or user-defined). +- `topic_name`: name of the topic the GroupBy is sourced from. 
+- `value_deserializer`: a deserializer type for values; default - JSON +- `key_deserializer`: a deserializer type for keys; default - JSON +- `value_serializer`: a serializer type for values; default - JSON +- `key_serializer`: a serializer type for keys; default - JSON +- `timeout`: config lookup timeout (seconds); Default 30 - +**Returns**: -## quixstreams.models.serializers.simple\_types +`Topic` object (which is also stored on the TopicManager) - + -### BytesDeserializer +#### TopicManager.changelog\_topic ```python -class BytesDeserializer(Deserializer) +def changelog_topic(topic_name: str, + store_name: str, + timeout: Optional[float] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L44) - -A deserializer to bypass bytes without any changes +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L326) - +Performs all the logic necessary to generate a changelog topic based on a -### BytesSerializer +"source topic" (aka input/consumed topic). -```python -class BytesSerializer(Serializer) -``` +Its main goal is to ensure partition counts of the to-be generated changelog +match the source topic, and ensure the changelog topic is compacted. Also +enforces the serialization type. All `Topic` objects generated with this are +stored on the TopicManager. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L55) +If source topic already exists, defers to the existing topic settings, else +uses the settings as defined by the `Topic` (and its defaults) as generated +by the `TopicManager`. -A serializer to bypass bytes without any changes +In general, users should NOT need this; an Application knows when/how to +generate changelog topics. 
To turn off changelogs, init an Application with +"use_changelog_topics"=`False`. - +**Arguments**: -### StringDeserializer +- `topic_name`: name of consumed topic (app input topic) +> NOTE: normally contain any prefixes added by TopicManager.topic() +- `store_name`: name of the store this changelog belongs to +(default, rolling10s, etc.) +- `timeout`: config lookup timeout (seconds); Default 30 -```python -class StringDeserializer(Deserializer) -``` +**Returns**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L64) +`Topic` object (which is also stored on the TopicManager) - + -#### StringDeserializer.\_\_init\_\_ +#### TopicManager.create\_topics ```python -def __init__(column_name: Optional[str] = None, codec: str = "utf_8") +def create_topics(topics: List[Topic], + timeout: Optional[float] = None, + create_timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L65) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L383) -Deserializes bytes to strings using the specified encoding. +Creates topics via an explicit list of provided `Topics`. + +Exists as a way to manually specify what topics to create; otherwise, +`create_all_topics()` is generally simpler. **Arguments**: -- `codec`: string encoding -A wrapper around `confluent_kafka.serialization.StringDeserializer`. 
+- `topics`: list of `Topic`s +- `timeout`: creation acknowledge timeout (seconds); Default 30 +- `create_timeout`: topic finalization timeout (seconds); Default 60 - + -### IntegerDeserializer +#### TopicManager.create\_all\_topics ```python -class IntegerDeserializer(Deserializer) +def create_all_topics(timeout: Optional[float] = None, + create_timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L84) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L411) -Deserializes bytes to integers. +A convenience method to create all Topic objects stored on this TopicManager. -A wrapper around `confluent_kafka.serialization.IntegerDeserializer`. +**Arguments**: - +- `timeout`: creation acknowledge timeout (seconds); Default 30 +- `create_timeout`: topic finalization timeout (seconds); Default 60 -### DoubleDeserializer + + +#### TopicManager.validate\_all\_topics ```python -class DoubleDeserializer(Deserializer) +def validate_all_topics(timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L103) - -Deserializes float to IEEE 764 binary64. - -A wrapper around `confluent_kafka.serialization.DoubleDeserializer`. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L424) - +Validates all topics exist and changelogs have correct topic and rep factor. -### StringSerializer +Issues are pooled and raised as an Exception once inspections are complete. 
-```python -class StringSerializer(Serializer) -``` + -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L122) +## quixstreams.models.topics.admin - + -#### StringSerializer.\_\_init\_\_ +#### convert\_topic\_list ```python -def __init__(codec: str = "utf_8") +def convert_topic_list(topics: List[Topic]) -> List[ConfluentTopic] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L123) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L24) -Serializes strings to bytes using the specified encoding. +Converts `Topic`s to `ConfluentTopic`s as required for Confluent's + +`AdminClient.create_topic()`. **Arguments**: -- `codec`: string encoding +- `topics`: list of `Topic`s - +**Returns**: -### IntegerSerializer +list of confluent_kafka `ConfluentTopic`s + + + +### TopicAdmin ```python -class IntegerSerializer(Serializer) +class TopicAdmin() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L135) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L47) -Serializes integers to bytes +For performing "admin"-level operations on a Kafka cluster, mostly around topics. - +Primarily used to create and inspect topic configurations. 
-### DoubleSerializer + + +#### TopicAdmin.\_\_init\_\_ ```python -class DoubleSerializer(Serializer) +def __init__(broker_address: Union[str, ConnectionConfig], + logger: logging.Logger = logger, + extra_config: Optional[Mapping] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L148) - -Serializes floats to bytes +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L54) - +**Arguments**: -## quixstreams.models.serializers.json +- `broker_address`: Connection settings for Kafka. +Accepts string with Kafka broker host and port formatted as `:`, +or a ConnectionConfig object if authentication is required. +- `logger`: a Logger instance to attach librdkafka logging to +- `extra_config`: optional configs (generally accepts producer configs) - + -### JSONSerializer +#### TopicAdmin.list\_topics ```python -class JSONSerializer(Serializer) +def list_topics(timeout: float = -1) -> Dict[str, ConfluentTopicMetadata] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L13) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L83) - +Get a list of topics and their metadata from a Kafka cluster -#### JSONSerializer.\_\_init\_\_ +**Arguments**: + +- `timeout`: response timeout (seconds); Default infinite (-1) + +**Returns**: + +a dict of topic names and their metadata objects + + + +#### TopicAdmin.inspect\_topics ```python -def __init__(dumps: Callable[[Any], Union[str, bytes]] = default_dumps) +def inspect_topics(topic_names: List[str], + timeout: float = 30) -> Dict[str, Optional[TopicConfig]] ``` -[[VIEW 
SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L14) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L94) -Serializer that returns data in json format. - -**Arguments**: +A simplified way of getting the topic configurations of the provided topics -- `dumps`: a function to serialize objects to json. -Default - :py:func:`quixstreams.utils.json.dumps` +from the cluster (if they exist). - +**Arguments**: -### JSONDeserializer +- `topic_names`: a list of topic names +- `timeout`: response timeout (seconds) +>***NOTE***: `timeout` must be >0 here (expects non-neg, and 0 != inf). -```python -class JSONDeserializer(Deserializer) -``` +**Returns**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L35) +a dict with topic names and their respective `TopicConfig` - + -#### JSONDeserializer.\_\_init\_\_ +#### TopicAdmin.create\_topics ```python -def __init__(column_name: Optional[str] = None, - loads: Callable[[Union[bytes, bytearray]], Any] = default_loads) +def create_topics(topics: List[Topic], + timeout: float = 30, + finalize_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L36) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L176) -Deserializer that parses data from JSON +Create the given list of topics and confirm they are ready. + +Also raises an exception with detailed printout should the creation +fail (it ignores issues for a topic already existing). **Arguments**: -- `column_name`: if provided, the deserialized value will be wrapped into -dictionary with `column_name` as a key. 
-- `loads`: function to parse json from bytes. -Default - :py:func:`quixstreams.utils.json.loads`. +- `topics`: a list of `Topic` +- `timeout`: creation acknowledge timeout (seconds) +- `finalize_timeout`: topic finalization timeout (seconds) +>***NOTE***: `timeout` must be >0 here (expects non-neg, and 0 != inf). - + -## quixstreams.models.serializers.base +## quixstreams.models.topics.topic - + -### SerializationContext +### TopicConfig ```python -class SerializationContext() +@dataclasses.dataclass(eq=True) +class TopicConfig() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L22) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L42) -Provides additional context for message serialization/deserialization. +Represents all kafka-level configuration for a kafka topic. -Every `Serializer` and `Deserializer` receives an instance of `SerializationContext` +Generally used by Topic and any topic creation procedures. - + -#### SerializationContext.to\_confluent\_ctx +### Topic ```python -def to_confluent_ctx(field: MessageField) -> _SerializationContext +class Topic() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L35) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L83) -Convert `SerializationContext` to `confluent_kafka.SerializationContext` +A definition of a Kafka topic. -in order to re-use serialization already provided by `confluent_kafka` library. +Typically created with an `app = quixstreams.app.Application()` instance via +`app.topic()`, and used by `quixstreams.dataframe.StreamingDataFrame` +instance. 
-**Arguments**: + -- `field`: instance of `confluent_kafka.serialization.MessageField` +#### Topic.\_\_init\_\_ -**Returns**: +```python +def __init__( + name: str, + config: TopicConfig, + value_deserializer: Optional[DeserializerType] = None, + key_deserializer: Optional[DeserializerType] = BytesDeserializer(), + value_serializer: Optional[SerializerType] = None, + key_serializer: Optional[SerializerType] = BytesSerializer(), + timestamp_extractor: Optional[TimestampExtractor] = None) +``` -instance of `confluent_kafka.serialization.SerializationContext` +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L92) - +**Arguments**: -### Deserializer +- `name`: topic name +- `config`: topic configs via `TopicConfig` (creation/validation) +- `value_deserializer`: a deserializer type for values +- `key_deserializer`: a deserializer type for keys +- `value_serializer`: a serializer type for values +- `key_serializer`: a serializer type for keys +- `timestamp_extractor`: a callable that returns a timestamp in +milliseconds from a deserialized message. 
+ + + +#### Topic.name ```python -class Deserializer(abc.ABC) +@property +def name() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L47) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L121) - +Topic name -#### Deserializer.\_\_init\_\_ + + +#### Topic.row\_serialize ```python -def __init__(column_name: Optional[str] = None, *args, **kwargs) +def row_serialize(row: Row, key: Any) -> KafkaMessage ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L48) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L131) -A base class for all Deserializers +Serialize Row to a Kafka message structure **Arguments**: -- `column_name`: if provided, the deserialized value will be wrapped into -dictionary with `column_name` as a key. +- `row`: Row to serialize +- `key`: message key to serialize + +**Returns**: - +KafkaMessage object with serialized values -#### Deserializer.split\_values + + +#### Topic.row\_deserialize ```python -@property -def split_values() -> bool +def row_deserialize( + message: ConfluentKafkaMessageProto) -> Union[Row, List[Row], None] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L58) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L162) -Return True if the deserialized message should be considered as Iterable -and each item in it should be processed as a separate message. +Deserialize incoming Kafka message to a Row. 
- +**Arguments**: -### Serializer +- `message`: an object with interface of `confluent_kafka.Message` -```python -class Serializer(abc.ABC) -``` +**Returns**: + +Row, list of Rows or None if the message is ignored. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L74) + -A base class for all Serializers +## quixstreams.models.topics.utils - + -#### Serializer.extra\_headers +#### merge\_headers ```python -@property -def extra_headers() -> MessageHeadersMapping +def merge_headers(original: Optional[MessageHeadersTuples], + other: MessageHeadersMapping) -> MessageHeadersTuples ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L80) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/utils.py#L8) -Informs producer to set additional headers +Merge two sets of Kafka message headers, overwriting headers in "origin" -for the message it will be serializing +by the values from "other". -Must return a dictionary with headers. -Keys must be strings, and values must be strings, bytes or None. +**Arguments**: + +- `original`: original headers as a list of (key, value) tuples. +- `other`: headers to merge as a dictionary. **Returns**: -dict with headers +a list of (key, value) tuples. 
## quixstreams.models.messages - - -## quixstreams.models.rows + - +## quixstreams.models.timestamps -## quixstreams.models.topics + - +### TimestampType -## quixstreams.models.topics.admin +```python +class TimestampType(enum.IntEnum) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/timestamps.py#L8) -#### convert\_topic\_list + -```python -def convert_topic_list(topics: List[Topic]) -> List[ConfluentTopic] -``` +#### TIMESTAMP\_NOT\_AVAILABLE -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L24) +timestamps not supported by broker -Converts `Topic`s to `ConfluentTopic`s as required for Confluent's + -`AdminClient.create_topic()`. +#### TIMESTAMP\_CREATE\_TIME -**Arguments**: +message creation time (or source / producer time) -- `topics`: list of `Topic`s + -**Returns**: +#### TIMESTAMP\_LOG\_APPEND\_TIME -list of confluent_kafka `ConfluentTopic`s +broker receive time - + -### TopicAdmin +### MessageTimestamp ```python -class TopicAdmin() +class MessageTimestamp() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L47) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/timestamps.py#L14) -For performing "admin"-level operations on a Kafka cluster, mostly around topics. +Represents a timestamp of incoming Kafka message. -Primarily used to create and inspect topic configurations. +It is made pseudo-immutable (i.e. public attributes don't have setters), and +it should not be mutated during message processing. 
- + -#### TopicAdmin.\_\_init\_\_ +#### MessageTimestamp.create ```python -def __init__(broker_address: Union[str, ConnectionConfig], - logger: logging.Logger = logger, - extra_config: Optional[Mapping] = None) +@classmethod +def create(cls, timestamp_type: int, milliseconds: int) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L54) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/timestamps.py#L41) -**Arguments**: +Create a Timestamp object based on data -- `broker_address`: Connection settings for Kafka. -Accepts string with Kafka broker host and port formatted as `:`, -or a ConnectionConfig object if authentication is required. -- `logger`: a Logger instance to attach librdkafka logging to -- `extra_config`: optional configs (generally accepts producer configs) +from `confluent_kafka.Message.timestamp()`. - +If timestamp type is "TIMESTAMP_NOT_AVAILABLE", the milliseconds are set to None -#### TopicAdmin.list\_topics +**Arguments**: -```python -def list_topics(timeout: float = -1) -> Dict[str, ConfluentTopicMetadata] -``` +- `timestamp_type`: a timestamp type represented as a number +Can be one of: +- "0" - TIMESTAMP_NOT_AVAILABLE, timestamps not supported by broker. +- "1" - TIMESTAMP_CREATE_TIME, message creation time (or source / producer time). +- "2" - TIMESTAMP_LOG_APPEND_TIME, broker receive time. +- `milliseconds`: the number of milliseconds since the epoch (UTC). 
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L83) +**Returns**: -Get a list of topics and their metadata from a Kafka cluster +Timestamp object -**Arguments**: + -- `timeout`: response timeout (seconds); Default infinite (-1) +## quixstreams.models -**Returns**: + -a dict of topic names and their metadata objects +## quixstreams.models.messagecontext - + -#### TopicAdmin.inspect\_topics +### MessageContext ```python -def inspect_topics(topic_names: List[str], - timeout: float = 30) -> Dict[str, Optional[TopicConfig]] +class MessageContext() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L94) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/messagecontext.py#L4) -A simplified way of getting the topic configurations of the provided topics +An object with Kafka message properties. -from the cluster (if they exist). +It is made pseudo-immutable (i.e. public attributes don't have setters), and +it should not be mutated during message processing. -**Arguments**: + -- `topic_names`: a list of topic names -- `timeout`: response timeout (seconds) ->***NOTE***: `timeout` must be >0 here (expects non-neg, and 0 != inf). 
+## quixstreams.models.rows -**Returns**: + -a dict with topic names and their respective `TopicConfig` +## quixstreams.models.types - + -#### TopicAdmin.create\_topics +### ConfluentKafkaMessageProto ```python -def create_topics(topics: List[Topic], - timeout: float = 30, - finalize_timeout: float = 60) +class ConfluentKafkaMessageProto(Protocol) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L176) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/types.py#L13) -Create the given list of topics and confirm they are ready. +An interface of `confluent_kafka.Message`. -Also raises an exception with detailed printout should the creation -fail (it ignores issues for a topic already existing). +Use it to not depend on exact implementation and simplify testing. -**Arguments**: +Instances of `confluent_kafka.Message` cannot be directly created from Python, +see https://github.com/confluentinc/confluent-kafka-python/issues/1535. -- `topics`: a list of `Topic` -- `timeout`: creation acknowledge timeout (seconds) -- `finalize_timeout`: topic finalization timeout (seconds) ->***NOTE***: `timeout` must be >0 here (expects non-neg, and 0 != inf). 
+ - +## quixstreams.platforms -## quixstreams.models.topics.utils + - +## quixstreams.platforms.quix.checks -#### merge\_headers + + +#### check\_state\_management\_enabled ```python -def merge_headers(original: Optional[MessageHeadersTuples], - other: MessageHeadersMapping) -> MessageHeadersTuples +def check_state_management_enabled() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/utils.py#L8) - -Merge two sets of Kafka message headers, overwriting headers in "origin" +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/checks.py#L11) -by the values from "other". +Check if State Management feature is enabled for the current deployment on +Quix platform. +If it's disabled, the exception will be raised. -**Arguments**: + -- `original`: original headers as a list of (key, value) tuples. -- `other`: headers to merge as a dictionary. +#### check\_state\_dir -**Returns**: +```python +def check_state_dir(state_dir: str) +``` -a list of (key, value) tuples. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/checks.py#L28) - +Check if Application "state_dir" matches the state dir on Quix platform. -## quixstreams.models.topics.topic +If it doesn't match, the warning will be logged. - +**Arguments**: -### TopicConfig +- `state_dir`: application state_dir path -```python -@dataclasses.dataclass(eq=True) -class TopicConfig() -``` + -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L42) +## quixstreams.platforms.quix -Represents all kafka-level configuration for a kafka topic. + -Generally used by Topic and any topic creation procedures. 
+## quixstreams.platforms.quix.env - + -### Topic +### QuixEnvironment ```python -class Topic() +class QuixEnvironment() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L83) - -A definition of a Kafka topic. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L7) -Typically created with an `app = quixstreams.app.Application()` instance via -`app.topic()`, and used by `quixstreams.dataframe.StreamingDataFrame` -instance. +Class to access various Quix platform environment settings - + -#### Topic.\_\_init\_\_ +#### QuixEnvironment.state\_management\_enabled ```python -def __init__( - name: str, - config: TopicConfig, - value_deserializer: Optional[DeserializerType] = None, - key_deserializer: Optional[DeserializerType] = BytesDeserializer(), - value_serializer: Optional[SerializerType] = None, - key_serializer: Optional[SerializerType] = BytesSerializer(), - timestamp_extractor: Optional[TimestampExtractor] = None) +@property +def state_management_enabled() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L92) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L19) -**Arguments**: +Check whether "State management" is enabled for the current deployment -- `name`: topic name -- `config`: topic configs via `TopicConfig` (creation/validation) -- `value_deserializer`: a deserializer type for values -- `key_deserializer`: a deserializer type for keys -- `value_serializer`: a serializer type for values -- `key_serializer`: a serializer type for keys -- `timestamp_extractor`: a callable that returns a timestamp in -milliseconds from a deserialized message. 
+**Returns**: - +True if state management is enabled, otherwise False -#### Topic.name + + +#### QuixEnvironment.deployment\_id ```python @property -def name() -> str +def deployment_id() -> Optional[str] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L121) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L27) -Topic name +Return current Quix deployment id. - +This variable is meant to be set only by Quix Platform and only +when the application is deployed. -#### Topic.row\_serialize +**Returns**: -```python -def row_serialize(row: Row, key: Any) -> KafkaMessage -``` +deployment id or None -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L131) + -Serialize Row to a Kafka message structure +#### QuixEnvironment.workspace\_id -**Arguments**: +```python +@property +def workspace_id() -> Optional[str] +``` -- `row`: Row to serialize -- `key`: message key to serialize +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L39) + +Return Quix workspace id if set **Returns**: -KafkaMessage object with serialized values +workspace id or None - + -#### Topic.row\_deserialize +#### QuixEnvironment.portal\_api ```python -def row_deserialize( - message: ConfluentKafkaMessageProto) -> Union[Row, List[Row], None] +@property +def portal_api() -> Optional[str] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L162) - -Deserialize incoming Kafka message to a Row. 
- -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L47) -- `message`: an object with interface of `confluent_kafka.Message` +Return Quix Portal API url if set **Returns**: -Row, list of Rows or None if the message is ignored. +portal API URL or None - + -## quixstreams.models.topics.exceptions +#### QuixEnvironment.state\_dir - +```python +@property +def state_dir() -> str +``` -## quixstreams.models.topics.manager +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L56) - +Return application state directory on Quix. -#### affirm\_ready\_for\_create +**Returns**: -```python -def affirm_ready_for_create(topics: List[Topic]) -``` +path to state dir -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L20) + -Validate a list of topics is ready for creation attempt +## quixstreams.platforms.quix.exceptions -**Arguments**: + -- `topics`: list of `Topic`s +## quixstreams.platforms.quix.topic\_manager - + -### TopicManager +### QuixTopicManager ```python -class TopicManager() +class QuixTopicManager(TopicManager) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L30) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/topic_manager.py#L9) The source of all topic management with quixstreams. -Generally initialized and managed automatically by an `Application`, +This is specifically for Applications using the Quix platform. + +Generally initialized and managed automatically by an `Application.Quix`, but allows a user to work with it directly when needed, such as using it alongside a plain `Producer` to create its topics. 
See methods for details. - + -#### TopicManager.\_\_init\_\_ +#### QuixTopicManager.\_\_init\_\_ ```python def __init__(topic_admin: TopicAdmin, consumer_group: str, + quix_config_builder: QuixKafkaConfigsBuilder, timeout: float = 30, create_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L53) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/topic_manager.py#L30) **Arguments**: -- `topic_admin`: an `Admin` instance (required for some functionality) -- `consumer_group`: the consumer group (of the `Application`) +- `topic_admin`: an `Admin` instance +- `quix_config_builder`: A QuixKafkaConfigsBuilder instance, else one is +generated for you. - `timeout`: response timeout (seconds) - `create_timeout`: timeout for topic creation - + -#### TopicManager.changelog\_topics +## quixstreams.platforms.quix.api + + + +### QuixPortalApiService ```python -@property -def changelog_topics() -> Dict[str, Dict[str, Topic]] +class QuixPortalApiService() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L103) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/api.py#L19) -Note: `Topic`s are the changelogs. +A light wrapper around the Quix Portal Api. If used in the Quix Platform, it will +use that workspaces auth token and portal endpoint, else you must provide it. -returns: the changelog topic dict, {topic_name: {suffix: Topic}} +Function names closely reflect the respective API endpoint, +each starting with the method [GET, POST, etc.] followed by the endpoint path. - +Results will be returned in the form of request's Response.json(), unless something +else is required. Non-200's will raise exceptions. 
-#### TopicManager.all\_topics +See the swagger documentation for more info about the endpoints. + + + +#### QuixPortalApiService.get\_workspace\_certificate ```python -@property -def all_topics() -> Dict[str, Topic] +def get_workspace_certificate(workspace_id: Optional[str] = None, + timeout: float = 30) -> Optional[bytes] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L112) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/api.py#L119) -Every registered topic name mapped to its respective `Topic`. +Get a workspace TLS certificate if available. -returns: full topic dict, {topic_name: Topic} +Returns `None` if certificate is not specified. - +**Arguments**: -#### TopicManager.topic\_config +- `workspace_id`: workspace id, optional +- `timeout`: request timeout; Default 30 + +**Returns**: + +certificate as bytes if present, or None + + + +## quixstreams.platforms.quix.config + + + +#### strip\_workspace\_id\_prefix ```python -def topic_config(num_partitions: Optional[int] = None, - replication_factor: Optional[int] = None, - extra_config: Optional[dict] = None) -> TopicConfig +def strip_workspace_id_prefix(workspace_id: str, s: str) -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L220) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L31) -Convenience method for generating a `TopicConfig` with default settings +Remove the workspace ID from a given string if it starts with it, + +typically a topic or consumer group id **Arguments**: -- `num_partitions`: the number of topic partitions -- `replication_factor`: the topic replication factor -- `extra_config`: other optional configuration settings +- 
`workspace_id`: the workspace id +- `s`: the string to append to **Returns**: -a TopicConfig object +the string with workspace_id prefix removed - + -#### TopicManager.topic +#### prepend\_workspace\_id ```python -def topic(name: str, - value_deserializer: Optional[DeserializerType] = None, - key_deserializer: Optional[DeserializerType] = "bytes", - value_serializer: Optional[SerializerType] = None, - key_serializer: Optional[SerializerType] = "bytes", - config: Optional[TopicConfig] = None, - timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic +def prepend_workspace_id(workspace_id: str, s: str) -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L241) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L43) -A convenience method for generating a `Topic`. Will use default config options +Add the workspace ID as a prefix to a given string if it does not have it, -as dictated by the TopicManager. +typically a topic or consumer group it **Arguments**: -- `name`: topic name -- `value_deserializer`: a deserializer type for values -- `key_deserializer`: a deserializer type for keys -- `value_serializer`: a serializer type for values -- `key_serializer`: a serializer type for keys -- `config`: optional topic configurations (for creation/validation) -- `timestamp_extractor`: a callable that returns a timestamp in -milliseconds from a deserialized message. 
+- `workspace_id`: the workspace id +- `s`: the string to append to **Returns**: -Topic object with creation configs +the string with workspace_id prepended - + -#### TopicManager.repartition\_topic +### QuixApplicationConfig ```python -def repartition_topic(operation: str, - topic_name: str, - value_deserializer: Optional[DeserializerType] = "json", - key_deserializer: Optional[DeserializerType] = "json", - value_serializer: Optional[SerializerType] = "json", - key_serializer: Optional[SerializerType] = "json", - timeout: Optional[float] = None) -> Topic +@dataclasses.dataclass +class QuixApplicationConfig() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L286) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L56) -Create an internal repartition topic. +A convenience container class for Quix Application configs. -**Arguments**: + -- `operation`: name of the GroupBy operation (column name or user-defined). -- `topic_name`: name of the topic the GroupBy is sourced from. -- `value_deserializer`: a deserializer type for values; default - JSON -- `key_deserializer`: a deserializer type for keys; default - JSON -- `value_serializer`: a serializer type for values; default - JSON -- `key_serializer`: a serializer type for keys; default - JSON -- `timeout`: config lookup timeout (seconds); Default 30 +### QuixKafkaConfigsBuilder -**Returns**: +```python +class QuixKafkaConfigsBuilder() +``` -`Topic` object (which is also stored on the TopicManager) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L66) - +Retrieves all the necessary information from the Quix API and builds all the +objects required to connect a confluent-kafka client to the Quix Platform. 
-#### TopicManager.changelog\_topic +If not executed within the Quix platform directly, you must provide a Quix +"streaming" (aka "sdk") token, or Personal Access Token. + +Ideally you also know your workspace name or id. If not, you can search for it +using a known topic name, but note the search space is limited to the access level +of your token. + +It also currently handles the app_auto_create_topics setting for Application.Quix. + + + +#### QuixKafkaConfigsBuilder.\_\_init\_\_ ```python -def changelog_topic(topic_name: str, - store_name: str, - timeout: Optional[float] = None) -> Topic +def __init__(quix_sdk_token: Optional[str] = None, + workspace_id: Optional[str] = None, + quix_portal_api_service: Optional[QuixPortalApiService] = None, + timeout: float = 30, + topic_create_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L326) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L82) -Performs all the logic necessary to generate a changelog topic based on a +**Arguments**: -"source topic" (aka input/consumed topic). +- `quix_portal_api_service`: A QuixPortalApiService instance (else generated) +- `workspace_id`: A valid Quix Workspace ID (else searched for) -Its main goal is to ensure partition counts of the to-be generated changelog -match the source topic, and ensure the changelog topic is compacted. Also -enforces the serialization type. All `Topic` objects generated with this are -stored on the TopicManager. + -If source topic already exists, defers to the existing topic settings, else -uses the settings as defined by the `Topic` (and its defaults) as generated -by the `TopicManager`. +#### QuixKafkaConfigsBuilder.strip\_workspace\_id\_prefix -In general, users should NOT need this; an Application knows when/how to -generate changelog topics. 
To turn off changelogs, init an Application with -"use_changelog_topics"=`False`. +```python +def strip_workspace_id_prefix(s: str) -> str +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L160) + +Remove the workspace ID from a given string if it starts with it, + +typically a topic or consumer group id **Arguments**: -- `topic_name`: name of consumed topic (app input topic) -> NOTE: normally contain any prefixes added by TopicManager.topic() -- `store_name`: name of the store this changelog belongs to -(default, rolling10s, etc.) -- `timeout`: config lookup timeout (seconds); Default 30 +- `s`: the string to append to **Returns**: -`Topic` object (which is also stored on the TopicManager) +the string with workspace_id prefix removed - + -#### TopicManager.create\_topics +#### QuixKafkaConfigsBuilder.prepend\_workspace\_id ```python -def create_topics(topics: List[Topic], - timeout: Optional[float] = None, - create_timeout: Optional[float] = None) +def prepend_workspace_id(s: str) -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L383) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L170) -Creates topics via an explicit list of provided `Topics`. +Add the workspace ID as a prefix to a given string if it does not have it, -Exists as a way to manually specify what topics to create; otherwise, -`create_all_topics()` is generally simpler. 
+typically a topic or consumer group it **Arguments**: -- `topics`: list of `Topic`s -- `timeout`: creation acknowledge timeout (seconds); Default 30 -- `create_timeout`: topic finalization timeout (seconds); Default 60 +- `s`: the string to append to - +**Returns**: -#### TopicManager.create\_all\_topics +the string with workspace_id prepended + + + +#### QuixKafkaConfigsBuilder.search\_for\_workspace ```python -def create_all_topics(timeout: Optional[float] = None, - create_timeout: Optional[float] = None) +def search_for_workspace(workspace_name_or_id: Optional[str] = None, + timeout: Optional[float] = None) -> Optional[dict] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L411) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L180) -A convenience method to create all Topic objects stored on this TopicManager. +Search for a workspace given an expected workspace name or id. **Arguments**: -- `timeout`: creation acknowledge timeout (seconds); Default 30 -- `create_timeout`: topic finalization timeout (seconds); Default 60 +- `workspace_name_or_id`: the expected name or id of a workspace +- `timeout`: response timeout (seconds); Default 30 + +**Returns**: + +the workspace data dict if search success, else None + + + +#### QuixKafkaConfigsBuilder.get\_workspace\_info + +```python +def get_workspace_info(known_workspace_topic: Optional[str] = None, + timeout: Optional[float] = None) -> dict +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L222) + +Queries for workspace data from the Quix API, regardless of instance cache, + +and updates instance attributes from query result. 
+ +**Arguments**: + +- `known_workspace_topic`: a topic you know to exist in some workspace +- `timeout`: response timeout (seconds); Default 30 - + -#### TopicManager.validate\_all\_topics +#### QuixKafkaConfigsBuilder.search\_workspace\_for\_topic ```python -def validate_all_topics(timeout: Optional[float] = None) +def search_workspace_for_topic( + workspace_id: str, + topic: str, + timeout: Optional[float] = None) -> Optional[str] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L424) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L250) -Validates all topics exist and changelogs have correct topic and rep factor. +Search through all the topics in the given workspace id to see if there is a -Issues are pooled and raised as an Exception once inspections are complete. +match with the provided topic. - +**Arguments**: -## quixstreams.state.rocksdb.windowed.store +- `workspace_id`: the workspace to search in +- `topic`: the topic to search for +- `timeout`: response timeout (seconds); Default 30 - +**Returns**: -### WindowedRocksDBStore +the workspace_id if success, else None + + + +#### QuixKafkaConfigsBuilder.search\_for\_topic\_workspace ```python -class WindowedRocksDBStore(RocksDBStore) +def search_for_topic_workspace(topic: str, + timeout: Optional[float] = None + ) -> Optional[dict] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/store.py#L10) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L271) -RocksDB-based windowed state store. +Find what workspace a topic belongs to. -It keeps track of individual store partitions and provides access to the -partitions' transactions. 
+If there is only one workspace altogether, it is assumed to be the workspace. +More than one means each workspace will be searched until the first hit. - +**Arguments**: -#### WindowedRocksDBStore.\_\_init\_\_ +- `topic`: the topic to search for +- `timeout`: response timeout (seconds); Default 30 + +**Returns**: + +workspace data dict if topic search success, else None + + + +#### QuixKafkaConfigsBuilder.create\_topics ```python -def __init__( - name: str, - topic: str, - base_dir: str, - changelog_producer_factory: Optional[ChangelogProducerFactory] = None, - options: Optional[RocksDBOptionsType] = None) +def create_topics(topics: List[Topic], + timeout: Optional[float] = None, + finalize_timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/store.py#L18) - -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L369) -- `name`: a unique store name -- `topic`: a topic name for this store -- `base_dir`: path to a directory with the state -- `changelog_producer_factory`: a ChangelogProducerFactory instance -if using changelogs -- `options`: RocksDB options. If `None`, the default options will be used. +Create topics in a Quix cluster. - +**Arguments**: -## quixstreams.state.rocksdb.windowed.partition +- `topics`: a list of `Topic` objects +- `timeout`: response timeout (seconds); Default 30 +- `finalize_timeout`: topic finalization timeout (seconds); Default 60 +marked as "Ready" (and thus ready to produce to/consume from). 
- + -### WindowedRocksDBStorePartition +#### QuixKafkaConfigsBuilder.get\_topic ```python -class WindowedRocksDBStorePartition(RocksDBStorePartition) +def get_topic(topic_name: str, + timeout: Optional[float] = None) -> Optional[dict] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/partition.py#L24) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L419) -A base class to access windowed state in RocksDB. - -It represents a single RocksDB database. +return the topic ID (the actual cluster topic name) if it exists, else None -Besides the data, it keeps track of the latest observed timestamp and -stores the expiration index to delete expired windows. +>***NOTE***: if the name registered in Quix is instead the workspace-prefixed +version, this returns None unless that exact name was created WITHOUT the +Quix API. **Arguments**: -- `path`: an absolute path to the RocksDB folder -- `options`: RocksDB options. If `None`, the default options will be used. 
- - - -## quixstreams.state.rocksdb.windowed.metadata +- `topic_name`: name of the topic +- `timeout`: response timeout (seconds); Default 30 - +**Returns**: -## quixstreams.state.rocksdb.windowed.transaction +response dict of the topic info if topic found, else None - + -### WindowedRocksDBPartitionTransaction +#### QuixKafkaConfigsBuilder.confirm\_topics\_exist ```python -class WindowedRocksDBPartitionTransaction(RocksDBPartitionTransaction) +def confirm_topics_exist(topics: Union[List[Topic], List[str]], + timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/transaction.py#L22) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L451) - +Confirm whether the desired set of topics exists in the Quix workspace. -#### WindowedRocksDBPartitionTransaction.expire\_windows +**Arguments**: + +- `topics`: a list of `Topic` or topic names +- `timeout`: response timeout (seconds); Default 30 + + + +#### QuixKafkaConfigsBuilder.get\_application\_config ```python -def expire_windows(duration_ms: int, - prefix: bytes, - grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]] +def get_application_config(consumer_group_id: str) -> QuixApplicationConfig ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/transaction.py#L105) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L483) -Get a list of expired windows from RocksDB considering latest timestamp, +Get all the necessary attributes for an Application to run on Quix Cloud. -window size and grace period. 
-It marks the latest found window as expired in the expiration index, so -calling this method multiple times will yield different results for the same -"latest timestamp". +**Arguments**: -How it works: -- First, it looks for the start time of the last expired window for the current - prefix using expiration cache. If it's found, it will be used to reduce - the search space and to avoid returning already expired windows. -- Then it goes over window segments and fetches the windows - that should be expired. -- At last, it updates the expiration cache with the start time of the latest - found windows +- `consumer_group_id`: consumer group id, if needed **Returns**: -sorted list of tuples in format `((start, end), value)` +a QuixApplicationConfig instance + + + +## quixstreams.state.rocksdb.serialization @@ -4054,7 +4459,7 @@ sorted list of tuples in format `((start, end), value)` def parse_window_key(key: bytes) -> Tuple[bytes, int, int] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/serialization.py#L12) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/serialization.py#L12) Parse the window key from Rocksdb into (message_key, start, end) structure. 
@@ -4077,7 +4482,7 @@ a tuple with message key, start timestamp, end timestamp def encode_window_key(start_ms: int, end_ms: int) -> bytes ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/serialization.py#L39) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/serialization.py#L39) Encode window start and end timestamps into bytes of the following format: @@ -4102,7 +4507,7 @@ window timestamps as bytes def encode_window_prefix(prefix: bytes, start_ms: int) -> bytes ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/serialization.py#L53) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/serialization.py#L53) Encode window prefix and start time to iterate over keys in RocksDB @@ -4118,6 +4523,79 @@ Format: bytes + + +## quixstreams.state.rocksdb.windowed.metadata + + + +## quixstreams.state.rocksdb.windowed.store + + + +### WindowedRocksDBStore + +```python +class WindowedRocksDBStore(RocksDBStore) +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/store.py#L10) + +RocksDB-based windowed state store. + +It keeps track of individual store partitions and provides access to the +partitions' transactions. 
+ + + +#### WindowedRocksDBStore.\_\_init\_\_ + +```python +def __init__( + name: str, + topic: str, + base_dir: str, + changelog_producer_factory: Optional[ChangelogProducerFactory] = None, + options: Optional[RocksDBOptionsType] = None) +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/store.py#L18) + +**Arguments**: + +- `name`: a unique store name +- `topic`: a topic name for this store +- `base_dir`: path to a directory with the state +- `changelog_producer_factory`: a ChangelogProducerFactory instance +if using changelogs +- `options`: RocksDB options. If `None`, the default options will be used. + + + +## quixstreams.state.rocksdb.windowed.partition + + + +### WindowedRocksDBStorePartition + +```python +class WindowedRocksDBStorePartition(RocksDBStorePartition) +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/partition.py#L24) + +A base class to access windowed state in RocksDB. + +It represents a single RocksDB database. + +Besides the data, it keeps track of the latest observed timestamp and +stores the expiration index to delete expired windows. + +**Arguments**: + +- `path`: an absolute path to the RocksDB folder +- `options`: RocksDB options. If `None`, the default options will be used. 
+ ## quixstreams.state.rocksdb.windowed.state @@ -4130,7 +4608,7 @@ bytes class WindowedTransactionState(WindowedState) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L9) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L9) @@ -4141,7 +4619,7 @@ def __init__(transaction: "WindowedRocksDBPartitionTransaction", prefix: bytes) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L12) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L12) A windowed state to be provided into `StreamingDataFrame` window functions. @@ -4159,7 +4637,7 @@ def get_window(start_ms: int, default: Any = None) -> Optional[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L23) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L23) Get the value of the window defined by `start` and `end` timestamps @@ -4183,7 +4661,7 @@ value or None if the key is not found and `default` is not provided def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L39) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L39) Set a value for the window. @@ -4205,7 +4683,7 @@ using the provided `timestamp`. 
def get_latest_timestamp() -> int ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L60) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L60) Get the latest observed timestamp for the current state partition. @@ -4225,7 +4703,7 @@ def expire_windows(duration_ms: int, grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L72) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L72) Get a list of expired windows from RocksDB considering the current latest timestamp, window duration and grace period. @@ -4234,47 +4712,55 @@ It also marks the latest found window as expired in the expiration index, so calling this method multiple times will yield different results for the same "latest timestamp". - + -## quixstreams.state.rocksdb.options +## quixstreams.state.rocksdb.windowed.transaction - + -### RocksDBOptions +### WindowedRocksDBPartitionTransaction ```python -@dataclasses.dataclass(frozen=True) -class RocksDBOptions(RocksDBOptionsType) +class WindowedRocksDBPartitionTransaction(RocksDBPartitionTransaction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L25) - -RocksDB database options. - -**Arguments**: - -- `dumps`: function to dump data to JSON -- `loads`: function to load data from JSON -- `open_max_retries`: number of times to retry opening the database -if it's locked by another process. To disable retrying, pass 0 -- `open_retry_backoff`: number of seconds to wait between each retry. 
-Please see `rocksdict.Options` for a complete description of other options. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/transaction.py#L22) - + -#### RocksDBOptions.to\_options +#### WindowedRocksDBPartitionTransaction.expire\_windows ```python -def to_options() -> rocksdict.Options +def expire_windows(duration_ms: int, + prefix: bytes, + grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L53) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/transaction.py#L105) + +Get a list of expired windows from RocksDB considering latest timestamp, + +window size and grace period. +It marks the latest found window as expired in the expiration index, so +calling this method multiple times will yield different results for the same +"latest timestamp". -Convert parameters to `rocksdict.Options` +How it works: +- First, it looks for the start time of the last expired window for the current + prefix using expiration cache. If it's found, it will be used to reduce + the search space and to avoid returning already expired windows. +- Then it goes over window segments and fetches the windows + that should be expired. 
+- At last, it updates the expiration cache with the start time of the latest + found windows **Returns**: -instance of `rocksdict.Options` +sorted list of tuples in format `((start, end), value)` + + + +## quixstreams.state.rocksdb @@ -4288,7 +4774,7 @@ instance of `rocksdict.Options` class RocksDBStore(Store) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L19) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L19) RocksDB-based state store. @@ -4308,7 +4794,7 @@ def __init__( options: Optional[options_type] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L29) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L29) **Arguments**: @@ -4328,7 +4814,7 @@ if using changelogs def topic() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L53) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L53) Store topic name @@ -4341,7 +4827,7 @@ Store topic name def name() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L60) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L60) Store name @@ -4354,7 +4840,7 @@ Store name def partitions() -> Dict[int, RocksDBStorePartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L67) +[[VIEW 
SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L67) Mapping of assigned store partitions @@ -4366,7 +4852,7 @@ Mapping of assigned store partitions def assign_partition(partition: int) -> RocksDBStorePartition ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L80) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L80) Open and assign store partition. @@ -4389,7 +4875,7 @@ instance of`RocksDBStorePartition` def revoke_partition(partition: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L117) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L117) Revoke and close the assigned store partition. @@ -4407,7 +4893,7 @@ If the partition is not assigned, it will log the message and return. def start_partition_transaction(partition: int) -> RocksDBPartitionTransaction ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L138) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L138) Start a new partition transaction. 
@@ -4430,293 +4916,59 @@ instance of `RocksDBPartitionTransaction` def close() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L160) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L160) Close the store and revoke all assigned partitions - - -## quixstreams.state.rocksdb.partition - - - -### RocksDBStorePartition - -```python -class RocksDBStorePartition(StorePartition) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L40) - -A base class to access state in RocksDB. - -It represents a single RocksDB database. - -Responsibilities: - 1. Managing access to the RocksDB instance - 2. Creating transactions to interact with data - 3. Flushing WriteBatches to the RocksDB - -It opens the RocksDB on `__init__`. If the db is locked by another process, -it will retry according to `open_max_retries` and `open_retry_backoff` options. - -**Arguments**: - -- `path`: an absolute path to the RocksDB folder -- `options`: RocksDB options. If `None`, the default options will be used. - - - -#### RocksDBStorePartition.begin - -```python -def begin() -> RocksDBPartitionTransaction -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L75) - -Create a new `RocksDBTransaction` object. - -Using `RocksDBTransaction` is a recommended way for accessing the data. 
- -**Returns**: - -an instance of `RocksDBTransaction` - - - -#### RocksDBStorePartition.recover\_from\_changelog\_message - -```python -def recover_from_changelog_message( - changelog_message: ConfluentKafkaMessageProto, committed_offset: int) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L128) - -Updates state from a given changelog message. - -The actual update may be skipped when both conditions are met: - -- The changelog message has headers with the processed message offset. -- This processed offset is larger than the latest committed offset for the same - topic partition. - -This way the state does not apply the state changes for not-yet-committed -messages and improves the state consistency guarantees. - -**Arguments**: - -- `changelog_message`: A raw Confluent message read from a changelog topic. -- `committed_offset`: latest committed offset for the partition - - - -#### RocksDBStorePartition.set\_changelog\_offset - -```python -def set_changelog_offset(changelog_offset: int) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L169) - -Set the changelog offset based on a message (usually an "offset-only" message). - -Used during recovery. 
- -**Arguments**: - -- `changelog_offset`: A changelog offset - - - -#### RocksDBStorePartition.write - -```python -def write(batch: WriteBatch) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L179) - -Write `WriteBatch` to RocksDB - -**Arguments**: - -- `batch`: an instance of `rocksdict.WriteBatch` - - - -#### RocksDBStorePartition.get - -```python -def get(key: bytes, - default: Any = None, - cf_name: str = "default") -> Union[None, bytes, Any] -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L186) - -Get a key from RocksDB. - -**Arguments**: - -- `key`: a key encoded to `bytes` -- `default`: a default value to return if the key is not found. -- `cf_name`: rocksdb column family name. Default - "default" - -**Returns**: - -a value if the key is present in the DB. Otherwise, `default` - - - -#### RocksDBStorePartition.exists - -```python -def exists(key: bytes, cf_name: str = "default") -> bool -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L200) - -Check if a key is present in the DB. - -**Arguments**: - -- `key`: a key encoded to `bytes`. -- `cf_name`: rocksdb column family name. Default - "default" - -**Returns**: - -`True` if the key is present, `False` otherwise. 
- - - -#### RocksDBStorePartition.get\_processed\_offset - -```python -def get_processed_offset() -> Optional[int] -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L211) - -Get last processed offset for the given partition - -**Returns**: - -offset or `None` if there's no processed offset yet - - - -#### RocksDBStorePartition.get\_changelog\_offset - -```python -def get_changelog_offset() -> Optional[int] -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L223) - -Get offset that the changelog is up-to-date with. - -**Returns**: - -offset or `None` if there's no processed offset yet - - - -#### RocksDBStorePartition.close - -```python -def close() -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L233) - -Close the underlying RocksDB - - - -#### RocksDBStorePartition.path - -```python -@property -def path() -> str -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L246) - -Absolute path to RocksDB database folder - -**Returns**: - -file path - - - -#### RocksDBStorePartition.destroy - -```python -@classmethod -def destroy(cls, path: str) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L254) - -Delete underlying RocksDB database - -The database must be closed first. 
- -**Arguments**: - -- `path`: an absolute path to the RocksDB folder - - - -#### RocksDBStorePartition.get\_column\_family\_handle - -```python -def get_column_family_handle(cf_name: str) -> ColumnFamily -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L264) - -Get a column family handle to pass to it WriteBatch. + -This method will cache the CF handle instance to avoid creating them -repeatedly. +## quixstreams.state.rocksdb.exceptions -**Arguments**: + -- `cf_name`: column family name +## quixstreams.state.rocksdb.metadata -**Returns**: + -instance of `rocksdict.ColumnFamily` +## quixstreams.state.rocksdb.options - + -#### RocksDBStorePartition.get\_column\_family +### RocksDBOptions ```python -def get_column_family(cf_name: str) -> Rdict +@dataclasses.dataclass(frozen=True) +class RocksDBOptions(RocksDBOptionsType) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L285) - -Get a column family instance. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L25) -This method will cache the CF instance to avoid creating them repeatedly. +RocksDB database options. **Arguments**: -- `cf_name`: column family name +- `dumps`: function to dump data to JSON +- `loads`: function to load data from JSON +- `open_max_retries`: number of times to retry opening the database +if it's locked by another process. To disable retrying, pass 0 +- `open_retry_backoff`: number of seconds to wait between each retry. +Please see `rocksdict.Options` for a complete description of other options. 
-**Returns**: + -instance of `rocksdict.Rdict` for the given column family +#### RocksDBOptions.to\_options - +```python +def to_options() -> rocksdict.Options +``` -## quixstreams.state.rocksdb.metadata +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L53) + +Convert parameters to `rocksdict.Options` + +**Returns**: + +instance of `rocksdict.Options` @@ -4730,7 +4982,7 @@ instance of `rocksdict.Rdict` for the given column family class RocksDBPartitionTransaction(PartitionTransaction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L61) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L61) A transaction class to perform simple key-value operations like "get", "set", "delete" and "exists" on a single RocksDB partition. @@ -4770,7 +5022,7 @@ def __init__(partition: "RocksDBStorePartition", changelog_producer: Optional[ChangelogProducer] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L100) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L100) **Arguments**: @@ -4791,7 +5043,7 @@ def get(key: Any, cf_name: str = "default") -> Optional[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L124) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L124) Get a key from the store. 
@@ -4821,7 +5073,7 @@ value or `default` def set(key: Any, value: Any, prefix: bytes, cf_name: str = "default") ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L164) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L164) Set a key to the store. @@ -4843,7 +5095,7 @@ It first updates the key in the update cache. def delete(key: Any, prefix: bytes, cf_name: str = "default") ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L187) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L187) Delete a key from the store. @@ -4864,7 +5116,7 @@ It first deletes the key from the update cache. def exists(key: Any, prefix: bytes, cf_name: str = "default") -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L208) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L208) Check if a key exists in the store. @@ -4878,937 +5130,773 @@ It first looks up the key in the update cache. **Returns**: -`True` if the key exists, `False` otherwise. - - - -#### RocksDBPartitionTransaction.prepare - -```python -@_validate_transaction_status(PartitionTransactionStatus.STARTED) -def prepare(processed_offset: int) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L235) - -Produce changelog messages to the changelog topic for all changes accumulated - -in this transaction and prepare transaction to flush its state to the state -store. 
- -After successful `prepare()`, the transaction status is changed to PREPARED, -and it cannot receive updates anymore. - -If changelog is disabled for this application, no updates will be produced -to the changelog topic. - -**Arguments**: - -- `processed_offset`: the offset of the latest processed message - - - -#### RocksDBPartitionTransaction.flush - -```python -@_validate_transaction_status(PartitionTransactionStatus.STARTED, - PartitionTransactionStatus.PREPARED) -def flush(processed_offset: Optional[int] = None, - changelog_offset: Optional[int] = None) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L259) - -Flush the recent updates to the database. - -It writes the WriteBatch to RocksDB and marks itself as finished. - -If writing fails, the transaction is marked as failed and -cannot be used anymore. - ->***NOTE:*** If no keys have been modified during the transaction - (i.e. no "set" or "delete" have been called at least once), it will - not flush ANY data to the database including the offset to optimize - I/O. - -**Arguments**: - -- `processed_offset`: offset of the last processed message, optional. -- `changelog_offset`: offset of the last produced changelog message, -optional. - - - -#### RocksDBPartitionTransaction.completed - -```python -@property -def completed() -> bool -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L294) - -Check if the transaction is completed. - -It doesn't indicate whether transaction is successful or not. -Use `RocksDBTransaction.failed` for that. - -The completed transaction should not be re-used. - -**Returns**: - -`True` if transaction is completed, `False` otherwise. 
- - - -#### RocksDBPartitionTransaction.prepared - -```python -@property -def prepared() -> bool -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L308) - -Check if the transaction is in PREPARED status. - -Prepared transaction successfully flushed its changelog and cannot receive -updates anymore, but its state is not yet flushed to the disk - -**Returns**: - -`True` if transaction is prepared, `False` otherwise. - - - -#### RocksDBPartitionTransaction.failed - -```python -@property -def failed() -> bool -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L320) - -Check if the transaction has failed. - -The failed transaction should not be re-used because the update cache -and - -**Returns**: - -`True` if transaction is failed, `False` otherwise. - - - -#### RocksDBPartitionTransaction.changelog\_topic\_partition - -```python -@property -def changelog_topic_partition() -> Optional[Tuple[str, int]] -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L332) - -Return the changelog topic-partition for the StorePartition of this transaction. - -Returns `None` if changelog_producer is not provided. - -**Returns**: - -(topic, partition) or None - - - -#### RocksDBPartitionTransaction.as\_state - -```python -def as_state(prefix: Any = DEFAULT_PREFIX) -> TransactionState -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L346) - -Create a one-time use `TransactionState` object with a limited CRUD interface - -to be provided to `StreamingDataFrame` operations. - -The `TransactionState` will prefix all the keys with the supplied `prefix` -for all underlying operations. 
- -**Arguments**: - -- `prefix`: a prefix to be used for all keys - -**Returns**: - -an instance of `TransactionState` - - - -## quixstreams.state.rocksdb - - - -## quixstreams.state.rocksdb.types - - - -## quixstreams.state.rocksdb.exceptions - - - -## quixstreams.state.rocksdb.serialization - - - -## quixstreams.state.recovery - - - -### RecoveryPartition - -```python -class RecoveryPartition() -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L24) - -A changelog topic partition mapped to a respective `StorePartition` with helper -methods to determine its current recovery status. - -Since `StorePartition`s do recovery directly, it also handles recovery transactions. - - - -#### RecoveryPartition.offset - -```python -@property -def offset() -> int -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L63) - -Get the changelog offset from the underlying `StorePartition`. - -**Returns**: - -changelog offset (int) - - - -#### RecoveryPartition.needs\_recovery - -```python -@property -def needs_recovery() -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L72) - -Determine whether recovery is necessary for underlying `StorePartition`. +`True` if the key exists, `False` otherwise. - + -#### RecoveryPartition.needs\_offset\_update +#### RocksDBPartitionTransaction.prepare ```python -@property -def needs_offset_update() +@_validate_transaction_status(PartitionTransactionStatus.STARTED) +def prepare(processed_offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L81) - -Determine if an offset update is required. 
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L235) -Usually checked during assign if recovery was not required. +Produce changelog messages to the changelog topic for all changes accumulated - +in this transaction and prepare transaction to flush its state to the state +store. -#### RecoveryPartition.update\_offset +After successful `prepare()`, the transaction status is changed to PREPARED, +and it cannot receive updates anymore. -```python -def update_offset() -``` +If changelog is disabled for this application, no updates will be produced +to the changelog topic. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L89) +**Arguments**: -Update only the changelog offset of a StorePartition. +- `processed_offset`: the offset of the latest processed message - + -#### RecoveryPartition.recover\_from\_changelog\_message +#### RocksDBPartitionTransaction.flush ```python -def recover_from_changelog_message( - changelog_message: ConfluentKafkaMessageProto) +@_validate_transaction_status(PartitionTransactionStatus.STARTED, + PartitionTransactionStatus.PREPARED) +def flush(processed_offset: Optional[int] = None, + changelog_offset: Optional[int] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L109) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L259) -Recover the StorePartition using a message read from its respective changelog. +Flush the recent updates to the database. + +It writes the WriteBatch to RocksDB and marks itself as finished. + +If writing fails, the transaction is marked as failed and +cannot be used anymore. + +>***NOTE:*** If no keys have been modified during the transaction + (i.e. 
no "set" or "delete" have been called at least once), it will + not flush ANY data to the database including the offset to optimize + I/O. **Arguments**: -- `changelog_message`: A confluent kafka message (everything as bytes) +- `processed_offset`: offset of the last processed message, optional. +- `changelog_offset`: offset of the last produced changelog message, +optional. - + -#### RecoveryPartition.set\_watermarks +#### RocksDBPartitionTransaction.completed ```python -def set_watermarks(lowwater: int, highwater: int) +@property +def completed() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L121) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L294) -Set the changelog watermarks as gathered from Consumer.get_watermark_offsets() +Check if the transaction is completed. -**Arguments**: +It doesn't indicate whether transaction is successful or not. +Use `RocksDBTransaction.failed` for that. -- `lowwater`: topic partition lowwater -- `highwater`: topic partition highwater +The completed transaction should not be re-used. - +**Returns**: -### ChangelogProducerFactory +`True` if transaction is completed, `False` otherwise. + + + +#### RocksDBPartitionTransaction.prepared ```python -class ChangelogProducerFactory() +@property +def prepared() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L132) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L308) -Generates ChangelogProducers, which produce changelog messages to a StorePartition. +Check if the transaction is in PREPARED status. 
- +Prepared transaction successfully flushed its changelog and cannot receive +updates anymore, but its state is not yet flushed to the disk -#### ChangelogProducerFactory.\_\_init\_\_ +**Returns**: + +`True` if transaction is prepared, `False` otherwise. + + + +#### RocksDBPartitionTransaction.failed ```python -def __init__(changelog_name: str, producer: RowProducer) +@property +def failed() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L137) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L320) -**Arguments**: +Check if the transaction has failed. -- `changelog_name`: changelog topic name -- `producer`: a RowProducer (not shared with `Application` instance) +The failed transaction should not be re-used because the update cache +and **Returns**: -a ChangelogWriter instance +`True` if transaction is failed, `False` otherwise. - + -#### ChangelogProducerFactory.get\_partition\_producer +#### RocksDBPartitionTransaction.changelog\_topic\_partition ```python -def get_partition_producer(partition_num) -> "ChangelogProducer" +@property +def changelog_topic_partition() -> Optional[Tuple[str, int]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L147) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L332) -Generate a ChangelogProducer for producing to a specific partition number +Return the changelog topic-partition for the StorePartition of this transaction. -(and thus StorePartition). +Returns `None` if changelog_producer is not provided. 
-**Arguments**: +**Returns**: -- `partition_num`: source topic partition number +(topic, partition) or None - + -### ChangelogProducer +#### RocksDBPartitionTransaction.as\_state ```python -class ChangelogProducer() +def as_state(prefix: Any = DEFAULT_PREFIX) -> TransactionState ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L161) - -Generated for a `StorePartition` to produce state changes to its respective -kafka changelog partition. - - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L346) -#### ChangelogProducer.\_\_init\_\_ +Create a one-time use `TransactionState` object with a limited CRUD interface -```python -def __init__(changelog_name: str, partition: int, producer: RowProducer) -``` +to be provided to `StreamingDataFrame` operations. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L167) +The `TransactionState` will prefix all the keys with the supplied `prefix` +for all underlying operations. **Arguments**: -- `changelog_name`: A changelog topic name -- `partition`: source topic partition number -- `producer`: a RowProducer (not shared with `Application` instance) - - +- `prefix`: a prefix to be used for all keys -#### ChangelogProducer.produce +**Returns**: -```python -def produce(key: bytes, - value: Optional[bytes] = None, - headers: Optional[MessageHeadersMapping] = None) -``` +an instance of `TransactionState` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L190) + -Produce a message to a changelog topic partition. 
+## quixstreams.state.rocksdb.types -**Arguments**: + -- `key`: message key (same as state key, including prefixes) -- `value`: message value (same as state value) -- `headers`: message headers (includes column family info) +## quixstreams.state.rocksdb.partition - + -### RecoveryManager +### RocksDBStorePartition ```python -class RecoveryManager() +class RocksDBStorePartition(StorePartition) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L215) - -Manages all consumer-related aspects of recovery, including: - - assigning/revoking, pausing/resuming topic partitions (especially changelogs) - - consuming changelog messages until state is updated fully. - -Also tracks/manages `RecoveryPartitions`, which are assigned/tracked only if -recovery for that changelog partition is required. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L40) -Recovery is attempted from the `Application` after any new partition assignment. +A base class to access state in RocksDB. - +It represents a single RocksDB database. -#### RecoveryManager.partitions +Responsibilities: + 1. Managing access to the RocksDB instance + 2. Creating transactions to interact with data + 3. Flushing WriteBatches to the RocksDB -```python -@property -def partitions() -> Dict[int, Dict[str, RecoveryPartition]] -``` +It opens the RocksDB on `__init__`. If the db is locked by another process, +it will retry according to `open_max_retries` and `open_retry_backoff` options. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L234) +**Arguments**: -Returns a mapping of assigned RecoveryPartitions in the following format: -{: {: }} +- `path`: an absolute path to the RocksDB folder +- `options`: RocksDB options. If `None`, the default options will be used. 
- + -#### RecoveryManager.has\_assignments +#### RocksDBStorePartition.begin ```python -@property -def has_assignments() -> bool +def begin() -> RocksDBPartitionTransaction ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L242) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L75) -Whether the Application has assigned RecoveryPartitions +Create a new `RocksDBTransaction` object. + +Using `RocksDBTransaction` is a recommended way for accessing the data. **Returns**: -has assignments, as bool +an instance of `RocksDBTransaction` - + -#### RecoveryManager.recovering +#### RocksDBStorePartition.recover\_from\_changelog\_message ```python -@property -def recovering() -> bool +def recover_from_changelog_message( + changelog_message: ConfluentKafkaMessageProto, committed_offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L251) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L128) -Whether the Application is currently recovering +Updates state from a given changelog message. -**Returns**: +The actual update may be skipped when both conditions are met: -is recovering, as bool +- The changelog message has headers with the processed message offset. +- This processed offset is larger than the latest committed offset for the same + topic partition. - +This way the state does not apply the state changes for not-yet-committed +messages and improves the state consistency guarantees. -#### RecoveryManager.register\_changelog +**Arguments**: + +- `changelog_message`: A raw Confluent message read from a changelog topic. 
+- `committed_offset`: latest committed offset for the partition + + + +#### RocksDBStorePartition.set\_changelog\_offset ```python -def register_changelog(topic_name: str, store_name: str) -> Topic +def set_changelog_offset(changelog_offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L259) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L169) -Register a changelog Topic with the TopicManager. +Set the changelog offset based on a message (usually an "offset-only" message). + +Used during recovery. **Arguments**: -- `topic_name`: source topic name -- `store_name`: name of the store +- `changelog_offset`: A changelog offset - + -#### RecoveryManager.do\_recovery +#### RocksDBStorePartition.write ```python -def do_recovery() +def write(batch: WriteBatch) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L271) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L179) -If there are any active RecoveryPartitions, do a recovery procedure. +Write `WriteBatch` to RocksDB -After, will resume normal `Application` processing. 
+**Arguments**: - +- `batch`: an instance of `rocksdict.WriteBatch` + + -#### RecoveryManager.assign\_partition +#### RocksDBStorePartition.get ```python -def assign_partition(topic: str, partition: int, committed_offset: int, - store_partitions: Dict[str, StorePartition]) +def get(key: bytes, + default: Any = None, + cf_name: str = "default") -> Union[None, bytes, Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L324) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L186) -Assigns `StorePartition`s (as `RecoveryPartition`s) ONLY IF recovery required. +Get a key from RocksDB. -Pauses active consumer partitions as needed. +**Arguments**: - +- `key`: a key encoded to `bytes` +- `default`: a default value to return if the key is not found. +- `cf_name`: rocksdb column family name. Default - "default" -#### RecoveryManager.revoke\_partition +**Returns**: + +a value if the key is present in the DB. Otherwise, `default` + + + +#### RocksDBStorePartition.exists ```python -def revoke_partition(partition_num: int) +def exists(key: bytes, cf_name: str = "default") -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L391) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L200) -revoke ALL StorePartitions (across all Stores) for a given partition number +Check if a key is present in the DB. **Arguments**: -- `partition_num`: partition number of source topic - - - -## quixstreams.state +- `key`: a key encoded to `bytes`. +- `cf_name`: rocksdb column family name. Default - "default" - +**Returns**: -## quixstreams.state.types +`True` if the key is present, `False` otherwise. 
- + -### Store +#### RocksDBStorePartition.get\_processed\_offset ```python -class Store(Protocol) +def get_processed_offset() -> Optional[int] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L11) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L211) -Abstract state store. +Get last processed offset for the given partition -It keeps track of individual store partitions and provides access to the -partitions' transactions. +**Returns**: - +offset or `None` if there's no processed offset yet -#### Store.topic + + +#### RocksDBStorePartition.get\_changelog\_offset ```python -@property -def topic() -> str +def get_changelog_offset() -> Optional[int] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L22) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L223) -Topic name +Get offset that the changelog is up-to-date with. 
- +**Returns**: -#### Store.name +offset or `None` if there's no processed offset yet + + + +#### RocksDBStorePartition.close ```python -@property -def name() -> str +def close() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L29) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L233) -Store name +Close the underlying RocksDB - + -#### Store.partitions +#### RocksDBStorePartition.path ```python @property -def partitions() -> Dict[int, "StorePartition"] +def path() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L36) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L246) -Mapping of assigned store partitions +Absolute path to RocksDB database folder **Returns**: -dict of "{partition: }" +file path - + -#### Store.assign\_partition +#### RocksDBStorePartition.destroy ```python -def assign_partition(partition: int) -> "StorePartition" +@classmethod +def destroy(cls, path: str) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L43) - -Assign new store partition +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L254) -**Arguments**: +Delete underlying RocksDB database -- `partition`: partition number +The database must be closed first. 
-**Returns**: +**Arguments**: -instance of `StorePartition` +- `path`: an absolute path to the RocksDB folder - + -#### Store.revoke\_partition +#### RocksDBStorePartition.get\_column\_family\_handle ```python -def revoke_partition(partition: int) +def get_column_family_handle(cf_name: str) -> ColumnFamily ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L52) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L264) -Revoke assigned store partition +Get a column family handle to pass to it WriteBatch. + +This method will cache the CF handle instance to avoid creating them +repeatedly. **Arguments**: -- `partition`: partition number +- `cf_name`: column family name - +**Returns**: -#### Store.start\_partition\_transaction +instance of `rocksdict.ColumnFamily` + + + +#### RocksDBStorePartition.get\_column\_family ```python -def start_partition_transaction(partition: int) -> "PartitionTransaction" +def get_column_family(cf_name: str) -> Rdict ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L60) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L285) -Start a new partition transaction. +Get a column family instance. -`PartitionTransaction` is the primary interface for working with data in Stores. +This method will cache the CF instance to avoid creating them repeatedly. 
**Arguments**: -- `partition`: partition number +- `cf_name`: column family name **Returns**: -instance of `PartitionTransaction` +instance of `rocksdict.Rdict` for the given column family - + -#### Store.close +## quixstreams.state + + + +## quixstreams.state.manager + + + +### StateStoreManager ```python -def close() +class StateStoreManager() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L69) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L24) -Close store and revoke all store partitions +Class for managing state stores and partitions. - +StateStoreManager is responsible for: + - reacting to rebalance callbacks + - managing the individual state stores + - providing access to store transactions -### StorePartition + + +#### StateStoreManager.stores ```python -class StorePartition(Protocol) +@property +def stores() -> Dict[str, Dict[str, Store]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L80) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L62) -A base class to access state in the underlying storage. -It represents a single instance of some storage (e.g. a single database for -the persistent storage). 
+Map of registered state stores - +**Returns**: -#### StorePartition.path +dict in format {topic: {store_name: store}} + + + +#### StateStoreManager.recovery\_required ```python @property -def path() -> str +def recovery_required() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L89) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L70) -Absolute path to RocksDB database folder +Whether recovery needs to be done. - + -#### StorePartition.begin +#### StateStoreManager.using\_changelogs ```python -def begin() -> "PartitionTransaction" +@property +def using_changelogs() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L95) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L79) -State new `PartitionTransaction` +Whether the StateStoreManager is using changelog topics - +**Returns**: -#### StorePartition.recover\_from\_changelog\_message +using changelogs, as bool + + + +#### StateStoreManager.do\_recovery ```python -def recover_from_changelog_message( - changelog_message: ConfluentKafkaMessageProto, committed_offset: int) +def do_recovery() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L100) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L87) -Updates state from a given changelog message. +Perform a state recovery, if necessary. -**Arguments**: + -- `changelog_message`: A raw Confluent message read from a changelog topic. 
-- `committed_offset`: latest committed offset for the partition +#### StateStoreManager.stop\_recovery - +```python +def stop_recovery() +``` -#### StorePartition.get\_processed\_offset +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L93) + +Stop recovery (called during app shutdown). + + + +#### StateStoreManager.get\_store ```python -def get_processed_offset() -> Optional[int] +def get_store(topic: str, store_name: str = DEFAULT_STATE_STORE_NAME) -> Store ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L111) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L99) -Get last processed offset for the given partition +Get a store for given name and topic + +**Arguments**: + +- `topic`: topic name +- `store_name`: store name **Returns**: -offset or `None` if there's no processed offset yet +instance of `Store` - + -#### StorePartition.get\_changelog\_offset +#### StateStoreManager.register\_store ```python -def get_changelog_offset() -> Optional[int] +def register_store(topic_name: str, + store_name: str = DEFAULT_STATE_STORE_NAME) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L118) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L132) -Get offset that the changelog is up-to-date with. +Register a state store to be managed by StateStoreManager. -**Returns**: +During processing, the StateStoreManager will react to rebalancing callbacks +and assign/revoke the partitions for registered stores. -offset or `None` if there's no processed offset yet +Each store can be registered only once for each topic. 
- +**Arguments**: -#### StorePartition.set\_changelog\_offset +- `topic_name`: topic name +- `store_name`: store name + + + +#### StateStoreManager.register\_windowed\_store ```python -def set_changelog_offset(changelog_offset: int) +def register_windowed_store(topic_name: str, store_name: str) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L125) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L157) -Set the changelog offset based on a message (usually an "offset-only" message). +Register a windowed state store to be managed by StateStoreManager. -Used during recovery. +During processing, the StateStoreManager will react to rebalancing callbacks +and assign/revoke the partitions for registered stores. + +Each window store can be registered only once for each topic. **Arguments**: -- `changelog_offset`: A changelog offset +- `topic_name`: topic name +- `store_name`: store name - + -### State +#### StateStoreManager.clear\_stores ```python -class State(Protocol) +def clear_stores() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L136) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L180) -Primary interface for working with key-value state data from `StreamingDataFrame` +Delete all state stores managed by StateStoreManager. 
- + -#### State.get +#### StateStoreManager.on\_partition\_assign ```python -def get(key: Any, default: Any = None) -> Optional[Any] +def on_partition_assign(topic: str, partition: int, + committed_offset: int) -> List[StorePartition] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L141) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L195) -Get the value for key if key is present in the state, else default +Assign store partitions for each registered store for the given `TopicPartition` + +and return a list of assigned `StorePartition` objects. **Arguments**: -- `key`: key -- `default`: default value to return if the key is not found +- `topic`: Kafka topic name +- `partition`: Kafka topic partition +- `committed_offset`: latest committed offset for the partition **Returns**: -value or None if the key is not found and `default` is not provided +list of assigned `StorePartition` - + -#### State.set +#### StateStoreManager.on\_partition\_revoke ```python -def set(key: Any, value: Any) +def on_partition_revoke(topic: str, partition: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L151) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L221) -Set value for the key. 
+Revoke store partitions for each registered store for the given `TopicPartition` **Arguments**: -- `key`: key -- `value`: value +- `topic`: Kafka topic name +- `partition`: Kafka topic partition - + -#### State.delete +#### StateStoreManager.init ```python -def delete(key: Any) +def init() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L159) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L234) -Delete value for the key. - -This function always returns `None`, even if value is not found. - -**Arguments**: +Initialize `StateStoreManager` and create a store directory -- `key`: key - + -#### State.exists +#### StateStoreManager.close ```python -def exists(key: Any) -> bool +def close() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L168) - -Check if the key exists in state. - -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L241) -- `key`: key +Close all registered stores -**Returns**: + -True if key exists, False otherwise +## quixstreams.state.state - + -### PartitionTransaction +### TransactionState ```python -class PartitionTransaction(Protocol) +class TransactionState(State) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L177) - -A transaction class to perform simple key-value operations like -"get", "set", "delete" and "exists" on a single storage partition. 
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L6) - + -#### PartitionTransaction.as\_state +#### TransactionState.\_\_init\_\_ ```python -def as_state(prefix: Any) -> State +def __init__(prefix: bytes, transaction: PartitionTransaction) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L183) - -Create an instance implementing the `State` protocol to be provided +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L12) -to `StreamingDataFrame` functions. -All operations called on this State object will be prefixed with -the supplied `prefix`. +Simple key-value state to be provided into `StreamingDataFrame` functions -**Returns**: +**Arguments**: -an instance implementing the `State` protocol +- `transaction`: instance of `PartitionTransaction` - + -#### PartitionTransaction.get +#### TransactionState.get ```python -def get(key: Any, prefix: bytes, default: Any = None) -> Optional[Any] +def get(key: Any, default: Any = None) -> Optional[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L194) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L21) Get the value for key if key is present in the state, else default **Arguments**: - `key`: key -- `prefix`: a key prefix - `default`: default value to return if the key is not found **Returns**: value or None if the key is not found and `default` is not provided - + -#### PartitionTransaction.set +#### TransactionState.set ```python -def set(key: Any, prefix: bytes, value: Any) +def set(key: Any, value: Any) ``` -[[VIEW 
SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L205) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L31) Set value for the key. **Arguments**: - `key`: key -- `prefix`: a key prefix - `value`: value - + -#### PartitionTransaction.delete +#### TransactionState.delete ```python -def delete(key: Any, prefix: bytes) +def delete(key: Any) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L214) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L39) Delete value for the key. @@ -5817,1749 +5905,1732 @@ This function always returns `None`, even if value is not found. **Arguments**: - `key`: key -- `prefix`: a key prefix - + -#### PartitionTransaction.exists +#### TransactionState.exists ```python -def exists(key: Any, prefix: bytes) -> bool +def exists(key: Any) -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L224) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L48) Check if the key exists in state. **Arguments**: - `key`: key -- `prefix`: a key prefix **Returns**: True if key exists, False otherwise - - -#### PartitionTransaction.failed - -```python -@property -def failed() -> bool -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L234) - -Return `True` if transaction failed to update data at some point. - -Failed transactions cannot be re-used. 
- -**Returns**: + -bool +## quixstreams.state.types - + -#### PartitionTransaction.completed +### Store ```python -@property -def completed() -> bool +class Store(Protocol) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L244) - -Return `True` if transaction is successfully completed. - -Completed transactions cannot be re-used. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L11) -**Returns**: +Abstract state store. -bool +It keeps track of individual store partitions and provides access to the +partitions' transactions. - + -#### PartitionTransaction.prepared +#### Store.topic ```python @property -def prepared() -> bool +def topic() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L254) - -Return `True` if transaction is prepared completed. - -Prepared transactions cannot receive new updates, but can be flushed. - -**Returns**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L22) -bool +Topic name - + -#### PartitionTransaction.prepare +#### Store.name ```python -def prepare(processed_offset: int) +@property +def name() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L263) - -Produce changelog messages to the changelog topic for all changes accumulated - -in this transaction and prepare transcation to flush its state to the state -store. - -After successful `prepare()`, the transaction status is changed to PREPARED, -and it cannot receive updates anymore. - -If changelog is disabled for this application, no updates will be produced -to the changelog topic. 
- -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L29) -- `processed_offset`: the offset of the latest processed message +Store name - + -#### PartitionTransaction.changelog\_topic\_partition +#### Store.partitions ```python @property -def changelog_topic_partition() -> Optional[Tuple[str, int]] +def partitions() -> Dict[int, "StorePartition"] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L279) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L36) -Return the changelog topic-partition for the StorePartition of this transaction. - -Returns `None` if changelog_producer is not provided. +Mapping of assigned store partitions **Returns**: -(topic, partition) or None - - - -#### PartitionTransaction.flush - -```python -def flush(processed_offset: Optional[int] = None, - changelog_offset: Optional[int] = None) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L288) - -Flush the recent updates to the storage. - -**Arguments**: - -- `processed_offset`: offset of the last processed message, optional. -- `changelog_offset`: offset of the last produced changelog message, -optional. - - - -### WindowedState - -```python -class WindowedState(Protocol) -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L306) - -A windowed state to be provided into `StreamingDataFrame` window functions. 
+dict of "{partition: }" - + -#### WindowedState.get\_window +#### Store.assign\_partition ```python -def get_window(start_ms: int, - end_ms: int, - default: Any = None) -> Optional[Any] +def assign_partition(partition: int) -> "StorePartition" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L311) - -Get the value of the window defined by `start` and `end` timestamps +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L43) -if the window is present in the state, else default +Assign new store partition **Arguments**: -- `start_ms`: start of the window in milliseconds -- `end_ms`: end of the window in milliseconds -- `default`: default value to return if the key is not found +- `partition`: partition number **Returns**: -value or None if the key is not found and `default` is not provided +instance of `StorePartition` - + -#### WindowedState.update\_window +#### Store.revoke\_partition ```python -def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int) +def revoke_partition(partition: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L325) - -Set a value for the window. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L52) -This method will also update the latest observed timestamp in state partition -using the provided `timestamp`. 
+Revoke assigned store partition **Arguments**: -- `start_ms`: start of the window in milliseconds -- `end_ms`: end of the window in milliseconds -- `value`: value of the window -- `timestamp_ms`: current message timestamp in milliseconds +- `partition`: partition number - + -#### WindowedState.get\_latest\_timestamp +#### Store.start\_partition\_transaction ```python -def get_latest_timestamp() -> int +def start_partition_transaction(partition: int) -> "PartitionTransaction" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L339) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L60) -Get the latest observed timestamp for the current state partition. +Start a new partition transaction. -Use this timestamp to determine if the arriving event is late and should be -discarded from the processing. +`PartitionTransaction` is the primary interface for working with data in Stores. + +**Arguments**: + +- `partition`: partition number **Returns**: -latest observed event timestamp in milliseconds +instance of `PartitionTransaction` - + -#### WindowedState.expire\_windows +#### Store.close ```python -def expire_windows(duration_ms: int, - grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]] +def close() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L350) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L69) -Get a list of expired windows from RocksDB considering the current +Close store and revoke all store partitions -latest timestamp, window duration and grace period. + -It also marks the latest found window as expired in the expiration index, so -calling this method multiple times will yield different results for the same -"latest timestamp". 
+### StorePartition -**Arguments**: +```python +class StorePartition(Protocol) +``` -- `duration_ms`: duration of the windows in milliseconds -- `grace_ms`: grace period in milliseconds. Default - "0" +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L80) - +A base class to access state in the underlying storage. +It represents a single instance of some storage (e.g. a single database for +the persistent storage). -### WindowedPartitionTransaction + + +#### StorePartition.path ```python -class WindowedPartitionTransaction(Protocol) +@property +def path() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L367) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L89) - +Absolute path to RocksDB database folder -#### WindowedPartitionTransaction.failed + + +#### StorePartition.begin ```python -@property -def failed() -> bool +def begin() -> "PartitionTransaction" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L370) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L95) -Return `True` if transaction failed to update data at some point. +State new `PartitionTransaction` -Failed transactions cannot be re-used. + -**Returns**: +#### StorePartition.recover\_from\_changelog\_message -bool +```python +def recover_from_changelog_message( + changelog_message: ConfluentKafkaMessageProto, committed_offset: int) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L100) -#### WindowedPartitionTransaction.completed +Updates state from a given changelog message. 
+ +**Arguments**: + +- `changelog_message`: A raw Confluent message read from a changelog topic. +- `committed_offset`: latest committed offset for the partition + + + +#### StorePartition.get\_processed\_offset ```python -@property -def completed() -> bool +def get_processed_offset() -> Optional[int] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L380) - -Return `True` if transaction is successfully completed. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L111) -Completed transactions cannot be re-used. +Get last processed offset for the given partition **Returns**: -bool +offset or `None` if there's no processed offset yet - + -#### WindowedPartitionTransaction.prepared +#### StorePartition.get\_changelog\_offset ```python -@property -def prepared() -> bool +def get_changelog_offset() -> Optional[int] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L390) - -Return `True` if transaction is prepared completed. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L118) -Prepared transactions cannot receive new updates, but can be flushed. +Get offset that the changelog is up-to-date with. **Returns**: -bool +offset or `None` if there's no processed offset yet - + -#### WindowedPartitionTransaction.prepare +#### StorePartition.set\_changelog\_offset ```python -def prepare(processed_offset: int) +def set_changelog_offset(changelog_offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L399) - -Produce changelog messages to the changelog topic for all changes accumulated - -in this transaction and prepare transcation to flush its state to the state -store. 
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L125) -After successful `prepare()`, the transaction status is changed to PREPARED, -and it cannot receive updates anymore. +Set the changelog offset based on a message (usually an "offset-only" message). -If changelog is disabled for this application, no updates will be produced -to the changelog topic. +Used during recovery. **Arguments**: -- `processed_offset`: the offset of the latest processed message +- `changelog_offset`: A changelog offset - + -#### WindowedPartitionTransaction.get\_window +### State ```python -def get_window(start_ms: int, - end_ms: int, - prefix: bytes, - default: Any = None) -> Optional[Any] +class State(Protocol) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L416) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L136) -Get the value of the window defined by `start` and `end` timestamps +Primary interface for working with key-value state data from `StreamingDataFrame` -if the window is present in the state, else default + + +#### State.get + +```python +def get(key: Any, default: Any = None) -> Optional[Any] +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L141) + +Get the value for key if key is present in the state, else default **Arguments**: -- `start_ms`: start of the window in milliseconds -- `end_ms`: end of the window in milliseconds -- `prefix`: a key prefix +- `key`: key - `default`: default value to return if the key is not found **Returns**: value or None if the key is not found and `default` is not provided - + -#### WindowedPartitionTransaction.update\_window +#### State.set ```python -def update_window(start_ms: int, end_ms: int, value: Any, 
timestamp_ms: int, - prefix: bytes) +def set(key: Any, value: Any) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L435) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L151) -Set a value for the window. - -This method will also update the latest observed timestamp in state partition -using the provided `timestamp`. +Set value for the key. **Arguments**: -- `start_ms`: start of the window in milliseconds -- `end_ms`: end of the window in milliseconds -- `value`: value of the window -- `timestamp_ms`: current message timestamp in milliseconds -- `prefix`: a key prefix +- `key`: key +- `value`: value - + -#### WindowedPartitionTransaction.get\_latest\_timestamp +#### State.delete ```python -def get_latest_timestamp() -> int +def delete(key: Any) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L452) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L159) -Get the latest observed timestamp for the current state partition. +Delete value for the key. -Use this timestamp to determine if the arriving event is late and should be -discarded from the processing. +This function always returns `None`, even if value is not found. 
-**Returns**: +**Arguments**: -latest observed event timestamp in milliseconds +- `key`: key - + -#### WindowedPartitionTransaction.expire\_windows +#### State.exists ```python -def expire_windows(duration_ms: int, prefix: bytes, grace_ms: int = 0) +def exists(key: Any) -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L463) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L168) -Get a list of expired windows from RocksDB considering the current +Check if the key exists in state. -latest timestamp, window duration and grace period. +**Arguments**: -It also marks the latest found window as expired in the expiration index, so -calling this method multiple times will yield different results for the same -"latest timestamp". +- `key`: key -**Arguments**: +**Returns**: -- `duration_ms`: duration of the windows in milliseconds -- `prefix`: a key prefix -- `grace_ms`: grace period in milliseconds. Default - "0" +True if key exists, False otherwise - + -#### WindowedPartitionTransaction.flush +### PartitionTransaction ```python -def flush(processed_offset: Optional[int] = None, - changelog_offset: Optional[int] = None) +class PartitionTransaction(Protocol) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L478) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L177) -Flush the recent updates to the storage. +A transaction class to perform simple key-value operations like +"get", "set", "delete" and "exists" on a single storage partition. -**Arguments**: + -- `processed_offset`: offset of the last processed message, optional. -- `changelog_offset`: offset of the last produced changelog message, -optional. 
+#### PartitionTransaction.as\_state - +```python +def as_state(prefix: Any) -> State +``` -#### WindowedPartitionTransaction.changelog\_topic\_partition +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L183) + +Create an instance implementing the `State` protocol to be provided + +to `StreamingDataFrame` functions. +All operations called on this State object will be prefixed with +the supplied `prefix`. + +**Returns**: + +an instance implementing the `State` protocol + + + +#### PartitionTransaction.get ```python -@property -def changelog_topic_partition() -> Optional[Tuple[str, int]] +def get(key: Any, prefix: bytes, default: Any = None) -> Optional[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L492) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L194) -Return the changelog topic-partition for the StorePartition of this transaction. +Get the value for key if key is present in the state, else default + +**Arguments**: -Returns `None` if changelog_producer is not provided. +- `key`: key +- `prefix`: a key prefix +- `default`: default value to return if the key is not found **Returns**: -(topic, partition) or None +value or None if the key is not found and `default` is not provided - + -### PartitionRecoveryTransaction +#### PartitionTransaction.set ```python -class PartitionRecoveryTransaction(Protocol) +def set(key: Any, prefix: bytes, value: Any) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L506) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L205) -A class for managing recovery for a StorePartition from a changelog message +Set value for the key. 
- +**Arguments**: -#### PartitionRecoveryTransaction.flush +- `key`: key +- `prefix`: a key prefix +- `value`: value + + + +#### PartitionTransaction.delete ```python -def flush() +def delete(key: Any, prefix: bytes) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L513) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L214) -Flush the recovery update to the storage. +Delete value for the key. - +This function always returns `None`, even if value is not found. -### PartitionTransactionStatus +**Arguments**: -```python -class PartitionTransactionStatus(enum.Enum) -``` +- `key`: key +- `prefix`: a key prefix -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L520) + - +#### PartitionTransaction.exists -#### STARTED +```python +def exists(key: Any, prefix: bytes) -> bool +``` -Transaction is started and accepts updates +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L224) - +Check if the key exists in state. -#### PREPARED +**Arguments**: -Transaction is prepared, it can no longer receive updates +- `key`: key +- `prefix`: a key prefix - +**Returns**: -#### COMPLETE +True if key exists, False otherwise -Transaction is fully completed, it cannot be used anymore + - +#### PartitionTransaction.failed -#### FAILED +```python +@property +def failed() -> bool +``` -Transaction is failed, it cannot be used anymore +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L234) - +Return `True` if transaction failed to update data at some point. -## quixstreams.state.exceptions +Failed transactions cannot be re-used. 
- +**Returns**: -## quixstreams.state.manager +bool - + -### StateStoreManager +#### PartitionTransaction.completed ```python -class StateStoreManager() +@property +def completed() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L24) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L244) -Class for managing state stores and partitions. +Return `True` if transaction is successfully completed. -StateStoreManager is responsible for: - - reacting to rebalance callbacks - - managing the individual state stores - - providing access to store transactions +Completed transactions cannot be re-used. - +**Returns**: -#### StateStoreManager.stores +bool + + + +#### PartitionTransaction.prepared ```python @property -def stores() -> Dict[str, Dict[str, Store]] +def prepared() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L62) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L254) -Map of registered state stores +Return `True` if transaction is prepared completed. + +Prepared transactions cannot receive new updates, but can be flushed. **Returns**: -dict in format {topic: {store_name: store}} +bool - + -#### StateStoreManager.recovery\_required +#### PartitionTransaction.prepare ```python -@property -def recovery_required() -> bool +def prepare(processed_offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L70) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L263) -Whether recovery needs to be done. 
+Produce changelog messages to the changelog topic for all changes accumulated - +in this transaction and prepare transcation to flush its state to the state +store. -#### StateStoreManager.using\_changelogs +After successful `prepare()`, the transaction status is changed to PREPARED, +and it cannot receive updates anymore. + +If changelog is disabled for this application, no updates will be produced +to the changelog topic. + +**Arguments**: + +- `processed_offset`: the offset of the latest processed message + + + +#### PartitionTransaction.changelog\_topic\_partition ```python @property -def using_changelogs() -> bool +def changelog_topic_partition() -> Optional[Tuple[str, int]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L79) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L279) -Whether the StateStoreManager is using changelog topics +Return the changelog topic-partition for the StorePartition of this transaction. + +Returns `None` if changelog_producer is not provided. **Returns**: -using changelogs, as bool +(topic, partition) or None - + -#### StateStoreManager.do\_recovery +#### PartitionTransaction.flush ```python -def do_recovery() +def flush(processed_offset: Optional[int] = None, + changelog_offset: Optional[int] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L87) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L288) -Perform a state recovery, if necessary. +Flush the recent updates to the storage. - +**Arguments**: -#### StateStoreManager.stop\_recovery +- `processed_offset`: offset of the last processed message, optional. +- `changelog_offset`: offset of the last produced changelog message, +optional. 
+ + + +### WindowedState ```python -def stop_recovery() +class WindowedState(Protocol) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L93) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L306) -Stop recovery (called during app shutdown). +A windowed state to be provided into `StreamingDataFrame` window functions. - + -#### StateStoreManager.get\_store +#### WindowedState.get\_window ```python -def get_store(topic: str, store_name: str = DEFAULT_STATE_STORE_NAME) -> Store +def get_window(start_ms: int, + end_ms: int, + default: Any = None) -> Optional[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L99) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L311) -Get a store for given name and topic +Get the value of the window defined by `start` and `end` timestamps + +if the window is present in the state, else default **Arguments**: -- `topic`: topic name -- `store_name`: store name +- `start_ms`: start of the window in milliseconds +- `end_ms`: end of the window in milliseconds +- `default`: default value to return if the key is not found **Returns**: -instance of `Store` +value or None if the key is not found and `default` is not provided - + -#### StateStoreManager.register\_store +#### WindowedState.update\_window ```python -def register_store(topic_name: str, - store_name: str = DEFAULT_STATE_STORE_NAME) +def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L132) +[[VIEW 
SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L325) -Register a state store to be managed by StateStoreManager. - -During processing, the StateStoreManager will react to rebalancing callbacks -and assign/revoke the partitions for registered stores. +Set a value for the window. -Each store can be registered only once for each topic. +This method will also update the latest observed timestamp in state partition +using the provided `timestamp`. **Arguments**: -- `topic_name`: topic name -- `store_name`: store name +- `start_ms`: start of the window in milliseconds +- `end_ms`: end of the window in milliseconds +- `value`: value of the window +- `timestamp_ms`: current message timestamp in milliseconds - + -#### StateStoreManager.register\_windowed\_store +#### WindowedState.get\_latest\_timestamp ```python -def register_windowed_store(topic_name: str, store_name: str) +def get_latest_timestamp() -> int ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L157) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L339) -Register a windowed state store to be managed by StateStoreManager. - -During processing, the StateStoreManager will react to rebalancing callbacks -and assign/revoke the partitions for registered stores. +Get the latest observed timestamp for the current state partition. -Each window store can be registered only once for each topic. +Use this timestamp to determine if the arriving event is late and should be +discarded from the processing. 
-**Arguments**: +**Returns**: -- `topic_name`: topic name -- `store_name`: store name +latest observed event timestamp in milliseconds - + -#### StateStoreManager.clear\_stores +#### WindowedState.expire\_windows ```python -def clear_stores() +def expire_windows(duration_ms: int, + grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L180) - -Delete all state stores managed by StateStoreManager. - - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L350) -#### StateStoreManager.on\_partition\_assign +Get a list of expired windows from RocksDB considering the current -```python -def on_partition_assign(topic: str, partition: int, - committed_offset: int) -> List[StorePartition] -``` +latest timestamp, window duration and grace period. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L195) +It also marks the latest found window as expired in the expiration index, so +calling this method multiple times will yield different results for the same +"latest timestamp". -Assign store partitions for each registered store for the given `TopicPartition` +**Arguments**: -and return a list of assigned `StorePartition` objects. +- `duration_ms`: duration of the windows in milliseconds +- `grace_ms`: grace period in milliseconds. 
Default - "0" -**Arguments**: + -- `topic`: Kafka topic name -- `partition`: Kafka topic partition -- `committed_offset`: latest committed offset for the partition +### WindowedPartitionTransaction -**Returns**: +```python +class WindowedPartitionTransaction(Protocol) +``` -list of assigned `StorePartition` +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L367) - + -#### StateStoreManager.on\_partition\_revoke +#### WindowedPartitionTransaction.failed ```python -def on_partition_revoke(topic: str, partition: int) +@property +def failed() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L221) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L370) -Revoke store partitions for each registered store for the given `TopicPartition` +Return `True` if transaction failed to update data at some point. -**Arguments**: +Failed transactions cannot be re-used. -- `topic`: Kafka topic name -- `partition`: Kafka topic partition +**Returns**: - +bool -#### StateStoreManager.init + + +#### WindowedPartitionTransaction.completed ```python -def init() +@property +def completed() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L234) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L380) -Initialize `StateStoreManager` and create a store directory +Return `True` if transaction is successfully completed. +Completed transactions cannot be re-used. 
- +**Returns**: -#### StateStoreManager.close +bool + + + +#### WindowedPartitionTransaction.prepared ```python -def close() +@property +def prepared() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L241) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L390) -Close all registered stores +Return `True` if transaction is prepared completed. - +Prepared transactions cannot receive new updates, but can be flushed. -## quixstreams.state.state +**Returns**: - +bool -### TransactionState + + +#### WindowedPartitionTransaction.prepare ```python -class TransactionState(State) +def prepare(processed_offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L6) - - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L399) -#### TransactionState.\_\_init\_\_ +Produce changelog messages to the changelog topic for all changes accumulated -```python -def __init__(prefix: bytes, transaction: PartitionTransaction) -``` +in this transaction and prepare transcation to flush its state to the state +store. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L12) +After successful `prepare()`, the transaction status is changed to PREPARED, +and it cannot receive updates anymore. -Simple key-value state to be provided into `StreamingDataFrame` functions +If changelog is disabled for this application, no updates will be produced +to the changelog topic. 
**Arguments**: -- `transaction`: instance of `PartitionTransaction` +- `processed_offset`: the offset of the latest processed message - + -#### TransactionState.get +#### WindowedPartitionTransaction.get\_window ```python -def get(key: Any, default: Any = None) -> Optional[Any] +def get_window(start_ms: int, + end_ms: int, + prefix: bytes, + default: Any = None) -> Optional[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L21) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L416) -Get the value for key if key is present in the state, else default +Get the value of the window defined by `start` and `end` timestamps + +if the window is present in the state, else default **Arguments**: -- `key`: key +- `start_ms`: start of the window in milliseconds +- `end_ms`: end of the window in milliseconds +- `prefix`: a key prefix - `default`: default value to return if the key is not found **Returns**: value or None if the key is not found and `default` is not provided - + -#### TransactionState.set +#### WindowedPartitionTransaction.update\_window ```python -def set(key: Any, value: Any) +def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int, + prefix: bytes) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L31) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L435) -Set value for the key. +Set a value for the window. + +This method will also update the latest observed timestamp in state partition +using the provided `timestamp`. 
**Arguments**: -- `key`: key -- `value`: value +- `start_ms`: start of the window in milliseconds +- `end_ms`: end of the window in milliseconds +- `value`: value of the window +- `timestamp_ms`: current message timestamp in milliseconds +- `prefix`: a key prefix - + -#### TransactionState.delete +#### WindowedPartitionTransaction.get\_latest\_timestamp ```python -def delete(key: Any) +def get_latest_timestamp() -> int ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L39) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L452) -Delete value for the key. +Get the latest observed timestamp for the current state partition. -This function always returns `None`, even if value is not found. +Use this timestamp to determine if the arriving event is late and should be +discarded from the processing. -**Arguments**: +**Returns**: -- `key`: key +latest observed event timestamp in milliseconds - + -#### TransactionState.exists +#### WindowedPartitionTransaction.expire\_windows ```python -def exists(key: Any) -> bool +def expire_windows(duration_ms: int, prefix: bytes, grace_ms: int = 0) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L48) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L463) -Check if the key exists in state. +Get a list of expired windows from RocksDB considering the current + +latest timestamp, window duration and grace period. + +It also marks the latest found window as expired in the expiration index, so +calling this method multiple times will yield different results for the same +"latest timestamp". 
**Arguments**: -- `key`: key +- `duration_ms`: duration of the windows in milliseconds +- `prefix`: a key prefix +- `grace_ms`: grace period in milliseconds. Default - "0" -**Returns**: + -True if key exists, False otherwise +#### WindowedPartitionTransaction.flush - +```python +def flush(processed_offset: Optional[int] = None, + changelog_offset: Optional[int] = None) +``` -## quixstreams.exceptions +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L478) - +Flush the recent updates to the storage. -## quixstreams.exceptions.assignment +**Arguments**: - +- `processed_offset`: offset of the last processed message, optional. +- `changelog_offset`: offset of the last produced changelog message, +optional. -### PartitionAssignmentError + + +#### WindowedPartitionTransaction.changelog\_topic\_partition ```python -class PartitionAssignmentError(QuixException) +@property +def changelog_topic_partition() -> Optional[Tuple[str, int]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/exceptions/assignment.py#L6) - -Error happened during partition rebalancing. -Raised from `on_assign`, `on_revoke` and `on_lost` callbacks +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L492) - +Return the changelog topic-partition for the StorePartition of this transaction. -## quixstreams.exceptions.base +Returns `None` if changelog_producer is not provided. 
- +**Returns**: -## quixstreams.context +(topic, partition) or None - + -#### set\_message\_context +### PartitionRecoveryTransaction ```python -def set_message_context(context: Optional[MessageContext]) +class PartitionRecoveryTransaction(Protocol) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L20) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L506) -Set a MessageContext for the current message in the given `contextvars.Context` - ->***NOTE:*** This is for advanced usage only. If you need to change the message key, -`StreamingDataFrame.to_topic()` has an argument for it. +A class for managing recovery for a StorePartition from a changelog message + -Example Snippet: +#### PartitionRecoveryTransaction.flush ```python -from quixstreams import Application, set_message_context, message_context - -# Changes the current sdf value based on what the message partition is. -def alter_context(value): - context = message_context() - if value > 1: - context.headers = context.headers + (b"cool_new_header", value.encode()) - set_message_context(context) - -app = Application() -sdf = app.dataframe() -sdf = sdf.update(lambda value: alter_context(value)) +def flush() ``` -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L513) -- `context`: instance of `MessageContext` +Flush the recovery update to the storage. 
- + -#### message\_context +### PartitionTransactionStatus ```python -def message_context() -> MessageContext +class PartitionTransactionStatus(enum.Enum) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L51) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L520) -Get a MessageContext for the current message, which houses most of the message + -metadata, like: - - key - - timestamp - - partition - - offset +#### STARTED +Transaction is started and accepts updates -Example Snippet: + -```python -from quixstreams import Application, message_context +#### PREPARED -# Changes the current sdf value based on what the message partition is. +Transaction is prepared, it can no longer receive updates -app = Application() -sdf = app.dataframe() -sdf = sdf.apply(lambda value: 1 if message_context().partition == 2 else 0) -``` + -**Returns**: +#### COMPLETE + +Transaction is fully completed, it cannot be used anymore + + + +#### FAILED + +Transaction is failed, it cannot be used anymore + + + +## quixstreams.state.exceptions + + + +## quixstreams.state.recovery + + + +### RecoveryPartition + +```python +class RecoveryPartition() +``` -instance of `MessageContext` +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L24) - +A changelog topic partition mapped to a respective `StorePartition` with helper +methods to determine its current recovery status. -## quixstreams.kafka.configuration +Since `StorePartition`s do recovery directly, it also handles recovery transactions. 
- + -### ConnectionConfig +#### RecoveryPartition.offset ```python -class ConnectionConfig(BaseSettings) +@property +def offset() -> int ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L17) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L63) -Provides an interface for all librdkafka connection-based configs. +Get the changelog offset from the underlying `StorePartition`. -Allows converting to or from a librdkafka dictionary. +**Returns**: -Also obscures secrets and handles any case sensitivity issues. +changelog offset (int) - + -#### ConnectionConfig.settings\_customise\_sources +#### RecoveryPartition.needs\_recovery ```python -@classmethod -def settings_customise_sources( - cls, settings_cls: Type[BaseSettings], - init_settings: PydanticBaseSettingsSource, - env_settings: PydanticBaseSettingsSource, - dotenv_settings: PydanticBaseSettingsSource, - file_secret_settings: PydanticBaseSettingsSource -) -> Tuple[PydanticBaseSettingsSource, ...] +@property +def needs_recovery() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L96) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L72) -Included to ignore reading/setting values from the environment +Determine whether recovery is necessary for underlying `StorePartition`. 
- + -#### ConnectionConfig.from\_librdkafka\_dict +#### RecoveryPartition.needs\_offset\_update ```python -@classmethod -def from_librdkafka_dict(cls, - config: dict, - ignore_extras: bool = False) -> Self +@property +def needs_offset_update() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L110) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L81) -Create a `ConnectionConfig` from a librdkafka config dictionary. +Determine if an offset update is required. -**Arguments**: +Usually checked during assign if recovery was not required. -- `config`: a dict of configs (like {"bootstrap.servers": "url"}) -- `ignore_extras`: Ignore non-connection settings (else raise exception) + -**Returns**: +#### RecoveryPartition.update\_offset -a ConnectionConfig +```python +def update_offset() +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L89) -#### ConnectionConfig.as\_librdkafka\_dict +Update only the changelog offset of a StorePartition. + + + +#### RecoveryPartition.recover\_from\_changelog\_message ```python -def as_librdkafka_dict(plaintext_secrets=True) -> dict +def recover_from_changelog_message( + changelog_message: ConfluentKafkaMessageProto) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L125) - -Dump any non-empty config values as a librdkafka dictionary. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L109) ->***NOTE***: All secret values will be dumped in PLAINTEXT by default. +Recover the StorePartition using a message read from its respective changelog. 
**Arguments**: -- `plaintext_secrets`: whether secret values are plaintext or obscured (***) +- `changelog_message`: A confluent kafka message (everything as bytes) -**Returns**: + -a librdkafka-compatible dictionary +#### RecoveryPartition.set\_watermarks - +```python +def set_watermarks(lowwater: int, highwater: int) +``` -## quixstreams.kafka +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L121) - +Set the changelog watermarks as gathered from Consumer.get_watermark_offsets() -## quixstreams.kafka.producer +**Arguments**: - +- `lowwater`: topic partition lowwater +- `highwater`: topic partition highwater -### Producer + + +### ChangelogProducerFactory ```python -class Producer() +class ChangelogProducerFactory() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L44) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L132) - +Generates ChangelogProducers, which produce changelog messages to a StorePartition. -#### Producer.\_\_init\_\_ + + +#### ChangelogProducerFactory.\_\_init\_\_ ```python -def __init__(broker_address: Union[str, ConnectionConfig], - logger: logging.Logger = logger, - error_callback: Callable[[KafkaError], None] = _default_error_cb, - extra_config: Optional[dict] = None, - flush_timeout: Optional[int] = None) +def __init__(changelog_name: str, producer: RowProducer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L45) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L137) -A wrapper around `confluent_kafka.Producer`. 
+**Arguments**: -It initializes `confluent_kafka.Producer` on demand -avoiding network calls during `__init__`, provides typing info for methods -and some reasonable defaults. +- `changelog_name`: changelog topic name +- `producer`: a RowProducer (not shared with `Application` instance) -**Arguments**: +**Returns**: -- `broker_address`: Connection settings for Kafka. -Accepts string with Kafka broker host and port formatted as `:`, -or a ConnectionConfig object if authentication is required. -- `logger`: a Logger instance to attach librdkafka logging to -- `error_callback`: callback used for producer errors -- `extra_config`: A dictionary with additional options that -will be passed to `confluent_kafka.Producer` as is. -Note: values passed as arguments override values in `extra_config`. -- `flush_timeout`: The time the producer is waiting for all messages to be delivered. +a ChangelogWriter instance - + -#### Producer.produce +#### ChangelogProducerFactory.get\_partition\_producer ```python -def produce(topic: str, - value: Optional[Union[str, bytes]] = None, - key: Optional[Union[str, bytes]] = None, - headers: Optional[Headers] = None, - partition: Optional[int] = None, - timestamp: Optional[int] = None, - poll_timeout: float = 5.0, - buffer_error_max_tries: int = 3, - on_delivery: Optional[DeliveryCallback] = None) +def get_partition_producer(partition_num) -> "ChangelogProducer" ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L83) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L147) -Produce a message to a topic. +Generate a ChangelogProducer for producing to a specific partition number -It also polls Kafka for callbacks before producing to minimize -the probability of `BufferError`. 
-If `BufferError` still happens, the method will poll Kafka with timeout -to free up the buffer and try again. +(and thus StorePartition). **Arguments**: -- `topic`: topic name -- `value`: message value -- `key`: message key -- `headers`: message headers -- `partition`: topic partition -- `timestamp`: message timestamp -- `poll_timeout`: timeout for `poll()` call in case of `BufferError` -- `buffer_error_max_tries`: max retries for `BufferError`. -Pass `0` to not retry after `BufferError`. -- `on_delivery`: the delivery callback to be triggered on `poll()` -for the produced message. +- `partition_num`: source topic partition number - + -#### Producer.poll +### ChangelogProducer ```python -def poll(timeout: float = 0) +class ChangelogProducer() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L144) - -Polls the producer for events and calls `on_delivery` callbacks. - -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L161) -- `timeout`: poll timeout seconds; Default: 0 (unlike others) -> NOTE: -1 will hang indefinitely if there are no messages to acknowledge +Generated for a `StorePartition` to produce state changes to its respective +kafka changelog partition. - + -#### Producer.flush +#### ChangelogProducer.\_\_init\_\_ ```python -def flush(timeout: Optional[float] = None) -> int +def __init__(changelog_name: str, partition: int, producer: RowProducer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L152) - -Wait for all messages in the Producer queue to be delivered. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L167) **Arguments**: -- `timeout` (`float`): time to attempt flushing (seconds). 
-None use producer default or -1 is infinite. Default: None - -**Returns**: +- `changelog_name`: A changelog topic name +- `partition`: source topic partition number +- `producer`: a RowProducer (not shared with `Application` instance) -number of messages remaining to flush + - +#### ChangelogProducer.produce -## quixstreams.kafka.consumer +```python +def produce(key: bytes, + value: Optional[bytes] = None, + headers: Optional[MessageHeadersMapping] = None) +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L190) -### Consumer +Produce a message to a changelog topic partition. -```python -class Consumer() -``` +**Arguments**: -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L64) +- `key`: message key (same as state key, including prefixes) +- `value`: message value (same as state value) +- `headers`: message headers (includes column family info) - + -#### Consumer.\_\_init\_\_ +### RecoveryManager ```python -def __init__(broker_address: Union[str, ConnectionConfig], - consumer_group: Optional[str], - auto_offset_reset: AutoOffsetReset, - auto_commit_enable: bool = True, - logger: logging.Logger = logger, - error_callback: Callable[[KafkaError], None] = _default_error_cb, - on_commit: Optional[Callable[ - [Optional[KafkaError], List[TopicPartition]], None]] = None, - extra_config: Optional[dict] = None) +class RecoveryManager() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L65) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L215) -A wrapper around `confluent_kafka.Consumer`. 
+Manages all consumer-related aspects of recovery, including: + - assigning/revoking, pausing/resuming topic partitions (especially changelogs) + - consuming changelog messages until state is updated fully. -It initializes `confluent_kafka.Consumer` on demand -avoiding network calls during `__init__`, provides typing info for methods -and some reasonable defaults. +Also tracks/manages `RecoveryPartitions`, which are assigned/tracked only if +recovery for that changelog partition is required. -**Arguments**: +Recovery is attempted from the `Application` after any new partition assignment. -- `broker_address`: Connection settings for Kafka. -Accepts string with Kafka broker host and port formatted as `:`, -or a ConnectionConfig object if authentication is required. -- `consumer_group`: Kafka consumer group. -Passed as `group.id` to `confluent_kafka.Consumer` -- `auto_offset_reset`: Consumer `auto.offset.reset` setting. -Available values: -- "earliest" - automatically reset the offset to the smallest offset -- "latest" - automatically reset the offset to the largest offset -- "error" - trigger an error (ERR__AUTO_OFFSET_RESET) which is retrieved - by consuming messages (used for testing) -- `auto_commit_enable`: If true, periodically commit offset of -the last message handed to the application. Default - `True`. -- `logger`: a Logger instance to attach librdkafka logging to -- `error_callback`: callback used for consumer errors -- `on_commit`: Offset commit result propagation callback. -Passed as "offset_commit_cb" to `confluent_kafka.Consumer`. -- `extra_config`: A dictionary with additional options that -will be passed to `confluent_kafka.Consumer` as is. -Note: values passed as arguments override values in `extra_config`. 
+ - +#### RecoveryManager.partitions -#### Consumer.poll +```python +@property +def partitions() -> Dict[int, Dict[str, RecoveryPartition]] +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L234) + +Returns a mapping of assigned RecoveryPartitions in the following format: +{: {: }} + + + +#### RecoveryManager.has\_assignments ```python -def poll(timeout: Optional[float] = None) -> Optional[Message] +@property +def has_assignments() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L128) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L242) -Consumes a single message, calls callbacks and returns events. +Whether the Application has assigned RecoveryPartitions -The application must check the returned :py:class:`Message` -object's :py:func:`Message.error()` method to distinguish between proper -messages (error() returns None), or an event or error. +**Returns**: -Note: Callbacks may be called from this method, such as -``on_assign``, ``on_revoke``, et al. +has assignments, as bool -**Arguments**: + -- `timeout` (`float`): Maximum time in seconds to block waiting for message, -event or callback. None or -1 is infinite. Default: None. 
+#### RecoveryManager.recovering -**Raises**: +```python +@property +def recovering() -> bool +``` -- `None`: RuntimeError if called on a closed consumer +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L251) + +Whether the Application is currently recovering **Returns**: -A Message object or None on timeout +is recovering, as bool - + -#### Consumer.subscribe +#### RecoveryManager.register\_changelog ```python -def subscribe(topics: List[str], - on_assign: Optional[RebalancingCallback] = None, - on_revoke: Optional[RebalancingCallback] = None, - on_lost: Optional[RebalancingCallback] = None) +def register_changelog(topic_name: str, store_name: str) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L146) - -Set subscription to supplied list of topics +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L259) -This replaces a previous subscription. +Register a changelog Topic with the TopicManager. **Arguments**: -- `topics` (`list(str)`): List of topics (strings) to subscribe to. -- `on_assign` (`callable`): callback to provide handling of customized offsets -on completion of a successful partition re-assignment. -- `on_revoke` (`callable`): callback to provide handling of offset commits to -a customized store on the start of a rebalance operation. -- `on_lost` (`callable`): callback to provide handling in the case the partition -assignment has been lost. Partitions that have been lost may already be -owned by other members in the group and therefore committing offsets, -for example, may fail. +- `topic_name`: source topic name +- `store_name`: name of the store -**Raises**: + -- `KafkaException`: -- `None`: RuntimeError if called on a closed consumer -.. py:function:: on_assign(consumer, partitions) -.. 
py:function:: on_revoke(consumer, partitions) -.. py:function:: on_lost(consumer, partitions) +#### RecoveryManager.do\_recovery - :param Consumer consumer: Consumer instance. - :param list(TopicPartition) partitions: Absolute list of partitions being - assigned or revoked. +```python +def do_recovery() +``` - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L271) -#### Consumer.unsubscribe +If there are any active RecoveryPartitions, do a recovery procedure. + +After, will resume normal `Application` processing. + + + +#### RecoveryManager.assign\_partition ```python -def unsubscribe() +def assign_partition(topic: str, partition: int, committed_offset: int, + store_partitions: Dict[str, StorePartition]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L240) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L324) -Remove current subscription. - -**Raises**: +Assigns `StorePartition`s (as `RecoveryPartition`s) ONLY IF recovery required. -- `None`: KafkaException -- `None`: RuntimeError if called on a closed consumer +Pauses active consumer partitions as needed. - + -#### Consumer.store\_offsets +#### RecoveryManager.revoke\_partition ```python -def store_offsets(message: Optional[Message] = None, - offsets: Optional[List[TopicPartition]] = None) +def revoke_partition(partition_num: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L248) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L391) -.. 
py:function:: store_offsets([message=None], [offsets=None]) +revoke ALL StorePartitions (across all Stores) for a given partition number -Store offsets for a message or a list of offsets. +**Arguments**: -``message`` and ``offsets`` are mutually exclusive. The stored offsets -will be committed according to 'auto.commit.interval.ms' or manual -offset-less `commit`. -Note that 'enable.auto.offset.store' must be set to False when using this API. +- `partition_num`: partition number of source topic -**Arguments**: + -- `message` (`confluent_kafka.Message`): Store message's offset+1. -- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to store. +## quixstreams.utils -**Raises**: + -- `None`: KafkaException -- `None`: RuntimeError if called on a closed consumer +## quixstreams.utils.json - + -#### Consumer.commit +#### dumps ```python -def commit(message: Optional[Message] = None, - offsets: Optional[List[TopicPartition]] = None, - asynchronous: bool = True) -> Optional[List[TopicPartition]] +def dumps(value: Any) -> bytes ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L282) - -Commit a message or a list of offsets. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/utils/json.py#L8) -The ``message`` and ``offsets`` parameters are mutually exclusive. -If neither is set, the current partition assignment's offsets are used instead. -Use this method to commit offsets if you have 'enable.auto.commit' set to False. +Serialize to JSON using `orjson` package. **Arguments**: -- `message` (`confluent_kafka.Message`): Commit the message's offset+1. -Note: By convention, committed offsets reflect the next message -to be consumed, **not** the last message consumed. -- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to commit. 
-- `asynchronous` (`bool`): If true, asynchronously commit, returning None -immediately. If False, the commit() call will block until the commit -succeeds or fails and the committed offsets will be returned (on success). -Note that specific partitions may have failed and the .err field of -each partition should be checked for success. +- `value`: value to serialize to JSON -**Raises**: +**Returns**: -- `None`: KafkaException -- `None`: RuntimeError if called on a closed consumer +bytes - + -#### Consumer.committed +#### loads ```python -def committed(partitions: List[TopicPartition], - timeout: Optional[float] = None) -> List[TopicPartition] +def loads(value: bytes) -> Any ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L322) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/utils/json.py#L18) -.. py:function:: committed(partitions, [timeout=None]) +Deserialize from JSON using `orjson` package. -Retrieve committed offsets for the specified partitions. +Main differences: +- It returns `bytes` +- It doesn't allow non-str keys in dictionaries **Arguments**: -- `partitions` (`list(TopicPartition)`): List of topic+partitions to query for stored offsets. -- `timeout` (`float`): Request timeout (seconds). -None or -1 is infinite. Default: None +- `value`: value to deserialize from -**Raises**: +**Returns**: -- `None`: KafkaException -- `None`: RuntimeError if called on a closed consumer +object -**Returns**: + -`list(TopicPartition)`: List of topic+partitions with offset and possibly error set. 
+## quixstreams.utils.dicts - + -#### Consumer.get\_watermark\_offsets +#### dict\_values ```python -def get_watermark_offsets(partition: TopicPartition, - timeout: Optional[float] = None, - cached: bool = False) -> Tuple[int, int] +def dict_values(d: object) -> List ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L342) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/utils/dicts.py#L4) -Retrieve low and high offsets for the specified partition. +Recursively unpacks a set of nested dicts to get a flattened list of leaves, -**Arguments**: +where "leaves" are the first non-dict item. -- `partition` (`TopicPartition`): Topic+partition to return offsets for. -- `timeout` (`float`): Request timeout (seconds). None or -1 is infinite. -Ignored if cached=True. Default: None -- `cached` (`bool`): Instead of querying the broker, use cached information. -Cached values: The low offset is updated periodically -(if statistics.interval.ms is set) while the high offset is updated on each -message fetched from the broker for this partition. +i.e {"a": {"b": {"c": 1}, "d": 2}, "e": 3} becomes [1, 2, 3] -**Raises**: +**Arguments**: -- `None`: KafkaException -- `None`: RuntimeError if called on a closed consumer +- `d`: initially, a dict (with potentially nested dicts) **Returns**: -`tuple(int,int)`: Tuple of (low,high) on success or None on timeout. -The high offset is the offset of the last message + 1. 
+a list with all the leaves of the various contained dicts - + -#### Consumer.list\_topics +## quixstreams.checkpointing.exceptions + + + +## quixstreams.checkpointing + + + +## quixstreams.checkpointing.checkpoint + + + +### Checkpoint ```python -def list_topics(topic: Optional[str] = None, - timeout: Optional[float] = None) -> ClusterMetadata +class Checkpoint() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L368) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L24) -.. py:function:: list_topics([topic=None], [timeout=-1]) +Class to keep track of state updates and consumer offsets and to checkpoint these +updates on schedule. -Request metadata from the cluster. -This method provides the same information as -listTopics(), describeTopics() and describeCluster() in the Java Admin client. + -**Arguments**: +#### Checkpoint.expired -- `topic` (`str`): If specified, only request information about this topic, -else return results for all topics in cluster. -Warning: If auto.create.topics.enable is set to true on the broker and -an unknown topic is specified, it will be created. -- `timeout` (`float`): The maximum response time before timing out -None or -1 is infinite. Default: None +```python +def expired() -> bool +``` + +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L49) + +Returns `True` if checkpoint deadline has expired. + + + +#### Checkpoint.empty + +```python +def empty() -> bool +``` -**Raises**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L55) -- `None`: KafkaException +Returns `True` if checkpoint doesn't have any offsets stored yet. 
- -#### Consumer.memberid + + +#### Checkpoint.store\_offset ```python -def memberid() -> str +def store_offset(topic: str, partition: int, offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L391) - -Return this client's broker-assigned group member id. +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L62) -The member id is assigned by the group coordinator and is propagated to -the consumer during rebalance. +Store the offset of the processed message to the checkpoint. - :returns: Member id string or None - :rtype: string - :raises: RuntimeError if called on a closed consumer +**Arguments**: +- `topic`: topic name +- `partition`: partition number +- `offset`: message offset - + -#### Consumer.offsets\_for\_times +#### Checkpoint.get\_store\_transaction ```python -def offsets_for_times(partitions: List[TopicPartition], - timeout: Optional[float] = None) -> List[TopicPartition] +def get_store_transaction( + topic: str, + partition: int, + store_name: str = DEFAULT_STATE_STORE_NAME) -> PartitionTransaction ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L404) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L82) -Look up offsets by timestamp for the specified partitions. - -The returned offset for each partition is the earliest offset whose -timestamp is greater than or equal to the given timestamp in the -corresponding partition. If the provided timestamp exceeds that of the -last message in the partition, a value of -1 will be returned. +Get a PartitionTransaction for the given store, topic and partition. 
- :param list(TopicPartition) partitions: topic+partitions with timestamps - in the TopicPartition.offset field. - :param float timeout: The maximum response time before timing out. - None or -1 is infinite. Default: None - :returns: List of topic+partition with offset field set and possibly error set - :rtype: list(TopicPartition) - :raises: KafkaException - :raises: RuntimeError if called on a closed consumer +It will return already started transaction if there's one. +**Arguments**: - +- `topic`: topic name +- `partition`: partition number +- `store_name`: store name -#### Consumer.pause +**Returns**: -```python -def pause(partitions: List[TopicPartition]) -``` +instance of `PartitionTransaction` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L430) + -Pause consumption for the provided list of partitions. +#### Checkpoint.commit -Paused partitions must be tracked manually. +```python +def commit() +``` -Does NOT affect the result of Consumer.assignment(). +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L105) -**Arguments**: +Commit the checkpoint. -- `partitions` (`list(TopicPartition)`): List of topic+partitions to pause. +This method will: + 1. Produce the changelogs for each state store + 2. Flush the producer to ensure everything is delivered. + 3. Commit topic offsets. + 4. Flush each state store partition to the disk. 
-**Raises**: + -- `None`: KafkaException +## quixstreams.logging - + -#### Consumer.resume +#### configure\_logging ```python -def resume(partitions: List[TopicPartition]) +def configure_logging(loglevel: Optional[LogLevel]) -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L444) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/logging.py#L26) -.. py:function:: resume(partitions) +Configure "quixstreams" logger. -Resume consumption for the provided list of partitions. +>***NOTE:*** If "quixstreams" logger already has pre-defined handlers +(e.g. logging has already been configured via `logging`, or the function +is called twice), it will skip configuration and return `False`. **Arguments**: -- `partitions` (`list(TopicPartition)`): List of topic+partitions to resume. +- `loglevel`: a valid log level as a string or None. +If None passed, this function is no-op and no logging will be configured. -**Raises**: +**Returns**: -- `None`: KafkaException +True if logging config has been updated, otherwise False. - + -#### Consumer.position +## quixstreams.rowconsumer + + + +### RowConsumer ```python -def position(partitions: List[TopicPartition]) -> List[TopicPartition] +class RowConsumer(Consumer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L456) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L19) -Retrieve current positions (offsets) for the specified partitions. + -**Arguments**: +#### RowConsumer.\_\_init\_\_ -- `partitions` (`list(TopicPartition)`): List of topic+partitions to return -current offsets for. The current offset is the offset of -the last consumed message + 1. 
+```python +def __init__(broker_address: Union[str, ConnectionConfig], + consumer_group: str, + auto_offset_reset: AutoOffsetReset, + auto_commit_enable: bool = True, + on_commit: Callable[[Optional[KafkaError], List[TopicPartition]], + None] = None, + extra_config: Optional[dict] = None, + on_error: Optional[ConsumerErrorCallback] = None) +``` -**Raises**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L20) -- `None`: KafkaException -- `None`: RuntimeError if called on a closed consumer +A consumer class that is capable of deserializing Kafka messages to Rows -**Returns**: +according to the Topics deserialization settings. -`list(TopicPartition)`: List of topic+partitions with offset and possibly error set. +It overrides `.subscribe()` method of Consumer class to accept `Topic` +objects instead of strings. - +**Arguments**: -#### Consumer.seek +- `broker_address`: Connection settings for Kafka. +Accepts string with Kafka broker host and port formatted as `:`, +or a ConnectionConfig object if authentication is required. +- `consumer_group`: Kafka consumer group. +Passed as `group.id` to `confluent_kafka.Consumer` +- `auto_offset_reset`: Consumer `auto.offset.reset` setting. +Available values: +- "earliest" - automatically reset the offset to the smallest offset +- "latest" - automatically reset the offset to the largest offset +- `auto_commit_enable`: If true, periodically commit offset of +the last message handed to the application. Default - `True`. +- `on_commit`: Offset commit result propagation callback. +Passed as "offset_commit_cb" to `confluent_kafka.Consumer`. +- `extra_config`: A dictionary with additional options that +will be passed to `confluent_kafka.Consumer` as is. +Note: values passed as arguments override values in `extra_config`. +- `on_error`: a callback triggered when `RowConsumer.poll_row` fails. 
+If consumer fails and the callback returns `True`, the exception +will be logged but not propagated. +The default callback logs an exception and returns `False`. + + + +#### RowConsumer.subscribe ```python -def seek(partition: TopicPartition) +def subscribe(topics: List[Topic], + on_assign: Optional[RebalancingCallback] = None, + on_revoke: Optional[RebalancingCallback] = None, + on_lost: Optional[RebalancingCallback] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L470) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L72) -Set consume position for partition to offset. +Set subscription to supplied list of topics. -The offset may be an absolute (>=0) or a -logical offset (:py:const:`OFFSET_BEGINNING` et.al). +This replaces a previous subscription. -seek() may only be used to update the consume offset of an -actively consumed partition (i.e., after :py:const:`assign()`), -to set the starting offset of partition not being consumed instead -pass the offset in an `assign()` call. +This method also updates the internal mapping with topics that is used +to deserialize messages to Rows. **Arguments**: -- `partition` (`TopicPartition`): Topic+partition+offset to seek to. - -**Raises**: - -- `None`: KafkaException +- `topics`: list of `Topic` instances to subscribe to. +- `on_assign` (`callable`): callback to provide handling of customized offsets +on completion of a successful partition re-assignment. +- `on_revoke` (`callable`): callback to provide handling of offset commits to +a customized store on the start of a rebalance operation. +- `on_lost` (`callable`): callback to provide handling in the case the partition +assignment has been lost. Partitions that have been lost may already be +owned by other members in the group and therefore committing offsets, +for example, may fail. 
- + -#### Consumer.assignment +#### RowConsumer.poll\_row ```python -def assignment() -> List[TopicPartition] +def poll_row(timeout: float = None) -> Union[Row, List[Row], None] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L487) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L106) -Returns the current partition assignment. +Consumes a single message and deserialize it to Row or a list of Rows. -**Raises**: +The message is deserialized according to the corresponding Topic. +If deserializer raises `IgnoreValue` exception, this method will return None. +If Kafka returns an error, it will be raised as exception. -- `None`: KafkaException -- `None`: RuntimeError if called on a closed consumer +**Arguments**: + +- `timeout`: poll timeout seconds **Returns**: -`list(TopicPartition)`: List of assigned topic+partitions. +single Row, list of Rows or None - + -#### Consumer.set\_sasl\_credentials +## quixstreams.context + + + +#### set\_message\_context ```python -def set_sasl_credentials(username: str, password: str) +def set_message_context(context: Optional[MessageContext]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L500) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L20) -Sets the SASL credentials used for this client. -These credentials will overwrite the old ones, and will be used the next -time the client needs to authenticate. -This method will not disconnect existing broker connections that have been -established with the old credentials. -This method is applicable only to SASL PLAIN and SCRAM mechanisms. 
+Set a MessageContext for the current message in the given `contextvars.Context` - +>***NOTE:*** This is for advanced usage only. If you need to change the message key, +`StreamingDataFrame.to_topic()` has an argument for it. -#### Consumer.incremental\_assign + +Example Snippet: ```python -def incremental_assign(partitions: List[TopicPartition]) -``` +from quixstreams import Application, set_message_context, message_context -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L512) +# Changes the current sdf value based on what the message partition is. +def alter_context(value): + context = message_context() + if value > 1: + context.headers = context.headers + (b"cool_new_header", value.encode()) + set_message_context(context) -Assign new partitions. +app = Application() +sdf = app.dataframe() +sdf = sdf.update(lambda value: alter_context(value)) +``` -Can be called outside the `Consumer` `on_assign` callback (multiple times). -Partitions immediately show on `Consumer.assignment()`. +**Arguments**: -Any additional partitions besides the ones passed during the `Consumer` -`on_assign` callback will NOT be associated with the consumer group. +- `context`: instance of `MessageContext` - + -#### Consumer.incremental\_unassign +#### message\_context ```python -def incremental_unassign(partitions: List[TopicPartition]) +def message_context() -> MessageContext ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L524) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L51) -Revoke partitions. +Get a MessageContext for the current message, which houses most of the message -Can be called outside an on_revoke callback. 
+metadata, like: + - key + - timestamp + - partition + - offset - -#### Consumer.close +Example Snippet: ```python -def close() -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L532) - -Close down and terminate the Kafka Consumer. +from quixstreams import Application, message_context -Actions performed: +# Changes the current sdf value based on what the message partition is. -- Stops consuming. -- Commits offsets, unless the consumer property 'enable.auto.commit' is set to False. -- Leaves the consumer group. +app = Application() +sdf = app.dataframe() +sdf = sdf.apply(lambda value: 1 if message_context().partition == 2 else 0) +``` -Registered callbacks may be called from this method, -see `poll()` for more info. +**Returns**: +instance of `MessageContext` - + -## quixstreams.kafka.exceptions +## quixstreams.types @@ -7573,7 +7644,7 @@ see `poll()` for more info. class Application() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L55) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L55) The main Application class. 
@@ -7639,7 +7710,7 @@ def __init__(broker_address: Optional[Union[str, ConnectionConfig]] = None, topic_create_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L93) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L93) **Arguments**: @@ -7733,7 +7804,7 @@ def Quix(cls, topic_create_timeout: float = 60) -> Self ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L313) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L313) >***NOTE:*** DEPRECATED: use Application with `quix_sdk_token` argument instead. @@ -7835,7 +7906,7 @@ def topic(name: str, timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L451) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L451) Create a topic definition. @@ -7906,7 +7977,7 @@ topic = app.topic("input-topic", timestamp_extractor=custom_ts_extractor) def dataframe(topic: Topic) -> StreamingDataFrame ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L531) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L531) A simple helper method that generates a `StreamingDataFrame`, which is used @@ -7948,7 +8019,7 @@ to be used as an input topic. 
def stop(fail: bool = False) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L570) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L570) Stop the internal poll loop and the message processing. @@ -7971,7 +8042,7 @@ to unhandled exception, and it shouldn't commit the current checkpoint. def get_producer() -> Producer ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L593) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L593) Create and return a pre-configured Producer instance. The Producer is initialized with params passed to Application. @@ -8002,7 +8073,7 @@ with app.get_producer() as producer: def get_consumer(auto_commit_enable: bool = True) -> Consumer ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L623) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L623) Create and return a pre-configured Consumer instance. The Consumer is initialized with params passed to Application. @@ -8043,7 +8114,7 @@ with app.get_consumer() as consumer: def clear_state() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L666) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L666) Clear the state of the application. @@ -8055,7 +8126,7 @@ Clear the state of the application. 
def run(dataframe: StreamingDataFrame) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L672) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L672) Start processing data from Kafka using provided `StreamingDataFrame` @@ -8083,233 +8154,151 @@ app.run(dataframe=df) - `dataframe`: instance of `StreamingDataFrame` - + -## quixstreams.rowconsumer +## quixstreams.processing\_context - + -### RowConsumer +### ProcessingContext ```python -class RowConsumer(Consumer) +@dataclasses.dataclass +class ProcessingContext() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L19) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L21) - +A class to share processing-related objects +between `Application` and `StreamingDataFrame` instances. -#### RowConsumer.\_\_init\_\_ + + +#### ProcessingContext.store\_offset ```python -def __init__(broker_address: Union[str, ConnectionConfig], - consumer_group: str, - auto_offset_reset: AutoOffsetReset, - auto_commit_enable: bool = True, - on_commit: Callable[[Optional[KafkaError], List[TopicPartition]], - None] = None, - extra_config: Optional[dict] = None, - on_error: Optional[ConsumerErrorCallback] = None) +def store_offset(topic: str, partition: int, offset: int) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L20) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L41) -A consumer class that is capable of deserializing Kafka messages to Rows - -according to the Topics deserialization settings. 
- -It overrides `.subscribe()` method of Consumer class to accept `Topic` -objects instead of strings. +Store the offset of the processed message to the checkpoint. **Arguments**: -- `broker_address`: Connection settings for Kafka. -Accepts string with Kafka broker host and port formatted as `:`, -or a ConnectionConfig object if authentication is required. -- `consumer_group`: Kafka consumer group. -Passed as `group.id` to `confluent_kafka.Consumer` -- `auto_offset_reset`: Consumer `auto.offset.reset` setting. -Available values: -- "earliest" - automatically reset the offset to the smallest offset -- "latest" - automatically reset the offset to the largest offset -- `auto_commit_enable`: If true, periodically commit offset of -the last message handed to the application. Default - `True`. -- `on_commit`: Offset commit result propagation callback. -Passed as "offset_commit_cb" to `confluent_kafka.Consumer`. -- `extra_config`: A dictionary with additional options that -will be passed to `confluent_kafka.Consumer` as is. -Note: values passed as arguments override values in `extra_config`. -- `on_error`: a callback triggered when `RowConsumer.poll_row` fails. -If consumer fails and the callback returns `True`, the exception -will be logged but not propagated. -The default callback logs an exception and returns `False`. +- `topic`: topic name +- `partition`: partition number +- `offset`: message offset - + -#### RowConsumer.subscribe +#### ProcessingContext.init\_checkpoint ```python -def subscribe(topics: List[Topic], - on_assign: Optional[RebalancingCallback] = None, - on_revoke: Optional[RebalancingCallback] = None, - on_lost: Optional[RebalancingCallback] = None) +def init_checkpoint() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L72) - -Set subscription to supplied list of topics. - -This replaces a previous subscription. 
- -This method also updates the internal mapping with topics that is used -to deserialize messages to Rows. - -**Arguments**: +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L51) -- `topics`: list of `Topic` instances to subscribe to. -- `on_assign` (`callable`): callback to provide handling of customized offsets -on completion of a successful partition re-assignment. -- `on_revoke` (`callable`): callback to provide handling of offset commits to -a customized store on the start of a rebalance operation. -- `on_lost` (`callable`): callback to provide handling in the case the partition -assignment has been lost. Partitions that have been lost may already be -owned by other members in the group and therefore committing offsets, -for example, may fail. +Initialize a new checkpoint - + -#### RowConsumer.poll\_row +#### ProcessingContext.commit\_checkpoint ```python -def poll_row(timeout: float = None) -> Union[Row, List[Row], None] +def commit_checkpoint(force: bool = False) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L106) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L62) -Consumes a single message and deserialize it to Row or a list of Rows. - -The message is deserialized according to the corresponding Topic. -If deserializer raises `IgnoreValue` exception, this method will return None. -If Kafka returns an error, it will be raised as exception. +Commit the current checkpoint. -**Arguments**: +The actual commit will happen only when: -- `timeout`: poll timeout seconds +1. The checkpoint has at least one stored offset +2. 
The checkpoint is expired or `force=True` is passed -**Returns**: +**Arguments**: -single Row, list of Rows or None +- `force`: if `True`, commit the checkpoint before its expiration deadline. - + -## quixstreams.checkpointing.checkpoint +## quixstreams.rowproducer - + -### Checkpoint +### RowProducer ```python -class Checkpoint() +class RowProducer() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L24) - -Class to keep track of state updates and consumer offsets and to checkpoint these -updates on schedule. - - +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowproducer.py#L18) -#### Checkpoint.expired +A producer class that is capable of serializing Rows to bytes and send them to Kafka. -```python -def expired() -> bool -``` +The serialization is performed according to the Topic serialization settings. -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L49) +**Arguments**: -Returns `True` if checkpoint deadline has expired. +- `broker_address`: Connection settings for Kafka. +Accepts string with Kafka broker host and port formatted as `:`, +or a ConnectionConfig object if authentication is required. +- `extra_config`: A dictionary with additional options that +will be passed to `confluent_kafka.Producer` as is. +Note: values passed as arguments override values in `extra_config`. +- `on_error`: a callback triggered when `RowProducer.produce_row()` +or `RowProducer.poll()` fail`. +If producer fails and the callback returns `True`, the exception +will be logged but not propagated. +The default callback logs an exception and returns `False`. +- `flush_timeout`: The time the producer is waiting for all messages to be delivered. 
- + -#### Checkpoint.empty +#### RowProducer.produce\_row ```python -def empty() -> bool +def produce_row(row: Row, + topic: Topic, + key: Optional[Any] = _KEY_UNSET, + partition: Optional[int] = None, + timestamp: Optional[int] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L55) - -Returns `True` if checkpoint doesn't have any offsets stored yet. - - - - -#### Checkpoint.store\_offset +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowproducer.py#L56) -```python -def store_offset(topic: str, partition: int, offset: int) -``` +Serialize Row to bytes according to the Topic serialization settings -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L62) +and produce it to Kafka -Store the offset of the processed message to the checkpoint. +If this method fails, it will trigger the provided "on_error" callback. **Arguments**: -- `topic`: topic name -- `partition`: partition number -- `offset`: message offset +- `row`: Row object +- `topic`: Topic object +- `key`: message key, optional +- `partition`: partition number, optional +- `timestamp`: timestamp in milliseconds, optional - + -#### Checkpoint.get\_store\_transaction +#### RowProducer.poll ```python -def get_store_transaction( - topic: str, - partition: int, - store_name: str = DEFAULT_STATE_STORE_NAME) -> PartitionTransaction +def poll(timeout: float = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L82) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowproducer.py#L96) -Get a PartitionTransaction for the given store, topic and partition. 
+Polls the producer for events and calls `on_delivery` callbacks. -It will return already started transaction if there's one. +If `poll()` fails, it will trigger the provided "on_error" callback **Arguments**: -- `topic`: topic name -- `partition`: partition number -- `store_name`: store name - -**Returns**: - -instance of `PartitionTransaction` - - - -#### Checkpoint.commit - -```python -def commit() -``` - -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L105) - -Commit the checkpoint. - -This method will: - 1. Produce the changelogs for each state store - 2. Flush the producer to ensure everything is delivered. - 3. Commit topic offsets. - 4. Flush each state store partition to the disk. - - - -## quixstreams.checkpointing - - - -## quixstreams.checkpointing.exceptions +- `timeout`: timeout in seconds diff --git a/docs/api-reference/serialization.md b/docs/api-reference/serialization.md index bdb383979..67351eda3 100644 --- a/docs/api-reference/serialization.md +++ b/docs/api-reference/serialization.md @@ -10,7 +10,7 @@ class QuixDeserializer(JSONDeserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L73) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L73) Handles Deserialization for any Quix-formatted topic. 
@@ -23,18 +23,15 @@ Parses JSON data from either `TimeseriesData` and `EventData` (ignores the rest) #### QuixDeserializer.\_\_init\_\_ ```python -def __init__(column_name: Optional[str] = None, - loads: Callable[[Union[bytes, bytearray]], Any] = default_loads) +def __init__(loads: Callable[[Union[bytes, bytearray]], Any] = default_loads) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L80) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L80)
***Arguments:*** -- `column_name`: if provided, the deserialized value will be wrapped into -dictionary with `column_name` as a key. - `loads`: function to parse json from bytes. Default - :py:func:`quixstreams.utils.json.loads`. @@ -49,7 +46,7 @@ Default - :py:func:`quixstreams.utils.json.loads`. def split_values() -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L100) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L97) Each Quix message might contain data for multiple Rows. This property informs the downstream processors about that, so they can @@ -66,7 +63,7 @@ def deserialize(model_key: str, value: Union[List[Mapping], Mapping]) -> Iterable[Mapping] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L153) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L150) Deserialization function for particular data types (Timeseries or EventData). @@ -91,7 +88,7 @@ Iterable of dicts class QuixTimeseriesSerializer(QuixSerializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L321) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L318) Serialize data to JSON formatted according to Quix Timeseries format. 
@@ -123,7 +120,7 @@ Output: class QuixEventsSerializer(QuixSerializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L409) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L406) Serialize data to JSON formatted according to Quix EventData format. The input value is expected to be a dictionary with the following keys: @@ -164,7 +161,7 @@ Output: class BytesDeserializer(Deserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L44) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L44) A deserializer to bypass bytes without any changes @@ -176,7 +173,7 @@ A deserializer to bypass bytes without any changes class BytesSerializer(Serializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L55) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L53) A serializer to bypass bytes without any changes @@ -188,7 +185,7 @@ A serializer to bypass bytes without any changes class StringDeserializer(Deserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L64) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L62) @@ -197,10 +194,10 @@ class StringDeserializer(Deserializer) #### StringDeserializer.\_\_init\_\_ ```python -def __init__(column_name: Optional[str] = None, codec: str = 
"utf_8") +def __init__(codec: str = "utf_8") ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L65) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L63) Deserializes bytes to strings using the specified encoding. @@ -219,7 +216,7 @@ A wrapper around `confluent_kafka.serialization.StringDeserializer`. class IntegerDeserializer(Deserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L84) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L81) Deserializes bytes to integers. @@ -233,7 +230,7 @@ A wrapper around `confluent_kafka.serialization.IntegerDeserializer`. class DoubleDeserializer(Deserializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L103) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L99) Deserializes float to IEEE 764 binary64. @@ -247,7 +244,7 @@ A wrapper around `confluent_kafka.serialization.DoubleDeserializer`. 
class StringSerializer(Serializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L122) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L117) @@ -259,7 +256,7 @@ class StringSerializer(Serializer) def __init__(codec: str = "utf_8") ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L123) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L118) Serializes strings to bytes using the specified encoding. @@ -277,7 +274,7 @@ Serializes strings to bytes using the specified encoding. class IntegerSerializer(Serializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L135) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L130) Serializes integers to bytes @@ -289,7 +286,7 @@ Serializes integers to bytes class DoubleSerializer(Serializer) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L148) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L143) Serializes floats to bytes diff --git a/docs/api-reference/state.md b/docs/api-reference/state.md index a5a3ddc1b..8e3bb5410 100644 --- a/docs/api-reference/state.md +++ b/docs/api-reference/state.md @@ -10,7 +10,7 @@ class State(Protocol) ``` -[[VIEW 
SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L136) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L136) Primary interface for working with key-value state data from `StreamingDataFrame` @@ -24,7 +24,7 @@ Primary interface for working with key-value state data from `StreamingDataFrame def get(key: Any, default: Any = None) -> Optional[Any] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L141) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L141) Get the value for key if key is present in the state, else default @@ -51,7 +51,7 @@ value or None if the key is not found and `default` is not provided def set(key: Any, value: Any) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L151) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L151) Set value for the key. @@ -72,7 +72,7 @@ Set value for the key. def delete(key: Any) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L159) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L159) Delete value for the key. @@ -94,7 +94,7 @@ This function always returns `None`, even if value is not found. 
def exists(key: Any) -> bool ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L168) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L168) Check if the key exists in state. @@ -123,7 +123,7 @@ True if key exists, False otherwise class RocksDBOptions(RocksDBOptionsType) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L25) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L25) RocksDB database options. @@ -148,7 +148,7 @@ Please see `rocksdict.Options` for a complete description of other options. def to_options() -> rocksdict.Options ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L53) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L53) Convert parameters to `rocksdict.Options` diff --git a/docs/api-reference/topics.md b/docs/api-reference/topics.md index de6b3079f..b0c764c6b 100644 --- a/docs/api-reference/topics.md +++ b/docs/api-reference/topics.md @@ -16,7 +16,7 @@ def convert_topic_list(topics: List[Topic]) -> List[ConfluentTopic] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L24) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L24) Converts `Topic`s to `ConfluentTopic`s as required for Confluent's @@ -42,7 +42,7 @@ list of confluent_kafka `ConfluentTopic`s class TopicAdmin() ``` -[[VIEW 
SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L47) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L47) For performing "admin"-level operations on a Kafka cluster, mostly around topics. @@ -60,7 +60,7 @@ def __init__(broker_address: Union[str, ConnectionConfig], extra_config: Optional[Mapping] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L54) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L54)
@@ -82,7 +82,7 @@ or a ConnectionConfig object if authentication is required. def list_topics(timeout: float = -1) -> Dict[str, ConfluentTopicMetadata] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L83) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L83) Get a list of topics and their metadata from a Kafka cluster @@ -109,7 +109,7 @@ def inspect_topics(topic_names: List[str], timeout: float = 30) -> Dict[str, Optional[TopicConfig]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L94) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L94) A simplified way of getting the topic configurations of the provided topics @@ -141,7 +141,7 @@ def create_topics(topics: List[Topic], finalize_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L176) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L176) Create the given list of topics and confirm they are ready. @@ -170,7 +170,7 @@ fail (it ignores issues for a topic already existing). class TopicConfig() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L42) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L42) Represents all kafka-level configuration for a kafka topic. @@ -184,7 +184,7 @@ Generally used by Topic and any topic creation procedures. 
class Topic() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L83) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L83) A definition of a Kafka topic. @@ -209,7 +209,7 @@ def __init__( timestamp_extractor: Optional[TimestampExtractor] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L92) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L92)
@@ -235,7 +235,7 @@ milliseconds from a deserialized message. def name() -> str ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L121) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L121) Topic name @@ -249,7 +249,7 @@ Topic name def row_serialize(row: Row, key: Any) -> KafkaMessage ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L131) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L131) Serialize Row to a Kafka message structure @@ -277,7 +277,7 @@ def row_deserialize( message: ConfluentKafkaMessageProto) -> Union[Row, List[Row], None] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L162) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L162) Deserialize incoming Kafka message to a Row. @@ -307,7 +307,7 @@ Row, list of Rows or None if the message is ignored. 
def affirm_ready_for_create(topics: List[Topic]) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L20) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L20) Validate a list of topics is ready for creation attempt @@ -325,7 +325,7 @@ Validate a list of topics is ready for creation attempt class TopicManager() ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L30) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L30) The source of all topic management with quixstreams. @@ -348,7 +348,7 @@ def __init__(topic_admin: TopicAdmin, create_timeout: float = 60) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L53) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L53)
@@ -370,7 +370,7 @@ def __init__(topic_admin: TopicAdmin, def changelog_topics() -> Dict[str, Dict[str, Topic]] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L103) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L103) Note: `Topic`s are the changelogs. @@ -387,7 +387,7 @@ returns: the changelog topic dict, {topic_name: {suffix: Topic}} def all_topics() -> Dict[str, Topic] ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L112) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L112) Every registered topic name mapped to its respective `Topic`. @@ -405,7 +405,7 @@ def topic_config(num_partitions: Optional[int] = None, extra_config: Optional[dict] = None) -> TopicConfig ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L220) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L220) Convenience method for generating a `TopicConfig` with default settings @@ -439,7 +439,7 @@ def topic(name: str, timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L241) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L241) A convenience method for generating a `Topic`. 
Will use default config options @@ -480,7 +480,7 @@ def repartition_topic(operation: str, timeout: Optional[float] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L286) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L286) Create an internal repartition topic. @@ -514,7 +514,7 @@ def changelog_topic(topic_name: str, timeout: Optional[float] = None) -> Topic ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L326) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L326) Performs all the logic necessary to generate a changelog topic based on a @@ -561,7 +561,7 @@ def create_topics(topics: List[Topic], create_timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L383) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L383) Creates topics via an explicit list of provided `Topics`. @@ -587,7 +587,7 @@ def create_all_topics(timeout: Optional[float] = None, create_timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L411) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L411) A convenience method to create all Topic objects stored on this TopicManager. @@ -608,7 +608,7 @@ A convenience method to create all Topic objects stored on this TopicManager. 
def validate_all_topics(timeout: Optional[float] = None) ``` -[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L424) +[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L424) Validates all topics exist and changelogs have correct topic and rep factor. diff --git a/quixstreams/models/serializers/base.py b/quixstreams/models/serializers/base.py index 609de16a8..0a8813667 100644 --- a/quixstreams/models/serializers/base.py +++ b/quixstreams/models/serializers/base.py @@ -45,14 +45,10 @@ def to_confluent_ctx(self, field: MessageField) -> _SerializationContext: class Deserializer(abc.ABC): - def __init__(self, column_name: Optional[str] = None, *args, **kwargs): + def __init__(self, *args, **kwargs): """ A base class for all Deserializers - - :param column_name: if provided, the deserialized value will be wrapped into - dictionary with `column_name` as a key. """ - self.column_name = column_name @property def split_values(self) -> bool: @@ -62,11 +58,6 @@ def split_values(self) -> bool: """ return False - def _to_dict(self, value: Any) -> Union[Any, dict]: - if self.column_name: - return {self.column_name: value} - return value - @abc.abstractmethod def __call__(self, *args, **kwargs) -> Any: ... diff --git a/quixstreams/models/serializers/json.py b/quixstreams/models/serializers/json.py index 187c1cee8..0a5a824e1 100644 --- a/quixstreams/models/serializers/json.py +++ b/quixstreams/models/serializers/json.py @@ -35,25 +35,21 @@ def _to_json(self, value: Any): class JSONDeserializer(Deserializer): def __init__( self, - column_name: Optional[str] = None, loads: Callable[[Union[bytes, bytearray]], Any] = default_loads, ): """ Deserializer that parses data from JSON - :param column_name: if provided, the deserialized value will be wrapped into - dictionary with `column_name` as a key. 
:param loads: function to parse json from bytes. Default - :py:func:`quixstreams.utils.json.loads`. """ - super().__init__(column_name=column_name) + super().__init__() self._loads = loads def __call__( self, value: bytes, ctx: SerializationContext ) -> Union[Iterable[Mapping], Mapping]: try: - deserialized = self._loads(value) - return self._to_dict(deserialized) + return self._loads(value) except (ValueError, TypeError) as exc: raise SerializationError(str(exc)) from exc diff --git a/quixstreams/models/serializers/quix.py b/quixstreams/models/serializers/quix.py index 2253f1b37..e9081b623 100644 --- a/quixstreams/models/serializers/quix.py +++ b/quixstreams/models/serializers/quix.py @@ -79,16 +79,13 @@ class QuixDeserializer(JSONDeserializer): def __init__( self, - column_name: Optional[str] = None, loads: Callable[[Union[bytes, bytearray]], Any] = default_loads, ): """ - :param column_name: if provided, the deserialized value will be wrapped into - dictionary with `column_name` as a key. :param loads: function to parse json from bytes. Default - :py:func:`quixstreams.utils.json.loads`. 
""" - super().__init__(column_name=column_name, loads=loads) + super().__init__(loads=loads) self._deserializers = { QModelKey.TIMESERIESDATA: self.deserialize_timeseries, QModelKey.PARAMETERDATA: self.deserialize_timeseries, @@ -148,7 +145,7 @@ def deserialize_timeseries( row_value["Tags"] = {tag: next(values) for tag, values in tags} row_value[Q_TIMESTAMP_KEY] = timestamp_ns - yield self._to_dict(row_value) + yield row_value def deserialize( self, model_key: str, value: Union[List[Mapping], Mapping] @@ -163,11 +160,11 @@ def deserialize( return self._deserializers[model_key](value) def deserialize_event_data(self, value: Mapping) -> Iterable[Mapping]: - yield self._to_dict(self._parse_event_data(value)) + yield self._parse_event_data(value) def deserialize_event_data_list(self, value: List[Mapping]) -> Iterable[Mapping]: for item in value: - yield self._to_dict(self._parse_event_data(item)) + yield self._parse_event_data(item) def _parse_event_data(self, value: Mapping) -> Mapping: if not isinstance(value, Mapping): diff --git a/quixstreams/models/serializers/simple_types.py b/quixstreams/models/serializers/simple_types.py index 846fa2f44..c63ad5f3e 100644 --- a/quixstreams/models/serializers/simple_types.py +++ b/quixstreams/models/serializers/simple_types.py @@ -46,10 +46,8 @@ class BytesDeserializer(Deserializer): A deserializer to bypass bytes without any changes """ - def __call__( - self, value: bytes, ctx: SerializationContext - ) -> Union[bytes, Mapping[str, bytes]]: - return self._to_dict(value) + def __call__(self, value: bytes, ctx: SerializationContext) -> bytes: + return value class BytesSerializer(Serializer): @@ -62,14 +60,14 @@ def __call__(self, value: bytes, ctx: SerializationContext) -> bytes: class StringDeserializer(Deserializer): - def __init__(self, column_name: Optional[str] = None, codec: str = "utf_8"): + def __init__(self, codec: str = "utf_8"): """ Deserializes bytes to strings using the specified encoding. 
:param codec: string encoding A wrapper around `confluent_kafka.serialization.StringDeserializer`. """ - super().__init__(column_name=column_name) + super().__init__() self._codec = codec self._deserializer = _StringDeserializer(codec=self._codec) @@ -77,8 +75,7 @@ def __init__(self, column_name: Optional[str] = None, codec: str = "utf_8"): def __call__( self, value: bytes, ctx: SerializationContext ) -> Union[str, Mapping[str, str]]: - deserialized = self._deserializer(value=value) - return self._to_dict(deserialized) + return self._deserializer(value=value) class IntegerDeserializer(Deserializer): @@ -88,16 +85,15 @@ class IntegerDeserializer(Deserializer): A wrapper around `confluent_kafka.serialization.IntegerDeserializer`. """ - def __init__(self, column_name: Optional[str] = None): - super().__init__(column_name=column_name) + def __init__(self): + super().__init__() self._deserializer = _IntegerDeserializer() @_wrap_serialization_error def __call__( self, value: bytes, ctx: SerializationContext ) -> Union[int, Mapping[str, int]]: - deserialized = self._deserializer(value=value) - return self._to_dict(deserialized) + return self._deserializer(value=value) class DoubleDeserializer(Deserializer): @@ -107,16 +103,15 @@ class DoubleDeserializer(Deserializer): A wrapper around `confluent_kafka.serialization.DoubleDeserializer`. 
""" - def __init__(self, column_name: Optional[str] = None): - super().__init__(column_name=column_name) + def __init__(self): + super().__init__() self._deserializer = _DoubleDeserializer() @_wrap_serialization_error def __call__( self, value: bytes, ctx: SerializationContext ) -> Union[float, Mapping[str, float]]: - deserialized = self._deserializer(value=value) - return self._to_dict(deserialized) + return self._deserializer(value=value) class StringSerializer(Serializer): diff --git a/tests/test_quixstreams/test_app.py b/tests/test_quixstreams/test_app.py index 9d83c2465..571a9a310 100644 --- a/tests/test_quixstreams/test_app.py +++ b/tests/test_quixstreams/test_app.py @@ -121,11 +121,10 @@ def on_message_processed(topic_, partition, offset): on_message_processed=on_message_processed, ) - column_name = "root" partition_num = 0 topic_in = app.topic( str(uuid.uuid4()), - value_deserializer=JSONDeserializer(column_name=column_name), + value_deserializer=JSONDeserializer(), ) topic_out = app.topic( str(uuid.uuid4()), @@ -178,7 +177,7 @@ def on_message_processed(topic_, partition, offset): for row in rows_out: assert row.topic == topic_out.name assert row.key == data["key"] - assert row.value == {column_name: loads(data["value"].decode())} + assert row.value == loads(data["value"].decode()) assert row.timestamp == timestamp_ms assert row.headers == headers @@ -240,9 +239,7 @@ def count_and_fail(_): def test_run_consumer_error_raised(self, app_factory, executor): # Set "auto_offset_reset" to "error" to simulate errors in Consumer app = app_factory(auto_offset_reset="error") - topic = app.topic( - str(uuid.uuid4()), value_deserializer=JSONDeserializer(column_name="root") - ) + topic = app.topic(str(uuid.uuid4()), value_deserializer=JSONDeserializer()) sdf = app.dataframe(topic) # Stop app after 10s if nothing failed diff --git a/tests/test_quixstreams/test_models/test_quix_serializers.py b/tests/test_quixstreams/test_models/test_quix_serializers.py index 
72427e33b..c0cd3a651 100644 --- a/tests/test_quixstreams/test_models/test_quix_serializers.py +++ b/tests/test_quixstreams/test_models/test_quix_serializers.py @@ -261,66 +261,6 @@ def test_deserialize_timeseries_timestamp_field_clash( ) ) - @pytest.mark.parametrize("as_legacy", [False, True]) - def test_deserialize_timeseries_with_column_name_success( - self, quix_timeseries_factory, as_legacy - ): - message = quix_timeseries_factory( - binary={"param1": [b"1", None], "param2": [None, b"1"]}, - strings={"param3": [1, None], "param4": [None, 1.1]}, - numeric={"param5": ["1", None], "param6": [None, "a"], "param7": ["", ""]}, - tags={"tag1": ["value1", "value2"], "tag2": ["value3", "value4"]}, - timestamps=[1234567890, 1234567891], - as_legacy=as_legacy, - ) - - expected = [ - { - "root": { - "param1": b"1", - "param2": None, - "param3": 1, - "param4": None, - "param5": "1", - "param6": None, - "param7": "", - "Tags": {"tag1": "value1", "tag2": "value3"}, - "Timestamp": 1234567890, - } - }, - { - "root": { - "param1": None, - "param2": b"1", - "param3": None, - "param4": 1.1, - "param5": None, - "param6": "a", - "param7": "", - "Tags": {"tag1": "value2", "tag2": "value4"}, - "Timestamp": 1234567891, - } - }, - ] - - deserializer = QuixDeserializer(column_name="root") - rows = list( - deserializer( - value=message.value(), - ctx=SerializationContext( - topic=message.topic(), - headers=message.headers(), - ), - ) - ) - assert len(rows) == len(expected) - for item, row in zip(expected, rows): - assert "root" in row - value = row["root"] - item = row["root"] - for key in item: - assert item[key] == value[key] - @pytest.mark.parametrize("as_legacy", [False, True]) def test_deserialize_eventdata_success( self, quix_eventdata_factory, quix_eventdata_params_factory, as_legacy @@ -381,45 +321,6 @@ def test_deserialize_eventdata_list_success( assert row["Value"] == params.value assert row["Tags"] == params.tags - @pytest.mark.parametrize("as_legacy", [False, True]) - def 
test_deserialize_event_data_with_column( - self, - quix_eventdata_list_factory, - quix_eventdata_params_factory, - as_legacy, - ): - event_params = [ - quix_eventdata_params_factory( - id="test", - value={"blabla": 123}, - tags={"tag1": "1"}, - timestamp=1234567790, - ), - quix_eventdata_params_factory( - id="test2", - value={"blabla2": 1234}, - tags={"tag2": "2"}, - timestamp=1234567891, - ), - ] - message = quix_eventdata_list_factory(params=event_params, as_legacy=as_legacy) - - deserializer = QuixDeserializer(column_name="root") - rows = list( - deserializer( - value=message.value(), - ctx=SerializationContext(topic="test", headers=message.headers()), - ) - ) - assert len(rows) == 2 - for row, params in zip(rows, event_params): - assert "root" in row - row = row["root"] - assert row["Timestamp"] == params.timestamp - assert row["Id"] == params.id - assert row["Value"] == params.value - assert row["Tags"] == params.tags - class TestQuixTimeseriesSerializer: def test_serialize_dict_success(self): diff --git a/tests/test_quixstreams/test_models/test_serializers.py b/tests/test_quixstreams/test_models/test_serializers.py index 0f51435d9..ce3012bd4 100644 --- a/tests/test_quixstreams/test_models/test_serializers.py +++ b/tests/test_quixstreams/test_models/test_serializers.py @@ -77,32 +77,6 @@ def test_deserialize_no_column_name_success( ): assert deserializer(value, ctx=dummy_context) == expected - @pytest.mark.parametrize( - "deserializer, value, expected", - [ - ( - IntegerDeserializer("value"), - int_to_bytes(123), - {"value": 123}, - ), - (DoubleDeserializer("value"), float_to_bytes(123), {"value": 123.0}), - (DoubleDeserializer("value"), float_to_bytes(123.123), {"value": 123.123}), - (StringDeserializer("value"), b"abc", {"value": "abc"}), - ( - StringDeserializer("value", codec="cp1251"), - "abc".encode("cp1251"), - {"value": "abc"}, - ), - (BytesDeserializer("value"), b"123123", {"value": b"123123"}), - (JSONDeserializer("value"), b"123123", {"value": 
123123}), - (JSONDeserializer("value"), b'{"a":"b"}', {"value": {"a": "b"}}), - ], - ) - def test_deserialize_with_column_name_success( - self, deserializer: Deserializer, value, expected - ): - assert deserializer(value, ctx=dummy_context) == expected - @pytest.mark.parametrize( "deserializer, value", [ diff --git a/tests/test_quixstreams/test_models/test_topics/test_topics.py b/tests/test_quixstreams/test_models/test_topics/test_topics.py index 739759cd9..ef639a0ca 100644 --- a/tests/test_quixstreams/test_models/test_topics/test_topics.py +++ b/tests/test_quixstreams/test_models/test_topics/test_topics.py @@ -40,8 +40,6 @@ def __call__(self, value: bytes, ctx: SerializationContext): deserialized = self._deserializer(value=value) if not deserialized % 3: raise IgnoreMessage("Ignore numbers divisible by 3") - if self.column_name: - return {self.column_name: deserialized} return deserialized @@ -51,11 +49,11 @@ class TestTopic: [ ( IntegerDeserializer(), - IntegerDeserializer("column"), + IntegerDeserializer(), int_to_bytes(1), int_to_bytes(2), 1, - {"column": 2}, + 2, ), ( DoubleDeserializer(), @@ -75,11 +73,11 @@ class TestTopic: ), ( DoubleDeserializer(), - JSONDeserializer(column_name="root"), + JSONDeserializer(), float_to_bytes(1.1), json.dumps({"key": "value"}).encode(), 1.1, - {"root": {"key": "value"}}, + {"key": "value"}, ), ( BytesDeserializer(), @@ -194,13 +192,13 @@ def test_row_list_deserialize_success( def test_row_deserialize_ignorevalueerror_raised(self, topic_manager_topic_factory): topic = topic_manager_topic_factory( - value_deserializer=IgnoreDivisibleBy3Deserializer(column_name="value"), + value_deserializer=IgnoreDivisibleBy3Deserializer(), ) row = topic.row_deserialize( message=ConfluentKafkaMessageStub(key=b"key", value=int_to_bytes(4)) ) assert row - assert row.value == {"value": 4} + assert row.value == 4 row = topic.row_deserialize( message=ConfluentKafkaMessageStub(key=b"key", value=int_to_bytes(3))