From 72f658a57707c1b51203b20e7e326f8356e9f8f3 Mon Sep 17 00:00:00 2001
From: Tim Sawicki <136370015+tim-quix@users.noreply.github.com>
Date: Thu, 4 Jul 2024 06:52:47 -0400
Subject: [PATCH] remove column name deserialization (#392)
* remove everything related to generating column_names during deserialization
* remove doc references to column_name deserialization args
---
docs/api-reference/application.md | 20 +-
docs/api-reference/context.md | 4 +-
docs/api-reference/dataframe.md | 52 +-
docs/api-reference/kafka.md | 52 +-
docs/api-reference/quixstreams.md | 8501 ++++++++---------
docs/api-reference/serialization.md | 39 +-
docs/api-reference/state.md | 14 +-
docs/api-reference/topics.md | 48 +-
quixstreams/models/serializers/base.py | 11 +-
quixstreams/models/serializers/json.py | 8 +-
quixstreams/models/serializers/quix.py | 11 +-
.../models/serializers/simple_types.py | 27 +-
tests/test_quixstreams/test_app.py | 9 +-
.../test_models/test_quix_serializers.py | 99 -
.../test_models/test_serializers.py | 26 -
.../test_models/test_topics/test_topics.py | 14 +-
16 files changed, 4385 insertions(+), 4550 deletions(-)
diff --git a/docs/api-reference/application.md b/docs/api-reference/application.md
index 7557ee4b9..0f6bf5326 100644
--- a/docs/api-reference/application.md
+++ b/docs/api-reference/application.md
@@ -10,7 +10,7 @@
class Application()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L55)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L55)
The main Application class.
@@ -82,7 +82,7 @@ def __init__(broker_address: Optional[Union[str, ConnectionConfig]] = None,
topic_create_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L93)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L93)
@@ -180,7 +180,7 @@ def Quix(cls,
topic_create_timeout: float = 60) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L313)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L313)
>***NOTE:*** DEPRECATED: use Application with `quix_sdk_token` argument instead.
@@ -290,7 +290,7 @@ def topic(name: str,
timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L451)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L451)
Create a topic definition.
@@ -371,7 +371,7 @@ topic = app.topic("input-topic", timestamp_extractor=custom_ts_extractor)
def dataframe(topic: Topic) -> StreamingDataFrame
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L531)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L531)
A simple helper method that generates a `StreamingDataFrame`, which is used
@@ -421,7 +421,7 @@ to be used as an input topic.
def stop(fail: bool = False)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L570)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L570)
Stop the internal poll loop and the message processing.
@@ -448,7 +448,7 @@ to unhandled exception, and it shouldn't commit the current checkpoint.
def get_producer() -> Producer
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L593)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L593)
Create and return a pre-configured Producer instance.
The Producer is initialized with params passed to Application.
@@ -483,7 +483,7 @@ with app.get_producer() as producer:
def get_consumer(auto_commit_enable: bool = True) -> Consumer
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L623)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L623)
Create and return a pre-configured Consumer instance.
The Consumer is initialized with params passed to Application.
@@ -528,7 +528,7 @@ with app.get_consumer() as consumer:
def clear_state()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L666)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L666)
Clear the state of the application.
@@ -542,7 +542,7 @@ Clear the state of the application.
def run(dataframe: StreamingDataFrame)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L672)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L672)
Start processing data from Kafka using provided `StreamingDataFrame`
diff --git a/docs/api-reference/context.md b/docs/api-reference/context.md
index d7d0ec1e9..f291a5c1f 100644
--- a/docs/api-reference/context.md
+++ b/docs/api-reference/context.md
@@ -12,7 +12,7 @@
def set_message_context(context: Optional[MessageContext])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L20)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L20)
Set a MessageContext for the current message in the given `contextvars.Context`
@@ -55,7 +55,7 @@ sdf = sdf.update(lambda value: alter_context(value))
def message_context() -> MessageContext
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L51)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L51)
Get a MessageContext for the current message, which houses most of the message
diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md
index 142c04c98..44415f69d 100644
--- a/docs/api-reference/dataframe.md
+++ b/docs/api-reference/dataframe.md
@@ -10,7 +10,7 @@
class StreamingDataFrame(BaseStreaming)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L62)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L62)
`StreamingDataFrame` is the main object you will use for ETL work.
@@ -81,7 +81,7 @@ def apply(func: Union[
metadata: bool = False) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L177)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L177)
Apply a function to transform the value and return a new value.
@@ -139,7 +139,7 @@ def update(func: Union[
metadata: bool = False) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L266)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L266)
Apply a function to mutate value in-place or to perform a side effect
@@ -197,7 +197,7 @@ def filter(func: Union[
metadata: bool = False) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L354)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L354)
Filter value using provided function.
@@ -249,7 +249,7 @@ def group_by(key: Union[str, Callable[[Any], Any]],
key_serializer: Optional[SerializerType] = "json") -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L440)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L440)
"Groups" messages by re-keying them via the provided group_by operation
@@ -314,7 +314,7 @@ a clone with this operation added (assign to keep its effect).
def contains(key: str) -> StreamingSeries
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L518)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L518)
Check if the key is present in the Row value.
@@ -353,7 +353,7 @@ or False otherwise.
def to_topic(topic: Topic, key: Optional[Callable[[Any], Any]] = None) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L543)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L543)
Produce current value to a topic. You can optionally specify a new key.
@@ -396,7 +396,7 @@ By default, the current message key will be used.
def set_timestamp(func: Callable[[Any, Any, int, Any], int]) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L584)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L584)
Set a new timestamp based on the current message value and its metadata.
@@ -449,7 +449,7 @@ def set_headers(
) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L625)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L625)
Set new message headers based on the current message value and metadata.
@@ -500,7 +500,7 @@ def compose(
) -> Dict[str, VoidExecutor]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L676)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L676)
Compose all functions of this StreamingDataFrame into one big closure.
@@ -554,7 +554,7 @@ def test(value: Any,
topic: Optional[Topic] = None) -> List[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L713)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L713)
A shorthand to test `StreamingDataFrame` with provided value
@@ -591,7 +591,7 @@ def tumbling_window(duration_ms: Union[int, timedelta],
name: Optional[str] = None) -> TumblingWindowDefinition
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L750)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L750)
Create a tumbling window transformation on this StreamingDataFrame.
@@ -677,7 +677,7 @@ def hopping_window(duration_ms: Union[int, timedelta],
name: Optional[str] = None) -> HoppingWindowDefinition
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L826)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L826)
Create a hopping window transformation on this StreamingDataFrame.
@@ -771,7 +771,7 @@ sdf = (
class StreamingSeries(BaseStreaming)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L47)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L47)
`StreamingSeries` are typically generated by `StreamingDataframes` when getting
elements from, or performing certain operations on, a `StreamingDataframe`,
@@ -837,7 +837,7 @@ sdf = sdf[["column_a"] & (sdf["new_sum_field"] >= 10)]
def from_apply_callback(cls, func: ApplyWithMetadataCallback) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L107)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L107)
Create a StreamingSeries from a function.
@@ -865,7 +865,7 @@ instance of `StreamingSeries`
def apply(func: ApplyCallback) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L121)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L121)
Add a callable to the execution list for this series.
@@ -917,7 +917,7 @@ a new `StreamingSeries` with the new callable added
def compose_returning() -> ReturningExecutor
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L155)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L155)
Compose a list of functions from this StreamingSeries and its parents into one
@@ -948,7 +948,7 @@ def compose(
None]] = None) -> VoidExecutor
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L170)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L170)
Compose all functions of this StreamingSeries into one big closure.
@@ -1006,7 +1006,7 @@ def test(value: Any,
ctx: Optional[MessageContext] = None) -> Any
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L214)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L214)
A shorthand to test `StreamingSeries` with provided value
@@ -1038,7 +1038,7 @@ result of `StreamingSeries`
def isin(other: Container) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L269)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L269)
Check if series value is in "other".
@@ -1083,7 +1083,7 @@ new StreamingSeries
def contains(other: Union[Self, object]) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L296)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L296)
Check if series value contains "other"
@@ -1128,7 +1128,7 @@ new StreamingSeries
def is_(other: Union[Self, object]) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L321)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L321)
Check if series value refers to the same object as `other`
@@ -1170,7 +1170,7 @@ new StreamingSeries
def isnot(other: Union[Self, object]) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L344)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L344)
Check if series value does not refer to the same object as `other`
@@ -1213,7 +1213,7 @@ new StreamingSeries
def isnull() -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L368)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L368)
Check if series value is None.
@@ -1250,7 +1250,7 @@ new StreamingSeries
def notnull() -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L391)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L391)
Check if series value is not None.
@@ -1287,7 +1287,7 @@ new StreamingSeries
def abs() -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L414)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L414)
Get absolute value of the series value.
diff --git a/docs/api-reference/kafka.md b/docs/api-reference/kafka.md
index 8234a7041..750a7f280 100644
--- a/docs/api-reference/kafka.md
+++ b/docs/api-reference/kafka.md
@@ -10,7 +10,7 @@
class Producer()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L44)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L44)
@@ -26,7 +26,7 @@ def __init__(broker_address: Union[str, ConnectionConfig],
flush_timeout: Optional[int] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L45)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L45)
A wrapper around `confluent_kafka.Producer`.
@@ -66,7 +66,7 @@ def produce(topic: str,
on_delivery: Optional[DeliveryCallback] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L83)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L83)
Produce a message to a topic.
@@ -101,7 +101,7 @@ for the produced message.
def poll(timeout: float = 0)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L144)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L144)
Polls the producer for events and calls `on_delivery` callbacks.
@@ -122,7 +122,7 @@ Polls the producer for events and calls `on_delivery` callbacks.
def flush(timeout: Optional[float] = None) -> int
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L152)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L152)
Wait for all messages in the Producer queue to be delivered.
@@ -151,7 +151,7 @@ number of messages remaining to flush
class Consumer()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L64)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L64)
@@ -171,7 +171,7 @@ def __init__(broker_address: Union[str, ConnectionConfig],
extra_config: Optional[dict] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L65)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L65)
A wrapper around `confluent_kafka.Consumer`.
@@ -214,7 +214,7 @@ Note: values passed as arguments override values in `extra_config`.
def poll(timeout: Optional[float] = None) -> Optional[Message]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L128)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L128)
Consumes a single message, calls callbacks and returns events.
@@ -255,7 +255,7 @@ def subscribe(topics: List[str],
on_lost: Optional[RebalancingCallback] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L146)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L146)
Set subscription to supplied list of topics
@@ -298,7 +298,7 @@ for example, may fail.
def unsubscribe()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L240)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L240)
Remove current subscription.
@@ -318,7 +318,7 @@ def store_offsets(message: Optional[Message] = None,
offsets: Optional[List[TopicPartition]] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L248)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L248)
.. py:function:: store_offsets([message=None], [offsets=None])
@@ -353,7 +353,7 @@ def commit(message: Optional[Message] = None,
asynchronous: bool = True) -> Optional[List[TopicPartition]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L282)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L282)
Commit a message or a list of offsets.
@@ -391,7 +391,7 @@ def committed(partitions: List[TopicPartition],
timeout: Optional[float] = None) -> List[TopicPartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L322)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L322)
.. py:function:: committed(partitions, [timeout=None])
@@ -428,7 +428,7 @@ def get_watermark_offsets(partition: TopicPartition,
cached: bool = False) -> Tuple[int, int]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L342)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L342)
Retrieve low and high offsets for the specified partition.
@@ -467,7 +467,7 @@ def list_topics(topic: Optional[str] = None,
timeout: Optional[float] = None) -> ClusterMetadata
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L368)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L368)
.. py:function:: list_topics([topic=None], [timeout=-1])
@@ -500,7 +500,7 @@ None or -1 is infinite. Default: None
def memberid() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L391)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L391)
Return this client's broker-assigned group member id.
@@ -523,7 +523,7 @@ def offsets_for_times(partitions: List[TopicPartition],
timeout: Optional[float] = None) -> List[TopicPartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L404)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L404)
Look up offsets by timestamp for the specified partitions.
@@ -552,7 +552,7 @@ last message in the partition, a value of -1 will be returned.
def pause(partitions: List[TopicPartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L430)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L430)
Pause consumption for the provided list of partitions.
@@ -580,7 +580,7 @@ Does NOT affect the result of Consumer.assignment().
def resume(partitions: List[TopicPartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L444)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L444)
.. py:function:: resume(partitions)
@@ -606,7 +606,7 @@ Resume consumption for the provided list of partitions.
def position(partitions: List[TopicPartition]) -> List[TopicPartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L456)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L456)
Retrieve current positions (offsets) for the specified partitions.
@@ -639,7 +639,7 @@ the last consumed message + 1.
def seek(partition: TopicPartition)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L470)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L470)
Set consume position for partition to offset.
@@ -671,7 +671,7 @@ pass the offset in an `assign()` call.
def assignment() -> List[TopicPartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L487)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L487)
Returns the current partition assignment.
@@ -696,7 +696,7 @@ Returns the current partition assignment.
def set_sasl_credentials(username: str, password: str)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L500)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L500)
Sets the SASL credentials used for this client.
These credentials will overwrite the old ones, and will be used the next
@@ -715,7 +715,7 @@ This method is applicable only to SASL PLAIN and SCRAM mechanisms.
def incremental_assign(partitions: List[TopicPartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L512)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L512)
Assign new partitions.
@@ -735,7 +735,7 @@ Any additional partitions besides the ones passed during the `Consumer`
def incremental_unassign(partitions: List[TopicPartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L524)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L524)
Revoke partitions.
@@ -751,7 +751,7 @@ Can be called outside an on_revoke callback.
def close()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L532)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L532)
Close down and terminate the Kafka Consumer.
diff --git a/docs/api-reference/quixstreams.md b/docs/api-reference/quixstreams.md
index ecd844866..4fe8c1ab6 100644
--- a/docs/api-reference/quixstreams.md
+++ b/docs/api-reference/quixstreams.md
@@ -2,1447 +2,1672 @@
## quixstreams
-
-
-## quixstreams.logging
+
-
+## quixstreams.core
-#### configure\_logging
+
-```python
-def configure_logging(loglevel: Optional[LogLevel]) -> bool
-```
+## quixstreams.core.stream
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/logging.py#L26)
+
-Configure "quixstreams" logger.
+## quixstreams.core.stream.functions
->***NOTE:*** If "quixstreams" logger already has pre-defined handlers
-(e.g. logging has already been configured via `logging`, or the function
-is called twice), it will skip configuration and return `False`.
+
-**Arguments**:
+### StreamFunction
-- `loglevel`: a valid log level as a string or None.
-If None passed, this function is no-op and no logging will be configured.
+```python
+class StreamFunction(abc.ABC)
+```
-**Returns**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L65)
-True if logging config has been updated, otherwise False.
+A base class for all the streaming operations in Quix Streams.
-
+It provides a `get_executor` method to return a closure to be called with the input
+values.
-## quixstreams.error\_callbacks
+
-
+#### StreamFunction.get\_executor
-## quixstreams.platforms
+```python
+@abc.abstractmethod
+def get_executor(child_executor: VoidExecutor) -> VoidExecutor
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L79)
-## quixstreams.platforms.quix.config
+Returns a wrapper to be called on a value, key and timestamp.
-
+
-#### strip\_workspace\_id\_prefix
+### ApplyFunction
```python
-def strip_workspace_id_prefix(workspace_id: str, s: str) -> str
+class ApplyFunction(StreamFunction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L31)
-
-Remove the workspace ID from a given string if it starts with it,
-
-typically a topic or consumer group id
-
-**Arguments**:
-
-- `workspace_id`: the workspace id
-- `s`: the string to append to
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L85)
-**Returns**:
+Wrap a function into "Apply" function.
-the string with workspace_id prefix removed
+The provided callback is expected to return a new value based on input,
+and its result will always be passed downstream.
-
+
-#### prepend\_workspace\_id
+### ApplyWithMetadataFunction
```python
-def prepend_workspace_id(workspace_id: str, s: str) -> str
+class ApplyWithMetadataFunction(StreamFunction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L43)
-
-Add the workspace ID as a prefix to a given string if it does not have it,
-
-typically a topic or consumer group it
-
-**Arguments**:
-
-- `workspace_id`: the workspace id
-- `s`: the string to append to
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L125)
-**Returns**:
+Wrap a function into "Apply" function.
-the string with workspace_id prepended
+The provided function is expected to accept a value, a key, and a timestamp
+and return a new value based on input,
+and its result will always be passed downstream.
-
+
-### QuixApplicationConfig
+### FilterFunction
```python
-@dataclasses.dataclass
-class QuixApplicationConfig()
+class FilterFunction(StreamFunction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L56)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L166)
-A convenience container class for Quix Application configs.
+Wraps a function into a "Filter" function.
+The result of a Filter function is interpreted as boolean.
+If it's `True`, the input will be returned downstream.
+If it's `False`, the `Filtered` exception will be raised to signal that the
+value is filtered out.
-
+
-### QuixKafkaConfigsBuilder
+### FilterWithMetadataFunction
```python
-class QuixKafkaConfigsBuilder()
+class FilterWithMetadataFunction(StreamFunction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L66)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L187)
-Retrieves all the necessary information from the Quix API and builds all the
-objects required to connect a confluent-kafka client to the Quix Platform.
-
-If not executed within the Quix platform directly, you must provide a Quix
-"streaming" (aka "sdk") token, or Personal Access Token.
+Wraps a function into a "Filter" function.
-Ideally you also know your workspace name or id. If not, you can search for it
-using a known topic name, but note the search space is limited to the access level
-of your token.
+The passed callback must accept value, key, and timestamp, and it's expected to
+return a boolean-like result.
-It also currently handles the app_auto_create_topics setting for Application.Quix.
+If the result is `True`, the input will be passed downstream.
+Otherwise, the value will be filtered out.
-
+
-#### QuixKafkaConfigsBuilder.\_\_init\_\_
+### UpdateFunction
```python
-def __init__(quix_sdk_token: Optional[str] = None,
- workspace_id: Optional[str] = None,
- quix_portal_api_service: Optional[QuixPortalApiService] = None,
- timeout: float = 30,
- topic_create_timeout: float = 60)
+class UpdateFunction(StreamFunction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L82)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L210)
-**Arguments**:
+Wrap a function into an "Update" function.
-- `quix_portal_api_service`: A QuixPortalApiService instance (else generated)
-- `workspace_id`: A valid Quix Workspace ID (else searched for)
+The provided function must accept a value, and it's expected to mutate it
+or to perform some side effect.
-
+The result of the callback is always ignored, and the original input is passed
+downstream.
-#### QuixKafkaConfigsBuilder.strip\_workspace\_id\_prefix
+
+
+### UpdateWithMetadataFunction
```python
-def strip_workspace_id_prefix(s: str) -> str
+class UpdateWithMetadataFunction(StreamFunction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L160)
-
-Remove the workspace ID from a given string if it starts with it,
-
-typically a topic or consumer group id
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L233)
-- `s`: the string to append to
+Wrap a function into an "Update" function.
-**Returns**:
+The provided function must accept a value, a key, and a timestamp.
+The callback is expected to mutate the value or to perform some side effect with it.
-the string with workspace_id prefix removed
+The result of the callback is always ignored, and the original input is passed
+downstream.
-
+
-#### QuixKafkaConfigsBuilder.prepend\_workspace\_id
+### TransformFunction
```python
-def prepend_workspace_id(s: str) -> str
+class TransformFunction(StreamFunction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L170)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/functions.py#L256)
-Add the workspace ID as a prefix to a given string if it does not have it,
+Wrap a function into a "Transform" function.
-typically a topic or consumer group it
+The provided callback must accept a value, a key and a timestamp.
+It's expected to return a new value, new key and new timestamp.
-**Arguments**:
+This function must be used with caution, because it can technically change the
+key.
+It's supposed to be used by the library internals and not be a part of the public
+API.
-- `s`: the string to append to
+The result of the callback will always be passed downstream.
-**Returns**:
+
-the string with workspace_id prepended
+## quixstreams.core.stream.stream
-
+
-#### QuixKafkaConfigsBuilder.search\_for\_workspace
+### Stream
```python
-def search_for_workspace(workspace_name_or_id: Optional[str] = None,
- timeout: Optional[float] = None) -> Optional[dict]
+class Stream()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L180)
-
-Search for a workspace given an expected workspace name or id.
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L34)
-- `workspace_name_or_id`: the expected name or id of a workspace
-- `timeout`: response timeout (seconds); Default 30
+
-**Returns**:
+#### Stream.\_\_init\_\_
-the workspace data dict if search success, else None
+```python
+def __init__(func: Optional[StreamFunction] = None,
+ parent: Optional[Self] = None)
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L35)
-#### QuixKafkaConfigsBuilder.get\_workspace\_info
+A base class for all streaming operations.
-```python
-def get_workspace_info(known_workspace_topic: Optional[str] = None,
- timeout: Optional[float] = None) -> dict
-```
+`Stream` is an abstraction of a function pipeline.
+Each Stream has a function and a parent (None by default).
+When adding new function to the stream, it creates a new `Stream` object and
+sets "parent" to the previous `Stream` to maintain an order of execution.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L222)
+Streams support four types of functions:
-Queries for workspace data from the Quix API, regardless of instance cache,
+- "Apply" - generate new values based on a previous one.
+ The result of an Apply function is passed downstream to the next functions.
+ If "expand=True" is passed and the function returns an `Iterable`,
+ each item of it will be treated as a separate value downstream.
+- "Update" - update values in-place.
+ The result of an Update function is always ignored, and its input is passed
+ downstream.
+- "Filter" - to filter values from the Stream.
+ The result of a Filter function is interpreted as boolean.
+ If it's `True`, the input will be passed downstream.
+ If it's `False`, the record will be filtered from the stream.
+- "Transform" - to transform keys and timestamps along with the values.
+ "Transform" functions may change the keys and should be used with caution.
+ The result of the Transform function is passed downstream to the next
+ functions.
+ If "expand=True" is passed and the function returns an `Iterable`,
+ each item of it will be treated as a separate value downstream.
-and updates instance attributes from query result.
+To execute the functions on the `Stream`, call `.compose()` method, and
+it will return a closure to execute all the functions accumulated in the Stream
+and its parents.
**Arguments**:
-- `known_workspace_topic`: a topic you know to exist in some workspace
-- `timeout`: response timeout (seconds); Default 30
+- `func`: a function to be called on the stream.
+It is expected to be wrapped into one of "Apply", "Filter", "Update" or
+"Transform" from `quixstreams.core.stream.functions` package.
+Default - "ApplyFunction(lambda value: value)".
+- `parent`: a parent `Stream`
-
+
-#### QuixKafkaConfigsBuilder.search\_workspace\_for\_topic
+#### Stream.add\_filter
```python
-def search_workspace_for_topic(
- workspace_id: str,
- topic: str,
- timeout: Optional[float] = None) -> Optional[str]
+def add_filter(func: Union[FilterCallback, FilterWithMetadataCallback],
+ *,
+ metadata: bool = False) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L250)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L97)
-Search through all the topics in the given workspace id to see if there is a
-
-match with the provided topic.
+Add a function to filter values from the Stream.
+
+The return value of the function will be interpreted as `bool`.
+If the function returns a `False`-like result, the Stream will raise `Filtered`
+exception during execution.
**Arguments**:
-- `workspace_id`: the workspace to search in
-- `topic`: the topic to search for
-- `timeout`: response timeout (seconds); Default 30
+- `func`: a function to filter values from the stream
+- `metadata`: if True, the callback will receive key and timestamp along with
+the value.
+Default - `False`.
**Returns**:
-the workspace_id if success, else None
+a new `Stream` derived from the current one
-
+
-#### QuixKafkaConfigsBuilder.search\_for\_topic\_workspace
+#### Stream.add\_apply
```python
-def search_for_topic_workspace(topic: str,
- timeout: Optional[float] = None
- ) -> Optional[dict]
+def add_apply(func: Union[
+ ApplyCallback,
+ ApplyExpandedCallback,
+ ApplyWithMetadataCallback,
+ ApplyWithMetadataExpandedCallback,
+],
+ *,
+ expand: bool = False,
+ metadata: bool = False) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L271)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L122)
-Find what workspace a topic belongs to.
+Add an "apply" function to the Stream.
-If there is only one workspace altogether, it is assumed to be the workspace.
-More than one means each workspace will be searched until the first hit.
+The function is supposed to return a new value, which will be passed
+further during execution.
**Arguments**:
-- `topic`: the topic to search for
-- `timeout`: response timeout (seconds); Default 30
+- `func`: a function to generate a new value
+- `expand`: if True, expand the returned iterable into individual values
+downstream. If returned value is not iterable, `TypeError` will be raised.
+Default - `False`.
+- `metadata`: if True, the callback will receive key and timestamp along with
+the value.
+Default - `False`.
**Returns**:
-workspace data dict if topic search success, else None
+a new `Stream` derived from the current one
-
+
-#### QuixKafkaConfigsBuilder.create\_topics
+#### Stream.add\_update
```python
-def create_topics(topics: List[Topic],
- timeout: Optional[float] = None,
- finalize_timeout: Optional[float] = None)
+def add_update(func: Union[UpdateCallback, UpdateWithMetadataCallback],
+ *,
+ metadata: bool = False) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L369)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L155)
-Create topics in a Quix cluster.
+Add an "update" function to the Stream, that will mutate the input value.
+
+The return of this function will be ignored and its input
+will be passed downstream.
**Arguments**:
-- `topics`: a list of `Topic` objects
-- `timeout`: response timeout (seconds); Default 30
-- `finalize_timeout`: topic finalization timeout (seconds); Default 60
-marked as "Ready" (and thus ready to produce to/consume from).
+- `func`: a function to mutate the value
+- `metadata`: if True, the callback will receive key and timestamp along with
+the value.
+Default - `False`.
-
+**Returns**:
-#### QuixKafkaConfigsBuilder.get\_topic
+a new Stream derived from the current one
+
+
+
+#### Stream.add\_transform
```python
-def get_topic(topic_name: str,
- timeout: Optional[float] = None) -> Optional[dict]
+def add_transform(func: Union[TransformCallback, TransformExpandedCallback],
+ *,
+ expand: bool = False) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L419)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L179)
-return the topic ID (the actual cluster topic name) if it exists, else None
+Add a "transform" function to the Stream that transforms the input value,
+key, and timestamp.
->***NOTE***: if the name registered in Quix is instead the workspace-prefixed
-version, this returns None unless that exact name was created WITHOUT the
-Quix API.
+The callback must accept a value, a key, and a timestamp.
+It's expected to return a new value, new key and new timestamp.
+
+The result of the callback will be passed downstream
+during execution.
**Arguments**:
-- `topic_name`: name of the topic
-- `timeout`: response timeout (seconds); Default 30
+- `func`: a function to mutate the value
+- `expand`: if True, expand the returned iterable into individual items
+downstream. If returned value is not iterable, `TypeError` will be raised.
+Default - `False`.
**Returns**:
-response dict of the topic info if topic found, else None
+a new Stream derived from the current one
-
+
-#### QuixKafkaConfigsBuilder.confirm\_topics\_exist
+#### Stream.diff
```python
-def confirm_topics_exist(topics: Union[List[Topic], List[str]],
- timeout: Optional[float] = None)
+def diff(other: "Stream") -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L451)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L204)
-Confirm whether the desired set of topics exists in the Quix workspace.
+Takes the difference between Streams `self` and `other` based on their last
+
+common parent, and returns a new `Stream` that includes only this difference.
+
+It's impossible to calculate a diff when:
+ - Streams don't have a common parent.
+ - When the `self` Stream already includes all the nodes from
+ the `other` Stream, and the resulting diff is empty.
**Arguments**:
-- `topics`: a list of `Topic` or topic names
-- `timeout`: response timeout (seconds); Default 30
+- `other`: a `Stream` to take a diff from.
-
+**Raises**:
-#### QuixKafkaConfigsBuilder.get\_application\_config
+- `ValueError`: if Streams don't have a common parent
+or if the diff is empty.
-```python
-def get_application_config(consumer_group_id: str) -> QuixApplicationConfig
-```
+**Returns**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/config.py#L483)
+new `Stream` instance including all the Streams from the diff
-Get all the necessary attributes for an Application to run on Quix Cloud.
+
-**Arguments**:
+#### Stream.tree
-- `consumer_group_id`: consumer group id, if needed
+```python
+def tree() -> List[Self]
+```
-**Returns**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L233)
-a QuixApplicationConfig instance
+Return a list of all parent Streams including the node itself.
-
+The tree is ordered from child to parent (current node comes first).
-## quixstreams.platforms.quix.env
+**Returns**:
-
+a list of `Stream` objects
-### QuixEnvironment
+
+
+#### Stream.compose\_returning
```python
-class QuixEnvironment()
+def compose_returning() -> ReturningExecutor
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L7)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L247)
-Class to access various Quix platform environment settings
+Compose a list of functions from this `Stream` and its parents into one
+big closure that always returns the transformed record.
-
+This closure is to be used to execute the functions in the stream and to get
+the result of the transformations.
-#### QuixEnvironment.state\_management\_enabled
+Stream may only contain simple "apply" functions to be able to compose itself
+into a returning function.
+
+
+
+#### Stream.compose
```python
-@property
-def state_management_enabled() -> bool
+def compose(
+ allow_filters: bool = True,
+ allow_updates: bool = True,
+ allow_expands: bool = True,
+ allow_transforms: bool = True,
+ sink: Optional[Callable[[Any, Any, int, Any],
+ None]] = None) -> VoidExecutor
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L19)
-
-Check whether "State management" is enabled for the current deployment
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/core/stream/stream.py#L284)
-**Returns**:
+Compose a list of functions from this `Stream` and its parents into one
-True if state management is enabled, otherwise False
+big closure using a "composer" function.
-
+This "executor" closure is to be used to execute all functions in the stream for the given
+key, value and timestamps.
-#### QuixEnvironment.deployment\_id
+By default, executor doesn't return the result of the execution.
+To accumulate the results, pass the `sink` parameter.
-```python
-@property
-def deployment_id() -> Optional[str]
-```
+**Arguments**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L27)
+- `allow_filters`: If False, this function will fail with `ValueError` if
+the stream has filter functions in the tree. Default - True.
+- `allow_updates`: If False, this function will fail with `ValueError` if
+the stream has update functions in the tree. Default - True.
+- `allow_expands`: If False, this function will fail with `ValueError` if
+the stream has functions with "expand=True" in the tree. Default - True.
+- `allow_transforms`: If False, this function will fail with `ValueError` if
+the stream has transform functions in the tree. Default - True.
+- `sink`: callable to accumulate the results of the execution, optional.
-Return current Quix deployment id.
+**Raises**:
-This variable is meant to be set only by Quix Platform and only
-when the application is deployed.
+- `ValueError`: if disallowed functions are present in the stream tree.
-**Returns**:
+
-deployment id or None
+## quixstreams.dataframe.utils
-
+
-#### QuixEnvironment.workspace\_id
+#### ensure\_milliseconds
```python
-@property
-def workspace_id() -> Optional[str]
+def ensure_milliseconds(delta: Union[int, timedelta]) -> int
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L39)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/utils.py#L5)
-Return Quix workspace id if set
+Convert timedelta to milliseconds.
-**Returns**:
+If the `delta` is already an `int`, it is presumably assumed to be in
+milliseconds and returned as-is.
+This function will also round the value to the closest milliseconds in case of
+higher precision.
-workspace id or None
+**Arguments**:
-
+- `delta`: `timedelta` object
-#### QuixEnvironment.portal\_api
+**Returns**:
-```python
-@property
-def portal_api() -> Optional[str]
-```
+timedelta value in milliseconds as `int`
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L47)
+
-Return Quix Portal API url if set
+## quixstreams.dataframe.windows
-**Returns**:
+
-portal API URL or None
+## quixstreams.dataframe.windows.definitions
-
+
-#### QuixEnvironment.state\_dir
+### FixedTimeWindowDefinition
```python
-@property
-def state_dir() -> str
+class FixedTimeWindowDefinition(abc.ABC)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/env.py#L56)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L20)
-Return application state directory on Quix.
+
-**Returns**:
+#### FixedTimeWindowDefinition.sum
-path to state dir
+```python
+def sum() -> "FixedTimeWindow"
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L67)
-## quixstreams.platforms.quix.checks
-
-
-
-#### check\_state\_management\_enabled
+Configure the window to aggregate data by summing up values within
-```python
-def check_state_management_enabled()
-```
+each window period.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/checks.py#L11)
+**Returns**:
-Check if State Management feature is enabled for the current deployment on
-Quix platform.
-If it's disabled, the exception will be raised.
+an instance of `FixedTimeWindow` configured to perform sum aggregation.
-
+
-#### check\_state\_dir
+#### FixedTimeWindowDefinition.count
```python
-def check_state_dir(state_dir: str)
+def count() -> "FixedTimeWindow"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/checks.py#L28)
-
-Check if Application "state_dir" matches the state dir on Quix platform.
-
-If it doesn't match, the warning will be logged.
-
-**Arguments**:
-
-- `state_dir`: application state_dir path
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L94)
-
+Configure the window to aggregate data by counting the number of values
-## quixstreams.platforms.quix
+within each window period.
-
+**Returns**:
-## quixstreams.platforms.quix.api
+an instance of `FixedTimeWindow` configured to perform record count.
-
+
-### QuixPortalApiService
+#### FixedTimeWindowDefinition.mean
```python
-class QuixPortalApiService()
+def mean() -> "FixedTimeWindow"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/api.py#L19)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L121)
-A light wrapper around the Quix Portal Api. If used in the Quix Platform, it will
-use that workspaces auth token and portal endpoint, else you must provide it.
+Configure the window to aggregate data by calculating the mean of the values
-Function names closely reflect the respective API endpoint,
-each starting with the method [GET, POST, etc.] followed by the endpoint path.
+within each window period.
-Results will be returned in the form of request's Response.json(), unless something
-else is required. Non-200's will raise exceptions.
+**Returns**:
-See the swagger documentation for more info about the endpoints.
+an instance of `FixedTimeWindow` configured to calculate the mean
+of the values.
-
+
-#### QuixPortalApiService.get\_workspace\_certificate
+#### FixedTimeWindowDefinition.reduce
```python
-def get_workspace_certificate(workspace_id: Optional[str] = None,
- timeout: float = 30) -> Optional[bytes]
+def reduce(reducer: Callable[[Any, Any], Any],
+ initializer: Callable[[Any], Any]) -> "FixedTimeWindow"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/api.py#L119)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L152)
-Get a workspace TLS certificate if available.
+Configure the window to perform a custom aggregation using `reducer`
-Returns `None` if certificate is not specified.
+and `initializer` functions.
-**Arguments**:
+Example Snippet:
+```python
+sdf = StreamingDataFrame(...)
-- `workspace_id`: workspace id, optional
-- `timeout`: request timeout; Default 30
+# Using "reduce()" to calculate multiple aggregates at once
+def reducer(agg: dict, current: int):
+ aggregated = {
+ 'min': min(agg['min'], current),
+        'max': max(agg['max'], current),
+ 'count': agg['count'] + 1
+ }
+ return aggregated
-**Returns**:
+def initializer(current) -> dict:
+ return {'min': current, 'max': current, 'count': 1}
-certificate as bytes if present, or None
+window = (
+ sdf.tumbling_window(duration_ms=1000)
+ .reduce(reducer=reducer, initializer=initializer)
+ .final()
+)
+```
-
+**Arguments**:
-## quixstreams.platforms.quix.exceptions
+- `reducer`: A function that takes two arguments
+(the accumulated value and a new value) and returns a single value.
+The returned value will be saved to the state store and sent downstream.
+- `initializer`: A function to call for every first element of the window.
+This function is used to initialize the aggregation within a window.
-
+**Returns**:
-## quixstreams.platforms.quix.topic\_manager
+A window configured to perform custom reduce aggregation on the data.
-
+
-### QuixTopicManager
+#### FixedTimeWindowDefinition.max
```python
-class QuixTopicManager(TopicManager)
+def max() -> "FixedTimeWindow"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/topic_manager.py#L9)
-
-The source of all topic management with quixstreams.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L212)
-This is specifically for Applications using the Quix platform.
+Configure a window to aggregate the maximum value within each window period.
-Generally initialized and managed automatically by an `Application.Quix`,
-but allows a user to work with it directly when needed, such as using it alongside
-a plain `Producer` to create its topics.
+**Returns**:
-See methods for details.
+an instance of `FixedTimeWindow` configured to calculate the maximum
+value within each window period.
-
+
-#### QuixTopicManager.\_\_init\_\_
+#### FixedTimeWindowDefinition.min
```python
-def __init__(topic_admin: TopicAdmin,
- consumer_group: str,
- quix_config_builder: QuixKafkaConfigsBuilder,
- timeout: float = 30,
- create_timeout: float = 60)
+def min() -> "FixedTimeWindow"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/platforms/quix/topic_manager.py#L30)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/definitions.py#L241)
-**Arguments**:
+Configure a window to aggregate the minimum value within each window period.
-- `topic_admin`: an `Admin` instance
-- `quix_config_builder`: A QuixKafkaConfigsBuilder instance, else one is
-generated for you.
-- `timeout`: response timeout (seconds)
-- `create_timeout`: timeout for topic creation
+**Returns**:
-
+an instance of `FixedTimeWindow` configured to calculate the minimum
+value within each window period.
-## quixstreams.dataframe.dataframe
+
-
+## quixstreams.dataframe.windows.base
-### StreamingDataFrame
+
+
+#### get\_window\_ranges
```python
-class StreamingDataFrame(BaseStreaming)
+def get_window_ranges(timestamp_ms: int,
+ duration_ms: int,
+ step_ms: Optional[int] = None) -> List[Tuple[int, int]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L62)
-
-`StreamingDataFrame` is the main object you will use for ETL work.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/base.py#L18)
-Typically created with an `app = quixstreams.app.Application()` instance,
-via `sdf = app.dataframe()`.
+Get a list of window ranges for the given timestamp.
+**Arguments**:
-What it Does:
+- `timestamp_ms`: timestamp in milliseconds
+- `duration_ms`: window duration in milliseconds
+- `step_ms`: window step in milliseconds for hopping windows, optional.
-- Builds a data processing pipeline, declaratively (not executed immediately)
- - Executes this pipeline on inputs at runtime (Kafka message values)
-- Provides functions/interface similar to Pandas Dataframes/Series
-- Enables stateful processing (and manages everything related to it)
+**Returns**:
+a list of (, ) tuples
-How to Use:
+
-Define various operations while continuously reassigning to itself (or new fields).
+## quixstreams.dataframe.windows.time\_based
-These operations will generally transform your data, access/update state, or produce
-to kafka topics.
+
-We recommend your data structure to be "columnar" (aka a dict/JSON) in nature so
-that it works with the entire interface, but simple types like `ints`, `str`, etc.
-are also supported.
+### FixedTimeWindow
-See the various methods and classes for more specifics, or for a deep dive into
-usage, see `streamingdataframe.md` under the `docs/` folder.
+```python
+class FixedTimeWindow()
+```
->***NOTE:*** column referencing like `sdf["a_column"]` and various methods often
- create other object types (typically `quixstreams.dataframe.StreamingSeries`),
- which is expected; type hinting should alert you to any issues should you
- attempt invalid operations with said objects (however, we cannot infer whether
- an operation is valid with respect to your data!).
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/time_based.py#L32)
+
-Example Snippet:
+#### FixedTimeWindow.final
```python
-sdf = StreamingDataframe()
-sdf = sdf.apply(a_func)
-sdf = sdf.filter(another_func)
-sdf = sdf.to_topic(topic_obj)
+def final() -> "StreamingDataFrame"
```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/time_based.py#L107)
-#### StreamingDataFrame.apply
+Apply the window aggregation and return results only when the windows are
+closed.
+The format of returned windows:
```python
-def apply(func: Union[
- ApplyCallback,
- ApplyCallbackStateful,
- ApplyWithMetadataCallback,
- ApplyWithMetadataCallbackStateful,
-],
- *,
- stateful: bool = False,
- expand: bool = False,
- metadata: bool = False) -> Self
+{
+ "start": ,
+ "end": ,
+ "value: ,
+}
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L177)
-
-Apply a function to transform the value and return a new value.
+The individual window is closed when the event time
+(the maximum observed timestamp across the partition) passes
+its end timestamp + grace period.
+The closed windows cannot receive updates anymore and are considered final.
-The result will be passed downstream as an input value.
+>***NOTE:*** Windows can be closed only within the same message key.
+If some message keys appear irregularly in the stream, the latest windows
+can remain unprocessed until the message the same key is received.
+
-Example Snippet:
+#### FixedTimeWindow.current
```python
-# This stores a string in state and capitalizes every column with a string value.
-# A second apply then keeps only the string value columns (shows non-stateful).
-def func(d: dict, state: State):
- value = d["store_field"]
- if value != state.get("my_store_key"):
- state.set("my_store_key") = value
- return {k: v.upper() if isinstance(v, str) else v for k, v in d.items()}
+def current() -> "StreamingDataFrame"
+```
-sdf = StreamingDataframe()
-sdf = sdf.apply(func, stateful=True)
-sdf = sdf.apply(lambda d: {k: v for k,v in d.items() if isinstance(v, str)})
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/windows/time_based.py#L145)
+
+Apply the window transformation to the StreamingDataFrame to return results
+for each updated window.
+The format of returned windows:
+```python
+{
+ "start": ,
+ "end": ,
+ "value: ,
+}
```
-**Arguments**:
+This method processes streaming data and returns results as they come,
+regardless of whether the window is closed or not.
-- `func`: a function to apply
-- `stateful`: if `True`, the function will be provided with a second argument
-of type `State` to perform stateful operations.
-- `expand`: if True, expand the returned iterable into individual values
-downstream. If returned value is not iterable, `TypeError` will be raised.
-Default - `False`.
-- `metadata`: if True, the callback will receive key, timestamp and headers
-along with the value.
-Default - `False`.
+
-
+## quixstreams.dataframe
-#### StreamingDataFrame.update
+
-```python
-def update(func: Union[
- UpdateCallback,
- UpdateCallbackStateful,
- UpdateWithMetadataCallback,
- UpdateWithMetadataCallbackStateful,
-],
- *,
- stateful: bool = False,
- metadata: bool = False) -> Self
-```
+## quixstreams.dataframe.exceptions
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L266)
+
-Apply a function to mutate value in-place or to perform a side effect
+## quixstreams.dataframe.base
-(e.g., printing a value to the console).
+
-The result of the function will be ignored, and the original value will be
-passed downstream.
+## quixstreams.dataframe.series
+
-Example Snippet:
+### StreamingSeries
```python
-# Stores a value and mutates a list by appending a new item to it.
-# Also prints to console.
+class StreamingSeries(BaseStreaming)
+```
-def func(values: list, state: State):
- value = values[0]
- if value != state.get("my_store_key"):
- state.set("my_store_key") = value
- values.append("new_item")
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L47)
-sdf = StreamingDataframe()
-sdf = sdf.update(func, stateful=True)
-sdf = sdf.update(lambda value: print("Received value: ", value))
-```
+`StreamingSeries` are typically generated by `StreamingDataframes` when getting
+elements from, or performing certain operations on, a `StreamingDataframe`,
+thus acting as a representation of "column" value.
-**Arguments**:
+They share some operations with the `StreamingDataframe`, but also provide some
+additional functionality.
-- `func`: function to update value
-- `stateful`: if `True`, the function will be provided with a second argument
-of type `State` to perform stateful operations.
-- `metadata`: if True, the callback will receive key, timestamp and headers
-along with the value.
-Default - `False`.
+Most column value operations are handled by this class, and `StreamingSeries` can
+generate other `StreamingSeries` as a result of said operations.
-
-#### StreamingDataFrame.filter
+What it Does:
-```python
-def filter(func: Union[
- FilterCallback,
- FilterCallbackStateful,
- FilterWithMetadataCallback,
- FilterWithMetadataCallbackStateful,
-],
- *,
- stateful: bool = False,
- metadata: bool = False) -> Self
-```
+- Allows ways to do simple operations with dataframe "column"/dictionary values:
+ - Basic ops like add, subtract, modulo, etc.
+- Enables comparisons/inequalities:
+ - Greater than, equals, etc.
+ - and/or, is/not operations
+- Can check for existence of columns in `StreamingDataFrames`
+- Enables chaining of various operations together
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L354)
-Filter value using provided function.
+How to Use:
+
+For the most part, you may not even notice this class exists!
+They will naturally be created as a result of typical `StreamingDataFrame` use.
+
+Auto-complete should help you with valid methods and type-checking should alert
+you to invalid operations between `StreamingSeries`.
+
+In general, any typical Pands dataframe operation between columns should be valid
+with `StreamingSeries`, and you shouldn't have to think about them explicitly.
-If the function returns True-like value, the original value will be
-passed downstream.
Example Snippet:
```python
-# Stores a value and allows further processing only if the value is greater than
-# what was previously stored.
-
-def func(d: dict, state: State):
- value = d["my_value"]
- if value > state.get("my_store_key"):
- state.set("my_store_key") = value
- return True
- return False
+# Random methods for example purposes. More detailed explanations found under
+# various methods or in the docs folder.
sdf = StreamingDataframe()
-sdf = sdf.filter(func, stateful=True)
+sdf = sdf["column_a"].apply(a_func).apply(diff_func, stateful=True)
+sdf["my_new_bool_field"] = sdf["column_b"].contains("this_string")
+sdf["new_sum_field"] = sdf["column_c"] + sdf["column_d"] + 2
+sdf = sdf[["column_a"] & (sdf["new_sum_field"] >= 10)]
+```
+
+
+
+#### StreamingSeries.from\_apply\_callback
+
+```python
+@classmethod
+def from_apply_callback(cls, func: ApplyWithMetadataCallback) -> Self
```
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L107)
+
+Create a StreamingSeries from a function.
+
+The provided function will be wrapped into `Apply`
+
**Arguments**:
-- `func`: function to filter value
-- `stateful`: if `True`, the function will be provided with second argument
-of type `State` to perform stateful operations.
-- `metadata`: if True, the callback will receive key, timestamp and headers
-along with the value.
-Default - `False`.
+- `func`: a function to apply
-
+**Returns**:
-#### StreamingDataFrame.group\_by
+instance of `StreamingSeries`
-```python
-def group_by(key: Union[str, Callable[[Any], Any]],
- name: Optional[str] = None,
- value_deserializer: Optional[DeserializerType] = "json",
- key_deserializer: Optional[DeserializerType] = "json",
- value_serializer: Optional[SerializerType] = "json",
- key_serializer: Optional[SerializerType] = "json") -> Self
-```
+
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L440)
+#### StreamingSeries.apply
-"Groups" messages by re-keying them via the provided group_by operation
+```python
+def apply(func: ApplyCallback) -> Self
+```
-on their message values.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L121)
-This enables things like aggregations on messages with non-matching keys.
+Add a callable to the execution list for this series.
-You can provide a column name (uses the column's value) or a custom function
-to generate this new key.
+The provided callable should accept a single argument, which will be its input.
+The provided callable should similarly return one output, or None
-`.groupby()` can only be performed once per `StreamingDataFrame` instance.
+They can be chained together or included with other operations.
->**NOTE:** group_by generates a topic that copies the original topic's settings.
Example Snippet:
```python
-# We have customer purchase events where the message key is the "store_id",
-# but we want to calculate sales per customer (by "customer_account_id").
+# The `StreamingSeries` are generated when `sdf["COLUMN_NAME"]` is called.
+# This stores a string in state and capitalizes the column value; the result is
+# assigned to a new column.
+# Another apply converts a str column to an int, assigning it to a new column.
-def func(d: dict, state: State):
- current_total = state.get("customer_sum", 0)
- new_total = current_total + d["customer_spent"]
- state.set("customer_sum", new_total)
- d["customer_total"] = new_total
- return d
+def func(value: str, state: State):
+ if value != state.get("my_store_key"):
+ state.set("my_store_key") = value
+ return v.upper()
sdf = StreamingDataframe()
-sdf = sdf.group_by("customer_account_id")
-sdf = sdf.apply(func, stateful=True)
+sdf["new_col"] = sdf["a_column"]["nested_dict_key"].apply(func, stateful=True)
+sdf["new_col_2"] = sdf["str_col"].apply(lambda v: int(v)) + sdf["str_col2"] + 2
```
**Arguments**:
-- `key`: how the new key should be generated from the message value;
-requires a column name (string) or a callable that takes the message value.
-- `name`: a name for the op (must be unique per group-by), required if `key`
-is a custom callable.
-- `value_deserializer`: a deserializer type for values; default - JSON
-- `key_deserializer`: a deserializer type for keys; default - JSON
-- `value_serializer`: a serializer type for values; default - JSON
-- `key_serializer`: a serializer type for keys; default - JSON
+- `func`: a callable with one argument and one output
**Returns**:
-a clone with this operation added (assign to keep its effect).
+a new `StreamingSeries` with the new callable added
-
+
-#### StreamingDataFrame.contains
+#### StreamingSeries.compose\_returning
```python
-@staticmethod
-def contains(key: str) -> StreamingSeries
+def compose_returning() -> ReturningExecutor
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L518)
-
-Check if the key is present in the Row value.
-
-Example Snippet:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L155)
-```python
-# Add new column 'has_column' which contains a boolean indicating
-# the presence of 'column_x'
+Compose a list of functions from this StreamingSeries and its parents into one
-sdf = StreamingDataframe()
-sdf['has_column'] = sdf.contains('column_x')
-```
+big closure that always returns the transformed record.
-**Arguments**:
+This closure is to be used to execute the functions in the stream and to get
+the result of the transformations.
-- `key`: a column name to check.
+Stream may only contain simple "apply" functions to be able to compose itself
+into a returning function.
**Returns**:
-a Column object that evaluates to True if the key is present
-or False otherwise.
+a callable accepting value, key and timestamp and
+returning a tuple "(value, key, timestamp)
-
+
-#### StreamingDataFrame.to\_topic
+#### StreamingSeries.compose
```python
-def to_topic(topic: Topic, key: Optional[Callable[[Any], Any]] = None) -> Self
+def compose(
+ sink: Optional[Callable[[Any, Any, int, Any],
+ None]] = None) -> VoidExecutor
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L543)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L170)
+
+Compose all functions of this StreamingSeries into one big closure.
+
+Generally not required by users; the `quixstreams.app.Application` class will
+do this automatically.
-Produce current value to a topic. You can optionally specify a new key.
Example Snippet:
```python
from quixstreams import Application
-# Produce to two different topics, changing the key for one of them.
+app = Application(...)
-app = Application()
-input_topic = app.topic("input_x")
-output_topic_0 = app.topic("output_a")
-output_topic_1 = app.topic("output_b")
+sdf = app.dataframe()
+sdf = sdf["column_a"].apply(apply_func)
+sdf = sdf["column_b"].contains(filter_func)
+sdf = sdf.compose()
-sdf = app.dataframe(input_topic)
-sdf = sdf.to_topic(output_topic_0)
-sdf = sdf.to_topic(output_topic_1, key=lambda data: data["a_field"])
+result_0 = sdf({"my": "record"})
+result_1 = sdf({"other": "record"})
```
**Arguments**:
-- `topic`: instance of `Topic`
-- `key`: a callable to generate a new message key, optional.
-If passed, the return type of this callable must be serializable
-by `key_serializer` defined for this Topic object.
-By default, the current message key will be used.
-
-
-
-#### StreamingDataFrame.set\_timestamp
+- `sink`: callable to accumulate the results of the execution.
-```python
-def set_timestamp(func: Callable[[Any, Any, int, Any], int]) -> Self
-```
+**Raises**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L584)
+- `ValueError`: if disallowed functions are present in the tree of
+underlying `Stream`.
-Set a new timestamp based on the current message value and its metadata.
+**Returns**:
-The new timestamp will be used in windowed aggregations and when producing
-messages to the output topics.
+a callable accepting value, key and timestamp and
+returning None
-The new timestamp must be in milliseconds to conform Kafka requirements.
+
-Example Snippet:
+#### StreamingSeries.test
```python
-from quixstreams import Application
+def test(value: Any,
+ key: Any,
+ timestamp: int,
+ headers: Optional[Any] = None,
+ ctx: Optional[MessageContext] = None) -> Any
+```
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L214)
-app = Application()
-input_topic = app.topic("data")
+A shorthand to test `StreamingSeries` with provided value
-sdf = app.dataframe(input_topic)
-# Updating the record's timestamp based on the value
-sdf = sdf.set_timestamp(lambda value, key, timestamp, headers: value['new_timestamp'])
-```
+and `MessageContext`.
**Arguments**:
-- `func`: callable accepting the current value, key, timestamp, and headers.
-It's expected to return a new timestamp as integer in milliseconds.
+- `value`: value to pass through `StreamingSeries`
+- `ctx`: instance of `MessageContext`, optional.
+Provide it if the StreamingSeries instance has
+functions calling `get_current_key()`.
+Default - `None`.
**Returns**:
-a new StreamingDataFrame instance
+result of `StreamingSeries`
-
+
-#### StreamingDataFrame.set\_headers
+#### StreamingSeries.isin
```python
-def set_headers(
- func: Callable[
- [Any, Any, int, List[Tuple[str, HeaderValue]]],
- Collection[Tuple[str, HeaderValue]],
- ]
-) -> Self
+def isin(other: Container) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L625)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L269)
-Set new message headers based on the current message value and metadata.
+Check if series value is in "other".
-The new headers will be used when producing messages to the output topics.
+Same as "StreamingSeries in other".
+
+Runtime result will be a `bool`.
-The provided callback must accept value, key, timestamp, and headers,
-and return a new collection of (header, value) tuples.
Example Snippet:
```python
from quixstreams import Application
+# Check if "str_column" is contained in a column with a list of strings and
+# assign the resulting `bool` to a new column: "has_my_str".
-app = Application()
-input_topic = app.topic("data")
-
-sdf = app.dataframe(input_topic)
-# Updating the record's headers based on the value and metadata
-sdf = sdf.set_headers(lambda value, key, timestamp, headers: [('id', value['id'])])
+sdf = app.dataframe()
+sdf["has_my_str"] = sdf["str_column"].isin(sdf["column_with_list_of_strs"])
```
**Arguments**:
-- `func`: callable accepting the current value, key, timestamp, and headers.
-It's expected to return a new set of headers
-as a collection of (header, value) tuples.
+- `other`: a container to check
**Returns**:
-a new StreamingDataFrame instance
+new StreamingSeries
-
+
-#### StreamingDataFrame.compose
+#### StreamingSeries.contains
```python
-def compose(
- sink: Optional[Callable[[Any, Any, int, Any], None]] = None
-) -> Dict[str, VoidExecutor]
+def contains(other: Union[Self, object]) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L676)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L296)
-Compose all functions of this StreamingDataFrame into one big closure.
+Check if series value contains "other"
-Closures are more performant than calling all the functions in the
-`StreamingDataFrame` one-by-one.
+Same as "other in StreamingSeries".
-Generally not required by users; the `quixstreams.app.Application` class will
-do this automatically.
+Runtime result will be a `bool`.
Example Snippet:
```python
from quixstreams import Application
-sdf = app.dataframe()
-sdf = sdf.apply(apply_func)
-sdf = sdf.filter(filter_func)
-sdf = sdf.compose()
-result_0 = sdf({"my": "record"})
-result_1 = sdf({"other": "record"})
+# Check if "column_a" contains "my_substring" and assign the resulting
+# `bool` to a new column: "has_my_substr"
+
+sdf = app.dataframe()
+sdf["has_my_substr"] = sdf["column_a"].contains("my_substring")
```
**Arguments**:
-- `sink`: callable to accumulate the results of the execution, optional.
+- `other`: object to check
**Returns**:
-a function that accepts "value"
-and returns a result of StreamingDataFrame
+new StreamingSeries
-
+
-#### StreamingDataFrame.test
+#### StreamingSeries.is\_
```python
-def test(value: Any,
- key: Any,
- timestamp: int,
- headers: Optional[Any] = None,
- ctx: Optional[MessageContext] = None,
- topic: Optional[Topic] = None) -> List[Any]
+def is_(other: Union[Self, object]) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L713)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L321)
-A shorthand to test `StreamingDataFrame` with provided value
+Check if series value refers to the same object as `other`
-and `MessageContext`.
+Runtime result will be a `bool`.
-**Arguments**:
-- `value`: value to pass through `StreamingDataFrame`
-- `key`: key to pass through `StreamingDataFrame`
-- `timestamp`: timestamp to pass through `StreamingDataFrame`
-- `ctx`: instance of `MessageContext`, optional.
-Provide it if the StreamingDataFrame instance calls `to_topic()`,
-has stateful functions or windows.
-Default - `None`.
-- `topic`: optionally, a topic branch to test with
+Example Snippet:
-**Returns**:
+```python
+# Check if "column_a" is the same as "column_b" and assign the resulting `bool`
+# to a new column: "is_same"
-result of `StreamingDataFrame`
+from quixstreams import Application
+sdf = app.dataframe()
+sdf["is_same"] = sdf["column_a"].is_(sdf["column_b"])
+```
-
+**Arguments**:
-#### StreamingDataFrame.tumbling\_window
+- `other`: object to check for "is"
-```python
-def tumbling_window(duration_ms: Union[int, timedelta],
- grace_ms: Union[int, timedelta] = 0,
- name: Optional[str] = None) -> TumblingWindowDefinition
-```
+**Returns**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L750)
+new StreamingSeries
-Create a tumbling window transformation on this StreamingDataFrame.
+
-Tumbling windows divide time into fixed-sized, non-overlapping windows.
+#### StreamingSeries.isnot
-They allow performing stateful aggregations like `sum`, `reduce`, etc.
-on top of the data and emit results downstream.
+```python
+def isnot(other: Union[Self, object]) -> Self
+```
-Notes:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L344)
-- The timestamp of the aggregation result is set to the window start timestamp.
-- Every window is grouped by the current Kafka message key.
-- Messages with `None` key will be ignored.
-- The time windows always use the current event time.
+Check if series value does not refer to the same object as `other`
+Runtime result will be a `bool`.
Example Snippet:
```python
-app = Application()
-sdf = app.dataframe(...)
-
-sdf = (
- # Define a tumbling window of 60s and grace period of 10s
- sdf.tumbling_window(
- duration_ms=timedelta(seconds=60), grace_ms=timedelta(seconds=10.0)
- )
+from quixstreams import Application
- # Specify the aggregation function
- .sum()
+# Check if "column_a" is the same as "column_b" and assign the resulting `bool`
+# to a new column: "is_not_same"
- # Specify how the results should be emitted downstream.
- # "all()" will emit results as they come for each updated window,
- # possibly producing multiple messages per key-window pair
- # "final()" will emit windows only when they are closed and cannot
- # receive any updates anymore.
- .all()
-)
+sdf = app.dataframe()
+sdf["is_not_same"] = sdf["column_a"].isnot(sdf["column_b"])
```
**Arguments**:
-- `duration_ms`: The length of each window.
-Can be specified as either an `int` representing milliseconds or a
-`timedelta` object.
->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
-value.
-- `grace_ms`: The grace period for data arrival.
-It allows late-arriving data (data arriving after the window
-has theoretically closed) to be included in the window.
-Can be specified as either an `int` representing milliseconds
-or as a `timedelta` object.
->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
-value.
-- `name`: The unique identifier for the window. If not provided, it will be
-automatically generated based on the window's properties.
+- `other`: object to check for "is_not"
**Returns**:
-`TumblingWindowDefinition` instance representing the tumbling window
-configuration.
-This object can be further configured with aggregation functions
-like `sum`, `count`, etc. applied to the StreamingDataFrame.
+new StreamingSeries
-
+
-#### StreamingDataFrame.hopping\_window
+#### StreamingSeries.isnull
```python
-def hopping_window(duration_ms: Union[int, timedelta],
- step_ms: Union[int, timedelta],
- grace_ms: Union[int, timedelta] = 0,
- name: Optional[str] = None) -> HoppingWindowDefinition
+def isnull() -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/dataframe.py#L826)
-
-Create a hopping window transformation on this StreamingDataFrame.
-
-Hopping windows divide the data stream into overlapping windows based on time.
-The overlap is controlled by the `step_ms` parameter.
-
-They allow performing stateful aggregations like `sum`, `reduce`, etc.
-on top of the data and emit results downstream.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L368)
-Notes:
+Check if series value is None.
-- The timestamp of the aggregation result is set to the window start timestamp.
-- Every window is grouped by the current Kafka message key.
-- Messages with `None` key will be ignored.
-- The time windows always use the current event time.
+Runtime result will be a `bool`.
Example Snippet:
```python
-app = Application()
-sdf = app.dataframe(...)
-
-sdf = (
- # Define a hopping window of 60s with step 30s and grace period of 10s
- sdf.hopping_window(
- duration_ms=timedelta(seconds=60),
- step_ms=timedelta(seconds=30),
- grace_ms=timedelta(seconds=10)
- )
+from quixstreams import Application
- # Specify the aggregation function
- .sum()
+# Check if "column_a" is null and assign the resulting `bool` to a new column:
+# "is_null"
- # Specify how the results should be emitted downstream.
- # "all()" will emit results as they come for each updated window,
- # possibly producing multiple messages per key-window pair
- # "final()" will emit windows only when they are closed and cannot
- # receive any updates anymore.
- .all()
-)
+sdf = app.dataframe()
+sdf["is_null"] = sdf["column_a"].isnull()
```
-**Arguments**:
+**Returns**:
-- `duration_ms`: The length of each window. It defines the time span for
-which each window aggregates data.
-Can be specified as either an `int` representing milliseconds
-or a `timedelta` object.
->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
-value.
-- `step_ms`: The step size for the window.
-It determines how much each successive window moves forward in time.
-Can be specified as either an `int` representing milliseconds
-or a `timedelta` object.
->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
-value.
-- `grace_ms`: The grace period for data arrival.
-It allows late-arriving data to be included in the window,
-even if it arrives after the window has theoretically moved forward.
-Can be specified as either an `int` representing milliseconds
-or a `timedelta` object.
->***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
-value.
-- `name`: The unique identifier for the window. If not provided, it will be
-automatically generated based on the window's properties.
+new StreamingSeries
+
+
+
+#### StreamingSeries.notnull
+
+```python
+def notnull() -> Self
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L391)
+
+Check if series value is not None.
+
+Runtime result will be a `bool`.
+
+
+Example Snippet:
+
+```python
+from quixstreams import Application
+
+# Check if "column_a" is not null and assign the resulting `bool` to a new column:
+# "is_not_null"
+
+sdf = app.dataframe()
+sdf["is_not_null"] = sdf["column_a"].notnull()
+```
**Returns**:
-`HoppingWindowDefinition` instance representing the hopping
-window configuration.
-This object can be further configured with aggregation functions
-like `sum`, `count`, etc. and applied to the StreamingDataFrame.
+new StreamingSeries
-
+
-## quixstreams.dataframe.series
+#### StreamingSeries.abs
+
+```python
+def abs() -> Self
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/series.py#L414)
+
+Get absolute value of the series value.
+
+Example Snippet:
+
+```python
+from quixstreams import Application
+
+# Get absolute value of "int_col" and add it to "other_int_col".
+# Finally, assign the result to a new column: "abs_col_sum".
+
+sdf = app.dataframe()
+sdf["abs_col_sum"] = sdf["int_col"].abs() + sdf["other_int_col"]
+```
+
+**Returns**:
+
+new StreamingSeries
+
+
+
+## quixstreams.dataframe.dataframe
+
+
+
+### StreamingDataFrame
+
+```python
+class StreamingDataFrame(BaseStreaming)
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L62)
+
+`StreamingDataFrame` is the main object you will use for ETL work.
+
+Typically created with an `app = quixstreams.app.Application()` instance,
+via `sdf = app.dataframe()`.
+
+
+What it Does:
+
+- Builds a data processing pipeline, declaratively (not executed immediately)
+ - Executes this pipeline on inputs at runtime (Kafka message values)
+- Provides functions/interface similar to Pandas Dataframes/Series
+- Enables stateful processing (and manages everything related to it)
+
+
+How to Use:
+
+Define various operations while continuously reassigning to itself (or new fields).
+
+These operations will generally transform your data, access/update state, or produce
+to kafka topics.
+
+We recommend your data structure to be "columnar" (aka a dict/JSON) in nature so
+that it works with the entire interface, but simple types like `ints`, `str`, etc.
+are also supported.
+
+See the various methods and classes for more specifics, or for a deep dive into
+usage, see `streamingdataframe.md` under the `docs/` folder.
+
+>***NOTE:*** column referencing like `sdf["a_column"]` and various methods often
+ create other object types (typically `quixstreams.dataframe.StreamingSeries`),
+ which is expected; type hinting should alert you to any issues should you
+ attempt invalid operations with said objects (however, we cannot infer whether
+ an operation is valid with respect to your data!).
+
+
+Example Snippet:
+
+```python
+sdf = StreamingDataframe()
+sdf = sdf.apply(a_func)
+sdf = sdf.filter(another_func)
+sdf = sdf.to_topic(topic_obj)
+```
+
+
+
+#### StreamingDataFrame.apply
+
+```python
+def apply(func: Union[
+ ApplyCallback,
+ ApplyCallbackStateful,
+ ApplyWithMetadataCallback,
+ ApplyWithMetadataCallbackStateful,
+],
+ *,
+ stateful: bool = False,
+ expand: bool = False,
+ metadata: bool = False) -> Self
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L177)
+
+Apply a function to transform the value and return a new value.
+
+The result will be passed downstream as an input value.
+
+
+Example Snippet:
+
+```python
+# This stores a string in state and capitalizes every column with a string value.
+# A second apply then keeps only the string value columns (shows non-stateful).
+def func(d: dict, state: State):
+ value = d["store_field"]
+ if value != state.get("my_store_key"):
+        state.set("my_store_key", value)
+ return {k: v.upper() if isinstance(v, str) else v for k, v in d.items()}
+
+sdf = StreamingDataframe()
+sdf = sdf.apply(func, stateful=True)
+sdf = sdf.apply(lambda d: {k: v for k,v in d.items() if isinstance(v, str)})
+
+```
+
+**Arguments**:
+
+- `func`: a function to apply
+- `stateful`: if `True`, the function will be provided with a second argument
+of type `State` to perform stateful operations.
+- `expand`: if True, expand the returned iterable into individual values
+downstream. If returned value is not iterable, `TypeError` will be raised.
+Default - `False`.
+- `metadata`: if True, the callback will receive key, timestamp and headers
+along with the value.
+Default - `False`.
+
+
+
+#### StreamingDataFrame.update
+
+```python
+def update(func: Union[
+ UpdateCallback,
+ UpdateCallbackStateful,
+ UpdateWithMetadataCallback,
+ UpdateWithMetadataCallbackStateful,
+],
+ *,
+ stateful: bool = False,
+ metadata: bool = False) -> Self
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L266)
+
+Apply a function to mutate value in-place or to perform a side effect
+
+(e.g., printing a value to the console).
+
+The result of the function will be ignored, and the original value will be
+passed downstream.
+
+
+Example Snippet:
+
+```python
+# Stores a value and mutates a list by appending a new item to it.
+# Also prints to console.
+
+def func(values: list, state: State):
+ value = values[0]
+ if value != state.get("my_store_key"):
+        state.set("my_store_key", value)
+ values.append("new_item")
+
+sdf = StreamingDataframe()
+sdf = sdf.update(func, stateful=True)
+sdf = sdf.update(lambda value: print("Received value: ", value))
+```
+
+**Arguments**:
+
+- `func`: function to update value
+- `stateful`: if `True`, the function will be provided with a second argument
+of type `State` to perform stateful operations.
+- `metadata`: if True, the callback will receive key, timestamp and headers
+along with the value.
+Default - `False`.
+
+
+
+#### StreamingDataFrame.filter
+
+```python
+def filter(func: Union[
+ FilterCallback,
+ FilterCallbackStateful,
+ FilterWithMetadataCallback,
+ FilterWithMetadataCallbackStateful,
+],
+ *,
+ stateful: bool = False,
+ metadata: bool = False) -> Self
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L354)
+
+Filter value using provided function.
+
+If the function returns True-like value, the original value will be
+passed downstream.
+
+Example Snippet:
+
+```python
+# Stores a value and allows further processing only if the value is greater than
+# what was previously stored.
+
+def func(d: dict, state: State):
+ value = d["my_value"]
+ if value > state.get("my_store_key"):
+        state.set("my_store_key", value)
+ return True
+ return False
+
+sdf = StreamingDataframe()
+sdf = sdf.filter(func, stateful=True)
+```
+
+**Arguments**:
+
+- `func`: function to filter value
+- `stateful`: if `True`, the function will be provided with second argument
+of type `State` to perform stateful operations.
+- `metadata`: if True, the callback will receive key, timestamp and headers
+along with the value.
+Default - `False`.
+
+
+
+#### StreamingDataFrame.group\_by
+
+```python
+def group_by(key: Union[str, Callable[[Any], Any]],
+ name: Optional[str] = None,
+ value_deserializer: Optional[DeserializerType] = "json",
+ key_deserializer: Optional[DeserializerType] = "json",
+ value_serializer: Optional[SerializerType] = "json",
+ key_serializer: Optional[SerializerType] = "json") -> Self
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L440)
+
+"Groups" messages by re-keying them via the provided group_by operation
+
+on their message values.
+
+This enables things like aggregations on messages with non-matching keys.
+
+You can provide a column name (uses the column's value) or a custom function
+to generate this new key.
+
+`.group_by()` can only be performed once per `StreamingDataFrame` instance.
+
+>**NOTE:** group_by generates a topic that copies the original topic's settings.
+
+Example Snippet:
+
+```python
+# We have customer purchase events where the message key is the "store_id",
+# but we want to calculate sales per customer (by "customer_account_id").
+
+def func(d: dict, state: State):
+ current_total = state.get("customer_sum", 0)
+ new_total = current_total + d["customer_spent"]
+ state.set("customer_sum", new_total)
+ d["customer_total"] = new_total
+ return d
+
+sdf = StreamingDataframe()
+sdf = sdf.group_by("customer_account_id")
+sdf = sdf.apply(func, stateful=True)
+```
+
+**Arguments**:
+
+- `key`: how the new key should be generated from the message value;
+requires a column name (string) or a callable that takes the message value.
+- `name`: a name for the op (must be unique per group-by), required if `key`
+is a custom callable.
+- `value_deserializer`: a deserializer type for values; default - JSON
+- `key_deserializer`: a deserializer type for keys; default - JSON
+- `value_serializer`: a serializer type for values; default - JSON
+- `key_serializer`: a serializer type for keys; default - JSON
+
+**Returns**:
+
+a clone with this operation added (assign to keep its effect).
-
+
-### StreamingSeries
+#### StreamingDataFrame.contains
```python
-class StreamingSeries(BaseStreaming)
+@staticmethod
+def contains(key: str) -> StreamingSeries
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L47)
-
-`StreamingSeries` are typically generated by `StreamingDataframes` when getting
-elements from, or performing certain operations on, a `StreamingDataframe`,
-thus acting as a representation of "column" value.
-
-They share some operations with the `StreamingDataframe`, but also provide some
-additional functionality.
-
-Most column value operations are handled by this class, and `StreamingSeries` can
-generate other `StreamingSeries` as a result of said operations.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L518)
+Check if the key is present in the Row value.
-What it Does:
+Example Snippet:
-- Allows ways to do simple operations with dataframe "column"/dictionary values:
- - Basic ops like add, subtract, modulo, etc.
-- Enables comparisons/inequalities:
- - Greater than, equals, etc.
- - and/or, is/not operations
-- Can check for existence of columns in `StreamingDataFrames`
-- Enables chaining of various operations together
+```python
+# Add new column 'has_column' which contains a boolean indicating
+# the presence of 'column_x'
+sdf = StreamingDataframe()
+sdf['has_column'] = sdf.contains('column_x')
+```
-How to Use:
+**Arguments**:
-For the most part, you may not even notice this class exists!
-They will naturally be created as a result of typical `StreamingDataFrame` use.
+- `key`: a column name to check.
-Auto-complete should help you with valid methods and type-checking should alert
-you to invalid operations between `StreamingSeries`.
+**Returns**:
-In general, any typical Pands dataframe operation between columns should be valid
-with `StreamingSeries`, and you shouldn't have to think about them explicitly.
+a Column object that evaluates to True if the key is present
+or False otherwise.
+
-Example Snippet:
+#### StreamingDataFrame.to\_topic
```python
-# Random methods for example purposes. More detailed explanations found under
-# various methods or in the docs folder.
-
-sdf = StreamingDataframe()
-sdf = sdf["column_a"].apply(a_func).apply(diff_func, stateful=True)
-sdf["my_new_bool_field"] = sdf["column_b"].contains("this_string")
-sdf["new_sum_field"] = sdf["column_c"] + sdf["column_d"] + 2
-sdf = sdf[["column_a"] & (sdf["new_sum_field"] >= 10)]
+def to_topic(topic: Topic, key: Optional[Callable[[Any], Any]] = None) -> Self
```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L543)
-#### StreamingSeries.from\_apply\_callback
+Produce current value to a topic. You can optionally specify a new key.
+
+Example Snippet:
```python
-@classmethod
-def from_apply_callback(cls, func: ApplyWithMetadataCallback) -> Self
-```
+from quixstreams import Application
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L107)
+# Produce to two different topics, changing the key for one of them.
-Create a StreamingSeries from a function.
+app = Application()
+input_topic = app.topic("input_x")
+output_topic_0 = app.topic("output_a")
+output_topic_1 = app.topic("output_b")
-The provided function will be wrapped into `Apply`
+sdf = app.dataframe(input_topic)
+sdf = sdf.to_topic(output_topic_0)
+sdf = sdf.to_topic(output_topic_1, key=lambda data: data["a_field"])
+```
**Arguments**:
-- `func`: a function to apply
-
-**Returns**:
-
-instance of `StreamingSeries`
+- `topic`: instance of `Topic`
+- `key`: a callable to generate a new message key, optional.
+If passed, the return type of this callable must be serializable
+by `key_serializer` defined for this Topic object.
+By default, the current message key will be used.
-
+
-#### StreamingSeries.apply
+#### StreamingDataFrame.set\_timestamp
```python
-def apply(func: ApplyCallback) -> Self
+def set_timestamp(func: Callable[[Any, Any, int, Any], int]) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L121)
-
-Add a callable to the execution list for this series.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L584)
-The provided callable should accept a single argument, which will be its input.
-The provided callable should similarly return one output, or None
+Set a new timestamp based on the current message value and its metadata.
-They can be chained together or included with other operations.
+The new timestamp will be used in windowed aggregations and when producing
+messages to the output topics.
+The new timestamp must be in milliseconds to conform to Kafka requirements.
Example Snippet:
```python
-# The `StreamingSeries` are generated when `sdf["COLUMN_NAME"]` is called.
-# This stores a string in state and capitalizes the column value; the result is
-# assigned to a new column.
-# Another apply converts a str column to an int, assigning it to a new column.
+from quixstreams import Application
-def func(value: str, state: State):
- if value != state.get("my_store_key"):
- state.set("my_store_key") = value
- return v.upper()
-sdf = StreamingDataframe()
-sdf["new_col"] = sdf["a_column"]["nested_dict_key"].apply(func, stateful=True)
-sdf["new_col_2"] = sdf["str_col"].apply(lambda v: int(v)) + sdf["str_col2"] + 2
+app = Application()
+input_topic = app.topic("data")
+
+sdf = app.dataframe(input_topic)
+# Updating the record's timestamp based on the value
+sdf = sdf.set_timestamp(lambda value, key, timestamp, headers: value['new_timestamp'])
```
**Arguments**:
-- `func`: a callable with one argument and one output
+- `func`: callable accepting the current value, key, timestamp, and headers.
+It's expected to return a new timestamp as integer in milliseconds.
**Returns**:
-a new `StreamingSeries` with the new callable added
+a new StreamingDataFrame instance
-
+
-#### StreamingSeries.compose\_returning
+#### StreamingDataFrame.set\_headers
```python
-def compose_returning() -> ReturningExecutor
+def set_headers(
+ func: Callable[
+ [Any, Any, int, List[Tuple[str, HeaderValue]]],
+ Collection[Tuple[str, HeaderValue]],
+ ]
+) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L155)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L625)
-Compose a list of functions from this StreamingSeries and its parents into one
+Set new message headers based on the current message value and metadata.
-big closure that always returns the transformed record.
+The new headers will be used when producing messages to the output topics.
-This closure is to be used to execute the functions in the stream and to get
-the result of the transformations.
+The provided callback must accept value, key, timestamp, and headers,
+and return a new collection of (header, value) tuples.
-Stream may only contain simple "apply" functions to be able to compose itself
-into a returning function.
+Example Snippet:
+
+```python
+from quixstreams import Application
+
+
+app = Application()
+input_topic = app.topic("data")
+
+sdf = app.dataframe(input_topic)
+# Updating the record's headers based on the value and metadata
+sdf = sdf.set_headers(lambda value, key, timestamp, headers: [('id', value['id'])])
+```
+
+**Arguments**:
+
+- `func`: callable accepting the current value, key, timestamp, and headers.
+It's expected to return a new set of headers
+as a collection of (header, value) tuples.
**Returns**:
-a callable accepting value, key and timestamp and
-returning a tuple "(value, key, timestamp)
+a new StreamingDataFrame instance
-
+
-#### StreamingSeries.compose
+#### StreamingDataFrame.compose
```python
def compose(
- sink: Optional[Callable[[Any, Any, int, Any],
- None]] = None) -> VoidExecutor
+ sink: Optional[Callable[[Any, Any, int, Any], None]] = None
+) -> Dict[str, VoidExecutor]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L170)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L676)
-Compose all functions of this StreamingSeries into one big closure.
+Compose all functions of this StreamingDataFrame into one big closure.
+
+Closures are more performant than calling all the functions in the
+`StreamingDataFrame` one-by-one.
Generally not required by users; the `quixstreams.app.Application` class will
do this automatically.
@@ -1452,12 +1677,9 @@ Example Snippet:
```python
from quixstreams import Application
-
-app = Application(...)
-
sdf = app.dataframe()
-sdf = sdf["column_a"].apply(apply_func)
-sdf = sdf["column_b"].contains(filter_func)
+sdf = sdf.apply(apply_func)
+sdf = sdf.filter(filter_func)
sdf = sdf.compose()
result_0 = sdf({"my": "record"})
@@ -1466,2577 +1688,2760 @@ result_1 = sdf({"other": "record"})
**Arguments**:
-- `sink`: callable to accumulate the results of the execution.
+- `sink`: callable to accumulate the results of the execution, optional.
-**Raises**:
+**Returns**:
-- `ValueError`: if disallowed functions are present in the tree of
-underlying `Stream`.
+a function that accepts "value"
+and returns a result of StreamingDataFrame
+
+
+
+#### StreamingDataFrame.test
+
+```python
+def test(value: Any,
+ key: Any,
+ timestamp: int,
+ headers: Optional[Any] = None,
+ ctx: Optional[MessageContext] = None,
+ topic: Optional[Topic] = None) -> List[Any]
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L713)
+
+A shorthand to test `StreamingDataFrame` with provided value
+
+and `MessageContext`.
+
+**Arguments**:
+
+- `value`: value to pass through `StreamingDataFrame`
+- `key`: key to pass through `StreamingDataFrame`
+- `timestamp`: timestamp to pass through `StreamingDataFrame`
+- `ctx`: instance of `MessageContext`, optional.
+Provide it if the StreamingDataFrame instance calls `to_topic()`,
+has stateful functions or windows.
+Default - `None`.
+- `topic`: optionally, a topic branch to test with
**Returns**:
-a callable accepting value, key and timestamp and
-returning None
+result of `StreamingDataFrame`
-
+
-#### StreamingSeries.test
+#### StreamingDataFrame.tumbling\_window
+
+```python
+def tumbling_window(duration_ms: Union[int, timedelta],
+ grace_ms: Union[int, timedelta] = 0,
+ name: Optional[str] = None) -> TumblingWindowDefinition
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L750)
+
+Create a tumbling window transformation on this StreamingDataFrame.
+
+Tumbling windows divide time into fixed-sized, non-overlapping windows.
+
+They allow performing stateful aggregations like `sum`, `reduce`, etc.
+on top of the data and emit results downstream.
+
+Notes:
+
+- The timestamp of the aggregation result is set to the window start timestamp.
+- Every window is grouped by the current Kafka message key.
+- Messages with `None` key will be ignored.
+- The time windows always use the current event time.
+
+
+
+Example Snippet:
```python
-def test(value: Any,
- key: Any,
- timestamp: int,
- headers: Optional[Any] = None,
- ctx: Optional[MessageContext] = None) -> Any
-```
+app = Application()
+sdf = app.dataframe(...)
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L214)
+sdf = (
+ # Define a tumbling window of 60s and grace period of 10s
+ sdf.tumbling_window(
+ duration_ms=timedelta(seconds=60), grace_ms=timedelta(seconds=10.0)
+ )
-A shorthand to test `StreamingSeries` with provided value
+ # Specify the aggregation function
+ .sum()
-and `MessageContext`.
+ # Specify how the results should be emitted downstream.
+ # "all()" will emit results as they come for each updated window,
+ # possibly producing multiple messages per key-window pair
+ # "final()" will emit windows only when they are closed and cannot
+ # receive any updates anymore.
+ .all()
+)
+```
**Arguments**:
-- `value`: value to pass through `StreamingSeries`
-- `ctx`: instance of `MessageContext`, optional.
-Provide it if the StreamingSeries instance has
-functions calling `get_current_key()`.
-Default - `None`.
+- `duration_ms`: The length of each window.
+Can be specified as either an `int` representing milliseconds or a
+`timedelta` object.
+>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
+value.
+- `grace_ms`: The grace period for data arrival.
+It allows late-arriving data (data arriving after the window
+has theoretically closed) to be included in the window.
+Can be specified as either an `int` representing milliseconds
+or as a `timedelta` object.
+>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
+value.
+- `name`: The unique identifier for the window. If not provided, it will be
+automatically generated based on the window's properties.
**Returns**:
-result of `StreamingSeries`
+`TumblingWindowDefinition` instance representing the tumbling window
+configuration.
+This object can be further configured with aggregation functions
+like `sum`, `count`, etc. and applied to the StreamingDataFrame.
-
+
-#### StreamingSeries.isin
+#### StreamingDataFrame.hopping\_window
```python
-def isin(other: Container) -> Self
+def hopping_window(duration_ms: Union[int, timedelta],
+ step_ms: Union[int, timedelta],
+ grace_ms: Union[int, timedelta] = 0,
+ name: Optional[str] = None) -> HoppingWindowDefinition
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L269)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/dataframe/dataframe.py#L826)
-Check if series value is in "other".
+Create a hopping window transformation on this StreamingDataFrame.
-Same as "StreamingSeries in other".
+Hopping windows divide the data stream into overlapping windows based on time.
+The overlap is controlled by the `step_ms` parameter.
-Runtime result will be a `bool`.
+They allow performing stateful aggregations like `sum`, `reduce`, etc.
+on top of the data and emit results downstream.
+
+Notes:
+
+- The timestamp of the aggregation result is set to the window start timestamp.
+- Every window is grouped by the current Kafka message key.
+- Messages with `None` key will be ignored.
+- The time windows always use the current event time.
Example Snippet:
```python
-from quixstreams import Application
+app = Application()
+sdf = app.dataframe(...)
-# Check if "str_column" is contained in a column with a list of strings and
-# assign the resulting `bool` to a new column: "has_my_str".
+sdf = (
+ # Define a hopping window of 60s with step 30s and grace period of 10s
+ sdf.hopping_window(
+ duration_ms=timedelta(seconds=60),
+ step_ms=timedelta(seconds=30),
+ grace_ms=timedelta(seconds=10)
+ )
-sdf = app.dataframe()
-sdf["has_my_str"] = sdf["str_column"].isin(sdf["column_with_list_of_strs"])
+ # Specify the aggregation function
+ .sum()
+
+ # Specify how the results should be emitted downstream.
+ # "all()" will emit results as they come for each updated window,
+ # possibly producing multiple messages per key-window pair
+ # "final()" will emit windows only when they are closed and cannot
+ # receive any updates anymore.
+ .all()
+)
```
**Arguments**:
-- `other`: a container to check
+- `duration_ms`: The length of each window. It defines the time span for
+which each window aggregates data.
+Can be specified as either an `int` representing milliseconds
+or a `timedelta` object.
+>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
+value.
+- `step_ms`: The step size for the window.
+It determines how much each successive window moves forward in time.
+Can be specified as either an `int` representing milliseconds
+or a `timedelta` object.
+>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
+value.
+- `grace_ms`: The grace period for data arrival.
+It allows late-arriving data to be included in the window,
+even if it arrives after the window has theoretically moved forward.
+Can be specified as either an `int` representing milliseconds
+or a `timedelta` object.
+>***NOTE:*** `timedelta` objects will be rounded to the closest millisecond
+value.
+- `name`: The unique identifier for the window. If not provided, it will be
+automatically generated based on the window's properties.
**Returns**:
-new StreamingSeries
-
-
+`HoppingWindowDefinition` instance representing the hopping
+window configuration.
+This object can be further configured with aggregation functions
+like `sum`, `count`, etc. and applied to the StreamingDataFrame.
-#### StreamingSeries.contains
+
-```python
-def contains(other: Union[Self, object]) -> Self
-```
+## quixstreams.error\_callbacks
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L296)
+
-Check if series value contains "other"
+## quixstreams.exceptions
-Same as "other in StreamingSeries".
+
-Runtime result will be a `bool`.
+## quixstreams.exceptions.base
+
-Example Snippet:
+## quixstreams.exceptions.assignment
-```python
-from quixstreams import Application
+
-# Check if "column_a" contains "my_substring" and assign the resulting
-# `bool` to a new column: "has_my_substr"
+### PartitionAssignmentError
-sdf = app.dataframe()
-sdf["has_my_substr"] = sdf["column_a"].contains("my_substring")
+```python
+class PartitionAssignmentError(QuixException)
```
-**Arguments**:
-
-- `other`: object to check
-
-**Returns**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/exceptions/assignment.py#L6)
-new StreamingSeries
+Error happened during partition rebalancing.
+Raised from `on_assign`, `on_revoke` and `on_lost` callbacks
-
+
-#### StreamingSeries.is\_
+## quixstreams.kafka.exceptions
-```python
-def is_(other: Union[Self, object]) -> Self
-```
+
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L321)
+## quixstreams.kafka
-Check if series value refers to the same object as `other`
+
-Runtime result will be a `bool`.
+## quixstreams.kafka.configuration
+
-Example Snippet:
+### ConnectionConfig
```python
-# Check if "column_a" is the same as "column_b" and assign the resulting `bool`
-# to a new column: "is_same"
-
-from quixstreams import Application
-sdf = app.dataframe()
-sdf["is_same"] = sdf["column_a"].is_(sdf["column_b"])
+class ConnectionConfig(BaseSettings)
```
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L17)
-- `other`: object to check for "is"
+Provides an interface for all librdkafka connection-based configs.
-**Returns**:
+Allows converting to or from a librdkafka dictionary.
-new StreamingSeries
+Also obscures secrets and handles any case sensitivity issues.
-
+
-#### StreamingSeries.isnot
+#### ConnectionConfig.settings\_customise\_sources
```python
-def isnot(other: Union[Self, object]) -> Self
+@classmethod
+def settings_customise_sources(
+ cls, settings_cls: Type[BaseSettings],
+ init_settings: PydanticBaseSettingsSource,
+ env_settings: PydanticBaseSettingsSource,
+ dotenv_settings: PydanticBaseSettingsSource,
+ file_secret_settings: PydanticBaseSettingsSource
+) -> Tuple[PydanticBaseSettingsSource, ...]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L344)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L96)
-Check if series value does not refer to the same object as `other`
-
-Runtime result will be a `bool`.
+Included to ignore reading/setting values from the environment
+
-Example Snippet:
+#### ConnectionConfig.from\_librdkafka\_dict
```python
-from quixstreams import Application
+@classmethod
+def from_librdkafka_dict(cls,
+ config: dict,
+ ignore_extras: bool = False) -> Self
+```
-# Check if "column_a" is the same as "column_b" and assign the resulting `bool`
-# to a new column: "is_not_same"
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L110)
-sdf = app.dataframe()
-sdf["is_not_same"] = sdf["column_a"].isnot(sdf["column_b"])
-```
+Create a `ConnectionConfig` from a librdkafka config dictionary.
**Arguments**:
-- `other`: object to check for "is_not"
+- `config`: a dict of configs (like {"bootstrap.servers": "url"})
+- `ignore_extras`: Ignore non-connection settings (else raise exception)
**Returns**:
-new StreamingSeries
+a ConnectionConfig
-
+
-#### StreamingSeries.isnull
+#### ConnectionConfig.as\_librdkafka\_dict
```python
-def isnull() -> Self
+def as_librdkafka_dict(plaintext_secrets=True) -> dict
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L368)
-
-Check if series value is None.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/configuration.py#L125)
-Runtime result will be a `bool`.
+Dump any non-empty config values as a librdkafka dictionary.
+>***NOTE***: All secret values will be dumped in PLAINTEXT by default.
-Example Snippet:
+**Arguments**:
-```python
-from quixstreams import Application
+- `plaintext_secrets`: whether secret values are plaintext or obscured (***)
-# Check if "column_a" is null and assign the resulting `bool` to a new column:
-# "is_null"
+**Returns**:
-sdf = app.dataframe()
-sdf["is_null"] = sdf["column_a"].isnull()
-```
+a librdkafka-compatible dictionary
-**Returns**:
+
-new StreamingSeries
+## quixstreams.kafka.producer
-
+
-#### StreamingSeries.notnull
+### Producer
```python
-def notnull() -> Self
+class Producer()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L391)
-
-Check if series value is not None.
-
-Runtime result will be a `bool`.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L44)
+
-Example Snippet:
+#### Producer.\_\_init\_\_
```python
-from quixstreams import Application
+def __init__(broker_address: Union[str, ConnectionConfig],
+ logger: logging.Logger = logger,
+ error_callback: Callable[[KafkaError], None] = _default_error_cb,
+ extra_config: Optional[dict] = None,
+ flush_timeout: Optional[int] = None)
+```
-# Check if "column_a" is not null and assign the resulting `bool` to a new column:
-# "is_not_null"
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L45)
-sdf = app.dataframe()
-sdf["is_not_null"] = sdf["column_a"].notnull()
-```
+A wrapper around `confluent_kafka.Producer`.
-**Returns**:
+It initializes `confluent_kafka.Producer` on demand
+avoiding network calls during `__init__`, provides typing info for methods
+and some reasonable defaults.
-new StreamingSeries
+**Arguments**:
-
+- `broker_address`: Connection settings for Kafka.
+Accepts string with Kafka broker host and port formatted as `<host>:<port>`,
+or a ConnectionConfig object if authentication is required.
+- `logger`: a Logger instance to attach librdkafka logging to
+- `error_callback`: callback used for producer errors
+- `extra_config`: A dictionary with additional options that
+will be passed to `confluent_kafka.Producer` as is.
+Note: values passed as arguments override values in `extra_config`.
+- `flush_timeout`: The time the producer is waiting for all messages to be delivered.
-#### StreamingSeries.abs
+
+
+#### Producer.produce
```python
-def abs() -> Self
+def produce(topic: str,
+ value: Optional[Union[str, bytes]] = None,
+ key: Optional[Union[str, bytes]] = None,
+ headers: Optional[Headers] = None,
+ partition: Optional[int] = None,
+ timestamp: Optional[int] = None,
+ poll_timeout: float = 5.0,
+ buffer_error_max_tries: int = 3,
+ on_delivery: Optional[DeliveryCallback] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/series.py#L414)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L83)
-Get absolute value of the series value.
+Produce a message to a topic.
-Example Snippet:
+It also polls Kafka for callbacks before producing to minimize
+the probability of `BufferError`.
+If `BufferError` still happens, the method will poll Kafka with timeout
+to free up the buffer and try again.
-```python
-from quixstreams import Application
+**Arguments**:
-# Get absolute value of "int_col" and add it to "other_int_col".
-# Finally, assign the result to a new column: "abs_col_sum".
+- `topic`: topic name
+- `value`: message value
+- `key`: message key
+- `headers`: message headers
+- `partition`: topic partition
+- `timestamp`: message timestamp
+- `poll_timeout`: timeout for `poll()` call in case of `BufferError`
+- `buffer_error_max_tries`: max retries for `BufferError`.
+Pass `0` to not retry after `BufferError`.
+- `on_delivery`: the delivery callback to be triggered on `poll()`
+for the produced message.
-sdf = app.dataframe()
-sdf["abs_col_sum"] = sdf["int_col"].abs() + sdf["other_int_col"]
-```
+
-**Returns**:
+#### Producer.poll
-new StreamingSeries
+```python
+def poll(timeout: float = 0)
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L144)
-## quixstreams.dataframe
+Polls the producer for events and calls `on_delivery` callbacks.
-
+**Arguments**:
-## quixstreams.dataframe.utils
+- `timeout`: poll timeout seconds; Default: 0 (unlike others)
+> NOTE: -1 will hang indefinitely if there are no messages to acknowledge
-
+
-#### ensure\_milliseconds
+#### Producer.flush
```python
-def ensure_milliseconds(delta: Union[int, timedelta]) -> int
+def flush(timeout: Optional[float] = None) -> int
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/utils.py#L5)
-
-Convert timedelta to milliseconds.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/producer.py#L152)
-If the `delta` is not
-This function will also round the value to the closest milliseconds in case of
-higher precision.
+Wait for all messages in the Producer queue to be delivered.
**Arguments**:
-- `delta`: `timedelta` object
+- `timeout` (`float`): time to attempt flushing (seconds).
+None use producer default or -1 is infinite. Default: None
**Returns**:
-timedelta value in milliseconds as `int`
-
-
-
-## quixstreams.dataframe.exceptions
+number of messages remaining to flush
-
+
-## quixstreams.dataframe.windows.definitions
+## quixstreams.kafka.consumer
-
+
-### FixedTimeWindowDefinition
+### Consumer
```python
-class FixedTimeWindowDefinition(abc.ABC)
+class Consumer()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L20)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L64)
-
+
-#### FixedTimeWindowDefinition.sum
+#### Consumer.\_\_init\_\_
```python
-def sum() -> "FixedTimeWindow"
+def __init__(broker_address: Union[str, ConnectionConfig],
+ consumer_group: Optional[str],
+ auto_offset_reset: AutoOffsetReset,
+ auto_commit_enable: bool = True,
+ logger: logging.Logger = logger,
+ error_callback: Callable[[KafkaError], None] = _default_error_cb,
+ on_commit: Optional[Callable[
+ [Optional[KafkaError], List[TopicPartition]], None]] = None,
+ extra_config: Optional[dict] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L67)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L65)
-Configure the window to aggregate data by summing up values within
+A wrapper around `confluent_kafka.Consumer`.
-each window period.
+It initializes `confluent_kafka.Consumer` on demand
+avoiding network calls during `__init__`, provides typing info for methods
+and some reasonable defaults.
-**Returns**:
+**Arguments**:
-an instance of `FixedTimeWindow` configured to perform sum aggregation.
+- `broker_address`: Connection settings for Kafka.
+Accepts string with Kafka broker host and port formatted as `<host>:<port>`,
+or a ConnectionConfig object if authentication is required.
+- `consumer_group`: Kafka consumer group.
+Passed as `group.id` to `confluent_kafka.Consumer`
+- `auto_offset_reset`: Consumer `auto.offset.reset` setting.
+Available values:
+- "earliest" - automatically reset the offset to the smallest offset
+- "latest" - automatically reset the offset to the largest offset
+- "error" - trigger an error (ERR__AUTO_OFFSET_RESET) which is retrieved
+ by consuming messages (used for testing)
+- `auto_commit_enable`: If true, periodically commit offset of
+the last message handed to the application. Default - `True`.
+- `logger`: a Logger instance to attach librdkafka logging to
+- `error_callback`: callback used for consumer errors
+- `on_commit`: Offset commit result propagation callback.
+Passed as "offset_commit_cb" to `confluent_kafka.Consumer`.
+- `extra_config`: A dictionary with additional options that
+will be passed to `confluent_kafka.Consumer` as is.
+Note: values passed as arguments override values in `extra_config`.
-
+
-#### FixedTimeWindowDefinition.count
+#### Consumer.poll
```python
-def count() -> "FixedTimeWindow"
+def poll(timeout: Optional[float] = None) -> Optional[Message]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L94)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L128)
-Configure the window to aggregate data by counting the number of values
-
-within each window period.
-
-**Returns**:
-
-an instance of `FixedTimeWindow` configured to perform record count.
+Consumes a single message, calls callbacks and returns events.
-
+The application must check the returned :py:class:`Message`
+object's :py:func:`Message.error()` method to distinguish between proper
+messages (error() returns None), or an event or error.
-#### FixedTimeWindowDefinition.mean
+Note: Callbacks may be called from this method, such as
+``on_assign``, ``on_revoke``, et al.
-```python
-def mean() -> "FixedTimeWindow"
-```
+**Arguments**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L121)
+- `timeout` (`float`): Maximum time in seconds to block waiting for message,
+event or callback. None or -1 is infinite. Default: None.
-Configure the window to aggregate data by calculating the mean of the values
+**Raises**:
-within each window period.
+- `None`: RuntimeError if called on a closed consumer
**Returns**:
-an instance of `FixedTimeWindow` configured to calculate the mean
-of the values.
+A Message object or None on timeout
-
+
-#### FixedTimeWindowDefinition.reduce
+#### Consumer.subscribe
```python
-def reduce(reducer: Callable[[Any, Any], Any],
- initializer: Callable[[Any], Any]) -> "FixedTimeWindow"
+def subscribe(topics: List[str],
+ on_assign: Optional[RebalancingCallback] = None,
+ on_revoke: Optional[RebalancingCallback] = None,
+ on_lost: Optional[RebalancingCallback] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L152)
-
-Configure the window to perform a custom aggregation using `reducer`
-
-and `initializer` functions.
-
-Example Snippet:
-```python
-sdf = StreamingDataFrame(...)
-
-# Using "reduce()" to calculate multiple aggregates at once
-def reducer(agg: dict, current: int):
- aggregated = {
- 'min': min(agg['min'], current),
- 'max': max(agg['max'], current)
- 'count': agg['count'] + 1
- }
- return aggregated
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L146)
-def initializer(current) -> dict:
- return {'min': current, 'max': current, 'count': 1}
+Set subscription to supplied list of topics
-window = (
- sdf.tumbling_window(duration_ms=1000)
- .reduce(reducer=reducer, initializer=initializer)
- .final()
-)
-```
+This replaces a previous subscription.
**Arguments**:
-- `reducer`: A function that takes two arguments
-(the accumulated value and a new value) and returns a single value.
-The returned value will be saved to the state store and sent downstream.
-- `initializer`: A function to call for every first element of the window.
-This function is used to initialize the aggregation within a window.
+- `topics` (`list(str)`): List of topics (strings) to subscribe to.
+- `on_assign` (`callable`): callback to provide handling of customized offsets
+on completion of a successful partition re-assignment.
+- `on_revoke` (`callable`): callback to provide handling of offset commits to
+a customized store on the start of a rebalance operation.
+- `on_lost` (`callable`): callback to provide handling in the case the partition
+assignment has been lost. Partitions that have been lost may already be
+owned by other members in the group and therefore committing offsets,
+for example, may fail.
+
+**Raises**:
-**Returns**:
+- `KafkaException`:
+- `None`: RuntimeError if called on a closed consumer
+.. py:function:: on_assign(consumer, partitions)
+.. py:function:: on_revoke(consumer, partitions)
+.. py:function:: on_lost(consumer, partitions)
-A window configured to perform custom reduce aggregation on the data.
+ :param Consumer consumer: Consumer instance.
+ :param list(TopicPartition) partitions: Absolute list of partitions being
+ assigned or revoked.
-
+
-#### FixedTimeWindowDefinition.max
+#### Consumer.unsubscribe
```python
-def max() -> "FixedTimeWindow"
+def unsubscribe()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L212)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L240)
-Configure a window to aggregate the maximum value within each window period.
+Remove current subscription.
-**Returns**:
+**Raises**:
-an instance of `FixedTimeWindow` configured to calculate the maximum
-value within each window period.
+- `None`: KafkaException
+- `None`: RuntimeError if called on a closed consumer
-
+
-#### FixedTimeWindowDefinition.min
+#### Consumer.store\_offsets
```python
-def min() -> "FixedTimeWindow"
+def store_offsets(message: Optional[Message] = None,
+ offsets: Optional[List[TopicPartition]] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/definitions.py#L241)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L248)
-Configure a window to aggregate the minimum value within each window period.
+.. py:function:: store_offsets([message=None], [offsets=None])
-**Returns**:
+Store offsets for a message or a list of offsets.
-an instance of `FixedTimeWindow` configured to calculate the maximum
-value within each window period.
+``message`` and ``offsets`` are mutually exclusive. The stored offsets
+will be committed according to 'auto.commit.interval.ms' or manual
+offset-less `commit`.
+Note that 'enable.auto.offset.store' must be set to False when using this API.
-
+**Arguments**:
-## quixstreams.dataframe.windows
+- `message` (`confluent_kafka.Message`): Store message's offset+1.
+- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to store.
-
+**Raises**:
-## quixstreams.dataframe.windows.time\_based
+- `None`: KafkaException
+- `None`: RuntimeError if called on a closed consumer
-
+
-### FixedTimeWindow
+#### Consumer.commit
```python
-class FixedTimeWindow()
+def commit(message: Optional[Message] = None,
+ offsets: Optional[List[TopicPartition]] = None,
+ asynchronous: bool = True) -> Optional[List[TopicPartition]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/time_based.py#L32)
-
-
-
-#### FixedTimeWindow.final
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L282)
-```python
-def final() -> "StreamingDataFrame"
-```
+Commit a message or a list of offsets.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/time_based.py#L107)
+The ``message`` and ``offsets`` parameters are mutually exclusive.
+If neither is set, the current partition assignment's offsets are used instead.
+Use this method to commit offsets if you have 'enable.auto.commit' set to False.
-Apply the window aggregation and return results only when the windows are
-closed.
+**Arguments**:
-The format of returned windows:
-```python
-{
- "start": ,
- "end": ,
- "value: ,
-}
-```
+- `message` (`confluent_kafka.Message`): Commit the message's offset+1.
+Note: By convention, committed offsets reflect the next message
+to be consumed, **not** the last message consumed.
+- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to commit.
+- `asynchronous` (`bool`): If true, asynchronously commit, returning None
+immediately. If False, the commit() call will block until the commit
+succeeds or fails and the committed offsets will be returned (on success).
+Note that specific partitions may have failed and the .err field of
+each partition should be checked for success.
-The individual window is closed when the event time
-(the maximum observed timestamp across the partition) passes
-its end timestamp + grace period.
-The closed windows cannot receive updates anymore and are considered final.
+**Raises**:
->***NOTE:*** Windows can be closed only within the same message key.
-If some message keys appear irregularly in the stream, the latest windows
-can remain unprocessed until the message the same key is received.
+- `None`: KafkaException
+- `None`: RuntimeError if called on a closed consumer
-
+
-#### FixedTimeWindow.current
+#### Consumer.committed
```python
-def current() -> "StreamingDataFrame"
+def committed(partitions: List[TopicPartition],
+ timeout: Optional[float] = None) -> List[TopicPartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/time_based.py#L145)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L322)
-Apply the window transformation to the StreamingDataFrame to return results
-for each updated window.
+.. py:function:: committed(partitions, [timeout=None])
-The format of returned windows:
-```python
-{
- "start": ,
- "end": ,
- "value: ,
-}
-```
+Retrieve committed offsets for the specified partitions.
-This method processes streaming data and returns results as they come,
-regardless of whether the window is closed or not.
+**Arguments**:
-
+- `partitions` (`list(TopicPartition)`): List of topic+partitions to query for stored offsets.
+- `timeout` (`float`): Request timeout (seconds).
+None or -1 is infinite. Default: None
-## quixstreams.dataframe.windows.base
+**Raises**:
-
+- `None`: KafkaException
+- `None`: RuntimeError if called on a closed consumer
-#### get\_window\_ranges
+**Returns**:
+
+`list(TopicPartition)`: List of topic+partitions with offset and possibly error set.
+
+
+
+#### Consumer.get\_watermark\_offsets
```python
-def get_window_ranges(timestamp_ms: int,
- duration_ms: int,
- step_ms: Optional[int] = None) -> List[Tuple[int, int]]
+def get_watermark_offsets(partition: TopicPartition,
+ timeout: Optional[float] = None,
+ cached: bool = False) -> Tuple[int, int]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/dataframe/windows/base.py#L18)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L342)
-Get a list of window ranges for the given timestamp.
+Retrieve low and high offsets for the specified partition.
**Arguments**:
-- `timestamp_ms`: timestamp in milliseconds
-- `duration_ms`: window duration in milliseconds
-- `step_ms`: window step in milliseconds for hopping windows, optional.
-
-**Returns**:
-
-a list of (, ) tuples
+- `partition` (`TopicPartition`): Topic+partition to return offsets for.
+- `timeout` (`float`): Request timeout (seconds). None or -1 is infinite.
+Ignored if cached=True. Default: None
+- `cached` (`bool`): Instead of querying the broker, use cached information.
+Cached values: The low offset is updated periodically
+(if statistics.interval.ms is set) while the high offset is updated on each
+message fetched from the broker for this partition.
-
+**Raises**:
-## quixstreams.dataframe.base
+- `None`: KafkaException
+- `None`: RuntimeError if called on a closed consumer
-
+**Returns**:
-## quixstreams.rowproducer
+`tuple(int,int)`: Tuple of (low,high) on success or None on timeout.
+The high offset is the offset of the last message + 1.
-
+
-### RowProducer
+#### Consumer.list\_topics
```python
-class RowProducer()
+def list_topics(topic: Optional[str] = None,
+ timeout: Optional[float] = None) -> ClusterMetadata
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowproducer.py#L18)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L368)
-A producer class that is capable of serializing Rows to bytes and send them to Kafka.
+.. py:function:: list_topics([topic=None], [timeout=None])
-The serialization is performed according to the Topic serialization settings.
+Request metadata from the cluster.
+This method provides the same information as
+listTopics(), describeTopics() and describeCluster() in the Java Admin client.
**Arguments**:
-- `broker_address`: Connection settings for Kafka.
-Accepts string with Kafka broker host and port formatted as `:`,
-or a ConnectionConfig object if authentication is required.
-- `extra_config`: A dictionary with additional options that
-will be passed to `confluent_kafka.Producer` as is.
-Note: values passed as arguments override values in `extra_config`.
-- `on_error`: a callback triggered when `RowProducer.produce_row()`
-or `RowProducer.poll()` fail`.
-If producer fails and the callback returns `True`, the exception
-will be logged but not propagated.
-The default callback logs an exception and returns `False`.
-- `flush_timeout`: The time the producer is waiting for all messages to be delivered.
+- `topic` (`str`): If specified, only request information about this topic,
+else return results for all topics in cluster.
+Warning: If auto.create.topics.enable is set to true on the broker and
+an unknown topic is specified, it will be created.
+- `timeout` (`float`): The maximum response time before timing out
+None or -1 is infinite. Default: None
-
+**Raises**:
-#### RowProducer.produce\_row
+- `None`: KafkaException
+
+
+
+#### Consumer.memberid
```python
-def produce_row(row: Row,
- topic: Topic,
- key: Optional[Any] = _KEY_UNSET,
- partition: Optional[int] = None,
- timestamp: Optional[int] = None)
+def memberid() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowproducer.py#L56)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L391)
-Serialize Row to bytes according to the Topic serialization settings
+Return this client's broker-assigned group member id.
-and produce it to Kafka
+The member id is assigned by the group coordinator and is propagated to
+the consumer during rebalance.
-If this method fails, it will trigger the provided "on_error" callback.
+ :returns: Member id string or None
+ :rtype: string
+ :raises: RuntimeError if called on a closed consumer
-**Arguments**:
-- `row`: Row object
-- `topic`: Topic object
-- `key`: message key, optional
-- `partition`: partition number, optional
-- `timestamp`: timestamp in milliseconds, optional
+
+
+#### Consumer.offsets\_for\_times
+
+```python
+def offsets_for_times(partitions: List[TopicPartition],
+ timeout: Optional[float] = None) -> List[TopicPartition]
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L404)
+
+Look up offsets by timestamp for the specified partitions.
+
+The returned offset for each partition is the earliest offset whose
+timestamp is greater than or equal to the given timestamp in the
+corresponding partition. If the provided timestamp exceeds that of the
+last message in the partition, a value of -1 will be returned.
+
+ :param list(TopicPartition) partitions: topic+partitions with timestamps
+ in the TopicPartition.offset field.
+ :param float timeout: The maximum response time before timing out.
+ None or -1 is infinite. Default: None
+ :returns: List of topic+partition with offset field set and possibly error set
+ :rtype: list(TopicPartition)
+ :raises: KafkaException
+ :raises: RuntimeError if called on a closed consumer
-
-#### RowProducer.poll
+
+
+#### Consumer.pause
```python
-def poll(timeout: float = None)
+def pause(partitions: List[TopicPartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowproducer.py#L96)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L430)
-Polls the producer for events and calls `on_delivery` callbacks.
+Pause consumption for the provided list of partitions.
-If `poll()` fails, it will trigger the provided "on_error" callback
+Paused partitions must be tracked manually.
+
+Does NOT affect the result of Consumer.assignment().
**Arguments**:
-- `timeout`: timeout in seconds
+- `partitions` (`list(TopicPartition)`): List of topic+partitions to pause.
-
+**Raises**:
-## quixstreams.core.stream.functions
+- `None`: KafkaException
-
+
-### StreamFunction
+#### Consumer.resume
```python
-class StreamFunction(abc.ABC)
+def resume(partitions: List[TopicPartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L65)
-
-A base class for all the streaming operations in Quix Streams.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L444)
-It provides a `get_executor` method to return a closure to be called with the input
-values.
+.. py:function:: resume(partitions)
-
+Resume consumption for the provided list of partitions.
-#### StreamFunction.get\_executor
+**Arguments**:
-```python
-@abc.abstractmethod
-def get_executor(child_executor: VoidExecutor) -> VoidExecutor
-```
+- `partitions` (`list(TopicPartition)`): List of topic+partitions to resume.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L79)
+**Raises**:
-Returns a wrapper to be called on a value, key and timestamp.
+- `None`: KafkaException
-
+
-### ApplyFunction
+#### Consumer.position
```python
-class ApplyFunction(StreamFunction)
+def position(partitions: List[TopicPartition]) -> List[TopicPartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L85)
-
-Wrap a function into "Apply" function.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L456)
-The provided callback is expected to return a new value based on input,
-and its result will always be passed downstream.
+Retrieve current positions (offsets) for the specified partitions.
-
+**Arguments**:
-### ApplyWithMetadataFunction
+- `partitions` (`list(TopicPartition)`): List of topic+partitions to return
+current offsets for. The current offset is the offset of
+the last consumed message + 1.
-```python
-class ApplyWithMetadataFunction(StreamFunction)
-```
+**Raises**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L125)
+- `None`: KafkaException
+- `None`: RuntimeError if called on a closed consumer
-Wrap a function into "Apply" function.
+**Returns**:
-The provided function is expected to accept value, and timestamp and return
-a new value based on input,
-and its result will always be passed downstream.
+`list(TopicPartition)`: List of topic+partitions with offset and possibly error set.
-
+
-### FilterFunction
+#### Consumer.seek
```python
-class FilterFunction(StreamFunction)
+def seek(partition: TopicPartition)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L166)
-
-Wraps a function into a "Filter" function.
-The result of a Filter function is interpreted as boolean.
-If it's `True`, the input will be return downstream.
-If it's `False`, the `Filtered` exception will be raised to signal that the
-value is filtered out.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L470)
-
+Set consume position for partition to offset.
-### FilterWithMetadataFunction
+The offset may be an absolute (>=0) or a
+logical offset (:py:const:`OFFSET_BEGINNING` et.al).
-```python
-class FilterWithMetadataFunction(StreamFunction)
-```
+seek() may only be used to update the consume offset of an
+actively consumed partition (i.e., after :py:const:`assign()`),
+to set the starting offset of partition not being consumed instead
+pass the offset in an `assign()` call.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L187)
+**Arguments**:
-Wraps a function into a "Filter" function.
+- `partition` (`TopicPartition`): Topic+partition+offset to seek to.
-The passed callback must accept value, key, and timestamp, and it's expected to
-return a boolean-like result.
+**Raises**:
-If the result is `True`, the input will be passed downstream.
-Otherwise, the value will be filtered out.
+- `None`: KafkaException
-
+
-### UpdateFunction
+#### Consumer.assignment
```python
-class UpdateFunction(StreamFunction)
+def assignment() -> List[TopicPartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L210)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L487)
-Wrap a function into an "Update" function.
+Returns the current partition assignment.
-The provided function must accept a value, and it's expected to mutate it
-or to perform some side effect.
+**Raises**:
-The result of the callback is always ignored, and the original input is passed
-downstream.
+- `None`: KafkaException
+- `None`: RuntimeError if called on a closed consumer
-
+**Returns**:
-### UpdateWithMetadataFunction
+`list(TopicPartition)`: List of assigned topic+partitions.
-```python
-class UpdateWithMetadataFunction(StreamFunction)
-```
+
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L233)
+#### Consumer.set\_sasl\_credentials
-Wrap a function into an "Update" function.
+```python
+def set_sasl_credentials(username: str, password: str)
+```
-The provided function must accept a value, a key, and a timestamp.
-The callback is expected to mutate the value or to perform some side effect with it.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L500)
-The result of the callback is always ignored, and the original input is passed
-downstream.
+Sets the SASL credentials used for this client.
+These credentials will overwrite the old ones, and will be used the next
+time the client needs to authenticate.
+This method will not disconnect existing broker connections that have been
+established with the old credentials.
+This method is applicable only to SASL PLAIN and SCRAM mechanisms.
-
+
-### TransformFunction
+#### Consumer.incremental\_assign
```python
-class TransformFunction(StreamFunction)
+def incremental_assign(partitions: List[TopicPartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/functions.py#L256)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L512)
-Wrap a function into a "Transform" function.
+Assign new partitions.
-The provided callback must accept a value, a key and a timestamp.
-It's expected to return a new value, new key and new timestamp.
+Can be called outside the `Consumer` `on_assign` callback (multiple times).
+Partitions immediately show on `Consumer.assignment()`.
-This function must be used with caution, because it can technically change the
-key.
-It's supposed to be used by the library internals and not be a part of the public
-API.
+Any additional partitions besides the ones passed during the `Consumer`
+`on_assign` callback will NOT be associated with the consumer group.
-The result of the callback will always be passed downstream.
+
-
+#### Consumer.incremental\_unassign
-## quixstreams.core.stream
+```python
+def incremental_unassign(partitions: List[TopicPartition])
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L524)
-## quixstreams.core.stream.stream
+Revoke partitions.
-
+Can be called outside an on_revoke callback.
-### Stream
+
+
+#### Consumer.close
```python
-class Stream()
+def close()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L34)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/kafka/consumer.py#L532)
-
+Close down and terminate the Kafka Consumer.
-#### Stream.\_\_init\_\_
+Actions performed:
-```python
-def __init__(func: Optional[StreamFunction] = None,
- parent: Optional[Self] = None)
-```
+- Stops consuming.
+- Commits offsets, unless the consumer property 'enable.auto.commit' is set to False.
+- Leaves the consumer group.
+
+Registered callbacks may be called from this method,
+see `poll()` for more info.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L35)
-A base class for all streaming operations.
+
-`Stream` is an abstraction of a function pipeline.
-Each Stream has a function and a parent (None by default).
-When adding new function to the stream, it creates a new `Stream` object and
-sets "parent" to the previous `Stream` to maintain an order of execution.
+## quixstreams.models.serializers
-Streams supports four types of functions:
+
-- "Apply" - generate new values based on a previous one.
- The result of an Apply function is passed downstream to the next functions.
- If "expand=True" is passed and the function returns an `Iterable`,
- each item of it will be treated as a separate value downstream.
-- "Update" - update values in-place.
- The result of an Update function is always ignored, and its input is passed
- downstream.
-- "Filter" - to filter values from the Stream.
- The result of a Filter function is interpreted as boolean.
- If it's `True`, the input will be passed downstream.
- If it's `False`, the record will be filtered from the stream.
-- "Transform" - to transform keys and timestamps along with the values.
- "Transform" functions may change the keys and should be used with caution.
- The result of the Transform function is passed downstream to the next
- functions.
- If "expand=True" is passed and the function returns an `Iterable`,
- each item of it will be treated as a separate value downstream.
+## quixstreams.models.serializers.json
-To execute the functions on the `Stream`, call `.compose()` method, and
-it will return a closure to execute all the functions accumulated in the Stream
-and its parents.
+
-**Arguments**:
+### JSONSerializer
-- `func`: a function to be called on the stream.
-It is expected to be wrapped into one of "Apply", "Filter", "Update" or
-"Trasform" from `quixstreams.core.stream.functions` package.
-Default - "ApplyFunction(lambda value: value)".
-- `parent`: a parent `Stream`
+```python
+class JSONSerializer(Serializer)
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L13)
-#### Stream.add\_filter
+
+
+#### JSONSerializer.\_\_init\_\_
```python
-def add_filter(func: Union[FilterCallback, FilterWithMetadataCallback],
- *,
- metadata: bool = False) -> Self
+def __init__(dumps: Callable[[Any], Union[str, bytes]] = default_dumps)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L97)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L14)
-Add a function to filter values from the Stream.
-
-The return value of the function will be interpreted as `bool`.
-If the function returns `False`-like result, the Stream will raise `Filtered`
-exception during execution.
+Serializer that returns data in json format.
**Arguments**:
-- `func`: a function to filter values from the stream
-- `metadata`: if True, the callback will receive key and timestamp along with
-the value.
-Default - `False`.
+- `dumps`: a function to serialize objects to json.
+Default - :py:func:`quixstreams.utils.json.dumps`
-**Returns**:
+
-a new `Stream` derived from the current one
+### JSONDeserializer
-
+```python
+class JSONDeserializer(Deserializer)
+```
-#### Stream.add\_apply
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L35)
+
+
+
+#### JSONDeserializer.\_\_init\_\_
```python
-def add_apply(func: Union[
- ApplyCallback,
- ApplyExpandedCallback,
- ApplyWithMetadataCallback,
- ApplyWithMetadataExpandedCallback,
-],
- *,
- expand: bool = False,
- metadata: bool = False) -> Self
+def __init__(loads: Callable[[Union[bytes, bytearray]], Any] = default_loads)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L122)
-
-Add an "apply" function to the Stream.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/json.py#L36)
-The function is supposed to return a new value, which will be passed
-further during execution.
+Deserializer that parses data from JSON
**Arguments**:
-- `func`: a function to generate a new value
-- `expand`: if True, expand the returned iterable into individual values
-downstream. If returned value is not iterable, `TypeError` will be raised.
-Default - `False`.
-- `metadata`: if True, the callback will receive key and timestamp along with
-the value.
-Default - `False`.
+- `loads`: function to parse json from bytes.
+Default - :py:func:`quixstreams.utils.json.loads`.
-**Returns**:
+
-a new `Stream` derived from the current one
+## quixstreams.models.serializers.simple\_types
-
+
-#### Stream.add\_update
+### BytesDeserializer
```python
-def add_update(func: Union[UpdateCallback, UpdateWithMetadataCallback],
- *,
- metadata: bool = False) -> Self
+class BytesDeserializer(Deserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L155)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L44)
-Add an "update" function to the Stream, that will mutate the input value.
+A deserializer to bypass bytes without any changes
-The return of this function will be ignored and its input
-will be passed downstream.
+
-**Arguments**:
+### BytesSerializer
-- `func`: a function to mutate the value
-- `metadata`: if True, the callback will receive key and timestamp along with
-the value.
-Default - `False`.
+```python
+class BytesSerializer(Serializer)
+```
-**Returns**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L53)
-a new Stream derived from the current one
+A serializer to bypass bytes without any changes
-
+
-#### Stream.add\_transform
+### StringDeserializer
```python
-def add_transform(func: Union[TransformCallback, TransformExpandedCallback],
- *,
- expand: bool = False) -> Self
+class StringDeserializer(Deserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L179)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L62)
-Add a "transform" function to the Stream, that will mutate the input value.
+
-The callback must accept a value, a key, and a timestamp.
-It's expected to return a new value, new key and new timestamp.
+#### StringDeserializer.\_\_init\_\_
-The result of the callback which will be passed downstream
-during execution.
+```python
+def __init__(codec: str = "utf_8")
+```
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L63)
-- `func`: a function to mutate the value
-- `expand`: if True, expand the returned iterable into individual items
-downstream. If returned value is not iterable, `TypeError` will be raised.
-Default - `False`.
+Deserializes bytes to strings using the specified encoding.
-**Returns**:
+**Arguments**:
-a new Stream derived from the current one
+- `codec`: string encoding
+A wrapper around `confluent_kafka.serialization.StringDeserializer`.
-
+
-#### Stream.diff
+### IntegerDeserializer
```python
-def diff(other: "Stream") -> Self
+class IntegerDeserializer(Deserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L204)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L81)
-Takes the difference between Streams `self` and `other` based on their last
-
-common parent, and returns a new `Stream` that includes only this difference.
+Deserializes bytes to integers.
-It's impossible to calculate a diff when:
- - Streams don't have a common parent.
- - When the `self` Stream already includes all the nodes from
- the `other` Stream, and the resulting diff is empty.
+A wrapper around `confluent_kafka.serialization.IntegerDeserializer`.
-**Arguments**:
+
-- `other`: a `Stream` to take a diff from.
+### DoubleDeserializer
-**Raises**:
+```python
+class DoubleDeserializer(Deserializer)
+```
-- `ValueError`: if Streams don't have a common parent
-or if the diff is empty.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L99)
-**Returns**:
+Deserializes bytes to floats using IEEE 754 binary64 encoding.
-new `Stream` instance including all the Streams from the diff
+A wrapper around `confluent_kafka.serialization.DoubleDeserializer`.
-
+
-#### Stream.tree
+### StringSerializer
```python
-def tree() -> List[Self]
+class StringSerializer(Serializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L233)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L117)
-Return a list of all parent Streams including the node itself.
+
-The tree is ordered from child to parent (current node comes first).
+#### StringSerializer.\_\_init\_\_
-**Returns**:
+```python
+def __init__(codec: str = "utf_8")
+```
-a list of `Stream` objects
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L118)
-
+Serializes strings to bytes using the specified encoding.
-#### Stream.compose\_returning
+**Arguments**:
-```python
-def compose_returning() -> ReturningExecutor
-```
+- `codec`: string encoding
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L247)
+
-Compose a list of functions from this `Stream` and its parents into one
-big closure that always returns the transformed record.
+### IntegerSerializer
-This closure is to be used to execute the functions in the stream and to get
-the result of the transformations.
+```python
+class IntegerSerializer(Serializer)
+```
-Stream may only contain simple "apply" functions to be able to compose itself
-into a returning function.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L130)
-
+Serializes integers to bytes
-#### Stream.compose
+
+
+### DoubleSerializer
```python
-def compose(
- allow_filters: bool = True,
- allow_updates: bool = True,
- allow_expands: bool = True,
- allow_transforms: bool = True,
- sink: Optional[Callable[[Any, Any, int, Any],
- None]] = None) -> VoidExecutor
+class DoubleSerializer(Serializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/core/stream/stream.py#L284)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L143)
-Compose a list of functions from this `Stream` and its parents into one
+Serializes floats to bytes
-big closure using a "composer" function.
+
-This "executor" closure is to be used to execute all functions in the stream for the given
-key, value and timestamps.
+## quixstreams.models.serializers.quix
-By default, executor doesn't return the result of the execution.
-To accumulate the results, pass the `sink` parameter.
+
-**Arguments**:
+### QuixDeserializer
-- `allow_filters`: If False, this function will fail with `ValueError` if
-the stream has filter functions in the tree. Default - True.
-- `allow_updates`: If False, this function will fail with `ValueError` if
-the stream has update functions in the tree. Default - True.
-- `allow_expands`: If False, this function will fail with `ValueError` if
-the stream has functions with "expand=True" in the tree. Default - True.
-- `allow_transforms`: If False, this function will fail with `ValueError` if
-the stream has transform functions in the tree. Default - True.
-- `sink`: callable to accumulate the results of the execution, optional.
+```python
+class QuixDeserializer(JSONDeserializer)
+```
-**Raises**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L73)
-- `ValueError`: if disallowed functions are present in the stream tree.
+Handles Deserialization for any Quix-formatted topic.
-
+Parses JSON data from either `TimeseriesData` or `EventData` (ignores the rest).
-## quixstreams.core
+
-
+#### QuixDeserializer.\_\_init\_\_
-## quixstreams.processing\_context
+```python
+def __init__(loads: Callable[[Union[bytes, bytearray]], Any] = default_loads)
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L80)
-### ProcessingContext
+**Arguments**:
+
+- `loads`: function to parse json from bytes.
+Default - :py:func:`quixstreams.utils.json.loads`.
+
+
+
+#### QuixDeserializer.split\_values
```python
-@dataclasses.dataclass
-class ProcessingContext()
+@property
+def split_values() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L21)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L97)
-A class to share processing-related objects
-between `Application` and `StreamingDataFrame` instances.
+Each Quix message might contain data for multiple Rows.
+This property informs the downstream processors about that, so they can
+expect an Iterable instead of Mapping.
-
+
-#### ProcessingContext.store\_offset
+#### QuixDeserializer.deserialize
```python
-def store_offset(topic: str, partition: int, offset: int)
+def deserialize(model_key: str, value: Union[List[Mapping],
+ Mapping]) -> Iterable[Mapping]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L41)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L150)
-Store the offset of the processed message to the checkpoint.
+Deserialization function for particular data types (Timeseries or EventData).
**Arguments**:
-- `topic`: topic name
-- `partition`: partition number
-- `offset`: message offset
+- `model_key`: value of "__Q_ModelKey" message header
+- `value`: deserialized JSON value of the message, list or dict
-
+**Returns**:
-#### ProcessingContext.init\_checkpoint
+Iterable of dicts
+
+
+
+### QuixSerializer
```python
-def init_checkpoint()
+class QuixSerializer(JSONSerializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L51)
-
-Initialize a new checkpoint
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L271)
-
+
-#### ProcessingContext.commit\_checkpoint
+#### QuixSerializer.\_\_init\_\_
```python
-def commit_checkpoint(force: bool = False)
+def __init__(as_legacy: bool = True,
+ dumps: Callable[[Any], Union[str, bytes]] = default_dumps)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/processing_context.py#L62)
-
-Commit the current checkpoint.
-
-The actual commit will happen only when:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L275)
-1. The checkpoint has at least one stored offset
-2. The checkpoint is expired or `force=True` is passed
+Serializer that returns data in json format.
**Arguments**:
-- `force`: if `True`, commit the checkpoint before its expiration deadline.
+- `as_legacy`: parse as the legacy format; Default = True
+- `dumps`: a function to serialize objects to json.
+Default - :py:func:`quixstreams.utils.json.dumps`
-
+
-## quixstreams.utils
+### QuixTimeseriesSerializer
-
+```python
+class QuixTimeseriesSerializer(QuixSerializer)
+```
-## quixstreams.utils.dicts
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L318)
-
+Serialize data to JSON formatted according to Quix Timeseries format.
-#### dict\_values
+The serializable object must be dictionary, and each item must be of `str`, `int`,
+`float`, `bytes` or `bytearray` type.
+Otherwise, the `SerializationError` will be raised.
+Input:
```python
-def dict_values(d: object) -> List
+{'a': 1, 'b': 1.1, 'c': "string", 'd': b'bytes', 'Tags': {'tag1': 'tag'}}
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/utils/dicts.py#L4)
+Output:
+```json
+{
+ "Timestamps": [123123123],
+ "NumericValues": {"a": [1], "b": [1.1]},
+ "StringValues": {"c": ["string"]},
+ "BinaryValues": {"d": ["Ynl0ZXM="]},
+ "TagValues": {"tag1": ["tag"]}
+}
+```
-Recursively unpacks a set of nested dicts to get a flattened list of leaves,
+
-where "leaves" are the first non-dict item.
+### QuixEventsSerializer
-i.e {"a": {"b": {"c": 1}, "d": 2}, "e": 3} becomes [1, 2, 3]
+```python
+class QuixEventsSerializer(QuixSerializer)
+```
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L406)
-- `d`: initially, a dict (with potentially nested dicts)
+Serialize data to JSON formatted according to Quix EventData format.
+The input value is expected to be a dictionary with the following keys:
+ - "Id" (type `str`, default - "")
+ - "Value" (type `str`, default - ""),
+ - "Tags" (type `dict`, default - {})
-**Returns**:
+>***NOTE:*** All the other fields will be ignored.
-a list with all the leaves of the various contained dicts
+Input:
+```python
+{
+ "Id": "an_event",
+ "Value": "any_string",
+ "Tags": {"tag1": "tag"}
+}
+```
-
+Output:
+```json
+{
+ "Id": "an_event",
+ "Value": "any_string",
+ "Tags": {"tag1": "tag"},
+ "Timestamp": 1692703362840389000
+}
+```
-## quixstreams.utils.json
+
-
+## quixstreams.models.serializers.base
-#### dumps
+
+
+### SerializationContext
```python
-def dumps(value: Any) -> bytes
+class SerializationContext()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/utils/json.py#L8)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L22)
-Serialize to JSON using `orjson` package.
-
-**Arguments**:
-
-- `value`: value to serialize to JSON
-
-**Returns**:
+Provides additional context for message serialization/deserialization.
-bytes
+Every `Serializer` and `Deserializer` receives an instance of `SerializationContext`
-
+
-#### loads
+#### SerializationContext.to\_confluent\_ctx
```python
-def loads(value: bytes) -> Any
+def to_confluent_ctx(field: MessageField) -> _SerializationContext
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/utils/json.py#L18)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L35)
-Deserialize from JSON using `orjson` package.
+Convert `SerializationContext` to `confluent_kafka.SerializationContext`
-Main differences:
-- It returns `bytes`
-- It doesn't allow non-str keys in dictionaries
+in order to re-use serialization already provided by `confluent_kafka` library.
**Arguments**:
-- `value`: value to deserialize from
+- `field`: instance of `confluent_kafka.serialization.MessageField`
**Returns**:
-object
+instance of `confluent_kafka.serialization.SerializationContext`
-
+
-## quixstreams.types
+### Deserializer
-
+```python
+class Deserializer(abc.ABC)
+```
-## quixstreams.models.timestamps
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L47)
-
+
-### TimestampType
+#### Deserializer.\_\_init\_\_
```python
-class TimestampType(enum.IntEnum)
+def __init__(*args, **kwargs)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/timestamps.py#L8)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L48)
-
-
-#### TIMESTAMP\_NOT\_AVAILABLE
-
-timestamps not supported by broker
-
-
+A base class for all Deserializers
-#### TIMESTAMP\_CREATE\_TIME
+
-message creation time (or source / producer time)
+#### Deserializer.split\_values
-
+```python
+@property
+def split_values() -> bool
+```
-#### TIMESTAMP\_LOG\_APPEND\_TIME
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L54)
-broker receive time
+Return True if the deserialized message should be considered as Iterable
+and each item in it should be processed as a separate message.
-
+
-### MessageTimestamp
+### Serializer
```python
-class MessageTimestamp()
+class Serializer(abc.ABC)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/timestamps.py#L14)
-
-Represents a timestamp of incoming Kafka message.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L65)
-It is made pseudo-immutable (i.e. public attributes don't have setters), and
-it should not be mutated during message processing.
+A base class for all Serializers
-
+
-#### MessageTimestamp.create
+#### Serializer.extra\_headers
```python
-@classmethod
-def create(cls, timestamp_type: int, milliseconds: int) -> Self
+@property
+def extra_headers() -> MessageHeadersMapping
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/timestamps.py#L41)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/base.py#L71)
-Create a Timestamp object based on data
-
-from `confluent_kafka.Message.timestamp()`.
-
-If timestamp type is "TIMESTAMP_NOT_AVAILABLE", the milliseconds are set to None
+Informs producer to set additional headers
-**Arguments**:
+for the message it will be serializing
-- `timestamp_type`: a timestamp type represented as a number
-Can be one of:
-- "0" - TIMESTAMP_NOT_AVAILABLE, timestamps not supported by broker.
-- "1" - TIMESTAMP_CREATE_TIME, message creation time (or source / producer time).
-- "2" - TIMESTAMP_LOG_APPEND_TIME, broker receive time.
-- `milliseconds`: the number of milliseconds since the epoch (UTC).
+Must return a dictionary with headers.
+Keys must be strings, and values must be strings, bytes or None.
**Returns**:
-Timestamp object
-
-
-
-## quixstreams.models
+dict with headers
-
+
-## quixstreams.models.messagecontext
+## quixstreams.models.serializers.exceptions
-
+
-### MessageContext
+### IgnoreMessage
```python
-class MessageContext()
+class IgnoreMessage(exceptions.QuixException)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/messagecontext.py#L4)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/exceptions.py#L46)
-An object with Kafka message properties.
-
-It is made pseudo-immutable (i.e. public attributes don't have setters), and
-it should not be mutated during message processing.
+Raise this exception from Deserializer.__call__ in order to ignore the processing
+of the particular message.
-
+
-## quixstreams.models.types
+## quixstreams.models.topics
-
+
-### ConfluentKafkaMessageProto
+## quixstreams.models.topics.exceptions
-```python
-class ConfluentKafkaMessageProto(Protocol)
-```
+
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/types.py#L13)
+## quixstreams.models.topics.manager
-An interface of `confluent_kafka.Message`.
+
-Use it to not depend on exact implementation and simplify testing.
+#### affirm\_ready\_for\_create
-Instances of `confluent_kafka.Message` cannot be directly created from Python,
-see https://github.com/confluentinc/confluent-kafka-python/issues/1535.
+```python
+def affirm_ready_for_create(topics: List[Topic])
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L20)
-## quixstreams.models.serializers
+Validate a list of topics is ready for creation attempt
-
+**Arguments**:
-## quixstreams.models.serializers.exceptions
+- `topics`: list of `Topic`s
-
+
-### IgnoreMessage
+### TopicManager
```python
-class IgnoreMessage(exceptions.QuixException)
+class TopicManager()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/exceptions.py#L46)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L30)
-Raise this exception from Deserializer.__call__ in order to ignore the processing
-of the particular message.
+The source of all topic management with quixstreams.
-
+Generally initialized and managed automatically by an `Application`,
+but allows a user to work with it directly when needed, such as using it alongside
+a plain `Producer` to create its topics.
-## quixstreams.models.serializers.quix
+See methods for details.
-
+
-### QuixDeserializer
+#### TopicManager.\_\_init\_\_
```python
-class QuixDeserializer(JSONDeserializer)
+def __init__(topic_admin: TopicAdmin,
+ consumer_group: str,
+ timeout: float = 30,
+ create_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L73)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L53)
-Handles Deserialization for any Quix-formatted topic.
+**Arguments**:
-Parses JSON data from either `TimeseriesData` and `EventData` (ignores the rest).
+- `topic_admin`: an `Admin` instance (required for some functionality)
+- `consumer_group`: the consumer group (of the `Application`)
+- `timeout`: response timeout (seconds)
+- `create_timeout`: timeout for topic creation
-
+
-#### QuixDeserializer.\_\_init\_\_
+#### TopicManager.changelog\_topics
```python
-def __init__(column_name: Optional[str] = None,
- loads: Callable[[Union[bytes, bytearray]], Any] = default_loads)
+@property
+def changelog_topics() -> Dict[str, Dict[str, Topic]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L80)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L103)
-**Arguments**:
+Note: `Topic`s are the changelogs.
-- `column_name`: if provided, the deserialized value will be wrapped into
-dictionary with `column_name` as a key.
-- `loads`: function to parse json from bytes.
-Default - :py:func:`quixstreams.utils.json.loads`.
+returns: the changelog topic dict, {topic_name: {suffix: Topic}}
-
+
-#### QuixDeserializer.split\_values
+#### TopicManager.all\_topics
```python
@property
-def split_values() -> bool
+def all_topics() -> Dict[str, Topic]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L100)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L112)
-Each Quix message might contain data for multiple Rows.
-This property informs the downstream processors about that, so they can
-expect an Iterable instead of Mapping.
+Every registered topic name mapped to its respective `Topic`.
-
+returns: full topic dict, {topic_name: Topic}
-#### QuixDeserializer.deserialize
+
+
+#### TopicManager.topic\_config
```python
-def deserialize(model_key: str, value: Union[List[Mapping],
- Mapping]) -> Iterable[Mapping]
+def topic_config(num_partitions: Optional[int] = None,
+ replication_factor: Optional[int] = None,
+ extra_config: Optional[dict] = None) -> TopicConfig
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L153)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L220)
-Deserialization function for particular data types (Timeseries or EventData).
+Convenience method for generating a `TopicConfig` with default settings
**Arguments**:
-- `model_key`: value of "__Q_ModelKey" message header
-- `value`: deserialized JSON value of the message, list or dict
+- `num_partitions`: the number of topic partitions
+- `replication_factor`: the topic replication factor
+- `extra_config`: other optional configuration settings
**Returns**:
-Iterable of dicts
+a TopicConfig object
-
+
-### QuixSerializer
+#### TopicManager.topic
```python
-class QuixSerializer(JSONSerializer)
+def topic(name: str,
+ value_deserializer: Optional[DeserializerType] = None,
+ key_deserializer: Optional[DeserializerType] = "bytes",
+ value_serializer: Optional[SerializerType] = None,
+ key_serializer: Optional[SerializerType] = "bytes",
+ config: Optional[TopicConfig] = None,
+ timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L274)
-
-
-
-#### QuixSerializer.\_\_init\_\_
-
-```python
-def __init__(as_legacy: bool = True,
- dumps: Callable[[Any], Union[str, bytes]] = default_dumps)
-```
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L241)
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L278)
+A convenience method for generating a `Topic`. Will use default config options
-Serializer that returns data in json format.
+as dictated by the TopicManager.
**Arguments**:
-- `as_legacy`: parse as the legacy format; Default = True
-- `dumps`: a function to serialize objects to json.
-Default - :py:func:`quixstreams.utils.json.dumps`
-
-
-
-### QuixTimeseriesSerializer
-
-```python
-class QuixTimeseriesSerializer(QuixSerializer)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L321)
-
-Serialize data to JSON formatted according to Quix Timeseries format.
-
-The serializable object must be dictionary, and each item must be of `str`, `int`,
-`float`, `bytes` or `bytearray` type.
-Otherwise, the `SerializationError` will be raised.
+- `name`: topic name
+- `value_deserializer`: a deserializer type for values
+- `key_deserializer`: a deserializer type for keys
+- `value_serializer`: a serializer type for values
+- `key_serializer`: a serializer type for keys
+- `config`: optional topic configurations (for creation/validation)
+- `timestamp_extractor`: a callable that returns a timestamp in
+milliseconds from a deserialized message.
-Input:
-```python
-{'a': 1, 'b': 1.1, 'c': "string", 'd': b'bytes', 'Tags': {'tag1': 'tag'}}
-```
+**Returns**:
-Output:
-```json
-{
- "Timestamps": [123123123],
- "NumericValues": {"a": [1], "b": [1.1]},
- "StringValues": {"c": ["string"]},
- "BinaryValues": {"d": ["Ynl0ZXM="]},
- "TagValues": {"tag1": ["tag"]}
-}
-```
+Topic object with creation configs
-
+
-### QuixEventsSerializer
+#### TopicManager.repartition\_topic
```python
-class QuixEventsSerializer(QuixSerializer)
+def repartition_topic(operation: str,
+ topic_name: str,
+ value_deserializer: Optional[DeserializerType] = "json",
+ key_deserializer: Optional[DeserializerType] = "json",
+ value_serializer: Optional[SerializerType] = "json",
+ key_serializer: Optional[SerializerType] = "json",
+ timeout: Optional[float] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L409)
-
-Serialize data to JSON formatted according to Quix EventData format.
-The input value is expected to be a dictionary with the following keys:
- - "Id" (type `str`, default - "")
- - "Value" (type `str`, default - ""),
- - "Tags" (type `dict`, default - {})
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L286)
->***NOTE:*** All the other fields will be ignored.
+Create an internal repartition topic.
-Input:
-```python
-{
- "Id": "an_event",
- "Value": "any_string",
- "Tags": {"tag1": "tag"}}
-}
-```
+**Arguments**:
-Output:
-```json
-{
- "Id": "an_event",
- "Value": "any_string",
- "Tags": {"tag1": "tag"}},
- "Timestamp":1692703362840389000
-}
-```
+- `operation`: name of the GroupBy operation (column name or user-defined).
+- `topic_name`: name of the topic the GroupBy is sourced from.
+- `value_deserializer`: a deserializer type for values; default - JSON
+- `key_deserializer`: a deserializer type for keys; default - JSON
+- `value_serializer`: a serializer type for values; default - JSON
+- `key_serializer`: a serializer type for keys; default - JSON
+- `timeout`: config lookup timeout (seconds); Default 30
-
+**Returns**:
-## quixstreams.models.serializers.simple\_types
+`Topic` object (which is also stored on the TopicManager)
-
+
-### BytesDeserializer
+#### TopicManager.changelog\_topic
```python
-class BytesDeserializer(Deserializer)
+def changelog_topic(topic_name: str,
+ store_name: str,
+ timeout: Optional[float] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L44)
-
-A deserializer to bypass bytes without any changes
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L326)
-
+Performs all the logic necessary to generate a changelog topic based on a
-### BytesSerializer
+"source topic" (aka input/consumed topic).
-```python
-class BytesSerializer(Serializer)
-```
+Its main goal is to ensure partition counts of the to-be generated changelog
+match the source topic, and ensure the changelog topic is compacted. Also
+enforces the serialization type. All `Topic` objects generated with this are
+stored on the TopicManager.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L55)
+If source topic already exists, defers to the existing topic settings, else
+uses the settings as defined by the `Topic` (and its defaults) as generated
+by the `TopicManager`.
-A serializer to bypass bytes without any changes
+In general, users should NOT need this; an Application knows when/how to
+generate changelog topics. To turn off changelogs, init an Application with
+"use_changelog_topics"=`False`.
-
+**Arguments**:
-### StringDeserializer
+- `topic_name`: name of consumed topic (app input topic)
+> NOTE: normally contains any prefixes added by TopicManager.topic()
+- `store_name`: name of the store this changelog belongs to
+(default, rolling10s, etc.)
+- `timeout`: config lookup timeout (seconds); Default 30
-```python
-class StringDeserializer(Deserializer)
-```
+**Returns**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L64)
+`Topic` object (which is also stored on the TopicManager)
-
+
-#### StringDeserializer.\_\_init\_\_
+#### TopicManager.create\_topics
```python
-def __init__(column_name: Optional[str] = None, codec: str = "utf_8")
+def create_topics(topics: List[Topic],
+ timeout: Optional[float] = None,
+ create_timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L65)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L383)
-Deserializes bytes to strings using the specified encoding.
+Creates topics via an explicit list of provided `Topics`.
+
+Exists as a way to manually specify what topics to create; otherwise,
+`create_all_topics()` is generally simpler.
**Arguments**:
-- `codec`: string encoding
-A wrapper around `confluent_kafka.serialization.StringDeserializer`.
+- `topics`: list of `Topic`s
+- `timeout`: creation acknowledge timeout (seconds); Default 30
+- `create_timeout`: topic finalization timeout (seconds); Default 60
-
+
-### IntegerDeserializer
+#### TopicManager.create\_all\_topics
```python
-class IntegerDeserializer(Deserializer)
+def create_all_topics(timeout: Optional[float] = None,
+ create_timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L84)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L411)
-Deserializes bytes to integers.
+A convenience method to create all Topic objects stored on this TopicManager.
-A wrapper around `confluent_kafka.serialization.IntegerDeserializer`.
+**Arguments**:
-
+- `timeout`: creation acknowledge timeout (seconds); Default 30
+- `create_timeout`: topic finalization timeout (seconds); Default 60
-### DoubleDeserializer
+
+
+#### TopicManager.validate\_all\_topics
```python
-class DoubleDeserializer(Deserializer)
+def validate_all_topics(timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L103)
-
-Deserializes float to IEEE 764 binary64.
-
-A wrapper around `confluent_kafka.serialization.DoubleDeserializer`.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L424)
-
+Validates all topics exist and changelogs have correct topic and rep factor.
-### StringSerializer
+Issues are pooled and raised as an Exception once inspections are complete.
-```python
-class StringSerializer(Serializer)
-```
+
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L122)
+## quixstreams.models.topics.admin
-
+
-#### StringSerializer.\_\_init\_\_
+#### convert\_topic\_list
```python
-def __init__(codec: str = "utf_8")
+def convert_topic_list(topics: List[Topic]) -> List[ConfluentTopic]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L123)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L24)
-Serializes strings to bytes using the specified encoding.
+Converts `Topic`s to `ConfluentTopic`s as required for Confluent's
+
+`AdminClient.create_topic()`.
**Arguments**:
-- `codec`: string encoding
+- `topics`: list of `Topic`s
-
+**Returns**:
-### IntegerSerializer
+list of confluent_kafka `ConfluentTopic`s
+
+
+
+### TopicAdmin
```python
-class IntegerSerializer(Serializer)
+class TopicAdmin()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L135)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L47)
-Serializes integers to bytes
+For performing "admin"-level operations on a Kafka cluster, mostly around topics.
-
+Primarily used to create and inspect topic configurations.
-### DoubleSerializer
+
+
+#### TopicAdmin.\_\_init\_\_
```python
-class DoubleSerializer(Serializer)
+def __init__(broker_address: Union[str, ConnectionConfig],
+ logger: logging.Logger = logger,
+ extra_config: Optional[Mapping] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L148)
-
-Serializes floats to bytes
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L54)
-
+**Arguments**:
-## quixstreams.models.serializers.json
+- `broker_address`: Connection settings for Kafka.
+Accepts string with Kafka broker host and port formatted as `<host>:<port>`,
+or a ConnectionConfig object if authentication is required.
+- `logger`: a Logger instance to attach librdkafka logging to
+- `extra_config`: optional configs (generally accepts producer configs)
-
+
-### JSONSerializer
+#### TopicAdmin.list\_topics
```python
-class JSONSerializer(Serializer)
+def list_topics(timeout: float = -1) -> Dict[str, ConfluentTopicMetadata]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L13)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L83)
-
+Get a list of topics and their metadata from a Kafka cluster
-#### JSONSerializer.\_\_init\_\_
+**Arguments**:
+
+- `timeout`: response timeout (seconds); Default infinite (-1)
+
+**Returns**:
+
+a dict of topic names and their metadata objects
+
+
+
+#### TopicAdmin.inspect\_topics
```python
-def __init__(dumps: Callable[[Any], Union[str, bytes]] = default_dumps)
+def inspect_topics(topic_names: List[str],
+ timeout: float = 30) -> Dict[str, Optional[TopicConfig]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L14)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L94)
-Serializer that returns data in json format.
-
-**Arguments**:
+A simplified way of getting the topic configurations of the provided topics
-- `dumps`: a function to serialize objects to json.
-Default - :py:func:`quixstreams.utils.json.dumps`
+from the cluster (if they exist).
-
+**Arguments**:
-### JSONDeserializer
+- `topic_names`: a list of topic names
+- `timeout`: response timeout (seconds)
+>***NOTE***: `timeout` must be >0 here (the underlying API expects a non-negative value, and 0 does not mean "infinite").
-```python
-class JSONDeserializer(Deserializer)
-```
+**Returns**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L35)
+a dict with topic names and their respective `TopicConfig`
-
+
-#### JSONDeserializer.\_\_init\_\_
+#### TopicAdmin.create\_topics
```python
-def __init__(column_name: Optional[str] = None,
- loads: Callable[[Union[bytes, bytearray]], Any] = default_loads)
+def create_topics(topics: List[Topic],
+ timeout: float = 30,
+ finalize_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/json.py#L36)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L176)
-Deserializer that parses data from JSON
+Create the given list of topics and confirm they are ready.
+
+Also raises an exception with detailed printout should the creation
+fail (it ignores issues for a topic already existing).
**Arguments**:
-- `column_name`: if provided, the deserialized value will be wrapped into
-dictionary with `column_name` as a key.
-- `loads`: function to parse json from bytes.
-Default - :py:func:`quixstreams.utils.json.loads`.
+- `topics`: a list of `Topic`
+- `timeout`: creation acknowledge timeout (seconds)
+- `finalize_timeout`: topic finalization timeout (seconds)
+>***NOTE***: `timeout` must be >0 here (the underlying API expects a non-negative value, and 0 does not mean "infinite").
-
+
-## quixstreams.models.serializers.base
+## quixstreams.models.topics.topic
-
+
-### SerializationContext
+### TopicConfig
```python
-class SerializationContext()
+@dataclasses.dataclass(eq=True)
+class TopicConfig()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L22)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L42)
-Provides additional context for message serialization/deserialization.
+Represents all kafka-level configuration for a kafka topic.
-Every `Serializer` and `Deserializer` receives an instance of `SerializationContext`
+Generally used by Topic and any topic creation procedures.
-
+
-#### SerializationContext.to\_confluent\_ctx
+### Topic
```python
-def to_confluent_ctx(field: MessageField) -> _SerializationContext
+class Topic()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L35)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L83)
-Convert `SerializationContext` to `confluent_kafka.SerializationContext`
+A definition of a Kafka topic.
-in order to re-use serialization already provided by `confluent_kafka` library.
+Typically created with an `app = quixstreams.app.Application()` instance via
+`app.topic()`, and used by `quixstreams.dataframe.StreamingDataFrame`
+instance.
-**Arguments**:
+
-- `field`: instance of `confluent_kafka.serialization.MessageField`
+#### Topic.\_\_init\_\_
-**Returns**:
+```python
+def __init__(
+ name: str,
+ config: TopicConfig,
+ value_deserializer: Optional[DeserializerType] = None,
+ key_deserializer: Optional[DeserializerType] = BytesDeserializer(),
+ value_serializer: Optional[SerializerType] = None,
+ key_serializer: Optional[SerializerType] = BytesSerializer(),
+ timestamp_extractor: Optional[TimestampExtractor] = None)
+```
-instance of `confluent_kafka.serialization.SerializationContext`
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L92)
-
+**Arguments**:
-### Deserializer
+- `name`: topic name
+- `config`: topic configs via `TopicConfig` (creation/validation)
+- `value_deserializer`: a deserializer type for values
+- `key_deserializer`: a deserializer type for keys
+- `value_serializer`: a serializer type for values
+- `key_serializer`: a serializer type for keys
+- `timestamp_extractor`: a callable that returns a timestamp in
+milliseconds from a deserialized message.
+
+
+
+#### Topic.name
```python
-class Deserializer(abc.ABC)
+@property
+def name() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L47)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L121)
-
+Topic name
-#### Deserializer.\_\_init\_\_
+
+
+#### Topic.row\_serialize
```python
-def __init__(column_name: Optional[str] = None, *args, **kwargs)
+def row_serialize(row: Row, key: Any) -> KafkaMessage
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L48)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L131)
-A base class for all Deserializers
+Serialize Row to a Kafka message structure
**Arguments**:
-- `column_name`: if provided, the deserialized value will be wrapped into
-dictionary with `column_name` as a key.
+- `row`: Row to serialize
+- `key`: message key to serialize
+
+**Returns**:
-
+KafkaMessage object with serialized values
-#### Deserializer.split\_values
+
+
+#### Topic.row\_deserialize
```python
-@property
-def split_values() -> bool
+def row_deserialize(
+ message: ConfluentKafkaMessageProto) -> Union[Row, List[Row], None]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L58)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L162)
-Return True if the deserialized message should be considered as Iterable
-and each item in it should be processed as a separate message.
+Deserialize incoming Kafka message to a Row.
-
+**Arguments**:
-### Serializer
+- `message`: an object with interface of `confluent_kafka.Message`
-```python
-class Serializer(abc.ABC)
-```
+**Returns**:
+
+Row, list of Rows or None if the message is ignored.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L74)
+
-A base class for all Serializers
+## quixstreams.models.topics.utils
-
+
-#### Serializer.extra\_headers
+#### merge\_headers
```python
-@property
-def extra_headers() -> MessageHeadersMapping
+def merge_headers(original: Optional[MessageHeadersTuples],
+ other: MessageHeadersMapping) -> MessageHeadersTuples
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/base.py#L80)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/utils.py#L8)
-Informs producer to set additional headers
+Merge two sets of Kafka message headers, overwriting headers in "original"
-for the message it will be serializing
+by the values from "other".
-Must return a dictionary with headers.
-Keys must be strings, and values must be strings, bytes or None.
+**Arguments**:
+
+- `original`: original headers as a list of (key, value) tuples.
+- `other`: headers to merge as a dictionary.
**Returns**:
-dict with headers
+a list of (key, value) tuples.
## quixstreams.models.messages
-
-
-## quixstreams.models.rows
+
-
+## quixstreams.models.timestamps
-## quixstreams.models.topics
+
-
+### TimestampType
-## quixstreams.models.topics.admin
+```python
+class TimestampType(enum.IntEnum)
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/timestamps.py#L8)
-#### convert\_topic\_list
+
-```python
-def convert_topic_list(topics: List[Topic]) -> List[ConfluentTopic]
-```
+#### TIMESTAMP\_NOT\_AVAILABLE
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L24)
+timestamps not supported by broker
-Converts `Topic`s to `ConfluentTopic`s as required for Confluent's
+
-`AdminClient.create_topic()`.
+#### TIMESTAMP\_CREATE\_TIME
-**Arguments**:
+message creation time (or source / producer time)
-- `topics`: list of `Topic`s
+
-**Returns**:
+#### TIMESTAMP\_LOG\_APPEND\_TIME
-list of confluent_kafka `ConfluentTopic`s
+broker receive time
-
+
-### TopicAdmin
+### MessageTimestamp
```python
-class TopicAdmin()
+class MessageTimestamp()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L47)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/timestamps.py#L14)
-For performing "admin"-level operations on a Kafka cluster, mostly around topics.
+Represents a timestamp of incoming Kafka message.
-Primarily used to create and inspect topic configurations.
+It is made pseudo-immutable (i.e. public attributes don't have setters), and
+it should not be mutated during message processing.
-
+
-#### TopicAdmin.\_\_init\_\_
+#### MessageTimestamp.create
```python
-def __init__(broker_address: Union[str, ConnectionConfig],
- logger: logging.Logger = logger,
- extra_config: Optional[Mapping] = None)
+@classmethod
+def create(cls, timestamp_type: int, milliseconds: int) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L54)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/timestamps.py#L41)
-**Arguments**:
+Create a Timestamp object based on data
-- `broker_address`: Connection settings for Kafka.
-Accepts string with Kafka broker host and port formatted as `:`,
-or a ConnectionConfig object if authentication is required.
-- `logger`: a Logger instance to attach librdkafka logging to
-- `extra_config`: optional configs (generally accepts producer configs)
+from `confluent_kafka.Message.timestamp()`.
-
+If timestamp type is "TIMESTAMP_NOT_AVAILABLE", the milliseconds are set to None
-#### TopicAdmin.list\_topics
+**Arguments**:
-```python
-def list_topics(timeout: float = -1) -> Dict[str, ConfluentTopicMetadata]
-```
+- `timestamp_type`: a timestamp type represented as a number
+Can be one of:
+- "0" - TIMESTAMP_NOT_AVAILABLE, timestamps not supported by broker.
+- "1" - TIMESTAMP_CREATE_TIME, message creation time (or source / producer time).
+- "2" - TIMESTAMP_LOG_APPEND_TIME, broker receive time.
+- `milliseconds`: the number of milliseconds since the epoch (UTC).
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L83)
+**Returns**:
-Get a list of topics and their metadata from a Kafka cluster
+Timestamp object
-**Arguments**:
+
-- `timeout`: response timeout (seconds); Default infinite (-1)
+## quixstreams.models
-**Returns**:
+
-a dict of topic names and their metadata objects
+## quixstreams.models.messagecontext
-
+
-#### TopicAdmin.inspect\_topics
+### MessageContext
```python
-def inspect_topics(topic_names: List[str],
- timeout: float = 30) -> Dict[str, Optional[TopicConfig]]
+class MessageContext()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L94)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/messagecontext.py#L4)
-A simplified way of getting the topic configurations of the provided topics
+An object with Kafka message properties.
-from the cluster (if they exist).
+It is made pseudo-immutable (i.e. public attributes don't have setters), and
+it should not be mutated during message processing.
-**Arguments**:
+
-- `topic_names`: a list of topic names
-- `timeout`: response timeout (seconds)
->***NOTE***: `timeout` must be >0 here (expects non-neg, and 0 != inf).
+## quixstreams.models.rows
-**Returns**:
+
-a dict with topic names and their respective `TopicConfig`
+## quixstreams.models.types
-
+
-#### TopicAdmin.create\_topics
+### ConfluentKafkaMessageProto
```python
-def create_topics(topics: List[Topic],
- timeout: float = 30,
- finalize_timeout: float = 60)
+class ConfluentKafkaMessageProto(Protocol)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L176)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/types.py#L13)
-Create the given list of topics and confirm they are ready.
+An interface of `confluent_kafka.Message`.
-Also raises an exception with detailed printout should the creation
-fail (it ignores issues for a topic already existing).
+Use it to not depend on exact implementation and simplify testing.
-**Arguments**:
+Instances of `confluent_kafka.Message` cannot be directly created from Python,
+see https://github.com/confluentinc/confluent-kafka-python/issues/1535.
-- `topics`: a list of `Topic`
-- `timeout`: creation acknowledge timeout (seconds)
-- `finalize_timeout`: topic finalization timeout (seconds)
->***NOTE***: `timeout` must be >0 here (expects non-neg, and 0 != inf).
+
-
+## quixstreams.platforms
-## quixstreams.models.topics.utils
+
-
+## quixstreams.platforms.quix.checks
-#### merge\_headers
+
+
+#### check\_state\_management\_enabled
```python
-def merge_headers(original: Optional[MessageHeadersTuples],
- other: MessageHeadersMapping) -> MessageHeadersTuples
+def check_state_management_enabled()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/utils.py#L8)
-
-Merge two sets of Kafka message headers, overwriting headers in "origin"
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/checks.py#L11)
-by the values from "other".
+Check if State Management feature is enabled for the current deployment on
+Quix platform.
+If it's disabled, the exception will be raised.
-**Arguments**:
+
-- `original`: original headers as a list of (key, value) tuples.
-- `other`: headers to merge as a dictionary.
+#### check\_state\_dir
-**Returns**:
+```python
+def check_state_dir(state_dir: str)
+```
-a list of (key, value) tuples.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/checks.py#L28)
-
+Check if Application "state_dir" matches the state dir on Quix platform.
-## quixstreams.models.topics.topic
+If it doesn't match, the warning will be logged.
-
+**Arguments**:
-### TopicConfig
+- `state_dir`: application state_dir path
-```python
-@dataclasses.dataclass(eq=True)
-class TopicConfig()
-```
+
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L42)
+## quixstreams.platforms.quix
-Represents all kafka-level configuration for a kafka topic.
+
-Generally used by Topic and any topic creation procedures.
+## quixstreams.platforms.quix.env
-
+
-### Topic
+### QuixEnvironment
```python
-class Topic()
+class QuixEnvironment()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L83)
-
-A definition of a Kafka topic.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L7)
-Typically created with an `app = quixstreams.app.Application()` instance via
-`app.topic()`, and used by `quixstreams.dataframe.StreamingDataFrame`
-instance.
+Class to access various Quix platform environment settings
-
+
-#### Topic.\_\_init\_\_
+#### QuixEnvironment.state\_management\_enabled
```python
-def __init__(
- name: str,
- config: TopicConfig,
- value_deserializer: Optional[DeserializerType] = None,
- key_deserializer: Optional[DeserializerType] = BytesDeserializer(),
- value_serializer: Optional[SerializerType] = None,
- key_serializer: Optional[SerializerType] = BytesSerializer(),
- timestamp_extractor: Optional[TimestampExtractor] = None)
+@property
+def state_management_enabled() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L92)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L19)
-**Arguments**:
+Check whether "State management" is enabled for the current deployment
-- `name`: topic name
-- `config`: topic configs via `TopicConfig` (creation/validation)
-- `value_deserializer`: a deserializer type for values
-- `key_deserializer`: a deserializer type for keys
-- `value_serializer`: a serializer type for values
-- `key_serializer`: a serializer type for keys
-- `timestamp_extractor`: a callable that returns a timestamp in
-milliseconds from a deserialized message.
+**Returns**:
-
+True if state management is enabled, otherwise False
-#### Topic.name
+
+
+#### QuixEnvironment.deployment\_id
```python
@property
-def name() -> str
+def deployment_id() -> Optional[str]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L121)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L27)
-Topic name
+Return current Quix deployment id.
-
+This variable is meant to be set only by Quix Platform and only
+when the application is deployed.
-#### Topic.row\_serialize
+**Returns**:
-```python
-def row_serialize(row: Row, key: Any) -> KafkaMessage
-```
+deployment id or None
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L131)
+
-Serialize Row to a Kafka message structure
+#### QuixEnvironment.workspace\_id
-**Arguments**:
+```python
+@property
+def workspace_id() -> Optional[str]
+```
-- `row`: Row to serialize
-- `key`: message key to serialize
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L39)
+
+Return Quix workspace id if set
**Returns**:
-KafkaMessage object with serialized values
+workspace id or None
-
+
-#### Topic.row\_deserialize
+#### QuixEnvironment.portal\_api
```python
-def row_deserialize(
- message: ConfluentKafkaMessageProto) -> Union[Row, List[Row], None]
+@property
+def portal_api() -> Optional[str]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L162)
-
-Deserialize incoming Kafka message to a Row.
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L47)
-- `message`: an object with interface of `confluent_kafka.Message`
+Return Quix Portal API url if set
**Returns**:
-Row, list of Rows or None if the message is ignored.
+portal API URL or None
-
+
-## quixstreams.models.topics.exceptions
+#### QuixEnvironment.state\_dir
-
+```python
+@property
+def state_dir() -> str
+```
-## quixstreams.models.topics.manager
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/env.py#L56)
-
+Return application state directory on Quix.
-#### affirm\_ready\_for\_create
+**Returns**:
-```python
-def affirm_ready_for_create(topics: List[Topic])
-```
+path to state dir
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L20)
+
-Validate a list of topics is ready for creation attempt
+## quixstreams.platforms.quix.exceptions
-**Arguments**:
+
-- `topics`: list of `Topic`s
+## quixstreams.platforms.quix.topic\_manager
-
+
-### TopicManager
+### QuixTopicManager
```python
-class TopicManager()
+class QuixTopicManager(TopicManager)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L30)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/topic_manager.py#L9)
The source of all topic management with quixstreams.
-Generally initialized and managed automatically by an `Application`,
+This is specifically for Applications using the Quix platform.
+
+Generally initialized and managed automatically by an `Application.Quix`,
but allows a user to work with it directly when needed, such as using it alongside
a plain `Producer` to create its topics.
See methods for details.
-
+
-#### TopicManager.\_\_init\_\_
+#### QuixTopicManager.\_\_init\_\_
```python
def __init__(topic_admin: TopicAdmin,
consumer_group: str,
+ quix_config_builder: QuixKafkaConfigsBuilder,
timeout: float = 30,
create_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L53)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/topic_manager.py#L30)
**Arguments**:
-- `topic_admin`: an `Admin` instance (required for some functionality)
-- `consumer_group`: the consumer group (of the `Application`)
+- `topic_admin`: an `Admin` instance
+- `quix_config_builder`: A QuixKafkaConfigsBuilder instance, else one is
+generated for you.
- `timeout`: response timeout (seconds)
- `create_timeout`: timeout for topic creation
-
+
-#### TopicManager.changelog\_topics
+## quixstreams.platforms.quix.api
+
+
+
+### QuixPortalApiService
```python
-@property
-def changelog_topics() -> Dict[str, Dict[str, Topic]]
+class QuixPortalApiService()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L103)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/api.py#L19)
-Note: `Topic`s are the changelogs.
+A light wrapper around the Quix Portal Api. If used in the Quix Platform, it will
+use that workspace's auth token and portal endpoint, else you must provide it.
-returns: the changelog topic dict, {topic_name: {suffix: Topic}}
+Function names closely reflect the respective API endpoint,
+each starting with the method [GET, POST, etc.] followed by the endpoint path.
-
+Results will be returned in the form of the request's Response.json(), unless something
+else is required. Non-200's will raise exceptions.
-#### TopicManager.all\_topics
+See the swagger documentation for more info about the endpoints.
+
+
+
+#### QuixPortalApiService.get\_workspace\_certificate
```python
-@property
-def all_topics() -> Dict[str, Topic]
+def get_workspace_certificate(workspace_id: Optional[str] = None,
+ timeout: float = 30) -> Optional[bytes]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L112)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/api.py#L119)
-Every registered topic name mapped to its respective `Topic`.
+Get a workspace TLS certificate if available.
-returns: full topic dict, {topic_name: Topic}
+Returns `None` if certificate is not specified.
-
+**Arguments**:
-#### TopicManager.topic\_config
+- `workspace_id`: workspace id, optional
+- `timeout`: request timeout; Default 30
+
+**Returns**:
+
+certificate as bytes if present, or None
+
+
+
+## quixstreams.platforms.quix.config
+
+
+
+#### strip\_workspace\_id\_prefix
```python
-def topic_config(num_partitions: Optional[int] = None,
- replication_factor: Optional[int] = None,
- extra_config: Optional[dict] = None) -> TopicConfig
+def strip_workspace_id_prefix(workspace_id: str, s: str) -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L220)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L31)
-Convenience method for generating a `TopicConfig` with default settings
+Remove the workspace ID from a given string if it starts with it,
+
+typically a topic or consumer group id
**Arguments**:
-- `num_partitions`: the number of topic partitions
-- `replication_factor`: the topic replication factor
-- `extra_config`: other optional configuration settings
+- `workspace_id`: the workspace id
+- `s`: the string to remove the prefix from
**Returns**:
-a TopicConfig object
+the string with workspace_id prefix removed
-
+
-#### TopicManager.topic
+#### prepend\_workspace\_id
```python
-def topic(name: str,
- value_deserializer: Optional[DeserializerType] = None,
- key_deserializer: Optional[DeserializerType] = "bytes",
- value_serializer: Optional[SerializerType] = None,
- key_serializer: Optional[SerializerType] = "bytes",
- config: Optional[TopicConfig] = None,
- timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic
+def prepend_workspace_id(workspace_id: str, s: str) -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L241)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L43)
-A convenience method for generating a `Topic`. Will use default config options
+Add the workspace ID as a prefix to a given string if it does not have it,
-as dictated by the TopicManager.
+typically a topic or consumer group id
**Arguments**:
-- `name`: topic name
-- `value_deserializer`: a deserializer type for values
-- `key_deserializer`: a deserializer type for keys
-- `value_serializer`: a serializer type for values
-- `key_serializer`: a serializer type for keys
-- `config`: optional topic configurations (for creation/validation)
-- `timestamp_extractor`: a callable that returns a timestamp in
-milliseconds from a deserialized message.
+- `workspace_id`: the workspace id
+- `s`: the string to prepend the workspace_id to
**Returns**:
-Topic object with creation configs
+the string with workspace_id prepended
-
+
-#### TopicManager.repartition\_topic
+### QuixApplicationConfig
```python
-def repartition_topic(operation: str,
- topic_name: str,
- value_deserializer: Optional[DeserializerType] = "json",
- key_deserializer: Optional[DeserializerType] = "json",
- value_serializer: Optional[SerializerType] = "json",
- key_serializer: Optional[SerializerType] = "json",
- timeout: Optional[float] = None) -> Topic
+@dataclasses.dataclass
+class QuixApplicationConfig()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L286)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L56)
-Create an internal repartition topic.
+A convenience container class for Quix Application configs.
-**Arguments**:
+
-- `operation`: name of the GroupBy operation (column name or user-defined).
-- `topic_name`: name of the topic the GroupBy is sourced from.
-- `value_deserializer`: a deserializer type for values; default - JSON
-- `key_deserializer`: a deserializer type for keys; default - JSON
-- `value_serializer`: a serializer type for values; default - JSON
-- `key_serializer`: a serializer type for keys; default - JSON
-- `timeout`: config lookup timeout (seconds); Default 30
+### QuixKafkaConfigsBuilder
-**Returns**:
+```python
+class QuixKafkaConfigsBuilder()
+```
-`Topic` object (which is also stored on the TopicManager)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L66)
-
+Retrieves all the necessary information from the Quix API and builds all the
+objects required to connect a confluent-kafka client to the Quix Platform.
-#### TopicManager.changelog\_topic
+If not executed within the Quix platform directly, you must provide a Quix
+"streaming" (aka "sdk") token, or Personal Access Token.
+
+Ideally you also know your workspace name or id. If not, you can search for it
+using a known topic name, but note the search space is limited to the access level
+of your token.
+
+It also currently handles the app_auto_create_topics setting for Application.Quix.
+
+
+
+#### QuixKafkaConfigsBuilder.\_\_init\_\_
```python
-def changelog_topic(topic_name: str,
- store_name: str,
- timeout: Optional[float] = None) -> Topic
+def __init__(quix_sdk_token: Optional[str] = None,
+ workspace_id: Optional[str] = None,
+ quix_portal_api_service: Optional[QuixPortalApiService] = None,
+ timeout: float = 30,
+ topic_create_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L326)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L82)
-Performs all the logic necessary to generate a changelog topic based on a
+**Arguments**:
-"source topic" (aka input/consumed topic).
+- `quix_portal_api_service`: A QuixPortalApiService instance (else generated)
+- `workspace_id`: A valid Quix Workspace ID (else searched for)
-Its main goal is to ensure partition counts of the to-be generated changelog
-match the source topic, and ensure the changelog topic is compacted. Also
-enforces the serialization type. All `Topic` objects generated with this are
-stored on the TopicManager.
+
-If source topic already exists, defers to the existing topic settings, else
-uses the settings as defined by the `Topic` (and its defaults) as generated
-by the `TopicManager`.
+#### QuixKafkaConfigsBuilder.strip\_workspace\_id\_prefix
-In general, users should NOT need this; an Application knows when/how to
-generate changelog topics. To turn off changelogs, init an Application with
-"use_changelog_topics"=`False`.
+```python
+def strip_workspace_id_prefix(s: str) -> str
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L160)
+
+Remove the workspace ID from a given string if it starts with it,
+
+typically a topic or consumer group id
**Arguments**:
-- `topic_name`: name of consumed topic (app input topic)
-> NOTE: normally contain any prefixes added by TopicManager.topic()
-- `store_name`: name of the store this changelog belongs to
-(default, rolling10s, etc.)
-- `timeout`: config lookup timeout (seconds); Default 30
+- `s`: the string to remove the prefix from
**Returns**:
-`Topic` object (which is also stored on the TopicManager)
+the string with workspace_id prefix removed
-
+
-#### TopicManager.create\_topics
+#### QuixKafkaConfigsBuilder.prepend\_workspace\_id
```python
-def create_topics(topics: List[Topic],
- timeout: Optional[float] = None,
- create_timeout: Optional[float] = None)
+def prepend_workspace_id(s: str) -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L383)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L170)
-Creates topics via an explicit list of provided `Topics`.
+Add the workspace ID as a prefix to a given string if it does not have it,
-Exists as a way to manually specify what topics to create; otherwise,
-`create_all_topics()` is generally simpler.
+typically a topic or consumer group id
**Arguments**:
-- `topics`: list of `Topic`s
-- `timeout`: creation acknowledge timeout (seconds); Default 30
-- `create_timeout`: topic finalization timeout (seconds); Default 60
+- `s`: the string to prepend the workspace_id to
-
+**Returns**:
-#### TopicManager.create\_all\_topics
+the string with workspace_id prepended
+
+
+
+#### QuixKafkaConfigsBuilder.search\_for\_workspace
```python
-def create_all_topics(timeout: Optional[float] = None,
- create_timeout: Optional[float] = None)
+def search_for_workspace(workspace_name_or_id: Optional[str] = None,
+ timeout: Optional[float] = None) -> Optional[dict]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L411)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L180)
-A convenience method to create all Topic objects stored on this TopicManager.
+Search for a workspace given an expected workspace name or id.
**Arguments**:
-- `timeout`: creation acknowledge timeout (seconds); Default 30
-- `create_timeout`: topic finalization timeout (seconds); Default 60
+- `workspace_name_or_id`: the expected name or id of a workspace
+- `timeout`: response timeout (seconds); Default 30
+
+**Returns**:
+
+the workspace data dict if search success, else None
+
+
+
+#### QuixKafkaConfigsBuilder.get\_workspace\_info
+
+```python
+def get_workspace_info(known_workspace_topic: Optional[str] = None,
+ timeout: Optional[float] = None) -> dict
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L222)
+
+Queries for workspace data from the Quix API, regardless of instance cache,
+
+and updates instance attributes from query result.
+
+**Arguments**:
+
+- `known_workspace_topic`: a topic you know to exist in some workspace
+- `timeout`: response timeout (seconds); Default 30
-
+
-#### TopicManager.validate\_all\_topics
+#### QuixKafkaConfigsBuilder.search\_workspace\_for\_topic
```python
-def validate_all_topics(timeout: Optional[float] = None)
+def search_workspace_for_topic(
+ workspace_id: str,
+ topic: str,
+ timeout: Optional[float] = None) -> Optional[str]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L424)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L250)
-Validates all topics exist and changelogs have correct topic and rep factor.
+Search through all the topics in the given workspace id to see if there is a
-Issues are pooled and raised as an Exception once inspections are complete.
+match with the provided topic.
-
+**Arguments**:
-## quixstreams.state.rocksdb.windowed.store
+- `workspace_id`: the workspace to search in
+- `topic`: the topic to search for
+- `timeout`: response timeout (seconds); Default 30
-
+**Returns**:
-### WindowedRocksDBStore
+the workspace_id if success, else None
+
+
+
+#### QuixKafkaConfigsBuilder.search\_for\_topic\_workspace
```python
-class WindowedRocksDBStore(RocksDBStore)
+def search_for_topic_workspace(topic: str,
+ timeout: Optional[float] = None
+ ) -> Optional[dict]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/store.py#L10)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L271)
-RocksDB-based windowed state store.
+Find what workspace a topic belongs to.
-It keeps track of individual store partitions and provides access to the
-partitions' transactions.
+If there is only one workspace altogether, it is assumed to be the workspace.
+More than one means each workspace will be searched until the first hit.
-
+**Arguments**:
-#### WindowedRocksDBStore.\_\_init\_\_
+- `topic`: the topic to search for
+- `timeout`: response timeout (seconds); Default 30
+
+**Returns**:
+
+workspace data dict if topic search success, else None
+
+
+
+#### QuixKafkaConfigsBuilder.create\_topics
```python
-def __init__(
- name: str,
- topic: str,
- base_dir: str,
- changelog_producer_factory: Optional[ChangelogProducerFactory] = None,
- options: Optional[RocksDBOptionsType] = None)
+def create_topics(topics: List[Topic],
+ timeout: Optional[float] = None,
+ finalize_timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/store.py#L18)
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L369)
-- `name`: a unique store name
-- `topic`: a topic name for this store
-- `base_dir`: path to a directory with the state
-- `changelog_producer_factory`: a ChangelogProducerFactory instance
-if using changelogs
-- `options`: RocksDB options. If `None`, the default options will be used.
+Create topics in a Quix cluster.
-
+**Arguments**:
-## quixstreams.state.rocksdb.windowed.partition
+- `topics`: a list of `Topic` objects
+- `timeout`: response timeout (seconds); Default 30
+- `finalize_timeout`: topic finalization timeout (seconds); Default 60
+i.e. how long to wait for topics to be marked as "Ready" (and thus ready to produce to/consume from).
-
+
-### WindowedRocksDBStorePartition
+#### QuixKafkaConfigsBuilder.get\_topic
```python
-class WindowedRocksDBStorePartition(RocksDBStorePartition)
+def get_topic(topic_name: str,
+ timeout: Optional[float] = None) -> Optional[dict]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/partition.py#L24)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L419)
-A base class to access windowed state in RocksDB.
-
-It represents a single RocksDB database.
+return the topic ID (the actual cluster topic name) if it exists, else None
-Besides the data, it keeps track of the latest observed timestamp and
-stores the expiration index to delete expired windows.
+>***NOTE***: if the name registered in Quix is instead the workspace-prefixed
+version, this returns None unless that exact name was created WITHOUT the
+Quix API.
**Arguments**:
-- `path`: an absolute path to the RocksDB folder
-- `options`: RocksDB options. If `None`, the default options will be used.
-
-
-
-## quixstreams.state.rocksdb.windowed.metadata
+- `topic_name`: name of the topic
+- `timeout`: response timeout (seconds); Default 30
-
+**Returns**:
-## quixstreams.state.rocksdb.windowed.transaction
+response dict of the topic info if topic found, else None
-
+
-### WindowedRocksDBPartitionTransaction
+#### QuixKafkaConfigsBuilder.confirm\_topics\_exist
```python
-class WindowedRocksDBPartitionTransaction(RocksDBPartitionTransaction)
+def confirm_topics_exist(topics: Union[List[Topic], List[str]],
+ timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/transaction.py#L22)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L451)
-
+Confirm whether the desired set of topics exists in the Quix workspace.
-#### WindowedRocksDBPartitionTransaction.expire\_windows
+**Arguments**:
+
+- `topics`: a list of `Topic` or topic names
+- `timeout`: response timeout (seconds); Default 30
+
+
+
+#### QuixKafkaConfigsBuilder.get\_application\_config
```python
-def expire_windows(duration_ms: int,
- prefix: bytes,
- grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]]
+def get_application_config(consumer_group_id: str) -> QuixApplicationConfig
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/transaction.py#L105)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/platforms/quix/config.py#L483)
-Get a list of expired windows from RocksDB considering latest timestamp,
+Get all the necessary attributes for an Application to run on Quix Cloud.
-window size and grace period.
-It marks the latest found window as expired in the expiration index, so
-calling this method multiple times will yield different results for the same
-"latest timestamp".
+**Arguments**:
-How it works:
-- First, it looks for the start time of the last expired window for the current
- prefix using expiration cache. If it's found, it will be used to reduce
- the search space and to avoid returning already expired windows.
-- Then it goes over window segments and fetches the windows
- that should be expired.
-- At last, it updates the expiration cache with the start time of the latest
- found windows
+- `consumer_group_id`: consumer group id, if needed
**Returns**:
-sorted list of tuples in format `((start, end), value)`
+a QuixApplicationConfig instance
+
+
+
+## quixstreams.state.rocksdb.serialization
@@ -4054,7 +4459,7 @@ sorted list of tuples in format `((start, end), value)`
def parse_window_key(key: bytes) -> Tuple[bytes, int, int]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/serialization.py#L12)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/serialization.py#L12)
Parse the window key from Rocksdb into (message_key, start, end) structure.
@@ -4077,7 +4482,7 @@ a tuple with message key, start timestamp, end timestamp
def encode_window_key(start_ms: int, end_ms: int) -> bytes
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/serialization.py#L39)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/serialization.py#L39)
Encode window start and end timestamps into bytes of the following format:
@@ -4102,7 +4507,7 @@ window timestamps as bytes
def encode_window_prefix(prefix: bytes, start_ms: int) -> bytes
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/serialization.py#L53)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/serialization.py#L53)
Encode window prefix and start time to iterate over keys in RocksDB
@@ -4118,6 +4523,79 @@ Format:
bytes
+
+
+## quixstreams.state.rocksdb.windowed.metadata
+
+
+
+## quixstreams.state.rocksdb.windowed.store
+
+
+
+### WindowedRocksDBStore
+
+```python
+class WindowedRocksDBStore(RocksDBStore)
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/store.py#L10)
+
+RocksDB-based windowed state store.
+
+It keeps track of individual store partitions and provides access to the
+partitions' transactions.
+
+
+
+#### WindowedRocksDBStore.\_\_init\_\_
+
+```python
+def __init__(
+ name: str,
+ topic: str,
+ base_dir: str,
+ changelog_producer_factory: Optional[ChangelogProducerFactory] = None,
+ options: Optional[RocksDBOptionsType] = None)
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/store.py#L18)
+
+**Arguments**:
+
+- `name`: a unique store name
+- `topic`: a topic name for this store
+- `base_dir`: path to a directory with the state
+- `changelog_producer_factory`: a ChangelogProducerFactory instance
+if using changelogs
+- `options`: RocksDB options. If `None`, the default options will be used.
+
+
+
+## quixstreams.state.rocksdb.windowed.partition
+
+
+
+### WindowedRocksDBStorePartition
+
+```python
+class WindowedRocksDBStorePartition(RocksDBStorePartition)
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/partition.py#L24)
+
+A base class to access windowed state in RocksDB.
+
+It represents a single RocksDB database.
+
+Besides the data, it keeps track of the latest observed timestamp and
+stores the expiration index to delete expired windows.
+
+**Arguments**:
+
+- `path`: an absolute path to the RocksDB folder
+- `options`: RocksDB options. If `None`, the default options will be used.
+
## quixstreams.state.rocksdb.windowed.state
@@ -4130,7 +4608,7 @@ bytes
class WindowedTransactionState(WindowedState)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L9)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L9)
@@ -4141,7 +4619,7 @@ def __init__(transaction: "WindowedRocksDBPartitionTransaction",
prefix: bytes)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L12)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L12)
A windowed state to be provided into `StreamingDataFrame` window functions.
@@ -4159,7 +4637,7 @@ def get_window(start_ms: int,
default: Any = None) -> Optional[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L23)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L23)
Get the value of the window defined by `start` and `end` timestamps
@@ -4183,7 +4661,7 @@ value or None if the key is not found and `default` is not provided
def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L39)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L39)
Set a value for the window.
@@ -4205,7 +4683,7 @@ using the provided `timestamp`.
def get_latest_timestamp() -> int
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L60)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L60)
Get the latest observed timestamp for the current state partition.
@@ -4225,7 +4703,7 @@ def expire_windows(duration_ms: int,
grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/windowed/state.py#L72)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/state.py#L72)
Get a list of expired windows from RocksDB considering the current
latest timestamp, window duration and grace period.
@@ -4234,47 +4712,55 @@ It also marks the latest found window as expired in the expiration index, so
calling this method multiple times will yield different results for the same
"latest timestamp".
-
+
-## quixstreams.state.rocksdb.options
+## quixstreams.state.rocksdb.windowed.transaction
-
+
-### RocksDBOptions
+### WindowedRocksDBPartitionTransaction
```python
-@dataclasses.dataclass(frozen=True)
-class RocksDBOptions(RocksDBOptionsType)
+class WindowedRocksDBPartitionTransaction(RocksDBPartitionTransaction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L25)
-
-RocksDB database options.
-
-**Arguments**:
-
-- `dumps`: function to dump data to JSON
-- `loads`: function to load data from JSON
-- `open_max_retries`: number of times to retry opening the database
-if it's locked by another process. To disable retrying, pass 0
-- `open_retry_backoff`: number of seconds to wait between each retry.
-Please see `rocksdict.Options` for a complete description of other options.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/transaction.py#L22)
-
+
-#### RocksDBOptions.to\_options
+#### WindowedRocksDBPartitionTransaction.expire\_windows
```python
-def to_options() -> rocksdict.Options
+def expire_windows(duration_ms: int,
+ prefix: bytes,
+ grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L53)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/windowed/transaction.py#L105)
+
+Get a list of expired windows from RocksDB considering latest timestamp,
+
+window size and grace period.
+It marks the latest found window as expired in the expiration index, so
+calling this method multiple times will yield different results for the same
+"latest timestamp".
-Convert parameters to `rocksdict.Options`
+How it works:
+- First, it looks for the start time of the last expired window for the current
+ prefix using expiration cache. If it's found, it will be used to reduce
+ the search space and to avoid returning already expired windows.
+- Then it goes over window segments and fetches the windows
+ that should be expired.
+- At last, it updates the expiration cache with the start time of the latest
+ found windows
**Returns**:
-instance of `rocksdict.Options`
+sorted list of tuples in format `((start, end), value)`
+
+
+
+## quixstreams.state.rocksdb
@@ -4288,7 +4774,7 @@ instance of `rocksdict.Options`
class RocksDBStore(Store)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L19)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L19)
RocksDB-based state store.
@@ -4308,7 +4794,7 @@ def __init__(
options: Optional[options_type] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L29)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L29)
**Arguments**:
@@ -4328,7 +4814,7 @@ if using changelogs
def topic() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L53)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L53)
Store topic name
@@ -4341,7 +4827,7 @@ Store topic name
def name() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L60)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L60)
Store name
@@ -4354,7 +4840,7 @@ Store name
def partitions() -> Dict[int, RocksDBStorePartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L67)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L67)
Mapping of assigned store partitions
@@ -4366,7 +4852,7 @@ Mapping of assigned store partitions
def assign_partition(partition: int) -> RocksDBStorePartition
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L80)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L80)
Open and assign store partition.
@@ -4389,7 +4875,7 @@ instance of`RocksDBStorePartition`
def revoke_partition(partition: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L117)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L117)
Revoke and close the assigned store partition.
@@ -4407,7 +4893,7 @@ If the partition is not assigned, it will log the message and return.
def start_partition_transaction(partition: int) -> RocksDBPartitionTransaction
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L138)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L138)
Start a new partition transaction.
@@ -4430,293 +4916,59 @@ instance of `RocksDBPartitionTransaction`
def close()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/store.py#L160)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/store.py#L160)
Close the store and revoke all assigned partitions
-
-
-## quixstreams.state.rocksdb.partition
-
-
-
-### RocksDBStorePartition
-
-```python
-class RocksDBStorePartition(StorePartition)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L40)
-
-A base class to access state in RocksDB.
-
-It represents a single RocksDB database.
-
-Responsibilities:
- 1. Managing access to the RocksDB instance
- 2. Creating transactions to interact with data
- 3. Flushing WriteBatches to the RocksDB
-
-It opens the RocksDB on `__init__`. If the db is locked by another process,
-it will retry according to `open_max_retries` and `open_retry_backoff` options.
-
-**Arguments**:
-
-- `path`: an absolute path to the RocksDB folder
-- `options`: RocksDB options. If `None`, the default options will be used.
-
-
-
-#### RocksDBStorePartition.begin
-
-```python
-def begin() -> RocksDBPartitionTransaction
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L75)
-
-Create a new `RocksDBTransaction` object.
-
-Using `RocksDBTransaction` is a recommended way for accessing the data.
-
-**Returns**:
-
-an instance of `RocksDBTransaction`
-
-
-
-#### RocksDBStorePartition.recover\_from\_changelog\_message
-
-```python
-def recover_from_changelog_message(
- changelog_message: ConfluentKafkaMessageProto, committed_offset: int)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L128)
-
-Updates state from a given changelog message.
-
-The actual update may be skipped when both conditions are met:
-
-- The changelog message has headers with the processed message offset.
-- This processed offset is larger than the latest committed offset for the same
- topic partition.
-
-This way the state does not apply the state changes for not-yet-committed
-messages and improves the state consistency guarantees.
-
-**Arguments**:
-
-- `changelog_message`: A raw Confluent message read from a changelog topic.
-- `committed_offset`: latest committed offset for the partition
-
-
-
-#### RocksDBStorePartition.set\_changelog\_offset
-
-```python
-def set_changelog_offset(changelog_offset: int)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L169)
-
-Set the changelog offset based on a message (usually an "offset-only" message).
-
-Used during recovery.
-
-**Arguments**:
-
-- `changelog_offset`: A changelog offset
-
-
-
-#### RocksDBStorePartition.write
-
-```python
-def write(batch: WriteBatch)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L179)
-
-Write `WriteBatch` to RocksDB
-
-**Arguments**:
-
-- `batch`: an instance of `rocksdict.WriteBatch`
-
-
-
-#### RocksDBStorePartition.get
-
-```python
-def get(key: bytes,
- default: Any = None,
- cf_name: str = "default") -> Union[None, bytes, Any]
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L186)
-
-Get a key from RocksDB.
-
-**Arguments**:
-
-- `key`: a key encoded to `bytes`
-- `default`: a default value to return if the key is not found.
-- `cf_name`: rocksdb column family name. Default - "default"
-
-**Returns**:
-
-a value if the key is present in the DB. Otherwise, `default`
-
-
-
-#### RocksDBStorePartition.exists
-
-```python
-def exists(key: bytes, cf_name: str = "default") -> bool
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L200)
-
-Check if a key is present in the DB.
-
-**Arguments**:
-
-- `key`: a key encoded to `bytes`.
-- `cf_name`: rocksdb column family name. Default - "default"
-
-**Returns**:
-
-`True` if the key is present, `False` otherwise.
-
-
-
-#### RocksDBStorePartition.get\_processed\_offset
-
-```python
-def get_processed_offset() -> Optional[int]
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L211)
-
-Get last processed offset for the given partition
-
-**Returns**:
-
-offset or `None` if there's no processed offset yet
-
-
-
-#### RocksDBStorePartition.get\_changelog\_offset
-
-```python
-def get_changelog_offset() -> Optional[int]
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L223)
-
-Get offset that the changelog is up-to-date with.
-
-**Returns**:
-
-offset or `None` if there's no processed offset yet
-
-
-
-#### RocksDBStorePartition.close
-
-```python
-def close()
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L233)
-
-Close the underlying RocksDB
-
-
-
-#### RocksDBStorePartition.path
-
-```python
-@property
-def path() -> str
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L246)
-
-Absolute path to RocksDB database folder
-
-**Returns**:
-
-file path
-
-
-
-#### RocksDBStorePartition.destroy
-
-```python
-@classmethod
-def destroy(cls, path: str)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L254)
-
-Delete underlying RocksDB database
-
-The database must be closed first.
-
-**Arguments**:
-
-- `path`: an absolute path to the RocksDB folder
-
-
-
-#### RocksDBStorePartition.get\_column\_family\_handle
-
-```python
-def get_column_family_handle(cf_name: str) -> ColumnFamily
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L264)
-
-Get a column family handle to pass to it WriteBatch.
+
-This method will cache the CF handle instance to avoid creating them
-repeatedly.
+## quixstreams.state.rocksdb.exceptions
-**Arguments**:
+
-- `cf_name`: column family name
+## quixstreams.state.rocksdb.metadata
-**Returns**:
+
-instance of `rocksdict.ColumnFamily`
+## quixstreams.state.rocksdb.options
-
+
-#### RocksDBStorePartition.get\_column\_family
+### RocksDBOptions
```python
-def get_column_family(cf_name: str) -> Rdict
+@dataclasses.dataclass(frozen=True)
+class RocksDBOptions(RocksDBOptionsType)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/partition.py#L285)
-
-Get a column family instance.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L25)
-This method will cache the CF instance to avoid creating them repeatedly.
+RocksDB database options.
**Arguments**:
-- `cf_name`: column family name
+- `dumps`: function to dump data to JSON
+- `loads`: function to load data from JSON
+- `open_max_retries`: number of times to retry opening the database
+if it's locked by another process. To disable retrying, pass 0
+- `open_retry_backoff`: number of seconds to wait between each retry.
+Please see `rocksdict.Options` for a complete description of other options.
-**Returns**:
+
-instance of `rocksdict.Rdict` for the given column family
+#### RocksDBOptions.to\_options
-
+```python
+def to_options() -> rocksdict.Options
+```
-## quixstreams.state.rocksdb.metadata
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L53)
+
+Convert parameters to `rocksdict.Options`
+
+**Returns**:
+
+instance of `rocksdict.Options`
@@ -4730,7 +4982,7 @@ instance of `rocksdict.Rdict` for the given column family
class RocksDBPartitionTransaction(PartitionTransaction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L61)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L61)
A transaction class to perform simple key-value operations like
"get", "set", "delete" and "exists" on a single RocksDB partition.
@@ -4770,7 +5022,7 @@ def __init__(partition: "RocksDBStorePartition",
changelog_producer: Optional[ChangelogProducer] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L100)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L100)
**Arguments**:
@@ -4791,7 +5043,7 @@ def get(key: Any,
cf_name: str = "default") -> Optional[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L124)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L124)
Get a key from the store.
@@ -4821,7 +5073,7 @@ value or `default`
def set(key: Any, value: Any, prefix: bytes, cf_name: str = "default")
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L164)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L164)
Set a key to the store.
@@ -4843,7 +5095,7 @@ It first updates the key in the update cache.
def delete(key: Any, prefix: bytes, cf_name: str = "default")
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L187)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L187)
Delete a key from the store.
@@ -4864,7 +5116,7 @@ It first deletes the key from the update cache.
def exists(key: Any, prefix: bytes, cf_name: str = "default") -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L208)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L208)
Check if a key exists in the store.
@@ -4878,937 +5130,773 @@ It first looks up the key in the update cache.
**Returns**:
-`True` if the key exists, `False` otherwise.
-
-
-
-#### RocksDBPartitionTransaction.prepare
-
-```python
-@_validate_transaction_status(PartitionTransactionStatus.STARTED)
-def prepare(processed_offset: int)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L235)
-
-Produce changelog messages to the changelog topic for all changes accumulated
-
-in this transaction and prepare transaction to flush its state to the state
-store.
-
-After successful `prepare()`, the transaction status is changed to PREPARED,
-and it cannot receive updates anymore.
-
-If changelog is disabled for this application, no updates will be produced
-to the changelog topic.
-
-**Arguments**:
-
-- `processed_offset`: the offset of the latest processed message
-
-
-
-#### RocksDBPartitionTransaction.flush
-
-```python
-@_validate_transaction_status(PartitionTransactionStatus.STARTED,
- PartitionTransactionStatus.PREPARED)
-def flush(processed_offset: Optional[int] = None,
- changelog_offset: Optional[int] = None)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L259)
-
-Flush the recent updates to the database.
-
-It writes the WriteBatch to RocksDB and marks itself as finished.
-
-If writing fails, the transaction is marked as failed and
-cannot be used anymore.
-
->***NOTE:*** If no keys have been modified during the transaction
- (i.e. no "set" or "delete" have been called at least once), it will
- not flush ANY data to the database including the offset to optimize
- I/O.
-
-**Arguments**:
-
-- `processed_offset`: offset of the last processed message, optional.
-- `changelog_offset`: offset of the last produced changelog message,
-optional.
-
-
-
-#### RocksDBPartitionTransaction.completed
-
-```python
-@property
-def completed() -> bool
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L294)
-
-Check if the transaction is completed.
-
-It doesn't indicate whether transaction is successful or not.
-Use `RocksDBTransaction.failed` for that.
-
-The completed transaction should not be re-used.
-
-**Returns**:
-
-`True` if transaction is completed, `False` otherwise.
-
-
-
-#### RocksDBPartitionTransaction.prepared
-
-```python
-@property
-def prepared() -> bool
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L308)
-
-Check if the transaction is in PREPARED status.
-
-Prepared transaction successfully flushed its changelog and cannot receive
-updates anymore, but its state is not yet flushed to the disk
-
-**Returns**:
-
-`True` if transaction is prepared, `False` otherwise.
-
-
-
-#### RocksDBPartitionTransaction.failed
-
-```python
-@property
-def failed() -> bool
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L320)
-
-Check if the transaction has failed.
-
-The failed transaction should not be re-used because the update cache
-and
-
-**Returns**:
-
-`True` if transaction is failed, `False` otherwise.
-
-
-
-#### RocksDBPartitionTransaction.changelog\_topic\_partition
-
-```python
-@property
-def changelog_topic_partition() -> Optional[Tuple[str, int]]
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L332)
-
-Return the changelog topic-partition for the StorePartition of this transaction.
-
-Returns `None` if changelog_producer is not provided.
-
-**Returns**:
-
-(topic, partition) or None
-
-
-
-#### RocksDBPartitionTransaction.as\_state
-
-```python
-def as_state(prefix: Any = DEFAULT_PREFIX) -> TransactionState
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/transaction.py#L346)
-
-Create a one-time use `TransactionState` object with a limited CRUD interface
-
-to be provided to `StreamingDataFrame` operations.
-
-The `TransactionState` will prefix all the keys with the supplied `prefix`
-for all underlying operations.
-
-**Arguments**:
-
-- `prefix`: a prefix to be used for all keys
-
-**Returns**:
-
-an instance of `TransactionState`
-
-
-
-## quixstreams.state.rocksdb
-
-
-
-## quixstreams.state.rocksdb.types
-
-
-
-## quixstreams.state.rocksdb.exceptions
-
-
-
-## quixstreams.state.rocksdb.serialization
-
-
-
-## quixstreams.state.recovery
-
-
-
-### RecoveryPartition
-
-```python
-class RecoveryPartition()
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L24)
-
-A changelog topic partition mapped to a respective `StorePartition` with helper
-methods to determine its current recovery status.
-
-Since `StorePartition`s do recovery directly, it also handles recovery transactions.
-
-
-
-#### RecoveryPartition.offset
-
-```python
-@property
-def offset() -> int
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L63)
-
-Get the changelog offset from the underlying `StorePartition`.
-
-**Returns**:
-
-changelog offset (int)
-
-
-
-#### RecoveryPartition.needs\_recovery
-
-```python
-@property
-def needs_recovery()
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L72)
-
-Determine whether recovery is necessary for underlying `StorePartition`.
+`True` if the key exists, `False` otherwise.
-
+
-#### RecoveryPartition.needs\_offset\_update
+#### RocksDBPartitionTransaction.prepare
```python
-@property
-def needs_offset_update()
+@_validate_transaction_status(PartitionTransactionStatus.STARTED)
+def prepare(processed_offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L81)
-
-Determine if an offset update is required.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L235)
-Usually checked during assign if recovery was not required.
+Produce changelog messages to the changelog topic for all changes accumulated
-
+in this transaction and prepare transaction to flush its state to the state
+store.
-#### RecoveryPartition.update\_offset
+After successful `prepare()`, the transaction status is changed to PREPARED,
+and it cannot receive updates anymore.
-```python
-def update_offset()
-```
+If changelog is disabled for this application, no updates will be produced
+to the changelog topic.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L89)
+**Arguments**:
-Update only the changelog offset of a StorePartition.
+- `processed_offset`: the offset of the latest processed message
-
+
-#### RecoveryPartition.recover\_from\_changelog\_message
+#### RocksDBPartitionTransaction.flush
```python
-def recover_from_changelog_message(
- changelog_message: ConfluentKafkaMessageProto)
+@_validate_transaction_status(PartitionTransactionStatus.STARTED,
+ PartitionTransactionStatus.PREPARED)
+def flush(processed_offset: Optional[int] = None,
+ changelog_offset: Optional[int] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L109)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L259)
-Recover the StorePartition using a message read from its respective changelog.
+Flush the recent updates to the database.
+
+It writes the WriteBatch to RocksDB and marks itself as finished.
+
+If writing fails, the transaction is marked as failed and
+cannot be used anymore.
+
+>***NOTE:*** If no keys have been modified during the transaction
+ (i.e. no "set" or "delete" have been called at least once), it will
+ not flush ANY data to the database including the offset to optimize
+ I/O.
**Arguments**:
-- `changelog_message`: A confluent kafka message (everything as bytes)
+- `processed_offset`: offset of the last processed message, optional.
+- `changelog_offset`: offset of the last produced changelog message,
+optional.
-
+
-#### RecoveryPartition.set\_watermarks
+#### RocksDBPartitionTransaction.completed
```python
-def set_watermarks(lowwater: int, highwater: int)
+@property
+def completed() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L121)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L294)
-Set the changelog watermarks as gathered from Consumer.get_watermark_offsets()
+Check if the transaction is completed.
-**Arguments**:
+It doesn't indicate whether transaction is successful or not.
+Use `RocksDBTransaction.failed` for that.
-- `lowwater`: topic partition lowwater
-- `highwater`: topic partition highwater
+The completed transaction should not be re-used.
-
+**Returns**:
-### ChangelogProducerFactory
+`True` if transaction is completed, `False` otherwise.
+
+
+
+#### RocksDBPartitionTransaction.prepared
```python
-class ChangelogProducerFactory()
+@property
+def prepared() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L132)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L308)
-Generates ChangelogProducers, which produce changelog messages to a StorePartition.
+Check if the transaction is in PREPARED status.
-
+Prepared transaction successfully flushed its changelog and cannot receive
+updates anymore, but its state is not yet flushed to the disk
-#### ChangelogProducerFactory.\_\_init\_\_
+**Returns**:
+
+`True` if transaction is prepared, `False` otherwise.
+
+
+
+#### RocksDBPartitionTransaction.failed
```python
-def __init__(changelog_name: str, producer: RowProducer)
+@property
+def failed() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L137)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L320)
-**Arguments**:
+Check if the transaction has failed.
-- `changelog_name`: changelog topic name
-- `producer`: a RowProducer (not shared with `Application` instance)
+The failed transaction should not be re-used because its update cache
+and WriteBatch may be left in an inconsistent state.
**Returns**:
-a ChangelogWriter instance
+`True` if transaction is failed, `False` otherwise.
-
+
-#### ChangelogProducerFactory.get\_partition\_producer
+#### RocksDBPartitionTransaction.changelog\_topic\_partition
```python
-def get_partition_producer(partition_num) -> "ChangelogProducer"
+@property
+def changelog_topic_partition() -> Optional[Tuple[str, int]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L147)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L332)
-Generate a ChangelogProducer for producing to a specific partition number
+Return the changelog topic-partition for the StorePartition of this transaction.
-(and thus StorePartition).
+Returns `None` if changelog_producer is not provided.
-**Arguments**:
+**Returns**:
-- `partition_num`: source topic partition number
+(topic, partition) or None
-
+
-### ChangelogProducer
+#### RocksDBPartitionTransaction.as\_state
```python
-class ChangelogProducer()
+def as_state(prefix: Any = DEFAULT_PREFIX) -> TransactionState
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L161)
-
-Generated for a `StorePartition` to produce state changes to its respective
-kafka changelog partition.
-
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/transaction.py#L346)
-#### ChangelogProducer.\_\_init\_\_
+Create a one-time use `TransactionState` object with a limited CRUD interface
-```python
-def __init__(changelog_name: str, partition: int, producer: RowProducer)
-```
+to be provided to `StreamingDataFrame` operations.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L167)
+The `TransactionState` will prefix all the keys with the supplied `prefix`
+for all underlying operations.
**Arguments**:
-- `changelog_name`: A changelog topic name
-- `partition`: source topic partition number
-- `producer`: a RowProducer (not shared with `Application` instance)
-
-
+- `prefix`: a prefix to be used for all keys
-#### ChangelogProducer.produce
+**Returns**:
-```python
-def produce(key: bytes,
- value: Optional[bytes] = None,
- headers: Optional[MessageHeadersMapping] = None)
-```
+an instance of `TransactionState`
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L190)
+
-Produce a message to a changelog topic partition.
+## quixstreams.state.rocksdb.types
-**Arguments**:
+
-- `key`: message key (same as state key, including prefixes)
-- `value`: message value (same as state value)
-- `headers`: message headers (includes column family info)
+## quixstreams.state.rocksdb.partition
-
+
-### RecoveryManager
+### RocksDBStorePartition
```python
-class RecoveryManager()
+class RocksDBStorePartition(StorePartition)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L215)
-
-Manages all consumer-related aspects of recovery, including:
- - assigning/revoking, pausing/resuming topic partitions (especially changelogs)
- - consuming changelog messages until state is updated fully.
-
-Also tracks/manages `RecoveryPartitions`, which are assigned/tracked only if
-recovery for that changelog partition is required.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L40)
-Recovery is attempted from the `Application` after any new partition assignment.
+A base class to access state in RocksDB.
-
+It represents a single RocksDB database.
-#### RecoveryManager.partitions
+Responsibilities:
+ 1. Managing access to the RocksDB instance
+ 2. Creating transactions to interact with data
+ 3. Flushing WriteBatches to the RocksDB
-```python
-@property
-def partitions() -> Dict[int, Dict[str, RecoveryPartition]]
-```
+It opens the RocksDB on `__init__`. If the db is locked by another process,
+it will retry according to `open_max_retries` and `open_retry_backoff` options.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L234)
+**Arguments**:
-Returns a mapping of assigned RecoveryPartitions in the following format:
-{: {: }}
+- `path`: an absolute path to the RocksDB folder
+- `options`: RocksDB options. If `None`, the default options will be used.
-
+
-#### RecoveryManager.has\_assignments
+#### RocksDBStorePartition.begin
```python
-@property
-def has_assignments() -> bool
+def begin() -> RocksDBPartitionTransaction
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L242)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L75)
-Whether the Application has assigned RecoveryPartitions
+Create a new `RocksDBTransaction` object.
+
+Using `RocksDBTransaction` is a recommended way for accessing the data.
**Returns**:
-has assignments, as bool
+an instance of `RocksDBTransaction`
-
+
-#### RecoveryManager.recovering
+#### RocksDBStorePartition.recover\_from\_changelog\_message
```python
-@property
-def recovering() -> bool
+def recover_from_changelog_message(
+ changelog_message: ConfluentKafkaMessageProto, committed_offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L251)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L128)
-Whether the Application is currently recovering
+Updates state from a given changelog message.
-**Returns**:
+The actual update may be skipped when both conditions are met:
-is recovering, as bool
+- The changelog message has headers with the processed message offset.
+- This processed offset is larger than the latest committed offset for the same
+ topic partition.
-
+This way the state does not apply the state changes for not-yet-committed
+messages and improves the state consistency guarantees.
-#### RecoveryManager.register\_changelog
+**Arguments**:
+
+- `changelog_message`: A raw Confluent message read from a changelog topic.
+- `committed_offset`: latest committed offset for the partition
+
+
+
+#### RocksDBStorePartition.set\_changelog\_offset
```python
-def register_changelog(topic_name: str, store_name: str) -> Topic
+def set_changelog_offset(changelog_offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L259)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L169)
-Register a changelog Topic with the TopicManager.
+Set the changelog offset based on a message (usually an "offset-only" message).
+
+Used during recovery.
**Arguments**:
-- `topic_name`: source topic name
-- `store_name`: name of the store
+- `changelog_offset`: A changelog offset
-
+
-#### RecoveryManager.do\_recovery
+#### RocksDBStorePartition.write
```python
-def do_recovery()
+def write(batch: WriteBatch)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L271)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L179)
-If there are any active RecoveryPartitions, do a recovery procedure.
+Write `WriteBatch` to RocksDB
-After, will resume normal `Application` processing.
+**Arguments**:
-
+- `batch`: an instance of `rocksdict.WriteBatch`
+
+
-#### RecoveryManager.assign\_partition
+#### RocksDBStorePartition.get
```python
-def assign_partition(topic: str, partition: int, committed_offset: int,
- store_partitions: Dict[str, StorePartition])
+def get(key: bytes,
+ default: Any = None,
+ cf_name: str = "default") -> Union[None, bytes, Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L324)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L186)
-Assigns `StorePartition`s (as `RecoveryPartition`s) ONLY IF recovery required.
+Get a key from RocksDB.
-Pauses active consumer partitions as needed.
+**Arguments**:
-
+- `key`: a key encoded to `bytes`
+- `default`: a default value to return if the key is not found.
+- `cf_name`: rocksdb column family name. Default - "default"
-#### RecoveryManager.revoke\_partition
+**Returns**:
+
+a value if the key is present in the DB. Otherwise, `default`
+
+
+
+#### RocksDBStorePartition.exists
```python
-def revoke_partition(partition_num: int)
+def exists(key: bytes, cf_name: str = "default") -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/recovery.py#L391)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L200)
-revoke ALL StorePartitions (across all Stores) for a given partition number
+Check if a key is present in the DB.
**Arguments**:
-- `partition_num`: partition number of source topic
-
-
-
-## quixstreams.state
+- `key`: a key encoded to `bytes`.
+- `cf_name`: rocksdb column family name. Default - "default"
-
+**Returns**:
-## quixstreams.state.types
+`True` if the key is present, `False` otherwise.
-
+
-### Store
+#### RocksDBStorePartition.get\_processed\_offset
```python
-class Store(Protocol)
+def get_processed_offset() -> Optional[int]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L11)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L211)
-Abstract state store.
+Get last processed offset for the given partition
-It keeps track of individual store partitions and provides access to the
-partitions' transactions.
+**Returns**:
-
+offset or `None` if there's no processed offset yet
-#### Store.topic
+
+
+#### RocksDBStorePartition.get\_changelog\_offset
```python
-@property
-def topic() -> str
+def get_changelog_offset() -> Optional[int]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L22)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L223)
-Topic name
+Get offset that the changelog is up-to-date with.
-
+**Returns**:
-#### Store.name
+offset or `None` if there's no processed offset yet
+
+
+
+#### RocksDBStorePartition.close
```python
-@property
-def name() -> str
+def close()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L29)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L233)
-Store name
+Close the underlying RocksDB
-
+
-#### Store.partitions
+#### RocksDBStorePartition.path
```python
@property
-def partitions() -> Dict[int, "StorePartition"]
+def path() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L36)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L246)
-Mapping of assigned store partitions
+Absolute path to RocksDB database folder
**Returns**:
-dict of "{partition: }"
+file path
-
+
-#### Store.assign\_partition
+#### RocksDBStorePartition.destroy
```python
-def assign_partition(partition: int) -> "StorePartition"
+@classmethod
+def destroy(cls, path: str)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L43)
-
-Assign new store partition
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L254)
-**Arguments**:
+Delete underlying RocksDB database
-- `partition`: partition number
+The database must be closed first.
-**Returns**:
+**Arguments**:
-instance of `StorePartition`
+- `path`: an absolute path to the RocksDB folder
-
+
-#### Store.revoke\_partition
+#### RocksDBStorePartition.get\_column\_family\_handle
```python
-def revoke_partition(partition: int)
+def get_column_family_handle(cf_name: str) -> ColumnFamily
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L52)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L264)
-Revoke assigned store partition
+Get a column family handle to pass to a WriteBatch.
+
+This method will cache the CF handle instance to avoid creating them
+repeatedly.
**Arguments**:
-- `partition`: partition number
+- `cf_name`: column family name
-
+**Returns**:
-#### Store.start\_partition\_transaction
+instance of `rocksdict.ColumnFamily`
+
+
+
+#### RocksDBStorePartition.get\_column\_family
```python
-def start_partition_transaction(partition: int) -> "PartitionTransaction"
+def get_column_family(cf_name: str) -> Rdict
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L60)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/partition.py#L285)
-Start a new partition transaction.
+Get a column family instance.
-`PartitionTransaction` is the primary interface for working with data in Stores.
+This method will cache the CF instance to avoid creating them repeatedly.
**Arguments**:
-- `partition`: partition number
+- `cf_name`: column family name
**Returns**:
-instance of `PartitionTransaction`
+instance of `rocksdict.Rdict` for the given column family
-
+
-#### Store.close
+## quixstreams.state
+
+
+
+## quixstreams.state.manager
+
+
+
+### StateStoreManager
```python
-def close()
+class StateStoreManager()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L69)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L24)
-Close store and revoke all store partitions
+Class for managing state stores and partitions.
-
+StateStoreManager is responsible for:
+ - reacting to rebalance callbacks
+ - managing the individual state stores
+ - providing access to store transactions
-### StorePartition
+
+
+#### StateStoreManager.stores
```python
-class StorePartition(Protocol)
+@property
+def stores() -> Dict[str, Dict[str, Store]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L80)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L62)
-A base class to access state in the underlying storage.
-It represents a single instance of some storage (e.g. a single database for
-the persistent storage).
+Map of registered state stores
-
+**Returns**:
-#### StorePartition.path
+dict in format {topic: {store_name: store}}
+
+
+
+#### StateStoreManager.recovery\_required
```python
@property
-def path() -> str
+def recovery_required() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L89)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L70)
-Absolute path to RocksDB database folder
+Whether recovery needs to be done.
-
+
-#### StorePartition.begin
+#### StateStoreManager.using\_changelogs
```python
-def begin() -> "PartitionTransaction"
+@property
+def using_changelogs() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L95)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L79)
-State new `PartitionTransaction`
+Whether the StateStoreManager is using changelog topics
-
+**Returns**:
-#### StorePartition.recover\_from\_changelog\_message
+using changelogs, as bool
+
+
+
+#### StateStoreManager.do\_recovery
```python
-def recover_from_changelog_message(
- changelog_message: ConfluentKafkaMessageProto, committed_offset: int)
+def do_recovery()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L100)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L87)
-Updates state from a given changelog message.
+Perform a state recovery, if necessary.
-**Arguments**:
+
-- `changelog_message`: A raw Confluent message read from a changelog topic.
-- `committed_offset`: latest committed offset for the partition
+#### StateStoreManager.stop\_recovery
-
+```python
+def stop_recovery()
+```
-#### StorePartition.get\_processed\_offset
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L93)
+
+Stop recovery (called during app shutdown).
+
+
+
+#### StateStoreManager.get\_store
```python
-def get_processed_offset() -> Optional[int]
+def get_store(topic: str, store_name: str = DEFAULT_STATE_STORE_NAME) -> Store
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L111)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L99)
-Get last processed offset for the given partition
+Get a store for given name and topic
+
+**Arguments**:
+
+- `topic`: topic name
+- `store_name`: store name
**Returns**:
-offset or `None` if there's no processed offset yet
+instance of `Store`
-
+
-#### StorePartition.get\_changelog\_offset
+#### StateStoreManager.register\_store
```python
-def get_changelog_offset() -> Optional[int]
+def register_store(topic_name: str,
+ store_name: str = DEFAULT_STATE_STORE_NAME)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L118)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L132)
-Get offset that the changelog is up-to-date with.
+Register a state store to be managed by StateStoreManager.
-**Returns**:
+During processing, the StateStoreManager will react to rebalancing callbacks
+and assign/revoke the partitions for registered stores.
-offset or `None` if there's no processed offset yet
+Each store can be registered only once for each topic.
-
+**Arguments**:
-#### StorePartition.set\_changelog\_offset
+- `topic_name`: topic name
+- `store_name`: store name
+
+
+
+#### StateStoreManager.register\_windowed\_store
```python
-def set_changelog_offset(changelog_offset: int)
+def register_windowed_store(topic_name: str, store_name: str)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L125)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L157)
-Set the changelog offset based on a message (usually an "offset-only" message).
+Register a windowed state store to be managed by StateStoreManager.
-Used during recovery.
+During processing, the StateStoreManager will react to rebalancing callbacks
+and assign/revoke the partitions for registered stores.
+
+Each window store can be registered only once for each topic.
**Arguments**:
-- `changelog_offset`: A changelog offset
+- `topic_name`: topic name
+- `store_name`: store name
-
+
-### State
+#### StateStoreManager.clear\_stores
```python
-class State(Protocol)
+def clear_stores()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L136)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L180)
-Primary interface for working with key-value state data from `StreamingDataFrame`
+Delete all state stores managed by StateStoreManager.
-
+
-#### State.get
+#### StateStoreManager.on\_partition\_assign
```python
-def get(key: Any, default: Any = None) -> Optional[Any]
+def on_partition_assign(topic: str, partition: int,
+ committed_offset: int) -> List[StorePartition]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L141)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L195)
-Get the value for key if key is present in the state, else default
+Assign store partitions for each registered store for the given `TopicPartition`
+
+and return a list of assigned `StorePartition` objects.
**Arguments**:
-- `key`: key
-- `default`: default value to return if the key is not found
+- `topic`: Kafka topic name
+- `partition`: Kafka topic partition
+- `committed_offset`: latest committed offset for the partition
**Returns**:
-value or None if the key is not found and `default` is not provided
+list of assigned `StorePartition`
-
+
-#### State.set
+#### StateStoreManager.on\_partition\_revoke
```python
-def set(key: Any, value: Any)
+def on_partition_revoke(topic: str, partition: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L151)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L221)
-Set value for the key.
+Revoke store partitions for each registered store for the given `TopicPartition`
**Arguments**:
-- `key`: key
-- `value`: value
+- `topic`: Kafka topic name
+- `partition`: Kafka topic partition
-
+
-#### State.delete
+#### StateStoreManager.init
```python
-def delete(key: Any)
+def init()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L159)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L234)
-Delete value for the key.
-
-This function always returns `None`, even if value is not found.
-
-**Arguments**:
+Initialize `StateStoreManager` and create a store directory
-- `key`: key
-
+
-#### State.exists
+#### StateStoreManager.close
```python
-def exists(key: Any) -> bool
+def close()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L168)
-
-Check if the key exists in state.
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/manager.py#L241)
-- `key`: key
+Close all registered stores
-**Returns**:
+
-True if key exists, False otherwise
+## quixstreams.state.state
-
+
-### PartitionTransaction
+### TransactionState
```python
-class PartitionTransaction(Protocol)
+class TransactionState(State)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L177)
-
-A transaction class to perform simple key-value operations like
-"get", "set", "delete" and "exists" on a single storage partition.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L6)
-
+
-#### PartitionTransaction.as\_state
+#### TransactionState.\_\_init\_\_
```python
-def as_state(prefix: Any) -> State
+def __init__(prefix: bytes, transaction: PartitionTransaction)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L183)
-
-Create an instance implementing the `State` protocol to be provided
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L12)
-to `StreamingDataFrame` functions.
-All operations called on this State object will be prefixed with
-the supplied `prefix`.
+Simple key-value state to be provided into `StreamingDataFrame` functions
-**Returns**:
+**Arguments**:
-an instance implementing the `State` protocol
+- `transaction`: instance of `PartitionTransaction`
-
+
-#### PartitionTransaction.get
+#### TransactionState.get
```python
-def get(key: Any, prefix: bytes, default: Any = None) -> Optional[Any]
+def get(key: Any, default: Any = None) -> Optional[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L194)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L21)
Get the value for key if key is present in the state, else default
**Arguments**:
- `key`: key
-- `prefix`: a key prefix
- `default`: default value to return if the key is not found
**Returns**:
value or None if the key is not found and `default` is not provided
-
+
-#### PartitionTransaction.set
+#### TransactionState.set
```python
-def set(key: Any, prefix: bytes, value: Any)
+def set(key: Any, value: Any)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L205)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L31)
Set value for the key.
**Arguments**:
- `key`: key
-- `prefix`: a key prefix
- `value`: value
-
+
-#### PartitionTransaction.delete
+#### TransactionState.delete
```python
-def delete(key: Any, prefix: bytes)
+def delete(key: Any)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L214)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L39)
Delete value for the key.
@@ -5817,1749 +5905,1732 @@ This function always returns `None`, even if value is not found.
**Arguments**:
- `key`: key
-- `prefix`: a key prefix
-
+
-#### PartitionTransaction.exists
+#### TransactionState.exists
```python
-def exists(key: Any, prefix: bytes) -> bool
+def exists(key: Any) -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L224)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/state.py#L48)
Check if the key exists in state.
**Arguments**:
- `key`: key
-- `prefix`: a key prefix
**Returns**:
True if key exists, False otherwise
-
-
-#### PartitionTransaction.failed
-
-```python
-@property
-def failed() -> bool
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L234)
-
-Return `True` if transaction failed to update data at some point.
-
-Failed transactions cannot be re-used.
-
-**Returns**:
+
-bool
+## quixstreams.state.types
-
+
-#### PartitionTransaction.completed
+### Store
```python
-@property
-def completed() -> bool
+class Store(Protocol)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L244)
-
-Return `True` if transaction is successfully completed.
-
-Completed transactions cannot be re-used.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L11)
-**Returns**:
+Abstract state store.
-bool
+It keeps track of individual store partitions and provides access to the
+partitions' transactions.
-
+
-#### PartitionTransaction.prepared
+#### Store.topic
```python
@property
-def prepared() -> bool
+def topic() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L254)
-
-Return `True` if transaction is prepared completed.
-
-Prepared transactions cannot receive new updates, but can be flushed.
-
-**Returns**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L22)
-bool
+Topic name
-
+
-#### PartitionTransaction.prepare
+#### Store.name
```python
-def prepare(processed_offset: int)
+@property
+def name() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L263)
-
-Produce changelog messages to the changelog topic for all changes accumulated
-
-in this transaction and prepare transcation to flush its state to the state
-store.
-
-After successful `prepare()`, the transaction status is changed to PREPARED,
-and it cannot receive updates anymore.
-
-If changelog is disabled for this application, no updates will be produced
-to the changelog topic.
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L29)
-- `processed_offset`: the offset of the latest processed message
+Store name
-
+
-#### PartitionTransaction.changelog\_topic\_partition
+#### Store.partitions
```python
@property
-def changelog_topic_partition() -> Optional[Tuple[str, int]]
+def partitions() -> Dict[int, "StorePartition"]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L279)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L36)
-Return the changelog topic-partition for the StorePartition of this transaction.
-
-Returns `None` if changelog_producer is not provided.
+Mapping of assigned store partitions
**Returns**:
-(topic, partition) or None
-
-
-
-#### PartitionTransaction.flush
-
-```python
-def flush(processed_offset: Optional[int] = None,
- changelog_offset: Optional[int] = None)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L288)
-
-Flush the recent updates to the storage.
-
-**Arguments**:
-
-- `processed_offset`: offset of the last processed message, optional.
-- `changelog_offset`: offset of the last produced changelog message,
-optional.
-
-
-
-### WindowedState
-
-```python
-class WindowedState(Protocol)
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L306)
-
-A windowed state to be provided into `StreamingDataFrame` window functions.
+dict of "{partition: }"
-
+
-#### WindowedState.get\_window
+#### Store.assign\_partition
```python
-def get_window(start_ms: int,
- end_ms: int,
- default: Any = None) -> Optional[Any]
+def assign_partition(partition: int) -> "StorePartition"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L311)
-
-Get the value of the window defined by `start` and `end` timestamps
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L43)
-if the window is present in the state, else default
+Assign new store partition
**Arguments**:
-- `start_ms`: start of the window in milliseconds
-- `end_ms`: end of the window in milliseconds
-- `default`: default value to return if the key is not found
+- `partition`: partition number
**Returns**:
-value or None if the key is not found and `default` is not provided
+instance of `StorePartition`
-
+
-#### WindowedState.update\_window
+#### Store.revoke\_partition
```python
-def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int)
+def revoke_partition(partition: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L325)
-
-Set a value for the window.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L52)
-This method will also update the latest observed timestamp in state partition
-using the provided `timestamp`.
+Revoke assigned store partition
**Arguments**:
-- `start_ms`: start of the window in milliseconds
-- `end_ms`: end of the window in milliseconds
-- `value`: value of the window
-- `timestamp_ms`: current message timestamp in milliseconds
+- `partition`: partition number
-
+
-#### WindowedState.get\_latest\_timestamp
+#### Store.start\_partition\_transaction
```python
-def get_latest_timestamp() -> int
+def start_partition_transaction(partition: int) -> "PartitionTransaction"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L339)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L60)
-Get the latest observed timestamp for the current state partition.
+Start a new partition transaction.
-Use this timestamp to determine if the arriving event is late and should be
-discarded from the processing.
+`PartitionTransaction` is the primary interface for working with data in Stores.
+
+**Arguments**:
+
+- `partition`: partition number
**Returns**:
-latest observed event timestamp in milliseconds
+instance of `PartitionTransaction`
-
+
-#### WindowedState.expire\_windows
+#### Store.close
```python
-def expire_windows(duration_ms: int,
- grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]]
+def close()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L350)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L69)
-Get a list of expired windows from RocksDB considering the current
+Close store and revoke all store partitions
-latest timestamp, window duration and grace period.
+
-It also marks the latest found window as expired in the expiration index, so
-calling this method multiple times will yield different results for the same
-"latest timestamp".
+### StorePartition
-**Arguments**:
+```python
+class StorePartition(Protocol)
+```
-- `duration_ms`: duration of the windows in milliseconds
-- `grace_ms`: grace period in milliseconds. Default - "0"
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L80)
-
+A base class to access state in the underlying storage.
+It represents a single instance of some storage (e.g. a single database for
+the persistent storage).
-### WindowedPartitionTransaction
+
+
+#### StorePartition.path
```python
-class WindowedPartitionTransaction(Protocol)
+@property
+def path() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L367)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L89)
-
+Absolute path to RocksDB database folder
-#### WindowedPartitionTransaction.failed
+
+
+#### StorePartition.begin
```python
-@property
-def failed() -> bool
+def begin() -> "PartitionTransaction"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L370)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L95)
-Return `True` if transaction failed to update data at some point.
+Start a new `PartitionTransaction`
-Failed transactions cannot be re-used.
+
-**Returns**:
+#### StorePartition.recover\_from\_changelog\_message
-bool
+```python
+def recover_from_changelog_message(
+ changelog_message: ConfluentKafkaMessageProto, committed_offset: int)
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L100)
-#### WindowedPartitionTransaction.completed
+Updates state from a given changelog message.
+
+**Arguments**:
+
+- `changelog_message`: A raw Confluent message read from a changelog topic.
+- `committed_offset`: latest committed offset for the partition
+
+
+
+#### StorePartition.get\_processed\_offset
```python
-@property
-def completed() -> bool
+def get_processed_offset() -> Optional[int]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L380)
-
-Return `True` if transaction is successfully completed.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L111)
-Completed transactions cannot be re-used.
+Get last processed offset for the given partition
**Returns**:
-bool
+offset or `None` if there's no processed offset yet
-
+
-#### WindowedPartitionTransaction.prepared
+#### StorePartition.get\_changelog\_offset
```python
-@property
-def prepared() -> bool
+def get_changelog_offset() -> Optional[int]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L390)
-
-Return `True` if transaction is prepared completed.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L118)
-Prepared transactions cannot receive new updates, but can be flushed.
+Get offset that the changelog is up-to-date with.
**Returns**:
-bool
+offset or `None` if there's no changelog offset yet
-
+
-#### WindowedPartitionTransaction.prepare
+#### StorePartition.set\_changelog\_offset
```python
-def prepare(processed_offset: int)
+def set_changelog_offset(changelog_offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L399)
-
-Produce changelog messages to the changelog topic for all changes accumulated
-
-in this transaction and prepare transcation to flush its state to the state
-store.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L125)
-After successful `prepare()`, the transaction status is changed to PREPARED,
-and it cannot receive updates anymore.
+Set the changelog offset based on a message (usually an "offset-only" message).
-If changelog is disabled for this application, no updates will be produced
-to the changelog topic.
+Used during recovery.
**Arguments**:
-- `processed_offset`: the offset of the latest processed message
+- `changelog_offset`: A changelog offset
-
+
-#### WindowedPartitionTransaction.get\_window
+### State
```python
-def get_window(start_ms: int,
- end_ms: int,
- prefix: bytes,
- default: Any = None) -> Optional[Any]
+class State(Protocol)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L416)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L136)
-Get the value of the window defined by `start` and `end` timestamps
+Primary interface for working with key-value state data from `StreamingDataFrame`
-if the window is present in the state, else default
+
+
+#### State.get
+
+```python
+def get(key: Any, default: Any = None) -> Optional[Any]
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L141)
+
+Get the value for key if key is present in the state, else default
**Arguments**:
-- `start_ms`: start of the window in milliseconds
-- `end_ms`: end of the window in milliseconds
-- `prefix`: a key prefix
+- `key`: key
- `default`: default value to return if the key is not found
**Returns**:
value or None if the key is not found and `default` is not provided
-
+
-#### WindowedPartitionTransaction.update\_window
+#### State.set
```python
-def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int,
- prefix: bytes)
+def set(key: Any, value: Any)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L435)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L151)
-Set a value for the window.
-
-This method will also update the latest observed timestamp in state partition
-using the provided `timestamp`.
+Set value for the key.
**Arguments**:
-- `start_ms`: start of the window in milliseconds
-- `end_ms`: end of the window in milliseconds
-- `value`: value of the window
-- `timestamp_ms`: current message timestamp in milliseconds
-- `prefix`: a key prefix
+- `key`: key
+- `value`: value
-
+
-#### WindowedPartitionTransaction.get\_latest\_timestamp
+#### State.delete
```python
-def get_latest_timestamp() -> int
+def delete(key: Any)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L452)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L159)
-Get the latest observed timestamp for the current state partition.
+Delete value for the key.
-Use this timestamp to determine if the arriving event is late and should be
-discarded from the processing.
+This function always returns `None`, even if value is not found.
-**Returns**:
+**Arguments**:
-latest observed event timestamp in milliseconds
+- `key`: key
-
+
-#### WindowedPartitionTransaction.expire\_windows
+#### State.exists
```python
-def expire_windows(duration_ms: int, prefix: bytes, grace_ms: int = 0)
+def exists(key: Any) -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L463)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L168)
-Get a list of expired windows from RocksDB considering the current
+Check if the key exists in state.
-latest timestamp, window duration and grace period.
+**Arguments**:
-It also marks the latest found window as expired in the expiration index, so
-calling this method multiple times will yield different results for the same
-"latest timestamp".
+- `key`: key
-**Arguments**:
+**Returns**:
-- `duration_ms`: duration of the windows in milliseconds
-- `prefix`: a key prefix
-- `grace_ms`: grace period in milliseconds. Default - "0"
+True if key exists, False otherwise
-
+
-#### WindowedPartitionTransaction.flush
+### PartitionTransaction
```python
-def flush(processed_offset: Optional[int] = None,
- changelog_offset: Optional[int] = None)
+class PartitionTransaction(Protocol)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L478)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L177)
-Flush the recent updates to the storage.
+A transaction class to perform simple key-value operations like
+"get", "set", "delete" and "exists" on a single storage partition.
-**Arguments**:
+
-- `processed_offset`: offset of the last processed message, optional.
-- `changelog_offset`: offset of the last produced changelog message,
-optional.
+#### PartitionTransaction.as\_state
-
+```python
+def as_state(prefix: Any) -> State
+```
-#### WindowedPartitionTransaction.changelog\_topic\_partition
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L183)
+
+Create an instance implementing the `State` protocol to be provided
+
+to `StreamingDataFrame` functions.
+All operations called on this State object will be prefixed with
+the supplied `prefix`.
+
+**Returns**:
+
+an instance implementing the `State` protocol
+
+
+
+#### PartitionTransaction.get
```python
-@property
-def changelog_topic_partition() -> Optional[Tuple[str, int]]
+def get(key: Any, prefix: bytes, default: Any = None) -> Optional[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L492)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L194)
-Return the changelog topic-partition for the StorePartition of this transaction.
+Get the value for key if key is present in the state, else default
+
+**Arguments**:
-Returns `None` if changelog_producer is not provided.
+- `key`: key
+- `prefix`: a key prefix
+- `default`: default value to return if the key is not found
**Returns**:
-(topic, partition) or None
+value or None if the key is not found and `default` is not provided
-
+
-### PartitionRecoveryTransaction
+#### PartitionTransaction.set
```python
-class PartitionRecoveryTransaction(Protocol)
+def set(key: Any, prefix: bytes, value: Any)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L506)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L205)
-A class for managing recovery for a StorePartition from a changelog message
+Set value for the key.
-
+**Arguments**:
-#### PartitionRecoveryTransaction.flush
+- `key`: key
+- `prefix`: a key prefix
+- `value`: value
+
+
+
+#### PartitionTransaction.delete
```python
-def flush()
+def delete(key: Any, prefix: bytes)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L513)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L214)
-Flush the recovery update to the storage.
+Delete value for the key.
-
+This function always returns `None`, even if value is not found.
-### PartitionTransactionStatus
+**Arguments**:
-```python
-class PartitionTransactionStatus(enum.Enum)
-```
+- `key`: key
+- `prefix`: a key prefix
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L520)
+
-
+#### PartitionTransaction.exists
-#### STARTED
+```python
+def exists(key: Any, prefix: bytes) -> bool
+```
-Transaction is started and accepts updates
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L224)
-
+Check if the key exists in state.
-#### PREPARED
+**Arguments**:
-Transaction is prepared, it can no longer receive updates
+- `key`: key
+- `prefix`: a key prefix
-
+**Returns**:
-#### COMPLETE
+True if key exists, False otherwise
-Transaction is fully completed, it cannot be used anymore
+
-
+#### PartitionTransaction.failed
-#### FAILED
+```python
+@property
+def failed() -> bool
+```
-Transaction is failed, it cannot be used anymore
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L234)
-
+Return `True` if transaction failed to update data at some point.
-## quixstreams.state.exceptions
+Failed transactions cannot be re-used.
-
+**Returns**:
-## quixstreams.state.manager
+bool
-
+
-### StateStoreManager
+#### PartitionTransaction.completed
```python
-class StateStoreManager()
+@property
+def completed() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L24)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L244)
-Class for managing state stores and partitions.
+Return `True` if transaction is successfully completed.
-StateStoreManager is responsible for:
- - reacting to rebalance callbacks
- - managing the individual state stores
- - providing access to store transactions
+Completed transactions cannot be re-used.
-
+**Returns**:
-#### StateStoreManager.stores
+bool
+
+
+
+#### PartitionTransaction.prepared
```python
@property
-def stores() -> Dict[str, Dict[str, Store]]
+def prepared() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L62)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L254)
-Map of registered state stores
+Return `True` if transaction is prepared.
+
+Prepared transactions cannot receive new updates, but can be flushed.
**Returns**:
-dict in format {topic: {store_name: store}}
+bool
-
+
-#### StateStoreManager.recovery\_required
+#### PartitionTransaction.prepare
```python
-@property
-def recovery_required() -> bool
+def prepare(processed_offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L70)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L263)
-Whether recovery needs to be done.
+Produce changelog messages to the changelog topic for all changes accumulated
-
+in this transaction and prepare transaction to flush its state to the state
+store.
-#### StateStoreManager.using\_changelogs
+After successful `prepare()`, the transaction status is changed to PREPARED,
+and it cannot receive updates anymore.
+
+If changelog is disabled for this application, no updates will be produced
+to the changelog topic.
+
+**Arguments**:
+
+- `processed_offset`: the offset of the latest processed message
+
+
+
+#### PartitionTransaction.changelog\_topic\_partition
```python
@property
-def using_changelogs() -> bool
+def changelog_topic_partition() -> Optional[Tuple[str, int]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L79)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L279)
-Whether the StateStoreManager is using changelog topics
+Return the changelog topic-partition for the StorePartition of this transaction.
+
+Returns `None` if changelog_producer is not provided.
**Returns**:
-using changelogs, as bool
+(topic, partition) or None
-
+
-#### StateStoreManager.do\_recovery
+#### PartitionTransaction.flush
```python
-def do_recovery()
+def flush(processed_offset: Optional[int] = None,
+ changelog_offset: Optional[int] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L87)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L288)
-Perform a state recovery, if necessary.
+Flush the recent updates to the storage.
-
+**Arguments**:
-#### StateStoreManager.stop\_recovery
+- `processed_offset`: offset of the last processed message, optional.
+- `changelog_offset`: offset of the last produced changelog message,
+optional.
+
+
+
+### WindowedState
```python
-def stop_recovery()
+class WindowedState(Protocol)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L93)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L306)
-Stop recovery (called during app shutdown).
+A windowed state to be provided into `StreamingDataFrame` window functions.
-
+
-#### StateStoreManager.get\_store
+#### WindowedState.get\_window
```python
-def get_store(topic: str, store_name: str = DEFAULT_STATE_STORE_NAME) -> Store
+def get_window(start_ms: int,
+ end_ms: int,
+ default: Any = None) -> Optional[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L99)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L311)
-Get a store for given name and topic
+Get the value of the window defined by `start` and `end` timestamps
+
+if the window is present in the state, else default
**Arguments**:
-- `topic`: topic name
-- `store_name`: store name
+- `start_ms`: start of the window in milliseconds
+- `end_ms`: end of the window in milliseconds
+- `default`: default value to return if the key is not found
**Returns**:
-instance of `Store`
+value or None if the key is not found and `default` is not provided
-
+
-#### StateStoreManager.register\_store
+#### WindowedState.update\_window
```python
-def register_store(topic_name: str,
- store_name: str = DEFAULT_STATE_STORE_NAME)
+def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L132)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L325)
-Register a state store to be managed by StateStoreManager.
-
-During processing, the StateStoreManager will react to rebalancing callbacks
-and assign/revoke the partitions for registered stores.
+Set a value for the window.
-Each store can be registered only once for each topic.
+This method will also update the latest observed timestamp in state partition
+using the provided `timestamp`.
**Arguments**:
-- `topic_name`: topic name
-- `store_name`: store name
+- `start_ms`: start of the window in milliseconds
+- `end_ms`: end of the window in milliseconds
+- `value`: value of the window
+- `timestamp_ms`: current message timestamp in milliseconds
-
+
-#### StateStoreManager.register\_windowed\_store
+#### WindowedState.get\_latest\_timestamp
```python
-def register_windowed_store(topic_name: str, store_name: str)
+def get_latest_timestamp() -> int
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L157)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L339)
-Register a windowed state store to be managed by StateStoreManager.
-
-During processing, the StateStoreManager will react to rebalancing callbacks
-and assign/revoke the partitions for registered stores.
+Get the latest observed timestamp for the current state partition.
-Each window store can be registered only once for each topic.
+Use this timestamp to determine if the arriving event is late and should be
+discarded from the processing.
-**Arguments**:
+**Returns**:
-- `topic_name`: topic name
-- `store_name`: store name
+latest observed event timestamp in milliseconds
-
+
-#### StateStoreManager.clear\_stores
+#### WindowedState.expire\_windows
```python
-def clear_stores()
+def expire_windows(duration_ms: int,
+ grace_ms: int = 0) -> List[Tuple[Tuple[int, int], Any]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L180)
-
-Delete all state stores managed by StateStoreManager.
-
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L350)
-#### StateStoreManager.on\_partition\_assign
+Get a list of expired windows from RocksDB considering the current
-```python
-def on_partition_assign(topic: str, partition: int,
- committed_offset: int) -> List[StorePartition]
-```
+latest timestamp, window duration and grace period.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L195)
+It also marks the latest found window as expired in the expiration index, so
+calling this method multiple times will yield different results for the same
+"latest timestamp".
-Assign store partitions for each registered store for the given `TopicPartition`
+**Arguments**:
-and return a list of assigned `StorePartition` objects.
+- `duration_ms`: duration of the windows in milliseconds
+- `grace_ms`: grace period in milliseconds. Default - "0"
-**Arguments**:
+
-- `topic`: Kafka topic name
-- `partition`: Kafka topic partition
-- `committed_offset`: latest committed offset for the partition
+### WindowedPartitionTransaction
-**Returns**:
+```python
+class WindowedPartitionTransaction(Protocol)
+```
-list of assigned `StorePartition`
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L367)
-
+
-#### StateStoreManager.on\_partition\_revoke
+#### WindowedPartitionTransaction.failed
```python
-def on_partition_revoke(topic: str, partition: int)
+@property
+def failed() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L221)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L370)
-Revoke store partitions for each registered store for the given `TopicPartition`
+Return `True` if transaction failed to update data at some point.
-**Arguments**:
+Failed transactions cannot be re-used.
-- `topic`: Kafka topic name
-- `partition`: Kafka topic partition
+**Returns**:
-
+bool
-#### StateStoreManager.init
+
+
+#### WindowedPartitionTransaction.completed
```python
-def init()
+@property
+def completed() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L234)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L380)
-Initialize `StateStoreManager` and create a store directory
+Return `True` if transaction is successfully completed.
+Completed transactions cannot be re-used.
-
+**Returns**:
-#### StateStoreManager.close
+bool
+
+
+
+#### WindowedPartitionTransaction.prepared
```python
-def close()
+@property
+def prepared() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/manager.py#L241)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L390)
-Close all registered stores
+Return `True` if transaction is prepared.
-
+Prepared transactions cannot receive new updates, but can be flushed.
-## quixstreams.state.state
+**Returns**:
-
+bool
-### TransactionState
+
+
+#### WindowedPartitionTransaction.prepare
```python
-class TransactionState(State)
+def prepare(processed_offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L6)
-
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L399)
-#### TransactionState.\_\_init\_\_
+Produce changelog messages to the changelog topic for all changes accumulated
-```python
-def __init__(prefix: bytes, transaction: PartitionTransaction)
-```
+in this transaction and prepare transaction to flush its state to the state
+store.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L12)
+After successful `prepare()`, the transaction status is changed to PREPARED,
+and it cannot receive updates anymore.
-Simple key-value state to be provided into `StreamingDataFrame` functions
+If changelog is disabled for this application, no updates will be produced
+to the changelog topic.
**Arguments**:
-- `transaction`: instance of `PartitionTransaction`
+- `processed_offset`: the offset of the latest processed message
-
+
-#### TransactionState.get
+#### WindowedPartitionTransaction.get\_window
```python
-def get(key: Any, default: Any = None) -> Optional[Any]
+def get_window(start_ms: int,
+ end_ms: int,
+ prefix: bytes,
+ default: Any = None) -> Optional[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L21)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L416)
-Get the value for key if key is present in the state, else default
+Get the value of the window defined by `start` and `end` timestamps
+
+if the window is present in the state, else default
**Arguments**:
-- `key`: key
+- `start_ms`: start of the window in milliseconds
+- `end_ms`: end of the window in milliseconds
+- `prefix`: a key prefix
- `default`: default value to return if the key is not found
**Returns**:
value or None if the key is not found and `default` is not provided
-
+
-#### TransactionState.set
+#### WindowedPartitionTransaction.update\_window
```python
-def set(key: Any, value: Any)
+def update_window(start_ms: int, end_ms: int, value: Any, timestamp_ms: int,
+ prefix: bytes)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L31)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L435)
-Set value for the key.
+Set a value for the window.
+
+This method will also update the latest observed timestamp in state partition
+using the provided `timestamp`.
**Arguments**:
-- `key`: key
-- `value`: value
+- `start_ms`: start of the window in milliseconds
+- `end_ms`: end of the window in milliseconds
+- `value`: value of the window
+- `timestamp_ms`: current message timestamp in milliseconds
+- `prefix`: a key prefix
-
+
-#### TransactionState.delete
+#### WindowedPartitionTransaction.get\_latest\_timestamp
```python
-def delete(key: Any)
+def get_latest_timestamp() -> int
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L39)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L452)
-Delete value for the key.
+Get the latest observed timestamp for the current state partition.
-This function always returns `None`, even if value is not found.
+Use this timestamp to determine if the arriving event is late and should be
+discarded from the processing.
-**Arguments**:
+**Returns**:
-- `key`: key
+latest observed event timestamp in milliseconds
-
+
-#### TransactionState.exists
+#### WindowedPartitionTransaction.expire\_windows
```python
-def exists(key: Any) -> bool
+def expire_windows(duration_ms: int, prefix: bytes, grace_ms: int = 0)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/state.py#L48)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L463)
-Check if the key exists in state.
+Get a list of expired windows from RocksDB considering the current
+
+latest timestamp, window duration and grace period.
+
+It also marks the latest found window as expired in the expiration index, so
+calling this method multiple times will yield different results for the same
+"latest timestamp".
**Arguments**:
-- `key`: key
+- `duration_ms`: duration of the windows in milliseconds
+- `prefix`: a key prefix
+- `grace_ms`: grace period in milliseconds. Default - "0"
-**Returns**:
+
-True if key exists, False otherwise
+#### WindowedPartitionTransaction.flush
-
+```python
+def flush(processed_offset: Optional[int] = None,
+ changelog_offset: Optional[int] = None)
+```
-## quixstreams.exceptions
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L478)
-
+Flush the recent updates to the storage.
-## quixstreams.exceptions.assignment
+**Arguments**:
-
+- `processed_offset`: offset of the last processed message, optional.
+- `changelog_offset`: offset of the last produced changelog message,
+optional.
-### PartitionAssignmentError
+
+
+#### WindowedPartitionTransaction.changelog\_topic\_partition
```python
-class PartitionAssignmentError(QuixException)
+@property
+def changelog_topic_partition() -> Optional[Tuple[str, int]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/exceptions/assignment.py#L6)
-
-Error happened during partition rebalancing.
-Raised from `on_assign`, `on_revoke` and `on_lost` callbacks
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L492)
-
+Return the changelog topic-partition for the StorePartition of this transaction.
-## quixstreams.exceptions.base
+Returns `None` if changelog_producer is not provided.
-
+**Returns**:
-## quixstreams.context
+(topic, partition) or None
-
+
-#### set\_message\_context
+### PartitionRecoveryTransaction
```python
-def set_message_context(context: Optional[MessageContext])
+class PartitionRecoveryTransaction(Protocol)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L20)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L506)
-Set a MessageContext for the current message in the given `contextvars.Context`
-
->***NOTE:*** This is for advanced usage only. If you need to change the message key,
-`StreamingDataFrame.to_topic()` has an argument for it.
+A class for managing recovery for a StorePartition from a changelog message
+
-Example Snippet:
+#### PartitionRecoveryTransaction.flush
```python
-from quixstreams import Application, set_message_context, message_context
-
-# Changes the current sdf value based on what the message partition is.
-def alter_context(value):
- context = message_context()
- if value > 1:
- context.headers = context.headers + (b"cool_new_header", value.encode())
- set_message_context(context)
-
-app = Application()
-sdf = app.dataframe()
-sdf = sdf.update(lambda value: alter_context(value))
+def flush()
```
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L513)
-- `context`: instance of `MessageContext`
+Flush the recovery update to the storage.
-
+
-#### message\_context
+### PartitionTransactionStatus
```python
-def message_context() -> MessageContext
+class PartitionTransactionStatus(enum.Enum)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/context.py#L51)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L520)
-Get a MessageContext for the current message, which houses most of the message
+
-metadata, like:
- - key
- - timestamp
- - partition
- - offset
+#### STARTED
+Transaction is started and accepts updates
-Example Snippet:
+
-```python
-from quixstreams import Application, message_context
+#### PREPARED
-# Changes the current sdf value based on what the message partition is.
+Transaction is prepared, it can no longer receive updates
-app = Application()
-sdf = app.dataframe()
-sdf = sdf.apply(lambda value: 1 if message_context().partition == 2 else 0)
-```
+
-**Returns**:
+#### COMPLETE
+
+Transaction is fully completed, it cannot be used anymore
+
+
+
+#### FAILED
+
+Transaction is failed, it cannot be used anymore
+
+
+
+## quixstreams.state.exceptions
+
+
+
+## quixstreams.state.recovery
+
+
+
+### RecoveryPartition
+
+```python
+class RecoveryPartition()
+```
-instance of `MessageContext`
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L24)
-
+A changelog topic partition mapped to a respective `StorePartition` with helper
+methods to determine its current recovery status.
-## quixstreams.kafka.configuration
+Since `StorePartition`s do recovery directly, it also handles recovery transactions.
-
+
-### ConnectionConfig
+#### RecoveryPartition.offset
```python
-class ConnectionConfig(BaseSettings)
+@property
+def offset() -> int
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L17)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L63)
-Provides an interface for all librdkafka connection-based configs.
+Get the changelog offset from the underlying `StorePartition`.
-Allows converting to or from a librdkafka dictionary.
+**Returns**:
-Also obscures secrets and handles any case sensitivity issues.
+changelog offset (int)
-
+
-#### ConnectionConfig.settings\_customise\_sources
+#### RecoveryPartition.needs\_recovery
```python
-@classmethod
-def settings_customise_sources(
- cls, settings_cls: Type[BaseSettings],
- init_settings: PydanticBaseSettingsSource,
- env_settings: PydanticBaseSettingsSource,
- dotenv_settings: PydanticBaseSettingsSource,
- file_secret_settings: PydanticBaseSettingsSource
-) -> Tuple[PydanticBaseSettingsSource, ...]
+@property
+def needs_recovery()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L96)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L72)
-Included to ignore reading/setting values from the environment
+Determine whether recovery is necessary for underlying `StorePartition`.
-
+
-#### ConnectionConfig.from\_librdkafka\_dict
+#### RecoveryPartition.needs\_offset\_update
```python
-@classmethod
-def from_librdkafka_dict(cls,
- config: dict,
- ignore_extras: bool = False) -> Self
+@property
+def needs_offset_update()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L110)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L81)
-Create a `ConnectionConfig` from a librdkafka config dictionary.
+Determine if an offset update is required.
-**Arguments**:
+Usually checked during assign if recovery was not required.
-- `config`: a dict of configs (like {"bootstrap.servers": "url"})
-- `ignore_extras`: Ignore non-connection settings (else raise exception)
+
-**Returns**:
+#### RecoveryPartition.update\_offset
-a ConnectionConfig
+```python
+def update_offset()
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L89)
-#### ConnectionConfig.as\_librdkafka\_dict
+Update only the changelog offset of a StorePartition.
+
+
+
+#### RecoveryPartition.recover\_from\_changelog\_message
```python
-def as_librdkafka_dict(plaintext_secrets=True) -> dict
+def recover_from_changelog_message(
+ changelog_message: ConfluentKafkaMessageProto)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/configuration.py#L125)
-
-Dump any non-empty config values as a librdkafka dictionary.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L109)
->***NOTE***: All secret values will be dumped in PLAINTEXT by default.
+Recover the StorePartition using a message read from its respective changelog.
**Arguments**:
-- `plaintext_secrets`: whether secret values are plaintext or obscured (***)
+- `changelog_message`: A confluent kafka message (everything as bytes)
-**Returns**:
+
-a librdkafka-compatible dictionary
+#### RecoveryPartition.set\_watermarks
-
+```python
+def set_watermarks(lowwater: int, highwater: int)
+```
-## quixstreams.kafka
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L121)
-
+Set the changelog watermarks as gathered from Consumer.get_watermark_offsets()
-## quixstreams.kafka.producer
+**Arguments**:
-
+- `lowwater`: topic partition lowwater
+- `highwater`: topic partition highwater
-### Producer
+
+
+### ChangelogProducerFactory
```python
-class Producer()
+class ChangelogProducerFactory()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L44)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L132)
-
+Generates ChangelogProducers, which produce changelog messages to a StorePartition.
-#### Producer.\_\_init\_\_
+
+
+#### ChangelogProducerFactory.\_\_init\_\_
```python
-def __init__(broker_address: Union[str, ConnectionConfig],
- logger: logging.Logger = logger,
- error_callback: Callable[[KafkaError], None] = _default_error_cb,
- extra_config: Optional[dict] = None,
- flush_timeout: Optional[int] = None)
+def __init__(changelog_name: str, producer: RowProducer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L45)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L137)
-A wrapper around `confluent_kafka.Producer`.
+**Arguments**:
-It initializes `confluent_kafka.Producer` on demand
-avoiding network calls during `__init__`, provides typing info for methods
-and some reasonable defaults.
+- `changelog_name`: changelog topic name
+- `producer`: a RowProducer (not shared with `Application` instance)
-**Arguments**:
+**Returns**:
-- `broker_address`: Connection settings for Kafka.
-Accepts string with Kafka broker host and port formatted as `:`,
-or a ConnectionConfig object if authentication is required.
-- `logger`: a Logger instance to attach librdkafka logging to
-- `error_callback`: callback used for producer errors
-- `extra_config`: A dictionary with additional options that
-will be passed to `confluent_kafka.Producer` as is.
-Note: values passed as arguments override values in `extra_config`.
-- `flush_timeout`: The time the producer is waiting for all messages to be delivered.
+a ChangelogProducer instance
-
+
-#### Producer.produce
+#### ChangelogProducerFactory.get\_partition\_producer
```python
-def produce(topic: str,
- value: Optional[Union[str, bytes]] = None,
- key: Optional[Union[str, bytes]] = None,
- headers: Optional[Headers] = None,
- partition: Optional[int] = None,
- timestamp: Optional[int] = None,
- poll_timeout: float = 5.0,
- buffer_error_max_tries: int = 3,
- on_delivery: Optional[DeliveryCallback] = None)
+def get_partition_producer(partition_num) -> "ChangelogProducer"
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L83)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L147)
-Produce a message to a topic.
+Generate a ChangelogProducer for producing to a specific partition number
-It also polls Kafka for callbacks before producing to minimize
-the probability of `BufferError`.
-If `BufferError` still happens, the method will poll Kafka with timeout
-to free up the buffer and try again.
+(and thus StorePartition).
**Arguments**:
-- `topic`: topic name
-- `value`: message value
-- `key`: message key
-- `headers`: message headers
-- `partition`: topic partition
-- `timestamp`: message timestamp
-- `poll_timeout`: timeout for `poll()` call in case of `BufferError`
-- `buffer_error_max_tries`: max retries for `BufferError`.
-Pass `0` to not retry after `BufferError`.
-- `on_delivery`: the delivery callback to be triggered on `poll()`
-for the produced message.
+- `partition_num`: source topic partition number
-
+
-#### Producer.poll
+### ChangelogProducer
```python
-def poll(timeout: float = 0)
+class ChangelogProducer()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L144)
-
-Polls the producer for events and calls `on_delivery` callbacks.
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L161)
-- `timeout`: poll timeout seconds; Default: 0 (unlike others)
-> NOTE: -1 will hang indefinitely if there are no messages to acknowledge
+Generated for a `StorePartition` to produce state changes to its respective
+kafka changelog partition.
-
+
-#### Producer.flush
+#### ChangelogProducer.\_\_init\_\_
```python
-def flush(timeout: Optional[float] = None) -> int
+def __init__(changelog_name: str, partition: int, producer: RowProducer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/producer.py#L152)
-
-Wait for all messages in the Producer queue to be delivered.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L167)
**Arguments**:
-- `timeout` (`float`): time to attempt flushing (seconds).
-None use producer default or -1 is infinite. Default: None
-
-**Returns**:
+- `changelog_name`: A changelog topic name
+- `partition`: source topic partition number
+- `producer`: a RowProducer (not shared with `Application` instance)
-number of messages remaining to flush
+
-
+#### ChangelogProducer.produce
-## quixstreams.kafka.consumer
+```python
+def produce(key: bytes,
+ value: Optional[bytes] = None,
+ headers: Optional[MessageHeadersMapping] = None)
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L190)
-### Consumer
+Produce a message to a changelog topic partition.
-```python
-class Consumer()
-```
+**Arguments**:
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L64)
+- `key`: message key (same as state key, including prefixes)
+- `value`: message value (same as state value)
+- `headers`: message headers (includes column family info)
-
+
-#### Consumer.\_\_init\_\_
+### RecoveryManager
```python
-def __init__(broker_address: Union[str, ConnectionConfig],
- consumer_group: Optional[str],
- auto_offset_reset: AutoOffsetReset,
- auto_commit_enable: bool = True,
- logger: logging.Logger = logger,
- error_callback: Callable[[KafkaError], None] = _default_error_cb,
- on_commit: Optional[Callable[
- [Optional[KafkaError], List[TopicPartition]], None]] = None,
- extra_config: Optional[dict] = None)
+class RecoveryManager()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L65)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L215)
-A wrapper around `confluent_kafka.Consumer`.
+Manages all consumer-related aspects of recovery, including:
+ - assigning/revoking, pausing/resuming topic partitions (especially changelogs)
+ - consuming changelog messages until state is updated fully.
-It initializes `confluent_kafka.Consumer` on demand
-avoiding network calls during `__init__`, provides typing info for methods
-and some reasonable defaults.
+Also tracks/manages `RecoveryPartitions`, which are assigned/tracked only if
+recovery for that changelog partition is required.
-**Arguments**:
+Recovery is attempted from the `Application` after any new partition assignment.
-- `broker_address`: Connection settings for Kafka.
-Accepts string with Kafka broker host and port formatted as `:`,
-or a ConnectionConfig object if authentication is required.
-- `consumer_group`: Kafka consumer group.
-Passed as `group.id` to `confluent_kafka.Consumer`
-- `auto_offset_reset`: Consumer `auto.offset.reset` setting.
-Available values:
-- "earliest" - automatically reset the offset to the smallest offset
-- "latest" - automatically reset the offset to the largest offset
-- "error" - trigger an error (ERR__AUTO_OFFSET_RESET) which is retrieved
- by consuming messages (used for testing)
-- `auto_commit_enable`: If true, periodically commit offset of
-the last message handed to the application. Default - `True`.
-- `logger`: a Logger instance to attach librdkafka logging to
-- `error_callback`: callback used for consumer errors
-- `on_commit`: Offset commit result propagation callback.
-Passed as "offset_commit_cb" to `confluent_kafka.Consumer`.
-- `extra_config`: A dictionary with additional options that
-will be passed to `confluent_kafka.Consumer` as is.
-Note: values passed as arguments override values in `extra_config`.
+
-
+#### RecoveryManager.partitions
-#### Consumer.poll
+```python
+@property
+def partitions() -> Dict[int, Dict[str, RecoveryPartition]]
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L234)
+
+Returns a mapping of assigned RecoveryPartitions in the following format:
+`{<partition>: {<store_name>: <RecoveryPartition>}}`
+
+
+
+#### RecoveryManager.has\_assignments
```python
-def poll(timeout: Optional[float] = None) -> Optional[Message]
+@property
+def has_assignments() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L128)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L242)
-Consumes a single message, calls callbacks and returns events.
+Whether the Application has assigned RecoveryPartitions
-The application must check the returned :py:class:`Message`
-object's :py:func:`Message.error()` method to distinguish between proper
-messages (error() returns None), or an event or error.
+**Returns**:
-Note: Callbacks may be called from this method, such as
-``on_assign``, ``on_revoke``, et al.
+has assignments, as bool
-**Arguments**:
+
-- `timeout` (`float`): Maximum time in seconds to block waiting for message,
-event or callback. None or -1 is infinite. Default: None.
+#### RecoveryManager.recovering
-**Raises**:
+```python
+@property
+def recovering() -> bool
+```
-- `None`: RuntimeError if called on a closed consumer
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L251)
+
+Whether the Application is currently recovering
**Returns**:
-A Message object or None on timeout
+is recovering, as bool
-
+
-#### Consumer.subscribe
+#### RecoveryManager.register\_changelog
```python
-def subscribe(topics: List[str],
- on_assign: Optional[RebalancingCallback] = None,
- on_revoke: Optional[RebalancingCallback] = None,
- on_lost: Optional[RebalancingCallback] = None)
+def register_changelog(topic_name: str, store_name: str) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L146)
-
-Set subscription to supplied list of topics
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L259)
-This replaces a previous subscription.
+Register a changelog Topic with the TopicManager.
**Arguments**:
-- `topics` (`list(str)`): List of topics (strings) to subscribe to.
-- `on_assign` (`callable`): callback to provide handling of customized offsets
-on completion of a successful partition re-assignment.
-- `on_revoke` (`callable`): callback to provide handling of offset commits to
-a customized store on the start of a rebalance operation.
-- `on_lost` (`callable`): callback to provide handling in the case the partition
-assignment has been lost. Partitions that have been lost may already be
-owned by other members in the group and therefore committing offsets,
-for example, may fail.
+- `topic_name`: source topic name
+- `store_name`: name of the store
-**Raises**:
+
-- `KafkaException`:
-- `None`: RuntimeError if called on a closed consumer
-.. py:function:: on_assign(consumer, partitions)
-.. py:function:: on_revoke(consumer, partitions)
-.. py:function:: on_lost(consumer, partitions)
+#### RecoveryManager.do\_recovery
- :param Consumer consumer: Consumer instance.
- :param list(TopicPartition) partitions: Absolute list of partitions being
- assigned or revoked.
+```python
+def do_recovery()
+```
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L271)
-#### Consumer.unsubscribe
+If there are any active RecoveryPartitions, do a recovery procedure.
+
+After, will resume normal `Application` processing.
+
+
+
+#### RecoveryManager.assign\_partition
```python
-def unsubscribe()
+def assign_partition(topic: str, partition: int, committed_offset: int,
+ store_partitions: Dict[str, StorePartition])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L240)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L324)
-Remove current subscription.
-
-**Raises**:
+Assigns `StorePartition`s (as `RecoveryPartition`s) ONLY IF recovery required.
-- `None`: KafkaException
-- `None`: RuntimeError if called on a closed consumer
+Pauses active consumer partitions as needed.
-
+
-#### Consumer.store\_offsets
+#### RecoveryManager.revoke\_partition
```python
-def store_offsets(message: Optional[Message] = None,
- offsets: Optional[List[TopicPartition]] = None)
+def revoke_partition(partition_num: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L248)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/recovery.py#L391)
-.. py:function:: store_offsets([message=None], [offsets=None])
+revoke ALL StorePartitions (across all Stores) for a given partition number
-Store offsets for a message or a list of offsets.
+**Arguments**:
-``message`` and ``offsets`` are mutually exclusive. The stored offsets
-will be committed according to 'auto.commit.interval.ms' or manual
-offset-less `commit`.
-Note that 'enable.auto.offset.store' must be set to False when using this API.
+- `partition_num`: partition number of source topic
-**Arguments**:
+
-- `message` (`confluent_kafka.Message`): Store message's offset+1.
-- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to store.
+## quixstreams.utils
-**Raises**:
+
-- `None`: KafkaException
-- `None`: RuntimeError if called on a closed consumer
+## quixstreams.utils.json
-
+
-#### Consumer.commit
+#### dumps
```python
-def commit(message: Optional[Message] = None,
- offsets: Optional[List[TopicPartition]] = None,
- asynchronous: bool = True) -> Optional[List[TopicPartition]]
+def dumps(value: Any) -> bytes
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L282)
-
-Commit a message or a list of offsets.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/utils/json.py#L8)
-The ``message`` and ``offsets`` parameters are mutually exclusive.
-If neither is set, the current partition assignment's offsets are used instead.
-Use this method to commit offsets if you have 'enable.auto.commit' set to False.
+Serialize to JSON using `orjson` package.
**Arguments**:
-- `message` (`confluent_kafka.Message`): Commit the message's offset+1.
-Note: By convention, committed offsets reflect the next message
-to be consumed, **not** the last message consumed.
-- `offsets` (`list(TopicPartition)`): List of topic+partitions+offsets to commit.
-- `asynchronous` (`bool`): If true, asynchronously commit, returning None
-immediately. If False, the commit() call will block until the commit
-succeeds or fails and the committed offsets will be returned (on success).
-Note that specific partitions may have failed and the .err field of
-each partition should be checked for success.
+- `value`: value to serialize to JSON
-**Raises**:
+**Returns**:
-- `None`: KafkaException
-- `None`: RuntimeError if called on a closed consumer
+bytes
-
+
-#### Consumer.committed
+#### loads
```python
-def committed(partitions: List[TopicPartition],
- timeout: Optional[float] = None) -> List[TopicPartition]
+def loads(value: bytes) -> Any
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L322)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/utils/json.py#L18)
-.. py:function:: committed(partitions, [timeout=None])
+Deserialize from JSON using `orjson` package.
-Retrieve committed offsets for the specified partitions.
+Main differences:
+- It accepts `bytes` as input
+- It doesn't allow non-str keys in dictionaries
**Arguments**:
-- `partitions` (`list(TopicPartition)`): List of topic+partitions to query for stored offsets.
-- `timeout` (`float`): Request timeout (seconds).
-None or -1 is infinite. Default: None
+- `value`: value to deserialize from
-**Raises**:
+**Returns**:
-- `None`: KafkaException
-- `None`: RuntimeError if called on a closed consumer
+object
-**Returns**:
+
-`list(TopicPartition)`: List of topic+partitions with offset and possibly error set.
+## quixstreams.utils.dicts
-
+
-#### Consumer.get\_watermark\_offsets
+#### dict\_values
```python
-def get_watermark_offsets(partition: TopicPartition,
- timeout: Optional[float] = None,
- cached: bool = False) -> Tuple[int, int]
+def dict_values(d: object) -> List
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L342)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/utils/dicts.py#L4)
-Retrieve low and high offsets for the specified partition.
+Recursively unpacks a set of nested dicts to get a flattened list of leaves,
-**Arguments**:
+where "leaves" are the first non-dict item.
-- `partition` (`TopicPartition`): Topic+partition to return offsets for.
-- `timeout` (`float`): Request timeout (seconds). None or -1 is infinite.
-Ignored if cached=True. Default: None
-- `cached` (`bool`): Instead of querying the broker, use cached information.
-Cached values: The low offset is updated periodically
-(if statistics.interval.ms is set) while the high offset is updated on each
-message fetched from the broker for this partition.
+i.e {"a": {"b": {"c": 1}, "d": 2}, "e": 3} becomes [1, 2, 3]
-**Raises**:
+**Arguments**:
-- `None`: KafkaException
-- `None`: RuntimeError if called on a closed consumer
+- `d`: initially, a dict (with potentially nested dicts)
**Returns**:
-`tuple(int,int)`: Tuple of (low,high) on success or None on timeout.
-The high offset is the offset of the last message + 1.
+a list with all the leaves of the various contained dicts
-
+
-#### Consumer.list\_topics
+## quixstreams.checkpointing.exceptions
+
+
+
+## quixstreams.checkpointing
+
+
+
+## quixstreams.checkpointing.checkpoint
+
+
+
+### Checkpoint
```python
-def list_topics(topic: Optional[str] = None,
- timeout: Optional[float] = None) -> ClusterMetadata
+class Checkpoint()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L368)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L24)
-.. py:function:: list_topics([topic=None], [timeout=-1])
+Class to keep track of state updates and consumer offsets and to checkpoint these
+updates on schedule.
-Request metadata from the cluster.
-This method provides the same information as
-listTopics(), describeTopics() and describeCluster() in the Java Admin client.
+
-**Arguments**:
+#### Checkpoint.expired
-- `topic` (`str`): If specified, only request information about this topic,
-else return results for all topics in cluster.
-Warning: If auto.create.topics.enable is set to true on the broker and
-an unknown topic is specified, it will be created.
-- `timeout` (`float`): The maximum response time before timing out
-None or -1 is infinite. Default: None
+```python
+def expired() -> bool
+```
+
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L49)
+
+Returns `True` if checkpoint deadline has expired.
+
+
+
+#### Checkpoint.empty
+
+```python
+def empty() -> bool
+```
-**Raises**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L55)
-- `None`: KafkaException
+Returns `True` if checkpoint doesn't have any offsets stored yet.
-
-#### Consumer.memberid
+
+
+#### Checkpoint.store\_offset
```python
-def memberid() -> str
+def store_offset(topic: str, partition: int, offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L391)
-
-Return this client's broker-assigned group member id.
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L62)
-The member id is assigned by the group coordinator and is propagated to
-the consumer during rebalance.
+Store the offset of the processed message to the checkpoint.
- :returns: Member id string or None
- :rtype: string
- :raises: RuntimeError if called on a closed consumer
+**Arguments**:
+- `topic`: topic name
+- `partition`: partition number
+- `offset`: message offset
-
+
-#### Consumer.offsets\_for\_times
+#### Checkpoint.get\_store\_transaction
```python
-def offsets_for_times(partitions: List[TopicPartition],
- timeout: Optional[float] = None) -> List[TopicPartition]
+def get_store_transaction(
+ topic: str,
+ partition: int,
+ store_name: str = DEFAULT_STATE_STORE_NAME) -> PartitionTransaction
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L404)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L82)
-Look up offsets by timestamp for the specified partitions.
-
-The returned offset for each partition is the earliest offset whose
-timestamp is greater than or equal to the given timestamp in the
-corresponding partition. If the provided timestamp exceeds that of the
-last message in the partition, a value of -1 will be returned.
+Get a PartitionTransaction for the given store, topic and partition.
- :param list(TopicPartition) partitions: topic+partitions with timestamps
- in the TopicPartition.offset field.
- :param float timeout: The maximum response time before timing out.
- None or -1 is infinite. Default: None
- :returns: List of topic+partition with offset field set and possibly error set
- :rtype: list(TopicPartition)
- :raises: KafkaException
- :raises: RuntimeError if called on a closed consumer
+It will return already started transaction if there's one.
+**Arguments**:
-
+- `topic`: topic name
+- `partition`: partition number
+- `store_name`: store name
-#### Consumer.pause
+**Returns**:
-```python
-def pause(partitions: List[TopicPartition])
-```
+instance of `PartitionTransaction`
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L430)
+
-Pause consumption for the provided list of partitions.
+#### Checkpoint.commit
-Paused partitions must be tracked manually.
+```python
+def commit()
+```
-Does NOT affect the result of Consumer.assignment().
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/checkpointing/checkpoint.py#L105)
-**Arguments**:
+Commit the checkpoint.
-- `partitions` (`list(TopicPartition)`): List of topic+partitions to pause.
+This method will:
+ 1. Produce the changelogs for each state store
+ 2. Flush the producer to ensure everything is delivered.
+ 3. Commit topic offsets.
+ 4. Flush each state store partition to the disk.
-**Raises**:
+
-- `None`: KafkaException
+## quixstreams.logging
-
+
-#### Consumer.resume
+#### configure\_logging
```python
-def resume(partitions: List[TopicPartition])
+def configure_logging(loglevel: Optional[LogLevel]) -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L444)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/logging.py#L26)
-.. py:function:: resume(partitions)
+Configure "quixstreams" logger.
-Resume consumption for the provided list of partitions.
+>***NOTE:*** If "quixstreams" logger already has pre-defined handlers
+(e.g. logging has already been configured via `logging`, or the function
+is called twice), it will skip configuration and return `False`.
**Arguments**:
-- `partitions` (`list(TopicPartition)`): List of topic+partitions to resume.
+- `loglevel`: a valid log level as a string or None.
+If None passed, this function is no-op and no logging will be configured.
-**Raises**:
+**Returns**:
-- `None`: KafkaException
+True if logging config has been updated, otherwise False.
-
+
-#### Consumer.position
+## quixstreams.rowconsumer
+
+
+
+### RowConsumer
```python
-def position(partitions: List[TopicPartition]) -> List[TopicPartition]
+class RowConsumer(Consumer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L456)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L19)
-Retrieve current positions (offsets) for the specified partitions.
+
-**Arguments**:
+#### RowConsumer.\_\_init\_\_
-- `partitions` (`list(TopicPartition)`): List of topic+partitions to return
-current offsets for. The current offset is the offset of
-the last consumed message + 1.
+```python
+def __init__(broker_address: Union[str, ConnectionConfig],
+ consumer_group: str,
+ auto_offset_reset: AutoOffsetReset,
+ auto_commit_enable: bool = True,
+ on_commit: Callable[[Optional[KafkaError], List[TopicPartition]],
+ None] = None,
+ extra_config: Optional[dict] = None,
+ on_error: Optional[ConsumerErrorCallback] = None)
+```
-**Raises**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L20)
-- `None`: KafkaException
-- `None`: RuntimeError if called on a closed consumer
+A consumer class that is capable of deserializing Kafka messages to Rows
-**Returns**:
+according to the Topics deserialization settings.
-`list(TopicPartition)`: List of topic+partitions with offset and possibly error set.
+It overrides `.subscribe()` method of Consumer class to accept `Topic`
+objects instead of strings.
-
+**Arguments**:
-#### Consumer.seek
+- `broker_address`: Connection settings for Kafka.
+Accepts string with Kafka broker host and port formatted as `<host>:<port>`,
+or a ConnectionConfig object if authentication is required.
+- `consumer_group`: Kafka consumer group.
+Passed as `group.id` to `confluent_kafka.Consumer`
+- `auto_offset_reset`: Consumer `auto.offset.reset` setting.
+Available values:
+- "earliest" - automatically reset the offset to the smallest offset
+- "latest" - automatically reset the offset to the largest offset
+- `auto_commit_enable`: If true, periodically commit offset of
+the last message handed to the application. Default - `True`.
+- `on_commit`: Offset commit result propagation callback.
+Passed as "offset_commit_cb" to `confluent_kafka.Consumer`.
+- `extra_config`: A dictionary with additional options that
+will be passed to `confluent_kafka.Consumer` as is.
+Note: values passed as arguments override values in `extra_config`.
+- `on_error`: a callback triggered when `RowConsumer.poll_row` fails.
+If consumer fails and the callback returns `True`, the exception
+will be logged but not propagated.
+The default callback logs an exception and returns `False`.
+
+
+
+#### RowConsumer.subscribe
```python
-def seek(partition: TopicPartition)
+def subscribe(topics: List[Topic],
+ on_assign: Optional[RebalancingCallback] = None,
+ on_revoke: Optional[RebalancingCallback] = None,
+ on_lost: Optional[RebalancingCallback] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L470)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L72)
-Set consume position for partition to offset.
+Set subscription to supplied list of topics.
-The offset may be an absolute (>=0) or a
-logical offset (:py:const:`OFFSET_BEGINNING` et.al).
+This replaces a previous subscription.
-seek() may only be used to update the consume offset of an
-actively consumed partition (i.e., after :py:const:`assign()`),
-to set the starting offset of partition not being consumed instead
-pass the offset in an `assign()` call.
+This method also updates the internal mapping with topics that is used
+to deserialize messages to Rows.
**Arguments**:
-- `partition` (`TopicPartition`): Topic+partition+offset to seek to.
-
-**Raises**:
-
-- `None`: KafkaException
+- `topics`: list of `Topic` instances to subscribe to.
+- `on_assign` (`callable`): callback to provide handling of customized offsets
+on completion of a successful partition re-assignment.
+- `on_revoke` (`callable`): callback to provide handling of offset commits to
+a customized store on the start of a rebalance operation.
+- `on_lost` (`callable`): callback to provide handling in the case the partition
+assignment has been lost. Partitions that have been lost may already be
+owned by other members in the group and therefore committing offsets,
+for example, may fail.
-
+
-#### Consumer.assignment
+#### RowConsumer.poll\_row
```python
-def assignment() -> List[TopicPartition]
+def poll_row(timeout: float = None) -> Union[Row, List[Row], None]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L487)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowconsumer.py#L106)
-Returns the current partition assignment.
+Consumes a single message and deserializes it to Row or a list of Rows.
-**Raises**:
+The message is deserialized according to the corresponding Topic.
+If deserializer raises `IgnoreValue` exception, this method will return None.
+If Kafka returns an error, it will be raised as exception.
-- `None`: KafkaException
-- `None`: RuntimeError if called on a closed consumer
+**Arguments**:
+
+- `timeout`: poll timeout seconds
**Returns**:
-`list(TopicPartition)`: List of assigned topic+partitions.
+single Row, list of Rows or None
-
+
-#### Consumer.set\_sasl\_credentials
+## quixstreams.context
+
+
+
+#### set\_message\_context
```python
-def set_sasl_credentials(username: str, password: str)
+def set_message_context(context: Optional[MessageContext])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L500)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L20)
-Sets the SASL credentials used for this client.
-These credentials will overwrite the old ones, and will be used the next
-time the client needs to authenticate.
-This method will not disconnect existing broker connections that have been
-established with the old credentials.
-This method is applicable only to SASL PLAIN and SCRAM mechanisms.
+Set a MessageContext for the current message in the given `contextvars.Context`
-
+>***NOTE:*** This is for advanced usage only. If you need to change the message key,
+`StreamingDataFrame.to_topic()` has an argument for it.
-#### Consumer.incremental\_assign
+
+Example Snippet:
```python
-def incremental_assign(partitions: List[TopicPartition])
-```
+from quixstreams import Application, set_message_context, message_context
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L512)
+# Changes the current sdf value based on what the message partition is.
+def alter_context(value):
+ context = message_context()
+ if value > 1:
+ context.headers = context.headers + (b"cool_new_header", value.encode())
+ set_message_context(context)
-Assign new partitions.
+app = Application()
+sdf = app.dataframe()
+sdf = sdf.update(lambda value: alter_context(value))
+```
-Can be called outside the `Consumer` `on_assign` callback (multiple times).
-Partitions immediately show on `Consumer.assignment()`.
+**Arguments**:
-Any additional partitions besides the ones passed during the `Consumer`
-`on_assign` callback will NOT be associated with the consumer group.
+- `context`: instance of `MessageContext`
-
+
-#### Consumer.incremental\_unassign
+#### message\_context
```python
-def incremental_unassign(partitions: List[TopicPartition])
+def message_context() -> MessageContext
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L524)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/context.py#L51)
-Revoke partitions.
+Get a MessageContext for the current message, which houses most of the message
-Can be called outside an on_revoke callback.
+metadata, like:
+ - key
+ - timestamp
+ - partition
+ - offset
-
-#### Consumer.close
+Example Snippet:
```python
-def close()
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/kafka/consumer.py#L532)
-
-Close down and terminate the Kafka Consumer.
+from quixstreams import Application, message_context
-Actions performed:
+# Changes the current sdf value based on what the message partition is.
-- Stops consuming.
-- Commits offsets, unless the consumer property 'enable.auto.commit' is set to False.
-- Leaves the consumer group.
+app = Application()
+sdf = app.dataframe()
+sdf = sdf.apply(lambda value: 1 if message_context().partition == 2 else 0)
+```
-Registered callbacks may be called from this method,
-see `poll()` for more info.
+**Returns**:
+instance of `MessageContext`
-
+
-## quixstreams.kafka.exceptions
+## quixstreams.types
@@ -7573,7 +7644,7 @@ see `poll()` for more info.
class Application()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L55)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L55)
The main Application class.
@@ -7639,7 +7710,7 @@ def __init__(broker_address: Optional[Union[str, ConnectionConfig]] = None,
topic_create_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L93)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L93)
**Arguments**:
@@ -7733,7 +7804,7 @@ def Quix(cls,
topic_create_timeout: float = 60) -> Self
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L313)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L313)
>***NOTE:*** DEPRECATED: use Application with `quix_sdk_token` argument instead.
@@ -7835,7 +7906,7 @@ def topic(name: str,
timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L451)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L451)
Create a topic definition.
@@ -7906,7 +7977,7 @@ topic = app.topic("input-topic", timestamp_extractor=custom_ts_extractor)
def dataframe(topic: Topic) -> StreamingDataFrame
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L531)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L531)
A simple helper method that generates a `StreamingDataFrame`, which is used
@@ -7948,7 +8019,7 @@ to be used as an input topic.
def stop(fail: bool = False)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L570)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L570)
Stop the internal poll loop and the message processing.
@@ -7971,7 +8042,7 @@ to unhandled exception, and it shouldn't commit the current checkpoint.
def get_producer() -> Producer
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L593)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L593)
Create and return a pre-configured Producer instance.
The Producer is initialized with params passed to Application.
@@ -8002,7 +8073,7 @@ with app.get_producer() as producer:
def get_consumer(auto_commit_enable: bool = True) -> Consumer
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L623)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L623)
Create and return a pre-configured Consumer instance.
The Consumer is initialized with params passed to Application.
@@ -8043,7 +8114,7 @@ with app.get_consumer() as consumer:
def clear_state()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L666)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L666)
Clear the state of the application.
@@ -8055,7 +8126,7 @@ Clear the state of the application.
def run(dataframe: StreamingDataFrame)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/app.py#L672)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/app.py#L672)
Start processing data from Kafka using provided `StreamingDataFrame`
@@ -8083,233 +8154,151 @@ app.run(dataframe=df)
- `dataframe`: instance of `StreamingDataFrame`
-
+
-## quixstreams.rowconsumer
+## quixstreams.processing\_context
-
+
-### RowConsumer
+### ProcessingContext
```python
-class RowConsumer(Consumer)
+@dataclasses.dataclass
+class ProcessingContext()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L19)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L21)
-
+A class to share processing-related objects
+between `Application` and `StreamingDataFrame` instances.
-#### RowConsumer.\_\_init\_\_
+
+
+#### ProcessingContext.store\_offset
```python
-def __init__(broker_address: Union[str, ConnectionConfig],
- consumer_group: str,
- auto_offset_reset: AutoOffsetReset,
- auto_commit_enable: bool = True,
- on_commit: Callable[[Optional[KafkaError], List[TopicPartition]],
- None] = None,
- extra_config: Optional[dict] = None,
- on_error: Optional[ConsumerErrorCallback] = None)
+def store_offset(topic: str, partition: int, offset: int)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L20)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L41)
-A consumer class that is capable of deserializing Kafka messages to Rows
-
-according to the Topics deserialization settings.
-
-It overrides `.subscribe()` method of Consumer class to accept `Topic`
-objects instead of strings.
+Store the offset of the processed message to the checkpoint.
**Arguments**:
-- `broker_address`: Connection settings for Kafka.
-Accepts string with Kafka broker host and port formatted as `:`,
-or a ConnectionConfig object if authentication is required.
-- `consumer_group`: Kafka consumer group.
-Passed as `group.id` to `confluent_kafka.Consumer`
-- `auto_offset_reset`: Consumer `auto.offset.reset` setting.
-Available values:
-- "earliest" - automatically reset the offset to the smallest offset
-- "latest" - automatically reset the offset to the largest offset
-- `auto_commit_enable`: If true, periodically commit offset of
-the last message handed to the application. Default - `True`.
-- `on_commit`: Offset commit result propagation callback.
-Passed as "offset_commit_cb" to `confluent_kafka.Consumer`.
-- `extra_config`: A dictionary with additional options that
-will be passed to `confluent_kafka.Consumer` as is.
-Note: values passed as arguments override values in `extra_config`.
-- `on_error`: a callback triggered when `RowConsumer.poll_row` fails.
-If consumer fails and the callback returns `True`, the exception
-will be logged but not propagated.
-The default callback logs an exception and returns `False`.
+- `topic`: topic name
+- `partition`: partition number
+- `offset`: message offset
-
+
-#### RowConsumer.subscribe
+#### ProcessingContext.init\_checkpoint
```python
-def subscribe(topics: List[Topic],
- on_assign: Optional[RebalancingCallback] = None,
- on_revoke: Optional[RebalancingCallback] = None,
- on_lost: Optional[RebalancingCallback] = None)
+def init_checkpoint()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L72)
-
-Set subscription to supplied list of topics.
-
-This replaces a previous subscription.
-
-This method also updates the internal mapping with topics that is used
-to deserialize messages to Rows.
-
-**Arguments**:
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L51)
-- `topics`: list of `Topic` instances to subscribe to.
-- `on_assign` (`callable`): callback to provide handling of customized offsets
-on completion of a successful partition re-assignment.
-- `on_revoke` (`callable`): callback to provide handling of offset commits to
-a customized store on the start of a rebalance operation.
-- `on_lost` (`callable`): callback to provide handling in the case the partition
-assignment has been lost. Partitions that have been lost may already be
-owned by other members in the group and therefore committing offsets,
-for example, may fail.
+Initialize a new checkpoint
-
+
-#### RowConsumer.poll\_row
+#### ProcessingContext.commit\_checkpoint
```python
-def poll_row(timeout: float = None) -> Union[Row, List[Row], None]
+def commit_checkpoint(force: bool = False)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/rowconsumer.py#L106)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/processing_context.py#L62)
-Consumes a single message and deserialize it to Row or a list of Rows.
-
-The message is deserialized according to the corresponding Topic.
-If deserializer raises `IgnoreValue` exception, this method will return None.
-If Kafka returns an error, it will be raised as exception.
+Commit the current checkpoint.
-**Arguments**:
+The actual commit will happen only when:
-- `timeout`: poll timeout seconds
+1. The checkpoint has at least one stored offset
+2. The checkpoint is expired or `force=True` is passed
-**Returns**:
+**Arguments**:
-single Row, list of Rows or None
+- `force`: if `True`, commit the checkpoint before its expiration deadline.
-
+
-## quixstreams.checkpointing.checkpoint
+## quixstreams.rowproducer
-
+
-### Checkpoint
+### RowProducer
```python
-class Checkpoint()
+class RowProducer()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L24)
-
-Class to keep track of state updates and consumer offsets and to checkpoint these
-updates on schedule.
-
-
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowproducer.py#L18)
-#### Checkpoint.expired
+A producer class that is capable of serializing Rows to bytes and send them to Kafka.
-```python
-def expired() -> bool
-```
+The serialization is performed according to the Topic serialization settings.
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L49)
+**Arguments**:
-Returns `True` if checkpoint deadline has expired.
+- `broker_address`: Connection settings for Kafka.
+Accepts string with Kafka broker host and port formatted as `<host>:<port>`,
+or a ConnectionConfig object if authentication is required.
+- `extra_config`: A dictionary with additional options that
+will be passed to `confluent_kafka.Producer` as is.
+Note: values passed as arguments override values in `extra_config`.
+- `on_error`: a callback triggered when `RowProducer.produce_row()`
+or `RowProducer.poll()` fail.
+If producer fails and the callback returns `True`, the exception
+will be logged but not propagated.
+The default callback logs an exception and returns `False`.
+- `flush_timeout`: The time the producer is waiting for all messages to be delivered.
-
+
-#### Checkpoint.empty
+#### RowProducer.produce\_row
```python
-def empty() -> bool
+def produce_row(row: Row,
+ topic: Topic,
+ key: Optional[Any] = _KEY_UNSET,
+ partition: Optional[int] = None,
+ timestamp: Optional[int] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L55)
-
-Returns `True` if checkpoint doesn't have any offsets stored yet.
-
-
-
-
-#### Checkpoint.store\_offset
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowproducer.py#L56)
-```python
-def store_offset(topic: str, partition: int, offset: int)
-```
+Serialize Row to bytes according to the Topic serialization settings
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L62)
+and produce it to Kafka
-Store the offset of the processed message to the checkpoint.
+If this method fails, it will trigger the provided "on_error" callback.
**Arguments**:
-- `topic`: topic name
-- `partition`: partition number
-- `offset`: message offset
+- `row`: Row object
+- `topic`: Topic object
+- `key`: message key, optional
+- `partition`: partition number, optional
+- `timestamp`: timestamp in milliseconds, optional
-
+
-#### Checkpoint.get\_store\_transaction
+#### RowProducer.poll
```python
-def get_store_transaction(
- topic: str,
- partition: int,
- store_name: str = DEFAULT_STATE_STORE_NAME) -> PartitionTransaction
+def poll(timeout: float = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L82)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/rowproducer.py#L96)
-Get a PartitionTransaction for the given store, topic and partition.
+Polls the producer for events and calls `on_delivery` callbacks.
-It will return already started transaction if there's one.
+If `poll()` fails, it will trigger the provided "on_error" callback
**Arguments**:
-- `topic`: topic name
-- `partition`: partition number
-- `store_name`: store name
-
-**Returns**:
-
-instance of `PartitionTransaction`
-
-
-
-#### Checkpoint.commit
-
-```python
-def commit()
-```
-
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/checkpointing/checkpoint.py#L105)
-
-Commit the checkpoint.
-
-This method will:
- 1. Produce the changelogs for each state store
- 2. Flush the producer to ensure everything is delivered.
- 3. Commit topic offsets.
- 4. Flush each state store partition to the disk.
-
-
-
-## quixstreams.checkpointing
-
-
-
-## quixstreams.checkpointing.exceptions
+- `timeout`: timeout in seconds
diff --git a/docs/api-reference/serialization.md b/docs/api-reference/serialization.md
index bdb383979..67351eda3 100644
--- a/docs/api-reference/serialization.md
+++ b/docs/api-reference/serialization.md
@@ -10,7 +10,7 @@
class QuixDeserializer(JSONDeserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L73)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L73)
Handles Deserialization for any Quix-formatted topic.
@@ -23,18 +23,15 @@ Parses JSON data from either `TimeseriesData` and `EventData` (ignores the rest)
#### QuixDeserializer.\_\_init\_\_
```python
-def __init__(column_name: Optional[str] = None,
- loads: Callable[[Union[bytes, bytearray]], Any] = default_loads)
+def __init__(loads: Callable[[Union[bytes, bytearray]], Any] = default_loads)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L80)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L80)
***Arguments:***
-- `column_name`: if provided, the deserialized value will be wrapped into
-dictionary with `column_name` as a key.
- `loads`: function to parse json from bytes.
Default - :py:func:`quixstreams.utils.json.loads`.
@@ -49,7 +46,7 @@ Default - :py:func:`quixstreams.utils.json.loads`.
def split_values() -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L100)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L97)
Each Quix message might contain data for multiple Rows.
This property informs the downstream processors about that, so they can
@@ -66,7 +63,7 @@ def deserialize(model_key: str, value: Union[List[Mapping],
Mapping]) -> Iterable[Mapping]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L153)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L150)
Deserialization function for particular data types (Timeseries or EventData).
@@ -91,7 +88,7 @@ Iterable of dicts
class QuixTimeseriesSerializer(QuixSerializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L321)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L318)
Serialize data to JSON formatted according to Quix Timeseries format.
@@ -123,7 +120,7 @@ Output:
class QuixEventsSerializer(QuixSerializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/quix.py#L409)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/quix.py#L406)
Serialize data to JSON formatted according to Quix EventData format.
The input value is expected to be a dictionary with the following keys:
@@ -164,7 +161,7 @@ Output:
class BytesDeserializer(Deserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L44)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L44)
A deserializer to bypass bytes without any changes
@@ -176,7 +173,7 @@ A deserializer to bypass bytes without any changes
class BytesSerializer(Serializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L55)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L53)
A serializer to bypass bytes without any changes
@@ -188,7 +185,7 @@ A serializer to bypass bytes without any changes
class StringDeserializer(Deserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L64)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L62)
@@ -197,10 +194,10 @@ class StringDeserializer(Deserializer)
#### StringDeserializer.\_\_init\_\_
```python
-def __init__(column_name: Optional[str] = None, codec: str = "utf_8")
+def __init__(codec: str = "utf_8")
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L65)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L63)
Deserializes bytes to strings using the specified encoding.
@@ -219,7 +216,7 @@ A wrapper around `confluent_kafka.serialization.StringDeserializer`.
class IntegerDeserializer(Deserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L84)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L81)
Deserializes bytes to integers.
@@ -233,7 +230,7 @@ A wrapper around `confluent_kafka.serialization.IntegerDeserializer`.
class DoubleDeserializer(Deserializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L103)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L99)
Deserializes float to IEEE 764 binary64.
@@ -247,7 +244,7 @@ A wrapper around `confluent_kafka.serialization.DoubleDeserializer`.
class StringSerializer(Serializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L122)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L117)
@@ -259,7 +256,7 @@ class StringSerializer(Serializer)
def __init__(codec: str = "utf_8")
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L123)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L118)
Serializes strings to bytes using the specified encoding.
@@ -277,7 +274,7 @@ Serializes strings to bytes using the specified encoding.
class IntegerSerializer(Serializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L135)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L130)
Serializes integers to bytes
@@ -289,7 +286,7 @@ Serializes integers to bytes
class DoubleSerializer(Serializer)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/serializers/simple_types.py#L148)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/serializers/simple_types.py#L143)
Serializes floats to bytes
diff --git a/docs/api-reference/state.md b/docs/api-reference/state.md
index a5a3ddc1b..8e3bb5410 100644
--- a/docs/api-reference/state.md
+++ b/docs/api-reference/state.md
@@ -10,7 +10,7 @@
class State(Protocol)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L136)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L136)
Primary interface for working with key-value state data from `StreamingDataFrame`
@@ -24,7 +24,7 @@ Primary interface for working with key-value state data from `StreamingDataFrame
def get(key: Any, default: Any = None) -> Optional[Any]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L141)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L141)
Get the value for key if key is present in the state, else default
@@ -51,7 +51,7 @@ value or None if the key is not found and `default` is not provided
def set(key: Any, value: Any)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L151)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L151)
Set value for the key.
@@ -72,7 +72,7 @@ Set value for the key.
def delete(key: Any)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L159)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L159)
Delete value for the key.
@@ -94,7 +94,7 @@ This function always returns `None`, even if value is not found.
def exists(key: Any) -> bool
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/types.py#L168)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/types.py#L168)
Check if the key exists in state.
@@ -123,7 +123,7 @@ True if key exists, False otherwise
class RocksDBOptions(RocksDBOptionsType)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L25)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L25)
RocksDB database options.
@@ -148,7 +148,7 @@ Please see `rocksdict.Options` for a complete description of other options.
def to_options() -> rocksdict.Options
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/state/rocksdb/options.py#L53)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/state/rocksdb/options.py#L53)
Convert parameters to `rocksdict.Options`
diff --git a/docs/api-reference/topics.md b/docs/api-reference/topics.md
index de6b3079f..b0c764c6b 100644
--- a/docs/api-reference/topics.md
+++ b/docs/api-reference/topics.md
@@ -16,7 +16,7 @@
def convert_topic_list(topics: List[Topic]) -> List[ConfluentTopic]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L24)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L24)
Converts `Topic`s to `ConfluentTopic`s as required for Confluent's
@@ -42,7 +42,7 @@ list of confluent_kafka `ConfluentTopic`s
class TopicAdmin()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L47)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L47)
For performing "admin"-level operations on a Kafka cluster, mostly around topics.
@@ -60,7 +60,7 @@ def __init__(broker_address: Union[str, ConnectionConfig],
extra_config: Optional[Mapping] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L54)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L54)
@@ -82,7 +82,7 @@ or a ConnectionConfig object if authentication is required.
def list_topics(timeout: float = -1) -> Dict[str, ConfluentTopicMetadata]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L83)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L83)
Get a list of topics and their metadata from a Kafka cluster
@@ -109,7 +109,7 @@ def inspect_topics(topic_names: List[str],
timeout: float = 30) -> Dict[str, Optional[TopicConfig]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L94)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L94)
A simplified way of getting the topic configurations of the provided topics
@@ -141,7 +141,7 @@ def create_topics(topics: List[Topic],
finalize_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/admin.py#L176)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/admin.py#L176)
Create the given list of topics and confirm they are ready.
@@ -170,7 +170,7 @@ fail (it ignores issues for a topic already existing).
class TopicConfig()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L42)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L42)
Represents all kafka-level configuration for a kafka topic.
@@ -184,7 +184,7 @@ Generally used by Topic and any topic creation procedures.
class Topic()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L83)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L83)
A definition of a Kafka topic.
@@ -209,7 +209,7 @@ def __init__(
timestamp_extractor: Optional[TimestampExtractor] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L92)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L92)
@@ -235,7 +235,7 @@ milliseconds from a deserialized message.
def name() -> str
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L121)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L121)
Topic name
@@ -249,7 +249,7 @@ Topic name
def row_serialize(row: Row, key: Any) -> KafkaMessage
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L131)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L131)
Serialize Row to a Kafka message structure
@@ -277,7 +277,7 @@ def row_deserialize(
message: ConfluentKafkaMessageProto) -> Union[Row, List[Row], None]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/topic.py#L162)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/topic.py#L162)
Deserialize incoming Kafka message to a Row.
@@ -307,7 +307,7 @@ Row, list of Rows or None if the message is ignored.
def affirm_ready_for_create(topics: List[Topic])
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L20)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L20)
Validate a list of topics is ready for creation attempt
@@ -325,7 +325,7 @@ Validate a list of topics is ready for creation attempt
class TopicManager()
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L30)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L30)
The source of all topic management with quixstreams.
@@ -348,7 +348,7 @@ def __init__(topic_admin: TopicAdmin,
create_timeout: float = 60)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L53)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L53)
@@ -370,7 +370,7 @@ def __init__(topic_admin: TopicAdmin,
def changelog_topics() -> Dict[str, Dict[str, Topic]]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L103)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L103)
Note: `Topic`s are the changelogs.
@@ -387,7 +387,7 @@ returns: the changelog topic dict, {topic_name: {suffix: Topic}}
def all_topics() -> Dict[str, Topic]
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L112)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L112)
Every registered topic name mapped to its respective `Topic`.
@@ -405,7 +405,7 @@ def topic_config(num_partitions: Optional[int] = None,
extra_config: Optional[dict] = None) -> TopicConfig
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L220)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L220)
Convenience method for generating a `TopicConfig` with default settings
@@ -439,7 +439,7 @@ def topic(name: str,
timestamp_extractor: Optional[TimestampExtractor] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L241)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L241)
A convenience method for generating a `Topic`. Will use default config options
@@ -480,7 +480,7 @@ def repartition_topic(operation: str,
timeout: Optional[float] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L286)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L286)
Create an internal repartition topic.
@@ -514,7 +514,7 @@ def changelog_topic(topic_name: str,
timeout: Optional[float] = None) -> Topic
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L326)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L326)
Performs all the logic necessary to generate a changelog topic based on a
@@ -561,7 +561,7 @@ def create_topics(topics: List[Topic],
create_timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L383)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L383)
Creates topics via an explicit list of provided `Topics`.
@@ -587,7 +587,7 @@ def create_all_topics(timeout: Optional[float] = None,
create_timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L411)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L411)
A convenience method to create all Topic objects stored on this TopicManager.
@@ -608,7 +608,7 @@ A convenience method to create all Topic objects stored on this TopicManager.
def validate_all_topics(timeout: Optional[float] = None)
```
-[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/51c8064d2623b13b3e11c5acbb33409643f66f3c/quixstreams/models/topics/manager.py#L424)
+[[VIEW SOURCE]](https://github.com/quixio/quix-streams/blob/ea3d07177df3f11deb3c51e8337534408f5f68c1/quixstreams/models/topics/manager.py#L424)
Validates all topics exist and changelogs have correct topic and rep factor.
diff --git a/quixstreams/models/serializers/base.py b/quixstreams/models/serializers/base.py
index 609de16a8..0a8813667 100644
--- a/quixstreams/models/serializers/base.py
+++ b/quixstreams/models/serializers/base.py
@@ -45,14 +45,10 @@ def to_confluent_ctx(self, field: MessageField) -> _SerializationContext:
class Deserializer(abc.ABC):
- def __init__(self, column_name: Optional[str] = None, *args, **kwargs):
+ def __init__(self, *args, **kwargs):
"""
A base class for all Deserializers
-
- :param column_name: if provided, the deserialized value will be wrapped into
- dictionary with `column_name` as a key.
"""
- self.column_name = column_name
@property
def split_values(self) -> bool:
@@ -62,11 +58,6 @@ def split_values(self) -> bool:
"""
return False
- def _to_dict(self, value: Any) -> Union[Any, dict]:
- if self.column_name:
- return {self.column_name: value}
- return value
-
@abc.abstractmethod
def __call__(self, *args, **kwargs) -> Any: ...
diff --git a/quixstreams/models/serializers/json.py b/quixstreams/models/serializers/json.py
index 187c1cee8..0a5a824e1 100644
--- a/quixstreams/models/serializers/json.py
+++ b/quixstreams/models/serializers/json.py
@@ -35,25 +35,21 @@ def _to_json(self, value: Any):
class JSONDeserializer(Deserializer):
def __init__(
self,
- column_name: Optional[str] = None,
loads: Callable[[Union[bytes, bytearray]], Any] = default_loads,
):
"""
Deserializer that parses data from JSON
- :param column_name: if provided, the deserialized value will be wrapped into
- dictionary with `column_name` as a key.
:param loads: function to parse json from bytes.
Default - :py:func:`quixstreams.utils.json.loads`.
"""
- super().__init__(column_name=column_name)
+ super().__init__()
self._loads = loads
def __call__(
self, value: bytes, ctx: SerializationContext
) -> Union[Iterable[Mapping], Mapping]:
try:
- deserialized = self._loads(value)
- return self._to_dict(deserialized)
+ return self._loads(value)
except (ValueError, TypeError) as exc:
raise SerializationError(str(exc)) from exc
diff --git a/quixstreams/models/serializers/quix.py b/quixstreams/models/serializers/quix.py
index 2253f1b37..e9081b623 100644
--- a/quixstreams/models/serializers/quix.py
+++ b/quixstreams/models/serializers/quix.py
@@ -79,16 +79,13 @@ class QuixDeserializer(JSONDeserializer):
def __init__(
self,
- column_name: Optional[str] = None,
loads: Callable[[Union[bytes, bytearray]], Any] = default_loads,
):
"""
- :param column_name: if provided, the deserialized value will be wrapped into
- dictionary with `column_name` as a key.
:param loads: function to parse json from bytes.
Default - :py:func:`quixstreams.utils.json.loads`.
"""
- super().__init__(column_name=column_name, loads=loads)
+ super().__init__(loads=loads)
self._deserializers = {
QModelKey.TIMESERIESDATA: self.deserialize_timeseries,
QModelKey.PARAMETERDATA: self.deserialize_timeseries,
@@ -148,7 +145,7 @@ def deserialize_timeseries(
row_value["Tags"] = {tag: next(values) for tag, values in tags}
row_value[Q_TIMESTAMP_KEY] = timestamp_ns
- yield self._to_dict(row_value)
+ yield row_value
def deserialize(
self, model_key: str, value: Union[List[Mapping], Mapping]
@@ -163,11 +160,11 @@ def deserialize(
return self._deserializers[model_key](value)
def deserialize_event_data(self, value: Mapping) -> Iterable[Mapping]:
- yield self._to_dict(self._parse_event_data(value))
+ yield self._parse_event_data(value)
def deserialize_event_data_list(self, value: List[Mapping]) -> Iterable[Mapping]:
for item in value:
- yield self._to_dict(self._parse_event_data(item))
+ yield self._parse_event_data(item)
def _parse_event_data(self, value: Mapping) -> Mapping:
if not isinstance(value, Mapping):
diff --git a/quixstreams/models/serializers/simple_types.py b/quixstreams/models/serializers/simple_types.py
index 846fa2f44..c63ad5f3e 100644
--- a/quixstreams/models/serializers/simple_types.py
+++ b/quixstreams/models/serializers/simple_types.py
@@ -46,10 +46,8 @@ class BytesDeserializer(Deserializer):
A deserializer to bypass bytes without any changes
"""
- def __call__(
- self, value: bytes, ctx: SerializationContext
- ) -> Union[bytes, Mapping[str, bytes]]:
- return self._to_dict(value)
+ def __call__(self, value: bytes, ctx: SerializationContext) -> bytes:
+ return value
class BytesSerializer(Serializer):
@@ -62,14 +60,14 @@ def __call__(self, value: bytes, ctx: SerializationContext) -> bytes:
class StringDeserializer(Deserializer):
- def __init__(self, column_name: Optional[str] = None, codec: str = "utf_8"):
+ def __init__(self, codec: str = "utf_8"):
"""
Deserializes bytes to strings using the specified encoding.
:param codec: string encoding
A wrapper around `confluent_kafka.serialization.StringDeserializer`.
"""
- super().__init__(column_name=column_name)
+ super().__init__()
self._codec = codec
self._deserializer = _StringDeserializer(codec=self._codec)
@@ -77,8 +75,7 @@ def __init__(self, column_name: Optional[str] = None, codec: str = "utf_8"):
def __call__(
self, value: bytes, ctx: SerializationContext
) -> Union[str, Mapping[str, str]]:
- deserialized = self._deserializer(value=value)
- return self._to_dict(deserialized)
+ return self._deserializer(value=value)
class IntegerDeserializer(Deserializer):
@@ -88,16 +85,15 @@ class IntegerDeserializer(Deserializer):
A wrapper around `confluent_kafka.serialization.IntegerDeserializer`.
"""
- def __init__(self, column_name: Optional[str] = None):
- super().__init__(column_name=column_name)
+ def __init__(self):
+ super().__init__()
self._deserializer = _IntegerDeserializer()
@_wrap_serialization_error
def __call__(
self, value: bytes, ctx: SerializationContext
) -> Union[int, Mapping[str, int]]:
- deserialized = self._deserializer(value=value)
- return self._to_dict(deserialized)
+ return self._deserializer(value=value)
class DoubleDeserializer(Deserializer):
@@ -107,16 +103,15 @@ class DoubleDeserializer(Deserializer):
A wrapper around `confluent_kafka.serialization.DoubleDeserializer`.
"""
- def __init__(self, column_name: Optional[str] = None):
- super().__init__(column_name=column_name)
+ def __init__(self):
+ super().__init__()
self._deserializer = _DoubleDeserializer()
@_wrap_serialization_error
def __call__(
self, value: bytes, ctx: SerializationContext
) -> Union[float, Mapping[str, float]]:
- deserialized = self._deserializer(value=value)
- return self._to_dict(deserialized)
+ return self._deserializer(value=value)
class StringSerializer(Serializer):
diff --git a/tests/test_quixstreams/test_app.py b/tests/test_quixstreams/test_app.py
index 9d83c2465..571a9a310 100644
--- a/tests/test_quixstreams/test_app.py
+++ b/tests/test_quixstreams/test_app.py
@@ -121,11 +121,10 @@ def on_message_processed(topic_, partition, offset):
on_message_processed=on_message_processed,
)
- column_name = "root"
partition_num = 0
topic_in = app.topic(
str(uuid.uuid4()),
- value_deserializer=JSONDeserializer(column_name=column_name),
+ value_deserializer=JSONDeserializer(),
)
topic_out = app.topic(
str(uuid.uuid4()),
@@ -178,7 +177,7 @@ def on_message_processed(topic_, partition, offset):
for row in rows_out:
assert row.topic == topic_out.name
assert row.key == data["key"]
- assert row.value == {column_name: loads(data["value"].decode())}
+ assert row.value == loads(data["value"].decode())
assert row.timestamp == timestamp_ms
assert row.headers == headers
@@ -240,9 +239,7 @@ def count_and_fail(_):
def test_run_consumer_error_raised(self, app_factory, executor):
# Set "auto_offset_reset" to "error" to simulate errors in Consumer
app = app_factory(auto_offset_reset="error")
- topic = app.topic(
- str(uuid.uuid4()), value_deserializer=JSONDeserializer(column_name="root")
- )
+ topic = app.topic(str(uuid.uuid4()), value_deserializer=JSONDeserializer())
sdf = app.dataframe(topic)
# Stop app after 10s if nothing failed
diff --git a/tests/test_quixstreams/test_models/test_quix_serializers.py b/tests/test_quixstreams/test_models/test_quix_serializers.py
index 72427e33b..c0cd3a651 100644
--- a/tests/test_quixstreams/test_models/test_quix_serializers.py
+++ b/tests/test_quixstreams/test_models/test_quix_serializers.py
@@ -261,66 +261,6 @@ def test_deserialize_timeseries_timestamp_field_clash(
)
)
- @pytest.mark.parametrize("as_legacy", [False, True])
- def test_deserialize_timeseries_with_column_name_success(
- self, quix_timeseries_factory, as_legacy
- ):
- message = quix_timeseries_factory(
- binary={"param1": [b"1", None], "param2": [None, b"1"]},
- strings={"param3": [1, None], "param4": [None, 1.1]},
- numeric={"param5": ["1", None], "param6": [None, "a"], "param7": ["", ""]},
- tags={"tag1": ["value1", "value2"], "tag2": ["value3", "value4"]},
- timestamps=[1234567890, 1234567891],
- as_legacy=as_legacy,
- )
-
- expected = [
- {
- "root": {
- "param1": b"1",
- "param2": None,
- "param3": 1,
- "param4": None,
- "param5": "1",
- "param6": None,
- "param7": "",
- "Tags": {"tag1": "value1", "tag2": "value3"},
- "Timestamp": 1234567890,
- }
- },
- {
- "root": {
- "param1": None,
- "param2": b"1",
- "param3": None,
- "param4": 1.1,
- "param5": None,
- "param6": "a",
- "param7": "",
- "Tags": {"tag1": "value2", "tag2": "value4"},
- "Timestamp": 1234567891,
- }
- },
- ]
-
- deserializer = QuixDeserializer(column_name="root")
- rows = list(
- deserializer(
- value=message.value(),
- ctx=SerializationContext(
- topic=message.topic(),
- headers=message.headers(),
- ),
- )
- )
- assert len(rows) == len(expected)
- for item, row in zip(expected, rows):
- assert "root" in row
- value = row["root"]
- item = row["root"]
- for key in item:
- assert item[key] == value[key]
-
@pytest.mark.parametrize("as_legacy", [False, True])
def test_deserialize_eventdata_success(
self, quix_eventdata_factory, quix_eventdata_params_factory, as_legacy
@@ -381,45 +321,6 @@ def test_deserialize_eventdata_list_success(
assert row["Value"] == params.value
assert row["Tags"] == params.tags
- @pytest.mark.parametrize("as_legacy", [False, True])
- def test_deserialize_event_data_with_column(
- self,
- quix_eventdata_list_factory,
- quix_eventdata_params_factory,
- as_legacy,
- ):
- event_params = [
- quix_eventdata_params_factory(
- id="test",
- value={"blabla": 123},
- tags={"tag1": "1"},
- timestamp=1234567790,
- ),
- quix_eventdata_params_factory(
- id="test2",
- value={"blabla2": 1234},
- tags={"tag2": "2"},
- timestamp=1234567891,
- ),
- ]
- message = quix_eventdata_list_factory(params=event_params, as_legacy=as_legacy)
-
- deserializer = QuixDeserializer(column_name="root")
- rows = list(
- deserializer(
- value=message.value(),
- ctx=SerializationContext(topic="test", headers=message.headers()),
- )
- )
- assert len(rows) == 2
- for row, params in zip(rows, event_params):
- assert "root" in row
- row = row["root"]
- assert row["Timestamp"] == params.timestamp
- assert row["Id"] == params.id
- assert row["Value"] == params.value
- assert row["Tags"] == params.tags
-
class TestQuixTimeseriesSerializer:
def test_serialize_dict_success(self):
diff --git a/tests/test_quixstreams/test_models/test_serializers.py b/tests/test_quixstreams/test_models/test_serializers.py
index 0f51435d9..ce3012bd4 100644
--- a/tests/test_quixstreams/test_models/test_serializers.py
+++ b/tests/test_quixstreams/test_models/test_serializers.py
@@ -77,32 +77,6 @@ def test_deserialize_no_column_name_success(
):
assert deserializer(value, ctx=dummy_context) == expected
- @pytest.mark.parametrize(
- "deserializer, value, expected",
- [
- (
- IntegerDeserializer("value"),
- int_to_bytes(123),
- {"value": 123},
- ),
- (DoubleDeserializer("value"), float_to_bytes(123), {"value": 123.0}),
- (DoubleDeserializer("value"), float_to_bytes(123.123), {"value": 123.123}),
- (StringDeserializer("value"), b"abc", {"value": "abc"}),
- (
- StringDeserializer("value", codec="cp1251"),
- "abc".encode("cp1251"),
- {"value": "abc"},
- ),
- (BytesDeserializer("value"), b"123123", {"value": b"123123"}),
- (JSONDeserializer("value"), b"123123", {"value": 123123}),
- (JSONDeserializer("value"), b'{"a":"b"}', {"value": {"a": "b"}}),
- ],
- )
- def test_deserialize_with_column_name_success(
- self, deserializer: Deserializer, value, expected
- ):
- assert deserializer(value, ctx=dummy_context) == expected
-
@pytest.mark.parametrize(
"deserializer, value",
[
diff --git a/tests/test_quixstreams/test_models/test_topics/test_topics.py b/tests/test_quixstreams/test_models/test_topics/test_topics.py
index 739759cd9..ef639a0ca 100644
--- a/tests/test_quixstreams/test_models/test_topics/test_topics.py
+++ b/tests/test_quixstreams/test_models/test_topics/test_topics.py
@@ -40,8 +40,6 @@ def __call__(self, value: bytes, ctx: SerializationContext):
deserialized = self._deserializer(value=value)
if not deserialized % 3:
raise IgnoreMessage("Ignore numbers divisible by 3")
- if self.column_name:
- return {self.column_name: deserialized}
return deserialized
@@ -51,11 +49,11 @@ class TestTopic:
[
(
IntegerDeserializer(),
- IntegerDeserializer("column"),
+ IntegerDeserializer(),
int_to_bytes(1),
int_to_bytes(2),
1,
- {"column": 2},
+ 2,
),
(
DoubleDeserializer(),
@@ -75,11 +73,11 @@ class TestTopic:
),
(
DoubleDeserializer(),
- JSONDeserializer(column_name="root"),
+ JSONDeserializer(),
float_to_bytes(1.1),
json.dumps({"key": "value"}).encode(),
1.1,
- {"root": {"key": "value"}},
+ {"key": "value"},
),
(
BytesDeserializer(),
@@ -194,13 +192,13 @@ def test_row_list_deserialize_success(
def test_row_deserialize_ignorevalueerror_raised(self, topic_manager_topic_factory):
topic = topic_manager_topic_factory(
- value_deserializer=IgnoreDivisibleBy3Deserializer(column_name="value"),
+ value_deserializer=IgnoreDivisibleBy3Deserializer(),
)
row = topic.row_deserialize(
message=ConfluentKafkaMessageStub(key=b"key", value=int_to_bytes(4))
)
assert row
- assert row.value == {"value": 4}
+ assert row.value == 4
row = topic.row_deserialize(
message=ConfluentKafkaMessageStub(key=b"key", value=int_to_bytes(3))