Skip to content

Commit

Permalink
add get started with kafka api (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
YangKian authored Jan 12, 2024
1 parent 3be5e4e commit f88112a
Show file tree
Hide file tree
Showing 3 changed files with 345 additions and 0 deletions.
97 changes: 97 additions & 0 deletions assets/kafka-quick-start.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
version: "3.5"

services:
hserver:
image: hstreamdb/hstream:latest
depends_on:
- zookeeper
- hstore
ports:
- "127.0.0.1:9092:9092"
expose:
- 9092
networks:
- hstream-kafka-quickstart
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /tmp:/tmp
- data_store:/data/store
command:
- bash
- "-c"
- |
set -e
/usr/local/script/wait-for-storage.sh hstore 6440 zookeeper 2181 600 \
/usr/local/bin/hstream-server kafka \
--bind-address 0.0.0.0 --port 9092 \
--server-id 100 \
--seed-nodes "$$(hostname -I | awk '{print $$1}'):6571" \
--advertised-address $$(hostname -I | awk '{print $$1}') \
--metastore-uri zk://zookeeper:2181 \
--store-config /data/store/logdevice.conf \
--store-log-level warning \
--log-with-color
hstore:
image: hstreamdb/hstream:latest
networks:
- hstream-kafka-quickstart
volumes:
- data_store:/data/store
command:
- bash
- "-c"
- |
set -ex
# N.B. "enable-dscp-reflection=false" is required for linux kernel which
# doesn't support dscp reflection, e.g. centos7.
/usr/local/bin/ld-dev-cluster -n 3 --root /data/store \
--use-tcp --tcp-host $$(hostname -I | awk '{print $$1}') \
--user-admin-port 6440 \
--param enable-dscp-reflection=false \
--no-interactive
zookeeper:
image: zookeeper:3.8
expose:
- 2181
networks:
- hstream-kafka-quickstart
volumes:
- data_zk_data:/data
- data_zk_datalog:/datalog

hserver-init:
image: hstreamdb/hstream:latest
depends_on:
- hserver
networks:
- hstream-kafka-quickstart
command:
- bash
- "-c"
- |
timeout=60
until ( \
/usr/local/bin/hstream-kafka-cli --host hserver --port 9092 node status \
) >/dev/null 2>&1; do
>&2 echo 'Waiting for servers ...'
sleep 1
timeout=$$((timeout - 1))
[ $$timeout -le 0 ] && echo 'Timeout!' && exit 1;
done; \
/usr/local/bin/hstream-kafka-cli --host hserver --port 9092 node init
networks:
hstream-kafka-quickstart:
name: hstream-kafka-quickstart

volumes:
data_store:
name: kafka_quickstart_data_store
data_zk_data:
name: kafka_quickstart_data_zk_data
data_zk_datalog:
name: kafka_quickstart_data_zk_datalog
data_prom_config:
name: kafka_quickstart_data_prom_config
97 changes: 97 additions & 0 deletions assets/kafka-quick-start.yaml.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
version: "3.5"

services:
hserver:
image: hstreamdb/hstream:{{ $version() }}
depends_on:
- zookeeper
- hstore
ports:
- "127.0.0.1:9092:9092"
expose:
- 9092
networks:
- hstream-kafka-quickstart
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /tmp:/tmp
- data_store:/data/store
command:
- bash
- "-c"
- |
set -e
/usr/local/script/wait-for-storage.sh hstore 6440 zookeeper 2181 600 \
/usr/local/bin/hstream-server kafka \
--bind-address 0.0.0.0 --port 9092 \
--server-id 100 \
--seed-nodes "$$(hostname -I | awk '{print $$1}'):6571" \
--advertised-address $$(hostname -I | awk '{print $$1}') \
--metastore-uri zk://zookeeper:2181 \
--store-config /data/store/logdevice.conf \
--store-log-level warning \
--log-with-color

hstore:
image: hstreamdb/hstream:{{ $version() }}
networks:
- hstream-kafka-quickstart
volumes:
- data_store:/data/store
command:
- bash
- "-c"
- |
set -ex
# N.B. "enable-dscp-reflection=false" is required for linux kernel which
# doesn't support dscp reflection, e.g. centos7.
/usr/local/bin/ld-dev-cluster -n 3 --root /data/store \
--use-tcp --tcp-host $$(hostname -I | awk '{print $$1}') \
--user-admin-port 6440 \
--param enable-dscp-reflection=false \
--no-interactive

zookeeper:
image: zookeeper:3.8
expose:
- 2181
networks:
- hstream-kafka-quickstart
volumes:
- data_zk_data:/data
- data_zk_datalog:/datalog

hserver-init:
image: hstreamdb/hstream:{{ $version() }}
depends_on:
- hserver
networks:
- hstream-kafka-quickstart
command:
- bash
- "-c"
- |
timeout=60
until ( \
/usr/local/bin/hstream-kafka --host hserver --port 9092 node status \
) >/dev/null 2>&1; do
>&2 echo 'Waiting for servers ...'
sleep 1
timeout=$$((timeout - 1))
[ $$timeout -le 0 ] && echo 'Timeout!' && exit 1;
done; \
/usr/local/bin/hstream-kafka --host hserver --port 9092 node init

networks:
hstream-kafka-quickstart:
name: hstream-kafka-quickstart

volumes:
data_store:
name: kafka_quickstart_data_store
data_zk_data:
name: kafka_quickstart_data_zk_data
data_zk_datalog:
name: kafka_quickstart_data_zk_datalog
data_prom_config:
name: kafka_quickstart_data_prom_config
151 changes: 151 additions & 0 deletions docs/start/get-started-with-kafka-api.md
Original file line number Diff line number Diff line change
@@ -1 +1,152 @@
# Get Started with Kafka API

## Requirement

For optimal performance, we suggest utilizing a Linux kernel version of 4.14 or
higher when initializing an HStream Kafka Cluster.

::: tip
In the case it is not possible for the user to use a Linux kernel version of
4.14 or above, we recommend adding the option `--enable-dscp-reflection=false`
to HStore while starting the HStream Kafka Cluster.
:::

## Installation

### Install docker

::: tip
If you have already installed docker, you can skip this step.
:::

See [Install Docker Engine](https://docs.docker.com/engine/install/), and
install it for your operating system. Please carefully check that you have met
all prerequisites.

Confirm that the Docker daemon is running:

```sh
docker version
```

::: tip
On Linux, Docker needs root privileges. You can also run Docker as a
non-root user, see [Post-installation steps for Linux][non-root-docker].
:::

### Install docker compose

::: tip
If you have already installed docker compose, you can skip this step.
:::

See [Install Docker Compose](https://docs.docker.com/compose/install/), and
install it for your operating system. Please carefully check that you met all
prerequisites.

```sh
docker-compose version
```

## Start HStreamDB Services

::: warning
Do NOT use this configuration in your production environment!
:::

Create a docker-compose.yaml file for docker compose, you can
[download][kafka-quick-start.yaml] or paste the following contents:

<<< @/../assets/kafka-quick-start.yaml.template{yaml-vue}

then run:

```sh
docker-compose -f kafka-quick-start.yaml up
```

If you see some thing like this, then you have a running hstream kafka cluster:

```txt
hserver_1 |[INFO][2024-01-12T02:15:08+0000][app/lib/KafkaServer.hs:193:11][thread#30]Cluster is ready!
```

::: tip
You can also run in background:
```sh
docker-compose -f kafka-quick-start.yaml up -d
```
:::

::: tip
If you want to show logs of server, run:
```sh
docker-compose -f kafka-quick-start.yaml logs -f hserver
```
:::

## Connect HStream Kafka with CLI

You can use `hstream-kafka` command-line interface (CLI), which is included in the `hstreamdb/hstream` image, to interactive with hstream kafka cluster

Start an instance of `hstreamdb/hstream` using Docker:

```sh-vue
docker run -it --rm --name kafka-cli --network host hstreamdb/hstream:{{ $version() }} bash
```

## Create a topic

To create a topic, you can use `hstream-kafka topic create` command. Now we will create a topic with 3 partitions

```sh
hstream-kafka topic create demo --partitions 3
```

```sh
+------+------------+--------------------+
| Name | Partitions | Replication-factor |
+------+------------+--------------------+
| demo | 3 | 1 |
+------+------------+--------------------+
```

## Produce data to a topic

The `hstream-kafka produce` command can be used to produce data to a topic in a interactive shell.
```sh
hstream-kafka produce demo --separator "@" -i
```
- With the `--separator` option, you can specify the separator for key. The default separator is "@". Using the separator, you can assign a key to each record. Record with same key will be append into same partition of the topic.
- Using `-i` option to enter the interactive mode.

```sh
info@This is a info level log.
warn@This is a warn level log.
hello hstream!
```
Here we have written three pieces of data. The first two are associated with specified key. The last one does not specify a key.

For additional information, you can use `hstream-kafka produce -h`.

## Consume data from a topic

To consume data from a particular topic, the `hstream-kafka consume` command is used.

```sh
hstream-kafka consume --group-id test-group --topic demo --earliest --verbose --eof
```
- Using the `--verbose` option will print the creation timestamp and the key of the record. The `--eof` option will tell the consumer to exit after receiving the last message in the partition.

```sh
CreateTimestamp: 1705026820718 Key: info This is a info level log.
CreateTimestamp: 1705026912306 Key: hello hstream!
CreateTimestamp: 1705026833287 Key: warn This is a warn level log.
EOF reached for all 3 partition(s)
Consumed 3 messages (62 bytes)
```

For additional information, you can use `hstream-kafka consume -h`.

[non-root-docker]: https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user
[kafka-quick-start.yaml]: https://raw.githubusercontent.com/hstreamdb/docs-next/main/assets/kafka-quick-start.yaml

0 comments on commit f88112a

Please sign in to comment.