diff --git a/Dockerfile b/Dockerfile index 3466bc5..7259e53 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM openjdk:17-bullseye +FROM openjdk:22-bookworm ENV KAFKA_VERSION=3.6.0 ENV SCALA_VERSION=2.13 @@ -20,7 +20,10 @@ RUN wget -O /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \ && rm /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \ && ln -s /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} ${KAFKA_HOME} +COPY ./jmx-exporter-config.yaml /opt/jmx-exporter-config.yaml +RUN wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.17.2/jmx_prometheus_javaagent-0.17.2.jar -O /opt/jmx_prometheus_javaagent-0.17.2.jar +ENV KAFKA_OPTS="-javaagent:/opt/jmx_prometheus_javaagent-0.17.2.jar=9000:/opt/jmx-exporter-config.yaml" + COPY --chown=kafka:kafka ./entrypoint.sh / RUN ["chmod", "+x", "/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"] - diff --git a/README.md b/README.md index a7626af..138fcf9 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Apache Kafka starting in version 3.3 is using the Raft metadata protocol to aban

- +

More info can be found in the official Apache Kafka docs https://kafka.apache.org/documentation/#kraft @@ -14,7 +14,9 @@ More info can be found in the official Apache Kafka docs https://kafka.apache.or # ## Table of Content - [KRaft "Kafka Raft" on Kubernetes](#kraft-kafka-raft-on-kubernetes) +- [](#) - [Table of Content](#table-of-content) +- [](#-1) - [Author](#author) - [Benefits](#benefits) - [Environment variables](#environment-variables) @@ -41,33 +43,30 @@ Stefan Jährling @ System Vertrieb Alexander GmbH ## Environment variables ```yaml -set the default log replicas value - name: REPLICAS value: '3' -define the kubernetes service - name: SERVICE value: kafka-svc -set the used namespace used for the current deployment - name: NAMESPACE value: kafka -here you need to specify the path for the log storage - name: SHARE_DIR value: /mnt/kafka -choose a cluster UUID that every node uses to be able to join - name: CLUSTER_ID value: ys-gRE0zp9AusfyPBDNyON # $ kafka-storage.sh random-uuid; 16 bytes, see docs: https://kafka.apache.org/33/documentation.html#quickstart_startserver -choose the default replicatin factor - - name: DEFAULT_REPLICATION_FACTOR - value: '3' + #FIXME: + # - name: DEFAULT_REPLICATION_FACTOR + # value: '3' -a value for the minimum insync replicas - name: DEFAULT_MIN_INSYNC_REPLICAS value: '2' + + - name: KAFKA_NUM_PARTITIONS + value: '1' ``` ## HowTo @@ -76,8 +75,8 @@ a value for the minimum insync replicas > tar xzf kafka-kraft.tar.gz #1 build docker image - > docker build -t myregistry/kafkakraft: . - > docker build -t myregistry/kafka-connect: ./kafka-connect/ + > docker build -t myregistry/kafkakraft: . + > docker build -t myregistry/kafka-connect: ./kafka-connect/ #1b push image to registry > docker push myregistry/kafkakraft: @@ -98,7 +97,7 @@ v0.6 v0.5a - due to an unfixed bug, backported to kafka v3.3.2 - + v0.5 - added kafka-connect - updated github actions @@ -121,6 +120,6 @@ v0.2 - using kafka v3.3.2 v0.1 -- added Dockerfile +- added Dockerfile - updated entrypoint.sh - switched to kafka v3.3.1 diff --git a/deploy-kafka.yaml b/deploy-kafka.yaml index 31ab49a..9cf1026 100644 --- a/deploy-kafka.yaml +++ b/deploy-kafka.yaml @@ -11,14 +11,21 @@ metadata: name: kafka-svc labels: app: kafka-svc + annotations: + "prometheus.io/scrape": "true" + "prometheus.io/port": "9000" namespace: kafka spec: - clusterIP: None + type: ClusterIP ports: - name: '9092' port: 9092 protocol: TCP targetPort: 9092 + - name: '9000' + port: 9000 + protocol: TCP + targetPort: 9000 selector: app: kafkakraft @@ -41,16 +48,15 @@ spec: labels: app: kafkakraft spec: - ### TODO: add JMX plugin to gather metrics + securityContext: + runAsNonRoot: true + fsGroup: 1001 + runAsUser: 1001 + runAsGroup: 1001 containers: - name: kafka-container - image: kafkakraft/kafkakraft:latest + image: kafkakraft/kafkakraft:3.6.0 imagePullPolicy: Always - # securityContext: # deactivated - issue can be tracked here: https://github.com/stefanjay/kafka-kraft-on-k8s/issues/23 - # allowPrivilegeEscalation: false - # readOnlyRootFilesystem: false - # runAsNonRoot: true - # runAsUser: 1001 ports: - containerPort: 9092 - containerPort: 9093 @@ -64,20 +70,22 @@ spec: - name: SHARE_DIR value: /mnt/kafka - name: CLUSTER_ID - value: jSos9MFXTo6_3BOAj8mOcQ - - name: DEFAULT_REPLICATION_FACTOR - value: '3' + value: QwjfU6MPQ_CMdFsbCx7EGg +# FIXME: # - name: DEFAULT_REPLICATION_FACTOR + # value: '3' - name: DEFAULT_MIN_INSYNC_REPLICAS value: '2' + - name: KAFKA_NUM_PARTITIONS + value: '1' volumeMounts: - - name: kafka-storage-data - mountPath: /mnt/kafka + - name: kafka-storage-data + mountPath: /mnt/kafka volumeClaimTemplates: - - metadata: - name: kafka-storage-data - spec: - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: 100Gi + - metadata: + name: kafka-storage-data + spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 100Gi diff --git a/entrypoint.sh b/entrypoint.sh index fb82d15..dc31aa1 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -6,31 +6,25 @@ ADVERTISED_LISTENERS="PLAINTEXT://kafka-$NODE_ID.$SERVICE.$NAMESPACE.svc:9092" CONTROLLER_QUORUM_VOTERS="" for i in $( seq 0 $REPLICAS); do - if [[ $i != $REPLICAS ]]; then - CONTROLLER_QUORUM_VOTERS="$CONTROLLER_QUORUM_VOTERS$i@kafka-$i.$SERVICE.$NAMESPACE.svc:9093," - else - CONTROLLER_QUORUM_VOTERS=${CONTROLLER_QUORUM_VOTERS::-1} - fi + if [[ $i != $REPLICAS ]]; then + CONTROLLER_QUORUM_VOTERS="$CONTROLLER_QUORUM_VOTERS$i@kafka-$i.$SERVICE.$NAMESPACE.svc:9093," + else + CONTROLLER_QUORUM_VOTERS=${CONTROLLER_QUORUM_VOTERS::-1} + fi done mkdir -p $SHARE_DIR/$NODE_ID -echo $CLUSTER_ID > $SHARE_DIR/cluster_id -# debug for shell -# sleep 3600 - -# adding setting to reflect the setting from -# deploy-kafka.yaml -echo "default.replication.factor=$KAFKA_DEFAULT_REPLICATION_FACTOR" >> /opt/kafka/config/kraft/server.properties +#FIXME: echo "default.replication.factor=$DEFAULT_REPLICATION_FACTOR" >> /opt/kafka/config/kraft/server.properties sed -e "s+^node.id=.*+node.id=$NODE_ID+" \ --e "s+^num.partitions=.*+num.partitions=$KAFKA_NUM_PARTITIONS+" \ --e "s+^controller.quorum.voters=.*+controller.quorum.voters=$CONTROLLER_QUORUM_VOTERS+" \ --e "s+^listeners=.*+listeners=$LISTENERS+" \ --e "s+^advertised.listeners=.*+advertised.listeners=$ADVERTISED_LISTENERS+" \ --e "s+^log.dirs=.*+log.dirs=$SHARE_DIR/$NODE_ID+" \ -/opt/kafka/config/kraft/server.properties > /opt/kafka/server.properties.updated \ -&& mv /opt/kafka/server.properties.updated /opt/kafka/config/kraft/server.properties + -e "s+^num.partitions=.*+num.partitions=$KAFKA_NUM_PARTITIONS+" \ + -e "s+^controller.quorum.voters=.*+controller.quorum.voters=$CONTROLLER_QUORUM_VOTERS+" \ + -e "s+^listeners=.*+listeners=$LISTENERS+" \ + -e "s+^advertised.listeners=.*+advertised.listeners=$ADVERTISED_LISTENERS+" \ + -e "s+^log.dirs=.*+log.dirs=$SHARE_DIR/$NODE_ID+" \ + /opt/kafka/config/kraft/server.properties > /opt/kafka/server.properties.updated \ + && mv /opt/kafka/server.properties.updated /opt/kafka/config/kraft/server.properties kafka-storage.sh format -t $CLUSTER_ID -c /opt/kafka/config/kraft/server.properties diff --git a/jmx-exporter-config.yaml b/jmx-exporter-config.yaml new file mode 100644 index 0000000..e69de29 diff --git a/kafka-connect/Dockerfile b/kafka-connect/Dockerfile index c334786..adfb3cf 100644 --- a/kafka-connect/Dockerfile +++ b/kafka-connect/Dockerfile @@ -1,4 +1,4 @@ -FROM openjdk:21-bullseye +FROM openjdk:22-bookworm ENV KAFKA_VERSION=3.6.0 ENV SCALA_VERSION=2.13 @@ -20,7 +20,12 @@ RUN wget -O /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \ && rm /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \ && ln -s /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} ${KAFKA_HOME} -COPY ./kafka-connect/connect-distributed.properties /opt/kafka/config/connect-distributed.properties +COPY ./connect-distributed.properties /opt/kafka/config/connect-distributed.properties -COPY ./kafka-connect/entrypoint.sh / -ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] +COPY ./jmx-exporter-config.yaml /opt/jmx-exporter-config.yaml +RUN wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.17.2/jmx_prometheus_javaagent-0.17.2.jar -O /opt/jmx_prometheus_javaagent-0.17.2.jar +ENV KAFKA_OPTS="-javaagent:/opt/jmx_prometheus_javaagent-0.17.2.jar=9000:/opt/jmx-exporter-config.yaml" + +COPY --chown=kafka:kafka ./entrypoint.sh / +RUN ["chmod", "+x", "/entrypoint.sh"] +ENTRYPOINT ["/entrypoint.sh"] diff --git a/kafka-connect/connect-distributed.properties b/kafka-connect/connect-distributed.properties index 2eacada..c22a50c 100644 --- a/kafka-connect/connect-distributed.properties +++ b/kafka-connect/connect-distributed.properties @@ -20,7 +20,7 @@ # the `bootstrap.servers` and those specifying replication factors. # unique name for the cluster, used in forming the Connect cluster group. Note that this must not conflict with consumer group IDs -group.id=bdp-connect-cluster +group.id=kafka-connect-cluster # A list of host/port pairs to use for establishing the initial connection to the Kafka cluster. bootstrap.servers=kafka-svc.kafka.svc:9092 @@ -88,11 +88,11 @@ listeners=HTTP://:8083 #rest.advertised.listener= # Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins -# (connectors, converters, transformations). The list should consist of top level directories that include -# any combination of: +# (connectors, converters, transformations). The list should consist of top level directories that include +# any combination of: # a) directories immediately containing jars with plugins and their dependencies # b) uber-jars with plugins and their dependencies # c) directories immediately containing the package directory structure of classes of plugins and their dependencies -# Examples: +# Examples: # plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors, -plugin.path=/opt/kafka/libs/splunk-kafka-connect-v2.1.0.jar +plugin.path=/opt/kafka/libs/splunk-kafka-connect-v2.1.2.jar diff --git a/kafka-connect/deploy-kafkaconnect.yaml b/kafka-connect/deploy-kafkaconnect.yaml index 945bf62..269c5fc 100644 --- a/kafka-connect/deploy-kafkaconnect.yaml +++ b/kafka-connect/deploy-kafkaconnect.yaml @@ -6,6 +6,9 @@ metadata: labels: app: kafka-connect-svc namespace: kafka + annotations: + "prometheus.io/scrape": "true" + "prometheus.io/port": "9000" spec: clusterIP: None ports: @@ -13,6 +16,10 @@ spec: port: 8083 protocol: TCP targetPort: 8083 + - name: '9000' + port: 9000 + protocol: TCP + targetPort: 9000 selector: app: kafka-connect @@ -38,7 +45,7 @@ spec: ### TODO: add JMX plugin to gather metrics containers: - name: kafka-connect-container - image: kafkakraft/kafkakraft-connect:3.4.0 + image: kafkakraft/kafkakraft-connect:3.6.0 imagePullPolicy: Always ports: - containerPort: 8083 diff --git a/kafka-connect/jmx-exporter-config.yaml b/kafka-connect/jmx-exporter-config.yaml new file mode 100644 index 0000000..e69de29 diff --git a/kafka-connect/splunk-kafka-connect-v2.1.2.jar b/kafka-connect/splunk-kafka-connect-v2.1.2.jar new file mode 100644 index 0000000..f24499a Binary files /dev/null and b/kafka-connect/splunk-kafka-connect-v2.1.2.jar differ