chore: Perf test for otel logging #1318

Merged 39 commits on Aug 13, 2024

Changes from all commits · 39 commits
221edc7
initial config
k15r Jul 29, 2024
636848d
initial loadtest-setup
k15r Jul 31, 2024
30c8b58
fix script
k15r Jul 31, 2024
a75c5aa
update config to use batch processor
k15r Aug 1, 2024
9efd589
refactor loadtest script to be a bit more generic
k15r Aug 2, 2024
720df0f
Merge branch 'main' into perf-test-otel-logs
k15r Aug 2, 2024
6a2bd02
fix typo
k15r Aug 2, 2024
a7d7b45
using the correct script this time :(
k15r Aug 2, 2024
6a702c9
attempt to fix the script
k15r Aug 2, 2024
7c278bf
fix shorthand if s
k15r Aug 2, 2024
2071ab1
enable size option
k15r Aug 2, 2024
f7a48b7
fix sed
k15r Aug 2, 2024
b811006
limit sent batch size to prevent grpc resource exhaustion
k15r Aug 2, 2024
a0057e2
fix send batch size
k15r Aug 5, 2024
090589b
switch log load test to kustomize
k15r Aug 5, 2024
92e1a0c
convert vars to replace in kustomize
k15r Aug 5, 2024
48b76f7
fix namespace
k15r Aug 5, 2024
74f4193
add overlay flag
k15r Aug 5, 2024
48faa5b
change output
k15r Aug 6, 2024
c710de4
fix cleanup
k15r Aug 6, 2024
311abf9
add suffix for cluster name
k15r Aug 6, 2024
cdd559e
fix name assembly
k15r Aug 7, 2024
41697ab
add overlay to output
k15r Aug 7, 2024
1a039a1
print config
k15r Aug 7, 2024
417cf43
use overlay setting from matrix
k15r Aug 7, 2024
45e2477
reverse runid-attempt for cluster id
k15r Aug 7, 2024
b01c8a3
format logs-otel output
k15r Aug 7, 2024
ba7a2d8
remove unnecessary import
k15r Aug 7, 2024
3523293
rename tests in gha
k15r Aug 8, 2024
8b2e8d9
fix template key
k15r Aug 8, 2024
d818587
fix metric agent template
k15r Aug 8, 2024
ad041ac
modify output for fluentbit tests
k15r Aug 9, 2024
d5cb2b5
rename selfmonitor key
k15r Aug 9, 2024
380a643
add template for self-monitor
k15r Aug 9, 2024
ea19c45
update results
k15r Aug 9, 2024
e632182
Merge remote-tracking branch 'upstream/main' into perf-test-otel-logs
k15r Aug 9, 2024
77b1965
trigger tests
k15r Aug 12, 2024
274d7b2
Merge remote-tracking branch 'upstream/main' into perf-test-otel-logs
k15r Aug 13, 2024
59ab47f
add review comments
k15r Aug 13, 2024
32 changes: 21 additions & 11 deletions .github/workflows/pr-loadtest.yml
@@ -13,8 +13,14 @@ on:
- otel-collector
- fluent-bit
- prometheus
description: 'Image to test'
- logs-otel
description: "Image to test"
required: true
duration:
type: string
description: "Duration of the test in seconds"
required: true
default: "1200"

run-name: "Load Test for ${{ inputs.image }} on PR-${{ inputs.pr_number }}"

@@ -32,7 +38,7 @@ jobs:
- id: set-matrix
run: |
image=${{ github.event.inputs.image }}
matrix=$(jq --arg image "$image" 'map( . | select (.image==$image) )' hack/load-tests/matrix_includes.json)
matrix=$(jq --arg image "$image" 'map( . | select (.image==$image) )' hack/load-tests/matrix_includes.json)
echo "matrix={\"include\":$(echo $matrix)}" >> $GITHUB_OUTPUT
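The `jq` filter in the `set-matrix` step above can be exercised standalone. The matrix entries below are hypothetical; the real ones live in `hack/load-tests/matrix_includes.json`:

```shell
# Hypothetical matrix file mirroring the structure the workflow filters
cat > /tmp/matrix_includes.json <<'EOF'
[
  {"image": "otel-collector", "name": "traces"},
  {"image": "logs-otel", "name": "logs-otel-single"},
  {"image": "logs-otel", "name": "logs-otel-batch"}
]
EOF

# Keep only the entries matching the requested image, as in the workflow
image=logs-otel
matrix=$(jq -c --arg image "$image" 'map( . | select (.image==$image) )' /tmp/matrix_includes.json)
echo "{\"include\":$matrix}"
```

This prints the `include` object that the workflow writes to `$GITHUB_OUTPUT`, containing only the two `logs-otel` entries.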
load-test:
needs: prepare-matrix
@@ -43,19 +49,23 @@

steps:
- name: Test
run: echo "input = ${{ github.event.inputs.image }}, matrix = ${{ matrix.image }}"
run: echo "input = ${{ github.event.inputs.image }}, matrix = ${{ matrix.image }}"

- name: Checkout repo
uses: actions/checkout@v4
with:
ref: 'refs/pull/${{ github.event.inputs.pr_number }}/head'
ref: "refs/pull/${{ github.event.inputs.pr_number }}/head"
repository: ${{ github.repository }}

- name: Setup Golang
uses: "./.github/template/setup-golang"

- name: Setup gardener cluster name
run: echo "GARDENER_CLUSTER_NAME=${{ matrix.name }}" >> $GITHUB_ENV
run: |
ID=$(echo ${{ github.run_id }}${{ github.run_attempt }} | rev )
SHORT_NAME=$(echo ${{ matrix.name }} | awk -F- '{for(i=1;i<=NF;i++) printf "%s",substr($i,1,1);print "";}')
GARDENER_CLUSTER_NAME=$(echo lt-${SHORT_NAME}-${ID} | cut -c 1-14)
echo "GARDENER_CLUSTER_NAME=$GARDENER_CLUSTER_NAME" >> $GITHUB_ENV
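The cluster-name assembly above can be traced standalone; the run id, attempt, and matrix name below are hypothetical stand-ins for the GitHub context values:

```shell
# Hypothetical inputs mirroring the workflow context
RUN_ID=10290511234   # github.run_id
RUN_ATTEMPT=1        # github.run_attempt
MATRIX_NAME=logs-otel-batch

# Reverse the concatenated run id/attempt so the most volatile digits lead
ID=$(echo ${RUN_ID}${RUN_ATTEMPT} | rev)
# Abbreviate the matrix name to the first letter of each dash-separated part
SHORT_NAME=$(echo ${MATRIX_NAME} | awk -F- '{for(i=1;i<=NF;i++) printf "%s",substr($i,1,1);print "";}')
# Gardener cluster names are length-limited, so cut to 14 characters
GARDENER_CLUSTER_NAME=$(echo lt-${SHORT_NAME}-${ID} | cut -c 1-14)
echo "$GARDENER_CLUSTER_NAME"
```

With these inputs the result is `lt-lob-1432115`: a short, per-run-unique name that fits the length limit.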

# save gardener kubeconfig to a temp file in order to pass it to the command
- name: Save service account to file
@@ -82,14 +92,14 @@ jobs:

# Run the load test
- name: Run Load test
run: hack/load-tests/run-load-test.sh -n ${{ matrix.name }} -t ${{ matrix.type }} -m ${{ matrix.multi }} -b ${{ matrix.backpressure }}
run: hack/load-tests/run-load-test.sh -n ${{ matrix.name }} -t ${{ matrix.type }} -m ${{ matrix.multi }} -b ${{ matrix.backpressure }} -d ${{ github.event.inputs.duration }} -o "${{ matrix.overlay }}"

- name: Upload Results
uses: actions/upload-artifact@v4
if: always()
with:
name: Results-${{ matrix.name }}
path: tests/*.md
path: tests/*.json

- name: Deprovision Gardener
run: make deprovision-gardener
@@ -103,11 +113,11 @@ jobs:
runs-on: ubuntu-latest
if: always()
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Download Results
uses: actions/download-artifact@v4
- name: Print Results
run: |
for i in Results-*/*.md; do
echo "Results from $i"
cat $i
done
ls -la
python3 hack/load-tests/convert-results.py Results-*
50 changes: 50 additions & 0 deletions docs/contributor/benchmarks/load-test-logs.md
@@ -0,0 +1,50 @@
# Load Test for Logs Using OTel Collector

This document describes a reproducible test setup to determine the performance of a log gateway based on the OpenTelemetry Collector.

## Infrastructure Prerequisites

- Kubernetes cluster with two nodes, each with 4 CPUs and 16 GB memory (n1-standard-4 on GCP)
- Kubectl > 1.22.x
- Helm 3.x
- curl 8.4.x
- jq 1.6


## Test Script

The test scenario is implemented in the existing bash script [run-load-test.sh](../../../hack/load-tests/run-load-test.sh).
Invoke it with the following parameters:

- `-t logs-otel`
- `-n` Test name
- `-d` The test duration in seconds, default is `1200` seconds
- `-r` The rate of log generation in log/s, default is `1000` logs/s
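Putting the parameters together, a 20-minute run at the default rate looks like the following (the test name `otel-logs` is illustrative; the script must be invoked from the repository root):

```shell
hack/load-tests/run-load-test.sh -t logs-otel -n otel-logs -d 1200 -r 1000
```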

### Setup

Multiple instances of the `telemetry-gen` tool run and send logs to the `log-gateway` service. The `log-gateway` sends all incoming logs (using the configured pipeline) to the `log-receiver` service.
`log-receiver` is another instance of an OTel Collector configured to accept as many log entries as possible and directly discard them using the `nop` exporter.

The tests are executed for 20 minutes each, so that every test case reaches a stabilized output and reliable KPIs. Logs generated by the `log-generator` contain a single log line of ~2000 bytes.


## Test Results

<div class="table-wrapper" markdown="block">

| config | logs received l/s | logs exported l/s | logs queued | cpu | memory MB | no. restarts of gateway | no. restarts of generator |
| --- | --- | --- | --- | --- | --- | --- | --- |
| single | 7193 | 7195 | 16824 | 2.5 | 826 | 0 | 1 |
| batch | 16428 | 16427 | 0 | 3 | 265 | 0 | 1 |
</div>

## Interpretation

The results clearly show the beneficial impact of using the batch processor in the gateway's pipeline. The batch processor can handle more than twice the number of logs compared to the single processor, with slightly higher CPU usage but significantly lower memory usage. The number of queued logs is also significantly lower, which indicates that the batch processor keeps up with the incoming logs.

Similar to the setup used for metrics and traces using OTel, a setup of two gateway instances should be enough to handle the maximum number of logs allowed by a CLS (enterprise-plan) instance.

These results are based on a very basic logging pipeline and must be reevaluated as soon as the pipeline setup has been finalized.

Another important factor for the log gateway will be the resource limits configuration. For the tests executed here, no limits were applied.
243 changes: 243 additions & 0 deletions hack/load-tests/convert-results.py
@@ -0,0 +1,243 @@
# collect all json files in given directory and convert them to a table line in markdown format
# Usage: python convert-results.py <directory>

import json
import os
from collections import defaultdict

# the input json looks like this:
# {
# "test_name": "No Name",
# "test_target": "logs-otel",
# "max_pipeline": "false",
# "nodes": [
# "c1.xlarge",
# "c1.xlarge",
# "c1.xlarge"
# ],
# "backpressure_test": "false",
# "results": {
# "EXPORTED": "7269",
# "TYPE": "log",
# "QUEUE": "null",
# "CPU": "5.7",
# "RECEIVED": "7258",
# "MEMORY": "210",
# "RESTARTS_GATEWAY": "0",
# "RESTARTS_GENERATOR": "1"
# },
# "test_duration": "300"
# }


# templates for table line based on target_type
templates = {}
templates['logs-otel'] = (
"\n"
"| config | logs received | logs exported | logs queued | cpu | memory | no. restarts of gateway | no. restarts of generator "
"|"
"\n"
"| --- | --- | --- | --- | --- | --- | --- "
"|"
"\n"
"| single | {single[results][RECEIVED]} | {single[results][EXPORTED]} | {single[results][QUEUE]} | {single[results][CPU]} | {single[results][MEMORY]} | {single[results][RESTARTS_GATEWAY]} | {single[results][RESTARTS_GENERATOR]} "
"|"
"\n"
"| batch | {batch[results][RECEIVED]} | {batch[results][EXPORTED]} | {batch[results][QUEUE]} | {batch[results][CPU]} | {batch[results][MEMORY]} | {batch[results][RESTARTS_GATEWAY]} | {batch[results][RESTARTS_GENERATOR]} "
"|\n"
)
templates['logs-fluentbit'] = (
"| Version/Test "
"| Single Pipeline (ci-logs) | | | | "
"| Multi Pipeline (ci-logs-m) | | | | "
"| Single Pipeline Backpressure (ci-logs-b) | | | | "
"| Multi Pipeline Backpressure (ci-logs-mb) | | | | "
"|\n"
"|--------------------:"
"|:---------------------------------------:|:----------------------------------------:|:-------------------------------:|:--------------------:|:-------------:"
"|:---------------------------------------:|:----------------------------------------:|:-------------------------------:|:--------------------:|:-------------:"
"|:----------------------------------------:|:----------------------------------------:|:-------------------------------:|:--------------------:|:-------------:"
"|:----------------------------------------:|:----------------------------------------:|:-------------------------------:|:--------------------:|:-------------:"
"|\n"
"| "
"| Input Bytes Processing Rate/sec (KByte) | Output Bytes Processing Rate/sec (KByte) | Filesystem Buffer Usage (KByte) | Pod Memory Usage(MB) | Pod CPU Usage "
"| Input Bytes Processing Rate/sec (KByte) | Output Bytes Processing Rate/sec (KByte) | Filesystem Buffer Usage (KByte) | Pod Memory Usage(MB) | Pod CPU Usage "
"| Input Bytes Processing Rate/sec (KByte) | Output Bytes Processing Rate/sec (KByte) | Filesystem Buffer Usage (KByte) | Pod Memory Usage(MB) | Pod CPU Usage "
"| Input Bytes Processing Rate/sec (KByte) | Output Bytes Processing Rate/sec (KByte) | Filesystem Buffer Usage (KByte) | Pod Memory Usage(MB) | Pod CPU Usage "
"|\n"
"| "
"| {single[results][RECEIVED]} | {single[results][EXPORTED]} | {single[results][QUEUE]} | {single[results][MEMORY]} | {single[results][CPU]} "
"| {multi[results][RECEIVED]} | {multi[results][EXPORTED]} | {multi[results][QUEUE]} | {multi[results][MEMORY]} | {multi[results][CPU]} "
"| {bp[results][RECEIVED]} | {bp[results][EXPORTED]} | {bp[results][QUEUE]} | {bp[results][MEMORY]} | {bp[results][CPU]} "
"| {multi-bp[results][RECEIVED]} | {multi-bp[results][EXPORTED]} | {multi-bp[results][QUEUE]} | {multi-bp[results][MEMORY]} | {multi-bp[results][CPU]} "
"|\n"
)
templates['traces'] = (
"| Version/Test "
"| Single Pipeline (ci-traces) | | | | "
"| Multi Pipeline (ci-traces-m) | | | | "
"| Single Pipeline Backpressure (ci-traces-b) | | | | "
"| Multi Pipeline Backpressure (ci-traces-mb) | | | | "
"|\n"
"|-------------------:"
"|:---------------------------:|:---------------------------:|:-------------------:|:--------------------:|:-------------:"
"|:----------------------------:|:---------------------------:|:-------------------:|:--------------------:|:-------------:"
"|:------------------------------------------:|:---------------------------:|:-------------------:|:--------------------:|:-------------:"
"|:------------------------------------------:|:---------------------------:|:-------------------:|:--------------------:|:-------------:"
"|\n"
"| "
"| Receiver Accepted Spans/sec | Exporter Exported Spans/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"| Receiver Accepted Spans/sec | Exporter Exported Spans/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"| Receiver Accepted Spans/sec | Exporter Exported Spans/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"| Receiver Accepted Spans/sec | Exporter Exported Spans/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"|\n"
"| "
"| {single[results][RECEIVED]} | {single[results][EXPORTED]} | {single[results][QUEUE]} | {single[results][MEMORY]} | {single[results][CPU]} "
"| {multi[results][RECEIVED]} | {multi[results][EXPORTED]} | {multi[results][QUEUE]} | {multi[results][MEMORY]} | {multi[results][CPU]} "
"| {bp[results][RECEIVED]} | {bp[results][EXPORTED]} | {bp[results][QUEUE]} | {bp[results][MEMORY]} | {bp[results][CPU]} "
"| {multi-bp[results][RECEIVED]} | {multi-bp[results][EXPORTED]} | {multi-bp[results][QUEUE]} | {multi-bp[results][MEMORY]} | {multi-bp[results][CPU]} "
"|\n"
)
templates['metrics'] = (
"| Version/Test "
"| Single Pipeline (ci-metrics) | | | | "
"| Multi Pipeline (ci-metrics-m) | | | | "
"| Single Pipeline Backpressure (ci-metrics-b) | | | | "
"| Multi Pipeline Backpressure (ci-metrics-mb) | | | | "
"|\n"
"|-------------------:"
"|:----------------------------:|:----------------------------:|:-------------------:|:--------------------:|:-------------:"
"|:-----------------------------:|:----------------------------:|:-------------------:|:--------------------:|:-------------:"
"|:-------------------------------------------:|:----------------------------:|:-------------------:|:--------------------:|:-------------:"
"|:-------------------------------------------:|:----------------------------:|:-------------------:|:--------------------:|:-------------:"
"|\n"
"| "
"| Receiver Accepted Metric/sec | Exporter Exported Metric/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"| Receiver Accepted Metric/sec | Exporter Exported Metric/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"| Receiver Accepted Metric/sec | Exporter Exported Metric/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"| Receiver Accepted Metric/sec | Exporter Exported Metric/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"|\n"
"| "
"| {single[results][RECEIVED]} | {single[results][EXPORTED]} | {single[results][QUEUE]} | {single[results][MEMORY]} | {single[results][CPU]} "
"| {multi[results][RECEIVED]} | {multi[results][EXPORTED]} | {multi[results][QUEUE]} | {multi[results][MEMORY]} | {multi[results][CPU]} "
"| {bp[results][RECEIVED]} | {bp[results][EXPORTED]} | {bp[results][QUEUE]} | {bp[results][MEMORY]} | {bp[results][CPU]} "
"| {multi-bp[results][RECEIVED]} | {multi-bp[results][EXPORTED]} | {multi-bp[results][QUEUE]} | {multi-bp[results][MEMORY]} | {multi-bp[results][CPU]} "
"|\n"
)
templates['self-monitor'] = (
"| Version/Test "
"| Default (ci-self-monitor) | | | | | "
"|\n"
"|-------------:"
"|:-------------------------:|:--------------------:|:----------------------:|:--------------------------------:|:--------------------:|:-------------:"
"|\n"
"| "
"| Scrape Samples/sec | Total Series Created | WAL Storage Size/bytes | Head Chunk Storage Size in bytes | Pod Memory Usage(MB) | Pod CPU Usage "
"|\n"
"| "
"| {single[results][SCRAPESAMPLES]} | {single[results][SERIESCREATED]} | {single[results][WALSTORAGESIZE]} | {single[results][HEADSTORAGESIZE]} | {single[results][MEMORY]} |{single[results][CPU]}"
"|\n"
)
templates['metricagent'] = (
"| Version/Test "
"| Single Pipeline (ci-metric-ag) | | | | "
"| Single Pipeline Backpressure (ci-metric-ag-b) | | | | "
"|\n"
"|-------------------:"
"|:------------------------------:|:----------------------------:|:-------------------:|:--------------------:|:-------------:"
"|:---------------------------------------------:|:----------------------------:|:-------------------:|:--------------------:|:-------------:"
"|\n"
"| "
"| Receiver Accepted Metric/sec | Exporter Exported Metric/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"| Receiver Accepted Metric/sec | Exporter Exported Metric/sec | Exporter Queue Size | Pod Memory Usage(MB) | Pod CPU Usage "
"|\n"
"| "
"| {single[results][RECEIVED]} | {single[results][EXPORTED]} | {single[results][QUEUE]} | {single[results][MEMORY]} | {single[results][CPU]} "
"| {bp[results][RECEIVED]} | {bp[results][EXPORTED]} | {bp[results][QUEUE]} | {bp[results][MEMORY]} | {bp[results][CPU]} "
"|\n"
)


# load all individual json files from the directories and combine them into a single dictionary
# the result looks like this:
# {
# "metrics": { << test kind (metrics, selfmonitor, metricagent, etc.)
# "single": { << test type (single, multi, bp, multi-bp, etc.)
# "test_name": "metrics",
# "test_target": "metrics",
# "max_pipeline": "false",
# "nodes": [
# "n1-standard-4",
# "n1-standard-4"
# ],
# "backpressure_test": "false",
# "results": {
# "EXPORTED": "4477",
# "RESTARTS_GATEWAY": "0",
# "CPU": "1.5",
# "RECEIVED": "4476",
# "QUEUE": "0",
# "TYPE": "metric",
# "MEMORY": "247"
# },
# "test_duration": "1200",
# "overlay": "",
# "mode": "single"
# }
# }
# }
def load_results(directories):
    results = defaultdict(dict)
    for directory in directories:
        for filename in os.listdir(directory):
            if filename.endswith(".json"):
                filenamePath = os.path.join(directory, filename)
                with open(filenamePath, mode='r') as f:
                    data = json.load(f)
                    print(data)
                    # calculate a new key by combining test_target, max_pipeline and backpressure_test
                    key = data['test_target']
                    test_key = []
                    if data['max_pipeline'] == 'true':
                        test_key.append('multi')
                    if data['backpressure_test'] == 'true':
                        test_key.append('bp')
                    if data['overlay'] != "":
                        test_key.append(data['overlay'])
                    if len(test_key) == 0:
                        test_key.append('single')
                    new_data = defaultdict(str, data)
                    new_data['results'] = defaultdict(str, data['results'])
                    new_data['mode'] = '-'.join(test_key)
                    results[key]['-'.join(test_key)] = new_data
    return results
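The mode-key assembly inside `load_results` can be illustrated in isolation. `derive_mode` below is a hypothetical helper, not part of the script, mirroring the same flag logic:

```python
def derive_mode(data):
    """Combine the max_pipeline, backpressure_test, and overlay flags
    into a mode name, as load_results does for each result file."""
    test_key = []
    if data['max_pipeline'] == 'true':
        test_key.append('multi')
    if data['backpressure_test'] == 'true':
        test_key.append('bp')
    if data['overlay'] != "":
        test_key.append(data['overlay'])
    if len(test_key) == 0:
        test_key.append('single')
    return '-'.join(test_key)

# A multi-pipeline backpressure run maps to "multi-bp";
# a plain run with no flags maps to "single".
print(derive_mode({'max_pipeline': 'true', 'backpressure_test': 'true', 'overlay': ''}))   # multi-bp
print(derive_mode({'max_pipeline': 'false', 'backpressure_test': 'false', 'overlay': ''}))  # single
```

These mode names are exactly the keys the markdown templates look up (`single`, `multi`, `bp`, `multi-bp`, or an overlay name such as `batch`).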

def print_results(results):
    # iterate over all test_targets and render the matching template
    for test_target, test_runs in results.items():
        template = templates.get(test_target)
        if template is None:
            print("No template defined for target {}".format(test_target))
            continue
        try:
            # print the template filled with the data from the results
            print(template.format_map(test_runs))
        except KeyError as e:
            print("Template {} requires data for entry {}".format(test_target, e))



# main
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        print("Usage: python convert-results.py <directories>")
        sys.exit(1)

    # get all arguments
    directories = sys.argv[1:]
    results = load_results(directories)

    # debug print results
    print(json.dumps(results, indent=2))

    # print the results in markdown format
    print_results(results)