Instacollector Kafka 1.0.0
pokharelbinayak committed Nov 14, 2024
1 parent b871bf2 commit 1451f08
Showing 3 changed files with 544 additions and 0 deletions.
kafka/README.md (55 additions, 0 deletions)

This tool collects information from a Kafka cluster to aid in problem diagnosis or review.

# Design info:
There are two scripts in the instacollector tool. The `node_collector.sh` is supposed to be executed on each Kafka node.
The `cluster_collector.sh` can be executed on any machine with connectivity to the Kafka cluster, e.g. a user laptop or jumpbox.

The `node_collector.sh` executes Linux and Kafka commands and copies the configuration and log files required for a cluster health check.
The `cluster_collector.sh` executes `node_collector.sh` on each Kafka node over `ssh`. The `cluster_collector.sh` accepts five user inputs -

```
Enter your kafka environment (vm/docker) :
Enter username for login on Kafka cluster nodes (Press Enter for default admin) :
Enter Identity file path: <the identity file on your local machine used to connect to the VMs>
Enter path of the command config file: <kafka command-config file location on the kafka brokers>
Enter file containing ip addresses/host/container names of Kafka cluster nodes: <the hosts file on your local machine>
```

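The hosts file is a plain-text list with one IP address, hostname, or container name per line; note that the collection loop stops at the first empty line, so keep the entries contiguous. For example (addresses are illustrative):

```
10.0.0.11
10.0.0.12
10.0.0.13
```

The command config file is a standard Kafka client properties file whose contents depend on the cluster's security setup; a minimal sketch for a SASL_SSL cluster (the mechanism and credentials shown are assumptions):

```
security.protocol=SASL_SSL
sasl.mechanism=SCRAM-SHA-256
sasl.jaas.config=org.apache.kafka.common.security.scram.ScramLoginModule required \
  username="collector" password="***";
```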

# Execution settings:
The `cluster_collector.sh` connects to the cluster nodes using an SSH key/identity file.
If the key is protected by a passphrase, use `ssh-agent` and `ssh-add` to load it before running `cluster_collector.sh`, as in the sketch below.
If a different `ssh` mechanism is required, please adjust the script as applicable.
Alternatively, `node_collector.sh` can be executed directly on individual nodes if `cluster_collector.sh` does not fit your setup.
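For example, to load a passphrase-protected key before launching the collector (the key path is illustrative):

```bash
eval "$(ssh-agent -s)"   # start an agent for this shell session
ssh-add ~/.ssh/id_rsa    # prompts for the passphrase once
bash cluster_collector.sh
```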

The Kafka configuration file locations, data directory location, and other settings follow the Apache Kafka default setup.
Change them in `node_collector.sh` if your installation uses different values; see the sketch after the file lists below. The following Kafka and Zookeeper
related files are copied from each node.

Kafka Broker Files
*******************
server.properties
server.log
kafkaServer.out
kafka-authorizer.log
controller.log
state-change.log
kafka_server_jaas.conf
kafka-topics.sh
kafka-broker-api-versions.sh
kafka-consumer-groups.sh

Zookeeper Files
****************
zookeeper.properties
zoo.cfg
log4j.properties
zoo.log
zookeeper_jaas.conf
zookeeper.out
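
If your installation deviates from these defaults, edit the corresponding path variables in `node_collector.sh`. A minimal sketch, using the variable names documented in the `cluster_collector.sh` header; the paths shown are assumptions for a typical package install:

```bash
# In node_collector.sh - adjust to match your installation (paths are examples)
BROKER_CONFIG_PATH=/etc/kafka             # server.properties
BROKER_LOG_PATH=/var/log/kafka            # server.log, controller.log, state-change.log, ...
BROKER_JAAS_CONFIG=/etc/kafka             # kafka_server_jaas.conf
BROKER_BIN_PATH=/usr/bin                  # kafka-topics.sh and the other CLI tools
ZOOKEEPER_CONFIG=/etc/zookeeper           # zookeeper.properties, zoo.cfg, log4j.properties
ZOOKEEPER_LOG_PATH=/var/log/zookeeper     # zoo.log, zookeeper.out
ZOOKEEPER_JAAS_CONFIG=/etc/zookeeper      # zookeeper_jaas.conf
```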


**Note:** The scripts should be executed in a bash shell.
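
A typical end-to-end run from a workstation looks like the following; the timestamp in the generated file name is illustrative:

```bash
bash cluster_collector.sh
# ...answer the prompts...
# Process complete. File generated :  /tmp/InstaCollection_202411141200.tar.gz

mkdir -p kafka-diagnostics
tar -xzf /tmp/InstaCollection_202411141200.tar.gz -C kafka-diagnostics
```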

Please see https://www.instaclustr.com/support/documentation/announcements/instaclustr-open-source-project-status/ for the Instaclustr support status of this project.
kafka/cluster_collector.sh (159 additions, 0 deletions)
#!/bin/bash

##********************************************************************************************************************
##********************************************************************************************************************
## The purpose of this tool is to extract kafka & zookeeper related configuration and log files for troubleshooting.
## The following files are extracted. Please note that not all files exist in every environment.
## All properties with the word "password" in them are replaced with "***".
#=============================================================#
# kafka files and the path variables where they are expected
#   BROKER_CONFIG_PATH
#     server.properties
#   BROKER_LOG_PATH
#     server.log
#     kafkaServer.out
#     kafka-authorizer.log
#     controller.log
#     state-change.log
#   BROKER_JAAS_CONFIG
#     kafka_server_jaas.conf
#   ZOOKEEPER_CONFIG
#     zookeeper.properties
#     zoo.cfg
#     log4j.properties
#   ZOOKEEPER_LOG_PATH
#     zoo.log
#     zookeeper.out
#   ZOOKEEPER_JAAS_CONFIG
#     zookeeper_jaas.conf
#   BROKER_BIN_PATH
#     kafka-topics.sh
#     kafka-broker-api-versions.sh
#     kafka-consumer-groups.sh
#=============================================================#
##
## In addition to the files above, the script also extracts the following OS-related information -
## 1. file system & directory size
## 2. io stats
## 3. file descriptors
## 4. cpu & memory
## 5. contents of the hosts file
## 6. output of kafka-topics.sh topic describe
##
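## (Illustrative examples of such commands - the authoritative list lives in node_collector.sh:
##  df -h; iostat -x; ulimit -n; free -m; cat /etc/hosts;
##  kafka-topics.sh --describe --bootstrap-server <host:port> --command-config <config_file>)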
##********************************************************************************************************************
##********************************************************************************************************************
## Last Modification Date : 10/29/2021
## Description : Script functionality enhanced to add information related to iostat, df, file descriptor
## cpu & memory info
##********************************************************************************************************************
##********************************************************************************************************************

clear

#GLOBAL VARIABLES
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
INFO_DIR=/tmp/InstaCollection_$(date +%Y%m%d%H%M)

#Collect environment info (VM/docker)
read -p "Enter your kafka environment (vm/docker) :" kenv

if [[ "${kenv}" == "vm" ]]; then
    #Collect user info.
    read -p "Enter username for login on Kafka cluster nodes (Press Enter for default admin) :" user
    [ -z "${user}" ] && user='admin'

    read -p "Enter Identity file path:" id_file
    if [[ ! -f ${id_file} || ! -s ${id_file} ]]; then
        echo "$id_file File not found!"
        exit 1
    fi
elif [[ "${kenv}" == "docker" ]]; then
    read -p "Enter docker home directory :" docker_home

    if [ -z "$docker_home" ]; then
        echo "Docker home directory cannot be empty"
        exit 1
    fi
else
    echo "Invalid value for environment"
    exit 1
fi

read -p "Enter path of the command config file:" config_file

read -p "Enter file containing ip addresses/host/container names of Kafka cluster nodes:" peers_file
if [[ ! -f ${peers_file} || ! -s ${peers_file} ]]; then
echo "$peers_file File not found!"
exit 1
fi


echo "environment $kenv"

#Execute node_collector.sh on each node or container
if [ "$kenv" == "vm" ]; then
    while read peer
    do
        if [[ -z "$peer" ]]; then
            break
        fi
        ssh -i "$id_file" "$user@$peer" "bash -s" < node_collector.sh "$peer" "$config_file" &
    done < "$peers_file"
else
    while read peer
    do
        if [[ -z "$peer" ]]; then
            break
        fi
        echo "Copying file node_collector.sh to container"
        docker cp ./node_collector.sh "$peer:$docker_home/"
        docker exec "$peer" /bin/bash -c "sh $docker_home/node_collector.sh $peer $config_file" &
    done < "$peers_file"
fi

#waiting for all node_collectors to complete
wait

mkdir $INFO_DIR

#copy the data from each node/container

if [ "$kenv" == "vm" ]; then
    while read peer
    do
        if [[ -z "$peer" ]]; then
            break
        fi
        mkdir "$INFO_DIR/$peer"
        scp -i "$id_file" "$user@$peer:/tmp/InstaCollection.tar.gz" "$INFO_DIR/$peer/InstaCollection_$peer.tar.gz" &
    done < "$peers_file"
else
    while read peer
    do
        if [[ -z "$peer" ]]; then
            break
        fi
        mkdir "$INFO_DIR/$peer"
        docker cp "$peer:/tmp/InstaCollection.tar.gz" "$INFO_DIR/$peer/InstaCollection_$peer.tar.gz" &
    done < "$peers_file"
fi

#waiting for all scp to complete
wait

#compress the info directory
result_file=/tmp/InstaCollection_$(date +%Y%m%d%H%M).tar.gz
tar -zcf $result_file -C $INFO_DIR .
rm -r $INFO_DIR

echo "Process complete. File generated : " $result_file
