From e0cbd84323d5915b8322ccdd963bfcd394f61ec9 Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Tue, 29 Oct 2024 15:45:06 -0400
Subject: [PATCH 1/4] automate a tcpdump process

---
Review notes (this section is discarded by `git am`; not part of the commit):
  * getFlannelInterface is consumed through `pod_interface=$(...)`, but at
    this revision it writes its "Found ..." trace text to stdout and never
    prints an interface name. PATCH 2/4 removes the trace output and echoes
    the host-side veth name.
  * The "Could not find crictl" message is also written to stdout inside
    that command substitution, so it ends up in pod_interface rather than on
    the terminal; redirecting it to stderr (>&2) would keep it visible.

 scripts/tcpdump.sh | 133 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 scripts/tcpdump.sh

diff --git a/scripts/tcpdump.sh b/scripts/tcpdump.sh
new file mode 100644
index 0000000..21dca9c
--- /dev/null
+++ b/scripts/tcpdump.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+#
+# This script collects pcap files from a Kubernetes host, a Flannel or Calico VXLAN interface, and a pod.
+#
+# This script must be run as root.
+#
+# Provide the namespace and name of a pod to collect from. The script will inspect `ip route` to determine the interface name of a given Kubernetes pod and start a tcpdump process attached to that interface. Then, another tcpdump process will be attached to either the Flannel or the Calico VXLAN interface, and finally, a tcpdump process will be attached to the primary interface of the host, filtering for VXLAN traffic.
+#
+# Usage:
+# ./tcpdump.sh <namespace> <pod-name> [time frame]
+# namespace: string, required
+# pod-name: string, required
+# time frame: date/time string, optional; defaults to '5m'. Example: 100s, 5m, 1h.
+
+# depends on kubectl, jq, tcpdump, timeout, bash, iproute2, grep, awk, crictl
+
+# TODO: error handling
+
+readonly VXLAN_FLAGS="-lttttnnvv"
+
+namespace=$1
+pod=$2
+timeframe="5m"
+
+if [[ -z $namespace || -z $pod ]]; then
+  echo "Usage: $0 <namespace> <pod-name>"
+  exit 1
+fi
+
+if [[ -n $3 ]]; then
+  timeframe="$3"
+fi
+
+set -euo pipefail
+
+# if not root, then exit
+if [[ $EUID -ne 0 ]]; then
+  echo "This script must be run as root"
+  exit 1
+fi
+
+# check for required binaries
+for binary in kubectl jq tcpdump; do
+  if ! command -v "$binary" &> /dev/null; then
+    echo "Could not find $binary"
+    exit 1
+  fi
+done
+
+
+function getCalicoInterface () {
+  pod_name="$1"
+  pod_namespace="$2"
+  pod_ip=$(kubectl get pod "$pod_name" -n "$pod_namespace" -o json | jq -r '.status.podIP')
+  # Get the interface name from routing table
+  pod_interface=$(ip route | grep "$pod_ip" | awk '{print $3}')
+  echo "$pod_interface"
+}
+
+function getFlannelInterface () {
+  pod_name="$1"
+  pod_namespace="$2"
+
+  # check for crictl binary
+  if ! command -v crictl &> /dev/null; then
+    echo "Could not find crictl; please install it"
+    exit 1
+  fi
+
+  set -x
+  # Figure out the pod's container PID
+  container_id="$(crictl ps | grep "$pod_name" | awk 'NR==1{print $1}')"
+  echo -n "Found container ID $container_id"
+  pid="$(crictl inspect "$container_id" | jq .info.pid)"
+  echo -n "Found PID $pid"
+
+  # link to /var/run/netns so we can use ip netns easily
+  mkdir -p /var/run/netns
+  ln -sf "/proc/$pid/ns/net" "/var/run/netns/$pod_name"
+
+  # Get the interface index of the container's eth0:
+  local index
+  index=$(ip netns exec "$pod_name" ip link show type veth)
+
+  ils="${index%%:*}"
+
+
+  # Clean up the netns symlink, since we don't need it anymore
+  rm -f "/var/run/netns/${1}"
+}
+
+# Figure out if we're using Flannel or Calico VXLAN
+if ip link show | grep flannel > /dev/null; then
+  vxlan_interface=cni0
+  vxlan_port=8472
+  cni=flannel
+elif ip link show | grep cali > /dev/null; then
+  vxlan_interface=vxlan.calico
+  vxlan_port=4789
+  cni=calico
+else
+  echo "Could not determine VXLAN interface on host"
+  exit 1
+fi
+
+if [[ $cni == "calico" ]]; then
+  pod_interface=$(getCalicoInterface "$pod" "$namespace")
+elif [[ $cni == "flannel" ]]; then
+  pod_interface=$(getFlannelInterface "$pod" "$namespace")
+else
+  echo "Could not determine pod interface from namespace $namespace and pod $pod"
+  exit 1
+fi
+
+# Collect tcpdump from the pod's interface
+echo "Collecting tcpdump from pod $pod on interface $pod_interface"
+timeout "$timeframe" tcpdump ${VXLAN_FLAGS} -i "$pod_interface" -w "$(hostname)-$pod-pod".pcap &
+
+# Collect tcpdump from the VXLAN interface
+echo "Collecting tcpdump from VXLAN interface $vxlan_interface on port $vxlan_port"
+timeout "$timeframe" tcpdump ${VXLAN_FLAGS} -i "$vxlan_interface" -T vxlan port "$vxlan_port" -w "$(hostname)-$vxlan_interface".pcap &
+
+# Figure out the host's primary interface
+host_interface=$(ip route | grep '^default' | awk '{print $5}')
+echo "Collecting tcpdump from host interface $host_interface"
+
+# Collect tcpdump from the host's primary interface
+timeout "$timeframe" tcpdump ${VXLAN_FLAGS} -i "$host_interface" -T vxlan port "$vxlan_port" -w "$(hostname)-$host_interface".pcap &
+
+# Wait for all the tcpdump processes to finish
+wait < <(jobs -p)
+
+echo "Done collecting tcpdump files"
From a6e7801bd4a6f7ee130dc1a09e9ee4b04d407060 Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Tue, 29 Oct 2024 16:13:58 -0400
Subject: [PATCH 2/4] figure out flannel interface

---
Review notes (this section is discarded by `git am`; not part of the commit):
  * `grep -E "^${c_index}"` is a prefix match: a c_index of 5 also matches
    the `ip link` entries numbered 50, 51, ... and h_index can then hold
    several names. Anchoring on the separator ("^${c_index}:") would make
    the lookup exact.

 scripts/tcpdump.sh | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/scripts/tcpdump.sh b/scripts/tcpdump.sh
index 21dca9c..3b3f7f4 100644
--- a/scripts/tcpdump.sh
+++ b/scripts/tcpdump.sh
@@ -67,26 +67,25 @@ function getFlannelInterface () {
     exit 1
   fi
 
-  set -x
   # Figure out the pod's container PID
   container_id="$(crictl ps | grep "$pod_name" | awk 'NR==1{print $1}')"
-  echo -n "Found container ID $container_id"
   pid="$(crictl inspect "$container_id" | jq .info.pid)"
-  echo -n "Found PID $pid"
 
   # link to /var/run/netns so we can use ip netns easily
   mkdir -p /var/run/netns
   ln -sf "/proc/$pid/ns/net" "/var/run/netns/$pod_name"
 
-  # Get the interface index of the container's eth0:
-  local index
-  index=$(ip netns exec "$pod_name" ip link show type veth)
-
-  ils="${index%%:*}"
-
+  # Get the interface index of the container's eth0.
+  # c_index is the index of the container's eth0, which should be in the form of eth0.if${h_index}
+  # h_index is the index of the corresponding host veth interface from `ip link show type veth`
+  local c_index h_index
+  c_index=$(ip netns exec "$pod_name" ip link show type veth | head -n1 | awk '{print $2}' | sed 's/.*@if//')
+  h_index=$(ip link show type veth | grep -E "^${c_index}" | awk '{print $2}' | sed 's/@.*//')
 
   # Clean up the netns symlink, since we don't need it anymore
   rm -f "/var/run/netns/${1}"
+
+  echo "$h_index"
 }
 
 # Figure out if we're using Flannel or Calico VXLAN
From 15e0ef626626dd07bd853d5d1925031b67193423 Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Wed, 30 Oct 2024 12:03:24 -0400
Subject: [PATCH 3/4] capture cni0 when using flannel, too

---
 scripts/tcpdump.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/scripts/tcpdump.sh b/scripts/tcpdump.sh
index 3b3f7f4..0894706 100644
--- a/scripts/tcpdump.sh
+++ b/scripts/tcpdump.sh
@@ -20,7 +20,7 @@ readonly VXLAN_FLAGS="-lttttnnvv"
 
 namespace=$1
 pod=$2
-timeframe="5m"
+timeframe="1m"
 
 if [[ -z $namespace || -z $pod ]]; then
   echo "Usage: $0 <namespace> <pod-name>"
@@ -90,7 +90,7 @@ function getFlannelInterface () {
 
 # Figure out if we're using Flannel or Calico VXLAN
 if ip link show | grep flannel > /dev/null; then
-  vxlan_interface=cni0
+  vxlan_interface=flannel.1
   vxlan_port=8472
   cni=flannel
 elif ip link show | grep cali > /dev/null; then
@@ -117,7 +117,13 @@ timeout "$timeframe" tcpdump ${VXLAN_FLAGS} -i "$pod_interface" -w "$(hostname)-
 
 # Collect tcpdump from the VXLAN interface
 echo "Collecting tcpdump from VXLAN interface $vxlan_interface on port $vxlan_port"
-timeout "$timeframe" tcpdump ${VXLAN_FLAGS} -i "$vxlan_interface" -T vxlan port "$vxlan_port" -w "$(hostname)-$vxlan_interface".pcap &
+timeout "$timeframe" tcpdump ${VXLAN_FLAGS} -i "$vxlan_interface" -w "$(hostname)-$vxlan_interface".pcap &
+
+if [[ $cni == "flannel" ]]; then
+  # Collect tcpdump also from the cni0 bridge
+  echo "Collecting tcpdump from cni0 bridge"
+  timeout "$timeframe" tcpdump ${VXLAN_FLAGS} -i "cni0" -w "$(hostname)-cni0".pcap &
+fi
 
 # Figure out the host's primary interface
 host_interface=$(ip route | grep '^default' | awk '{print $5}')
From 66c9f895057962ff0037864e960037a1fb3a6eb8 Mon Sep 17 00:00:00 2001
From: ada mancini
Date: Wed, 30 Oct 2024 14:58:25 -0400
Subject: [PATCH 4/4] check for all deps and print better usage dialog

---
 scripts/tcpdump.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scripts/tcpdump.sh b/scripts/tcpdump.sh
index 0894706..5948c04 100644
--- a/scripts/tcpdump.sh
+++ b/scripts/tcpdump.sh
@@ -20,10 +20,15 @@ readonly VXLAN_FLAGS="-lttttnnvv"
 
 namespace=$1
 pod=$2
-timeframe="1m"
+timeframe="5m"
 
 if [[ -z $namespace || -z $pod ]]; then
-  echo "Usage: $0 <namespace> <pod-name>"
+  echo "Usage: ./tcpdump.sh <namespace> <pod-name> [time frame]"
+  echo " namespace: string, required"
+  echo " pod-name: string, required"
+  echo " time frame: integer with optional suffix 's', 'm', 'h', or 'd'; defaults to '5m'"
+  echo " Example Usage: ./tcpdump.sh default coredns-coredns-74ff55c5d 300s"
+  echo " "
   exit 1
 fi
 
@@ -40,7 +45,7 @@ if [[ $EUID -ne 0 ]]; then
 fi
 
 # check for required binaries
-for binary in kubectl jq tcpdump; do
+for binary in kubectl jq tcpdump ip timeout grep awk; do
   if ! command -v "$binary" &> /dev/null; then
     echo "Could not find $binary"
     exit 1