Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Baremetal #225

Closed
wants to merge 11 commits into from
Closed
442 changes: 427 additions & 15 deletions workloads/kube-burner/common.sh

Large diffs are not rendered by default.

71 changes: 71 additions & 0 deletions workloads/kube-burner/deployment-sampleapp.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
---
# Sample HTTP app used as the kube-burner workload target.
# ${...} placeholders are substituted (envsubst-style) before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${APP_NAME}
  namespace: ${PROJECT}
spec:
  replicas: ${REPLICAS}
  selector:
    matchLabels:
      app: sample          # NOTE: pod label is hard-coded; only the name is templated
  template:
    metadata:
      labels:
        app: sample
    spec:
      containers:
      - name: app
        image: quay.io/smalleni/sampleapp:latest
        readinessProbe:
          # Simple HTTP readiness check against the app's root path.
          httpGet:
            path: /
            port: 8080
          initialDelaySeconds: 3
        ports:
        - containerPort: 8080
          protocol: TCP
        resources:
          # requests == limits -> Guaranteed QoS class for predictable perf runs
          requests:
            cpu: "1"
          limits:
            cpu: "1"
      nodeSelector:
        # Pins pods to nodes carrying the custom MCP role label (see mcp.yaml)
        node-role.kubernetes.io/custom: ${NODE_SELECTOR_VALUE}


---
# ClusterIP service fronting the sample deployment's pods (label app=sample).
apiVersion: v1
kind: Service
metadata:
  name: samplesvc
  namespace: ${PROJECT}
spec:
  selector:
    app: sample
  ports:
  - port: 80          # service port exposed to clients
    targetPort: 8080  # container port of the sample app

---
kind: NetworkPolicy
apiVersion: networking.k8s.io/v1
metadata:
name: except
namespace: ${PROJECT}
spec:
podSelector:
matchLabels:
app: ${APP_NAME}
ingress:
- from:
- ipBlock:
cidr: 10.128.0.0/14
except:
- "10.130.36.0/23"
- "10.130.12.0/23"
- "10.128.18.0/23"
- "10.131.10.0/23"
- "10.131.22.0/23"
- "10.128.24.0/23"
- "10.128.14.0/23"
3 changes: 3 additions & 0 deletions workloads/kube-burner/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ export POD_WAIT=${POD_WAIT:-false}
export WAIT_FOR=${WAIT_FOR:-[]}
export VERIFY_OBJECTS=${VERIFY_OBJECTS:-true}
export ERROR_ON_VERIFY=${ERROR_ON_VERIFY:-true}
# FIX: follow the file's ${VAR:-default} convention so caller-supplied values
# survive; the bare `export CUSTOM_NAME=` form silently clobbered them.
export CUSTOM_NAME=${CUSTOM_NAME:-}
export CUSTOM_VALUE=${CUSTOM_VALUE:-}
export CUSTOM_LABEL=${CUSTOM_LABEL:-}

# Remote configuration
export REMOTE_METRIC_PROFILE=${REMOTE_METRIC_PROFILE}
Expand Down
24 changes: 24 additions & 0 deletions workloads/kube-burner/mb_pod.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Load-generator pod: runs `mb` against the request list mounted from a ConfigMap.
apiVersion: v1
kind: Pod
metadata:
  name: mb-pod
spec:
  containers:
  - name: mb-container
    image: "quay.io/mukrishn/snomb:2"
    command: [ "/bin/sh", "-c", "mb -i /etc/config/requests.json -d 86400" ] # -d in seconds
    volumeMounts:
    - name: config-volume
      mountPath: /etc/config   # requests.json comes from request-configmap below
    resources:
      # requests == limits -> Guaranteed QoS; sized for a dedicated load node
      limits:
        memory: '16Gi'
        cpu: 16
      requests:
        memory: '16Gi'
        cpu: 16
  volumes:
  - name: config-volume
    configMap:
      name: request-configmap
  restartPolicy: Never   # one-shot load run; do not restart on exit
13 changes: 13 additions & 0 deletions workloads/kube-burner/mcp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---

# Custom MachineConfigPool template; ${CUSTOM_*} values are substituted per pool
# (created in a loop when CREATE_MCPS_BOOL=true — see run_upgrade_fromgit_baremetal.sh).
apiVersion: machineconfiguration.openshift.io/v1
kind: MachineConfigPool
metadata:
  name: ${CUSTOM_NAME}
spec:
  machineConfigSelector:
    matchExpressions:
      - {key: machineconfiguration.openshift.io/role, operator: In, values: ["${CUSTOM_VALUE}"]}
  nodeSelector:
    matchLabels:
      # Nodes must carry this label to join the pool (same key the sample app pins to)
      node-role.kubernetes.io/custom: ${CUSTOM_LABEL}
13 changes: 13 additions & 0 deletions workloads/kube-burner/run_upgrade_fromgit_baremetal.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Baremetal upgrade driver: sets MCP-related knobs and runs the
# baremetal_upgrade_auxiliary workflow from common.sh.

. common.sh

export TOTAL_MCPS=${TOTAL_MCPS:-}         # will skip if CREATE_MCPS_BOOL is set to false!
export MCP_NODE_COUNT=${MCP_NODE_COUNT:-} # will skip if CREATE_MCPS_BOOL is set to false!
export CREATE_MCPS_BOOL=true              # true or false

baremetal_upgrade_auxiliary
# FIX: propagate the workload's status. The unconditional `exit 1` made every
# run — including successful ones — report failure to the caller/CI.
exit $?
106 changes: 80 additions & 26 deletions workloads/network-perf/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export_defaults() {
operator_repo=${OPERATOR_REPO:=https://github.com/cloud-bulldozer/benchmark-operator.git}
operator_branch=${OPERATOR_BRANCH:=master}
CRD=${CRD:-ripsaw-uperf-crd.yaml}
export cr_name=${BENCHMARK:=benchmark}
export _es=${ES_SERVER:-https://search-perfscale-dev-chmf5l4sh66lvxbnadi4bznl3a.us-west-2.es.amazonaws.com:443}
_es_baseline=${ES_SERVER_BASELINE:-https://search-perfscale-dev-chmf5l4sh66lvxbnadi4bznl3a.us-west-2.es.amazonaws.com:443}
export _metadata_collection=${METADATA_COLLECTION:=true}
Expand All @@ -43,11 +44,41 @@ export_defaults() {
export pin=true
export networkpolicy=${NETWORK_POLICY:=false}
export multi_az=${MULTI_AZ:=true}
export baremetalCheck=$(oc get infrastructure cluster -o json | jq .spec.platformSpec.type)
zones=($(oc get nodes -l node-role.kubernetes.io/workload!=,node-role.kubernetes.io/worker -o go-template='{{ range .items }}{{ index .metadata.labels "topology.kubernetes.io/zone" }}{{ "\n" }}{{ end }}' | uniq))
platform=$(oc get infrastructure cluster -o jsonpath='{.status.platformStatus.type}' | tr '[:upper:]' '[:lower:]')
log "Platform is found to be : ${platform} "
# If multi_az we use one node from the two first AZs
if [[ ${platform} == "vsphere" ]]; then

#Check to see if the infrastructure type is baremetal to adjust script as necessary
if [[ "${baremetalCheck}" == '"BareMetal"' ]]; then
log "BareMetal infastructure: setting isBareMetal accordingly"
export isBareMetal=true
else
export isBareMetal=false
fi

#If using baremetal we use different query to find worker nodes
if [[ "${isBareMetal}" == "true" ]]; then
log "Colocating uperf pods for baremetal"
nodeCount=$(oc get nodes --no-headers -l node-role.kubernetes.io/worker | wc -l)
if [[ ${nodeCount} -ge 2 ]]; then
serverNumber=$(( $RANDOM %${nodeCount} + 1 ))
clientNumber=$(( $RANDOM %${nodeCount} + 1 ))
while (( $serverNumber == $clientNumber ))
do
clientNumber=$(( $RANDOM %${nodeCount} + 1 ))
done
export server=$(oc get nodes --no-headers -l node-role.kubernetes.io/worker | awk 'NR=='${serverNumber}'{print $1}')
export client=$(oc get nodes --no-headers -l node-role.kubernetes.io/worker | awk 'NR=='${clientNumber}'{print $1}')
else
log "At least 2 worker nodes are required"
exit 1
fi
log "Finished assigning server and client nodes"
log "Server to be scheduled on node: $server"
log "Client to be scheduled on node: $client"
# If multi_az we use one node from the two first AZs
elif [[ ${platform} == "vsphere" ]]; then
nodes=($(oc get nodes -l node-role.kubernetes.io/worker,node-role.kubernetes.io/workload!="",node-role.kubernetes.io/infra!="" -o jsonpath='{range .items[*]}{ .metadata.labels.kubernetes\.io/hostname}{"\n"}{end}'))
if [[ ${#nodes[@]} -lt 2 ]]; then
log "At least 2 worker nodes placed are required"
Expand Down Expand Up @@ -83,9 +114,13 @@ export_defaults() {
export serviceip=false
elif [ ${WORKLOAD} == "service" ]
then
export _metadata_targeted=false
export hostnetwork=false
export serviceip=true
if [[ "${isBareMetal}" == "true" ]]; then
export _metadata_targeted=true
else
export _metadata_targeted=false
fi
else
export hostnetwork=false
export serviceip=false
Expand Down Expand Up @@ -131,17 +166,21 @@ export_defaults() {
}

deploy_operator() {
  # Deploy benchmark-operator from git and grant it the SCCs it needs.
  # FIX: the scraped diff fused the old and new bodies of this function;
  # this is the post-PR version only. On cloud platforms we start from a
  # clean namespace; on baremetal the existing install is kept across runs.
  if [[ "${isBareMetal}" == "false" ]]; then
    log "Removing benchmark-operator namespace, if it already exists"
    oc delete namespace benchmark-operator --ignore-not-found
  else
    log "Baremetal infrastructure: Keeping benchmark-operator namespace"
  fi
  log "Cloning benchmark-operator from branch ${operator_branch} of ${operator_repo}"
  rm -rf benchmark-operator
  git clone --single-branch --branch ${operator_branch} ${operator_repo} --depth 1
  (cd benchmark-operator && make deploy)
  oc wait --for=condition=available "deployment/benchmark-controller-manager" -n benchmark-operator --timeout=300s
  oc adm policy -n benchmark-operator add-scc-to-user privileged -z benchmark-operator
  oc adm policy -n benchmark-operator add-scc-to-user privileged -z backpack-view
  # Benchmark pods may need host networking under the restricted SCC.
  oc patch scc restricted --type=merge -p '{"allowHostNetwork": true}'
}

deploy_workload() {
Expand All @@ -152,7 +191,8 @@ deploy_workload() {
}

check_logs_for_errors() {
client_pod=$(oc get pods -n benchmark-operator --no-headers | awk '{print $1}' | grep uperf-client | awk 'NR==1{print $1}')
uuid=$(oc describe -n benchmark-operator benchmarks/uperf-${cr_name}-${WORKLOAD}-network-${pairs} | grep Suuid | awk '{print $2}')
client_pod=$(oc get pods -n benchmark-operator --no-headers | awk '{print $1}' | grep $uuid | grep uperf-client | awk 'NR==1{print $1}')
if [ ! -z "$client_pod" ]; then
num_critical=$(oc logs ${client_pod} -n benchmark-operator | grep CRITICAL | wc -l)
if [ $num_critical -gt 3 ] ; then
Expand All @@ -173,7 +213,11 @@ wait_for_benchmark() {
log "Cerberus status is False, Cluster is unhealthy"
exit 1
fi
oc describe -n benchmark-operator benchmarks/uperf-benchmark-${WORKLOAD}-network-${pairs} | grep State | grep Complete
if [ "${benchmark_state}" == "Failed" ]; then
log "Benchmark state is Failed, exiting"
exit 1
fi
oc describe -n benchmark-operator benchmarks/uperf-${cr_name}-${WORKLOAD}-network-${pairs} | grep State | grep Complete
if [ $? -eq 0 ]; then
log "uperf workload done!"
uperf_state=$?
Expand Down Expand Up @@ -223,34 +267,43 @@ assign_uuid() {
}

run_benchmark_comparison() {
  # Compare baseline vs. current uperf runs via touchstone and record the
  # per-pair output file for later CSV generation.
  # FIX: log-message typos ("Begining benchamrk"); quote expansions (SC2086)
  # and use the idiomatic += array append.
  log "Beginning benchmark comparison"
  ../../utils/touchstone-compare/run_compare.sh uperf "${baseline_uperf_uuid}" "${compare_uperf_uuid}" "${pairs}"
  pairs_array+=( "compare_output_${pairs}.yaml" )
  log "Finished benchmark comparison"
}

generate_csv() {
  # Build the comparison CSV from all collected compare_output_*.yaml files.
  # FIX: `$(echo "${pairs_array[@]}")` (SC2046) re-split the joined string;
  # pass the array elements directly and quote the tolerance values.
  log "Generating CSV"
  python3 csv_gen.py --files "${pairs_array[@]}" --latency_tolerance="$latency_tolerance" --throughput_tolerance="$throughput_tolerance"
  log "Finished generating CSV"
}

init_cleanup() {
  # Tear down any previous benchmark-operator install before a fresh run.
  # FIX: the scraped diff fused the old unconditional body with the new
  # guarded one, duplicating the clone/delete sequence; this is the post-PR
  # version only. Baremetal keeps the operator deployed between runs.
  if [[ "${isBareMetal}" == "false" ]]; then
    log "Cloning benchmark-operator from branch ${operator_branch} of ${operator_repo}"
    rm -rf /tmp/benchmark-operator
    git clone --single-branch --branch ${operator_branch} ${operator_repo} /tmp/benchmark-operator --depth 1
    oc delete -f /tmp/benchmark-operator/deploy
    oc delete -f /tmp/benchmark-operator/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml
    oc delete -f /tmp/benchmark-operator/resources/operator.yaml
  else
    log "BareMetal Infrastructure: Skipping cleanup"
  fi
}

delete_benchmark() {
  # Remove the uperf Benchmark CR for the current workload/pair combination.
  # FIX: the scraped diff kept both the old (uperf-benchmark-...) and new
  # (uperf-${cr_name}-...) delete lines; only the renamed CR exists post-PR.
  oc delete benchmarks.ripsaw.cloudbulldozer.io/uperf-${cr_name}-${WORKLOAD}-network-${pairs} -n benchmark-operator
}

update() {
  # Refresh cached status of the current uperf Benchmark CR.
  # Globals written: benchmark_state, benchmark_uuid, benchmark_current_pair.
  # FIX: the scraped diff kept both the old (uperf-benchmark-...) and new
  # (uperf-${cr_name}-...) queries; only the renamed CR exists post-PR.
  benchmark_state=$(oc get benchmarks.ripsaw.cloudbulldozer.io/uperf-${cr_name}-${WORKLOAD}-network-${pairs} -n benchmark-operator -o jsonpath='{.status.state}')
  benchmark_uuid=$(oc get benchmarks.ripsaw.cloudbulldozer.io/uperf-${cr_name}-${WORKLOAD}-network-${pairs} -n benchmark-operator -o jsonpath='{.status.uuid}')
  benchmark_current_pair=$(oc get benchmarks.ripsaw.cloudbulldozer.io/uperf-${cr_name}-${WORKLOAD}-network-${pairs} -n benchmark-operator -o jsonpath='{.spec.workload.args.pair}')
}

print_uuid() {
  # Dump the accumulated benchmark UUID record file into the job log.
  # Assumes uuid.txt exists in the CWD (written earlier by assign_uuid) — TODO confirm.
  log "Logging uuid.txt"
  cat uuid.txt
}

Expand All @@ -264,3 +317,4 @@ export_defaults
init_cleanup
check_cluster_health
deploy_operator

3 changes: 2 additions & 1 deletion workloads/network-perf/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
gspread
gspread-formatting
oauth2client
PyYAML>=5.4.1
make
2 changes: 1 addition & 1 deletion workloads/network-perf/ripsaw-uperf-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
apiVersion: ripsaw.cloudbulldozer.io/v1alpha1
kind: Benchmark
metadata:
name: uperf-benchmark-${WORKLOAD}-network-${pairs}
name: uperf-${cr_name}-${WORKLOAD}-network-${pairs}
namespace: benchmark-operator
spec:
elasticsearch:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ if [[ ${ENABLE_SNAPPY_BACKUP} == "true" ]] ; then
../../utils/snappy-move-results/run_snappy.sh metadata.json $snappy_path
store_on_elastic
rm -rf files_list
fi
fi
echo -e "${bold}Finished workload run_hostnetwork_network_test_gromgit.sh"
1 change: 1 addition & 0 deletions workloads/network-perf/run_multus_network_tests_fromgit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,5 @@ fi
# Cleanup
rm -rf /tmp/benchmark-operator
rm -f compare_output_*.yaml
echo -e "${bold}Finished workload run_multus_network_tests_fromgit.sh"
exit 0
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ delete_benchmark
done
print_uuid
generate_csv
echo -e "${bold}Finished workload run_pod_network_policy_test_fromgit.sh"
3 changes: 2 additions & 1 deletion workloads/network-perf/run_pod_network_test_fromgit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ if [[ ${ENABLE_SNAPPY_BACKUP} == "true" ]] ; then
../../utils/snappy-move-results/run_snappy.sh metadata.json $snappy_path
store_on_elastic
rm -rf files_list
fi
fi
echo -e "${bold}Finished workload run_pod_network_test_fromgit.sh"
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ delete_benchmark
done
print_uuid
generate_csv
echo -e "${bold}Finished workload run_serviceip_network_policy_test_fromgit.sh"
3 changes: 2 additions & 1 deletion workloads/network-perf/run_serviceip_network_test_fromgit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ if [[ ${ENABLE_SNAPPY_BACKUP} == "true" ]] ; then
../../utils/snappy-move-results/run_snappy.sh metadata.json $snappy_path
store_on_elastic
rm -rf files_list
fi
fi
echo -e "${bold}Finished workload run_serviceip_network_test_fromgit.sh"
3 changes: 2 additions & 1 deletion workloads/network-perf/smoke_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ if [[ ${ENABLE_SNAPPY_BACKUP} == "true" ]] ; then
../../utils/snappy-move-results/run_snappy.sh metadata.json $snappy_path
store_on_elastic
rm -rf files_list
fi
fi
echo -e "${bold}Finished workload smoke_test.sh"
Loading