Skip to content

Commit

Permalink
adding multiple Nodes to e2e cluster (#286)
Browse files Browse the repository at this point in the history
* adding multiple Nodes to e2e cluster

* removing kustomize edit image

* updates per pr review

Co-authored-by: Dimitris Gkanatsios <[email protected]>
  • Loading branch information
dgkanatsios and Dimitris Gkanatsios authored Jun 28, 2022
1 parent a6a3031 commit b88a44d
Show file tree
Hide file tree
Showing 19 changed files with 143 additions and 97 deletions.
37 changes: 21 additions & 16 deletions cmd/e2e/build_crashing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"context"
"net"
"time"

. "github.com/onsi/ginkgo/v2"
Expand All @@ -15,14 +14,18 @@ import (
var _ = Describe("Crashing Build", func() {
testBuildCrashingName := "crashing"
testCrashingBuildID := "85ffe8da-c82f-4035-86c5-9d2b5f42d6f7"
It("should become unhealthy", func() {
It("should become Unhealthy, then transition to Healthy and then Unhealthy again", func() {
ctx := context.Background()
kubeConfig := ctrl.GetConfigOrDie()
kubeClient, err := createKubeClient(kubeConfig)
Expect(err).ToNot(HaveOccurred())
err = kubeClient.Create(ctx, createCrashingBuild(testBuildCrashingName, testCrashingBuildID, img))
Expect(err).ToNot(HaveOccurred())

// this test simulates the scenario where
// a GameServerBuild becomes Unhealthy because of multiple crashes
// user manually increases the CrashesToMarkUnhealthy so GameServerBuild transitions to Healthy again
// multiple crashes occur, so the GameServerBuild becomes Unhealthy again
Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
Expand All @@ -40,13 +43,16 @@ var _ = Describe("Crashing Build", func() {
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, 45*time.Second, interval).Should(Succeed()) // bigger timeout because of the time it takes for crashes to occur and be captured by the controller

// we are updating the GameServerBuild to be able to have more crashes for it to become Unhealthy
// we are updating the GameServerBuild with a big CrashesToMarkUnhealthy to give time to the GameServerBuild to become Healthy
// Reasoning: At one point while running the e2e tests, we noticed that this test failed.
// This is because the GameServers crashed multiple (10) times so the GameServerBuild stayed Unhealthy,
// before having the chance to transition (temporarily) to Healthy. So, by setting it to 1000 we give it more chance to transition to Healthy,
// before decreasing it to 10 (in the next step) so that it can become Unhealthy again.
gsb := &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch := client.MergeFrom(gsb.DeepCopy())
gsb.Spec.CrashesToMarkUnhealthy = 10

gsb.Spec.CrashesToMarkUnhealthy = 1000
err = kubeClient.Patch(ctx, gsb, patch)
Expect(err).ToNot(HaveOccurred())

Expand All @@ -66,7 +72,16 @@ var _ = Describe("Crashing Build", func() {
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, 10*time.Second, interval).Should(Succeed())

// but only temporarily, since the game servers will continue to crash
// we're decreasing the CrashesToMarkUnhealthy to 10 so that the
// GameServerBuild will eventually become Unhealthy
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch = client.MergeFrom(gsb.DeepCopy())
gsb.Spec.CrashesToMarkUnhealthy = 10
err = kubeClient.Patch(ctx, gsb, patch)
Expect(err).ToNot(HaveOccurred())

// now, let's make sure that GameServerBuild is Unhealthy
Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
Expand All @@ -83,16 +98,6 @@ var _ = Describe("Crashing Build", func() {
}
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, 40*time.Second, interval).Should(Succeed())

Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildCrashingName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(2))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}, 10*time.Second, interval).Should(Succeed())
})
})

Expand Down
9 changes: 5 additions & 4 deletions cmd/e2e/build_host_network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,17 @@ var _ = Describe("Build with hostnetwork", func() {
g.Expect(verifyPodsInHostNetwork(ctx, kubeClient, gsb, state)).To(Succeed())
}, timeout, interval).Should(Succeed())

// make sure all GameServers have a Public IP and NodeName
Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithHostNetworkName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(3))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
for _, gs := range gsList.Items {
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}
}, timeout, interval).Should(Succeed())

})
})

Expand Down
9 changes: 5 additions & 4 deletions cmd/e2e/build_sleep_before_readyforplayers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,17 @@ var _ = Describe("Build which sleeps before calling GSDK ReadyForPlayers", func(
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, timeout, interval).Should(Succeed())

// make sure all GameServers have a Public IP and NodeName
Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildSleepBeforeReadyForPlayersName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(3))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
for _, gs := range gsList.Items {
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}
}, timeout, interval).Should(Succeed())

})
})

Expand Down
36 changes: 19 additions & 17 deletions cmd/e2e/build_without_readyforplayers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ import (
)

var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {
testBuildWithoutReadyForPlayers := "withoutreadyforplayers"
testBuildWithoutReadyForPlayersName := "withoutreadyforplayers"
testWithoutReadyForPlayersBuildID := "85ffe8da-c82f-4035-86c5-9d2b5f42d6f8"
It("should have GameServers stuck in Initializing", func() {
ctx := context.Background()
kubeConfig := ctrl.GetConfigOrDie()
kubeClient, err := createKubeClient(kubeConfig)
Expect(err).ToNot(HaveOccurred())
err = kubeClient.Create(ctx, createBuildWithoutReadyForPlayers(testBuildWithoutReadyForPlayers, testWithoutReadyForPlayersBuildID, img))
err = kubeClient.Create(ctx, createBuildWithoutReadyForPlayers(testBuildWithoutReadyForPlayersName, testWithoutReadyForPlayersBuildID, img))
Expect(err).ToNot(HaveOccurred())

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 2,
standingByCount: 0,
Expand All @@ -40,7 +40,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

// update the GameServerBuild to 4 standingBy
gsb := &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch := client.MergeFrom(gsb.DeepCopy())
gsb.Spec.StandingBy = 4
Expand All @@ -49,10 +49,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 4,
standingByCount: 0,
Expand All @@ -64,7 +64,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

// update the GameServerBuild to 0 standingBy
gsb = &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch = client.MergeFrom(gsb.DeepCopy())
gsb.Spec.StandingBy = 0
Expand All @@ -73,10 +73,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 0,
standingByCount: 0,
Expand All @@ -88,7 +88,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

// update the GameServerBuild to 2 standingBy again
gsb = &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch = client.MergeFrom(gsb.DeepCopy())
gsb.Spec.StandingBy = 2
Expand All @@ -97,10 +97,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 2,
standingByCount: 0,
Expand All @@ -110,14 +110,16 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, timeout, interval).Should(Succeed())

// make sure all GameServers have a Public IP and NodeName
Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithoutReadyForPlayers})
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithoutReadyForPlayersName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(2))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
for _, gs := range gsList.Items {
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}
}, timeout, interval).Should(Succeed())

})
Expand Down
31 changes: 21 additions & 10 deletions cmd/e2e/utilities_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ const (
portKey string = "gameport"
safeToEvictPodAttribute string = "cluster-autoscaler.kubernetes.io/safe-to-evict"
timeout = time.Second * 30
interval = time.Millisecond * 250
interval = time.Second * 1
thundernetesSystemNamespace = "thundernetes-system"
testNodeCount = 3
)

type AllocationResult struct {
Expand Down Expand Up @@ -97,20 +98,30 @@ func validateThatAllocatedServersHaveReadyForPlayersUnblocked(ctx context.Contex
return err
}

if len(nodeAgentPodList.Items) != 1 {
return fmt.Errorf("expected 1 node agent pod, got %d", len(nodeAgentPodList.Items))
}
nodeAgentPod := nodeAgentPodList.Items[0]

nodeAgentLogs, err := getContainerLogs(ctx, coreClient, nodeAgentPod.Name, nodeAgentName, thundernetesSystemNamespace)
if err != nil {
return err
if len(nodeAgentPodList.Items) != testNodeCount {
return fmt.Errorf("expected %d NodeAgent Pods, got %d", testNodeCount, len(nodeAgentPodList.Items))
}

for _, gameServer := range activeGameServers {
// find the NodeAgent Pod for this GameServer (they have been scheduled in the same Node)
var nodeAgentLogs string
var err error
for _, nodeAgentPod := range nodeAgentPodList.Items {
// when running on kind, the GameServer.Status.PublicIP is equal to the private IP of the Node
if nodeAgentPod.Status.HostIP == gameServer.Status.PublicIP {
nodeAgentLogs, err = getContainerLogs(ctx, coreClient, nodeAgentPod.Name, nodeAgentName, thundernetesSystemNamespace)
if err != nil {
return err
}
}
}
if nodeAgentLogs == "" {
return fmt.Errorf("could not find NodeAgent Pod for GameServer %s", gameServer.Name)
}

Eventually(func() error {
if !strings.Contains(nodeAgentLogs, "sessionCookie:randomCookie") {
return fmt.Errorf("expected to find 'sessionCookie:randomCookie' in nodeAgent logs, got %s", nodeAgentLogs)
return fmt.Errorf("expected to find 'sessionCookie:randomCookie' in NodeAgent logs, got %s", nodeAgentLogs)
}

containerLogs, err := getContainerLogs(ctx, coreClient, gameServer.Name, containerName, gameServer.Namespace)
Expand Down
File renamed without changes.
2 changes: 2 additions & 0 deletions cmd/gameserverapi/deployment/default/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Kustomization for the default gameserverapi deployment:
# deploys deploy.yaml without any patches.
resources:
- deploy.yaml
4 changes: 4 additions & 0 deletions cmd/gameserverapi/deployment/e2e/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Kustomization used by the e2e tests: extends the default gameserverapi
# deployment and applies nodeselector.yaml as a strategic-merge patch,
# pinning the Pod to a specific Node so the e2e port forwarding works.
bases:
- ../default
patchesStrategicMerge:
- nodeselector.yaml
13 changes: 13 additions & 0 deletions cmd/gameserverapi/deployment/e2e/nodeselector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# When this YAML file is used with kustomize, it adds the nodeSelector field to the
# thundernetes-gameserverapi Deployment (see metadata.name below).
# It's used in the e2e tests so that the gameserverapi is scheduled on a specific Node,
# which lets us do the listening service port forwarding (5001) correctly
apiVersion: apps/v1
kind: Deployment
metadata:
name: thundernetes-gameserverapi
namespace: thundernetes-system
spec:
template:
spec:
nodeSelector:
# NOTE(review): "kind-worker" is the default hostname of the first worker node
# in a kind cluster - confirm it matches the nodes defined in e2e/kind-config.yaml
kubernetes.io/hostname: kind-worker
1 change: 1 addition & 0 deletions cmd/nodeagent/nodeagentmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ func (n *NodeAgentManager) gameServerCreatedOrUpdated(obj *unstructured.Unstruct

// server is Active, so get session details as well initial players details
sessionID, sessionCookie, initialPlayers := parseSessionDetails(obj, gameServerName, gameServerNamespace)
// The "sessionCookie:<valueOfCookie>" string below is searched for by the e2e tests, so be careful not to modify it!
logger.Infof("getting values from allocation - GameServer CR, sessionID:%s, sessionCookie:%s, initialPlayers: %v", sessionID, sessionCookie, initialPlayers)

// create the GameServerDetails CR
Expand Down
11 changes: 3 additions & 8 deletions e2e/kind-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
extraPortMappings:
- containerPort: 5000
hostPort: 5000
Expand All @@ -11,11 +12,5 @@ nodes:
hostPort: 5001
listenAddress: "0.0.0.0"
protocol: tcp
- containerPort: 10000
hostPort: 10000
listenAddress: "0.0.0.0"
protocol: tcp
- containerPort: 10001
hostPort: 10001
listenAddress: "0.0.0.0"
protocol: tcp
- role: worker
- role: worker
7 changes: 4 additions & 3 deletions e2e/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ export FAKE_TLS_PUBLIC=/tmp/${RANDOM}.pem
openssl req -x509 -newkey rsa:4096 -nodes -keyout ${FAKE_TLS_PRIVATE} -out ${FAKE_TLS_PUBLIC} -days 365 -subj '/CN=localhost'

echo "-----Compiling, building and deploying the operator to local Kubernetes cluster-----"
IMG=${IMAGE_NAME_OPERATOR}:${IMAGE_TAG} API_SERVICE_SECURITY=usetls make -C "${DIR}"/../pkg/operator deploy
IMG=${IMAGE_NAME_OPERATOR}:${IMAGE_TAG} API_SERVICE_SECURITY=usetls make -C "${DIR}"/../pkg/operator deploye2e

echo "-----Deploying GameServer API-----"
IMAGE_TAG=${IMAGE_TAG} envsubst < cmd/gameserverapi/deploy.yaml | kubectl apply -f -
cd cmd/gameserverapi/deployment/default
"${DIR}"/../pkg/operator/bin/kustomize build ../e2e | IMAGE_TAG=${IMAGE_TAG} envsubst | kubectl apply -f -

echo "-----Waiting for Controller deployment-----"
kubectl wait --for=condition=available --timeout=300s deployment/thundernetes-controller-manager -n thundernetes-system
Expand All @@ -56,7 +57,7 @@ echo "-----Waiting for GameServer API deployment-----"
kubectl wait --for=condition=ready --timeout=300s pod -n thundernetes-system -l app=thundernetes-gameserverapi

echo "-----Running end to end tests-----"
cd cmd/e2e
cd "${DIR}"/../cmd/e2e
# create the test namespaces
kubectl create namespace gameserverapi
kubectl create namespace e2e
Expand Down
6 changes: 4 additions & 2 deletions pkg/operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ docker-push: ## Push docker image with the manager.

.PHONY: create-install-files
create-install-files:
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
## Create install files for thundernetes installed without API security
$(KUSTOMIZE) build config/default | API_SERVICE_SECURITY=none envsubst > ${INSTALL_FILES_FOLDER}/operator.yaml
## Create install files for thundernetes installed with API security
Expand All @@ -111,9 +110,12 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified

.PHONY: deploy
deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
$(KUSTOMIZE) build config/default | envsubst | kubectl apply -f -

.PHONY: deploye2e
deploye2e: manifests kustomize ## Deploy controller with the e2e configuration to the K8s cluster specified in ~/.kube/config.
$(KUSTOMIZE) build config/e2e | envsubst | kubectl apply -f -

.PHONY: undeploy
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/default | kubectl delete --ignore-not-found=$(ignore-not-found) -f -
Expand Down
5 changes: 5 additions & 0 deletions pkg/operator/config/e2e/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bases:
- ../default

patchesStrategicMerge:
- nodeselector.yaml
Loading

0 comments on commit b88a44d

Please sign in to comment.