adding multiple Nodes to e2e cluster #286

Merged
merged 3 commits on Jun 28, 2022
37 changes: 21 additions & 16 deletions cmd/e2e/build_crashing_test.go
@@ -2,7 +2,6 @@ package main

import (
"context"
"net"
"time"

. "github.com/onsi/ginkgo/v2"
@@ -15,14 +14,18 @@ import (
var _ = Describe("Crashing Build", func() {
testBuildCrashingName := "crashing"
testCrashingBuildID := "85ffe8da-c82f-4035-86c5-9d2b5f42d6f7"
It("should become unhealthy", func() {
It("should become Unhealthy, then transition to Healthy and then Unhealthy again", func() {
ctx := context.Background()
kubeConfig := ctrl.GetConfigOrDie()
kubeClient, err := createKubeClient(kubeConfig)
Expect(err).ToNot(HaveOccurred())
err = kubeClient.Create(ctx, createCrashingBuild(testBuildCrashingName, testCrashingBuildID, img))
Expect(err).ToNot(HaveOccurred())

// this test simulates the scenario where
// a GameServerBuild becomes Unhealthy because of multiple crashes
// user manually increases the CrashesToMarkUnhealthy so GameServerBuild transitions to Healthy again
// multiple crashes occur, so the GameServerBuild becomes Unhealthy again
Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
@@ -40,13 +43,16 @@ var _ = Describe("Crashing Build", func() {
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, 45*time.Second, interval).Should(Succeed()) // bigger timeout because of the time crashes take to occur and be captured by the controller

// we are updating the GameServerBuild to be able to have more crashes for it to become Unhealthy
// we are updating the GameServerBuild with a big CrashesToMarkUnhealthy to give time to the GameServerBuild to become Healthy
// Reasoning: At one point during running the e2e tests, we noticed that this test failed.
// This is because the GameServers crashed multiple (10) times so the GameServerBuild stayed Unhealthy,
// before having the chance to transition (temporarily) to Healthy. So, by setting it to 1000 we give it more chance to transition to Healthy,
// before decreasing it to 10 (in the next step) so that it can become Unhealthy again.
gsb := &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch := client.MergeFrom(gsb.DeepCopy())
gsb.Spec.CrashesToMarkUnhealthy = 10

gsb.Spec.CrashesToMarkUnhealthy = 1000
err = kubeClient.Patch(ctx, gsb, patch)
Expect(err).ToNot(HaveOccurred())

@@ -66,7 +72,16 @@ var _ = Describe("Crashing Build", func() {
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, 10*time.Second, interval).Should(Succeed())

// but only temporarily, since the game servers will continue to crash
// we're decreasing the CrashesToMarkUnhealthy to 10 so that the
// GameServerBuild will eventually become Unhealthy
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch = client.MergeFrom(gsb.DeepCopy())
gsb.Spec.CrashesToMarkUnhealthy = 10
err = kubeClient.Patch(ctx, gsb, patch)
Expect(err).ToNot(HaveOccurred())

// now, let's make sure that GameServerBuild is Unhealthy
Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb)
@@ -83,16 +98,6 @@ }
}
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, 40*time.Second, interval).Should(Succeed())

Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildCrashingName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(2))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}, 10*time.Second, interval).Should(Succeed())
})
})

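The crashing test above repeats the same get / deep-copy / merge-patch sequence each time it changes CrashesToMarkUnhealthy. A minimal sketch of a helper that could factor this out, assuming the controller-runtime client and mpsv1alpha1 types already imported in the test file (the helper name is hypothetical):

```go
// patchCrashesToMarkUnhealthy is a hypothetical helper that updates the
// CrashesToMarkUnhealthy threshold of a GameServerBuild via a merge patch.
// The crashes parameter is typed as int here; adjust it if the CRD field
// uses a different integer type.
func patchCrashesToMarkUnhealthy(ctx context.Context, kubeClient client.Client, name string, crashes int) error {
	gsb := &mpsv1alpha1.GameServerBuild{}
	if err := kubeClient.Get(ctx, client.ObjectKey{Name: name, Namespace: testNamespace}, gsb); err != nil {
		return err
	}
	patch := client.MergeFrom(gsb.DeepCopy())
	gsb.Spec.CrashesToMarkUnhealthy = crashes
	return kubeClient.Patch(ctx, gsb, patch)
}
```

With such a helper, the two patch steps in the test would collapse to single calls, e.g. Expect(patchCrashesToMarkUnhealthy(ctx, kubeClient, testBuildCrashingName, 1000)).To(Succeed()).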
9 changes: 5 additions & 4 deletions cmd/e2e/build_host_network_test.go
@@ -105,16 +105,17 @@ var _ = Describe("Build with hostnetwork", func() {
g.Expect(verifyPodsInHostNetwork(ctx, kubeClient, gsb, state)).To(Succeed())
}, timeout, interval).Should(Succeed())

// make sure all GameServers have a Public IP and NodeName
Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithHostNetworkName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(3))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
for _, gs := range gsList.Items {
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}
}, timeout, interval).Should(Succeed())

})
})

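The loop above that checks NodeName and PublicIP for every GameServer is repeated in the other Build tests below. A sketch of a shared helper, assuming the same imports (net, fmt, the controller-runtime client) and the LabelBuildName constant from utilities_test.go; the helper name is hypothetical:

```go
// verifyGameServersHaveNodeNameAndPublicIP is a hypothetical helper that checks that the
// expected number of GameServers exist for a build and that each one has been scheduled
// (NodeName set) and carries a parseable PublicIP.
func verifyGameServersHaveNodeNameAndPublicIP(ctx context.Context, kubeClient client.Client, buildName string, expectedCount int) error {
	var gsList mpsv1alpha1.GameServerList
	if err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: buildName}); err != nil {
		return err
	}
	if len(gsList.Items) != expectedCount {
		return fmt.Errorf("expected %d GameServers, got %d", expectedCount, len(gsList.Items))
	}
	for _, gs := range gsList.Items {
		if gs.Status.NodeName == "" {
			return fmt.Errorf("GameServer %s has no NodeName", gs.Name)
		}
		if net.ParseIP(gs.Status.PublicIP) == nil {
			return fmt.Errorf("GameServer %s has an invalid PublicIP %q", gs.Name, gs.Status.PublicIP)
		}
	}
	return nil
}
```

Inside the existing Eventually blocks it would be called as g.Expect(verifyGameServersHaveNodeNameAndPublicIP(ctx, kubeClient, testBuildWithHostNetworkName, 3)).To(Succeed()), mirroring verifyGameServerBuildOverall.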
9 changes: 5 additions & 4 deletions cmd/e2e/build_sleep_before_readyforplayers_test.go
@@ -99,16 +99,17 @@ var _ = Describe("Build which sleeps before calling GSDK ReadyForPlayers", func(
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, timeout, interval).Should(Succeed())

// make sure all GameServers have a Public IP and NodeName
Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildSleepBeforeReadyForPlayersName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(3))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
for _, gs := range gsList.Items {
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}
}, timeout, interval).Should(Succeed())

})
})

36 changes: 19 additions & 17 deletions cmd/e2e/build_without_readyforplayers_test.go
@@ -13,22 +13,22 @@ import (
)

var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {
testBuildWithoutReadyForPlayers := "withoutreadyforplayers"
testBuildWithoutReadyForPlayersName := "withoutreadyforplayers"
testWithoutReadyForPlayersBuildID := "85ffe8da-c82f-4035-86c5-9d2b5f42d6f8"
It("should have GameServers stuck in Initializing", func() {
ctx := context.Background()
kubeConfig := ctrl.GetConfigOrDie()
kubeClient, err := createKubeClient(kubeConfig)
Expect(err).ToNot(HaveOccurred())
err = kubeClient.Create(ctx, createBuildWithoutReadyForPlayers(testBuildWithoutReadyForPlayers, testWithoutReadyForPlayersBuildID, img))
err = kubeClient.Create(ctx, createBuildWithoutReadyForPlayers(testBuildWithoutReadyForPlayersName, testWithoutReadyForPlayersBuildID, img))
Expect(err).ToNot(HaveOccurred())

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 2,
standingByCount: 0,
@@ -40,7 +40,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

// update the GameServerBuild to 4 standingBy
gsb := &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch := client.MergeFrom(gsb.DeepCopy())
gsb.Spec.StandingBy = 4
@@ -49,10 +49,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 4,
standingByCount: 0,
@@ -64,7 +64,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

// update the GameServerBuild to 0 standingBy
gsb = &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch = client.MergeFrom(gsb.DeepCopy())
gsb.Spec.StandingBy = 0
@@ -73,10 +73,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 0,
standingByCount: 0,
@@ -88,7 +88,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

// update the GameServerBuild to 2 standingBy again
gsb = &mpsv1alpha1.GameServerBuild{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
Expect(err).ToNot(HaveOccurred())
patch = client.MergeFrom(gsb.DeepCopy())
gsb.Spec.StandingBy = 2
@@ -97,10 +97,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {

Eventually(func(g Gomega) {
gsb := &mpsv1alpha1.GameServerBuild{}
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb)
err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
g.Expect(err).ToNot(HaveOccurred())
state := buildState{
buildName: testBuildWithoutReadyForPlayers,
buildName: testBuildWithoutReadyForPlayersName,
buildID: testWithoutReadyForPlayersBuildID,
initializingCount: 2,
standingByCount: 0,
@@ -110,14 +110,16 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {
g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
}, timeout, interval).Should(Succeed())

// make sure all GameServers have a Public IP and NodeName
Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithoutReadyForPlayers})
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithoutReadyForPlayersName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(2))
gs := gsList.Items[0]
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
for _, gs := range gsList.Items {
g.Expect(gs.Status.NodeName).ToNot(BeEmpty())
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil())
}
}, timeout, interval).Should(Succeed())

})
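The test above repeats the same Eventually block after every StandingBy change. A minimal sketch of a wrapper for that block, assuming the dot-imported Ginkgo/Gomega helpers plus the buildState type and verifyGameServerBuildOverall helper from utilities_test.go (the wrapper name is hypothetical):

```go
// waitForBuildState is a hypothetical wrapper around the Eventually block that is
// repeated after every StandingBy change: it re-reads the GameServerBuild and then
// verifies the expected counts via verifyGameServerBuildOverall.
func waitForBuildState(ctx context.Context, kubeClient client.Client, state buildState) {
	Eventually(func(g Gomega) {
		gsb := &mpsv1alpha1.GameServerBuild{}
		err := kubeClient.Get(ctx, client.ObjectKey{Name: state.buildName, Namespace: testNamespace}, gsb)
		g.Expect(err).ToNot(HaveOccurred())
		g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
	}, timeout, interval).Should(Succeed())
}
```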
31 changes: 21 additions & 10 deletions cmd/e2e/utilities_test.go
@@ -40,8 +40,9 @@ const (
portKey string = "gameport"
safeToEvictPodAttribute string = "cluster-autoscaler.kubernetes.io/safe-to-evict"
timeout = time.Second * 30
interval = time.Millisecond * 250
interval = time.Second * 1
thundernetesSystemNamespace = "thundernetes-system"
testNodeCount = 3
)

type AllocationResult struct {
@@ -97,20 +98,30 @@ func validateThatAllocatedServersHaveReadyForPlayersUnblocked(ctx context.Contex
return err
}

if len(nodeAgentPodList.Items) != 1 {
return fmt.Errorf("expected 1 node agent pod, got %d", len(nodeAgentPodList.Items))
}
nodeAgentPod := nodeAgentPodList.Items[0]

nodeAgentLogs, err := getContainerLogs(ctx, coreClient, nodeAgentPod.Name, nodeAgentName, thundernetesSystemNamespace)
if err != nil {
return err
if len(nodeAgentPodList.Items) != testNodeCount {
return fmt.Errorf("expected %d NodeAgent Pods, got %d", testNodeCount, len(nodeAgentPodList.Items))
}

for _, gameServer := range activeGameServers {
// find the NodeAgent Pod for this GameServer (they have been scheduled in the same Node)
var nodeAgentLogs string
var err error
for _, nodeAgentPod := range nodeAgentPodList.Items {
// when running on kind, the GameServer.Status.PublicIP is equal to the private IP of the Node
if nodeAgentPod.Status.HostIP == gameServer.Status.PublicIP {
nodeAgentLogs, err = getContainerLogs(ctx, coreClient, nodeAgentPod.Name, nodeAgentName, thundernetesSystemNamespace)
if err != nil {
return err
}
}
}
if nodeAgentLogs == "" {
return fmt.Errorf("could not find NodeAgent Pod for GameServer %s", gameServer.Name)
}

Eventually(func() error {
if !strings.Contains(nodeAgentLogs, "sessionCookie:randomCookie") {
return fmt.Errorf("expected to find 'sessionCookie:randomCookie' in nodeAgent logs, got %s", nodeAgentLogs)
return fmt.Errorf("expected to find 'sessionCookie:randomCookie' in NodeAgent logs, got %s", nodeAgentLogs)
}

containerLogs, err := getContainerLogs(ctx, coreClient, gameServer.Name, containerName, gameServer.Namespace)
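The new per-GameServer lookup above matches a NodeAgent Pod to a GameServer by comparing the Pod's HostIP with the GameServer's PublicIP (on kind the two are the same Node IP). A sketch of how that match could be extracted into a small helper, assuming the corev1 Pod type already used for nodeAgentPodList; the helper name is hypothetical:

```go
// findNodeAgentPodForGameServer is a hypothetical helper that returns the NodeAgent Pod
// scheduled on the same Node as the given GameServer. On kind, GameServer.Status.PublicIP
// equals the Node's private IP, so it can be matched against Pod.Status.HostIP.
func findNodeAgentPodForGameServer(nodeAgentPods []corev1.Pod, gs mpsv1alpha1.GameServer) (*corev1.Pod, error) {
	for i := range nodeAgentPods {
		if nodeAgentPods[i].Status.HostIP == gs.Status.PublicIP {
			return &nodeAgentPods[i], nil
		}
	}
	return nil, fmt.Errorf("could not find NodeAgent Pod for GameServer %s", gs.Name)
}
```

The caller would then pass the returned Pod's name to getContainerLogs, as the loop above does.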
2 changes: 2 additions & 0 deletions cmd/gameserverapi/deployment/default/kustomization.yaml
@@ -0,0 +1,2 @@
resources:
- deploy.yaml
4 changes: 4 additions & 0 deletions cmd/gameserverapi/deployment/e2e/kustomization.yaml
@@ -0,0 +1,4 @@
bases:
- ../default
patchesStrategicMerge:
- nodeselector.yaml
13 changes: 13 additions & 0 deletions cmd/gameserverapi/deployment/e2e/nodeselector.yaml
@@ -0,0 +1,13 @@
# When this YAML file is used with kustomize, it adds the nodeSelector field to the gameserverapi deployment.
# It's used in the e2e tests so that the gameserverapi is scheduled on a specific Node and the
# listening service port forwarding (5001) works correctly.
apiVersion: apps/v1
kind: Deployment
metadata:
name: thundernetes-gameserverapi
namespace: thundernetes-system
spec:
template:
spec:
nodeSelector:
kubernetes.io/hostname: kind-worker
1 change: 1 addition & 0 deletions cmd/nodeagent/nodeagentmanager.go
@@ -247,6 +247,7 @@ func (n *NodeAgentManager) gameServerCreatedOrUpdated(obj *unstructured.Unstruct

// server is Active, so get session details as well initial players details
sessionID, sessionCookie, initialPlayers := parseSessionDetails(obj, gameServerName, gameServerNamespace)
// sessionCookie:<valueOfCookie> string is looked for in the e2e tests, be careful not to modify it!
logger.Infof("getting values from allocation - GameServer CR, sessionID:%s, sessionCookie:%s, initialPlayers: %v", sessionID, sessionCookie, initialPlayers)

// create the GameServerDetails CR
11 changes: 3 additions & 8 deletions e2e/kind-config.yaml
@@ -2,6 +2,7 @@ kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
extraPortMappings:
- containerPort: 5000
hostPort: 5000
@@ -11,11 +12,5 @@ nodes:
hostPort: 5001
listenAddress: "0.0.0.0"
protocol: tcp
- containerPort: 10000
hostPort: 10000
listenAddress: "0.0.0.0"
protocol: tcp
- containerPort: 10001
hostPort: 10001
listenAddress: "0.0.0.0"
protocol: tcp
- role: worker
- role: worker
7 changes: 4 additions & 3 deletions e2e/run.sh
@@ -44,10 +44,11 @@ export FAKE_TLS_PUBLIC=/tmp/${RANDOM}.pem
openssl req -x509 -newkey rsa:4096 -nodes -keyout ${FAKE_TLS_PRIVATE} -out ${FAKE_TLS_PUBLIC} -days 365 -subj '/CN=localhost'

echo "-----Compiling, building and deploying the operator to local Kubernetes cluster-----"
IMG=${IMAGE_NAME_OPERATOR}:${IMAGE_TAG} API_SERVICE_SECURITY=usetls make -C "${DIR}"/../pkg/operator deploy
IMG=${IMAGE_NAME_OPERATOR}:${IMAGE_TAG} API_SERVICE_SECURITY=usetls make -C "${DIR}"/../pkg/operator deploye2e

echo "-----Deploying GameServer API-----"
IMAGE_TAG=${IMAGE_TAG} envsubst < cmd/gameserverapi/deploy.yaml | kubectl apply -f -
cd cmd/gameserverapi/deployment/default
"${DIR}"/../pkg/operator/bin/kustomize build ../e2e | IMAGE_TAG=${IMAGE_TAG} envsubst | kubectl apply -f -

echo "-----Waiting for Controller deployment-----"
kubectl wait --for=condition=available --timeout=300s deployment/thundernetes-controller-manager -n thundernetes-system
@@ -56,7 +57,7 @@ echo "-----Waiting for GameServer API deployment-----"
kubectl wait --for=condition=ready --timeout=300s pod -n thundernetes-system -l app=thundernetes-gameserverapi

echo "-----Running end to end tests-----"
cd cmd/e2e
cd "${DIR}"/../cmd/e2e
# create the test namespaces
kubectl create namespace gameserverapi
kubectl create namespace e2e
6 changes: 4 additions & 2 deletions pkg/operator/Makefile
@@ -84,7 +84,6 @@ docker-push: ## Push docker image with the manager.

.PHONY: create-install-files
create-install-files:
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
## Create install files for thundernetes installed without API security
$(KUSTOMIZE) build config/default | API_SERVICE_SECURITY=none envsubst > ${INSTALL_FILES_FOLDER}/operator.yaml
## Create install files for thundernetes installed with API security
@@ -111,9 +110,12 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified

.PHONY: deploy
deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
$(KUSTOMIZE) build config/default | envsubst | kubectl apply -f -

.PHONY: deploye2e
deploye2e: manifests kustomize ## Deploy controller with the e2e configuration (config/e2e) to the K8s cluster specified in ~/.kube/config.
$(KUSTOMIZE) build config/e2e | envsubst | kubectl apply -f -

.PHONY: undeploy
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/default | kubectl delete --ignore-not-found=$(ignore-not-found) -f -
5 changes: 5 additions & 0 deletions pkg/operator/config/e2e/kustomization.yaml
@@ -0,0 +1,5 @@
bases:
- ../default

patchesStrategicMerge:
- nodeselector.yaml