From 63eb9f3ffb693a05063b227e79827446d45cebfc Mon Sep 17 00:00:00 2001 From: Dimitris Gkanatsios Date: Mon, 20 Jun 2022 22:42:47 -0700 Subject: [PATCH 1/3] adding multiple Nodes to e2e cluster --- cmd/e2e/utilities_test.go | 28 +++++++++++++------ .../{ => deploy/default}/deploy.yaml | 2 +- .../deploy/default/kustomization.yaml | 2 ++ .../deploy/e2e/kustomization.yaml | 4 +++ .../deploy/e2e/nodeselector.yaml | 12 ++++++++ cmd/nodeagent/nodeagentmanager.go | 1 + e2e/kind-config.yaml | 11 ++------ e2e/run.sh | 8 ++++-- pkg/operator/Makefile | 5 ++++ pkg/operator/config/e2e/kustomization.yaml | 5 ++++ pkg/operator/config/e2e/nodeselector.yaml | 14 ++++++++++ 11 files changed, 71 insertions(+), 21 deletions(-) rename cmd/gameserverapi/{ => deploy/default}/deploy.yaml (95%) create mode 100644 cmd/gameserverapi/deploy/default/kustomization.yaml create mode 100644 cmd/gameserverapi/deploy/e2e/kustomization.yaml create mode 100644 cmd/gameserverapi/deploy/e2e/nodeselector.yaml create mode 100644 pkg/operator/config/e2e/kustomization.yaml create mode 100644 pkg/operator/config/e2e/nodeselector.yaml diff --git a/cmd/e2e/utilities_test.go b/cmd/e2e/utilities_test.go index 5753212d..a6cf5649 100644 --- a/cmd/e2e/utilities_test.go +++ b/cmd/e2e/utilities_test.go @@ -97,20 +97,30 @@ func validateThatAllocatedServersHaveReadyForPlayersUnblocked(ctx context.Contex return err } - if len(nodeAgentPodList.Items) != 1 { - return fmt.Errorf("expected 1 node agent pod, got %d", len(nodeAgentPodList.Items)) - } - nodeAgentPod := nodeAgentPodList.Items[0] - - nodeAgentLogs, err := getContainerLogs(ctx, coreClient, nodeAgentPod.Name, nodeAgentName, thundernetesSystemNamespace) - if err != nil { - return err + if len(nodeAgentPodList.Items) != 3 { + return fmt.Errorf("expected 3 NodeAgent Pods, got %d", len(nodeAgentPodList.Items)) } for _, gameServer := range activeGameServers { + // find the NodeAgent Pod for this GameServer (they have been scheduled in the same Node) + var nodeAgentLogs 
string + var err error + for _, nodeAgentPod := range nodeAgentPodList.Items { + // when running on kind, the GameServer.Status.PublicIP is equal to the private IP of the Node + if nodeAgentPod.Status.HostIP == gameServer.Status.PublicIP { + nodeAgentLogs, err = getContainerLogs(ctx, coreClient, nodeAgentPod.Name, nodeAgentName, thundernetesSystemNamespace) + if err != nil { + return err + } + } + } + if nodeAgentLogs == "" { + return fmt.Errorf("could not find NodeAgent Pod for GameServer %s", gameServer.Name) + } + Eventually(func() error { if !strings.Contains(nodeAgentLogs, "sessionCookie:randomCookie") { - return fmt.Errorf("expected to find 'sessionCookie:randomCookie' in nodeAgent logs, got %s", nodeAgentLogs) + return fmt.Errorf("expected to find 'sessionCookie:randomCookie' in NodeAgent logs, got %s", nodeAgentLogs) } containerLogs, err := getContainerLogs(ctx, coreClient, gameServer.Name, containerName, gameServer.Namespace) diff --git a/cmd/gameserverapi/deploy.yaml b/cmd/gameserverapi/deploy/default/deploy.yaml similarity index 95% rename from cmd/gameserverapi/deploy.yaml rename to cmd/gameserverapi/deploy/default/deploy.yaml index 4fc1c11d..1a989f35 100644 --- a/cmd/gameserverapi/deploy.yaml +++ b/cmd/gameserverapi/deploy/default/deploy.yaml @@ -16,7 +16,7 @@ spec: app: thundernetes-gameserverapi spec: containers: - - image: thundernetes-gameserverapi:${IMAGE_TAG} + - image: thundernetes-gameserverapi name: gameserverapi imagePullPolicy: IfNotPresent resources: diff --git a/cmd/gameserverapi/deploy/default/kustomization.yaml b/cmd/gameserverapi/deploy/default/kustomization.yaml new file mode 100644 index 00000000..50d8735f --- /dev/null +++ b/cmd/gameserverapi/deploy/default/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- deploy.yaml \ No newline at end of file diff --git a/cmd/gameserverapi/deploy/e2e/kustomization.yaml b/cmd/gameserverapi/deploy/e2e/kustomization.yaml new file mode 100644 index 00000000..9541eef3 --- /dev/null +++ 
b/cmd/gameserverapi/deploy/e2e/kustomization.yaml @@ -0,0 +1,4 @@ +bases: + - ../default +patchesStrategicMerge: + - nodeselector.yaml diff --git a/cmd/gameserverapi/deploy/e2e/nodeselector.yaml b/cmd/gameserverapi/deploy/e2e/nodeselector.yaml new file mode 100644 index 00000000..da95e008 --- /dev/null +++ b/cmd/gameserverapi/deploy/e2e/nodeselector.yaml @@ -0,0 +1,12 @@ +# this YAML file, when used with kustomize, it adds the nodeSelector field to the controller deployment +# it's used in e2e tests, so that the controller is scheduled on a different Node than the GameServer Pods +apiVersion: apps/v1 +kind: Deployment +metadata: + name: thundernetes-gameserverapi + namespace: thundernetes-system +spec: + template: + spec: + nodeSelector: + kubernetes.io/hostname: kind-worker \ No newline at end of file diff --git a/cmd/nodeagent/nodeagentmanager.go b/cmd/nodeagent/nodeagentmanager.go index 24faecc3..3bd5241b 100644 --- a/cmd/nodeagent/nodeagentmanager.go +++ b/cmd/nodeagent/nodeagentmanager.go @@ -247,6 +247,7 @@ func (n *NodeAgentManager) gameServerCreatedOrUpdated(obj *unstructured.Unstruct // server is Active, so get session details as well initial players details sessionID, sessionCookie, initialPlayers := parseSessionDetails(obj, gameServerName, gameServerNamespace) + // sessionCookie: string is looked for in the e2e tests, be careful not to modify it! 
logger.Infof("getting values from allocation - GameServer CR, sessionID:%s, sessionCookie:%s, initialPlayers: %v", sessionID, sessionCookie, initialPlayers) // create the GameServerDetails CR diff --git a/e2e/kind-config.yaml b/e2e/kind-config.yaml index d59dde51..87c91cd7 100644 --- a/e2e/kind-config.yaml +++ b/e2e/kind-config.yaml @@ -2,6 +2,7 @@ kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane +- role: worker extraPortMappings: - containerPort: 5000 hostPort: 5000 @@ -11,11 +12,5 @@ nodes: hostPort: 5001 listenAddress: "0.0.0.0" protocol: tcp - - containerPort: 10000 - hostPort: 10000 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 10001 - hostPort: 10001 - listenAddress: "0.0.0.0" - protocol: tcp \ No newline at end of file +- role: worker +- role: worker \ No newline at end of file diff --git a/e2e/run.sh b/e2e/run.sh index a9b01a9e..f1582d7c 100755 --- a/e2e/run.sh +++ b/e2e/run.sh @@ -44,10 +44,12 @@ export FAKE_TLS_PUBLIC=/tmp/${RANDOM}.pem openssl req -x509 -newkey rsa:4096 -nodes -keyout ${FAKE_TLS_PRIVATE} -out ${FAKE_TLS_PUBLIC} -days 365 -subj '/CN=localhost' echo "-----Compiling, building and deploying the operator to local Kubernetes cluster-----" -IMG=${IMAGE_NAME_OPERATOR}:${IMAGE_TAG} API_SERVICE_SECURITY=usetls make -C "${DIR}"/../pkg/operator deploy +IMG=${IMAGE_NAME_OPERATOR}:${IMAGE_TAG} API_SERVICE_SECURITY=usetls make -C "${DIR}"/../pkg/operator deploye2e echo "-----Deploying GameServer API-----" -IMAGE_TAG=${IMAGE_TAG} envsubst < cmd/gameserverapi/deploy.yaml | kubectl apply -f - +cd cmd/gameserverapi/deploy/default +"${DIR}"/../pkg/operator/bin/kustomize edit set image thundernetes-gameserverapi=thundernetes-gameserverapi:${IMAGE_TAG} +"${DIR}"/../pkg/operator/bin/kustomize build ../e2e | kubectl apply -f - echo "-----Waiting for Controller deployment-----" kubectl wait --for=condition=available --timeout=300s deployment/thundernetes-controller-manager -n thundernetes-system @@ -56,7 +58,7 @@ echo 
"-----Waiting for GameServer API deployment-----" kubectl wait --for=condition=ready --timeout=300s pod -n thundernetes-system -l app=thundernetes-gameserverapi echo "-----Running end to end tests-----" -cd cmd/e2e +cd "${DIR}"/../cmd/e2e # create the test namespaces kubectl create namespace gameserverapi kubectl create namespace e2e diff --git a/pkg/operator/Makefile b/pkg/operator/Makefile index cebd3742..f8119f29 100644 --- a/pkg/operator/Makefile +++ b/pkg/operator/Makefile @@ -114,6 +114,11 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} $(KUSTOMIZE) build config/default | envsubst | kubectl apply -f - +.PHONY: deploye2e +deploye2e: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/e2e | envsubst | kubectl apply -f - + .PHONY: undeploy undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. 
$(KUSTOMIZE) build config/default | kubectl delete --ignore-not-found=$(ignore-not-found) -f - diff --git a/pkg/operator/config/e2e/kustomization.yaml b/pkg/operator/config/e2e/kustomization.yaml new file mode 100644 index 00000000..085bbe95 --- /dev/null +++ b/pkg/operator/config/e2e/kustomization.yaml @@ -0,0 +1,5 @@ +bases: +- ../default + +patchesStrategicMerge: +- nodeselector.yaml \ No newline at end of file diff --git a/pkg/operator/config/e2e/nodeselector.yaml b/pkg/operator/config/e2e/nodeselector.yaml new file mode 100644 index 00000000..5a78d9df --- /dev/null +++ b/pkg/operator/config/e2e/nodeselector.yaml @@ -0,0 +1,14 @@ +# this YAML file, when used with kustomize, it adds the nodeSelector field to the controller deployment +# it's used in e2e tests, so that the controller is scheduled on a different Node than the GameServer Pods +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager +spec: + template: + spec: + nodeSelector: + kubernetes.io/hostname: kind-worker \ No newline at end of file From e669be990ee18a16934bcee800c14fadd44b21a8 Mon Sep 17 00:00:00 2001 From: Dimitris Gkanatsios Date: Fri, 24 Jun 2022 19:40:59 -0700 Subject: [PATCH 2/3] removing kustomize edit image --- cmd/e2e/build_crashing_test.go | 27 +++++------- cmd/e2e/build_host_network_test.go | 9 ++-- ...build_sleep_before_readyforplayers_test.go | 9 ++-- cmd/e2e/build_without_readyforplayers_test.go | 36 ++++++++------- cmd/e2e/utilities_test.go | 7 +-- .../deploy/e2e/nodeselector.yaml | 12 ----- .../default/deploy.yaml | 2 +- .../default/kustomization.yaml | 0 .../e2e/kustomization.yaml | 0 .../deployment/e2e/nodeselector.yaml | 13 ++++++ e2e/run.sh | 5 +-- pkg/operator/Makefile | 3 -- pkg/operator/config/e2e/nodeselector.yaml | 5 ++- .../config/manager/kustomization.yaml | 6 --- pkg/operator/config/manager/manager.yaml | 2 +- pkg/operator/controllers/port_registry.go | 2 +- 
.../controllers/port_registry_test.go | 44 ++++++++----------- 17 files changed, 85 insertions(+), 97 deletions(-) delete mode 100644 cmd/gameserverapi/deploy/e2e/nodeselector.yaml rename cmd/gameserverapi/{deploy => deployment}/default/deploy.yaml (95%) rename cmd/gameserverapi/{deploy => deployment}/default/kustomization.yaml (100%) rename cmd/gameserverapi/{deploy => deployment}/e2e/kustomization.yaml (100%) create mode 100644 cmd/gameserverapi/deployment/e2e/nodeselector.yaml diff --git a/cmd/e2e/build_crashing_test.go b/cmd/e2e/build_crashing_test.go index 78fb4fac..38cd8f68 100644 --- a/cmd/e2e/build_crashing_test.go +++ b/cmd/e2e/build_crashing_test.go @@ -2,7 +2,6 @@ package main import ( "context" - "net" "time" . "github.com/onsi/ginkgo/v2" @@ -40,13 +39,12 @@ var _ = Describe("Crashing Build", func() { g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed()) }, 45*time.Second, interval).Should(Succeed()) // bigger timeout because of the time crashes take to occur and captured by the controller - // we are updating the GameServerBuild to be able to have more crashes for it to become Unhealthy + // we are updating the GameServerBuild with a big CrashesToMarkUnhealthy to give time to the GameServerBuild to become Healthy gsb := &mpsv1alpha1.GameServerBuild{} err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb) Expect(err).ToNot(HaveOccurred()) patch := client.MergeFrom(gsb.DeepCopy()) - gsb.Spec.CrashesToMarkUnhealthy = 10 - + gsb.Spec.CrashesToMarkUnhealthy = 1000 err = kubeClient.Patch(ctx, gsb, patch) Expect(err).ToNot(HaveOccurred()) @@ -66,7 +64,16 @@ var _ = Describe("Crashing Build", func() { g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed()) }, 10*time.Second, interval).Should(Succeed()) - // but only temporarily, since the game servers will continue to crash + // we're decreasing the CrashesToMarkUnhealthy to 10 so that the + // GameServerBuild will 
eventually become Unhealthy + err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb) + Expect(err).ToNot(HaveOccurred()) + patch = client.MergeFrom(gsb.DeepCopy()) + gsb.Spec.CrashesToMarkUnhealthy = 10 + err = kubeClient.Patch(ctx, gsb, patch) + Expect(err).ToNot(HaveOccurred()) + + // now, let's make sure that GameServerBuild is Unhealthy Eventually(func(g Gomega) { gsb := &mpsv1alpha1.GameServerBuild{} err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb) @@ -83,16 +90,6 @@ var _ = Describe("Crashing Build", func() { } g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed()) }, 40*time.Second, interval).Should(Succeed()) - - Eventually(func(g Gomega) { - var gsList mpsv1alpha1.GameServerList - err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildCrashingName}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(gsList.Items)).To(Equal(2)) - gs := gsList.Items[0] - g.Expect(gs.Status.NodeName).ToNot(BeEmpty()) - g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil()) - }, 10*time.Second, interval).Should(Succeed()) }) }) diff --git a/cmd/e2e/build_host_network_test.go b/cmd/e2e/build_host_network_test.go index 4a929c12..05e46415 100644 --- a/cmd/e2e/build_host_network_test.go +++ b/cmd/e2e/build_host_network_test.go @@ -105,16 +105,17 @@ var _ = Describe("Build with hostnetwork", func() { g.Expect(verifyPodsInHostNetwork(ctx, kubeClient, gsb, state)).To(Succeed()) }, timeout, interval).Should(Succeed()) + // make sure all GameServers have a Public IP and NodeName Eventually(func(g Gomega) { var gsList mpsv1alpha1.GameServerList err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithHostNetworkName}) Expect(err).ToNot(HaveOccurred()) Expect(len(gsList.Items)).To(Equal(3)) - gs := gsList.Items[0] - g.Expect(gs.Status.NodeName).ToNot(BeEmpty()) - 
g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil()) + for _, gs := range gsList.Items { + g.Expect(gs.Status.NodeName).ToNot(BeEmpty()) + g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil()) + } }, timeout, interval).Should(Succeed()) - }) }) diff --git a/cmd/e2e/build_sleep_before_readyforplayers_test.go b/cmd/e2e/build_sleep_before_readyforplayers_test.go index d18c8231..7fa70d30 100644 --- a/cmd/e2e/build_sleep_before_readyforplayers_test.go +++ b/cmd/e2e/build_sleep_before_readyforplayers_test.go @@ -99,16 +99,17 @@ var _ = Describe("Build which sleeps before calling GSDK ReadyForPlayers", func( g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed()) }, timeout, interval).Should(Succeed()) + // make sure all GameServers have a Public IP and NodeName Eventually(func(g Gomega) { var gsList mpsv1alpha1.GameServerList err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildSleepBeforeReadyForPlayersName}) Expect(err).ToNot(HaveOccurred()) Expect(len(gsList.Items)).To(Equal(3)) - gs := gsList.Items[0] - g.Expect(gs.Status.NodeName).ToNot(BeEmpty()) - g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil()) + for _, gs := range gsList.Items { + g.Expect(gs.Status.NodeName).ToNot(BeEmpty()) + g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil()) + } }, timeout, interval).Should(Succeed()) - }) }) diff --git a/cmd/e2e/build_without_readyforplayers_test.go b/cmd/e2e/build_without_readyforplayers_test.go index 2eb7ba78..dcf91545 100644 --- a/cmd/e2e/build_without_readyforplayers_test.go +++ b/cmd/e2e/build_without_readyforplayers_test.go @@ -13,22 +13,22 @@ import ( ) var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { - testBuildWithoutReadyForPlayers := "withoutreadyforplayers" + testBuildWithoutReadyForPlayersName := "withoutreadyforplayers" testWithoutReadyForPlayersBuildID := "85ffe8da-c82f-4035-86c5-9d2b5f42d6f8" It("should have GameServers stuck in Initializing", func() { ctx 
:= context.Background() kubeConfig := ctrl.GetConfigOrDie() kubeClient, err := createKubeClient(kubeConfig) Expect(err).ToNot(HaveOccurred()) - err = kubeClient.Create(ctx, createBuildWithoutReadyForPlayers(testBuildWithoutReadyForPlayers, testWithoutReadyForPlayersBuildID, img)) + err = kubeClient.Create(ctx, createBuildWithoutReadyForPlayers(testBuildWithoutReadyForPlayersName, testWithoutReadyForPlayersBuildID, img)) Expect(err).ToNot(HaveOccurred()) Eventually(func(g Gomega) { gsb := &mpsv1alpha1.GameServerBuild{} - err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb) + err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb) g.Expect(err).ToNot(HaveOccurred()) state := buildState{ - buildName: testBuildWithoutReadyForPlayers, + buildName: testBuildWithoutReadyForPlayersName, buildID: testWithoutReadyForPlayersBuildID, initializingCount: 2, standingByCount: 0, @@ -40,7 +40,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { // update the GameServerBuild to 4 standingBy gsb := &mpsv1alpha1.GameServerBuild{} - err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb) + err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb) Expect(err).ToNot(HaveOccurred()) patch := client.MergeFrom(gsb.DeepCopy()) gsb.Spec.StandingBy = 4 @@ -49,10 +49,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { Eventually(func(g Gomega) { gsb := &mpsv1alpha1.GameServerBuild{} - err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb) + err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb) g.Expect(err).ToNot(HaveOccurred()) state := buildState{ - buildName: 
testBuildWithoutReadyForPlayers, + buildName: testBuildWithoutReadyForPlayersName, buildID: testWithoutReadyForPlayersBuildID, initializingCount: 4, standingByCount: 0, @@ -64,7 +64,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { // update the GameServerBuild to 0 standingBy gsb = &mpsv1alpha1.GameServerBuild{} - err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb) + err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb) Expect(err).ToNot(HaveOccurred()) patch = client.MergeFrom(gsb.DeepCopy()) gsb.Spec.StandingBy = 0 @@ -73,10 +73,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { Eventually(func(g Gomega) { gsb := &mpsv1alpha1.GameServerBuild{} - err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb) + err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb) g.Expect(err).ToNot(HaveOccurred()) state := buildState{ - buildName: testBuildWithoutReadyForPlayers, + buildName: testBuildWithoutReadyForPlayersName, buildID: testWithoutReadyForPlayersBuildID, initializingCount: 0, standingByCount: 0, @@ -88,7 +88,7 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { // update the GameServerBuild to 2 standingBy again gsb = &mpsv1alpha1.GameServerBuild{} - err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb) + err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb) Expect(err).ToNot(HaveOccurred()) patch = client.MergeFrom(gsb.DeepCopy()) gsb.Spec.StandingBy = 2 @@ -97,10 +97,10 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { Eventually(func(g Gomega) { gsb := 
&mpsv1alpha1.GameServerBuild{} - err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayers, Namespace: testNamespace}, gsb) + err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutReadyForPlayersName, Namespace: testNamespace}, gsb) g.Expect(err).ToNot(HaveOccurred()) state := buildState{ - buildName: testBuildWithoutReadyForPlayers, + buildName: testBuildWithoutReadyForPlayersName, buildID: testWithoutReadyForPlayersBuildID, initializingCount: 2, standingByCount: 0, @@ -110,14 +110,16 @@ var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() { g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed()) }, timeout, interval).Should(Succeed()) + // make sure all GameServers have a Public IP and NodeName Eventually(func(g Gomega) { var gsList mpsv1alpha1.GameServerList - err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithoutReadyForPlayers}) + err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithoutReadyForPlayersName}) Expect(err).ToNot(HaveOccurred()) Expect(len(gsList.Items)).To(Equal(2)) - gs := gsList.Items[0] - g.Expect(gs.Status.NodeName).ToNot(BeEmpty()) - g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil()) + for _, gs := range gsList.Items { + g.Expect(gs.Status.NodeName).ToNot(BeEmpty()) + g.Expect(net.ParseIP(gs.Status.PublicIP)).ToNot(BeNil()) + } }, timeout, interval).Should(Succeed()) }) diff --git a/cmd/e2e/utilities_test.go b/cmd/e2e/utilities_test.go index a6cf5649..767f786a 100644 --- a/cmd/e2e/utilities_test.go +++ b/cmd/e2e/utilities_test.go @@ -40,8 +40,9 @@ const ( portKey string = "gameport" safeToEvictPodAttribute string = "cluster-autoscaler.kubernetes.io/safe-to-evict" timeout = time.Second * 30 - interval = time.Millisecond * 250 + interval = time.Second * 1 thundernetesSystemNamespace = "thundernetes-system" + testNodeCount = 3 ) type AllocationResult struct { @@ -97,8 +98,8 @@ func 
validateThatAllocatedServersHaveReadyForPlayersUnblocked(ctx context.Contex return err } - if len(nodeAgentPodList.Items) != 3 { - return fmt.Errorf("expected 3 NodeAgent Pods, got %d", len(nodeAgentPodList.Items)) + if len(nodeAgentPodList.Items) != testNodeCount { + return fmt.Errorf("expected %d NodeAgent Pods, got %d", testNodeCount, len(nodeAgentPodList.Items)) } for _, gameServer := range activeGameServers { diff --git a/cmd/gameserverapi/deploy/e2e/nodeselector.yaml b/cmd/gameserverapi/deploy/e2e/nodeselector.yaml deleted file mode 100644 index da95e008..00000000 --- a/cmd/gameserverapi/deploy/e2e/nodeselector.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# this YAML file, when used with kustomize, it adds the nodeSelector field to the controller deployment -# it's used in e2e tests, so that the controller is scheduled on a different Node than the GameServer Pods -apiVersion: apps/v1 -kind: Deployment -metadata: - name: thundernetes-gameserverapi - namespace: thundernetes-system -spec: - template: - spec: - nodeSelector: - kubernetes.io/hostname: kind-worker \ No newline at end of file diff --git a/cmd/gameserverapi/deploy/default/deploy.yaml b/cmd/gameserverapi/deployment/default/deploy.yaml similarity index 95% rename from cmd/gameserverapi/deploy/default/deploy.yaml rename to cmd/gameserverapi/deployment/default/deploy.yaml index 1a989f35..4fc1c11d 100644 --- a/cmd/gameserverapi/deploy/default/deploy.yaml +++ b/cmd/gameserverapi/deployment/default/deploy.yaml @@ -16,7 +16,7 @@ spec: app: thundernetes-gameserverapi spec: containers: - - image: thundernetes-gameserverapi + - image: thundernetes-gameserverapi:${IMAGE_TAG} name: gameserverapi imagePullPolicy: IfNotPresent resources: diff --git a/cmd/gameserverapi/deploy/default/kustomization.yaml b/cmd/gameserverapi/deployment/default/kustomization.yaml similarity index 100% rename from cmd/gameserverapi/deploy/default/kustomization.yaml rename to cmd/gameserverapi/deployment/default/kustomization.yaml diff --git 
a/cmd/gameserverapi/deploy/e2e/kustomization.yaml b/cmd/gameserverapi/deployment/e2e/kustomization.yaml similarity index 100% rename from cmd/gameserverapi/deploy/e2e/kustomization.yaml rename to cmd/gameserverapi/deployment/e2e/kustomization.yaml diff --git a/cmd/gameserverapi/deployment/e2e/nodeselector.yaml b/cmd/gameserverapi/deployment/e2e/nodeselector.yaml new file mode 100644 index 00000000..8f39ac84 --- /dev/null +++ b/cmd/gameserverapi/deployment/e2e/nodeselector.yaml @@ -0,0 +1,13 @@ +# When this YAML file is used with kustomize, it adds the nodeSelector field to the gameserverapi deployment +# It's used in e2e tests, so that the gameserverapi is scheduled on a specific Node, so that we can do the +# listening service port forwarding (5001) correctly +apiVersion: apps/v1 +kind: Deployment +metadata: + name: thundernetes-gameserverapi + namespace: thundernetes-system +spec: + template: + spec: + nodeSelector: + kubernetes.io/hostname: kind-worker \ No newline at end of file diff --git a/e2e/run.sh b/e2e/run.sh index f1582d7c..c595d65c 100755 --- a/e2e/run.sh +++ b/e2e/run.sh @@ -47,9 +47,8 @@ echo "-----Compiling, building and deploying the operator to local Kubernetes cl IMG=${IMAGE_NAME_OPERATOR}:${IMAGE_TAG} API_SERVICE_SECURITY=usetls make -C "${DIR}"/../pkg/operator deploye2e echo "-----Deploying GameServer API-----" -cd cmd/gameserverapi/deploy/default -"${DIR}"/../pkg/operator/bin/kustomize edit set image thundernetes-gameserverapi=thundernetes-gameserverapi:${IMAGE_TAG} -"${DIR}"/../pkg/operator/bin/kustomize build ../e2e | kubectl apply -f - +cd cmd/gameserverapi/deployment/default +"${DIR}"/../pkg/operator/bin/kustomize build ../e2e | IMAGE_TAG=${IMAGE_TAG} envsubst | kubectl apply -f - echo "-----Waiting for Controller deployment-----" kubectl wait --for=condition=available --timeout=300s deployment/thundernetes-controller-manager -n thundernetes-system diff --git a/pkg/operator/Makefile b/pkg/operator/Makefile index f8119f29..72dd7405 100644 --- 
a/pkg/operator/Makefile +++ b/pkg/operator/Makefile @@ -84,7 +84,6 @@ docker-push: ## Push docker image with the manager. .PHONY: create-install-files create-install-files: - cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} ## Create install files for thundernetes installed without API security $(KUSTOMIZE) build config/default | API_SERVICE_SECURITY=none envsubst > ${INSTALL_FILES_FOLDER}/operator.yaml ## Create install files for thundernetes installed with API security @@ -111,12 +110,10 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified .PHONY: deploy deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. - cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} $(KUSTOMIZE) build config/default | envsubst | kubectl apply -f - .PHONY: deploye2e deploye2e: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. - cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} $(KUSTOMIZE) build config/e2e | envsubst | kubectl apply -f - .PHONY: undeploy diff --git a/pkg/operator/config/e2e/nodeselector.yaml b/pkg/operator/config/e2e/nodeselector.yaml index 5a78d9df..b33e9db3 100644 --- a/pkg/operator/config/e2e/nodeselector.yaml +++ b/pkg/operator/config/e2e/nodeselector.yaml @@ -1,5 +1,6 @@ -# this YAML file, when used with kustomize, it adds the nodeSelector field to the controller deployment -# it's used in e2e tests, so that the controller is scheduled on a different Node than the GameServer Pods +# When this YAML file is used with kustomize, it adds the nodeSelector field to the controller deployment +# It's used in e2e tests, so that the controller is scheduled on a specific Node, so that we can do the +# allocation API service port forwarding (5000) correctly apiVersion: apps/v1 kind: Deployment metadata: diff --git a/pkg/operator/config/manager/kustomization.yaml b/pkg/operator/config/manager/kustomization.yaml index 
ba65f4ce..c427e413 100755 --- a/pkg/operator/config/manager/kustomization.yaml +++ b/pkg/operator/config/manager/kustomization.yaml @@ -6,9 +6,3 @@ configMapGenerator: - files: - controller_manager_config.yaml name: manager-config -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -images: -- name: controller - newName: ghcr.io/playfab/thundernetes-operator - newTag: 0.4.1 diff --git a/pkg/operator/config/manager/manager.yaml b/pkg/operator/config/manager/manager.yaml index 2bdf8f38..31f9080e 100644 --- a/pkg/operator/config/manager/manager.yaml +++ b/pkg/operator/config/manager/manager.yaml @@ -33,7 +33,7 @@ spec: - /manager args: - --leader-elect - image: controller:latest + image: ${IMG} imagePullPolicy: IfNotPresent env: - name: MIN_PORT diff --git a/pkg/operator/controllers/port_registry.go b/pkg/operator/controllers/port_registry.go index e3645fb1..e86c66cb 100644 --- a/pkg/operator/controllers/port_registry.go +++ b/pkg/operator/controllers/port_registry.go @@ -107,7 +107,7 @@ func (pr *PortRegistry) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R // calculate how many nodes are ready and schedulable schedulableNodesCount := 0 for i := 0; i < len(nodeList.Items); i++ { - if !nodeList.Items[i].Spec.Unschedulable { + if IsNodeReadyAndSchedulable(&nodeList.Items[i]) { schedulableNodesCount++ } } diff --git a/pkg/operator/controllers/port_registry_test.go b/pkg/operator/controllers/port_registry_test.go index 0359da24..ae8f3ac9 100644 --- a/pkg/operator/controllers/port_registry_test.go +++ b/pkg/operator/controllers/port_registry_test.go @@ -72,11 +72,7 @@ var _ = Describe("Port registry tests", func() { It("should increase/decrease NodeCount when we add/delete Nodes from the cluster", func() { portRegistry, kubeClient := getPortRegistryKubeClientForTesting(testMinPort, testMaxPort) - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - }, - } + node := getNewNodeForTest("node2") err := 
kubeClient.Create(context.Background(), node) Expect(err).ToNot(HaveOccurred()) // do a manual reconcile since we haven't added the controller to the manager @@ -95,11 +91,7 @@ var _ = Describe("Port registry tests", func() { It("should successfully allocate ports on two Nodes", func() { portRegistry, kubeClient := getPortRegistryKubeClientForTesting(testMinPort, testMaxPort) - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - }, - } + node := getNewNodeForTest("node2") err := kubeClient.Create(context.Background(), node) Expect(err).ToNot(HaveOccurred()) // do a manual reconcile since we haven't added the controller to the manager @@ -169,11 +161,7 @@ var _ = Describe("Port registry tests", func() { verifyExpectedHostPorts(portRegistry, assignedPorts, 8) // add a second Node - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - }, - } + node := getNewNodeForTest("node2") err = kubeClient.Create(context.Background(), node) Expect(err).ToNot(HaveOccurred()) // do a manual reconcile since we haven't added the controller to the manager @@ -236,11 +224,7 @@ var _ = Describe("Port registry with two thousand ports, five hundred on four no // add three nodes for i := 0; i < 3; i++ { - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("node%d", i+2), - }, - } + node := getNewNodeForTest(fmt.Sprintf("node%d", i+2)) err := kubeClient.Create(context.Background(), node) Expect(err).ToNot(HaveOccurred()) portRegistry.Reconcile(context.Background(), reconcile.Request{}) @@ -380,11 +364,7 @@ func verifyHostPortsPerNode(portRegistry *PortRegistry, expectedNodeCount int) e // getPortRegistryKubeClientForTesting returns a PortRegistry and a fake Kubernetes client for testing func getPortRegistryKubeClientForTesting(min, max int32) (*PortRegistry, client.Client) { log := logr.FromContextOrDiscard(context.Background()) - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - }, - } + node := 
getNewNodeForTest("node1") clientBuilder := fake.NewClientBuilder().WithScheme(scheme.Scheme).WithObjects(node) kubeClient := clientBuilder.Build() Expect(kubeClient).NotTo(BeNil()) @@ -403,3 +383,17 @@ func syncMapToMapInt32Int(sm *sync.Map) map[int32]int { }) return m } + +// getNewNodeForTest returns a new Node for testing +func getNewNodeForTest(name string) *corev1.Node { + return &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + }, + }, + } +} From 4cf9d4950b0fd077d771150e20538eb42cd4d1d6 Mon Sep 17 00:00:00 2001 From: Dimitris Gkanatsios Date: Tue, 28 Jun 2022 09:59:18 -0700 Subject: [PATCH 3/3] updates per pr review --- cmd/e2e/build_crashing_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cmd/e2e/build_crashing_test.go b/cmd/e2e/build_crashing_test.go index 38cd8f68..92e84348 100644 --- a/cmd/e2e/build_crashing_test.go +++ b/cmd/e2e/build_crashing_test.go @@ -14,7 +14,7 @@ import ( var _ = Describe("Crashing Build", func() { testBuildCrashingName := "crashing" testCrashingBuildID := "85ffe8da-c82f-4035-86c5-9d2b5f42d6f7" - It("should become unhealthy", func() { + It("should become Unhealthy, then transition to Healthy and then Unhealthy again", func() { ctx := context.Background() kubeConfig := ctrl.GetConfigOrDie() kubeClient, err := createKubeClient(kubeConfig) @@ -22,6 +22,10 @@ var _ = Describe("Crashing Build", func() { err = kubeClient.Create(ctx, createCrashingBuild(testBuildCrashingName, testCrashingBuildID, img)) Expect(err).ToNot(HaveOccurred()) + // this test simulates the scenario where + // a GameServerBuild becomes Unhealthy because of multiple crashes + // user manually increases the CrashesToMarkUnhealthy so GameServerBuild transitions to Healthy again + // multiple crashes occur, so the GameServerBuild becomes Unhealthy again Eventually(func(g Gomega) { gsb := 
&mpsv1alpha1.GameServerBuild{} err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb) @@ -40,6 +44,10 @@ var _ = Describe("Crashing Build", func() { }, 45*time.Second, interval).Should(Succeed()) // bigger timeout because of the time crashes take to occur and captured by the controller // we are updating the GameServerBuild with a big CrashesToMarkUnhealthy to give time to the GameServerBuild to become Healthy + // Reasoning: At one point during running the e2e tests, we noticed that this test failed. + // This is because the GameServers crashed multiple (10) times so the GameServerBuild stayed Unhealthy, + // before having the chance to transition (temporarily) to Healthy. So, by setting it to 1000 we give it more chance to transition to Healthy, + // before decreasing it to 10 (in the next step) so that it can become Unhealthy again. gsb := &mpsv1alpha1.GameServerBuild{} err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildCrashingName, Namespace: testNamespace}, gsb) Expect(err).ToNot(HaveOccurred())