This repository has been archived by the owner on Jan 24, 2021. It is now read-only.

Scale BGP peers
mhmxs committed Jun 12, 2020
1 parent 150a6e2 commit e6a5385
Showing 7 changed files with 67 additions and 57 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -31,6 +31,8 @@ During the `api/core/v1/Node` reconcile phases it calculates the right number of

This is a standard Kubebuilder operator, so the build and deploy process is similar to a [stock Kubebuilder project](https://book.kubebuilder.io/cronjob-tutorial/running.html).

After the first reconcile phase is done, do not forget to disable the [node-to-node mesh](https://docs.projectcalico.org/getting-started/kubernetes/hardway/configure-bgp-peering)!

Use latest release:
```
kustomize build config/crd | kubectl apply -f -
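The README above says the operator calculates the right number of route reflectors during node reconciliation. As an illustration only, here is a minimal Go sketch of a ratio-based calculation clamped to a minimum and maximum, loosely modelled on the `Max` and `Ration` topology settings visible elsewhere in this commit (a `Min` field is assumed); it is not the repository's exact implementation.

```go
package main

import (
	"fmt"
	"math"
)

// topologyConfig is a stand-in for the operator's topology settings; the
// Max and Ration fields mirror the ones shown in main.go, Min is assumed.
type topologyConfig struct {
	Min    int
	Max    int
	Ration float64
}

// expectedRouteReflectors sketches a ratio-based calculation: scale with the
// number of ready nodes, then clamp the result to [Min, Max]. Illustrative
// only; the operator's actual formula may differ.
func expectedRouteReflectors(readyNodes int, cfg topologyConfig) int {
	expected := int(math.Ceil(float64(readyNodes) * cfg.Ration))
	if expected < cfg.Min {
		expected = cfg.Min
	}
	if expected > cfg.Max {
		expected = cfg.Max
	}
	return expected
}

func main() {
	cfg := topologyConfig{Min: 3, Max: 10, Ration: 0.05}
	fmt.Println(expectedRouteReflectors(100, cfg)) // 5 route reflectors for 100 ready nodes
}
```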
1 change: 1 addition & 0 deletions config/rbac/role.yaml
@@ -21,6 +21,7 @@ rules:
- bgppeers
verbs:
- create
- delete
- get
- list
- update
34 changes: 17 additions & 17 deletions controllers/routereflectorconfig_controller.go
@@ -77,7 +77,7 @@ type reconcileImplClient interface {
// +kubebuilder:rbac:groups=route-reflector.calico-route-reflector-operator.mhmxs.github.com,resources=routereflectorconfigs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=route-reflector.calico-route-reflector-operator.mhmxs.github.com,resources=routereflectorconfigs/status,verbs=get;update;patch
// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;update;watch
// +kubebuilder:rbac:groups="crd.projectcalico.org",resources=bgppeers,verbs=get;list;create;update
// +kubebuilder:rbac:groups="crd.projectcalico.org",resources=bgppeers,verbs=get;list;create;update;delete

func (r *RouteReflectorConfigReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("routereflectorconfig", req.Name)
@@ -108,13 +108,14 @@ func (r *RouteReflectorConfigReconciler) Reconcile(req ctrl.Request) (ctrl.Resul
log.Errorf("Unable to list nodes because of %s", err.Error())
return nodeListError, err
}
log.Debugf("Total number of nodes %d", len(nodeList.Items))

readyNodes, actualReadyNumber, nodes := r.collectNodeInfo(nodeList.Items)
readyNodes, actualRRNumber, nodes := r.collectNodeInfo(nodeList.Items)
log.Infof("Nodes are ready %d", readyNodes)
log.Infof("Actual number of healthy route reflector nodes are %d", actualReadyNumber)
log.Infof("Actual number of healthy route reflector nodes are %d", actualRRNumber)

expectedNumber := r.Topology.CalculateExpectedNumber(readyNodes)
log.Infof("Expected number of route reflector nodes are %d", expectedNumber)
expectedRRNumber := r.Topology.CalculateExpectedNumber(readyNodes)
log.Infof("Expected number of route reflector nodes are %d", expectedRRNumber)

for n, isReady := range nodes {
if status, ok := routeReflectorsUnderOperation[n.GetUID()]; ok {
@@ -141,26 +142,27 @@ func (r *RouteReflectorConfigReconciler) Reconcile(req ctrl.Request) (ctrl.Resul
return nodeReverted, nil
}

if !isReady || expectedNumber == actualReadyNumber {
if !isReady || expectedRRNumber == actualRRNumber {
continue
}

if diff := expectedNumber - actualReadyNumber; diff != 0 {
if diff := expectedRRNumber - actualRRNumber; diff != 0 {
if updated, err := r.updateRRStatus(n, diff); err != nil {
log.Errorf("Unable to update node %s because of %s", n.GetName(), err.Error())
return nodeUpdateError, err
} else if updated && diff > 0 {
actualReadyNumber++
actualRRNumber++
} else if updated && diff < 0 {
actualReadyNumber--
actualRRNumber--
}
}
}

if expectedNumber != actualReadyNumber {
log.Infof("Actual number %d is different than expected %d", actualReadyNumber, expectedNumber)
if expectedRRNumber != actualRRNumber {
log.Errorf("Actual number %d is different than expected %d", actualRRNumber, expectedRRNumber)
}

// TODO This has several performance issues, need to fix them
rrLables := client.HasLabels{r.NodeLabelKey}
rrListOptions := client.ListOptions{}
rrLables.ApplyToList(&rrListOptions)
@@ -171,7 +173,6 @@ func (r *RouteReflectorConfigReconciler) Reconcile(req ctrl.Request) (ctrl.Resul
log.Errorf("Unable to list route reflectors because of %s", err.Error())
return rrListError, err
}

log.Debugf("Route reflectors are: %v", rrList.Items)

existingBGPPeers, err := r.BGPPeer.ListBGPPeers()
@@ -183,7 +184,6 @@ func (r *RouteReflectorConfigReconciler) Reconcile(req ctrl.Request) (ctrl.Resul
log.Debugf("Existing BGPeers are: %v", existingBGPPeers.Items)

currentBGPPeers := r.Topology.GenerateBGPPeers(rrList.Items, nodes, existingBGPPeers)

log.Debugf("Current BGPeers are: %v", currentBGPPeers)

for _, bp := range currentBGPPeers {
@@ -194,7 +194,7 @@ func (r *RouteReflectorConfigReconciler) Reconcile(req ctrl.Request) (ctrl.Resul
}

for _, p := range existingBGPPeers.Items {
if !findBGPPeer(p.GetName(), currentBGPPeers) {
if !findBGPPeer(currentBGPPeers, p.GetName()) {
log.Debugf("Removing BGPPeer: %s", p.GetName())
if err := r.BGPPeer.RemoveBGPPeer(&p); err != nil {
log.Errorf("Unable to remove BGPPeer because of %s", err.Error())
@@ -259,10 +259,10 @@ func (r *RouteReflectorConfigReconciler) updateRRStatus(node *corev1.Node, diff
func (r *RouteReflectorConfigReconciler) collectNodeInfo(allNodes []corev1.Node) (readyNodes int, actualReadyNumber int, filtered map[*corev1.Node]bool) {
filtered = map[*corev1.Node]bool{}

for _, n := range allNodes {
for i, n := range allNodes {
isReady := isNodeReady(&n)
isSchedulable := isNodeSchedulable(&n)
filtered[&n] = isReady && isSchedulable
filtered[&allNodes[i]] = isReady && isSchedulable
if isReady && isSchedulable {
readyNodes++
if r.Topology.IsRouteReflector(string(n.GetUID()), n.GetLabels()) {
@@ -291,7 +291,7 @@ func isNodeSchedulable(node *corev1.Node) bool {
return true
}

func findBGPPeer(name string, peers []calicoApi.BGPPeer) bool {
func findBGPPeer(peers []calicoApi.BGPPeer, name string) bool {
for _, p := range peers {
if p.GetName() == name {
return true
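One detail worth calling out in the controller change: `collectNodeInfo` now ranges with an index and uses `filtered[&allNodes[i]]` instead of `filtered[&n]`. Taking the address of the range variable is a classic Go pitfall, because `n` is a single reused variable (before Go 1.22), so every map key ends up pointing at the same copy. The sketch below reproduces the difference with stand-in types rather than `corev1.Node`.

```go
package main

import "fmt"

type node struct{ name string }

func main() {
	allNodes := []node{{"node-a"}, {"node-b"}, {"node-c"}}

	// Pitfall: before Go 1.22, n is one reused variable, so &n is the same
	// pointer on every iteration and the map collapses to a single key that
	// holds a copy of the last element. (From Go 1.22 on, n is per-iteration,
	// but the keys still point at copies, not at the slice elements.)
	byLoopVar := map[*node]bool{}
	for _, n := range allNodes {
		byLoopVar[&n] = true
	}

	// Fix (as in the commit): index into the slice so each key points at the
	// actual element stored in allNodes.
	byElement := map[*node]bool{}
	for i := range allNodes {
		byElement[&allNodes[i]] = true
	}

	fmt.Println("loop-variable keys:", len(byLoopVar))
	fmt.Println("slice-element keys:", len(byElement)) // always 3
}
```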
3 changes: 3 additions & 0 deletions main.go
@@ -126,6 +126,8 @@ func main() {
Max: max,
Ration: ratio,
}
log.Infof("Topology config: %v", topologyConfig)

var topology topologies.Topology
// TODO Validation on topology
if t, ok := os.LookupEnv("ROUTE_REFLECTOR_TOPOLOGY"); ok && t == "multi" {
@@ -173,6 +175,7 @@
}
}

// TODO more sophisticated env parse and validation or use CRD
func parseEnv() (int, int, string, float64, string, string, string) {
var err error
clusterID := defaultClusterID
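The new `// TODO more sophisticated env parse and validation or use CRD` comment sits above `parseEnv`, which currently reads the topology settings straight from environment variables. As a hedged sketch of what validated parsing could look like, the helper below checks that a ratio-style variable is numeric and within range before using it; the variable name and default value are assumptions, not taken from the repository.

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// parseRatioEnv reads an environment variable as a float in (0, 1], falling
// back to a default when unset and rejecting malformed or out-of-range values
// instead of silently using them. Name and default are illustrative.
func parseRatioEnv(key string, def float64) (float64, error) {
	raw, ok := os.LookupEnv(key)
	if !ok || raw == "" {
		return def, nil
	}
	ratio, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		return 0, fmt.Errorf("%s=%q is not a number: %w", key, raw, err)
	}
	if ratio <= 0 || ratio > 1 {
		return 0, fmt.Errorf("%s=%v must be in (0, 1]", key, ratio)
	}
	return ratio, nil
}

func main() {
	ratio, err := parseRatioEnv("ROUTE_REFLECTOR_RATIO", 0.005)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("using ratio:", ratio)
}
```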
70 changes: 37 additions & 33 deletions topologies/multi.go
@@ -17,7 +17,7 @@ package topologies

import (
"fmt"
"math/rand"
"math"
"strconv"

calicoApi "github.com/projectcalico/libcalico-go/lib/apis/v3"
@@ -34,8 +34,8 @@ type MultiTopology struct {
}

func (t *MultiTopology) IsRouteReflector(nodeID string, labels map[string]string) bool {
label, ok := labels[t.NodeLabelKey]
return ok && label == t.getNodeLabel(nodeID)
_, ok := labels[t.NodeLabelKey]
return ok
}

func (t *MultiTopology) GetClusterID(nodeID string) string {
@@ -57,42 +57,39 @@ func (t *MultiTopology) CalculateExpectedNumber(readyNodes int) int {
func (t *MultiTopology) GenerateBGPPeers(routeReflectors []corev1.Node, nodes map[*corev1.Node]bool, existingPeers *calicoApi.BGPPeerList) []calicoApi.BGPPeer {
bgpPeerConfigs := []calicoApi.BGPPeer{}

for n, isReady := range nodes {
if !isReady {
continue
rrConfig := findBGPPeer(existingPeers.Items, DefaultRouteReflectorMeshName)
if rrConfig == nil {
rrConfig = &calicoApi.BGPPeer{
TypeMeta: metav1.TypeMeta{
Kind: calicoApi.KindBGPPeer,
APIVersion: calicoApi.GroupVersionCurrent,
},
ObjectMeta: metav1.ObjectMeta{
Name: DefaultRouteReflectorMeshName,
},
}
}
selector := fmt.Sprintf("has(%s)", t.NodeLabelKey)
rrConfig.Spec = calicoApi.BGPPeerSpec{
NodeSelector: selector,
PeerSelector: selector,
}

if t.IsRouteReflector(string(n.GetUID()), n.GetLabels()) {
selector := fmt.Sprintf("has(%s)", t.NodeLabelKey)
rrConfig := findBGPPeer(DefaultRouteReflectorMeshName, existingPeers)
if rrConfig == nil {
rrConfig = &calicoApi.BGPPeer{
TypeMeta: metav1.TypeMeta{
Kind: calicoApi.KindBGPPeer,
APIVersion: calicoApi.GroupVersionCurrent,
},
ObjectMeta: metav1.ObjectMeta{
Name: DefaultRouteReflectorMeshName,
},
}
}
rrConfig.Spec = calicoApi.BGPPeerSpec{
NodeSelector: "!" + selector,
PeerSelector: selector,
}

bgpPeerConfigs = append(bgpPeerConfigs, *rrConfig)
bgpPeerConfigs = append(bgpPeerConfigs, *rrConfig)

// TODO this could cause rebalancing very often so it has performance issues
rrIndex := 0
for n, isReady := range nodes {
if !isReady || t.IsRouteReflector(string(n.GetUID()), n.GetLabels()) {
continue
}

// TODO Do it in a more sophisticated way
for i := 1; i <= 3; i++ {
rrID := rand.Intn(len(routeReflectors))
name := fmt.Sprintf(DefaultRouteReflectorClientName, rrID)
rr := getRouteReflectorID(string(routeReflectors[rrID].GetUID()))
for i := 1; i <= int(math.Min(float64(len(routeReflectors)), 3)); i++ {
rr := routeReflectors[rrIndex]
rrID := getRouteReflectorID(string(rr.GetUID()))
name := fmt.Sprintf(DefaultRouteReflectorClientName+"-%s", rrID, n.GetUID())

clientConfig := findBGPPeer(DefaultRouteReflectorMeshName, existingPeers)
clientConfig := findBGPPeer(existingPeers.Items, name)
if clientConfig == nil {
clientConfig = &calicoApi.BGPPeer{
TypeMeta: metav1.TypeMeta{
@@ -105,10 +102,17 @@
}
}
clientConfig.Spec = calicoApi.BGPPeerSpec{
PeerSelector: fmt.Sprintf("%s=='%d'", t.NodeLabelKey, rr),
// TODO make configurable
NodeSelector: fmt.Sprintf("kubernetes.io/hostname=='%s'", n.GetLabels()["kubernetes.io/hostname"]),
PeerSelector: fmt.Sprintf("%s=='%d'", t.NodeLabelKey, rrID),
}

bgpPeerConfigs = append(bgpPeerConfigs, *clientConfig)

rrIndex++
if rrIndex == len(routeReflectors) {
rrIndex = 0
}
}
}

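The multi topology change above drops the `rand.Intn` based selection and instead walks the route reflector list with a shared round-robin index, peering each client node with up to three route reflectors and spreading assignments evenly and deterministically. Below is a stripped-down sketch of that selection loop using plain strings instead of the Calico and Kubernetes types.

```go
package main

import "fmt"

// assignPeers hands each client up to maxPeers route reflectors, advancing a
// shared round-robin index so the load is spread evenly; a simplified sketch
// of the loop added in multi.go, not the operator's actual code.
func assignPeers(routeReflectors, clients []string, maxPeers int) map[string][]string {
	peers := map[string][]string{}
	if len(routeReflectors) == 0 {
		return peers
	}
	if maxPeers > len(routeReflectors) {
		maxPeers = len(routeReflectors)
	}
	rrIndex := 0
	for _, c := range clients {
		for i := 0; i < maxPeers; i++ {
			peers[c] = append(peers[c], routeReflectors[rrIndex])
			rrIndex++
			if rrIndex == len(routeReflectors) {
				rrIndex = 0
			}
		}
	}
	return peers
}

func main() {
	rrs := []string{"rr-0", "rr-1", "rr-2", "rr-3"}
	clients := []string{"node-a", "node-b", "node-c"}
	for node, assigned := range assignPeers(rrs, clients, 3) {
		fmt.Println(node, "->", assigned)
	}
}
```

Each client gets distinct reflectors as long as enough exist, and consecutive clients start at different offsets, which is what the `rrIndex` wrap-around in `multi.go` achieves.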
10 changes: 5 additions & 5 deletions topologies/single.go
@@ -76,9 +76,8 @@ func (t *SingleTopology) CalculateExpectedNumber(readyNodes int) int {
func (t *SingleTopology) GenerateBGPPeers(_ []corev1.Node, _ map[*corev1.Node]bool, existingPeers *calicoApi.BGPPeerList) []calicoApi.BGPPeer {
bgpPeerConfigs := []calicoApi.BGPPeer{}

selector := fmt.Sprintf("has(%s)", t.NodeLabelKey)

rrConfig := findBGPPeer(DefaultRouteReflectorMeshName, existingPeers)
// TODO eliminate code duplication
rrConfig := findBGPPeer(existingPeers.Items, DefaultRouteReflectorMeshName)
if rrConfig == nil {
rrConfig = &calicoApi.BGPPeer{
TypeMeta: metav1.TypeMeta{
@@ -90,16 +89,17 @@ func (t *SingleTopology) GenerateBGPPeers(_ []corev1.Node, _ map[*corev1.Node]bo
},
}
}
selector := fmt.Sprintf("has(%s)", t.NodeLabelKey)
rrConfig.Spec = calicoApi.BGPPeerSpec{
NodeSelector: "!" + selector,
NodeSelector: selector,
PeerSelector: selector,
}

bgpPeerConfigs = append(bgpPeerConfigs, *rrConfig)

clientConfigName := fmt.Sprintf(DefaultRouteReflectorClientName, 1)

clientConfig := findBGPPeer(clientConfigName, existingPeers)
clientConfig := findBGPPeer(existingPeers.Items, clientConfigName)
if clientConfig == nil {
clientConfig = &calicoApi.BGPPeer{
TypeMeta: metav1.TypeMeta{
4 changes: 2 additions & 2 deletions topologies/topology.go
@@ -49,8 +49,8 @@ type Config struct {
Ration float64
}

func findBGPPeer(name string, peers *calicoApi.BGPPeerList) *calicoApi.BGPPeer {
for _, p := range peers.Items {
func findBGPPeer(peers []calicoApi.BGPPeer, name string) *calicoApi.BGPPeer {
for _, p := range peers {
if p.GetName() == name {
return &p
}
