diff --git a/cmds/modules/noded/main.go b/cmds/modules/noded/main.go index 4ab1828ed..5b2ffc221 100644 --- a/cmds/modules/noded/main.go +++ b/cmds/modules/noded/main.go @@ -21,6 +21,7 @@ import ( "github.com/threefoldtech/zos/pkg/events" "github.com/threefoldtech/zos/pkg/monitord" "github.com/threefoldtech/zos/pkg/perf" + "github.com/threefoldtech/zos/pkg/perf/publicip" "github.com/threefoldtech/zos/pkg/registrar" "github.com/threefoldtech/zos/pkg/stubs" "github.com/threefoldtech/zos/pkg/utils" @@ -208,8 +209,7 @@ func action(cli *cli.Context) error { cpuBenchmarkTask := perf.NewCPUBenchmarkTask() perfMon.AddTask(&cpuBenchmarkTask) - pubIPTask := perf.NewPublicIPValidationTask() - perfMon.AddTask(pubIPTask) + perfMon.AddTask(publicip.NewTask()) if err = perfMon.Run(ctx); err != nil { return errors.Wrap(err, "failed to run the scheduler") diff --git a/docs/tasks/publicips.md b/docs/tasks/publicips.md index ab718c499..ec75c0b38 100644 --- a/docs/tasks/publicips.md +++ b/docs/tasks/publicips.md @@ -16,7 +16,7 @@ The task is scheduled to run 4 times a day. - Decide if the node should run the task or another one in the farm based on the node ID. The node with the least ID and with power target as up should run it. The other will log why they shouldn't run the task and return with no errors. This is done to ensure only one node runs the task to avoid problems like assigning the same IP. - Get public IPs set on the farm. - Remove all IPs and routes added to the test MacVLAN to ensure any remaining from previous task run are removed. -- Skip IPs that are assigned to a contract and don't include them in the validation or the result. +- Skip IPs that are assigned to a contract. - Set the MacVLAN link up. - Iterate over all public IPs and add them with the provided gateway to the MacVLAN. - Validate the IP by querying an external source that return the public IP for the node. @@ -26,4 +26,4 @@ The task is scheduled to run 4 times a day. 
## Result -The task only returns a single map of String (IP) to Boolean indicating if the IP is valid or not. The IPs listed are the unused ones, so IPs already assigned to a deployment are not validated. +The task only returns a single map of String (IP) to IPReport. The report consists of the IP state (valid, invalid or skipped) and the reason for the state. diff --git a/pkg/network/public/public.go b/pkg/network/public/public.go index fd38ef1ac..193eadcc4 100644 --- a/pkg/network/public/public.go +++ b/pkg/network/public/public.go @@ -26,7 +26,7 @@ import ( const ( toZosVeth = "tozos" // veth pair from br-pub to zos publicNsMACDerivationSuffix = "-public" - testMacvlan = "pubtestmacvlan" + testMacvlan = "pub" testNamespace = "pubtestns" // PublicBridge public bridge name, exists only after a call to EnsurePublicSetup @@ -357,12 +357,15 @@ func EnsurePublicSetup(nodeID pkg.Identifier, inf *pkg.PublicConfig) (*netlink.B func ensureTestNamespace(publicBrdige *netlink.Bridge) error { netNS, err := namespace.GetByName(testNamespace) - if err != nil { + if errors.Is(err, os.ErrNotExist) { netNS, err = namespace.Create(testNamespace) if err != nil { return fmt.Errorf("failed to create namespace %s: %w", testNamespace, err) } } + if err != nil { + return fmt.Errorf("failed to get namespace %s: %w", testNamespace, err) + } err = netNS.Do(func(_ ns.NetNS) error { _, err := macvlan.GetByName(testMacvlan) return err diff --git a/pkg/perf/pubip_task.go b/pkg/perf/publicip/publicip_task.go similarity index 73% rename from pkg/perf/pubip_task.go rename to pkg/perf/publicip/publicip_task.go index a899bceae..82e158a25 100644 --- a/pkg/perf/pubip_task.go +++ b/pkg/perf/publicip/publicip_task.go @@ -1,4 +1,4 @@ -package perf +package publicip import ( "context" @@ -7,7 +7,6 @@ import ( "net" "net/http" "os/exec" - "strings" "time" "github.com/cenkalti/backoff/v3" @@ -17,28 +16,48 @@ import ( "github.com/threefoldtech/zos/pkg/environment" 
"github.com/threefoldtech/zos/pkg/network/macvlan" "github.com/threefoldtech/zos/pkg/network/namespace" + "github.com/threefoldtech/zos/pkg/perf" "github.com/threefoldtech/zos/pkg/stubs" "github.com/vishvananda/netlink" ) -const testMacvlan = "pubtestmacvlan" +const ( + ValidState = "valid" + InvalidState = "invalid" + SkippedState = "skipped" + + IPsNotMatching = "public ip does not match farm ip" + PublicIPDataInvalid = "public ip or gateway data are not valid" + IPIsUsed = "ip is already assigned to a contract" + FetchRealIPFailed = "failed to get real public IP to the node" + + taskSchedule = "0 0 */6 * * *" + taskID = "PublicIPValidation" +) + +const testMacvlan = "pub" const testNamespace = "pubtestns" type publicIPValidationTask struct { - taskID string - schedule string - unusedIPs map[string]bool - publicIPs []substrate.PublicIP + taskID string + schedule string + farmIPsReport map[string]IPReport + publicIPs []substrate.PublicIP } -var _ Task = (*publicIPValidationTask)(nil) +type IPReport struct { + State string `json:"state"` + Reason string `json:"reason"` +} + +var _ perf.Task = (*publicIPValidationTask)(nil) -func NewPublicIPValidationTask() Task { +func NewTask() perf.Task { return &publicIPValidationTask{ - taskID: "PublicIPValidation", - schedule: "0 0 */6 * * *", - unusedIPs: make(map[string]bool), - publicIPs: make([]substrate.PublicIP, 0), + taskID: taskID, + schedule: taskSchedule, + farmIPsReport: make(map[string]IPReport), + publicIPs: make([]substrate.PublicIP, 0), } } @@ -81,12 +100,13 @@ func (p *publicIPValidationTask) Run(ctx context.Context) (interface{}, error) { return nil, fmt.Errorf("failed to get farm with id %d: %w", farmID, err) } p.publicIPs = farm.PublicIPs + deleteOldIPs(farm.PublicIPs, p.farmIPsReport) err = netNS.Do(p.validateIPs) if err != nil { return nil, fmt.Errorf("failed to run public IP validation: %w", err) } - return p.unusedIPs, nil + return p.farmIPsReport, nil } func (p *publicIPValidationTask) validateIPs(_ 
ns.NetNS) error { @@ -99,30 +119,57 @@ func (p *publicIPValidationTask) validateIPs(_ ns.NetNS) error { if err != nil { log.Err(err).Send() } + for _, publicIP := range p.publicIPs { + if report, ok := p.farmIPsReport[publicIP.IP]; ok && report.State == ValidState { + // no need to test it again + continue + } + p.farmIPsReport[publicIP.IP] = IPReport{ + State: ValidState, + } if publicIP.ContractID != 0 { + p.farmIPsReport[publicIP.IP] = IPReport{ + State: SkippedState, + Reason: IPIsUsed, + } continue } - p.unusedIPs[publicIP.IP] = false ip, ipNet, routes, err := getIPWithRoute(publicIP) if err != nil { + p.farmIPsReport[publicIP.IP] = IPReport{ + State: InvalidState, + Reason: PublicIPDataInvalid, + } log.Err(err).Send() continue } err = macvlan.Install(mv, nil, ipNet, routes, nil) if err != nil { + p.farmIPsReport[publicIP.IP] = IPReport{ + State: InvalidState, + Reason: PublicIPDataInvalid, + } log.Err(err).Msgf("failed to install macvlan %s with ip %s to namespace %s", testMacvlan, ipNet, testNamespace) continue } realIP, err := getRealPublicIP() if err != nil { + p.farmIPsReport[publicIP.IP] = IPReport{ + State: SkippedState, + Reason: FetchRealIPFailed, + } log.Err(err).Msg("failed to get node real IP") + continue } - if ip.String() == strings.TrimSpace(realIP) { - p.unusedIPs[publicIP.IP] = true + if !ip.Equal(realIP) { + p.farmIPsReport[publicIP.IP] = IPReport{ + State: InvalidState, + Reason: IPsNotMatching, + } } err = deleteAllIPsAndRoutes(mv) @@ -137,12 +184,24 @@ func (p *publicIPValidationTask) validateIPs(_ ns.NetNS) error { return nil } +func deleteOldIPs(farmIPs []substrate.PublicIP, oldReport map[string]IPReport) { +outer: + for ip := range oldReport { + for _, publicIP := range farmIPs { + if ip == publicIP.IP { + continue outer + } + } + delete(oldReport, ip) + } +} + func isLeastValidNode(ctx context.Context, farmID uint32, sub *substrate.Substrate) (bool, error) { nodes, err := sub.GetNodes(uint32(farmID)) if err != nil { return false, 
// getRealPublicIP asks an external service (api.ipify.org) which public IP the
// request was seen coming from and returns it parsed as a net.IP.
//
// An error is returned when the request fails or times out, when the service
// answers with a status other than 200, when the body cannot be read, or when
// the body is not a valid textual IP address. On success the returned IP is
// never nil, so callers can compare it with net.IP.Equal without mistaking a
// malformed answer for a mismatching address.
func getRealPublicIP() (net.IP, error) {
	// The default HTTP client never times out; bound the request so a stalled
	// or unreachable service cannot block the caller indefinitely.
	const requestTimeout = 30 * time.Second
	client := http.Client{Timeout: requestTimeout}

	// for testing now, should change to cloudflare
	resp, err := client.Get("https://api.ipify.org/")
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("request to get public IP failed with status code %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// net.ParseIP signals a malformed address by returning nil, not an error.
	ip := net.ParseIP(string(body))
	if ip == nil {
		return nil, fmt.Errorf("remote service returned an invalid public IP %q", body)
	}

	return ip, nil
}