diff --git a/gardener/container_network_metrics_provider_linux.go b/gardener/container_network_metrics_provider_linux.go index 74a0b6ca8..e42736102 100644 --- a/gardener/container_network_metrics_provider_linux.go +++ b/gardener/container_network_metrics_provider_linux.go @@ -2,90 +2,136 @@ package gardener import ( "bytes" + "flag" "fmt" - "path/filepath" + "os" "strconv" "strings" "code.cloudfoundry.org/garden" + "code.cloudfoundry.org/guardian/kawasaki/netns" "code.cloudfoundry.org/lager/v3" + "github.com/docker/docker/pkg/reexec" + "github.com/vishvananda/netlink" ) -type SysFSContainerNetworkMetricsProvider struct { - containerizer Containerizer - propertyManager PropertyManager +func init() { + reexec.Register("fetch-container-network-metrics", func() { + var netNsPath, ifName string + + flag.StringVar(&netNsPath, "netNsPath", "", "netNsPath") + flag.StringVar(&ifName, "ifName", "", "ifName") + flag.Parse() + + fd, err := os.Open(netNsPath) + if err != nil { + fmt.Fprintf(os.Stderr, "opening netns '%s': %s", netNsPath, err) + os.Exit(1) + } + defer fd.Close() + + if err = (&netns.Execer{}).Exec(fd, func() error { + link, err := netlink.LinkByName(ifName) + if err != nil { + return fmt.Errorf("could not get link '%s', %w", ifName, err) + } + fmt.Print((&ContainerNetworkStatMarshaller{}).MarshalLink(link)) + return nil + }); err != nil { + fmt.Fprintf(os.Stderr, err.Error()) + os.Exit(1) + } + }) } -func NewSysFSContainerNetworkMetricsProvider( - containerizer Containerizer, - propertyManager PropertyManager, -) *SysFSContainerNetworkMetricsProvider { - return &SysFSContainerNetworkMetricsProvider{ - containerizer: containerizer, - propertyManager: propertyManager, - } +type Opener func(path string) (*os.File, error) + +func (o Opener) Open(path string) (*os.File, error) { + return o(path) } -func (l *SysFSContainerNetworkMetricsProvider) Get(logger lager.Logger, handle string) (*garden.ContainerNetworkStat, error) { - log := logger.Session("container-network-metrics") +type ContainerNetworkStatMarshaller struct { +} - ifName, found := l.propertyManager.Get(handle, ContainerInterfaceKey) - if !found || ifName == "" { - return nil, nil +func (c *ContainerNetworkStatMarshaller) Unmarshal(s string) (*garden.ContainerNetworkStat, error) { + stats := strings.Split(s, ",") + if len(stats) != 2 { + return nil, fmt.Errorf("expected two values but got %q", s) } - stdout := new(bytes.Buffer) - stderr := new(bytes.Buffer) - - process, err := l.containerizer.Run(log, handle, garden.ProcessSpec{ - Path: "cat", - Args: []string{ - networkStatPath(ifName, "rx_bytes"), - networkStatPath(ifName, "tx_bytes"), - }, - }, garden.ProcessIO{ - Stdout: stdout, - Stderr: stderr, - }) - + rxBytes, err := strconv.ParseUint(stats[0], 10, 64) if err != nil { - return nil, fmt.Errorf("running process failed, %w", err) + return nil, fmt.Errorf("could not parse rx_bytes value %q, %w", stats[0], err) } - exitStatus, err := process.Wait() + txBytes, err := strconv.ParseUint(stats[1], 10, 64) if err != nil { - return nil, err + return nil, fmt.Errorf("could not parse tx_bytes value %q, %w", stats[1], err) } - if exitStatus != 0 { - return nil, fmt.Errorf("running process failed with exit status %d, error %q", exitStatus, stderr.String()) - } + return &garden.ContainerNetworkStat{ + RxBytes: rxBytes, + TxBytes: txBytes, + }, nil +} - stats := strings.Split(strings.TrimSpace(stdout.String()), "\n") - if len(stats) != 2 { - return nil, fmt.Errorf("expected two values but got %q", stdout.String()) +func (c *ContainerNetworkStatMarshaller) MarshalLink(link netlink.Link) string { + statistics := link.Attrs().Statistics + return fmt.Sprintf("%d,%d", statistics.RxBytes, statistics.TxBytes) +} + +type LinuxContainerNetworkMetricsProvider struct { + containerizer Containerizer + propertyManager PropertyManager + fileOpener Opener + containerNetworkStatMarshaller *ContainerNetworkStatMarshaller +} + +func NewLinuxContainerNetworkMetricsProvider( + containerizer Containerizer, + propertyManager PropertyManager, + fileOpener Opener, +) *LinuxContainerNetworkMetricsProvider { + return &LinuxContainerNetworkMetricsProvider{ + containerizer: containerizer, + propertyManager: propertyManager, + fileOpener: fileOpener, + containerNetworkStatMarshaller: &ContainerNetworkStatMarshaller{}, } +} - for idx, s := range stats { - stats[idx] = strings.TrimSpace(s) +func (l *LinuxContainerNetworkMetricsProvider) Get(log lager.Logger, handle string) (*garden.ContainerNetworkStat, error) { + log = log.Session("container-network-metrics") + + ifName, found := l.propertyManager.Get(handle, ContainerInterfaceKey) + if !found || ifName == "" { + return nil, nil } - rxBytes, err := strconv.ParseUint(stats[0], 10, 64) + info, err := l.containerizer.Info(log, handle) if err != nil { - return nil, fmt.Errorf("could not parse rx_bytes value %q, %w", stats[0], err) + return nil, err } - txBytes, err := strconv.ParseUint(stats[1], 10, 64) + containerNetNs, err := l.fileOpener.Open(fmt.Sprintf("/proc/%d/ns/net", info.Pid)) if err != nil { - return nil, fmt.Errorf("could not parse tx_bytes value %q, %w", stats[1], err) + return nil, err } + defer containerNetNs.Close() - return &garden.ContainerNetworkStat{ - RxBytes: rxBytes, - TxBytes: txBytes, - }, nil -} + stdout := new(bytes.Buffer) + stderr := new(bytes.Buffer) + + cmd := reexec.Command("fetch-container-network-metrics", + "-ifName", ifName, + "-netNsPath", containerNetNs.Name(), + ) + cmd.Stderr = stderr + cmd.Stdout = stdout + + if err := cmd.Run(); err != nil { + return nil, fmt.Errorf("could not fetch container network metrics, %q, %w", stderr.String(), err) + } -func networkStatPath(ifName, stat string) string { - return filepath.Join("/sys/class/net", ifName, "statistics", stat) + return l.containerNetworkStatMarshaller.Unmarshal(stdout.String()) } diff --git a/gardener/container_network_metrics_provider_linux_test.go b/gardener/container_network_metrics_provider_linux_test.go index b949c85d4..8c15f0123 100644 --- a/gardener/container_network_metrics_provider_linux_test.go +++ b/gardener/container_network_metrics_provider_linux_test.go @@ -1,171 +1,269 @@ package gardener_test import ( + "bytes" "errors" "fmt" - "path/filepath" + "io" + "os" + "os/exec" - "code.cloudfoundry.org/garden" - "code.cloudfoundry.org/garden/gardenfakes" "code.cloudfoundry.org/guardian/gardener" + spec "code.cloudfoundry.org/guardian/gardener/container-spec" fakes "code.cloudfoundry.org/guardian/gardener/gardenerfakes" "code.cloudfoundry.org/lager/v3" "code.cloudfoundry.org/lager/v3/lagertest" + "github.com/docker/docker/pkg/reexec" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/vishvananda/netlink" ) -var _ = Describe("SysFSContainerNetworkMetricsProvider", func() { - var ( - logger lager.Logger - containerizer *fakes.FakeContainerizer - propertyManager *fakes.FakePropertyManager - - networkMetricsProvider *gardener.SysFSContainerNetworkMetricsProvider - ) - - BeforeEach(func() { - logger = lagertest.NewTestLogger("test") - containerizer = new(fakes.FakeContainerizer) - propertyManager = new(fakes.FakePropertyManager) - - networkMetricsProvider = gardener.NewSysFSContainerNetworkMetricsProvider(containerizer, propertyManager) - }) +func init() { + if reexec.Init() { + os.Exit(0) + } +} +var _ = Describe("LinuxContainerNetworkMetricsProvider", func() { Describe("Get", func() { var ( - handle string - ifName string - networkStat garden.ContainerNetworkStat - - networkStatProcess *gardenfakes.FakeProcess + logger lager.Logger + containerizer *fakes.FakeContainerizer + propertyManager *fakes.FakePropertyManager + + networkMetricsProvider *gardener.LinuxContainerNetworkMetricsProvider + + netNsName string + netNsName2 string + netNsPath string + netNsPath2 string + netNsFd *os.File + linkName string + linkName2 string + handle string ) + BeforeEach(func() { + var err error + logger = lagertest.NewTestLogger("test") + containerizer = new(fakes.FakeContainerizer) + propertyManager = new(fakes.FakePropertyManager) + netNsName = fmt.Sprintf("metrics-netns-%d", GinkgoParallelProcess()) + netNsName2 = fmt.Sprintf("metrics-netns2-%d", GinkgoParallelProcess()) + linkName = fmt.Sprintf("metrics-link-%d", GinkgoParallelProcess()) + linkName2 = fmt.Sprintf("metrics-link2-%d", GinkgoParallelProcess()) + netNsPath = "/run/netns/" + netNsName + netNsPath2 = "/run/netns/" + netNsName2 handle = "random-handle" - ifName = "random-eth" - networkStat = garden.ContainerNetworkStat{ - RxBytes: 42, - TxBytes: 43, - } + createNs(netNsName) + netNsFd, err = os.Open(netNsPath) + Expect(err).NotTo(HaveOccurred()) - propertyManager.GetReturnsOnCall(0, ifName, true) + createNs(netNsName2) - networkStatProcess = new(gardenfakes.FakeProcess) - networkStatProcess.WaitReturns(0, nil) + runCommandAssertNoError("ip", "link", "add", linkName, "netns", netNsName, "type", "veth", "peer", "name", linkName2, "netns", netNsName2) - containerizer.RunCalls(func(logger lager.Logger, s string, processSpec garden.ProcessSpec, io garden.ProcessIO) (garden.Process, error) { - _, _ = io.Stdout.Write([]byte(fmt.Sprintf("%d\n%d\n", networkStat.RxBytes, networkStat.TxBytes))) - return networkStatProcess, nil + setupNetworkDevice(linkName, netNsName, "10.0.0.1/24") + setupNetworkDevice(linkName2, netNsName2, "10.0.0.2/24") + + propertyManager.GetReturns(linkName, true) + + pid := 42 + containerizer.InfoReturns(spec.ActualContainerSpec{ + Pid: pid, + }, nil) + + networkMetricsProvider = gardener.NewLinuxContainerNetworkMetricsProvider(containerizer, propertyManager, func(path string) (*os.File, error) { + Expect(path).To(Equal(fmt.Sprintf("/proc/%d/ns/net", pid))) + return netNsFd, nil }) + }) - It("should return network statistics", func() { - actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) - Expect(err).NotTo(HaveOccurred()) + AfterEach(func() { + deleteNsIfExists(netNsPath, netNsName) + deleteNsIfExists(netNsPath2, netNsName2) - Expect(containerizer.RunCallCount()).To(Equal(1)) - _, _, spec, _ := containerizer.RunArgsForCall(0) + Eventually(netNsPath).ShouldNot(BeAnExistingFile()) + Eventually(netNsPath2).ShouldNot(BeAnExistingFile()) + }) - Expect(spec.Path).To(Equal("cat")) + It("should return network statistics", func() { + runCommandAssertNoError("ip", "netns", "exec", netNsName, "ping", "10.0.0.2", "-n", "-c", "1", "-s", "4096", "-W", "1") + + // reject second ICMP request to get distinct rx_bytes/tx_bytes numbers + runCommandAssertNoError("ip", "netns", "exec", netNsName2, "iptables", "-A", "INPUT", "-p", "icmp", "-j", "REJECT") + err, _ := runCommand("ip", "netns", "exec", netNsName, "ping", "10.0.0.2", "-n", "-c", "1", "-s", "4096", "-W", "1") + Expect(err).To(HaveOccurred()) - Expect(spec.Args).To(Equal([]string{ - filepath.Join("/sys/class/net/", ifName, "/statistics/rx_bytes"), - filepath.Join("/sys/class/net/", ifName, "/statistics/tx_bytes"), - })) + actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) + Expect(err).ToNot(HaveOccurred()) - Expect(actualNetworkMetrics.TxBytes).To(Equal(networkStat.TxBytes)) - Expect(actualNetworkMetrics.RxBytes).To(Equal(networkStat.RxBytes)) + // since there is some noise on the network interface, a delta of 1000 is accepted + Expect(actualNetworkMetrics.TxBytes).To(BeNumerically("~", 8192, 1000)) + Expect(actualNetworkMetrics.RxBytes).To(BeNumerically("~", 4096, 1000)) }) - Context("when the process execution to fetch the network statistics fails", func() { + Context("when the network interface name is not stored in the property manager", func() { BeforeEach(func() { - containerizer.RunReturns(nil, errors.New("processError")) + propertyManager.GetReturns("", false) }) - It("should propagate the error", func() { - _, err := networkMetricsProvider.Get(logger, handle) - Expect(err).To(MatchError(ContainSubstring("processError"))) + It("should return nil", func() { + actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) + Expect(err).ToNot(HaveOccurred()) + Expect(actualNetworkMetrics).To(BeNil()) }) }) - Context("when waiting for the process execution to fetch the network statistics fails", func() { + Context("when the container info cannot be found", func() { BeforeEach(func() { - networkStatProcess.WaitReturns(-1, errors.New("waitError")) + containerizer.InfoReturns(spec.ActualContainerSpec{}, errors.New("no-container-info")) }) - It("should propagate the error", func() { - _, err := networkMetricsProvider.Get(logger, handle) - Expect(err).To(MatchError(ContainSubstring("waitError"))) + It("should return an error", func() { + actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) + Expect(err).To(MatchError("no-container-info")) + Expect(actualNetworkMetrics).To(BeNil()) }) }) - Context("when the process execution to fetch the network statistics returns an exit status not equal to 0", func() { + Context("network namespace file cannot be found", func() { BeforeEach(func() { - containerizer.RunCalls(func(logger lager.Logger, s string, processSpec garden.ProcessSpec, io garden.ProcessIO) (garden.Process, error) { - _, _ = io.Stderr.Write([]byte("randomStderr")) - return networkStatProcess, nil + networkMetricsProvider = gardener.NewLinuxContainerNetworkMetricsProvider(containerizer, propertyManager, func(path string) (*os.File, error) { + return nil, errors.New("no-network-namespace") }) - networkStatProcess.WaitReturns(42, nil) }) - It("should return an error that contains the exit status and stderr output", func() { - _, err := networkMetricsProvider.Get(logger, handle) - Expect(err).To(MatchError(ContainSubstring("42"))) - Expect(err).To(MatchError(ContainSubstring("randomStderr"))) + It("should return an error", func() { + actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) + Expect(err).To(MatchError("no-network-namespace")) + Expect(actualNetworkMetrics).To(BeNil()) }) }) - Context("when network statistics are missing", func() { + Context("network namespace does not exist", func() { BeforeEach(func() { - containerizer.RunReturns(networkStatProcess, nil) + deleteNsIfExists(netNsPath, netNsName) }) It("should return an error", func() { - _, err := networkMetricsProvider.Get(logger, handle) - Expect(err).To(MatchError(ContainSubstring(`expected two values but got ""`))) + actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring(fmt.Sprintf(`could not fetch container network metrics, "opening netns '%[1]s': open %[1]s: no such file or directory"`, netNsPath))) + Expect(actualNetworkMetrics).To(BeNil()) }) }) - Context("when the rx_bytes value cannot be parsed", func() { + Context("network interface does not exist", func() { BeforeEach(func() { - containerizer.RunCalls(func(logger lager.Logger, s string, processSpec garden.ProcessSpec, io garden.ProcessIO) (garden.Process, error) { - _, _ = io.Stdout.Write([]byte("abc\n42\n")) - return networkStatProcess, nil - }) + propertyManager.GetReturns("random", true) }) It("should return an error", func() { - _, err := networkMetricsProvider.Get(logger, handle) - Expect(err).To(MatchError(ContainSubstring("could not parse rx_bytes value"))) + actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring(`could not fetch container network metrics, "could not get link 'random'`)) + Expect(actualNetworkMetrics).To(BeNil()) }) }) + }) - Context("when the tx_bytes value cannot be parsed", func() { - BeforeEach(func() { - containerizer.RunCalls(func(logger lager.Logger, s string, processSpec garden.ProcessSpec, io garden.ProcessIO) (garden.Process, error) { - _, _ = io.Stdout.Write([]byte("42\nabc\n")) - return networkStatProcess, nil - }) - }) + Describe("Marshal and Unmarshal ContainerNetworkStat", func() { + var ( + expectedRxBytes uint64 + expectedTxBytes uint64 + link netlink.Link + containerNetworkStatMarshaller *gardener.ContainerNetworkStatMarshaller + ) + + BeforeEach(func() { + expectedRxBytes = 42 + expectedTxBytes = 84 + + link = &netlink.Device{ + LinkAttrs: netlink.LinkAttrs{ + Statistics: &netlink.LinkStatistics{ + RxBytes: expectedRxBytes, + TxBytes: expectedTxBytes, + }, + }, + } + + containerNetworkStatMarshaller = &gardener.ContainerNetworkStatMarshaller{} + }) + + It("should allow marshalling and unmarshalling network container statistics", func() { + str := containerNetworkStatMarshaller.MarshalLink(link) + containerNetworkStat, err := containerNetworkStatMarshaller.Unmarshal(str) + Expect(err).ToNot(HaveOccurred()) + Expect(containerNetworkStat.RxBytes).To(Equal(expectedRxBytes)) + Expect(containerNetworkStat.TxBytes).To(Equal(expectedTxBytes)) + }) + + Context("when an invalid value is unmarshalled", func() { It("should return an error", func() { - _, err := networkMetricsProvider.Get(logger, handle) - Expect(err).To(MatchError(ContainSubstring("could not parse tx_bytes value"))) + str := "random-value" + containerNetworkStat, err := containerNetworkStatMarshaller.Unmarshal(str) + Expect(err).To(MatchError(`expected two values but got "random-value"`)) + Expect(containerNetworkStat).To(BeNil()) }) }) - Context("when the network interface name is not stored in the property manager", func() { - BeforeEach(func() { - propertyManager.GetReturnsOnCall(0, "", false) + Context("when an rx_bytes value is unmarshalled", func() { + + It("should return an error", func() { + str := ",42" + containerNetworkStat, err := containerNetworkStatMarshaller.Unmarshal(str) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("could not parse rx_bytes value")) + Expect(containerNetworkStat).To(BeNil()) }) + }) - It("should return nil", func() { - actualNetworkMetrics, err := networkMetricsProvider.Get(logger, handle) - Expect(err).ToNot(HaveOccurred()) - Expect(actualNetworkMetrics).To(BeNil()) + Context("when an tx_bytes value is unmarshalled", func() { + + It("should return an error", func() { + str := "42," + containerNetworkStat, err := containerNetworkStatMarshaller.Unmarshal(str) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("could not parse tx_bytes value")) + Expect(containerNetworkStat).To(BeNil()) }) }) + }) }) + +func runCommand(arg0 string, argv ...string) (error, string) { + var stdout bytes.Buffer + cmd := exec.Command(arg0, argv...) + cmd.Stdout = io.MultiWriter(&stdout, GinkgoWriter) + cmd.Stderr = GinkgoWriter + return cmd.Run(), stdout.String() +} + +func runCommandAssertNoError(arg0 string, argv ...string) string { + err, s := runCommand(arg0, argv...) + ExpectWithOffset(1, err).To(Succeed()) + return s +} + +func setupNetworkDevice(link, ns, ip string) { + runCommandAssertNoError("ip", "netns", "exec", ns, "ip", "address", "add", ip, "dev", link) + runCommandAssertNoError("ip", "netns", "exec", ns, "ip", "link", "set", "dev", link, "up") +} + +func createNs(ns string) { + runCommandAssertNoError("ip", "netns", "add", ns) +} + +func deleteNsIfExists(path, ns string) { + if _, err := os.Open(path); err == nil { + runCommandAssertNoError("ip", "netns", "del", ns) + } +} diff --git a/guardiancmd/command_linux.go b/guardiancmd/command_linux.go index f3afdc702..bf755ed1f 100644 --- a/guardiancmd/command_linux.go +++ b/guardiancmd/command_linux.go @@ -191,7 +191,7 @@ func (f *LinuxFactory) WireContainerNetworkMetricsProvider(containerizer gardene return gardener.NewNoopContainerNetworkMetricsProvider() } - return gardener.NewSysFSContainerNetworkMetricsProvider(containerizer, propertyManager) + return gardener.NewLinuxContainerNetworkMetricsProvider(containerizer, propertyManager, os.Open) } func initBindMountAndPath(initPathOnHost string) (specs.Mount, string) {