diff --git a/arbnode/resourcemanager/resource_management.go b/arbnode/resourcemanager/resource_management.go index 4a63d428f4..24702349b1 100644 --- a/arbnode/resourcemanager/resource_management.go +++ b/arbnode/resourcemanager/resource_management.go @@ -97,7 +97,7 @@ var DefaultConfig = Config{ // ConfigAddOptions adds the configuration options for resourcemanager. func ConfigAddOptions(prefix string, f *pflag.FlagSet) { - f.String(prefix+".mem-free-limit", DefaultConfig.MemFreeLimit, "RPC calls are throttled if free system memory is below this amount, expressed in bytes or multiples of bytes with suffix B, K, M, G") + f.String(prefix+".mem-free-limit", DefaultConfig.MemFreeLimit, "RPC calls are throttled if free system memory excluding the page cache is below this amount, expressed in bytes or multiples of bytes with suffix B, K, M, G. The limit should be set such that sufficient free memory is left for the page cache in order for the system to be performant") } // httpServer implements http.Handler and wraps calls to inner with a resource @@ -168,7 +168,7 @@ func (_ trivialLimitChecker) String() string { return "trivial" } type cgroupsMemoryFiles struct { limitFile, usageFile, statsFile string - inactiveRe *regexp.Regexp + activeRe, inactiveRe *regexp.Regexp } const defaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/" @@ -178,13 +178,15 @@ var cgroupsV1MemoryFiles = cgroupsMemoryFiles{ limitFile: defaultCgroupsV1MemoryDirectory + "/memory.limit_in_bytes", usageFile: defaultCgroupsV1MemoryDirectory + "/memory.usage_in_bytes", statsFile: defaultCgroupsV1MemoryDirectory + "/memory.stat", - inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`), + activeRe: regexp.MustCompile(`^total_active_file (\d+)`), + inactiveRe: regexp.MustCompile(`^total_inactive_file (\d+)`), } var cgroupsV2MemoryFiles = cgroupsMemoryFiles{ limitFile: defaultCgroupsV2MemoryDirectory + "/memory.max", usageFile: defaultCgroupsV2MemoryDirectory + "/memory.current", statsFile: defaultCgroupsV2MemoryDirectory + "/memory.stat", - inactiveRe: regexp.MustCompile(`inactive_file (\d+)`), + activeRe: regexp.MustCompile(`^active_file (\d+)`), + inactiveRe: regexp.MustCompile(`^inactive_file (\d+)`), } type cgroupsMemoryLimitChecker struct { @@ -200,12 +202,28 @@ func newCgroupsMemoryLimitChecker(files cgroupsMemoryFiles, memLimitBytes int) * } // isLimitExceeded checks if the system memory free is less than the limit. +// It returns true if the limit is exceeded. // -// See the following page for details of calculating the memory used, -// which is reported as container_memory_working_set_bytes in prometheus: +// container_memory_working_set_bytes in prometheus is calculated as +// memory.usage_in_bytes - inactive page cache bytes, see // https://mihai-albert.com/2022/02/13/out-of-memory-oom-in-kubernetes-part-3-memory-metrics-sources-and-tools-to-collect-them/ +// This metric is used by kubernetes to report memory in use by the pod, +// but memory.usage_in_bytes also includes the active page cache, which +// can be evicted by the kernel when more memory is needed, see +// https://github.com/kubernetes/kubernetes/issues/43916 +// The kernel cannot be guaranteed to move a page from a file from +// active to inactive even when the file is closed, or Nitro is exited. +// For larger chains, Nitro's page cache can grow quite large due to +// the large amount of state that is randomly accessed from disk as each +// block is added. So in checking the limit we also include the active +// page cache. +// +// The limit should be set such that the system has a reasonable amount of +// free memory for the page cache, to avoid cache thrashing on chain state +// access. How much "reasonable" is will depend on access patterns, state +// size, and your application's tolerance for latency. func (c *cgroupsMemoryLimitChecker) isLimitExceeded() (bool, error) { - var limit, usage, inactive int + var limit, usage, active, inactive int var err error if limit, err = readIntFromFile(c.files.limitFile); err != nil { return false, err @@ -213,10 +231,13 @@ func (c *cgroupsMemoryLimitChecker) isLimitExceeded() (bool, error) { if usage, err = readIntFromFile(c.files.usageFile); err != nil { return false, err } - if inactive, err = readInactive(c.files.statsFile, c.files.inactiveRe); err != nil { + if active, err = readFromMemStats(c.files.statsFile, c.files.activeRe); err != nil { + return false, err + } + if inactive, err = readFromMemStats(c.files.statsFile, c.files.inactiveRe); err != nil { return false, err } - return limit-(usage-inactive) <= c.memLimitBytes, nil + return limit-(usage-(active+inactive)) <= c.memLimitBytes, nil } func (c cgroupsMemoryLimitChecker) String() string { @@ -236,7 +257,7 @@ func readIntFromFile(fileName string) (int, error) { return limit, nil } -func readInactive(fileName string, re *regexp.Regexp) (int, error) { +func readFromMemStats(fileName string, re *regexp.Regexp) (int, error) { file, err := os.Open(fileName) if err != nil { return 0, err diff --git a/arbnode/resourcemanager/resource_management_test.go b/arbnode/resourcemanager/resource_management_test.go index e9aa2ab7e2..4f52ad017e 100644 --- a/arbnode/resourcemanager/resource_management_test.go +++ b/arbnode/resourcemanager/resource_management_test.go @@ -10,7 +10,7 @@ import ( "testing" ) -func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive int) error { +func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive, active int) error { limitFile, err := os.Create(c.files.limitFile) if err != nil { return err @@ -34,8 +34,8 @@ func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive _, err = fmt.Fprintf(statsFile, `total_cache 1029980160 total_rss 1016209408 total_inactive_file %d -total_active_file 321544192 -`, inactive) +total_active_file %d +`, inactive, active) return err } @@ -44,7 +44,8 @@ func makeCgroupsTestDir(cgroupDir string) cgroupsMemoryFiles { limitFile: cgroupDir + "/memory.limit_in_bytes", usageFile: cgroupDir + "/memory.usage_in_bytes", statsFile: cgroupDir + "/memory.stat", - inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`), + activeRe: regexp.MustCompile(`^total_active_file (\d+)`), + inactiveRe: regexp.MustCompile(`^total_inactive_file (\d+)`), } } @@ -61,6 +62,7 @@ func TestCgroupsMemoryLimit(t *testing.T) { desc string sysLimit int inactive int + active int usage int memLimit string want bool @@ -69,48 +71,54 @@ func TestCgroupsMemoryLimit(t *testing.T) { desc: "limit should be exceeded", sysLimit: 1000, inactive: 50, + active: 25, usage: 1000, - memLimit: "50B", + memLimit: "75B", want: true, }, { desc: "limit should not be exceeded", sysLimit: 1000, inactive: 51, + active: 25, usage: 1000, - memLimit: "50b", + memLimit: "75b", want: false, }, { desc: "limit (MB) should be exceeded", sysLimit: 1000 * 1024 * 1024, inactive: 50 * 1024 * 1024, + active: 25 * 1024 * 1024, usage: 1000 * 1024 * 1024, - memLimit: "50MB", + memLimit: "75MB", want: true, }, { desc: "limit (MB) should not be exceeded", sysLimit: 1000 * 1024 * 1024, inactive: 1 + 50*1024*1024, + active: 25 * 1024 * 1024, usage: 1000 * 1024 * 1024, - memLimit: "50m", + memLimit: "75m", want: false, }, { desc: "limit (GB) should be exceeded", sysLimit: 1000 * 1024 * 1024 * 1024, inactive: 50 * 1024 * 1024 * 1024, + active: 25 * 1024 * 1024 * 1024, usage: 1000 * 1024 * 1024 * 1024, - memLimit: "50G", + memLimit: "75G", want: true, }, { desc: "limit (GB) should not be exceeded", sysLimit: 1000 * 1024 * 1024 * 1024, inactive: 1 + 50*1024*1024*1024, + active: 25 * 1024 * 1024 * 1024, usage: 1000 * 1024 * 1024 * 1024, - memLimit: "50gb", + memLimit: "75gb", want: false, }, } { @@ -121,7 +129,7 @@ func TestCgroupsMemoryLimit(t *testing.T) { t.Fatalf("Parsing memory limit failed: %v", err) } c := newCgroupsMemoryLimitChecker(testFiles, memLimit) - if err := updateFakeCgroupFiles(c, tc.sysLimit, tc.usage, tc.inactive); err != nil { + if err := updateFakeCgroupFiles(c, tc.sysLimit, tc.usage, tc.inactive, tc.active); err != nil { t.Fatalf("Updating cgroup files: %v", err) } exceeded, err := c.isLimitExceeded()