Skip to content

Commit

Permalink
mem-limit: count active pages as maybe freeable
Browse files Browse the repository at this point in the history
  • Loading branch information
Tristan-Wilson committed Sep 27, 2023
1 parent 84a511a commit 53c1c33
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 21 deletions.
41 changes: 31 additions & 10 deletions arbnode/resourcemanager/resource_management.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ var DefaultConfig = Config{

// ConfigAddOptions adds the configuration options for resourcemanager.
func ConfigAddOptions(prefix string, f *pflag.FlagSet) {
f.String(prefix+".mem-free-limit", DefaultConfig.MemFreeLimit, "RPC calls are throttled if free system memory is below this amount, expressed in bytes or multiples of bytes with suffix B, K, M, G")
f.String(prefix+".mem-free-limit", DefaultConfig.MemFreeLimit, "RPC calls are throttled if free system memory excluding the page cache is below this amount, expressed in bytes or multiples of bytes with suffix B, K, M, G. The limit should be set such that sufficient free memory is left for the page cache in order for the system to be performant")
}

// httpServer implements http.Handler and wraps calls to inner with a resource
Expand Down Expand Up @@ -168,7 +168,7 @@ func (_ trivialLimitChecker) String() string { return "trivial" }

type cgroupsMemoryFiles struct {
limitFile, usageFile, statsFile string
inactiveRe *regexp.Regexp
activeRe, inactiveRe *regexp.Regexp
}

const defaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/"
Expand All @@ -178,13 +178,15 @@ var cgroupsV1MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV1MemoryDirectory + "/memory.limit_in_bytes",
usageFile: defaultCgroupsV1MemoryDirectory + "/memory.usage_in_bytes",
statsFile: defaultCgroupsV1MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
activeRe: regexp.MustCompile(`^total_active_file (\d+)`),
inactiveRe: regexp.MustCompile(`^total_inactive_file (\d+)`),
}
var cgroupsV2MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV2MemoryDirectory + "/memory.max",
usageFile: defaultCgroupsV2MemoryDirectory + "/memory.current",
statsFile: defaultCgroupsV2MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`inactive_file (\d+)`),
activeRe: regexp.MustCompile(`^active_file (\d+)`),
inactiveRe: regexp.MustCompile(`^inactive_file (\d+)`),
}

type cgroupsMemoryLimitChecker struct {
Expand All @@ -200,23 +202,42 @@ func newCgroupsMemoryLimitChecker(files cgroupsMemoryFiles, memLimitBytes int) *
}

// isLimitExceeded checks if the system memory free is less than the limit.
// It returns true if the limit is exceeded.
//
// See the following page for details of calculating the memory used,
// which is reported as container_memory_working_set_bytes in prometheus:
// container_memory_working_set_bytes in prometheus is calculated as
// memory.usage_in_bytes - inactive page cache bytes, see
// https://mihai-albert.com/2022/02/13/out-of-memory-oom-in-kubernetes-part-3-memory-metrics-sources-and-tools-to-collect-them/
// This metric is used by kubernetes to report memory in use by the pod,
// but memory.usage_in_bytes also includes the active page cache, which
// can be evicted by the kernel when more memory is needed, see
// https://github.com/kubernetes/kubernetes/issues/43916
// The kernel cannot be guaranteed to move a page from a file from
// active to inactive even when the file is closed, or Nitro is exited.
// For larger chains, Nitro's page cache can grow quite large due to
// the large amount of state that is randomly accessed from disk as each
// block is added. So in checking the limit we also include the active
// page cache.
//
// The limit should be set such that the system has a reasonable amount of
// free memory for the page cache, to avoid cache thrashing on chain state
// access. How much "reasonable" is will depend on access patterns, state
// size, and your application's tolerance for latency.
func (c *cgroupsMemoryLimitChecker) isLimitExceeded() (bool, error) {
var limit, usage, inactive int
var limit, usage, active, inactive int
var err error
if limit, err = readIntFromFile(c.files.limitFile); err != nil {
return false, err
}
if usage, err = readIntFromFile(c.files.usageFile); err != nil {
return false, err
}
if inactive, err = readInactive(c.files.statsFile, c.files.inactiveRe); err != nil {
if active, err = readFromMemStats(c.files.statsFile, c.files.activeRe); err != nil {
return false, err
}
if inactive, err = readFromMemStats(c.files.statsFile, c.files.inactiveRe); err != nil {
return false, err
}
return limit-(usage-inactive) <= c.memLimitBytes, nil
return limit-(usage-(active+inactive)) <= c.memLimitBytes, nil
}

func (c cgroupsMemoryLimitChecker) String() string {
Expand All @@ -236,7 +257,7 @@ func readIntFromFile(fileName string) (int, error) {
return limit, nil
}

func readInactive(fileName string, re *regexp.Regexp) (int, error) {
func readFromMemStats(fileName string, re *regexp.Regexp) (int, error) {
file, err := os.Open(fileName)
if err != nil {
return 0, err
Expand Down
30 changes: 19 additions & 11 deletions arbnode/resourcemanager/resource_management_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"testing"
)

func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive int) error {
func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive, active int) error {
limitFile, err := os.Create(c.files.limitFile)
if err != nil {
return err
Expand All @@ -34,8 +34,8 @@ func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive
_, err = fmt.Fprintf(statsFile, `total_cache 1029980160
total_rss 1016209408
total_inactive_file %d
total_active_file 321544192
`, inactive)
total_active_file %d
`, inactive, active)
return err
}

Expand All @@ -44,7 +44,8 @@ func makeCgroupsTestDir(cgroupDir string) cgroupsMemoryFiles {
limitFile: cgroupDir + "/memory.limit_in_bytes",
usageFile: cgroupDir + "/memory.usage_in_bytes",
statsFile: cgroupDir + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
activeRe: regexp.MustCompile(`^total_active_file (\d+)`),
inactiveRe: regexp.MustCompile(`^total_inactive_file (\d+)`),
}
}

Expand All @@ -61,6 +62,7 @@ func TestCgroupsMemoryLimit(t *testing.T) {
desc string
sysLimit int
inactive int
active int
usage int
memLimit string
want bool
Expand All @@ -69,48 +71,54 @@ func TestCgroupsMemoryLimit(t *testing.T) {
desc: "limit should be exceeded",
sysLimit: 1000,
inactive: 50,
active: 25,
usage: 1000,
memLimit: "50B",
memLimit: "75B",
want: true,
},
{
desc: "limit should not be exceeded",
sysLimit: 1000,
inactive: 51,
active: 25,
usage: 1000,
memLimit: "50b",
memLimit: "75b",
want: false,
},
{
desc: "limit (MB) should be exceeded",
sysLimit: 1000 * 1024 * 1024,
inactive: 50 * 1024 * 1024,
active: 25 * 1024 * 1024,
usage: 1000 * 1024 * 1024,
memLimit: "50MB",
memLimit: "75MB",
want: true,
},
{
desc: "limit (MB) should not be exceeded",
sysLimit: 1000 * 1024 * 1024,
inactive: 1 + 50*1024*1024,
active: 25 * 1024 * 1024,
usage: 1000 * 1024 * 1024,
memLimit: "50m",
memLimit: "75m",
want: false,
},
{
desc: "limit (GB) should be exceeded",
sysLimit: 1000 * 1024 * 1024 * 1024,
inactive: 50 * 1024 * 1024 * 1024,
active: 25 * 1024 * 1024 * 1024,
usage: 1000 * 1024 * 1024 * 1024,
memLimit: "50G",
memLimit: "75G",
want: true,
},
{
desc: "limit (GB) should not be exceeded",
sysLimit: 1000 * 1024 * 1024 * 1024,
inactive: 1 + 50*1024*1024*1024,
active: 25 * 1024 * 1024 * 1024,
usage: 1000 * 1024 * 1024 * 1024,
memLimit: "50gb",
memLimit: "75gb",
want: false,
},
} {
Expand All @@ -121,7 +129,7 @@ func TestCgroupsMemoryLimit(t *testing.T) {
t.Fatalf("Parsing memory limit failed: %v", err)
}
c := newCgroupsMemoryLimitChecker(testFiles, memLimit)
if err := updateFakeCgroupFiles(c, tc.sysLimit, tc.usage, tc.inactive); err != nil {
if err := updateFakeCgroupFiles(c, tc.sysLimit, tc.usage, tc.inactive, tc.active); err != nil {
t.Fatalf("Updating cgroup files: %v", err)
}
exceeded, err := c.isLimitExceeded()
Expand Down

0 comments on commit 53c1c33

Please sign in to comment.