Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mem limit count active pages #1885

Merged
merged 5 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 35 additions & 15 deletions arbnode/resourcemanager/resource_management.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func Init(conf *Config) error {
var c limitChecker
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (var c limitChecker) is unused, you can drop it.

c, err := newCgroupsMemoryLimitCheckerIfSupported(limit)
if errors.Is(err, errNotSupported) {
log.Error("No method for determining memory usage and limits was discovered, disabled memory limit RPC throttling")
log.Error("no method for determining memory usage and limits was discovered, disabled memory limit RPC throttling")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: This was correct previously. It should be uppercase. You can remember it with: if it's displaying the text immediately, that should be uppercase, if it's returned then lowercase [0].

Same below at ln 120: log.Error("error checking memory limit", "err", err, "checker", s.c). Moreover at that line s.c is an interface (limitChecker) so logging that will probably just log pointers to those two methods (String() and isLimitExceeded()) which wouldn't be very useful for debugging. I would log s.c.String() instead).

https://google.github.io/styleguide/go/decisions#error-strings

c = &trivialLimitChecker{}
}

Expand All @@ -60,8 +60,7 @@ func parseMemLimit(limitStr string) (int, error) {
limit int = 1
s string
)
_, err := fmt.Sscanf(limitStr, "%d%s", &limit, &s)
if err != nil {
if _, err := fmt.Sscanf(limitStr, "%d%s", &limit, &s); err != nil {
return 0, err
}

Expand All @@ -76,7 +75,7 @@ func parseMemLimit(limitStr string) (int, error) {
limit <<= 40
case "B":
default:
return 0, fmt.Errorf("Unsupported memory limit suffix string %s", s)
return 0, fmt.Errorf("unsupported memory limit suffix string %s", s)
}

return limit, nil
Expand All @@ -97,7 +96,7 @@ var DefaultConfig = Config{

// ConfigAddOptions adds the configuration options for resourcemanager.
func ConfigAddOptions(prefix string, f *pflag.FlagSet) {
f.String(prefix+".mem-free-limit", DefaultConfig.MemFreeLimit, "RPC calls are throttled if free system memory is below this amount, expressed in bytes or multiples of bytes with suffix B, K, M, G")
f.String(prefix+".mem-free-limit", DefaultConfig.MemFreeLimit, "RPC calls are throttled if free system memory excluding the page cache is below this amount, expressed in bytes or multiples of bytes with suffix B, K, M, G. The limit should be set such that sufficient free memory is left for the page cache in order for the system to be performant")
}

// httpServer implements http.Handler and wraps calls to inner with a resource
Expand All @@ -118,7 +117,7 @@ func (s *httpServer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
exceeded, err := s.c.isLimitExceeded()
limitCheckDurationHistogram.Update(time.Since(start).Nanoseconds())
if err != nil {
log.Error("Error checking memory limit", "err", err, "checker", s.c)
log.Error("error checking memory limit", "err", err, "checker", s.c)
} else if exceeded {
http.Error(w, "Too many requests", http.StatusTooManyRequests)
limitCheckFailureCounter.Inc(1)
Expand Down Expand Up @@ -168,7 +167,7 @@ func (_ trivialLimitChecker) String() string { return "trivial" }

type cgroupsMemoryFiles struct {
limitFile, usageFile, statsFile string
inactiveRe *regexp.Regexp
activeRe, inactiveRe *regexp.Regexp
}

const defaultCgroupsV1MemoryDirectory = "/sys/fs/cgroup/memory/"
Expand All @@ -178,13 +177,15 @@ var cgroupsV1MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV1MemoryDirectory + "/memory.limit_in_bytes",
usageFile: defaultCgroupsV1MemoryDirectory + "/memory.usage_in_bytes",
statsFile: defaultCgroupsV1MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
activeRe: regexp.MustCompile(`^total_active_file (\d+)`),
inactiveRe: regexp.MustCompile(`^total_inactive_file (\d+)`),
}
var cgroupsV2MemoryFiles = cgroupsMemoryFiles{
limitFile: defaultCgroupsV2MemoryDirectory + "/memory.max",
usageFile: defaultCgroupsV2MemoryDirectory + "/memory.current",
statsFile: defaultCgroupsV2MemoryDirectory + "/memory.stat",
inactiveRe: regexp.MustCompile(`inactive_file (\d+)`),
activeRe: regexp.MustCompile(`^active_file (\d+)`),
inactiveRe: regexp.MustCompile(`^inactive_file (\d+)`),
}

type cgroupsMemoryLimitChecker struct {
Expand All @@ -200,23 +201,42 @@ func newCgroupsMemoryLimitChecker(files cgroupsMemoryFiles, memLimitBytes int) *
}

// isLimitExceeded checks if the system memory free is less than the limit.
// It returns true if the limit is exceeded.
//
// See the following page for details of calculating the memory used,
// which is reported as container_memory_working_set_bytes in prometheus:
// container_memory_working_set_bytes in prometheus is calculated as
// memory.usage_in_bytes - inactive page cache bytes, see
// https://mihai-albert.com/2022/02/13/out-of-memory-oom-in-kubernetes-part-3-memory-metrics-sources-and-tools-to-collect-them/
// This metric is used by kubernetes to report memory in use by the pod,
// but memory.usage_in_bytes also includes the active page cache, which
// can be evicted by the kernel when more memory is needed, see
// https://github.com/kubernetes/kubernetes/issues/43916
// The kernel cannot be guaranteed to move a page from a file from
// active to inactive even when the file is closed, or Nitro is exited.
// For larger chains, Nitro's page cache can grow quite large due to
// the large amount of state that is randomly accessed from disk as each
// block is added. So in checking the limit we also include the active
// page cache.
//
// The limit should be set such that the system has a reasonable amount of
// free memory for the page cache, to avoid cache thrashing on chain state
// access. How much "reasonable" is will depend on access patterns, state
// size, and your application's tolerance for latency.
func (c *cgroupsMemoryLimitChecker) isLimitExceeded() (bool, error) {
var limit, usage, inactive int
var limit, usage, active, inactive int
var err error
if limit, err = readIntFromFile(c.files.limitFile); err != nil {
return false, err
}
if usage, err = readIntFromFile(c.files.usageFile); err != nil {
return false, err
}
if inactive, err = readInactive(c.files.statsFile, c.files.inactiveRe); err != nil {
if active, err = readFromMemStats(c.files.statsFile, c.files.activeRe); err != nil {
return false, err
}
if inactive, err = readFromMemStats(c.files.statsFile, c.files.inactiveRe); err != nil {
return false, err
}
return limit-(usage-inactive) <= c.memLimitBytes, nil
return limit-(usage-(active+inactive)) <= c.memLimitBytes, nil
}

func (c cgroupsMemoryLimitChecker) String() string {
Expand All @@ -236,7 +256,7 @@ func readIntFromFile(fileName string) (int, error) {
return limit, nil
}

func readInactive(fileName string, re *regexp.Regexp) (int, error) {
func readFromMemStats(fileName string, re *regexp.Regexp) (int, error) {
file, err := os.Open(fileName)
if err != nil {
return 0, err
Expand Down
30 changes: 19 additions & 11 deletions arbnode/resourcemanager/resource_management_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"testing"
)

func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive int) error {
func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive, active int) error {
limitFile, err := os.Create(c.files.limitFile)
if err != nil {
return err
Expand All @@ -34,8 +34,8 @@ func updateFakeCgroupFiles(c *cgroupsMemoryLimitChecker, limit, usage, inactive
_, err = fmt.Fprintf(statsFile, `total_cache 1029980160
total_rss 1016209408
total_inactive_file %d
total_active_file 321544192
`, inactive)
total_active_file %d
`, inactive, active)
return err
}

Expand All @@ -44,7 +44,8 @@ func makeCgroupsTestDir(cgroupDir string) cgroupsMemoryFiles {
limitFile: cgroupDir + "/memory.limit_in_bytes",
usageFile: cgroupDir + "/memory.usage_in_bytes",
statsFile: cgroupDir + "/memory.stat",
inactiveRe: regexp.MustCompile(`total_inactive_file (\d+)`),
activeRe: regexp.MustCompile(`^total_active_file (\d+)`),
inactiveRe: regexp.MustCompile(`^total_inactive_file (\d+)`),
}
}

Expand All @@ -61,6 +62,7 @@ func TestCgroupsMemoryLimit(t *testing.T) {
desc string
sysLimit int
inactive int
active int
usage int
memLimit string
want bool
Expand All @@ -69,48 +71,54 @@ func TestCgroupsMemoryLimit(t *testing.T) {
desc: "limit should be exceeded",
sysLimit: 1000,
inactive: 50,
active: 25,
usage: 1000,
memLimit: "50B",
memLimit: "75B",
want: true,
},
{
desc: "limit should not be exceeded",
sysLimit: 1000,
inactive: 51,
active: 25,
usage: 1000,
memLimit: "50b",
memLimit: "75b",
want: false,
},
{
desc: "limit (MB) should be exceeded",
sysLimit: 1000 * 1024 * 1024,
inactive: 50 * 1024 * 1024,
active: 25 * 1024 * 1024,
usage: 1000 * 1024 * 1024,
memLimit: "50MB",
memLimit: "75MB",
want: true,
},
{
desc: "limit (MB) should not be exceeded",
sysLimit: 1000 * 1024 * 1024,
inactive: 1 + 50*1024*1024,
active: 25 * 1024 * 1024,
usage: 1000 * 1024 * 1024,
memLimit: "50m",
memLimit: "75m",
want: false,
},
{
desc: "limit (GB) should be exceeded",
sysLimit: 1000 * 1024 * 1024 * 1024,
inactive: 50 * 1024 * 1024 * 1024,
active: 25 * 1024 * 1024 * 1024,
usage: 1000 * 1024 * 1024 * 1024,
memLimit: "50G",
memLimit: "75G",
want: true,
},
{
desc: "limit (GB) should not be exceeded",
sysLimit: 1000 * 1024 * 1024 * 1024,
inactive: 1 + 50*1024*1024*1024,
active: 25 * 1024 * 1024 * 1024,
usage: 1000 * 1024 * 1024 * 1024,
memLimit: "50gb",
memLimit: "75gb",
want: false,
},
} {
Expand All @@ -121,7 +129,7 @@ func TestCgroupsMemoryLimit(t *testing.T) {
t.Fatalf("Parsing memory limit failed: %v", err)
}
c := newCgroupsMemoryLimitChecker(testFiles, memLimit)
if err := updateFakeCgroupFiles(c, tc.sysLimit, tc.usage, tc.inactive); err != nil {
if err := updateFakeCgroupFiles(c, tc.sysLimit, tc.usage, tc.inactive, tc.active); err != nil {
t.Fatalf("Updating cgroup files: %v", err)
}
exceeded, err := c.isLimitExceeded()
Expand Down
Loading