Skip to content

Commit

Permalink
GC, throttle; [config change]
Browse files Browse the repository at this point in the history
* more exactly, config.memsys defaults change:
-   "to_gc":       "2gb",
-   "hk_time":     "90s",
+   "to_gc":       "4gb",
+   "hk_time":     "3m",

* always run free-mem when pressure extreme
* EC: extra steps to protect
* part three, prev. commit: 6849e7a

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Nov 16, 2024
1 parent 6849e7a commit 75025d9
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 27 deletions.
27 changes: 15 additions & 12 deletions cmn/oom/oom.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,24 @@ var (
)

func FreeToOS(force bool) bool {
prev := ratomic.LoadInt64(&last)
ival := ivalTime
var (
since time.Duration
now = mono.NanoTime()
prev = ratomic.LoadInt64(&last)
ival = ivalTime
)
if force {
ival = forceTime
}

now := mono.NanoTime()
elapsed := time.Duration(now - prev)
if elapsed < ival {
nlog.Infoln("not running - only", elapsed, "passed since the previous run")
return false
if prev > 0 {
since = time.Duration(now - prev)
if since < ival {
nlog.Infoln("not running - only", since, "<", ival, "passed since the previous run")
return false
}
}
if !ratomic.CompareAndSwapInt64(&running, 0, now) {
nlog.Infoln("(still) running for", elapsed, "- nothing to do")
nlog.Infoln("still running [", since, "]")
return false
}

Expand All @@ -55,9 +59,8 @@ func do(started int64) {
rdebug.FreeOSMemory()

now := mono.NanoTime()
if elapsed := time.Duration(now - started); elapsed > (forceTime >> 1) {
nlog.Warningln("spent", elapsed.String(), "freeing memory")
}
nlog.Warningln("free-mem runtime:", time.Duration(now-started))

ratomic.StoreInt64(&last, now)
ratomic.StoreInt64(&running, 0)
}
7 changes: 5 additions & 2 deletions core/lcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/mono"
"github.com/NVIDIA/aistore/cmn/nlog"
"github.com/NVIDIA/aistore/cmn/oom"
"github.com/NVIDIA/aistore/core/meta"
"github.com/NVIDIA/aistore/fs"
"github.com/NVIDIA/aistore/hk"
Expand Down Expand Up @@ -84,7 +85,7 @@ func UncacheBcks(wg *sync.WaitGroup, bcks ...*meta.Bck) bool {
g.lchk.rc.Inc()
defer g.lchk.rc.Dec()

// mem pressure
// mem pressure; NOTE: may call oom.FreeToOS
if g.lchk.mempDropAll() {
return true // dropped all caches, nothing to do
}
Expand Down Expand Up @@ -243,7 +244,7 @@ func (lchk *lchk) housekeep(int64) time.Duration {
return lchk.timeout
}

// mem pressure
// mem pressure; NOTE: may call oom.FreeToOS
if lchk.mempDropAll() {
return lchk.timeout
}
Expand Down Expand Up @@ -285,6 +286,8 @@ func (lchk *lchk) mempDropAll() bool /*dropped*/ {
nlog.ErrorDepth(1, "oom [", p, "] - dropping all caches")
lchk._drop()
lchk.last = time.Now()

oom.FreeToOS(true)
return true
case memsys.PressureHigh:
nlog.Warningln("high memory pressure")
Expand Down
4 changes: 2 additions & 2 deletions deploy/dev/local/aisnode_config.fspaths.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ cat > $AIS_CONF_FILE <<EOL
"memsys": {
"min_free": "2gb",
"default_buf": "32kb",
"to_gc": "2gb",
"hk_time": "90s",
"to_gc": "4gb",
"hk_time": "3m",
"min_pct_total": 0,
"min_pct_free": 0
},
Expand Down
4 changes: 2 additions & 2 deletions deploy/dev/local/aisnode_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ cat > $AIS_CONF_FILE <<EOL
"memsys": {
"min_free": "2gb",
"default_buf": "32kb",
"to_gc": "2gb",
"hk_time": "90s",
"to_gc": "4gb",
"hk_time": "3m",
"min_pct_total": 0,
"min_pct_free": 0
},
Expand Down
18 changes: 10 additions & 8 deletions ec/putjogger.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/cmn/mono"
"github.com/NVIDIA/aistore/cmn/nlog"
"github.com/NVIDIA/aistore/cmn/oom"
"github.com/NVIDIA/aistore/core"
"github.com/NVIDIA/aistore/core/meta"
"github.com/NVIDIA/aistore/fs"
Expand Down Expand Up @@ -146,14 +147,15 @@ func (c *putJogger) _do(req *request, lom *core.LOM) {
if err := c.ec(req, lom); err != nil {
err = cmn.NewErrFailedTo(core.T, req.Action, lom.Cname(), err)
c.parent.AddErr(err, 0)
} else if !c.toDisk { // throttle
c.ntotal++
if (c.micro && fs.IsMicroThrottle(c.ntotal)) || fs.IsMiniThrottle(c.ntotal) {
if pressure := g.pmm.Pressure(); pressure >= memsys.PressureHigh {
time.Sleep(fs.Throttle100ms)
if !c.micro && pressure >= memsys.PressureExtreme {
c.micro = true
}
}
c.ntotal++
if (c.micro && fs.IsMicroThrottle(c.ntotal)) || fs.IsMiniThrottle(c.ntotal) {
if pressure := g.pmm.Pressure(); pressure >= memsys.PressureHigh {
time.Sleep(fs.Throttle100ms)
if !c.micro && pressure >= memsys.PressureExtreme {
// too late?
c.micro = true
oom.FreeToOS(true /*force*/)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion memsys/housekeep_mm.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const (
// hk tunables (via config.Memsys section)
var (
sizeToGC = int64(cos.GiB + cos.GiB>>1) // run GC when sum(`freed`) > sizeToGC
memCheckAbove = 90 * time.Second // memory checking frequency when above low watermark
memCheckAbove = 3 * time.Minute // default HK interval (gets modified up or down)
)

// API: on-demand memory freeing to the user-provided specification
Expand Down
2 changes: 2 additions & 0 deletions stats/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/cmn/mono"
"github.com/NVIDIA/aistore/cmn/nlog"
"github.com/NVIDIA/aistore/cmn/oom"
"github.com/NVIDIA/aistore/core"
"github.com/NVIDIA/aistore/core/meta"
"github.com/NVIDIA/aistore/hk"
Expand Down Expand Up @@ -494,6 +495,7 @@ func (r *runner) _mem(mm *memsys.MMSA, set, clr cos.NodeStateFlags) {
set |= cos.OOM
nlog.Errorln(mm.Str(&r.mem))
}
oom.FreeToOS(true)
case pressure >= memsys.PressureHigh:
set |= cos.LowMemory
clr |= cos.OOM
Expand Down
2 changes: 2 additions & 0 deletions xact/xs/blob_download.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/cmn/feat"
"github.com/NVIDIA/aistore/cmn/nlog"
"github.com/NVIDIA/aistore/cmn/oom"
"github.com/NVIDIA/aistore/core"
"github.com/NVIDIA/aistore/core/meta"
"github.com/NVIDIA/aistore/memsys"
Expand Down Expand Up @@ -183,6 +184,7 @@ func (p *blobFactory) Start() error {
pressure = mm.Pressure()
)
if pressure >= memsys.PressureExtreme {
oom.FreeToOS(true)
return errors.New(r.Name() + ": extreme memory pressure - not starting")
}
switch pressure {
Expand Down

0 comments on commit 75025d9

Please sign in to comment.