config docs, log config uses array of levels to log; layer state stat… -- ra25 is done!
rcoreilly committed Nov 24, 2024
1 parent 7a41f99 commit 0e5f848
Showing 3 changed files with 121 additions and 93 deletions.
65 changes: 53 additions & 12 deletions axon/simstats.go
@@ -42,23 +42,29 @@ func OpenLogFile(on bool, dt *table.Table, netName, runName, logName string) {
dt.OpenLog(fnm, tensor.Tab)
}

-// OpenLogFiles opens the log files for each mode and level of the looper,
-// using the given bools in order to determine which logs to open.
+// OpenLogFiles opens the log files for modes and levels of the looper,
+// based on the lists of level names, ordered by modes in numerical order.
// The netName and runName are used for naming the file, along with
// the mode_level in lower case.
-func OpenLogFiles(ls *looper.Stacks, statsDir *tensorfs.Node, netName, runName string, on ...bool) {
-    non := len(on)
-    idx := 0
-    for mode, st := range ls.Stacks {
+func OpenLogFiles(ls *looper.Stacks, statsDir *tensorfs.Node, netName, runName string, modeLevels [][]string) {
+    modes := ls.Modes()
+    for i, mode := range modes {
+        if i >= len(modeLevels) {
+            return
+        }
+        levels := modeLevels[i]
+        st := ls.Stacks[mode]
        for _, level := range st.Order {
-            if idx >= non {
-                continue
+            on := false
+            for _, lev := range levels {
+                if lev == level.String() {
+                    on = true
+                    break
+                }
            }
-            if !on[idx] {
-                idx++
+            if !on {
                continue
            }
-            idx++
            logName := strings.ToLower(mode.String() + "_" + level.String())
            dt := tensorfs.DirTable(StatsNode(statsDir, mode, level), nil)
            fnm := LogFilename(netName, runName, logName)
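For reference, the new signature takes one list of level names per mode, in the same order as ls.Modes(). A minimal calling sketch (ss, netName, and runName follow the ra25 usage further down; the level names here are just the defaults and are assumptions for illustration):

// Save the Train Run and Epoch logs plus the Test Epoch log; levels
// not named in a mode's slice get no log file.
modeLevels := [][]string{
    {"Run", "Epoch"}, // Train mode levels
    {"Epoch"},        // Test mode levels
}
axon.OpenLogFiles(ss.Loops, ss.Stats, netName, runName, modeLevels)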
@@ -261,7 +267,7 @@ func StatPerTrialMSec(statsDir *tensorfs.Node, statName string, trainMode enums.
// of parameter tuning to ensure everything is in an appropriate dynamic range.
// It only runs for given trainMode at given trialLevel and above,
// with higher levels computing the Mean of lower levels.
-func StatLayerActGe(statsDir *tensorfs.Node, net *Network, trainMode enums.Enum, trialLevel enums.Enum, layerNames ...string) func(mode, level enums.Enum, start bool) {
+func StatLayerActGe(statsDir *tensorfs.Node, net *Network, trainMode, trialLevel enums.Enum, layerNames ...string) func(mode, level enums.Enum, start bool) {
statNames := []string{"ActMAvg", "ActMMax", "MaxGeM"}
levels := make([]enums.Enum, 10) // should be enough
return func(mode, level enums.Enum, start bool) {
@@ -313,6 +319,41 @@ func StatLayerActGe(statsDir *tensorfs.Node, net *Network, trainMode enums.Enum,
}
}

+// StatLayerState returns a Stats function that records layer state.
+// It runs for the given mode and level, recording the given variable
+// for the given layer names. If isTrialLevel is true, the level is a
+// trial level that needs iterating over NData.
+func StatLayerState(statsDir *tensorfs.Node, net *Network, smode, slevel enums.Enum, isTrialLevel bool, variable string, layerNames ...string) func(mode, level enums.Enum, start bool) {
+    return func(mode, level enums.Enum, start bool) {
+        if mode.Int64() != smode.Int64() || level.Int64() != slevel.Int64() {
+            return
+        }
+        modeDir := statsDir.RecycleDir(mode.String())
+        levelDir := modeDir.RecycleDir(level.String())
+        ndata := int(net.Context().NData)
+        if !isTrialLevel {
+            ndata = 1
+        }
+        for _, lnm := range layerNames {
+            ly := net.LayerByName(lnm)
+            name := lnm + "_" + variable
+            sizes := []int{ndata}
+            sizes = append(sizes, ly.GetSampleShape().Sizes...)
+            tsr := tensorfs.Value[float64](levelDir, name, sizes...)
+            if start {
+                tsr.SetNumRows(0)
+                continue
+            }
+            for di := range ndata {
+                row := tsr.DimSize(0)
+                tsr.SetNumRows(row + 1)
+                rtsr := tsr.RowTensor(row)
+                ly.UnitValuesSampleTensor(rtsr, variable, di)
+            }
+        }
+    }
+}
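A usage sketch for the new StatLayerState, mirroring the registration shown in the ra25 diff below (Modes, Levels, StatsPhase, and ss.AddStat are the ra25 sim's own types and helper):

// Record the ActM variable for the Input and Output layers at the
// Test Trial level; isTrialLevel = true iterates over NData.
stateFunc := axon.StatLayerState(ss.Stats, net, Test, Trial, true, "ActM", "Input", "Output")
ss.AddStat(func(mode Modes, level Levels, phase StatsPhase) {
    stateFunc(mode, level, phase == Start)
})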

// PCAStrongThr is the threshold for counting PCA eigenvalues as "strong"
// Applies to SVD as well.
var PCAStrongThr = 0.01
137 changes: 62 additions & 75 deletions examples/ra25/ra25.go
@@ -70,112 +70,120 @@ const (

// see params.go for params

-// ParamConfig has config parameters related to sim params
+// ParamConfig has config parameters related to sim params.
type ParamConfig struct {

-    // size of hidden layer -- can use emer.LaySize for 4D layers
+    // Hidden1Size is the size of the first hidden layer.
    Hidden1Size vecint.Vector2i `default:"{'X':10,'Y':10}" nest:"+"`

-    // size of hidden layer -- can use emer.LaySize for 4D layers
+    // Hidden2Size is the size of the second hidden layer.
    Hidden2Size vecint.Vector2i `default:"{'X':10,'Y':10}" nest:"+"`

-    // Extra Param Sheet name(s) to use (space separated if multiple) -- must be valid name as listed in compiled-in params or loaded params
+    // Sheet is the extra params sheet name(s) to use (space separated
+    // if multiple). Must be a valid name as listed in compiled-in params
+    // or loaded params.
    Sheet string

-    // extra tag to add to file names and logs saved from this run
+    // Tag is an extra tag to add to file names and logs saved from this run.
    Tag string

-    // user note -- describe the run params etc -- like a git commit message for the run
+    // Note is additional info to describe the run params etc,
+    // like a git commit message for the run.
    Note string

    // Name of the JSON file to input saved parameters from.
    File string `nest:"+"`

-    // Save a snapshot of all current param and config settings in a directory named params_<datestamp> (or _good if Good is true), then quit -- useful for comparing to later changes and seeing multiple views of current params
+    // SaveAll will save a snapshot of all current param and config settings
+    // in a directory named params_<datestamp> (or _good if Good is true),
+    // then quit. Useful for comparing to later changes and seeing multiple
+    // views of current params.
    SaveAll bool `nest:"+"`

-    // for SaveAll, save to params_good for a known good params state. This can be done prior to making a new release after all tests are passing -- add results to git to provide a full diff record of all params over level.
+    // Good is for SaveAll, save to params_good for a known good params state.
+    // This can be done prior to making a new release after all tests are passing.
+    // Add results to git to provide a full diff record of all params over time.
    Good bool `nest:"+"`
}

-// RunConfig has config parameters related to running the sim
+// RunConfig has config parameters related to running the sim.
type RunConfig struct {

-    // use the GPU for computation -- generally faster even for small models if NData ~16
+    // GPU uses the GPU for computation, generally faster than CPU even for
+    // small models if NData ~16.
    GPU bool `default:"true"`

-    // number of data-parallel items to process in parallel per trial -- works (and is significantly faster) for both CPU and GPU. Results in an effective mini-batch of learning.
+    // NData is the number of data-parallel items to process in parallel per trial.
+    // It is significantly faster for both CPU and GPU, and results in an
+    // effective mini-batch of learning.
    NData int `default:"16" min:"1"`

-    // number of parallel threads for CPU computation -- 0 = use default
+    // NThreads is the number of parallel threads for CPU computation;
+    // 0 = use default.
    NThreads int `default:"0"`

-    // starting run number -- determines the random seed -- runs counts from there -- can do all runs in parallel by launching separate jobs with each run, runs = 1
+    // Run is the _starting_ run number, which determines the random seed.
+    // NRuns counts up from there. Can do all runs in parallel by launching
+    // separate jobs with each starting Run, NRuns = 1.
    Run int `default:"0"`

-    // total number of runs to do when running Train
+    // NRuns is the total number of runs to do when running Train,
+    // starting from Run.
    NRuns int `default:"5" min:"1"`

-    // total number of epochs per run
+    // NEpochs is the total number of epochs per run.
    NEpochs int `default:"100"`

-    // stop run after this number of perfect, zero-error epochs
+    // NZero is how many perfect, zero-error epochs before stopping a Run.
    NZero int `default:"2"`

-    // total number of trials per epoch. Should be an even multiple of NData.
+    // NTrials is the total number of trials per epoch.
+    // Should be an even multiple of NData.
    NTrials int `default:"32"`

-    // how often to run through all the test patterns, in terms of training epochs -- can use 0 or -1 for no testing
+    // TestInterval is how often (in training epochs) to run through all the
+    // test patterns. Can use 0 or -1 for no testing.
    TestInterval int `default:"5"`

-    // how frequently (in epochs) to compute PCA on hidden representations to measure variance?
+    // PCAInterval is how often (in epochs) to compute PCA on hidden
+    // representations to measure variance.
    PCAInterval int `default:"5"`

-    // if non-empty, is the name of weights file to load at start of first run -- for testing
+    // StartWts is the name of a weights file to load at the start of the first run.
    StartWts string
}

-// LogConfig has config parameters related to logging data
+// LogConfig has config parameters related to logging data.
type LogConfig struct {

-    // if true, save final weights after each run
+    // SaveWeights will save final weights after each run.
    SaveWeights bool

-    // if true, save train epoch log to file, as .epc.tsv typically
-    Epoch bool `default:"true" nest:"+"`
-
-    // if true, save run log to file, as .run.tsv typically
-    Run bool `default:"true" nest:"+"`
-
-    // if true, save train trial log to file, as .trl.tsv typically. May be large.
-    Trial bool `default:"false" nest:"+"`
-
-    // if true, save testing epoch log to file, as .tst_epc.tsv typically. In general it is better to copy testing items over to the training epoch log and record there.
-    TestEpoch bool `default:"false" nest:"+"`
+    // Train has the list of Train mode levels to save log files for.
+    Train []string `default:"['Run', 'Epoch']" nest:"+"`

-    // if true, save testing trial log to file, as .tst_trl.tsv typically. May be large.
-    TestTrial bool `default:"false" nest:"+"`
+    // Test has the list of Test mode levels to save log files for.
+    Test []string `nest:"+"`
}
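With the bool flags replaced by level-name lists, overriding the defaults looks like this (a sketch; the field access matches the cfg usage in RunNoGUI below, and adding Trial is purely illustrative):

// Also save the potentially large Train Trial log, and a Test Epoch log.
cfg := &ss.Config.Log
cfg.Train = []string{"Run", "Epoch", "Trial"}
cfg.Test = []string{"Epoch"}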

// Config is a standard Sim config -- use as a starting point.
type Config struct {

-    // specify include files here, and after configuration, it contains list of include files added
+    // Includes has a list of additional config files to include.
+    // After configuration, it contains the list of include files added.
    Includes []string

-    // open the GUI -- does not automatically run -- if false, then runs automatically and quits
+    // GUI means open the GUI. Otherwise it runs automatically and quits,
+    // saving results to log files.
    GUI bool `default:"true"`

-    // log debugging information
+    // Debug reports debugging information.
    Debug bool

-    // parameter related configuration options
+    // Params has parameter related configuration options.
    Params ParamConfig `display:"add-fields"`

-    // sim running related configuration options
+    // Run has sim running related configuration options.
    Run RunConfig `display:"add-fields"`

-    // data logging related configuration options
+    // Log has data logging related configuration options.
    Log LogConfig `display:"add-fields"`
Log LogConfig `display:"add-fields"`
}

@@ -356,11 +364,9 @@ func (ss *Sim) Init() {
// selected or patterns have been modified etc
ss.GUI.StopNow = false
ss.ApplyParams()
-    // ss.Net.GPU.SyncParamsToGPU()
ss.InitStats()
ss.NewRun()
ss.TrainUpdate.RecordSyns()
-    // todo: need to pass the counters function here, instead of calling each time.
ss.TrainUpdate.Update(Train, Trial)
}

@@ -769,11 +775,10 @@ func (ss *Sim) ConfigStats() {
pcaFunc(mode, level, phase == Start, trnEpc)
})

-    // ss.Logs.AddCopyFromFloatItems(Train, []Times{Epoch, Run}, Test, Epoch, "Tst", "PhaseDiff", "UnitErr", "PctCor", "PctErr")
-    //
-    // axon.LogInputLayer(&ss.Logs, ss.Net, Train)
-    //
-    // ss.Logs.AddLayerTensorItems(ss.Net, "Act", Test, Trial, "InputLayer", "TargetLayer")
+    stateFunc := axon.StatLayerState(ss.Stats, net, Test, Trial, true, "ActM", "Input", "Output")
+    ss.AddStat(func(mode Modes, level Levels, phase StatsPhase) {
+        stateFunc(mode, level, phase == Start)
+    })
}

// StatCounters returns counters string to show at bottom of netview.
@@ -790,10 +795,13 @@ func (ss *Sim) StatCounters(mode, level enums.Enum) string {
return counters
}
counters += fmt.Sprintf(" TrialName: %s", tensorfs.Value[string](curModeDir, "TrialName").String1D(di))
-    if level == Cycle || curModeDir.Node("UnitErr") == nil {
+    statNames := []string{"CorSim", "UnitErr", "Err"}
+    if level == Cycle || curModeDir.Node(statNames[0]) == nil {
        return counters
    }
-    counters += fmt.Sprintf(" UnitErr: %g", tensorfs.Value[float64](curModeDir, "UnitErr").Float1D(di))
+    for _, name := range statNames {
+        counters += fmt.Sprintf(" %s: %.4g", name, tensorfs.Value[float64](curModeDir, name).Float1D(di))
+    }
return counters
}

@@ -823,32 +831,12 @@ func (ss *Sim) ConfigGUI() {
ss.GUI.UpdateFiles()
ss.InitStats()
ss.GUI.FinalizeGUI(false)

-    // if ss.Config.Run.GPU {
-    //     // vgpu.Debug = ss.Config.Debug // when debugging GPU..
-    //     ss.Net.ConfigGPUnoGUI(&ss.Context) // must happen after gui or no gui
-    //     core.TheApp.AddQuitCleanFunc(func() {
-    //         ss.Net.GPU.Destroy()
-    //     })
-    // }
}

// todo: persistent run log
func (ss *Sim) MakeToolbar(p *tree.Plan) {
ss.GUI.AddLooperCtrl(p, ss.Loops)

-    tree.Add(p, func(w *core.Separator) {})
-    ss.GUI.AddToolbarItem(p, egui.ToolbarItem{
-        Label:   "Reset RunLog",
-        Icon:    icons.Reset,
-        Tooltip: "Reset the accumulated log of all Runs, which are tagged with the ParamSet used",
-        Active:  egui.ActiveAlways,
-        Func: func() {
-            // ss.Logs.ResetLog(Train, Run)
-            // ss.GUI.UpdatePlot(Train, Run)
-        },
-    })

tree.Add(p, func(w *core.Separator) {})
ss.GUI.AddToolbarItem(p, egui.ToolbarItem{
Label: "New Seed",
@@ -888,9 +876,8 @@ func (ss *Sim) RunNoGUI() {

runName := ss.SetRunName()
netName := ss.Net.Name

cfg := &ss.Config.Log
-    axon.OpenLogFiles(ss.Loops, ss.Stats, netName, runName, cfg.Run, cfg.Epoch, cfg.Trial, false, cfg.TestEpoch, cfg.TestTrial)
+    axon.OpenLogFiles(ss.Loops, ss.Stats, netName, runName, [][]string{cfg.Train, cfg.Test})

mpi.Printf("Running %d Runs starting at %d\n", ss.Config.Run.NRuns, ss.Config.Run.Run)
ss.Loops.Loop(Train, Run).Counter.SetCurMaxPlusN(ss.Config.Run.Run, ss.Config.Run.NRuns)