From a23b1559ae551b58643ef8b87b00e8cfbc97c736 Mon Sep 17 00:00:00 2001 From: "Randall C. O'Reilly" Date: Fri, 22 Nov 2024 02:24:05 -0800 Subject: [PATCH] NewState implemented in GPU kernel; now learning as effectively in GPU as CPU. also fixed tests --- axon/act-layer.go | 38 +++++++++++-- axon/act-layer.goal | 38 +++++++++++-- axon/act-net.go | 41 ++++++++++++++ axon/act-net.goal | 41 ++++++++++++++ axon/act-path.go | 14 ++--- axon/act-path.goal | 14 ++--- axon/act.go | 22 ++++---- axon/act.goal | 22 ++++---- axon/basic_test.go | 19 ++++--- axon/basic_test.goal | 21 ++++--- axon/gosl.go | 129 +++++++++++++++++++++++++++++++++++++++++++ axon/init-layer.go | 61 +------------------- axon/init-layer.goal | 62 +-------------------- axon/init-net.go | 27 ++------- axon/init-net.goal | 26 ++------- axon/init-path.go | 2 +- axon/init-path.goal | 2 +- 17 files changed, 349 insertions(+), 230 deletions(-) diff --git a/axon/act-layer.go b/axon/act-layer.go index 4bd57daa..7589aef5 100644 --- a/axon/act-layer.go +++ b/axon/act-layer.go @@ -850,6 +850,38 @@ func (ly *LayerParams) CyclePostVSPatchLayer(ctx *Context, pi, di uint32, spi in //////// Phase timescale +// NewStateLayer does NewState at the layer level, called +func (ly *LayerParams) NewStateLayer(ctx *Context) { + actMinusAvg := float32(0) + actPlusAvg := float32(0) + np := uint32(ly.Indexes.NPools) + + for di := uint32(0); di < ctx.NData; di++ { + lpi := ly.PoolIndex(0) + + actMinusAvg += PoolAvgMax(AMAct, AMMinus, Avg, lpi, di) + actPlusAvg += PoolAvgMax(AMAct, AMPlus, Avg, lpi, di) + + ly.Acts.Clamp.IsInput.SetBool(ly.IsInput()) + ly.Acts.Clamp.IsTarget.SetBool(ly.IsTarget()) + LayerStates.Set(-1.0, int(ly.Index), int(LayerRT), int(di)) + + for spi := uint32(0); spi < np; spi++ { + pi := ly.PoolIndex(spi) + ly.NewStatePool(ctx, pi, di) // also calls DecayState on pool + } + } + + // note: long-running averages must be based on aggregate data, drive adaptation + // of Gi layer inhibition. + davg := 1 / float32(ctx.NData) + actMinusAvg *= davg + actPlusAvg *= davg + for di := uint32(0); di < ctx.NData; di++ { + ly.NewStateLayerActAvg(ctx, di, actMinusAvg, actPlusAvg) + } +} + // NewStateLayerActAvg updates ActAvg.ActMAvg and ActPAvg based on current values // that have been averaged across NData already. 
func (ly *LayerParams) NewStateLayerActAvg(ctx *Context, di uint32, actMinusAvg, actPlusAvg float32) { @@ -861,12 +893,6 @@ func (ly *LayerParams) NewStateLayerActAvg(ctx *Context, di uint32, actMinusAvg, LayerStates.Set(pavg, int(ly.Index), int(LayerActPAvg), int(di)) } -func (ly *LayerParams) NewStateLayer(ctx *Context, di uint32) { - ly.Acts.Clamp.IsInput.SetBool(ly.IsInput()) - ly.Acts.Clamp.IsTarget.SetBool(ly.IsTarget()) - LayerStates.Set(-1, int(ly.Index), int(LayerRT), int(di)) -} - func (ly *LayerParams) NewStatePool(ctx *Context, pi, di uint32) { PoolsInt.Set(0, int(pi), int(Clamped), int(di)) if ly.Acts.Clamp.Add.IsFalse() && ly.Acts.Clamp.IsInput.IsTrue() { diff --git a/axon/act-layer.goal b/axon/act-layer.goal index f65bb6c3..68c08590 100644 --- a/axon/act-layer.goal +++ b/axon/act-layer.goal @@ -849,6 +849,38 @@ func (ly *LayerParams) CyclePostVSPatchLayer(ctx *Context, pi, di uint32, spi in //////// Phase timescale +// NewStateLayer does NewState at the layer level, called +func (ly *LayerParams) NewStateLayer(ctx *Context) { + actMinusAvg := float32(0) + actPlusAvg := float32(0) + np := uint32(ly.Indexes.NPools) + + for di := uint32(0); di < ctx.NData; di++ { + lpi := ly.PoolIndex(0) + + actMinusAvg += PoolAvgMax(AMAct, AMMinus, Avg, lpi, di) + actPlusAvg += PoolAvgMax(AMAct, AMPlus, Avg, lpi, di) + + ly.Acts.Clamp.IsInput.SetBool(ly.IsInput()) + ly.Acts.Clamp.IsTarget.SetBool(ly.IsTarget()) + LayerStates[ly.Index, LayerRT, di] = -1.0 + + for spi := uint32(0); spi < np; spi++ { + pi := ly.PoolIndex(spi) + ly.NewStatePool(ctx, pi, di) // also calls DecayState on pool + } + } + + // note: long-running averages must be based on aggregate data, drive adaptation + // of Gi layer inhibition. + davg := 1 / float32(ctx.NData) + actMinusAvg *= davg + actPlusAvg *= davg + for di := uint32(0); di < ctx.NData; di++ { + ly.NewStateLayerActAvg(ctx, di, actMinusAvg, actPlusAvg) + } +} + // NewStateLayerActAvg updates ActAvg.ActMAvg and ActPAvg based on current values // that have been averaged across NData already. func (ly *LayerParams) NewStateLayerActAvg(ctx *Context, di uint32, actMinusAvg, actPlusAvg float32) { @@ -860,12 +892,6 @@ func (ly *LayerParams) NewStateLayerActAvg(ctx *Context, di uint32, actMinusAvg, LayerStates[ly.Index, LayerActPAvg, di] = pavg } -func (ly *LayerParams) NewStateLayer(ctx *Context, di uint32) { - ly.Acts.Clamp.IsInput.SetBool(ly.IsInput()) - ly.Acts.Clamp.IsTarget.SetBool(ly.IsTarget()) - LayerStates[ly.Index, LayerRT, di] = -1 -} - func (ly *LayerParams) NewStatePool(ctx *Context, pi, di uint32) { PoolsInt[pi, Clamped, di] = 0 if ly.Acts.Clamp.Add.IsFalse() && ly.Acts.Clamp.IsInput.IsTrue() { diff --git a/axon/act-net.go b/axon/act-net.go index efd9af07..6dc32d5d 100644 --- a/axon/act-net.go +++ b/axon/act-net.go @@ -6,6 +6,8 @@ package axon +import "cogentcore.org/core/enums" + // todo: don't even need layer-level ultimately. // Cycle runs n cycles of activation updating. @@ -61,6 +63,21 @@ func (nt *Network) Cycle(ncyc int, getNeurons bool) { // } } +// NewState handles all initialization at start of new input pattern. +// This is called *before* applying external input data and operates across +// all data parallel values. The current Context.NData should be set +// properly prior to calling this and subsequent Cycle methods. 
+func (nt *Network) NewState(mode enums.Enum, testing bool) { + nix := nt.NetIxs() + ctx := nt.Context() + nd := int(nix.NNeurons * ctx.NData) + ctx.NewState(mode, testing) + ToGPUCtxGlobal() + RunNewStateLayer(int(nix.NLayers)) + RunNewStateNeuron(nd) + RunInitGBuffsPath(int(nix.NPaths)) +} + // InitExt initializes external input state. // Call prior to applying external inputs to layers. func (nt *Network) InitExt() { @@ -295,6 +312,30 @@ func ApplyExtsNeuron(i uint32) { //gosl:kernel Layers[li].ApplyExtsNeuron(ni, di) } +// NewStateLayer is the kernel over Layers (not Data) +// which does new state on pools as well. +func NewStateLayer(li uint32) { //gosl:kernel + ctx := GetCtx(0) + Layers[li].NewStateLayer(ctx) +} + +// NewStateNeuron is the kernel over Neurons * Data to +// do new state on neurons (decay). +func NewStateNeuron(i uint32) { //gosl:kernel + ctx := GetCtx(0) + di := ctx.DataIndex(i) + ni := ctx.ItemIndex(i) + li := NeuronIxs.Value(int(ni), int(NrnLayIndex)) + Layers[li].NewStateNeuron(ctx, ni, di) +} + +// InitGBuffsPath is the kernel over Paths to +// initialize PathGBuf, PathGSyns. +func InitGBuffsPath(pti uint32) { //gosl:kernel + ctx := GetCtx(0) + Paths[pti].InitGBuffs(ctx) +} + // MinusPhasePool is the kernel over Pools * Data to // do pool-level updating after end of minus phase. func MinusPhasePool(i uint32) { //gosl:kernel diff --git a/axon/act-net.goal b/axon/act-net.goal index 4a7f5dc7..8ec23efb 100644 --- a/axon/act-net.goal +++ b/axon/act-net.goal @@ -4,6 +4,8 @@ package axon +import "cogentcore.org/core/enums" + // todo: don't even need layer-level ultimately. // Cycle runs n cycles of activation updating. @@ -55,6 +57,21 @@ func (nt *Network) Cycle(ncyc int, getNeurons bool) { // } } +// NewState handles all initialization at start of new input pattern. +// This is called *before* applying external input data and operates across +// all data parallel values. The current Context.NData should be set +// properly prior to calling this and subsequent Cycle methods. +func (nt *Network) NewState(mode enums.Enum, testing bool) { + nix := nt.NetIxs() + ctx := nt.Context() + nd := int(nix.NNeurons * ctx.NData) + ctx.NewState(mode, testing) + ToGPUCtxGlobal() + RunNewStateLayer(int(nix.NLayers)) + RunNewStateNeuron(nd) + RunInitGBuffsPath(int(nix.NPaths)) +} + // InitExt initializes external input state. // Call prior to applying external inputs to layers. func (nt *Network) InitExt() { @@ -289,6 +306,30 @@ func ApplyExtsNeuron(i uint32) { //gosl:kernel Layers[li].ApplyExtsNeuron(ni, di) } +// NewStateLayer is the kernel over Layers (not Data) +// which does new state on pools as well. +func NewStateLayer(li uint32) { //gosl:kernel + ctx := GetCtx(0) + Layers[li].NewStateLayer(ctx) +} + +// NewStateNeuron is the kernel over Neurons * Data to +// do new state on neurons (decay). +func NewStateNeuron(i uint32) { //gosl:kernel + ctx := GetCtx(0) + di := ctx.DataIndex(i) + ni := ctx.ItemIndex(i) + li := NeuronIxs[ni, NrnLayIndex] + Layers[li].NewStateNeuron(ctx, ni, di) +} + +// InitGBuffsPath is the kernel over Paths to +// initialize PathGBuf, PathGSyns. +func InitGBuffsPath(pti uint32) { //gosl:kernel + ctx := GetCtx(0) + Paths[pti].InitGBuffs(ctx) +} + // MinusPhasePool is the kernel over Pools * Data to // do pool-level updating after end of minus phase. 
func MinusPhasePool(i uint32) { //gosl:kernel diff --git a/axon/act-path.go b/axon/act-path.go index 8dff0181..ba6a6d96 100644 --- a/axon/act-path.go +++ b/axon/act-path.go @@ -306,21 +306,21 @@ func (pt *PathParams) SendSpike(ctx *Context, ni, di, lni uint32) { // This is not typically needed (called during InitWeights, InitActs) // but can be called when needed. Must be called to completely initialize // prior activity, e.g., full Glong clearing. -func (pt *PathParams) InitGBuffs() { +func (pt *PathParams) InitGBuffs(ctx *Context) { nix := GetNetworkIxs(0) maxd := nix.MaxData mdel := nix.MaxDelay + 1 rnn := pt.Indexes.RecvNeurN npst := pt.Indexes.NPathNeurSt - for dl := range mdel { - for ri := range rnn { - for di := range maxd { - PathGBuf.Set(0.0, int(npst+ri), int(dl), int(di)) + for dl := uint32(0); dl < mdel; dl++ { + for ri := uint32(0); ri < rnn; ri++ { + for di := uint32(0); di < maxd; di++ { + PathGBuf.Set(0, int(npst+ri), int(dl), int(di)) } } } - for ri := range rnn { - for di := range maxd { + for ri := uint32(0); ri < rnn; ri++ { + for di := uint32(0); di < maxd; di++ { PathGSyns.Set(0.0, int(npst+ri), int(di)) } } diff --git a/axon/act-path.goal b/axon/act-path.goal index b3ca91c1..abdec7e3 100644 --- a/axon/act-path.goal +++ b/axon/act-path.goal @@ -304,21 +304,21 @@ func (pt *PathParams) SendSpike(ctx *Context, ni, di, lni uint32) { // This is not typically needed (called during InitWeights, InitActs) // but can be called when needed. Must be called to completely initialize // prior activity, e.g., full Glong clearing. -func (pt *PathParams) InitGBuffs() { +func (pt *PathParams) InitGBuffs(ctx *Context) { nix := GetNetworkIxs(0) maxd := nix.MaxData mdel := nix.MaxDelay + 1 rnn := pt.Indexes.RecvNeurN npst := pt.Indexes.NPathNeurSt - for dl := range mdel { - for ri := range rnn { - for di := range maxd { - PathGBuf[npst+ri, dl, di] = 0.0 + for dl := uint32(0); dl < mdel; dl++ { + for ri := uint32(0); ri < rnn; ri++ { + for di := uint32(0); di < maxd; di++ { + PathGBuf[npst+ri, dl, di] = 0 } } } - for ri := range rnn { - for di := range maxd { + for ri := uint32(0); ri < rnn; ri++ { + for di := uint32(0); di < maxd; di++ { PathGSyns[npst+ri, di] = 0.0 } } diff --git a/axon/act.go b/axon/act.go index be5ec412..f6ed1209 100644 --- a/axon/act.go +++ b/axon/act.go @@ -891,15 +891,15 @@ func (ac *ActParams) DecayAHP(ctx *Context, ni, di uint32, decay float32) { // Called with ac.Decay.Act by Layer during NewState func (ac *ActParams) DecayState(ctx *Context, ni, di uint32, decay, glong, ahp float32) { // always reset these -- otherwise get insanely large values that take forever to update - Neurons.Set(-1, int(ni), int(ISIAvg), int(di)) + Neurons.Set(-1.0, int(ni), int(ISIAvg), int(di)) Neurons.Set(ac.Init.Act, int(ni), int(ActInt), int(di)) - Neurons.Set(0, int(ni), int(Spiked), int(di)) + Neurons.Set(0.0, int(ni), int(Spiked), int(di)) for i := range 8 { Neurons.Set(0.0, int(ni), int(SpkBin0+NeuronVars(i)), int(di)) } if decay > 0 { // no-op for most, but not all.. 
- Neurons.Set(0, int(ni), int(Spike), int(di)) + Neurons.Set(0.0, int(ni), int(Spike), int(di)) Neurons.SetSub(decay*(Neurons.Value(int(ni), int(Act), int(di))-ac.Init.Act), int(ni), int(Act), int(di)) Neurons.SetSub(decay*(Neurons.Value(int(ni), int(ActInt), int(di))-ac.Init.Act), int(ni), int(ActInt), int(di)) Neurons.SetSub(decay*(Neurons.Value(int(ni), int(GeSyn), int(di))-NeuronAvgs.Value(int(ni), int(GeBase))), int(ni), int(GeSyn), int(di)) @@ -945,14 +945,14 @@ func (ac *ActParams) DecayState(ctx *Context, ni, di uint32, decay, glong, ahp f ac.DecayLearnCa(ctx, ni, di, ac.Decay.LearnCa) } - Neurons.Set(0, int(ni), int(Inet), int(di)) - Neurons.Set(0, int(ni), int(GeRaw), int(di)) - Neurons.Set(0, int(ni), int(GiRaw), int(di)) - Neurons.Set(0, int(ni), int(GModRaw), int(di)) - Neurons.Set(0, int(ni), int(GModSyn), int(di)) - Neurons.Set(0, int(ni), int(GMaintRaw), int(di)) - Neurons.Set(0, int(ni), int(SSGiDend), int(di)) - Neurons.Set(0, int(ni), int(GeExt), int(di)) + Neurons.Set(0.0, int(ni), int(Inet), int(di)) + Neurons.Set(0.0, int(ni), int(GeRaw), int(di)) + Neurons.Set(0.0, int(ni), int(GiRaw), int(di)) + Neurons.Set(0.0, int(ni), int(GModRaw), int(di)) + Neurons.Set(0.0, int(ni), int(GModSyn), int(di)) + Neurons.Set(0.0, int(ni), int(GMaintRaw), int(di)) + Neurons.Set(0.0, int(ni), int(SSGiDend), int(di)) + Neurons.Set(0.0, int(ni), int(GeExt), int(di)) Neurons.SetSub(glong*Neurons.Value(int(ni), int(CtxtGeOrig), int(di)), int(ni), int(CtxtGeOrig), int(di)) } diff --git a/axon/act.goal b/axon/act.goal index 5f624b2f..55efcf4b 100644 --- a/axon/act.goal +++ b/axon/act.goal @@ -889,15 +889,15 @@ func (ac *ActParams) DecayAHP(ctx *Context, ni, di uint32, decay float32) { // Called with ac.Decay.Act by Layer during NewState func (ac *ActParams) DecayState(ctx *Context, ni, di uint32, decay, glong, ahp float32) { // always reset these -- otherwise get insanely large values that take forever to update - Neurons[ni, ISIAvg, di] = -1 + Neurons[ni, ISIAvg, di] = -1.0 Neurons[ni, ActInt, di] = ac.Init.Act - Neurons[ni, Spiked, di] = 0 + Neurons[ni, Spiked, di] = 0.0 for i := range 8 { Neurons[ni, SpkBin0+NeuronVars(i), di] = 0.0 } if decay > 0 { // no-op for most, but not all.. 
- Neurons[ni, Spike, di] = 0 + Neurons[ni, Spike, di] = 0.0 Neurons[ni, Act, di] -= decay * (Neurons[ni, Act, di] - ac.Init.Act) Neurons[ni, ActInt, di] -= decay * (Neurons[ni, ActInt, di] - ac.Init.Act) Neurons[ni, GeSyn, di] -= decay * (Neurons[ni, GeSyn, di] - NeuronAvgs[ni, GeBase]) @@ -943,14 +943,14 @@ func (ac *ActParams) DecayState(ctx *Context, ni, di uint32, decay, glong, ahp f ac.DecayLearnCa(ctx, ni, di, ac.Decay.LearnCa) } - Neurons[ni, Inet, di] = 0 - Neurons[ni, GeRaw, di] = 0 - Neurons[ni, GiRaw, di] = 0 - Neurons[ni, GModRaw, di] = 0 - Neurons[ni, GModSyn, di] = 0 - Neurons[ni, GMaintRaw, di] = 0 - Neurons[ni, SSGiDend, di] = 0 - Neurons[ni, GeExt, di] = 0 + Neurons[ni, Inet, di] = 0.0 + Neurons[ni, GeRaw, di] = 0.0 + Neurons[ni, GiRaw, di] = 0.0 + Neurons[ni, GModRaw, di] = 0.0 + Neurons[ni, GModSyn, di] = 0.0 + Neurons[ni, GMaintRaw, di] = 0.0 + Neurons[ni, SSGiDend, di] = 0.0 + Neurons[ni, GeExt, di] = 0.0 Neurons[ni, CtxtGeOrig, di] -= glong * Neurons[ni, CtxtGeOrig, di] } diff --git a/axon/basic_test.go b/axon/basic_test.go index ad9b0ec3..5b95f768 100644 --- a/axon/basic_test.go +++ b/axon/basic_test.go @@ -395,10 +395,11 @@ func TestNetActShort(t *testing.T) { } func TestGPUAct(t *testing.T) { - if os.Getenv("TEST_GPU") != "true" { - t.Skip("Set TEST_GPU env var to run GPU tests") - } + // if os.Getenv("TEST_GPU") != "true" { + // t.Skip("Set TEST_GPU env var to run GPU tests") + // } NetActTestShort(t, Tol6, true) + // NetActTest(t, Tol6, true) } // NetActTest runs an activation test on the network and checks @@ -523,6 +524,7 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { } GPURelease() + UseGPU = false } // NetActTestShort runs an activation test on the network and checks @@ -619,6 +621,7 @@ func NetActTestShort(t *testing.T, tol float32, gpu bool) { testNet.PlusPhase() } GPURelease() + UseGPU = false } // ReportValDiffs -- reports diffs between a, b values at given tolerance @@ -767,6 +770,7 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini } GPURelease() + UseGPU = false return valMap } @@ -959,6 +963,7 @@ func NetTestLearn(t *testing.T, tol float32, gpu bool) { CompareFloats(tol, outwt, outWts, "outWts", t) GPURelease() + UseGPU = false } func TestNetRLRate(t *testing.T) { @@ -986,7 +991,7 @@ func NetTestRLRate(t *testing.T, tol float32, gpu bool) { printCycs := false printQtrs := false - patHidRLRates := []float32{5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 9.572107e-05, 0.005967056, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 8.116122e-05, 8.9521294e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 7.533143e-05, 0.003373957} + patHidRLRates := []float32{5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 0.000100934616, 0.0029769302, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 8.4988904e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 7.806076e-05, 0.002506172} // these are organized by pattern within and then by test iteration (params) outer // only the single active synapse is represented -- one per pattern @@ -1002,9 +1007,9 @@ func NetTestRLRate(t *testing.T, tol float32, gpu bool) { q3outCaP := make([]float32, 4*nLearnParams) q3outCaD := make([]float32, 4*nLearnParams) - hidDwts := []float32{1.708368e-07, 2.9798115e-05, 3.612578e-07, 1.5011935e-05} + hidDwts := []float32{1.708368e-07, 1.486611e-05, 2.0177201e-07, 1.1150851e-05} outDwts := []float32{0.0009000063, 0.005000009, 0.0022999954, 0.0025999974} - hidWts := []float32{0.5000011, 0.5001788, 0.50000215, 0.5000901} 
+ hidWts := []float32{0.5000011, 0.50008905, 0.5000011, 0.5000669} outWts := []float32{0.5053999, 0.52996504, 0.51379675, 0.51559514} hiddwt := make([]float32, 4*nLearnParams) @@ -1296,7 +1301,7 @@ func TestDebugLearn(t *testing.T) { func TestNDataLearn(t *testing.T) { nd1Values := NetDebugLearn(t, false, false, 1, 1, true, false, false) nd4Values := NetDebugLearn(t, false, false, 4, 4, true, false, false) - ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4", "DWt", "ActAvg", "DTrgAvg", "LayerAvgMaxGeM", "LayerAvgMaxGiM") // todo: LayerAvgMaxGeM and GiM need further investigation + ReportValDiffs(t, Tol7, nd1Values, nd4Values, "nData = 1", "nData = 4", "DWt", "ActAvg", "DTrgAvg", "LayerAvgMaxGeM", "LayerAvgMaxGiM") // todo: LayerAvgMaxGeM and GiM need further investigation } func TestNDataMaxDataLearn(t *testing.T) { diff --git a/axon/basic_test.goal b/axon/basic_test.goal index 9bd17041..8809f078 100644 --- a/axon/basic_test.goal +++ b/axon/basic_test.goal @@ -395,10 +395,11 @@ func TestNetActShort(t *testing.T) { } func TestGPUAct(t *testing.T) { - if os.Getenv("TEST_GPU") != "true" { - t.Skip("Set TEST_GPU env var to run GPU tests") - } + // if os.Getenv("TEST_GPU") != "true" { + // t.Skip("Set TEST_GPU env var to run GPU tests") + // } NetActTestShort(t, Tol6, true) + // NetActTest(t, Tol6, true) } // NetActTest runs an activation test on the network and checks @@ -523,6 +524,7 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { } GPURelease() + UseGPU = false } // NetActTestShort runs an activation test on the network and checks @@ -619,6 +621,7 @@ func NetActTestShort(t *testing.T, tol float32, gpu bool) { testNet.PlusPhase() } GPURelease() + UseGPU = false } // ReportValDiffs -- reports diffs between a, b values at given tolerance @@ -767,6 +770,7 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini } GPURelease() + UseGPU = false return valMap } @@ -959,6 +963,7 @@ func NetTestLearn(t *testing.T, tol float32, gpu bool) { CompareFloats(tol, outwt, outWts, "outWts", t) GPURelease() + UseGPU = false } func TestNetRLRate(t *testing.T) { @@ -986,8 +991,8 @@ func NetTestRLRate(t *testing.T, tol float32, gpu bool) { printCycs := false printQtrs := false - patHidRLRates := []float32{5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 9.572107e-05, 0.005967056, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 8.116122e-05, 8.9521294e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 7.533143e-05, 0.003373957} - + patHidRLRates := []float32{5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 0.000100934616, 0.0029769302, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 8.4988904e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 5.0000002e-05, 7.806076e-05, 0.002506172} + // these are organized by pattern within and then by test iteration (params) outer // only the single active synapse is represented -- one per pattern // if there are differences, they will multiply over patterns and layers.. 
@@ -1002,9 +1007,9 @@ func NetTestRLRate(t *testing.T, tol float32, gpu bool) { q3outCaP := make([]float32, 4*nLearnParams) q3outCaD := make([]float32, 4*nLearnParams) - hidDwts := []float32{1.708368e-07, 2.9798115e-05, 3.612578e-07, 1.5011935e-05} + hidDwts := []float32{1.708368e-07, 1.486611e-05, 2.0177201e-07, 1.1150851e-05} outDwts := []float32{0.0009000063, 0.005000009, 0.0022999954, 0.0025999974} - hidWts := []float32{0.5000011, 0.5001788, 0.50000215, 0.5000901} + hidWts := []float32{0.5000011, 0.50008905, 0.5000011, 0.5000669} outWts := []float32{0.5053999, 0.52996504, 0.51379675, 0.51559514} hiddwt := make([]float32, 4*nLearnParams) @@ -1296,7 +1301,7 @@ func TestDebugLearn(t *testing.T) { func TestNDataLearn(t *testing.T) { nd1Values := NetDebugLearn(t, false, false, 1, 1, true, false, false) nd4Values := NetDebugLearn(t, false, false, 4, 4, true, false, false) - ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4", "DWt", "ActAvg", "DTrgAvg", "LayerAvgMaxGeM", "LayerAvgMaxGiM") // todo: LayerAvgMaxGeM and GiM need further investigation + ReportValDiffs(t, Tol7, nd1Values, nd4Values, "nData = 1", "nData = 4", "DWt", "ActAvg", "DTrgAvg", "LayerAvgMaxGeM", "LayerAvgMaxGiM") // todo: LayerAvgMaxGeM and GiM need further investigation } func TestNDataMaxDataLearn(t *testing.T) { diff --git a/axon/gosl.go b/axon/gosl.go index f026a0fe..5a6ba35f 100644 --- a/axon/gosl.go +++ b/axon/gosl.go @@ -71,9 +71,12 @@ func GPUInit() { gpu.NewComputePipelineShaderFS(shaders, "shaders/DWtSyn.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/GPUTestWrite.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/GatherSpikes.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/InitGBuffsPath.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/LayerGi.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/MinusPhaseNeuron.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/MinusPhasePool.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/NewStateLayer.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/NewStateNeuron.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhaseNeuron.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhasePool.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhaseStartNeuron.wgsl", sy) @@ -574,6 +577,48 @@ func RunOneGatherSpikes(n int, syncVars ...GPUVars) { RunGatherSpikesCPU(n) } } +// RunInitGBuffsPath runs the InitGBuffsPath kernel with given number of elements, +// on either the CPU or GPU depending on the UseGPU variable. +// Can call multiple Run* kernels in a row, which are then all launched +// in the same command submission on the GPU, which is by far the most efficient. +// MUST call RunDone (with optional vars to sync) after all Run calls. +// Alternatively, a single-shot RunOneInitGBuffsPath call does Run and Done for a +// single run-and-sync case. +func RunInitGBuffsPath(n int) { + if UseGPU { + RunInitGBuffsPathGPU(n) + } else { + RunInitGBuffsPathCPU(n) + } +} + +// RunInitGBuffsPathGPU runs the InitGBuffsPath kernel on the GPU. See [RunInitGBuffsPath] for more info. +func RunInitGBuffsPathGPU(n int) { + sy := GPUSystem + pl := sy.ComputePipelines["InitGBuffsPath"] + ce, _ := sy.BeginComputePass() + pl.Dispatch1D(ce, n, 64) +} + +// RunInitGBuffsPathCPU runs the InitGBuffsPath kernel on the CPU. 
+func RunInitGBuffsPathCPU(n int) { + gpu.VectorizeFunc(0, n, InitGBuffsPath) +} + +// RunOneInitGBuffsPath runs the InitGBuffsPath kernel with given number of elements, +// on either the CPU or GPU depending on the UseGPU variable. +// This version then calls RunDone with the given variables to sync +// after the Run, for a single-shot Run-and-Done call. If multiple kernels +// can be run in sequence, it is much more efficient to do multiple Run* +// calls followed by a RunDone call. +func RunOneInitGBuffsPath(n int, syncVars ...GPUVars) { + if UseGPU { + RunInitGBuffsPathGPU(n) + RunDone(syncVars...) + } else { + RunInitGBuffsPathCPU(n) + } +} // RunLayerGi runs the LayerGi kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched @@ -700,6 +745,90 @@ func RunOneMinusPhasePool(n int, syncVars ...GPUVars) { RunMinusPhasePoolCPU(n) } } +// RunNewStateLayer runs the NewStateLayer kernel with given number of elements, +// on either the CPU or GPU depending on the UseGPU variable. +// Can call multiple Run* kernels in a row, which are then all launched +// in the same command submission on the GPU, which is by far the most efficient. +// MUST call RunDone (with optional vars to sync) after all Run calls. +// Alternatively, a single-shot RunOneNewStateLayer call does Run and Done for a +// single run-and-sync case. +func RunNewStateLayer(n int) { + if UseGPU { + RunNewStateLayerGPU(n) + } else { + RunNewStateLayerCPU(n) + } +} + +// RunNewStateLayerGPU runs the NewStateLayer kernel on the GPU. See [RunNewStateLayer] for more info. +func RunNewStateLayerGPU(n int) { + sy := GPUSystem + pl := sy.ComputePipelines["NewStateLayer"] + ce, _ := sy.BeginComputePass() + pl.Dispatch1D(ce, n, 64) +} + +// RunNewStateLayerCPU runs the NewStateLayer kernel on the CPU. +func RunNewStateLayerCPU(n int) { + gpu.VectorizeFunc(0, n, NewStateLayer) +} + +// RunOneNewStateLayer runs the NewStateLayer kernel with given number of elements, +// on either the CPU or GPU depending on the UseGPU variable. +// This version then calls RunDone with the given variables to sync +// after the Run, for a single-shot Run-and-Done call. If multiple kernels +// can be run in sequence, it is much more efficient to do multiple Run* +// calls followed by a RunDone call. +func RunOneNewStateLayer(n int, syncVars ...GPUVars) { + if UseGPU { + RunNewStateLayerGPU(n) + RunDone(syncVars...) + } else { + RunNewStateLayerCPU(n) + } +} +// RunNewStateNeuron runs the NewStateNeuron kernel with given number of elements, +// on either the CPU or GPU depending on the UseGPU variable. +// Can call multiple Run* kernels in a row, which are then all launched +// in the same command submission on the GPU, which is by far the most efficient. +// MUST call RunDone (with optional vars to sync) after all Run calls. +// Alternatively, a single-shot RunOneNewStateNeuron call does Run and Done for a +// single run-and-sync case. +func RunNewStateNeuron(n int) { + if UseGPU { + RunNewStateNeuronGPU(n) + } else { + RunNewStateNeuronCPU(n) + } +} + +// RunNewStateNeuronGPU runs the NewStateNeuron kernel on the GPU. See [RunNewStateNeuron] for more info. +func RunNewStateNeuronGPU(n int) { + sy := GPUSystem + pl := sy.ComputePipelines["NewStateNeuron"] + ce, _ := sy.BeginComputePass() + pl.Dispatch1D(ce, n, 64) +} + +// RunNewStateNeuronCPU runs the NewStateNeuron kernel on the CPU. 
+func RunNewStateNeuronCPU(n int) { + gpu.VectorizeFunc(0, n, NewStateNeuron) +} + +// RunOneNewStateNeuron runs the NewStateNeuron kernel with given number of elements, +// on either the CPU or GPU depending on the UseGPU variable. +// This version then calls RunDone with the given variables to sync +// after the Run, for a single-shot Run-and-Done call. If multiple kernels +// can be run in sequence, it is much more efficient to do multiple Run* +// calls followed by a RunDone call. +func RunOneNewStateNeuron(n int, syncVars ...GPUVars) { + if UseGPU { + RunNewStateNeuronGPU(n) + RunDone(syncVars...) + } else { + RunNewStateNeuronCPU(n) + } +} // RunPlusPhaseNeuron runs the PlusPhaseNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched diff --git a/axon/init-layer.go b/axon/init-layer.go index 46559272..e8951dd2 100644 --- a/axon/init-layer.go +++ b/axon/init-layer.go @@ -10,8 +10,6 @@ import ( "cogentcore.org/core/base/randx" ) -// todo: all of this could be done on LayerParams / gpu - // InitWeights initializes the weight values in the network, i.e., resetting learning // Also calls InitActs func (ly *Layer) InitWeights(ctx *Context, nt *Network) { //types:add @@ -184,18 +182,7 @@ func (ly *Layer) InitActs(ctx *Context) { //types:add // Target layers are dynamically updated } } - ly.InitPathGBuffs(ctx) -} - -// InitPathGBuffs initializes the pathway-level conductance buffers and -// conductance integration values for receiving pathways in this layer. -func (ly *Layer) InitPathGBuffs(ctx *Context) { - for _, pt := range ly.RecvPaths { - if pt.Off { - continue - } - pt.Params.InitGBuffs() - } + // ly.InitPathGBuffs(ctx) } // InitWeightsSym initializes the weight symmetry -- higher layers copy weights from lower layers @@ -297,52 +284,6 @@ func (ly *Layer) InitGScale(ctx *Context) { } } -// NewState handles all initialization at start of new input pattern. -// Does NOT call InitGScale() -func (ly *Layer) NewState(ctx *Context) { - nn := ly.NNeurons - np := ly.NPools - - actMinusAvg := float32(0) - actPlusAvg := float32(0) - - for di := uint32(0); di < ctx.NData; di++ { - lpi := ly.Params.PoolIndex(0) - - actMinusAvg += PoolAvgMax(AMAct, AMMinus, Avg, lpi, di) - actPlusAvg += PoolAvgMax(AMAct, AMPlus, Avg, lpi, di) - - ly.Params.NewStateLayer(ctx, di) - - for spi := uint32(0); spi < np; spi++ { - pi := ly.Params.PoolIndex(spi) - ly.Params.NewStatePool(ctx, pi, di) // also calls DecayState on pool - } - - for lni := uint32(0); lni < nn; lni++ { - ni := ly.NeurStIndex + lni - if NeuronIsOff(ni) { - continue - } - // note: this calls the basic neuron-level DecayState - ly.Params.NewStateNeuron(ctx, ni, di) - } - } - - // note: long-running averages must be based on aggregate data, drive adaptation - // of Gi layer inhibition. 
- davg := 1 / float32(ctx.NData) - actMinusAvg *= davg - actPlusAvg *= davg - for di := uint32(0); di < ctx.NData; di++ { - ly.Params.NewStateLayerActAvg(ctx, di, actMinusAvg, actPlusAvg) - } - - // note: would be somewhat more expensive to only clear the di specific subset - // but all di are decayed every trial anyway so no big deal - ly.InitPathGBuffs(ctx) -} - // NewStateNeurons only calls the neurons part of new state -- for misbehaving GPU func (ly *Layer) NewStateNeurons(ctx *Context) { nn := ly.NNeurons diff --git a/axon/init-layer.goal b/axon/init-layer.goal index 227d68a5..76eee1c5 100644 --- a/axon/init-layer.goal +++ b/axon/init-layer.goal @@ -8,8 +8,6 @@ import ( "cogentcore.org/core/base/randx" ) -// todo: all of this could be done on LayerParams / gpu - // InitWeights initializes the weight values in the network, i.e., resetting learning // Also calls InitActs func (ly *Layer) InitWeights(ctx *Context, nt *Network) { //types:add @@ -182,18 +180,7 @@ func (ly *Layer) InitActs(ctx *Context) { //types:add // Target layers are dynamically updated } } - ly.InitPathGBuffs(ctx) -} - -// InitPathGBuffs initializes the pathway-level conductance buffers and -// conductance integration values for receiving pathways in this layer. -func (ly *Layer) InitPathGBuffs(ctx *Context) { - for _, pt := range ly.RecvPaths { - if pt.Off { - continue - } - pt.Params.InitGBuffs() - } + // ly.InitPathGBuffs(ctx) } // InitWeightsSym initializes the weight symmetry -- higher layers copy weights from lower layers @@ -294,52 +281,6 @@ func (ly *Layer) InitGScale(ctx *Context) { } } -// NewState handles all initialization at start of new input pattern. -// Does NOT call InitGScale() -func (ly *Layer) NewState(ctx *Context) { - nn := ly.NNeurons - np := ly.NPools - - actMinusAvg := float32(0) - actPlusAvg := float32(0) - - for di := uint32(0); di < ctx.NData; di++ { - lpi := ly.Params.PoolIndex(0) - - actMinusAvg += PoolAvgMax(AMAct, AMMinus, Avg, lpi, di) - actPlusAvg += PoolAvgMax(AMAct, AMPlus, Avg, lpi, di) - - ly.Params.NewStateLayer(ctx, di) - - for spi := uint32(0); spi < np; spi++ { - pi := ly.Params.PoolIndex(spi) - ly.Params.NewStatePool(ctx, pi, di) // also calls DecayState on pool - } - - for lni := uint32(0); lni < nn; lni++ { - ni := ly.NeurStIndex + lni - if NeuronIsOff(ni) { - continue - } - // note: this calls the basic neuron-level DecayState - ly.Params.NewStateNeuron(ctx, ni, di) - } - } - - // note: long-running averages must be based on aggregate data, drive adaptation - // of Gi layer inhibition. - davg := 1 / float32(ctx.NData) - actMinusAvg *= davg - actPlusAvg *= davg - for di := uint32(0); di < ctx.NData; di++ { - ly.Params.NewStateLayerActAvg(ctx, di, actMinusAvg, actPlusAvg) - } - - // note: would be somewhat more expensive to only clear the di specific subset - // but all di are decayed every trial anyway so no big deal - ly.InitPathGBuffs(ctx) -} - // NewStateNeurons only calls the neurons part of new state -- for misbehaving GPU func (ly *Layer) NewStateNeurons(ctx *Context) { nn := ly.NNeurons @@ -420,3 +361,4 @@ func (ly *Layer) DecayStateNeuronsAll(ctx *Context, decay, glong, ahp float32) { } } + diff --git a/axon/init-net.go b/axon/init-net.go index c3f61f8a..5b50cd05 100644 --- a/axon/init-net.go +++ b/axon/init-net.go @@ -7,7 +7,6 @@ package axon import ( - "cogentcore.org/core/enums" "cogentcore.org/core/tensor" "github.com/emer/emergent/v2/paths" ) @@ -27,27 +26,6 @@ func GlobalsReset() { } } -// NewState handles all initialization at start of new input pattern. 
-// This is called *before* applying external input data and operates across -// all data parallel values. The current Context.NData should be set -// properly prior to calling this and subsequent Cycle methods. -func (nt *Network) NewState(mode enums.Enum, testing bool) { - // if nt.GPU.On { // todo: this has a bug in neuron-level access in updating SpkPrv - // - // nt.GPU.RunNewState() - // return - // } - ctx := nt.Context() - ctx.NewState(mode, testing) - for _, ly := range nt.Layers { - if ly.Off { - continue - } - ly.NewState(ctx) - } - ToGPULayersNeurons() -} - // InitWeights initializes synaptic weights and all other associated long-term state variables // including running-average state values (e.g., layer running average activations etc) func (nt *Network) InitWeights() { //types:add @@ -144,7 +122,7 @@ func (nt *Network) DecayState(decay, glong, ahp float32) { ly.DecayState(ctx, di, decay, glong, ahp) } } - // nt.GPU.SyncStateToGPU() + ToGPULayersNeurons() } // DecayStateByType decays activation state for given layer types @@ -180,6 +158,7 @@ func (nt *Network) DecayStateLayers(decay, glong, ahp float32, layers ...string) ly.DecayState(ctx, di, decay, glong, ahp) } } + ToGPULayersNeurons() // nt.GPU.SyncStateToGPU() } @@ -193,6 +172,8 @@ func (nt *Network) InitActs() { //types:add } ly.InitActs(ctx) } + ToGPULayersNeurons() + ToGPU(PathGBufVar, PathGSynsVar) // nt.GPU.SyncStateToGPU() // todo: // nt.GPU.SyncGBufToGPU() // zeros everyone } diff --git a/axon/init-net.goal b/axon/init-net.goal index b4e11a65..0d6217d0 100644 --- a/axon/init-net.goal +++ b/axon/init-net.goal @@ -5,7 +5,6 @@ package axon import ( - "cogentcore.org/core/enums" "cogentcore.org/core/tensor" "github.com/emer/emergent/v2/paths" ) @@ -25,26 +24,6 @@ func GlobalsReset() { } } -// NewState handles all initialization at start of new input pattern. -// This is called *before* applying external input data and operates across -// all data parallel values. The current Context.NData should be set -// properly prior to calling this and subsequent Cycle methods. 
-func (nt *Network) NewState(mode enums.Enum, testing bool) { - // if nt.GPU.On { // todo: this has a bug in neuron-level access in updating SpkPrv - // nt.GPU.RunNewState() - // return - // } - ctx := nt.Context() - ctx.NewState(mode, testing) - for _, ly := range nt.Layers { - if ly.Off { - continue - } - ly.NewState(ctx) - } - ToGPULayersNeurons() -} - // InitWeights initializes synaptic weights and all other associated long-term state variables // including running-average state values (e.g., layer running average activations etc) func (nt *Network) InitWeights() { //types:add @@ -141,7 +120,7 @@ func (nt *Network) DecayState(decay, glong, ahp float32) { ly.DecayState(ctx, di, decay, glong, ahp) } } - // nt.GPU.SyncStateToGPU() + ToGPULayersNeurons() } // DecayStateByType decays activation state for given layer types @@ -177,6 +156,7 @@ func (nt *Network) DecayStateLayers(decay, glong, ahp float32, layers ...string) ly.DecayState(ctx, di, decay, glong, ahp) } } + ToGPULayersNeurons() // nt.GPU.SyncStateToGPU() } @@ -190,6 +170,8 @@ func (nt *Network) InitActs() { //types:add } ly.InitActs(ctx) } + ToGPULayersNeurons() + ToGPU(PathGBufVar, PathGSynsVar) // nt.GPU.SyncStateToGPU() // todo: // nt.GPU.SyncGBufToGPU() // zeros everyone } diff --git a/axon/init-path.go b/axon/init-path.go index c97dd48b..7f7149a1 100644 --- a/axon/init-path.go +++ b/axon/init-path.go @@ -114,7 +114,7 @@ func (pt *Path) InitWeightsSyn(ctx *Context, syni uint32, rnd randx.Rand, mean, // enforcing current constraints. func (pt *Path) InitWeights(ctx *Context, nt *Network) { pt.Params.Learn.LRate.Init() - pt.Params.InitGBuffs() + pt.Params.InitGBuffs(ctx) rlay := pt.Recv spct := pt.Params.SWts.Init.SPct if rlay.Params.IsTarget() { diff --git a/axon/init-path.goal b/axon/init-path.goal index 44472e66..976a11a2 100644 --- a/axon/init-path.goal +++ b/axon/init-path.goal @@ -112,7 +112,7 @@ func (pt *Path) InitWeightsSyn(ctx *Context, syni uint32, rnd randx.Rand, mean, // enforcing current constraints. func (pt *Path) InitWeights(ctx *Context, nt *Network) { pt.Params.Learn.LRate.Init() - pt.Params.InitGBuffs() + pt.Params.InitGBuffs(ctx) rlay := pt.Recv spct := pt.Params.SWts.Init.SPct if rlay.Params.IsTarget() {