diff --git a/axon/rubicon_net.go b/axon/rubicon_net.go index b5609bf0f..7febc3e0a 100644 --- a/axon/rubicon_net.go +++ b/axon/rubicon_net.go @@ -991,7 +991,7 @@ func (net *Network) AddRubiconOFCus(ctx *Context, nYneur, popY, popX, bgY, bgX, // Makes all appropriate interconnections and sets default parameters. // Needs CS -> BLA, OFC connections to be made. // Returns layers most likely to be used for remaining connections and positions. -func (net *Network) AddRubicon(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfcX int, space float32) (vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPos, ofcPosCT, ofcPosPT, ofcPosPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNeg, ofcNegCT, ofcNegPT, ofcNegPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc *Layer) { +func (net *Network) AddRubicon(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfcX int, space float32) (vSgpi, vSmtxGo, vSmtxNo, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPos, ofcPosCT, ofcPosPT, ofcPosPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNeg, ofcNegCT, ofcNegPT, ofcNegPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc *Layer) { full := prjn.NewFull() var pj *Prjn diff --git a/examples/choose/README.md b/examples/choose/README.md index 017d0b010..0bf95ae35 100644 --- a/examples/choose/README.md +++ b/examples/choose/README.md @@ -2,6 +2,8 @@ This model implements the [Rubicon](../../Rubicon.md) model for goal-driven motivated behavior, in a decision-making task that requires choosing among options with different cost-benefit tradeoffs. 
This exercises the core cost and benefit representations and goal selection and goal maintenance components of the Rubicon model +* TODO: compute US prob at start -- otherwise keeps doing again and again + ## Arm Maze Bandit task The task paradigm is an N-arm bandit task, implemented as a physical maze-like environment where the simulated rodent must walk down an arm to receive the reward outcome (_US_ = unconditioned stimulus) signalled by the stimulus (_CS_ = conditioned stimulus) visible from the start of the arm. The arms can vary in length and effort (e.g., an elevated hill) to manipulate the cost, and the US varies in value according to internal _Drive_ states (e.g., hunger, thirst) and qualities of the US itself (e.g., how tasty it is). Thus, the decision to enter an arm requires balancing the cost vs. benefit tradeoff or net utility, and all of this is learned through the course of exploring the maze over repeated trials. diff --git a/examples/choose/choose.go b/examples/choose/choose.go index e001161e6..f06272777 100644 --- a/examples/choose/choose.go +++ b/examples/choose/choose.go @@ -182,8 +182,8 @@ func (ss *Sim) ConfigRubicon(trn *armaze.Env) { rp := &ss.Net.Rubicon rp.SetNUSs(&ss.Context, trn.Config.NDrives, 1) rp.Defaults() - rp.USs.PVposGain = 2 // higher = more pos reward (saturating logistic func) - rp.USs.PVnegGain = .1 // global scaling of RP neg level -- was 1 + rp.USs.PVposGain = 2 // higher = more pos reward (saturating logistic func) + rp.USs.PVnegGain = 1 // global scaling of RP neg level -- was .1 rp.LHb.VSPatchGain = 5 rp.LHb.VSPatchNonRewThr = 0.15 @@ -224,9 +224,10 @@ func (ss *Sim) ConfigNet(net *axon.Network) { ny := ev.Config.Params.NYReps narm := ev.Config.NArms - vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc := 
net.AddRubicon(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space) + vSgpi, vSmtxGo, vSmtxNo, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc := net.AddRubicon(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space) _, _ = plUtil, urgency _, _ = ofcNegUSCT, ofcNegUSPTp + _, _ = vSmtxGo, vSmtxNo plUtilPTp := net.AxonLayerByName("PLutilPTp") @@ -285,6 +286,9 @@ func (ss *Sim) ConfigNet(net *axon.Network) { net.ConnectToBLAAcq(cs, blaNegAcq, full) net.ConnectToBLAExt(cs, blaNegExt, full) + // net.ConnectToVSMatrix(cs, vSmtxGo, full) + // net.ConnectToVSMatrix(cs, vSmtxNo, full) + // OFCus predicts cs net.ConnectToPFCBack(cs, csP, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, full, "CSToPFC") net.ConnectToPFCBack(cs, csP, ofcNegUS, ofcNegUSCT, ofcPosUSPT, ofcNegUSPTp, full, "CSToPFC") diff --git a/examples/choose/configs/01_us_magnitude.toml b/examples/choose/configs/01_us_magnitude.toml index 6121d9667..68ca9b661 100644 --- a/examples/choose/configs/01_us_magnitude.toml +++ b/examples/choose/configs/01_us_magnitude.toml @@ -1,6 +1,6 @@ # Basic test of US magnitude sensitivity, using GroupGoodBad Paradigm = "GroupGoodBad" -USMagRange.Min = 0.3 +USMagRange.Min = 0.1 USMagRange.Max = 1 diff --git a/examples/choose/configs/02_us_probability.toml b/examples/choose/configs/02_us_probability.toml index 48143389a..f17036c15 100644 --- a/examples/choose/configs/02_us_probability.toml +++ b/examples/choose/configs/02_us_probability.toml @@ -1,6 +1,6 @@ # Basic test of US magnitude sensitivity, using GroupGoodBad Paradigm = "GroupGoodBad" -USProbRange.Min = 0.5 +USProbRange.Min = 0.3 USProbRange.Max = 1 diff --git a/examples/choose/params.go b/examples/choose/params.go index 4c00f4870..dd9b1fa44 100644 --- a/examples/choose/params.go +++ b/examples/choose/params.go @@ 
-28,10 +28,6 @@ var ParamSets = netparams.Sets{ "Layer.Acts.Dend.ModGain": "1.5", // 2 min -- reduces maint early "Layer.Learn.NeuroMod.AChDisInhib": "0.0", // not much effect here.. }}, - {Sel: ".BLALayer", Desc: "", - Params: params.Params{ - "Layer.Learn.NeuroMod.DAModGain": "0.5", - }}, {Sel: ".VSTNLayer", Desc: "all VSTN", Params: params.Params{ "Layer.Acts.Init.GeBase": "0.1", @@ -107,6 +103,10 @@ var ParamSets = netparams.Sets{ "Layer.Inhib.Layer.Gi": "2", // 2 fine with BLANovelInhib prjn "Layer.Inhib.Pool.Gi": "1", }}, + {Sel: "#BLAposExtD2", Desc: "", + Params: params.Params{ + "Layer.CT.GeGain": "0.5", + }}, {Sel: "#BLAnegAcqD2", Desc: "", Params: params.Params{ "Layer.Inhib.Layer.Gi": "1.2", // weaker @@ -182,7 +182,7 @@ var ParamSets = netparams.Sets{ }}, {Sel: ".BLAExtPrjn", Desc: "ext learns relatively fast", Params: params.Params{ - "Prjn.Learn.LRate.Base": "0.005", + "Prjn.Learn.LRate.Base": "0.05", }}, {Sel: ".BLAAcqToGo", Desc: "must dominate", Params: params.Params{ @@ -191,7 +191,7 @@ var ParamSets = netparams.Sets{ }}, {Sel: ".BLAExtToAcq", Desc: "", Params: params.Params{ - "Prjn.PrjnScale.Abs": "0.5", // note: key param -- 0.5 > 1 + "Prjn.PrjnScale.Abs": "2", // note: key param -- was 0.5 (0.5 > 1) }}, {Sel: ".PFCToVSMtx", Desc: "contextual, should be weaker", Params: params.Params{