novelty does not extinguish with negative da -- need some natural unlearning on that pathway because clearly the CS is no longer novel, and you don't want the actual novel US to extinguish.
emer · Apr 22, 2024 · 42f7036 · 42f7036
1 parent b98792e
commit 42f7036
Show file tree

Hide file tree

Showing 6 changed files with 18 additions and 12 deletions.
diff --git a/axon/rubicon_net.go b/axon/rubicon_net.go
@@ -991,7 +991,7 @@ func (net *Network) AddRubiconOFCus(ctx *Context, nYneur, popY, popX, bgY, bgX,
 // Makes all appropriate interconnections and sets default parameters.
 // Needs CS -> BLA, OFC connections to be made.
 // Returns layers most likely to be used for remaining connections and positions.
-func (net *Network) AddRubicon(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfcX int, space float32) (vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPos, ofcPosCT, ofcPosPT, ofcPosPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNeg, ofcNegCT, ofcNegPT, ofcNegPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc *Layer) {
+func (net *Network) AddRubicon(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfcX int, space float32) (vSgpi, vSmtxGo, vSmtxNo, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPos, ofcPosCT, ofcPosPT, ofcPosPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNeg, ofcNegCT, ofcNegPT, ofcNegPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc *Layer) {
 
 	full := prjn.NewFull()
 	var pj *Prjn

diff --git a/examples/choose/README.md b/examples/choose/README.md
@@ -2,6 +2,8 @@
 
 This model implements the [Rubicon](../../Rubicon.md) model for goal-driven motivated behavior, in a decision-making task that requires choosing among options with different cost-benefit tradeoffs.  This exercises the core cost and benefit representations and goal selection and goal maintenance components of the Rubicon model.
 
+* TODO: compute the US probability at the start -- otherwise it keeps doing it again and again
+
 ## Arm Maze Bandit task
 
 The task paradigm is an N-arm bandit task, implemented as a physical maze-like environment where the simulated rodent must walk down an arm to receive the reward outcome (_US_ = unconditioned stimulus) signalled by the stimulus (_CS_ = conditioned stimulus) visible from the start of the arm.  The arms can vary in length and effort (e.g., an elevated hill) to manipulate the cost, and the US varies in value according to internal _Drive_ states (e.g., hunger, thirst) and qualities of the US itself (e.g., how tasty it is).  Thus, the decision to enter an arm requires balancing the cost vs. benefit tradeoff or net utility, and all of this is learned through the course of exploring the maze over repeated trials.

diff --git a/examples/choose/choose.go b/examples/choose/choose.go
@@ -182,8 +182,8 @@ func (ss *Sim) ConfigRubicon(trn *armaze.Env) {
 	rp := &ss.Net.Rubicon
 	rp.SetNUSs(&ss.Context, trn.Config.NDrives, 1)
 	rp.Defaults()
-	rp.USs.PVposGain = 2  // higher = more pos reward (saturating logistic func)
-	rp.USs.PVnegGain = .1 // global scaling of RP neg level -- was 1
+	rp.USs.PVposGain = 2 // higher = more pos reward (saturating logistic func)
+	rp.USs.PVnegGain = 1 // global scaling of RP neg level -- was 1
 	rp.LHb.VSPatchGain = 5
 	rp.LHb.VSPatchNonRewThr = 0.15
 
@@ -224,9 +224,10 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
 	ny := ev.Config.Params.NYReps
 	narm := ev.Config.NArms
 
-	vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc := net.AddRubicon(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
+	vSgpi, vSmtxGo, vSmtxNo, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc := net.AddRubicon(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
 	_, _ = plUtil, urgency
 	_, _ = ofcNegUSCT, ofcNegUSPTp
+	_, _ = vSmtxGo, vSmtxNo
 
 	plUtilPTp := net.AxonLayerByName("PLutilPTp")
 
@@ -285,6 +286,9 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
 	net.ConnectToBLAAcq(cs, blaNegAcq, full)
 	net.ConnectToBLAExt(cs, blaNegExt, full)
 
+	// net.ConnectToVSMatrix(cs, vSmtxGo, full)
+	// net.ConnectToVSMatrix(cs, vSmtxNo, full)
+
 	// OFCus predicts cs
 	net.ConnectToPFCBack(cs, csP, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, full, "CSToPFC")
 	net.ConnectToPFCBack(cs, csP, ofcNegUS, ofcNegUSCT, ofcPosUSPT, ofcNegUSPTp, full, "CSToPFC")

diff --git a/examples/choose/configs/01_us_magnitude.toml b/examples/choose/configs/01_us_magnitude.toml
@@ -1,6 +1,6 @@
 # Basic test of US magnitude sensitivity, using GroupGoodBad
 
 Paradigm = "GroupGoodBad"
-USMagRange.Min = 0.3
+USMagRange.Min = 0.1
 USMagRange.Max = 1
 
diff --git a/examples/choose/configs/02_us_probability.toml b/examples/choose/configs/02_us_probability.toml
@@ -1,6 +1,6 @@
 # Basic test of US probability sensitivity, using GroupGoodBad
 
 Paradigm = "GroupGoodBad"
-USProbRange.Min = 0.5
+USProbRange.Min = 0.3
 USProbRange.Max = 1
 
diff --git a/examples/choose/params.go b/examples/choose/params.go
@@ -28,10 +28,6 @@ var ParamSets = netparams.Sets{
 				"Layer.Acts.Dend.ModGain":          "1.5", // 2 min -- reduces maint early
 				"Layer.Learn.NeuroMod.AChDisInhib": "0.0", // not much effect here..
 			}},
-		{Sel: ".BLALayer", Desc: "",
-			Params: params.Params{
-				"Layer.Learn.NeuroMod.DAModGain": "0.5",
-			}},
 		{Sel: ".VSTNLayer", Desc: "all VSTN",
 			Params: params.Params{
 				"Layer.Acts.Init.GeBase":      "0.1",
@@ -107,6 +103,10 @@ var ParamSets = netparams.Sets{
 				"Layer.Inhib.Layer.Gi": "2", // 2 fine with BLANovelInhib prjn
 				"Layer.Inhib.Pool.Gi":  "1",
 			}},
+		{Sel: "#BLAposExtD2", Desc: "",
+			Params: params.Params{
+				"Layer.CT.GeGain": "0.5",
+			}},
 		{Sel: "#BLAnegAcqD2", Desc: "",
 			Params: params.Params{
 				"Layer.Inhib.Layer.Gi": "1.2", // weaker
@@ -182,7 +182,7 @@ var ParamSets = netparams.Sets{
 			}},
 		{Sel: ".BLAExtPrjn", Desc: "ext learns relatively fast",
 			Params: params.Params{
-				"Prjn.Learn.LRate.Base": "0.005",
+				"Prjn.Learn.LRate.Base": "0.05",
 			}},
 		{Sel: ".BLAAcqToGo", Desc: "must dominate",
 			Params: params.Params{
@@ -191,7 +191,7 @@ var ParamSets = netparams.Sets{
 			}},
 		{Sel: ".BLAExtToAcq", Desc: "",
 			Params: params.Params{
-				"Prjn.PrjnScale.Abs": "0.5", // note: key param -- 0.5 > 1
+				"Prjn.PrjnScale.Abs": "2", // note: key param -- 0.5 > 1
 			}},
 		{Sel: ".PFCToVSMtx", Desc: "contextual, should be weaker",
 			Params: params.Params{