From 42f7036f6a63db5f54ed68f4dd7da1c6bc1c31f2 Mon Sep 17 00:00:00 2001
From: "Randall C. O'Reilly" <oreilly@ucdavis.edu>
Date: Mon, 22 Apr 2024 00:03:57 -0700
Subject: [PATCH] novelty does not extinguish with negative da -- need some
 natural unlearning on that pathway because clearly the CS is no longer novel,
 and you don't want the actual novel US to extinguish.

---
 axon/rubicon_net.go                            |  2 +-
 examples/choose/README.md                      |  2 ++
 examples/choose/choose.go                      | 10 +++++++---
 examples/choose/configs/01_us_magnitude.toml   |  2 +-
 examples/choose/configs/02_us_probability.toml |  2 +-
 examples/choose/params.go                      | 12 ++++++------
 6 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/axon/rubicon_net.go b/axon/rubicon_net.go
index b5609bf0f..7febc3e0a 100644
--- a/axon/rubicon_net.go
+++ b/axon/rubicon_net.go
@@ -991,7 +991,7 @@ func (net *Network) AddRubiconOFCus(ctx *Context, nYneur, popY, popX, bgY, bgX,
 // Makes all appropriate interconnections and sets default parameters.
 // Needs CS -> BLA, OFC connections to be made.
 // Returns layers most likely to be used for remaining connections and positions.
-func (net *Network) AddRubicon(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfcX int, space float32) (vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPos, ofcPosCT, ofcPosPT, ofcPosPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNeg, ofcNegCT, ofcNegPT, ofcNegPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc *Layer) {
+func (net *Network) AddRubicon(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfcX int, space float32) (vSgpi, vSmtxGo, vSmtxNo, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPos, ofcPosCT, ofcPosPT, ofcPosPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNeg, ofcNegCT, ofcNegPT, ofcNegPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc *Layer) {
 
 	full := prjn.NewFull()
 	var pj *Prjn
diff --git a/examples/choose/README.md b/examples/choose/README.md
index 017d0b010..0bf95ae35 100644
--- a/examples/choose/README.md
+++ b/examples/choose/README.md
@@ -2,6 +2,8 @@
 
 This model implements the [Rubicon](../../Rubicon.md) model for goal-driven motivated behavior, in a decision-making task that requires choosing among options with different cost-benefit tradeoffs.  This exercises the core cost and benefit representations and goal selection and goal maintenance components of the Rubicon model
 
+* TODO: compute US prob at start -- otherwise it keeps doing it again and again
+
 ## Arm Maze Bandit task
 
 The task paradigm is an N-arm bandit task, implemented as a physical maze-like environment where the simulated rodent must walk down an arm to receive the reward outcome (_US_ = unconditioned stimulus) signalled by the stimulus (_CS_ = conditioned stimulus) visible from the start of the arm.  The arms can vary in length and effort (e.g., an elevated hill) to manipulate the cost, and the US varies in value according to internal _Drive_ states (e.g., hunger, thirst) and qualities of the US itself (e.g., how tasty it is).  Thus, the decision to enter an arm requires balancing the cost vs. benefit tradeoff or net utility, and all of this is learned through the course of exploring the maze over repeated trials.
diff --git a/examples/choose/choose.go b/examples/choose/choose.go
index e001161e6..f06272777 100644
--- a/examples/choose/choose.go
+++ b/examples/choose/choose.go
@@ -182,8 +182,8 @@ func (ss *Sim) ConfigRubicon(trn *armaze.Env) {
 	rp := &ss.Net.Rubicon
 	rp.SetNUSs(&ss.Context, trn.Config.NDrives, 1)
 	rp.Defaults()
-	rp.USs.PVposGain = 2  // higher = more pos reward (saturating logistic func)
-	rp.USs.PVnegGain = .1 // global scaling of RP neg level -- was 1
+	rp.USs.PVposGain = 2 // higher = more pos reward (saturating logistic func)
+	rp.USs.PVnegGain = 1 // global scaling of RP neg level -- was .1
 	rp.LHb.VSPatchGain = 5
 	rp.LHb.VSPatchNonRewThr = 0.15
 
@@ -224,9 +224,10 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
 	ny := ev.Config.Params.NYReps
 	narm := ev.Config.NArms
 
-	vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc := net.AddRubicon(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
+	vSgpi, vSmtxGo, vSmtxNo, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPT, ilNegPTp, accCost, plUtil, sc := net.AddRubicon(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
 	_, _ = plUtil, urgency
 	_, _ = ofcNegUSCT, ofcNegUSPTp
+	_, _ = vSmtxGo, vSmtxNo
 
 	plUtilPTp := net.AxonLayerByName("PLutilPTp")
 
@@ -285,6 +286,9 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
 	net.ConnectToBLAAcq(cs, blaNegAcq, full)
 	net.ConnectToBLAExt(cs, blaNegExt, full)
 
+	// net.ConnectToVSMatrix(cs, vSmtxGo, full)
+	// net.ConnectToVSMatrix(cs, vSmtxNo, full)
+
 	// OFCus predicts cs
 	net.ConnectToPFCBack(cs, csP, ofcPosUS, ofcPosUSCT, ofcPosUSPT, ofcPosUSPTp, full, "CSToPFC")
 	net.ConnectToPFCBack(cs, csP, ofcNegUS, ofcNegUSCT, ofcPosUSPT, ofcNegUSPTp, full, "CSToPFC")
diff --git a/examples/choose/configs/01_us_magnitude.toml b/examples/choose/configs/01_us_magnitude.toml
index 6121d9667..68ca9b661 100644
--- a/examples/choose/configs/01_us_magnitude.toml
+++ b/examples/choose/configs/01_us_magnitude.toml
@@ -1,6 +1,6 @@
 # Basic test of US magnitude sensitivity, using GroupGoodBad
 
 Paradigm = "GroupGoodBad"
-USMagRange.Min = 0.3
+USMagRange.Min = 0.1
 USMagRange.Max = 1
 
diff --git a/examples/choose/configs/02_us_probability.toml b/examples/choose/configs/02_us_probability.toml
index 48143389a..f17036c15 100644
--- a/examples/choose/configs/02_us_probability.toml
+++ b/examples/choose/configs/02_us_probability.toml
@@ -1,6 +1,6 @@
 # Basic test of US magnitude sensitivity, using GroupGoodBad
 
 Paradigm = "GroupGoodBad"
-USProbRange.Min = 0.5
+USProbRange.Min = 0.3
 USProbRange.Max = 1
 
diff --git a/examples/choose/params.go b/examples/choose/params.go
index 4c00f4870..dd9b1fa44 100644
--- a/examples/choose/params.go
+++ b/examples/choose/params.go
@@ -28,10 +28,6 @@ var ParamSets = netparams.Sets{
 				"Layer.Acts.Dend.ModGain":          "1.5", // 2 min -- reduces maint early
 				"Layer.Learn.NeuroMod.AChDisInhib": "0.0", // not much effect here..
 			}},
-		{Sel: ".BLALayer", Desc: "",
-			Params: params.Params{
-				"Layer.Learn.NeuroMod.DAModGain": "0.5",
-			}},
 		{Sel: ".VSTNLayer", Desc: "all VSTN",
 			Params: params.Params{
 				"Layer.Acts.Init.GeBase":      "0.1",
@@ -107,6 +103,10 @@ var ParamSets = netparams.Sets{
 				"Layer.Inhib.Layer.Gi": "2", // 2 fine with BLANovelInhib prjn
 				"Layer.Inhib.Pool.Gi":  "1",
 			}},
+		{Sel: "#BLAposExtD2", Desc: "",
+			Params: params.Params{
+				"Layer.CT.GeGain": "0.5",
+			}},
 		{Sel: "#BLAnegAcqD2", Desc: "",
 			Params: params.Params{
 				"Layer.Inhib.Layer.Gi": "1.2", // weaker
@@ -182,7 +182,7 @@ var ParamSets = netparams.Sets{
 			}},
 		{Sel: ".BLAExtPrjn", Desc: "ext learns relatively fast",
 			Params: params.Params{
-				"Prjn.Learn.LRate.Base": "0.005",
+				"Prjn.Learn.LRate.Base": "0.05",
 			}},
 		{Sel: ".BLAAcqToGo", Desc: "must dominate",
 			Params: params.Params{
@@ -191,7 +191,7 @@ var ParamSets = netparams.Sets{
 			}},
 		{Sel: ".BLAExtToAcq", Desc: "",
 			Params: params.Params{
-				"Prjn.PrjnScale.Abs": "0.5", // note: key param -- 0.5 > 1
+				"Prjn.PrjnScale.Abs": "2", // note: key param -- was 0.5 (0.5 > 1)
 			}},
 		{Sel: ".PFCToVSMtx", Desc: "contextual, should be weaker",
 			Params: params.Params{