Skip to content

Commit

Permalink
add documentation, simplify time window specification
Browse files Browse the repository at this point in the history
  • Loading branch information
egmcbride committed Jul 3, 2024
1 parent be3a8d2 commit 0b44257
Showing 1 changed file with 217 additions and 31 deletions.
248 changes: 217 additions & 31 deletions notebooks/decoding_example_egm.ipynb
Original file line number Diff line number Diff line change
@@ -1,10 +1,34 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Decode context from spikes or facemap\n",
"\n",
"1 - either use all annotated & uploaded ephys sessions as input or provide a list of session_ids\n",
"\n",
"2 - set a savepath and filename for the output - one .pkl file per session\n",
"\n",
"3 - set parameters - descriptions below\n",
"\n",
"4 - run decoding!"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
Expand All @@ -20,11 +44,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#get all uploaded & annotated ephys sessions\n",
"#1A get all uploaded & annotated ephys sessions\n",
"ephys_sessions = tuple(s for s in npc_lims.get_session_info(is_ephys=True, is_uploaded=True, is_annotated=True))"
]
},
Expand All @@ -34,7 +58,7 @@
"metadata": {},
"outputs": [],
"source": [
"#alternatively, provide a list of session ids:\n",
"#1B alternatively, provide a list of session ids:\n",
"session_id_list=['712815_2024-05-22','712815_2024-05-20','708016_2024-05-01']\n",
"session_list=[]\n",
"for ss in session_id_list:\n",
Expand All @@ -45,44 +69,195 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"712815_2024-05-22_0 loaded\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"probe_insertions.json and annotation info do not match for 712815_2024-05-22 - using annotation info\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"no cached trials table, using npc_sessions\n",
"no cached units table, using npc_sessions\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"fetching units: 100%|█████████████████████████| 2/2 [00:13<00:00, 6.55s/device]\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"finished 712815_2024-05-22 SUM\n",
"finished 712815_2024-05-22 PF_probeA\n",
"finished 712815_2024-05-22 LH\n",
"finished 712815_2024-05-22 DG\n",
"finished 712815_2024-05-22 PO\n",
"finished 712815_2024-05-22 Eth\n",
"finished 712815_2024-05-22 CA1_probeB\n",
"finished 712815_2024-05-22 all\n",
"finished 712815_2024-05-22\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Session 712815_2024-05-20 has known issues: ['https://github.com/AllenInstitute/npc_sessions/issues/96']\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"712815_2024-05-20_0 loaded\n",
"no cached units table, using npc_sessions\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"fetching units: 100%|█████████████████████████| 6/6 [00:34<00:00, 5.80s/device]\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"finished 712815_2024-05-20 MD\n",
"finished 712815_2024-05-20 MOs_probeA\n",
"finished 712815_2024-05-20 VPM\n",
"finished 712815_2024-05-20 PO_probeB\n",
"finished 712815_2024-05-20 Eth_probeB\n",
"finished 712815_2024-05-20 LP_probeB\n",
"finished 712815_2024-05-20 CA1_probeB\n",
"finished 712815_2024-05-20 PO_probeC\n",
"finished 712815_2024-05-20 CP\n",
"finished 712815_2024-05-20 SSp\n",
"finished 712815_2024-05-20 MOB\n",
"finished 712815_2024-05-20 ORBl\n",
"finished 712815_2024-05-20 FRP\n",
"finished 712815_2024-05-20 MOs_probeE\n",
"finished 712815_2024-05-20 ACAv\n",
"finished 712815_2024-05-20 ACAd_probeF\n",
"finished 712815_2024-05-20 MOs_probeF\n",
"finished 712815_2024-05-20 all\n",
"finished 712815_2024-05-20\n",
"708016_2024-05-01_0 loaded\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"probe_insertions.json and annotation info do not match for 708016_2024-05-01 - using annotation info\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"no cached trials table, using npc_sessions\n",
"no cached units table, using npc_sessions\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"fetching units: 100%|█████████████████████████| 5/5 [04:56<00:00, 59.38s/device]\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n",
"c:\\Anaconda3\\envs\\dr_analysis\\Lib\\site-packages\\numcodecs\\abc.py:107: UserWarning: Multi-threading is supported for wavpack version>=5.6.4, but current version is 5.5.0. Parallel decoding will not be available.\n",
" return cls(**config)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"finished 708016_2024-05-01 PO\n"
]
}
],
"source": [
"#set savepath and filename\n",
"savepath=r'\\\\allen\\programs\\mindscope\\workgroups\\templeton\\TTOC\\decoding results\\new_annotations\\linear_shift_20_units'\n",
"filename='decoding_results_linear_shift_20_units.pkl'\n",
"#2 set savepath and filename\n",
"savepath=r'\\\\allen\\programs\\mindscope\\workgroups\\templeton\\TTOC\\decoding results\\new_annotations\\linear_shift_20_units_re_run'\n",
"filename='decoding_results_linear_shift_20_units_re_run.pkl'\n",
"\n",
"except_list={}\n",
"\n",
"spikes_binsize=0.1\n",
"spikes_time_before=0.5\n",
"spikes_time_after=0.5\n",
"#3 set parameters\n",
"#linear shift decoding currently just takes the average firing rate over all bins defined here\n",
"spikes_binsize=0.2 #bin size in seconds\n",
"spikes_time_before=0.2 #time before the stimulus per trial\n",
"spikes_time_after=0.01 #time after the stimulus per trial\n",
"\n",
"decoder_binsize=0.2\n",
"decoder_time_before=0.2\n",
"decoder_time_after=0.1\n",
"# #not used for linear shift decoding, were used in a previous iteration of decoding analysis\n",
"# decoder_binsize=0.2\n",
"# decoder_time_before=0.2\n",
"# decoder_time_after=0.1\n",
"\n",
"params={\n",
" 'n_units':20,\n",
" 'n_repeats':25, \n",
" 'n_units':20, #number of units to sample for each area \n",
" 'n_repeats':25, #number of times to repeat decoding with different randomly sampled units\n",
" 'input_data_type':'spikes', #spikes or facemap\n",
" 'vid_angle':'face', #behavior, face, eye\n",
" 'central_section':'4_blocks_plus',\n",
" 'exclude_cue_trials':False,\n",
" 'n_unit_threshold':20,\n",
" 'keep_n_SVDs':500,\n",
" 'spikes_binsize':spikes_binsize,\n",
" 'central_section':'4_blocks_plus', #for linear shift decoding, how many trials to use for the shift. '4_blocks_plus' is best\n",
" 'exclude_cue_trials':False, #option to totally exclude autorewarded trials\n",
" 'n_unit_threshold':20, #minimum number of units to include an area in the analysis\n",
" 'keep_n_SVDs':500, #number of SVD components to keep for facemap data\n",
" 'spikes_binsize':spikes_binsize, \n",
" 'spikes_time_before':spikes_time_before,\n",
" 'spikes_time_after':spikes_time_after,\n",
" 'decoder_binsize':decoder_binsize,\n",
" 'decoder_time_before':decoder_time_before,\n",
" 'decoder_time_after':decoder_time_after,\n",
" # 'decoder_binsize':decoder_binsize,\n",
" # 'decoder_time_before':decoder_time_before,\n",
" # 'decoder_time_after':decoder_time_after,\n",
" 'savepath':savepath,\n",
" 'filename':filename,\n",
" 'use_structure_probe':True,\n",
" 'crossval':'5_fold',\n",
" 'labels_as_index':True,\n",
"    'use_structure_probe':True, #if True, append probe name to area name when multiple probes are in the same area\n",
" 'crossval':'5_fold', #'5_fold' or 'blockwise' - blockwise untested with linear shift\n",
" 'labels_as_index':True, #convert labels (context names) to index [0,1]\n",
" 'decoder_type':'linearSVC', # 'linearSVC' or 'LDA' or 'RandomForest' or 'LogisticRegression'\n",
"}\n",
"\n",
Expand All @@ -102,9 +277,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"{'712815_2024-05-22': \"ValueError('Input X contains NaN.\\\\nLinearSVC does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values')\"}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"except_list"
]
Expand Down

0 comments on commit 0b44257

Please sign in to comment.