MunskyGroup · ericron · Jul 29, 2024 · Aug 2, 2024 · Aug 19, 2024 · Aug 19, 2024
diff --git a/notebooks/FISH_pipeline.ipynb b/notebooks/FISH_pipeline.ipynb
@@ -183,6 +183,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
+   "id": "a061b473",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -211,8 +212,8 @@
     "diameter_nucleus=100                         # Approximate nucleus size in pixels\n",
     "diameter_cytosol=200                         # Approximate cytosol size in pixels\n",
     "\n",
-    "psf_z=350                                    # Theoretical size of the PSF emitted by a [rna] spot in the z plan, in nanometers\n",
-    "psf_yx=160                                   # Theoretical size of the PSF emitted by a [rna] spot in the yx plan, in nanometers\n",
+    "mRNA_radius_z=350                                    # Theoretical size of the PSF emitted by a [rna] spot in the z plane, in nanometers\n",
+    "mRNA_radius_yx=160                                   # Theoretical size of the PSF emitted by a [rna] spot in the yx plane, in nanometers\n",
     "\n",
     "voxel_size_z=500                             # Microscope conversion px to nanometers in the z axis.\n",
     "voxel_size_yx=160                            # Microscope conversion px to nanometers in the xy axis.\n",
@@ -237,6 +238,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
+   "id": "2a2a6ac1",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -281,6 +283,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "72b35ad6",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -336,7 +339,7 @@
    "source": [
     "dataframe_FISH,_,_,_,output_identification_string = fa.PipelineFISH(local_data_dir, channels_with_cytosol, channels_with_nucleus, channels_with_FISH,diameter_nucleus, \n",
     "                                       diameter_cytosol, minimum_spots_cluster, masks_dir=masks_dir,  voxel_size_z=voxel_size_z,\n",
-    "                                       voxel_size_yx=voxel_size_yx ,psf_z=psf_z,psf_yx=psf_yx, show_plots=show_plots,  \n",
+    "                                       voxel_size_yx=voxel_size_yx ,mRNA_radius_z=mRNA_radius_z,mRNA_radius_yx=mRNA_radius_yx, show_plots=show_plots,  \n",
     "                                       file_name_str=data_folder_path.name, \n",
     "                                       optimization_segmentation_method=optimization_segmentation_method,\n",
     "                                       save_all_images=save_all_images,\n",

diff --git a/publications/Ron_2024/Data_Interpretation/DataManagement.py b/publications/Ron_2024/Data_Interpretation/DataManagement.py
@@ -17,7 +17,7 @@
 
 
 class DataManagement:
-    def __init__(self, file_path, Condition, DexConc, Replica, time_value,time_TPL_value,output_path,minimum_spots_cluster=4,mandatory_substring=None,connect_to_NAS=False,path_to_config_file=None,save_csv=True):
+    def __init__(self, file_path, Condition, DexConc, Replica, time_value,time_TPL_value,output_path,minimum_spots_cluster=2,mandatory_substring=None,connect_to_NAS=False,path_to_config_file=None,save_csv=True):
         # This section downloads the zip file from NAS and extracts the dataframe or uses the dataframe from the local folder.
         if connect_to_NAS == False:
             self.file_path = file_path
@@ -67,8 +67,9 @@ def data_processor(self):
         RNA_nuc_list = []  
         RNA_cyto_list = []  
         ts_size_list = []
+        nuc_cluster_list = []
+        cyto_cluster_list = []
 
-        # This code will loop through each cell and store the values in the lists above.
         for i in range(number_cells):
             nuc_area = np.asarray(dataframe.loc[
                 (dataframe['cell_id'] == i)
@@ -97,19 +98,49 @@ def data_processor(self):
                     (dataframe['is_cell_fragmented'] != -1)
                 ].pseudo_cyto_int_ch_0.values[0])
 
-            #  This is counting the number of spots in the nucleus that meet certain conditions and storing this in the nuc variable.
-            nuc = np.asarray(len(dataframe.loc[
+            # Count the number of RNA in the nucleus
+            nuc_spots = len(dataframe.loc[
                 (dataframe['cell_id'] == i) &
                 (dataframe['is_nuc'] == True) &
                 (dataframe['is_cell_fragmented'] != -1)
-            ].spot_id))
+            ].spot_id)
 
-            #  This is counting the number of spots in the nucleus that meet certain conditions and storing this in the nuc variable.
-            cyto = np.asarray(len(dataframe.loc[
+            nuc_cluster_rna = dataframe.loc[
+                (dataframe['cell_id'] == i) &
+                (dataframe['is_nuc'] == True) &
+                (dataframe['is_cell_fragmented'] != -1)
+            ].cluster_size.sum()
+
+            nuc = np.asarray(nuc_spots + nuc_cluster_rna - 1)
+
+            number_nuc_cluster = len(dataframe.loc[
+                (dataframe['cell_id'] == i) &
+                (dataframe['is_cluster'] == True) &
+                (dataframe['is_nuc'] == True) &
+                (dataframe['is_cell_fragmented'] != -1)
+            ].spot_id)
+
+            # Count the number of RNA in the cytoplasm
+            cyto_spots = len(dataframe.loc[
                 (dataframe['cell_id'] == i) &
                 (dataframe['is_nuc'] == False) &
                 (dataframe['is_cell_fragmented'] != -1)
-            ].spot_id))
+            ].spot_id)
+
+            cyto_cluster_rna = dataframe.loc[
+                (dataframe['cell_id'] == i) &
+                (dataframe['is_nuc'] == False) &
+                (dataframe['is_cell_fragmented'] != -1)
+            ].cluster_size.sum()
+
+            cyto = np.asarray(cyto_spots + cyto_cluster_rna - 1)
+
+            cyto_cluster_number = len(dataframe.loc[
+                (dataframe['cell_id'] == i) &
+                (dataframe['is_cluster'] == True) &
+                (dataframe['is_nuc'] == False) &
+                (dataframe['is_cell_fragmented'] != -1)
+            ].spot_id)
 
             ####### This is counting all transcription sites for DUSP1 that are larger than "minimum_spots_cluster".
             ts_size = dataframe.loc[
@@ -141,6 +172,9 @@ def data_processor(self):
             RNA_nuc_list.append(nuc)
             RNA_cyto_list.append(cyto)
             ts_size_list.append(ts_size_array)
+            nuc_cluster_list.append(number_nuc_cluster)
+            cyto_cluster_list.append(cyto_cluster_number)
+
 
         # Create a pandas DataFrame from the list of ts_int values
         df_ts_size_per_cell = pd.DataFrame(ts_size_list) 
@@ -160,8 +194,10 @@ def data_processor(self):
             'Cyto_GR_avg_int': GR_avg_cyto_intensity_list,
             'Nuc_DUSP1_avg_int': DUSP1_avg_nuc_intensity_list, # Only relevant for condition == DUSP1_timesweep and DUSP1_TPL. NaNs for GR_timesweep.
             'Cyto_DUSP1_avg_int': DUSP1_avg_cyto_intensity_list, # Only relevant for condition == DUSP1_timesweep and DUSP1_TPL. NaNs for GR_timesweep.
-            'RNA_DUSP1_nuc': RNA_nuc_list,    # RNA_GR_cyto do we need also for DUSP1?
-            'RNA_DUSP1_cyto': RNA_cyto_list  # RNA_GR_cyto do we need also for DUSP1?
+            'RNA_DUSP1_nuc': RNA_nuc_list,    
+            'RNA_DUSP1_cyto': RNA_cyto_list,
+            'Nuc_cluster_number': nuc_cluster_list,
+            'Cyto_cluster_number': cyto_cluster_list
         }
         # Create a pandas DataFrame from the dictionary
         df_data = pd.DataFrame(data)

diff --git a/publications/Ron_2024/Data_Interpretation/Notebok_Data_Interpretation.ipynb b/publications/Ron_2024/Data_Interpretation/Notebok_Data_Interpretation.ipynb
@@ -103,7 +103,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def summarize_dataframes(list_dirs, Condition, time_list, DexConc_list, time_TPL_value_list=None, Replica='A', minimum_spots_cluster=5, mandatory_substring='nuc_100__cyto_200__psfz_350__psfyx_160', connect_to_NAS=True,save_csv=True):\n",
+    "def summarize_dataframes(list_dirs, Condition, time_list, DexConc_list, time_TPL_value_list=None, Replica='A', minimum_spots_cluster=2, mandatory_substring='nuc_100__cyto_200__psfz_350__psfyx_160', connect_to_NAS=True,save_csv=True):\n",
     "    \"\"\"\n",
     "    Summarizes the dataframes from multiple directories into a single concatenated dataframe.\n",
     "\n",
@@ -114,7 +114,7 @@
     "    - DexConc_list (list): List of DexConc values.\n",
     "    - time_TPL_value_list (list, optional): List of time_TPL values. Defaults to None.\n",
     "    - Replica (str, optional): Replica value. Defaults to 'A'.\n",
-    "    - minimum_spots_cluster (int, optional): Minimum number of spots in a cluster. Defaults to 5.\n",
+    "    - minimum_spots_cluster (int, optional): Minimum number of spots in a cluster. Defaults to 2.\n",
     "    - mandatory_substring (str, optional): Mandatory substring in the file path. Defaults to 'nuc_100__cyto_200__psfz_350__psfyx_160'.\n",
     "    - connect_to_NAS (bool, optional): Flag to connect to NAS. Defaults to True.\n",
     "    - save_csv (bool, optional): Flag to save the concatenated dataframe as a CSV file. Defaults to True.\n",
@@ -738,7 +738,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.12"
+   "version": "3.8.15"
   }
  },
  "nbformat": 4,