diff --git a/docs/tutorials/generate_in_silico_data.ipynb b/docs/tutorials/generate_in_silico_data.ipynb index 26de65e..8e739f6 100644 --- a/docs/tutorials/generate_in_silico_data.ipynb +++ b/docs/tutorials/generate_in_silico_data.ipynb @@ -36,9 +36,9 @@ "\n", "The first step is to generate a gene population, or set of gene populations.\n", "A gene population is simply a class that stores a 1D tensor called `labels`.\n", - "`labels` is a boolean vector where 1 means the gene is part of the signal group\n", + "`labels` is a boolean vector where 1 means the gene is part of the bound group\n", "(a gene which is both bound and responsive to the TF) while 0 means the gene is\n", - "part of the background or noise group. The length of `labels` is the number of\n", + "part of the background or unbound group. The length of `labels` is the number of\n", "genes in the population, and the index should be considered the unique gene\n", "identifier. In other words, the indicies should never change." ] @@ -50,14 +50,14 @@ "outputs": [], "source": [ "n_genes = 1000\n", - "signal = [0.1, 0.15, 0.2, 0.25, 0.3]\n", + "bound = [0.1, 0.15, 0.2, 0.25, 0.3]\n", "n_sample = [1, 1, 2, 2, 4]\n", "\n", "# this will be a list of length 10 with a GenePopulation object in each element\n", "gene_populations_list = []\n", - "for signal_proportion, n_draws in zip(signal, n_sample):\n", + "for bound_proportion, n_draws in zip(bound, n_sample):\n", " for _ in range(n_draws):\n", - " gene_populations_list.append(generate_gene_population(n_genes, signal_proportion))\n" + " gene_populations_list.append(generate_gene_population(n_genes, bound_proportion))\n" ] }, { @@ -121,7 +121,7 @@ "source": [ "### Method 1: Generating perturbation data with no mean adjustment\n", "\n", - "If you don't pass in a value for `max_mean_adjustment` to `generate_perturbation_effects` it will default to zero, meaning the means of the perturbation effects will not be adjusted in any way and will all be equal to 
`signal_mean` (deault is 3.0) for bound TF-gene pairs and `noise_mean` (default is 0.0) for unbound TF-gene pairs." + "If you don't pass in a value for `max_mean_adjustment` to `generate_perturbation_effects` it will default to zero, meaning the means of the perturbation effects will not be adjusted in any way and will all be equal to `bound_mean` (default is 3.0) for bound TF-gene pairs and `unbound_mean` (default is 0.0) for unbound TF-gene pairs." ] }, { @@ -150,7 +150,7 @@ "metadata": {}, "outputs": [], "source": [ - "# if you want to modify the default mean for bound genes, you can pass in the 'signal_mean' parameter\n", + "# if you want to modify the default mean for bound genes, you can pass in the 'bound_mean' parameter\n", "perturbation_effects_list_normal_mean_adjustment = generate_perturbation_effects(\n", " binding_data_tensor, \n", " max_mean_adjustment=10.0\n", @@ -260,7 +260,7 @@ "The final step is to assemble the data into a single tensor. Here is one way.\n", "The order of the matrix in the last dimension is:\n", "\n", - "1. signal/noise label\n", + "1. bound/unbound label\n", "1. binding effect\n", "1. binding pvalue\n", "1. 
perturbation effect\n", @@ -340,7 +340,7 @@ "\n", "Ensure that the generated data matches expectations.\n", "\n", - "### The signal/noise ratios should match exactly the initial signal ratio" + "### The bound/unbound ratios should match exactly the initial bound ratio" ] }, { @@ -352,7 +352,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "signal/nosie ratio is correct: True\n" + "bound/unbound ratio is correct: True\n" ] } ], @@ -360,11 +360,11 @@ "tolerance = 1e-5\n", "are_equal = torch.isclose(\n", " torch.sum(final_data_tensor[:, :, 0] == 1, axis=0),\n", - " torch.tensor([val * n_genes for val, count in zip(signal, n_sample) for _ in range(count)],\n", + " torch.tensor([val * n_genes for val, count in zip(bound, n_sample) for _ in range(count)],\n", " dtype=torch.long),\n", " atol=tolerance)\n", "\n", - "print(f\"signal/nosie ratio is correct: {are_equal.all()}\")" + "print(f\"bound/unbound ratio is correct: {are_equal.all()}\")" ] }, { @@ -385,26 +385,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "The noise binding max is 13.157892227172852 and the min is 0.0\n", - "the noise min is 0.0\n", - "the noise mean is 0.3589712679386139 and the std is 1.1559306383132935\n", - "The signal binding max is 78.94734954833984 and the min is 0.1315789520740509\n", - "the signal min is 0.1315789520740509\n", - "the signal mean is 2.4840002059936523 and the std is 6.374814510345459\n" + "The unbound binding max is 13.157892227172852 and the min is 0.0\n", + "the unbound min is 0.0\n", + "the unbound mean is 0.3589712679386139 and the std is 1.1559306383132935\n", + "The bound binding max is 78.94734954833984 and the min is 0.1315789520740509\n", + "the bound min is 0.1315789520740509\n", + "the bound mean is 2.4840002059936523 and the std is 6.374814510345459\n" ] } ], "source": [ "labels = final_data_tensor[:, :, 0].flatten()\n", - "noise_binding = final_data_tensor[:, :, 1].flatten()[labels == 0]\n", - "signal_binding = final_data_tensor[:, :, 
1].flatten()[labels == 1]\n", + "unbound_binding = final_data_tensor[:, :, 1].flatten()[labels == 0]\n", + "bound_binding = final_data_tensor[:, :, 1].flatten()[labels == 1]\n", "\n", - "print(f\"The noise binding max is {noise_binding.max()} and the min is {noise_binding.min()}\")\n", - "print(f\"the noise min is {noise_binding.min()}\")\n", - "print(f\"the noise mean is {noise_binding.mean()} and the std is {noise_binding.std()}\")\n", - "print(f\"The signal binding max is {signal_binding.max()} and the min is {signal_binding.min()}\")\n", - "print(f\"the signal min is {signal_binding.min()}\")\n", - "print(f\"the signal mean is {signal_binding.mean()} and the std is {signal_binding.std()}\")" + "print(f\"The unbound binding max is {unbound_binding.max()} and the min is {unbound_binding.min()}\")\n", + "print(f\"the unbound min is {unbound_binding.min()}\")\n", + "print(f\"the unbound mean is {unbound_binding.mean()} and the std is {unbound_binding.std()}\")\n", + "print(f\"The bound binding max is {bound_binding.max()} and the min is {bound_binding.min()}\")\n", + "print(f\"the bound min is {bound_binding.min()}\")\n", + "print(f\"the bound mean is {bound_binding.mean()} and the std is {bound_binding.std()}\")\n", + "\n", + "#this is the output before EJ change to adjustment mean\n", + "# The unbound binding max is 13.157892227172852 and the min is 0.0\n", + "# the unbound min is 0.0\n", + "# the unbound mean is 0.3589712679386139 and the std is 1.1559306383132935\n", + "# The bound binding max is 78.94734954833984 and the min is 0.1315789520740509\n", + "# the bound min is 0.1315789520740509\n", + "# the bound mean is 2.4840002059936523 and the std is 6.374814510345459" ] }, { @@ -414,7 +422,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -427,8 +435,8 @@ "\n", "# Plotting\n", "plt.figure(figsize=(10, 6))\n", - "plt.hist(noise_binding, bins=30, alpha=0.5, label='Label 0', color='orange')\n", - "plt.hist(signal_binding, bins=30, alpha=0.5, label='Label 1', color='blue')\n", + "plt.hist(unbound_binding, bins=30, alpha=0.5, label='Label 0', color='orange')\n", + "plt.hist(bound_binding, bins=30, alpha=0.5, label='Label 1', color='blue')\n", "plt.xlim(0,5)\n", "plt.title('Histogram of Values in the 2nd Column')\n", "plt.xlabel('Values')\n", @@ -453,25 +461,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "The noise binding max is 3.423511505126953 and the min is -3.506139039993286\n", - "the noise min is -3.506139039993286\n", - "the noise mean is 0.010617653839290142 and the std is 0.988001823425293\n", - "The signal binding max is 6.107701301574707 and the min is -6.406703948974609\n", - "the signal min is -6.406703948974609\n", - "the signal mean is -0.011303802020847797 and the std is 3.136451482772827\n" + "The unbound binding max is 3.423511505126953 and the min is -3.506139039993286\n", + "the unbound min is -3.506139039993286\n", + "the unbound mean is 0.010617653839290142 and the std is 0.988001823425293\n", + "The bound binding max is 6.107701301574707 and the min is -6.406703948974609\n", + "the bound min is -6.406703948974609\n", + "the bound mean is -0.011303802020847797 and the std is 3.136451482772827\n" ] } ], "source": [ - "noise_perturbation = final_data_tensor[:, :, 3].flatten()[labels == 0]\n", - "signal_perturbation = final_data_tensor[:, :, 3].flatten()[labels == 1]\n", + "unbound_perturbation = final_data_tensor[:, :, 3].flatten()[labels == 0]\n", + "bound_perturbation = final_data_tensor[:, :, 3].flatten()[labels == 1]\n", + "\n", + "print(f\"The unbound binding max is {unbound_perturbation.max()} and the min is {unbound_perturbation.min()}\")\n", + "print(f\"the unbound min is {unbound_perturbation.min()}\")\n", + "print(f\"the unbound mean is 
{unbound_perturbation.mean()} and the std is {unbound_perturbation.std()}\")\n", + "print(f\"The bound binding max is {bound_perturbation.max()} and the min is {bound_perturbation.min()}\")\n", + "print(f\"the bound min is {bound_perturbation.min()}\")\n", + "print(f\"the bound mean is {bound_perturbation.mean()} and the std is {bound_perturbation.std()}\")\n", "\n", - "print(f\"The noise binding max is {noise_perturbation.max()} and the min is {noise_perturbation.min()}\")\n", - "print(f\"the noise min is {noise_perturbation.min()}\")\n", - "print(f\"the noise mean is {noise_perturbation.mean()} and the std is {noise_perturbation.std()}\")\n", - "print(f\"The signal binding max is {signal_perturbation.max()} and the min is {signal_perturbation.min()}\")\n", - "print(f\"the signal min is {signal_perturbation.min()}\")\n", - "print(f\"the signal mean is {signal_perturbation.mean()} and the std is {signal_perturbation.std()}\")" + "#pre change data\n", + "# The unbound binding max is 3.423511505126953 and the min is -3.506139039993286\n", + "# the unbound min is -3.506139039993286\n", + "# the unbound mean is 0.010617653839290142 and the std is 0.988001823425293\n", + "# The bound binding max is 6.107701301574707 and the min is -6.406703948974609\n", + "# the bound min is -6.406703948974609\n", + "# the bound mean is -0.011303802020847797 and the std is 3.136451482772827" ] }, { @@ -481,7 +497,7 @@ "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA1IAAAIjCAYAAAAJLyrXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAABTp0lEQVR4nO3deVhV5f7+8XszCwqICmjirKk540RmOZA4Hk3LzAmNMg1NJTU5leKcmkPOdjLUU6Z5yupYDjibkqampuZUKk6gZYDgkXH//vDH/rbDgYXgBn2/rmtfx/2sZ631efbadLh51mAym81mAQAAAAByzM7WBQAAAABAYUOQAgAAAACDCFIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAJQaFWoUEH9+vWzdRkPvenTp6tSpUqyt7dXvXr1Hth+ly5dKpPJpLNnzz6wfRphMpkUERGR7/vZtm2bTCaT/vOf/+T7vh6Us2fPymQyaenSpTbZf79+/VShQgWb7BvAw4MgBaBAyPqled++fbdd3qJFC9WqVeu+9/Pdd989kF9+HxYbN27UqFGj1KxZM0VGRmry5MnZ+qSlpalkyZJ66qmn7rgds9ksPz8/NWjQID/LLdRWrFih2bNn22z/58+f17hx49S4cWMVL15cJUuWVIsWLbRp0yab1ZQlLi5OI0aMUPXq1eXq6io3Nzf5+/tr4sSJio+Pt3V5AB5RDrYuAABy68SJE7KzM/b3oO+++07z588nTOXQli1bZGdnpyVLlsjJyem2fRwdHfXCCy9o8eLFOnfunMqXL5+tz44dO3ThwgUNHz48v0t+YP73v//JwSHv/m90xYoVOnLkiIYNG5Zn2zTi66+/1tSpU9WlSxcFBwcrPT1dy5cv17PPPquPP/5Y/fv3t0ldP/74o9q3b6+kpCT17t1b/v7+kqR9+/bpvffe044dO7Rx40ab1Abg0UaQAlBoOTs727oEw5KTk+Xm5mbrMnLsypUrKlKkyB1DVJZevXpp0aJF+uyzzzR69Ohsy1esWCE7Ozv16NEjv0p94FxcXGxdQp5q2bKlYmJiVLJkSUvbwIEDVa9ePY0ZM8YmQSo+Pl7PPfec7O3t9dNPP6l69epWyydNmqR//etfD7wuAJA4tQ9AIfb3a6TS0tI0btw4Va1aVS4uLipRooSeeuopRUVFSbp1XcT8+fMl3bq+JeuVJTk5WW+++ab8/Pzk7Oysxx9/XO+//77MZrPVfv/3v//pjTfeUMmSJVWsWDH94x//0MWLF7NdMxMRESGTyaRjx46pZ8+eKl68uOX0t8OHD6tfv36qVKmSXFxc5Ovrq5dffll//PGH1b6ytnHy5En17t1bHh4eKlWqlN59912ZzWadP39enTt3lru7u3x9fTVjxowcfXbp6emaMGGCKleuLGdnZ1WoUEH//Oc/lZKSYuljMpkUGRmp5ORky2d1p2tamjVrpgoVKmjFihXZlqWlpek///mPWrZsqTJlyuR47Ldzp+uSbne9XHx8vIYNG2Y5nlWqVNHUqVOVmZlp1W/lypXy9/dXsWLF5O7urtq1a+uDDz4wXEvWsTp9+rT69esnT09PeXh4qH///rpx48Zdt9WiRQt9++23OnfunOWz/vs1PJmZmZo0aZLKli0rFxcXtW7dWqdPn862rT179qht27by8PCQq6urnnnmGe3ateue43niiSesQpR0648V7du314ULF3T9+nVLe79+/VS0aFFdvHhRXbp0UdGiRVWqVCmNGDFCGRkZVtuIj49Xv3795OHhIU9PTwUHB+f4dLzFixfr4sWLmjlzZrYQJUk+Pj565513rNoWLFigJ554Qs7OzipTpoxCQ0Pvub+s69C2bdtm1X67a7myxh4TE6OOHTuqaNGieuyxxyz/bfn555/VqlUrubm5qXz58tl+JrJOY961a5fCwsJUqlQpubm56bnnntPVq1dz9Lk
AKBiYkQJQoCQkJOj333/P1p6WlnbPdSMiIjRlyhS98soraty4sRITE7Vv3z4dOHBAzz77rF577TVdunRJUVFR+ve//221rtls1j/+8Q9t3bpVISEhqlevnjZs2KCRI0fq4sWLmjVrlqVvv3799Pnnn6tPnz5q2rSptm/frg4dOtyxrhdeeEFVq1bV5MmTLaEsKipKv/32m/r37y9fX18dPXpUH374oY4ePaoffvjBKuBJ0osvvqgaNWrovffe07fffquJEyfKy8tLixcvVqtWrTR16lR9+umnGjFihBo1aqSnn376rp/VK6+8omXLlun555/Xm2++qT179mjKlCn65ZdftGbNGknSv//9b3344Yfau3evPvroI0nSk08+edvtmUwm9ezZU5MnT9bRo0f1xBNPWJatX79e165dU69evXI19ty4ceOGnnnmGV28eFGvvfaaypUrp927dys8PFyXL1+2XIsUFRWll156Sa1bt9bUqVMlSb/88ot27dqloUOH5mrf3bt3V8WKFTVlyhQdOHBAH330kby9vS3bv523335bCQkJunDhguW7VrRoUas+7733nuzs7DRixAglJCRo2rRp6tWrl/bs2WPps2XLFrVr107+/v4aO3as7OzsFBkZqVatWmnnzp1q3Lix4fHExsbK1dVVrq6uVu0ZGRkKCgpSkyZN9P7772vTpk2aMWOGKleurEGDBkm69XPVuXNnff/99xo4cKBq1KihNWvWKDg4OEf7/uabb1SkSBE9//zzOeofERGhcePGKTAwUIMGDdKJEye0cOFC/fjjj9q1a5ccHR2NDf4OMjIy1K5dOz399NOaNm2aPv30Uw0ePFhubm56++231atXL3Xt2lWLFi1S3759FRAQoIoVK1ptY8iQISpevLjGjh2rs2fPavbs2Ro8eLBWrVqVJzUCeADMAFAAREZGmiXd9fXEE09YrVO+fHlzcHCw5X3dunXNHTp0uOt+QkNDzbf7T99XX31llmSeOHGiVfvzzz9vNplM5tOnT5vNZrN5//79ZknmYcOGWfXr16+fWZJ57NixlraxY8eaJZlfeumlbPu7ceNGtrbPPvvMLMm8Y8eObNsYMGCApS09Pd1ctmxZs8lkMr/33nuW9j///NNcpEgRq8/kdg4ePGiWZH7llVes2keMGGGWZN6yZYulLTg42Ozm5nbX7WU5evSoWZI5PDzcqr1Hjx5mFxcXc0JCgtlszvnYs74TZ86csbT9/TPO8vfvwoQJE8xubm7mkydPWvUbPXq02d7e3hwTE2M2m83moUOHmt3d3c3p6ek5GuNf3el4v/zyy1b9nnvuOXOJEiXuub0OHTqYy5cvn61969atZknmGjVqmFNSUiztH3zwgVmS+eeffzabzWZzZmamuWrVquagoCBzZmampd+NGzfMFStWND/77LMGR2g2nzp1yuzi4mLu06ePVXtwcLBZknn8+PFW7fXr1zf7+/tb3mf9XE2bNs3Slp6ebm7evLlZkjkyMvKu+y9evLi5bt26Oar1ypUrZicnJ3ObNm3MGRkZlvZ58+aZJZk//vhjq/r/+llnfcZbt2612uaZM2ey1Zk19smTJ1vasn72TCaTeeXKlZb248ePZ/ueZH2vAwMDrY7T8OHDzfb29ub4+PgcjReA7XFqH4ACZf78+YqKisr2qlOnzj3X9fT01NGjR3Xq1CnD+/3uu+9kb2+vN954w6r9zTfflNls1rp16yTdml2RpNdff92q35AhQ+647YEDB2ZrK1KkiOXfN2/e1O+//66mTZtKkg4cOJCt/yuvvGL5t729vRo2bCiz2ayQkBBLu6enpx5//HH99ttvd6xFujVWSQoLC7Nqf/PNNyVJ33777V3Xv5OaNWuqfv36WrlypaUtOTlZ33zzjTp27Ch3d3dJxseeG6tXr1bz5s1VvHhx/f7775ZXYGCgMjIytGPHDkm3PrPk5GTL6Z954e/Hu3nz5vrjjz+UmJh4X9vt37+/1bV
qzZs3lyTL8T548KBOnTqlnj176o8//rCMOTk5Wa1bt9aOHTuyndZ4Nzdu3NALL7ygIkWK6L333rttn9uN9a/fv++++04ODg6WGSrp1vf3bj8vf5WYmKhixYrlqO+mTZuUmpqqYcOGWd2E5tVXX5W7u3uuv9d38tefyayfPTc3N3Xv3t3S/vjjj8vT0/O2P5MDBgywmn1t3ry5MjIydO7cuTytE0D+4dQ+AAVK48aN1bBhw2ztWb8Q38348ePVuXNnVatWTbVq1VLbtm3Vp0+fHIWwc+fOqUyZMtl+aatRo4Zledb/2tnZZTtNp0qVKnfc9t/7StK1a9c0btw4rVy5UleuXLFalpCQkK1/uXLlrN57eHjIxcUl2zUtHh4e97zWKGsMf6/Z19dXnp6e9/WLXK9evTRixAjt3r1bTz75pL766ivduHHDclqfZHzsuXHq1CkdPnxYpUqVuu3yrP2+/vrr+vzzz9WuXTs99thjatOmjbp37662bdvmet9/P1bFixeXJP3555+WMJnX25Vk+QPC3U6bS0hIsKx3NxkZGerRo4eOHTumdevWqUyZMtn6uLi4ZPt8ixcvbqlHuvVdK126dLbTFB9//PF71iBJ7u7uVtdm3U3W9/bv23ZyclKlSpXyNKDcbuweHh4qW7ZstlNTPTw8rD6TLPc6ngAKPoIUgIfG008/rV9//VVff/21Nm7cqI8++kizZs3SokWLrP56/KD9dQYmS/fu3bV7926NHDlS9erVU9GiRZWZmam2bdvedtbA3t4+R22Sst0c407y4lqkv3vppZc0atQorVixQk8++aRWrFih4sWLq3379pY+RseeE3+/wUFmZqaeffZZjRo16rb9q1WrJkny9vbWwYMHtWHDBq1bt07r1q1TZGSk+vbtq2XLluWqlvs9LrndbtZnN3369Ds+OPnvgeZOXn31Va1du1affvqpWrVqZaievFS9enUdPHhQqamp97xz5P2408/C379XWe40diPHPr++JwAeHIIUgIeKl5eX+vfvr/79+yspKUlPP/20IiIiLEHqTr8wlS9fXps2bdL169etZqWOHz9uWZ71v5mZmTpz5oyqVq1q6Xe7u6fdyZ9//qnNmzdr3LhxGjNmjKU9N6ck5kbWGE6dOmWZcZNuPfQ0Pj7+ts+ByqkyZcqoZcuWWr16td59911FRUWpX79+ll+C73fsxYsXz3YHttTUVF2+fNmqrXLlykpKSlJgYOA9t+nk5KROnTqpU6dOyszM1Ouvv67Fixfr3XffvetMY16732BbuXJlSbdmcXIy7jsZOXKkIiMjNXv2bL300kv3VVP58uW1efNmJSUlWYW4EydO5Gj9Tp06KTo6Wl988cU9a8n63p44cUKVKlWytKempurMmTN3/UyyZoP+/t3iNDsAd8M1UgAeGn8/pa1o0aKqUqWK1S29s57h9PdfmNq3b6+MjAzNmzfPqn3WrFkymUxq166dJCkoKEjSrVss/9XcuXNzXGfWX6L//pfnrDvJ5bes2aG/72/mzJmSdNc7EOZEr169dOXKFb322mtKS0uzOq3vfsdeuXJly/VNWT788MNsMwfdu3dXdHS0NmzYkG0b8fHxSk9Pl5T9O2NnZ2c5FfSv35sHwc3N7b5ObfT391flypX1/vvvKykpKdvynNxae/r06Xr//ff1z3/+M9d3Lfyr9u3bKz09XQsXLrS0ZWRk5PjnZeDAgSpdurTefPNNnTx5MtvyK1euaOLEiZKkwMBAOTk5ac6cOVbfryVLlighIeGu3+vy5cvL3t4+23fr7z/nAPBXzEgBeGjUrFlTLVq0kL+/v7y8vLRv3z795z//0eDBgy19/P39JUlvvPGGgoKCZG9vrx49eqhTp05q2bKl3n77bZ09e1Z169bVxo0b9fXXX2vYsGGWv/b7+/urW7dumj17tv744w/L7c+zfsnLyayCu7u75bbJaWlpeuyxx7Rx40adOXMmHz6V7OrWravg4GB9+OG
Hio+P1zPPPKO9e/dq2bJl6tKli1q2bHlf2+/WrZtef/11ff311/Lz87O6Ffv9jv2VV17RwIED1a1bNz377LM6dOiQNmzYkO1asZEjR1puctGvXz/5+/srOTlZP//8s/7zn//o7NmzKlmypF555RVdu3ZNrVq1UtmyZXXu3DnNnTtX9erVs5qtexD8/f21atUqhYWFqVGjRipatKg6deqU4/Xt7Oz00UcfqV27dnriiSfUv39/PfbYY7p48aK2bt0qd3d3/fe//73j+mvWrNGoUaNUtWpV1ahRQ5988onV8meffVY+Pj6GxtSpUyc1a9ZMo0eP1tmzZ1WzZk19+eWXOQ6MxYsX15o1a9S+fXvVq1dPvXv3tvwMHzhwQJ999pkCAgIkSaVKlVJ4eLjGjRuntm3b6h//+IdOnDihBQsWqFGjRurdu/cd9+Ph4aEXXnhBc+fOlclkUuXKlbV27dps1/ABwF8RpAA8NN544w1988032rhxo1JSUlS+fHlNnDhRI0eOtPTp2rWrhgwZopUrV+qTTz6R2WxWjx49ZGdnp2+++UZjxozRqlWrFBkZqQoVKmj69OmWu9llWb58uXx9ffXZZ59pzZo1CgwM1KpVq/T444/LxcUlR7WuWLFCQ4YM0fz582U2m9WmTZs7XtSfHz766CNVqlRJS5cu1Zo1a+Tr66vw8HCNHTv2vrft7u6uTp06afXq1XrppZeyhcv7Gfurr76qM2fOaMmSJVq/fr2aN2+uqKgotW7d2qqfq6urtm/frsmTJ2v16tVavny53N3dVa1aNY0bN04eHh6SpN69e+vDDz/UggULFB8fL19fX7344ouKiIiwuvPbg/D666/r4MGDioyM1KxZs1S+fHlDQUq69WDf6OhoTZgwQfPmzVNSUpJ8fX3VpEkTvfbaa3dd99ChQ5JunWbZp0+fbMu3bt1qOEhl/VwNGzZMn3zyiUwmk/7xj39oxowZql+/fo620aRJEx05ckTTp0/Xt99+q3//+9+ys7NTjRo1NHr0aKs/lERERKhUqVKaN2+ehg8fLi8vLw0YMECTJ0++5zOk5s6dq7S0NC1atEjOzs7q3r27pk+frlq1ahkaM4BHh8nMVY0AcN8OHjyo+vXr65NPPrE6lQ0AADycuEYKAAz63//+l61t9uzZsrOzszqNDQAAPLw4tQ8ADJo2bZr279+vli1bysHBwXLb7AEDBsjPz8/W5QEAgAeAU/sAwKCoqCiNGzdOx44dU1JSksqVK6c+ffro7bffloMDf58CAOBRQJACAAAAAIO4RgoAAAAADCJIAQAAAIBBnMwvKTMzU5cuXVKxYsVy9DBNAAAAAA8ns9ms69evq0yZMnd9piBBStKlS5e40xYAAAAAi/Pnz6ts2bJ3XE6QklSsWDFJtz4sd3d3G1cDAAAAwFYSExPl5+dnyQh3QpCSLKfzubu7E6QAAAAA3POSH242AQAAAAAGEaQAAAAAwCCCFAAAAAAYxDVSAAAAwANgNpuVnp6ujIwMW5fySLO3t5eDg8N9P/aIIAUAAADks9TUVF2+fFk3btywdSmQ5OrqqtKlS8vJySnX2yBIAQAAAPkoMzNTZ86ckb29vcqUKSMnJ6f7ng1B7pjNZqWmpurq1as6c+aMqlateteH7t4NQQoAAADIR6mpqcrMzJSfn59cXV1tXc4jr0iRInJ0dNS5c+eUmpoqFxeXXG2Hm00AAAAAD0BuZz6Q9/LiWHA0AQAAAMAgghQAAAAAGMQ1UgAAAIAtHI54sPur82D3t3TpUg0bNkzx8fH3tR2TyaQ1a9aoS5cueVJXXmFGCgAAAEA2/fr1K3Dh5U7mz5+vChUqyMXFRU2aNNHevXvzfZ8EKQAAAACF1qpVqxQWFqaxY8fqwIEDqlu3roKCgnTlypV83S9BCgAAAIBhM2fOVO3ateXm5iY/Pz+9/vrrSkpKytbvq6++UtWqVeXi4qKgoCCdP3/eavnXX3+tBg0ayMXFRZUqVdK
4ceOUnp5uqI5XX31V/fv3V82aNbVo0SK5urrq448/vu8x3g1BCgAAAIBhdnZ2mjNnjo4ePaply5Zpy5YtGjVqlFWfGzduaNKkSVq+fLl27dql+Ph49ejRw7J8586d6tu3r4YOHapjx45p8eLFWrp0qSZNmpSjGlJTU7V//34FBgZa1RUYGKjo6Oi8GegdEKQAAAAAGDZs2DC1bNlSFSpUUKtWrTRx4kR9/vnnVn3S0tI0b948BQQEyN/fX8uWLdPu3bst1zCNGzdOo0ePVnBwsCpVqqRnn31WEyZM0OLFi3NUw++//66MjAz5+PhYtfv4+Cg2NjZvBnoH3LUPAAAAgGGbNm3SlClTdPz4cSUmJio9PV03b97UjRs35OrqKklycHBQo0aNLOtUr15dnp6e+uWXX9S4cWMdOnRIu3btspqBysjIyLadgoggBQAAAMCQs2fPqmPHjho0aJAmTZokLy8vff/99woJCVFqamqOA1BSUpLGjRunrl27Zlvm4uJyz/VLliwpe3t7xcXFWbXHxcXJ19c3Z4PJJU7tAwAAAGDI/v37lZmZqRkzZqhp06aqVq2aLl26lK1fenq69u3bZ3l/4sQJxcfHq0aNGpKkBg0a6MSJE6pSpUq2l53dvaOKk5OT/P39tXnzZktbZmamNm/erICAgDwY6Z0xIwUAeLQcjjC+zgN+iCUAFBQJCQk6ePCgVVuJEiVUpUoVpaWlae7cuerUqZN27dqlRYsWZVvf0dFRQ4YM0Zw5c+Tg4KDBgweradOmaty4sSRpzJgx6tixo8qVK6fnn39ednZ2OnTokI4cOaKJEyfmqMawsDAFBwerYcOGaty4sWbPnq3k5GT179//vsd/NwQpAAAAwBYKwR9ptm3bpvr161u1hYSE6KOPPtLMmTM1depUhYeH6+mnn9aUKVPUt29fq76urq5666231LNnT128eFHNmzfXkiVLLMuDgoK0du1ajR8/XlOnTpWjo6OqV6+uV155Jcc1vvjii7p69arGjBmj2NhY1atXT+vXr892A4q8ZjKbzeZ83UMhkJiYKA8PDyUkJMjd3d3W5QAA8tPhCOPrFIJfdgAUXDdv3tSZM2dUsWLFHF33g/x3t2OS02zANVIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwyKZBqkKFCjKZTNleoaGhkm5dBBYaGqoSJUqoaNGi6tatW7aHbcXExKhDhw5ydXWVt7e3Ro4cqfT0dFsMBwAAAMAjwqZB6scff9Tly5ctr6ioKEnSCy+8IEkaPny4/vvf/2r16tXavn27Ll26ZPXU44yMDHXo0EGpqanavXu3li1bpqVLl2rMmDE2GQ8AAACAR4NNg1SpUqXk6+trea1du1aVK1fWM888o4SEBC1ZskQzZ85Uq1at5O/vr8jISO3evVs//PCDJGnjxo06duyYPvnkE9WrV0/t2rXThAkTNH/+fKWmptpyaAAAAAAeYgXmGqnU1FR98sknevnll2UymbR//36lpaUpMDDQ0qd69eoqV66coqOjJUnR0dGqXbu21cO2goKClJiYqKNHj95xXykpKUpMTLR6AQAAAEBOFZgg9dVXXyk+Pl79+vWTJMXGxsrJyUmenp5W/Xx8fBQbG2vp8/cnFme9z+pzO1OmTJGHh4fl5efnl3cDAQAAAPDQc7B1AVmWLFmidu3aqUyZMvm+r/DwcIWFhVneJyYmEqYAAADwQEVEPNz7W7p0qYYNG6b4+Pj72o7JZNKaNWvUpUuXPKkrrxSIGalz585p06ZNeuWVVyxtvr6+Sk1NzfbBx8XFydfX19Ln73fxy3qf1ed2nJ2d5e7ubvUCAAAA8H/69etX4MLL7ezYsUOdOnVSmTJlZDKZ9NVXXz2Q/RaIIBUZGSlvb2916NDB0ubv7y9HR0dt3rzZ0nbixAnFxMQoICBAkhQQEKCff/5ZV65csfSJioqSu7u7atas+eAGAAAAAMAmkpOTVbduXc2fP/+B7tf
mQSozM1ORkZEKDg6Wg8P/nWno4eGhkJAQhYWFaevWrdq/f7/69++vgIAANW3aVJLUpk0b1axZU3369NGhQ4e0YcMGvfPOOwoNDZWzs7OthgQAAAA89GbOnKnatWvLzc1Nfn5+ev3115WUlJSt31dffaWqVavKxcVFQUFBOn/+vNXyr7/+Wg0aNJCLi4sqVaqkcePGGXoubLt27TRx4kQ999xz9z0mI2wepDZt2qSYmBi9/PLL2ZbNmjVLHTt2VLdu3fT000/L19dXX375pWW5vb291q5dK3t7ewUEBKh3797q27evxo8f/yCHAAAAADxy7OzsNGfOHB09elTLli3Tli1bNGrUKKs+N27c0KRJk7R8+XLt2rVL8fHx6tGjh2X5zp071bdvXw0dOlTHjh3T4sWLtXTpUk2aNOlBD8cwm99sok2bNjKbzbdd5uLiovnz5991mq58+fL67rvv8qs8AAAAALcxbNgwy78rVKigiRMnauDAgVqwYIGlPS0tTfPmzVOTJk0kScuWLVONGjW0d+9eNW7cWOPGjdPo0aMVHBwsSapUqZImTJigUaNGaezYsQ90PEbZPEgBAAAAKHw2bdqkKVOm6Pjx40pMTFR6erpu3rypGzduyNXVVZLk4OCgRo0aWdapXr26PD099csvv6hx48Y6dOiQdu3aZTUDlZGRkW07BRFBCgAAAIAhZ8+eVceOHTVo0CBNmjRJXl5e+v777xUSEqLU1NQcB6CkpCSNGzdOXbt2zbbMxcUlr8vOUwQpAAAAAIbs379fmZmZmjFjhuzsbt124fPPP8/WLz09Xfv27VPjxo0l3boLd3x8vGrUqCFJatCggU6cOKEqVao8uOLzCEEKAAAAwG0lJCTo4MGDVm0lSpRQlSpVlJaWprlz56pTp07atWuXFi1alG19R0dHDRkyRHPmzJGDg4MGDx6spk2bWoLVmDFj1LFjR5UrV07PP/+87OzsdOjQIR05ckQTJ07MUY1JSUk6ffq05f2ZM2d08OBBeXl5qVy5crkf/D0QpAAAyA+HI4yvUycX6wAotCIibF3BvW3btk3169e3agsJCdFHH32kmTNnaurUqQoPD9fTTz+tKVOmqG/fvlZ9XV1d9dZbb6lnz566ePGimjdvriVLlliWBwUFae3atRo/frymTp0qR0dHVa9eXa+88kqOa9y3b59atmxpeR8WFiZJCg4O1tKlS3Mx6pwxme90y7xHSGJiojw8PJSQkCB3d3dblwMAyE+HI4yvk5uA86D2A6DAu3nzps6cOaOKFSsW+Ot+HhV3OyY5zQY2f44UAAAAABQ2BCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAPAAcI+3giMvjgVBCgAAAMhHjo6OkqQbN27YuBJkyToWWccmN3iOFAAAAJCP7O3t5enpqStXrki69Wwlk8lk46oeTWazWTdu3NCVK1fk6ekpe3v7XG+LIAUAAADkM19fX0myhCnYlqenp+WY5BZBCgBQeB2OsHUFAJAjJpNJpUuXlre3t9LS0mxdziPN0dHxvmaishCkAAAAgAfE3t4+T36Jh+1xswkAAAAAMIgZKQAA7uVwhK0rAAAUMMxIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABhGkAAAAAMAgB1sXAAAA/r/DEcbXqZOLdQAA940ZKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAIAAAAAgwhSAAAAAGAQQQoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABhGkAAAAAMAgmwepixcvqnfv3ipRooSKFCmi2rVra9++fZblZrNZY8aMUenSpVWkSBE
FBgbq1KlTVtu4du2aevXqJXd3d3l6eiokJERJSUkPeigAAAAAHhE2DVJ//vmnmjVrJkdHR61bt07Hjh3TjBkzVLx4cUufadOmac6cOVq0aJH27NkjNzc3BQUF6ebNm5Y+vXr10tGjRxUVFaW1a9dqx44dGjBggC2GBAAAAOARYDKbzWZb7Xz06NHatWuXdu7cedvlZrNZZcqU0ZtvvqkRI0ZIkhISEuTj46OlS5eqR48e+uWXX1SzZk39+OOPatiwoSRp/fr1at++vS5cuKAyZcrcs47ExER5eHgoISFB7u7ueTdAAED+Ohxh6wpsr06ErSsAgIdKTrOBTWekvvnmGzVs2FAvvPCCvL29Vb9+ff3rX/+yLD9z5oxiY2MVGBhoafPw8FCTJk0UHR0tSYqOjpanp6clRElSYGCg7OzstGfPntvuNyUlRYmJiVYvAAAAAMgpmwap3377TQsXLlTVqlW1YcMGDRo0SG+88YaWLVsmSYqNjZUk+fj4WK3n4+NjWRYbGytvb2+r5Q4ODvLy8rL0+bspU6bIw8PD8vLz88vroQEAAAB4iDnYcueZmZlq2LChJk+eLEmqX7++jhw5okWLFik4ODjf9hseHq6wsDDL+8TERMIUANja4QhbVwAAQI7ZdEaqdOnSqlmzplVbjRo1FBMTI0ny9fWVJMXFxVn1iYuLsyzz9fXVlStXrJanp6fr2rVrlj5/5+zsLHd3d6sXAAAAAOSUTYNUs2bNdOLECau2kydPqnz58pKkihUrytfXV5s3b7YsT0xM1J49exQQECBJCggIUHx8vPbv32/ps2XLFmVmZqpJkyYPYBQAAAAAHjU2PbVv+PDhevLJJzV58mR1795de/fu1YcffqgPP/xQkmQymTRs2DBNnDhRVatWVcWKFfXuu++qTJky6tKli6RbM1ht27bVq6++qkWLFiktLU2DBw9Wjx49cnTHPgAAAAAwyqZBqlGjRlqzZo3Cw8M1fvx4VaxYUbNnz1avXr0sfUaNGqXk5GQNGDBA8fHxeuqpp7R+/Xq5uLhY+nz66acaPHiwWrduLTs7O3Xr1k1z5syxxZAAAAAAPAJs+hypgoLnSAFAAXA4wtYVFE48RwoA8lSheI4UAAAAABRGBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABhGkAAAAAMAgghQAAAAAGESQAgAAAACDCFIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAIAAAAAgwhSAAAAAGAQQQoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABjnYugAAAHAfDkcYX6dOLtYBAFhhRgoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAxysHUBAICH0OEIW1cAAEC+YkYKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABtk0SEVERMhkMlm9qlevbll+8+ZNhYaGqkSJEipatKi6deumuLg4q23ExMSoQ4cOcnV1lbe3t0aOHKn09PQHPRQAAAAAjxCb37XviSee0KZNmyzvHRz+r6Thw4fr22+/1erVq+Xh4aHBgwera9eu2rVrlyQpIyNDHTp0kK+vr3bv3q3Lly+rb9++cnR01OTJkx/4WAAAAAA8GmwepBwcHOTr65utPSEhQUuWLNGKFSvUqlUrSVJkZKRq1KihH374QU2bNtXGjRt17Ngxbdq0ST4+PqpXr54mTJigt956SxEREXJycnrQwwEAAADwCLD5NVKnTp1SmTJlVKlSJfXq1Us
xMTGSpP379ystLU2BgYGWvtWrV1e5cuUUHR0tSYqOjlbt2rXl4+Nj6RMUFKTExEQdPXr0jvtMSUlRYmKi1QsAAAAAcsqmQapJkyZaunSp1q9fr4ULF+rMmTNq3ry5rl+/rtjYWDk5OcnT09NqHR8fH8XGxkqSYmNjrUJU1vKsZXcyZcoUeXh4WF5+fn55OzAAAAAADzWbntrXrl07y7/r1KmjJk2aqHz58vr8889VpEiRfNtveHi4wsLCLO8TExMJUwAAAAByzOan9v2Vp6enqlWrptOnT8vX11epqamKj4+36hMXF2e5psrX1zfbXfyy3t/uuqsszs7Ocnd3t3oBAAAAQE4VqCCVlJSkX3/9VaVLl5a/v78cHR21efNmy/ITJ04oJiZGAQEBkqSAgAD9/PPPunLliqVPVFSU3N3dVbNmzQdePwAAAIBHg01P7RsxYoQ6deqk8uXL69KlSxo7dqzs7e310ksvycPDQyEhIQoLC5OXl5fc3d01ZMgQBQQEqGnTppKkNm3aqGbNmurTp4+mTZum2NhYvfPOOwoNDZWzs7MthwYAAADgIWbTIHXhwgW99NJL+uOPP1SqVCk99dRT+uGHH1SqVClJ0qxZs2RnZ6du3bopJSVFQUFBWrBggWV9e3t7rV27VoMGDVJAQIDc3NwUHBys8ePH22pIAAAAAB4BJrPZbLZ1EbaWmJgoDw8PJSQkcL0UAOSFwxG2rgB3UyfC1hUAQIGV02xQoK6RAgAAAIDCgCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAIAAAAAgwhSAAAAAGAQQQoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABhGkAAAAAMAgghQAAAAAGESQAgAAAACDCFIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAIAAAAAgwhSAAAAAGAQQQoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYlKsg9dtvv+V1HQAAAABQaDjkZqUqVaromWeeUUhIiJ5//nm5uLjkdV0AACC/HI4wvk6dXKwDAA+xXM1IHThwQHXq1FFYWJh8fX312muvae/evXldGwAAAAAUSLkKUvXq1dMHH3ygS5cu6eOPP9bly5f11FNPqVatWpo5c6auXr2a13UCAAAAQIFxXzebcHBwUNeuXbV69WpNnTpVp0+f1ogRI+Tn56e+ffvq8uXLeVUnAAAAABQY9xWk9u3bp9dff12lS5fWzJkzNWLECP3666+KiorSpUuX1Llz57yqEwAAAAAKjFzdbGLmzJmKjIzUiRMn1L59ey1fvlzt27eXnd2tXFaxYkUtXbpUFSpUyMtaAQAAAKBAyFWQWrhwoV5++WX169dPpUuXvm0fb29vLVmy5L6KAwAAAICCKFdB6tSpU/fs4+TkpODg4NxsHgAAAAAKtFxdIxUZGanVq1dna1+9erWWLVt230UBAAAAQEGWqyA1ZcoUlSxZMlu7t7e3Jk+efN9FAQAAAEBBlqsgFRMTo4oVK2ZrL1++vGJiYu67KAAAAAAoyHIVpLy9vXX48OFs7YcOHVKJEiXuuygAAAAAKMhyFaReeuklvfHGG9q6dasyMjKUkZGhLVu2aOjQoerRo0de1wgAAAAABUqu7to3YcIEnT17Vq1bt5aDw61NZGZmqm/fvlwjBQAAAOChl6sZKScnJ61atUrHjx/Xp59+qi+//FK//vqrPv74Yzk5OeWqkPfee08mk0nDhg2ztN28eVOhoaEqUaKEihYtqm7duikuLs5qvZiYGHXo0EGurq7y9vbWyJEjlZ6enqsaAAAAACAncjUjlaVatWqqVq3afRfx448/avHixapTp45V+/Dhw/Xtt99
q9erV8vDw0ODBg9W1a1ft2rVLkpSRkaEOHTrI19dXu3fv1uXLl9W3b185OjoyMwYAAAAg3+QqSGVkZGjp0qXavHmzrly5oszMTKvlW7ZsyfG2kpKS1KtXL/3rX//SxIkTLe0JCQlasmSJVqxYoVatWkm69fyqGjVq6IcfflDTpk21ceNGHTt2TJs2bZKPj4/q1aunCRMm6K233lJERESuZ8cAAAAA4G5ydWrf0KFDNXToUGVkZKhWrVqqW7eu1cuI0NBQdejQQYGBgVbt+/fvV1pamlV79erVVa5cOUVHR0uSoqOjVbt2bfn4+Fj6BAUFKTExUUePHr3jPlNSUpSYmGj1AgAAAICcytWM1MqVK/X555+rffv297XzlStX6sCBA/rxxx+zLYuNjZWTk5M8PT2t2n18fBQbG2vp89cQlbU8a9mdTJkyRePGjbuv2gEAAAA8unJ9s4kqVarc147Pnz+voUOH6tNPP5WLi8t9bcuo8PBwJSQkWF7nz59/oPsHAAAAULjlKki9+eab+uCDD2Q2m3O94/379+vKlStq0KCBHBwc5ODgoO3bt2vOnDlycHCQj4+PUlNTFR8fb7VeXFycfH19JUm+vr7Z7uKX9T6rz+04OzvL3d3d6gUAAAAAOZWrU/u+//57bd26VevWrdMTTzwhR0dHq+VffvnlPbfRunVr/fzzz1Zt/fv3V/Xq1fXWW2/Jz89Pjo6O2rx5s7p16yZJOnHihGJiYhQQECBJCggI0KRJk3TlyhV5e3tLkqKiouTu7q6aNWvmZmgAAAAAcE+5ClKenp567rnn7mvHxYoVU61ataza3NzcVKJECUt7SEiIwsLC5OXlJXd3dw0ZMkQBAQFq2rSpJKlNmzaqWbOm+vTpo2nTpik2NlbvvPOOQkND5ezsfF/1AQAAAMCd5CpIRUZG5nUdtzVr1izZ2dmpW7duSklJUVBQkBYsWGBZbm9vr7Vr12rQoEEKCAiQm5ubgoODNX78+AdSHwAAAIBHk8mcywud0tPTtW3bNv3666/q2bOnihUrpkuXLsnd3V1FixbN6zrzVWJiojw8PJSQkMD1UgCQFw5H2LoC5LU6EbauAAAeiJxmg1zNSJ07d05t27ZVTEyMUlJS9Oyzz6pYsWKaOnWqUlJStGjRolwXDgAAAAAFXa4fyNuwYUP9+eefKlKkiKX9ueee0+bNm/OsOAAAAAAoiHI1I7Vz507t3r1bTk5OVu0VKlTQxYsX86QwAAAAACiocjUjlZmZqYyMjGztFy5cULFixe67KAAAAAAoyHIVpNq0aaPZs2db3ptMJiUlJWns2LFq3759XtUGAAAAAAVSrk7tmzFjhoKCglSzZk3dvHlTPXv21KlTp1SyZEl99tlneV0jAMCWDkfYugIAAAqcXAWpsmXL6tChQ1q5cqUOHz6spKQkhYSEqFevXlY3nwAAAACAh1GugpQkOTg4qHfv3nlZCwAAAAAUCrkKUsuXL7/r8r59++aqGAAAAAAoDHIVpIYOHWr1Pi0tTTdu3JCTk5NcXV0JUgAAAAAearm6a9+ff/5p9UpKStKJEyf01FNPcbMJAAAAAA+9XAWp26lataree++9bLNVAAAAAPCwybMgJd26AcWlS5fycpMAAAAAUODk6hqpb775xuq92WzW5cuXNW/ePDVr1ixPCgMAAACAgipXQapLly5W700mk0qVKqVWrVppxowZeVEXAAAAABRYuQpSmZmZeV0HAAAAABQaeXqNFAAAAAA8CnI1IxUWFpbjvjNnzszNLgAAAACgwMpVkPrpp5/0008/KS0tTY8//rgk6eTJk7K3t1eDBg0s/UwmU95UCQAAAAAFSK6CVKdOnVSsWDEtW7ZMxYsXl3TrIb39+/dX8+bN9eabb+ZpkQAAAABQkOTqGqkZM2ZoypQplhAlScWLF9fEiRO5ax8AAACAh16uglRiYqKuXr2arf3q1au6fv36fRcFAAAAAAVZroLUc889p/79++vLL7/
UhQsXdOHCBX3xxRcKCQlR165d87pGAAAAAChQcnWN1KJFizRixAj17NlTaWlptzbk4KCQkBBNnz49TwsEAAAAgIImV0HK1dVVCxYs0PTp0/Xrr79KkipXriw3N7c8LQ4AAAAACqL7eiDv5cuXdfnyZVWtWlVubm4ym815VRcAAAAAFFi5ClJ//PGHWrdurWrVqql9+/a6fPmyJCkkJIRbnwMAAAB46OUqSA0fPlyOjo6KiYmRq6urpf3FF1/U+vXr86w4AAAAACiIcnWN1MaNG7VhwwaVLVvWqr1q1ao6d+5cnhQGAAAAAAVVrmakkpOTrWaisly7dk3Ozs73XRQAAAAAFGS5ClLNmzfX8uXLLe9NJpMyMzM1bdo0tWzZMs+KAwAAAICCKFen9k2bNk2tW7fWvn37lJqaqlGjRuno0aO6du2adu3aldc1AgAAAECBkqsZqVq1aunkyZN66qmn1LlzZyUnJ6tr16766aefVLly5byuEQAAAAAKFMMzUmlpaWrbtq0WLVqkt99+Oz9qAgAAAIACzfCMlKOjow4fPpwftQAAAABAoZCrU/t69+6tJUuW5HUtAAAAAFAo5OpmE+np6fr444+1adMm+fv7y83NzWr5zJkz86Q4AAAAACiIDAWp3377TRUqVNCRI0fUoEEDSdLJkyet+phMpryrDgAAAAAKIENBqmrVqrp8+bK2bt0qSXrxxRc1Z84c+fj45EtxAAAAAFAQGbpGymw2W71ft26dkpOT87QgAAAAACjocnWNVJa/BysAAPCQOhxhfJ06uVgHAAoJQzNSJpMp2zVQXBMFAAAA4FFjaEbKbDarX79+cnZ2liTdvHlTAwcOzHbXvi+//DLvKgQAAACAAsZQkAoODrZ637t37zwtBgAAAAAKA0NBKjIyMr/qAAAAAIBCw9A1UgAAAAAAghQAAAAAGEaQAgAAAACDCFIAAAAAYBBBCgAAAAAMsmmQWrhwoerUqSN3d3e5u7srICBA69atsyy/efOmQkNDVaJECRUtWlTdunVTXFyc1TZiYmLUoUMHubq6ytvbWyNHjlR6evqDHgoAAACAR4hNg1TZsmX13nvvaf/+/dq3b59atWqlzp076+jRo5Kk4cOH67///a9Wr16t7du369KlS+ratatl/YyMDHXo0EGpqanavXu3li1bpqVLl2rMmDG2GhIAAACAR4DJbDabbV3EX3l5eWn69Ol6/vnnVapUKa1YsULPP/+8JOn48eOqUaOGoqOj1bRpU61bt04dO3bUpUuX5OPjI0latGiR3nrrLV29elVOTk452mdiYqI8PDyUkJAgd3f3fBsbABRKhyNsXQEKqzoRtq4AAAzLaTYoMNdIZWRkaOXKlUpOTlZAQID279+vtLQ0BQYGWvpUr15d5cqVU3R0tCQpOjpatWvXtoQoSQoKClJiYqJlVut2UlJSlJiYaPUCAAAAgJyyeZD6+eefVbRoUTk7O2vgwIFas2aNatasqdjYWDk5OcnT09Oqv4+Pj2JjYyVJsbGxViEqa3nWsjuZMmWKPDw8LC8/P7+8HRQAAACAh5rNg9Tjjz+ugwcPas+ePRo0aJCCg4N17NixfN1neHi4EhISLK/z58/n6/4AAAAAPFwcbF2Ak5OTqlSpIkny9/fXjz/+qA8++EAvvviiUlNTFR8fbzUrFRcXJ19fX0mSr6+v9u7da7W9rLv6ZfW5HWdnZzk7O+fxSAAAAAA8Kmw+I/V3mZmZSklJkb+/vxwdHbV582bLshMnTigmJkYBAQGSpICAAP3888+6cuWKpU9UVJTc3d1Vs2bNB147AAAAgEeDTWekwsPD1a5dO5UrV07Xr1/XihUrtG3bNm3YsEEeHh4KCQlRWFiYvLy85O7uriFDhiggIEBNmzaVJLVp00Y1a9ZUnz59NG3aNMXGxuqdd95RaGgoM04AAAAA8o1Ng9SVK1fUt29fXb58WR4eHqpTp442bNigZ599VpI0a9Ys2dnZqVu3bkp
JSVFQUJAWLFhgWd/e3l5r167VoEGDFBAQIDc3NwUHB2v8+PG2GhIAAACAR0CBe46ULfAcKQC4i8MRtq4AhRXPkQJQCBW650gBAAAAQGFh87v2AQAeoMMRtq4AAICHAjNSAAAAAGAQQQoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABhGkAAAAAMAgghQAAAAAGESQAgAAAACDCFIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAIAAAAAgwhSAAAAAGAQQQoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgkIOtCwAAAA+pwxHG16mTi3UAwAaYkQIAAAAAgwhSAAAAAGAQQQoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABtk0SE2ZMkWNGjVSsWLF5O3trS5duujEiRNWfW7evKnQ0FCVKFFCRYsWVbdu3RQXF2fVJyYmRh06dJCrq6u8vb01cuRIpaenP8ihAAAAAHiE2DRIbd++XaGhofrhhx8UFRWltLQ0tWnTRsnJyZY+w4cP13//+1+tXr1a27dv16VLl9S1a1fL8oyMDHXo0EGpqanavXu3li1bpqVLl2rMmDG2GBIAAACAR4DJbDabbV1ElqtXr8rb21vbt2/X008/rYSEBJUqVUorVqzQ888/L0k6fvy4atSooejoaDVt2lTr1q1Tx44ddenSJfn4+EiSFi1apLfeektXr16Vk5PTPfebmJgoDw8PJSQkyN3dPV/HCAA2dTjC1hUAd1cnwtYVFCgREYVz20BhltNsUKCukUpISJAkeXl5SZL279+vtLQ0BQYGWvpUr15d5cqVU3R0tCQpOjpatWvXtoQoSQoKClJiYqKOHj162/2kpKQoMTHR6gUAAAAAOVVgglRmZqaGDRumZs2aqVatWpKk2NhYOTk5ydPT06qvj4+PYmNjLX3+GqKylmctu50pU6bIw8PD8vLz88vj0QAAAAB4mDnYuoAsoaGhOnLkiL7//vt831d4eLjCwsIs7xMTEwlTAAqfwxG2rgAAgEdWgQhSgwcP1tq1a7Vjxw6VLVvW0u7r66vU1FTFx8dbzUrFxcXJ19fX0mfv3r1W28u6q19Wn79zdnaWs7NzHo8CQG5w/j8AACiMbBqkzGazhgwZojVr1mjbtm2qWLGi1XJ/f385Ojpq8+bN6tatmyTpxIkTiomJUUBAgCQpICBAkyZN0pUrV+Tt7S1JioqKkru7u2rWrPlgBwQAAB4p/MEGeHTZNEiFhoZqxYoV+vrrr1WsWDHLNU0eHh4qUqSIPDw8FBISorCwMHl5ecnd3V1DhgxRQECAmjZtKklq06aNatasqT59+mjatGmKjY3VO++8o9DQUGadAAAAAOQLmwaphQsXSpJatGhh1R4ZGal+/fpJkmbNmiU7Ozt169ZNKSkpCgoK0oIFCyx97e3ttXbtWg0aNEgBAQFyc3NTcHCwxo8f/6CGAQAAAOARY/NT++7FxcVF8+fP1/z58+/Yp3z58vruu+/ysjQAAGALhyOMr8OzpwDYQIG5/TkAAAAAFBYEKQAAAAAwqEDc/hwAAAAPVn7fcZA7GuJhx4wUAAAAABhEkAIAAAAAgzi1D8BdcWoGAABAdsxIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQ62LgAA8ktEROHePgAAKLiYkQIAAAAAgwhSAAAAAGAQQQo
AAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCeIwU8BHieEQAAwIPFjBQAAAAAGESQAgAAAACDCFIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAIAAAAAgwhSAAAAAGAQQQoAAAAADHKwdQEAUFhFRBTObQPAg8B/I/GwY0YKAAAAAAwiSAEAAACAQZzaBwAAHmqcBgYgPxCkAKAgiNtm/f7wttv1AgAABQSn9gEAAACAQQQpAAAAADCIIAUAAAAABhGkAAAAAMAgghQAAAAAGESQAgAAAACDCFIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwyKZBaseOHerUqZPKlCkjk8mkr776ymq52WzWmDFjVLp0aRUpUkSBgYE6deqUVZ9r166pV69ecnd3l6enp0JCQpSUlPQARwEAAGzqcMTdX3Hbsr8A4D7ZNEglJyerbt26mj9//m2XT5s2TXPmzNGiRYu0Z88eubm5KSgoSDdv3rT06dWrl44ePaqoqCitXbtWO3bs0IABAx7UEAAAAAA8ghxsufN27dqpXbt2t11mNps1e/ZsvfPOO+rcubMkafny5fLx8dFXX32lHj166JdfftH69ev1448/qmHDhpKkuXPnqn379nr//fdVpkyZ2247JSVFKSkplveJiYl5PDLAWkSErSsAAABAXiqw10idOXNGsbGxCgwMtLR5eHioSZMmio6OliRFR0fL09PTEqIkKTAwUHZ2dtqzZ88dtz1lyhR5eHhYXn5+fvk3EAAAAAAPnQIbpGJjYyVJPj4+Vu0+Pj6WZbGxsfL29rZa7uDgIC8vL0uf2wkPD1dCQoLldf78+TyuHgAAAMDDzKan9tmKs7OznJ2dbV0GAAAAgEKqwM5I+fr6SpLi4uKs2uPi4izLfH19deXKFavl6enpunbtmqUPAAAAAOS1AjsjVbFiRfn6+mrz5s2qV6+epFs3hdizZ48GDRokSQoICFB8fLz2798vf39/SdKWLVuUmZmpJk2a2Kp0AI86bq0MAMBDz6ZBKikpSadPn7a8P3PmjA4ePCgvLy+VK1dOw4YN08SJE1W1alVVrFhR7777rsqUKaMuXbpIkmrUqKG2bdvq1Vdf1aJFi5SWlqbBgwerR48ed7xjHwAAAADcL5sGqX379qlly5aW92FhYZKk4OBgLV26VKNGjVJycrIGDBig+Ph4PfXUU1q/fr1cXFws63z66acaPHiwWrduLTs7O3Xr1k1z5sx54GMBAAC5E7Gwha1LAADDbBqkWrRoIbPZfMflJpNJ48eP1/jx4+/Yx8vLSytWrMiP8gAAAADgtgrszSYAAAAAoKAiSAEAAACAQQQpAAAAADCowN7+HAAeZfl98X3EoG35un0AAB52zEgBAAAAgEEEKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABjEXfsAAABQqEREFM5t4+HCjBQAAAAAGESQAgAAAACDCFIAAAAAYBBBCgAAAAAMIkgBAAAAgEEEKQAAAAAwiNufA/8ftzsFAABAThGkAADAoydum/F1fFrkdRUACjGCFADcTW5+2QIAFFr5fYYKZ8A8PLhGCgAAAAAMIkgBAAAAgEEEKQAAAAAwiCAFAAAAAAYRpAAAAADAIIIUAAAAABhEkAIAAAAAgwhSAAAAAGAQD+QFAAD3FLGwha1LsL3cPKDbp0VeVwGggGBGCgAAAAAMIkgBAAAAgEEEKQAAAAAwiGukAOARlJ/Xu0QM2pZv2wYAoKBgRgoAAAAADCJIAQAAAIBBBCkAAAAAMIggBQAAAAAGcbMJAI+O3DxMEwAA4DYIUig0IiJsXQEAAABwC0EKAAAgv+RmJtynRV5XgQIkP/8wzB+dHyyukQIAAAAAgwhSAAAAAGAQp/YBKJy4cUSBFbGwRf5uf9C2fN1+YZXfnzsAwBozUgAAAABgEEEKAAAAAAw
iSAEAAACAQQQpAAAAADCIm00gT/H8AgAAADwKCFIAAAAFCQ/xBQoFghQAAA8ItygHgIcHQQpA3uL5TgDw4DGLBTxwD02Qmj9/vqZPn67Y2FjVrVtXc+fOVePGjW1dFgAgj+XnrA4P+wUA5NRDEaRWrVqlsLAwLVq0SE2aNNHs2bMVFBSkEydOyNvb29blAQAAFDzMYj108vumX9xUzJrJbDabbV3E/WrSpIkaNWqkefPmSZIyMzPl5+enIUOGaPTo0fdcPzExUR4eHkpISJC7u3t+l3tPfElRYHCaHgDgrwhSyCcF6fffnGaDQj8jlZqaqv379ys8PNzSZmdnp8DAQEVHR992nZSUFKWkpFjeJyQkSLr1oRUEfykNsK3UZFtXAAAoSFIKxu9KePgUkF/DJf1fJrjXfFOhD1K///67MjIy5OPjY9Xu4+Oj48eP33adKVOmaNy4cdna/fz88qVGAAAAAHf23nu2riC769evy8PD447LC32Qyo3w8HCFhYVZ3mdmZuratWsqUaKETCaTDSvLX4mJifLz89P58+cLxCmMMIbjV/hxDAs/jmHhxvEr/DiGhVthOX5ms1nXr19XmTJl7tqv0AepkiVLyt7eXnFxcVbtcXFx8vX1ve06zs7OcnZ2tmrz9PTMrxILHHd39wL95cXdcfwKP45h4ccxLNw4foUfx7BwKwzH724zUVnsHkAd+crJyUn+/v7avHmzpS0zM1ObN29WQECADSsDAAAA8LAq9DNSkhQWFqbg4GA1bNhQjRs31uzZs5WcnKz+/fvbujQAAAAAD6GHIki9+OKLunr1qsaMGaPY2FjVq1dP69evz3YDikeds7Ozxo4dm+20RhQOHL/Cj2NY+HEMCzeOX+HHMSzcHrbj91A8RwoAAAAAHqRCf40UAAAAADxoBCkAAAAAMIggBQAAAAAGEaQAAAAAwCCC1CPs22+/VZMmTVSkSBEVL15cXbp0sXVJyIWUlBTVq1dPJpNJBw8etHU5yIGzZ88qJCREFStWVJEiRVS5cmWNHTtWqampti4NdzF//nxVqFBBLi4uatKkifbu3WvrkpBDU6ZMUaNGjVSsWDF5e3urS5cuOnHihK3LQi699957MplMGjZsmK1LgQEXL15U7969VaJECRUpUkS1a9fWvn37bF3WfSFIPaK++OIL9enTR/3799ehQ4e0a9cu9ezZ09ZlIRdGjRqlMmXK2LoMGHD8+HFlZmZq8eLFOnr0qGbNmqVFixbpn//8p61Lwx2sWrVKYWFhGjt2rA4cOKC6desqKChIV65csXVpyIHt27crNDRUP/zwg6KiopSWlqY2bdooOTnZ1qXBoB9//FGLFy9WnTp1bF0KDPjzzz/VrFkzOTo6at26dTp27JhmzJih4sWL27q0+8Ltzx9B6enpqlChgsaNG6eQkBBbl4P7sG7dOoWFhemLL77QE088oZ9++kn16tWzdVnIhenTp2vhwoX67bffbF0KbqNJkyZq1KiR5s2bJ0nKzMyUn5+fhgwZotGjR9u4Ohh19epVeXt7a/v27Xr66adtXQ5yKCkpSQ0aNNCCBQs0ceJE1atXT7Nnz7Z1WciB0aNHa9euXdq5c6etS8lTzEg9gg4cOKCLFy/Kzs5O9evXV+nSpdWuXTsdOXLE1qXBgLi4OL366qv697//LVdXV1uXg/uUkJAgLy8vW5eB20hNTdX+/fsVGBhoabOzs1NgYKCio6NtWBlyKyEhQZL4mStkQkND1aFDB6ufRRQO33zzjRo2bKgXXnhB3t7eql+/vv71r3/Zuqz7RpB6BGX9xTsiIkLvvPOO1q5dq+LFi6tFixa6du2ajatDTpjNZvXr108DBw5Uw4YNbV0O7tPp06c1d+5cvfbaa7YuBbfx+++/KyMjQz4+PlbtPj4+io2NtVFVyK3MzEwNGzZMzZo1U61atWxdDnJo5cqVOnDggKZMmWLrUpALv/32mxYuXKiqVatqw4YNGjRokN544w0tW7b
M1qXdF4LUQ2T06NEymUx3fWVdmyFJb7/9trp16yZ/f39FRkbKZDJp9erVNh7Foy2nx3Du3Lm6fv26wsPDbV0y/iKnx++vLl68qLZt2+qFF17Qq6++aqPKgUdHaGiojhw5opUrV9q6FOTQ+fPnNXToUH366adycXGxdTnIhczMTDVo0ECTJ09W/fr1NWDAAL366qtatGiRrUu7Lw62LgB5580331S/fv3u2qdSpUq6fPmyJKlmzZqWdmdnZ1WqVEkxMTH5WSLuIafHcMuWLYqOjpazs7PVsoYNG6pXr16F/i88hVVOj1+WS5cuqWXLlnryySf14Ycf5nN1yK2SJUvK3t5ecXFxVu1xcXHy9fW1UVXIjcGDB2vt2rXasWOHypYta+tykEP79+/XlStX1KBBA0tbRkaGduzYoXnz5iklJUX29vY2rBD3Urp0aavfOyWpRo0a+uKLL2xUUd4gSD1ESpUqpVKlSt2zn7+/v5ydnXXixAk99dRTkqS0tDSdPXtW5cuXz+8ycRc5PYZz5szRxIkTLe8vXbqkoKAgrVq1Sk2aNMnPEnEXOT1+0q2ZqJYtW1pmhO3sOEGgoHJycpK/v782b95seUxEZmamNm/erMGDB9u2OOSI2WzWkCFDtGbNGm3btk0VK1a0dUkwoHXr1vr555+t2vr376/q1avrrbfeIkQVAs2aNcv2yIGTJ08W+t87CVKPIHd3dw0cOFBjx46Vn5+fypcvr+nTp0uSXnjhBRtXh5woV66c1fuiRYtKkipXrsxfWQuBixcvqkWLFipfvrzef/99Xb161bKMGY6CKSwsTMHBwWrYsKEaN26s2bNnKzk5Wf3797d1aciB0NBQrVixQl9//bWKFStmubbNw8NDRYoUsXF1uJdixYplu57Nzc1NJUqU4Dq3QmL48OF68sknNXnyZHXv3l179+7Vhx9+WOjPxiBIPaKmT58uBwcH9enTR//73//UpEkTbdmypdDfzx8oDKKionT69GmdPn06W/DliRQF04svvqirV69qzJgxio2NVb169bR+/fpsN6BAwbRw4UJJUosWLazaIyMj73k6LoD716hRI61Zs0bh4eEaP368KlasqNmzZ6tXr162Lu2+8BwpAAAAADCIk/IBAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABhGkAACPpBYtWmjYsGG2LgMAUEgRpAAAhU6nTp3Utm3b2y7buXOnTCaTDh8+/ICrAgA8SghSAIBCJyQkRFFRUbpw4UK2ZZGRkWrYsKHq1Kljg8oAAI8KghQAoNDp2LGjSpUqpaVLl1q1JyUlafXq1erSpYteeuklPfbYY3J1dVXt2rX12Wef3XWbJpNJX331lVWbp6en1T7Onz+v7t27y9PTU15eXurcubPOnj1rWb5t2zY1btxYbm5u8vT0VLNmzXTu3Ln7HC0AoCAiSAEACh0HBwf17dtXS5culdlstrSvXr1aGRkZ6t27t/z9/fXtt9/qyJEjGjBggPr06aO9e/fmep9paWkKCgpSsWLFtHPnTu3atUtFixZV27ZtlZqaqvT0dHXp0kXPPPOMDh8+rOjoaA0YMEAmkykvhgwAKGAcbF0AAAC58fLLL2v69Onavn27WrRoIenWaX3dunVT+fLlNWLECEvfIUOGaMOGDfr888/VuHHjXO1v1apVyszM1EcffWQJR5GRkfL09NS2bdvUsGFDJSQkqGPHjqpcubIkqUaNGvc3SABAgcWMFACgUKpevbqefPJJffzxx5Kk06dPa+fOnQoJCVFGRoYmTJig2rVry8vLS0WLFtWGDRsUExOT6/0dOnRIp0+fVrFixVS0aFEVLVpUXl5eunnzpn799Vd5eXmpX79+CgoKUqdOnfTBBx/o8uXLeTVcAEABQ5ACABRaISEh+uKLL3T9+nVFRkaqcuXKeuaZZzR9+nR98MEHeuutt7R161YdPHh
QQUFBSk1NveO2TCaT1WmC0q3T+bIkJSXJ399fBw8etHqdPHlSPXv2lHRrhio6OlpPPvmkVq1apWrVqumHH37In8EDAGyKIAUAKLS6d+8uOzs7rVixQsuXL9fLL78sk8mkXbt2qXPnzurdu7fq1q2rSpUq6eTJk3fdVqlSpaxmkE6dOqUbN25Y3jdo0ECnTp2St7e3qlSpYvXy8PCw9Ktfv77Cw8O1e/du1apVSytWrMj7gQMAbI4gBQAotIoWLaoXX3xR4eHhunz5svr16ydJqlq1qqKiorR792798ssveu211xQXF3fXbbVq1Urz5s3TTz/9pH379mngwIFydHS0LO/Vq5dKliypzp07a+fOnTpz5oy2bdumN954QxcuXNCZM2cUHh6u6OhonTt3Ths3btSpU6e4TgoAHlIEKQBAoRYSEqI///xTQUFBKlOmjCTpnXfeUYMGDRQUFKQWLVrI19dXXbp0uet2ZsyYIT8/PzVv3lw9e/bUiBEj5Orqalnu6uqqHTt2qFy5curatatq1KihkJAQ3bx5U+7u7nJ1ddXx48fVrVs3VatWTQMGDFBoaKhee+21/Bw+AMBGTOa/nxAOAAAAALgrZqQAAAAAwCCCFAAAAAAYRJACAAAAAIMIUgAAAABgEEEKAAAAAAwiSAEAAACAQQQpAAAAADCIIAUAAAAABhGkAAAAAMAgghQAAAAAGESQAgAAAACD/h8iuuMlXDc6aAAAAABJRU5ErkJggg==", + "image/png": "", "text/plain": [ "
" ] @@ -493,8 +509,8 @@ "source": [ "# Plotting\n", "plt.figure(figsize=(10, 6))\n", - "plt.hist(noise_perturbation, bins=30, alpha=0.5, label='Label 0', color='orange')\n", - "plt.hist(signal_perturbation, bins=30, alpha=0.5, label='Label 1', color='blue')\n", + "plt.hist(unbound_perturbation, bins=30, alpha=0.5, label='Label 0', color='orange')\n", + "plt.hist(bound_perturbation, bins=30, alpha=0.5, label='Label 1', color='blue')\n", "plt.title('Histogram of Values in the 2nd Column')\n", "plt.xlabel('Values')\n", "plt.ylabel('Frequency')\n", @@ -516,7 +532,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -568,8 +584,8 @@ "perturbation_effects_tf_influenced = generate_perturbation_effects(\n", " binding_data_tensor, \n", " max_mean_adjustment=3.0, # try 0.1, 3.0, and 10.0\n", - " signal_mean=5.0, # try 3.0, 5.0, or 10.0\n", - " noise_mean=0.0, # try adjusting this\n", + " bound_mean=5.0, # try 3.0, 5.0, or 10.0\n", + " unbound_mean=0.0, # try adjusting this\n", ")\n", "perturbation_pvalue_tf_influenced = torch.zeros_like(perturbation_effects_tf_influenced)\n", "for col_idx in range(perturbation_effects_tf_influenced.shape[1]):\n", @@ -597,7 +613,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -607,7 +623,7 @@ } ], "source": [ - "# Plotting. Note that the 'noise' group effects are still range from 0 to 3\n", + "# Plotting. Note that the 'unbound' group effects are still range from 0 to 3\n", "\n", "plt.figure(figsize=(10, 6))\n", "plt.scatter(final_data_tensor_tf_influenced[:, :, 1].flatten(), final_data_tensor_tf_influenced[:, :, 3].flatten().abs(), c=['orange' if x == 0 else 'blue' for x in labels])\n", @@ -622,11 +638,25 @@ "\n", "plt.show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -640,9 +670,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/docs/tutorials/hyperparameter_sweep.ipynb b/docs/tutorials/hyperparameter_sweep.ipynb index 36220c1..f62dfc2 100644 --- a/docs/tutorials/hyperparameter_sweep.ipynb +++ b/docs/tutorials/hyperparameter_sweep.ipynb @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -158,8 +158,8 @@ " data_module = SyntheticDataLoader(\n", " batch_size=batch_size,\n", " num_genes=4000,\n", - " signal_mean=3.0,\n", - " signal=[0.5] * 10,\n", + " bound_mean=3.0,\n", + " bound=[0.5] * 10,\n", " n_sample=[1, 2, 2, 4, 4],\n", " val_size=0.1,\n", " test_size=0.1,\n", @@ -208,9 +208,510 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-05-29 13:18:03,548] A new study created in memory with name: 
CustomizableModelHyperparameterSweep3\n", + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/optuna/distributions.py:524: UserWarning: Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [64] which is of type list.\n", + " warnings.warn(message)\n", + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/optuna/distributions.py:524: UserWarning: Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [256] which is of type list.\n", + " warnings.warn(message)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "About to create model with the following hyperparameters:\n", + "lr: 0.01\n", + "hidden_layer_num: 1\n", + "hidden_layer_sizes: [256]\n", + "activation: Tanh\n", + "optimizer: RMSprop\n", + "L2_regularization_term: 0.1\n", + "dropout_rate: 0.5\n", + "batch_size: 32\n", + "max_epochs: 1\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "/Users/ericjia/yeastdnnexplorer/yeastdnnexplorer/data_loaders/synthetic_data_loader.py:260: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " X_train, Y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(\n", + "/Users/ericjia/yeastdnnexplorer/yeastdnnexplorer/data_loaders/synthetic_data_loader.py:263: UserWarning: To copy construct from a tensor, it is recommended to use 
sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " X_val, Y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(\n", + "/Users/ericjia/yeastdnnexplorer/yeastdnnexplorer/data_loaders/synthetic_data_loader.py:266: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " X_test, Y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------------\n", + "0 | activation | Tanh | 0 \n", + "1 | input_layer | Linear | 3.6 K \n", + "2 | hidden_layers | ModuleList | 0 \n", + "3 | output_layer | Linear | 3.3 K \n", + "4 | dropout | Dropout | 0 \n", + "5 | mae | MeanAbsoluteError | 0 \n", + "6 | SMSE | SMSE | 0 \n", + "----------------------------------------------------\n", + "6.9 K Trainable params\n", + "0 Non-trainable params\n", + "6.9 K Total params\n", + "0.028 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/torch/utils/data/dataloader.py:558: UserWarning: This DataLoader will create 15 worker processes in total. Our suggested max number of worker in current system is 8 (`cpuset` is not taken into account), which is smaller than what this DataLoader is going to create. 
Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n", + " warnings.warn(_create_warning_msg(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9874d59e855a45b09fcd3891e60fc48b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n", + "[I 2024-05-29 13:18:26,417] Trial 0 finished with value: 4.489274501800537 and parameters: {'lr': 0.01, 'hidden_layer_num': 1, 'activation': 'Tanh', 'optimizer': 'RMSprop', 'L2_regularization_term': 0.1, 'dropout_rate': 0.5, 'batch_size': 32, 'max_epochs': 1, 'hidden_layer_sizes_1_layers': [256]}. 
Best is trial 0 with value: 4.489274501800537.\n", + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/optuna/distributions.py:524: UserWarning: Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [64] which is of type list.\n", + " warnings.warn(message)\n", + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/optuna/distributions.py:524: UserWarning: Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [256] which is of type list.\n", + " warnings.warn(message)\n", + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "About to create model with the following hyperparameters:\n", + "lr: 0.01\n", + "hidden_layer_num: 1\n", + "hidden_layer_sizes: [256]\n", + "activation: LeakyReLU\n", + "optimizer: SGD\n", + "L2_regularization_term: 0.1\n", + "dropout_rate: 0.5\n", + "batch_size: 32\n", + "max_epochs: 1\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "----------------------------------------------------\n", + "0 | activation | LeakyReLU | 0 \n", + "1 | input_layer | Linear | 3.6 K \n", + "2 | hidden_layers | ModuleList | 0 \n", + "3 | output_layer | Linear | 3.3 K \n", + "4 | dropout | Dropout | 0 \n", + "5 | mae | MeanAbsoluteError | 0 \n", + "6 | SMSE | SMSE | 0 \n", + "----------------------------------------------------\n", + "6.9 K Trainable params\n", + "0 Non-trainable params\n", + "6.9 K Total params\n", + "0.028 Total estimated model params 
size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a3f7fa1a66da47818f9a97b47763e2c6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n", + "[I 2024-05-29 13:18:45,320] Trial 1 finished with value: 6.033911228179932 and parameters: {'lr': 0.01, 'hidden_layer_num': 1, 'activation': 'LeakyReLU', 'optimizer': 'SGD', 'L2_regularization_term': 0.1, 'dropout_rate': 0.5, 'batch_size': 32, 'max_epochs': 1, 'hidden_layer_sizes_1_layers': [256]}. 
Best is trial 0 with value: 4.489274501800537.\n", + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/optuna/distributions.py:524: UserWarning: Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [64, 32] which is of type list.\n", + " warnings.warn(message)\n", + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/optuna/distributions.py:524: UserWarning: Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [256, 64] which is of type list.\n", + " warnings.warn(message)\n", + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "About to create model with the following hyperparameters:\n", + "lr: 0.01\n", + "hidden_layer_num: 2\n", + "hidden_layer_sizes: [256, 64]\n", + "activation: ReLU\n", + "optimizer: SGD\n", + "L2_regularization_term: 0.0\n", + "dropout_rate: 0.5\n", + "batch_size: 32\n", + "max_epochs: 1\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "----------------------------------------------------\n", + "0 | activation | ReLU | 0 \n", + "1 | input_layer | Linear | 3.6 K \n", + "2 | hidden_layers | ModuleList | 16.4 K\n", + "3 | output_layer | Linear | 845 \n", + "4 | dropout | Dropout | 0 \n", + "5 | mae | MeanAbsoluteError | 0 \n", + "6 | SMSE | SMSE | 0 \n", + "----------------------------------------------------\n", + "20.9 K Trainable params\n", + "0 Non-trainable params\n", + "20.9 K Total params\n", + "0.084 Total estimated model 
params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2c96e3c7274a460ebbe021e43699d992", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n", + "[I 2024-05-29 13:19:02,993] Trial 2 finished with value: 6.900921821594238 and parameters: {'lr': 0.01, 'hidden_layer_num': 2, 'activation': 'ReLU', 'optimizer': 'SGD', 'L2_regularization_term': 0.0, 'dropout_rate': 0.5, 'batch_size': 32, 'max_epochs': 1, 'hidden_layer_sizes_2_layers': [256, 64]}. 
Best is trial 0 with value: 4.489274501800537.\n", + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "About to create model with the following hyperparameters:\n", + "lr: 0.01\n", + "hidden_layer_num: 2\n", + "hidden_layer_sizes: [64, 32]\n", + "activation: Tanh\n", + "optimizer: Adam\n", + "L2_regularization_term: 0.1\n", + "dropout_rate: 0.0\n", + "batch_size: 32\n", + "max_epochs: 1\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "----------------------------------------------------\n", + "0 | activation | Tanh | 0 \n", + "1 | input_layer | Linear | 896 \n", + "2 | hidden_layers | ModuleList | 2.1 K \n", + "3 | output_layer | Linear | 429 \n", + "4 | dropout | Dropout | 0 \n", + "5 | mae | MeanAbsoluteError | 0 \n", + "6 | SMSE | SMSE | 0 \n", + "----------------------------------------------------\n", + "3.4 K Trainable params\n", + "0 Non-trainable params\n", + "3.4 K Total params\n", + "0.014 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a854f1a313d34d8192602b17986182b1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + 
"metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n", + "[I 2024-05-29 13:19:19,976] Trial 3 finished with value: 4.5260910987854 and parameters: {'lr': 0.01, 'hidden_layer_num': 2, 'activation': 'Tanh', 'optimizer': 'Adam', 'L2_regularization_term': 0.1, 'dropout_rate': 0.0, 'batch_size': 32, 'max_epochs': 1, 'hidden_layer_sizes_2_layers': [64, 32]}. Best is trial 0 with value: 4.489274501800537.\n", + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/optuna/distributions.py:524: UserWarning: Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [512, 256, 128, 64, 32] which is of type list.\n", + " warnings.warn(message)\n", + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "About to create model with the following hyperparameters:\n", + "lr: 0.01\n", + "hidden_layer_num: 5\n", + "hidden_layer_sizes: [512, 256, 128, 64, 32]\n", + "activation: Tanh\n", + "optimizer: RMSprop\n", + "L2_regularization_term: 0.1\n", + "dropout_rate: 0.5\n", + "batch_size: 32\n", + "max_epochs: 1\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "----------------------------------------------------\n", + "0 | activation | Tanh | 0 \n", + "1 | input_layer | Linear | 7.2 K \n", + "2 | hidden_layers | ModuleList | 174 K \n", + "3 | output_layer | Linear | 429 \n", + "4 | dropout | Dropout | 0 \n", + "5 | mae | MeanAbsoluteError | 0 \n", + "6 | SMSE | SMSE | 0 \n", + 
"----------------------------------------------------\n", + "182 K Trainable params\n", + "0 Non-trainable params\n", + "182 K Total params\n", + "0.729 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5e56902ea3474fcc8f1e106e3fc4f19d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n", + "[I 2024-05-29 13:19:37,861] Trial 4 finished with value: 4.612905502319336 and parameters: {'lr': 0.01, 'hidden_layer_num': 5, 'activation': 'Tanh', 'optimizer': 'RMSprop', 'L2_regularization_term': 0.1, 'dropout_rate': 0.5, 'batch_size': 32, 'max_epochs': 1, 'hidden_layer_sizes_5_layers': [512, 256, 128, 64, 32]}. 
Best is trial 0 with value: 4.489274501800537.\n" + ] + } + ], "source": [ "STUDY_NAME = \"CustomizableModelHyperparameterSweep3\"\n", "NUM_TRIALS = 5 # you will need a lot more than 5 trials if you have many possible combinations of hyperparams\n", @@ -237,9 +738,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RESULTS======================================================================\n", + "Best hyperparameters: {'lr': 0.01, 'hidden_layer_num': 1, 'activation': 'Tanh', 'optimizer': 'RMSprop', 'L2_regularization_term': 0.1, 'dropout_rate': 0.5, 'batch_size': 32, 'max_epochs': 1, 'hidden_layer_sizes_1_layers': [256]}\n", + "Best loss: 4.489274501800537\n" + ] + } + ], "source": [ "print(\"RESULTS\" + (\"=\" * 70))\n", "print(f\"Best hyperparameters: {best_params}\")\n", @@ -252,11 +763,18 @@ "source": [ "And that's it! Now you could take what you found to be the best hyperparameters and train a model with them for many more epochs. 
The [Optuna Documentation](https://optuna.readthedocs.io/en/stable/) will be a helpful resource if you'd like to add more to this notebook or the hyperparam sweep functions" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -270,9 +788,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/docs/tutorials/lightning_crash_course.ipynb b/docs/tutorials/lightning_crash_course.ipynb index f51e3a2..5954640 100644 --- a/docs/tutorials/lightning_crash_course.ipynb +++ b/docs/tutorials/lightning_crash_course.ipynb @@ -38,9 +38,249 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "Missing logger folder: /Users/ericjia/yeastdnnexplorer/docs/tutorials/lightning_logs\n", + "/Users/ericjia/yeastdnnexplorer/yeastdnnexplorer/data_loaders/synthetic_data_loader.py:260: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " X_train, Y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(\n", + "/Users/ericjia/yeastdnnexplorer/yeastdnnexplorer/data_loaders/synthetic_data_loader.py:263: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than 
torch.tensor(sourceTensor).\n", + " X_val, Y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(\n", + "/Users/ericjia/yeastdnnexplorer/yeastdnnexplorer/data_loaders/synthetic_data_loader.py:266: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " X_test, Y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------\n", + "0 | mae | MeanAbsoluteError | 0 \n", + "1 | SMSE | SMSE | 0 \n", + "2 | linear1 | Linear | 110 \n", + "----------------------------------------------\n", + "110 Trainable params\n", + "0 Non-trainable params\n", + "110 Total params\n", + "0.000 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ericjia/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/torch/utils/data/dataloader.py:558: UserWarning: This DataLoader will create 15 worker processes in total. Our suggested max number of worker in current system is 8 (`cpuset` is not taken into account), which is smaller than what this DataLoader is going to create. 
Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n", + " warnings.warn(_create_warning_msg(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "03961a09a4b64a63b68f3cd670bdc8db", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + 
"version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=10` reached.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b68c57cdd4e34f44aac1cc03849ee343", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " Test metric DataLoader 0\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " test_mae 1.1637259721755981\n", + " test_mse 1.8661913871765137\n", + " test_smse 10.101052284240723\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "[{'test_mse': 1.8661913871765137, 'test_mae': 1.1637259721755981, 'test_smse': 10.101052284240723}]\n" + ] + } + ], "source": [ "# define an instance of our simple linear baseline model\n", "model = 
SimpleModel(\n", @@ -54,11 +294,11 @@ "data_module = SyntheticDataLoader(\n", " batch_size=32,\n", " num_genes=3000,\n", - " signal=[0.5] * 5,\n", + " bound=[0.5] * 5,\n", " n_sample=[1, 1, 2, 2, 4],\n", " val_size=0.1,\n", " test_size=0.1,\n", - " signal_mean=3.0,\n", + " bound_mean=3.0,\n", ")\n", "\n", "# define a trainer instance\n", @@ -85,9 +325,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '../../data/init_analysis_data_20240409/binding/brent_nf_cc'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 23\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# we also have to define a new trainer instance, not really sure why but it seems to be necessary\u001b[39;00m\n\u001b[1;32m 17\u001b[0m trainer \u001b[38;5;241m=\u001b[39m Trainer(\n\u001b[1;32m 18\u001b[0m max_epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[1;32m 19\u001b[0m deterministic\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 20\u001b[0m accelerator\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;66;03m# change to \"gpu\" if you have access to one\u001b[39;00m\n\u001b[1;32m 21\u001b[0m )\n\u001b[0;32m---> 23\u001b[0m 
\u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnew_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreal_data_module\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m test_results \u001b[38;5;241m=\u001b[39m trainer\u001b[38;5;241m.\u001b[39mtest(new_model, real_data_module)\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mprint\u001b[39m(test_results)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py:544\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 542\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m=\u001b[39m TrainerStatus\u001b[38;5;241m.\u001b[39mRUNNING\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 544\u001b[0m \u001b[43mcall\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_and_handle_interrupt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 545\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_impl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatamodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\n\u001b[1;32m 546\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/call.py:44\u001b[0m, in \u001b[0;36m_call_and_handle_interrupt\u001b[0;34m(trainer, trainer_fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mlauncher \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mlauncher\u001b[38;5;241m.\u001b[39mlaunch(trainer_fn, \u001b[38;5;241m*\u001b[39margs, trainer\u001b[38;5;241m=\u001b[39mtrainer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m---> 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtrainer_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m _TunerExitException:\n\u001b[1;32m 47\u001b[0m _call_teardown_hook(trainer)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py:580\u001b[0m, in \u001b[0;36mTrainer._fit_impl\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 573\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 574\u001b[0m ckpt_path \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkpoint_connector\u001b[38;5;241m.\u001b[39m_select_ckpt_path(\n\u001b[1;32m 575\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfn,\n\u001b[1;32m 576\u001b[0m ckpt_path,\n\u001b[1;32m 577\u001b[0m model_provided\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 578\u001b[0m model_connected\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlightning_module \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 579\u001b[0m )\n\u001b[0;32m--> 580\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mckpt_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 582\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstopped\n\u001b[1;32m 583\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py:947\u001b[0m, in \u001b[0;36mTrainer._run\u001b[0;34m(self, model, ckpt_path)\u001b[0m\n\u001b[1;32m 944\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__setup_profiler()\n\u001b[1;32m 946\u001b[0m 
log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: preparing data\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 947\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data_connector\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 949\u001b[0m call\u001b[38;5;241m.\u001b[39m_call_setup_hook(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;66;03m# allow user to set up LightningModule in accelerator environment\u001b[39;00m\n\u001b[1;32m 950\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: configuring model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:94\u001b[0m, in \u001b[0;36m_DataConnector.prepare_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 92\u001b[0m dm_prepare_data_per_node \u001b[38;5;241m=\u001b[39m datamodule\u001b[38;5;241m.\u001b[39mprepare_data_per_node\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (dm_prepare_data_per_node \u001b[38;5;129;01mand\u001b[39;00m local_rank_zero) \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;129;01mnot\u001b[39;00m dm_prepare_data_per_node \u001b[38;5;129;01mand\u001b[39;00m global_rank_zero):\n\u001b[0;32m---> 94\u001b[0m 
\u001b[43mcall\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_lightning_datamodule_hook\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrainer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprepare_data\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;66;03m# handle lightning module prepare data:\u001b[39;00m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;66;03m# check for prepare_data_per_node before calling lightning_module.prepare_data\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m lightning_module \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/call.py:179\u001b[0m, in \u001b[0;36m_call_lightning_datamodule_hook\u001b[0;34m(trainer, hook_name, *args, **kwargs)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(fn):\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mprofiler\u001b[38;5;241m.\u001b[39mprofile(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[LightningDataModule]\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtrainer\u001b[38;5;241m.\u001b[39mdatamodule\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhook_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m~/yeastdnnexplorer/yeastdnnexplorer/data_loaders/real_data_loader.py:118\u001b[0m, in \u001b[0;36mRealDataLoader.prepare_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;124;03mThis function reads in the binding data and perturbation data from the CSV files\u001b[39;00m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;124;03mthat we have for these datasets.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 113\u001b[0m \n\u001b[1;32m 114\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 116\u001b[0m brent_cc_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_dir_path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbinding/brent_nf_cc\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 117\u001b[0m brent_nf_csv_files \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m--> 118\u001b[0m f \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlistdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbrent_cc_path\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m f\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 119\u001b[0m ]\n\u001b[1;32m 120\u001b[0m perturb_dataset_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_dir_path, 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mperturbation/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mperturbation_dataset_title\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 122\u001b[0m )\n\u001b[1;32m 123\u001b[0m perturb_dataset_csv_files \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 124\u001b[0m f \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m os\u001b[38;5;241m.\u001b[39mlistdir(perturb_dataset_path) \u001b[38;5;28;01mif\u001b[39;00m f\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 125\u001b[0m ]\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../../data/init_analysis_data_20240409/binding/brent_nf_cc'" + ] + } + ], "source": [ "# we need to redefine a new instance with the same params unless we want it to pick up where it left off\n", "new_model = SimpleModel(\n", @@ -139,9 +408,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], "source": [ "# this will be used to save the model checkpoint that performs the best on the validation set\n", "best_model_checkpoint = ModelCheckpoint(\n", @@ -186,9 +466,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/Users/ericjia/yeastdnnexplorer/docs/tutorials/example/path/not/real.ckpt'", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m path_to_checkpoint \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexample/path/not/real.ckpt\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# note that we need to use the same model class that was used to save the checkpoint\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mSimpleModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_from_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_to_checkpoint\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# we can load the model and continue training from where it left off\u001b[39;00m\n\u001b[1;32m 9\u001b[0m trainer\u001b[38;5;241m.\u001b[39mfit(model, data_module)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/utilities/model_helpers.py:125\u001b[0m, in \u001b[0;36m_restricted_classmethod_impl.__get__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m instance \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scripting:\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe classmethod 
`\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmethod\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` cannot be called on an instance.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Please call it on the class type and make sure the return value is used.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 124\u001b[0m )\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/core/module.py:1581\u001b[0m, in \u001b[0;36mLightningModule.load_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)\u001b[0m\n\u001b[1;32m 1492\u001b[0m \u001b[38;5;129m@_restricted_classmethod\u001b[39m\n\u001b[1;32m 1493\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_from_checkpoint\u001b[39m(\n\u001b[1;32m 1494\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 1500\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Self:\n\u001b[1;32m 1501\u001b[0m \u001b[38;5;250m 
\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint it stores the arguments\u001b[39;00m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;124;03m passed to ``__init__`` in the checkpoint under ``\"hyper_parameters\"``.\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1579\u001b[0m \n\u001b[1;32m 1580\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1581\u001b[0m loaded \u001b[38;5;241m=\u001b[39m \u001b[43m_load_from_checkpoint\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1582\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[arg-type]\u001b[39;49;00m\n\u001b[1;32m 1583\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheckpoint_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1584\u001b[0m \u001b[43m \u001b[49m\u001b[43mmap_location\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1585\u001b[0m \u001b[43m \u001b[49m\u001b[43mhparams_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1586\u001b[0m \u001b[43m \u001b[49m\u001b[43mstrict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1587\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1588\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1589\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(Self, loaded)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:63\u001b[0m, in \u001b[0;36m_load_from_checkpoint\u001b[0;34m(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m map_location \u001b[38;5;241m=\u001b[39m map_location \u001b[38;5;129;01mor\u001b[39;00m _default_map_location\n\u001b[1;32m 62\u001b[0m 
\u001b[38;5;28;01mwith\u001b[39;00m pl_legacy_patch():\n\u001b[0;32m---> 63\u001b[0m checkpoint \u001b[38;5;241m=\u001b[39m \u001b[43mpl_load\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheckpoint_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_location\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmap_location\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;66;03m# convert legacy checkpoints to the new format\u001b[39;00m\n\u001b[1;32m 66\u001b[0m checkpoint \u001b[38;5;241m=\u001b[39m _pl_migrate_checkpoint(\n\u001b[1;32m 67\u001b[0m checkpoint, checkpoint_path\u001b[38;5;241m=\u001b[39m(checkpoint_path \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(checkpoint_path, (\u001b[38;5;28mstr\u001b[39m, Path)) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 68\u001b[0m )\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/lightning_fabric/utilities/cloud_io.py:56\u001b[0m, in \u001b[0;36m_load\u001b[0;34m(path_or_url, map_location)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mhub\u001b[38;5;241m.\u001b[39mload_state_dict_from_url(\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28mstr\u001b[39m(path_or_url),\n\u001b[1;32m 53\u001b[0m map_location\u001b[38;5;241m=\u001b[39mmap_location, \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 54\u001b[0m )\n\u001b[1;32m 55\u001b[0m fs \u001b[38;5;241m=\u001b[39m get_filesystem(path_or_url)\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_url\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m 
f:\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mload(f, map_location\u001b[38;5;241m=\u001b[39mmap_location)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/fsspec/spec.py:1298\u001b[0m, in \u001b[0;36mAbstractFileSystem.open\u001b[0;34m(self, path, mode, block_size, cache_options, compression, **kwargs)\u001b[0m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1297\u001b[0m ac \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mautocommit\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_intrans)\n\u001b[0;32m-> 1298\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_open\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1299\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1300\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1301\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1302\u001b[0m \u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mac\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1303\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1304\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1305\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1306\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m compression \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1307\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompression\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m compr\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/fsspec/implementations/local.py:191\u001b[0m, in \u001b[0;36mLocalFileSystem._open\u001b[0;34m(self, path, mode, block_size, **kwargs)\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mauto_mkdir \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parent(path), exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m--> 191\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mLocalFileOpener\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/fsspec/implementations/local.py:355\u001b[0m, in \u001b[0;36mLocalFileOpener.__init__\u001b[0;34m(self, path, mode, autocommit, fs, compression, **kwargs)\u001b[0m\n\u001b[1;32m 353\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompression \u001b[38;5;241m=\u001b[39m get_compression(path, compression)\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocksize \u001b[38;5;241m=\u001b[39m io\u001b[38;5;241m.\u001b[39mDEFAULT_BUFFER_SIZE\n\u001b[0;32m--> 355\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_open\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/yeastdnnexplorer-iu4_cpc2-py3.11/lib/python3.11/site-packages/fsspec/implementations/local.py:360\u001b[0m, in \u001b[0;36mLocalFileOpener._open\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mf \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mf\u001b[38;5;241m.\u001b[39mclosed:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mautocommit \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m--> 360\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmode)\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompression:\n\u001b[1;32m 362\u001b[0m compress \u001b[38;5;241m=\u001b[39m compr[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompression]\n", + 
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/Users/ericjia/yeastdnnexplorer/docs/tutorials/example/path/not/real.ckpt'" + ] + } + ], "source": [ "# Load a model from a checkpoint\n", "# We can load a model from a checkpoint like so:\n", @@ -206,11 +506,18 @@ "# we could also load the model and make predictions\n", "predictions = model(data_module.test_dataloader())" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -224,9 +531,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/docs/tutorials/testing_model_metrics.ipynb b/docs/tutorials/testing_model_metrics.ipynb index 493715e..8a63f5e 100644 --- a/docs/tutorials/testing_model_metrics.ipynb +++ b/docs/tutorials/testing_model_metrics.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -84,18 +84,18 @@ " using_random_seed: bool,\n", " accelerator: str,\n", " num_genes: int,\n", - " signal_mean: float,\n", + " bound_mean: float,\n", " val_size: float,\n", " test_size: float,\n", - " signal: list[float],\n", + " bound: list[float],\n", " n_sample: list[int],\n", " max_mean_adjustment: float,\n", ") -> LightningModule:\n", " data_module = SyntheticDataLoader(\n", " batch_size=batch_size,\n", " num_genes=num_genes,\n", - " signal_mean=signal_mean,\n", - " signal=signal, # 
old: [0.1, 0.15, 0.2, 0.25, 0.3],\n", + " bound_mean=bound_mean,\n", + " bound=bound, # old: [0.1, 0.15, 0.2, 0.25, 0.3],\n", " n_sample=n_sample, # sum of this is num of tfs\n", " val_size=val_size,\n", " test_size=test_size,\n", @@ -136,13 +136,1169 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------\n", + "0 | mae | MeanAbsoluteError | 0 \n", + "1 | SMSE | SMSE | 0 \n", + "2 | linear1 | Linear | 110 \n", + "----------------------------------------------\n", + "110 Trainable params\n", + "0 Non-trainable params\n", + "110 Total params\n", + "0.000 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "81f0cc52e9c6419ea965fd0eed66b4e1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + 
"application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=10` reached.\n" + ] + }, + { + "data": { + 
"application/vnd.jupyter.widget-view+json": { + "model_id": "6a35ccabcd0e48c28b0fd3725ee0f3a2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " Test metric DataLoader 0\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " test_mae 0.5135628581047058\n", + " test_mse 0.416797935962677\n", + " test_smse 10.241324424743652\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------\n", + "0 | mae | MeanAbsoluteError | 0 \n", + "1 | SMSE | SMSE | 0 \n", + "2 | linear1 | Linear | 110 \n", + "----------------------------------------------\n", + "110 Trainable params\n", + "0 Non-trainable params\n", + "110 Total params\n", + "0.000 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Printing test results...\n", + "[{'test_mse': 0.416797935962677, 'test_mae': 0.5135628581047058, 'test_smse': 10.241324424743652}]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": 
"c5eeb9a7b5944d3c85683e0e0b8a31ac", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": 
{}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=10` reached.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ea3a981f9c0247aba2551941ebd1127c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " Test metric DataLoader 0\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " test_mae 0.5821905136108398\n", + " test_mse 0.5283595323562622\n", + " test_smse 10.348736763000488\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------\n", + "0 | mae | MeanAbsoluteError | 0 \n", + "1 | SMSE | SMSE | 0 \n", + "2 | linear1 | Linear | 110 \n", + "----------------------------------------------\n", + "110 Trainable 
params\n", + "0 Non-trainable params\n", + "110 Total params\n", + "0.000 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Printing test results...\n", + "[{'test_mse': 0.5283595323562622, 'test_mae': 0.5821905136108398, 'test_smse': 10.348736763000488}]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a83e317d249c478fa8a8903ed6ffbd52", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + 
"data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=10` reached.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b14fcb56940f4300a3b9357a4a075ae4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " Test metric DataLoader 0\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " test_mae 0.8307084441184998\n", + " test_mse 1.050934910774231\n", + " test_smse 
10.213595390319824\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Printing test results...\n", + "[{'test_mse': 1.050934910774231, 'test_mae': 0.8307084441184998, 'test_smse': 10.213595390319824}]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "----------------------------------------------\n", + "0 | mae | MeanAbsoluteError | 0 \n", + "1 | SMSE | SMSE | 0 \n", + "2 | linear1 | Linear | 110 \n", + "----------------------------------------------\n", + "110 Trainable params\n", + "0 Non-trainable params\n", + "110 Total params\n", + "0.000 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b6177aafea8a40efa7bd3e354a7fdd48", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + 
"output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=10` 
reached.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ad8ed0e588954f07b698c88b7dde3b7c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " Test metric DataLoader 0\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " test_mae 1.1858488321304321\n", + " test_mse 2.014770984649658\n", + " test_smse 10.195466995239258\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------\n", + "0 | mae | MeanAbsoluteError | 0 \n", + "1 | SMSE | SMSE | 0 \n", + "2 | linear1 | Linear | 110 \n", + "----------------------------------------------\n", + "110 Trainable params\n", + "0 Non-trainable params\n", + "110 Total params\n", + "0.000 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Printing test results...\n", + "[{'test_mse': 2.014770984649658, 'test_mae': 1.1858488321304321, 'test_smse': 10.195466995239258}]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + 
"application/vnd.jupyter.widget-view+json": { + "model_id": "a467879ee86d4a5b8d15490b21ffd6ab", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + 
"text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=10` reached.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c527988542004759a3cb282abda532a9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: | …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " Test metric DataLoader 0\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " test_mae 2.091959238052368\n", + " test_mse 6.157958984375\n", + " test_smse 11.987293243408203\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "Printing test results...\n", + "[{'test_mse': 6.157958984375, 'test_mae': 2.091959238052368, 'test_smse': 11.987293243408203}]\n" + ] + } + ], "source": [ - "signal_means = [0.5, 1.0, 2.0, 3.0, 5.0]\n", + "bound_means = [0.5, 1.0, 2.0, 3.0, 5.0]\n", "test_mses = []\n", - "for signal_mean in signal_means:\n", + "for bound_mean in bound_means:\n", " model, test_results = train_simple_model_with_params(\n", " batch_size=32,\n", " lr=0.01,\n", @@ -152,9 +1308,9 
@@ " num_genes=1000,\n", " val_size=0.1,\n", " test_size=0.1,\n", - " signal=[0.5] * 5,\n", + " bound=[0.5] * 5,\n", " n_sample=[1, 1, 2, 2, 4], # sum of this is num of tfs\n", - " signal_mean=signal_mean,\n", + " bound_mean=bound_mean,\n", " max_mean_adjustment=0.0\n", " )\n", " test_mses.append(test_results[0][\"test_mse\"])" @@ -169,12 +1325,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -184,12 +1340,12 @@ } ], "source": [ - "plt.plot(signal_means, test_mses, marker=\"o\")\n", - "plt.xlabel(\"Signal Mean\")\n", - "plt.xticks(signal_means, rotation=45)\n", + "plt.plot(bound_means, test_mses, marker=\"o\")\n", + "plt.xlabel(\"bound Mean\")\n", + "plt.xticks(bound_means, rotation=45)\n", "plt.yticks(test_mses)\n", "plt.ylabel(\"Test MSE\")\n", - "plt.title(\"Test MSE as a function of Signal Mean\")\n", + "plt.title(\"Test MSE as a function of bound Mean\")\n", "plt.show()" ] }, @@ -199,7 +1355,7 @@ "source": [ "### Experiment 2\n", "\n", - "We can run a similar experiment where we test the effect of the bound / unbound ratio (aka signal / noise ratio) on the model's MSE" + "We can run a similar experiment where we test the effect of the bound / unbound ratio (i.e. the proportion of genes in the bound group) on the model's MSE" ] }, { @@ -208,10 +1364,10 @@ "metadata": {}, "outputs": [], "source": [ - "signal_noise_ratios = [0.05, 0.1, 0.25, 0.5, 0.75, 0.9]\n", + "bound_unbound_ratios = [0.05, 0.1, 0.25, 0.5, 0.75, 0.9]\n", "test_mses = []\n", "\n", - "for signal_noise_ratio in signal_noise_ratios:\n", + "for bound_unbound_ratio in bound_unbound_ratios:\n", " model, test_results = train_simple_model_with_params(\n", " batch_size=32,\n", " lr=0.01,\n", @@ -221,9 +1377,9 @@ " num_genes=1000,\n", " val_size=0.1,\n", " test_size=0.1,\n", - " signal=[signal_noise_ratio] * 5,\n", + " bound=[bound_unbound_ratio] * 5,\n", " n_sample=[1, 1, 2, 2, 4],\n", - " signal_mean=3.0,\n", + " bound_mean=3.0,\n", " max_mean_adjustment=0.0\n", " )\n", " print(test_results)\n", @@ -247,12 +1403,12 @@ } ], "source": [ - "plt.plot(signal_noise_ratios, test_mses, marker=\"o\")\n", - "plt.xlabel(\"Percentage of Data in Signal Group\")\n", + "plt.plot(bound_unbound_ratios, test_mses, marker=\"o\")\n", + "plt.xlabel(\"Percentage of Data in bound Group\")\n", "plt.ylabel(\"Test MSE\")\n", - "plt.xticks(signal_noise_ratios, rotation=45)\n", + "plt.xticks(bound_unbound_ratios, rotation=45)\n", 
"plt.yticks(test_mses)\n", - "plt.title(\"Test MSE as a function of signal/noise ratio (signal mean = 3.0)\")\n", + "plt.title(\"Test MSE as a function of bound/unbound ratio (bound mean = 3.0)\")\n", "plt.show()" ] }, @@ -277,31 +1433,31 @@ "num_genes = 3000\n", "val_size = 0.1\n", "test_size = 0.1\n", - "signal = [0.5] * 5\n", + "bound = [0.5] * 5\n", "n_sample = [1, 1, 2, 2, 4]\n", "random_state = 42\n", "\n", "# the first data loader will load a dataset with a small scale and a small bound mean\n", "small_scale_and_mean_dataloader = SyntheticDataLoader(\n", " num_genes=num_genes,\n", - " signal=signal, \n", + " bound=bound, \n", " n_sample=n_sample,\n", " val_size=val_size,\n", " test_size=test_size,\n", " random_state=random_state,\n", - " signal_mean=1.0,\n", + " bound_mean=1.0,\n", " max_mean_adjustment=1.0\n", ")\n", "\n", "# the second data loader will generate a dataset with a large scale and a large bound mean\n", "large_scale_and_mean_dataloader = SyntheticDataLoader(\n", " num_genes=num_genes,\n", - " signal=signal, \n", + " bound=bound, \n", " n_sample=n_sample,\n", " val_size=val_size,\n", " test_size=test_size,\n", " random_state=random_state,\n", - " signal_mean=10.0,\n", + " bound_mean=10.0,\n", " max_mean_adjustment=10.0\n", ")\n", "\n", @@ -331,7 +1487,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -345,9 +1501,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/docs/tutorials/visualizing_and_testing_data_generation_methods.ipynb b/docs/tutorials/visualizing_and_testing_data_generation_methods.ipynb index 6167edc..b54cfb7 100644 --- a/docs/tutorials/visualizing_and_testing_data_generation_methods.ipynb +++ b/docs/tutorials/visualizing_and_testing_data_generation_methods.ipynb @@ -4,6 +4,7 
@@ "cell_type": "markdown", "metadata": {}, "source": [ + "# **Visualizing and Testing Data Generation Methods**\n", "In this notebook, we will run an experiment to display the average perturbation effect values that we generate with the 4 different methods we have for perturbation effect generation (other than the method for generating the perturbation effect values, we will be holding everything else the same). \n", "\n", "Recall that we have 4 methods for generating perturbation effect data (see `generate_in_silico_data.ipynb` for more information on these):\n", @@ -19,13 +20,33 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/plain": [ + "42" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# imports\n", - "from yeastdnnexplorer.probability_models.generate_data import (generate_gene_population, \n", - " generate_binding_effects,\n", - " generate_pvalues,\n", - " generate_perturbation_effects)\n", + "from yeastdnnexplorer.probability_models.generate_data import (\n", + " generate_gene_population, \n", + " generate_binding_effects, \n", + " generate_pvalues, \n", + " generate_perturbation_effects\n", + ")\n", "\n", "import torch\n", "import matplotlib.pyplot as plt\n", @@ -50,6 +71,13 @@ "torch.cuda.manual_seed_all(42) # For all CUDA devices" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Generating the Data**" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -65,14 +93,14 @@ "source": [ "n_genes = 3000\n", "\n", - "signal = [0.5, 0.5, 0.5, 0.5, 0.5]\n", + "bound = [0.5, 0.5, 0.5, 0.5, 0.5]\n", "n_sample = [1, 1, 2, 2, 4]\n", "\n", "# this will be a list of length 10 with a GenePopulation object in each element\n", "gene_populations_list = []\n", - "for signal_proportion, n_draws in zip(signal, n_sample):\n", + 
"for bound_proportion, n_draws in zip(bound, n_sample):\n", " for _ in range(n_draws):\n", - " gene_populations_list.append(generate_gene_population(n_genes, signal_proportion))\n", + " gene_populations_list.append(generate_gene_population(n_genes, bound_proportion))\n", " \n", "# Generate binding data for each gene population\n", "binding_effect_list = [generate_binding_effects(gene_population)\n", @@ -100,11 +128,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "tf_relationships = {\n", + "# TF relationships\n", + "tf_relationships_dict = {\n", " 0: [1],\n", " 1: [8],\n", " 2: [5, 6],\n", @@ -117,6 +146,8 @@ " 9: [4],\n", "}\n", "\n", + "# TF relationships that incorporate boolean logic; this is more complex than\n", + "# the simple relationships above as it implements \"and\" and \"or\" operations\n", "tf_relationships_dict_boolean_logic = {\n", " 0: [And(3, 4, 8), Or(3, 7), Or(1, 1)],\n", " 1: [And(5, Or(7, 8))],\n", @@ -130,7 +161,19 @@ " 9: [And(6, And(3, Or(0, 9)))],\n", "}\n", "\n", - "def experiment(n_iterations = 10, GENE_IDX = 0):\n", + "def experiment(n_iterations: int = 10, GENE_IDX: int = 0) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:\n", + " \"\"\"\n", + " Conducts an experiment by generating perturbation effects for a specific gene over multiple iterations\n", + " using different methods and averaging the results.\n", + " \n", + " :param n_iterations: Number of iterations to perform.\n", + " :type n_iterations: int\n", + " :param GENE_IDX: Index of the gene to analyze.\n", + " :type GENE_IDX: int\n", + "\n", + " :returns: A tuple containing averaged perturbation effects scores for each method.\n", + " :rtype: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]\n", + " \"\"\"\n", " print(\"Bound (1) and Unbound (0) Labels for gene \" + str(GENE_IDX) + \":\")\n", " print(binding_data_tensor[GENE_IDX, :, 0])\n", "\n", @@ -188,16 +231,9 @@ " 
return no_mean_adjustment_scores, normal_mean_adjustment_scores, dep_mean_adjustment_scores, boolean_logic_scores" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can run the experiment for n_iterations, I find that you should iterate at least 30 times, but closer to 100 is most ideal. This could take 1-5 minutes depending on your computer." - ] - }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -228,26 +264,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We now plot our results." + "## **Visualizing the Results**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we plot our results." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Bound (signal) TFs for gene 0 are: [3, 4, 5, 6, 7, 9]\n", - "Unbound (noise) TFs for gene 0 are: [0, 1, 2, 8]\n", + "Bound (bound) TFs for gene 0 are: [3, 4, 5, 6, 7, 9]\n", + "Unbound (unbound) TFs for gene 0 are: [0, 1, 2, 8]\n", "tensor([0., 0., 0., 1., 1., 1., 1., 1., 0., 1.])\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -258,8 +301,8 @@ ], "source": [ "x_vals = list(range(sum(n_sample)))\n", - "print(\"Bound (signal) TFs for gene \" + str(GENE_IDX) + \" are: \" + str(binding_data_tensor[GENE_IDX, :, 0].nonzero().flatten().tolist()))\n", - "print(\"Unbound (noise) TFs for gene \" + str(GENE_IDX) + \" are: \" + str((1 - binding_data_tensor[GENE_IDX, :, 0]).nonzero().flatten().tolist()))\n", + "print(\"Bound (bound) TFs for gene \" + str(GENE_IDX) + \" are: \" + str(binding_data_tensor[GENE_IDX, :, 0].nonzero().flatten().tolist()))\n", + "print(\"Unbound (unbound) TFs for gene \" + str(GENE_IDX) + \" are: \" + str((1 - binding_data_tensor[GENE_IDX, :, 0]).nonzero().flatten().tolist()))\n", "print(binding_data_tensor[GENE_IDX, :, 0])\n", "plt.figure(figsize=(10, 6))\n", "\n", @@ -271,6 +314,11 @@ "plt.title('Pertubation Effects for Gene ' + str(GENE_IDX) + ' with Different Adjustment Functions (averaged across 100 trials)')\n", "plt.xlabel('TF Index')\n", "plt.ylabel('Perturbation Effect Val')\n", + "\n", + "#added to compare this to previous graph, REMOVE LATER\n", + "plt.ylim(0,9)\n", + "\n", + "\n", "plt.xticks(x_vals)\n", "plt.grid(True)\n", "plt.legend(['No Mean Adjustment', 'Normal (non-dependent) Mean Adjust', 'Dependent Mean Adjustment', 'Boolean Logic Adjustment'])\n", @@ -281,6 +329,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "The x-axis labels represent the corresponding TFs whose perturbation effect values are being plotted on the y-axis. The color of each plotted point indicates which of the four data generation methods it was derived from. For example, based on the legend included in the graph, a red point was generated using no mean adjustment. This graph allows us to visualize the perturbation effects for the same TF under a variety of conditions.\n", + "\n", "Recall that for the dependent mean adjustment, the TF in question must be bound and all of the TFs in its dependency array (in the tf_relationships dictionary) must be bound as well. 
This is why we do not adjust the mean for TF 7 despite it being bound, it depends on TF 1 and TF 4 both being bound, and TF1 is not bound.\n", "\n", "Similarly, for the boolean logic adjustment, we do not adjust the mean for 6 despite it being bound because it depends on (TF0 && (TF1 || TF2)) being bound, and none of those 3 TFs are bound to the gene we are studying.\n", @@ -292,55 +342,39 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Training models on data generated from the 4 different methods\n", - "In the next experiment, we will be training the exact same model on data generated from each of these 4 methods. We will also train a simple linear model on all four methods to use as a baseline to compare to. Other than the method used to generate the data, everything else will be held the same." + "## **Training models on data generated from the 4 different methods**\n", + "In the next experiment, we will be training the exact same model on data generated from each of these 4 methods. We will also train a simple linear model on all four methods to use as a baseline to compare to. Other than the method used to generate the data, everything else will be held the same. We define a few helper functions to run our experiment. We make helper functions for things that will mostly be the same across each training loop so that we don't have to keep redefining them." 
] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# define checkpoints and loggers\n", "best_model_checkpoint = ModelCheckpoint(\n", - " monitor=\"val_mse\",\n", + " monitor=\"val_explained_variance\",\n", " mode=\"min\",\n", " filename=\"best-model-{epoch:02d}-{val_loss:.2f}\",\n", " save_top_k=1,\n", ")\n", "\n", - "# Callback to save checkpoints every 5 epochs, regardless of performance\n", - "periodic_checkpoint = ModelCheckpoint(\n", - " filename=\"periodic-{epoch:02d}\",\n", - " every_n_epochs=2,\n", - " save_top_k=-1, # Setting -1 saves all checkpoints\n", - ")\n", + " :param max_mean_adjustment: Maximum mean adjustment value.\n", + " :type max_mean_adjustment: float\n", + " :param adjustment_function: Function to adjust perturbation effects.\n", + " :type adjustment_function: callable\n", + " :param tf_relationships_dict: Dictionary of transcription factor relationships.\n", + " :type tf_relationships_dict: Dict[str, Union[List[int], float]]\n", "\n", - "# define loggers for the model\n", - "tb_logger = TensorBoardLogger(\"logs/tensorboard_logs\")\n", - "csv_logger = CSVLogger(\"logs/csv_logs\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We define a few helper functions to run our experiment. We make helper functions for things that will mostly be the same across each training loop so that we don't have to keep redefining them." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def get_data_module(max_mean_adjustment, adjustment_function = default_perturbation_effect_adjustment_function, tf_relationships_dict = {}):\n", + " :returns: Configured data loader for synthetic data.\n", + " :rtype: SyntheticDataLoader\n", + " \"\"\"\n", " return SyntheticDataLoader(\n", " batch_size=32,\n", " num_genes=4000,\n", - " signal_mean=3.0,\n", - " signal=[0.5] * 5,\n", + " bound_mean=3.0,\n", + " bound=[0.5] * 5,\n", " n_sample=[1, 1, 2, 2, 4], # sum of this is num of tfs\n", " val_size=0.1,\n", " test_size=0.1,\n", @@ -350,7 +384,16 @@ " tf_relationships=tf_relationships_dict,\n", " )\n", "\n", - "def get_model(num_tfs):\n", + "def get_model(num_tfs: int) -> CustomizableModel:\n", + " \"\"\"\n", + " Creates a customizable model.\n", + "\n", + " :param num_tfs: Number of transcription factors.\n", + " :type num_tfs: int\n", + "\n", + " :returns: Configured model.\n", + " :rtype: CustomizableModel\n", + " \"\"\"\n", " return CustomizableModel(\n", " input_dim=num_tfs,\n", " output_dim=num_tfs,\n", @@ -363,226 +406,387 @@ " dropout_rate=0.0,\n", " )\n", "\n", - "def get_linear_model(num_tfs):\n", + "def get_linear_model(num_tfs: int) -> SimpleModel:\n", + " \"\"\"\n", + " Creates a simple linear model.\n", + "\n", + " :param num_tfs: Number of transcription factors.\n", + " :type num_tfs: int\n", + "\n", + " :returns: Configured linear model.\n", + " :rtype: SimpleModel\n", + " \"\"\"\n", " return SimpleModel(\n", " input_dim=num_tfs,\n", " output_dim=num_tfs,\n", " lr=0.01\n", " )\n", "\n", - "def get_trainer():\n", - " # uncomment callbacks or logggers if you would like checkpoints / logs\n", + "def get_trainer() -> Trainer:\n", + " \"\"\"\n", + " Creates a trainer for model training.\n", + "\n", + " :returns: Configured trainer.\n", + " :rtype: Trainer\n", + " \"\"\"\n", " return Trainer(\n", " max_epochs=10,\n", " deterministic=True,\n", " 
accelerator=\"cpu\",\n", - " # callbacks=[best_model_checkpoint, periodic_checkpoint],\n", - " # logger=[tb_logger, csv_logger],\n", - " )" + " # The following are turned false to reduce the output in the training cells below. You can toggle them to true to see\n", + " # a model summary and training progress if desired \n", + " logger=False, \n", + " enable_progress_bar=False, \n", + " enable_model_summary=False, \n", + " enable_checkpointing=False \n", + " )\n", + "\n", + "def calculate_explained_variance( \n", + " model: torch.nn.Module, data_module: DataLoader\n", + ") -> float:\n", + " \"\"\"\n", + " Calculates the explained variance of a model's predictions on a test dataset.\n", + "\n", + " :param test_results: List of test results containing the expected outcomes.\n", + " :type test_results: List[Union[float, int]]\n", + " :param data_module: Data loader for the test dataset.\n", + " :type data_module: DataLoader\n", + " :param model: The model to evaluate.\n", + " :type model: torch.nn.Module\n", + "\n", + " :returns: The explained variance of the model's predictions.\n", + " :rtype: float\n", + " \"\"\"\n", + " predictions = []\n", + " targets = []\n", + "\n", + " model.eval() # Set the model to evaluation mode\n", + " \n", + " with torch.no_grad(): # Disable gradient calculation\n", + " for batch in data_module.test_dataloader():\n", + " x, y = batch\n", + " outputs = model(x).cpu().numpy()\n", + " predictions.extend(outputs)\n", + " targets.extend(y.cpu().numpy())\n", + " \n", + " # Use scikit-learn to calculate explained variance\n", + " if len(targets) > 0:\n", + " explained_variance = explained_variance_score(targets, predictions)\n", + " return explained_variance\n", + " else:\n", + " return None\n", + "\n", + "# These lists will store the test results for different models and data generation methods\n", + "model_ves = []\n", + "linear_model_test_ves = []" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 30, "metadata": {}, 
"outputs": [], "source": [ - "# These lists will store the test results for different models and data generation methods\n", - "model_mses = []\n", - "linear_model_test_mses = []" + "import torch\n", + "from sklearn.metrics import explained_variance_score\n", + "\n", + "data_module = get_data_module(0.0)\n", + "num_tfs = sum(data_module.n_sample)\n", + "model_ves = [] # List to store explained variance for the non-linear model\n", + "linear_model_test_ves = [] # List to store explained variance for the linear model\n", + "\n", + "def calculate_explained_variance(test_results, data_module, model):\n", + " predictions = []\n", + " targets = []\n", + "\n", + " model.eval() # Set the model to evaluation mode\n", + "\n", + " with torch.no_grad(): # Disable gradient calculation\n", + " for batch in data_module.test_dataloader():\n", + " # Assuming your data is in the format (x, y)\n", + " x, y = batch\n", + " outputs = model(x)\n", + " predictions.append(outputs)\n", + " targets.append(y)\n", + " mse = torch.nn.functional.mse_loss(torch.tensor(predictions), torch.tensor(targets)).item()\n", + " var_y = torch.var(torch.tensor(targets)).item() \n", + " explained_variance = 1 - (mse / var_y)\n", + " return explained_variance \n", + "\n", + "# # Function to calculate explained variance from test results\n", + "# def calculate_explained_variance(test_results, data_module, model):\n", + "# \"\"\"\n", + "# Calculates the explained variance score using PyTorch and scikit-learn.\n", + "\n", + "# Args:\n", + "# test_results: The results dictionary from the trainer.test() function.\n", + "# data_module: The data module containing the test dataloader.\n", + "# model: The trained PyTorch model.\n", + "\n", + "# Returns:\n", + "# float: The explained variance score.\n", + "# \"\"\"\n", + "# predictions = []\n", + "# targets = []\n", + "\n", + "# model.eval() # Set the model to evaluation mode\n", + "\n", + "# with torch.no_grad(): # Disable gradient calculation\n", + "# for batch in 
data_module.test_dataloader():\n", + "# # Assuming your data is in the format (x, y)\n", + "# x, y = batch\n", + "# outputs = model(x)\n", + "# predictions.append(outputs)\n", + "# targets.append(y)\n", + "\n", + "# predictions = torch.cat(predictions, dim=0).numpy() # Concatenate predictions\n", + "# targets = torch.cat(targets, dim=0).numpy() # Concatenate targets\n", + "\n", + "# return explained_variance_score(targets, predictions)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Train models on data generated with no mean adjustment" + "### **1) Train models on data generated with no mean adjustment**\n", + "We will first compare the models' performances on data generated without any mean adjustments. This is the simplest dataset we will generate, and serves as a good starting point for the models." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ + "import warnings\n", + "import logging\n", + "\n", + "# Suppress specific warnings\n", + "warnings.filterwarnings(\"ignore\", category=UserWarning, message=\".*torch.tensor.*\")\n", + "warnings.filterwarnings(\"ignore\", category=UserWarning, message=\".*DataLoader.*\")\n", + "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nonlinear Model Explained Variance: 0.24550879001617432\n", + "Linear Model Explained Variance: -0.00506981611251831\n" + ] + } + ], + "source": [ + "# Initialize data module\n", "data_module = get_data_module(0.0)\n", "num_tfs = sum(data_module.n_sample)\n", "\n", - "# nonlinear model\n", + "# --- Nonlinear Model ---\n", "model = get_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(model, data_module)\n", - "test_results = trainer.test(model, datamodule=data_module)\n", - "print(\"Printing test results...\")\n", -
"print(test_results)\n", - "model_mses.append(test_results[0][\"test_mse\"])\n", + "explained_variance = calculate_explained_variance(model, data_module)\n", + "model_ves.append(explained_variance)\n", + "print(\"Nonlinear Model Explained Variance:\", explained_variance)\n", "\n", - "# linear model\n", + "# --- Linear Model ---\n", "linear_model = get_linear_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(linear_model, data_module)\n", - "test_results = trainer.test(linear_model, datamodule=data_module)\n", - "print(\"Printing linear model test results\")\n", - "print(test_results)\n", - "linear_model_test_mses.append(test_results[0][\"test_mse\"])" + "explained_variance_linear = calculate_explained_variance(linear_model, data_module)\n", + "linear_model_test_ves.append(explained_variance_linear)\n", + "\n", + "print(\"Linear Model Explained Variance:\", explained_variance_linear)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Train models on data generated with normal mean adjustments" + "The explained variance for the linear model is, surprisingly, slightly negative, in contrast to the nonlinear, customizable model, which yielded a substantially larger, positive explained variance. This suggests that the customizable model is better able to account for the distribution of the data generated with no mean adjustments. It is interesting to consider whether the same relationship will be observed in the next few conditions as the data generation methods become increasingly complex." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **2) Train models on data generated with normal mean adjustments**\n", + "Now, let us perform the same comparison but using this condition, with a normal mean adjustment of 3."
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nonlinear Model Explained Variance (Method 2): 0.2549255728721619\n", + "Linear Model Explained Variance (Method 2): 0.07210595607757568\n" + ] + } + ], "source": [ "data_module = get_data_module(3.0)\n", "num_tfs = sum(data_module.n_sample)\n", "\n", - "# nonlinear model\n", + "# Nonlinear model\n", "model = get_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(model, data_module)\n", - "test_results = trainer.test(model, datamodule=data_module)\n", - "print(\"Printing test results...\")\n", - "print(test_results)\n", - "model_mses.append(test_results[0][\"test_mse\"])\n", + "explained_variance = calculate_explained_variance(model, data_module)\n", + "model_ves.append(explained_variance)\n", + "print(\"Nonlinear Model Explained Variance (Method 2):\", explained_variance)\n", "\n", - "# linear model\n", + "# Linear model\n", "linear_model = get_linear_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(linear_model, data_module)\n", - "test_results = trainer.test(linear_model, datamodule=data_module)\n", - "print(\"Printing linear model test results\")\n", - "print(test_results)\n", - "linear_model_test_mses.append(test_results[0][\"test_mse\"])" + "explained_variance_linear = calculate_explained_variance(linear_model, data_module)\n", + "linear_model_test_ves.append(explained_variance_linear)\n", + "print(\"Linear Model Explained Variance (Method 2):\", explained_variance_linear)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once again, a similar explained variance metric was obtained using both models. However, this time, the simple linear model achieved a positive value, meaning that it was able to somewhat account for the distribution of the data. 
Still, the difference in explained variance scores once again suggests that the nonlinear, customizable model performs substantially better than the simple linear model on the data generated with a mean adjustment of 3. It seems that the additional parameters in the nonlinear neural network accommodate the complexity of the data better than the simple linear model." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Train model on data generated with dependent mean adjustments (method 3)" + "### **3) Train models on data generated with dependent mean adjustments**\n", + "Now we are implementing a dataset that contains dependent mean adjustments: a mean adjustment of 3 is applied only if the TF meets the criteria defined by the `tf_relationships_dict` dictionary defined earlier in this notebook." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nonlinear Model Explained Variance (Method 3): 0.18576881289482117\n", + "Linear Model Explained Variance (Method 3): 0.00479055643081665\n" + ] + } + ], "source": [ - "# define dictionary of relations between TFs (see generate_in_silico_data.ipynb for an explanation of how this dict is defined / used)\n", - "tf_relationships_dict = {\n", - " 0: [1],\n", - " 1: [8],\n", - " 2: [5, 6],\n", - " 3: [4],\n", - " 4: [5],\n", - " 5: [9],\n", - " 6: [4],\n", - " 7: [1, 4],\n", - " 8: [6],\n", - " 9: [4],\n", - "}\n", - "\n", - "data_module = get_data_module(\n", - " 3.0, \n", - " adjustment_function=perturbation_effect_adjustment_function_with_tf_relationships, \n", - " tf_relationships_dict=tf_relationships_dict\n", - ")\n", + "data_module = get_data_module(3.0, perturbation_effect_adjustment_function_with_tf_relationships, tf_relationships_dict)\n", "num_tfs = sum(data_module.n_sample)\n", "\n", - "print(\"Number of TFs: \", num_tfs)\n", - "\n", - "# nonlinear model\n", + "# Nonlinear
model\n", "model = get_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(model, data_module)\n", - "test_results = trainer.test(model, datamodule=data_module)\n", - "print(\"Printing test results...\")\n", - "print(test_results)\n", - "model_mses.append(test_results[0][\"test_mse\"])\n", + "explained_variance = calculate_explained_variance(model, data_module)\n", + "model_ves.append(explained_variance)\n", + "print(\"Nonlinear Model Explained Variance (Method 3):\", explained_variance)\n", "\n", - "# linear model\n", + "# Linear model\n", "linear_model = get_linear_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(linear_model, data_module)\n", - "test_results = trainer.test(linear_model, datamodule=data_module)\n", - "print(\"Printing linear model test results\")\n", - "print(test_results)\n", - "linear_model_test_mses.append(test_results[0][\"test_mse\"])" + "explained_variance_linear = calculate_explained_variance(linear_model, data_module)\n", + "linear_model_test_ves.append(explained_variance_linear)\n", + "print(\"Linear Model Explained Variance (Method 3):\", explained_variance_linear)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Train models on data generated using the binary relations between TFs (method 4)" + "It appears once again that the customizable model obtains a more positive and larger explained variance compared to the simple linear model when implementing dependencies among TFs. It is possible that the added layer of complexity makes it more difficult for the simple linear model to make an accurate prediction. Lastly, it would be interesting to consider how the models will perform on data including more complex dependencies that involve binary relations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **4) Train models on data generated using the binary relations between TFs**\n", + "Similar to the previous condition, we are implementing dependencies between TFs. 
However, the `tf_relationships_dict_boolean_logic` dictionary defined earlier in this notebook uses boolean logic (`And` / `Or`) that makes these dependencies far more complex. For example, for the mean of transcription factor 4 to be adjusted, TF 4 itself must be bound and both TFs 1 and 2 must be bound as well. This additional layer of complexity will be an interesting challenge: let us see how the two models perform here." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nonlinear Model Explained Variance (Method 4): 0.1844494581222534\n", + "Linear Model Explained Variance (Method 4): 0.012156563997268676\n" + ] + } + ], "source": [ - "tf_relationships_dict_boolean_logic = {\n", - " 0: [And(3, 4, 8), Or(3, 7), Or(1, 1)],\n", - " 1: [And(5, Or(7, 8))],\n", - " 2: [],\n", - " 3: [Or(7, 9), And(6, 7)],\n", - " 4: [And(1, 2)],\n", - " 5: [Or(0, 1, 2, 8, 9)],\n", - " 6: [And(0, Or(1, 2))],\n", - " 7: [Or(2, And(5, 6, 9))],\n", - " 8: [],\n", - " 9: [And(6, And(3, Or(0, 9)))],\n", - "}\n", - "\n", "data_module = get_data_module(\n", " 3.0, \n", " adjustment_function=perturbation_effect_adjustment_function_with_tf_relationships_boolean_logic, \n", - " tf_relationships_dict=tf_relationships_dict_boolean_logic\n", - ")\n", + " tf_relationships_dict=tf_relationships_dict_boolean_logic)\n", + "num_tfs = sum(data_module.n_sample)\n", "\n", - "# nonlinear model\n", + "# Nonlinear model\n", "model = get_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(model, data_module)\n", - "test_results = trainer.test(model, datamodule=data_module)\n", - "print(\"Printing test results...\")\n", - "print(test_results)\n", - "model_mses.append(test_results[0][\"test_mse\"])\n", + "explained_variance = calculate_explained_variance(model, data_module)\n", + "model_ves.append(explained_variance)\n", + "print(\"Nonlinear Model Explained Variance
(Method 4):\", explained_variance)\n", "\n", - "# linear model\n", + "# Linear model\n", "linear_model = get_linear_model(num_tfs)\n", "trainer = get_trainer()\n", "trainer.fit(linear_model, data_module)\n", - "test_results = trainer.test(linear_model, datamodule=data_module)\n", - "print(\"Printing linear model test results\")\n", - "print(test_results)\n", - "linear_model_test_mses.append(test_results[0][\"test_mse\"])" + "explained_variance_linear = calculate_explained_variance(linear_model, data_module)\n", + "linear_model_test_ves.append(explained_variance_linear)\n", + "print(\"Linear Model Explained Variance (Method 4):\", explained_variance_linear)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once again, our customizable model outperforms the simple linear model, obtaining a higher explained variance. Surprisingly, both models achieve explained variance scores that are similar to their scores in the previous condition, which used simple dependencies among TFs. This may be of further interest and could use more research to determine exactly why this is occurring." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we can plot the results of our experiment. TODO add explantion for plot here? Probably not the right place to put it (I feel like that belongs in the presentation or something, because this notebook could be modified and the explanation wouldn't make sense)" + "## **Visualizing the Explained Variance**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can plot the results across each of the 4 conditions tested above to visualize how the simple linear model and the nonlinear, customizable model perform compared to one another with regard to their explained variance scores."
] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -594,22 +798,29 @@ "source": [ "data_gen_methods = [\"No Mean Adjustment\", \"Dependent Mean Adjustment\", \"TF Dependent Mean Adjustment\", \"TF Dependent Mean Adjust with Boolean Logic\"]\n", "plt.figure(figsize=(10, 6))\n", - "plt.scatter(data_gen_methods, model_mses, color='blue')\n", - "plt.scatter(data_gen_methods, linear_model_test_mses, color='orange')\n", - "plt.title('Model MSE Comparison (bound mean = 3.0)')\n", + "plt.scatter(data_gen_methods, model_ves, color='blue')\n", + "plt.scatter(data_gen_methods, linear_model_test_ves, color='orange')\n", + "plt.title('Model VE Comparison (bound mean = 3.0)')\n", "plt.xlabel('Model')\n", - "plt.ylabel('MSE')\n", + "plt.ylabel('Variance Explained')\n", "plt.grid(True)\n", "plt.xticks(rotation=45, ha=\"right\")\n", "plt.legend(['Complex (Customizable) Model', 'Linear Model'])\n", - "plt.tight_layout() # Adjust layout to make room for the rotated x-axis labels\n", + "plt.tight_layout()\n", "plt.show()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The x-axis labels the method in which the data was generated according to the 4 options above. The y-axis represents the corresponding variance explained attained by these models. Each point represents the variance explained achieved after generating the data based on the x-axis, and the color of the point represents which model architecture was trained on the data resulting in the specificed explained variance. Now, we can clearly see that across the 4 conditions, the nonlinear, customizable model acheives a significantly higher positive explained variance compared to the simple linear model, which is good because it helps to confirm that the nonlinear model we are using is able to train on the data and better account for the distribution of the data, resulting in a higher explained variance compared to the simple linear model. 
" + ] } ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -623,9 +834,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/experiments/simple_model_synthetic_data.py b/experiments/simple_model_synthetic_data.py index 021841d..f0eb4fd 100644 --- a/experiments/simple_model_synthetic_data.py +++ b/experiments/simple_model_synthetic_data.py @@ -61,7 +61,7 @@ def simple_model_synthetic_data_experiment( data_module = SyntheticDataLoader( batch_size=batch_size, num_genes=1000, - signal=[0.1, 0.15, 0.2, 0.25, 0.3], + bound=[0.1, 0.15, 0.2, 0.25, 0.3], n_sample=[1, 1, 2, 2, 4], val_size=0.1, test_size=0.1, diff --git a/yeastdnnexplorer/data_loaders/real_data_loader.py b/yeastdnnexplorer/data_loaders/real_data_loader.py index 4c914a6..0f51ab1 100644 --- a/yeastdnnexplorer/data_loaders/real_data_loader.py +++ b/yeastdnnexplorer/data_loaders/real_data_loader.py @@ -222,7 +222,7 @@ def prepare_data(self) -> None: perturbation_pvalues.values, dtype=torch.float64 ) - # note that we no longer have a signal / noise tensor + # note that we no longer have a bound / unbound tensor # (like for the synthetic data) self.final_data_tensor = torch.stack( [ diff --git a/yeastdnnexplorer/data_loaders/synthetic_data_loader.py b/yeastdnnexplorer/data_loaders/synthetic_data_loader.py index 8c53670..858cacd 100644 --- a/yeastdnnexplorer/data_loaders/synthetic_data_loader.py +++ b/yeastdnnexplorer/data_loaders/synthetic_data_loader.py @@ -27,8 +27,8 @@ def __init__( self, batch_size: int = 32, num_genes: int = 1000, - signal: list[float] = [0.1, 0.2, 0.2, 0.4, 0.5], - signal_mean: float = 3.0, + bound: list[float] = [0.1, 0.2, 0.2, 0.4, 0.5], + bound_mean: float = 3.0, n_sample: list[int] = [1, 2, 2, 4, 4], val_size: float = 0.1, test_size: float = 
0.1, @@ -47,10 +47,10 @@ def __init__( :param num_genes: The number of genes in the synthetic data (this is the number of datapoints in our dataset) :type num_genes: int - :param signal: The proportion of genes in each sample group that are put in the - signal grop (i.e. have a non-zero binding effect and expression response) - :type signal: List[int] - :param n_sample: The number of samples to draw from each signal group + :param bound: The proportion of genes in each sample group that are put in the + bound grop (i.e. have a non-zero binding effect and expression response) + :type bound: List[int] + :param n_sample: The number of samples to draw from each bound group :type n_sample: List[int] :param val_size: The proportion of the dataset to include in the validation split @@ -60,23 +60,23 @@ def __init__( :param random_state: The random seed to use for splitting the data (keep this consistent to ensure reproduceability) :type random_state: int - :param signal_mean: The mean of the signal distribution - :type signal_mean: float + :param bound_mean: The mean of the bound distribution + :type bound_mean: float :param max_mean_adjustment: The maximum mean adjustment to apply to the mean - of the signal (bound) perturbation effects + of the bound (bound) perturbation effects :type max_mean_adjustment: float - :param adjustment_function: A function that adjusts the mean of the signal + :param adjustment_function: A function that adjusts the mean of the bound (bound) perturbation effects :type adjustment_function: Callable[[torch.Tensor, float, float, float, dict[int, list[int]]], torch.Tensor] :raises TypeError: If batch_size is not an positive integer :raises TypeError: If num_genes is not an positive integer - :raises TypeError: If signal is not a list of integers or floats + :raises TypeError: If bound is not a list of integers or floats :raises TypeError: If n_sample is not a list of integers :raises TypeError: If val_size is not a float between 0 and 1 
(inclusive) :raises TypeError: If test_size is not a float between 0 and 1 (inclusive) :raises TypeError: If random_state is not an integer - :raises TypeError: If signal_mean is not a float + :raises TypeError: If bound_mean is not a float :raises ValueError: If val_size + test_size is greater than 1 (i.e. the splits are too large) @@ -85,10 +85,10 @@ def __init__( raise TypeError("batch_size must be a positive integer") if not isinstance(num_genes, int) or num_genes < 1: raise TypeError("num_genes must be a positive integer") - if not isinstance(signal, list) or not all( - isinstance(x, (int, float)) for x in signal + if not isinstance(bound, list) or not all( + isinstance(x, (int, float)) for x in bound ): - raise TypeError("signal must be a list of integers or floats") + raise TypeError("bound must be a list of integers or floats") if not isinstance(n_sample, list) or not all( isinstance(x, int) for x in n_sample ): @@ -99,17 +99,17 @@ def __init__( raise TypeError("test_size must be a float between 0 and 1 (inclusive)") if not isinstance(random_state, int): raise TypeError("random_state must be an integer") - if not isinstance(signal_mean, float): - raise TypeError("signal_mean must be a float") + if not isinstance(bound_mean, float): + raise TypeError("bound_mean must be a float") if test_size + val_size > 1: raise ValueError("val_size + test_size must be less than or equal to 1") super().__init__() self.batch_size = batch_size self.num_genes = num_genes - self.signal_mean = signal_mean - self.signal = signal or [0.1, 0.15, 0.2, 0.25, 0.3] - self.n_sample = n_sample or [1 for _ in range(len(self.signal))] + self.bound_mean = bound_mean + self.bound = bound or [0.1, 0.15, 0.2, 0.25, 0.3] + self.n_sample = n_sample or [1 for _ in range(len(self.bound))] self.num_tfs = sum(self.n_sample) # sum of all n_sample is the number of TFs self.val_size = val_size self.test_size = test_size @@ -132,10 +132,10 @@ def prepare_data(self) -> None: performed as that is handled 
in the functions in generate_data.py.""" # this will be a list of length 10 with a GenePopulation object in each element gene_populations_list = [] - for signal_proportion, n_draws in zip(self.signal, self.n_sample): + for bound_proportion, n_draws in zip(self.bound, self.n_sample): for _ in range(n_draws): gene_populations_list.append( - generate_gene_population(self.num_genes, signal_proportion) + generate_gene_population(self.num_genes, bound_proportion) ) # Generate binding data for each gene population @@ -166,7 +166,7 @@ def prepare_data(self) -> None: if self.max_mean_adjustment > 0: perturbation_effects_list = generate_perturbation_effects( binding_data_tensor, - signal_mean=self.signal_mean, + bound_mean=self.bound_mean, tf_index=0, # unused max_mean_adjustment=self.max_mean_adjustment, adjustment_function=self.adjustment_function, @@ -188,7 +188,7 @@ def prepare_data(self) -> None: perturbation_effects_list = [ generate_perturbation_effects( binding_data_tensor[:, tf_index, :].unsqueeze(1), - signal_mean=self.signal_mean, + bound_mean=self.bound_mean, tf_index=0, # unused ) for tf_index in range(sum(self.n_sample)) diff --git a/yeastdnnexplorer/probability_models/generate_data.py b/yeastdnnexplorer/probability_models/generate_data.py index f6d8d4b..a0b6b40 100644 --- a/yeastdnnexplorer/probability_models/generate_data.py +++ b/yeastdnnexplorer/probability_models/generate_data.py @@ -39,39 +39,39 @@ def __repr__(self): def generate_gene_population( - total: int = 1000, signal_group: float = 0.3 + total: int = 1000, bound_group: float = 0.3 ) -> GenePopulation: """ Generate two sets of genes, one of which will be considered genes which show a - signal, and the other which does not. The return is a one dimensional boolean tensor - where a value of '0' means that the gene at that index is part of the noise group - and a '1' means the gene at that index is part of the signal group. The length of - the tensor is the number of genes in this simulated organism. 
+ bound, and the other which does not. The return is a one dimensional boolean tensor + where a value of '0' means that the gene at that index is part of the unbound group + and a '1' means the gene at that index is part of the bound group. The length of the + tensor is the number of genes in this simulated organism. :param total: The total number of genes. defaults to 1000 :type total: int, optional - :param signal_group: The proportion of genes in the signal group. defaults to 0.3 - :type signal_group: float, optional + :param bound_group: The proportion of genes in the bound group. defaults to 0.3 + :type bound_group: float, optional :return: A one dimensional tensor of boolean values where the set of indices with a - value of '1' are the signal group and the set of indices with a value of '0' are - the noise group. + value of '1' are the bound group and the set of indices with a value of '0' are + the unbound group. :rtype: GenePopulation :raises TypeError: if total is not an integer - :raises ValueError: If signal_group is not between 0 and 1 + :raises ValueError: If bound_group is not between 0 and 1 """ if not isinstance(total, int): raise TypeError("total must be an integer") - if not 0 <= signal_group <= 1: - raise ValueError("signal_group must be between 0 and 1") + if not 0 <= bound_group <= 1: + raise ValueError("bound_group must be between 0 and 1") - signal_group_size = int(total * signal_group) - logger.info("Generating %s genes with signal", signal_group_size) + bound_group_size = int(total * bound_group) + logger.info("Generating %s genes with bound", bound_group_size) labels = torch.cat( ( - torch.ones(signal_group_size, dtype=torch.bool), - torch.zeros(total - signal_group_size, dtype=torch.bool), + torch.ones(bound_group_size, dtype=torch.bool), + torch.zeros(total - bound_group_size, dtype=torch.bool), ) )[torch.randperm(total)] @@ -81,15 +81,15 @@ def generate_gene_population( def generate_binding_effects( gene_population: GenePopulation, 
background_hops_range: tuple[int, int] = (1, 100), - noise_experiment_hops_range: tuple[int, int] = (0, 1), - signal_experiment_hops_range: tuple[int, int] = (1, 6), + unbound_experiment_hops_range: tuple[int, int] = (0, 1), + bound_experiment_hops_range: tuple[int, int] = (1, 6), total_background_hops: int = 1000, total_experiment_hops: int = 76, pseudocount: float = 1e-10, ) -> torch.Tensor: """ Generate enrichment effects for genes using vectorized operations, based on their - signal designation, with separate experiment hops ranges for noise and signal genes. + bound designation, with separate experiment hops ranges for unbound and bound genes. Note that the default values are a scaled down version of actual data. See also https://github.com/cmatKhan/callingCardsTools/blob/main/callingcardstools/PeakCalling/yeast/enrichment.py @@ -99,12 +99,12 @@ def generate_binding_effects( :param background_hops_range: The range of hops for background genes. Defaults to (1, 100) :type background_hops_range: Tuple[int, int], optional - :param noise_experiment_hops_range: The range of hops for noise genes. Defaults to - (0, 1) - :type noise_experiment_hops_range: Tuple[int, int], optional - :param signal_experiment_hops_range: The range of hops for signal genes. Defaults to + :param unbound_experiment_hops_range: The range of hops for unbound genes. Defaults + to (0, 1) + :type unbound_experiment_hops_range: Tuple[int, int], optional + :param bound_experiment_hops_range: The range of hops for bound genes. Defaults to (1, 6) - :type signal_experiment_hops_range: Tuple[int, int], optional + :type bound_experiment_hops_range: Tuple[int, int], optional :param total_background_hops: The total number of background hops. Defaults to 1000 :type total_background_hops: int, optional :param total_experiment_hops: The total number of experiment hops. 
Defaults to 76 @@ -118,11 +118,11 @@ def generate_binding_effects( :raises TypeError: If total_experiment_hops is not an integer :raises TypeError: If pseudocount is not a float :raises TypeError: If background_hops_range is not a tuple - :raises TypeError: If noise_experiment_hops_range is not a tuple - :raises TypeError: If signal_experiment_hops_range is not a tuple + :raises TypeError: If unbound_experiment_hops_range is not a tuple + :raises TypeError: If bound_experiment_hops_range is not a tuple :raises ValueError: If background_hops_range is not a tuple of length 2 - :raises ValueError: If noise_experiment_hops_range is not a tuple of length 2 - :raises ValueError: If signal_experiment_hops_range is not a tuple of length 2 + :raises ValueError: If unbound_experiment_hops_range is not a tuple of length 2 + :raises ValueError: If bound_experiment_hops_range is not a tuple of length 2 """ # NOTE: torch intervals are half open on the right, so we add 1 to the @@ -139,8 +139,8 @@ def generate_binding_effects( raise TypeError("pseudocount must be a float") for arg, tup in { "background_hops_range": background_hops_range, - "noise_experiment_hops_range": noise_experiment_hops_range, - "signal_experiment_hops_range": signal_experiment_hops_range, + "unbound_experiment_hops_range": unbound_experiment_hops_range, + "bound_experiment_hops_range": bound_experiment_hops_range, }.items(): if not isinstance(tup, tuple): raise TypeError(f"{arg} must be a tuple") @@ -156,22 +156,22 @@ def generate_binding_effects( size=(gene_population.labels.shape[0],), ) - # Generate experiment hops noise genes - noise_experiment_hops = torch.randint( - low=noise_experiment_hops_range[0], - high=noise_experiment_hops_range[1] + 1, + # Generate experiment hops unbound genes + unbound_experiment_hops = torch.randint( + low=unbound_experiment_hops_range[0], + high=unbound_experiment_hops_range[1] + 1, size=(gene_population.labels.shape[0],), ) - # Generate experiment hops signal genes - 
signal_experiment_hops = torch.randint( - low=signal_experiment_hops_range[0], - high=signal_experiment_hops_range[1] + 1, + # Generate experiment hops bound genes + bound_experiment_hops = torch.randint( + low=bound_experiment_hops_range[0], + high=bound_experiment_hops_range[1] + 1, size=(gene_population.labels.shape[0],), ) - # Use signal designation to select appropriate experiment hops + # Use bound designation to select appropriate experiment hops experiment_hops = torch.where( - gene_population.labels == 1, signal_experiment_hops, noise_experiment_hops + gene_population.labels == 1, bound_experiment_hops, unbound_experiment_hops ) # Calculate enrichment for all genes @@ -230,8 +230,8 @@ def generate_pvalues( def default_perturbation_effect_adjustment_function( binding_enrichment_data: torch.Tensor, - signal_mean: float, - noise_mean: float, + bound_mean: float, + unbound_mean: float, max_adjustment: float, **kwargs, ) -> torch.Tensor: @@ -246,10 +246,10 @@ def default_perturbation_effect_adjustment_function( dimensions [n_genes, n_tfs, 3] where the entries in the third dimension are a matrix with columns [label, enrichment, pvalue]. :type binding_enrichment_data: torch.Tensor - :param signal_mean: The mean for signal genes. - :type signal_mean: float - :param noise_mean: The mean for noise genes. - :type noise_mean: float + :param bound_mean: The mean for bound genes. + :type bound_mean: float + :param unbound_mean: The mean for unbound genes. + :type unbound_mean: float :param max_adjustment: The maximum adjustment to the base mean based on enrichment. :type max_adjustment: float :param tf_relationships: Unused in this function. 
It is only here to match the @@ -259,37 +259,39 @@ def default_perturbation_effect_adjustment_function( :rtype: torch.Tensor """ - # Extract signal/noise labels and enrichment scores - signal_labels = binding_enrichment_data[:, :, 0] + # Extract bound/unbound labels and enrichment scores + bound_labels = binding_enrichment_data[:, :, 0] enrichment_scores = binding_enrichment_data[:, :, 1] adjusted_mean_matrix = torch.where( - signal_labels == 1, enrichment_scores, torch.zeros_like(enrichment_scores) + bound_labels == 1, enrichment_scores, torch.zeros_like(enrichment_scores) ) - for gene_idx in range(signal_labels.shape[0]): - for tf_index in range(signal_labels.shape[1]): - if signal_labels[gene_idx, tf_index] == 1: - # draw a random value between 0 and 1 to use to control - # magnitude of adjustment - adjustment_multiplier = torch.rand(1) + for gene_idx in range(bound_labels.shape[0]): + for tf_index in range(bound_labels.shape[1]): + if bound_labels[gene_idx, tf_index] == 1: + # divide its enrichment score by the maximum magnitude possible to + # create an adjustment multipler that scales with increasing enrichment + adjustment_multiplier = enrichment_scores[gene_idx, tf_index] / abs( + enrichment_scores.max() * 10 + ) # randomly adjust the gene by some portion of the max adjustment - adjusted_mean_matrix[gene_idx, tf_index] = signal_mean + ( + adjusted_mean_matrix[gene_idx, tf_index] = bound_mean + ( adjustment_multiplier * max_adjustment ) else: # related tfs are not all bound, so set the enrichment - # score to noise mean - adjusted_mean_matrix[gene_idx, tf_index] = noise_mean + # score to unbound mean + adjusted_mean_matrix[gene_idx, tf_index] = unbound_mean return adjusted_mean_matrix def perturbation_effect_adjustment_function_with_tf_relationships_boolean_logic( binding_enrichment_data: torch.Tensor, - signal_mean: float, - noise_mean: float, + bound_mean: float, + unbound_mean: float, max_adjustment: float, tf_relationships: dict[int, list[Relation]], ) 
-> torch.Tensor: @@ -307,10 +309,10 @@ def perturbation_effect_adjustment_function_with_tf_relationships_boolean_logic( dimensions [n_genes, n_tfs, 3] where the entries in the third dimension are a matrix with columns [label, enrichment, pvalue]. :type binding_enrichment_data: torch.Tensor - :param signal_mean: The mean for signal genes. - :type signal_mean: float - :param noise_mean: The mean for noise genes. - :type noise_mean: float + :param bound_mean: The mean for bound genes. + :type bound_mean: float + :param unbound_mean: The mean for unbound genes. + :type unbound_mean: float :param max_adjustment: The maximum adjustment to the base mean based on enrichment. :type max_adjustment: float :param tf_relationships: A dictionary where the keys are TF indices and the values @@ -354,43 +356,47 @@ def perturbation_effect_adjustment_function_with_tf_relationships_boolean_logic( the binding_data tensor passed into the function" ) - # Extract signal/noise labels and enrichment scores - signal_labels = binding_enrichment_data[:, :, 0] # shape: (num_genes, num_tfs) + # Extract bound/unbound labels and enrichment scores + bound_labels = binding_enrichment_data[:, :, 0] # shape: (num_genes, num_tfs) enrichment_scores = binding_enrichment_data[:, :, 1] # shape: (num_genes, num_tfs) # we set all unbound scores to 0, then we will go through and also set any - # bound scores to noise_mean if the related boolean statements are not satisfied + # bound scores to unbound_mean if the related boolean statements are not satisfied adjusted_mean_matrix = torch.where( - signal_labels == 1, enrichment_scores, torch.zeros_like(enrichment_scores) + bound_labels == 1, enrichment_scores, torch.zeros_like(enrichment_scores) ) # shape: (num_genes, num_tfs) - for gene_idx in range(signal_labels.shape[0]): + for gene_idx in range(bound_labels.shape[0]): for tf_index, relations in tf_relationships.items(): # check if all relations (boolean relationships) # associated with TFs are satisfied - if 
signal_labels[gene_idx, tf_index] == 1 and all( - relation.evaluate(signal_labels[gene_idx].tolist()) + if bound_labels[gene_idx, tf_index] == 1 and all( + relation.evaluate(bound_labels[gene_idx].tolist()) for relation in relations ): - # draw a random value between 0 and 1 to use to - # control magnitude of adjustment - adjustment_multiplier = torch.rand(1) + # OLD: adjustment_multiplier = torch.rand(1) + # divide its enrichment score by the maximum magnitude possible to + # create an adjustment multipler that scales with increasing enrichment + adjustment_multiplier = enrichment_scores[gene_idx, tf_index] / abs( + enrichment_scores.max() + ) # randomly adjust the gene by some portion of the max adjustment - adjusted_mean_matrix[gene_idx, tf_index] = signal_mean + ( + adjusted_mean_matrix[gene_idx, tf_index] = bound_mean + ( adjustment_multiplier * max_adjustment ) else: - # related tfs are not all bound, set the enrichment score to noise mean - adjusted_mean_matrix[gene_idx, tf_index] = noise_mean + # related tfs are not all bound, set the enrichment score to unbound + # mean + adjusted_mean_matrix[gene_idx, tf_index] = unbound_mean return adjusted_mean_matrix # shape (num_genes, num_tfs) def perturbation_effect_adjustment_function_with_tf_relationships( binding_enrichment_data: torch.Tensor, - signal_mean: float, - noise_mean: float, + bound_mean: float, + unbound_mean: float, max_adjustment: float, tf_relationships: dict[int, list[int]], ) -> torch.Tensor: @@ -405,10 +411,10 @@ def perturbation_effect_adjustment_function_with_tf_relationships( dimensions [n_genes, n_tfs, 3] where the entries in the third dimension are a matrix with columns [label, enrichment, pvalue]. :type binding_enrichment_data: torch.Tensor - :param signal_mean: The mean for signal genes. - :type signal_mean: float - :param noise_mean: The mean for noise genes. - :type noise_mean: float + :param bound_mean: The mean for bound genes. 
+ :type bound_mean: float + :param unbound_mean: The mean for unbound genes. + :type unbound_mean: float :param max_adjustment: The maximum adjustment to the base mean based on enrichment. :type max_adjustment: float :param tf_relationships: A dictionary where the keys are the indices of the TFs and @@ -451,32 +457,36 @@ def perturbation_effect_adjustment_function_with_tf_relationships( binding_data tensor passed into the function" ) - # Extract signal/noise labels and enrichment scores - signal_labels = binding_enrichment_data[:, :, 0] # shape: (num_genes, num_tfs) + # Extract bound/unbound labels and enrichment scores + bound_labels = binding_enrichment_data[:, :, 0] # shape: (num_genes, num_tfs) enrichment_scores = binding_enrichment_data[:, :, 1] # shape: (num_genes, num_tfs) # we set all unbound scores to 0, then we will go through and also - # set any bound scores to noise_mean if the related tfs are not also bound + # set any bound scores to unbound_mean if the related tfs are not also bound adjusted_mean_matrix = torch.where( - signal_labels == 1, enrichment_scores, torch.zeros_like(enrichment_scores) + bound_labels == 1, enrichment_scores, torch.zeros_like(enrichment_scores) ) # shape: (num_genes, num_tfs) - for gene_idx in range(signal_labels.shape[0]): + for gene_idx in range(bound_labels.shape[0]): for tf_index, related_tfs in tf_relationships.items(): - if signal_labels[gene_idx, tf_index] == 1 and torch.all( - signal_labels[gene_idx, related_tfs] == 1 + if bound_labels[gene_idx, tf_index] == 1 and torch.all( + bound_labels[gene_idx, related_tfs] == 1 ): - # draw a random value between 0 and 1 to use to - # control magnitude of adjustment - adjustment_multiplier = torch.rand(1) + # OLD: adjustment_multiplier = torch.rand(1) + # divide its enrichment score by the maximum magnitude possible to + # create an adjustment multipler that scales with increasing enrichment + adjustment_multiplier = enrichment_scores[gene_idx, tf_index] / abs( + 
enrichment_scores.max() + ) # randomly adjust the gene by some portion of the max adjustment - adjusted_mean_matrix[gene_idx, tf_index] = signal_mean + ( + adjusted_mean_matrix[gene_idx, tf_index] = bound_mean + ( adjustment_multiplier * max_adjustment ) else: - # related tfs are not all bound, set the enrichment score to noise mean - adjusted_mean_matrix[gene_idx, tf_index] = noise_mean + # related tfs are not all bound, set the enrichment score to unbound + # mean + adjusted_mean_matrix[gene_idx, tf_index] = unbound_mean return adjusted_mean_matrix # shape (num_genes, num_tfs) @@ -484,10 +494,10 @@ def perturbation_effect_adjustment_function_with_tf_relationships( def generate_perturbation_effects( binding_data: torch.Tensor, tf_index: int | None = None, - noise_mean: float = 0.0, - noise_std: float = 1.0, - signal_mean: float = 3.0, - signal_std: float = 1.0, + unbound_mean: float = 0.0, + unbound_std: float = 1.0, + bound_mean: float = 3.0, + bound_std: float = 1.0, max_mean_adjustment: float = 0.0, adjustment_function: Callable[ [torch.Tensor, float, float, float], torch.Tensor @@ -512,14 +522,14 @@ def generate_perturbation_effects( are adjusting the means (ie only used if max_mean_adjustment == 0). Defaults to None :type tf_index: int - :param noise_mean: The mean for noise genes. Defaults to 0.0 - :type noise_mean: float, optional - :param noise_std: The standard deviation for noise genes. Defaults to 1.0 - :type noise_std: float, optional - :param signal_mean: The mean for signal genes. Defaults to 3.0 - :type signal_mean: float, optional - :param signal_std: The standard deviation for signal genes. Defaults to 1.0 - :type signal_std: float, optional + :param unbound_mean: The mean for unbound genes. Defaults to 0.0 + :type unbound_mean: float, optional + :param unbound_std: The standard deviation for unbound genes. Defaults to 1.0 + :type unbound_std: float, optional + :param bound_mean: The mean for bound genes. 
Defaults to 3.0 + :type bound_mean: float, optional + :param bound_std: The standard deviation for bound genes. Defaults to 1.0 + :type bound_std: float, optional :param max_mean_adjustment: The maximum adjustment to the base mean based on enrichment. Defaults to 0.0 :type max_mean_adjustment: float, optional @@ -529,7 +539,7 @@ def generate_perturbation_effects( :raises ValueError: If binding_data is not a 3D tensor with the third dimension having a length of 3 - :raises ValueError: If noise_mean, noise_std, signal_mean, signal_std, + :raises ValueError: If unbound_mean, unbound_std, bound_mean, bound_std, or max_mean_adjustment are not floats """ @@ -545,10 +555,10 @@ def generate_perturbation_effects( # check the rest of the inputs if not all( isinstance(i, float) - for i in (noise_mean, noise_std, signal_mean, signal_std, max_mean_adjustment) + for i in (unbound_mean, unbound_std, bound_mean, bound_std, max_mean_adjustment) ): raise ValueError( - "noise_mean, noise_std, signal_mean, signal_std, " + "unbound_mean, unbound_std, bound_mean, bound_std, " "and max_mean_adjustment must be floats" ) # check the Callable signature @@ -556,14 +566,14 @@ def generate_perturbation_effects( i in inspect.signature(adjustment_function).parameters for i in ( "binding_enrichment_data", - "signal_mean", - "noise_mean", + "bound_mean", + "unbound_mean", "max_adjustment", ) ): raise ValueError( "adjustment_function must have the signature " - "(binding_enrichment_data, signal_mean, noise_mean, max_adjustment)" + "(binding_enrichment_data, bound_mean, unbound_mean, max_adjustment)" ) # Initialize an effects tensor for all genes @@ -578,16 +588,16 @@ def generate_perturbation_effects( device=binding_data.device) * 2 - 1 # fmt: on - # Apply adjustments to the base mean for the signal genes, if necessary + # Apply adjustments to the base mean for the bound genes, if necessary if max_mean_adjustment > 0 and adjustment_function is not None: # Assuming adjustment_function returns a 
vector of means for each gene. - # Signal genes that meet the criteria for adjustment will be affected by + # bound genes that meet the criteria for adjustment will be affected by # the status of the TFs. What TFs affect a given gene must be specified by # the adjustment_function() adjusted_means = adjustment_function( binding_data, - signal_mean, - noise_mean, + bound_mean, + unbound_mean, max_mean_adjustment, **kwargs, ) @@ -595,27 +605,25 @@ def generate_perturbation_effects( # add adjustments, ensuring they respect the original sign if adjusted_means.ndim == 1: effects = signs * torch.abs( - torch.normal(mean=adjusted_means, std=signal_std) + torch.normal(mean=adjusted_means, std=bound_std) ) else: effects = torch.zeros_like(adjusted_means) for col_idx in range(effects.size(1)): effects[:, col_idx] = signs * torch.abs( - torch.normal(mean=adjusted_means[:, col_idx], std=signal_std) + torch.normal(mean=adjusted_means[:, col_idx], std=bound_std) ) else: - signal_mask = binding_data[:, tf_index, 0] == 1 + bound_mask = binding_data[:, tf_index, 0] == 1 - # Generate effects based on the noise and signal means, applying the sign - effects[~signal_mask] = signs[~signal_mask] * torch.abs( + # Generate effects based on the unbound and bound means, applying the sign + effects[~bound_mask] = signs[~bound_mask] * torch.abs( torch.normal( - mean=noise_mean, std=noise_std, size=(torch.sum(~signal_mask),) + mean=unbound_mean, std=unbound_std, size=(torch.sum(~bound_mask),) ) ) - effects[signal_mask] = signs[signal_mask] * torch.abs( - torch.normal( - mean=signal_mean, std=signal_std, size=(torch.sum(signal_mask),) - ) + effects[bound_mask] = signs[bound_mask] * torch.abs( + torch.normal(mean=bound_mean, std=bound_std, size=(torch.sum(bound_mask),)) ) return effects diff --git a/yeastdnnexplorer/tests/probability_models/test_generate_data.py b/yeastdnnexplorer/tests/probability_models/test_generate_data.py index 00f4d4f..81195c2 100644 --- 
a/yeastdnnexplorer/tests/probability_models/test_generate_data.py +++ b/yeastdnnexplorer/tests/probability_models/test_generate_data.py @@ -13,10 +13,10 @@ def test_generate_gene_population(): total_genes = 1000 - signal_ratio = 0.3 - signal_group_size = int(total_genes * signal_ratio) + bound_ratio = 0.3 + bound_group_size = int(total_genes * bound_ratio) - gene_population = generate_gene_population(total_genes, signal_ratio) + gene_population = generate_gene_population(total_genes, bound_ratio) # Check if the output is a 1D tensor assert gene_population.labels.ndim == 1 @@ -24,10 +24,10 @@ def test_generate_gene_population(): # Check if the output has the correct shape assert gene_population.labels.shape == torch.Size([total_genes]) - # Check if the second column contains the correct number of signal - # and non-signal genes - assert torch.sum(gene_population.labels) == signal_group_size - assert torch.sum(gene_population.labels == 0) == total_genes - signal_group_size + # Check if the second column contains the correct number of bound + # and non-bound genes + assert torch.sum(gene_population.labels) == bound_group_size + assert torch.sum(gene_population.labels == 0) == total_genes - bound_group_size # Additional tests could include checking the datatype of the tensor elements assert gene_population.labels.dtype == torch.bool @@ -37,7 +37,7 @@ def test_generate_binding_effects_success(): # set torch seed torch.manual_seed(42) # Create a mock GenePopulation with some genes - # labeled as signal and others as noise + # labeled as bound and others as unbound gene_population = GenePopulation(torch.tensor([1, 0, 1, 0], dtype=torch.bool)) # Call generate_binding_effects with valid arguments enrichment = generate_binding_effects(gene_population) @@ -84,7 +84,7 @@ def test_generate_pvalues_invalid_input(): def test_generate_perturbation_effects_with_and_without_adjustment(): torch.manual_seed(42) # Create mock binding data with the first - # column indicating signal (1) 
or noise (0), + # column indicating bound (1) or unbound (0), # the second column indicates the enrichment, and the third the p-value. # Add an extra dimension for TFs -- the function requires a 3D tensor. binding_data = torch.tensor( @@ -99,77 +99,77 @@ def test_generate_perturbation_effects_with_and_without_adjustment(): ) # Add TF dimension # Specify means and standard deviations - noise_mean = 0.0 - noise_std = 1.0 - signal_mean = 4.0 - signal_std = 1.0 + unbound_mean = 0.0 + unbound_std = 1.0 + bound_mean = 4.0 + bound_std = 1.0 # First, test without mean adjustment effects_without_adjustment = generate_perturbation_effects( binding_data=binding_data, tf_index=0, - noise_mean=noise_mean, - noise_std=noise_std, - signal_mean=signal_mean, - signal_std=signal_std, + unbound_mean=unbound_mean, + unbound_std=unbound_std, + bound_mean=bound_mean, + bound_std=bound_std, max_mean_adjustment=0.0, # No adjustment ) - # Extract masks for signal and noise genes based on labels - signal_mask = binding_data[:, :, 0].squeeze() == 1 - noise_mask = binding_data[:, :, 0].squeeze() == 0 + # Extract masks for bound and unbound genes based on labels + bound_mask = binding_data[:, :, 0].squeeze() == 1 + unbound_mask = binding_data[:, :, 0].squeeze() == 0 # Assert the effects tensor is of the correct shape assert effects_without_adjustment.shape[0] == binding_data.shape[0] assert torch.isclose( - torch.abs(effects_without_adjustment[signal_mask]).mean(), - torch.tensor(signal_mean), - atol=signal_std, + torch.abs(effects_without_adjustment[bound_mask]).mean(), + torch.tensor(bound_mean), + atol=bound_std, ) assert torch.isclose( - torch.abs(effects_without_adjustment[~signal_mask]).mean(), - torch.tensor(noise_mean), - atol=noise_std, + torch.abs(effects_without_adjustment[~bound_mask]).mean(), + torch.tensor(unbound_mean), + atol=unbound_std, ) assert torch.isclose( - torch.abs(effects_without_adjustment[signal_mask]).std(), - torch.tensor(signal_std), - atol=signal_std, + 
torch.abs(effects_without_adjustment[bound_mask]).std(), + torch.tensor(bound_std), + atol=bound_std, ) assert torch.isclose( - torch.abs(effects_without_adjustment[~signal_mask]).std(), - torch.tensor(noise_std), - atol=noise_std, + torch.abs(effects_without_adjustment[~bound_mask]).std(), + torch.tensor(unbound_std), + atol=unbound_std, ) # Test with mean adjustment effects_with_adjustment = generate_perturbation_effects( binding_data=binding_data, tf_index=0, - noise_mean=noise_mean, - noise_std=noise_std, - signal_mean=signal_mean, - signal_std=signal_std, + unbound_mean=unbound_mean, + unbound_std=unbound_std, + bound_mean=bound_mean, + bound_std=bound_std, max_mean_adjustment=4.0, # Applying adjustment ) - # Assert that signal genes with adjustments have a mean effect greater than + # Assert that bound genes with adjustments have a mean effect greater than # the base mean assert ( - torch.abs(effects_with_adjustment[signal_mask]).mean() - > torch.abs(effects_without_adjustment[signal_mask]).mean() + torch.abs(effects_with_adjustment[bound_mask]).mean() + > torch.abs(effects_without_adjustment[bound_mask]).mean() ) - # Assert that the mean effect for noise genes remains close to the noise mean + # Assert that the mean effect for unbound genes remains close to the unbound mean assert torch.isclose( - torch.abs(effects_with_adjustment[noise_mask]).mean(), - torch.tensor(noise_mean), - atol=noise_std, + torch.abs(effects_with_adjustment[unbound_mask]).mean(), + torch.tensor(unbound_mean), + atol=unbound_std, ) - # and that the noise standard deviation remains close to the noise std + # and that the unbound standard deviation remains close to the unbound std assert torch.isclose( - torch.abs(effects_with_adjustment[noise_mask]).std(), - torch.tensor(noise_std), - atol=noise_std, + torch.abs(effects_with_adjustment[unbound_mask]).std(), + torch.tensor(unbound_std), + atol=unbound_std, )