Updated tutorials and regression metrics

hachmannlab · Nov 9, 2021 · a33f050 · a33f050
1 parent d8be220
commit a33f050
Show file tree

Hide file tree

Showing 7 changed files with 293 additions and 127 deletions.
diff --git a/chemml/__init__.py b/chemml/__init__.py
@@ -1,6 +1,6 @@
 # __name__ = "chemml"
-__version__ = "0.8"
-__author__ = ["Mojtaba Haghighatlari ([email protected])", "Johannes Hachmann ([email protected])"]
+__version__ = "1.0"
+__author__ = ["Aditya Sonpal ([email protected])", "Gaurav Vishwakarma ([email protected])", "Aatish Pradhan ([email protected])","Mojtaba Haghighatlari ([email protected])", "Johannes Hachmann ([email protected])"]
 
 
 # import sys

diff --git a/chemml/models/keras/mlp.py b/chemml/models/keras/mlp.py
@@ -130,6 +130,7 @@ def save(self, path, filename):
         obj_dict['path_to_file'] = path +'/'+ filename+'.h5'
         obj_df = pd.DataFrame.from_dict(obj_dict,orient='index')
         obj_df.to_csv(path+'/'+filename+'_chemml_model.csv')
+        print("File saved as "+path+"/"+filename+"_chemml_model.csv")
 
     def load(self, path_to_model):
         """

diff --git a/chemml/utils/utilities.py b/chemml/utils/utilities.py
@@ -384,7 +384,7 @@ def regression_metrics(y_true, y_predicted, nfeatures = None):
     metrics_dict['AE'] = [list(ae)]
     metrics_dict['SE'] = [list(se)]
 
-    var = np.mean(np.square(y_predicted - y_mean))
+    var = np.mean(np.square(y_true - y_mean))
 
     metrics_dict['ME'] = np.mean(e)
     # mean absolute error

diff --git a/docs/ipython_notebooks/Convo_nets.ipynb b/docs/ipython_notebooks/Convo_nets.ipynb
@@ -15,7 +15,18 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-11-09 18:22:23.552207: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
+      "2021-11-09 18:22:23.552291: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n",
+      "/mnt/c/Aatish/UB/Mr. Hachmann/master_chemml_wrapper_v2/chemml/chemml/datasets/base.py:87: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only\n",
+      "  features = df.drop(['smiles', 'density_Kg/m3'],1)\n"
+     ]
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "from chemml.datasets import load_organic_density\n",
@@ -62,7 +73,7 @@
      "output_type": "stream",
      "text": [
       "Tensorising molecules in batches of 100 ...\n",
-      "500/500 [==================================================] - 0s 601us/step\n",
+      "500/500 [==================================================] - 1s 1ms/step\n",
       "Merging batch tensors ...    [DONE]\n"
      ]
     }
@@ -156,6 +167,17 @@
    "execution_count": 6,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-11-09 18:23:03.337511: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n",
+      "2021-11-09 18:23:03.337613: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n",
+      "2021-11-09 18:23:03.337645: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (Aatish-HP): /proc/driver/nvidia/version does not exist\n",
+      "2021-11-09 18:23:03.337984: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -262,10 +284,17 @@
       "__________________________________________________________________________________________________\n"
      ]
     },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-11-09 18:23:04.812435: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "<tensorflow.python.keras.callbacks.History at 0x7fac474e07b8>"
+       "<keras.callbacks.History at 0x7feda4198be0>"
       ]
      },
      "execution_count": 7,
@@ -312,8 +341,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Mean Absolute Error = 18.407 kg/m^3\n",
-      "R squared = 0.903\n"
+      "Mean Absolute Error = 16.518 kg/m^3\n",
+      "R squared = 0.935\n"
      ]
     }
    ],
@@ -322,9 +351,9 @@
     "\n",
     "y_pred = model.predict([xatoms_test,xbonds_test,xedges_test])\n",
     "y_pred = y_scale.inverse_transform(y_pred)\n",
-    "metrics_dict = regression_metrics(target_test, list(y_pred.reshape(-1,)))\n",
-    "mae = metrics_dict['MAE']\n",
-    "r_2 = metrics_dict['r_squared']\n",
+    "metrics_df = regression_metrics(target_test, list(y_pred.reshape(-1,)))\n",
+    "mae = metrics_df['MAE'].values[0]\n",
+    "r_2 = metrics_df['r_squared'].values[0]\n",
     "\n",
     "print(\"Mean Absolute Error = {} kg/m^3\".format(mae.round(3)))\n",
     "print(\"R squared = {}\".format(r_2.round(3)))"
@@ -333,9 +362,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python [conda env:chemml_env] *",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-chemml_env-py"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/docs/ipython_notebooks/ga_hyper_opt.ipynb b/docs/ipython_notebooks/ga_hyper_opt.ipynb
@@ -16,6 +16,16 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2021-11-09 18:23:58.580965: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
+      "2021-11-09 18:23:58.581035: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n",
+      "/mnt/c/Aatish/UB/Mr. Hachmann/master_chemml_wrapper_v2/chemml/chemml/datasets/base.py:87: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only\n",
+      "  features = df.drop(['smiles', 'density_Kg/m3'],1)\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -91,7 +101,7 @@
     "    krr = KernelRidge(alpha=individual[0], kernel=individual[1], degree=individual[2])\n",
     "    krr.fit(features[:400], density[:400])\n",
     "    pred = krr.predict(features[400:])\n",
-    "    mae = regression_metrics(density[400:],pred)['MAE']\n",
+    "    mae = regression_metrics(density[400:],pred)['MAE'].values[0]\n",
     "    return mae"
    ]
   },
@@ -162,45 +172,45 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>(4.3428571428571425, linear, 1)</td>\n",
-       "      <td>0.105177</td>\n",
-       "      <td>0.001049</td>\n",
+       "      <td>(2.928571428571429, linear, 1)</td>\n",
+       "      <td>0.102514</td>\n",
+       "      <td>0.000390</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>(4.3428571428571425, linear, 1)</td>\n",
-       "      <td>0.105177</td>\n",
-       "      <td>0.001099</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000230</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>(1.3309342128829402, linear, 1)</td>\n",
-       "      <td>0.096677</td>\n",
-       "      <td>0.000536</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000312</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>(1.1297049580471703, linear, 5)</td>\n",
-       "      <td>0.095685</td>\n",
-       "      <td>0.000572</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000143</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>(1.1297049580471703, linear, 5)</td>\n",
-       "      <td>0.095685</td>\n",
-       "      <td>0.001062</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000309</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "                   Best_individual  Fitness_values  Time (hours)\n",
-       "0  (4.3428571428571425, linear, 1)        0.105177      0.001049\n",
-       "1  (4.3428571428571425, linear, 1)        0.105177      0.001099\n",
-       "2  (1.3309342128829402, linear, 1)        0.096677      0.000536\n",
-       "3  (1.1297049580471703, linear, 5)        0.095685      0.000572\n",
-       "4  (1.1297049580471703, linear, 5)        0.095685      0.001062"
+       "0   (2.928571428571429, linear, 1)        0.102514      0.000390\n",
+       "1  (1.1057547599530448, linear, 1)        0.095582      0.000230\n",
+       "2  (1.1057547599530448, linear, 1)        0.095582      0.000312\n",
+       "3  (1.1057547599530448, linear, 1)        0.095582      0.000143\n",
+       "4  (1.1057547599530448, linear, 1)        0.095582      0.000309"
       ]
      },
      "execution_count": 5,
@@ -221,7 +231,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'alpha': 1.1297049580471703, 'kernels': 'linear', 'degree': 5}\n"
+      "{'alpha': 1.1057547599530448, 'kernels': 'linear', 'degree': 1}\n"
      ]
     }
    ],
@@ -281,45 +291,45 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>(1.1297049580471703, linear, 5)</td>\n",
-       "      <td>0.095685</td>\n",
-       "      <td>0.001405</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000271</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>(1.1297049580471703, linear, 5)</td>\n",
-       "      <td>0.095685</td>\n",
-       "      <td>0.000581</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000260</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>(0.37125764950137985, linear, 2)</td>\n",
-       "      <td>0.091373</td>\n",
-       "      <td>0.000673</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000471</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>(0.37125764950137985, linear, 2)</td>\n",
-       "      <td>0.091373</td>\n",
-       "      <td>0.000532</td>\n",
+       "      <td>(1.1057547599530448, linear, 1)</td>\n",
+       "      <td>0.095582</td>\n",
+       "      <td>0.000116</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>(0.37125764950137985, linear, 2)</td>\n",
-       "      <td>0.091373</td>\n",
-       "      <td>0.000353</td>\n",
+       "      <td>(1.0210089362161252, linear, 4)</td>\n",
+       "      <td>0.095203</td>\n",
+       "      <td>0.000316</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                    Best_individual  Fitness_values  Time (hours)\n",
-       "0   (1.1297049580471703, linear, 5)        0.095685      0.001405\n",
-       "1   (1.1297049580471703, linear, 5)        0.095685      0.000581\n",
-       "2  (0.37125764950137985, linear, 2)        0.091373      0.000673\n",
-       "3  (0.37125764950137985, linear, 2)        0.091373      0.000532\n",
-       "4  (0.37125764950137985, linear, 2)        0.091373      0.000353"
+       "                   Best_individual  Fitness_values  Time (hours)\n",
+       "0  (1.1057547599530448, linear, 1)        0.095582      0.000271\n",
+       "1  (1.1057547599530448, linear, 1)        0.095582      0.000260\n",
+       "2  (1.1057547599530448, linear, 1)        0.095582      0.000471\n",
+       "3  (1.1057547599530448, linear, 1)        0.095582      0.000116\n",
+       "4  (1.0210089362161252, linear, 4)        0.095203      0.000316"
       ]
      },
      "execution_count": 8,
@@ -330,20 +340,13 @@
    "source": [
     "fitness_df_resume"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:v2_0.7]",
+   "display_name": "Python [conda env:chemml_env] *",
    "language": "python",
-   "name": "conda-env-v2_0.7-py"
+   "name": "conda-env-chemml_env-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -355,7 +358,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.8.12"
   },
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {

diff --git a/docs/ipython_notebooks/images/Parity.png b/docs/ipython_notebooks/images/Parity.png