From 748916e9d81343e15676b1769455079e1fac9820 Mon Sep 17 00:00:00 2001
From: rkansal47
Date: Mon, 11 Sep 2023 12:03:21 -0500
Subject: [PATCH] update triton addresses

---
 .../tagger_resources/triton_config_ak8.json |   2 +-
 .../triton_config_new_tagger_ak8.json       |   2 +-
 src/HHbbVV/triton/export_and_check.ipynb    | 138 +++---------------
 3 files changed, 21 insertions(+), 121 deletions(-)

diff --git a/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json b/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json
index cfe5f17a..eb1448c9 100644
--- a/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json
+++ b/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json
@@ -1,6 +1,6 @@
 {
   "model_name": "Dec22_ak8_MD_vminclv2ParT_manual_fixwrap",
-  "model_url": "triton+grpc://67.58.49.52:8001/ak8_MD_vminclv2ParT_manual_fixwrap/1",
+  "model_url": "triton+grpc://67.58.49.48:8001/ak8_MD_vminclv2ParT_manual_fixwrap/1",
   "batch_size": 192,
   "softmax": "True",
   "num_reg": 0
diff --git a/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json b/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json
index ff770815..0e3ef87b 100644
--- a/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json
+++ b/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json
@@ -1,6 +1,6 @@
 {
   "model_name": "2023May30_ak8_MD_inclv8_part_2reg_manual",
-  "model_url": "triton+grpc://67.58.49.52:8001/ak8_MD_inclv8_part_2reg_manual/1",
+  "model_url": "triton+grpc://67.58.49.48:8001/ak8_MD_inclv8_part_2reg_manual/1",
   "batch_size": 192,
   "softmax": "False",
   "num_reg": 2
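As a quick sanity check on the new addresses, a minimal illustrative Python snippet (not part of this patch): it parses the `model_url` field from one of the configs above and probes the gRPC port. The `triton+grpc://host:port/model/version` layout is assumed from the values shown here, and the plain TCP probe is a stand-in for a real Triton readiness check (e.g. via `tritonclient.grpc`).

```python
import json
import re
import socket
from pathlib import Path

# Hypothetical check: parse model_url from a tagger config and confirm the new
# Triton gRPC endpoint accepts TCP connections. A real readiness check would use
# tritonclient.grpc (is_server_live / is_model_ready) instead of a raw socket.
CONFIG = Path("src/HHbbVV/processors/tagger_resources/triton_config_ak8.json")


def parse_model_url(url: str) -> tuple[str, int, str, str]:
    """Split an assumed 'triton+grpc://host:port/model/version' URL into parts."""
    m = re.fullmatch(r"triton\+grpc://([^:/]+):(\d+)/([^/]+)/(\w+)", url)
    if m is None:
        raise ValueError(f"unexpected model_url format: {url}")
    host, port, model, version = m.groups()
    return host, int(port), model, version


config = json.loads(CONFIG.read_text())
host, port, model, version = parse_model_url(config["model_url"])

# Raises if the port is unreachable; prints a confirmation otherwise.
with socket.create_connection((host, port), timeout=5):
    print(f"{host}:{port} is reachable (model {model}, version {version})")
```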
\u001b[39m\"\u001b[39m\u001b[39msv_features\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39msv_vectors\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39msv_mask\u001b[39m\u001b[39m\"\u001b[39m],\n\u001b[1;32m 7\u001b[0m \u001b[39m\"\u001b[39m\u001b[39minput_shapes\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m },\n\u001b[1;32m 15\u001b[0m }\n", - "File \u001b[0;32m~/mambaforge/envs/python310/lib/python3.10/site-packages/torch/nn/modules/module.py:1671\u001b[0m, in \u001b[0;36mModule.load_state_dict\u001b[0;34m(self, state_dict, strict)\u001b[0m\n\u001b[1;32m 1666\u001b[0m error_msgs\u001b[39m.\u001b[39minsert(\n\u001b[1;32m 1667\u001b[0m \u001b[39m0\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mMissing key(s) in state_dict: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m. \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 1668\u001b[0m \u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin(\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(k) \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m missing_keys)))\n\u001b[1;32m 1670\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(error_msgs) \u001b[39m>\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m-> 1671\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mError(s) in loading state_dict for \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 1672\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(error_msgs)))\n\u001b[1;32m 1673\u001b[0m \u001b[39mreturn\u001b[39;00m _IncompatibleKeys(missing_keys, unexpected_keys)\n", - "\u001b[0;31mRuntimeError\u001b[0m: Error(s) in loading state_dict for ParticleTransformerTagger:\n\tMissing key(s) in state_dict: \"part.fc.0.weight\", \"part.fc.0.bias\". \n\tUnexpected key(s) in state_dict: \"part.fc.1.weight\", \"part.fc.1.bias\", \"part.fc.0.0.weight\", \"part.fc.0.0.bias\". 
\n\tsize mismatch for pf_embed.embed.1.weight: copying a param with shape torch.Size([64, 25]) from checkpoint, the shape in current model is torch.Size([128, 25]).\n\tsize mismatch for pf_embed.embed.1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for pf_embed.embed.3.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for pf_embed.embed.3.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for pf_embed.embed.4.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for pf_embed.embed.4.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for pf_embed.embed.6.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for pf_embed.embed.6.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for pf_embed.embed.7.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for pf_embed.embed.7.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.1.weight: copying a param with shape torch.Size([64, 11]) from checkpoint, the shape in current model is torch.Size([128, 11]).\n\tsize mismatch for sv_embed.embed.1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.3.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.3.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.4.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for sv_embed.embed.4.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for sv_embed.embed.6.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for sv_embed.embed.6.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for sv_embed.embed.7.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for sv_embed.embed.7.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_token: copying a param with shape torch.Size([1, 1, 64]) from checkpoint, the shape in current model is torch.Size([1, 1, 128]).\n\tsize mismatch for part.pair_embed.embed.1.weight: copying a param with shape torch.Size([32, 4, 1]) from checkpoint, the shape in current model is torch.Size([64, 4, 1]).\n\tsize mismatch for part.pair_embed.embed.1.bias: copying 
a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.running_mean: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.running_var: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.4.weight: copying a param with shape torch.Size([32, 32, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 1]).\n\tsize mismatch for part.pair_embed.embed.4.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.running_mean: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.running_var: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.7.weight: copying a param with shape torch.Size([32, 32, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 1]).\n\tsize mismatch for part.pair_embed.embed.7.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.running_mean: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.running_var: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.10.weight: copying a param with shape torch.Size([8, 32, 1]) from checkpoint, the shape in current model is torch.Size([8, 64, 1]).\n\tsize mismatch for part.blocks.0.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model 
is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.0.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.0.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.0.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.0.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.0.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.0.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.0.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.0.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.1.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.1.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.1.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for 
part.blocks.1.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.1.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.1.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.1.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.1.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.1.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.2.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.2.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.2.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.2.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, 
the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.2.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.2.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.2.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.2.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.3.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.3.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.3.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.3.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.3.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.3.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.3.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.3.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.w_resid: 
copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.4.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.4.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.4.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.4.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.4.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.4.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.4.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.4.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.5.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in 
current model is torch.Size([384]).\n\tsize mismatch for part.blocks.5.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.5.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.5.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.5.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.5.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.5.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.5.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.6.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.6.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.6.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for 
part.blocks.6.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.6.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.6.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.6.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.6.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.6.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.7.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.7.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.7.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.7.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.7.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, 
the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.7.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.7.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.7.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.cls_blocks.0.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.cls_blocks.0.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.cls_blocks.0.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.cls_blocks.0.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.0.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.0.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.0.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.cls_blocks.0.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is 
torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.cls_blocks.1.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.cls_blocks.1.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.cls_blocks.1.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.cls_blocks.1.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.1.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.1.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.1.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.cls_blocks.1.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128])." - ] - } - ], + "outputs": [], "source": [ "model_dir = (\n", " \"models/model_2023May30/ak8_MD_inclv8_part_2reg_manual.useamp.lite.gm5.ddp-bs768-lr6p75e-3/\"\n", @@ -551,7 +451,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.11" }, "orig_nbformat": 4, "vscode": {
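The output cleared from the last notebook cell recorded a state_dict shape mismatch: a 64-wide ParT checkpoint loaded into a 128-wide model. Below is a small, self-contained sketch of the kind of pre-check that surfaces such mismatches before `load_state_dict` raises; the two `nn.Sequential` modules are illustrative stand-ins for the tagger and checkpoint, not the actual ParT code.

```python
import torch
from torch import nn


def report_state_dict_mismatches(model: nn.Module, checkpoint: dict) -> list[str]:
    """Compare a checkpoint against model.state_dict() and list every problem found."""
    model_sd = model.state_dict()
    problems = [f"missing from checkpoint: {k}" for k in model_sd.keys() - checkpoint.keys()]
    problems += [f"unexpected in checkpoint: {k}" for k in checkpoint.keys() - model_sd.keys()]
    for key in model_sd.keys() & checkpoint.keys():
        if model_sd[key].shape != checkpoint[key].shape:
            problems.append(
                f"size mismatch for {key}: checkpoint {tuple(checkpoint[key].shape)} "
                f"vs model {tuple(model_sd[key].shape)}"
            )
    return problems


# Toy stand-ins: a "checkpoint" saved from a 64-wide network, loaded into a 128-wide one.
small = nn.Sequential(nn.Linear(25, 64), nn.ReLU(), nn.Linear(64, 10))
large = nn.Sequential(nn.Linear(25, 128), nn.ReLU(), nn.Linear(128, 10))

# In the notebook the checkpoint comes from torch.load(..., map_location="cpu").
checkpoint = small.state_dict()
for problem in report_state_dict_mismatches(large, checkpoint):
    print(problem)
```

A mismatch of this kind usually means the model was instantiated with different width hyperparameters than the ones used for training, so the fix is in the model construction, not the checkpoint.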