From 748916e9d81343e15676b1769455079e1fac9820 Mon Sep 17 00:00:00 2001
From: rkansal47
Date: Mon, 11 Sep 2023 12:03:21 -0500
Subject: [PATCH] update triton addresses

---
 .../tagger_resources/triton_config_ak8.json |   2 +-
 .../triton_config_new_tagger_ak8.json       |   2 +-
 src/HHbbVV/triton/export_and_check.ipynb    | 138 +++---------------
 3 files changed, 21 insertions(+), 121 deletions(-)

diff --git a/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json b/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json
index cfe5f17a..eb1448c9 100644
--- a/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json
+++ b/src/HHbbVV/processors/tagger_resources/triton_config_ak8.json
@@ -1,6 +1,6 @@
 {
   "model_name": "Dec22_ak8_MD_vminclv2ParT_manual_fixwrap",
-  "model_url": "triton+grpc://67.58.49.52:8001/ak8_MD_vminclv2ParT_manual_fixwrap/1",
+  "model_url": "triton+grpc://67.58.49.48:8001/ak8_MD_vminclv2ParT_manual_fixwrap/1",
   "batch_size": 192,
   "softmax": "True",
   "num_reg": 0
diff --git a/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json b/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json
index ff770815..0e3ef87b 100644
--- a/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json
+++ b/src/HHbbVV/processors/tagger_resources/triton_config_new_tagger_ak8.json
@@ -1,6 +1,6 @@
 {
   "model_name": "2023May30_ak8_MD_inclv8_part_2reg_manual",
-  "model_url": "triton+grpc://67.58.49.52:8001/ak8_MD_inclv8_part_2reg_manual/1",
+  "model_url": "triton+grpc://67.58.49.48:8001/ak8_MD_inclv8_part_2reg_manual/1",
   "batch_size": 192,
   "softmax": "False",
   "num_reg": 2
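As a quick sanity check on the new addresses, a minimal illustrative Python snippet (not part of this patch): it parses the `model_url` field from one of the configs above and probes the gRPC port. The `triton+grpc://host:port/model/version` layout is assumed from the values shown here, and the plain TCP probe is a stand-in for a real Triton readiness check (e.g. via `tritonclient.grpc`).

```python
import json
import re
import socket
from pathlib import Path

# Hypothetical check: parse model_url from a tagger config and confirm the new
# Triton gRPC endpoint accepts TCP connections. A real readiness check would use
# tritonclient.grpc (is_server_live / is_model_ready) instead of a raw socket.
CONFIG = Path("src/HHbbVV/processors/tagger_resources/triton_config_ak8.json")


def parse_model_url(url: str) -> tuple[str, int, str, str]:
    """Split an assumed 'triton+grpc://host:port/model/version' URL into parts."""
    m = re.fullmatch(r"triton\+grpc://([^:/]+):(\d+)/([^/]+)/(\w+)", url)
    if m is None:
        raise ValueError(f"unexpected model_url format: {url}")
    host, port, model, version = m.groups()
    return host, int(port), model, version


config = json.loads(CONFIG.read_text())
host, port, model, version = parse_model_url(config["model_url"])

# Raises if the port is unreachable; prints a confirmation otherwise.
with socket.create_connection((host, port), timeout=5):
    print(f"{host}:{port} is reachable (model {model}, version {version})")
```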
\u001b[39m\"\u001b[39m\u001b[39msv_features\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39msv_vectors\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39msv_mask\u001b[39m\u001b[39m\"\u001b[39m],\n\u001b[1;32m 7\u001b[0m \u001b[39m\"\u001b[39m\u001b[39minput_shapes\u001b[39m\u001b[39m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m },\n\u001b[1;32m 15\u001b[0m }\n", - "File \u001b[0;32m~/mambaforge/envs/python310/lib/python3.10/site-packages/torch/nn/modules/module.py:1671\u001b[0m, in \u001b[0;36mModule.load_state_dict\u001b[0;34m(self, state_dict, strict)\u001b[0m\n\u001b[1;32m 1666\u001b[0m error_msgs\u001b[39m.\u001b[39minsert(\n\u001b[1;32m 1667\u001b[0m \u001b[39m0\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mMissing key(s) in state_dict: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m. \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 1668\u001b[0m \u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin(\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(k) \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m missing_keys)))\n\u001b[1;32m 1670\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(error_msgs) \u001b[39m>\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m-> 1671\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mError(s) in loading state_dict for \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 1672\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(error_msgs)))\n\u001b[1;32m 1673\u001b[0m \u001b[39mreturn\u001b[39;00m _IncompatibleKeys(missing_keys, unexpected_keys)\n", - "\u001b[0;31mRuntimeError\u001b[0m: Error(s) in loading state_dict for ParticleTransformerTagger:\n\tMissing key(s) in state_dict: \"part.fc.0.weight\", \"part.fc.0.bias\". \n\tUnexpected key(s) in state_dict: \"part.fc.1.weight\", \"part.fc.1.bias\", \"part.fc.0.0.weight\", \"part.fc.0.0.bias\". 
\n\tsize mismatch for pf_embed.embed.1.weight: copying a param with shape torch.Size([64, 25]) from checkpoint, the shape in current model is torch.Size([128, 25]).\n\tsize mismatch for pf_embed.embed.1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for pf_embed.embed.3.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for pf_embed.embed.3.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for pf_embed.embed.4.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for pf_embed.embed.4.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for pf_embed.embed.6.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for pf_embed.embed.6.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for pf_embed.embed.7.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for pf_embed.embed.7.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.1.weight: copying a param with shape torch.Size([64, 11]) from checkpoint, the shape in current model is torch.Size([128, 11]).\n\tsize mismatch for sv_embed.embed.1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.3.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.3.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for sv_embed.embed.4.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for sv_embed.embed.4.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for sv_embed.embed.6.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for sv_embed.embed.6.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for sv_embed.embed.7.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for sv_embed.embed.7.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_token: copying a param with shape torch.Size([1, 1, 64]) from checkpoint, the shape in current model is torch.Size([1, 1, 128]).\n\tsize mismatch for part.pair_embed.embed.1.weight: copying a param with shape torch.Size([32, 4, 1]) from checkpoint, the shape in current model is torch.Size([64, 4, 1]).\n\tsize mismatch for part.pair_embed.embed.1.bias: copying 
a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.running_mean: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.2.running_var: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.4.weight: copying a param with shape torch.Size([32, 32, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 1]).\n\tsize mismatch for part.pair_embed.embed.4.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.running_mean: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.5.running_var: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.7.weight: copying a param with shape torch.Size([32, 32, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 1]).\n\tsize mismatch for part.pair_embed.embed.7.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.running_mean: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.8.running_var: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64]).\n\tsize mismatch for part.pair_embed.embed.10.weight: copying a param with shape torch.Size([8, 32, 1]) from checkpoint, the shape in current model is torch.Size([8, 64, 1]).\n\tsize mismatch for part.blocks.0.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model 
is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.0.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.0.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.0.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.0.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.0.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.0.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.0.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.0.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.0.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.1.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.1.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.1.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for 
part.blocks.1.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.1.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.1.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.1.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.1.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.1.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.1.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.2.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.2.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.2.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.2.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.2.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, 
the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.2.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.2.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.2.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.2.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.3.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.3.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.3.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.3.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.3.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.3.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.3.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.3.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.3.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.w_resid: 
copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.4.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.4.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.4.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.4.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.4.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.4.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.4.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.4.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.4.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.5.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in 
current model is torch.Size([384]).\n\tsize mismatch for part.blocks.5.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.5.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.5.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.5.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.5.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.5.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.5.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.5.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.6.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.6.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.6.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for 
part.blocks.6.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.6.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.6.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.6.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.6.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.6.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.6.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.blocks.7.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.blocks.7.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.blocks.7.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.blocks.7.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.blocks.7.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.7.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, 
the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.7.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.blocks.7.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.blocks.7.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.cls_blocks.0.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.cls_blocks.0.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.cls_blocks.0.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.0.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.cls_blocks.0.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.0.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.0.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.0.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.cls_blocks.0.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.w_resid: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is 
torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).\n\tsize mismatch for part.cls_blocks.1.attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).\n\tsize mismatch for part.cls_blocks.1.attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).\n\tsize mismatch for part.cls_blocks.1.attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.post_attn_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.post_attn_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_fc_norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.pre_fc_norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.cls_blocks.1.fc1.weight: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).\n\tsize mismatch for part.cls_blocks.1.fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.1.post_fc_norm.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.1.post_fc_norm.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([512]).\n\tsize mismatch for part.cls_blocks.1.fc2.weight: copying a param with shape torch.Size([64, 256]) from checkpoint, the shape in current model is torch.Size([128, 512]).\n\tsize mismatch for part.cls_blocks.1.fc2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.norm.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).\n\tsize mismatch for part.norm.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128])." - ] - } - ], + "outputs": [], "source": [ "model_dir = (\n", " \"models/model_2023May30/ak8_MD_inclv8_part_2reg_manual.useamp.lite.gm5.ddp-bs768-lr6p75e-3/\"\n", @@ -551,7 +451,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.11" }, "orig_nbformat": 4, "vscode": {
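The output cleared from the last notebook cell recorded a state_dict shape mismatch: a 64-wide ParT checkpoint loaded into a 128-wide model. Below is a small, self-contained sketch of the kind of pre-check that surfaces such mismatches before `load_state_dict` raises; the two `nn.Sequential` modules are illustrative stand-ins for the tagger and checkpoint, not the actual ParT code.

```python
import torch
from torch import nn


def report_state_dict_mismatches(model: nn.Module, checkpoint: dict) -> list[str]:
    """Compare a checkpoint against model.state_dict() and list every problem found."""
    model_sd = model.state_dict()
    problems = [f"missing from checkpoint: {k}" for k in model_sd.keys() - checkpoint.keys()]
    problems += [f"unexpected in checkpoint: {k}" for k in checkpoint.keys() - model_sd.keys()]
    for key in model_sd.keys() & checkpoint.keys():
        if model_sd[key].shape != checkpoint[key].shape:
            problems.append(
                f"size mismatch for {key}: checkpoint {tuple(checkpoint[key].shape)} "
                f"vs model {tuple(model_sd[key].shape)}"
            )
    return problems


# Toy stand-ins: a "checkpoint" saved from a 64-wide network, loaded into a 128-wide one.
small = nn.Sequential(nn.Linear(25, 64), nn.ReLU(), nn.Linear(64, 10))
large = nn.Sequential(nn.Linear(25, 128), nn.ReLU(), nn.Linear(128, 10))

# In the notebook the checkpoint comes from torch.load(..., map_location="cpu").
checkpoint = small.state_dict()
for problem in report_state_dict_mismatches(large, checkpoint):
    print(problem)
```

A mismatch of this kind usually means the model was instantiated with different width hyperparameters than the ones used for training, so the fix is in the model construction, not the checkpoint.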