You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Traceback (most recent call last):
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/pred.py", line 327, in <module>
preds = get_pred(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/pred.py", line 260, in get_pred
output = searcher.generate(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/greedy_search.py", line 32, in generate
result = self._decode(input_ids, **kwargs)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/greedy_search.py", line 54, in _decode
out = self.model(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 1162, in forward
outputs = self.model(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/patch.py", line 102, in model_forward
layer_outputs = decoder_layer(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 757, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/patch.py", line 16, in hf_forward
ret = forward(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/inf_llm.py", line 64, in forward
o = past_key_value.append(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 774, in append
chunk_o, local_score = self._append(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 520, in _append
global_h_k, global_h_v, global_sliding_window, global_block_map, global_block_num = self.get_global_hidden_and_mask(local_h_q.size(-2), block_topk)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 419, in get_global_hidden_and_mask
self.global_blocks[u][b_idx].load((global_h_k[u, :, st:ed, :], global_h_v[u, :, st:ed, :]))
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 76, in load
gpu_data, gpu_data_id = self.cache.alloc()
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 19, in alloc
assert len(self.idle_set) > 0
AssertionError
The text was updated successfully, but these errors were encountered:
Traceback (most recent call last):
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/pred.py", line 327, in <module>
preds = get_pred(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/pred.py", line 260, in get_pred
output = searcher.generate(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/greedy_search.py", line 32, in generate
result = self._decode(input_ids, **kwargs)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/greedy_search.py", line 54, in _decode
out = self.model(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 1162, in forward
outputs = self.model(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/patch.py", line 102, in model_forward
layer_outputs = decoder_layer(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/transformers/models/mistral/modeling_mistral.py", line 757, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/llm/miniconda3/envs/llm_infer/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/utils/patch.py", line 16, in hf_forward
ret = forward(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/inf_llm.py", line 64, in forward
o = past_key_value.append(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 774, in append
chunk_o, local_score = self._append(
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 520, in _append
global_h_k, global_h_v, global_sliding_window, global_block_map, global_block_num = self.get_global_hidden_and_mask(local_h_q.size(-2), block_topk)
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 419, in get_global_hidden_and_mask
self.global_blocks[u][b_idx].load((global_h_k[u, :, st:ed, :], global_h_v[u, :, st:ed, :]))
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 76, in load
gpu_data, gpu_data_id = self.cache.alloc()
File "/dataset-vlm/jingyaoli/LLMInfer/InfLLM/benchmark/inf_llm/attention/context_manager.py", line 19, in alloc
assert len(self.idle_set) > 0
AssertionError
The text was updated successfully, but these errors were encountered: