You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
python -m torch.distributed.launch --nproc_per_node=8 metro/tools/run_metro_bodymesh.py for training:
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 235, in run
pred_camera, pred_3d_joints, pred_vertices_sub2, pred_vertices_sub, pred_vertices = METRO_model(images, smpl, mesh_sampler, meta_masks=meta_masks, is_train=True)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "...../MeshTransformer/metro/modeling/bert/modeling_metro.py", line 280, in forward
features = features*meta_masks + constant_tensor*(1-meta_masks)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/tensor.py", line 394, in __rsub__
return _C._VariableFunctions.rsub(self, other)
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 221, in run
gt_vertices_sub2 = mesh_sampler.downsample(gt_vertices, n1=0, n2=2)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 272, in downsample
y = spmm(self._D[j], y)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 172, in spmm
return SparseMM.apply(sparse, dense)
File "......MeshTransformer/metro/modeling/_smpl.py", line 161, in forward
return torch.matmul(sparse, dense)
RuntimeError: CUDA error: initialization error when calling cusparseCreate(handle)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 220, in run
gt_vertices = smpl(gt_pose, gt_betas)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 89, in forward
v_shaped = torch.matmul(shapedirs, beta).view(-1, 6890, 3) + v_template
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling cublasCreate(handle)
Also, despite having 64 GB of memory and reducing the batch size to 2, I still get:
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
torch.cuda.empty_cache() did not help.
The text was updated successfully, but these errors were encountered:
While running
python -m torch.distributed.launch --nproc_per_node=8 metro/tools/run_metro_bodymesh.py for training:
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 235, in run
pred_camera, pred_3d_joints, pred_vertices_sub2, pred_vertices_sub, pred_vertices = METRO_model(images, smpl, mesh_sampler, meta_masks=meta_masks, is_train=True)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "...../MeshTransformer/metro/modeling/bert/modeling_metro.py", line 280, in forward
features = features*meta_masks + constant_tensor*(1-meta_masks)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/tensor.py", line 394, in __rsub__
return _C._VariableFunctions.rsub(self, other)
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 221, in run
gt_vertices_sub2 = mesh_sampler.downsample(gt_vertices, n1=0, n2=2)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 272, in downsample
y = spmm(self._D[j], y)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 172, in spmm
return SparseMM.apply(sparse, dense)
File "......MeshTransformer/metro/modeling/_smpl.py", line 161, in forward
return torch.matmul(sparse, dense)
RuntimeError: CUDA error: initialization error when calling cusparseCreate(handle)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 220, in run
gt_vertices = smpl(gt_pose, gt_betas)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 89, in forward
v_shaped = torch.matmul(shapedirs, beta).view(-1, 6890, 3) + v_template
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling cublasCreate(handle)
python=3.7
pytorch==1.4.0
torchvision==0.5.0
cudatoolkit=10.1
Also, despite having 64 GB of memory and reducing the batch size to 2, I still get:
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
torch.cuda.empty_cache() did not help.
The text was updated successfully, but these errors were encountered: