From b3d522d5cb959a27104ef34c68eff9ee42abe383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Gabriel=20Pereira=20Condados?= Date: Wed, 15 May 2024 10:32:13 -0400 Subject: [PATCH 1/5] fix relative path on hubconfig to get cfg_file. --- hubconf.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/hubconf.py b/hubconf.py index 2b9e8e9..2b609ed 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,6 +1,7 @@ dependencies = ['torch', 'torchvision'] import torch +import os try: from mmcv.utils import Config, DictAction except: @@ -41,14 +42,15 @@ def metric3d_convnext_large(pretrain=False, **kwargs): Returns: model (nn.Module): a Metric3D model. ''' - cfg_file = MODEL_TYPE['ConvNeXt-Large']['cfg_file'] + dirname = os.path.dirname(__file__) + cfg_file = os.path.join(dirname, MODEL_TYPE['ConvNeXt-Large']['cfg_file']) ckpt_file = MODEL_TYPE['ConvNeXt-Large']['ckpt_file'] cfg = Config.fromfile(cfg_file) model = get_configured_monodepth_model(cfg) if pretrain: model.load_state_dict( - torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], + torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], strict=False, ) return model @@ -62,14 +64,15 @@ def metric3d_vit_small(pretrain=False, **kwargs): Returns: model (nn.Module): a Metric3D model. ''' - cfg_file = MODEL_TYPE['ViT-Small']['cfg_file'] + dirname = os.path.dirname(__file__) + cfg_file = os.path.join(dirname, MODEL_TYPE['ViT-Small']['cfg_file']) ckpt_file = MODEL_TYPE['ViT-Small']['ckpt_file'] cfg = Config.fromfile(cfg_file) model = get_configured_monodepth_model(cfg) if pretrain: model.load_state_dict( - torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], + torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], strict=False, ) return model @@ -83,14 +86,15 @@ def metric3d_vit_large(pretrain=False, **kwargs): Returns: model (nn.Module): a Metric3D model. ''' - cfg_file = MODEL_TYPE['ViT-Large']['cfg_file'] + dirname = os.path.dirname(__file__) + cfg_file = os.path.join(dirname, MODEL_TYPE['ViT-Large']['cfg_file']) ckpt_file = MODEL_TYPE['ViT-Large']['ckpt_file'] cfg = Config.fromfile(cfg_file) model = get_configured_monodepth_model(cfg) if pretrain: model.load_state_dict( - torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], + torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], strict=False, ) return model @@ -104,14 +108,15 @@ def metric3d_vit_giant2(pretrain=False, **kwargs): Returns: model (nn.Module): a Metric3D model. ''' - cfg_file = MODEL_TYPE['ViT-giant2']['cfg_file'] + dirname = os.path.dirname(__file__) + cfg_file = os.path.join(dirname, MODEL_TYPE['ViT-giant2']['cfg_file']) ckpt_file = MODEL_TYPE['ViT-giant2']['ckpt_file'] cfg = Config.fromfile(cfg_file) model = get_configured_monodepth_model(cfg) if pretrain: model.load_state_dict( - torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], + torch.hub.load_state_dict_from_url(ckpt_file)['model_state_dict'], strict=False, ) return model @@ -163,7 +168,7 @@ def metric3d_vit_giant2(pretrain=False, **kwargs): # un pad pred_depth = pred_depth.squeeze() pred_depth = pred_depth[pad_info[0] : pred_depth.shape[0] - pad_info[1], pad_info[2] : pred_depth.shape[1] - pad_info[3]] - + # upsample to original size pred_depth = torch.nn.functional.interpolate(pred_depth[None, None, :, :], rgb_origin.shape[:2], mode='bilinear').squeeze() ###################### canonical camera space ###################### @@ -173,14 +178,14 @@ def metric3d_vit_giant2(pretrain=False, **kwargs): pred_depth = pred_depth * canonical_to_real_scale # now the depth is metric pred_depth = torch.clamp(pred_depth, 0, 300) - #### you can now do anything with the metric depth + #### you can now do anything with the metric depth # such as evaluate predicted depth if depth_file is not None: gt_depth = cv2.imread(depth_file, -1) gt_depth = gt_depth / gt_depth_scale gt_depth = torch.from_numpy(gt_depth).float().cuda() assert gt_depth.shape == pred_depth.shape - + mask = (gt_depth > 1e-8) abs_rel_err = (torch.abs(pred_depth[mask] - gt_depth[mask]) / gt_depth[mask]).mean() print('abs_rel_err:', abs_rel_err.item()) \ No newline at end of file From ac4c20b2455e1e5131f85e34e77e1ae43d4a99f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Gabriel=20Pereira=20Condados?= Date: Wed, 15 May 2024 11:46:15 -0400 Subject: [PATCH 2/5] convert from bfloat16 to float --- mono/model/decode_heads/RAFTDepthNormalDPTDecoder5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mono/model/decode_heads/RAFTDepthNormalDPTDecoder5.py b/mono/model/decode_heads/RAFTDepthNormalDPTDecoder5.py index 9af89f9..9bc0b63 100644 --- a/mono/model/decode_heads/RAFTDepthNormalDPTDecoder5.py +++ b/mono/model/decode_heads/RAFTDepthNormalDPTDecoder5.py @@ -216,7 +216,7 @@ def compute_depth_expectation(prob, depth_values): return depth def interpolate_float32(x, size=None, scale_factor=None, mode='nearest', align_corners=None): - with torch.autocast(device_type='cuda', dtype=torch.bfloat16, enabled=False): + with torch.autocast(device_type='cuda', dtype=torch.float, enabled=False): return F.interpolate(x.float(), size=size, scale_factor=scale_factor, mode=mode, align_corners=align_corners) # def upflow8(flow, mode='bilinear'): @@ -225,7 +225,7 @@ def interpolate_float32(x, size=None, scale_factor=None, mode='nearest', align_c def upflow4(flow, mode='bilinear'): new_size = (4 * flow.shape[2], 4 * flow.shape[3]) - with torch.autocast(device_type='cuda', dtype=torch.bfloat16, enabled=False): + with torch.autocast(device_type='cuda', dtype=torch.float, enabled=False): return F.interpolate(flow, size=new_size, mode=mode, align_corners=True) def coords_grid(batch, ht, wd): From b42a2f55d33d896c244a8f0384798a4713e48169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Gabriel=20Pereira=20Condados?= Date: Wed, 15 May 2024 11:49:09 -0400 Subject: [PATCH 3/5] forcing higher ranges --- mono/configs/HourglassDecoder/convlarge.0.3_150.py | 2 +- mono/configs/HourglassDecoder/vit.raft5.giant2.py | 4 ++-- mono/configs/HourglassDecoder/vit.raft5.small.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mono/configs/HourglassDecoder/convlarge.0.3_150.py b/mono/configs/HourglassDecoder/convlarge.0.3_150.py index 37b91c8..99664e3 100644 --- a/mono/configs/HourglassDecoder/convlarge.0.3_150.py +++ b/mono/configs/HourglassDecoder/convlarge.0.3_150.py @@ -17,7 +17,7 @@ focal_length=1000.0, ), depth_range=(0, 1), - depth_normalize=(0.3, 150), + depth_normalize=(0.3, 500), crop_size = (544, 1216), ) diff --git a/mono/configs/HourglassDecoder/vit.raft5.giant2.py b/mono/configs/HourglassDecoder/vit.raft5.giant2.py index caf9eb2..5af4e8e 100644 --- a/mono/configs/HourglassDecoder/vit.raft5.giant2.py +++ b/mono/configs/HourglassDecoder/vit.raft5.giant2.py @@ -15,7 +15,7 @@ ) -max_value = 200 +max_value = 500 # configs of the canonical space data_basic=dict( canonical_space = dict( @@ -25,7 +25,7 @@ depth_range=(0, 1), depth_normalize=(0.1, max_value), crop_size = (616, 1064), # %28 = 0 - clip_depth_range=(0.1, 200), + clip_depth_range=(0.1, 500), vit_size=(616,1064) ) diff --git a/mono/configs/HourglassDecoder/vit.raft5.small.py b/mono/configs/HourglassDecoder/vit.raft5.small.py index 25eb68c..e61f222 100644 --- a/mono/configs/HourglassDecoder/vit.raft5.small.py +++ b/mono/configs/HourglassDecoder/vit.raft5.small.py @@ -15,7 +15,7 @@ ) -max_value = 200 +max_value = 500 # configs of the canonical space data_basic=dict( canonical_space = dict( @@ -25,7 +25,7 @@ depth_range=(0, 1), depth_normalize=(0.1, max_value), crop_size = (616, 1064), # %28 = 0 - clip_depth_range=(0.1, 200), + clip_depth_range=(0.1, 500), vit_size=(616,1064) ) From 595635b53f5727704dd11c3668dd76072453a170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Gabriel=20Pereira=20Condados?= Date: Wed, 15 May 2024 11:53:48 -0400 Subject: [PATCH 4/5] force higher range depth --- mono/configs/HourglassDecoder/vit.raft5.large.py | 4 ++-- mono/configs/HourglassDecoder/vit.raft5.small.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mono/configs/HourglassDecoder/vit.raft5.large.py b/mono/configs/HourglassDecoder/vit.raft5.large.py index 4febdcb..9000cee 100644 --- a/mono/configs/HourglassDecoder/vit.raft5.large.py +++ b/mono/configs/HourglassDecoder/vit.raft5.large.py @@ -15,7 +15,7 @@ ) -max_value = 200 +max_value = 1000 # configs of the canonical space data_basic=dict( canonical_space = dict( @@ -25,7 +25,7 @@ depth_range=(0, 1), depth_normalize=(0.1, max_value), crop_size = (616, 1064), # %28 = 0 - clip_depth_range=(0.1, 200), + clip_depth_range=(0.1, 1000), vit_size=(616,1064) ) diff --git a/mono/configs/HourglassDecoder/vit.raft5.small.py b/mono/configs/HourglassDecoder/vit.raft5.small.py index e61f222..35d9c9b 100644 --- a/mono/configs/HourglassDecoder/vit.raft5.small.py +++ b/mono/configs/HourglassDecoder/vit.raft5.small.py @@ -15,7 +15,7 @@ ) -max_value = 500 +max_value = 1000 # configs of the canonical space data_basic=dict( canonical_space = dict( @@ -25,7 +25,7 @@ depth_range=(0, 1), depth_normalize=(0.1, max_value), crop_size = (616, 1064), # %28 = 0 - clip_depth_range=(0.1, 500), + clip_depth_range=(0.1, 1000), vit_size=(616,1064) ) From 95b91c5acf8d2f125789d7c331c14e9b8b24cc10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Gabriel=20Pereira=20Condados?= Date: Wed, 15 May 2024 11:55:49 -0400 Subject: [PATCH 5/5] returning original ranges --- mono/configs/HourglassDecoder/vit.raft5.large.py | 4 ++-- mono/configs/HourglassDecoder/vit.raft5.small.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mono/configs/HourglassDecoder/vit.raft5.large.py b/mono/configs/HourglassDecoder/vit.raft5.large.py index 9000cee..4febdcb 100644 --- a/mono/configs/HourglassDecoder/vit.raft5.large.py +++ b/mono/configs/HourglassDecoder/vit.raft5.large.py @@ -15,7 +15,7 @@ ) -max_value = 1000 +max_value = 200 # configs of the canonical space data_basic=dict( canonical_space = dict( @@ -25,7 +25,7 @@ depth_range=(0, 1), depth_normalize=(0.1, max_value), crop_size = (616, 1064), # %28 = 0 - clip_depth_range=(0.1, 1000), + clip_depth_range=(0.1, 200), vit_size=(616,1064) ) diff --git a/mono/configs/HourglassDecoder/vit.raft5.small.py b/mono/configs/HourglassDecoder/vit.raft5.small.py index 35d9c9b..25eb68c 100644 --- a/mono/configs/HourglassDecoder/vit.raft5.small.py +++ b/mono/configs/HourglassDecoder/vit.raft5.small.py @@ -15,7 +15,7 @@ ) -max_value = 1000 +max_value = 200 # configs of the canonical space data_basic=dict( canonical_space = dict( @@ -25,7 +25,7 @@ depth_range=(0, 1), depth_normalize=(0.1, max_value), crop_size = (616, 1064), # %28 = 0 - clip_depth_range=(0.1, 1000), + clip_depth_range=(0.1, 200), vit_size=(616,1064) )