From d0341284a593409649d2f2df27c75ad969909279 Mon Sep 17 00:00:00 2001 From: ZhiboRao Date: Sun, 20 Oct 2024 22:34:36 +0800 Subject: [PATCH] add a action --- Datasets/debug_dataset.csv | 2 +- Datasets/whu_reconstruction_val_list.csv | 488 +++++++++--------- README.md | 320 +++--------- Scripts/start_test_whu_dataset.sh | 6 +- Scripts/start_train_pre_us3d_dataset.sh | 8 +- Scripts/start_train_pre_whu_dataset.sh | 8 +- Scripts/start_train_scene_flow_dataset.sh | 43 ++ Scripts/start_train_us3d_dataset.sh | 8 +- Source/Tools/coner.py | 32 ++ Source/Tools/count_disp.py | 109 ++++ Source/Tools/error_map.py | 34 ++ Source/Tools/evalution_stereo_net.py | 7 +- Source/Tools/panchromatic_img.py | 27 +- Source/Tools/vis_gray.py | 28 +- Source/Tools/vis_masked_images.m | 46 ++ .../Models/LaCGwcNet/inference.py | 2 +- .../Models/LaCGwcNet/networks/submodule.py | 2 +- .../Models/StereoT/Networks/__init__.py | 0 .../Models/StereoT/Networks/encoder.py | 158 ++++++ .../Models/StereoT/Networks/mae.py | 301 +++++++++++ .../Models/StereoT/Networks/pos_embed.py | 95 ++++ .../StereoT/Networks/stereo_matching_model.py | 69 +++ .../Models/StereoT/__init__.py | 2 + .../Models/StereoT/inference.py | 141 +++++ .../SwinStereo/Networks/BackBone/mae.py | 89 +++- .../Networks/mask_stereo_matching.py | 32 +- .../Models/SwinStereo/inference.py | 54 +- .../Models/SwinStereo/loss_functions.py | 6 +- .../Models/__init__.py | 5 + Source/UserModelImplementation/user_define.py | 2 + 30 files changed, 1525 insertions(+), 599 deletions(-) create mode 100644 Scripts/start_train_scene_flow_dataset.sh create mode 100644 Source/Tools/coner.py create mode 100644 Source/Tools/count_disp.py create mode 100644 Source/Tools/error_map.py create mode 100644 Source/Tools/vis_masked_images.m create mode 100644 Source/UserModelImplementation/Models/StereoT/Networks/__init__.py create mode 100644 Source/UserModelImplementation/Models/StereoT/Networks/encoder.py create mode 100644 
Source/UserModelImplementation/Models/StereoT/Networks/mae.py create mode 100644 Source/UserModelImplementation/Models/StereoT/Networks/pos_embed.py create mode 100644 Source/UserModelImplementation/Models/StereoT/Networks/stereo_matching_model.py create mode 100644 Source/UserModelImplementation/Models/StereoT/__init__.py create mode 100644 Source/UserModelImplementation/Models/StereoT/inference.py diff --git a/Datasets/debug_dataset.csv b/Datasets/debug_dataset.csv index 7ab281b..9351ef8 100644 --- a/Datasets/debug_dataset.csv +++ b/Datasets/debug_dataset.csv @@ -1,2 +1,2 @@ img -/home4/datasets/jack/Documents_home2/DFC2019_track2_trainval/Track_Train/OMA389_028_025_LEFT_RGB.tif \ No newline at end of file +/home4/datasets/jack/Documents_home2/DFC2019_track2_trainval/Track_Train/OMA342_038_036_LEFT_RGB.tif \ No newline at end of file diff --git a/Datasets/whu_reconstruction_val_list.csv b/Datasets/whu_reconstruction_val_list.csv index a330ad0..adf1f63 100644 --- a/Datasets/whu_reconstruction_val_list.csv +++ b/Datasets/whu_reconstruction_val_list.csv @@ -1,245 +1,245 @@ img -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_931.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_931.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/KM_left_226.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/KM_right_226.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_996.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_996.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_241.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_241.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_556.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_556.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_19.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_19.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/KM_left_253.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/KM_right_253.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_736.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_736.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_55.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_55.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_291.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_291.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_8.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_8.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_547.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_547.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_753.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_753.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_219.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_219.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_467.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_467.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_692.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_692.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_562.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_562.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_622.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_622.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_212.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_212.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_67.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_67.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_437.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_437.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_54.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_54.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/KM_left_251.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/KM_right_251.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_655.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_655.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_422.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_422.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_752.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_752.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_197.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_197.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_265.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_265.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_493.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_493.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_394.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_394.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_790.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_790.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_608.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_608.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_34.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_34.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_414.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_414.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_791.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_791.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_564.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_564.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_112.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_112.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_192.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_192.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_906.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_906.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_49.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_49.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_715.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_715.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_524.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_524.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_411.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_411.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_144.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_144.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_831.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_831.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_866.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_866.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_198.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_198.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/SG_left_67.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/SG_right_67.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_919.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_919.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_360.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_360.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_829.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_829.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_143.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_143.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_963.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_963.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_916.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_916.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_674.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_674.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_26.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_26.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_182.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_182.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_690.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_690.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_847.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_847.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_187.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_187.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_820.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_820.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_924.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_924.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_512.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_512.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_95.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_95.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_704.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_704.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_403.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_403.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_147.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_147.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_207.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_207.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_442.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_442.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_911.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_911.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_371.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_371.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_391.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_391.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_184.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_184.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_498.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_498.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_404.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_404.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_920.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_920.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_217.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_217.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_560.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_560.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_926.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_926.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_538.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_538.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_535.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_535.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/SG_left_74.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/SG_right_74.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_556.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_556.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_88.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_88.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_74.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_74.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_693.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_693.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_923.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_923.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_902.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_902.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_921.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_921.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_372.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_372.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_779.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_779.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_927.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_927.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_278.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_278.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_597.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_597.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_34.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_34.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_901.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_901.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_166.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_166.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_175.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_175.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_483.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_483.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_177.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_177.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_105.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_105.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/SG_left_73.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/SG_right_73.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_110.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_110.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_59.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_59.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_347.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_347.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_716.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_716.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_285.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_285.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_728.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_728.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_561.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_561.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_229.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_229.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_475.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_475.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/SG_left_69.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/SG_right_69.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_464.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_464.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_606.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_606.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_352.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_352.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_150.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_150.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_615.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_615.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/QC_left_84.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/QC_right_84.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/SG_left_68.tiff 
-/home3/datasets/raozhibo/WHU-Stereo/train/val/right/SG_right_68.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_327.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_327.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_354.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_354.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/left/YD_left_125.tiff -/home3/datasets/raozhibo/WHU-Stereo/train/val/right/YD_right_125.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_437.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_437.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_422.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_422.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_674.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_674.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/SG_left_74.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/SG_right_74.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_524.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_524.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_704.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_704.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_34.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_34.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_831.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_831.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_512.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_512.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_291.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_291.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_906.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_906.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_391.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_391.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_354.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_354.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_229.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_229.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_177.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_177.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_931.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_931.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_19.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_19.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_105.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_105.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/SG_left_67.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/SG_right_67.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_622.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_622.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_753.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_753.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_403.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_403.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_112.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_112.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_475.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_475.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_84.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_84.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_996.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_996.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_372.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_372.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_728.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_728.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_693.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_693.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_347.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_347.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_74.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_74.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_144.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_144.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_411.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_411.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_241.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_241.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/KM_left_253.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/KM_right_253.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_49.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_49.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/SG_left_69.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/SG_right_69.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_217.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_217.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/KM_left_251.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/KM_right_251.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_916.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_916.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_327.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_327.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_564.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_564.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_655.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_655.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_360.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_360.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_752.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_752.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_535.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_535.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_182.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_182.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_59.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_59.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_606.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_606.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_55.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_55.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_690.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_690.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_414.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_414.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_34.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_34.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_779.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_779.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_143.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_143.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_791.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_791.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_608.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_608.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_715.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_715.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_562.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_562.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_187.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_187.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/SG_left_68.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/SG_right_68.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_394.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_394.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_95.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_95.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_212.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_212.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_921.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_921.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_147.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_147.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_198.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_198.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_597.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_597.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_790.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_790.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_88.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_88.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_125.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_125.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_692.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_692.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_150.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_150.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_560.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_560.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_716.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_716.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_442.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_442.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_207.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_207.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_483.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_483.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_902.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_902.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_927.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_927.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_184.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_184.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_547.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_547.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/KM_left_226.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/KM_right_226.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_67.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_67.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_556.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_556.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_847.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_847.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_963.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_963.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_352.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_352.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_926.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_926.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_615.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_615.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_493.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_493.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_54.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_54.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_467.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_467.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_911.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_911.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_265.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_265.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_197.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_197.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_736.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_736.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_923.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_923.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_8.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_8.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_920.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_920.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_285.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_285.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_175.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_175.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_464.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_464.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_166.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_166.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_192.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_192.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_556.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_556.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_404.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_404.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_538.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_538.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_371.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_371.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_820.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_820.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_901.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_901.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_919.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_919.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_219.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_219.tiff 
+/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_924.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_924.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_26.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_26.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_498.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_498.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/SG_left_73.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/SG_right_73.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_829.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_829.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_866.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_866.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/YD_left_561.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/YD_right_561.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_110.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_110.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/left/QC_left_278.tiff +/home/datasets/rzb/whu-stereo/experimental data/with ground truth/val/right/QC_right_278.tiff diff --git a/README.md b/README.md index 4bce93a..4a6453f 100644 --- a/README.md +++ b/README.md @@ -1,263 +1,25 @@ -# Template-jf [![Use the JackFramework 
Demo](https://github.com/Archaic-Atom/FrameworkTemplate/actions/workflows/build_env.yml/badge.svg?event=push)](https://github.com/Archaic-Atom/FrameworkTemplate/actions/workflows/build_env.yml) ![Python 3.8](https://img.shields.io/badge/python-3.8-green.svg?style=plastic) ![Pytorch 1.7](https://img.shields.io/badge/PyTorch%20-%23EE4C2C.svg?style=plastic) ![cuDnn 7.3.6](https://img.shields.io/badge/cudnn-7.3.6-green.svg?style=plastic) ![License MIT](https://img.shields.io/badge/license-MIT-green.svg?style=plastic) ->This is template project for JackFramework (https://github.com/Archaic-Atom/JackFramework). **It is used to rapidly build the model, without caring about the training process (such as DDP or DP, Tensorboard, et al.)** -Document:https://www.wolai.com/archaic-atom/rqKJVi7M1x44mPT8CdM1TL +# Cascaded Recurrent Networks with Masked Representation Learning for Stereo Matching of High-Resolution Satellite Images -Demo Project: https://github.com/Archaic-Atom/Demo-jf +## Project Overview +This project presents Masked Cascaded Recurrent Networks (MaskCRNet), a method for stereo matching of high-resolution satellite images. It employs masked representation learning to enhance feature extraction and uses cascaded recurrent modules to improve robustness against imperfect rectification, achieving accurate stereo matching for high-resolution satellite images. ---- -### Software Environment -1. OS Environment -``` -os >= linux 16.04 -cudaToolKit == 10.1 -cudnn == 7.3.6 -``` - -2. Python Environment (We provide the whole env in ) -``` -python >= 3.8.5 -pythorch >= 1.15.0 -numpy >= 1.14.5 -opencv >= 3.4.0 -PIL >= 5.1.0 -``` ---- -### Hardware Environment -The framework only can be used in GPUs. - -### Train the model by running: -0. Install the JackFramework lib from Github (https://github.com/Archaic-Atom/JackFramework) -``` -$ cd JackFramework/ -$ ./install.sh -``` - -1. 
Get the Training list or Testing list (You need rewrite the code by your path, and my related demo code can be found in Source/Tools/genrate_**_traning_path.py) -``` -$ ./GenPath.sh -``` -Please check the path. The source code in Source/Tools. - -2. Implement the model's interface and dataloader's interface of JackFramework in Source/UserModelImplementation/Models/your_model/inference.py and Source/UserModelImplementation/Dataloaders/your_dataloader.py. - -The template of model is shown in follows: -```python -# -*- coding: utf-8 -*- -# import torch -# import torch.nn as nn -# import torch.nn.functional as F -# import torch.optim as optim - -import JackFramework as jf -# import UserModelImplementation.user_define as user_def - - -class YourModelInterface(jf.UserTemplate.ModelHandlerTemplate): - """docstring for DeepLabV3Plus""" - - def __init__(self, args: object) -> object: - super().__init__(args) - self.__args = args - - def get_model(self) -> list: - # args = self.__args - # return model - return [] - - def optimizer(self, model: list, lr: float) -> list: - # args = self.__args - # return opt and sch - return [], [] - - def lr_scheduler(self, sch: object, ave_loss: list, sch_id: int) -> None: - # how to do schenduler - pass - - def inference(self, model: list, input_data: list, model_id: int) -> list: - # args = self.__args - # return output - return [] - - def accuary(self, output_data: list, label_data: list, model_id: int) -> list: - # return acc - # args = self.__args - return [] - - def loss(self, output_data: list, label_data: list, model_id: int) -> list: - # return loss - # args = self.__args - return [] - - # Optional - def pretreatment(self, epoch: int, rank: object) -> None: - # do something before training epoch - pass - - # Optional - def postprocess(self, epoch: int, rank: object, - ave_tower_loss: list, ave_tower_acc: list) -> None: - # do something after training epoch - pass - - # Optional - def load_model(self, model: object, checkpoint: dict, 
model_id: int) -> bool: - # return False - return False - - # Optional - def load_opt(self, opt: object, checkpoint: dict, model_id: int) -> bool: - # return False - return False - - # Optional - def save_model(self, epoch: int, model_list: list, opt_list: list) -> dict: - # return None - return None - -``` - -The template of Dataloader is shown in follows: -```python -# -*- coding: utf-8 -*- -import time -import JackFramework as jf -# import UserModelImplementation.user_define as user_def - - -class YourDataloader(jf.UserTemplate.DataHandlerTemplate): - """docstring for DataHandlerTemplate""" - - def __init__(self, args: object) -> object: - super().__init__(args) - self.__args = args - self.__result_str = jf.ResultStr() - self.__train_dataset = None - self.__val_dataset = None - self.__imgs_num = 0 - self.__start_time = 0 - - def get_train_dataset(self, path: str, is_training: bool = True) -> object: - # args = self.__args - # return dataset - return None - - def get_val_dataset(self, path: str) -> object: - # return dataset - # args = self.__args - # return dataset - return None - - def split_data(self, batch_data: tuple, is_training: bool) -> list: - self.__start_time = time.time() - if is_training: - # return input_data_list, label_data_list - return [], [] - # return input_data, supplement - return [], [] - - def show_train_result(self, epoch: int, loss: - list, acc: list, - duration: float) -> None: - assert len(loss) == len(acc) # same model number - info_str = self.__result_str.training_result_str(epoch, loss[0], acc[0], duration, True) - jf.log.info(info_str) - - def show_val_result(self, epoch: int, loss: - list, acc: list, - duration: float) -> None: - assert len(loss) == len(acc) # same model number - info_str = self.__result_str.training_result_str(epoch, loss[0], acc[0], duration, False) - jf.log.info(info_str) - - def save_result(self, output_data: list, supplement: list, - img_id: int, model_id: int) -> None: - assert self.__train_dataset is not 
None - # args = self.__args - # save method - pass - - def show_intermediate_result(self, epoch: int, - loss: list, acc: list) -> str: - assert len(loss) == len(acc) # same model number - return self.__result_str.training_intermediate_result(epoch, loss[0], acc[0]) +## Key Contributions +- **Masked Representation Learning Pre-training Strategy**: Addresses challenges in remote sensing stereo datasets by improving data utilization and feature representation on small datasets. +- **Improved Correlation Computation**: Based on self-attention, cross-attention, and deformable convolutions, it handles imperfect rectification to enhance performance. +- **State-of-the-Art Performance**: Achieves state-of-the-art results on the US3D and WHU-Stereo datasets. -``` - -you must implement the related class for using JackFramework, the demo can be find in Source/UserModelImplementation/Models/Your_Model/inference.py or Source/UserModelImplementation/Dataloaders/your_dataloader.py. Or you can find the other demo in Demo project. 
- -Next, you need implement the interface file Source/user_interface.py (you can add some parameters in user\_parser function of this file ), as shown in follows: -```python -# -*- coding: utf-8 -*- -import argparse -import JackFramework as jf -# import UserModelImplementation.user_define as user_def - -# model and dataloader -from UserModelImplementation import Models -from UserModelImplementation import Dataloaders - - -class UserInterface(jf.UserTemplate.NetWorkInferenceTemplate): - """docstring for UserInterface""" - - def __init__(self) -> object: - super().__init__() - - def inference(self, args: object) -> object: - dataloader = Dataloaders.dataloaders_zoo(args, args.dataset) - model = Models.model_zoo(args, args.modelName) - return model, dataloader - - def user_parser(self, parser: object) -> object: - # parser.add_argument('--startDisp', type=int, default=user_def.START_DISP, - # help='start disparity') - # return parser - return None - - @staticmethod - def __str2bool(arg: str) -> bool: - if arg.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif arg.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') -``` +## Code Structure -Finally, you need pass this object to JackFramework, as shown in follows: -```python -# -*coding: utf-8 -*- -import JackFramework as jf -from UserModelImplementation.user_interface import UserInterface - - -def main()->None: - app = jf.Application(UserInterface(), "Stereo Matching Models") - app.start() - - -# execute the main function -if __name__ == "__main__": - main() - -``` - -3. Run the program, like: -``` -$ ./Scripts/start_debug_stereo_net.sh -``` ---- -### File Structure ``` -Template-jf +MaskCRNet ├── Datasets # Get it by ./generate_path.sh, you need build folder │ ├── dataset_example_training_list.csv │ └── ... @@ -278,19 +40,55 @@ Template-jf ├── LICENSE └── README.md ``` ---- -### Update log -#### 2021-05-29 -1. 
Add the depth for transformer; -2. Fork the JackFramework to a new project; -3. Remove the JackFramework from this project. -#### 2021-04-08 -1. Add the stereo; -2. Add transformer. +## Dataset Preparation +1. US3D Dataset: Download from the US3D official website and organize according to the dataset's README. +2. WHU-Stereo Dataset: Download from the WHU-Stereo GitHub page and organize according to the dataset's README. + +## Environment Dependencies + +Ensure you have the following Python libraries installed: + +- torch +- torchvision +- numpy +- JackFramework +- DatasetHandler + +## Training the Model +1. Generate the training or testing list (you need to adapt the code to your own dataset paths; related demo code can be found in Source/Tools/genrate_**_traning_path.py) +``` +$ ./Scripts/GenPath.sh +``` + + +2. Run the program, like: +``` +$ ./Scripts/start_debug_stereo_net.sh +``` + +## Testing the Model + +1. Run the program, like: +``` +$ ./Scripts/start_test_stereo_net.sh +``` + +## Citation +If you use this code or method, please cite the following paper: +``` +@article{rao2024cascaded, + title={Cascaded Recurrent Networks with Masked Representation Learning for Stereo Matching of High-Resolution Satellite Images}, + author={Rao, Zhibo and Li, Xing and Xiong, Bangshu and Dai, Yuchao and Shen, Zhelun and Li, Hangbiao and Lou, Yue}, + journal={ISPRS Journal of Photogrammetry and Remote Sensing}, + year={2024}, + url={https://github.com/Archaic-Atom/MaskCRNet} +} +``` + +## Contact Us +For any questions or suggestions, please contact us at: + +- Email: raoxi36@foxmail.com -#### 2021-01-13 -1. Fork a new prject (based on pythorch); -2. Use a new code style; -3. Build the frameworks for pythorch; -4. Write ReadMe +Thank you for using our code! 
\ No newline at end of file diff --git a/Scripts/start_test_whu_dataset.sh b/Scripts/start_test_whu_dataset.sh index 73812c7..ef55c4d 100755 --- a/Scripts/start_test_whu_dataset.sh +++ b/Scripts/start_test_whu_dataset.sh @@ -1,8 +1,8 @@ #!/bin/bash test_gpus_id=0,1,2,3,4 eva_gpus_id=7 -# test_list_path='./Datasets/whu_stereo_testing_list.csv' -test_list_path='./Datasets/whu_stereo_val_list.csv' +test_list_path='./Datasets/whu_stereo_testing_list.csv' +# test_list_path='./Datasets/whu_stereo_val_list.csv' evalution_format='training' CUDA_VISIBLE_DEVICES=${test_gpus_id} python Source/main.py \ @@ -25,7 +25,7 @@ CUDA_VISIBLE_DEVICES=${test_gpus_id} python Source/main.py \ --pre_train_opt false \ --modelName SwinStereo \ --outputDir ./TestResult/ \ - --modelDir ./Checkpoint/ \ + --modelDir ./Checkpoint_old/ \ --dataset whu CUDA_VISIBLE_DEVICES=${eva_gpus_id} python ./Source/Tools/evalution_stereo_net.py --gt_list_path ${test_list_path} --invaild_value -999 --img_path_format ./ResultImg/%06d_10.tiff diff --git a/Scripts/start_train_pre_us3d_dataset.sh b/Scripts/start_train_pre_us3d_dataset.sh index 38fe3ec..253e8c3 100755 --- a/Scripts/start_train_pre_us3d_dataset.sh +++ b/Scripts/start_train_pre_us3d_dataset.sh @@ -3,8 +3,8 @@ tensorboard_port=6234 dist_port=8809 tensorboard_folder='./log/' -# train_list_path='./Datasets/debug_dataset.csv' -train_list_path='./Datasets/us3d_reconstruction_training_list.csv' +train_list_path='./Datasets/debug_dataset.csv' +# train_list_path='./Datasets/us3d_reconstruction_training_list.csv' echo "The tensorboard_port:" ${tensorboard_port} echo "The dist_port:" ${dist_port} @@ -17,12 +17,12 @@ fi echo "Begin to train the model!" 
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 nohup python -u Source/main.py \ --batchSize 1 \ - --gpu 8 \ + --gpu 1 \ --trainListPath ${train_list_path} \ --imgWidth 448 \ --imgHeight 448 \ --dataloaderNum 8 \ - --maxEpochs 1000 \ + --maxEpochs 1 \ --imgNum 2440 \ --sampleNum 1 \ --log ${tensorboard_folder} \ diff --git a/Scripts/start_train_pre_whu_dataset.sh b/Scripts/start_train_pre_whu_dataset.sh index b81f2ea..5979a7c 100755 --- a/Scripts/start_train_pre_whu_dataset.sh +++ b/Scripts/start_train_pre_whu_dataset.sh @@ -3,8 +3,8 @@ tensorboard_port=6234 dist_port=8809 tensorboard_folder='./log/' -# train_list_path='./Datasets/debug_dataset.csv' -train_list_path=./Datasets/whu_reconstruction_training_list.csv +train_list_path='./Datasets/debug_dataset.csv' +# train_list_path=./Datasets/whu_reconstruction_training_list.csv echo "The tensorboard_port:" ${tensorboard_port} echo "The dist_port:" ${dist_port} @@ -17,12 +17,12 @@ fi echo "Begin to train the model!" CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 nohup python -u Source/main.py \ --batchSize 2 \ - --gpu 6 \ + --gpu 1 \ --trainListPath ${train_list_path} \ --imgWidth 448 \ --imgHeight 448 \ --dataloaderNum 12 \ - --maxEpochs 1000 \ + --maxEpochs 1 \ --imgNum 2440 \ --sampleNum 1 \ --log ${tensorboard_folder} \ diff --git a/Scripts/start_train_scene_flow_dataset.sh b/Scripts/start_train_scene_flow_dataset.sh new file mode 100644 index 0000000..42d51ed --- /dev/null +++ b/Scripts/start_train_scene_flow_dataset.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# parameters +tensorboard_port=6234 +dist_port=8809 +tensorboard_folder='./log/' +echo "The tensorboard_port:" ${tensorboard_port} +echo "The dist_port:" ${dist_port} + +# command +# delete the previous tensorboard files +if [ -d "${tensorboard_folder}" ]; then + rm -r ${tensorboard_folder} +fi + +echo "Begin to train the model!" 
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 nohup python -u Source/main.py \ + --batchSize 2 \ + --gpu 8 \ + --trainListPath ./Datasets/sceneflow_stereo_training_list.csv \ + --imgWidth 512 \ + --imgHeight 256 \ + --dataloaderNum 12 \ + --maxEpochs 200 \ + --imgNum 35454 \ + --sampleNum 1 \ + --log ${tensorboard_folder} \ + --lr 0.001 \ + --dist true \ + --modelDir ./Checkpoint/ \ + --modelName MaskLacGwcNet \ + --mask false \ + --port ${dist_port} \ + --lr_scheduler false \ + --dataset sceneflow > TrainRun.log 2>&1 & +echo "You can use the command (>> tail -f TrainRun.log) to watch the training process!" + +echo "Start the tensorboard at port:" ${tensorboard_port} +nohup tensorboard --logdir ${tensorboard_folder} --port ${tensorboard_port} \ + --bind_all --load_fast=false > Tensorboard.log 2>&1 & +echo "All processes have started!" + +echo "Begin to watch TrainRun.log file!" +tail -f TrainRun.log diff --git a/Scripts/start_train_us3d_dataset.sh b/Scripts/start_train_us3d_dataset.sh index 19f0d2b..f971452 100755 --- a/Scripts/start_train_us3d_dataset.sh +++ b/Scripts/start_train_us3d_dataset.sh @@ -13,10 +13,10 @@ if [ -d "${tensorboard_folder}" ]; then fi echo "Begin to train the model!" 
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 nohup python -u Source/main.py \ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6 nohup python -u Source/main.py \ --batchSize 1 \ - --gpu 8 \ - --trainListPath ./Datasets/us3d_stereo_val_list.csv \ + --gpu 7 \ + --trainListPath ./Datasets/1.csv \ --valListPath ./Datasets/us3d_stereo_training_list.csv \ --imgWidth 448 \ --imgHeight 448 \ @@ -31,7 +31,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 nohup python -u Source/main.py \ --dispNum 128 \ --dist true \ --modelDir ./Checkpoint/ \ - --modelName SwinStereo \ + --modelName LacGwcNet \ --port ${dist_port} \ --auto_save_num 1 \ --lr_scheduler false \ diff --git a/Source/Tools/coner.py b/Source/Tools/coner.py new file mode 100644 index 0000000..a980010 --- /dev/null +++ b/Source/Tools/coner.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +import tifffile +import cv2 +import numpy as np + + +def draw_line(img: np.array, line_no: int) -> np.array: + h, _, _ = img.shape + + for i in range(h // line_no): + img[i * line_no, :, 0] = 111 + img[i * line_no, :, 1] = 127 + img[i * line_no, :, 2] = 250 + + return img + + +def main() -> None: + img_l = tifffile.imread('/Users/rhc/Downloads/DFC2019_track2_test/Test-Track2/JAX_160_001_015_LEFT_RGB.tif') + img_r = tifffile.imread('/Users/rhc/Downloads/DFC2019_track2_test/Test-Track2/JAX_160_001_015_RIGHT_RGB.tif') + img_l = draw_line(img_l, 25) + img_r = draw_line(img_r, 25) + # img_l = img_l[2:122, 800:920, :] + # img_r = img_r[2:122, 800:920, :] + img_l = img_l[2 + 45:122 - 45, 800 + 45:920 - 45, :] + img_r = img_r[2 + 45:122 - 45, 800 + 45:920 - 45, :] + cv2.imwrite('/Users/rhc/WorkSpace/Programs/RSStereo/Tmp/example/10.png', img_l) + cv2.imwrite('/Users/rhc/WorkSpace/Programs/RSStereo/Tmp/example/11.png', img_r) + + +if __name__ == '__main__': + main() diff --git a/Source/Tools/count_disp.py b/Source/Tools/count_disp.py new file mode 100644 index 0000000..7c91c77 --- /dev/null +++ b/Source/Tools/count_disp.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +import 
os +import torch +import pandas as pd +import numpy as np +import re +import argparse +from PIL import Image +import tifffile +import JackFramework as jf + +DEPTH_DIVIDING = 256.0 +ACC_EPSILON = 1e-9 + + +def read_pfm(filename: str) -> tuple: + file = open(filename, 'rb') + color = None + width = None + height = None + scale = None + endian = None + + header = file.readline().decode('utf-8').rstrip() + if header == 'PF': + color = True + elif header == 'Pf': + color = False + else: + raise Exception('Not a PFM file.') + + dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline().decode('utf-8')) + if dim_match: + width, height = map(int, dim_match.groups()) + else: + raise Exception('Malformed PFM header.') + + scale = float(file.readline().rstrip()) + if scale < 0: # little-endian + endian = '<' + scale = -scale + else: + endian = '>' # big-endian + + data = np.fromfile(file, endian + 'f') + shape = (height, width, 3) if color else (height, width) + + data = np.reshape(data, shape) + data = np.flipud(data) + return data, scale + + +def read_label_list(list_path: str) -> list: + input_dataframe = pd.read_csv(list_path) + return input_dataframe["gt_disp"].values + + +def read_disp(path: str) -> np.array: + file_type = os.path.splitext(path)[-1] + if file_type == ".png": + img = np.array(Image.open(path), dtype=np.float32) / float(DEPTH_DIVIDING) + elif file_type == '.pfm': + img, _ = read_pfm(path) + elif file_type == '.tiff': + img = np.array(tifffile.imread(path)) + elif file_type == '.tif': + img = np.array(tifffile.imread(path)) + else: + print('gt file name error!') + return img + + +def parser_args() -> object: + parser = argparse.ArgumentParser( + description="The Evalution process") + parser.add_argument('--list_path', type=str, + default='./Datasets/kitti2015_training_list.csv', + help='list path') + + parser.add_argument('--output_path', type=str, + default='./Datasets/kitti2015_training_list.csv', + help='output path') + return parser.parse_args() + + 
+def count_disp(list_path: str, output_path: str, start_disp: int = -128, disp_num: int = 448) -> None: + disp_list = read_label_list(list_path) + res_list = [0] * disp_num + print('total:', len(disp_list)) + for idx, path in enumerate(disp_list): + print(idx, path) + disp = read_disp(path).astype(np.int32) + for i in range(disp_num): + disp_values = start_disp + i + res = (disp_values == disp).astype(np.int32).sum() + res_list[i] = res_list[i] + res + + fd_file = jf.FileHandler.open_file(output_path, False) + for data in res_list: + jf.FileHandler.write_file(fd_file, str(data)) + + +def main() -> None: + args = parser_args() + count_disp(args.list_path, args.output_path) + + +if __name__ == '__main__': + main() diff --git a/Source/Tools/error_map.py b/Source/Tools/error_map.py new file mode 100644 index 0000000..3f4512a --- /dev/null +++ b/Source/Tools/error_map.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +import tifffile +import numpy as np +import cv2 + + +def vis_disp(img: np.array) -> np.array: + # print(img.min(), img.max()) + # img = (img - img.min()) / (img.max() - img.min()) * 255 + img = img.astype(np.uint8) + img = cv2.convertScaleAbs(img, alpha=1.0) + img = cv2.applyColorMap(img, cv2.COLORMAP_JET) + return img + + +def main() -> None: + root_path = '/Users/rhc/Documents/Scholar/Ph.D./MyPaper/019_MaskRemoteSensing/MaskRemoteSensing/img/result/whu/' + img_r = root_path + '1.drawio.png' + img_o = root_path + '2.drawio.png' + + img_r = cv2.imread(img_r) + img_o = cv2.imread(img_o) + + error = np.array(img_r - img_o) + + print(error.shape) + error = np.mean(error, axis=2) + error = vis_disp(error) + + cv2.imwrite(root_path + '3.drawio.png', error) + + +if __name__ == "__main__": + main() diff --git a/Source/Tools/evalution_stereo_net.py b/Source/Tools/evalution_stereo_net.py index 65535bf..76ecf65 100644 --- a/Source/Tools/evalution_stereo_net.py +++ b/Source/Tools/evalution_stereo_net.py @@ -37,7 +37,7 @@ def d_1(res: torch.tensor, gt: torch.tensor, 
start_threshold: int = 2, for i in range(threshold_num): threshold = start_threshold + i acc = (error > threshold) & (error > related_threshold) - acc = (error > threshold) + # acc = (error > threshold) acc_num = acc.int().sum() error_rate = acc_num / (total_num + ACC_EPSILON) acc_res.append(error_rate) @@ -147,6 +147,7 @@ def cal_total(id_num: int, total: np.array, err_total: int, for i in range(threshold_num): d1_res = acc_res[i].cpu() d1_res = d1_res.detach().numpy() + print(d1_res) total[i] = total[i] + d1_res str_data = str_data + str(d1_res) + ' ' @@ -198,15 +199,17 @@ def evalution(epoch: int, img_path_format: str, gt_list_path: str, threshold_num=threshold_num, invaild_value=invaild_value) eval_model = torch.nn.DataParallel(eval_model).cuda() - for i in range(total_img_num): img_path = img_path_format % (i) gt_path = gt_dsp_path[i] + print(img_path, gt_path) img, img_gt = get_data(img_path, gt_path) img, img_gt = data2cuda(img, img_gt) acc_res, mae = eval_model(img, img_gt) + print(acc_res) + print(mae) total, err_total = cal_total(i, total, err_total, acc_res, mae, threshold_num) diff --git a/Source/Tools/panchromatic_img.py b/Source/Tools/panchromatic_img.py index f6f8b64..7e7d24f 100644 --- a/Source/Tools/panchromatic_img.py +++ b/Source/Tools/panchromatic_img.py @@ -1,21 +1,36 @@ # -*- coding: utf-8 -* -from matplotlib import pyplot as plt +# from matplotlib import pyplot as plt import tifffile import cv2 import numpy as np -def show_panchromatic_img(path: str) -> None: +def show_panchromatic_img(path: str, save_path: str) -> None: img = tifffile.imread(path) img = (img - img.min()) / (img.max() - img.min()) img = np.array(img) - cv2.imshow('1', img) - cv2.waitKey(0) + cv2.imwrite(save_path, img * 255) + + +def vis_disp(img: np.array) -> np.array: + print(img.min(), img.max()) + img = (img - img.min()) / (img.max() - img.min()) * 255 + img = img.astype(np.uint8) + img = cv2.convertScaleAbs(img, alpha=1.0) + img = cv2.applyColorMap(img, 
def vis_disp(img: np.array) -> np.array:
    """Colour-code a disparity map with OpenCV's ``COLORMAP_DEEPGREEN``.

    The map is min-max normalised to ``[0, 255]``, converted to ``uint8`` and
    passed through ``cv2.applyColorMap``.

    Args:
        img: array to visualise (a disparity map at the call site).

    Returns:
        The colour-mapped image produced by ``cv2.applyColorMap``.
    """
    low, high = img.min(), img.max()
    print(low, high)
    value_range = high - low
    if value_range == 0:
        # A constant map would otherwise be normalised as 0/0, producing NaNs
        # that are then cast to uint8; render it as all zeros instead.
        img = np.zeros(img.shape, dtype=np.uint8)
    else:
        img = ((img - low) / value_range * 255).astype(np.uint8)
    img = cv2.convertScaleAbs(img, alpha=1.0)
    img = cv2.applyColorMap(img, cv2.COLORMAP_DEEPGREEN)
    return img
% Visualise a patch mask on a reconstruction result.
% For every patch whose mask value is < 0.5 the predicted image receives the
% original pixels; for every other patch the original image is painted with
% the constant 127. Both images are then written to disk.
clc;
clear;

org_img_path = '/Users/rhc/WorkSpace/Programs/RSStereo/Tmp/imgs/3.png';
pred_img_path = '/Users/rhc/WorkSpace/Programs/RSStereo/Tmp/imgs/1.png';
mask_mat_path = '/Users/rhc/WorkSpace/Programs/RSStereo/Tmp/imgs/1.txt';
org_img = imread(org_img_path);
pred_img = imread(pred_img_path);
masks = load(mask_mat_path);

h = 448;  % image height (kept for reference; not needed by the index decode)
w = 448;  % image width
block_w = 16;
block_h = 16;

% The linear patch index is decoded as (row, column), so the divisor must be
% the number of patches across the image WIDTH. The previous code used
% floor(h / block_w) and floor(w / block_h), which only agrees with this when
% the image and the patches are square.
blocks_per_row = floor(w / block_w);

for i = 1:length(masks)
    id = i - 1;
    height_id = floor(id / blocks_per_row);
    width_id = mod(id, blocks_per_row);
    rows = (height_id * block_h + 1):(height_id * block_h + block_h);
    cols = (width_id * block_w + 1):(width_id * block_w + block_w);
    if masks(i) < 0.5
        % mask value below 0.5: copy the original pixels into the prediction
        pred_img(rows, cols, :) = org_img(rows, cols, :);
    else
        % otherwise: grey the patch out in the original image
        org_img(rows, cols, :) = 127;
    end
end

% imshow(pred_img)
imwrite(pred_img,'/Users/rhc/WorkSpace/Programs/RSStereo/Tmp/imgs/4.png')
imwrite(org_img,'/Users/rhc/WorkSpace/Programs/RSStereo/Tmp/imgs/5.png')
class ViT(nn.Module):
    """ViT encoder whose per-patch predictions are folded back into a dense map.

    The input is split into patches by ``PatchEmbed``, a class token is
    prepended, the sequence runs through ``depth`` transformer blocks, and a
    linear head predicts ``patch_size**2 * out_chans`` values per patch, which
    ``unpatchify`` reassembles into an ``(N, out_chans, H, W)`` tensor.

    NOTE(review): ``__init__`` does not call ``initialize_weights``, so
    ``pos_embed`` stays all-zero (it is created with ``requires_grad=False``)
    unless the caller invokes it or loads a checkpoint. Confirm this is intended.
    """

    def __init__(self, img_size: tuple or int = 224, patch_size: int = 16, in_chans: int = 3,
                 out_chans: int = 3, embed_dim: int = 1024, depth: int = 24, num_heads: int = 16,
                 mlp_ratio: float = 4., norm_layer: nn = nn.LayerNorm) -> None:
        super().__init__()
        self.in_chans, self.img_size, self.out_chans = in_chans, to_2tuple(img_size), out_chans
        self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        # fixed sin-cos embedding (one extra slot for the class token)
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim),
                                      requires_grad=False)
        self.blocks = nn.ModuleList([
            Block(embed_dim, num_heads,
                  mlp_ratio, qkv_bias=True,
                  norm_layer=norm_layer,)
            for _ in range(depth)
        ])
        self.norm = norm_layer(embed_dim)
        self.encoder_pred = nn.Linear(embed_dim, patch_size**2 * out_chans, bias=True)

    def initialize_weights(self):
        """Fill ``pos_embed`` with sin-cos values and initialise the other weights.

        The grid size is taken as ``int(num_patches ** .5)``, so this assumes a
        square patch grid.
        """
        # initialize (and freeze) pos_embed by sin-cos embedding
        pos_embed = get_2d_sincos_pos_embed(
            self.pos_embed.shape[-1], int(self.patch_embed.num_patches**.5), cls_token=True)
        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))

        # initialize patch_embed like nn.Linear (instead of nn.Conv2d)
        w = self.patch_embed.proj.weight.data
        torch.nn.init.xavier_uniform_(w.view([w.shape[0], -1]))

        # timm's trunc_normal_(std=.02) is effectively normal_(std=0.02) as cutoff is too big (2.)
        # This encoder has no mask token, so only the class token is initialised
        # here (the previous ``self.mask_token`` access raised AttributeError).
        torch.nn.init.normal_(self.cls_token, std=.02)

        # initialize nn.Linear and nn.LayerNorm
        self.apply(self._init_weights)

    def _init_weights(self, m: object) -> None:
        """Xavier-init linear layers and reset LayerNorm to identity."""
        if isinstance(m, nn.Linear):
            # we use xavier_uniform following official JAX ViT:
            torch.nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def patchify(self, imgs: torch.Tensor) -> torch.Tensor:
        """Split images into flattened patches.

        imgs: (N, in_chans, H, W) with H and W multiples of the patch size
        returns: (N, L, patch_size**2 * in_chans) with L = (H / p) * (W / p)
        """
        p = self.patch_embed.patch_size[0]
        # Height and width may differ, matching what ``unpatchify`` supports.
        assert imgs.shape[2] % p == 0 and imgs.shape[3] % p == 0

        h, w = imgs.shape[2] // p, imgs.shape[3] // p
        x = imgs.reshape(shape=(imgs.shape[0], self.in_chans, h, p, w, p))
        x = torch.einsum('nchpwq->nhwpqc', x)
        x = x.reshape(shape=(imgs.shape[0], h * w, p**2 * self.in_chans))
        return x

    def unpatchify(self, x: torch.Tensor) -> torch.Tensor:
        """Reassemble flattened patches into a dense map.

        x: (N, L, patch_size**2 * out_chans)
        returns: (N, out_chans, H, W), with H and W taken from ``self.img_size``
        """
        p = self.patch_embed.patch_size[0]
        h, w = self.img_size[0] // p, self.img_size[1] // p
        assert h * w == x.shape[1]

        x = x.reshape(shape=(x.shape[0], h, w, p, p, self.out_chans))
        x = torch.einsum('nhwpqc->nchpwq', x)
        return x.reshape(shape=(x.shape[0], self.out_chans, h * p, w * p))

    def _add_cls_tokens(self, x: torch.Tensor) -> torch.Tensor:
        """Prepend the class token (plus its positional slot) to every sequence."""
        cls_token = self.cls_token + self.pos_embed[:, :1, :]
        cls_tokens = cls_token.expand(x.shape[0], -1, -1)
        return torch.cat((cls_tokens, x), dim=1)

    def _add_pos_embed(self, x: torch.Tensor) -> torch.Tensor:
        """Embed the patches and add the positional embedding (class slot skipped)."""
        x = self.patch_embed(x)
        return x + self.pos_embed[:, 1:, :]

    def forward(self, x):
        """Encode ``x`` and return the dense ``(N, out_chans, H, W)`` prediction."""
        # add pos embed w/o cls token
        x = self._add_pos_embed(x)

        # append cls token
        x = self._add_cls_tokens(x)

        # apply Transformer blocks
        for blk in self.blocks:
            x = blk(x)
        x = self.encoder_pred(self.norm(x))

        # remove cls token
        return self.unpatchify(x[:, 1:, :])
class MaskedAutoencoderViT(nn.Module):
    """Masked Autoencoder with VisionTransformer backbone.

    A random subset of patches is dropped, the remaining ones are encoded, and
    a lighter decoder reconstructs every patch. ``forward`` returns the
    reconstruction loss on the removed patches together with the
    reconstruction, the mask and the encoder output.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3,
                 embed_dim=1024, depth=24, num_heads=16,
                 decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
                 mlp_ratio=4., norm_layer=nn.LayerNorm, norm_pix_loss=False):
        super().__init__()
        self.in_chans = in_chans

        # --------------------------------------------------------------------------
        # MAE encoder specifics
        self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        # fixed sin-cos embedding
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim),
                                      requires_grad=False)
        self.blocks = nn.ModuleList([
            Block(
                embed_dim,
                num_heads,
                mlp_ratio,
                qkv_bias=True,
                norm_layer=norm_layer,
            )
            for _ in range(depth)
        ])
        self.norm = norm_layer(embed_dim)
        # --------------------------------------------------------------------------

        # --------------------------------------------------------------------------
        # MAE decoder specifics
        self.decoder_embed = nn.Linear(embed_dim, decoder_embed_dim, bias=True)
        self.mask_token = nn.Parameter(torch.zeros(1, 1, decoder_embed_dim))
        # fixed sin-cos embedding
        self.decoder_pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, decoder_embed_dim), requires_grad=False)
        self.decoder_blocks = nn.ModuleList([
            Block(
                decoder_embed_dim,
                decoder_num_heads,
                mlp_ratio,
                qkv_bias=True,
                norm_layer=norm_layer,)
            for _ in range(decoder_depth)
        ])

        self.decoder_norm = norm_layer(decoder_embed_dim)
        # decoder to patch
        self.decoder_pred = nn.Linear(decoder_embed_dim, patch_size**2 * in_chans, bias=True)
        # --------------------------------------------------------------------------

        self.norm_pix_loss = norm_pix_loss
        self.initialize_weights()

    def initialize_weights(self):
        """Fill both positional embeddings with sin-cos values and init the weights.

        The grid size is taken as ``int(num_patches ** .5)``, so a square patch
        grid is assumed.
        """
        # initialize (and freeze) pos_embed by sin-cos embedding
        grid_size = int(self.patch_embed.num_patches**.5)
        pos_embed = get_2d_sincos_pos_embed(
            self.pos_embed.shape[-1], grid_size, cls_token=True)
        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))

        decoder_pos_embed = get_2d_sincos_pos_embed(
            self.decoder_pos_embed.shape[-1], grid_size, cls_token=True)
        self.decoder_pos_embed.data.copy_(
            torch.from_numpy(decoder_pos_embed).float().unsqueeze(0))

        # initialize patch_embed like nn.Linear (instead of nn.Conv2d)
        w = self.patch_embed.proj.weight.data
        torch.nn.init.xavier_uniform_(w.view([w.shape[0], -1]))

        # timm's trunc_normal_(std=.02) is effectively normal_(std=0.02) as cutoff is too big (2.)
        torch.nn.init.normal_(self.cls_token, std=.02)
        torch.nn.init.normal_(self.mask_token, std=.02)

        # initialize nn.Linear and nn.LayerNorm
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Xavier-init linear layers and reset LayerNorm to identity."""
        if isinstance(m, nn.Linear):
            # we use xavier_uniform following official JAX ViT:
            torch.nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def patchify(self, imgs):
        """Split square images into flattened patches.

        imgs: (N, in_chans, H, W) with H == W and H a multiple of the patch size
        returns: (N, L, patch_size**2 * in_chans)
        """
        p = self.patch_embed.patch_size[0]
        assert imgs.shape[2] == imgs.shape[3] and imgs.shape[2] % p == 0

        h = w = imgs.shape[2] // p
        x = imgs.reshape(shape=(imgs.shape[0], self.in_chans, h, p, w, p))
        x = torch.einsum('nchpwq->nhwpqc', x)
        x = x.reshape(shape=(imgs.shape[0], h * w, p**2 * self.in_chans))
        return x

    def unpatchify(self, x):
        """Reassemble flattened patches into square images.

        x: (N, L, patch_size**2 * in_chans) with L a perfect square
        returns: (N, in_chans, H, W)
        """
        p = self.patch_embed.patch_size[0]
        h = w = int(x.shape[1]**.5)
        assert h * w == x.shape[1]

        x = x.reshape(shape=(x.shape[0], h, w, p, p, self.in_chans))
        x = torch.einsum('nhwpqc->nchpwq', x)
        return x.reshape(shape=(x.shape[0], self.in_chans, h * p, h * p))

    def random_masking(self, x, mask_ratio):
        """Perform per-sample random masking by per-sample shuffling.

        Per-sample shuffling is done by argsort random noise.
        x: [N, L, D], sequence
        returns: the kept tokens [N, L * (1 - mask_ratio), D], the binary mask
            [N, L] (0 is keep, 1 is remove) and the indices that restore the
            original token order.
        """
        N, L, D = x.shape  # batch, length, dim
        len_keep = int(L * (1 - mask_ratio))

        noise = torch.rand(N, L, device=x.device)  # noise in [0, 1]

        # sort noise for each sample
        ids_shuffle = torch.argsort(noise, dim=1)  # ascend: small is keep, large is remove
        ids_restore = torch.argsort(ids_shuffle, dim=1)

        # keep the first subset
        ids_keep = ids_shuffle[:, :len_keep]
        x_masked = torch.gather(x, dim=1, index=ids_keep.unsqueeze(-1).repeat(1, 1, D))

        # generate the binary mask: 0 is keep, 1 is remove
        mask = torch.ones([N, L], device=x.device)
        mask[:, :len_keep] = 0
        # unshuffle to get the binary mask
        mask = torch.gather(mask, dim=1, index=ids_restore)

        return x_masked, mask, ids_restore

    def forward_encoder(self, x, mask_ratio):
        """Embed, mask and encode ``x``; returns (latent, mask, ids_restore)."""
        # embed patches
        x = self.patch_embed(x)

        # add pos embed w/o cls token
        x = x + self.pos_embed[:, 1:, :]

        # masking: length -> length * mask_ratio
        x, mask, ids_restore = self.random_masking(x, mask_ratio)

        # append cls token
        cls_token = self.cls_token + self.pos_embed[:, :1, :]
        cls_tokens = cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        # apply Transformer blocks
        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)

        return x, mask, ids_restore

    def forward_decoder(self, x, ids_restore):
        """Decode the latent sequence into per-patch pixel predictions [N, L, p*p*C]."""
        # embed tokens
        x = self.decoder_embed(x)

        # append mask tokens to sequence
        mask_tokens = self.mask_token.repeat(
            x.shape[0], ids_restore.shape[1] + 1 - x.shape[1], 1)

        x_ = torch.cat([x[:, 1:, :], mask_tokens], dim=1)  # no cls token
        x_ = torch.gather(
            x_, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[2]))  # unshuffle
        x = torch.cat([x[:, :1, :], x_], dim=1)  # append cls token

        # add pos embed
        x = x + self.decoder_pos_embed

        # apply Transformer blocks
        for blk in self.decoder_blocks:
            x = blk(x)
        x = self.decoder_norm(x)

        # predictor projection
        x = self.decoder_pred(x)

        # remove cls token
        x = x[:, 1:, :]

        return x

    def forward_loss(self, imgs, pred, mask):
        """Return the masked MSE loss and the masked mean absolute error scaled by 255.

        imgs: [N, C, H, W]
        pred: [N, L, p*p*C]
        mask: [N, L], 0 is keep, 1 is remove
        """
        target = self.patchify(imgs)
        if self.norm_pix_loss:
            mean = target.mean(dim=-1, keepdim=True)
            var = target.var(dim=-1, keepdim=True)
            target = (target - mean) / (var + 1.e-6)**.5

        # With mask_ratio == 0 no patch is removed and mask.sum() is 0; clamping
        # the denominator yields a loss of 0 instead of 0/0 = NaN.
        removed = mask.sum().clamp(min=1)

        acc = torch.abs(pred - target) * 255
        acc = acc.mean(dim=-1)
        acc = (acc * mask).sum() / removed

        loss = (pred - target) ** 2
        loss = loss.mean(dim=-1)  # [N, L], mean loss per patch

        loss = (loss * mask).sum() / removed  # mean loss on removed patches
        return loss, acc

    def forward(self, imgs, mask_ratio=0.75):
        """Run the auto-encoder; returns (loss, acc, reconstruction, mask, latent)."""
        latent, mask, ids_restore = self.forward_encoder(imgs, mask_ratio)
        pred = self.forward_decoder(latent, ids_restore)  # [N, L, p*p*C]
        loss, acc = self.forward_loss(imgs, pred, mask)
        return loss, acc, self.unpatchify(pred), mask, latent
decoder_embed_dim=512, + decoder_depth=8, + decoder_num_heads=16, + mlp_ratio=4, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + **kwargs + ) + + +# set recommended archs +mae_vit_base_patch16 = mae_vit_base_patch16_dec512d8b # decoder: 512 dim, 8 blocks +mae_vit_large_patch16 = mae_vit_large_patch16_dec512d8b # decoder: 512 dim, 8 blocks +mae_vit_huge_patch14 = mae_vit_huge_patch14_dec512d8b # decoder: 512 dim, 8 blocks + +if __name__ == '__main__': + model = mae_vit_base_patch16(img_size=(448, 448), in_chans=192) + res = [] + + num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + print(num_params) + num_params = sum(param.numel() for param in model.parameters()) + print(num_params) + model = model.cuda() + left_img = torch.rand(1, 192, 448, 448).cuda() + for _ in range(100): + res = model(left_img, mask_ratio=0) + print(res[4].shape) + print(res[1].shape) + # image = model.unpatchify(res[1]) + # print(image.shape) diff --git a/Source/UserModelImplementation/Models/StereoT/Networks/pos_embed.py b/Source/UserModelImplementation/Models/StereoT/Networks/pos_embed.py new file mode 100644 index 0000000..172f4e9 --- /dev/null +++ b/Source/UserModelImplementation/Models/StereoT/Networks/pos_embed.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
# --------------------------------------------------------
# Position embedding utils
# --------------------------------------------------------
import numpy as np
import torch


def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
    # --------------------------------------------------------
    # 2D sine-cosine position embedding
    # References:
    # Transformer:
    # https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py
    # MoCo v3: https://github.com/facebookresearch/moco-v3
    # --------------------------------------------------------
    """Build a fixed 2D sine-cosine position embedding.

    embed_dim: embedding width; must be divisible by 4 (each axis gets half,
        split again into sin and cos parts)
    grid_size: int for a square grid, or a ``(rows, cols)`` pair for a
        rectangular one
    cls_token: when True an all-zero row is prepended for the class token
    return:
        pos_embed: [rows*cols, embed_dim] or [1+rows*cols, embed_dim]
        (w/o or w/ cls_token), ordered row by row
    """
    if isinstance(grid_size, (tuple, list)):
        grid_rows, grid_cols = grid_size
    else:
        grid_rows = grid_cols = grid_size

    grid_h = np.arange(grid_rows, dtype=np.float32)
    grid_w = np.arange(grid_cols, dtype=np.float32)
    grid = np.meshgrid(grid_w, grid_h)  # here w goes first
    grid = np.stack(grid, axis=0)

    grid = grid.reshape([2, 1, grid_rows, grid_cols])
    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
    if cls_token:
        pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
    return pos_embed


def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    """Encode both grid axes with half of ``embed_dim`` each and concatenate them.

    grid: array whose first axis selects the two coordinate planes
    return: (H*W, embed_dim)
    """
    assert embed_dim % 2 == 0
    # use half of dimensions to encode grid_h
    emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
    emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)
    return np.concatenate([emb_h, emb_w], axis=1)


def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """Encode positions with sines and cosines at geometrically spaced frequencies.

    embed_dim: output dimension for each position
    pos: positions to be encoded; flattened to size (M,)
    out: (M, D), sin part first, cos part second
    """
    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=np.float32)
    omega /= embed_dim / 2.
    omega = 1. / 10000**omega  # (D/2,)

    pos = pos.reshape(-1)  # (M,)
    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product

    emb_sin = np.sin(out)  # (M, D/2)
    emb_cos = np.cos(out)  # (M, D/2)
    return np.concatenate([emb_sin, emb_cos], axis=1)


def interpolate_pos_embed(model, checkpoint_model):
    # --------------------------------------------------------
    # Interpolate position embeddings for high-resolution
    # References:
    # DeiT: https://github.com/facebookresearch/deit
    # --------------------------------------------------------
    """Resize ``checkpoint_model['pos_embed']`` in place to fit ``model``.

    Both the checkpoint grid and the model grid are treated as square. Extra
    (non-patch) tokens are kept unchanged; only the patch positions are
    bicubically interpolated. Nothing happens when the key is missing or the
    sizes already agree.
    """
    if 'pos_embed' not in checkpoint_model:
        return
    pos_embed_checkpoint = checkpoint_model['pos_embed']
    num_patches = model.patch_embed.num_patches
    num_extra_tokens = model.pos_embed.shape[-2] - num_patches
    # height (== width) for the checkpoint position embedding
    orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
    # height (== width) for the new position embedding
    new_size = int(num_patches ** 0.5)
    # class_token and dist_token are kept unchanged
    if orig_size != new_size:
        print(f"Position interpolate from {orig_size}x{orig_size} to {new_size}x{new_size}")
        extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
        # only the position tokens are interpolated
        pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
        embedding_size = pos_embed_checkpoint.shape[-1]
        pos_tokens = pos_tokens.reshape(
            -1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
        pos_tokens = torch.nn.functional.interpolate(
            pos_tokens, size=(new_size, new_size), mode='bicubic', align_corners=False)
        pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
        new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
        checkpoint_model['pos_embed'] = new_pos_embed
class StereoMatching(nn.Module):
    """Transformer-based stereo matcher.

    Both views pass through a shared ViT that emits a one-channel feature map.
    The maps are correlated over ``disp_num`` horizontal shifts starting at
    ``start_disp``; a second ViT processes the resulting cost volume and
    ``regress`` turns it into a disparity map.
    """

    def __init__(self, img_size: tuple or int, in_channles: int,
                 start_disp: int, disp_num: int) -> None:
        super().__init__()
        self.start_disp, self.disp_num = start_disp, disp_num
        self.feature_extraction = mae_vit_base_patch16(
            img_size=img_size, in_chans=in_channles, out_chans=1)
        self.feature_matching = mae_vit_base_patch16(
            img_size=img_size, in_chans=disp_num, out_chans=disp_num)

    def gen_cost(self, left_img: torch.Tensor, right_img: torch.Tensor) -> torch.Tensor:
        """Build the correlation cost volume.

        For disparity ``d`` the left pixel at column ``x`` is multiplied with
        the right pixel at column ``x - d``; columns without a partner stay 0.

        left_img, right_img: (B, C, H, W) feature maps
        returns: (B, disp_num, H, W)
        """
        b, _, h, w = left_img.shape
        # Allocate on the inputs' device/dtype instead of hard-coding CUDA.
        cost = left_img.new_zeros(b, self.disp_num, h, w)
        for i in range(self.disp_num):
            d = self.start_disp + i
            # Summing over the channel axis drops it, so the product fits the
            # (B, H, W') slice for any batch size (for C == 1 this is a squeeze).
            if d > 0:
                cost[:, i, :, d:] = (left_img[:, :, :, d:] * right_img[:, :, :, :-d]).sum(dim=1)
            elif d < 0:
                # Negative shift: the partner lies |d| columns to the right, so
                # the valid left columns are the first W - |d| ones.
                cost[:, i, :, :d] = (left_img[:, :, :, :d] * right_img[:, :, :, -d:]).sum(dim=1)
            else:
                cost[:, i, :, :] = (left_img * right_img).sum(dim=1)
        return cost.contiguous()

    def regress(self, x: torch.Tensor) -> torch.Tensor:
        """Weight every disparity plane by its disparity value and sum them.

        x: (B, disp_num, H, W)
        returns: (B, H, W)

        NOTE(review): ``x`` is used as-is; no softmax is applied over the
        disparity axis here. Confirm that is intended.
        """
        disp_values = torch.arange(
            self.start_disp, self.start_disp + self.disp_num,
            device=x.device, dtype=torch.float32).view(1, -1, 1, 1)
        return torch.sum(x * disp_values, 1)

    def forward(self, left_img: torch.Tensor, right_img: torch.Tensor) -> torch.Tensor:
        """Predict the disparity map (B, H, W) for a pair of views."""
        left_img = self.feature_extraction(left_img)
        right_img = self.feature_extraction(right_img)
        cost = self.gen_cost(left_img, right_img)
        cost = self.feature_matching(cost)
        return self.regress(cost)
class StereoTInterface(jf.UserTemplate.ModelHandlerTemplate):
    """JackFramework model handler that wires the ``StereoMatching`` network.

    It supplies the model, the optimizer/scheduler, and the inference, loss,
    accuracy and checkpoint hooks.
    """
    MODEL_ID = 0
    LEFT_IMG_ID, RIGHT_IMG_ID, DISP_IMG_ID = 0, 1, 2
    IMG_ID, MASK_IMG_ID, RANDOM_SAMPLE_LIST_ID = 0, 1, 2

    def __init__(self, args: object) -> None:
        super().__init__(args)
        self.__args = args

    @staticmethod
    def lr_lambda(epoch: int) -> float:
        """Learning-rate factor: linear warm-up for 40 epochs, then a cosine curve."""
        warmup_epochs = 40
        cos_epoch = 1000
        return (epoch / warmup_epochs
                if epoch < warmup_epochs
                else 0.5 * (1.0 + math.cos(math.pi * (epoch - warmup_epochs) / cos_epoch)))

    def get_model(self) -> list:
        """Create the 3-channel-input stereo network for the configured image size."""
        args = self.__args
        model = StereoMatching((args.imgHeight, args.imgWidth), 3, args.startDisp, args.dispNum)

        return [model]

    def optimizer(self, model: list, lr: float) -> list:
        """Return ``([AdamW], [scheduler])``; the scheduler is None when disabled."""
        args = self.__args
        opt = torch.optim.AdamW(model[0].parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0.05)
        if args.lr_scheduler:
            sch = optim.lr_scheduler.LambdaLR(opt, lr_lambda=self.lr_lambda)
        else:
            sch = None
        return [opt], [sch]

    def lr_scheduler(self, sch: object, ave_loss: list, sch_id: int) -> None:
        """Advance the scheduler of this model, if one was created."""
        # ``optimizer`` hands back None when scheduling is disabled, so guard
        # against stepping a missing scheduler.
        if self.MODEL_ID == sch_id and sch is not None:
            sch.step()

    def inference(self, model: list, input_data: list, model_id: int) -> list:
        """Run the network on the left/right images; empty list for a foreign ``model_id``."""
        outputs = []
        if self.MODEL_ID == model_id:
            outputs = jf.Tools.convert2list(model(input_data[self.LEFT_IMG_ID],
                                                  input_data[self.RIGHT_IMG_ID]))
        return outputs

    def accuracy(self, output_data: list, label_data: list, model_id: int) -> list:
        """Return ``[acc, mae, ...]`` for the last (at most two) 3-D outputs."""
        args, res = self.__args, []
        if self.MODEL_ID == model_id:
            gt_left = label_data[0]
            # only ground truth strictly inside the searched disparity range counts
            mask = (gt_left < args.startDisp + args.dispNum) & (gt_left > args.startDisp)
            for idx, disp in enumerate(output_data):
                if len(disp.shape) == 3 and idx > len(output_data) - 3:
                    acc, mae = jf.acc.SMAccuracy.d_1(disp, gt_left * mask, invalid_value=0)
                    res.extend((acc[1], mae))
        return res

    def loss(self, output_data: list, label_data: list, model_id: int) -> list:
        """Smooth-L1 loss on pixels inside the disparity range; empty list for a foreign id."""
        res = []
        if self.MODEL_ID == model_id:
            gt_left = label_data[0]
            args = self.__args
            mask = (gt_left < args.startDisp + args.dispNum) & (gt_left > args.startDisp)
            res = [F.smooth_l1_loss(output_data[0][mask], gt_left[mask])]
        return res

    # Optional
    def pretreatment(self, epoch: int, rank: object) -> None:
        # do something before training epoch
        pass

    # Optional
    def postprocess(self, epoch: int, rank: object,
                    ave_tower_loss: list, ave_tower_acc: list) -> None:
        # do something after training epoch
        pass

    # Optional
    def load_model(self, model: object, checkpoint: dict, model_id: int) -> bool:
        """Load ``checkpoint['model_0']`` into ``model``.

        Without ``pre_train_opt`` the weights are loaded non-strictly. With it,
        the two positional embeddings are first resized for a 448x448 input
        with 16x16 patches and the load is strict.

        NOTE(review): the image size is hard-coded here and the
        ``decoder_pos_embed`` key is required - confirm both against the
        checkpoints actually used.
        """
        args = self.__args
        if not args.pre_train_opt:
            return self._extracted_from_load_model_5(model, checkpoint, False)

        state = checkpoint['model_0']
        num_patches = 448 * 448 / 16 / 16
        for key in ('module.feature_extraction.pos_embed',
                    'module.feature_extraction.decoder_pos_embed'):
            state[key] = self.interpolate_pos_embed(state[key], num_patches)
        return self._extracted_from_load_model_5(model, checkpoint, True)

    def _extracted_from_load_model_5(self, model, checkpoint, strict):
        """Load the state dict with the given strictness and log the success."""
        model.load_state_dict(checkpoint['model_0'], strict=strict)
        jf.log.info("Model loaded successfully_add")
        return True

    # Optional
    def load_opt(self, opt: object, checkpoint: dict, model_id: int) -> bool:
        args = self.__args
        return not args.pre_train_opt

    # Optional
    def save_model(self, epoch: int, model_list: list, opt_list: list) -> dict:
        # return None
        return None

    @staticmethod
    def interpolate_pos_embed(pos_embed_checkpoint, num_patches) -> torch.Tensor:
        """Resize a positional embedding with one extra token to ``num_patches`` positions.

        Old and new grids are treated as square. The extra token is kept
        unchanged and the patch positions are bicubically interpolated; the
        input is returned untouched when the sizes already agree.
        """
        embedding_size, num_extra_tokens = pos_embed_checkpoint.shape[-1], 1
        orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
        new_size = int(num_patches ** 0.5)
        # class_token and dist_token are kept unchanged
        if orig_size != new_size:
            print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, new_size, new_size))
            extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
            # only the position tokens are interpolated
            pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
            pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
            pos_tokens = torch.nn.functional.interpolate(
                pos_tokens, size=(new_size, new_size), mode='bicubic', align_corners=False)
            pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
            return torch.cat((extra_tokens, pos_tokens), dim=1)
        return pos_embed_checkpoint
unpatchify(self, x): x = x.reshape(shape=(x.shape[0], h, w, p, p, self.in_chans)) x = torch.einsum('nhwpqc->nchpwq', x) - imgs = x.reshape(shape=(x.shape[0], self.in_chans, h * p, h * p)) - return imgs + return x.reshape(shape=(x.shape[0], self.in_chans, h * p, h * p)) def random_masking(self, x, mask_ratio): """ @@ -181,6 +198,7 @@ def forward_decoder(self, x, ids_restore): # append mask tokens to sequence mask_tokens = self.mask_token.repeat(x.shape[0], ids_restore.shape[1] + 1 - x.shape[1], 1) + x_ = torch.cat([x[:, 1:, :], mask_tokens], dim=1) # no cls token x_ = torch.gather(x_, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[2])) # unshuffle x = torch.cat([x[:, :1, :], x_], dim=1) # append cls token @@ -225,33 +243,58 @@ def forward_loss(self, imgs, pred, mask): def forward(self, imgs, mask_ratio=0.75): latent, mask, ids_restore = self.forward_encoder(imgs, mask_ratio) + # print(ids_restore) + # print(mask) + print(ids_restore) + print(mask) pred = self.forward_decoder(latent, ids_restore) # [N, L, p*p*3] loss, acc = self.forward_loss(imgs, pred, mask) return loss, acc, self.unpatchify(pred), mask def mae_vit_base_patch16_dec512d8b(**kwargs): - model = MaskedAutoencoderViT( - patch_size=16, embed_dim=768, depth=12, num_heads=12, - decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16, - mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - return model + return MaskedAutoencoderViT( + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + decoder_embed_dim=512, + decoder_depth=8, + decoder_num_heads=16, + mlp_ratio=4, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + **kwargs + ) def mae_vit_large_patch16_dec512d8b(**kwargs): - model = MaskedAutoencoderViT( - patch_size=16, embed_dim=1024, depth=24, num_heads=16, - decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16, - mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - return model + return MaskedAutoencoderViT( + patch_size=16, + 
embed_dim=1024, + depth=24, + num_heads=16, + decoder_embed_dim=512, + decoder_depth=8, + decoder_num_heads=16, + mlp_ratio=4, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + **kwargs + ) def mae_vit_huge_patch14_dec512d8b(**kwargs): - model = MaskedAutoencoderViT( - patch_size=14, embed_dim=1280, depth=32, num_heads=16, - decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16, - mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - return model + return MaskedAutoencoderViT( + patch_size=14, + embed_dim=1280, + depth=32, + num_heads=16, + decoder_embed_dim=512, + decoder_depth=8, + decoder_num_heads=16, + mlp_ratio=4, + norm_layer=partial(nn.LayerNorm, eps=1e-6), + **kwargs + ) # set recommended archs diff --git a/Source/UserModelImplementation/Models/SwinStereo/Networks/mask_stereo_matching.py b/Source/UserModelImplementation/Models/SwinStereo/Networks/mask_stereo_matching.py index d7e8d49..5fed30e 100644 --- a/Source/UserModelImplementation/Models/SwinStereo/Networks/mask_stereo_matching.py +++ b/Source/UserModelImplementation/Models/SwinStereo/Networks/mask_stereo_matching.py @@ -19,8 +19,10 @@ def __init__(self, in_channles: int, reconstruction_channels: int, start_disp: i super().__init__() self.start_disp, self.disp_num = start_disp, disp_num self.pre_train_opt = pre_train_opt - self.feature_extraction = mae_vit_base_patch16(img_size=(448, 448), in_chans=in_channles) - self.num_patches = self.feature_extraction.patch_embed.num_patches + + self.feature_extraction = mae_vit_base_patch16(img_size=(1024, 1024), in_chans=in_channles) + # self.num_patches = self.feature_extraction.patch_embed.num_patches + # self.feature_extraction = Restormer( # inp_channels = in_channles, out_channels = reconstruction_channels, # dim = 48, pre_train_opt = pre_train_opt) @@ -31,21 +33,29 @@ def __init__(self, in_channles: int, reconstruction_channels: int, start_disp: i if not self.pre_train_opt: # self.feature_matching = CREStereo(64) - self.conv1 = 
nn.Conv2d(192, 320, 1, padding=0) - self.feature_matching = CREStereo(320) + # self.conv1 = nn.Conv2d(192, 320, 1, padding=0) + self.feature_matching = CREStereo(192) # self.feature_matching = PSMNet(1, 384, start_disp = start_disp, maxdisp = disp_num, udc=True, refine='csr') def _mask_pre_train_proc(self, left_img: torch.Tensor, mask_img_patch: torch.Tensor, random_sample_list: torch.Tensor) -> torch.Tensor: - ''' + import cv2 + import numpy as np img = left_img[0, :, :, :].cpu().detach().numpy() print(img.shape) img = img.transpose(1, 2, 0) - cv2.imwrite('/home2/raozhibo/Documents/Programs/RSStereo/Tmp/imgs/3.png', img * 255) - ''' + # server = '/home/rzb/Documents/rzb/Programs/RSStereo' + server = '/home2/raozhibo/Documents/Programs/RSStereo' + cv2.imwrite(server + '/Tmp/imgs/3.png', img * 255) + # output, _, _, _ = self.feature_extraction(mask_img_patch, left_img, random_sample_list) - output, acc, pred, _ = self.feature_extraction(left_img, 0.75) + # return output + + output, acc, pred, mask = self.feature_extraction(left_img, 0.75) + + mask_mat = mask.cpu().detach().numpy() + np.savetxt(server + '/Tmp/imgs/1.txt', mask_mat) return output, acc, pred def _mask_fine_tune_proc(self, left_img: torch.Tensor, right_img: torch.Tensor, @@ -53,8 +63,10 @@ def _mask_fine_tune_proc(self, left_img: torch.Tensor, right_img: torch.Tensor, _, _, _, left_level3 = self.feature_extraction(left_img) _, _, _, right_level3 = self.feature_extraction(right_img) # return self.feature_matching(left_img, left_level3, right_level3) - return self.feature_matching(self.conv1(left_level3), - self.conv1(right_level3), flow_init=flow_init) + return self.feature_matching(left_level3, + right_level3, flow_init = flow_init) + # return self.feature_matching(self.conv1(left_level3), + # self.conv1(right_level3), flow_init=flow_init) def forward(self, left_img: torch.Tensor, right_img: torch.Tensor, random_sample_list: torch.Tensor = None, flow_init=None) -> torch.Tensor: diff --git 
a/Source/UserModelImplementation/Models/SwinStereo/inference.py b/Source/UserModelImplementation/Models/SwinStereo/inference.py index a9853cd..0a9e7ce 100644 --- a/Source/UserModelImplementation/Models/SwinStereo/inference.py +++ b/Source/UserModelImplementation/Models/SwinStereo/inference.py @@ -26,16 +26,16 @@ def __init__(self, args: object) -> object: def lr_lambda(epoch: int) -> float: warmup_epochs = 40 cos_epoch = 1000 - if epoch < warmup_epochs: - factor = epoch / warmup_epochs - else: - factor = 0.5 * \ - (1. + math.cos(math.pi * (epoch - warmup_epochs) / cos_epoch)) - return factor + return ( + epoch / warmup_epochs + if epoch < warmup_epochs + else 0.5 + * (1.0 + math.cos(math.pi * (epoch - warmup_epochs) / cos_epoch)) + ) def get_model(self) -> list: args = self.__args - if 'whu' == args.dataset: + if args.dataset == 'whu': model = MaskStereoMatching(1, 1, args.startDisp, args.dispNum, args.pre_train_opt) else: model = MaskStereoMatching(3, 3, args.startDisp, args.dispNum, args.pre_train_opt) @@ -50,7 +50,7 @@ def get_model(self) -> list: def optimizer(self, model: list, lr: float) -> list: args = self.__args opt = torch.optim.AdamW(model[0].parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0.05) - #opt = optim.Adam(model[0].parameters(), lr=lr) + # opt = optim.Adam(model[0].parameters(), lr=lr) if args.lr_scheduler: sch = optim.lr_scheduler.LambdaLR(opt, lr_lambda=self.lr_lambda) else: @@ -71,14 +71,13 @@ def inference(self, model: list, input_data: list, model_id: int) -> list: model(input_data[self.IMG_ID], input_data[self.MASK_IMG_ID], input_data[self.RANDOM_SAMPLE_LIST_ID])) + elif args.mode == 'test': + disp = model(input_data[self.LEFT_IMG_ID], + input_data[self.RIGHT_IMG_ID]) + outputs.append(disp[:, 0, :, :]) else: - if args.mode == 'test': - disp = model(input_data[self.LEFT_IMG_ID], - input_data[self.RIGHT_IMG_ID]) - outputs.append(disp[:, 0, :, :]) - else: - outputs = jf.Tools.convert2list(model(input_data[self.LEFT_IMG_ID], - 
input_data[self.RIGHT_IMG_ID])) + outputs = jf.Tools.convert2list(model(input_data[self.LEFT_IMG_ID], + input_data[self.RIGHT_IMG_ID])) return outputs def accuracy(self, output_data: list, label_data: list, model_id: int) -> list: @@ -96,8 +95,7 @@ def accuracy(self, output_data: list, label_data: list, model_id: int) -> list: disp = item[:, 0, :, :] if len(disp.shape) == 3 and idx > len(output_data) - 3: acc, mae = jf.acc.SMAccuracy.d_1(disp, gt_left * mask, invalid_value=0) - res.append(acc[1]) - res.append(mae) + res.extend((acc[1], mae)) return res def loss(self, output_data: list, label_data: list, model_id: int) -> list: @@ -109,17 +107,19 @@ def loss(self, output_data: list, label_data: list, model_id: int) -> list: # print(output_data[2] * 255) # print(label_data[0] * 255) # mask = label_data[0] > 0 - ''' import cv2 img = output_data[2][0, :, :, :].cpu().detach().numpy() print(img.shape) img = img.transpose(1, 2, 0) - cv2.imwrite('/home2/raozhibo/Documents/Programs/RSStereo/Tmp/imgs/1.png', img * 255) + # server = '/home/rzb/Documents/rzb/Programs/RSStereo' + server = '/home2/raozhibo/Documents/Programs/RSStereo' + + cv2.imwrite(server + '/Tmp/imgs/1.png', img * 255) print(label_data[0].shape) img = label_data[0][0, :, :, :].cpu().detach().numpy() img = img.transpose(1, 2, 0) - cv2.imwrite('/home2/raozhibo/Documents/Programs/RSStereo/Tmp/imgs/2.png', img * 255) - ''' + cv2.imwrite(server + '/Tmp/imgs/2.png', img * 255) + # mask = label_data[0] > 0 # loss = ((output_data[0][mask] - label_data[0][mask])) ** 2 # loss = loss.mean() @@ -148,13 +148,11 @@ def load_model(self, model: object, checkpoint: dict, model_id: int) -> bool: args = self.__args # return False if not args.pre_train_opt: - model.load_state_dict(checkpoint['model_0'], strict = False) - jf.log.info("Model loaded successfully_add") - return True + return self._extracted_from_load_model_5(model, checkpoint, False) # print(checkpoint['model_0']) # checkpoint['model_0']['pos_embed'] # - #state_dict 
= model.state_dict() + # state_dict = model.state_dict() # print(checkpoint['model_0']['feature_extraction']['pos_embed']) checkpoint['model_0']['module.feature_extraction.pos_embed'] = self.interpolate_pos_embed( @@ -163,7 +161,11 @@ def load_model(self, model: object, checkpoint: dict, model_id: int) -> bool: checkpoint['model_0']['module.feature_extraction.decoder_pos_embed'] = self.interpolate_pos_embed( checkpoint['model_0']['module.feature_extraction.decoder_pos_embed'], 448 * 448 / 16 / 16) - model.load_state_dict(checkpoint['model_0'], strict = True) + return self._extracted_from_load_model_5(model, checkpoint, True) + + # TODO Rename this here and in `load_model` + def _extracted_from_load_model_5(self, model, checkpoint, strict): + model.load_state_dict(checkpoint['model_0'], strict=strict) jf.log.info("Model loaded successfully_add") return True diff --git a/Source/UserModelImplementation/Models/SwinStereo/loss_functions.py b/Source/UserModelImplementation/Models/SwinStereo/loss_functions.py index 297790b..9a75642 100644 --- a/Source/UserModelImplementation/Models/SwinStereo/loss_functions.py +++ b/Source/UserModelImplementation/Models/SwinStereo/loss_functions.py @@ -135,14 +135,12 @@ def random_noise(img, type): def gradient_x(img): img = F.pad(img, [0, 1, 0, 0], mode='replicate') - gx = img[:, :, :, :-1] - img[:, :, :, 1:] - return gx + return img[:, :, :, :-1] - img[:, :, :, 1:] def gradient_y(img): img = F.pad(img, [0, 0, 0, 1], mode='replicate') - gy = img[:, :, :-1, :] - img[:, :, 1:, :] - return gy + return img[:, :, :-1, :] - img[:, :, 1:, :] def smooth_loss(img, disp): diff --git a/Source/UserModelImplementation/Models/__init__.py b/Source/UserModelImplementation/Models/__init__.py index 73bd698..aeaea3e 100644 --- a/Source/UserModelImplementation/Models/__init__.py +++ b/Source/UserModelImplementation/Models/__init__.py @@ -4,6 +4,7 @@ from .RSStereo import RSStereoInterface from .LaCGwcNet import LacGwcNetworkInterface from .SwinStereo import 
SwinStereoInterface +# from .StereoT import StereoTInterface def model_zoo(args: object, name: str) -> object: @@ -20,6 +21,10 @@ def model_zoo(args: object, name: str) -> object: jf.log.info("Enter the LacGwcNet model") model = LacGwcNetworkInterface(args) break + if case('StereoT'): + jf.log.info("Enter the StereoT model") + # model = StereoTInterface(args) + break if case(''): model = None jf.log.error("The model's name is error!!!") diff --git a/Source/UserModelImplementation/user_define.py b/Source/UserModelImplementation/user_define.py index 32967ab..833f55e 100644 --- a/Source/UserModelImplementation/user_define.py +++ b/Source/UserModelImplementation/user_define.py @@ -4,5 +4,7 @@ DISP_NUM = 128 # START_DISP = -128 # for whu # DISP_NUM = 192 +# START_DISP = 1 +# DISP_NUM = 192 MASK_OPT = False PRE_TRAIN_OPT = True