diff --git a/tf/README.md b/tf/README.md new file mode 100644 index 0000000..0702ac0 --- /dev/null +++ b/tf/README.md @@ -0,0 +1,111 @@ +## Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer + +### TensorFlow inference using `.pb` and `.onnx` models + +1. [Run inference on TensorFlow-model by using TensorFlow](#run-inference-on-tensorflow-model-by-using-tensorFlow) + +2. [Run inference on ONNX-model by using TensorFlow](#run-inference-on-onnx-model-by-using-tensorflow) + +3. [Make ONNX model from downloaded Pytorch model file](#make-onnx-model-from-downloaded-pytorch-model-file) + + +### Run inference on TensorFlow-model by using TensorFlow + +1) Download the model weights [model-f45da743.pb](https://github.com/AlexeyAB/MiDaS/releases/download/v2/model-f46da743.pb) and place the +file in the `/tf/` folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install TensorFlow +pip install grpcio tensorflow tensorflow-addons +``` + +#### Usage + +1) Place one or more input images in the folder `tf/input`. + +2) Run the model: + + ```shell + python tf/run_pb.py + ``` + +3) The resulting inverse depth maps are written to the `tf/output` folder. + + +### Run inference on ONNX-model by using TensorFlow + +1) Download the model weights [model-f45da743.onnx](https://github.com/AlexeyAB/MiDaS/releases/download/v2/model-f46da743.onnx) and place the +file in the `/tf/` folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install TensorFlow +pip install grpcio tensorflow tensorflow-addons + +# install ONNX and ONNX_TensorFlow +pip install onnx + +git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow && pip install -e . && cd .. +dir +``` + +#### Usage + +1) Place one or more input images in the folder `tf/input`. + +2) Run the model: + + ```shell + python tf/run_onnx.py + ``` + +3) The resulting inverse depth maps are written to the `tf/output` folder. + + + +### Make ONNX model from downloaded Pytorch model file + +1) Download the model weights [model-f45da743.pt](https://github.com/intel-isl/MiDaS/releases/download/v2/model-f46da743.pt) and place the +file in the root folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install ONNX +pip install onnx + +# install PyTorch TorchVision +pip install torch==1.5.1+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html +``` + +#### Usage + +1) Run the converter: + + ```shell + python tf/make_onnx_model.py + ``` + +2) The resulting `model-f46da743.onnx` file is written to the `/tf/` folder. + + +### Requirements + + The code was tested with Python 3.6.9, PyTorch 1.5.1, TensorFlow 2.2.0, TensorFlow-addons 0.8.3, ONNX 1.7.0, ONNX-TensorFlow (GitHub-master-17.07.2020) and OpenCV 4.3.0. + + \ No newline at end of file diff --git a/tf/input/.placeholder b/tf/input/.placeholder new file mode 100644 index 0000000..e69de29 diff --git a/tf/make_onnx_model.py b/tf/make_onnx_model.py new file mode 100644 index 0000000..f47166b --- /dev/null +++ b/tf/make_onnx_model.py @@ -0,0 +1,112 @@ +"""Compute depth maps for images in the input folder. +""" +import os +import ntpath +import glob +import torch +import utils +import cv2 +import numpy as np +from torchvision.transforms import Compose, Normalize +from torchvision import transforms + +from shutil import copyfile +import fileinput +import sys +sys.path.append(os.getcwd() + '/..') + +def modify_file(): + modify_filename = '../midas/blocks.py' + copyfile(modify_filename, modify_filename+'.bak') + + with open(modify_filename, 'r') as file : + filedata = file.read() + + filedata = filedata.replace('align_corners=True', 'align_corners=False') + filedata = filedata.replace('import torch.nn as nn', 'import torch.nn as nn\nimport torchvision.models as models') + filedata = filedata.replace('torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")', 'models.resnext101_32x8d()') + + with open(modify_filename, 'w') as file: + file.write(filedata) + +def restore_file(): + modify_filename = '../midas/blocks.py' + copyfile(modify_filename+'.bak', modify_filename) + +modify_file() + +from midas.midas_net import MidasNet +from midas.transforms import Resize, NormalizeImage, PrepareForNet + +restore_file() + + +class MidasNet_preprocessing(MidasNet): + """Network for monocular depth estimation. + """ + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + + mean = torch.tensor([0.485, 0.456, 0.406]) + std = torch.tensor([0.229, 0.224, 0.225]) + x.sub_(mean[None, :, None, None]).div_(std[None, :, None, None]) + + return MidasNet.forward(self, x) + + +def run(model_path): + """Run MonoDepthNN to compute depth maps. + + Args: + model_path (str): path to saved model + """ + print("initialize") + + # select device + + # load network + #model = MidasNet(model_path, non_negative=True) + model = MidasNet_preprocessing(model_path, non_negative=True) + + model.eval() + + print("start processing") + + # input + img_input = np.zeros((3, 384, 384), np.float32) + + # compute + with torch.no_grad(): + sample = torch.from_numpy(img_input).unsqueeze(0) + prediction = model.forward(sample) + prediction = ( + torch.nn.functional.interpolate( + prediction.unsqueeze(1), + size=img_input.shape[:2], + mode="bicubic", + align_corners=False, + ) + .squeeze() + .cpu() + .numpy() + ) + + torch.onnx.export(model, sample, ntpath.basename(model_path).rsplit('.', 1)[0]+'.onnx', opset_version=9) + + print("finished") + + +if __name__ == "__main__": + # set paths + # MODEL_PATH = "model.pt" + MODEL_PATH = "../model-f46da743.pt" + + # compute depth maps + run(MODEL_PATH) diff --git a/tf/output/.placeholder b/tf/output/.placeholder new file mode 100644 index 0000000..e69de29 diff --git a/tf/run_onnx.py b/tf/run_onnx.py new file mode 100644 index 0000000..b20421a --- /dev/null +++ b/tf/run_onnx.py @@ -0,0 +1,100 @@ +"""Compute depth maps for images in the input folder. +""" +import os +import glob +import utils +import cv2 +import sys +import numpy as np +sys.path +sys.path.append("C:\Intel\onnx_tf\onnx-tensorflow") +#print(sys.path) + +import onnx +import onnx_tf +import tensorflow as tf + +from transforms import Resize, NormalizeImage, PrepareForNet + + +def run(input_path, output_path, model_path): + """Run MonoDepthNN to compute depth maps. + + Args: + input_path (str): path to input folder + output_path (str): path to output folder + model_path (str): path to saved model + """ + print("initialize") + + # select device + device = "CUDA:0" + #device = "CPU" + print("device: %s" % device) + + # load network + print("loading model...") + model = onnx.load(model_path) + print("checking model...") + onnx.checker.check_model(model) + print("preparing model...") + tf_rep = onnx_tf.backend.prepare(model, device) + + print('inputs:', tf_rep.inputs) + print('outputs:', tf_rep.outputs) + + resize_image = Resize( + 384, + 384, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ) + + def compose2(f1, f2): + return lambda x: f2(f1(x)) + + transform = compose2(resize_image, PrepareForNet()) + + # get input + img_names = glob.glob(os.path.join(input_path, "*")) + num_images = len(img_names) + + # create output folder + os.makedirs(output_path, exist_ok=True) + + print("start processing") + + for ind, img_name in enumerate(img_names): + + print(" processing {} ({}/{})".format(img_name, ind + 1, num_images)) + + # input + + img = utils.read_image(img_name) + img_input = transform({"image": img})["image"] + + # compute + output = tf_rep.run(img_input.reshape(1, 3, 384, 384)) + prediction = np.array(output).reshape(384, 384) + prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) + + # output + filename = os.path.join( + output_path, os.path.splitext(os.path.basename(img_name))[0] + ) + utils.write_depth(filename, prediction, bits=2) + + print("finished") + + +if __name__ == "__main__": + # set paths + INPUT_PATH = "input" + OUTPUT_PATH = "output" + MODEL_PATH = "model-f46da743.onnx" + + # compute depth maps + run(INPUT_PATH, OUTPUT_PATH, MODEL_PATH) diff --git a/tf/run_pb.py b/tf/run_pb.py new file mode 100644 index 0000000..c0a7ba3 --- /dev/null +++ b/tf/run_pb.py @@ -0,0 +1,106 @@ +"""Compute depth maps for images in the input folder. +""" +import os +import glob +import utils +import cv2 + +import tensorflow as tf + +from transforms import Resize, NormalizeImage, PrepareForNet + +def run(input_path, output_path, model_path): + """Run MonoDepthNN to compute depth maps. + + Args: + input_path (str): path to input folder + output_path (str): path to output folder + model_path (str): path to saved model + """ + print("initialize") + + # the runtime initialization will not allocate all memory on the device to avoid out of GPU memory + gpus = tf.config.experimental.list_physical_devices('GPU') + if gpus: + try: + for gpu in gpus: + #tf.config.experimental.set_memory_growth(gpu, True) + tf.config.experimental.set_virtual_device_configuration(gpu, + [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)]) + except RuntimeError as e: + print(e) + + # load network + graph_def = tf.compat.v1.GraphDef() + with tf.io.gfile.GFile(model_path, 'rb') as f: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + + model_operations = tf.compat.v1.get_default_graph().get_operations() + input_node = '0:0' + output_layer = model_operations[len(model_operations) - 1].name + ':0' + print("Last layer name: ", output_layer) + + resize_image = Resize( + 384, + 384, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ) + + def compose2(f1, f2): + return lambda x: f2(f1(x)) + + transform = compose2(resize_image, PrepareForNet()) + + # get input + img_names = glob.glob(os.path.join(input_path, "*")) + num_images = len(img_names) + + # create output folder + os.makedirs(output_path, exist_ok=True) + + print("start processing") + + with tf.compat.v1.Session() as sess: + try: + # load images + for ind, img_name in enumerate(img_names): + + print(" processing {} ({}/{})".format(img_name, ind + 1, num_images)) + + # input + img = utils.read_image(img_name) + img_input = transform({"image": img})["image"] + + # compute + prob_tensor = sess.graph.get_tensor_by_name(output_layer) + prediction, = sess.run(prob_tensor, {input_node: [img_input] }) + prediction = prediction.reshape(384, 384) + prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) + + # output + filename = os.path.join( + output_path, os.path.splitext(os.path.basename(img_name))[0] + ) + utils.write_depth(filename, prediction, bits=2) + + except KeyError: + print ("Couldn't find input node: ' + input_node + ' or output layer: " + output_layer + ".") + exit(-1) + + print("finished") + + +if __name__ == "__main__": + # set paths + INPUT_PATH = "input" + OUTPUT_PATH = "output" + MODEL_PATH = "model-f46da743.pb" + + # compute depth maps + run(INPUT_PATH, OUTPUT_PATH, MODEL_PATH) diff --git a/tf/transforms.py b/tf/transforms.py new file mode 100644 index 0000000..350cbc1 --- /dev/null +++ b/tf/transforms.py @@ -0,0 +1,234 @@ +import numpy as np +import cv2 +import math + + +def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA): + """Rezise the sample to ensure the given size. Keeps aspect ratio. + + Args: + sample (dict): sample + size (tuple): image size + + Returns: + tuple: new size + """ + shape = list(sample["disparity"].shape) + + if shape[0] >= size[0] and shape[1] >= size[1]: + return sample + + scale = [0, 0] + scale[0] = size[0] / shape[0] + scale[1] = size[1] / shape[1] + + scale = max(scale) + + shape[0] = math.ceil(scale * shape[0]) + shape[1] = math.ceil(scale * shape[1]) + + # resize + sample["image"] = cv2.resize( + sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method + ) + + sample["disparity"] = cv2.resize( + sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST + ) + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + tuple(shape[::-1]), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return tuple(shape) + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. + + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size( + sample["image"].shape[1], sample["image"].shape[0] + ) + + # resize sample + sample["image"] = cv2.resize( + sample["image"], + (width, height), + interpolation=self.__image_interpolation_method, + ) + + if self.__resize_target: + if "disparity" in sample: + sample["disparity"] = cv2.resize( + sample["disparity"], + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + + if "depth" in sample: + sample["depth"] = cv2.resize( + sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST + ) + + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. + """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. + """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + if "disparity" in sample: + disparity = sample["disparity"].astype(np.float32) + sample["disparity"] = np.ascontiguousarray(disparity) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + return sample diff --git a/tf/utils.py b/tf/utils.py new file mode 100644 index 0000000..ff9a54b --- /dev/null +++ b/tf/utils.py @@ -0,0 +1,82 @@ +import numpy as np +import sys +import cv2 + + +def write_pfm(path, image, scale=1): + """Write pfm file. + Args: + path (str): pathto file + image (array): data + scale (int, optional): Scale. Defaults to 1. + """ + + with open(path, "wb") as file: + color = None + + if image.dtype.name != "float32": + raise Exception("Image dtype must be float32.") + + image = np.flipud(image) + + if len(image.shape) == 3 and image.shape[2] == 3: # color image + color = True + elif ( + len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1 + ): # greyscale + color = False + else: + raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") + + file.write("PF\n" if color else "Pf\n".encode()) + file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) + + endian = image.dtype.byteorder + + if endian == "<" or endian == "=" and sys.byteorder == "little": + scale = -scale + + file.write("%f\n".encode() % scale) + + image.tofile(file) + +def read_image(path): + """Read image and output RGB image (0-1). + Args: + path (str): path to file + Returns: + array: RGB image (0-1) + """ + img = cv2.imread(path) + + if img.ndim == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 + + return img + +def write_depth(path, depth, bits=1): + """Write depth map to pfm and png file. + Args: + path (str): filepath without extension + depth (array): depth + """ + write_pfm(path + ".pfm", depth.astype(np.float32)) + + depth_min = depth.min() + depth_max = depth.max() + + max_val = (2**(8*bits))-1 + + if depth_max - depth_min > np.finfo("float").eps: + out = max_val * (depth - depth_min) / (depth_max - depth_min) + else: + out = 0 + + if bits == 1: + cv2.imwrite(path + ".png", out.astype("uint8")) + elif bits == 2: + cv2.imwrite(path + ".png", out.astype("uint16")) + + return \ No newline at end of file