This repository has been archived by the owner on Dec 18, 2024. It is now read-only.

Commit

initial commit
David Hafner committed Jun 24, 2019
1 parent d863f03 commit 0b759c3
Showing 7 changed files with 557 additions and 0 deletions.
29 changes: 29 additions & 0 deletions README.md
@@ -0,0 +1,29 @@
# Mixing Datasets for MonoDepth

This code computes a depth map from a single input image. It runs a neural network trained by mixing several datasets, as described in

> Mixing Datasets for Single-Image Depth Estimation in Diverse Environments.
> René Ranftl, Katrin Lasinger, Vladlen Koltun

## Setup

1) Download the model weights [model.pt](https://drive.google.com/open?id=1Q9q7dVFhXiNOS1djOlaUUmnJlKMenEoU) and put the file in the same folder as this README.

2) Create and activate conda environment:

```shell
conda env create -f environment.yml
conda activate mixingDatasetsForMonoDepth
```

## Usage

1) Put one or more input images for monocular depth estimation in the folder `input`.

2) Produce depth maps for the images in the `input` folder as follows:

```shell
python run.py
```

3) The resulting depth maps are written to the `output` folder.
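For example, with a single hypothetical image (the exact output file extension depends on `utils.write_depth`, which is not shown in this commit):

```shell
cp /path/to/example.jpg input/
python run.py
ls output/   # a depth map named example.* should appear
```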
90 changes: 90 additions & 0 deletions environment.yml
@@ -0,0 +1,90 @@
name: mixingDatasetsForMonoDepth
channels:
- pytorch
- defaults
dependencies:
- blas=1.0=mkl
- bzip2=1.0.6=h14c3975_5
- ca-certificates=2019.1.23=0
- cairo=1.14.12=h8948797_3
- certifi=2019.3.9=py36_0
- cffi=1.12.3=py36h2e261b9_0
- cloudpickle=1.0.0=py_0
- cudatoolkit=9.0=h13b8566_0
- cycler=0.10.0=py36_0
- cytoolz=0.9.0.1=py36h14c3975_1
- dask-core=1.2.2=py_0
- dbus=1.13.6=h746ee38_0
- decorator=4.4.0=py36_1
- expat=2.2.6=he6710b0_0
- ffmpeg=4.0=hcdf2ecd_0
- fontconfig=2.13.0=h9420a91_0
- freeglut=3.0.0=hf484d3e_5
- freetype=2.9.1=h8a8886c_1
- glib=2.56.2=hd408876_0
- graphite2=1.3.13=h23475e2_0
- gst-plugins-base=1.14.0=hbbd80ab_1
- gstreamer=1.14.0=hb453b48_1
- harfbuzz=1.8.8=hffaf4a1_0
- hdf5=1.10.2=hba1933b_1
- icu=58.2=h9c2bf20_1
- imageio=2.5.0=py36_0
- intel-openmp=2019.3=199
- jasper=2.0.14=h07fcdf6_1
- jpeg=9b=h024ee3a_2
- kiwisolver=1.1.0=py36he6710b0_0
- libedit=3.1.20181209=hc058e9b_0
- libffi=3.2.1=hd88cf55_4
- libgcc-ng=8.2.0=hdf63c60_1
- libgfortran-ng=7.3.0=hdf63c60_0
- libglu=9.0.0=hf484d3e_1
- libopencv=3.4.2=hb342d67_1
- libopus=1.3=h7b6447c_0
- libpng=1.6.37=hbc83047_0
- libstdcxx-ng=8.2.0=hdf63c60_1
- libtiff=4.0.10=h2733197_2
- libuuid=1.0.3=h1bed415_2
- libvpx=1.7.0=h439df22_0
- libxcb=1.13=h1bed415_1
- libxml2=2.9.9=he19cac6_0
- matplotlib=3.0.3=py36h5429711_0
- mkl=2019.3=199
- mkl_fft=1.0.12=py36ha843d7b_0
- mkl_random=1.0.2=py36hd81dba3_0
- ncurses=6.1=he6710b0_1
- networkx=2.3=py_0
- ninja=1.9.0=py36hfd86e86_0
- numpy=1.16.3=py36h7e9f1db_0
- numpy-base=1.16.3=py36hde5b4d6_0
- olefile=0.46=py36_0
- opencv=3.4.2=py36h6fd60c2_1
- openssl=1.1.1b=h7b6447c_1
- pcre=8.43=he6710b0_0
- pillow=6.0.0=py36h34e0f95_0
- pip=19.1.1=py36_0
- pixman=0.38.0=h7b6447c_0
- py-opencv=3.4.2=py36hb342d67_1
- pycparser=2.19=py36_0
- pyparsing=2.4.0=py_0
- pyqt=5.9.2=py36h05f1152_2
- python=3.6.8=h0371630_0
- python-dateutil=2.8.0=py36_0
- pytorch=1.1.0=py3.6_cuda9.0.176_cudnn7.5.1_0
- pytz=2019.1=py_0
- pywavelets=1.0.3=py36hdd07704_1
- qt=5.9.7=h5867ecd_1
- readline=7.0=h7b6447c_5
- scikit-image=0.15.0=py36he6710b0_0
- scipy=1.2.1=py36h7c811a0_0
- setuptools=41.0.1=py36_0
- sip=4.19.8=py36hf484d3e_0
- six=1.12.0=py36_0
- sqlite=3.28.0=h7b6447c_0
- tk=8.6.8=hbc83047_0
- toolz=0.9.0=py36_0
- torchvision=0.2.2=py_3
- tornado=6.0.2=py36h7b6447c_0
- wheel=0.33.4=py36_0
- xz=5.2.4=h14c3975_4
- zlib=1.2.11=h7b6447c_3
- zstd=1.3.7=h0b5b093_0
Empty file added input/.placeholder
Empty file.
186 changes: 186 additions & 0 deletions monodepth_net.py
@@ -0,0 +1,186 @@
"""MonoDepthNet: Network for monocular depth estimation trained by mixing several datasets.
This file contains code that is adapted from
https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
"""
import torch
import torch.nn as nn
from torchvision import models


class MonoDepthNet(nn.Module):
"""Network for monocular depth estimation.
"""

def __init__(self, path=None, features=256):
"""Init.
Args:
path (str, optional): Path to saved model. Defaults to None.
features (int, optional): Number of features. Defaults to 256.
"""
super().__init__()

resnet = models.resnet50(pretrained=False)

self.pretrained = nn.Module()
self.scratch = nn.Module()
self.pretrained.layer1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
resnet.maxpool, resnet.layer1)

self.pretrained.layer2 = resnet.layer2
self.pretrained.layer3 = resnet.layer3
self.pretrained.layer4 = resnet.layer4

# adjust channel number of feature maps
self.scratch.layer1_rn = nn.Conv2d(256, features, kernel_size=3, stride=1, padding=1, bias=False)
self.scratch.layer2_rn = nn.Conv2d(512, features, kernel_size=3, stride=1, padding=1, bias=False)
self.scratch.layer3_rn = nn.Conv2d(1024, features, kernel_size=3, stride=1, padding=1, bias=False)
self.scratch.layer4_rn = nn.Conv2d(2048, features, kernel_size=3, stride=1, padding=1, bias=False)

self.scratch.refinenet4 = FeatureFusionBlock(features)
self.scratch.refinenet3 = FeatureFusionBlock(features)
self.scratch.refinenet2 = FeatureFusionBlock(features)
self.scratch.refinenet1 = FeatureFusionBlock(features)

# adaptive output module: 2 convolutions and upsampling
self.scratch.output_conv = nn.Sequential(nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
nn.Conv2d(128, 1, kernel_size=3, stride=1, padding=1),
Interpolate(scale_factor=2, mode='bilinear'))

# load model
if path:
self.load(path)

def forward(self, x):
"""Forward pass.
Args:
x (tensor): input data (image)
Returns:
tensor: depth
"""
layer_1 = self.pretrained.layer1(x)
layer_2 = self.pretrained.layer2(layer_1)
layer_3 = self.pretrained.layer3(layer_2)
layer_4 = self.pretrained.layer4(layer_3)

layer_1_rn = self.scratch.layer1_rn(layer_1)
layer_2_rn = self.scratch.layer2_rn(layer_2)
layer_3_rn = self.scratch.layer3_rn(layer_3)
layer_4_rn = self.scratch.layer4_rn(layer_4)

path_4 = self.scratch.refinenet4(layer_4_rn)
path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

out = self.scratch.output_conv(path_1)

return out

def load(self, path):
"""Load model from file.
Args:
path (str): file path
"""
parameters = torch.load(path)

self.load_state_dict(parameters)


class Interpolate(nn.Module):
"""Interpolation module.
"""

def __init__(self, scale_factor, mode):
"""Init.
Args:
scale_factor (float): scaling
mode (str): interpolation mode
"""
        super().__init__()

self.interp = nn.functional.interpolate
self.scale_factor = scale_factor
self.mode = mode

def forward(self, x):
"""Forward pass.
Args:
x (tensor): input
Returns:
tensor: interpolated data
"""
x = self.interp(x, scale_factor=self.scale_factor, mode=self.mode, align_corners=False)

return x


class ResidualConvUnit(nn.Module):
"""Residual convolution module.
"""

def __init__(self, features):
"""Init.
Args:
features (int): number of features
"""
super().__init__()

self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True)
self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=False)
self.relu = nn.ReLU(inplace=True)

def forward(self, x):
"""Forward pass.
Args:
x (tensor): input
Returns:
tensor: output
"""
out = self.relu(x)
out = self.conv1(out)
out = self.relu(out)
out = self.conv2(out)

return out + x


class FeatureFusionBlock(nn.Module):
"""Feature fusion block.
"""

def __init__(self, features):
"""Init.
Args:
features (int): number of features
"""
super().__init__()

        # a single residual conv unit is shared between the skip path and the output path
        self.resConfUnit = ResidualConvUnit(features)

    def forward(self, *xs):
        """Forward pass.
        Args:
            xs (tensors): one tensor (coarsest level) or two tensors (the
                output of the previous fusion block and the skip connection)
        Returns:
            tensor: output
        """
        output = xs[0]

        # fuse the skip connection if present
        if len(xs) == 2:
            output += self.resConfUnit(xs[1])

output = self.resConfUnit(output)
output = nn.functional.interpolate(output, scale_factor=2,
mode='bilinear', align_corners=True)

return output
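A minimal shape-check sketch (not part of this commit; random weights suffice, so no `model.pt` is needed): the ResNet-50 encoder downsamples by a factor of 32, and the four fusion blocks plus the output module each upsample by 2, so the predicted depth map has the same resolution as the input.

```python
import torch
from monodepth_net import MonoDepthNet

net = MonoDepthNet()  # no path given, so random weights are used
net.eval()

x = torch.rand(1, 3, 384, 384)  # dummy RGB batch
with torch.no_grad():
    depth = net(x)

print(depth.shape)  # torch.Size([1, 1, 384, 384])
```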
Empty file added output/.placeholder
Empty file.
71 changes: 71 additions & 0 deletions run.py
@@ -0,0 +1,71 @@
"""Compute depth maps for images in the input folder.
"""
import os
import glob
import torch
from monodepth_net import MonoDepthNet
import utils


def run(input_path, output_path, model_path):
"""Run MonoDepthNN to compute depth maps.
Args:
input_path (str): path to input folder
output_path (str): path to output folder
model_path (str): path to saved model
"""
print('initialize')

# select device
device = torch.device('cpu')
print('device: %s' % device)

# load network
model = MonoDepthNet(model_path)
model.to(device)
model.eval()

# get input
img_names = glob.glob(os.path.join(input_path, '*'))
num_images = len(img_names)

# create output folder
os.makedirs(output_path, exist_ok=True)

print("start processing")

for ind, img_name in enumerate(img_names):

print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

# input
img = utils.read_image(img_name)
img_input = utils.resize_image(img)
img_input = img_input.to(device)

# compute
with torch.no_grad():
            out = model(img_input)  # __call__ is the idiomatic entry point

depth = utils.resize_depth(out, img.shape[1], img.shape[0])

# output
filename = os.path.join(output_path, os.path.splitext(os.path.basename(img_name))[0])
utils.write_depth(filename, depth)

print("finished")


if __name__ == '__main__':
# set paths
INPUT_PATH = "input"
OUTPUT_PATH = "output"
MODEL_PATH = "model.pt"

    # set torch options (these only take effect when running on a CUDA device)
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

# compute depth maps
run(INPUT_PATH, OUTPUT_PATH, MODEL_PATH)
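Note that `run()` pins the device to the CPU even though the cuDNN flags are set. A hedged one-line variant of the device selection (not in the original script) that prefers a GPU when one is available:

```python
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
```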
