Commit 0b759c3 (1 parent: d863f03)
David Hafner committed Jun 24, 2019
Showing 7 changed files with 557 additions and 0 deletions.
README.md
@@ -0,0 +1,29 @@
# Mixing Datasets for MonoDepth

This code computes a depth map from a single input image. It runs a neural network that was trained by mixing several datasets, as described in

> Mixing Datasets for Single-Image Depth Estimation in Diverse Environments.
> Rene Ranftl, Katrin Lasinger, Vladlen Koltun

## Setup

1) Download the model weights [model.pt](https://drive.google.com/open?id=1Q9q7dVFhXiNOS1djOlaUUmnJlKMenEoU) and put the file in the same folder as this README.

2) Create and activate the conda environment:

```shell
conda env create -f environment.yml
conda activate mixingDatasetsForMonoDepth
```

## Usage

1) Put one or more input images for monocular depth estimation in the folder `input`.

2) Produce depth maps for the images in the `input` folder:

```shell
python run.py
```

3) The resulting depth maps are written to the `output` folder.
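The same steps can also be driven programmatically through the `run` function defined in run.py (added later in this commit); a minimal sketch, assuming the default folder layout described above:

```python
# Minimal sketch: invoke the pipeline from Python instead of the command line.
# Assumes model.pt has been downloaded next to run.py (see Setup above).
from run import run

run("input", "output", "model.pt")  # reads ./input, writes depth maps to ./output
```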
environment.yml
@@ -0,0 +1,90 @@
name: mixingDatasetsForMonoDepth
channels:
  - pytorch
  - defaults
dependencies:
  - blas=1.0=mkl
  - bzip2=1.0.6=h14c3975_5
  - ca-certificates=2019.1.23=0
  - cairo=1.14.12=h8948797_3
  - certifi=2019.3.9=py36_0
  - cffi=1.12.3=py36h2e261b9_0
  - cloudpickle=1.0.0=py_0
  - cudatoolkit=9.0=h13b8566_0
  - cycler=0.10.0=py36_0
  - cytoolz=0.9.0.1=py36h14c3975_1
  - dask-core=1.2.2=py_0
  - dbus=1.13.6=h746ee38_0
  - decorator=4.4.0=py36_1
  - expat=2.2.6=he6710b0_0
  - ffmpeg=4.0=hcdf2ecd_0
  - fontconfig=2.13.0=h9420a91_0
  - freeglut=3.0.0=hf484d3e_5
  - freetype=2.9.1=h8a8886c_1
  - glib=2.56.2=hd408876_0
  - graphite2=1.3.13=h23475e2_0
  - gst-plugins-base=1.14.0=hbbd80ab_1
  - gstreamer=1.14.0=hb453b48_1
  - harfbuzz=1.8.8=hffaf4a1_0
  - hdf5=1.10.2=hba1933b_1
  - icu=58.2=h9c2bf20_1
  - imageio=2.5.0=py36_0
  - intel-openmp=2019.3=199
  - jasper=2.0.14=h07fcdf6_1
  - jpeg=9b=h024ee3a_2
  - kiwisolver=1.1.0=py36he6710b0_0
  - libedit=3.1.20181209=hc058e9b_0
  - libffi=3.2.1=hd88cf55_4
  - libgcc-ng=8.2.0=hdf63c60_1
  - libgfortran-ng=7.3.0=hdf63c60_0
  - libglu=9.0.0=hf484d3e_1
  - libopencv=3.4.2=hb342d67_1
  - libopus=1.3=h7b6447c_0
  - libpng=1.6.37=hbc83047_0
  - libstdcxx-ng=8.2.0=hdf63c60_1
  - libtiff=4.0.10=h2733197_2
  - libuuid=1.0.3=h1bed415_2
  - libvpx=1.7.0=h439df22_0
  - libxcb=1.13=h1bed415_1
  - libxml2=2.9.9=he19cac6_0
  - matplotlib=3.0.3=py36h5429711_0
  - mkl=2019.3=199
  - mkl_fft=1.0.12=py36ha843d7b_0
  - mkl_random=1.0.2=py36hd81dba3_0
  - ncurses=6.1=he6710b0_1
  - networkx=2.3=py_0
  - ninja=1.9.0=py36hfd86e86_0
  - numpy=1.16.3=py36h7e9f1db_0
  - numpy-base=1.16.3=py36hde5b4d6_0
  - olefile=0.46=py36_0
  - opencv=3.4.2=py36h6fd60c2_1
  - openssl=1.1.1b=h7b6447c_1
  - pcre=8.43=he6710b0_0
  - pillow=6.0.0=py36h34e0f95_0
  - pip=19.1.1=py36_0
  - pixman=0.38.0=h7b6447c_0
  - py-opencv=3.4.2=py36hb342d67_1
  - pycparser=2.19=py36_0
  - pyparsing=2.4.0=py_0
  - pyqt=5.9.2=py36h05f1152_2
  - python=3.6.8=h0371630_0
  - python-dateutil=2.8.0=py36_0
  - pytorch=1.1.0=py3.6_cuda9.0.176_cudnn7.5.1_0
  - pytz=2019.1=py_0
  - pywavelets=1.0.3=py36hdd07704_1
  - qt=5.9.7=h5867ecd_1
  - readline=7.0=h7b6447c_5
  - scikit-image=0.15.0=py36he6710b0_0
  - scipy=1.2.1=py36h7c811a0_0
  - setuptools=41.0.1=py36_0
  - sip=4.19.8=py36hf484d3e_0
  - six=1.12.0=py36_0
  - sqlite=3.28.0=h7b6447c_0
  - tk=8.6.8=hbc83047_0
  - toolz=0.9.0=py36_0
  - torchvision=0.2.2=py_3
  - tornado=6.0.2=py36h7b6447c_0
  - wheel=0.33.4=py36_0
  - xz=5.2.4=h14c3975_4
  - zlib=1.2.11=h7b6447c_3
  - zstd=1.3.7=h0b5b093_0
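A quick way to confirm the environment resolved correctly is to import the key pinned packages and check their versions; a small sanity check, where the expected version strings come from the pins above:

```python
# Sanity check for the conda environment (expected versions per the pins above).
import cv2
import torch

print(torch.__version__)          # expected: 1.1.0
print(cv2.__version__)            # expected: 3.4.2
print(torch.cuda.is_available())  # True only on a CUDA 9.0-capable setup
```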
Empty file.
monodepth_net.py
@@ -0,0 +1,186 @@
"""MonoDepthNet: Network for monocular depth estimation trained by mixing several datasets. | ||
This file contains code that is adapted from | ||
https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py | ||
""" | ||
import torch | ||
import torch.nn as nn | ||
from torchvision import models | ||
|
||
|
||
class MonoDepthNet(nn.Module): | ||
"""Network for monocular depth estimation. | ||
""" | ||
|
||
def __init__(self, path=None, features=256): | ||
"""Init. | ||
Args: | ||
path (str, optional): Path to saved model. Defaults to None. | ||
features (int, optional): Number of features. Defaults to 256. | ||
""" | ||
super().__init__() | ||
|
||
resnet = models.resnet50(pretrained=False) | ||
|
||
self.pretrained = nn.Module() | ||
self.scratch = nn.Module() | ||
self.pretrained.layer1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, | ||
resnet.maxpool, resnet.layer1) | ||
|
||
self.pretrained.layer2 = resnet.layer2 | ||
self.pretrained.layer3 = resnet.layer3 | ||
self.pretrained.layer4 = resnet.layer4 | ||
|
||
# adjust channel number of feature maps | ||
self.scratch.layer1_rn = nn.Conv2d(256, features, kernel_size=3, stride=1, padding=1, bias=False) | ||
self.scratch.layer2_rn = nn.Conv2d(512, features, kernel_size=3, stride=1, padding=1, bias=False) | ||
self.scratch.layer3_rn = nn.Conv2d(1024, features, kernel_size=3, stride=1, padding=1, bias=False) | ||
self.scratch.layer4_rn = nn.Conv2d(2048, features, kernel_size=3, stride=1, padding=1, bias=False) | ||
|
||
self.scratch.refinenet4 = FeatureFusionBlock(features) | ||
self.scratch.refinenet3 = FeatureFusionBlock(features) | ||
self.scratch.refinenet2 = FeatureFusionBlock(features) | ||
self.scratch.refinenet1 = FeatureFusionBlock(features) | ||
|
||
# adaptive output module: 2 convolutions and upsampling | ||
self.scratch.output_conv = nn.Sequential(nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), | ||
nn.Conv2d(128, 1, kernel_size=3, stride=1, padding=1), | ||
Interpolate(scale_factor=2, mode='bilinear')) | ||
|
||
# load model | ||
if path: | ||
self.load(path) | ||
|
||
def forward(self, x): | ||
"""Forward pass. | ||
Args: | ||
x (tensor): input data (image) | ||
Returns: | ||
tensor: depth | ||
""" | ||
layer_1 = self.pretrained.layer1(x) | ||
layer_2 = self.pretrained.layer2(layer_1) | ||
layer_3 = self.pretrained.layer3(layer_2) | ||
layer_4 = self.pretrained.layer4(layer_3) | ||
|
||
layer_1_rn = self.scratch.layer1_rn(layer_1) | ||
layer_2_rn = self.scratch.layer2_rn(layer_2) | ||
layer_3_rn = self.scratch.layer3_rn(layer_3) | ||
layer_4_rn = self.scratch.layer4_rn(layer_4) | ||
|
||
path_4 = self.scratch.refinenet4(layer_4_rn) | ||
path_3 = self.scratch.refinenet3(path_4, layer_3_rn) | ||
path_2 = self.scratch.refinenet2(path_3, layer_2_rn) | ||
path_1 = self.scratch.refinenet1(path_2, layer_1_rn) | ||
|
||
out = self.scratch.output_conv(path_1) | ||
|
||
return out | ||
|
||
def load(self, path): | ||
"""Load model from file. | ||
Args: | ||
path (str): file path | ||
""" | ||
parameters = torch.load(path) | ||
|
||
self.load_state_dict(parameters) | ||
|
||
|
||
class Interpolate(nn.Module): | ||
"""Interpolation module. | ||
""" | ||
|
||
def __init__(self, scale_factor, mode): | ||
"""Init. | ||
Args: | ||
scale_factor (float): scaling | ||
mode (str): interpolation mode | ||
""" | ||
super(Interpolate, self).__init__() | ||
|
||
self.interp = nn.functional.interpolate | ||
self.scale_factor = scale_factor | ||
self.mode = mode | ||
|
||
def forward(self, x): | ||
"""Forward pass. | ||
Args: | ||
x (tensor): input | ||
Returns: | ||
tensor: interpolated data | ||
""" | ||
x = self.interp(x, scale_factor=self.scale_factor, mode=self.mode, align_corners=False) | ||
|
||
return x | ||
|
||
|
||
class ResidualConvUnit(nn.Module): | ||
"""Residual convolution module. | ||
""" | ||
|
||
def __init__(self, features): | ||
"""Init. | ||
Args: | ||
features (int): number of features | ||
""" | ||
super().__init__() | ||
|
||
self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True) | ||
self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=False) | ||
self.relu = nn.ReLU(inplace=True) | ||
|
||
def forward(self, x): | ||
"""Forward pass. | ||
Args: | ||
x (tensor): input | ||
Returns: | ||
tensor: output | ||
""" | ||
out = self.relu(x) | ||
out = self.conv1(out) | ||
out = self.relu(out) | ||
out = self.conv2(out) | ||
|
||
return out + x | ||
|
||
|
||
class FeatureFusionBlock(nn.Module): | ||
"""Feature fusion block. | ||
""" | ||
|
||
def __init__(self, features): | ||
"""Init. | ||
Args: | ||
features (int): number of features | ||
""" | ||
super().__init__() | ||
|
||
self.resConfUnit = ResidualConvUnit(features) | ||
|
||
def forward(self, *xs): | ||
"""Forward pass. | ||
Returns: | ||
tensor: output | ||
""" | ||
output = xs[0] | ||
|
||
if len(xs) == 2: | ||
output += self.resConfUnit(xs[1]) | ||
|
||
output = self.resConfUnit(output) | ||
output = nn.functional.interpolate(output, scale_factor=2, | ||
mode='bilinear', align_corners=True) | ||
|
||
return output |
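To see the shapes involved, the network can be smoke-tested with random weights and a dummy input; a minimal sketch, where the 384x384 input size is an assumption (any size divisible by 32 keeps the encoder and decoder strides aligned):

```python
# Hypothetical smoke test (not part of the commit): random weights, dummy input.
import torch

from monodepth_net import MonoDepthNet

net = MonoDepthNet()  # no checkpoint path -> randomly initialized ResNet-50 backbone
net.eval()

x = torch.randn(1, 3, 384, 384)  # assumed input size; multiples of 32 keep shapes aligned
with torch.no_grad():
    y = net(x)

print(y.shape)  # torch.Size([1, 1, 384, 384]): one depth channel at input resolution
```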
Empty file.
run.py
@@ -0,0 +1,71 @@
"""Compute depth maps for images in the input folder. | ||
""" | ||
import os | ||
import glob | ||
import torch | ||
from monodepth_net import MonoDepthNet | ||
import utils | ||
|
||
|
||
def run(input_path, output_path, model_path): | ||
"""Run MonoDepthNN to compute depth maps. | ||
Args: | ||
input_path (str): path to input folder | ||
output_path (str): path to output folder | ||
model_path (str): path to saved model | ||
""" | ||
print('initialize') | ||
|
||
# select device | ||
device = torch.device('cpu') | ||
print('device: %s' % device) | ||
|
||
# load network | ||
model = MonoDepthNet(model_path) | ||
model.to(device) | ||
model.eval() | ||
|
||
# get input | ||
img_names = glob.glob(os.path.join(input_path, '*')) | ||
num_images = len(img_names) | ||
|
||
# create output folder | ||
os.makedirs(output_path, exist_ok=True) | ||
|
||
print("start processing") | ||
|
||
for ind, img_name in enumerate(img_names): | ||
|
||
print(" processing {} ({}/{})".format(img_name, ind + 1, num_images)) | ||
|
||
# input | ||
img = utils.read_image(img_name) | ||
img_input = utils.resize_image(img) | ||
img_input = img_input.to(device) | ||
|
||
# compute | ||
with torch.no_grad(): | ||
out = model.forward(img_input) | ||
|
||
depth = utils.resize_depth(out, img.shape[1], img.shape[0]) | ||
|
||
# output | ||
filename = os.path.join(output_path, os.path.splitext(os.path.basename(img_name))[0]) | ||
utils.write_depth(filename, depth) | ||
|
||
print("finished") | ||
|
||
|
||
if __name__ == '__main__': | ||
# set paths | ||
INPUT_PATH = "input" | ||
OUTPUT_PATH = "output" | ||
MODEL_PATH = "model.pt" | ||
|
||
# set torch options | ||
torch.backends.cudnn.enabled = True | ||
torch.backends.cudnn.benchmark = True | ||
|
||
# compute depth maps | ||
run(INPUT_PATH, OUTPUT_PATH, MODEL_PATH) |
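The commit's remaining file, utils, did not render above and is visible here only through its call sites in run.py. A minimal sketch consistent with those call sites; every implementation detail below (normalization, the 384-pixel input size, 16-bit PNG output) is an assumption, not the author's code:

```python
# utils.py -- hypothetical reconstruction from run.py's call sites; details assumed.
import cv2
import numpy as np
import torch


def read_image(path):
    """Read an image as an RGB float array in [0, 1] (HxWx3)."""
    img = cv2.imread(path)  # BGR, uint8
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0


def resize_image(img):
    """Resize to the (assumed) 384x384 network input and convert to a 1x3xHxW tensor."""
    img = cv2.resize(img, (384, 384), interpolation=cv2.INTER_AREA)
    img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
    return torch.from_numpy(img).unsqueeze(0)


def resize_depth(out, width, height):
    """Resize the network output back to the original image resolution."""
    depth = out.squeeze().cpu().numpy()
    return cv2.resize(depth, (width, height), interpolation=cv2.INTER_CUBIC)


def write_depth(filename, depth):
    """Write the depth map as a 16-bit PNG, scaled to the full value range."""
    d_min, d_max = depth.min(), depth.max()
    scaled = (depth - d_min) / (d_max - d_min + 1e-8) * 65535.0
    cv2.imwrite(filename + ".png", scaled.astype(np.uint16))
```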