# Makefile: Docker image build/run helpers and MPI launch settings.
# Project name; reused below for the Docker image tag and the container name.
PROJECT      = dd3d
# Path of the project inside the container (also passed to `docker build`).
WORKSPACE    = /workspace/${PROJECT}
# Tag given to the image built by `docker-build` and run by the other targets.
DOCKER_IMAGE = ${PROJECT}:latest
# Dockerfile name under ./docker/; override with `make DOCKERFILE=...`.
DOCKERFILE  ?= Dockerfile
# Options shared by the `nvidia-docker run` invocations in this file.
# Built up piecewise; the resulting word order matches a single-line definition.

# Interactive TTY, and remove the container when it exits.
DOCKER_OPTS  = -it --rm
# Pass the host DISPLAY value into the container.
DOCKER_OPTS += -e DISPLAY=$(DISPLAY)
# Host directories bind-mounted into the container.
DOCKER_OPTS += -v /data:/data
DOCKER_OPTS += -v /tmp:/tmp
DOCKER_OPTS += -v /tmp/.X11-unix:/tmp/.X11-unix
DOCKER_OPTS += -v /mnt/fsx:/mnt/fsx
DOCKER_OPTS += -v /root/.ssh:/root/.ssh
DOCKER_OPTS += -v ~/.aws:/root/.aws
# Shared-memory size, host IPC/network namespaces, and privileged mode.
DOCKER_OPTS += --shm-size=1G
DOCKER_OPTS += --ipc=host
DOCKER_OPTS += --network=host
DOCKER_OPTS += --privileged
# Build-time arguments handed to `docker build` by the docker-build target.
# WORKSPACE gets an explicit value; the remaining arguments are given by name
# only, so docker takes their values from the calling environment.
#
# NOTE(review): values passed with --build-arg can end up in the image
# metadata; confirm that passing credentials this way is acceptable.
#
# The last line must NOT end in a backslash: a trailing continuation would
# absorb whatever definition follows into this variable.
DOCKER_BUILD_ARGS = \
    --build-arg WORKSPACE=$(WORKSPACE) \
    --build-arg AWS_ACCESS_KEY_ID \
    --build-arg AWS_SECRET_ACCESS_KEY \
    --build-arg AWS_DEFAULT_REGION \
    --build-arg WANDB_ENTITY \
    --build-arg WANDB_API_KEY
# Number of MPI processes: the line count of `nvidia-smi -L`, unless NGPUS is
# already set. Kept as a lazy `?=` so nvidia-smi only runs when the value is
# actually expanded.
NGPUS       ?= $(shell nvidia-smi -L | wc -l)
# Value exported to the MPI processes as MASTER_ADDR.
MASTER_ADDR ?= 127.0.0.1
# Host list for `mpirun -H`, in host:slots form.
MPI_HOSTS   ?= localhost:$(NGPUS)
# mpirun command prefix; docker-run-mpi appends $(COMMAND) and runs the result
# through `bash -c` inside the container.
#   -x NAME[=VALUE]    export an environment variable to the launched processes
#   -np / -H           process count and host list (NGPUS, MPI_HOSTS)
#   NCCL_SOCKET_IFNAME / btl_tcp_if_exclude
#                      keep NCCL and the TCP BTL off the docker0 and lo interfaces
#   plm_rsh_args       extra arguments for the remote-shell launcher; the value
#                      must be a complete option ('-p 12345' selects the port),
#                      so the leading dash is required
# Defined with `=` (recursive) so NGPUS, MPI_HOSTS and MASTER_ADDR are resolved
# only when the command is actually used.
MPI_CMD = mpirun \
    -x LD_LIBRARY_PATH \
    -x PYTHONPATH \
    -x MASTER_ADDR=$(MASTER_ADDR) \
    -x NCCL_LL_THRESHOLD=0 \
    -x AWS_ACCESS_KEY_ID \
    -x AWS_SECRET_ACCESS_KEY \
    -x WANDB_ENTITY \
    -x WANDB_API_KEY \
    -np $(NGPUS) \
    -H $(MPI_HOSTS) \
    -x NCCL_SOCKET_IFNAME=^docker0,lo \
    --mca btl_tcp_if_exclude docker0,lo \
    --mca plm_rsh_args '-p 12345' \
    --allow-run-as-root
# Build the project image from ./docker/$(DOCKERFILE), using the current
# directory as build context, and tag it $(DOCKER_IMAGE).
# Declared phony: the target names a command, not a file.
.PHONY: docker-build
docker-build:
	docker build \
		$(DOCKER_BUILD_ARGS) \
		-f ./docker/$(DOCKERFILE) \
		-t $(DOCKER_IMAGE) .
# Start a bash shell in a container named $(PROJECT), with the directory make
# is running in mounted at $(WORKSPACE).
# $(CURDIR) is set by make itself to the absolute working directory, unlike
# $(PWD), which is only whatever value the environment happened to pass in.
# The mount argument is quoted so a path containing spaces stays one word.
.PHONY: docker-dev
docker-dev:
	nvidia-docker run --name $(PROJECT) \
		$(DOCKER_OPTS) \
		-v "$(CURDIR):$(WORKSPACE)" \
		$(DOCKER_IMAGE) bash
# Run $(COMMAND) in a container named $(PROJECT), with the directory make is
# running in mounted at $(WORKSPACE) and ~/.torch mounted at /root/.torch.
# Does not depend on docker-build, so the image must already exist.
# --rm and -e DISPLAY are given explicitly even though the default DOCKER_OPTS
# also contains them, so they still apply when DOCKER_OPTS is overridden.
# $(CURDIR) (set by make) replaces the environment-supplied $(PWD).
.PHONY: dist-run
dist-run:
	nvidia-docker run --name $(PROJECT) --rm \
		-e DISPLAY=$(DISPLAY) \
		-v ~/.torch:/root/.torch \
		$(DOCKER_OPTS) \
		-v "$(CURDIR):$(WORKSPACE)" \
		$(DOCKER_IMAGE) \
		$(COMMAND)
# Rebuild the image, then run $(COMMAND) in a container named $(PROJECT).
# Unlike docker-dev and dist-run, the source tree is not bind-mounted here.
# --rm is given explicitly so it still applies when DOCKER_OPTS is overridden.
.PHONY: docker-run
docker-run: docker-build
	nvidia-docker run --name $(PROJECT) --rm \
		$(DOCKER_OPTS) \
		$(DOCKER_IMAGE) \
		$(COMMAND)
# Rebuild the image, then run $(COMMAND) under mpirun inside the container.
# Only ./outputs is bind-mounted (at $(WORKSPACE)/outputs).
# $(MPI_CMD) and $(COMMAND) are expanded by make and passed to `bash -c` as a
# single string. $(CURDIR) (set by make) replaces the environment-supplied
# $(PWD).
.PHONY: docker-run-mpi
docker-run-mpi: docker-build
	nvidia-docker run $(DOCKER_OPTS) \
		-v "$(CURDIR)/outputs:$(WORKSPACE)/outputs" \
		$(DOCKER_IMAGE) \
		bash -c "$(MPI_CMD) $(COMMAND)"
# Delete Python bytecode files and __pycache__ directories below the current
# directory.
# The glob is '*.pyc'; a stray double quote inside the pattern would only match
# names that begin with a literal `"`.
# `-exec ... {} +` hands paths straight to rm, so names containing whitespace
# are safe and nothing runs when there are no matches. `-prune` stops find from
# descending into a __pycache__ directory that is about to be removed.
# NOTE(review): sudo is kept from the existing recipe, presumably because some
# of these files are created by root inside the container; confirm it is needed.
.PHONY: clean
clean:
	find . -name '*.pyc' -exec sudo rm -f {} + && \
	find . -name '__pycache__' -prune -exec sudo rm -rf {} +