-
Notifications
You must be signed in to change notification settings - Fork 0
/
submit_job_config.cfg
63 lines (63 loc) · 1.69 KB
/
submit_job_config.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Job submission config for the single-task-role job "bitDelta37".
# Keys follow the OpenPAI job protocol layout (see the
# com.microsoft.pai.runtimeplugin section under `extras`).
protocolVersion: 2
name: bitDelta37
type: job
jobRetryCount: 0

# Container image referenced by name from the task role below.
prerequisites:
  - type: dockerimage
    uri: 'nvcr.io/nvidia/pytorch:23.10-py3'
    name: docker_image_0

taskRoles:
  taskrole:
    instances: 1
    completion:
      minFailedInstances: 1
    taskRetryCount: 0
    dockerImage: docker_image_0
    resourcePerInstance:
      gpu: 8
      cpu: 96
      memoryMB: 960000
      ports:
        SynPort: 1
    commands:
      # NOTE(review): hard-coded root password committed in the config, and
      # root SSH login is enabled further down. Prefer key-based access via
      # the ssh plugin (`userssh` below) or inject the secret at submit time.
      - 'echo ''root:password1234'' |chpasswd'
      # Expose the shared-storage user directory and HuggingFace cache
      # inside root's home.
      - ln -s /mnt/glusterfs-pvc/usr/yibo.han /root/
      # `ln -s <target> /root/.cache/` fails when /root/.cache is missing,
      # so create it first (no-op if it already exists).
      - mkdir -p /root/.cache
      - ln -s /mnt/glusterfs-pvc/usr/yibo.han/huggingface /root/.cache/
      # System packages.
      - apt update
      - apt upgrade -y
      - apt install -y tmux
      - apt install -y libgl1-mesa-glx
      # Install Anaconda from the installer kept on shared storage
      # (~/yibo.han is the symlink created above).
      - cp ~/yibo.han/Anaconda3-2023.09-0-Linux-x86_64.sh ./
      - >-
        sh Anaconda3-2023.09-0-Linux-x86_64.sh -b -p ~/anaconda &&
        ~/anaconda/bin/conda init
      # Append two blank lines, then the proxy on/off aliases, to ~/.bashrc.
      - echo '' >> ~/.bashrc
      - echo '' >> ~/.bashrc
      - >-
        echo 'alias proxyon="export http_proxy=http://127.0.0.1:10080;export
        https_proxy=http://127.0.0.1:10080"' >> ~/.bashrc
      - echo 'alias proxyoff="unset http_proxy;unset https_proxy"' >> ~/.bashrc
      # Allow root login over SSH. This must be its own command: previously
      # it was folded onto one shell line together with the sshd invocation,
      # so sed received /usr/sbin/sshd and -D as extra arguments and the
      # daemon was never launched.
      - >-
        sed -ri 's/^#?PermitRootLogin\s+.*/PermitRootLogin yes/'
        /etc/ssh/sshd_config
      # Start the SSH daemon in the background.
      - '/usr/sbin/sshd -D &'
      # Presumably keeps the container alive for interactive use - confirm.
      - sleep 365d

defaults:
  virtualCluster: default

extras:
  com.microsoft.pai.runtimeplugin:
    - plugin: ssh
      parameters:
        jobssh: true
        userssh:
          type: custom
          # NOTE(review): empty custom value - supply a public key here if
          # key-based login is intended.
          value: ''
    - plugin: teamwise_storage
      parameters:
        storageConfigNames:
          - pai-ssd-pvc
          - glusterfs-pvc
  hivedScheduler:
    taskRoles:
      taskrole:
        skuNum: 8
        skuType: gpu-machine