-
Notifications
You must be signed in to change notification settings - Fork 0
/
kubernetes_resolver.py
64 lines (49 loc) · 1.99 KB
/
kubernetes_resolver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import os
import re
from kubernetes import client, config
# Uses the in-cluster configuration, so this only works when the code runs
# inside the cluster itself, not from outside or from a remote machine.
# The configuration must be loaded before the API client is created.
config.load_incluster_config()
# Module-level CoreV1Api client shared by every function in this file.
k8 = client.CoreV1Api()
def print_pods_services(namespace="tensorflow"):
    """Print the pods and services of ``namespace`` to stdout.

    Debugging helper: lists every pod (IP, namespace, name) and every
    service (namespace, name) returned by the module-level ``k8`` client,
    each section followed by a separator line.

    Args:
        namespace: Kubernetes namespace to inspect. Defaults to
            ``"tensorflow"``, matching the other helpers in this module.
    """
    print("All Pods:")
    ret_pods = k8.list_namespaced_pod(namespace, watch=False)
    for pod in ret_pods.items:
        print("%s\t%s\t%s" %
              (pod.status.pod_ip, pod.metadata.namespace, pod.metadata.name))
    print("--------------------------------")

    print("All Services:")
    ret_svc = k8.list_namespaced_service(namespace, watch=False)
    for svc in ret_svc.items:
        print("%s\t%s" %
              (svc.metadata.namespace, svc.metadata.name))
    print("--------------------------------")
def build_config():
    """Assemble the cluster/task description for this pod as a JSON string.

    The task type is read from the ``POD_TASK`` environment variable and the
    task index from ``fetch_task_index()``; the cluster section is made of
    the lists returned by ``build_chief_list()`` and ``build_worker_list()``.
    The dictionary is printed before being serialised.

    Returns:
        str: JSON document with the top-level keys ``cluster`` and ``task``.
    """
    # Gather the pieces in the same order the lookups were originally made:
    # task information first, then the cluster membership.
    role = os.environ.get("POD_TASK")
    task_index = fetch_task_index()
    chiefs = build_chief_list()
    workers = build_worker_list()

    tf_config = {
        'cluster': {'chief': chiefs, 'worker': workers},
        'task': {'type': role, 'index': task_index},
    }
    print(tf_config)
    return json.dumps(tf_config)
def build_worker_list(namespace="tensorflow"):
    """Return the ``host:port`` addresses of all worker pods, by ordinal.

    Pods whose name starts with ``tensorflow-worker-<number>`` are treated
    as workers. Each address is ``<pod>.worker-svc.<namespace>.svc.cluster.local``
    (presumably resolved through a Service named ``worker-svc`` -- confirm
    against the deployment manifests) combined with the first declared port
    of the pod's first container.

    The result is sorted by the numeric ordinal in the pod name.
    ``fetch_task_index`` uses that same ordinal as the task index, so entry
    ``i`` of the list has to be the pod with ordinal ``i``; the order in
    which the API lists pods is not numeric (e.g. ``-10`` before ``-2``).

    Args:
        namespace: Namespace to list pods from; also used as the namespace
            component of the generated DNS names.

    Returns:
        list[str]: Worker addresses ordered by pod ordinal.
    """
    worker_pattern = re.compile(r"tensorflow-worker-([0-9]+)")
    pods = k8.list_namespaced_pod(namespace, watch=False)

    indexed_workers = []
    for item in pods.items:
        matched = worker_pattern.match(item.metadata.name)
        if not matched:
            continue
        # Use the namespace that was actually queried instead of a
        # hard-coded one, so non-default namespaces get valid DNS names.
        node_name = "%s.worker-svc.%s.svc.cluster.local" \
            % (item.metadata.name, namespace)
        node_port = item.spec.containers[0].ports[0].container_port
        indexed_workers.append(
            (int(matched.group(1)), "%s:%s" % (node_name, node_port)))

    # Sort on the ordinal only; the sort is stable, so pods sharing an
    # ordinal keep the order in which the API returned them.
    indexed_workers.sort(key=lambda entry: entry[0])
    return [address for _, address in indexed_workers]
def fetch_task_index():
    """Return the task index of the current pod.

    For pods whose ``POD_TASK`` environment variable equals ``"worker"`` the
    index is the third dash-separated field of ``POD_NAME`` converted to an
    integer (``tensorflow-worker-3`` -> ``3``). Every other task type gets
    index ``0``.

    Returns:
        int: The task index.
    """
    role = os.environ.get("POD_TASK")
    if role != "worker":
        return 0
    name_fields = os.environ.get("POD_NAME").split("-")
    return int(name_fields[2])
def build_chief_list(namespace="tensorflow"):
    """Return ``name:port`` entries for every chief service in ``namespace``.

    A service counts as a chief when its name contains the substring
    ``chief``; the port used is the first one listed in the service spec.

    Args:
        namespace: Namespace whose services are listed.

    Returns:
        list[str]: One ``<service-name>:<port>`` string per matching
        service, in the order the API returned them.
    """
    listing = k8.list_namespaced_service(namespace, watch=False)
    return [
        "%s:%s" % (svc.metadata.name, svc.spec.ports[0].port)
        for svc in listing.items
        if "chief" in svc.metadata.name
    ]