Skip to content

Commit

Permalink
Helm chart support FATE v2.0.0
Browse files Browse the repository at this point in the history
Signed-off-by: Chenlong Ma <[email protected]>
  • Loading branch information
owlet42 committed Jan 23, 2024
1 parent c6c27c7 commit 5be3917
Show file tree
Hide file tree
Showing 13 changed files with 213 additions and 259 deletions.
4 changes: 2 additions & 2 deletions helm-charts/FATE/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
apiVersion: v1
appVersion: v2.0.0-beta
appVersion: v2.0.0
description: A Helm chart for fate-training
name: fate
version: v2.0.0-beta
version: v2.0.0
home: https://fate.fedai.org
icon: https://aisp-1251170195.cos.ap-hongkong.myqcloud.com/wp-content/uploads/sites/12/2019/09/logo.png
sources:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ spec:
ln -sf /dev/stdout /data/projects/fate/eggroll/logs/eggroll/eggroll.jvm.log
touch /data/projects/fate/eggroll/logs/eggroll/eggroll.jvm.err.log
ln -sf /dev/stderr /data/projects/fate/eggroll/logs/eggroll/eggroll.jvm.err.log
java -Dlog4j.configurationFile=$${EGGROLL_HOME}/conf/log4j2.properties -cp $${EGGROLL_HOME}/lib/*: com.webank.eggroll.core.Bootstrap --bootstraps com.webank.eggroll.core.resourcemanager.ClusterManagerBootstrap -c $${EGGROLL_HOME}/conf/eggroll.properties -p 4670 -s 'EGGROLL_DEAMON'
java -server -Dlog4j.configurationFile=$${EGGROLL_HOME}/conf/log4j2.xml -Dmodule=clustermanager -cp $${EGGROLL_HOME}/lib/*: org.fedai.eggroll.clustermanager.Bootstrap -p 4670 -s EGGROLL_DAEMON
ports:
- containerPort: 4670
livenessProbe:
Expand Down
87 changes: 20 additions & 67 deletions helm-charts/FATE/templates/backends/eggroll/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@ data:
eggroll.properties: |
[eggroll]
# core
eggroll.resourcemanager.nodemanager.net.device=eth0
eggroll.resourcemanager.nodemanager.gpu.num.shell=nvidia.sh
#eggroll.resourcemanager.clustermanager.jdbc.driver.class.name=org.h2.Driver
eggroll.resourcemanager.clustermanager.jdbc.driver.class.name=com.mysql.cj.jdbc.Driver
#eggroll.resourcemanager.clustermanager.jdbc.url=jdbc:h2:./data/meta_h2/eggroll_meta.h2;AUTO_SERVER=TRUE;MODE=MySQL;DATABASE_TO_LOWER=TRUE;SCHEMA=eggroll_meta;
eggroll.resourcemanager.clustermanager.jdbc.url=jdbc:mysql://{{ .Values.externalMysqlIp | default .Values.modules.mysql.ip | default "mysql" }}:{{ .Values.externalMysqlPort | default .Values.modules.mysql.port | default "3306" }}/{{ .Values.externalMysqlDatabase | default .Values.modules.mysql.database | default "eggroll_meta" }}?useSSL=false&serverTimezone={{ .Values.modules.clustermanager.mysqlServerTimezone | default "UTC" }}&characterEncoding=utf8&allowPublicKeyRetrieval=true
eggroll.resourcemanager.clustermanager.jdbc.username={{ .Values.externalMysqlUser | default .Values.modules.mysql.user | default "fate" }}
eggroll.resourcemanager.clustermanager.jdbc.password={{ .Values.externalMysqlPassword | default .Values.modules.mysql.password | default "fate_dev" }}
eggroll.data.dir=data/
eggroll.logs.dir=logs/
Expand All @@ -38,87 +41,37 @@ data:
eggroll.resourcemanager.nodemanager.port=4671
eggroll.resourcemanager.process.tag={{ .Values.partyId }}
# dashboard
eggroll.dashboard.server.port=8083
eggroll.security.session.expired.time=30
eggroll.security.login.username=admin
eggroll.security.login.password=admin
eggroll.security.encrypt.private_key=
eggroll.security.encrypt.enable=false
eggroll.data.dir=/data/projects/fate/eggroll/data/
eggroll.logs.dir=/data/projects/fate/eggroll/logs/
eggroll.bootstrap.root.script=bin/eggroll_boot.sh
eggroll.resourcemanager.bootstrap.egg_pair.exepath=bin/roll_pair/egg_pair_bootstrap.sh
eggroll.resourcemanager.bootstrap.egg_pair.venv=
eggroll.resourcemanager.bootstrap.egg_pair.pythonpath=python
eggroll.resourcemanager.bootstrap.egg_pair.filepath=python/eggroll/roll_pair/egg_pair.py
eggroll.resourcemanager.bootstrap.egg_pair.filepath=python/eggroll/computing/egg_pair/egg_pair.py
eggroll.resourcemanager.bootstrap.egg_pair.ld_library_path=
eggroll.resourcemanager.bootstrap.egg_frame.exepath=bin/roll_pair/roll_pair_master_bootstrap.sh
eggroll.resourcemanager.bootstrap.egg_frame.javahome=/usr/lib/jvm/java-1.8.0-openjdk
eggroll.resourcemanager.bootstrap.egg_frame.classpath=conf/:lib/*
eggroll.resourcemanager.bootstrap.egg_frame.mainclass=com.webank.eggroll.rollframe.EggFrameBootstrap
eggroll.resourcemanager.bootstrap.egg_frame.jvm.options=
eggroll.core.grpc.channel.keepalive.timeout.sec=20
# roll_frame
arrow.enable_unsafe_memory_access=true
# hadoop
hadoop.fs.defaultFS=file:///
# hadoop HA mode
hadoop.dfs.nameservices=
hadoop.dfs.namenode.rpc-address.nn1=
hadoop.dfs.namenode.rpc-address.nn2=
# session
eggroll.session.processors.per.node={{ .Values.modules.nodemanager.sessionProcessorsPerNode | default 2 }}
eggroll.session.start.timeout.ms=180000
# rollpair
eggroll.rollpair.transferpair.sendbuf.size=250000
# rollsite
eggroll.rollsite.coordinator={{ .Values.partyName }}
eggroll.rollsite.host=rollsite
eggroll.rollsite.port=9370
eggroll.rollsite.party.id={{ .Values.partyId }}
eggroll.rollsite.route.table.path=conf/route_table/route_table.json
eggroll.rollsite.route.table.key=
eggroll.rollsite.route.table.whitelist=127.0.0.1
eggroll.rollsite.jvm.options=-XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:logs/eggroll/rollsite.gc.log
eggroll.rollsite.push.max.retry=3
eggroll.rollsite.push.long.retry=2
eggroll.rollsite.push.batches.per.stream=10
eggroll.rollsite.adapter.sendbuf.size=100000
# polling
# {{ .Values.modules.rollsite.polling.enabled }}
# {{ .Values.modules.rollsite.polling.type }}
#
{{- if and .Values.modules.rollsite.polling.enabled ( eq (print .Values.modules.rollsite.polling.type) "client" ) }}
eggroll.rollsite.polling.client.enabled=true
{{- end }}
{{- if and .Values.modules.rollsite.polling.enabled ( eq (print .Values.modules.rollsite.polling.type) "server" ) }}
eggroll.rollsite.polling.server.enabled=true
eggroll.rollsite.polling.concurrency= {{ .Values.modules.rollsite.polling.concurrency | default 50 }}
{{- end }}
# deepspeed
## where deepspeed containers locate, required for deepspeed
eggroll.resourcemanager.nodemanager.containers.data.dir=/data/projects/fate/eggroll/containers
#eggroll.resourcemanager.nodemanager.containers.data.dir=
## which python exec that deepspeed container used, fallback to eggpair venv/bin/python
eggroll.container.deepspeed.python.exec=/data/projects/python/venv/bin/python
#eggroll.container.python.exec=
## provide by submit option for now
#eggroll.container.deepspeed.script.path=
eggroll.container.deepspeed.distributed.backend=nccl
## defaults to cluster manager endpoint
#eggroll.container.deepspeed.distributed.store.host=
#eggroll.container.deepspeed.distributed.store.port=
{{- if .Values.modules.rollsite.enableTLS }}
cert_configs: |
eggroll.core.security.secure.cluster.enabled=true
eggroll.core.security.secure.client.auth.enabled=true
eggroll.core.security.ca.crt.path=conf/cert/ca.pem
eggroll.core.security.crt.path=conf/cert/server.crt
eggroll.core.security.key.path=conf/cert/server.key
eggroll.core.security.client.ca.crt.path=conf/cert/ca.pem
eggroll.core.security.client.crt.path=conf/cert/client.crt
eggroll.core.security.client.key.path=conf/cert/client.key
{{- end }}
{{ end }}
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ spec:
ln -sf /dev/stdout /data/projects/fate/eggroll/logs/eggroll/eggroll.jvm.log
touch /data/projects/fate/eggroll/logs/eggroll/eggroll.jvm.err.log
ln -sf /dev/stderr /data/projects/fate/eggroll/logs/eggroll/eggroll.jvm.err.log
/tini -- java -Dlog4j.configurationFile=$${EGGROLL_HOME}/conf/log4j2.properties -cp $${EGGROLL_HOME}/lib/*: com.webank.eggroll.core.Bootstrap --bootstraps com.webank.eggroll.core.resourcemanager.NodeManagerBootstrap -c $${EGGROLL_HOME}/conf/eggroll.properties -p 4671 -s 'EGGROLL_DEAMON'
/tini -- java -server -Dlog4j.configurationFile=$${EGGROLL_HOME}/conf/log4j2.xml -Dmodule=nodemanager -cp $${EGGROLL_HOME}/lib/*: org.fedai.eggroll.nodemanager.Bootstrap -p 4671 -s EGGROLL_DAEMON
ports:
- containerPort: 4671
livenessProbe:
Expand Down

This file was deleted.

106 changes: 106 additions & 0 deletions helm-charts/FATE/templates/core/osx/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright 2019-2022 VMware, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

{{ if .Values.modules.osx.include }}
# ConfigMap "osx-config": rendered only when .Values.modules.osx.include is set.
# It carries two files for the osx module:
#   route_table.json  - one "default" endpoint per entry of .Values.partyList,
#                       plus a "fateflow" endpoint for this party's own ID.
#   broker.properties - broker settings (ports, TLS switches, eggroll endpoint).
# NOTE(review): the label key "fateMoudle" is spelled this way on purpose here;
# presumably selectors elsewhere in the chart use the same spelling - verify
# before renaming.
kind: ConfigMap
apiVersion: v1
metadata:
  name: osx-config
  labels:
    fateMoudle: osx
{{ include "fate.labels" . | indent 4 }}
data:
  route_table.json: |
    {
      "route_table":
      {
        {{- /* Every remote party entry ends with a comma; this is valid JSON
               because the local party entry below always follows the loop. */}}
        {{- range .Values.partyList }}
        "{{ .partyId }}": {
          "default": [
            {
              "ip": "{{ .partyIp }}",
              "port": {{ .partyPort }}
            }
          ]
        },
        {{- end }}
        "{{ .Values.partyId }}": {
          "fateflow": [
            {
              "ip": "fateflow",
              "port": 9360
            }
          ]
        }
      },
      "self_party":[
        {{- /* Must be a template action: without the delimiters the literal
               text ".Values.partyId" is written into the route table. */}}
        "{{ .Values.partyId }}"
      ],
      "permission":
      {
        "default_allow": true
      }
    }
  broker.properties: |
    grpc.port= 9370
    # Http switch for the server.
    # If set to True, the server will open the http port.
    # http port configuration can be set through http.port
    open.http.server=false
    # port of http
    http.port=8087
    https.port=8088
    # whether the http server uses TLS
    #http.use.tls = false
    # whether the grpc server uses TLS?
    # If true, a grpc port will be specially opened to listen for TLS requests
    # grpc tls port configuration can be set through grpc.tls.port
    open.grpc.tls.server=false
    grpc.tls.port=9883
    # the partyId of self ,multiple partyIds can be set.
    # eg: 9999,10000,10001
    {{- /* Taken from the same value as the route table's local entry so the
           broker and the route table agree on which party is "self". */}}
    self.party={{ .Values.partyId }}
    # deployment mode, including cluster/standalone,
    # respectively representing cluster mode and standalone mode ,
    # and standalone is used by default
    deploy.mode=standalone
    # the zookeeper address needs to be configured when the deployment mode is cluster
    zk.url=127.0.0.1:2181
    stream.limit.mode=LOCAL
    # the IP of the cluster manager component of eggroll
    eggroll.cluster.manager.ip = clustermanager
    # the port of the cluster manager component of eggroll
    eggroll.cluster.manager.port = 4670
    # maximum number of message retries
    produce.msg.max.try.time =3
    http.client.method.config = {"UNARY_CALL":{"reqTimeout":0,"connectionTimeout":0,"socketTimeout":0}}
    http.use.tls=false
    http.ssl.trust.store.type=PKCS12
    http.ssl.key.store.alias=22
    http.ssl.key.store.password=123456
    mapped.file.size=134217728
    #http.ssl.trust.store.path=D:\\44\\127.0.0.1.pfx
    server.ca.file=
    server.cert.chain.file=
    server.private.key.file=
{{ end }}
Loading

0 comments on commit 5be3917

Please sign in to comment.