Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KubeFATE support FATE v2.0.0 #927

Merged
merged 5 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions docker-deploy/.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,26 @@ SSH_PORT=22
# SSH_PORT: port of SSH, default 22


KubeFATE_Version=v2.0.0-beta
KubeFATE_Version=v2.0.0-release

# components version

FATEFlow_IMAGE="federatedai/fateflow"
FATEFlow_IMAGE_TAG="v2.0.0-beta"
FATEFlow_IMAGE_TAG="2.0.0-release"
FATEBoard_IMAGE="federatedai/fateboard"
FATEBoard_IMAGE_TAG="v2.0.0-beta"
FATEBoard_IMAGE_TAG="2.0.0-release"
MySQL_IMAGE="mysql"
MySQL_IMAGE_TAG="8.0.28"
Client_IMAGE="federatedai/client"
Client_IMAGE_TAG="v2.0.0-beta"
Client_IMAGE_TAG="2.0.0-release"

EGGRoll_IMAGE="federatedai/eggroll"
EGGRoll_IMAGE_TAG="v2.0.0-beta"
EGGRoll_IMAGE_TAG="2.0.0-release"
OSX_IMAGE="federatedai/osx"
OSX_IMAGE_TAG="2.0.0-release"

Nginx_IMAGE="federatedai/nginx"
Nginx_IMAGE_TAG="v2.0.0-beta"
Nginx_IMAGE_TAG="2.0.0-release"
RabbitMQ_IMAGE="federatedai/rabbitmq"
RabbitMQ_IMAGE_TAG="3.8.3-management"
Pulsar_IMAGE="federatedai/pulsar"
Expand All @@ -35,6 +37,6 @@ Hadoop_NameNode_IMAGE_TAG="2.0.0-hadoop3.2.1-java8"
Hadoop_DataNode_IMAGE="federatedai/hadoop-datanode"
Hadoop_DataNode_IMAGE_TAG="2.0.0-hadoop3.2.1-java8"
Spark_Master_IMAGE="federatedai/spark-master"
Spark_Master_IMAGE_TAG="v2.0.0-beta"
Spark_Master_IMAGE_TAG="2.0.0-release"
Spark_Worker_IMAGE="federatedai/spark-worker"
Spark_Worker_IMAGE_TAG="v2.0.0-beta"
Spark_Worker_IMAGE_TAG="2.0.0-release"
12 changes: 6 additions & 6 deletions docker-deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,13 @@ The output is shown as follows. If the status of each component is `Up`, and the

```bash
NAME IMAGE COMMAND SERVICE CREATED STATUS PORTS
confs-10000-client-1 federatedai/client:v2.0.0-beta "bash -c 'pipeline i…" client About a minute ago Up About a minute 0.0.0.0:20000->20000/tcp, :::20000->20000/tcp
confs-10000-clustermanager-1 federatedai/eggroll:v2.0.0-beta "/tini -- bash -c 'j…" clustermanager About a minute ago Up About a minute 4670/tcp
confs-10000-fateboard-1 federatedai/fateboard:v2.0.0-beta "/bin/sh -c 'java -D…" fateboard About a minute ago Up About a minute 0.0.0.0:8080->8080/tcp, :::8080->8080/tcp
confs-10000-fateflow-1 federatedai/fateflow:v2.0.0-beta "/bin/bash -c 'set -…" fateflow About a minute ago Up About a minute (healthy) 0.0.0.0:9360->9360/tcp, :::9360->9360/tcp, 0.0.0.0:9380->9380/tcp, :::9380->9380/tcp
confs-10000-client-1 federatedai/client:2.0.0-release "bash -c 'pipeline i…" client About a minute ago Up About a minute 0.0.0.0:20000->20000/tcp, :::20000->20000/tcp
confs-10000-clustermanager-1 federatedai/eggroll:2.0.0-release "/tini -- bash -c 'j…" clustermanager About a minute ago Up About a minute 4670/tcp
confs-10000-fateboard-1 federatedai/fateboard:2.0.0-release "/bin/sh -c 'java -D…" fateboard About a minute ago Up About a minute 0.0.0.0:8080->8080/tcp, :::8080->8080/tcp
confs-10000-fateflow-1 federatedai/fateflow:2.0.0-release "/bin/bash -c 'set -…" fateflow About a minute ago Up About a minute (healthy) 0.0.0.0:9360->9360/tcp, :::9360->9360/tcp, 0.0.0.0:9380->9380/tcp, :::9380->9380/tcp
confs-10000-mysql-1 mysql:8.0.28 "docker-entrypoint.s…" mysql About a minute ago Up About a minute 3306/tcp, 33060/tcp
confs-10000-nodemanager-1 federatedai/eggroll:v2.0.0-beta "/tini -- bash -c 'j…" nodemanager About a minute ago Up About a minute 4671/tcp
confs-10000-rollsite-1 federatedai/eggroll:v2.0.0-beta "/tini -- bash -c 'j…" rollsite About a minute ago Up About a minute 0.0.0.0:9370->9370/tcp, :::9370->9370/tcp
confs-10000-nodemanager-1 federatedai/eggroll:2.0.0-release "/tini -- bash -c 'j…" nodemanager About a minute ago Up About a minute 4671/tcp
confs-10000-osx-1 federatedai/osx:2.0.0-release "/tini -- bash -c 'j…" osx About a minute ago Up About a minute 0.0.0.0:9370->9370/tcp, :::9370->9370/tcp
```

### Verifying the deployment
Expand Down
12 changes: 6 additions & 6 deletions docker-deploy/README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,13 +231,13 @@ docker compose ps

```bash
NAME IMAGE COMMAND SERVICE CREATED STATUS PORTS
confs-10000-client-1 federatedai/client:v2.0.0-beta "bash -c 'pipeline i…" client About a minute ago Up About a minute 0.0.0.0:20000->20000/tcp, :::20000->20000/tcp
confs-10000-clustermanager-1 federatedai/eggroll:v2.0.0-beta "/tini -- bash -c 'j…" clustermanager About a minute ago Up About a minute 4670/tcp
confs-10000-fateboard-1 federatedai/fateboard:v2.0.0-beta "/bin/sh -c 'java -D…" fateboard About a minute ago Up About a minute 0.0.0.0:8080->8080/tcp, :::8080->8080/tcp
confs-10000-fateflow-1 federatedai/fateflow:v2.0.0-beta "/bin/bash -c 'set -…" fateflow About a minute ago Up About a minute (healthy) 0.0.0.0:9360->9360/tcp, :::9360->9360/tcp, 0.0.0.0:9380->9380/tcp, :::9380->9380/tcp
confs-10000-client-1 federatedai/client:2.0.0-release "bash -c 'pipeline i…" client About a minute ago Up About a minute 0.0.0.0:20000->20000/tcp, :::20000->20000/tcp
confs-10000-clustermanager-1 federatedai/eggroll:2.0.0-release "/tini -- bash -c 'j…" clustermanager About a minute ago Up About a minute 4670/tcp
confs-10000-fateboard-1 federatedai/fateboard:2.0.0-release "/bin/sh -c 'java -D…" fateboard About a minute ago Up About a minute 0.0.0.0:8080->8080/tcp, :::8080->8080/tcp
confs-10000-fateflow-1 federatedai/fateflow:2.0.0-release "/bin/bash -c 'set -…" fateflow About a minute ago Up About a minute (healthy) 0.0.0.0:9360->9360/tcp, :::9360->9360/tcp, 0.0.0.0:9380->9380/tcp, :::9380->9380/tcp
confs-10000-mysql-1 mysql:8.0.28 "docker-entrypoint.s…" mysql About a minute ago Up About a minute 3306/tcp, 33060/tcp
confs-10000-nodemanager-1 federatedai/eggroll:v2.0.0-beta "/tini -- bash -c 'j…" nodemanager About a minute ago Up About a minute 4671/tcp
confs-10000-rollsite-1 federatedai/eggroll:v2.0.0-beta "/tini -- bash -c 'j…" rollsite About a minute ago Up About a minute 0.0.0.0:9370->9370/tcp, :::9370->9370/tcp
confs-10000-nodemanager-1 federatedai/eggroll:2.0.0-release "/tini -- bash -c 'j…" nodemanager About a minute ago Up About a minute 4671/tcp
confs-10000-osx-1 federatedai/osx:2.0.0-release "/tini -- bash -c 'j…" osx About a minute ago Up About a minute 0.0.0.0:9370->9370/tcp, :::9370->9370/tcp
```

### 验证部署
Expand Down
2 changes: 1 addition & 1 deletion docker-deploy/docker_deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ handleLocally() {

main() {

if [ "$1" = "" ] || [ "$" = "--help" ]; then
if [ "$1" = "" ] || [ "$1" = "--help" ]; then
ShowUsage
exit 1
elif [ "$1" = "--delete" ] || [ "$1" = "--del" ]; then
Expand Down
126 changes: 58 additions & 68 deletions docker-deploy/generate_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ function list_include_item {

function CheckConfig(){
# Check config start
computing_list="Eggroll Spark Spark_local"
computing_list="Eggroll Spark STANDALONE"
spark_federation_list="RabbitMQ Pulsar"
algorithm_list="Basic NN ALL"
device_list="CPU IPCL GPU"
Expand All @@ -67,8 +67,8 @@ function CheckConfig(){
fi

if [ $computing == "Eggroll" ]; then
if [ $federation != "Eggroll" ] || [ $storage != "Eggroll" ]; then
echo "[ERROR]: Please select the correct engine. When eggroll is selected as the computing engine, both Federation and storage must be eggroll engines!"
if [ $federation != "OSX" ] || [ $storage != "Eggroll" ]; then
echo "[ERROR]: Please select the correct engine. When eggroll is selected as the computing engine, both Federation and Storage must be osx/eggroll engines!"
exit 1
fi
fi
Expand All @@ -84,13 +84,13 @@ function CheckConfig(){
fi
fi

if [ "$computing" == "Spark_local" ]; then
if ! $(list_include_item "$spark_federation_list" "$federation"); then
echo "[ERROR]: If you choose the Spark_local computing engine, the federation component must be Pulsar or RabbitMQ!"
exit 1
fi
if [ "$storage" != "LocalFS" ]; then
echo "[ERROR]: If you choose the Spark computing engine, the storage component must be LocalFS!"
if [ "$computing" == "STANDALONE" ]; then
# if ! $(list_include_item "$spark_federation_list" "$federation"); then
# echo "[ERROR]: If you choose the STANDALONE computing engine, the federation component must be Pulsar or RabbitMQ!"
# exit 1
# fi
if [ "$storage" != "STANDALONE" ]; then
echo "[ERROR]: If you choose the Spark computing engine, the storage component must be STANDALONE!"
exit 1
fi
fi
Expand Down Expand Up @@ -180,6 +180,7 @@ GenerateConfig() {
#clustermanager & nodemanager
sed -i "s#<clustermanager.host>#${clustermanager_ip}#g" ./confs-$party_id/confs/eggroll/conf/eggroll.properties
sed -i "s#<clustermanager.port>#${clustermanager_port}#g" ./confs-$party_id/confs/eggroll/conf/eggroll.properties
sed -i "s#<nodemanager.host>#${nodemanager_ip}#g" ./confs-$party_id/confs/eggroll/conf/eggroll.properties
sed -i "s#<nodemanager.port>#${nodemanager_port}#g" ./confs-$party_id/confs/eggroll/conf/eggroll.properties
sed -i "s#<party.id>#${party_id}#g" ./confs-$party_id/confs/eggroll/conf/eggroll.properties

Expand All @@ -206,7 +207,7 @@ GenerateConfig() {
if [ "$federation" == "RabbitMQ" ]; then
cp -r training_template/backends/spark/rabbitmq confs-$party_id/confs/
# delete Pulsar spec
sed -i '203,217d' confs-"$party_id"/docker-compose.yml
sed -i '203,218d' confs-"$party_id"/docker-compose.yml
elif [ "$federation" == "Pulsar" ]; then
cp -r training_template/backends/spark/pulsar confs-$party_id/confs/
# delete RabbitMQ spec
Expand All @@ -215,21 +216,21 @@ GenerateConfig() {
fi
fi

if [ "$computing" == "Spark_local" ]; then
if [ "$computing" == "STANDALONE" ]; then
# computing
cp -r training_template/backends/spark/nginx confs-$party_id/confs/
cp -r training_template/backends/spark/spark confs-$party_id/confs/
# cp -r training_template/backends/spark/nginx confs-$party_id/confs/
# cp -r training_template/backends/spark/spark confs-$party_id/confs/
# storage
if [ "$storage" == "LocalFS" ]; then
if [ "$storage" == "STANDALONE" ]; then
cp training_template/docker-compose-spark-slim.yml confs-$party_id/docker-compose.yml
# federation
if [ "$federation" == "RabbitMQ" ]; then
cp -r training_template/backends/spark/rabbitmq confs-$party_id/confs/
sed -i '149,163d' confs-$party_id/docker-compose.yml
elif [ "$federation" == "Pulsar" ]; then
cp -r training_template/backends/spark/pulsar confs-$party_id/confs/
sed -i '131,147d' confs-$party_id/docker-compose.yml
fi
# if [ "$federation" == "RabbitMQ" ]; then
# cp -r training_template/backends/spark/rabbitmq confs-$party_id/confs/
# sed -i '149,163d' confs-$party_id/docker-compose.yml
# elif [ "$federation" == "Pulsar" ]; then
# cp -r training_template/backends/spark/pulsar confs-$party_id/confs/
# sed -i '131,147d' confs-$party_id/docker-compose.yml
# fi
fi
fi

Expand All @@ -241,7 +242,7 @@ GenerateConfig() {
# Images choose
Suffix=""
# computing
if [ "$computing" == "Spark" ] || [ "$computing" == "Spark_local" ]; then
if [ "$computing" == "Spark" ]; then
Suffix=$Suffix""
fi
# algorithm
Expand All @@ -264,7 +265,7 @@ GenerateConfig() {
if [ "$computing" == "Eggroll" ]; then
sed -i "s#image: \"\${FATEFlow_IMAGE}:\${FATEFlow_IMAGE_TAG}\"#image: \"\${FATEFlow_IMAGE}${Suffix}:\${FATEFlow_IMAGE_TAG}\"#g" ./confs-"$party_id"/docker-compose.yml
sed -i "s#image: \"\${EGGRoll_IMAGE}:\${EGGRoll_IMAGE_TAG}\"#image: \"\${EGGRoll_IMAGE}${Suffix}:\${EGGRoll_IMAGE_TAG}\"#g" ./confs-"$party_id"/docker-compose.yml
elif [ "$computing" == "Spark" ] || [ "$computing" == "Spark_local" ]; then
elif [ "$computing" == "Spark" ] ; then
sed -i "s#image: \"\${FATEFlow_IMAGE}:\${FATEFlow_IMAGE_TAG}\"#image: \"\${FATEFlow_IMAGE}-spark${Suffix}:\${FATEFlow_IMAGE_TAG}\"#g" ./confs-"$party_id"/docker-compose.yml
sed -i "s#image: \"\${Spark_Worker_IMAGE}:\${Spark_Worker_IMAGE_TAG}\"#image: \"\${Spark_Worker_IMAGE}${Suffix}:\${Spark_Worker_IMAGE_TAG}\"#g" ./confs-"$party_id"/docker-compose.yml
fi
Expand All @@ -273,12 +274,12 @@ GenerateConfig() {
if [ "$device" == "GPU" ]; then
line=0 # line refers to the line number of the fateflow `command` line in docker-compose.yaml
if [ "$computing" == "Eggroll" ]; then
line=140
line=141
fi
if [ "$computing" == "Spark" ]; then
line=84
line=85
fi
if [ "$computing" == "Spark_local" ]; then
if [ "$computing" == "STANDALONE" ]; then
line=85
fi
sed -i "${line}i\\
Expand Down Expand Up @@ -345,39 +346,43 @@ GenerateConfig() {
echo mysql module of $party_id done!

# fate_flow
sed -i "s/party_id:/party_id: \"${party_id}\"/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/party_id: .*/party_id: \"${party_id}\"/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/name: <db_name>/name: '${db_name}'/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/user: <db_user>/user: '${db_user}'/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/passwd: <db_passwd>/passwd: '${db_password}'/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/host: <db_host>/host: '${db_ip}'/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/127.0.0.1:8000/${serving_ip}:8000/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml


if [[ "$computing" == "Spark" ]] || [[ "$computing" == "Spark_local" ]] ; then
sed -i "s/proxy_name: rollsite/proxy_name: nginx/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
if [[ "$computing" == "Spark" ]] ; then
sed -i "s/proxy_name: osx/proxy_name: nginx/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/computing: eggroll/computing: spark/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
fi
if [[ "$computing" == "STANDALONE" ]] ; then
# sed -i "s/proxy_name: osx/proxy_name: nginx/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/computing: eggroll/computing: standalone/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
fi
if [[ "$federation" == "Pulsar" ]]; then
sed -i "s/ federation: rollsite/ federation: pulsar/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/ federation: osx/ federation: pulsar/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
elif [[ "$federation" == "RabbitMQ" ]]; then
sed -i "s/ federation: rollsite/ federation: rabbitmq/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/ federation: osx/ federation: rabbitmq/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
fi

if [[ "$storage" == "HDFS" ]]; then
sed -i "s/ storage: eggroll/ storage: hdfs/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
elif [[ "$storage" == "LocalFS" ]]; then
sed -i "s/ storage: eggroll/ storage: localfs/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
elif [[ "$storage" == "STANDALONE" ]]; then
sed -i "s/ storage: eggroll/ storage: standalone/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
fi

if [[ "$computing" == "Spark_local" ]] ; then
sed -i "s#spark.master .*#spark.master local[*]#g" ./confs-$party_id/confs/spark/spark-defaults.conf
fi
# if [[ "$computing" == "STANDALONE" ]] ; then
# sed -i "s#spark.master .*#spark.master local[*]#g" ./confs-$party_id/confs/spark/spark-defaults.conf
# fi

# compute_core
sed -i "s/nodes: .*/nodes: 1/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/cores_per_node: .*/cores_per_node: $compute_core/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml

if [[ "$federation" == "Eggroll" ]]; then
if [[ "$computing" == "Eggroll" ]]; then
sed -i "s/eggroll.session.processors.per.node=.*/eggroll.session.processors.per.node=$compute_core/g" ./confs-$party_id/confs/eggroll/conf/eggroll.properties
fi
if [[ "$computing" == "Spark"* ]]; then
Expand All @@ -386,29 +391,14 @@ GenerateConfig() {
echo fate_flow module of $party_id done!

# federation config
# eggroll
if [[ "$federation" == "Eggroll" ]]; then
cat >./confs-$party_id/confs/eggroll/conf/route_table.json <<EOF
# OSX
sed -i "s/self.party=9999/self.party=${party_id}/g" ./confs-$party_id/confs/osx/conf/broker.properties
if [[ "$federation" == "OSX" ]]; then
cat >./confs-$party_id/confs/osx/conf/route_table.json <<EOF
{
"route_table": {
"default": {
"default": [
{
$(if [ "$exchange_ip" != "" ]; then
echo "
\"ip\": \"${exchange_ip}\",
\"port\": 9371
"
else
echo "
\"ip\": \"${proxy_ip}\",
\"port\": \"${proxy_port}\"
"
fi)
}
]
},
$(for ((j = 0; j < ${#party_list[*]}; j++)); do
"route_table":
{
$(for ((j = 0; j < ${#party_list[*]}; j++)); do
if [ "${party_id}" == "${party_list[${j}]}" ]; then
continue
fi
Expand All @@ -422,19 +412,19 @@ $(for ((j = 0; j < ${#party_list[*]}; j++)); do
"
done)
"${party_id}": {
"default": [{
"ip": "${proxy_ip}",
"port": ${proxy_port}
}],
"fateflow": [{
"ip": "${fate_flow_ip}",
"port": ${fate_flow_grpc_port}
}]
}
},
"permission": {
"default_allow": true
}
},
"self_party":[
"${party_id}"
],
"permission":
{
"default_allow": true
}
}
EOF
fi
Expand Down
4 changes: 2 additions & 2 deletions docker-deploy/parties.conf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ serving_ip_list=(192.168.1.1 192.168.1.2)
# Engines:
# Computing : Eggroll, Spark, STANDALONE
computing=Eggroll
# Federation: Eggroll(computing: Eggroll), Pulsar/RabbitMQ(computing: Spark/Spark_local)
federation=Eggroll
# Federation: OSX(computing: Eggroll/Spark/STANDALONE), Pulsar/RabbitMQ(computing: Spark/STANDALONE)
federation=OSX
# Storage: Eggroll(computing: Eggroll), HDFS(computing: Spark), STANDALONE(computing: STANDALONE)
storage=Eggroll
# Algorithm: Basic, NN, ALL
Expand Down
Loading
Loading