From 5dbaaf55967f3c36dfceaa0f1424630571cf6cbb Mon Sep 17 00:00:00 2001 From: Kevin Cai Date: Tue, 10 Sep 2024 15:05:02 +0800 Subject: [PATCH] [Enhancement] Add config to turn on balancing tablets between workers in shared-data mode (#50843) Signed-off-by: Kevin Xiaohua Cai --- .../src/main/java/com/starrocks/common/Config.java | 10 ++++++++++ .../main/java/com/starrocks/staros/StarMgrServer.java | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/fe/fe-core/src/main/java/com/starrocks/common/Config.java b/fe/fe-core/src/main/java/com/starrocks/common/Config.java index 68fb7ed3b5afe..4dbc4c131ef7b 100644 --- a/fe/fe-core/src/main/java/com/starrocks/common/Config.java +++ b/fe/fe-core/src/main/java/com/starrocks/common/Config.java @@ -2679,6 +2679,16 @@ public class Config extends ConfigBase { @ConfField(mutable = true, comment = "the max number of threads for lake table delete txnLog when enable batch publish") public static int lake_publish_delete_txnlog_max_threads = 16; + @ConfField(mutable = true, comment = + "Consider balancing between workers during tablet migration in shared data mode. Default: false") + public static boolean lake_enable_balance_tablets_between_workers = false; + + @ConfField(mutable = true, comment = + "Threshold of considering the balancing between workers in shared-data mode, The imbalance factor is " + + "calculated as f = (MAX(tablets) - MIN(tablets)) / AVERAGE(tablets), " + + "if f > lake_balance_tablets_threshold, balancing will be triggered. Default: 0.15") + public static double lake_balance_tablets_threshold = 0.15; + /** * Default lake compaction txn timeout */ diff --git a/fe/fe-core/src/main/java/com/starrocks/staros/StarMgrServer.java b/fe/fe-core/src/main/java/com/starrocks/staros/StarMgrServer.java index f5cb13d392e91..9439d6832a98e 100644 --- a/fe/fe-core/src/main/java/com/starrocks/staros/StarMgrServer.java +++ b/fe/fe-core/src/main/java/com/starrocks/staros/StarMgrServer.java @@ -135,6 +135,8 @@ public void initialize(BDBEnvironment environment, String baseImageDir) throws I com.staros.util.Config.WORKER_HEARTBEAT_INTERVAL_SEC = Config.heartbeat_timeout_second; com.staros.util.Config.WORKER_HEARTBEAT_RETRY_COUNT = Config.heartbeat_retry_times; com.staros.util.Config.GRPC_RPC_TIME_OUT_SEC = Config.starmgr_grpc_timeout_seconds; + com.staros.util.Config.ENABLE_BALANCE_SHARD_NUM_BETWEEN_WORKERS = Config.lake_enable_balance_tablets_between_workers; + com.staros.util.Config.BALANCE_WORKER_SHARDS_THRESHOLD_IN_PERCENT = Config.lake_balance_tablets_threshold; // sync the mutable configVar to StarMgr in case any changes GlobalStateMgr.getCurrentState().getConfigRefreshDaemon().registerListener(() -> { @@ -142,6 +144,8 @@ public void initialize(BDBEnvironment environment, String baseImageDir) throws I com.staros.util.Config.WORKER_HEARTBEAT_INTERVAL_SEC = Config.heartbeat_timeout_second; com.staros.util.Config.WORKER_HEARTBEAT_RETRY_COUNT = Config.heartbeat_retry_times; com.staros.util.Config.GRPC_RPC_TIME_OUT_SEC = Config.starmgr_grpc_timeout_seconds; + com.staros.util.Config.ENABLE_BALANCE_SHARD_NUM_BETWEEN_WORKERS = Config.lake_enable_balance_tablets_between_workers; + com.staros.util.Config.BALANCE_WORKER_SHARDS_THRESHOLD_IN_PERCENT = Config.lake_balance_tablets_threshold; }); // set the following config, in order to provide a customized worker group definition // com.staros.util.Config.RESOURCE_MANAGER_WORKER_GROUP_SPEC_RESOURCE_FILE = "";