Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(backend): 支持 flow 批量失败重试 #3041 #3052

Merged
merged 1 commit into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check_hard_code_ip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
- uses: actions/checkout@v2
- name: Check hard code ip
run: |
RESULT=$(grep -nrE '\b([0-9]{1,3}\.){3}[0-9]{1,3}\b' * | grep -vE '\b[012345678]\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | grep -vE '127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' grep -vE '192\.168\.[0-9]{1,3}\.[0-9]{1,3}') || true
RESULT=$(grep -nrE '\b([0-9]{1,3}\.){3}[0-9]{1,3}\b' * | grep -v poetry.lock | grep -vE '\b[012345]\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | grep -vE '127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | grep -vE '192\.168\.[0-9]{1,3}\.[0-9]{1,3}') || true
if [[ ${RESULT} == '' ]]; then
echo "good job!"
else
Expand Down
2 changes: 1 addition & 1 deletion dbm-services/common/dbha/ha-module/test/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func TestGetInstanceByIp(t *testing.T) {
cmdbC := client.CmDBClient{
Client: *c,
}
inf, err := cmdbC.GetDBInstanceInfoByIp("6.6.6.6")
inf, err := cmdbC.GetDBInstanceInfoByIp("127.0.0.6")
if err != nil {
fmt.Printf("get instance failed. err:%s", err.Error())
t.FailNow()
Expand Down
4 changes: 2 additions & 2 deletions dbm-services/mysql/db-priv/service/db_meta_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ GetAllClustersInfo 获取业务下所有集群信息
],
"storages": [
{
"ip": "3.3.3.3",
"ip": "127.0.0.3",
"instance_role": "backend_slave",
"port": 30000
},
{
"ip": "4.4.4.4",
"ip": "127.0.0.4",
"instance_role": "backend_master",
"port": 40000
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (g *GrantReplComp) Example() interface{} {
Params: &GrantReplParam{
Host: "1.1.1.1",
Port: 3306,
ReplHosts: []string{"2.2.2.2", "3.3.3.3"},
ReplHosts: []string{"2.2.2.2", "127.0.0.3"},
},
GeneralParam: &components.GeneralParam{
RuntimeAccountParam: components.RuntimeAccountParam{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ func (a *AddSlaveClusterRoutingComp) Example() interface{} {
Port: 26000,
SlaveInstances: []Instance{
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 20000,
ShardID: 0,
},
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 20001,
ShardID: 1,
},
},
SpiderSlaveInstances: []Instance{
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 25000,
},
{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 25001,
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,13 @@ func (s *SpiderClusterBackendMigrateCutoverComp) Example() interface{} {
Port: 3006,
},
DestMaster: CutoverUnit{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3306,
User: "xx",
Password: "xx",
},
DestSlave: CutoverUnit{
Host: "4.4.4.4",
Host: "127.0.0.4",
Port: 3306,
User: "xx",
Password: "xx",
Expand All @@ -159,13 +159,13 @@ func (s *SpiderClusterBackendMigrateCutoverComp) Example() interface{} {
Port: 3007,
},
DestMaster: CutoverUnit{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3307,
User: "xx",
Password: "xx",
},
DestSlave: CutoverUnit{
Host: "4.4.4.4",
Host: "127.0.0.4",
Port: 3307,
User: "xx",
Password: "xx",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (r *SpiderClusterBackendSwitchComp) Example() interface{} {
Port: 3306,
},
Slave: Instance{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3306,
},
},
Expand All @@ -117,7 +117,7 @@ func (r *SpiderClusterBackendSwitchComp) Example() interface{} {
Port: 3307,
},
Slave: Instance{
Host: "3.3.3.3",
Host: "127.0.0.3",
Port: 3307,
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ package scenesnapshot
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
| ID | USER | HOST | DB | COMMAND | TIME | STATE | INFO |
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
| 74590 | root | 10.45.39.34:54219 | | Query | 0 | executing | SELECT ID, USER, |
| 74590 | root | 127.0.0.1:54219 | | Query | 0 | executing | SELECT ID, USER, |
| | | | | | | | HOST, DB, COMMAND, |
| | | | | | | | TIME, STATE, INFO FROM |
| | | | | | | | INFORMATION_SCHEMA.PROCESSLIST |
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
| 74572 | root | 10.45.39.34:62014 | | Sleep | 2865 | | |
| 74572 | root | 127.0.0.2:62014 | | Sleep | 2865 | | |
+-------+------+-------------------+----+---------+------+-----------+--------------------------------+
*/

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ func TestDecodeClusterNodes(t *testing.T) {
mylog.UnitTestInitLog()
convey.Convey("cluster nodes decode", t, func() {
clusterNodesStr := `
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 4.4.4.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 3.3.3.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 127.0.0.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 127.0.0.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
56e53ca70ef13f3ca1817b0746d64319a4b66fed synctest-redis-rdsplus1-0.synctest-svc.vip:30000@40000 myself,slave 72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 0 1655006313000 3 connected
72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 synctest-redis-rdsplus1-1.synctest-svc.vip:30000@40000 master - 0 1655006315419 7 connected 5461-10921
`
Expand All @@ -25,7 +25,7 @@ func TestDecodeClusterNodes(t *testing.T) {
}
convey.So(len(nodes), convey.ShouldEqual, 4)
convey.So(nodes[0].NodeID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
convey.So(nodes[0].IP, convey.ShouldEqual, "4.4.4.4")
convey.So(nodes[0].IP, convey.ShouldEqual, "127.0.0.4")
convey.So(nodes[0].Port, convey.ShouldEqual, 30003)
convey.So(nodes[0].SlotsMap, convey.ShouldContainKey, 7560)
convey.So(nodes[1].MasterID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
Expand All @@ -36,12 +36,12 @@ func TestDecodeClusterNodes(t *testing.T) {
})

convey.Convey("cluster nodes decode2", t, func() {
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 5.5.5.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 5.5.5.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 5.5.5.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 5.5.5.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 5.5.5.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 5.5.5.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 127.0.0.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 127.0.0.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 127.0.0.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 127.0.0.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 127.0.0.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 127.0.0.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
`
nodes, err := DecodeClusterNodes(clusterNodesStr)
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import (
func TestDecodeClusterNodes(t *testing.T) {
convey.Convey("cluster nodes decode", t, func() {
clusterNodesStr := `
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 4.4.4.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 3.3.3.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 127.0.0.4:30003@40003 master - 0 1655005291000 20 connected 7509-8191
e81c4276dce41ae3ed4a5fe18e460ed5b9f77e8b 127.0.0.3:30003@40003 slave 17922e98b0b8f7a9d233422cf8ae55f2d22fdab7 0 1655005291000 20 connected
56e53ca70ef13f3ca1817b0746d64319a4b66fed synctest-redis-rdsplus1-0.synctest-svc.vip:30000@40000 myself,slave 72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 0 1655006313000 3 connected
72ffcd1f8d39d1b6011ab38f5e1a42dd6f66f765 synctest-redis-rdsplus1-1.synctest-svc.vip:30000@40000 master - 0 1655006315419 7 connected 5461-10921
`
Expand All @@ -22,7 +22,7 @@ func TestDecodeClusterNodes(t *testing.T) {
}
convey.So(len(nodes), convey.ShouldEqual, 4)
convey.So(nodes[0].NodeID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
convey.So(nodes[0].IP, convey.ShouldEqual, "4.4.4.4")
convey.So(nodes[0].IP, convey.ShouldEqual, "127.0.0.4")
convey.So(nodes[0].Port, convey.ShouldEqual, 30003)
convey.So(nodes[0].SlotsMap, convey.ShouldContainKey, 7560)
convey.So(nodes[1].MasterID, convey.ShouldEqual, "17922e98b0b8f7a9d233422cf8ae55f2d22fdab7")
Expand All @@ -33,12 +33,12 @@ func TestDecodeClusterNodes(t *testing.T) {
})

convey.Convey("cluster nodes decode2", t, func() {
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 5.5.5.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 5.5.5.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 5.5.5.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 5.5.5.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 5.5.5.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 5.5.5.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
clusterNodesStr := `36b96240e16051711d2391472cfd5900d33dc8bd 127.0.0.5:46000@56000 master - 0 1660014754278 5 connected
a32f9cb266d85ea96a1a87ce56872f339e2a257f 127.0.0.5:45001@55001 master - 0 1660014755280 4 connected 5462-10923
5d555b4ab569de196f71afd275c1edf8c046959a 127.0.0.5:45000@55000 myself,master - 0 1660014753000 1 connected 0-5461
90ed7be9db5e4b78e959ad3b40253c2ffb3d5845 127.0.0.5:46002@56002 master - 0 1660014752269 3 connected
dcff36cc5e915024d12173b1c5a3235e9186f193 127.0.0.5:46001@56001 master - 0 1660014753273 2 connected
ff29e2e2782916a0451d5f4064cb55483f4b2a97 127.0.0.5:45002@55002 master - 0 1660014753000 0 connected 10924-16383
`
nodes, err := DecodeClusterNodes(clusterNodesStr)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions dbm-ui/backend/bk_dataview/dashboards/json/kafka.json
Original file line number Diff line number Diff line change
Expand Up @@ -4231,7 +4231,7 @@
"options": {
"mode": "exclude",
"names": [
"172.27.129.217-9092"
"127.0.0.1-9092"
],
"prefix": "All except:",
"readOnly": true
Expand Down Expand Up @@ -4557,7 +4557,7 @@
"options": {
"mode": "exclude",
"names": [
"172.27.128.218"
"127.0.0.1"
],
"prefix": "All except:",
"readOnly": true
Expand Down
4 changes: 2 additions & 2 deletions dbm-ui/backend/bk_dataview/dashboards/json/tendbha.json
Original file line number Diff line number Diff line change
Expand Up @@ -8358,8 +8358,8 @@
{
"current": {
"selected": false,
"text": "9.143.80.224",
"value": "9.143.80.224"
"text": "127.0.0.1",
"value": "127.0.0.1"
},
"datasource": {
"type": "bkmonitor-timeseries-datasource",
Expand Down
30 changes: 30 additions & 0 deletions dbm-ui/backend/db_services/taskflow/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Any, Dict, List, Optional

from bamboo_engine.api import EngineAPIResult
from bamboo_engine.eri import NodeType
from django.utils import timezone
from django.utils.translation import gettext as _

Expand Down Expand Up @@ -78,6 +79,15 @@ def retry_node(self, node_id: str):
"""重试节点"""
return task.retry_node(root_id=self.root_id, node_id=node_id, retry_times=1)

def batch_retry_nodes(self):
    """Retry every failed service node of this flow, one node at a time.

    The node IDs come from ``get_failed_node_ids``. Retrying is best-effort:
    a failure on one node is logged and the remaining nodes are still retried.

    Returns:
        None. Failures are reported through the log only.
    """
    for node_id in self.get_failed_node_ids():
        try:
            result = self.retry_node(node_id)
        except Exception as err:
            # Best-effort: one broken node must not block the others.
            logger.error(f"{node_id} retry failed, {err}")
            continue
        # NOTE(review): retry_node is presumed to return an EngineAPIResult-like
        # object whose ``result`` flag is False on failure -- confirm. Such a
        # failure does not raise, so it has to be checked for explicitly or it
        # would go unnoticed.
        if getattr(result, "result", True) is False:
            logger.error(f"{node_id} retry failed, {getattr(result, 'message', '')}")

def skip_node(self, node_id: str):
"""跳过节点"""
result = BambooEngine(root_id=self.root_id).skip_node(node_id=node_id)
Expand Down Expand Up @@ -107,6 +117,26 @@ def callback_node(self, node_id: str, desc: Optional[Any]):

return result

def get_failed_node_ids(self) -> List[str]:
    """Collect the IDs of all failed service activities in this flow.

    Reads the pipeline tree states for ``self.root_id`` and walks the
    ``activities`` mapping depth-first, descending into any activity that
    carries a nested ``pipeline``.

    Returns:
        IDs of activities whose ``status`` is ``StateType.FAILED`` and whose
        ``type`` is ``NodeType.ServiceActivity``, in traversal order
        (activities nested in a sub-pipeline come before their parent).
    """

    def collect(acts, found):
        for node_id, node in acts.items():
            # Descend first so nested failures are recorded before the parent.
            if "pipeline" in node:
                collect(node["pipeline"].get("activities", {}), found)
            if node.get("status") == StateType.FAILED:
                if node.get("type") == NodeType.ServiceActivity:
                    found.append(node_id)
        return found

    states = BambooEngine(root_id=self.root_id).get_pipeline_tree_states()
    return collect(states.get("activities", {}), [])

def get_node_histories(self, node_id: str) -> List[Dict[str, Any]]:
"""获取节点历史版本信息"""
histories = [
Expand Down
9 changes: 9 additions & 0 deletions dbm-ui/backend/db_services/taskflow/views/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ def retry_node(self, requests, *args, **kwargs):
validated_data = self.params_validate(self.get_serializer_class())
return Response(TaskFlowHandler(root_id=root_id).retry_node(node_id=validated_data["node_id"]).result)

@common_swagger_auto_schema(
    operation_summary=_("批量重试"),
    tags=[SWAGGER_TAG],
)
@action(methods=["POST"], detail=True)
def batch_retry_nodes(self, requests, *args, **kwargs):
    # Detail-route POST: retry all failed nodes of the flow named by the
    # ``root_id`` URL kwarg. The response body is whatever the handler returns.
    handler = TaskFlowHandler(root_id=kwargs["root_id"])
    return Response(handler.batch_retry_nodes())

@common_swagger_auto_schema(
operation_summary=_("跳过节点"),
tags=[SWAGGER_TAG],
Expand Down
12 changes: 6 additions & 6 deletions dbm-ui/backend/flow/views/migrate_views/pulsar_fake_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,41 +53,41 @@ class FakeInstallPulsarSceneApiView(MigrateFlowView):
"nodes": {
"zookeeper": [
{
"ip": "1.1.1.1",
"ip": "127.0.0.1",
"bk_cloud_id": 0,
"bk_host_id": 1,
"bk_biz_id": 111
},
{
"ip": "2.2.2.2",
"ip": "127.0.0.2",
"bk_cloud_id": 0,
"bk_host_id": 2,
"bk_biz_id": 111
},
{
"ip": "3.3.3.3",
"ip": "127.0.0.3",
"bk_cloud_id": 0,
"bk_host_id": 3,
"bk_biz_id": 111
}
],
"broker": [
{
"ip": "4.4.4.4",
"ip": "127.0.0.4",
"bk_cloud_id": 0,
"bk_host_id": 4,
"bk_biz_id": 111
}
],
"bookkeeper": [
{
"ip": "5.5.5.5",
"ip": "127.0.0.5",
"bk_cloud_id": 0,
"bk_host_id": 5,
"bk_biz_id": 111
},
{
"ip": "6.6.6.6",
"ip": "127.0.0.6",
"bk_cloud_id": 0,
"bk_host_id": 6,
"bk_biz_id": 111
Expand Down
1 change: 1 addition & 0 deletions dbm-ui/bin/environ.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ export APP_TOKEN="xxxxxx"
export DJANGO_SETTINGS_MODULE=config.prod
export BK_LOG_DIR=/tmp/bk-dbm
export BK_IAM_SKIP=true
export DBA_APP_BK_BIZ_ID=0
export DB_NAME="bk_dbm"
export REPORT_DB_NAME="bk_dbm_report"
4 changes: 2 additions & 2 deletions helm-charts/bk-dbm/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,5 @@ dependencies:
description: A Helm chart for bk-dbm
name: bk-dbm
type: application
version: 1.3.0-alpha.35
appVersion: 1.3.0-alpha.35
version: 1.3.0-alpha.36
appVersion: 1.3.0-alpha.36
2 changes: 1 addition & 1 deletion helm-charts/bk-dbm/charts/dbconfig/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 0.0.1-alpha.72
appVersion: 0.0.1-alpha.73
description: A Helm chart for dbconfig
name: dbconfig
type: application
Expand Down
2 changes: 1 addition & 1 deletion helm-charts/bk-dbm/charts/dbm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.3.0-alpha.303
appVersion: 1.3.0-alpha.315
description: A Helm chart for dbm
name: dbm
type: application
Expand Down
Loading