From 26566b24a15be278e4097320175f74036a963728 Mon Sep 17 00:00:00 2001 From: gaohongsong Date: Sun, 8 Oct 2023 16:31:55 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=86=B2=E7=AA=81=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../local_tasks/db_meta/update_host_dbmeta.py | 57 ++++++++++++++++++- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/update_host_dbmeta.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/update_host_dbmeta.py index a943b32248..c422c8037b 100644 --- a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/update_host_dbmeta.py +++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/update_host_dbmeta.py @@ -14,15 +14,66 @@ from celery.schedules import crontab +from backend import env from backend.components import CCApi from backend.db_meta.models import Cluster, Machine from backend.db_meta.models.cluster_monitor import SyncFailedMachine from backend.db_periodic_task.local_tasks.register import register_periodic_task +from backend.db_services.ipchooser.query.resource import ResourceQueryHelper from backend.dbm_init.constants import CC_HOST_DBM_ATTR logger = logging.getLogger("celery") +@register_periodic_task(run_every=crontab(minute="*/5")) +def reset_host_dbmeta(bk_biz_id=env.DBA_APP_BK_BIZ_ID, bk_host_ids=None): + """ + 重置业务下空闲集群主机的dbm_meta属性 + """ + now = datetime.datetime.now() + logger.info("[reset_host_dbmeta] start reset begin: %s", now) + + if not bk_host_ids: + machines = ResourceQueryHelper.query_cc_hosts( + { + "bk_biz_id": bk_biz_id, + "bk_inst_id": CCApi.get_biz_internal_module({"bk_biz_id": bk_biz_id}, use_admin=True)["bk_set_id"], + "bk_obj_id": "set", + }, + fields=["bk_host_id"], + )["info"] + bk_host_ids = list(map(lambda x: x["bk_host_id"], machines)) + + # 批量更新接口限制最多500条 + step_size = 496 + reset_hosts, failed_hosts = [], [] + for step in range(len(bk_host_ids) // step_size + 1): + updates = [] + for bk_host_id in bk_host_ids[step * step_size : (step + 1) * step_size]: + updates.append({"properties": {CC_HOST_DBM_ATTR: json.dumps([])}, "bk_host_id": bk_host_id}) + reset_hosts.extend(updates) + + try: + logger.info("[reset_host_dbmeta] batch_update_host: %s", updates) + CCApi.batch_update_host({"update": updates}, use_admin=True) + except Exception as e: # pylint: disable=wildcard-import + failed_hosts.extend(updates) + logger.error("[reset_host_dbmeta] batch reset exception: %s (%s)", updates, e) + + # 容错处理:逐个更新,避免批量更新误伤有效ip + for fail_host in failed_hosts: + try: + CCApi.update_host({"bk_host_id": fail_host["bk_host_id"], "data": fail_host["properties"]}, use_admin=True) + except Exception as e: # pylint: disable=wildcard-import + logger.error("[reset_host_dbmeta] single reset error: %s (%s)", fail_host, e) + + logger.info( + "[reset_host_dbmeta] finish reset end: %s, update_cnt: %s", + datetime.datetime.now() - now, + len(reset_hosts), + ) + + @register_periodic_task(run_every=crontab(minute="*/2")) def update_host_dbmeta(bk_biz_id=None, cluster_id=None, cluster_ips=None, dbm_meta=None): """ @@ -57,12 +108,12 @@ def update_host_dbmeta(bk_biz_id=None, cluster_id=None, cluster_ips=None, dbm_me machines = machines.filter(ip__in=cluster_ips) # 批量更新接口限制最多500条,这里取456条 - STEP = 456 + step_size = 456 updated_hosts, failed_updates = [], [] machine_count = machines.count() - for step in range(machine_count // STEP + 1): + for step in range(machine_count // step_size + 1): updates = [] - for machine in machines[step * STEP : (step + 1) * STEP]: + for machine in machines[step * step_size : (step + 1) * step_size]: cc_dbm_meta = machine.dbm_meta if dbm_meta is None else dbm_meta updates.append( {"properties": {CC_HOST_DBM_ATTR: json.dumps(cc_dbm_meta)}, "bk_host_id": machine.bk_host_id}