diff --git a/dbm-ui/backend/core/encrypt/migrations/0002_alter_asymmetriccipherkey_name.py b/dbm-ui/backend/core/encrypt/migrations/0002_alter_asymmetriccipherkey_name.py
new file mode 100644
index 0000000000..0dc93a2909
--- /dev/null
+++ b/dbm-ui/backend/core/encrypt/migrations/0002_alter_asymmetriccipherkey_name.py
@@ -0,0 +1,22 @@
+# Generated by Django 3.2.19 on 2023-11-28 09:39
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("encrypt", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="asymmetriccipherkey",
+            name="name",
+            field=models.CharField(
+                choices=[("password", "平台密码的非对称秘钥"), ("proxypass", "透传接口的非对称秘钥"), ("cloud", "云区域服务的非对称秘钥")],
+                max_length=128,
+                verbose_name="密钥名称",
+            ),
+        ),
+    ]
diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/check_redis_instance.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/check_redis_instance.py
new file mode 100644
index 0000000000..cf176a53bf
--- /dev/null
+++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/check_redis_instance.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+"""
+TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available.
+Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved.
+Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
+You may obtain a copy of the License at https://opensource.org/licenses/MIT
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+"""
+
+import logging
+from collections import defaultdict
+
+from django.core.exceptions import ObjectDoesNotExist
+from django.db.models import Q
+from django.utils.translation import ugettext_lazy as _
+
+from backend.db_meta.enums import ClusterType, InstanceRole, InstanceStatus
+from backend.db_meta.models import Cluster
+from backend.db_report.enums import MetaCheckSubType
+from backend.db_report.models import MetaCheckReport
+
+logger = logging.getLogger("root")
+
+
+def check_redis_instance():
+    """Public entry point of the redis metadata inspection; delegates to _check_redis_instance()."""
+    _check_redis_instance()
+
+
+def _check_redis_instance():
+    """
+    Inspect the metadata of redis clusters and write one MetaCheckReport row per finding.
+
+    Alone-instance findings (subtype AloneInstance):
+      - a cluster with fewer than 2 proxies
+      - a master whose slave cannot be resolved, or a slave whose master cannot be resolved
+      - a master machine paired with more than one slave machine, and the reverse
+      - a master/slave pair whose ports differ
+
+    Status findings (subtype StatusAbnormal):
+      - a storage or proxy instance whose status is not RUNNING
+
+    A finding is recorded and the loop moves on to the next instance, so a single broken
+    replication pair does not stop the inspection of the remaining instances and clusters.
+    """
+
+    # tendisplus, ssd and cache clusters are inspected together; the inspection starts at 00:00
+    query = (
+        Q(cluster_type=ClusterType.TendisPredixyTendisplusCluster)
+        | Q(cluster_type=ClusterType.TwemproxyTendisSSDInstance)
+        | Q(cluster_type=ClusterType.TendisTwemproxyRedisInstance)
+    )
+    # walk through every matching cluster
+    for c in Cluster.objects.filter(query):
+        logger.info("+===+++++=== start check {} db meta +++++===++++ ".format(c.immute_domain))
+        logger.info("+===+++++=== cluster type is: {} +++++===++++ ".format(c.cluster_type))
+        # a cluster needs at least 2 proxies; count once and reuse the value in the message
+        proxy_count = c.proxyinstance_set.count()
+        if proxy_count < 2:
+            msg = _("集群 {} proxy numbers 小于2, only {}").format(c.immute_domain, proxy_count)
+            MetaCheckReport.objects.create(
+                bk_biz_id=c.bk_biz_id,
+                bk_cloud_id=c.bk_cloud_id,
+                ip="none",
+                cluster=c.immute_domain,
+                cluster_type=c.cluster_type,
+                status=False,
+                msg=msg,
+                subtype=MetaCheckSubType.AloneInstance.value,
+            )
+
+        # check that every master has a slave; both maps are keyed by machine ip
+        master_slave_map, slave_master_map = defaultdict(), defaultdict()
+        for master_obj in c.storageinstance_set.filter(instance_role=InstanceRole.REDIS_MASTER.value):
+            try:
+                slave_obj = master_obj.as_ejector.get().receiver
+            except ObjectDoesNotExist:
+                logger.error("Error occurred while getting slave_obj")
+                msg = _("集群{}的master:{} 获取slave失败").format(c.immute_domain, master_obj)
+                create_meta_alone_report(c, master_obj, msg)
+                # the finding is recorded; keep inspecting instead of aborting the whole task
+                continue
+
+            # one master machine replicating to several slave machines is not supported
+            ifslave = master_slave_map.get(master_obj.machine.ip)
+            if ifslave and ifslave != slave_obj.machine.ip:
+                # translate the template first, then format it, so the msgid can match a catalog entry
+                msg = _("unsupport mutil slave with cluster {} 4:{}").format(c.immute_domain, master_obj.machine.ip)
+                create_meta_alone_report(c, master_obj, msg)
+                continue
+            master_slave_map[master_obj.machine.ip] = slave_obj.machine.ip
+            # a pair whose ports differ is reported as a master without a slave
+            if master_obj.port != slave_obj.port:
+                msg = _("集群{}的master实例:{} 没有slave").format(c.immute_domain, master_obj)
+                create_meta_alone_report(c, master_obj, msg)
+
+        # check that every slave has a master
+        for slave_obj in c.storageinstance_set.filter(instance_role=InstanceRole.REDIS_SLAVE.value):
+            try:
+                master_obj = slave_obj.as_receiver.get().ejector
+            except ObjectDoesNotExist:
+                logger.error("Error occurred while getting master_obj")
+                msg = _("集群{}的slave:{} 获取master失败").format(c.immute_domain, slave_obj)
+                create_meta_alone_report(c, slave_obj, msg)
+                continue
+
+            # one slave machine replicating from several master machines is not supported
+            ifmaster = slave_master_map.get(slave_obj.machine.ip)
+            if ifmaster and ifmaster != master_obj.machine.ip:
+                msg = _("unsupport mutil master with cluster {} 4:{}").format(c.immute_domain, slave_obj.machine.ip)
+                create_meta_alone_report(c, slave_obj, msg)
+                continue
+            slave_master_map[slave_obj.machine.ip] = master_obj.machine.ip
+            # a pair whose ports differ is reported as a slave without a master
+            if slave_obj.port != master_obj.port:
+                msg = _("集群{}的slave实例:{} 没有master").format(c.immute_domain, slave_obj)
+                create_meta_alone_report(c, slave_obj, msg)
+        # report storage instances that are not RUNNING
+        for instance_obj in c.storageinstance_set.all():
+            create_meta_statue_report(c, instance_obj)
+        # report proxy instances that are not RUNNING
+        for instance_obj in c.proxyinstance_set.all():
+            create_meta_statue_report(c, instance_obj)
+
+
+def create_meta_statue_report(c, instance_obj):
+    """
+    Write a StatusAbnormal report row for instance_obj when its status is not RUNNING.
+
+    :param c: cluster the instance belongs to
+    :param instance_obj: storage or proxy instance to check
+    """
+    if instance_obj.status != InstanceStatus.RUNNING:
+        msg = _("集群{}的实例:{}实例状态异常:{}").format(c.immute_domain, instance_obj.ip_port, instance_obj.status)
+        MetaCheckReport.objects.create(
+            bk_biz_id=c.bk_biz_id,
+            bk_cloud_id=c.bk_cloud_id,
+            ip=instance_obj.machine.ip,
+            port=instance_obj.port,
+            cluster=c.immute_domain,
+            cluster_type=c.cluster_type,
+            status=False,
+            msg=msg,
+            subtype=MetaCheckSubType.StatusAbnormal.value,
+        )
+
+
+def create_meta_alone_report(c, instance_obj, msg):
+    """
+    Write an AloneInstance report row for instance_obj.
+
+    :param c: cluster the instance belongs to
+    :param instance_obj: instance the finding is about
+    :param msg: human readable description stored in the report
+    """
+    MetaCheckReport.objects.create(
+        bk_biz_id=c.bk_biz_id,
+        bk_cloud_id=c.bk_cloud_id,
+        ip=instance_obj.machine.ip,
+        port=instance_obj.port,
+        cluster=c.immute_domain,
+        cluster_type=c.cluster_type,
+        status=False,
+        msg=msg,
+        subtype=MetaCheckSubType.AloneInstance.value,
+    )
diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py
index 5e850ab4b0..9833c44f4b 100644
--- a/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py
+++ b/dbm-ui/backend/db_periodic_task/local_tasks/db_meta/db_meta_check/task.py
@@ -15,6 +15,7 @@
 from backend.db_periodic_task.local_tasks.register import register_periodic_task
 
 from .check_instance_belong import check_instance_belong
+from .check_redis_instance import check_redis_instance
 from .check_replicate_role import check_replicate_role
 
 logger = logging.getLogger("celery")
@@ -26,5 +27,6 @@ def db_meta_check_task():
     """
     巡检校验元数据
     """
+    check_redis_instance()
     check_instance_belong()
     check_replicate_role()
diff --git a/dbm-ui/backend/db_periodic_task/local_tasks/redis_backup/check_binlog_backup.py
b/dbm-ui/backend/db_periodic_task/local_tasks/redis_backup/check_binlog_backup.py index 09a5cd389f..ea81e71b8d 100644 --- a/dbm-ui/backend/db_periodic_task/local_tasks/redis_backup/check_binlog_backup.py +++ b/dbm-ui/backend/db_periodic_task/local_tasks/redis_backup/check_binlog_backup.py @@ -101,7 +101,7 @@ def _check_tendis_binlog_backup(): # 如果节点维度没有数据,就不用在进行下面的了 # 这里如果提升为集群维度的话,一般会有40*10*24*3=28800个文件,所以按节点维度来查 if not bklogs: - msg = _("无法查找到在时间范围内{}-{},集群{}:{}的全备份日志").format(start_time, end_time, c.immute_domain, instance) + msg = _("无法查找到在时间范围内{}-{},集群{}:{}的binlog备份日志").format(start_time, end_time, c.immute_domain, instance) logger.error(msg) RedisBackupCheckReport.objects.create( creator=c.creator, diff --git a/dbm-ui/backend/db_report/enums/meta_check_sub_type.py b/dbm-ui/backend/db_report/enums/meta_check_sub_type.py index a2eb95cbbc..c458742662 100644 --- a/dbm-ui/backend/db_report/enums/meta_check_sub_type.py +++ b/dbm-ui/backend/db_report/enums/meta_check_sub_type.py @@ -17,3 +17,5 @@ class MetaCheckSubType(str, StructuredEnum): InstanceBelong = EnumField("instance_belong", _("实例集群归属")) ReplicateRole = EnumField("replicate_role", _("数据同步实例角色")) ClusterTopo = EnumField("cluster_topo", _("集群结构")) + AloneInstance = EnumField("alone_instance", _("孤立的实例")) + StatusAbnormal = EnumField("status_abnormal", _("不属于RUNNING状态")) diff --git a/dbm-ui/backend/db_report/mock_data.py b/dbm-ui/backend/db_report/mock_data.py index e5594efe65..d4907bf571 100644 --- a/dbm-ui/backend/db_report/mock_data.py +++ b/dbm-ui/backend/db_report/mock_data.py @@ -126,3 +126,21 @@ {"name": "create_at", "display_name": "心跳超时时间", "format": "text"}, ], } + +# 元数据检查那里还需要在增加redis特有的检查 +REDIS_META_CHECK_DATA = { + "count": 1, + "next": None, + "previous": None, + "results": [ + {"bk_biz_id": 3, "cluster": "xx.xx.xx.xx", "cluster_type": "TwemproxyRedisInstance", "status": True, "msg": ""} + ], + "name": "redis 元数据检查", + "title": [ + {"name": "bk_biz_id", "display_name": "业务", "format": 
"text"}, + {"name": "cluster", "display_name": "集群名", "format": "text"}, + {"name": "cluster_type", "display_name": "集群类型", "format": "text"}, + {"name": "status", "display_name": "元数据状态", "format": "status"}, + {"name": "msg", "display_name": "详情", "format": "text"}, + ], +} diff --git a/dbm-ui/backend/db_report/urls.py b/dbm-ui/backend/db_report/urls.py index 3338d4c1c6..18d0081fa1 100644 --- a/dbm-ui/backend/db_report/urls.py +++ b/dbm-ui/backend/db_report/urls.py @@ -21,4 +21,6 @@ url("^redis_check/full_backup$", views.RedisFullBackupCheckReportViewSet.as_view({"get": "list"})), url("^redis_check/binlog_backup$", views.RedisBinlogBackupCheckReportViewSet.as_view({"get": "list"})), url("^dbmon/heartbeat$", views.DbmonHeatbeartCheckReportBaseViewSet.as_view({"get": "list"})), + url("^redis_meta_check/status_abnormal$", views.RedisStatusAbnormalCheckReportViewSet.as_view({"get": "list"})), + url("^redis_meta_check/alone_instance$", views.RedisAloneInstanceCheckReportViewSet.as_view({"get": "list"})), ] diff --git a/dbm-ui/backend/db_report/views/__init__.py b/dbm-ui/backend/db_report/views/__init__.py index e5e684e089..2bf74b43f2 100644 --- a/dbm-ui/backend/db_report/views/__init__.py +++ b/dbm-ui/backend/db_report/views/__init__.py @@ -13,4 +13,5 @@ from .dbmon_heartbeat_view import DbmonHeatbeartCheckReportBaseViewSet from .meta_check_view import MetaCheckReportInstanceBelongViewSet from .mysqlbackup_check_view import MysqlBinlogBackupCheckReportViewSet, MysqlFullBackupCheckReportViewSet +from .redis_dbmeta_check_view import RedisAloneInstanceCheckReportViewSet, RedisStatusAbnormalCheckReportViewSet from .redisbackup_check_view import RedisBinlogBackupCheckReportViewSet, RedisFullBackupCheckReportViewSet diff --git a/dbm-ui/backend/db_report/views/redis_dbmeta_check_view.py b/dbm-ui/backend/db_report/views/redis_dbmeta_check_view.py new file mode 100644 index 0000000000..0d30b0fd2a --- /dev/null +++ b/dbm-ui/backend/db_report/views/redis_dbmeta_check_view.py @@ 
-0,0 +1,111 @@
+"""
+TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available.
+Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved.
+Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
+You may obtain a copy of the License at https://opensource.org/licenses/MIT
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+"""
+
+
+import logging
+
+from django.utils.translation import ugettext_lazy as _
+from rest_framework import serializers, status
+
+from backend.bk_web.swagger import common_swagger_auto_schema
+from backend.db_report import mock_data
+from backend.db_report.enums import SWAGGER_TAG, MetaCheckSubType, ReportFieldFormat
+from backend.db_report.models import MetaCheckReport
+from backend.db_report.report_baseview import ReportBaseViewSet
+
+logger = logging.getLogger("root")
+
+
+# Serializes the MetaCheckReport columns exposed by the redis metadata check reports.
+class RedisDbmetaCheckReportSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = MetaCheckReport
+        fields = ("bk_biz_id", "cluster", "cluster_type", "status", "msg")
+        swagger_schema_fields = {"example": mock_data.REDIS_META_CHECK_DATA}
+
+
+# Shared configuration of the redis metadata check reports. Subclasses narrow `queryset` to one
+# MetaCheckSubType and set their own `report_name`; everything else declared here is inherited.
+# NOTE(review): documented with comments, not docstrings, so generated API descriptions stay as they were.
+class RedisDbmetaCheckReportBaseViewSet(ReportBaseViewSet):
+    queryset = MetaCheckReport.objects.all()
+    serializer_class = RedisDbmetaCheckReportSerializer
+    filter_fields = {  # most of the time the default filters do not need to be overridden
+        "bk_biz_id": ["exact"],
+        "cluster_type": ["exact", "in"],
+        "create_at": ["gte", "lte"],
+        "status": ["exact", "in"],
+    }
+    report_name = _("redis 元数据检查")
+    # one entry per report column: serializer field name, translated header, display format
+    report_title = [
+        {
+            "name": "bk_biz_id",
+            "display_name": _("业务"),
+            "format": ReportFieldFormat.TEXT.value,
+        },
+        {
+            "name": "cluster",
+            "display_name": _("集群域名"),
+            "format": ReportFieldFormat.TEXT.value,
+        },
+        {
+            "name": "cluster_type",
+            "display_name": _("集群类型"),
+            "format": ReportFieldFormat.TEXT.value,
+        },
+        {
+            "name": "status",
+            "display_name": _("元数据状态"),
+            "format": ReportFieldFormat.STATUS.value,
+        },
+        {
+            "name": "msg",
+            "display_name": _("详情"),
+            "format": ReportFieldFormat.TEXT.value,
+        },
+    ]
+
+    # overridden only to attach the swagger schema; listing itself is done by the parent class
+    @common_swagger_auto_schema(
+        operation_summary=_("redis 元数据检查报告"),
+        responses={status.HTTP_200_OK: RedisDbmetaCheckReportSerializer()},
+        tags=[SWAGGER_TAG],
+    )
+    def list(self, request, *args, **kwargs):
+        return super().list(request, *args, **kwargs)
+
+
+# Report restricted to MetaCheckReport rows whose subtype is AloneInstance.
+class RedisAloneInstanceCheckReportViewSet(RedisDbmetaCheckReportBaseViewSet):
+    queryset = MetaCheckReport.objects.filter(subtype=MetaCheckSubType.AloneInstance.value)
+    report_name = _("孤立节点检查")
+
+    @common_swagger_auto_schema(
+        operation_summary=_("孤立节点检查报告"),
+        responses={status.HTTP_200_OK: RedisDbmetaCheckReportSerializer()},
+        tags=[SWAGGER_TAG],
+    )
+    def list(self, request, *args, **kwargs):
+        return super().list(request, *args, **kwargs)
+
+
+# Report restricted to MetaCheckReport rows whose subtype is StatusAbnormal.
+class RedisStatusAbnormalCheckReportViewSet(RedisDbmetaCheckReportBaseViewSet):
+    queryset = MetaCheckReport.objects.filter(subtype=MetaCheckSubType.StatusAbnormal.value)
+    report_name = _("实例状态异常检查")
+
+    @common_swagger_auto_schema(
+        operation_summary=_("实例状态异常检查"),
+        responses={status.HTTP_200_OK: RedisDbmetaCheckReportSerializer()},
+        tags=[SWAGGER_TAG],
+    )
+    def list(self, request, *args, **kwargs):
+        return super().list(request, *args, **kwargs)