From f99a4bb2be4cf90f30032d283834b91254fe7a70 Mon Sep 17 00:00:00 2001 From: xfwduke Date: Mon, 11 Sep 2023 10:13:08 +0800 Subject: [PATCH] =?UTF-8?q?feat(mysql):=20mysql-monitor=E5=A2=9E=E5=8A=A0d?= =?UTF-8?q?isable=E5=AD=90=E5=91=BD=E4=BB=A4=20close=20#1024?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mysql/db-tools/mysql-monitor/README.md | 15 +++ .../mysql-monitor/cmd/reschedule_items.go | 125 ++++++++++++++++++ .../mysql-monitor/cmd/subcmd_disable_all.go | 86 ++++++++++++ .../mysql-monitor/cmd/subcmd_reschedule.go | 115 +--------------- .../db-tools/mysql-monitor/pkg/config/init.go | 26 ++++ 5 files changed, 257 insertions(+), 110 deletions(-) create mode 100644 dbm-services/mysql/db-tools/mysql-monitor/cmd/reschedule_items.go create mode 100644 dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_disable_all.go diff --git a/dbm-services/mysql/db-tools/mysql-monitor/README.md b/dbm-services/mysql/db-tools/mysql-monitor/README.md index d7919fdc3c..f858fe0a16 100644 --- a/dbm-services/mysql/db-tools/mysql-monitor/README.md +++ b/dbm-services/mysql/db-tools/mysql-monitor/README.md @@ -15,6 +15,21 @@ ## _clean_ * 执行 `mysql-monitor -c runtime.yaml clean` 会删除所有相关的 `mysql-crond entry` +* 会触发 `监控心跳丢失` 的告警 +* 一般只用于下架场景 +* 如果只是需要临时停止监控, 请使用下面的 `disable-all` + +## _disable-all_ +`mysql-monitor disable-all -c monitor-config_20000.yaml --staff somebody --with-db-up` +* 保留 `监控心跳` +* 停掉包括 `db-up` 在内的所有监控项 + +如果不使用 `--with-db-up`, 则会保留 `db-up` 监控项 + +不修改任何配置文件, _disable-all_ 不会持久化, 可以随时使用上面提到的 _reschedule_ 恢复回来 + + + ## 硬编码项 目前有两个硬编码项 1. 
执行心跳
diff --git a/dbm-services/mysql/db-tools/mysql-monitor/cmd/reschedule_items.go b/dbm-services/mysql/db-tools/mysql-monitor/cmd/reschedule_items.go
new file mode 100644
index 0000000000..acde4c0979
--- /dev/null
+++ b/dbm-services/mysql/db-tools/mysql-monitor/cmd/reschedule_items.go
@@ -0,0 +1,129 @@
+package cmd
+
+import (
+	"fmt"
+	"strings"
+
+	ma "dbm-services/mysql/db-tools/mysql-crond/api"
+	"dbm-services/mysql/db-tools/mysql-monitor/pkg/config"
+
+	"golang.org/x/exp/slog"
+)
+
+// reschedule rebuilds the mysql-crond entries of this monitor from config.ItemsConfig.
+//
+// Every entry that belongs to config.MonitorConfig.Port is deleted first. Then one "run"
+// entry is created per schedule for the regular items, and one "hardcode-run" entry per
+// enabled hard-coded item (db-up and heart beat). configFileDir becomes the WorkDir of each
+// entry, configFileName is handed to it through -c, and staff is recorded as its creator.
+// The first failure is logged and returned.
+func reschedule(configFileDir, configFileName, staff string) error {
+	manager := ma.NewManager(config.MonitorConfig.ApiUrl)
+	entries, err := manager.Entries()
+	if err != nil {
+		slog.Error("reschedule list entries", slog.String("error", err.Error()))
+		return err
+	}
+
+	// Match the bare name or the name followed by "-", so that port 3306
+	// does not also select the entries of port 33060.
+	jobName := fmt.Sprintf("mysql-monitor-%d", config.MonitorConfig.Port)
+	for _, entry := range entries {
+		if entry.Job.Name != jobName && !strings.HasPrefix(entry.Job.Name, jobName+"-") {
+			continue
+		}
+		eid, err := manager.Delete(entry.Job.Name, true)
+		if err != nil {
+			slog.Error(
+				"reschedule delete entry",
+				slog.String("error", err.Error()),
+				slog.String("name", entry.Job.Name),
+			)
+			return err
+		}
+		slog.Info(
+			"reschedule delete entry",
+			slog.String("name", entry.Job.Name),
+			slog.Int("ID", eid),
+		)
+	}
+
+	var hardCodeItems []*config.MonitorItem
+	itemGroups := make(map[string][]*config.MonitorItem)
+	for _, ele := range config.ItemsConfig {
+		// Hard-coded items are registered separately below.
+		if ele.Name == "db-up" || ele.Name == config.HeartBeatName {
+			if ele.IsEnable() {
+				hardCodeItems = append(hardCodeItems, ele)
+			}
+			continue
+		}
+
+		if ele.IsEnable() && ele.IsMatchMachineType() && ele.IsMatchRole() {
+			// Items without their own schedule fall back to the default one.
+			key := config.MonitorConfig.DefaultSchedule
+			if ele.Schedule != nil {
+				key = *ele.Schedule
+			}
+			itemGroups[key] = append(itemGroups[key], ele)
+		}
+	}
+
+	// One entry per schedule, running all items of that schedule together.
+	for k, v := range itemGroups {
+		itemNames := make([]string, 0, len(v))
+		for _, j := range v {
+			itemNames = append(itemNames, j.Name)
+		}
+		args := []string{
+			"run",
+			"--items", strings.Join(itemNames, ","),
+			"-c", configFileName, // use WorkDir
+		}
+		eid, err := manager.CreateOrReplace(
+			ma.JobDefine{
+				Name:     fmt.Sprintf("mysql-monitor-%d-%s", config.MonitorConfig.Port, k),
+				Command:  executable,
+				Args:     args,
+				Schedule: k,
+				Creator:  staff,
+				Enable:   true,
+				WorkDir:  configFileDir,
+			}, true,
+		)
+		if err != nil {
+			slog.Error("reschedule add entry", slog.String("error", err.Error()))
+			return err
+		}
+		slog.Info("reschedule add entry", slog.Int("entry id", eid))
+	}
+
+	// One entry per hard-coded item.
+	for _, j := range hardCodeItems {
+		args := []string{
+			"hardcode-run",
+			"--items", j.Name,
+			"-c", configFileName,
+		}
+
+		eid, err := manager.CreateOrReplace(
+			ma.JobDefine{
+				Name: fmt.Sprintf(
+					"mysql-monitor-%d-hardcode-%s", config.MonitorConfig.Port, j.Name),
+				Command:  executable,
+				Args:     args,
+				Schedule: config.HardCodeSchedule,
+				Creator:  staff,
+				Enable:   true,
+				WorkDir:  configFileDir,
+			}, true,
+		)
+		if err != nil {
+			slog.Error("reschedule add hardcode entry", slog.String("error", err.Error()))
+			return err
+		}
+		slog.Info("reschedule add hardcode entry", slog.Int("entry id", eid))
+	}
+
+	return nil
+}
diff --git a/dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_disable_all.go b/dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_disable_all.go
new file mode 100644
index 0000000000..25c2c9d25c
--- /dev/null
+++ b/dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_disable_all.go
@@ -0,0 +1,93 @@
+package cmd
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/spf13/cobra"
+	"github.com/spf13/viper"
+	"golang.org/x/exp/slog"
+
+	"dbm-services/mysql/db-tools/mysql-monitor/pkg/config"
+)
+
+// subCmdDisableAll replaces the crond entries of the monitor with the hard-coded
+// ones only: heart beat always, db-up unless --with-db-up is given.
+// The items config file on disk is not written back by this command.
+var subCmdDisableAll = &cobra.Command{
+	Use:   "disable-all",
+	Short: "disable-all items",
+	Long:  "disable-all items",
+	RunE: func(cmd *cobra.Command, args []string) error {
+		configPath := viper.GetString("disable-config")
+		if !filepath.IsAbs(configPath) {
+			cwd, err := os.Getwd()
+			if err != nil {
+				slog.Error("disable-all get config abs path", slog.String("error", err.Error()))
+				return err
+			}
+			configPath = filepath.Join(cwd, configPath)
+		}
+		configFileDir, configFileName := filepath.Split(configPath)
+
+		err := config.InitConfig(configPath)
+		if err != nil {
+			return err
+		}
+		initLogger(config.MonitorConfig.Log)
+
+		// Point the items config at an empty temporary file before loading, so that
+		// the regular items are not read; the hard-coded ones are injected below.
+		emptyItemsConfig, err := os.CreateTemp("/tmp", "empty-items.yaml")
+		if err != nil {
+			slog.Error("disable-all create empty items config", slog.String("error", err.Error()))
+			return err
+		}
+		defer func() {
+			_ = emptyItemsConfig.Close()
+			_ = os.Remove(emptyItemsConfig.Name())
+		}()
+		slog.Info("disable-all create empty items config success")
+
+		config.MonitorConfig.ItemsConfigFile = emptyItemsConfig.Name()
+
+		err = config.LoadMonitorItemsConfig()
+		if err != nil {
+			slog.Error("disable-all load items", slog.String("error", err.Error()))
+			return err
+		}
+
+		disableDbUp := viper.GetBool("with-db-up")
+		if !disableDbUp {
+			config.InjectMonitorDbUpItem()
+		}
+		config.InjectMonitorHeartBeatItem()
+
+		staff := viper.GetString("disable-staff")
+		slog.Info("disable-all", slog.String("staff", staff))
+		err = reschedule(configFileDir, configFileName, staff)
+		if err != nil {
+			slog.Error("disable-all sub-cmd", slog.String("error", err.Error()))
+			return err
+		}
+
+		return nil
+	},
+}
+
+// init declares the flags of disable-all, binds them to disable-all specific
+// viper keys, and attaches the sub-command to rootCmd.
+func init() {
+	subCmdDisableAll.PersistentFlags().StringP("config", "c", "", "config file")
+	_ = subCmdDisableAll.MarkPersistentFlagRequired("config")
+	_ = viper.BindPFlag("disable-config", subCmdDisableAll.PersistentFlags().Lookup("config"))
+
+	subCmdDisableAll.PersistentFlags().StringP("staff", "", "", "staff name")
+	_ = subCmdDisableAll.MarkPersistentFlagRequired("staff")
+	_ = viper.BindPFlag("disable-staff", subCmdDisableAll.PersistentFlags().Lookup("staff"))
+
+	subCmdDisableAll.PersistentFlags().BoolP("with-db-up", "", false, "also disable db-up")
+	_ = viper.BindPFlag("with-db-up", subCmdDisableAll.PersistentFlags().Lookup("with-db-up"))
+
+	rootCmd.AddCommand(subCmdDisableAll)
+}
diff --git
a/dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_reschedule.go b/dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_reschedule.go index 7e403da7c3..2bf6494d36 100644 --- a/dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_reschedule.go +++ b/dbm-services/mysql/db-tools/mysql-monitor/cmd/subcmd_reschedule.go @@ -1,12 +1,9 @@ package cmd import ( - "fmt" "os" "path/filepath" - "strings" - ma "dbm-services/mysql/db-tools/mysql-crond/api" "dbm-services/mysql/db-tools/mysql-monitor/pkg/config" "github.com/spf13/cobra" @@ -46,7 +43,8 @@ var subCmdReschedule = &cobra.Command{ return err } - config.InjectHardCodeItem() + config.InjectMonitorDbUpItem() + config.InjectMonitorHeartBeatItem() err = config.WriteMonitorItemsBack() if err != nil { @@ -54,115 +52,12 @@ var subCmdReschedule = &cobra.Command{ return err } - manager := ma.NewManager(config.MonitorConfig.ApiUrl) - entries, err := manager.Entries() + err = reschedule(configFileDir, configFileName, viper.GetString("reschedule-staff")) if err != nil { - slog.Error("reschedule list entries", err) + slog.Error("reschedule sub-cmd", slog.String("error", err.Error())) return err } - for _, entry := range entries { - if strings.HasPrefix( - entry.Job.Name, - fmt.Sprintf("mysql-monitor-%d", config.MonitorConfig.Port), - ) { - eid, err := manager.Delete(entry.Job.Name, true) - if err != nil { - slog.Error( - "reschedule delete entry", err, - slog.String("name", entry.Job.Name), - ) - return err - } - slog.Info( - "reschedule delete entry", - slog.String("name", entry.Job.Name), - slog.Int("ID", eid), - ) - } - } - - var hardCodeItems []*config.MonitorItem - itemGroups := make(map[string][]*config.MonitorItem) - for _, ele := range config.ItemsConfig { - // 硬编码监控项先排除掉 - if ele.Name == "db-up" || ele.Name == config.HeartBeatName { - if ele.IsEnable() { - hardCodeItems = append(hardCodeItems, ele) - } - continue - } - - if ele.IsEnable() && ele.IsMatchMachineType() && ele.IsMatchRole() { - var key string - - if 
ele.Schedule == nil { - key = config.MonitorConfig.DefaultSchedule - } else { - key = *ele.Schedule - } - - if _, ok := itemGroups[key]; !ok { - itemGroups[key] = []*config.MonitorItem{} - } - itemGroups[key] = append(itemGroups[key], ele) - } - } - - for k, v := range itemGroups { - var itemNames []string - for _, j := range v { - itemNames = append(itemNames, j.Name) - } - args := []string{ - "run", - "--items", strings.Join(itemNames, ","), - "-c", configFileName, // use WorkDir - } - eid, err := manager.CreateOrReplace( - ma.JobDefine{ - Name: fmt.Sprintf("mysql-monitor-%d-%s", config.MonitorConfig.Port, k), - Command: executable, - Args: args, - Schedule: k, - Creator: viper.GetString("staff"), - Enable: true, - WorkDir: configFileDir, - }, true, - ) - if err != nil { - slog.Error("reschedule add entry", err) - return err - } - slog.Info("reschedule add entry", slog.Int("entry id", eid)) - } - - // 注册 hardcode - var itemNames []string - for _, j := range hardCodeItems { - itemNames = append(itemNames, j.Name) - } - args = []string{ - "hardcode-run", - "--items", strings.Join(itemNames, ","), - "-c", configPath, - } - eid, err := manager.CreateOrReplace( - ma.JobDefine{ - Name: fmt.Sprintf("mysql-monitor-%d-hardcode", config.MonitorConfig.Port), - Command: executable, - Args: args, - Schedule: config.HardCodeSchedule, - Creator: viper.GetString("staff"), - Enable: true, - }, true, - ) - if err != nil { - slog.Error("reschedule add hardcode entry", err) - return err - } - slog.Info("reschedule add hardcode entry", slog.Int("entry id", eid)) - return nil }, } @@ -174,7 +69,7 @@ func init() { subCmdReschedule.PersistentFlags().StringP("staff", "", "", "staff name") _ = subCmdReschedule.MarkPersistentFlagRequired("staff") - _ = viper.BindPFlag("staff", subCmdReschedule.PersistentFlags().Lookup("staff")) + _ = viper.BindPFlag("reschedule-staff", subCmdReschedule.PersistentFlags().Lookup("staff")) rootCmd.AddCommand(subCmdReschedule) } diff --git 
a/dbm-services/mysql/db-tools/mysql-monitor/pkg/config/init.go b/dbm-services/mysql/db-tools/mysql-monitor/pkg/config/init.go
index 99af42a4ce..08cd4f3c5a 100644
--- a/dbm-services/mysql/db-tools/mysql-monitor/pkg/config/init.go
+++ b/dbm-services/mysql/db-tools/mysql-monitor/pkg/config/init.go
@@ -80,6 +80,38 @@ func LoadMonitorItemsConfig() error {
 	return nil
 }
 
+// InjectMonitorHeartBeatItem injects the hard-coded heart beat item into ItemsConfig.
+// The item is enabled, scheduled by HardCodeSchedule, carries the machine type of
+// MonitorConfig, and its Role is left nil.
+func InjectMonitorHeartBeatItem() {
+	enable := true
+	heartBeatItem := &MonitorItem{
+		Name:        HeartBeatName,
+		Enable:      &enable,
+		Schedule:    &HardCodeSchedule, //&MonitorConfig.DefaultSchedule,
+		MachineType: []string{MonitorConfig.MachineType},
+		Role:        nil,
+	}
+	ItemsConfig = injectItem(heartBeatItem, ItemsConfig)
+	slog.Debug("inject hardcode", slog.Any("items", ItemsConfig))
+}
+
+// InjectMonitorDbUpItem injects the hard-coded db-up item into ItemsConfig.
+// The item is enabled, scheduled by HardCodeSchedule, carries the machine type of
+// MonitorConfig, and its Role is left nil.
+func InjectMonitorDbUpItem() {
+	enable := true
+	dbUpItem := &MonitorItem{
+		Name:        "db-up",
+		Enable:      &enable,
+		Schedule:    &HardCodeSchedule, //&MonitorConfig.DefaultSchedule,
+		MachineType: []string{MonitorConfig.MachineType},
+		Role:        nil,
+	}
+	ItemsConfig = injectItem(dbUpItem, ItemsConfig)
+	slog.Debug("inject hardcode", slog.Any("items", ItemsConfig))
+}
+
 // InjectHardCodeItem 注入硬编码的心跳和db-up监控
 func InjectHardCodeItem() {
 	enable := true