diff --git a/dbm-services/redis/db-tools/dbactuator/go.mod b/dbm-services/redis/db-tools/dbactuator/go.mod index a12516835b..00722ac4a1 100644 --- a/dbm-services/redis/db-tools/dbactuator/go.mod +++ b/dbm-services/redis/db-tools/dbactuator/go.mod @@ -22,12 +22,14 @@ require ( require ( github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/clbanning/mxj v1.8.4 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/glebarez/go-sqlite v1.21.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/google/go-querystring v1.1.0 // indirect github.com/google/uuid v1.3.0 // indirect github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -38,11 +40,14 @@ require ( github.com/leodido/go-urn v1.2.3 // indirect github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de // indirect github.com/mattn/go-isatty v0.0.17 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/mozillazg/go-httpheader v0.4.0 // indirect github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rogpeppe/go-internal v1.8.0 // indirect github.com/smartystreets/assertions v1.2.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/tencentyun/cos-go-sdk-v5 v0.7.54 // indirect github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.0 // indirect github.com/yusufpapurcu/wmi v1.2.3 // indirect diff --git a/dbm-services/redis/db-tools/dbactuator/go.sum b/dbm-services/redis/db-tools/dbactuator/go.sum index b075ce6a7c..61ba29d938 100644 --- a/dbm-services/redis/db-tools/dbactuator/go.sum +++ 
b/dbm-services/redis/db-tools/dbactuator/go.sum @@ -1,10 +1,14 @@ +github.com/QcloudApi/qcloud_sign_golang v0.0.0-20141224014652-e4130a326409/go.mod h1:1pk82RBxDY/JZnPQrtqHlUFfCctgdorsd9M06fMynOM= github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/clbanning/mxj v1.8.4 h1:HuhwZtbyvyOw+3Z1AowPkU87JkJUSv751ELWaiTpj8I= +github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= @@ -31,15 +35,21 @@ github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo= github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 
h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= +github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/grafov/m3u8 v0.12.0/go.mod h1:nqzOkfBiZJENr52zTVd/Dcl03yzphIMbJqkXGu+u080= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= @@ -62,6 +72,12 @@ github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de h1:V53FWzU6KAZVi1 github.com/lufia/plan9stats v0.0.0-20230110061619-bbe2e5e100de/go.mod h1:JKx41uQRwqlTZabZc+kILPrO/3jlKnQ2Z8b7YiVw5cE= github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= 
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mozillazg/go-httpheader v0.2.1/go.mod h1:jJ8xECTlalr6ValeXYdOF8fFUISeBAdw6E61aqQma60= +github.com/mozillazg/go-httpheader v0.4.0 h1:aBn6aRXtFzyDLZ4VIRLsZbbJloagQfMnCiYgOq6hK4w= +github.com/mozillazg/go-httpheader v0.4.0/go.mod h1:PuT8h0pw6efvp8ZeUec1Rs7dwjK08bt6gKSReGMqtdA= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= @@ -100,12 +116,17 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.563/go.mod h1:7sCQWVkxcsR38nffDW057DRGk8mUjK1Ing/EFOK8s8Y= +github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/kms v1.0.563/go.mod h1:uom4Nvi9W+Qkom0exYiJ9VWJjXwyxtPYTkKkaLMlfE0= +github.com/tencentyun/cos-go-sdk-v5 
v0.7.54 h1:FRamEhNBbSeggyYfWfzFejTLftgbICocSYFk4PKTSV4= +github.com/tencentyun/cos-go-sdk-v5 v0.7.54/go.mod h1:UN+VdbCl1hg+kKi5RXqZgaP+Boqfmk+D04GRc4XFk70= github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= diff --git a/dbm-services/redis/db-tools/dbactuator/models/myredis/client.go b/dbm-services/redis/db-tools/dbactuator/models/myredis/client.go index 5b2235b587..b59654098e 100644 --- a/dbm-services/redis/db-tools/dbactuator/models/myredis/client.go +++ b/dbm-services/redis/db-tools/dbactuator/models/myredis/client.go @@ -2176,3 +2176,60 @@ func (db *RedisClient) TailRedisLogFile(tailNLine int) (data string, err error) } return string(dataBytes), nil } + +// IsReshapeRunning 判断tendisplus/tendisssd是否正在执行reshape +func (db *RedisClient) IsReshapeRunning() (ret bool, err error) { + compactInfo, err := db.Info("Compaction") + if err != nil { + return false, err + } + running := compactInfo["current-compaction-status"] + if running == "running" { + return true, nil + } + return false, nil +} + +// WaitTendisReshapeDone 等待tendisplus/tendisssd reshape完成 +func (db *RedisClient) WaitTendisReshapeDone() (err error) { + var msg string + count := 0 + for { + isReshaping, err := db.IsReshapeRunning() + if err != nil { + return err + } + if !isReshaping { + msg = fmt.Sprintf("redis:%s reshape done", db.Addr) + mylog.Logger.Info(msg) + return nil + } + count++ + if (count % 12) == 0 { + msg = fmt.Sprintf("redis:%s reshape is running", db.Addr) + mylog.Logger.Info(msg) + } + time.Sleep(5 * time.Second) + } +} + +// TendisReshapeAndWaitDone tendisplus/tendisssd reshape并等待reshape完成 +func (db *RedisClient) TendisReshapeAndWaitDone() (err error) { + if db.InstanceClient == nil { + err := fmt.Errorf("reshape redis:%s must create a standalone client", db.Addr) + 
mylog.Logger.Error(err.Error()) + return err + } + isReshaping, err := db.IsReshapeRunning() + if err != nil { + return err + } + if isReshaping { + // 如果正在reshape,则等待reshape完成,不重复执行reshape + return db.WaitTendisReshapeDone() + } + cmd := []interface{}{"reshape"} + // reshape 是阻塞操作,可能会超时,所以不捕获错误 + db.InstanceClient.Do(context.TODO(), cmd...).Result() + return db.WaitTendisReshapeDone() +} diff --git a/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/cluster_reset_flush_meet.go b/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/cluster_reset_flush_meet.go new file mode 100644 index 0000000000..ddd99a1435 --- /dev/null +++ b/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/cluster_reset_flush_meet.go @@ -0,0 +1,219 @@ +package atomredis + +import ( + "encoding/json" + "fmt" + "strconv" + "sync" + "time" + + "github.com/go-playground/validator/v10" + + "dbm-services/redis/db-tools/dbactuator/models/myredis" + "dbm-services/redis/db-tools/dbactuator/pkg/consts" + "dbm-services/redis/db-tools/dbactuator/pkg/jobruntime" + "dbm-services/redis/redis-dts/tclog" +) + +// ClusterResetFlushMeetItem cluster reset flush meet item +type ClusterResetFlushMeetItem struct { + ResetIP string `json:"reset_ip" validate:"required"` + ResetPort int `json:"reset_port" validate:"required"` + ResetRedisPassword string `json:"reset_redis_password" validate:"required"` + MeetIP string `json:"meet_ip" validate:"required"` + MeetPort int `json:"meet_port" validate:"required"` + DoFlushall bool `json:"do_flushall"` // 是否执行flushall + DoClusterMeet bool `json:"do_cluster_meet"` // 是否执行cluster meet +} + +// ResetRedisAddr reset redis addr +func (item *ClusterResetFlushMeetItem) ResetRedisAddr() string { + return fmt.Sprintf("%s:%d", item.ResetIP, item.ResetPort) +} + +// ClusterResetFlushMeetParams 参数 +type ClusterResetFlushMeetParams struct { + ResetFlushMeetParams []ClusterResetFlushMeetItem `json:"reset_flush_meet_params" validate:"required"` +} + +// 
ClusterResetFlushMeet TODO +type ClusterResetFlushMeet struct { + runtime *jobruntime.JobGenericRuntime + params ClusterResetFlushMeetParams + tasks []*clusterResetFlushMeetTask +} + +// 无实际作用,仅确保实现了 jobruntime.JobRunner 接口 +var _ jobruntime.JobRunner = (*ClusterResetFlushMeet)(nil) + +// NewClusterResetFlushMeet new +func NewClusterResetFlushMeet() jobruntime.JobRunner { + return &ClusterResetFlushMeet{} +} + +// Init 初始化,参数校验 +func (job *ClusterResetFlushMeet) Init(m *jobruntime.JobGenericRuntime) error { + job.runtime = m + + err := json.Unmarshal([]byte(job.runtime.PayloadDecoded), &job.params) + if err != nil { + job.runtime.Logger.Error(fmt.Sprintf("json.Unmarshal failed,err:%+v\n", err)) + return err + } + // 参数有效性检查 + validate := validator.New() + err = validate.Struct(job.params) + if err != nil { + if _, ok := err.(*validator.InvalidValidationError); ok { + job.runtime.Logger.Error("ClusterResetFlushMeet Init params validate failed,err:%v,params:%+v", err, job.params) + return err + } + for _, err := range err.(validator.ValidationErrors) { + job.runtime.Logger.Error("ClusterResetFlushMeet Init params validate failed,err:%v,params:%+v", err, job.params) + return err + } + } + return nil +} + +// Name 名字 +func (job *ClusterResetFlushMeet) Name() string { + return "redis_cluster_reset_flush_meet" +} + +// Run 执行 +func (job *ClusterResetFlushMeet) Run() (err error) { + job.tasks = make([]*clusterResetFlushMeetTask, 0, len(job.params.ResetFlushMeetParams)) + for _, item := range job.params.ResetFlushMeetParams { + task := &clusterResetFlushMeetTask{ + ClusterResetFlushMeetItem: item, + } + job.tasks = append(job.tasks, task) + } + err = job.allInstCconnect() + if err != nil { + return err + } + defer job.allInstDisconnect() + + for _, tmp := range job.tasks { + task := tmp + task.resetAndFlushallAndMeet() + if task.Err != nil { + return task.Err + } + } + return nil +} + +func (job *ClusterResetFlushMeet) allInstCconnect() (err error) { + wg := 
sync.WaitGroup{} + // 并发确认所有实例是否可连接 + for _, tmp := range job.tasks { + task := tmp + wg.Add(1) + go func(task *clusterResetFlushMeetTask) { + defer wg.Done() + task.createResetConn() + }(task) + } + wg.Wait() + for _, tmp := range job.tasks { + task := tmp + if task.Err != nil { + return task.Err + } + } + return nil +} + +// allInstDisconnect 所有实例断开连接 +func (job *ClusterResetFlushMeet) allInstDisconnect() { + for _, tmp := range job.tasks { + task := tmp + if task.resetRedisConn != nil { + task.resetRedisConn.Close() + task.resetRedisConn = nil + } + } +} + +// Retry 返回可重试次数 +func (job *ClusterResetFlushMeet) Retry() uint { + return 2 +} + +// Rollback 回滚函数,一般不用实现 +func (job *ClusterResetFlushMeet) Rollback() error { + return nil +} + +// clusterResetFlushMeetTask task,为了做并发连接,单独定义一个struct +type clusterResetFlushMeetTask struct { + ClusterResetFlushMeetItem + resetRedisConn *myredis.RedisClient + Err error +} + +// createResetConn 创建连接 +func (task *clusterResetFlushMeetTask) createResetConn() { + task.resetRedisConn, task.Err = myredis.NewRedisClientWithTimeout(task.ResetRedisAddr(), task.ResetRedisPassword, 0, + consts.TendisTypeRedisInstance, 10*time.Hour) +} + +// resetAndFlushallAndMeet cluster reset并flushall并meet +func (task *clusterResetFlushMeetTask) resetAndFlushallAndMeet() { + var role string + var clustreInfo *myredis.CmdClusterInfo + var addrToNodes map[string]*myredis.ClusterNodeData + // 先执行cluster reset + tclog.Logger.Info(fmt.Sprintf("redis %s cluster reset start", task.ResetRedisAddr())) + task.Err = task.resetRedisConn.ClusterReset() + if task.Err != nil { + return + } + for { + role, _ = task.resetRedisConn.GetRole() + clustreInfo, _ = task.resetRedisConn.ClusterInfo() + if role == consts.RedisMasterRole && clustreInfo.ClusterState != consts.ClusterStateOK { + tclog.Logger.Info(fmt.Sprintf("redis %s cluster reset success,current_role:%s cluster_state:%s", + task.ResetRedisAddr(), role, clustreInfo.ClusterState)) + break + } + 
tclog.Logger.Info(fmt.Sprintf("redis %s cluster reset done,but current_role:%s cluster_state:%s", + task.ResetRedisAddr(), role, clustreInfo.ClusterState)) + time.Sleep(3 * time.Second) + } + if task.DoFlushall { + // 执行flushall + tclog.Logger.Info(fmt.Sprintf("redis %s flushall start", task.ResetRedisAddr())) + cmd := []string{consts.TendisPlusFlushAllRename} // cache 和 tendisplus的 flushall 命令一样 + _, task.Err = task.resetRedisConn.DoCommand(cmd, 0) + if task.Err != nil { + return + } + } + if task.DoClusterMeet { + // 执行cluster meet + tclog.Logger.Info(fmt.Sprintf("redis %s 'cluster meet %s %d' start", + task.ResetRedisAddr(), task.MeetIP, task.MeetPort)) + _, task.Err = task.resetRedisConn.ClusterMeet(task.MeetIP, strconv.Itoa(task.MeetPort)) + if task.Err != nil { + return + } + for { + addrToNodes, task.Err = task.resetRedisConn.GetAddrMapToNodes() + if task.Err != nil { + return + } + if _, ok := addrToNodes[task.ResetRedisAddr()]; ok { + tclog.Logger.Info(fmt.Sprintf("redis %s 'cluster meet %s %d' success", + task.ResetRedisAddr(), task.MeetIP, task.MeetPort)) + break + } + tclog.Logger.Info(fmt.Sprintf("redis %s 'cluster meet %s %d' done,but not in 'cluster nodes'", + task.ResetRedisAddr(), task.MeetIP, task.MeetPort)) + time.Sleep(3 * time.Second) + } + } +} diff --git a/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/redis_config_set.go b/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/redis_config_set.go index 12227a5267..fa98f30bfa 100644 --- a/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/redis_config_set.go +++ b/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/redis_config_set.go @@ -136,7 +136,9 @@ func (job *RedisConfigSet) allInstsAbleToConnect() (err error) { } job.AddrMapConfigFile[addr] = confFile // 获取密码 - if job.params.Role == consts.MetaRolePredixy || job.params.Role == consts.MetaRoleTwemproxy { + if job.params.Role == consts.MetaRolePredixy { + password, err = 
myredis.GetPredixyAdminPasswdFromConfFlie(port) + } else if job.params.Role == consts.MetaRoleTwemproxy { password, err = myredis.GetProxyPasswdFromConfFlie(port, job.params.Role) } else { password, err = myredis.GetRedisPasswdFromConfFile(port) diff --git a/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/redis_reshape.go b/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/redis_reshape.go new file mode 100644 index 0000000000..4f5dac5083 --- /dev/null +++ b/dbm-services/redis/db-tools/dbactuator/pkg/atomjobs/atomredis/redis_reshape.go @@ -0,0 +1,181 @@ +package atomredis + +import ( + "encoding/json" + "fmt" + "sync" + "time" + + "github.com/go-playground/validator/v10" + + "dbm-services/redis/db-tools/dbactuator/models/myredis" + "dbm-services/redis/db-tools/dbactuator/pkg/consts" + "dbm-services/redis/db-tools/dbactuator/pkg/jobruntime" +) + +// RedisReshapeParam redis reshape param +type RedisReshapeParam struct { + RedisPassword string `json:"redis_password" validate:"required"` + Instances []instItem `json:"instances" validate:"required"` +} + +// reshapeTaskItem reshape task item,便于做并发 +type reshapeTaskItem struct { + instItem + Password string `json:"password"` + redisConn *myredis.RedisClient + Err error +} + +// RedisReshape redis shape +type RedisReshape struct { + runtime *jobruntime.JobGenericRuntime + params RedisReshapeParam + ReshapeTasks []*reshapeTaskItem +} + +// 无实际作用,仅确保实现了 jobruntime.JobRunner 接口 +var _ jobruntime.JobRunner = (*RedisReshape)(nil) + +// NewRedisReshape new +func NewRedisReshape() jobruntime.JobRunner { + return &RedisReshape{} +} + +// Init 初始化 +func (job *RedisReshape) Init(m *jobruntime.JobGenericRuntime) error { + job.runtime = m + err := json.Unmarshal([]byte(job.runtime.PayloadDecoded), &job.params) + if err != nil { + job.runtime.Logger.Error(fmt.Sprintf("json.Unmarshal failed,err:%+v", err)) + return err + } + // 参数有效性检查 + validate := validator.New() + err = validate.Struct(job.params) + if err 
!= nil { + if _, ok := err.(*validator.InvalidValidationError); ok { + job.runtime.Logger.Error("RedisReshape Init params validate failed,err:%v,params:%+v", + err, job.params) + return err + } + for _, err := range err.(validator.ValidationErrors) { + job.runtime.Logger.Error("RedisReshape Init params validate failed,err:%v,params:%+v", + err, job.params) + return err + } + } + return nil +} + +// Name 原子任务名 +func (job *RedisReshape) Name() string { + return "redis_reshape" +} + +// Run Command Run +func (job *RedisReshape) Run() (err error) { + job.ReshapeTasks = make([]*reshapeTaskItem, 0, len(job.params.Instances)) + for _, instItem := range job.params.Instances { + reshapeTask := &reshapeTaskItem{ + instItem: instItem, + Password: job.params.RedisPassword, + } + job.ReshapeTasks = append(job.ReshapeTasks, reshapeTask) + } + err = job.allInstCconnect() + if err != nil { + return err + } + defer job.allInstDisconnect() + + err = job.ReshapeAndWaitDone() + if err != nil { + return err + } + return nil +} + +func (job *RedisReshape) allInstCconnect() (err error) { + wg := sync.WaitGroup{} + // 并发建立连接 + for _, tmp := range job.ReshapeTasks { + task := tmp + wg.Add(1) + go func(task *reshapeTaskItem) { + defer wg.Done() + task.redisConn, task.Err = myredis.NewRedisClientWithTimeout(task.Addr(), task.Password, 0, + consts.TendisTypeRedisInstance, 10*time.Hour) + }(task) + } + wg.Wait() + for _, tmp := range job.ReshapeTasks { + task := tmp + if task.Err != nil { + return task.Err + } + } + return nil +} + +// allInstDisconnect 所有实例断开连接 +func (job *RedisReshape) allInstDisconnect() { + for _, tmp := range job.ReshapeTasks { + task := tmp + if task.redisConn != nil { + task.redisConn.Close() + task.redisConn = nil + } + } +} + +// ReshapeAndWaitDone 多实例并发执行reshape +func (job *RedisReshape) ReshapeAndWaitDone() error { + // 根据salveIP做分组 + tasksMapSlice := make(map[string][]*reshapeTaskItem) + maxCount := 0 + for _, tmp := range job.ReshapeTasks { + task := tmp + 
tasksMapSlice[task.IP] = append(tasksMapSlice[task.IP], task) + if len(tasksMapSlice[task.IP]) > maxCount { + maxCount = len(tasksMapSlice[task.IP]) + } + } + // 同IP实例间串行,多IP实例间并行 + for idx := 0; idx < maxCount; idx++ { + groupTasks := []*reshapeTaskItem{} + for ip := range tasksMapSlice { + if len(tasksMapSlice[ip]) > idx { + groupTasks = append(groupTasks, tasksMapSlice[ip][idx]) + } + } + wg := sync.WaitGroup{} + for _, taskItem := range groupTasks { + task01 := taskItem + wg.Add(1) + go func(task02 *reshapeTaskItem) { + defer wg.Done() + job.runtime.Logger.Info("tendisplus %s start reshape", task02.Addr()) + task02.Err = task02.redisConn.TendisReshapeAndWaitDone() + }(task01) + } + wg.Wait() + for _, taskItem := range groupTasks { + task01 := taskItem + if task01.Err != nil { + return task01.Err + } + } + } + return nil +} + +// Retry times +func (job *RedisReshape) Retry() uint { + return 2 +} + +// Rollback rollback +func (job *RedisReshape) Rollback() error { + return nil +} diff --git a/dbm-services/redis/db-tools/dbactuator/pkg/jobmanager/jobmanager.go b/dbm-services/redis/db-tools/dbactuator/pkg/jobmanager/jobmanager.go index 86dbf77685..cb722be478 100644 --- a/dbm-services/redis/db-tools/dbactuator/pkg/jobmanager/jobmanager.go +++ b/dbm-services/redis/db-tools/dbactuator/pkg/jobmanager/jobmanager.go @@ -203,6 +203,8 @@ func (m *JobGenericManager) atomjobsMapperLoading() { m.atomJobMapper[atomsys.NewChangePassword().Name()] = atomsys.NewChangePassword m.atomJobMapper[atomredis.NewRedisLoadModules().Name()] = atomredis.NewRedisLoadModules m.atomJobMapper[atomproxy.NewPredixyAddModulesCmds().Name()] = atomproxy.NewPredixyAddModulesCmds + m.atomJobMapper[atomredis.NewRedisReshape().Name()] = atomredis.NewRedisReshape + m.atomJobMapper[atomredis.NewClusterResetFlushMeet().Name()] = atomredis.NewClusterResetFlushMeet // 老备份系统 // m.atomJobMapper[atomredis.NewRedisDataRecover().Name()] = atomredis.NewRedisDataRecover 
m.atomJobMapper[atomredis.NewRedisDataStructure().Name()] = atomredis.NewRedisDataStructure diff --git a/dbm-services/redis/db-tools/dbmon/pkg/redismaxmemory/redismaxmemory.go b/dbm-services/redis/db-tools/dbmon/pkg/redismaxmemory/redismaxmemory.go index 5c5287b74f..b3a3b486cb 100644 --- a/dbm-services/redis/db-tools/dbmon/pkg/redismaxmemory/redismaxmemory.go +++ b/dbm-services/redis/db-tools/dbmon/pkg/redismaxmemory/redismaxmemory.go @@ -260,6 +260,10 @@ func (job *Job) SetEventSender() { job.Conf.BeatPath, job.Conf.AgentAddress, ) + if job.Err != nil { + mylog.Logger.Error(fmt.Sprintf("set event sender fail,err:%v", job.Err)) + return + } if len(job.Conf.Servers) == 0 { return } diff --git a/dbm-services/redis/redis-dts/main.go b/dbm-services/redis/redis-dts/main.go index f38f8f113e..092a589eea 100644 --- a/dbm-services/redis/redis-dts/main.go +++ b/dbm-services/redis/redis-dts/main.go @@ -77,7 +77,8 @@ func main() { constvar.GetZoneName(), tclog.Logger, wg)) jobers = append(jobers, dtsJob.NewRedisCacheDtsJob(constvar.GetBkCloudID(), localIP, constvar.GetZoneName(), tclog.Logger, wg)) - + jobers = append(jobers, dtsJob.NewTendisplusLightningJob(constvar.GetBkCloudID(), localIP, + constvar.GetZoneName(), tclog.Logger, wg)) for _, jober := range jobers { jober.StartBgWorkers() } diff --git a/dbm-services/redis/redis-dts/models/myredis/myredis.go b/dbm-services/redis/redis-dts/models/myredis/myredis.go index 38df74283e..fb7a68a426 100644 --- a/dbm-services/redis/redis-dts/models/myredis/myredis.go +++ b/dbm-services/redis/redis-dts/models/myredis/myredis.go @@ -394,6 +394,18 @@ func (db *RedisWorker) GetMasterAddrAndPasswd() (masterAddr, masterAuth string, return } +// Loadexternalfiles 加载外部sst文件 +func (db *RedisWorker) Loadexternalfiles(sstDir, slosStr, loadMode string) (err error) { + cmd := []interface{}{"loadexternalfiles", sstDir, slosStr, loadMode} + _, err = db.Client.Do(context.TODO(), cmd...).Result() + if err != nil { + err = fmt.Errorf("redis:%s 
%+v,err:%v", db.Addr, cmd, err) + db.logger.Error(err.Error()) + return + } + return +} + // Close :关闭client func (db *RedisWorker) Close() { if db.Client == nil { diff --git a/dbm-services/redis/redis-dts/models/mysql/tendisdb/job.go b/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisDtsJob.go similarity index 100% rename from dbm-services/redis/redis-dts/models/mysql/tendisdb/job.go rename to dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisDtsJob.go diff --git a/dbm-services/redis/redis-dts/models/mysql/tendisdb/task.go b/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisDtsTask.go similarity index 98% rename from dbm-services/redis/redis-dts/models/mysql/tendisdb/task.go rename to dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisDtsTask.go index 0a44754bf8..0f0bee90f3 100644 --- a/dbm-services/redis/redis-dts/models/mysql/tendisdb/task.go +++ b/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisDtsTask.go @@ -18,8 +18,8 @@ import ( ) var ( - tbTaskFiledToColunm map[string]string // struct filedName map to colunmName - once01 sync.Once + tbDtsTaskFiledToColunm map[string]string // struct filedName map to colunmName + tbDtsTaskOnce sync.Once ) // TbTendisDTSTask 迁移task @@ -429,11 +429,11 @@ func GetJobSrcIPRunningTasks(billID int64, srcCluster, dstCluster, srcIP string, // DtsTaskStructFieldsToColumns 获取 TbTendisDTSTask 字段名 到 列名之间的对应关系 // 如 filedNames=["BillID","App","User","SrcIP"] 对应的 columnNames=["bill_id","app","user","src_ip"] func DtsTaskStructFieldsToColumns(fieldNames []string, logger *zap.Logger) (columnNames []string, err error) { - once01.Do(func() { + tbDtsTaskOnce.Do(func() { t01 := TbTendisDTSTask{} reg01 := regexp.MustCompile(`column:(\w+)`) getType := reflect.TypeOf(t01) - tbTaskFiledToColunm = make(map[string]string, getType.NumField()) + tbDtsTaskFiledToColunm = make(map[string]string, getType.NumField()) for i := 0; i < getType.NumField(); i++ { field := getType.Field(i) gormTag := 
string(field.Tag.Get("gorm")) @@ -441,12 +441,12 @@ func DtsTaskStructFieldsToColumns(fieldNames []string, logger *zap.Logger) (colu if len(l01) < 2 { continue } - tbTaskFiledToColunm[field.Name] = l01[1] + tbDtsTaskFiledToColunm[field.Name] = l01[1] } }) columnNames = make([]string, 0, len(fieldNames)) for _, field01 := range fieldNames { - colName, ok := tbTaskFiledToColunm[field01] + colName, ok := tbDtsTaskFiledToColunm[field01] if ok == false { err = fmt.Errorf("struct TbTendisDTSTask have no field:%s", colName) logger.Error(err.Error()) diff --git a/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisplus_lightning_job.go b/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisplus_lightning_job.go new file mode 100644 index 0000000000..27ebb92f4c --- /dev/null +++ b/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisplus_lightning_job.go @@ -0,0 +1,87 @@ +package tendisdb + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/spf13/viper" + "go.uber.org/zap" + + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/pkg/customtime" + "dbm-services/redis/redis-dts/pkg/scrdbclient" +) + +/* +create table tb_tendisplus_lightning_job ( +id bigint NOT NULL primary key, +ticket_id bigint(20) NOT NULL, +user varchar(64) NOT NULL, +bk_biz_id varchar(64) NOT NULL, +bk_cloud_id bigint(20) NOT NULL, +dst_cluster varchar(128) NOT NULL, +dst_cluster_id bigint(20) NOT NULL, +cluster_nodes longtext NOT NULL, +create_time datetime(6) NOT NULL, +key idx_create_time(create_time), +key idx_dst_cluster_id(dst_cluster_id), +key idx_user(user), +unique index uniq_ticket_cluster(ticket_id,dst_cluster) +)ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +*/ + +// TbTendisplusLightningJob TODO +type TbTendisplusLightningJob struct { + // gorm + ID int64 `gorm:"primary_key;column:id;type:bigint(20) unsigned;not null" json:"id"` + TicketID int64 `gorm:"column:ticket_id;type:bigint(20) unsigned;not null" json:"ticket_id"` + User string 
`gorm:"column:user;type:varchar(64);not null" json:"user"` + BkBizID string `gorm:"column:bk_biz_id;type:varchar(64);not null" json:"bk_biz_id"` + BkCloudID int64 `gorm:"column:bk_cloud_id;type:bigint(20) unsigned;not null" json:"bk_cloud_id"` + DstCluster string `gorm:"column:dst_cluster;type:varchar(128);not null" json:"dst_cluster"` + DstClusterID int64 `gorm:"column:dst_cluster_id;type:bigint(20) unsigned;not null" json:"dst_cluster_id"` + ClusterNodes string `gorm:"column:cluster_nodes;type:longtext;not null" json:"cluster_nodes"` + CreateTime customtime.CustomTime `json:"create_time" gorm:"column:create_time"` // 创建时间 +} + +// TableName 表名 +func (t *TbTendisplusLightningJob) TableName() string { + return "tb_tendisplus_lightning_job" +} + +// GetLightningJob 获取 lightning job对应row +func GetLightningJob( + ticketID int64, dstCluster string, + logger *zap.Logger, +) (jobRows []*TbTendisplusLightningJob, err error) { + var cli01 *scrdbclient.Client + var subURL string + var data *scrdbclient.APIServerResponse + cli01, err = scrdbclient.NewClient(viper.GetString("serviceName"), logger) + if err != nil { + return + } + jobRows = []*TbTendisplusLightningJob{} + subURL = constvar.DbmLightningJobDetailURL + type lightningJobReq struct { + TicketID int64 `json:"ticket_id"` + DstCluster string `json:"dst_cluster"` + } + param := lightningJobReq{ + TicketID: ticketID, + DstCluster: dstCluster, + } + data, err = cli01.Do(http.MethodPost, subURL, param) + if err != nil { + return + } + err = json.Unmarshal(data.Data, &jobRows) + if err != nil { + err = fmt.Errorf("GetLightningJob unmarshal data fail,err:%v,resp.Data:%s,subURL:%s,param:%+v", + err.Error(), string(data.Data), subURL, param) + logger.Error(err.Error()) + return + } + return +} diff --git a/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisplus_lightning_task.go b/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisplus_lightning_task.go new file mode 100644 index 0000000000..dec646721d --- 
/dev/null +++ b/dbm-services/redis/redis-dts/models/mysql/tendisdb/tendisplus_lightning_task.go @@ -0,0 +1,439 @@ +package tendisdb + +import ( + "encoding/json" + "fmt" + "net/http" + "reflect" + "regexp" + "sync" + + "github.com/spf13/viper" + "go.uber.org/zap" + + "dbm-services/redis/db-tools/dbmon/util" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/pkg/scrdbclient" +) + +var ( + lightningTaskFiledToColunm map[string]string // struct filedName map to colunmName + lightningTaskOnce sync.Once +) + +/* +create table tb_tendisplus_lightning_task ( +task_id varchar(64) NOT NULL primary key, +ticket_id bigint(20) NOT NULL, +user varchar(64) NOT NULL, +bk_biz_id varchar(64) NOT NULL, +bk_cloud_id bigint(20) NOT NULL, +cos_key varchar(128) NOT NULL, +cos_file_size bigint(20) NOT NULL, +dts_server varchar(128) NOT NULL, +dst_cluster varchar(128) NOT NULL, +dst_cluster_id bigint(20) NOT NULL, +dst_cluster_priority int(11) NOT NULL, +dst_zonename varchar(128) NOT NULL, +task_type varchar(128) NOT NULL, +operate_type varchar(128) NOT NULL, +status int(11) NOT NULL, +message longtext NOT NULL, +create_time datetime(6) NOT NULL, +update_time datetime(6) NOT NULL, +key idx_update_time(update_time), +key idx_dst_cluster_id(dst_cluster_id), +key idx_user(user), +key idx_ticket_cluster(ticket_id,dst_cluster_id) +)ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +*/ + +// TbTendisplusLightningTask TODO +type TbTendisplusLightningTask struct { + // gorm + TaskId string `gorm:"column:task_id;type:varchar(64);primary_key" json:"task_id"` + TicketID int64 `gorm:"column:ticket_id;type:bigint(20) unsigned;not null" json:"ticket_id"` + User string `gorm:"column:user;type:varchar(64);not null" json:"user"` + BkBizID string `gorm:"column:bk_biz_id;type:varchar(64);not null" json:"bk_biz_id"` + BkCloudID int64 `gorm:"column:bk_cloud_id;type:bigint(20) unsigned;not null" json:"bk_cloud_id"` + CosKey string `gorm:"column:cos_key;type:varchar(128);not null" 
json:"cos_key"` + CosFileSize int64 `gorm:"column:cos_file_size;type:bigint(20) unsigned;not null" json:"cos_file_size"` + DtsServer string `gorm:"column:dts_server;type:varchar(128);not null" json:"dts_server"` + DstCluster string `gorm:"column:dst_cluster;type:varchar(128);not null" json:"dst_cluster"` + DstClusterID int64 `gorm:"column:dst_cluster_id;type:bigint(20) unsigned;not null" json:"dst_cluster_id"` + DstClusterPriority int `gorm:"column:dst_cluster_priority;type:int(11);not null" json:"dst_cluster_priority"` + DstZonename string `gorm:"column:dst_zonename;type:varchar(128);not null" json:"dst_zonename"` + TaskType string `gorm:"column:task_type;type:varchar(128);not null" json:"task_type"` + OperateType string `gorm:"column:operate_type;type:varchar(128);not null" json:"operate_type"` + Status int `gorm:"column:status;type:int(11);not null" json:"status"` + Message string `gorm:"column:message;type:longtext;not null" json:"message"` + CreateTime string `gorm:"column:create_time;type:datetime(6);not null" json:"create_time"` + UpdateTime string `gorm:"column:update_time;type:datetime(6);not null" json:"update_time"` +} + +// TableName 表名 +func (t *TbTendisplusLightningTask) TableName() string { + return "tb_tendisplus_lightning_task" +} + +// TaskLockKey keyname +func (t *TbTendisplusLightningTask) TaskLockKey() string { + return fmt.Sprintf("Lightning_task_lock_%d_%s_%s", + t.TicketID, t.DstCluster, t.TaskId, + ) +} + +// LightningDtsSvrMigratingTasks 获取dtsserver正在迁移的task,与task对应多少dataSize +// 对Tendisplus Lightning 来说,'迁移中'指处于 status=1 状态的task +func LightningDtsSvrMigratingTasks(bkCloudID int64, dtsSvr string, taskTypes []string, + logger *zap.Logger) (tasks []*TbTendisplusLightningTask, dataSize uint64, err error) { + var cli01 *scrdbclient.Client + var subURL string + var data *scrdbclient.APIServerResponse + cli01, err = scrdbclient.NewClient(viper.GetString("serviceName"), logger) + if err != nil { + return + } + tasks = 
[]*TbTendisplusLightningTask{} + if cli01.GetServiceName() == constvar.BkDbm { + subURL = constvar.DbmLightningDtsServerMigratingTasksURL + } + type lightningDtsSvrMigratingTasksReq struct { + BkCloudID int64 `json:"bk_cloud_id"` + DtsServer string `json:"dts_server"` + TaskTypes []string `json:"task_types"` + } + param := lightningDtsSvrMigratingTasksReq{ + BkCloudID: bkCloudID, + DtsServer: dtsSvr, + TaskTypes: taskTypes, + } + data, err = cli01.Do(http.MethodPost, subURL, param) + if err != nil { + return + } + err = json.Unmarshal(data.Data, &tasks) + if err != nil { + err = fmt.Errorf("LightningDtsSvrMigratingTasks unmarshal data fail,err:%v,resp.Data:%s,subURL:%s,param:%+v", + err.Error(), string(data.Data), subURL, param) + logger.Error(err.Error()) + return + } + dataSize = 0 + for _, tmp := range tasks { + task := tmp + dataSize = dataSize + uint64(task.CosFileSize) + } + return +} + +// LightningLast30DaysToExecuteTasks 用于获取最近一个月本地等待执行的lightning tasks +func LightningLast30DaysToExecuteTasks( + bkCloudID int64, + dtsServer, taskType string, + status, limit int, + logger *zap.Logger) (tasks []*TbTendisplusLightningTask, err error) { + var cli01 *scrdbclient.Client + var subURL string + var data *scrdbclient.APIServerResponse + cli01, err = scrdbclient.NewClient(viper.GetString("serviceName"), logger) + if err != nil { + return + } + type lightningLast30DaysToExecTasksReq struct { + BkCloudID int64 `json:"bk_cloud_id"` + DtsServer string `json:"dts_server"` + TaskType string `json:"task_type"` + Status int `json:"status"` + Limit int `json:"limit"` + } + tasks = []*TbTendisplusLightningTask{} + if cli01.GetServiceName() == constvar.BkDbm { + subURL = constvar.DbmLightningLast30DaysToExecuteTasksURL + } + param := lightningLast30DaysToExecTasksReq{ + DtsServer: dtsServer, + TaskType: taskType, + Status: status, + Limit: limit, + } + data, err = cli01.Do(http.MethodPost, subURL, param) + if err != nil { + return + } + err = json.Unmarshal(data.Data, &tasks) + 
if err != nil { + err = fmt.Errorf("LightningLast30DaysToExecuteTasks unmarshal data fail,err:%v,resp.Data:%s,subURL:%s,param:%+v", + err.Error(), string(data.Data), subURL, param) + logger.Error(err.Error()) + return + } + return +} + +// LightningLast30DaysToScheduleJobs 获取最近30天待调度的Jobs +// jobs必须满足: 有一个待调度的task.dataSize < maxDataSize +func LightningLast30DaysToScheduleJobs(bkCloudID int64, maxDataSize int64, zoneName string, + logger *zap.Logger) (jobs []*TbTendisplusLightningTask, err error) { + var cli01 *scrdbclient.Client + var subURL string + var data *scrdbclient.APIServerResponse + cli01, err = scrdbclient.NewClient(viper.GetString("serviceName"), logger) + if err != nil { + return + } + type lightningLast30DaysToScheduleJobsReq struct { + BkCloudID int64 `json:"bk_cloud_id"` + MaxDataSize int64 `json:"max_data_size"` + ZoneName string `json:"zone_name"` + } + jobs = []*TbTendisplusLightningTask{} + if cli01.GetServiceName() == constvar.BkDbm { + subURL = constvar.DbmLightningLast30DaysToScheduleJobsURL + } + param := lightningLast30DaysToScheduleJobsReq{ + BkCloudID: bkCloudID, + MaxDataSize: maxDataSize, + ZoneName: zoneName, + } + data, err = cli01.Do(http.MethodPost, subURL, param) + if err != nil { + return + } + err = json.Unmarshal(data.Data, &jobs) + if err != nil { + err = fmt.Errorf("LightningLast30DaysToScheduleJobs unmarshal data fail,err:%v,resp.Data:%s,subURL:%s,param:%+v", + err.Error(), string(data.Data), subURL, param) + logger.Error(err.Error()) + return + } + return +} + +// LightningJobToScheduleTasks 获取job中所有待调度的task +// ticketId、dstCluster确定一个job +// dtsserver='1.1.1.1' and status=0 and task_type="" 代表 '未执行' +// 一个job可能部分task执行,部分未执行; +// 根据权重src_weight排序,权重越小,越前面执行 +func LightningJobToScheduleTasks(ticketID int64, dstCluster string, + logger *zap.Logger) (tasks []*TbTendisplusLightningTask, err error) { + if ticketID == 0 || dstCluster == "" { + err = fmt.Errorf("ticketID:%d or dstCluster:%s cann't be empty", + ticketID, dstCluster) 
+ logger.Error(err.Error()) + return tasks, err + } + var cli01 *scrdbclient.Client + var subURL string + var data *scrdbclient.APIServerResponse + cli01, err = scrdbclient.NewClient(viper.GetString("serviceName"), logger) + if err != nil { + return + } + type lightningJobToScheduleTasks struct { + TicketID int64 `json:"ticket_id"` + DstCluster string `json:"dst_cluster"` + } + tasks = []*TbTendisplusLightningTask{} + if cli01.GetServiceName() == constvar.BkDbm { + subURL = constvar.DbmLightningJobToScheduleTasksURL + } + param := lightningJobToScheduleTasks{ + TicketID: ticketID, + DstCluster: dstCluster, + } + data, err = cli01.Do(http.MethodPost, subURL, param) + if err != nil { + return + } + err = json.Unmarshal(data.Data, &tasks) + if err != nil { + err = fmt.Errorf("LightningJobToScheduleTasks unmarshal data fail,err:%v,resp.Data:%s,subURL:%s,param:%+v", + err.Error(), string(data.Data), subURL, param) + logger.Error(err.Error()) + return + } + return +} + +// LightningTaskByID 根据id获得task详细信息 +func LightningTaskByID(taskID string, logger *zap.Logger) (task *TbTendisplusLightningTask, err error) { + if logger == nil { + err = fmt.Errorf("LightningTaskByID logger cannot be nil") + return + } + var cli01 *scrdbclient.Client + var subURL string + var data *scrdbclient.APIServerResponse + cli01, err = scrdbclient.NewClient(viper.GetString("serviceName"), logger) + if err != nil { + return + } + type lightningTaskRowByIDReq struct { + TaskID string `json:"task_id"` + } + + task = &TbTendisplusLightningTask{} + if cli01.GetServiceName() == constvar.BkDbm { + subURL = constvar.DbmLightningTaskRowByIDURL + } + param := lightningTaskRowByIDReq{ + TaskID: taskID, + } + data, err = cli01.Do(http.MethodPost, subURL, param) + if err != nil { + return + } + if len(data.Data) == 4 && string(data.Data) == "null" { + return nil, nil + } + err = json.Unmarshal(data.Data, task) + if err != nil { + err = fmt.Errorf("LightningTaskByID unmarshal data 
fail,err:%v,resp.Data:%s,subURL:%s,param:%+v", + err.Error(), string(data.Data), subURL, param) + logger.Error(err.Error()) + return + } + return +} + +// LightningTaskStructFieldsToColumns 获取 TbTendisplusLightningTask 字段名 到 列名之间的对应关系 +// 如 filedNames=["BillID","App","User","SrcIP"] 对应的 columnNames=["bill_id","app","user","src_ip"] +func LightningTaskStructFieldsToColumns(fieldNames []string, logger *zap.Logger) (columnNames []string, err error) { + lightningTaskOnce.Do(func() { + t01 := TbTendisplusLightningTask{} + reg01 := regexp.MustCompile(`column:(\w+)`) + getType := reflect.TypeOf(t01) + lightningTaskFiledToColunm = make(map[string]string, getType.NumField()) + for i := 0; i < getType.NumField(); i++ { + field := getType.Field(i) + gormTag := string(field.Tag.Get("gorm")) + l01 := reg01.FindStringSubmatch(gormTag) + if len(l01) < 2 { + continue + } + lightningTaskFiledToColunm[field.Name] = l01[1] + } + }) + columnNames = make([]string, 0, len(fieldNames)) + for _, field01 := range fieldNames { + colName, ok := lightningTaskFiledToColunm[field01] + if ok == false { + err = fmt.Errorf("struct TbTendisplusLightningTask have no field:%s", colName) + logger.Error(err.Error()) + return + } + columnNames = append(columnNames, colName) + } + return +} + +// GetFieldsValue 根据 字段名 从task中获取其字段values +// 如 filedNames=["DtsServer","CosFileSize"] 其对应值为 ret=["1.1.1.1",11111] +func (t *TbTendisplusLightningTask) GetFieldsValue(fieldNames []string, logger *zap.Logger) (ret []interface{}, + err error) { + _, err = LightningTaskStructFieldsToColumns(fieldNames, logger) + if err != nil { + return + } + ret = make([]interface{}, 0, len(fieldNames)) + getValue := reflect.ValueOf(t) + for _, field01 := range fieldNames { + val01 := reflect.Indirect(getValue).FieldByName(field01) + ret = append(ret, val01.Interface()) + } + return +} + +// GetColToValByFields 根据struct fieldName 生成 表列名=>值 之间的对应关系 +func (t *TbTendisplusLightningTask) GetColToValByFields(fieldNames []string, logger 
*zap.Logger) ( + colToVal map[string]interface{}, err error) { + var columnNames []string + var values []interface{} + columnNames, err = LightningTaskStructFieldsToColumns(fieldNames, logger) + if err != nil { + return + } + values, err = t.GetFieldsValue(fieldNames, logger) + if err != nil { + return + } + colToVal = make(map[string]interface{}, len(fieldNames)) + for idx, col := range columnNames { + colToVal[col] = values[idx] + } + return +} + +// UpdateFieldsValues 根据字段名 自动生成update 语句并进行更新 +// 如 filedNames=["DtsServer","CosFileSize"] +// 生成的update语句: update tb_tendis_dts_task set dts_server=?,cos_file_size=?,update_time=now() where task_id=xxxx; +// 该函数主要目的只更新 值变化的字段,而不是row全部值 +func (t *TbTendisplusLightningTask) UpdateFieldsValues(fieldNames []string, logger *zap.Logger) (err error) { + var colToVal map[string]interface{} + logger.Info(fmt.Sprintf("===>start UpdateFieldsValues fieldNames:%+v", fieldNames)) + colToVal, err = t.GetColToValByFields(fieldNames, logger) + if err != nil { + return err + } + logger.Info(fmt.Sprintf("====>UpdateFieldsValues colToVal:%s", util.ToString(colToVal))) + _, err = UpdateLightningTaskRows([]string{t.TaskId}, colToVal, logger) + return +} + +// UpdateLightningTaskRows 更新tasks多行 +func UpdateLightningTaskRows(taskIDs []string, colToValue map[string]interface{}, + logger *zap.Logger) (rowsAffected int64, + err error) { + var cli01 *scrdbclient.Client + var subURL string + var data *scrdbclient.APIServerResponse + cli01, err = scrdbclient.NewClient(viper.GetString("serviceName"), logger) + if err != nil { + return + } + type dtsTaskRowsUpdateReq struct { + TaskIDs []string `json:"task_ids"` + ColumnToValue map[string]interface{} `json:"col_to_val"` + } + + type dtsTaskRowsUpdateRsp struct { + RowsAffected int64 `json:"rows_affected"` + } + + ret := &dtsTaskRowsUpdateRsp{} + if cli01.GetServiceName() == constvar.BkDbm { + subURL = constvar.DbmLightningUpdateTaskRowsURL + } + param := dtsTaskRowsUpdateReq{ + TaskIDs: taskIDs, + 
ColumnToValue: colToValue, + } + logger.Info(fmt.Sprintf("====>UpdateLightningTaskRows param:%s", util.ToString(param))) + data, err = cli01.Do(http.MethodPost, subURL, param) + if err != nil { + return + } + err = json.Unmarshal(data.Data, ret) + if err != nil { + err = fmt.Errorf("UpdateLightningTaskRows unmarshal data fail,err:%v,resp.Data:%s,subURL:%s,param:%+v", + err.Error(), string(data.Data), subURL, param) + logger.Error(err.Error()) + return + } + return ret.RowsAffected, nil +} + +// IsAllLightningTasksToForceKill 是否全部tasks都等待被force kill +func IsAllLightningTasksToForceKill(tasks []*TbTendisplusLightningTask) (allForceKill bool) { + if len(tasks) == 0 { + return false + } + for _, t01 := range tasks { + t02 := t01 + if t02.OperateType != constvar.RedisForceKillTaskTodo { + return false + } + } + return true +} diff --git a/dbm-services/redis/redis-dts/pkg/constvar/constvar.go b/dbm-services/redis/redis-dts/pkg/constvar/constvar.go index 03f1a34f98..30b32d43db 100644 --- a/dbm-services/redis/redis-dts/pkg/constvar/constvar.go +++ b/dbm-services/redis/redis-dts/pkg/constvar/constvar.go @@ -104,6 +104,30 @@ const ( TendisplusSendIncrTaskType = "tendisplusSendIncr" ) +// tendisplus lightning task type +const ( + TendisplusLightningCosFileDownload = "lightningCosFileDownload" + TendisplusLightningFileSplit = "lightningFileSplit" + TendisplusLightningGenerateSst = "lightningGenerateSst" + TendisplusLightningScpSst = "lightningScpSst" + TendisplusLightningSlaveLoadSst = "lightningSlaveLoadSst" +) + +// tendisplus lightning tools +const ( + ToolLightningKVFileSplit = "tendisplus_lightning_kvfile_split" + ToolLightningSstGenerator = "tendisplus_lightning_sst_generator" +) + +const ( + // ToolZstd tool + ToolZstd = "zstd" + // DbbakDir dir + DbbakDir = "/data/dbbak/" + // MysqlOSAccount os account + MysqlOSAccount = "mysql" +) + /* migrating tasks type '迁移中' 是指那些正在占用资源 或者 即将占用资源 阶段, 资源主要指磁盘 or 内存 @@ -124,6 +148,13 @@ var ( TendisplusMakeSyncTaskType, 
TendisplusSendBulkTaskType, } + LightningMigratingTasksType = []string{ + TendisplusLightningCosFileDownload, + TendisplusLightningFileSplit, + TendisplusLightningGenerateSst, + TendisplusLightningScpSst, + // TendisplusLightningSlaveLoadSst + } ) // Tredisdump 结果文件格式 @@ -223,6 +254,16 @@ const ( DbmJobApiGetJobInstanceStatusURL = "/apis/proxypass/jobapi/get_job_instance_status/" DbmJobApiBatchGetJobInstanceIPLogURL = "/apis/proxypass/jobapi/batch_get_job_instance_ip_log/" DbmJobApiTransferFileURL = "/apis/proxypass/jobapi/fast_transfer_file/" + + // DbmLightningDtsServerMigratingTasksURL TODO + // tendisplus Lightning api + DbmLightningDtsServerMigratingTasksURL = "/apis/proxypass/tendisplus_lightning/dts_server_migrating_tasks/" + DbmLightningLast30DaysToExecuteTasksURL = "/apis/proxypass/tendisplus_lightning/last_30_days_to_exec_tasks/" + DbmLightningLast30DaysToScheduleJobsURL = "/apis/proxypass/tendisplus_lightning/last_30_days_to_schedule_jobs/" + DbmLightningJobToScheduleTasksURL = "/apis/proxypass/tendisplus_lightning/job_to_schedule_tasks/" + DbmLightningJobDetailURL = "/apis/proxypass/tendisplus_lightning/job_detail/" + DbmLightningTaskRowByIDURL = "/apis/proxypass/tendisplus_lightning/task_by_task_id/" + DbmLightningUpdateTaskRowsURL = "/apis/proxypass/tendisplus_lightning/tasks_update/" ) // ZonenameTransform 城市转换 @@ -230,8 +271,12 @@ func ZonenameTransform(zoneName string) string { switch zoneName { case "苏州": return "上海" + case "昆山": + return "上海" case "扬州": return "南京" + case "仪征": + return "南京" case "清远": return "广州" default: diff --git a/dbm-services/redis/redis-dts/pkg/dtsJob/base.go b/dbm-services/redis/redis-dts/pkg/dtsJob/base.go index 785ab12713..cd05a5b3a9 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsJob/base.go +++ b/dbm-services/redis/redis-dts/pkg/dtsJob/base.go @@ -89,7 +89,7 @@ func (job *DtsJobBase) BgDtsTaskRunnerWithConcurrency(taskType, dbType string) { latestRow.ID, latestRow.SrcIP, latestRow.SrcPort, latestRow.Status, 
latestRow.TaskType, taskType)) continue } - task01 := factory.MyTaskFactory(latestRow) + task01 := factory.MyTendisDtsTaskFactory(latestRow) task01.Init() // 执行Init,成功则status=1,失败则status=-1 task01.Execute() } @@ -110,8 +110,7 @@ func (job *DtsJobBase) BgDtsTaskRunnerWithConcurrency(taskType, dbType string) { continue } if len(toExecuteRows) == 0 { - job.logger.Info(fmt.Sprintf("not found to be executed %q task,sleep 10s", taskType), - zap.String("serverIP", job.ServerIP)) + job.logger.Info(fmt.Sprintf("serverIP:%s not found to be executed %s task,sleep 10s", job.ServerIP, taskType)) continue } for _, row := range toExecuteRows { @@ -151,7 +150,7 @@ func (job *DtsJobBase) BgDtsTaskRunnerWithoutLimit(taskType, dbType string) { job.logger.Error(string(debug.Stack())) } }() - task01 := factory.MyTaskFactory(rowData) + task01 := factory.MyTendisDtsTaskFactory(rowData) task01.Init() task01.Execute() }(rowItem) @@ -175,7 +174,7 @@ func (job *DtsJobBase) BgDtsTaskRunnerWithoutLimit(taskType, dbType string) { continue } if len(toExecuteRows) == 0 { - job.logger.Info(fmt.Sprintf("not found to be executed %q task,sleep 10s", taskType), + job.logger.Info(fmt.Sprintf("not found to be executed %s task,sleep 10s", taskType), zap.String("serverIP", job.ServerIP)) continue } diff --git a/dbm-services/redis/redis-dts/pkg/dtsJob/tendisplusLightningJob.go b/dbm-services/redis/redis-dts/pkg/dtsJob/tendisplusLightningJob.go new file mode 100644 index 0000000000..51f7b69c15 --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/dtsJob/tendisplusLightningJob.go @@ -0,0 +1,430 @@ +package dtsJob + +import ( + "fmt" + "runtime/debug" + "sync" + "time" + + "dbm-services/redis/redis-dts/models/mysql/tendisdb" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/pkg/dtsTask/factory" + "dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning" + "dbm-services/redis/redis-dts/pkg/osPerf" + "dbm-services/redis/redis-dts/pkg/scrdbclient" + 
"dbm-services/redis/redis-dts/pkg/txycos" + + "github.com/dustin/go-humanize" + "github.com/jinzhu/gorm" + "github.com/spf13/viper" + "github.com/tencentyun/cos-go-sdk-v5" + "go.uber.org/zap" +) + +// TendisplusLightningJob tendis lightning job +type TendisplusLightningJob struct { + BkCloudID int64 `json:"bk_cloud_id"` + ServerIP string `json:"serverIP"` + ZoneName string `json:"zoneName"` + logger *zap.Logger + wg *sync.WaitGroup + cosWorker *txycos.TxyCosWoker +} + +// NewTendisplusLightningJob new +func NewTendisplusLightningJob(bkCloudID int64, serverIP, zoneName string, + logger *zap.Logger, wg *sync.WaitGroup) (job *TendisplusLightningJob) { + var err error + job = &TendisplusLightningJob{ + BkCloudID: bkCloudID, + ServerIP: serverIP, + ZoneName: zoneName, + logger: logger, + wg: wg, + } + job.cosWorker, err = txycos.NewTxyCosWoker(job.logger) + if err != nil { + panic(err) + } + return +} + +// GetRatioN_LocalDisk 最大使用是本地磁盘的几分之一 +func (job *TendisplusLightningJob) GetRatioN_LocalDisk() (ratioNOfLocalDisk uint64) { + ratioNOfLocalDisk = viper.GetUint64("maxLocalDiskDataSizeRatioNLightning") + if ratioNOfLocalDisk == 0 { + ratioNOfLocalDisk = 4 + } + return +} + +// IsDataMigrationExceedingDiskLimit 检查迁移中数据量是否超过本地磁盘限制 +func (job *TendisplusLightningJob) IsDataMigrationExceedingDiskLimit() (ok bool, + allowedMigrationDataSize int64, err error) { + var myDisk01 osPerf.HostDiskUsage + var msg string + ratioNOfLocalDisk := job.GetRatioN_LocalDisk() + myDisk01, err = osPerf.GetMyHostDisk() + if err != nil { + return + } + if myDisk01.UsageRatio > 50 { + // 如果当前已使用磁盘大于50%,则不继续给自己分配迁移任务 + msg = fmt.Sprintf("%s 磁盘使用率大于50%%,磁盘路径:%s,使用率:%d%%,stop accept tendisplus lightning dts task", + job.ServerIP, myDisk01.DirName, myDisk01.UsageRatio) + job.logger.Info(msg) + return + } + lightningMigratingTasks, lightningMigratingDataSize, err := tendisdb.LightningDtsSvrMigratingTasks( + job.BkCloudID, job.ServerIP, constvar.LightningMigratingTasksType, job.logger) + if err != nil 
&& gorm.IsRecordNotFoundError(err) == false { + return false, 0, err + } + // '我'正在迁移中的数据量大于 本地磁盘的 1/ratioNOfLocalDisk, 则不继续给自己分配迁移任务 + if lightningMigratingDataSize > myDisk01.TotalSize/ratioNOfLocalDisk { + msg = fmt.Sprintf( + "正在迁移中的tendisplus lightning task 数据量:%s > 本地磁盘的1/%d:%s,本地磁盘大小:%s,stop accept tendisplus lightning dts task", + humanize.Bytes(lightningMigratingDataSize), + ratioNOfLocalDisk, + humanize.Bytes(myDisk01.TotalSize/ratioNOfLocalDisk), + humanize.Bytes(myDisk01.TotalSize)) + job.logger.Info(msg) + return false, 0, nil + } + allowedMigrationDataSize = int64(myDisk01.TotalSize/ratioNOfLocalDisk - lightningMigratingDataSize) + if allowedMigrationDataSize < 10*constvar.GiByte { // less than 10GB + msg = fmt.Sprintf( + "本地磁盘可用于迁移的空间:%s,tendisplus lightning 迁移中的数据量:%s,剩余可迁移数据量:%s < 10GB,stop accept tendisplus lightning dts task", + humanize.Bytes(myDisk01.TotalSize/ratioNOfLocalDisk), + humanize.Bytes(lightningMigratingDataSize), + humanize.Bytes(uint64(allowedMigrationDataSize))) + job.logger.Info(msg) + return false, allowedMigrationDataSize, nil + } + // 如果'我'上面还有2个及以上task等待做 lightningCosFileDownload,则不继续认领 + todoBackupTasks := []*tendisdb.TbTendisplusLightningTask{} + for _, task01 := range lightningMigratingTasks { + task02 := task01 + if task02.TaskType == constvar.TendisplusLightningCosFileDownload && task02.Status == 0 { + todoBackupTasks = append(todoBackupTasks, task02) + } + } + if len(todoBackupTasks) >= 2 { + job.logger.Info(fmt.Sprintf( + "tendisplus lightning 正在等待 lightningCosFileDownload 的task数量:%d>=2,stop accept tendisplus lightning dts task", + len(todoBackupTasks))) + return false, allowedMigrationDataSize, nil + } + return true, allowedMigrationDataSize, nil +} + +func (job *TendisplusLightningJob) updateTasksCosFileSize(taskRows []*tendisdb.TbTendisplusLightningTask) ( + anyRowUpdate bool, err error) { + var cosRet *cos.BucketGetResult + var rowFatherTask tendispluslightning.LightningFatherTask + var msg string + anyRowUpdate = 
false + for _, row := range taskRows { + taskRow := row + job.logger.Info(fmt.Sprintf("start updateTasksCosFileSize ticket_id:%d dst_cluster:%s task_id:%s", + taskRow.TicketID, taskRow.DstCluster, taskRow.TaskId)) + // 如果没有设置过,则默认为 0 + if taskRow.CosFileSize != 0 { + job.logger.Info(fmt.Sprintf("ticket_id:%d dst_cluster:%s task_id:%s cos_file_size:%d skip.....", + taskRow.TicketID, taskRow.DstCluster, taskRow.TaskId, taskRow.CosFileSize)) + continue + } + if taskRow.CosKey == "" { + job.logger.Info(fmt.Sprintf("ticket_id:%d dst_cluster:%s task_id:%s cos_key:%s skip.....", + taskRow.TicketID, taskRow.DstCluster, taskRow.TaskId, taskRow.CosKey)) + continue + } + rowFatherTask = tendispluslightning.NewLightningFatherTask(taskRow) + rowFatherTask.Logger = job.logger + cosRet, err = job.cosWorker.GetFileList(taskRow.CosKey, 100) + if err != nil { + // 更新数据库 + msg += err.Error() + "\n" + rowFatherTask.SetMessage(err.Error()) + rowFatherTask.SetStatus(-1) + rowFatherTask.UpdateRow() + continue + } + if len(cosRet.Contents) == 0 { + // 更新数据库 + err = fmt.Errorf("cos key:%s file.Contents empty records", taskRow.CosKey) + msg += err.Error() + "\n" + rowFatherTask.SetMessage(err.Error()) + rowFatherTask.SetStatus(-1) + rowFatherTask.UpdateRow() + continue + } + job.logger.Info(fmt.Sprintf("ticket_id:%d dst_cluster:%s task_id:%s start update cosFileSize:%d ...", + taskRow.TicketID, taskRow.DstCluster, taskRow.TaskId, cosRet.Contents[0].Size)) + anyRowUpdate = true + rowFatherTask.SetMessage("update fileSize ok") + rowFatherTask.SetCosFileSize(cosRet.Contents[0].Size) + rowFatherTask.SetStatus(0) + rowFatherTask.UpdateRow() + } + if msg != "" { + err = fmt.Errorf(msg) + return + } + return +} + +// ClaimDtsJobs 认领tendisplus lightning dts任务 +func (job *TendisplusLightningJob) ClaimDtsJobs() (err error) { + var diskOk bool + var allowedMigrationDataSize int64 + var toScheduleTasks []*tendisdb.TbTendisplusLightningTask + var acceptOk bool + succClaimTaskCnt := 0 + defer func() { + 
if r := recover(); r != nil { + job.logger.Error(string(debug.Stack())) + } + }() + for { + time.Sleep(1 * time.Minute) + job.logger.Info(fmt.Sprintf("dts_server:%s start claim tendisplus lightning dts jobs", job.ServerIP)) + // 如果dts_server在黑名单中,则不认领task + if scrdbclient.IsMyselfInBlacklist(job.logger) { + job.logger.Info( + fmt.Sprintf("dts_server:%s in dts_server blacklist,stop accept tendisplus lightning dts task", + job.ServerIP)) + continue + } + diskOk, allowedMigrationDataSize, err = job.IsDataMigrationExceedingDiskLimit() + if err != nil { + continue + } + if !diskOk { + continue + } + // 下面模块执行逻辑: + // - LightningLast30DaysToScheduleJobs 获取最近一个月待调度的Jobs(相同城市) + // - 遍历Jobs + // - lightningJobToScheduleTasks 获取每个job中所有待调度的task + // - 如果task 同时满足条件,则本节点 可调度该task: + // 1. 数据量满足 <= availDiskSize, availDiskSize = 本机磁盘1/fractionalOfLocalDisk - 本机迁移中的(tasks)的dataSize + // 2. 其他dts_server没有在 尝试认领该task + toScheduleJobs, err := tendisdb.LightningLast30DaysToScheduleJobs(job.BkCloudID, allowedMigrationDataSize, + job.ZoneName, job.logger) + if err != nil { + continue + } + if len(toScheduleJobs) == 0 { + job.logger.Info(fmt.Sprintf( + "tendisplus lightningLast30DaysToScheduleJobs empty record,剩余可迁移的数据量:%s,ZoneName:%s", + humanize.Bytes(uint64(allowedMigrationDataSize)), job.ZoneName)) + continue + } + succClaimTaskCnt = 0 + anyRowUpdate := false + for _, tmpJob := range toScheduleJobs { + jobItem := tmpJob + toScheduleTasks, err = tendisdb.LightningJobToScheduleTasks( + jobItem.TicketID, jobItem.DstCluster, job.logger) + if err != nil { + // 执行下一个Job的遍历 + continue + } + if len(toScheduleTasks) == 0 { + + continue + } + // 如果'我'更新了任何task的cosFileSize,则不再继续这个job对应task等认领和执行 + anyRowUpdate, err = job.updateTasksCosFileSize(toScheduleTasks) + if err != nil { + continue + } + if anyRowUpdate { + continue + } + for _, tmpTask := range toScheduleTasks { + taskItem := tmpTask + if allowedMigrationDataSize < 10*constvar.GiByte { + // 如果可用空间小于10GB,则不再继续 + break + } + if 
taskItem.CosFileSize > allowedMigrationDataSize { + // 数据量过大,遍历job的下一个task + continue + } + // 尝试认领task + acceptOk, err = job.TryAcceptTask(taskItem) + if err != nil { + continue + } + if !acceptOk { + continue + } + allowedMigrationDataSize = allowedMigrationDataSize - taskItem.CosFileSize + succClaimTaskCnt++ + // 如果认领的task个数 超过 backup limit,则等待下一次调度 + if succClaimTaskCnt > job.GetTaskParallelLimit(constvar.TendisplusLightningCosFileDownload) { + break + } + } + if err != nil { + // 执行下一个job的遍历 + continue + } + // 如果认领的task个数 超过 cosfiledownload limit,则等待下一次调度 + if succClaimTaskCnt > job.GetTaskParallelLimit(constvar.TendisplusLightningCosFileDownload) { + break + } + } + } +} + +func (job *TendisplusLightningJob) getFirstTaskType() (taskType string) { + return constvar.TendisplusLightningCosFileDownload +} + +// GetTaskParallelLimit concurrency for task +func (job *TendisplusLightningJob) GetTaskParallelLimit(taskType string) int { + limit := viper.GetInt(taskType + "ParallelLimit") + if limit == 0 { + limit = 5 // 默认值5 + } + return limit +} + +// TryAcceptTask 上锁,尝试认领任务 +func (job *TendisplusLightningJob) TryAcceptTask(taskRow *tendisdb.TbTendisplusLightningTask) (succ bool, err error) { + var lockOK bool + scrCli, err := scrdbclient.NewClient(viper.GetString("serviceName"), job.logger) + if err != nil { + return false, err + } + // 获取锁,尝试认领该task + lockOK, err = scrCli.DtsLockKey(taskRow.TaskLockKey(), job.ServerIP, 120) + if err != nil { + return false, err + } + if !lockOK { + // 已经有其他dtsserver在尝试认领该task,遍历下一个task + job.logger.Info(fmt.Sprintf( + `taskId:%s ticketID:%d dstCluster:%s 已经有其他dts_server在调度,放弃调度`, + taskRow.TaskId, taskRow.TicketID, taskRow.DstCluster)) + return false, nil + } + job.logger.Info(fmt.Sprintf("myself:%s get task dts lock ok,key:%s", job.ServerIP, taskRow.TaskLockKey())) + // 尝试认领task成功 + job.logger.Info(fmt.Sprintf( + `myself:%s 认领task,下一步开始迁移,taskId:%s ticketID:%d dstCluster:%s`, + job.ServerIP, taskRow.TaskId, taskRow.TicketID, 
taskRow.DstCluster)) + taskRow.DtsServer = job.ServerIP + taskRow.TaskType = job.getFirstTaskType() + taskRow.Status = 0 + taskRow.UpdateFieldsValues([]string{"DtsServer", "TaskType", "Status"}, job.logger) + return true, nil +} + +// BgDtsTaskRunnerWithConcurrency 执行子task,限制并发度,如backup、tredisdump等task任务 +// 如拉起5个goroutine执行 backup tasks, 拉起 5个goroutine执行 tredisdump tasks +func (job *TendisplusLightningJob) BgDtsTaskRunnerWithConcurrency(taskType string) { + var err error + wg := sync.WaitGroup{} + genChan := make(chan *tendisdb.TbTendisplusLightningTask) + limit := job.GetTaskParallelLimit(taskType) + status := 0 + perTaskNum := 5 + + for worker := 0; worker < limit; worker++ { + wg.Add(1) + go func() { + defer wg.Done() + defer func() { + if r := recover(); r != nil { + job.logger.Error(string(debug.Stack())) + } + }() + for oldRow := range genChan { + // 可能在等待调度过程中row01数据已经改变,所以重新获取数据 + latestRow, err := tendisdb.LightningTaskByID(oldRow.TaskId, job.logger) + if err != nil { + latestRow = oldRow + } + if latestRow == nil { + job.logger.Warn(fmt.Sprintf("根据task_id:%s获取task row失败,taskRow:%v", oldRow.TaskId, latestRow)) + continue + } + if latestRow.Status != 0 || latestRow.TaskType != taskType { + job.logger.Info(fmt.Sprintf("task_id:%s status=%d taskType=%s. 
期待的taskType:%s 已经在运行中,不做任何处理", + latestRow.TaskId, latestRow.Status, latestRow.TaskType, taskType)) + continue + } + task01 := factory.MyTendisplusLightningTaskFactory(latestRow) + task01.Init() // 执行Init,成功则status=1,失败则status=-1 + task01.Execute() + } + }() + } + go func() { + defer close(genChan) + var toExecuteRows []*tendisdb.TbTendisplusLightningTask + for { + if !tendisdb.IsAllLightningTasksToForceKill(toExecuteRows) { + // 如果所有dts tasks都是 ForceKillTaskTodo 状态,则大概率该dts job用户已强制终止, 无需sleep + // 否则 sleep 10s + time.Sleep(10 * time.Second) + } + toExecuteRows, err = tendisdb.LightningLast30DaysToExecuteTasks(job.BkCloudID, job.ServerIP, taskType, + status, perTaskNum, job.logger) + if err != nil { + continue + } + if len(toExecuteRows) == 0 { + job.logger.Info(fmt.Sprintf("serverIP:%s not found to be executed %s task,sleep 10s", job.ServerIP, taskType)) + continue + } + for _, row := range toExecuteRows { + toDoRow := row + // 将task放入channel,等待消费者goroutine真正处理 + genChan <- toDoRow + } + } + }() + wg.Wait() +} + +// StartBgWorkers 拉起多个后台goroutine +func (job *TendisplusLightningJob) StartBgWorkers() { + // tendisplus lightning job + // 在tasks被认领后,后台负责执行task的worker + go func() { + job.wg.Add(1) + defer job.wg.Done() + job.BgDtsTaskRunnerWithConcurrency(constvar.TendisplusLightningCosFileDownload) + }() + go func() { + job.wg.Add(1) + defer job.wg.Done() + job.BgDtsTaskRunnerWithConcurrency(constvar.TendisplusLightningFileSplit) + }() + go func() { + job.wg.Add(1) + defer job.wg.Done() + job.BgDtsTaskRunnerWithConcurrency(constvar.TendisplusLightningGenerateSst) + }() + go func() { + job.wg.Add(1) + defer job.wg.Done() + job.BgDtsTaskRunnerWithConcurrency(constvar.TendisplusLightningScpSst) + }() + go func() { + job.wg.Add(1) + defer job.wg.Done() + job.BgDtsTaskRunnerWithConcurrency(constvar.TendisplusLightningSlaveLoadSst) + }() + // 根据dts_server自身情况尝试认领 task + go func() { + job.wg.Add(1) + defer job.wg.Done() + job.ClaimDtsJobs() + }() +} diff --git 
a/dbm-services/redis/redis-dts/pkg/dtsTask/factory/factory.go b/dbm-services/redis/redis-dts/pkg/dtsTask/factory/factory.go index a983953def..67ea443578 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/factory/factory.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/factory/factory.go @@ -6,6 +6,7 @@ import ( "dbm-services/redis/redis-dts/pkg/constvar" "dbm-services/redis/redis-dts/pkg/dtsTask/rediscache" "dbm-services/redis/redis-dts/pkg/dtsTask/tendisplus" + "dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning" "dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd" ) @@ -17,8 +18,8 @@ type MyTasker interface { Execute() } -// MyTaskFactory task工厂 -func MyTaskFactory(taskRow *tendisdb.TbTendisDTSTask) MyTasker { +// MyTendisDtsTaskFactory task工厂 +func MyTendisDtsTaskFactory(taskRow *tendisdb.TbTendisDTSTask) MyTasker { if taskRow.TaskType == (&tendisssd.TendisBackupTask{}).TaskType() { // tendis-ssd return tendisssd.NewTendisBackupTask(taskRow) @@ -44,3 +45,19 @@ func MyTaskFactory(taskRow *tendisdb.TbTendisDTSTask) MyTasker { } return nil } + +// MyTendisplusLightningTaskFactory task工厂 +func MyTendisplusLightningTaskFactory(taskRow *tendisdb.TbTendisplusLightningTask) MyTasker { + if taskRow.TaskType == (&tendispluslightning.CosFileDownloadTask{}).TaskType() { + return tendispluslightning.NewCosFileDownloadTask(taskRow) + } else if taskRow.TaskType == (&tendispluslightning.FileSplitTask{}).TaskType() { + return tendispluslightning.NewFileSplitTask(taskRow) + } else if taskRow.TaskType == (&tendispluslightning.GenerateSstTask{}).TaskType() { + return tendispluslightning.NewGenerateSstTask(taskRow) + } else if taskRow.TaskType == (&tendispluslightning.ScpSstTask{}).TaskType() { + return tendispluslightning.NewScpSstTask(taskRow) + } else if taskRow.TaskType == (&tendispluslightning.SlaveLoadSstTask{}).TaskType() { + return tendispluslightning.NewSlaveLoadSstTask(taskRow) + } + return nil +} diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/init.go 
b/dbm-services/redis/redis-dts/pkg/dtsTask/init.go index e689b810ab..cfcb01a3d8 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/init.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/init.go @@ -23,8 +23,8 @@ import ( "go.uber.org/zap" ) -// FatherTask 迁移父task -type FatherTask struct { +// TendisDtsFatherTask 迁移父task +type TendisDtsFatherTask struct { RowData *tendisdb.TbTendisDTSTask `json:"rowData"` valueChangedFields []string // 值已变化的字段名 TaskDir string `json:"taskDir"` @@ -32,27 +32,27 @@ type FatherTask struct { Err error `json:"-"` } -// NewFatherTask 新建tredisdump task -func NewFatherTask(row *tendisdb.TbTendisDTSTask) FatherTask { - ret := FatherTask{} +// NewDtsFatherTask 新建tredisdump task +func NewDtsFatherTask(row *tendisdb.TbTendisDTSTask) TendisDtsFatherTask { + ret := TendisDtsFatherTask{} ret.RowData = row return ret } // SetStatus 设置status的值 -func (t *FatherTask) SetStatus(status int) { +func (t *TendisDtsFatherTask) SetStatus(status int) { t.RowData.Status = status t.valueChangedFields = append(t.valueChangedFields, "Status") } // SetTaskType 设置task_type的值 -func (t *FatherTask) SetTaskType(taskType string) { +func (t *TendisDtsFatherTask) SetTaskType(taskType string) { t.RowData.TaskType = taskType t.valueChangedFields = append(t.valueChangedFields, "TaskType") } // SetMessage 设置message的值 -func (t *FatherTask) SetMessage(format string, args ...interface{}) { +func (t *TendisDtsFatherTask) SetMessage(format string, args ...interface{}) { if len(args) == 0 { t.RowData.Message = format } else { @@ -62,92 +62,92 @@ func (t *FatherTask) SetMessage(format string, args ...interface{}) { } // SetFetchFile set function -func (t *FatherTask) SetFetchFile(file string) { +func (t *TendisDtsFatherTask) SetFetchFile(file string) { t.RowData.FetchFile = file t.valueChangedFields = append(t.valueChangedFields, "FetchFile") } // SetSqlfileDir set function -func (t *FatherTask) SetSqlfileDir(dir string) { +func (t *TendisDtsFatherTask) SetSqlfileDir(dir string) 
{ t.RowData.SqlfileDir = dir t.valueChangedFields = append(t.valueChangedFields, "SqlfileDir") } // SetSyncOperate set function -func (t *FatherTask) SetSyncOperate(op string) { +func (t *TendisDtsFatherTask) SetSyncOperate(op string) { t.RowData.SyncOperate = op t.valueChangedFields = append(t.valueChangedFields, "SyncOperate") } // SetTendisBinlogLag set function -func (t *FatherTask) SetTendisBinlogLag(lag int64) { +func (t *TendisDtsFatherTask) SetTendisBinlogLag(lag int64) { t.RowData.TendisBinlogLag = lag t.valueChangedFields = append(t.valueChangedFields, "TendisBinlogLag") } // SetSrcNewLogCount set function -func (t *FatherTask) SetSrcNewLogCount(logcnt int64) { +func (t *TendisDtsFatherTask) SetSrcNewLogCount(logcnt int64) { t.RowData.SrcNewLogCount = logcnt t.valueChangedFields = append(t.valueChangedFields, "SrcNewLogCount") } // SetSrcOldLogCount set function -func (t *FatherTask) SetSrcOldLogCount(logcnt int64) { +func (t *TendisDtsFatherTask) SetSrcOldLogCount(logcnt int64) { t.RowData.SrcOldLogCount = logcnt t.valueChangedFields = append(t.valueChangedFields, "SrcOldLogCount") } // SetIsSrcLogCountRestored set function -func (t *FatherTask) SetIsSrcLogCountRestored(isRestored int) { +func (t *TendisDtsFatherTask) SetIsSrcLogCountRestored(isRestored int) { t.RowData.IsSrcLogCountRestored = isRestored t.valueChangedFields = append(t.valueChangedFields, "IsSrcLogCountRestored") } // SetIgnoreErrlist set function -func (t *FatherTask) SetIgnoreErrlist(errlist string) { +func (t *TendisDtsFatherTask) SetIgnoreErrlist(errlist string) { t.RowData.IgnoreErrlist = errlist t.valueChangedFields = append(t.valueChangedFields, "IgnoreErrlist") } // SetSyncerPort set function -func (t *FatherTask) SetSyncerPort(syncport int) { +func (t *TendisDtsFatherTask) SetSyncerPort(syncport int) { t.RowData.SyncerPort = syncport t.valueChangedFields = append(t.valueChangedFields, "SyncerPort") } // SetSyncerPid set function -func (t *FatherTask) SetSyncerPid(syncpid int) { 
+func (t *TendisDtsFatherTask) SetSyncerPid(syncpid int) { t.RowData.SyncerPid = syncpid t.valueChangedFields = append(t.valueChangedFields, "SyncerPid") } // SetSrcHaveListKeys set function -func (t *FatherTask) SetSrcHaveListKeys(havelist int) { +func (t *TendisDtsFatherTask) SetSrcHaveListKeys(havelist int) { t.RowData.SrcHaveListKeys = havelist t.valueChangedFields = append(t.valueChangedFields, "SrcHaveListKeys") } // SetTendisbackupFile set function -func (t *FatherTask) SetTendisbackupFile(file string) { +func (t *TendisDtsFatherTask) SetTendisbackupFile(file string) { t.RowData.TendisbackupFile = file t.valueChangedFields = append(t.valueChangedFields, "TendisbackupFile") } // SetDtsServer set function -func (t *FatherTask) SetDtsServer(svrip string) { +func (t *TendisDtsFatherTask) SetDtsServer(svrip string) { t.RowData.DtsServer = svrip t.valueChangedFields = append(t.valueChangedFields, "DtsServer") } // UpdateDbAndLogLocal update db相关字段 并记录本地日志 -func (t *FatherTask) UpdateDbAndLogLocal(format string, args ...interface{}) { +func (t *TendisDtsFatherTask) UpdateDbAndLogLocal(format string, args ...interface{}) { t.SetMessage(format, args...) 
t.UpdateRow() t.Logger.Info(t.RowData.Message) } // UpdateRow update tendisdb相关字段(值变化了的字段) -func (t *FatherTask) UpdateRow() { +func (t *TendisDtsFatherTask) UpdateRow() { if len(t.valueChangedFields) == 0 { return } @@ -156,7 +156,7 @@ func (t *FatherTask) UpdateRow() { } // Init 初始化 -func (t *FatherTask) Init() { +func (t *TendisDtsFatherTask) Init() { defer func() { if t.Err != nil { t.SetStatus(-1) @@ -178,7 +178,7 @@ func (t *FatherTask) Init() { } // InitTaskDir 初始化本地任务目录 -func (t *FatherTask) InitTaskDir() error { +func (t *TendisDtsFatherTask) InitTaskDir() error { currExecPath, err := util.CurrentExecutePath() if err != nil { return err @@ -195,7 +195,7 @@ func (t *FatherTask) InitTaskDir() error { } // InitLogger 初始化日志文件logger -func (t *FatherTask) InitLogger() error { +func (t *TendisDtsFatherTask) InitLogger() error { err := t.InitTaskDir() if err != nil { return nil @@ -213,7 +213,7 @@ func (t *FatherTask) InitLogger() error { } // IsSupportPipeImport 是否支持 redis-cli --pipe < $file 导入 -func (t *FatherTask) IsSupportPipeImport() bool { +func (t *TendisDtsFatherTask) IsSupportPipeImport() bool { // if strings.HasPrefix(t.RowData.DstCluster, "tendisx") || constvar.IsGlobalEnv() == true { // return true // } @@ -221,7 +221,7 @@ func (t *FatherTask) IsSupportPipeImport() bool { } // TredisdumpOuputFormat tredisdump结果文件内容格式,resp格式 或 普通命令格式 -func (t *FatherTask) TredisdumpOuputFormat() string { +func (t *TendisDtsFatherTask) TredisdumpOuputFormat() string { if t.IsSupportPipeImport() { return constvar.TredisdumpRespFormat } @@ -229,7 +229,7 @@ func (t *FatherTask) TredisdumpOuputFormat() string { } // TredisdumpOuputFileSize tredisdump结果文件大小 -func (t *FatherTask) TredisdumpOuputFileSize() uint64 { +func (t *TendisDtsFatherTask) TredisdumpOuputFileSize() uint64 { var fileSize uint64 = 0 var sizeStr string if t.IsSupportPipeImport() { @@ -253,7 +253,7 @@ func (t *FatherTask) TredisdumpOuputFileSize() uint64 { } // ImportParallelLimit 导入并发度 -func (t *FatherTask) 
ImportParallelLimit() int { +func (t *TendisDtsFatherTask) ImportParallelLimit() int { limit := 0 if t.IsSupportPipeImport() { limit = viper.GetInt("respFileImportParallelLimit") @@ -274,7 +274,7 @@ func (t *FatherTask) ImportParallelLimit() int { } // ImportTimeout 导入并发度 -func (t *FatherTask) ImportTimeout() int { +func (t *TendisDtsFatherTask) ImportTimeout() int { timeout := 0 if t.IsSupportPipeImport() { timeout = viper.GetInt("respFileImportTimeout") @@ -294,7 +294,7 @@ func (t *FatherTask) ImportTimeout() int { return timeout } -func (t *FatherTask) newSrcRedisClient() *myredis.RedisWorker { +func (t *TendisDtsFatherTask) newSrcRedisClient() *myredis.RedisWorker { srcAddr := fmt.Sprintf("%s:%d", t.RowData.SrcIP, t.RowData.SrcPort) srcPasswd, err := base64.StdEncoding.DecodeString(t.RowData.SrcPassword) if err != nil { @@ -312,7 +312,7 @@ func (t *FatherTask) newSrcRedisClient() *myredis.RedisWorker { } // SaveSrcSSDKeepCount 保存source ssd的 slave-log-keep-count值 -func (t *FatherTask) SaveSrcSSDKeepCount() { +func (t *TendisDtsFatherTask) SaveSrcSSDKeepCount() { var logcnt int64 srcClient := t.newSrcRedisClient() if t.Err != nil { @@ -341,7 +341,7 @@ func (t *FatherTask) SaveSrcSSDKeepCount() { } // RestoreSrcSSDKeepCount 恢复source ssd的 slave-log-keep-count值 -func (t *FatherTask) RestoreSrcSSDKeepCount() { +func (t *TendisDtsFatherTask) RestoreSrcSSDKeepCount() { srcClient := t.newSrcRedisClient() if t.Err != nil { return @@ -359,7 +359,7 @@ func (t *FatherTask) RestoreSrcSSDKeepCount() { } // ChangeSrcSSDKeepCount 修改source ssd的 slave-log-keep-count值 -func (t *FatherTask) ChangeSrcSSDKeepCount(dstKeepCount int64) { +func (t *TendisDtsFatherTask) ChangeSrcSSDKeepCount(dstKeepCount int64) { srcClient := t.newSrcRedisClient() if t.Err != nil { return @@ -374,7 +374,7 @@ func (t *FatherTask) ChangeSrcSSDKeepCount(dstKeepCount int64) { } // GetSyncSeqFromFullBackup get sync pos from full backup -func (t *FatherTask) GetSyncSeqFromFullBackup() (ret *SyncSeqItem) { 
+func (t *TendisDtsFatherTask) GetSyncSeqFromFullBackup() (ret *SyncSeqItem) { var err error ret = &SyncSeqItem{} syncPosFile := filepath.Join(t.RowData.SqlfileDir, "sync-pos.txt") @@ -411,7 +411,7 @@ func (t *FatherTask) GetSyncSeqFromFullBackup() (ret *SyncSeqItem) { } // ConfirmSrcRedisBinlogOK confirm binlog seq is OK in src redis -func (t *FatherTask) ConfirmSrcRedisBinlogOK(seq uint64) { +func (t *TendisDtsFatherTask) ConfirmSrcRedisBinlogOK(seq uint64) { srcAddr := fmt.Sprintf("%s:%d", t.RowData.SrcIP, t.RowData.SrcPort) srcPasswd, err := base64.StdEncoding.DecodeString(t.RowData.SrcPassword) if err != nil { @@ -450,7 +450,7 @@ func (t *FatherTask) ConfirmSrcRedisBinlogOK(seq uint64) { } // ClearSrcHostBackup clear src redis remote backup -func (t *FatherTask) ClearSrcHostBackup() { +func (t *TendisDtsFatherTask) ClearSrcHostBackup() { if strings.Contains(t.RowData.TendisbackupFile, "REDIS_FULL_rocksdb_") == false { return } @@ -484,7 +484,7 @@ func (t *FatherTask) ClearSrcHostBackup() { } // ClearLocalFetchBackup clear src redis local backup -func (t *FatherTask) ClearLocalFetchBackup() { +func (t *TendisDtsFatherTask) ClearLocalFetchBackup() { srcAddr := fmt.Sprintf("%s_%d", t.RowData.SrcIP, t.RowData.SrcPort) if strings.Contains(t.RowData.FetchFile, srcAddr) == false { // fetchFile 必须包含 srcAddr,否则不确定传入的是什么参数,对未知目录 rm -rf 很危险 @@ -502,7 +502,7 @@ func (t *FatherTask) ClearLocalFetchBackup() { } // ClearLocalSQLDir clear local sql dir(backup to commands) -func (t *FatherTask) ClearLocalSQLDir() { +func (t *TendisDtsFatherTask) ClearLocalSQLDir() { srcAddr := fmt.Sprintf("%s_%d", t.RowData.SrcIP, t.RowData.SrcPort) if strings.Contains(t.RowData.SqlfileDir, srcAddr) == false { // fetchFile 必须包含 srcAddr,否则不确定传入的是什么参数,对未知目录 rm -rf 很危险 @@ -521,7 +521,7 @@ func (t *FatherTask) ClearLocalSQLDir() { // DealProcessPid 处理进程id; // 如用户发送 ForceKillTaskTodo '强制终止' 指令,则tredisdump、redis-cli等命令均执行kill操作 -func (t *FatherTask) DealProcessPid(pid int) error { +func (t 
*TendisDtsFatherTask) DealProcessPid(pid int) error { go func(pid01 int) { bakTaskType := t.RowData.TaskType bakStatus := t.RowData.Status @@ -574,7 +574,7 @@ func (t *FatherTask) DealProcessPid(pid int) error { } // TredisdumpThreadCnt get tredisdump threadcnt -func (t *FatherTask) TredisdumpThreadCnt() int { +func (t *TendisDtsFatherTask) TredisdumpThreadCnt() int { threadCnt := viper.GetInt("tredisdumpTheadCnt") if threadCnt <= 0 { threadCnt = 10 // default 10 @@ -585,7 +585,7 @@ func (t *FatherTask) TredisdumpThreadCnt() int { } // SaveIgnoreErrs 记录忽略的错误类型 -func (t *FatherTask) SaveIgnoreErrs(igErrs []string) { +func (t *TendisDtsFatherTask) SaveIgnoreErrs(igErrs []string) { isUpdated := false for _, igErr := range igErrs { if strings.Contains(t.RowData.IgnoreErrlist, igErr) == false { @@ -603,12 +603,12 @@ func (t *FatherTask) SaveIgnoreErrs(igErrs []string) { } // IsMatchAny is match all -func (t *FatherTask) IsMatchAny(reg01 string) bool { +func (t *TendisDtsFatherTask) IsMatchAny(reg01 string) bool { return reg01 == "*" || reg01 == ".*" || reg01 == "^.*$" } // RefreshRowData refresh task row data -func (task *FatherTask) RefreshRowData() { +func (task *TendisDtsFatherTask) RefreshRowData() { row01, err := tendisdb.GetTaskByID(task.RowData.ID, task.Logger) if err != nil { task.Err = err @@ -623,12 +623,12 @@ func (task *FatherTask) RefreshRowData() { } // GetSrcRedisAddr 源redis_addr -func (task *FatherTask) GetSrcRedisAddr() string { +func (task *TendisDtsFatherTask) GetSrcRedisAddr() string { return task.RowData.SrcIP + ":" + strconv.Itoa(task.RowData.SrcPort) } // GetSrcRedisPasswd 源redis_password -func (task *FatherTask) GetSrcRedisPasswd() string { +func (task *TendisDtsFatherTask) GetSrcRedisPasswd() string { srcPasswd, err := base64.StdEncoding.DecodeString(task.RowData.SrcPassword) if err != nil { task.Err = fmt.Errorf("decode srcPassword fail,err:%v,taskid:%d", err, task.RowData.ID) @@ -639,12 +639,12 @@ func (task *FatherTask) GetSrcRedisPasswd() 
string { } // GetDstRedisAddr 目的redis_addr -func (task *FatherTask) GetDstRedisAddr() string { +func (task *TendisDtsFatherTask) GetDstRedisAddr() string { return task.RowData.DstCluster } // GetDstRedisPasswd 目的redis_password -func (task *FatherTask) GetDstRedisPasswd() string { +func (task *TendisDtsFatherTask) GetDstRedisPasswd() string { dstPasswd, err := base64.StdEncoding.DecodeString(task.RowData.DstPassword) if err != nil { task.Err = fmt.Errorf("decode DstPassword fail,err:%v,taskid:%d", err, task.RowData.ID) @@ -655,7 +655,7 @@ func (task *FatherTask) GetDstRedisPasswd() string { } // DisableDstClusterSlowlog dst cluster 'config set slowlog-log-slower-than -1' -func (task *FatherTask) DisableDstClusterSlowlog() { +func (task *TendisDtsFatherTask) DisableDstClusterSlowlog() { dstProxyAddrs, err := util.LookupDbDNSIPs(task.RowData.DstCluster) if err != nil { task.Logger.Error(err.Error()) @@ -682,7 +682,7 @@ func (task *FatherTask) DisableDstClusterSlowlog() { } // EnableDstClusterSlowlog dst cluster 'config set slowlog-log-slower-than 100000' -func (task *FatherTask) EnableDstClusterSlowlog() { +func (task *TendisDtsFatherTask) EnableDstClusterSlowlog() { dstProxyAddrs, err := util.LookupDbDNSIPs(task.RowData.DstCluster) if err != nil { task.Logger.Error(err.Error()) diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/rediscache/makeCacheSync.go b/dbm-services/redis/redis-dts/pkg/dtsTask/rediscache/makeCacheSync.go index 3f0acb6801..fa6fc584ea 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/rediscache/makeCacheSync.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/rediscache/makeCacheSync.go @@ -35,7 +35,7 @@ const ( // MakeCacheSyncTask cache_task type MakeCacheSyncTask struct { - dtsTask.FatherTask + dtsTask.TendisDtsFatherTask RedisShakeBin string `json:"redisSahkeBin"` ShakeLogFile string `json:"shakeLogFile"` ShakeConfFile string `json:"shakeConfFile"` @@ -61,7 +61,7 @@ func (task *MakeCacheSyncTask) NextTask() string { // NewMakeCacheSyncTask 
新建一个 RedisShake启动task func NewMakeCacheSyncTask(row *tendisdb.TbTendisDTSTask) *MakeCacheSyncTask { return &MakeCacheSyncTask{ - FatherTask: dtsTask.NewFatherTask(row), + TendisDtsFatherTask: dtsTask.NewDtsFatherTask(row), } } diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisplus/makeSync.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisplus/makeSync.go index 064b0e27e2..89d50da6d6 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisplus/makeSync.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisplus/makeSync.go @@ -22,7 +22,7 @@ import ( // MakeSyncTask 启动redis-sync type MakeSyncTask struct { - dtsTask.FatherTask + dtsTask.TendisDtsFatherTask RedisCliTool string `json:"redisCliTool"` RedisSyncTool string `json:"redisSyncTool"` SyncLogFile string `json:"syncLogFile"` @@ -43,7 +43,7 @@ func (task *MakeSyncTask) NextTask() string { // NewMakeSyncTask 新建一个 redis-sync启动task func NewMakeSyncTask(row *tendisdb.TbTendisDTSTask) *MakeSyncTask { return &MakeSyncTask{ - FatherTask: dtsTask.NewFatherTask(row), + TendisDtsFatherTask: dtsTask.NewDtsFatherTask(row), } } diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/fatherTask.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/fatherTask.go new file mode 100644 index 0000000000..eebd510574 --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/fatherTask.go @@ -0,0 +1,203 @@ +package tendispluslightning + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "path/filepath" + "strings" + + "go.uber.org/zap" + + "dbm-services/redis/redis-dts/models/mysql/tendisdb" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/tclog" + "dbm-services/redis/redis-dts/util" +) + +// LightningFatherTask lightning father task +type LightningFatherTask struct { + RowData *tendisdb.TbTendisplusLightningTask `json:"rowData"` + valueChangedFields []string // 值已变化的字段名 + TaskDir string `json:"taskDir"` + Logger *zap.Logger 
`json:"-"` + Err error `json:"-"` +} + +// NewLightningFatherTask new lightning father task +func NewLightningFatherTask(rowData *tendisdb.TbTendisplusLightningTask) LightningFatherTask { + return LightningFatherTask{ + RowData: rowData, + valueChangedFields: []string{}, + } +} + +// SetStatus 设置status的值 +func (t *LightningFatherTask) SetStatus(status int) { + t.RowData.Status = status + t.valueChangedFields = append(t.valueChangedFields, "Status") +} + +// SetCosFileSize 设置cosFileSize的值 +func (task *LightningFatherTask) SetCosFileSize(cosFileSize int64) { + task.RowData.CosFileSize = cosFileSize + task.valueChangedFields = append(task.valueChangedFields, "CosFileSize") +} + +// SetMessage 设置message的值 +func (t *LightningFatherTask) SetMessage(format string, args ...interface{}) { + if len(args) == 0 { + t.RowData.Message = format + } else { + t.RowData.Message = fmt.Sprintf(format, args...) + } + t.valueChangedFields = append(t.valueChangedFields, "Message") +} + +// SetOperateType set operateType +func (t *LightningFatherTask) SetOperateType(op string) { + t.RowData.OperateType = op + t.valueChangedFields = append(t.valueChangedFields, "OperateType") +} + +// SetTaskType 设置task_type的值 +func (t *LightningFatherTask) SetTaskType(taskType string) { + t.RowData.TaskType = taskType + t.valueChangedFields = append(t.valueChangedFields, "TaskType") +} + +// UpdateRow update tendisdb相关字段(值变化了的字段) +func (t *LightningFatherTask) UpdateRow() { + if len(t.valueChangedFields) == 0 { + return + } + t.RowData.UpdateFieldsValues(t.valueChangedFields, t.Logger) + t.valueChangedFields = []string{} +} + +// Init 初始化 +func (t *LightningFatherTask) Init() { + defer func() { + if t.Err != nil { + t.SetStatus(-1) + t.SetMessage(t.Err.Error()) + } else { + t.SetStatus(1) // 更新为running状态 + } + t.UpdateRow() + }() + t.Err = t.InitLogger() + if t.Err != nil { + return + } + if t.RowData.OperateType == constvar.RedisForceKillTaskTodo { + t.RowData.OperateType = 
constvar.RedisForceKillTaskSuccess + t.Err = fmt.Errorf(constvar.RedisForceKillTaskSuccess + "...") + return + } +} + +// InitTaskDir 初始化本地任务目录 +func (t *LightningFatherTask) InitTaskDir() error { + currExecPath, err := util.CurrentExecutePath() + if err != nil { + return err + } + domainPort := strings.Split(t.RowData.DstCluster, ":") + subDir := fmt.Sprintf("tasks/%d_%s_%s/%s", t.RowData.TicketID, + domainPort[0], domainPort[1], t.RowData.TaskId) + t.TaskDir = filepath.Join(currExecPath, subDir) + err = util.MkDirIfNotExists(t.TaskDir) + if err != nil { + return err + } + return nil +} + +// InitLogger 初始化日志文件logger +func (t *LightningFatherTask) InitLogger() error { + err := t.InitTaskDir() + if err != nil { + return nil + } + logFile := fmt.Sprintf("task_%s.log", t.RowData.TaskId) + fullPath := filepath.Join(t.TaskDir, logFile) + t.Logger = tclog.NewFileLogger(fullPath) + return nil +} + +// GetLocalCosFile 获取本地cos文件保存路径 +func (t *LightningFatherTask) GetLocalCosFile() string { + return t.RowData.TaskId + ".cos_binary" +} + +// GetSplitOutputDir 获取split输出目录 +func (t *LightningFatherTask) GetSplitOutputDir() string { + return "split_outout_dir" +} + +// GetSlaveNodeSstDir 获取slave节点sst文件保存目录 +func (t *LightningFatherTask) GetSlaveNodeSstDir(slaveIP, slavePort string) string { + // return filepath.Join("slave_nodes_sst_dir", slaveIP+"_"+slavePort) + return "slave_nodes_sst_dir_" + slaveIP + "_" + slavePort +} + +type clusterNodeItem struct { + MasterAddr string `json:"master_addr"` + SlaveAddr string `json:"slave_addr"` + Slots string `json:"slots"` + RedisPasswordEncode string `json:"redis_password_encode"` + RedisPassword string `json:"redis_password"` +} + +// GetSlaveIpPort 获取slave的ip和port +func (c *clusterNodeItem) GetSlaveIpPort() (ip, port string, err error) { + if c.SlaveAddr == "" { + err = fmt.Errorf("slave_addr is empty,data:%s", util.ToString(c)) + return + } + tempList := strings.Split(c.SlaveAddr, ":") + if len(tempList) != 2 { + err = 
fmt.Errorf("slave_addr:%s format error,data:%s", c.SlaveAddr, util.ToString(c)) + return + } + ip = tempList[0] + port = tempList[1] + return +} + +// GetDstClusterNodes 获取目标集群 cluster nodes信息 +func (task *LightningFatherTask) GetDstClusterNodes() (clusterNodes []*clusterNodeItem) { + var lightningJobsRows []*tendisdb.TbTendisplusLightningJob + var passwordDecode []byte + lightningJobsRows, task.Err = tendisdb.GetLightningJob(task.RowData.TicketID, task.RowData.DstCluster, task.Logger) + if task.Err != nil { + return + } + if len(lightningJobsRows) == 0 { + task.Err = fmt.Errorf("获取ticket_id:%d dst_cluster:%s cluster nodes信息为空", + task.RowData.TicketID, + task.RowData.DstCluster) + task.Logger.Error(task.Err.Error()) + return + } + task.Err = json.Unmarshal([]byte(lightningJobsRows[0].ClusterNodes), &clusterNodes) + if task.Err != nil { + task.Err = fmt.Errorf("unmarshal clusterNodes:%s fail,err:%v", lightningJobsRows[0].ClusterNodes, task.Err) + task.Logger.Error(task.Err.Error()) + return + } + for _, tmp := range clusterNodes { + clusterNode := tmp + passwordDecode, task.Err = base64.StdEncoding.DecodeString(clusterNode.RedisPasswordEncode) + if task.Err != nil { + task.Err = fmt.Errorf("base64 decode redis password:%s fail,err:%v", clusterNode.RedisPasswordEncode, task.Err) + task.Logger.Error(task.Err.Error()) + return + } + clusterNode.RedisPassword = string(passwordDecode) + task.Logger.Info(fmt.Sprintf("slave_addr:%s encodePassword:%s password:%s", + clusterNode.SlaveAddr, clusterNode.RedisPasswordEncode, clusterNode.RedisPassword)) + } + return +} diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step1CosFileDownload.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step1CosFileDownload.go new file mode 100644 index 0000000000..02a13210cd --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step1CosFileDownload.go @@ -0,0 +1,94 @@ +// Package tendispluslightning TODO +package 
tendispluslightning + +import ( + "fmt" + "path/filepath" + "time" + + "dbm-services/redis/redis-dts/models/mysql/tendisdb" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/pkg/txycos" + "dbm-services/redis/redis-dts/util" +) + +// CosFileDownloadTask TODO +type CosFileDownloadTask struct { + LightningFatherTask + cosWorker *txycos.TxyCosWoker +} + +// NewCosFileDownloadTask cos 文件下载task +func NewCosFileDownloadTask(row *tendisdb.TbTendisplusLightningTask) *CosFileDownloadTask { + return &CosFileDownloadTask{ + LightningFatherTask: NewLightningFatherTask(row), + } +} + +// TaskType task类型 +func (task *CosFileDownloadTask) TaskType() string { + return constvar.TendisplusLightningCosFileDownload +} + +// NextTask 下一个task类型 +func (task *CosFileDownloadTask) NextTask() string { + return constvar.TendisplusLightningFileSplit +} + +func (task *CosFileDownloadTask) initTxyCos() { + if task.cosWorker == nil { + task.cosWorker, task.Err = txycos.NewTxyCosWoker(task.Logger) + } +} + +// Execute TODO +func (task *CosFileDownloadTask) Execute() { + if task.Err != nil { + return + } + defer func() { + if task.Err != nil { + task.SetStatus(-1) + task.SetMessage(task.Err.Error()) + task.UpdateRow() + } else { + task.SetTaskType(task.NextTask()) + task.SetStatus(0) + task.SetMessage("等待执行cos文件split") + task.UpdateRow() + } + }() + + task.SetStatus(1) + task.SetMessage("开始下载cos中文件") + task.UpdateRow() + + // 初始化 txy cos + task.initTxyCos() + if task.Err != nil { + return + } + + localCosFile := task.GetLocalCosFile() + localFullPath := filepath.Join(task.TaskDir, localCosFile) + // 如果本地文件存在先删除 + if util.FileExists(localFullPath) { + rmCmd := fmt.Sprintf("cd %s && rm -rf %s", task.TaskDir, localCosFile) + task.Logger.Info(rmCmd) + util.RunLocalCmd("bash", []string{"-c", rmCmd}, "", nil, 120*time.Second, task.Logger) + } + + // 下载cos文件 + task.Logger.Info(fmt.Sprintf("start download cos file,cosKey:%s,localFile:%s", + task.RowData.CosKey, + localFullPath)) 
+ task.Err = task.cosWorker.DownloadAFile(task.RowData.CosKey, localFullPath) + if task.Err != nil { + return + } + // cos文件下载成功 + task.Logger.Info(fmt.Sprintf("download cos file success,cosKey:%s,localFile:%s", + task.RowData.CosKey, + localFullPath)) + +} diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step2FileSplit.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step2FileSplit.go new file mode 100644 index 0000000000..fb9793a66e --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step2FileSplit.go @@ -0,0 +1,89 @@ +// Package tendispluslightning TODO +package tendispluslightning + +import ( + "fmt" + "path/filepath" + "time" + + "dbm-services/redis/redis-dts/models/mysql/tendisdb" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/util" +) + +// FileSplitTask TODO +type FileSplitTask struct { + LightningFatherTask +} + +// NewFileSplitTask 新建一个task +func NewFileSplitTask(row *tendisdb.TbTendisplusLightningTask) *FileSplitTask { + return &FileSplitTask{ + LightningFatherTask: NewLightningFatherTask(row), + } +} + +// TaskType task类型 +func (task *FileSplitTask) TaskType() string { + return constvar.TendisplusLightningFileSplit +} + +// NextTask 下一个task类型 +func (task *FileSplitTask) NextTask() string { + return constvar.TendisplusLightningGenerateSst +} + +// Execute TODO +func (task *FileSplitTask) Execute() { + if task.Err != nil { + return + } + defer func() { + if task.Err != nil { + task.SetStatus(-1) + task.SetMessage(task.Err.Error()) + task.UpdateRow() + } else { + task.SetTaskType(task.NextTask()) + task.SetStatus(0) + task.SetMessage("等待执行sst文件生成") + task.UpdateRow() + } + }() + + task.SetStatus(1) + task.SetMessage("开始执行cos文件split") + task.UpdateRow() + + // 确保 tendisplus_lightning_kv_split 工具存在 + kvSplitTool, err := util.IsToolExecutableInCurrDir(constvar.ToolLightningKVFileSplit) + if err != nil { + task.Err = err + return + } + + // 如果本地split输出目录存在,则删除 
+ splitOutDir := task.GetSplitOutputDir() + fullPath := filepath.Join(task.TaskDir, splitOutDir) + if util.FileExists(fullPath) { + rmCmd := fmt.Sprintf("cd %s && rm -rf %s", task.TaskDir, splitOutDir) + task.Logger.Info(rmCmd) + _, task.Err = util.RunLocalCmd("bash", []string{"-c", rmCmd}, "", nil, time.Hour, task.Logger) + if task.Err != nil { + return + } + } + // 创建split输出目录 + task.Err = util.MkDirIfNotExists(fullPath) + if task.Err != nil { + return + } + splitCmd := fmt.Sprintf("cd %s && %s %s %s", task.TaskDir, kvSplitTool, task.GetLocalCosFile(), splitOutDir) + task.Logger.Info(splitCmd) + _, task.Err = util.RunLocalCmd("bash", []string{"-c", splitCmd}, "", nil, 10*time.Hour, task.Logger) + if task.Err != nil { + return + } + // split 成功 + task.Logger.Info("file split success") +} diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step3GenerateSST.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step3GenerateSST.go new file mode 100644 index 0000000000..246340ed3c --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step3GenerateSST.go @@ -0,0 +1,129 @@ +// Package tendispluslightning TODO +package tendispluslightning + +import ( + "fmt" + "path/filepath" + "strconv" + "time" + + "dbm-services/redis/redis-dts/models/mysql/tendisdb" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/util" +) + +// GenerateSstTask TODO +type GenerateSstTask struct { + LightningFatherTask +} + +// NewGenerateSstTask 新建一个task +func NewGenerateSstTask(row *tendisdb.TbTendisplusLightningTask) *GenerateSstTask { + return &GenerateSstTask{ + LightningFatherTask: NewLightningFatherTask(row), + } +} + +// TaskType task类型 +func (task *GenerateSstTask) TaskType() string { + return constvar.TendisplusLightningGenerateSst +} + +// NextTask 下一个task类型 +func (task *GenerateSstTask) NextTask() string { + return constvar.TendisplusLightningScpSst +} + +// Execute TODO +func (task *GenerateSstTask) 
Execute() { + if task.Err != nil { + return + } + defer func() { + if task.Err != nil { + task.SetStatus(-1) + task.SetMessage(task.Err.Error()) + task.UpdateRow() + } else { + task.SetTaskType(task.NextTask()) + task.SetStatus(0) + task.SetMessage("等待执行scp sst") + task.UpdateRow() + } + }() + + // 确保 tendisplus_lightning_sst_generator 工具存在 + sstGenTool, err := util.IsToolExecutableInCurrDir(constvar.ToolLightningSstGenerator) + if err != nil { + task.Err = err + return + } + + // 先清理之前的sst目录 + var slaveSstSaveDir string + var fullPath string + var rmCmd string + + // 获取dst集群的nodes信息 + clusterNodes := task.GetDstClusterNodes() + if task.Err != nil { + return + } + for _, tmp := range clusterNodes { + clusterNode := tmp + ip, port, err := clusterNode.GetSlaveIpPort() + if err != nil { + task.Err = err + task.Logger.Error(task.Err.Error()) + return + } + slaveSstSaveDir = task.GetSlaveNodeSstDir(ip, port) + fullPath = filepath.Join(task.TaskDir, slaveSstSaveDir) + if util.FileExists(fullPath) { + rmCmd = fmt.Sprintf("cd %s && rm -rf %s", task.TaskDir, slaveSstSaveDir) + task.Logger.Info(rmCmd) + _, task.Err = util.RunLocalCmd("bash", []string{"-c", rmCmd}, "", nil, time.Hour, task.Logger) + if task.Err != nil { + return + } + } + } + // 获取到 split输出目录 + splitOutputDir := task.GetSplitOutputDir() + // 在创建sst目录,并执行 sst_generator + for _, tmp := range clusterNodes { + clusterNode := tmp + ip, port, err := clusterNode.GetSlaveIpPort() + if err != nil { + task.Err = err + task.Logger.Error(task.Err.Error()) + return + } + slaveSstSaveDir = task.GetSlaveNodeSstDir(ip, port) + var fullPath string + // 创建目录,$taskDir/$slaveSstDir/0、/1、/2、....、/9,对应的是tendisplus的10个kvstore + for i := 0; i < 10; i++ { + fullPath = filepath.Join(task.TaskDir, slaveSstSaveDir, strconv.Itoa(i)) + if !util.FileExists(fullPath) { + mkCmd := fmt.Sprintf("mkdir -p %s", fullPath) + task.Logger.Info(mkCmd) + _, task.Err = util.RunLocalCmd("bash", []string{"-c", mkCmd}, "", nil, time.Hour, task.Logger) + 
if task.Err != nil { + return + } + } + } + // 执行 sst_generator + sstGenCmd := fmt.Sprintf("cd %s && %s -i %s -o %s -s %q -t 5", + task.TaskDir, + sstGenTool, splitOutputDir, + slaveSstSaveDir, clusterNode.Slots) + task.Logger.Info(sstGenCmd) + _, task.Err = util.RunLocalCmd("bash", []string{"-c", sstGenCmd}, "", nil, 10*time.Hour, task.Logger) + if task.Err != nil { + return + } + } + task.Logger.Info("generate sst success") + return +} diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step4ScpSST.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step4ScpSST.go new file mode 100644 index 0000000000..1f17d53e66 --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step4ScpSST.go @@ -0,0 +1,185 @@ +// Package tendispluslightning TODO +package tendispluslightning + +import ( + "fmt" + "path/filepath" + "sync" + "time" + + "dbm-services/redis/redis-dts/models/mysql/tendisdb" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/pkg/scrdbclient" + "dbm-services/redis/redis-dts/util" +) + +// ScpSstTask TODO +type ScpSstTask struct { + LightningFatherTask +} + +// NewScpSstTask 新建一个task +func NewScpSstTask(row *tendisdb.TbTendisplusLightningTask) *ScpSstTask { + return &ScpSstTask{ + LightningFatherTask: NewLightningFatherTask(row), + } +} + +// TaskType task类型 +func (task *ScpSstTask) TaskType() string { + return constvar.TendisplusLightningScpSst +} + +// NextTask 下一个task类型 +func (task *ScpSstTask) NextTask() string { + return constvar.TendisplusLightningSlaveLoadSst +} + +// Execute TODO +func (task *ScpSstTask) Execute() { + if task.Err != nil { + return + } + defer func() { + if task.Err != nil { + task.SetStatus(-1) + task.SetMessage(task.Err.Error()) + task.UpdateRow() + } else { + task.SetTaskType(task.NextTask()) + task.SetStatus(0) + task.SetMessage("等待执行scp sst") + task.UpdateRow() + } + }() + + zstdTool, err := util.IsToolExecutableInCurrDir(constvar.ToolZstd) + if err 
!= nil { + task.Logger.Error(err.Error()) + task.Err = err + return + } + localIp, err := util.GetLocalIP() + if err != nil { + task.Logger.Error(err.Error()) + task.Err = err + return + } + + // 获取dst集群的nodes信息 + clusterNodes := task.GetDstClusterNodes() + if task.Err != nil { + return + } + var slaveSstSaveDir string + var fullPath string + var tarFile string + var zstdFile string + var tarCmd string // 打包命令 + var zstdCmd string // 压缩命令 + var rmCmd string // 删除命令 + transferMap := map[string][]string{} // key是dst ip,value是 zstd文件列表 + for _, tmp := range clusterNodes { + clusterNode := tmp + slaveIP, slavePort, _ := clusterNode.GetSlaveIpPort() + slaveSstSaveDir = task.GetSlaveNodeSstDir(slaveIP, slavePort) + tarFile = slaveSstSaveDir + ".tar" + zstdFile = tarFile + ".zst" + fullPath = filepath.Join(task.TaskDir, slaveSstSaveDir) + + // 如果 tar不存在,zst文件存在,则跳过 打包压缩环节 + isSkipCompress := util.FileExists(filepath.Join(task.TaskDir, zstdFile)) && + !util.FileExists(filepath.Join(task.TaskDir, tarFile)) + if !isSkipCompress { + if !util.FileExists(fullPath) { + task.Err = fmt.Errorf("slave_sst_dir:%s not exists", fullPath) + task.Logger.Error(task.Err.Error()) + return + } + if util.FileExists(tarFile) { + // 删除老的tar包 + rmCmd = fmt.Sprintf("cd %s && rm -rf %s", task.TaskDir, tarFile) + task.Logger.Info(rmCmd) + util.RunLocalCmd("bash", []string{"-c", rmCmd}, "", nil, time.Hour, task.Logger) + } + // 打包 + tarCmd = fmt.Sprintf("cd %s && tar -cf %s %s && rm -rf %s", task.TaskDir, tarFile, slaveSstSaveDir, slaveSstSaveDir) + task.Logger.Info(tarCmd) + _, task.Err = util.RunLocalCmd("bash", []string{"-c", tarCmd}, "", nil, time.Hour, task.Logger) + if task.Err != nil { + return + } + // 压缩 + zstdCmd = fmt.Sprintf("cd %s && %s %s && rm -rf %s", task.TaskDir, zstdTool, tarFile, tarFile) + task.Logger.Info(zstdCmd) + _, task.Err = util.RunLocalCmd("bash", []string{"-c", zstdCmd}, "", nil, time.Hour, task.Logger) + if task.Err != nil { + return + } + } + // 构造参数 + if _, ok := 
transferMap[slaveIP]; !ok { + transferMap[slaveIP] = []string{} + } + transferMap[slaveIP] = append(transferMap[slaveIP], filepath.Join(task.TaskDir, zstdFile)) + + } + // 多个slave ip 并发传输文件 + type transferWorker struct { + SrcIP string + SrcFiles []string + DstIP string + DstDir string + Err error + } + wokers := []*transferWorker{} + for dstIP, files := range transferMap { + wokers = append(wokers, &transferWorker{ + SrcIP: localIp, + SrcFiles: files, + DstIP: dstIP, + DstDir: constvar.DbbakDir, + }) + } + var wg sync.WaitGroup + for _, woker01 := range wokers { + woker := woker01 + wg.Add(1) + go func(woker *transferWorker) { + defer wg.Done() + var cli *scrdbclient.Client + cli, woker.Err = scrdbclient.NewClient(constvar.BkDbm, task.Logger) + if woker.Err != nil { + return + } + param := scrdbclient.TransferFileReq{ + SourceList: []scrdbclient.TransferFileSourceItem{ + { + BkCloudID: int(task.RowData.BkCloudID), + IP: localIp, + Account: constvar.MysqlOSAccount, + FileList: woker.SrcFiles, + }, + }, + TargetAccount: constvar.MysqlOSAccount, + TargetDir: woker.DstDir, + TargetIPList: []scrdbclient.IPItem{ + {BkCloudID: int(task.RowData.BkCloudID), IP: woker.DstIP}, + }, + Timeout: 86300, + } + woker.Err = cli.SendNew(param, 5) + if woker.Err != nil { + return + } + }(woker) + } + wg.Wait() + for _, woker01 := range wokers { + woker := woker01 + if woker.Err != nil { + task.Err = woker.Err + return + } + } +} diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step5SlaveLoadSST.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step5SlaveLoadSST.go new file mode 100644 index 0000000000..951849de8c --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendispluslightning/step5SlaveLoadSST.go @@ -0,0 +1,146 @@ +// Package tendispluslightning TODO +package tendispluslightning + +import ( + "fmt" + "path/filepath" + "sync" + + "dbm-services/redis/redis-dts/models/myredis" + 
"dbm-services/redis/redis-dts/models/mysql/tendisdb" + "dbm-services/redis/redis-dts/pkg/constvar" + "dbm-services/redis/redis-dts/pkg/scrdbclient" +) + +// SlaveLoadSstTask TODO +type SlaveLoadSstTask struct { + LightningFatherTask +} + +// NewSlaveLoadSstTask TODO +// NewScpSstTask 新建一个task +func NewSlaveLoadSstTask(row *tendisdb.TbTendisplusLightningTask) *SlaveLoadSstTask { + return &SlaveLoadSstTask{ + LightningFatherTask: NewLightningFatherTask(row), + } +} + +// TaskType task类型 +func (task *SlaveLoadSstTask) TaskType() string { + return constvar.TendisplusLightningSlaveLoadSst +} + +// NextTask 下一个task类型,没有下一个task type了 +func (task *SlaveLoadSstTask) NextTask() string { + return constvar.TendisplusLightningSlaveLoadSst +} + +// Execute TODO +func (task *SlaveLoadSstTask) Execute() { + if task.Err != nil { + return + } + defer func() { + if task.Err != nil { + task.SetStatus(-1) + task.SetMessage(task.Err.Error()) + task.UpdateRow() + } else { + task.SetTaskType(task.NextTask()) + task.SetStatus(2) + task.SetMessage("slave load sst文件完成") + task.UpdateRow() + } + }() + clusterNodes := task.GetDstClusterNodes() + if task.Err != nil { + return + } + + var slaveSstSaveDir string + var tarFile string + var zstdFile string + var unCompressCmd string + + type unCompressWorker struct { + SlaveIP string + Cmd string + Err error + } + workers := []*unCompressWorker{} + for _, tmp := range clusterNodes { + clusterNode := tmp + slaveIP, slavePort, _ := clusterNode.GetSlaveIpPort() + slaveSstSaveDir = task.GetSlaveNodeSstDir(slaveIP, slavePort) + tarFile = slaveSstSaveDir + ".tar" + zstdFile = tarFile + ".zst" + unCompressCmd = fmt.Sprintf("cd %s && rm -rf %s && /home/mysql/dbtools/zstd -d %s && tar -xf %s && rm -rf %s", + constvar.DbbakDir, slaveSstSaveDir, zstdFile, tarFile, tarFile) + workers = append(workers, &unCompressWorker{ + SlaveIP: slaveIP, + Cmd: unCompressCmd, + }) + } + // 多个slave ip并发执行命令 + var wg sync.WaitGroup + for _, worker := range workers { + 
wg.Add(1) + go func(worker *unCompressWorker) { + defer wg.Done() + var cli *scrdbclient.Client + cli, worker.Err = scrdbclient.NewClient(constvar.BkDbm, task.Logger) + if worker.Err != nil { + return + } + _, worker.Err = cli.ExecNew(scrdbclient.FastExecScriptReq{ + Account: constvar.MysqlOSAccount, + Timeout: 3600, + ScriptLanguage: 1, + ScriptContent: worker.Cmd, + IPList: []scrdbclient.IPItem{ + { + BkCloudID: int(task.RowData.BkCloudID), + IP: worker.SlaveIP, + }, + }, + }, 5) + if worker.Err != nil { + return + } + }(worker) + } + wg.Wait() + + for _, worker := range workers { + if worker.Err != nil { + task.Err = worker.Err + return + } + } + // slave执行 loadexternalfiles 命令 + var slaveSaveDir string + for _, tmp := range clusterNodes { + clusterNode := tmp + slaveIP, slavePort, _ := clusterNode.GetSlaveIpPort() + slaveSstSaveDir = task.GetSlaveNodeSstDir(slaveIP, slavePort) + slaveSaveDir = filepath.Join(constvar.DbbakDir, slaveSstSaveDir) + task.TendisplusSlaveLoadSST(clusterNode, slaveSaveDir) + if task.Err != nil { + return + } + } +} + +// TendisplusSlaveLoadSST TODO +func (task *SlaveLoadSstTask) TendisplusSlaveLoadSST(cNode *clusterNodeItem, slaveSaveDir string) { + var slaveConn *myredis.RedisWorker + slaveConn, task.Err = myredis.NewRedisClient(cNode.SlaveAddr, cNode.RedisPassword, 0, task.Logger) + if task.Err != nil { + return + } + defer slaveConn.Close() + task.Err = slaveConn.Loadexternalfiles(slaveSaveDir, "all", "copy") + if task.Err != nil { + return + } +} diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/backupFileFetch.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/backupFileFetch.go index d5633a03b1..234d0e4808 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/backupFileFetch.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/backupFileFetch.go @@ -12,7 +12,7 @@ import ( // BakcupFileFetchTask 备份拉取task type BakcupFileFetchTask struct { - dtsTask.FatherTask + dtsTask.TendisDtsFatherTask } // 
TaskType task类型 @@ -28,7 +28,7 @@ func (task *BakcupFileFetchTask) NextTask() string { // NewBakcupFileFetchTask 新建一个备份拉取task func NewBakcupFileFetchTask(row *tendisdb.TbTendisDTSTask) *BakcupFileFetchTask { return &BakcupFileFetchTask{ - FatherTask: dtsTask.NewFatherTask(row), + TendisDtsFatherTask: dtsTask.NewDtsFatherTask(row), } } diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/cmdsImporter.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/cmdsImporter.go index 81697bb314..c07f949d61 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/cmdsImporter.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/cmdsImporter.go @@ -27,7 +27,7 @@ import ( // CmdsImporterTask 命令导入task type CmdsImporterTask struct { - dtsTask.FatherTask + dtsTask.TendisDtsFatherTask DelFiles []string `json:"delFiles"` OutputFiles []string `json:"outputFiles"` ListFiles []string `json:"listFiles"` @@ -52,7 +52,7 @@ func (task *CmdsImporterTask) NextTask() string { // NewCmdsImporterTask 新建一个命令导入task func NewCmdsImporterTask(row *tendisdb.TbTendisDTSTask) *CmdsImporterTask { return &CmdsImporterTask{ - FatherTask: dtsTask.NewFatherTask(row), + TendisDtsFatherTask: dtsTask.NewDtsFatherTask(row), } } diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/makeSync.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/makeSync.go index 85758d159c..7dd4e141d1 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/makeSync.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/makeSync.go @@ -29,7 +29,7 @@ const ( // MakeSyncTask 启动redis-sync type MakeSyncTask struct { - dtsTask.FatherTask + dtsTask.TendisDtsFatherTask RedisCliTool string `json:"redisCliTool"` RedisSyncTool string `json:"redisSyncTool"` SyncLogFile string `json:"syncLogFile"` @@ -56,7 +56,7 @@ func (task *MakeSyncTask) NextTask() string { // NewMakeSyncTask 新建一个 redis-sync启动task func NewMakeSyncTask(row *tendisdb.TbTendisDTSTask) *MakeSyncTask { return &MakeSyncTask{ - 
FatherTask: dtsTask.NewFatherTask(row), + TendisDtsFatherTask: dtsTask.NewDtsFatherTask(row), } } diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisBackup.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisBackup.go index 1567e4c217..7a37c79725 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisBackup.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisBackup.go @@ -21,7 +21,7 @@ import ( // TendisBackupTask src tendisssd备份task type TendisBackupTask struct { - dtsTask.FatherTask + dtsTask.TendisDtsFatherTask srcClient *myredis.RedisWorker `json:"-"` dstClient *myredis.RedisWorker `json:"-"` } @@ -39,7 +39,7 @@ func (task *TendisBackupTask) NextTask() string { // NewTendisBackupTask 新建一个src tendisssd备份拉取task func NewTendisBackupTask(row *tendisdb.TbTendisDTSTask) *TendisBackupTask { return &TendisBackupTask{ - FatherTask: dtsTask.NewFatherTask(row), + TendisDtsFatherTask: dtsTask.NewDtsFatherTask(row), } } @@ -54,7 +54,7 @@ func (task *TendisBackupTask) Init() { task.UpdateDbAndLogLocal(task.Err.Error()) } }() - task.FatherTask.Init() + task.TendisDtsFatherTask.Init() if task.Err != nil { return } diff --git a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisdump.go b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisdump.go index 9de4da6861..1295c3c872 100644 --- a/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisdump.go +++ b/dbm-services/redis/redis-dts/pkg/dtsTask/tendisssd/tendisdump.go @@ -18,7 +18,7 @@ import ( // TredisdumpTask 对备份文件执行tredisdump type TredisdumpTask struct { - dtsTask.FatherTask + dtsTask.TendisDtsFatherTask } // TaskType task类型 @@ -34,7 +34,7 @@ func (task *TredisdumpTask) NextTask() string { // NewTredisdumpTask 新建tredisdump task func NewTredisdumpTask(row *tendisdb.TbTendisDTSTask) *TredisdumpTask { return &TredisdumpTask{ - FatherTask: dtsTask.NewFatherTask(row), + TendisDtsFatherTask: dtsTask.NewDtsFatherTask(row), } } diff --git 
a/dbm-services/redis/redis-dts/pkg/txycos/txycos.go b/dbm-services/redis/redis-dts/pkg/txycos/txycos.go new file mode 100644 index 0000000000..8a5b897d98 --- /dev/null +++ b/dbm-services/redis/redis-dts/pkg/txycos/txycos.go @@ -0,0 +1,172 @@ +// Package txycos TODO +package txycos + +import ( + "context" + "fmt" + "net/http" + "net/url" + + "dbm-services/redis/redis-dts/util" + + "github.com/spf13/viper" + "github.com/tencentyun/cos-go-sdk-v5" + "go.uber.org/zap" +) + +// TxyCosWoker 腾讯云cos客户端 +type TxyCosWoker struct { + URL string `json:"url"` // 接口地址 + SecretID string `json:"secretId"` // 密钥 + SecretKey string `json:"secretKey"` // 密钥的密文 + cosClient *cos.Client + logger *zap.Logger +} + +// NewTxyCosWoker 创建一个TxyCosWoker +func NewTxyCosWoker(logger *zap.Logger) (ret *TxyCosWoker, err error) { + ret = &TxyCosWoker{ + URL: viper.GetString("txycos.url"), + SecretID: viper.GetString("txycos.secret_id"), + SecretKey: viper.GetString("txycos.secret_key"), + logger: logger, + } + if ret.URL == "" || ret.SecretID == "" || ret.SecretKey == "" { + err = fmt.Errorf("txycos.url:%s,secret_id:%s,secret_key:%s cannot be empty", ret.URL, ret.SecretID, ret.SecretKey) + ret.logger.Error(err.Error()) + return nil, err + } + u, err := url.Parse(ret.URL) + if err != nil { + err = fmt.Errorf("txycos.url parse failed,err:%v,url:%s", err, ret.URL) + ret.logger.Error(err.Error()) + return nil, err + } + b := &cos.BaseURL{BucketURL: u} + ret.cosClient = cos.NewClient(b, &http.Client{ + Transport: &cos.AuthorizationTransport{ + SecretID: ret.SecretID, + SecretKey: ret.SecretKey, + }, + }) + return +} + +// BucketList 桶列表 +func (t *TxyCosWoker) BucketList() (ret *cos.ServiceGetResult, err error) { + ret, resp, err := t.cosClient.Service.Get(context.Background()) + if err != nil { + err = fmt.Errorf("BucketList failed,err:%v", err) + t.logger.Error(err.Error()) + return + } + if resp.StatusCode != http.StatusOK { + err = fmt.Errorf("BucketList failed,resp.StatusCode:%d\n", resp.StatusCode) 
+ t.logger.Error(err.Error()) + return + } + return +} + +// BucketCreate 创建存储桶 +func (t *TxyCosWoker) BucketCreate() (err error) { + opt := &cos.BucketPutOptions{ + XCosACL: "private", + } + resp, err := t.cosClient.Bucket.Put(context.Background(), opt) + if err != nil { + err = fmt.Errorf("BucketCreate failed,err:%v", err) + t.logger.Error(err.Error()) + return + } + if resp.StatusCode != http.StatusOK { + err = fmt.Errorf("BucketCreate failed,resp.StatusCode:%d\n", resp.StatusCode) + t.logger.Error(err.Error()) + return + } + return nil +} + +// PutAFile 文件上传 +func (t *TxyCosWoker) PutAFile(key, filepath string) (err error) { + opt := &cos.ObjectPutOptions{ + ObjectPutHeaderOptions: &cos.ObjectPutHeaderOptions{ + ContentType: "text/html", + }, + ACLHeaderOptions: &cos.ACLHeaderOptions{ + XCosACL: "private", + }, + } + + resp, err := t.cosClient.Object.PutFromFile(context.Background(), key, filepath, opt) + if err != nil { + err = fmt.Errorf("PutAFile failed,err:%v\n", err) + t.logger.Error(err.Error()) + return + } + if resp.StatusCode != http.StatusOK { + err = fmt.Errorf("PutAFile failed,resp.StatusCode:%d,key:%s,filepath:%s\n", resp.StatusCode, key, filepath) + t.logger.Error(err.Error()) + return + } + return nil +} + +// GetFileList 获取文件列表 +func (t *TxyCosWoker) GetFileList(prefix string, maxKeys int) (ret *cos.BucketGetResult, err error) { + + opt := &cos.BucketGetOptions{ + Prefix: prefix, + MaxKeys: maxKeys, + } + + ret, resp, err := t.cosClient.Bucket.Get(context.Background(), opt) + if err != nil { + err = fmt.Errorf("GetFileList failed,err:%v,opt:%s\n", err, util.ToString(opt)) + t.logger.Error(err.Error()) + return + } + if resp.StatusCode != http.StatusOK { + err = fmt.Errorf("GetFileList failed,resp.StatusCode:%d,opt:%s\n", resp.StatusCode, util.ToString(opt)) + t.logger.Error(err.Error()) + return + } + return +} + +// DownloadAFile 从存储桶下载文件 +func (t *TxyCosWoker) DownloadAFile(key, savePath string) (err error) { + opt := 
&cos.MultiDownloadOptions{ + ThreadPoolSize: 5, + } + resp, err := t.cosClient.Object.Download( + context.Background(), key, savePath, opt, + ) + if err != nil { + err = fmt.Errorf("DownloadAFile failed,err:%v\n", err) + t.logger.Error(err.Error()) + return + } + if resp.StatusCode != http.StatusOK { + err = fmt.Errorf("DownloadAFile failed,resp.StatusCode:%d,key:%s\n", resp.StatusCode, key) + t.logger.Error(err.Error()) + return + } + return nil +} + +// DeleteAFile 从存储桶删除文件 +func (t *TxyCosWoker) DeleteAFile(key string) (err error) { + resp, err := t.cosClient.Object.Delete(context.Background(), key) + if err != nil { + err = fmt.Errorf("DeleteAFile failed,err:%v,key:%s\n", err, key) + t.logger.Error(err.Error()) + return + } + if resp.StatusCode != http.StatusOK { + err = fmt.Errorf("DeleteAFile failed,resp.StatusCode:%d,key:%s\n", resp.StatusCode, key) + t.logger.Error(err.Error()) + return + } + return nil +} diff --git a/dbm-services/redis/redis-dts/util/util.go b/dbm-services/redis/redis-dts/util/util.go index fb4c4f7afa..9335dfbc18 100644 --- a/dbm-services/redis/redis-dts/util/util.go +++ b/dbm-services/redis/redis-dts/util/util.go @@ -287,3 +287,12 @@ func IsToolExecutableInCurrDir(tool string) (fullPath string, err error) { } return } + +// FileExists 检查目录是否已经存在 +func FileExists(path string) bool { + _, err := os.Stat(path) + if err != nil { + return os.IsExist(err) + } + return true +} diff --git a/dbm-ui/backend/db_proxy/views/redis_dts/serializers.py b/dbm-ui/backend/db_proxy/views/redis_dts/serializers.py index a5e24c1703..845f2fc2fa 100644 --- a/dbm-ui/backend/db_proxy/views/redis_dts/serializers.py +++ b/dbm-ui/backend/db_proxy/views/redis_dts/serializers.py @@ -109,3 +109,47 @@ class DtsTestRedisConnectionSerializer(BaseProxyPassSerializer): infos = serializers.ListField( help_text=_("复制列表"), child=DtsDataCopyBaseItemSerializer(), allow_empty=False, required=True ) + + +# tendisplus Lightning +class 
LightningDtsSvrMigatingTasksSerializer(BaseProxyPassSerializer): + bk_cloud_id = serializers.IntegerField(help_text=_("云区域ID"), required=True) + dts_server = serializers.IPAddressField(help_text=_("DTS_server IP"), required=True) + task_types = serializers.ListField( + help_text=_("task类型列表"), child=serializers.CharField(), allow_empty=False, required=True + ) + + +class LightningLast30DaysToExecTasksSerializer(BaseProxyPassSerializer): + bk_cloud_id = serializers.IntegerField(help_text=_("云区域ID"), required=True) + dts_server = serializers.IPAddressField(help_text=_("DTS_server IP"), required=True) + task_type = serializers.CharField(help_text=_("task类型"), required=True) + limit = serializers.IntegerField(help_text=_("限制条数"), required=False) + status = serializers.IntegerField(help_text=_("任务状态"), required=False) + + +class LightningLast30DaysToScheJobsSerializer(BaseProxyPassSerializer): + bk_cloud_id = serializers.IntegerField(help_text=_("云区域ID"), required=True) + max_data_size = serializers.IntegerField(help_text=_("最大数据量"), required=True) + zone_name = serializers.CharField(help_text=_("城市名"), required=True) + + +class LightningJobTasksSerializer(BaseProxyPassSerializer): + ticket_id = serializers.CharField(help_text=_("单据ID"), required=True) + dst_cluster = serializers.CharField(help_text=_("目标集群"), required=True) + + +class LightningJobToScheTasksSerializer(BaseProxyPassSerializer): + ticket_id = serializers.CharField(help_text=_("单据ID"), required=True) + dst_cluster = serializers.CharField(help_text=_("目标集群"), required=True) + + +class LightningTaskByIDSerializer(BaseProxyPassSerializer): + task_id = serializers.CharField(help_text=_("task id"), required=True) + + +class LightningTasksUpdateSerializer(BaseProxyPassSerializer): + task_ids = serializers.ListField( + help_text=_("子任务ID列表"), child=serializers.CharField(), allow_empty=False, required=True + ) + col_to_val = serializers.DictField(child=serializers.CharField()) diff --git 
a/dbm-ui/backend/db_proxy/views/redis_dts/views.py b/dbm-ui/backend/db_proxy/views/redis_dts/views.py index c3a3278dcd..cefb11d1d1 100644 --- a/dbm-ui/backend/db_proxy/views/redis_dts/views.py +++ b/dbm-ui/backend/db_proxy/views/redis_dts/views.py @@ -27,6 +27,13 @@ DtsTasksUpdateSerializer, DtsTestRedisConnectionSerializer, IsDtsserverInBlacklistSerializer, + LightningDtsSvrMigatingTasksSerializer, + LightningJobTasksSerializer, + LightningJobToScheTasksSerializer, + LightningLast30DaysToExecTasksSerializer, + LightningLast30DaysToScheJobsSerializer, + LightningTaskByIDSerializer, + LightningTasksUpdateSerializer, ) from backend.db_proxy.views.views import BaseProxyPassViewSet from backend.db_services.redis.redis_dts.apis import ( @@ -43,7 +50,14 @@ get_job_to_schedule_tasks, get_last_30days_to_exec_tasks, get_last_30days_to_schedule_jobs, + get_lightning_job_detail, is_dtsserver_in_blacklist, + lightning_dts_server_migrating_tasks, + lightning_dts_task_by_id, + lightning_job_to_schedule_tasks, + lightning_last_30days_to_exec_tasks, + lightning_last_30days_to_schedule_jobs, + lightning_tasks_updates, ) @@ -193,7 +207,7 @@ def get_job_to_schedule_tasks(self, request): return Response(get_job_to_schedule_tasks(validated_data)) @common_swagger_auto_schema( - operation_summary=_("获取一个job的所有待调度的tasks"), + operation_summary=_("获取一个job的中某个slave机器上运行中的tasks"), request_body=DtsJobSrcIPRunningTasksSerializer, tags=[SWAGGER_TAG], ) @@ -208,7 +222,7 @@ def get_job_src_ip_running_tasks(self, request): return Response(get_job_src_ip_running_tasks(validated_data)) @common_swagger_auto_schema( - operation_summary=_("获取一个job的所有待调度的tasks"), + operation_summary=_("根据task_id获取task详情"), request_body=DtsTaskByTaskIDSerializer, tags=[SWAGGER_TAG], ) @@ -248,3 +262,109 @@ def update_tasks(self, request): def test_redis_connection(self, request): validated_data = self.params_validate(self.get_serializer_class()) return Response(dts_test_redis_connections(validated_data)) + + # tendisplus 
Lightning + @common_swagger_auto_schema( + operation_summary=_("获取dts server迁移中的Lightning任务"), + request_body=LightningDtsSvrMigatingTasksSerializer, + tags=[SWAGGER_TAG], + ) + @action( + methods=["POST"], + detail=False, + serializer_class=LightningDtsSvrMigatingTasksSerializer, + url_path="tendisplus_lightning/dts_server_migrating_tasks", + ) + def lightning_dts_server_migrating_tasks(self, request): + validated_data = self.params_validate(self.get_serializer_class()) + return Response(lightning_dts_server_migrating_tasks(validated_data)) + + @common_swagger_auto_schema( + operation_summary=_("获取最近30天内task_type类型的等待执行的tasks"), + request_body=LightningLast30DaysToExecTasksSerializer, + tags=[SWAGGER_TAG], + ) + @action( + methods=["POST"], + detail=False, + serializer_class=LightningLast30DaysToExecTasksSerializer, + url_path="tendisplus_lightning/last_30_days_to_exec_tasks", + ) + def lightning_last_30days_to_exec_tasks(self, request): + validated_data = self.params_validate(self.get_serializer_class()) + return Response(lightning_last_30days_to_exec_tasks(validated_data)) + + @common_swagger_auto_schema( + operation_summary=_("获取最近30天内的等待调度的lightning jobs"), + request_body=LightningLast30DaysToScheJobsSerializer, + tags=[SWAGGER_TAG], + ) + @action( + methods=["POST"], + detail=False, + serializer_class=LightningLast30DaysToScheJobsSerializer, + url_path="tendisplus_lightning/last_30_days_to_schedule_jobs", + ) + def lightning_last_30days_to_schedule_jobs(self, request): + validated_data = self.params_validate(self.get_serializer_class()) + return Response(lightning_last_30days_to_schedule_jobs(validated_data)) + + @common_swagger_auto_schema( + operation_summary=_("获取数据导入任务详情"), + request_body=LightningJobTasksSerializer, + tags=[SWAGGER_TAG], + ) + @action( + methods=["POST"], + detail=False, + serializer_class=LightningJobTasksSerializer, + url_path="tendisplus_lightning/job_detail", + ) + def lightning_job_detail(self, request): + validated_data = 
self.params_validate(self.get_serializer_class()) + return Response(get_lightning_job_detail(validated_data)) + + @common_swagger_auto_schema( + operation_summary=_("获取一个job的所有待调度的tasks"), + request_body=LightningJobToScheTasksSerializer, + tags=[SWAGGER_TAG], + ) + @action( + methods=["POST"], + detail=False, + serializer_class=LightningJobToScheTasksSerializer, + url_path="tendisplus_lightning/job_to_schedule_tasks", + ) + def lightning_job_to_schedule_tasks(self, request): + validated_data = self.params_validate(self.get_serializer_class()) + return Response(lightning_job_to_schedule_tasks(validated_data)) + + @common_swagger_auto_schema( + operation_summary=_("根据task_id获取task详情"), + request_body=LightningTaskByIDSerializer, + tags=[SWAGGER_TAG], + ) + @action( + methods=["POST"], + detail=False, + serializer_class=LightningTaskByIDSerializer, + url_path="tendisplus_lightning/task_by_task_id", + ) + def lightning_dts_task_by_id(self, request): + validated_data = self.params_validate(self.get_serializer_class()) + return Response(lightning_dts_task_by_id(validated_data)) + + @common_swagger_auto_schema( + operation_summary=_("批量更新dts_tasks"), + request_body=LightningTasksUpdateSerializer, + tags=[SWAGGER_TAG], + ) + @action( + methods=["POST"], + detail=False, + serializer_class=LightningTasksUpdateSerializer, + url_path="tendisplus_lightning/tasks_update", + ) + def lightning_tasks_updates(self, request): + validated_data = self.params_validate(self.get_serializer_class()) + return Response({"rows_affected": lightning_tasks_updates(validated_data)}) diff --git a/dbm-ui/backend/db_services/redis/redis_dts/apis.py b/dbm-ui/backend/db_services/redis/redis_dts/apis.py index 9681def46f..3bf99450fc 100644 --- a/dbm-ui/backend/db_services/redis/redis_dts/apis.py +++ b/dbm-ui/backend/db_services/redis/redis_dts/apis.py @@ -29,8 +29,11 @@ TbTendisDtsDistributeLock, TbTendisDTSJob, TbTendisDtsTask, + TendisplusLightningJob, + TendisplusLightningTask, 
dts_task_clean_pwd_and_fmt_time, dts_task_format_time, + lightning_task_format_time, ) from .util import dts_job_cnt_and_status, dts_task_status, is_in_incremental_sync @@ -559,3 +562,177 @@ def dts_test_redis_connections(payload: dict): "test redis connection failed,redis_addr:{},please check redis and password is ok".format(redis_addr) ) return True + + +# tendisplus Lightning + + +def lightning_dts_server_migrating_tasks(payload: dict) -> list: + """ + 获取dts server迁移中的任务 + 对Tendisplus Lightning 来说,'迁移中'指处于 status=1 状态的task + """ + + dts_server = payload.get("dts_server") + task_types = payload.get("task_types") + current_time = datetime.now(timezone.utc).astimezone() + thirty_days_ago = current_time - timedelta(days=30) + + where = Q(bk_cloud_id=payload.get("bk_cloud_id")) + if dts_server: + where = where & Q(dts_server=dts_server) + if task_types: + where = where & Q(task_type__in=task_types) + where = where & Q(update_time__gt=thirty_days_ago) + where = where & Q(status__in=[0, 1]) + + rets = [] + for task in TendisplusLightningTask.objects.filter(where): + json_data = model_to_dict(task) + lightning_task_format_time(json_data, task) + rets.append(json_data) + return rets + + +def lightning_last_30days_to_exec_tasks(payload: dict) -> list: + """获取最近30天内task_type类型的等待执行的tasks""" + + bk_cloud_id = payload.get("bk_cloud_id") + dts_server = payload.get("dts_server") + task_type = payload.get("task_type") + limit = payload.get("limit") + status = payload.get("status") + dts_server = dts_server.strip() + task_type = task_type.strip() + current_time = datetime.now(timezone.utc).astimezone() + thirty_days_ago = current_time - timedelta(days=30) + + where = Q(bk_cloud_id=bk_cloud_id) + if dts_server: + where = where & Q(dts_server=dts_server) + if task_type: + where = where & Q(task_type=task_type) + if limit <= 0: + limit = 1 + where = where & Q(status=status) + where = where & Q(create_time__gt=thirty_days_ago) + tasks = 
TendisplusLightningTask.objects.filter(where).order_by("-dst_cluster_priority", "create_time")[:limit] + if not tasks: + # logger.warning( + # "lightning_last_30days_to_exec_tasks empty records" + # ",bk_cloud_id:{},dts_server:{},task_type:{},status:{}".format( + # bk_cloud_id, dts_server, task_type, status + # ) + # ) + return [] + rets = [] + for task in tasks: + json_data = model_to_dict(task) + lightning_task_format_time(json_data, task) + rets.append(json_data) + return rets + + +def lightning_last_30days_to_schedule_jobs(payload: dict) -> list: + """获取最近30天内的等待调度的jobs + ticket_id,dst_cluster唯一确定一个dts_job + 获取的dts_jobs必须满足: + 有一个待调度的task.dataSize < maxDataSize & status=0 & taskType="" & dtsServer="1.1.1.1" + """ + + max_data_size = payload.get("max_data_size") + zone_name = payload.get("zone_name") + current_time = datetime.now(timezone.utc).astimezone() + thirty_days_ago = current_time - timedelta(days=30) + + where = Q(bk_cloud_id=payload.get("bk_cloud_id")) + where = where & Q(cos_file_size__lte=max_data_size) + if zone_name: + where = where & Q(dst_zonename=zone_name) + where = where & Q(dts_server="1.1.1.1") & Q(task_type="") & Q(status=0) & Q(create_time__gt=thirty_days_ago) + jobs = TendisplusLightningTask.objects.filter(where).order_by("-dst_cluster_priority", "create_time") + if not jobs: + # logger.warning( + # "lightning_last_30days_to_schedule_jobs empty records," + # "bk_cloud_id={},max_data_size={},zone_name={}".format( + # bk_cloud_id, max_data_size, zone_name + # ) + # ) + return [] + rets = [] + unique_set = set() + for job in jobs: + job_uniq_key = "{}-{}".format(job.ticket_id, job.dst_cluster) + if job_uniq_key in unique_set: + continue + unique_set.add(job_uniq_key) + json_data = model_to_dict(job) + lightning_task_format_time(json_data, job) + rets.append(json_data) + return rets + + +def get_lightning_job_detail(payload: dict) -> list: + """获取数据导入任务详情""" + ticket_id = payload.get("ticket_id") + dst_cluster = payload.get("dst_cluster") + + 
return TendisplusLightningJob.objects.filter(Q(ticket_id=ticket_id) & Q(dst_cluster=dst_cluster)).values() + + +def lightning_job_to_schedule_tasks(payload: dict) -> list: + """获取一个job的所有待调度的tasks""" + # ticket_id: int, dst_cluster: str + ticket_id = payload.get("ticket_id") + dst_cluster = payload.get("dst_cluster") + if not ticket_id or not dst_cluster: + raise Exception("invalid params,ticket_id={},dst_cluster={} all can't be empty".format(ticket_id, dst_cluster)) + current_time = datetime.now(timezone.utc).astimezone() + thirty_days_ago = current_time - timedelta(days=30) + + where = Q(ticket_id=ticket_id) & Q(dst_cluster=dst_cluster) + where = where & Q(update_time__gt=thirty_days_ago) & Q(dts_server="1.1.1.1") & Q(task_type="") & Q(status=0) + tasks = TendisplusLightningTask.objects.filter(where) + if not tasks: + logger.warning( + "lightning_job_to_schedule_tasks empty records,ticket_id={},dst_cluster={}".format(ticket_id, dst_cluster) + ) + return [] + + rets = [] + for task in tasks: + json_data = model_to_dict(task) + lightning_task_format_time(json_data, task) + rets.append(json_data) + return rets + + +def lightning_dts_task_by_id(payload: dict) -> dict: + """根据task_id获取dts_task""" + task_id = payload.get("task_id") + + try: + task = TendisplusLightningTask.objects.get(task_id=task_id) + except TendisplusLightningTask.DoesNotExist: + logger.warning("lightning task not found,task_id={}".format(task_id)) + return None + + json_data = model_to_dict(task) + lightning_task_format_time(json_data, task) + return json_data + + +def lightning_tasks_updates(paylod: dict): + """批量更新dts_tasks + :param + task_ids: task_id列表 + update_params: 列名和值的对应关系,如 {"status": 1,"message": "test"} + """ + task_ids = paylod.get("task_ids") + col_to_val = paylod.get("col_to_val") + if not task_ids: + raise Exception("invalid params,task_ids can't be empty") + if not col_to_val: + raise Exception("invalid params,update_params can't be empty") + rows_affected = 
TendisplusLightningTask.objects.filter(task_id__in=task_ids).update(**col_to_val) + return rows_affected diff --git a/dbm-ui/backend/db_services/redis/redis_dts/migrations/0017_auto_20240908_1038_squashed_0018_auto_20240914_1754.py b/dbm-ui/backend/db_services/redis/redis_dts/migrations/0017_auto_20240908_1038_squashed_0018_auto_20240914_1754.py new file mode 100644 index 0000000000..fdae41e9e4 --- /dev/null +++ b/dbm-ui/backend/db_services/redis/redis_dts/migrations/0017_auto_20240908_1038_squashed_0018_auto_20240914_1754.py @@ -0,0 +1,249 @@ +# Generated by Django 3.2.25 on 2024-09-14 09:59 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + replaces = [("redis_dts", "0017_auto_20240908_1038"), ("redis_dts", "0018_auto_20240914_1754")] + + dependencies = [ + ("redis_dts", "0016_auto_20240307_0946"), + ] + + operations = [ + migrations.CreateModel( + name="TendisplusTunnelerJob", + fields=[ + ("id", models.BigAutoField(primary_key=True, serialize=False, verbose_name="ID")), + ("ticket_id", models.BigIntegerField(verbose_name="单据ID")), + ("user", models.CharField(default="", max_length=64, verbose_name="申请人")), + ("bk_biz_id", models.CharField(default="", max_length=64, verbose_name="业务bk_biz_d")), + ("bk_cloud_id", models.BigIntegerField(verbose_name="云区域ID")), + ("dst_cluster", models.CharField(max_length=128, verbose_name="目标集群")), + ("dst_cluster_id", models.BigIntegerField(verbose_name="目标集群ID")), + ("cluster_nodes", models.TextField(verbose_name="集群节点")), + ("create_time", models.DateTimeField(auto_now_add=True, verbose_name="创建时间")), + ], + options={ + "verbose_name": "Tendisplus Tunneler Job", + "verbose_name_plural": "Tendisplus Tunneler Job", + "db_table": "tb_tendisplus_tunneler_job", + }, + ), + migrations.CreateModel( + name="TendisplusTunnelerTask", + fields=[ + ("task_id", models.CharField(max_length=64, primary_key=True, serialize=False, verbose_name="任务ID")), + ("ticket_id", 
models.BigIntegerField(verbose_name="单据ID")), + ("user", models.CharField(default="", max_length=64, verbose_name="申请人")), + ("bk_biz_id", models.CharField(default="", max_length=64, verbose_name="业务bk_biz_d")), + ("bk_cloud_id", models.BigIntegerField(verbose_name="云区域ID")), + ("cos_key", models.CharField(max_length=128, verbose_name="cos文件key")), + ("cos_file_size", models.BigIntegerField(verbose_name="cos文件大小")), + ("dts_server", models.CharField(max_length=128, verbose_name="dts服务地址")), + ("dst_cluster", models.CharField(max_length=128, verbose_name="目标集群")), + ("dst_cluster_id", models.BigIntegerField(verbose_name="目标集群ID")), + ("dst_cluster_priority", models.IntegerField(default=0, verbose_name="目标集群优先级,越大优先级越高")), + ("status", models.IntegerField(default=0, verbose_name="任务状态")), + ("message", models.TextField(verbose_name="任务消息")), + ("create_time", models.DateTimeField(auto_now_add=True, verbose_name="创建时间")), + ("update_time", models.DateTimeField(auto_now=True, verbose_name="更新时间")), + ], + options={ + "verbose_name": "Tendisplus Tunneler Task", + "verbose_name_plural": "Tendisplus Tunneler Task", + "db_table": "tb_tendisplus_tunneler_task", + }, + ), + migrations.AddIndex( + model_name="tendisplustunnelertask", + index=models.Index(fields=["update_time"], name="tb_tendispl_update__425a81_idx"), + ), + migrations.AddIndex( + model_name="tendisplustunnelertask", + index=models.Index(fields=["dst_cluster_id"], name="tb_tendispl_dst_clu_c3fb72_idx"), + ), + migrations.AddIndex( + model_name="tendisplustunnelertask", + index=models.Index(fields=["user"], name="tb_tendispl_user_d85fd6_idx"), + ), + migrations.AddIndex( + model_name="tendisplustunnelertask", + index=models.Index(fields=["ticket_id", "dst_cluster_id"], name="tb_tendispl_ticket__da0496_idx"), + ), + migrations.AddIndex( + model_name="tendisplustunnelerjob", + index=models.Index(fields=["create_time"], name="tb_tendispl_create__b2f3e3_idx"), + ), + migrations.AddIndex( + 
model_name="tendisplustunnelerjob", + index=models.Index(fields=["dst_cluster_id"], name="tb_tendispl_dst_clu_c52482_idx"), + ), + migrations.AddIndex( + model_name="tendisplustunnelerjob", + index=models.Index(fields=["user"], name="tb_tendispl_user_65d13e_idx"), + ), + migrations.AddConstraint( + model_name="tendisplustunnelerjob", + constraint=models.UniqueConstraint(fields=("ticket_id", "dst_cluster"), name="uniq_ticket_cluster"), + ), + migrations.CreateModel( + name="TendisplusLightningTask", + fields=[ + ("task_id", models.CharField(max_length=64, primary_key=True, serialize=False, verbose_name="任务ID")), + ("ticket_id", models.BigIntegerField(verbose_name="单据ID")), + ("user", models.CharField(default="", max_length=64, verbose_name="申请人")), + ("bk_biz_id", models.CharField(default="", max_length=64, verbose_name="业务bk_biz_d")), + ("bk_cloud_id", models.BigIntegerField(verbose_name="云区域ID")), + ("cos_key", models.CharField(max_length=128, verbose_name="cos文件key")), + ("cos_file_size", models.BigIntegerField(verbose_name="cos文件大小")), + ("dts_server", models.CharField(max_length=128, verbose_name="dts服务地址")), + ("dst_cluster", models.CharField(max_length=128, verbose_name="目标集群")), + ("dst_cluster_id", models.BigIntegerField(verbose_name="目标集群ID")), + ("dst_cluster_priority", models.IntegerField(default=0, verbose_name="目标集群优先级,越大优先级越高")), + ("dst_zonename", models.CharField(max_length=128, verbose_name="目标集群城市")), + ("task_type", models.CharField(max_length=128, verbose_name="任务类型")), + ("operate_type", models.CharField(max_length=128, verbose_name="操作类型")), + ("status", models.IntegerField(default=0, verbose_name="任务状态")), + ("message", models.TextField(verbose_name="任务消息")), + ("create_time", models.DateTimeField(auto_now_add=True, verbose_name="创建时间")), + ("update_time", models.DateTimeField(auto_now=True, verbose_name="更新时间")), + ], + options={ + "verbose_name": "Tendisplus Lightning Task", + "verbose_name_plural": "Tendisplus Lightning Task", + 
"db_table": "tb_tendisplus_lightning_task", + }, + ), + migrations.RenameModel( + old_name="TendisplusTunnelerJob", + new_name="TendisplusLightningJob", + ), + migrations.DeleteModel( + name="TendisplusTunnelerTask", + ), + migrations.AlterModelOptions( + name="tendispluslightningjob", + options={"verbose_name": "Tendisplus Lightning Job", "verbose_name_plural": "Tendisplus Lightning Job"}, + ), + migrations.RemoveIndex( + model_name="tendispluslightningjob", + name="tb_tendispl_create__b2f3e3_idx", + ), + migrations.RemoveIndex( + model_name="tendispluslightningjob", + name="tb_tendispl_dst_clu_c52482_idx", + ), + migrations.RemoveIndex( + model_name="tendispluslightningjob", + name="tb_tendispl_user_65d13e_idx", + ), + migrations.AddIndex( + model_name="tendispluslightningjob", + index=models.Index(fields=["create_time"], name="tb_tendispl_create__2a0918_idx"), + ), + migrations.AddIndex( + model_name="tendispluslightningjob", + index=models.Index(fields=["dst_cluster_id"], name="tb_tendispl_dst_clu_f167e8_idx"), + ), + migrations.AddIndex( + model_name="tendispluslightningjob", + index=models.Index(fields=["user"], name="tb_tendispl_user_3b18a1_idx"), + ), + migrations.AlterModelTable( + name="tendispluslightningjob", + table="tb_tendisplus_lightning_job", + ), + migrations.AddIndex( + model_name="tendispluslightningtask", + index=models.Index(fields=["update_time"], name="tb_tendispl_update__4c107b_idx"), + ), + migrations.AddIndex( + model_name="tendispluslightningtask", + index=models.Index(fields=["dst_cluster_id"], name="tb_tendispl_dst_clu_0013b5_idx"), + ), + migrations.AddIndex( + model_name="tendispluslightningtask", + index=models.Index(fields=["user"], name="tb_tendispl_user_c6340e_idx"), + ), + migrations.AddIndex( + model_name="tendispluslightningtask", + index=models.Index(fields=["ticket_id", "dst_cluster_id"], name="tb_tendispl_ticket__942488_idx"), + ), + migrations.AlterField( + model_name="tendispluslightningjob", + name="bk_cloud_id", + 
field=models.BigIntegerField(default=0, verbose_name="云区域ID"), + ), + migrations.AlterField( + model_name="tendispluslightningjob", + name="cluster_nodes", + field=models.TextField(default="", verbose_name="集群节点"), + ), + migrations.AlterField( + model_name="tendispluslightningjob", + name="dst_cluster", + field=models.CharField(default="", max_length=128, verbose_name="目标集群"), + ), + migrations.AlterField( + model_name="tendispluslightningjob", + name="dst_cluster_id", + field=models.BigIntegerField(default=0, verbose_name="目标集群ID"), + ), + migrations.AlterField( + model_name="tendispluslightningjob", + name="ticket_id", + field=models.BigIntegerField(default=0, verbose_name="单据ID"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="bk_cloud_id", + field=models.BigIntegerField(default=0, verbose_name="云区域ID"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="cos_file_size", + field=models.BigIntegerField(default=0, verbose_name="cos文件大小"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="cos_key", + field=models.CharField(default="", max_length=128, verbose_name="cos文件key"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="dst_cluster", + field=models.CharField(default="", max_length=128, verbose_name="目标集群"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="dst_cluster_id", + field=models.BigIntegerField(default=0, verbose_name="目标集群ID"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="dst_zonename", + field=models.CharField(default="", max_length=128, verbose_name="目标集群城市"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="dts_server", + field=models.CharField(default="", max_length=128, verbose_name="dts服务地址"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="message", + field=models.TextField(default="", verbose_name="任务消息"), + ), + 
migrations.AlterField( + model_name="tendispluslightningtask", + name="operate_type", + field=models.CharField(default="", max_length=128, verbose_name="操作类型"), + ), + migrations.AlterField( + model_name="tendispluslightningtask", + name="task_type", + field=models.CharField(default="", max_length=128, verbose_name="任务类型"), + ), + ] diff --git a/dbm-ui/backend/db_services/redis/redis_dts/models/__init__.py b/dbm-ui/backend/db_services/redis/redis_dts/models/__init__.py index bee20c85ae..a7b622510f 100644 --- a/dbm-ui/backend/db_services/redis/redis_dts/models/__init__.py +++ b/dbm-ui/backend/db_services/redis/redis_dts/models/__init__.py @@ -15,3 +15,5 @@ from .tb_tendis_dts_job import TbTendisDTSJob from .tb_tendis_dts_switch_backup import TbTendisDtsSwitchBackup from .tb_tendis_dts_task import TbTendisDtsTask, dts_task_clean_pwd_and_fmt_time, dts_task_format_time +from .tb_tendisplus_lightning_job import TendisplusLightningJob +from .tb_tendisplus_lightning_task import TendisplusLightningTask, lightning_task_format_time diff --git a/dbm-ui/backend/db_services/redis/redis_dts/models/tb_tendisplus_lightning_job.py b/dbm-ui/backend/db_services/redis/redis_dts/models/tb_tendisplus_lightning_job.py new file mode 100644 index 0000000000..738c21507d --- /dev/null +++ b/dbm-ui/backend/db_services/redis/redis_dts/models/tb_tendisplus_lightning_job.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at https://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from django.db import models +from django.utils.translation import ugettext_lazy as _ + + +class TendisplusLightningJob(models.Model): + id = models.BigAutoField(_("ID"), primary_key=True) + ticket_id = models.BigIntegerField(_("单据ID"), default=0) + user = models.CharField(_("申请人"), max_length=64, default="") + bk_biz_id = models.CharField(_("业务bk_biz_d"), max_length=64, default="") + bk_cloud_id = models.BigIntegerField(_("云区域ID"), default=0) + dst_cluster = models.CharField(_("目标集群"), max_length=128, default="") + dst_cluster_id = models.BigIntegerField(_("目标集群ID"), default=0) + cluster_nodes = models.TextField(_("集群节点"), default="") + create_time = models.DateTimeField(auto_now_add=True, verbose_name=_("创建时间")) + + class Meta: + verbose_name = _("Tendisplus Lightning Job") + verbose_name_plural = _("Tendisplus Lightning Job") + db_table = "tb_tendisplus_lightning_job" + + indexes = [ + models.Index(fields=["create_time"]), + models.Index(fields=["dst_cluster_id"]), + models.Index(fields=["user"]), + ] + + constraints = [ + models.UniqueConstraint( + fields=["ticket_id", "dst_cluster"], + name="uniq_ticket_cluster", + ) + ] diff --git a/dbm-ui/backend/db_services/redis/redis_dts/models/tb_tendisplus_lightning_task.py b/dbm-ui/backend/db_services/redis/redis_dts/models/tb_tendisplus_lightning_task.py new file mode 100644 index 0000000000..075eed9ba1 --- /dev/null +++ b/dbm-ui/backend/db_services/redis/redis_dts/models/tb_tendisplus_lightning_task.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 
蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at https://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +from django.db import models +from django.utils.translation import ugettext_lazy as _ + +from backend.utils.time import datetime2str + + +class TendisplusLightningTask(models.Model): + task_id = models.CharField(_("任务ID"), max_length=64, primary_key=True) + ticket_id = models.BigIntegerField(_("单据ID")) + user = models.CharField(_("申请人"), max_length=64, default="") + bk_biz_id = models.CharField(_("业务bk_biz_d"), max_length=64, default="") + bk_cloud_id = models.BigIntegerField(_("云区域ID"), default=0) + cos_key = models.CharField(_("cos文件key"), max_length=128, default="") + cos_file_size = models.BigIntegerField(_("cos文件大小"), default=0) + dts_server = models.CharField(_("dts服务地址"), max_length=128, default="") + dst_cluster = models.CharField(_("目标集群"), max_length=128, default="") + dst_cluster_id = models.BigIntegerField(_("目标集群ID"), default=0) + dst_cluster_priority = models.IntegerField(_("目标集群优先级,越大优先级越高"), default=0) + dst_zonename = models.CharField(_("目标集群城市"), max_length=128, default="") + task_type = models.CharField(_("任务类型"), max_length=128, default="") + operate_type = models.CharField(_("操作类型"), max_length=128, default="") + status = models.IntegerField(_("任务状态"), default=0) + message = models.TextField(_("任务消息"), default="") + create_time = models.DateTimeField(auto_now_add=True, verbose_name=_("创建时间")) + update_time = 
models.DateTimeField(auto_now=True, verbose_name=_("更新时间")) + + class Meta: + verbose_name = _("Tendisplus Lightning Task") + verbose_name_plural = _("Tendisplus Lightning Task") + db_table = "tb_tendisplus_lightning_task" + indexes = [ + models.Index(fields=["update_time"]), + models.Index(fields=["dst_cluster_id"]), + models.Index(fields=["user"]), + models.Index(fields=["ticket_id", "dst_cluster_id"]), + ] + + +def lightning_task_format_time(json_data: dict, row: TendisplusLightningTask): + json_data["create_time"] = datetime2str(row.create_time) + json_data["update_time"] = datetime2str(row.update_time) diff --git a/dbm-ui/backend/flow/consts.py b/dbm-ui/backend/flow/consts.py index 810b754272..63eecd64af 100644 --- a/dbm-ui/backend/flow/consts.py +++ b/dbm-ui/backend/flow/consts.py @@ -480,6 +480,8 @@ class RedisActuatorActionEnum(str, StructuredEnum): CONFIG_SET = EnumField("config_set", _("config_set")) LOAD_MODULES = EnumField("load_modules", _("load_modules")) PREDIXY_ADD_MODULES_CMDS = EnumField("predixy_add_modules_cmds", _("predixy_add_modules_cmds")) + RESHAPE = EnumField("reshape", _("reshape")) + CLUSTER_RESET_FLUSH_MEET = EnumField("cluster_reset_flush_meet", _("cluster_reset_flush_meet")) class MongoDBActuatorActionEnum(str, StructuredEnum): diff --git a/dbm-ui/backend/flow/engine/bamboo/scene/redis/tendisplus_lightning_data.py b/dbm-ui/backend/flow/engine/bamboo/scene/redis/tendisplus_lightning_data.py new file mode 100644 index 0000000000..f3b47d6296 --- /dev/null +++ b/dbm-ui/backend/flow/engine/bamboo/scene/redis/tendisplus_lightning_data.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at https://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +import logging.config +from collections import defaultdict +from copy import deepcopy +from dataclasses import asdict +from typing import Dict, Optional + +from django.utils.translation import ugettext as _ + +from backend.configuration.constants import DBType +from backend.db_meta.models import Cluster +from backend.db_services.redis.util import is_tendisplus_instance_type +from backend.flow.engine.bamboo.scene.common.builder import Builder, SubBuilder +from backend.flow.engine.bamboo.scene.common.get_file_list import GetFileList +from backend.flow.plugins.components.collections.redis.exec_actuator_script import ExecuteDBActuatorScriptComponent +from backend.flow.plugins.components.collections.redis.get_redis_payload import GetRedisActPayloadComponent +from backend.flow.plugins.components.collections.redis.redis_db_meta import RedisDBMetaComponent +from backend.flow.plugins.components.collections.redis.tendisplus_lightning import TendisplusClusterLightningComponent +from backend.flow.plugins.components.collections.redis.trans_flies import TransFileComponent +from backend.flow.utils.base.payload_handler import PayloadHandler +from backend.flow.utils.redis.redis_act_playload import RedisActPayload +from backend.flow.utils.redis.redis_context_dataclass import ActKwargs, TendisplusLightningContext +from backend.flow.utils.redis.redis_db_meta import RedisDBMeta +from backend.flow.utils.redis.redis_proxy_util import async_multi_clusters_precheck, lightning_cluster_nodes + +logger = logging.getLogger("flow") + + +class TendisPlusLightningData(object): + """ + Tendisplus Lightning 
批量,快速导入数据 + """ + + def __init__(self, root_id: str, data: Optional[Dict]): + """ + @param root_id : 任务流程定义的root_id + @param data : 单据传递过来的参数列表,是dict格式 + data = { + "bk_biz_id":"", + "ticket_type": "TENDISPLUS_LIGHTNING_DATA" + "infos":[ + "cluster_id":1, + "cos_file_keys": [ + "xxxx", + "yyy" + ] + ] + } + """ + self.root_id = root_id + self.data = data + self.precheck(self.data["infos"]) + + @staticmethod + def precheck(infos: list): + """ + @summary: 预检查 + """ + to_precheck_cluster_ids = set() + for input_item in infos: + # 如果重复,则报错 + if input_item["cluster_id"] in to_precheck_cluster_ids: + raise Exception( + _("cluster_id:{}重复,同一个集群不能同时执行多个Tendisplus Lightning").format(input_item["cluster_id"]) + ) + to_precheck_cluster_ids.add(input_item["cluster_id"]) + # 并发检查多个cluster的proxy、redis实例状态 + async_multi_clusters_precheck(to_precheck_cluster_ids) + # 检查集群类型 和 是否有running的slave + for input_item in infos: + cluster = Cluster.objects.get(id=input_item["cluster_id"]) + if not is_tendisplus_instance_type(cluster.cluster_type): + raise Exception( + _("cluster_id:{} immute_domain:{} cluster_type:{} Tendisplus Lightning 仅支持Tendisplus集群").format( + cluster.id, cluster.immute_domain, cluster.cluster_type + ) + ) + # 确保每个master都有个running的slave + lightning_cluster_nodes(input_item["cluster_id"]) + + def lightning_data_flow(self): + redis_pipeline = Builder(root_id=self.root_id, data=self.data) + trans_files = GetFileList(db_type=DBType.Redis) + act_kwargs = ActKwargs() + act_kwargs.set_trans_data_dataclass = TendisplusLightningContext.__name__ + act_kwargs.file_list = trans_files.redis_base() + act_kwargs.is_update_trans_data = True + sub_pipelines = [] + + for input_item in self.data["infos"]: + cluster = Cluster.objects.get(id=input_item["cluster_id"]) + passwd_ret = PayloadHandler.redis_get_cluster_password(cluster) + sub_pipeline = SubBuilder(root_id=self.root_id, data=self.data) + cluster_kwargs = deepcopy(act_kwargs) + sub_pipeline.add_act( + 
act_name=_("初始化配置-{}".format(cluster.immute_domain)), + act_component_code=GetRedisActPayloadComponent.code, + kwargs=asdict(cluster_kwargs), + ) + + cluster_nodes = lightning_cluster_nodes(cluster.id) + slave_ip_instance = defaultdict(list) + slave_ips_set = set() + master_slave_pairs = [] + for node in cluster_nodes: + slave_ip, slave_port = node["slave_addr"].split(":") + master_ip, master_port = node["master_addr"].split(":") + slave_ips_set.add(slave_ip) + slave_ip_instance[slave_ip].append({"ip": slave_ip, "port": int(slave_port)}) + master_slave_pairs.append( + { + "master": {"ip": master_ip, "port": int(master_port)}, + "slave": {"ip": slave_ip, "port": int(slave_port)}, + } + ) + + acts_list = [] + for ip in slave_ips_set: + # 下发介质 + act_kwargs.exec_ip = ip + acts_list.append( + { + "act_name": _("{}-下发介质包").format(ip), + "act_component_code": TransFileComponent.code, + "kwargs": asdict(act_kwargs), + } + ) + if acts_list: + sub_pipeline.add_parallel_acts(acts_list=acts_list) + + # 批量导入数据,并等待任务完成 + cluster_kwargs.cluster["cluster_id"] = cluster.id + cluster_kwargs.cluster["cos_file_keys"] = input_item["cos_file_keys"] + sub_pipeline.add_act( + act_name=_("批量生成sst文件并导入"), + act_component_code=TendisplusClusterLightningComponent.code, + kwargs=asdict(cluster_kwargs), + ) + + # slave执行reshape + acts_list = [] + for slave_ip, instances in slave_ip_instance.items(): + cluster_kwargs.exec_ip = slave_ip + cluster_kwargs.cluster = {"instances": instances, "redis_password": passwd_ret.get("redis_password")} + cluster_kwargs.get_redis_payload_func = RedisActPayload.tendisplus_reshape.__name__ + acts_list.append( + { + "act_name": _("执行reshape: {}").format(slave_ip), + "act_component_code": ExecuteDBActuatorScriptComponent.code, + "kwargs": asdict(cluster_kwargs), + } + ) + if acts_list: + sub_pipeline.add_parallel_acts(acts_list=acts_list) + + # slave执行cluster failover,slave会变成 new master + slave_ips_list = list(slave_ips_set) + cluster_kwargs.exec_ip = 
slave_ips_list[0] + cluster_kwargs.cluster = { + "redis_master_slave_pairs": master_slave_pairs, + "redis_password": passwd_ret.get("redis_password"), + } + cluster_kwargs.get_redis_payload_func = RedisActPayload.redis_cluster_failover.__name__ + sub_pipeline.add_act( + act_name=_("slave执行cluster failover"), + act_component_code=ExecuteDBActuatorScriptComponent.code, + kwargs=asdict(cluster_kwargs), + ) + # 断开new_master->new_slave同步关系 + # old master(new slave)上执行 cluster reset + flushall + cluster meet + reset_flush_meet_params = [] + for node in cluster_nodes: + slave_ip, slave_port = node["slave_addr"].split(":") + master_ip, master_port = node["master_addr"].split(":") + reset_flush_meet_params.append( + { + "reset_ip": master_ip, + "reset_port": int(master_port), + "reset_redis_password": passwd_ret.get("redis_password"), + "meet_ip": slave_ip, + "meet_port": int(slave_port), + "do_flushall": True, + "do_cluster_meet": True, + } + ) + cluster_kwargs.exec_ip = slave_ips_list[0] + cluster_kwargs.cluster = {"reset_flush_meet_params": reset_flush_meet_params} + cluster_kwargs.get_redis_payload_func = RedisActPayload.redis_clsuter_reset_flush_meet.__name__ + sub_pipeline.add_act( + act_name=_("new_master->new_slave断开同步关系"), + act_component_code=ExecuteDBActuatorScriptComponent.code, + kwargs=asdict(cluster_kwargs), + ) + # 重建同步关系 + replica_pairs_params = [] + for node in cluster_nodes: + slave_ip, slave_port = node["slave_addr"].split(":") + master_ip, master_port = node["master_addr"].split(":") + replica_pairs_params.append( + { + "master_ip": slave_ip, + "master_port": int(slave_port), + "master_auth": passwd_ret.get("redis_password"), + "slave_ip": master_ip, + "slave_port": int(master_port), + "slave_password": passwd_ret.get("redis_password"), + } + ) + cluster_kwargs.exec_ip = slave_ips_list[0] + cluster_kwargs.cluster = {"replica_pairs": replica_pairs_params} + cluster_kwargs.get_redis_payload_func = RedisActPayload.redis_init_batch_replicate.__name__ + 
sub_pipeline.add_act( + act_name=_("new_master->new_slave重建同步关系"), + act_component_code=ExecuteDBActuatorScriptComponent.code, + kwargs=asdict(cluster_kwargs), + ) + + # 更新元数据 + cluster_kwargs.cluster["meta_func_name"] = RedisDBMeta.swith_master_slave_for_cluster_faiover.__name__ + cluster_kwargs.cluster["cluster_ids"] = [cluster.id] + sub_pipeline.add_act( + act_name=_("元数据更新"), + act_component_code=RedisDBMetaComponent.code, + kwargs=asdict(cluster_kwargs), + ) + + sub_pipelines.append( + sub_pipeline.build_sub_process(sub_name=_("集群{}批量导入数据").format(cluster.immute_domain)) + ) + redis_pipeline.add_parallel_sub_pipeline(sub_flow_list=sub_pipelines) + redis_pipeline.run_pipeline() diff --git a/dbm-ui/backend/flow/engine/controller/redis.py b/dbm-ui/backend/flow/engine/controller/redis.py index 48debf1ee7..d94be79cfa 100644 --- a/dbm-ui/backend/flow/engine/controller/redis.py +++ b/dbm-ui/backend/flow/engine/controller/redis.py @@ -57,6 +57,7 @@ ) from backend.flow.engine.bamboo.scene.redis.redis_twemproxy_cluster_apply_flow import RedisClusterApplyFlow from backend.flow.engine.bamboo.scene.redis.single_proxy_shutdown import SingleProxyShutdownFlow +from backend.flow.engine.bamboo.scene.redis.tendisplus_lightning_data import TendisPlusLightningData from backend.flow.engine.controller.base import BaseController @@ -393,3 +394,10 @@ def redis_cluster_load_modules(self): """ flow = RedisClusterLoadModulesSceneFlow(root_id=self.root_id, data=self.ticket_data) flow.batch_clusters_load_modules() + + def tendisplus_lightning_data(self): + """ + tendisplus lightning 数据导入 + """ + flow = TendisPlusLightningData(root_id=self.root_id, data=self.ticket_data) + flow.lightning_data_flow() diff --git a/dbm-ui/backend/flow/plugins/components/collections/redis/tendisplus_lightning.py b/dbm-ui/backend/flow/plugins/components/collections/redis/tendisplus_lightning.py new file mode 100644 index 0000000000..97bd76442d --- /dev/null +++ 
b/dbm-ui/backend/flow/plugins/components/collections/redis/tendisplus_lightning.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at https://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +import datetime +import json +import traceback +import uuid +from typing import List + +from django.db import transaction +from django.utils.translation import ugettext as _ +from pipeline.component_framework.component import Component +from pipeline.core.flow.activity import Service, StaticIntervalGenerator + +import backend.flow.utils.redis.redis_context_dataclass as flow_context +from backend.db_services.redis.redis_dts.models import TendisplusLightningJob, TendisplusLightningTask +from backend.flow.plugins.components.collections.common.base_service import BaseService +from backend.flow.utils.redis.redis_context_dataclass import ActKwargs, TendisplusLightningContext +from backend.flow.utils.redis.redis_proxy_util import get_cluster_info_by_cluster_id, lightning_cluster_nodes + + +class TendisplusClusterLightningService(BaseService): + """ + tendisplus集群迁移 + """ + + __need_schedule__ = True + interval = StaticIntervalGenerator(30) + + def _execute(self, data, parent_data): + kwargs: ActKwargs = data.get_one_of_inputs("kwargs") + global_data = data.get_one_of_inputs("global_data") + trans_data: TendisplusLightningContext = 
data.get_one_of_inputs("trans_data") + + if trans_data is None or trans_data == "${trans_data}": + # 表示没有加载上下文内容,则在此添加 + trans_data = getattr(flow_context, kwargs["set_trans_data_dataclass"])() + + if trans_data.ticket_id and trans_data.dst_cluster: + """如果 ticket_id 和 dst_cluster 已经存在,则表示已经执行过了,无需重复插入""" + return True + try: + ticket_id = int(global_data["uid"]) + bk_biz_id = int(global_data["bk_biz_id"]) + cluster_id = int(kwargs["cluster"]["cluster_id"]) + cos_file_keys = kwargs["cluster"]["cos_file_keys"] + cluster_info = get_cluster_info_by_cluster_id(cluster_id) + cluster_nodes_data = lightning_cluster_nodes(cluster_id=cluster_id) + with transaction.atomic(): + job = TendisplusLightningJob() + job.ticket_id = ticket_id + job.user = global_data["created_by"] + job.bk_biz_id = str(bk_biz_id) + job.bk_cloud_id = cluster_info["bk_cloud_id"] + job.dst_cluster = cluster_info["immute_domain"] + ":" + str(cluster_info["proxy_port"]) + job.dst_cluster_id = cluster_id + job.cluster_nodes = json.dumps(cluster_nodes_data) + job.create_time = datetime.datetime.now() + job.save() + + for cos_file_key in cos_file_keys: + task = TendisplusLightningTask() + task.task_id = uuid.uuid1().hex + task.ticket_id = job.ticket_id + task.user = job.user + task.bk_biz_id = job.bk_biz_id + task.bk_cloud_id = cluster_info["bk_cloud_id"] + task.cos_key = cos_file_key + task.cos_file_size = 0 + task.dts_server = "1.1.1.1" + task.dst_cluster = job.dst_cluster + task.dst_cluster_id = job.dst_cluster_id + task.dst_cluster_priority = 1 + task.dst_zonename = cluster_info["cluster_city_name"] + task.task_type = "" + task.status = 0 + task.create_time = datetime.datetime.now() + task.update_time = datetime.datetime.now() + task.save() + + except Exception as e: + traceback.print_exc() + self.log_error("tendisplus lightning failed:{}".format(e)) + return False + self.log_info("tendisplus lightning success") + trans_data.ticket_id = ticket_id + trans_data.dst_cluster = cluster_info["immute_domain"] 
+ ":" + str(cluster_info["proxy_port"]) + data.outputs["trans_data"] = trans_data + return True + + def _schedule(self, data, parent_data, callback_data=None) -> bool: + kwargs = data.get_one_of_inputs("kwargs") + trans_data: TendisplusLightningContext = data.get_one_of_inputs("trans_data") + + if trans_data is None or trans_data == "${trans_data}": + # 表示没有加载上下文内容,则在此添加 + trans_data = getattr(flow_context, kwargs["set_trans_data_dataclass"])() + ticket_id = trans_data.ticket_id + dst_cluster = trans_data.dst_cluster + tasks_fows = TendisplusLightningTask.objects.filter(ticket_id=ticket_id, dst_cluster=dst_cluster) + if self.__is_any_task_fail(tasks_fows): + self.log_info(_("ticket_id:{} dst_cluster:{} 有任务失败").format(ticket_id, dst_cluster)) + self.finish_schedule() + return False + if self.__is_any_task_running(tasks_fows): + self.log_info(_("ticket_id:{} dst_cluster:{} 有任务仍然在运行中").format(ticket_id, dst_cluster)) + return True + # 任务全部都成功了 + self.log_info(_("ticket_id:{} dst_cluster:{} 全部任务执行成功").format(ticket_id, dst_cluster)) + self.finish_schedule() + return True + + def __is_any_task_fail(self, tasks: List[TendisplusLightningTask]) -> bool: + """ + 判断是否还有任务失败 + """ + for task in tasks: + if task.status == -1: + return True + + def __is_any_task_running(self, tasks: List[TendisplusLightningTask]) -> bool: + """ + 判断是否还有任务运行中 + """ + for task in tasks: + if task.status in [0, 1]: + return True + + def inputs_format(self) -> List: + return [ + Service.InputItem(name="kwargs", key="kwargs", type="dict", requiredc=True), + Service.InputItem(name="global_data", key="global_data", type="dict", required=True), + ] + + +class TendisplusClusterLightningComponent(Component): + name = __name__ + code = "tendisplus_cluster_lightning" + bound_service = TendisplusClusterLightningService diff --git a/dbm-ui/backend/flow/utils/redis/redis_act_playload.py b/dbm-ui/backend/flow/utils/redis/redis_act_playload.py index b660d01af3..bb9eb4e94b 100644 --- 
a/dbm-ui/backend/flow/utils/redis/redis_act_playload.py +++ b/dbm-ui/backend/flow/utils/redis/redis_act_playload.py @@ -2095,12 +2095,12 @@ def redis_cluster_failover(self, **kwargs) -> dict: "redis_password":"xxxx", "redis_master_slave_pairs":[ { - "master": {"ip":"a.a.a.a","port":"30000"}, - "slave": {"ip":"b.b.b.b","port":"30000"} + "master": {"ip":"a.a.a.a","port":30000}, + "slave": {"ip":"b.b.b.b","port":30000} }, { - "master": {"ip":"a.a.a.a","port":"30001"}, - "slave": {"ip":"b.b.b.b","port":"30001"} + "master": {"ip":"a.a.a.a","port":30001}, + "slave": {"ip":"b.b.b.b","port":30001} } ], "force":false @@ -2445,3 +2445,30 @@ def redis_custer_rename_domain_update_dbconfig(self, cluster_map: dict) -> Any: "level_value": new_domain, } DBConfigApi.upsert_conf_item(upsert_param) + + def tendisplus_reshape(self, **kwargs) -> dict: + """ + tendisplus reshape + """ + params = kwargs["params"] + return { + "db_type": DBActuatorTypeEnum.Redis.value, + "action": DBActuatorTypeEnum.Redis.value + "_" + RedisActuatorActionEnum.RESHAPE.value, + "payload": { + "instances": params["instances"], + "redis_password": params["redis_password"], + }, + } + + def redis_clsuter_reset_flush_meet(self, **kwargs) -> dict: + """ + redis cluster reset+flush+meet + """ + params = kwargs["params"] + return { + "db_type": DBActuatorTypeEnum.Redis.value, + "action": DBActuatorTypeEnum.Redis.value + "_" + RedisActuatorActionEnum.CLUSTER_RESET_FLUSH_MEET.value, + "payload": { + "reset_flush_meet_params": params["reset_flush_meet_params"], + }, + } diff --git a/dbm-ui/backend/flow/utils/redis/redis_context_dataclass.py b/dbm-ui/backend/flow/utils/redis/redis_context_dataclass.py index 8e5e4c9536..cee9eb0255 100644 --- a/dbm-ui/backend/flow/utils/redis/redis_context_dataclass.py +++ b/dbm-ui/backend/flow/utils/redis/redis_context_dataclass.py @@ -201,3 +201,15 @@ class DownloadBackupFileKwargs: login_passwd: str cluster: dict = None tendis_backup_info: list = None # 占位:执行备份后的信息 + + +@dataclass() 
+class TendisplusLightningContext: + """ + 定义tendisplus lightning 数据批量导入的变量结构体 + """ + + redis_act_payload: Optional[Any] = None # 代表获取payload参数的类 + tendis_backup_info: list = None # 执行备份后的信息 + ticket_id: int = None # 代表dts job id,对应表tb_tendis_dts_job + dst_cluster: str = None # 代表目标集群 diff --git a/dbm-ui/backend/flow/utils/redis/redis_db_meta.py b/dbm-ui/backend/flow/utils/redis/redis_db_meta.py index 7d158a034e..b8ed2a5d26 100644 --- a/dbm-ui/backend/flow/utils/redis/redis_db_meta.py +++ b/dbm-ui/backend/flow/utils/redis/redis_db_meta.py @@ -1816,3 +1816,51 @@ def update_cluster_entry(self) -> bool: ) ) return True + + @transaction.atomic + def swith_master_slave_for_cluster_faiover(self): + """ + 交换cluster集群的master和slave + """ + for cluster_id in self.cluster["cluster_ids"]: + cluster = Cluster.objects.get(id=cluster_id) + cc_manage = CcManage(cluster.bk_biz_id, cluster.cluster_type) + bk_host_ids = [] + cluster_masters = cluster.storageinstance_set.filter(instance_role=InstanceRole.REDIS_MASTER.value) + for master_obj in cluster_masters: + slave_obj = master_obj.as_ejector.get().receiver + + bk_host_ids.append(master_obj.machine.bk_host_id) + bk_host_ids.append(slave_obj.machine.bk_host_id) + + new_master_obj = slave_obj + new_master_obj.instance_role = InstanceRole.REDIS_MASTER + new_master_obj.instance_inner_role = InstanceInnerRole.MASTER + new_master_obj.cluster_type = cluster.cluster_type + new_master_obj.save(update_fields=["instance_role", "instance_inner_role", "cluster_type"]) + + new_slave_obj = master_obj + new_slave_obj.instance_role = InstanceRole.REDIS_SLAVE + new_slave_obj.instance_inner_role = InstanceInnerRole.SLAVE + new_slave_obj.cluster_type = cluster.cluster_type + new_slave_obj.save(update_fields=["instance_role", "instance_inner_role", "cluster_type"]) + + StorageInstanceTuple.objects.filter(ejector=master_obj, receiver=slave_obj).update( + ejector=new_master_obj, receiver=new_slave_obj + ) + + for proxy in 
cluster.proxyinstance_set.all(): + proxy.storageinstance.remove(master_obj) + proxy.storageinstance.add(new_master_obj) + proxy.save() + # 切换新master服务实例角色标签 + cc_manage.add_label_for_service_instance( + bk_instance_ids=[new_master_obj.bk_instance_id], + labels_dict={"instance_role": InstanceRole.REDIS_MASTER.value}, + ) + # 切换新slave服务实例角色标签 + cc_manage.add_label_for_service_instance( + bk_instance_ids=[new_slave_obj.bk_instance_id], + labels_dict={"instance_role": InstanceRole.REDIS_SLAVE.value}, + ) + cc_manage.update_host_properties(bk_host_ids) diff --git a/dbm-ui/backend/flow/utils/redis/redis_proxy_util.py b/dbm-ui/backend/flow/utils/redis/redis_proxy_util.py index c4ea061b7f..ae8a4265ec 100644 --- a/dbm-ui/backend/flow/utils/redis/redis_proxy_util.py +++ b/dbm-ui/backend/flow/utils/redis/redis_proxy_util.py @@ -46,9 +46,12 @@ ConfigTypeEnum, MediumEnum, RedisCapacityUpdateType, + RedisRole, ) from backend.flow.utils.base.payload_handler import PayloadHandler +from backend.flow.utils.redis.redis_cluster_nodes import decode_cluster_nodes from backend.flow.utils.redis.redis_util import version_ge +from backend.utils.string import base64_encode logger = logging.getLogger("flow") @@ -1107,3 +1110,56 @@ async def async_handler(cluster_ids: List[int]): await asyncio.gather(*tasks) return asyncio.run(async_handler(cluster_ids)) + + +def lightning_cluster_nodes(cluster_id: int) -> list: + """ + 解析redis 'cluster nodes'命令返回的信息 + return: + [ + {"master_addr":"a.a.a.a:30000","slave_addr":"b.b.b.b:30000","slots":"0-4095","redis_password":"xxxx"} + {"master_addr":"c.c.c.c:30000","slave_addr":"d.d.d.d:30000","slots":"4096-8191","redis_password":"xxxx"} + ] + """ + cluster = Cluster.objects.get(id=cluster_id) + one_master = cluster.storageinstance_set.filter( + instance_role=InstanceRole.REDIS_MASTER.value, status=InstanceStatus.RUNNING + ).first() + passwd_ret = PayloadHandler.redis_get_cluster_password(cluster) + master_addrs = ["{}:{}".format(one_master.machine.ip, 
one_master.port)] + resp = DRSApi.redis_rpc( + { + "addresses": master_addrs, + "db_num": 0, + "password": passwd_ret.get("redis_password"), + "command": "cluster nodes", + "bk_cloud_id": cluster.bk_cloud_id, + } + ) + if len(resp) == 0 or (not resp[0]["result"]): + raise Exception(_("redis集群 {} master {} cluster nodes 命令执行失败").format(cluster.immute_domain, master_addrs)) + cluster_nodes_raw_data = resp[0]["result"] + node_ret = decode_cluster_nodes(cluster_nodes_raw_data) + node_list = node_ret[0] + id_nodemap = {node.node_id: node for node in node_list} + master_to_item = {} + redis_password_encode = base64_encode(passwd_ret.get("redis_password")) + for node in node_list: + if node.get_role() == RedisRole.SLAVE.value and node.is_running(): + master_id = node.master_id + master_node = id_nodemap.get(master_id) + master_to_item[master_node.addr] = { + "master_addr": master_node.addr, + "slave_addr": node.addr, + "slots": master_node.slot_src_str, + "redis_password_encode": redis_password_encode, + } + masters_with_slots = [] + for node in node_list: + if node.get_role() == RedisRole.MASTER.value and node.slot_cnt > 0: + masters_with_slots.append(node) + for master in masters_with_slots: + master_addr = master.addr + if master_addr not in master_to_item: + raise Exception(_("redis集群 {} master {} 没有对应running的slave节点").format(cluster.immute_domain, master_addr)) + return list(master_to_item.values()) diff --git a/dbm-ui/backend/ticket/builders/redis/tendisplus_lightning_data.py b/dbm-ui/backend/ticket/builders/redis/tendisplus_lightning_data.py new file mode 100644 index 0000000000..95809fde6c --- /dev/null +++ b/dbm-ui/backend/ticket/builders/redis/tendisplus_lightning_data.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-DB管理系统(BlueKing-BK-DBM) available. +Copyright (C) 2017-2023 THL A29 Limited, a Tencent company. All rights reserved. 
+Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at https://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" +import logging.config + +from django.utils.translation import ugettext_lazy as _ +from rest_framework import serializers + +from backend.flow.engine.bamboo.scene.redis.tendisplus_lightning_data import TendisPlusLightningData +from backend.flow.engine.controller.redis import RedisController +from backend.ticket import builders +from backend.ticket.builders.common.base import SkipToRepresentationMixin +from backend.ticket.builders.redis.base import BaseRedisTicketFlowBuilder, ClusterValidateMixin +from backend.ticket.constants import TicketType + +logger = logging.getLogger("flow") + + +class TendisPlusLightningDataSerializer(SkipToRepresentationMixin, serializers.Serializer): + """TendisPlus闪电导入数据""" + + class InfoSerializer(ClusterValidateMixin, serializers.Serializer): + cluster_id = serializers.IntegerField(help_text=_("集群ID")) + cos_file_keys = serializers.ListField(help_text=_("cos文件key列表"), child=serializers.CharField()) + + infos = serializers.ListField(help_text=_("参数列表"), child=InfoSerializer()) + + def validate(self, attr): + TendisPlusLightningData.precheck(attr["infos"]) + return attr + + +class TendisplusLightingDataParamBuilder(builders.FlowParamBuilder): + controller = RedisController.tendisplus_lightning_data + + def format_ticket_data(self): + super().format_ticket_data() + + +@builders.BuilderFactory.register(TicketType.REDIS_TENDISPLUS_LIGHTNING_DATA, is_apply=False) +class RedisClusterRenameDomainFlowBuilder(BaseRedisTicketFlowBuilder): + serializer 
= TendisPlusLightningDataSerializer + inner_flow_builder = TendisplusLightingDataParamBuilder + inner_flow_name = _("tendisplus闪电导入数据") + default_need_itsm = False + default_need_manual_confirm = False + + def patch_ticket_detail(self): + super().patch_ticket_detail() diff --git a/dbm-ui/backend/ticket/constants.py b/dbm-ui/backend/ticket/constants.py index 11807d966f..78a14e673b 100644 --- a/dbm-ui/backend/ticket/constants.py +++ b/dbm-ui/backend/ticket/constants.py @@ -335,9 +335,10 @@ def get_cluster_type_by_ticket(cls, ticket_type): REDIS_CLUSTER_PROXYS_UPGRADE = TicketEnumField("REDIS_CLUSTER_PROXYS_UPGRADE", _("Redis 集群proxys版本升级"), register_iam=False) # noqa REDIS_DIRTY_MACHINE_CLEAR = TicketEnumField("REDIS_DIRTY_MACHINE_CLEAR", _("Redis脏机清理"), register_iam=False) REDIS_CLUSTER_STORAGES_CLI_CONNS_KILL = TicketEnumField("REDIS_CLUSTER_STORAGES_CLI_CONNS_KILL", _("Redis 集群存储层cli连接kill"), register_iam=False) # noqa - REDIS_CLUSTER_RENAME_DOMAIN = TicketEnumField("REDIS_CLUSTER_RENAME_DOMAIN", _("Redis集群域名重命名")) + REDIS_CLUSTER_RENAME_DOMAIN = TicketEnumField("REDIS_CLUSTER_RENAME_DOMAIN", _("Redis集群域名重命名"), _("集群维护")) REDIS_CLUSTER_MAXMEMORY_SET = TicketEnumField("REDIS_CLUSTER_MAXMEMORY_SET", _("Redis 集群设置maxmemory")) # noqa REDIS_CLUSTER_LOAD_MODULES = TicketEnumField("REDIS_CLUSTER_LOAD_MODULES", _("Redis 集群加载modules")) # noqa + REDIS_TENDISPLUS_LIGHTNING_DATA = TicketEnumField("REDIS_TENDISPLUS_LIGHTNING_DATA", _("Tendisplus闪电导入数据"), _("集群维护")) # noqa # 大数据 KAFKA_APPLY = TicketEnumField("KAFKA_APPLY", _("Kafka 集群部署"), register_iam=False)