From 26e90e9fffdd3a89ea796d2f7ed3b70cb98ae870 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Mon, 24 Jun 2024 16:52:21 +0800 Subject: [PATCH] scheduler: skip evict-leader-scheduler when setting schedule deny label (#8303) ref tikv/pd#7300, close tikv/pd#7853 - add a real cluster test to test `skip evict-leader-scheduler when setting schedule deny label` - add `DeleteStoreLabel` API and `DeleteScheduler` API Signed-off-by: okJiang <819421878@qq.com> --- .gitignore | 1 + Makefile | 1 + client/http/interface.go | 27 +++ client/http/request_info.go | 2 + .../schedulers/scheduler_controller.go | 6 +- server/cluster/cluster.go | 3 + tests/integrations/client/http_client_test.go | 12 +- tests/integrations/realcluster/Makefile | 9 +- tests/integrations/realcluster/deploy.sh | 9 +- tests/integrations/realcluster/pd.toml | 5 + .../realcluster/reboot_pd_test.go | 3 + .../realcluster/scheduler_test.go | 188 ++++++++++++++++++ .../realcluster/transfer_leader_test.go | 73 ------- tests/integrations/realcluster/wait_tiup.sh | 2 +- 14 files changed, 261 insertions(+), 80 deletions(-) create mode 100644 tests/integrations/realcluster/pd.toml create mode 100644 tests/integrations/realcluster/scheduler_test.go delete mode 100644 tests/integrations/realcluster/transfer_leader_test.go diff --git a/.gitignore b/.gitignore index b9be6099e24..fb9f0424418 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ coverage *.txt go.work* embedded_assets_handler.go +*.log diff --git a/Makefile b/Makefile index dca00012114..5f5ac871f18 100644 --- a/Makefile +++ b/Makefile @@ -280,6 +280,7 @@ test-tso-consistency: install-tools REAL_CLUSTER_TEST_PATH := $(ROOT_PATH)/tests/integrations/realcluster test-real-cluster: + @ rm -rf ~/.tiup/data/pd_real_cluster_test # testing with the real cluster... cd $(REAL_CLUSTER_TEST_PATH) && $(MAKE) check diff --git a/client/http/interface.go b/client/http/interface.go index 3684e19b1f5..f90ab19624f 100644 --- a/client/http/interface.go +++ b/client/http/interface.go @@ -51,6 +51,7 @@ type Client interface { GetStore(context.Context, uint64) (*StoreInfo, error) DeleteStore(context.Context, uint64) error SetStoreLabels(context.Context, int64, map[string]string) error + DeleteStoreLabel(ctx context.Context, storeID int64, labelKey string) error GetHealthStatus(context.Context) ([]Health, error) /* Config-related interfaces */ GetConfig(context.Context) (map[string]any, error) @@ -65,6 +66,7 @@ type Client interface { /* Scheduler-related interfaces */ GetSchedulers(context.Context) ([]string, error) CreateScheduler(ctx context.Context, name string, storeID uint64) error + DeleteScheduler(ctx context.Context, name string) error SetSchedulerDelay(context.Context, string, int64) error /* Rule-related interfaces */ GetAllPlacementRuleBundles(context.Context) ([]*GroupBundle, error) @@ -81,6 +83,10 @@ type Client interface { DeletePlacementRuleGroupByID(context.Context, string) error GetAllRegionLabelRules(context.Context) ([]*LabelRule, error) GetRegionLabelRulesByIDs(context.Context, []string) ([]*LabelRule, error) + // `SetRegionLabelRule` sets the label rule for a region. + // When a label rule (deny scheduler) is set, + // 1. All schedulers will be disabled except for the evict-leader-scheduler. + // 2. The merge-checker will be disabled, preventing these regions from being merged. SetRegionLabelRule(context.Context, *LabelRule) error PatchRegionLabelRules(context.Context, *LabelRulePatch) error /* Scheduling-related interfaces */ @@ -339,6 +345,19 @@ func (c *client) SetStoreLabels(ctx context.Context, storeID int64, storeLabels WithBody(jsonInput)) } +// DeleteStoreLabel deletes the labels of a store. +func (c *client) DeleteStoreLabel(ctx context.Context, storeID int64, labelKey string) error { + jsonInput, err := json.Marshal(labelKey) + if err != nil { + return errors.Trace(err) + } + return c.request(ctx, newRequestInfo(). + WithName(deleteStoreLabelName). + WithURI(LabelByStoreID(storeID)). + WithMethod(http.MethodDelete). + WithBody(jsonInput)) +} + // GetHealthStatus gets the health status of the cluster. func (c *client) GetHealthStatus(ctx context.Context) ([]Health, error) { var healths []Health @@ -762,6 +781,14 @@ func (c *client) CreateScheduler(ctx context.Context, name string, storeID uint6 WithBody(inputJSON)) } +// DeleteScheduler deletes a scheduler from PD cluster. +func (c *client) DeleteScheduler(ctx context.Context, name string) error { + return c.request(ctx, newRequestInfo(). + WithName(deleteSchedulerName). + WithURI(SchedulerByName(name)). + WithMethod(http.MethodDelete)) +} + // AccelerateSchedule accelerates the scheduling of the regions within the given key range. // The keys in the key range should be encoded in the hex bytes format (without encoding to the UTF-8 bytes). func (c *client) AccelerateSchedule(ctx context.Context, keyRange *KeyRange) error { diff --git a/client/http/request_info.go b/client/http/request_info.go index 40bd0368250..783220bcc60 100644 --- a/client/http/request_info.go +++ b/client/http/request_info.go @@ -41,6 +41,7 @@ const ( getStoreName = "GetStore" deleteStoreName = "DeleteStore" setStoreLabelsName = "SetStoreLabels" + deleteStoreLabelName = "DeleteStoreLabel" getHealthStatusName = "GetHealthStatus" getConfigName = "GetConfig" setConfigName = "SetConfig" @@ -53,6 +54,7 @@ const ( getReplicateConfigName = "GetReplicateConfig" getSchedulersName = "GetSchedulers" createSchedulerName = "CreateScheduler" + deleteSchedulerName = "DeleteScheduler" setSchedulerDelayName = "SetSchedulerDelay" getAllPlacementRuleBundlesName = "GetAllPlacementRuleBundles" getPlacementRuleBundleByGroupName = "GetPlacementRuleBundleByGroup" diff --git a/pkg/schedule/schedulers/scheduler_controller.go b/pkg/schedule/schedulers/scheduler_controller.go index ea480a06845..04c74af7964 100644 --- a/pkg/schedule/schedulers/scheduler_controller.go +++ b/pkg/schedule/schedulers/scheduler_controller.go @@ -456,6 +456,7 @@ func (s *ScheduleController) Stop() { // Schedule tries to create some operators. func (s *ScheduleController) Schedule(diagnosable bool) []*operator.Operator { + _, isEvictLeaderScheduler := s.Scheduler.(*evictLeaderScheduler) retry: for i := 0; i < maxScheduleRetries; i++ { // no need to retry if schedule should stop to speed exit @@ -486,7 +487,10 @@ retry: if labelMgr == nil { continue } - if labelMgr.ScheduleDisabled(region) { + + // If the evict-leader-scheduler is disabled, it will obstruct the restart operation of tikv by the operator. + // Refer: https://docs.pingcap.com/tidb-in-kubernetes/stable/restart-a-tidb-cluster#perform-a-graceful-restart-to-a-single-tikv-pod + if labelMgr.ScheduleDisabled(region) && !isEvictLeaderScheduler { denySchedulersByLabelerCounter.Inc() continue retry } diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 534d8361b2a..5c15856cec6 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -1206,6 +1206,9 @@ func (c *RaftCluster) DeleteStoreLabel(storeID uint64, labelKey string) error { if store == nil { return errs.ErrInvalidStoreID.FastGenByArgs(storeID) } + if len(store.GetLabels()) == 0 { + return errors.Errorf("the label key %s does not exist", labelKey) + } newStore := typeutil.DeepClone(store.GetMeta(), core.StoreFactory) labels := make([]*metapb.StoreLabel, 0, len(newStore.GetLabels())-1) for _, label := range newStore.GetLabels() { diff --git a/tests/integrations/client/http_client_test.go b/tests/integrations/client/http_client_test.go index 229a658639d..1d7d4488692 100644 --- a/tests/integrations/client/http_client_test.go +++ b/tests/integrations/client/http_client_test.go @@ -560,9 +560,14 @@ func (suite *httpClientTestSuite) TestSchedulers() { re.NoError(err) err = client.SetSchedulerDelay(ctx, "not-exist", 100) re.ErrorContains(err, "500 Internal Server Error") // TODO: should return friendly error message + + re.NoError(client.DeleteScheduler(ctx, schedulerName)) + schedulers, err = client.GetSchedulers(ctx) + re.NoError(err) + re.NotContains(schedulers, schedulerName) } -func (suite *httpClientTestSuite) TestSetStoreLabels() { +func (suite *httpClientTestSuite) TestStoreLabels() { re := suite.Require() client := suite.client ctx, cancel := context.WithCancel(suite.ctx) @@ -590,6 +595,11 @@ func (suite *httpClientTestSuite) TestSetStoreLabels() { for key, value := range storeLabels { re.Equal(value, labelsMap[key]) } + + re.NoError(client.DeleteStoreLabel(ctx, firstStore.Store.ID, "zone")) + store, err := client.GetStore(ctx, uint64(firstStore.Store.ID)) + re.NoError(err) + re.Empty(store.Store.Labels) } func (suite *httpClientTestSuite) TestTransferLeader() { diff --git a/tests/integrations/realcluster/Makefile b/tests/integrations/realcluster/Makefile index e161d52a86e..28c918ec2bf 100644 --- a/tests/integrations/realcluster/Makefile +++ b/tests/integrations/realcluster/Makefile @@ -50,7 +50,14 @@ kill_cluster: fi test: - CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || { exit 1; } + CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || (\ + echo "follow is pd-0 log\n" ; \ + cat ~/.tiup/data/pd_real_cluster_test/pd-0/pd.log ; \ + echo "follow is pd-1 log\n" ; \ + cat ~/.tiup/data/pd_real_cluster_test/pd-1/pd.log ; \ + echo "follow is pd-2 log\n" ; \ + cat ~/.tiup/data/pd_real_cluster_test/pd-2/pd.log ; \ + exit 1) install-tools: cd $(ROOT_PATH) && $(MAKE) install-tools diff --git a/tests/integrations/realcluster/deploy.sh b/tests/integrations/realcluster/deploy.sh index 31bf17655f8..f6f567314f0 100755 --- a/tests/integrations/realcluster/deploy.sh +++ b/tests/integrations/realcluster/deploy.sh @@ -1,6 +1,8 @@ #!/bin/bash # deploy `tiup playground` +set -x + TIUP_BIN_DIR=$HOME/.tiup/bin/tiup CUR_PATH=$(pwd) @@ -19,15 +21,16 @@ if [ ! -d "bin" ] || [ ! -e "bin/tikv-server" ] && [ ! -e "bin/tidb-server" ] && color-green "downloading binaries..." color-green "this may take a few minutes, you can also download them manually and put them in the bin directory." make pd-server WITH_RACE=1 - $TIUP_BIN_DIR playground nightly --kv 3 --tiflash 1 --db 1 --pd 3 --without-monitor --tag pd_test \ - --pd.binpath ./bin/pd-server \ + $TIUP_BIN_DIR playground nightly --kv 3 --tiflash 1 --db 1 --pd 3 --without-monitor --tag pd_real_cluster_test \ + --pd.binpath ./bin/pd-server --pd.config ./tests/integrations/realcluster/pd.toml \ > $CUR_PATH/playground.log 2>&1 & else # CI will download the binaries in the prepare phase. # ref https://github.com/PingCAP-QE/ci/blob/387e9e533b365174962ccb1959442a7070f9cd66/pipelines/tikv/pd/latest/pull_integration_realcluster_test.groovy#L55-L68 color-green "using existing binaries..." $TIUP_BIN_DIR playground nightly --kv 3 --tiflash 1 --db 1 --pd 3 --without-monitor \ - --pd.binpath ./bin/pd-server --kv.binpath ./bin/tikv-server --db.binpath ./bin/tidb-server --tiflash.binpath ./bin/tiflash --tag pd_test \ + --pd.binpath ./bin/pd-server --kv.binpath ./bin/tikv-server --db.binpath ./bin/tidb-server \ + --tiflash.binpath ./bin/tiflash --tag pd_real_cluster_test --pd.config ./tests/integrations/realcluster/pd.toml \ > $CUR_PATH/playground.log 2>&1 & fi diff --git a/tests/integrations/realcluster/pd.toml b/tests/integrations/realcluster/pd.toml new file mode 100644 index 00000000000..876c7f13af2 --- /dev/null +++ b/tests/integrations/realcluster/pd.toml @@ -0,0 +1,5 @@ +[schedule] +patrol-region-interval = "100ms" + +[log] +level = "debug" diff --git a/tests/integrations/realcluster/reboot_pd_test.go b/tests/integrations/realcluster/reboot_pd_test.go index b8914e87bd8..14c86f2dedb 100644 --- a/tests/integrations/realcluster/reboot_pd_test.go +++ b/tests/integrations/realcluster/reboot_pd_test.go @@ -51,6 +51,9 @@ func TestReloadLabel(t *testing.T) { storeLabels[label.Key] = label.Value } re.NoError(pdHTTPCli.SetStoreLabels(ctx, firstStore.Store.ID, storeLabels)) + defer func() { + re.NoError(pdHTTPCli.DeleteStoreLabel(ctx, firstStore.Store.ID, "zone")) + }() checkLabelsAreEqual := func() { resp, err := pdHTTPCli.GetStore(ctx, uint64(firstStore.Store.ID)) diff --git a/tests/integrations/realcluster/scheduler_test.go b/tests/integrations/realcluster/scheduler_test.go new file mode 100644 index 00000000000..0ed6f6c6b76 --- /dev/null +++ b/tests/integrations/realcluster/scheduler_test.go @@ -0,0 +1,188 @@ +// Copyright 2024 TiKV Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package realcluster + +import ( + "context" + "fmt" + "sort" + "testing" + "time" + + "github.com/stretchr/testify/require" + pd "github.com/tikv/pd/client/http" + "github.com/tikv/pd/client/testutil" + "github.com/tikv/pd/pkg/schedule/labeler" + "github.com/tikv/pd/pkg/schedule/schedulers" +) + +// https://github.com/tikv/pd/issues/6988#issuecomment-1694924611 +// https://github.com/tikv/pd/issues/6897 +func TestTransferLeader(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + resp, err := pdHTTPCli.GetLeader(ctx) + re.NoError(err) + oldLeader := resp.Name + + var newLeader string + for i := 0; i < 2; i++ { + if resp.Name != fmt.Sprintf("pd-%d", i) { + newLeader = fmt.Sprintf("pd-%d", i) + } + } + + // record scheduler + re.NoError(pdHTTPCli.CreateScheduler(ctx, schedulers.EvictLeaderName, 1)) + defer func() { + re.NoError(pdHTTPCli.DeleteScheduler(ctx, schedulers.EvictLeaderName)) + }() + res, err := pdHTTPCli.GetSchedulers(ctx) + re.NoError(err) + oldSchedulersLen := len(res) + + re.NoError(pdHTTPCli.TransferLeader(ctx, newLeader)) + // wait for transfer leader to new leader + time.Sleep(1 * time.Second) + resp, err = pdHTTPCli.GetLeader(ctx) + re.NoError(err) + re.Equal(newLeader, resp.Name) + + res, err = pdHTTPCli.GetSchedulers(ctx) + re.NoError(err) + re.Len(res, oldSchedulersLen) + + // transfer leader to old leader + re.NoError(pdHTTPCli.TransferLeader(ctx, oldLeader)) + // wait for transfer leader + time.Sleep(1 * time.Second) + resp, err = pdHTTPCli.GetLeader(ctx) + re.NoError(err) + re.Equal(oldLeader, resp.Name) + + res, err = pdHTTPCli.GetSchedulers(ctx) + re.NoError(err) + re.Len(res, oldSchedulersLen) +} + +func TestRegionLabelDenyScheduler(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + regions, err := pdHTTPCli.GetRegions(ctx) + re.NoError(err) + re.GreaterOrEqual(len(regions.Regions), 1) + region1 := regions.Regions[0] + + err = pdHTTPCli.DeleteScheduler(ctx, schedulers.BalanceLeaderName) + if err == nil { + defer func() { + pdHTTPCli.CreateScheduler(ctx, schedulers.BalanceLeaderName, 0) + }() + } + + re.NoError(pdHTTPCli.CreateScheduler(ctx, schedulers.GrantLeaderName, uint64(region1.Leader.StoreID))) + defer func() { + pdHTTPCli.DeleteScheduler(ctx, schedulers.GrantLeaderName) + }() + + // wait leader transfer + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + re.NoError(err) + for _, region := range regions.Regions { + if region.Leader.StoreID != region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(time.Minute)) + + // disable schedule for region1 + labelRule := &pd.LabelRule{ + ID: "rule1", + Labels: []pd.RegionLabel{{Key: "schedule", Value: "deny"}}, + RuleType: "key-range", + Data: labeler.MakeKeyRanges(region1.StartKey, region1.EndKey), + } + re.NoError(pdHTTPCli.SetRegionLabelRule(ctx, labelRule)) + defer func() { + pdHTTPCli.PatchRegionLabelRules(ctx, &pd.LabelRulePatch{DeleteRules: []string{labelRule.ID}}) + }() + labelRules, err := pdHTTPCli.GetAllRegionLabelRules(ctx) + re.NoError(err) + re.Len(labelRules, 2) + sort.Slice(labelRules, func(i, j int) bool { + return labelRules[i].ID < labelRules[j].ID + }) + re.Equal(labelRule.ID, labelRules[1].ID) + re.Equal(labelRule.Labels, labelRules[1].Labels) + re.Equal(labelRule.RuleType, labelRules[1].RuleType) + + // enable evict leader scheduler, and check it works + re.NoError(pdHTTPCli.DeleteScheduler(ctx, schedulers.GrantLeaderName)) + re.NoError(pdHTTPCli.CreateScheduler(ctx, schedulers.EvictLeaderName, uint64(region1.Leader.StoreID))) + defer func() { + pdHTTPCli.DeleteScheduler(ctx, schedulers.EvictLeaderName) + }() + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + re.NoError(err) + for _, region := range regions.Regions { + if region.Leader.StoreID == region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(time.Minute)) + + re.NoError(pdHTTPCli.DeleteScheduler(ctx, schedulers.EvictLeaderName)) + re.NoError(pdHTTPCli.CreateScheduler(ctx, schedulers.GrantLeaderName, uint64(region1.Leader.StoreID))) + defer func() { + pdHTTPCli.DeleteScheduler(ctx, schedulers.GrantLeaderName) + }() + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + re.NoError(err) + for _, region := range regions.Regions { + if region.ID == region1.ID { + continue + } + if region.Leader.StoreID != region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(time.Minute)) + + pdHTTPCli.PatchRegionLabelRules(ctx, &pd.LabelRulePatch{DeleteRules: []string{labelRule.ID}}) + labelRules, err = pdHTTPCli.GetAllRegionLabelRules(ctx) + re.NoError(err) + re.Len(labelRules, 1) + + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + re.NoError(err) + for _, region := range regions.Regions { + if region.Leader.StoreID != region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(time.Minute)) +} diff --git a/tests/integrations/realcluster/transfer_leader_test.go b/tests/integrations/realcluster/transfer_leader_test.go deleted file mode 100644 index 0000f7e14a5..00000000000 --- a/tests/integrations/realcluster/transfer_leader_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2023 TiKV Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package realcluster - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -// https://github.com/tikv/pd/issues/6988#issuecomment-1694924611 -// https://github.com/tikv/pd/issues/6897 -func TestTransferLeader(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - resp, err := pdHTTPCli.GetLeader(ctx) - re.NoError(err) - oldLeader := resp.Name - - var newLeader string - for i := 0; i < 2; i++ { - if resp.Name != fmt.Sprintf("pd-%d", i) { - newLeader = fmt.Sprintf("pd-%d", i) - } - } - - // record scheduler - err = pdHTTPCli.CreateScheduler(ctx, "evict-leader-scheduler", 1) - re.NoError(err) - res, err := pdHTTPCli.GetSchedulers(ctx) - re.NoError(err) - oldSchedulersLen := len(res) - - re.NoError(pdHTTPCli.TransferLeader(ctx, newLeader)) - // wait for transfer leader to new leader - time.Sleep(1 * time.Second) - resp, err = pdHTTPCli.GetLeader(ctx) - re.NoError(err) - re.Equal(newLeader, resp.Name) - - res, err = pdHTTPCli.GetSchedulers(ctx) - re.NoError(err) - re.Len(res, oldSchedulersLen) - - // transfer leader to old leader - re.NoError(pdHTTPCli.TransferLeader(ctx, oldLeader)) - // wait for transfer leader - time.Sleep(1 * time.Second) - resp, err = pdHTTPCli.GetLeader(ctx) - re.NoError(err) - re.Equal(oldLeader, resp.Name) - - res, err = pdHTTPCli.GetSchedulers(ctx) - re.NoError(err) - re.Len(res, oldSchedulersLen) -} diff --git a/tests/integrations/realcluster/wait_tiup.sh b/tests/integrations/realcluster/wait_tiup.sh index 497774f9e96..3a8c02a969e 100755 --- a/tests/integrations/realcluster/wait_tiup.sh +++ b/tests/integrations/realcluster/wait_tiup.sh @@ -12,7 +12,7 @@ fi for ((i=0; i<${MAX_TIMES}; i++)); do sleep ${INTERVAL} - $TIUP_BIN_DIR playground display --tag pd_test + $TIUP_BIN_DIR playground display --tag pd_real_cluster_test if [ $? -eq 0 ]; then exit 0 fi