Skip to content

Commit

Permalink
enhance: Whether to enable mergeSort mode when performing mixCompacti…
Browse files Browse the repository at this point in the history
…on (#37664)

issue: #37579

Signed-off-by: Cai Zhang <[email protected]>
  • Loading branch information
xiaocai2333 authored Nov 19, 2024
1 parent 33bfb25 commit dae4160
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 2 deletions.
1 change: 1 addition & 0 deletions configs/milvus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,7 @@ dataNode:
compaction:
levelZeroBatchMemoryRatio: 0.5 # The minimal memory ratio of free memory for level zero compaction executing in batch mode
levelZeroMaxBatchSize: -1 # Max batch size refers to the max number of L1/L2 segments in a batch when executing L0 compaction. Defaults to -1; any value less than 1 means no limit. Valid range: >= 1.
useMergeSort: false # Whether to enable mergeSort mode when performing mixCompaction.
gracefulStopTimeout: 1800 # seconds. force stop node without graceful stop
slot:
slotCap: 16 # The maximum number of tasks(e.g. compaction, importing) allowed to run concurrently on a datanode
Expand Down
2 changes: 1 addition & 1 deletion internal/datanode/compaction/mix_compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) {
}

var res []*datapb.CompactionSegment
if allSorted && len(t.plan.GetSegmentBinlogs()) > 1 {
if paramtable.Get().DataNodeCfg.UseMergeSort.GetAsBool() && allSorted && len(t.plan.GetSegmentBinlogs()) > 1 {
log.Info("all segments are sorted, use merge sort")
res, err = mergeSortMultipleSegments(ctxTimeout, t.plan, t.collectionID, t.partitionID, t.maxRows, t.binlogIO,
t.plan.GetSegmentBinlogs(), t.tr, t.currentTs, t.plan.GetCollectionTtl(), t.bm25FieldIDs)
Expand Down
2 changes: 2 additions & 0 deletions internal/datanode/compaction/mix_compactor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ func (s *MixCompactionTaskSuite) TestCompactTwoToOneWithBM25() {
}

func (s *MixCompactionTaskSuite) TestCompactSortedSegment() {
paramtable.Get().Save("dataNode.compaction.useMergeSort", "true")
defer paramtable.Get().Reset("dataNode.compaction.useMergeSort")
segments := []int64{1001, 1002, 1003}
alloc := allocator.NewLocalAllocator(100, math.MaxInt64)
s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil)
Expand Down
10 changes: 10 additions & 0 deletions pkg/util/paramtable/component_param.go
Original file line number Diff line number Diff line change
Expand Up @@ -4247,6 +4247,7 @@ type dataNodeConfig struct {
// Compaction
L0BatchMemoryRatio ParamItem `refreshable:"true"`
L0CompactionMaxBatchSize ParamItem `refreshable:"true"`
UseMergeSort ParamItem `refreshable:"true"`

GracefulStopTimeout ParamItem `refreshable:"true"`

Expand Down Expand Up @@ -4578,6 +4579,15 @@ if this parameter <= 0, will set it as 10`,
}
p.L0CompactionMaxBatchSize.Init(base.mgr)

p.UseMergeSort = ParamItem{
Key: "dataNode.compaction.useMergeSort",
Version: "2.5.0",
Doc: "Whether to enable mergeSort mode when performing mixCompaction.",
DefaultValue: "false",
Export: true,
}
p.UseMergeSort.Init(base.mgr)

p.GracefulStopTimeout = ParamItem{
Key: "dataNode.gracefulStopTimeout",
Version: "2.3.7",
Expand Down
4 changes: 3 additions & 1 deletion tests/python_client/testcases/test_compaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,13 +599,15 @@ def test_compact_after_binary_index(self):
collection_w.wait_for_compaction_completed()
c_plans = collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)[0]

old_segmentIDs = [c_plans.plans[0].target]
old_segmentIDs.extend(c_plans.plans[0].sources)
# wait for the handoff to complete, then search
cost = 180
start = time()
while True:
sleep(1)
segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
if len(segment_info) != 0 and segment_info[0].segmentID == c_plans.plans[0].target:
if len(segment_info) != 0 and segment_info[0].segmentID not in old_segmentIDs and segment_info[0].is_sorted:
log.debug(segment_info)
break
if time() - start > cost:
Expand Down

0 comments on commit dae4160

Please sign in to comment.