Skip to content

Commit

Permalink
enhance: add multiply factor when loading index (#38721)
Browse files Browse the repository at this point in the history
issue: #38715
pr: #38716

Signed-off-by: chyezh <[email protected]>
  • Loading branch information
chyezh authored Dec 25, 2024
1 parent 8fe883f commit 5d2f454
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
2 changes: 2 additions & 0 deletions configs/milvus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,8 @@ dataCoord:
maxClusterSizeRatio: 10 # maximum cluster size / avg size in Kmeans train
maxClusterSize: 5g # maximum cluster size in Kmeans train
syncSegmentsInterval: 300 # The time interval for regularly syncing segments
index:
memSizeEstimateMultiplier: 2 # When the memory size is not setup by index procedure, multiplier to estimate the memory size of index data
enableGarbageCollection: true # Switch value to control if to enable garbage collection to clear the discarded data in MinIO or S3 service.
gc:
interval: 3600 # The interval at which data coord performs garbage collection, unit: second.
Expand Down
3 changes: 2 additions & 1 deletion internal/querynodev2/segments/segment.go
Original file line number Diff line number Diff line change
Expand Up @@ -1346,7 +1346,8 @@ func GetCLoadInfoWithFunc(ctx context.Context,
IndexFiles: indexInfo.GetIndexFilePaths(),
IndexEngineVersion: indexInfo.GetCurrentIndexVersion(),
IndexStoreVersion: indexInfo.GetIndexStoreVersion(),
IndexFileSize: indexInfo.GetIndexSize(),
// TODO: As a quick fix, the multiplier is applied here; this logic should be moved to the datacoord once the memory size is recorded for each index.
IndexFileSize: int64(paramtable.Get().DataCoordCfg.IndexMemSizeEstimateMultiplier.GetAsFloat() * float64(indexInfo.GetIndexSize())),
}

// 2.
Expand Down
12 changes: 12 additions & 0 deletions pkg/util/paramtable/component_param.go
Original file line number Diff line number Diff line change
Expand Up @@ -3273,6 +3273,9 @@ type dataCoordConfig struct {
ChannelCheckpointMaxLag ParamItem `refreshable:"true"`
SyncSegmentsInterval ParamItem `refreshable:"false"`

// Index related configuration
IndexMemSizeEstimateMultiplier ParamItem `refreshable:"true"`

// Clustering Compaction
ClusteringCompactionEnable ParamItem `refreshable:"true"`
ClusteringCompactionAutoEnable ParamItem `refreshable:"true"`
Expand Down Expand Up @@ -3749,6 +3752,15 @@ During compaction, the size of segment # of rows is able to exceed segment max #
}
p.LevelZeroCompactionTriggerDeltalogMaxNum.Init(base.mgr)

p.IndexMemSizeEstimateMultiplier = ParamItem{
Key: "dataCoord.index.memSizeEstimateMultiplier",
Version: "2.4.19",
DefaultValue: "2",
Doc: "When the memory size is not setup by index procedure, multiplier to estimate the memory size of index data",
Export: true,
}
p.IndexMemSizeEstimateMultiplier.Init(base.mgr)

p.ClusteringCompactionEnable = ParamItem{
Key: "dataCoord.compaction.clustering.enable",
Version: "2.4.7",
Expand Down

0 comments on commit 5d2f454

Please sign in to comment.