Skip to content

Commit

Permalink
enhance: [10kcp] Reduce GetRecoveryInfo calls (#37891)
Browse files Browse the repository at this point in the history
1. Introduce a data view mechanism for DataCoord, attempting to update
each collection's data view periodically.
2. QueryCoord maintains a cache of data view versions. Before
batch-fetching recovery info, it retrieves all versions and only fetches
recovery info for collections with updated versions.
3. Return DataCoord's current data view when fetching RecoverInfo.

issue: #37743,
#37630

pr: #37863

Signed-off-by: bigsheeper <[email protected]>
  • Loading branch information
bigsheeper authored Nov 21, 2024
1 parent ce8069c commit bf90e55
Show file tree
Hide file tree
Showing 17 changed files with 656 additions and 45 deletions.
28 changes: 28 additions & 0 deletions internal/datacoord/dataview/data_view.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dataview

import "github.com/milvus-io/milvus/internal/proto/datapb"

// InitialDataViewVersion is the version reported for a collection that has
// no cached data view yet.
const InitialDataViewVersion = 0

// DataView describes the channels and segments of one collection at a given
// version.
type DataView struct {
// CollectionID identifies the collection this view belongs to.
CollectionID int64
// Channels holds the per-channel info of the view, keyed by channel name.
Channels map[string]*datapb.VchannelInfo
// Segments is a set of int64 IDs (presumably segment IDs; confirm against
// the code that builds the view).
Segments map[int64]struct{}
// Version orders views of the same collection: a pulled view is only
// considered as a replacement for the cached one when its Version is greater.
Version int64
}
33 changes: 33 additions & 0 deletions internal/datacoord/dataview/update_chan.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dataview

import "sync"

var (
	// updateChan carries the IDs of collections whose data view should be
	// refreshed as soon as possible. It stays nil until initUpdateChan runs.
	updateChan chan int64
	// initOnce guarantees that updateChan is created exactly once.
	initOnce sync.Once
)

// initUpdateChan creates the package-level update channel on first use.
// Subsequent calls are no-ops.
func initUpdateChan() {
	initOnce.Do(func() {
		updateChan = make(chan int64, 1024)
	})
}

// NotifyUpdate requests an immediate refresh of the data view of the given
// collection.
//
// The notification is best-effort and never blocks the caller: it is dropped
// when the channel has not been initialized yet or when its buffer is full.
// This matters because the goroutine that drains the channel may itself call
// NotifyUpdate to schedule a retry; a blocking send from that goroutine on a
// full buffer could never complete.
func NotifyUpdate(collectionID int64) {
	select {
	case updateChan <- collectionID:
	default:
		// Nobody can accept the notification right now; drop it instead of
		// stalling the caller.
	}
}
158 changes: 158 additions & 0 deletions internal/datacoord/dataview/view_manager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dataview

import (
"sync"
"time"

"go.uber.org/zap"

"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// PullNewDataViewFunction produces a new DataView for the given collection,
// or an error when the view cannot be produced.
type PullNewDataViewFunction func(collectionID int64) (*DataView, error)

// ViewManager gives access to per-collection data views and their versions,
// and owns the background loop that refreshes them.
type ViewManager interface {
// Get returns the data view of the given collection, or an error when no
// view is available.
Get(collectionID int64) (*DataView, error)
// GetVersion returns the version of the collection's current data view.
GetVersion(collectionID int64) int64

// Start runs the refresh loop. In the dataViewManager implementation it
// blocks until Close is called, so callers run it in its own goroutine.
Start()
// Close stops the refresh loop started by Start.
Close()
}

// dataViewManager is the ViewManager implementation that caches one DataView
// per collection and refreshes the cache through pullFn.
type dataViewManager struct {
// pullFn produces a new data view for a collection on demand.
pullFn PullNewDataViewFunction
// currentViews caches the current data view of each collection, keyed by
// collection ID.
currentViews *typeutil.ConcurrentMap[int64, *DataView]

// closeOnce makes Close safe to call more than once.
closeOnce sync.Once
// closeChan is closed by Close to make the Start loop return.
closeChan chan struct{}
}

// NewDataViewManager builds a ViewManager that obtains data views through
// pullFn. It makes sure the package-level update channel exists before the
// manager is handed out. The returned manager does no background work until
// Start is called.
func NewDataViewManager(pullFn PullNewDataViewFunction) ViewManager {
	initUpdateChan()

	manager := new(dataViewManager)
	manager.pullFn = pullFn
	manager.currentViews = typeutil.NewConcurrentMap[int64, *DataView]()
	manager.closeChan = make(chan struct{})
	return manager
}

// Get returns the cached data view of the given collection. On a cache miss
// it pulls a view through pullFn, stores it, and returns the instance that
// ended up in the cache.
//
// An error is returned only when no view is cached and pulling one fails.
func (m *dataViewManager) Get(collectionID int64) (*DataView, error) {
	if cached, ok := m.currentViews.Get(collectionID); ok {
		return cached, nil
	}
	pulled, err := m.pullFn(collectionID)
	if err != nil {
		return nil, err
	}
	// Another goroutine may have cached a view while this one was pulling.
	// GetOrInsert keeps the existing entry in that case, so return what is
	// actually stored rather than the freshly pulled view; otherwise the
	// caller could be handed a view that differs from the cached one.
	stored, _ := m.currentViews.GetOrInsert(collectionID, pulled)
	return stored, nil
}

// GetVersion reports the version of the cached data view of the given
// collection, falling back to InitialDataViewVersion when nothing is cached.
func (m *dataViewManager) GetVersion(collectionID int64) int64 {
	view, cached := m.currentViews.Get(collectionID)
	if !cached {
		return InitialDataViewVersion
	}
	return view.Version
}

// Start runs the refresh loop and blocks until Close is called.
//
// The loop reacts to three events: an explicit notification on updateChan
// refreshes a single collection, every tick of the configured interval
// refreshes all cached collections, and closing closeChan ends the loop.
func (m *dataViewManager) Start() {
	interval := paramtable.Get().DataCoordCfg.DataViewUpdateInterval.GetAsDuration(time.Second)
	refreshTicker := time.NewTicker(interval)
	defer refreshTicker.Stop()

	for {
		select {
		case id := <-updateChan:
			// on-demand refresh of one collection
			m.TryUpdateDataView(id)
		case <-refreshTicker.C:
			// periodic refresh of every cached collection
			for _, id := range m.currentViews.Keys() {
				m.TryUpdateDataView(id)
			}
		case <-m.closeChan:
			log.Info("data view manager exited")
			return
		}
	}
}

// Close signals the Start loop to exit by closing closeChan. It may be called
// multiple times; only the first call closes the channel.
func (m *dataViewManager) Close() {
	shutdown := func() {
		close(m.closeChan)
	}
	m.closeOnce.Do(shutdown)
}

// update replaces the cached data view of view.CollectionID with view and
// logs the new version.
func (m *dataViewManager) update(view *DataView) {
	// Insert overwrites an existing entry. GetOrInsert must not be used here:
	// it keeps the already-cached view, so the new one would be dropped.
	m.currentViews.Insert(view.CollectionID, view)
	log.Info("update new data view", zap.Int64("collectionID", view.CollectionID), zap.Int64("version", view.Version))
}

// TryUpdateDataView pulls a new data view for the given collection and caches
// it when it differs from the cached one.
//
// If pulling fails, the failure is logged and another update is requested via
// NotifyUpdate. If nothing is cached yet, the pulled view is stored as is.
// Otherwise the pulled view replaces the cached one only when its version is
// greater AND its content changed: a channel was added or removed, a
// channel's segment ID lists or partition stats versions differ, a channel's
// seek position advanced, or the segment set differs. A view with a higher
// version but identical content is ignored, so the cached version only moves
// when the data actually changed.
func (m *dataViewManager) TryUpdateDataView(collectionID int64) {
	newView, err := m.pullFn(collectionID)
	if err != nil {
		log.Warn("pull new data view failed", zap.Int64("collectionID", collectionID), zap.Error(err))
		// notify to trigger pull again
		NotifyUpdate(collectionID)
		return
	}

	currentView, ok := m.currentViews.Get(collectionID)
	if !ok {
		// Nothing cached yet: store the pulled view unless a concurrent
		// caller has cached one in the meantime.
		m.currentViews.GetOrInsert(collectionID, newView)
		return
	}
	// no-op if the incoming version is not greater than the current version.
	if newView.Version <= currentView.Version {
		return
	}

	// A different number of channels means a channel was added or removed.
	// The loop below only walks the new view's channels, so on its own it
	// would miss a channel that disappeared.
	if len(newView.Channels) != len(currentView.Channels) {
		m.update(newView)
		return
	}

	// check if channel info has been updated.
	for channel, incoming := range newView.Channels {
		current, ok := currentView.Channels[channel]
		if !ok {
			m.update(newView)
			return
		}
		if !funcutil.SliceSetEqual(incoming.GetLevelZeroSegmentIds(), current.GetLevelZeroSegmentIds()) ||
			!funcutil.SliceSetEqual(incoming.GetUnflushedSegmentIds(), current.GetUnflushedSegmentIds()) ||
			!funcutil.SliceSetEqual(incoming.GetFlushedSegmentIds(), current.GetFlushedSegmentIds()) ||
			!funcutil.SliceSetEqual(incoming.GetIndexedSegmentIds(), current.GetIndexedSegmentIds()) ||
			!funcutil.SliceSetEqual(incoming.GetDroppedSegmentIds(), current.GetDroppedSegmentIds()) {
			m.update(newView)
			return
		}
		if !typeutil.MapEqual(incoming.GetPartitionStatsVersions(), current.GetPartitionStatsVersions()) {
			m.update(newView)
			return
		}
		// TODO: It might be too frequent.
		if incoming.GetSeekPosition().GetTimestamp() > current.GetSeekPosition().GetTimestamp() {
			m.update(newView)
			return
		}
	}

	// check if segment info has been updated.
	if !typeutil.MapEqual(newView.Segments, currentView.Segments) {
		m.update(newView)
	}
}
6 changes: 6 additions & 0 deletions internal/datacoord/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
globalIDAllocator "github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/coordinator/coordclient"
"github.com/milvus-io/milvus/internal/datacoord/broker"
"github.com/milvus-io/milvus/internal/datacoord/dataview"
datanodeclient "github.com/milvus-io/milvus/internal/distributed/datanode/client"
indexnodeclient "github.com/milvus-io/milvus/internal/distributed/indexnode/client"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
Expand Down Expand Up @@ -126,6 +127,7 @@ type Server struct {
importMeta ImportMeta
importScheduler ImportScheduler
importChecker ImportChecker
viewManager dataview.ViewManager

compactionTrigger trigger
compactionHandler compactionPlanContext
Expand Down Expand Up @@ -403,6 +405,8 @@ func (s *Server) initDataCoord() error {
s.importScheduler = NewImportScheduler(s.meta, s.cluster, s.allocator, s.importMeta, s.buildIndexCh)
s.importChecker = NewImportChecker(s.meta, s.broker, s.cluster, s.allocator, s.importMeta)

s.viewManager = dataview.NewDataViewManager(s.pullNewDataView)

s.syncSegmentsScheduler = newSyncSegmentsScheduler(s.meta, s.channelManager, s.sessionManager)

s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx)
Expand Down Expand Up @@ -723,6 +727,7 @@ func (s *Server) startServerLoop() {
s.startIndexService(s.serverLoopCtx)
go s.importScheduler.Start()
go s.importChecker.Start()
go s.viewManager.Start()
s.garbageCollector.start()
s.syncSegmentsScheduler.Start()
}
Expand Down Expand Up @@ -1115,6 +1120,7 @@ func (s *Server) Stop() error {

s.importScheduler.Close()
s.importChecker.Close()
s.viewManager.Close()
s.syncSegmentsScheduler.Stop()

s.stopCompaction()
Expand Down
Loading

0 comments on commit bf90e55

Please sign in to comment.