From e72ec0f98fbb994f0928816a2705829b217bbbf4 Mon Sep 17 00:00:00 2001
From: Letong Han <106566639+letonghan@users.noreply.github.com>
Date: Tue, 13 Aug 2024 12:13:38 +0800
Subject: [PATCH] Update Milvus docker-compose.yaml (#459)

Signed-off-by: letonghan <letong.han@intel.com>
Signed-off-by: BaoHuiling <huiling.bao@intel.com>
---
 comps/vectorstores/langchain/milvus/README.md |  2 +-
 .../langchain/milvus/docker-compose.yml       | 22 +-----
 .../vectorstores/langchain/milvus/milvus.yaml | 70 +++++++++--------
 tests/test_vectorstores_langchain_milvus.sh   | 77 +++++++++++++++++++
 4 files changed, 118 insertions(+), 53 deletions(-)
 create mode 100644 tests/test_vectorstores_langchain_milvus.sh

diff --git a/comps/vectorstores/langchain/milvus/README.md b/comps/vectorstores/langchain/milvus/README.md
index d02508351..b0f19caf4 100644
--- a/comps/vectorstores/langchain/milvus/README.md
+++ b/comps/vectorstores/langchain/milvus/README.md
@@ -6,7 +6,7 @@ Configure your Milvus instance to suit your application scenarios by adjusting c
 Customized the path to store data, default is /volumes
 
 ```bash
-export DOCKER_VOLUME_DIRECTORY=./your_path
+export DOCKER_VOLUME_DIRECTORY=${your_path}
 ```
 
 ## 2. Run Milvus service
diff --git a/comps/vectorstores/langchain/milvus/docker-compose.yml b/comps/vectorstores/langchain/milvus/docker-compose.yml
index 125463752..d6c39d0f0 100644
--- a/comps/vectorstores/langchain/milvus/docker-compose.yml
+++ b/comps/vectorstores/langchain/milvus/docker-compose.yml
@@ -7,10 +7,6 @@ services:
   etcd:
     container_name: milvus-etcd
     image: quay.io/coreos/etcd:v3.5.5
-    deploy:
-      resources:
-        limits:
-          cpus: "0.5"
     environment:
       - ETCD_AUTO_COMPACTION_MODE=revision
       - ETCD_AUTO_COMPACTION_RETENTION=1000
@@ -28,10 +24,6 @@ services:
   minio:
     container_name: milvus-minio
     image: minio/minio:RELEASE.2023-03-20T20-16-18Z
-    deploy:
-      resources:
-        limits:
-          cpus: "0.5"
     environment:
       MINIO_ACCESS_KEY: minioadmin
       MINIO_SECRET_KEY: minioadmin
@@ -49,31 +41,25 @@ services:
 
   standalone:
     container_name: milvus-standalone
-    image: milvusdb/milvus:latest
-    deploy:
-      resources:
-        limits:
-          cpus: "8"
-          memory: 32G
+    image: milvusdb/milvus:v2.4.6
     command: ["milvus", "run", "standalone"]
     security_opt:
       - seccomp:unconfined
     environment:
       ETCD_ENDPOINTS: etcd:2379
       MINIO_ADDRESS: minio:9000
-      DNNL_ENABLE: 0
     volumes:
-      - ./milvus.yaml:/milvus/configs/milvus.yaml
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml
       - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:9092/healthz"]
+      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
       interval: 30s
       start_period: 90s
       timeout: 20s
       retries: 3
     ports:
       - "19530:19530"
-      - "9092:9092"
+      - "9091:9091"
     depends_on:
       - "etcd"
       - "minio"
diff --git a/comps/vectorstores/langchain/milvus/milvus.yaml b/comps/vectorstores/langchain/milvus/milvus.yaml
index de29dfe3d..b9f22cb3d 100644
--- a/comps/vectorstores/langchain/milvus/milvus.yaml
+++ b/comps/vectorstores/langchain/milvus/milvus.yaml
@@ -105,7 +105,9 @@ minio:
   region: # Specify minio storage system location region
   useVirtualHost: false # Whether use virtual host mode for bucket
   requestTimeoutMs: 10000 # minio timeout for request time in milliseconds
-  listObjectsMaxKeys: 0 # The maximum number of objects requested per batch in minio ListObjects rpc, 0 means using oss client by default, decrease these configuration if ListObjects timeout
+  # The maximum number of objects requested per batch in a MinIO ListObjects RPC.
+  # 0 means use the OSS client's default; decrease this value if ListObjects times out.
+  listObjectsMaxKeys: 0
 
 # Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka.
 # You can change your mq by setting mq.type field.
@@ -120,6 +122,10 @@ mq:
   pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds
   pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes
   mqBufSize: 16 # MQ client consumer buffer length
+  dispatcher:
+    mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge
+    targetBufSize: 16 # the length of channel buffer for target
+    maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack
 
 # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services.
 pulsar:
@@ -182,7 +188,7 @@ natsmq:
 # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests
 rootCoord:
   dmlChannelNum: 16 # The number of dml channels created at system startup
-  maxPartitionNum: 4096 # Maximum number of partitions in a collection
+  maxPartitionNum: 1024 # Maximum number of partitions in a collection
   minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed
   enableActiveStandby: false
   maxDatabaseNum: 64 # Maximum number of database
@@ -200,7 +206,6 @@ rootCoord:
 proxy:
   timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick
   healthCheckTimeout: 3000 # ms, the interval that to do component healthy check
-  healthCheckTimetout: 3000 # ms, the interval that to do component healthy check
   msgStream:
     timeTick:
       bufSize: 512
@@ -217,6 +222,7 @@ proxy:
   ginLogging: true
   ginLogSkipPaths: / # skip url path for gin log
   maxTaskNum: 1024 # max task number of proxy task queue
+  mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection
   accessLog:
     enable: false # if use access log
     minioEnable: false # if upload sealed access log file to minio
@@ -244,7 +250,7 @@ proxy:
     port: # high-level restful api
     acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64
     enablePprof: true # Whether to enable pprof middleware on the metrics port
-  ip: 0.0.0.0 # if not specified, use the first unicastable address
+  ip: # if not specified, use the first unicastable address
   port: 19530
   internalPort: 19529
   grpc:
@@ -282,6 +288,8 @@ queryCoord:
   channelTaskTimeout: 60000 # 1 minute
   segmentTaskTimeout: 120000 # 2 minute
   distPullInterval: 500
+  collectionObserverInterval: 200
+  checkExecutedFlagInterval: 100
   heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available
   loadTimeoutSeconds: 600
   distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds
@@ -298,6 +306,7 @@ queryCoord:
   checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session
   gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
   enableStoppingBalance: true # whether enable stopping balance
+  channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode
   cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds
   ip: # if not specified, use the first unicastable address
   port: 19531
@@ -320,6 +329,7 @@ queryNode:
       nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist
       memExpansionRate: 1.15 # extra memory needed by building interim index
       buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num
+    knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
   loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
   enableDisk: false # enable querynode load disk index, and search on disk index
   maxDiskUsagePercentage: 95
@@ -327,17 +337,22 @@ queryNode:
     enabled: true
     memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024
     readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed`
-    # options: async, sync, off.
+    # options: async, sync, disable.
     # Specifies the necessity for warming up the chunk cache.
-    # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the
+    # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the
     # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency
     # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage;
-    # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query.
-    warmup: async
+    # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query.
+    warmup: disable
   mmap:
     mmapEnabled: false # Enable mmap for loading data
-  mmapEnabled: false # Enable mmap for loading data
-  lazyloadEnabled: false # Enable lazyload for loading data
+  lazyload:
+    enabled: false # Enable lazyload for loading data
+    waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve
+    requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default
+    requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default
+    maxRetryTimes: 1 # max retry times for lazy load, 1 by default
+    maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default
   grouping:
     enabled: true
     maxNQ: 1000
@@ -403,9 +418,11 @@ indexNode:
 dataCoord:
   channel:
     watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer.
+    balanceWithRpc: true # Whether to enable balance with RPC, default to use etcd watch
+    legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered legacy nodes, which do not have RPC-based watch(). This is only used during rolling upgrades, where legacy nodes won't get new channels
     balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing
     balanceInterval: 360 # The interval with which the channel manager check dml channel balance status
-    checkInterval: 10 # The interval in seconds with which the channel manager advances channel states
+    checkInterval: 1 # The interval in seconds with which the channel manager advances channel states
     notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds).
   segment:
     maxSize: 1024 # Maximum size of a segment in MB
@@ -485,7 +502,7 @@ dataNode:
       coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds
   segment:
     insertBufSize: 16777216 # Max buffer size to flush for a single segment.
-    deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB
+    deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB
     syncPeriod: 600 # The period to sync segments if buffer is not empty.
   memory:
     forceSyncEnable: true # Set true to force sync if memory usage is too high
@@ -536,8 +553,6 @@ log:
 grpc:
   log:
     level: WARNING
-  serverMaxSendSize: 536870912
-  serverMaxRecvSize: 268435456
   gracefulStopTimeout: 10 # second, time to wait graceful stop finish
   client:
     compressionEnabled: false
@@ -550,8 +565,6 @@ grpc:
     minResetInterval: 1000
     maxCancelError: 32
     minSessionCheckInterval: 200
-  clientMaxSendSize: 268435456
-  clientMaxRecvSize: 536870912
 
 # Configure the proxy tls enable.
 tls:
@@ -560,18 +573,6 @@ tls:
   caPemPath: configs/cert/ca.pem
 
 common:
-  chanNamePrefix:
-    cluster: by-dev
-    rootCoordTimeTick: rootcoord-timetick
-    rootCoordStatistics: rootcoord-statistics
-    rootCoordDml: rootcoord-dml
-    replicateMsg: replicate-msg
-    queryTimeTick: queryTimeTick
-    dataCoordTimeTick: datacoord-timetick-channel
-    dataCoordSegmentInfo: segment-info-channel
-  subNamePrefix:
-    dataCoordSubNamePrefix: dataCoord
-    dataNodeSubNamePrefix: dataNode
   defaultPartitionName: _default # default partition name for a collection
   defaultIndexName: _default_idx # default index name
   entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
@@ -617,7 +618,7 @@ common:
   ttMsgEnabled: true # Whether the instance disable sending ts messages
   traceLogMode: 0 # trace request info
   bloomFilterSize: 100000 # bloom filter initial size
-  maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter
+  maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter
 
 # QuotaConfig, configurations of Milvus quota and limits.
 # By default, we enable:
@@ -631,7 +632,7 @@ common:
 #   4. DQL result rate protection;
 # If necessary, you can also manually force to deny RW requests.
 quotaAndLimits:
-  enabled: false # `true` to enable quota and limits, `false` to disable.
+  enabled: true # `true` to enable quota and limits, `false` to disable.
   # quotaCenterCollectInterval is the time interval that quotaCenter
   # collects metrics from Proxies, Query cluster and Data cluster.
   # seconds, (0 ~ 65536)
@@ -649,10 +650,10 @@ quotaAndLimits:
     db:
       max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex
   flushRate:
-    enabled: false
+    enabled: true
     max: -1 # qps, default no limit, rate for flush
     collection:
-      max: -1 # qps, default no limit, rate for flush at collection level.
+      max: 0.1 # qps, rate for flush at collection level; -1 means no limit.
     db:
       max: -1 # qps of db level, default no limit, rate for flush
   compactionRate:
@@ -719,6 +720,7 @@ quotaAndLimits:
   limits:
     maxCollectionNum: 65536
     maxCollectionNumPerDB: 65536
+    maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit
     maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes
   limitWriting:
     # forceDeny false means dml requests are allowed (except for some
@@ -786,8 +788,8 @@ quotaAndLimits:
 
 trace:
   # trace exporter type, default is stdout,
-  # optional values: ['stdout', 'jaeger', 'otlp']
-  exporter: stdout
+  # optional values: ['noop', 'stdout', 'jaeger', 'otlp']
+  exporter: noop
   # fraction of traceID based sampler,
   # optional values: [0, 1]
   # Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
diff --git a/tests/test_vectorstores_langchain_milvus.sh b/tests/test_vectorstores_langchain_milvus.sh
new file mode 100644
index 000000000..60303017d
--- /dev/null
+++ b/tests/test_vectorstores_langchain_milvus.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -xe  # -x: trace every command; -e: abort on the first failing command
+
+WORKPATH=$(dirname "$PWD")  # parent of the current directory; presumably the script is launched from the repo's tests/ directory
+LOG_PATH="$WORKPATH/tests"  # directory that receives the per-step response logs
+ip_address=$(hostname -I | awk '{print $1}')  # first address printed by `hostname -I`
+
+
+function start_service() {
+    # Start the Milvus compose stack from a clean data directory and wait for it to come up.
+    cd "$WORKPATH/comps/vectorstores/langchain/milvus"  # quoted so a checkout path with spaces still works
+    rm -rf volumes/  # drop data left behind by a previous run
+    docker compose up -d
+
+    sleep 60s  # fixed grace period before validation starts
+}
+
+function validate_vectorstore() {
+    # Smoke-test the v1 REST endpoints in order (create collection, insert one vector, search); exit 1 on the first failure.
+    PORT="19530" COLLECTION_NAME="test_col"
+
+    # test create collection: the response is appended to a log, then checked for a 200 code
+    echo "[ test create ] creating collection.."
+    create_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/collections/create"  -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"dbName\": \"default\", \"dimension\": 2, \"metricType\": \"L2\", \"primaryField\": \"id\", \"vectorField\": \"vector\"}")
+    printf '%s\n' "$create_response" >> "${LOG_PATH}/milvus_create_col.log"
+    if [[ "$create_response" == *'{"code":200'* ]]; then
+        echo "[ test create ] create collection succeed"
+    else
+        echo "[ test create ] create collection failed"
+        exit 1
+    fi
+
+    # test insert data: success requires a 200 code and an insertCount of 1
+    echo "[ test insert ] inserting data.."
+    insert_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/insert" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"data\": [{\"vector\":[1,2]}] }")
+    printf '%s\n' "$insert_response" >> "${LOG_PATH}/milvus_insert_data.log"
+    if [[ "$insert_response" == *'{"code":200,"data":{"insertCount":1'* ]]; then
+        echo "[ test insert ] insert data succeed"
+    else
+        echo "[ test insert ] insert data failed"
+        exit 1
+    fi
+
+    # test search data: success requires a 200 code followed by a data field
+    echo "[ test search ] searching data.."
+    search_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/search" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"vector\":[1,2] }")
+    printf '%s\n' "$search_response" >> "${LOG_PATH}/milvus_search_data.log"
+    if [[ "$search_response" == *'{"code":200,"data":'* ]]; then
+        echo "[ test search ] search data succeed"
+    else
+        echo "[ test search ] search data failed"
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps --all --quiet --filter "name=milvus-*")  # IDs of running and exited containers selected by the name filter
+    if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi  # $cid stays unquoted: one container ID per word
+}
+
+function main() {
+    # End-to-end run: clear stale containers, start Milvus, validate it, then tear everything down.
+    stop_docker
+
+    # Register teardown for every exit path: validate_vectorstore calls `exit 1` on failure and
+    # `set -e` aborts on errors, and either would otherwise leave the containers running.
+    trap 'stop_docker; docker system prune -f' EXIT
+
+    start_service
+
+    validate_vectorstore
+}
+
+main