Skip to content

Commit

Permalink
feat: vec-270 support separate index and vector data namespaces (#50)
Browse files Browse the repository at this point in the history
* feat: vec-270 support separate data and index namespaces in the quote search example

* feat: support separate index and vector data namespaces in the image search example

* chore: change docker example aerospike config to use separate index and vector namespaces

* feat: add avs_index_set and avs_index_namespace to image and quote example configs
  • Loading branch information
dwelch-spike authored Aug 5, 2024
1 parent 2204223 commit fef7a21
Show file tree
Hide file tree
Showing 14 changed files with 236 additions and 19 deletions.
38 changes: 28 additions & 10 deletions docker/config/aerospike.conf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Aerospike database configuration file for use with systemd.

service {
cluster-name prism-demo
cluster-name docker-demo
proto-fd-max 15000
}

Expand All @@ -25,9 +25,9 @@ network {
}

heartbeat {
address any
mode mesh
port 3002
mode multicast
multicast-group 239.1.99.222
port 9918

# To use unicast-mesh heartbeats, remove the 3 lines above, and see
# aerospike_mesh.conf for alternative.
Expand All @@ -45,22 +45,40 @@ network {
}
}

namespace test {
namespace avs-index {
replication-factor 1
nsup-period 60

storage-engine device {
file /opt/aerospike/data/index.dat
filesize 8G
}
}

namespace avs-data {
replication-factor 2
nsup-period 60

storage-engine device {
file /opt/aerospike/data/test.dat
filesize 5G
file /opt/aerospike/data/data.dat
filesize 8G
}
}

namespace avs-meta {
replication-factor 1
nsup-period 100

storage-engine device {
file /opt/aerospike/data/avs-meta.dat
filesize 5G
storage-engine memory {
data-size 1G
}

# To use file storage backing, comment out the line above and use the
# following lines instead.
# storage-engine device {
# file /opt/aerospike/data/bar.dat
# filesize 16G
# data-in-memory true # Store data in memory in addition to file.
# }
}

Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ spec:
value: {{ .Values.quoteSearchConfig.avsNamespace | quote }}
- name: AVS_SET
value: {{ .Values.quoteSearchConfig.avsSet | quote }}
- name: AVS_INDEX_NAMESPACE
value: {{ .Values.quoteSearchConfig.avsIndexNamespace | quote }}
- name: AVS_INDEX_SET
value: {{ .Values.quoteSearchConfig.avsIndexSet | quote }}
- name: AVS_VERIFY_TLS
value: {{ .Values.quoteSearchConfig.avsVerifyTls | quote }}
- name: AVS_MAX_RESULTS
Expand Down
5 changes: 4 additions & 1 deletion prism-image-search/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ If not set defaults are used.
| AVS_HOST | localhost | AVS server seed host |
| AVS_PORT | 5000 | AVS server seed host port |
| AVS_ADVERTISED_LISTENER| | An optional advertised listener to use if configured on the AVS server |
| AVS_NAMESPACE | test | The aerospike namespace for storing the image records and index |
| AVS_NAMESPACE | test | The Aerospike namespace for storing the image records |
| AVS_SET | image-data | The Aerospike set for storing the image records |
| AVS_INDEX_NAMESPACE | test | The Aerospike namespace for storing the HNSW index |
| AVS_INDEX_SET | image-index | The Aerospike set for storing the HNSW index |
| AVS_INDEX_NAME | prism-image-search | The name of the index |
| AVS_MAX_RESULTS | 20 | Maximum number of vector search results to return |
| AVS_IS_LOADBALANCER | False | If true, the first seed address will be treated as a load balancer node. |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Aerospike database configuration file for use with systemd.

service {
cluster-name prism-demo
proto-fd-max 15000
}


logging {
file /var/log/aerospike/aerospike.log {
context any info
}

# Send log messages to stdout
console {
context any info
context query critical
}
}

network {
service {
address any
port 3000
}

heartbeat {
mode multicast
multicast-group 239.1.99.222
port 9918

# To use unicast-mesh heartbeats, remove the 3 lines above, and see
# aerospike_mesh.conf for alternative.

interval 150
timeout 10
}

fabric {
port 3001
}

info {
port 3003
}
}

namespace test {
replication-factor 1
nsup-period 60

storage-engine device {
file /opt/aerospike/data/index.dat
filesize 16G
}
}

namespace avs-meta {
replication-factor 1
nsup-period 100

storage-engine memory {
data-size 1G
}

# To use file storage backing, comment out the storage-engine memory block
# above and use the following lines instead.
# storage-engine device {
# file /opt/aerospike/data/bar.dat
# filesize 16G
# data-in-memory true # Store data in memory in addition to file.
# }
}

Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,30 @@ network {
}
}

namespace test {
namespace avs-index {
replication-factor 1
nsup-period 60

storage-engine device {
file /opt/aerospike/data/index.dat
filesize 8G
}
}

storage-engine memory {
data-size 2G
namespace avs-data {
replication-factor 2
nsup-period 60

storage-engine device {
file /opt/aerospike/data/data.dat
filesize 8G
}
}

namespace avs-meta {
replication-factor 1
nsup-period 100

storage-engine memory {
data-size 1G
}
Expand Down
7 changes: 7 additions & 0 deletions prism-image-search/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ services:
command:
- "--config-file"
- "/opt/aerospike/etc/aerospike/aerospike.conf"
# to store all index and vector data in the default namespace (test), replace the config path above with this line
# - "/opt/aerospike/etc/aerospike/aerospike-single-namespace.conf"
healthcheck:
# test: [ "CMD", "asinfo", "-U", "admin", "-P", "admin", "-p", "3000", "-v", "build" ]
test: [ "CMD", "asinfo", "-p", "3000", "-v", "build" ]
Expand Down Expand Up @@ -44,6 +46,11 @@ services:
AVS_PORT: "5000"
APP_NUM_QUOTES: "5000"
GRPC_DNS_RESOLVER: native
# comment out the following lines, and switch the aerospike service to aerospike-single-namespace.conf, to store all index and vector data in the default namespace (test)
AVS_NAMESPACE: avs-data
AVS_SET: quote-data
AVS_INDEX_NAMESPACE: avs-index
AVS_INDEX_SET: quote-index
volumes:
- ./container-volumes/prism/images:/prism/static/images/data

Expand Down
2 changes: 2 additions & 0 deletions prism-image-search/prism/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ class Config(object):
AVS_INDEX_NAME = os.environ.get("AVS_INDEX_NAME") or "prism-image-search"
AVS_NAMESPACE = os.environ.get("AVS_NAMESPACE") or "test"
AVS_SET = os.environ.get("AVS_SET") or "image-data"
AVS_INDEX_NAMESPACE = os.environ.get("AVS_INDEX_NAMESPACE") or "test"
AVS_INDEX_SET = os.environ.get("AVS_INDEX_SET") or "image-index"
AVS_VERIFY_TLS = get_bool_env("VERIFY_TLS", True)
AVS_MAX_RESULTS = int(os.environ.get("AVS_MAX_RESULTS") or 20)
MAX_CONTENT_LENGTH = int(os.environ.get("MAX_CONTENT_LENGTH") or 10485760)
Expand Down
1 change: 1 addition & 0 deletions prism-image-search/prism/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def create_index():
vector_field="image_embedding",
dimensions=MODEL_DIM,
vector_distance_metric=types.VectorDistanceMetric.COSINE,
index_storage=types.IndexStorage(namespace=Config.AVS_INDEX_NAMESPACE, set_name=Config.AVS_INDEX_SET),
)
except Exception as e:
logger.critical("Failed to connect to avs client %s", str(e))
Expand Down
5 changes: 4 additions & 1 deletion quote-semantic-search/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ If not set defaults are used.
| AVS_HOST | localhost | AVS server seed host |
| AVS_PORT | 5000 | AVS server seed host port |
| AVS_ADVERTISED_LISTENER| | An optional advertised listener to use if configured on the AVS server |
| AVS_NAMESPACE | test | The aerospike namespace for storing the image records and index |
| AVS_NAMESPACE | test | The Aerospike namespace for storing the quote records |
| AVS_SET | quote-data | The Aerospike set for storing the quote records |
| AVS_INDEX_NAMESPACE | test | The Aerospike namespace for storing the HNSW index |
| AVS_INDEX_SET | quote-index | The Aerospike set for storing the HNSW index |
| AVS_INDEX_NAME | quote-search | The name of the index |
| AVS_MAX_RESULTS | 20 | Maximum number of vector search results to return |
| AVS_IS_LOADBALANCER | False | If true, the first seed address will be treated as a load balancer node. |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Aerospike database configuration file for use with systemd.

service {
cluster-name quote-demo
proto-fd-max 15000
}


logging {
file /var/log/aerospike/aerospike.log {
context any info
}

# Send log messages to stdout
console {
context any info
context query critical
}
}

network {
service {
address any
port 3000
}

heartbeat {
mode multicast
multicast-group 239.1.99.222
port 9918

# To use unicast-mesh heartbeats, remove the 3 lines above, and see
# aerospike_mesh.conf for alternative.

interval 150
timeout 10
}

fabric {
port 3001
}

info {
port 3003
}
}

namespace test {
replication-factor 1
nsup-period 60

storage-engine device {
file /opt/aerospike/data/test.dat
filesize 16G
}
}

namespace avs-meta {
replication-factor 1
nsup-period 100

storage-engine memory {
data-size 1G
}

# To use file storage backing, comment out the storage-engine memory block
# above and use the following lines instead.
# storage-engine device {
# file /opt/aerospike/data/bar.dat
# filesize 16G
# data-in-memory true # Store data in memory in addition to file.
# }
}

Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,23 @@ network {
}
}

namespace test {
namespace avs-index {
replication-factor 1
nsup-period 60

storage-engine device {
file /opt/aerospike/data/test.dat
filesize 16G
file /opt/aerospike/data/index.dat
filesize 8G
}
}

namespace avs-data {
replication-factor 2
nsup-period 60

storage-engine device {
file /opt/aerospike/data/data.dat
filesize 8G
}
}

Expand Down
7 changes: 7 additions & 0 deletions quote-semantic-search/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ services:
command:
- "--config-file"
- "/opt/aerospike/etc/aerospike/aerospike.conf"
# to store all index and vector data in the default namespace (test), replace the config path above with this line
# - "/opt/aerospike/etc/aerospike/aerospike-single-namespace.conf"
healthcheck:
# test: [ "CMD", "asinfo", "-U", "admin", "-P", "admin", "-p", "3000", "-v", "build" ]
test: [ "CMD", "asinfo", "-p", "3000", "-v", "build" ]
Expand Down Expand Up @@ -48,6 +50,11 @@ services:
AVS_PORT: "5000"
APP_NUM_QUOTES: "5000"
GRPC_DNS_RESOLVER: native
# comment out the following lines, and switch the aerospike service to aerospike-single-namespace.conf, to store all index and vector data in the default namespace (test)
AVS_NAMESPACE: avs-data
AVS_SET: quote-data
AVS_INDEX_NAMESPACE: avs-index
AVS_INDEX_SET: quote-index

networks:
avs-demo: {}
2 changes: 2 additions & 0 deletions quote-semantic-search/quote-search/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class Config(object):
AVS_INDEX_NAME = os.environ.get("AVS_INDEX_NAME") or "quote-semantic-search"
AVS_NAMESPACE = os.environ.get("AVS_NAMESPACE") or "test"
AVS_SET = os.environ.get("AVS_SET") or "quote-data"
AVS_INDEX_NAMESPACE = os.environ.get("AVS_INDEX_NAMESPACE") or "test"
AVS_INDEX_SET = os.environ.get("AVS_INDEX_SET") or "quote-index"
AVS_VERIFY_TLS = get_bool_env("VERIFY_TLS", True)
AVS_MAX_RESULTS = int(os.environ.get("AVS_MAX_RESULTS") or 5)
INDEXER_PARALLELISM = int(os.environ.get("APP_INDEXER_PARALLELISM") or 1)
Expand Down
1 change: 1 addition & 0 deletions quote-semantic-search/quote-search/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def create_index():
vector_field="quote_embedding",
dimensions=MODEL_DIM,
vector_distance_metric=types.VectorDistanceMetric.COSINE,
index_storage=types.IndexStorage(namespace=Config.AVS_INDEX_NAMESPACE, set_name=Config.AVS_INDEX_SET),
)

index_created = True
Expand Down

0 comments on commit fef7a21

Please sign in to comment.