Skip to content

Commit

Permalink
linted
Browse files Browse the repository at this point in the history
  • Loading branch information
shivankacker committed Jul 3, 2024
1 parent 81a58cf commit 264305d
Show file tree
Hide file tree
Showing 10 changed files with 50 additions and 41 deletions.
4 changes: 2 additions & 2 deletions ayushma/utils/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

import openai
from django.conf import settings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.callbacks.manager import AsyncCallbackManager
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import AzureOpenAI
from langchain.prompts import (
ChatPromptTemplate,
MessagesPlaceholder,
PromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.prompts.chat import BaseStringMessagePromptTemplate
Expand Down
2 changes: 1 addition & 1 deletion ayushma/utils/openaiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def get_reference(text, openai_key, namespace, top_k):
raise Exception(
"[Reference] Error generating embeddings for split text"
)
# find similar embeddings from vector index for each embedding
# find similar embeddings from vector index for each embedding

flat_embeddings = [item for sublist in embeddings for item in sublist]
vdb = VectorDB()
Expand Down
2 changes: 1 addition & 1 deletion ayushma/utils/upsert.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def upsert(
] # remove blank lines

embeds = get_embedding(lines_batch)
partition_name=str(external_id).replace("-", "_")
partition_name = str(external_id).replace("-", "_")

VectorDB().insert(
vectors=embeds,
Expand Down
60 changes: 34 additions & 26 deletions ayushma/utils/vectordb.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from abc import ABC, abstractmethod
import json
from abc import ABC, abstractmethod

from django.conf import settings
from pymilvus import MilvusClient
from pinecone import Pinecone
from pymilvus import MilvusClient


class AbstractVectorDB(ABC):
Expand All @@ -17,15 +18,15 @@ def initialize(self) -> None:
pass

@abstractmethod
def get_or_create_partition(self, partition_name: str):
    """Make sure a partition called ``partition_name`` is available.

    Concrete backends decide what this means; a backend without an
    explicit partition concept may implement it as a no-op.
    """

@abstractmethod
def insert(self, vectors, texts, subject, partition_name):
    """Store ``vectors`` with their ``texts`` for ``subject`` in ``partition_name``.

    ``vectors`` and ``texts`` are parallel sequences: the text at position
    ``i`` belongs to the vector at position ``i``.
    """

@abstractmethod
def get_or_create_collection(self, collection_name: str = None):
    """Make sure the collection exists in the backend.

    When ``collection_name`` is ``None`` the implementations in this module
    fall back to their class-level ``collection_name``.
    """

@abstractmethod
Expand All @@ -41,36 +42,42 @@ def delete_subject(self, subject, partition_name):
pass

@abstractmethod
def search(self, embeddings, partition_name, limit=None):
    """Look up entries in ``partition_name`` that are similar to ``embeddings``.

    NOTE(review): ``limit`` presumably caps the number of matches returned;
    how ``None`` is treated is left to each backend - confirm there.
    """


class MilvusVectorDB(AbstractVectorDB):
collection_name = settings.MILVUS_COLLECTION

def initialize(self) -> None:
    """Create the Milvus client and make sure the default collection exists."""
    milvus_uri = settings.MILVUS_URL
    self.client = MilvusClient(uri=milvus_uri)
    # Must run after the client is assigned: it queries Milvus through it.
    self.get_or_create_collection()
def get_or_create_partition(self, partition_name: str):
    """Create ``partition_name`` in this backend's collection unless it is already listed."""
    existing = self.client.list_partitions(collection_name=self.collection_name)
    if partition_name in existing:
        return  # nothing to do, the partition is already there
    self.client.create_partition(
        collection_name=self.collection_name,
        partition_name=partition_name,
    )

def insert(self, vectors, texts, subject, partition_name):
    """Insert one row per vector into ``partition_name``.

    Each row carries the vector, the text at the same position in
    ``texts``, the shared ``subject``, and an ``id`` equal to the row's
    position within this call. The partition is created first if missing.
    """
    self.get_or_create_partition(partition_name)

    # NOTE(review): ids restart at 0 on every call - confirm the collection
    # schema tolerates repeated ids across batches.
    rows = [
        {"id": position, "vector": vector, "text": texts[position], "subject": subject}
        for position, vector in enumerate(vectors)
    ]

    self.client.insert(
        collection_name=self.collection_name,
        data=rows,
        partition_name=partition_name,
    )
def get_or_create_collection(self, collection_name : str = None):

def get_or_create_collection(self, collection_name: str = None):
if collection_name is None:
collection_name = self.collection_name
if not self.client.has_collection(collection_name=collection_name):
Expand All @@ -79,7 +86,7 @@ def get_or_create_collection(self, collection_name : str = None):
dimension=self.dimensions,
)

def search(self, embeddings, partition_name, limit = None):
def search(self, embeddings, partition_name, limit=None):
self.get_or_create_partition(partition_name)

results = self.client.search(
Expand All @@ -90,7 +97,7 @@ def search(self, embeddings, partition_name, limit = None):
output_fields=["text", "subject"],
)
return results[0]

def sanitize(self, references):
sanitized_reference = {}

Expand All @@ -107,19 +114,23 @@ def sanitize(self, references):
pass

return json.dumps(sanitized_reference)

def delete_partition(self, partition_name):
    """Drop the partition ``partition_name`` from this backend's collection."""
    target = {
        "collection_name": self.collection_name,
        "partition_name": partition_name,
    }
    self.client.drop_partition(**target)

def delete_subject(self, subject, partition_name):
    """Delete the rows in ``partition_name`` whose ``subject`` field equals ``subject``."""
    # NOTE(review): the value is interpolated without escaping; presumably
    # subjects are UUID-like ids that never contain a double quote - confirm.
    subject_filter = f'subject in ["{subject!s}"]'
    self.client.delete(
        collection_name=self.collection_name,
        partition_name=partition_name,
        filter=subject_filter,
    )


class PineconeVectorDB(AbstractVectorDB):
collection_name = settings.PINECONE_INDEX

def initialize(self):
self.client = Pinecone(
api_key=settings.PINECONE_API_KEY,
Expand All @@ -130,9 +141,7 @@ def get_or_create_partition(self, partition_name):
pass

def insert(self, vectors, texts, subject, partition_name):
meta = [
{"text": texts[i], "document": subject} for i in range(len(vectors))
]
meta = [{"text": texts[i], "document": subject} for i in range(len(vectors))]
ids = [str(i) for i in range(len(vectors))]
data = zip(ids, vectors, meta)

Expand All @@ -144,7 +153,7 @@ def insert(self, vectors, texts, subject, partition_name):
def get_or_create_collection(self, collection_name=None):
if collection_name is None:
collection_name = self.collection_name
indexes = self.client.list_indexes().get("indexes",[])
indexes = self.client.list_indexes().get("indexes", [])
print("Indexes", indexes)
index_names = [index["name"] for index in indexes]
if collection_name not in index_names:
Expand Down Expand Up @@ -179,7 +188,6 @@ def sanitize(self, references):

return json.dumps(sanitized_reference)


def delete_partition(self, partition_name):
    """Delete every vector stored in the Pinecone namespace ``partition_name``.

    The namespace lives in the index named by ``self.collection_name``.
    """
    index = self.client.Index(self.collection_name)
    # The Python client's keyword is ``delete_all``; ``deleteAll`` is the
    # REST field name and is not a parameter of ``Index.delete``.
    index.delete(namespace=partition_name, delete_all=True)
Expand All @@ -190,18 +198,18 @@ def delete_subject(self, subject, partition_name):
namespace=partition_name,
filter={"document": subject},
)



class VectorDB:
    """Facade over the vector database backend selected in settings.

    ``settings.VECTOR_DB`` is compared case-insensitively: ``"milvus"``
    selects ``MilvusVectorDB`` and ``"pinecone"`` selects
    ``PineconeVectorDB``. The backend is initialized on construction, and
    any attribute not defined on this wrapper is looked up on the backend.

    Raises:
        ValueError: If ``settings.VECTOR_DB`` names an unsupported backend.
    """

    def __init__(self):
        vector_db_type = settings.VECTOR_DB.lower()
        if vector_db_type == "milvus":
            self.vector_db = MilvusVectorDB()
        elif vector_db_type == "pinecone":
            self.vector_db = PineconeVectorDB()
        else:
            raise ValueError(f"Unsupported VECTOR_DB type: {settings.VECTOR_DB}")
        self.vector_db.initialize()

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the selected backend.

        Raises:
            AttributeError: If the backend was never assigned, or if the
                backend itself has no attribute called ``name``.
        """
        # __getattr__ only runs when normal lookup fails. If ``vector_db``
        # itself is missing (failed __init__, __new__, copy/unpickle), reading
        # self.vector_db below would re-enter this method without end.
        if name == "vector_db":
            raise AttributeError(name)
        return getattr(self.vector_db, name)

4 changes: 3 additions & 1 deletion ayushma/views/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ def perform_create(self, serializer):
)
@action(detail=True, methods=["get"])
def feedbacks(self, *args, **kwarg):
q = ChatFeedback.objects.filter(chat_message__chat__external_id=kwarg["external_id"])
q = ChatFeedback.objects.filter(
chat_message__chat__external_id=kwarg["external_id"]
)
serialized_data = ChatFeedbackSerializer(q, many=True).data
return Response(
{"data": serialized_data},
Expand Down
4 changes: 2 additions & 2 deletions ayushma/views/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from utils.views.base import BaseModelViewSet
from utils.views.mixins import PartialUpdateModelMixin


class ProjectDocumentViewSet(
BaseModelViewSet,
PartialUpdateModelMixin,
Expand Down Expand Up @@ -63,12 +64,11 @@ def perform_create(self, serializer):
print(e)
pass


upsert_doc.delay(document.external_id, doc_url)

def perform_destroy(self, instance):
# delete namespace from vectorDB

try:
VectorDB().delete_subject(
partition_name=self.kwargs["project_external_id"].replace("-", "_"),
Expand Down
3 changes: 1 addition & 2 deletions core/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import environ
import openai
import pinecone
from corsheaders.defaults import default_headers
from django.urls import reverse_lazy

Expand Down Expand Up @@ -351,7 +350,7 @@
PINECONE_API_KEY = env("PINECONE_API_KEY", default="")
PINECONE_INDEX = env("PINECONE_INDEX", default="")

VECTOR_DB=env("VECTOR_DB", default="milvus")
VECTOR_DB = env("VECTOR_DB", default="milvus")

# Milvus
MILVUS_URL = env("MILVUS_URL", default="http://milvus:19530")
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ services:
- seccomp:unconfined
networks:
- default

networks:
default:
name: ayushma_local
name: ayushma_local
4 changes: 2 additions & 2 deletions docker-compose.production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ services:
- seccomp:unconfined
networks:
- default

networks:
default:
name: ayushma_production
name: ayushma_production
4 changes: 2 additions & 2 deletions prompt-example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ references = {reference}

Output Format (follow the below format strictly and you must provide the references ids array in all your responses after the result. Do not mention about the references anywhere else):

Ayushma: <Your response here>
References: <array of reference_ids (in array format: uuid[] (example : [uuid1, uuid2]) "include all the reference uuids in this array that are relevant and from which you formed the result">
Ayushma: <Your response here>
References: <array of reference_ids (in array format: uuid[] (example : [uuid1, uuid2]) "include all the reference uuids in this array that are relevant and from which you formed the result">

0 comments on commit 264305d

Please sign in to comment.