linted

ohcnetwork · Jul 3, 2024 · 264305d
1 parent 81a58cf
commit 264305d
Show file tree

Hide file tree

Showing 10 changed files with 50 additions and 41 deletions.
diff --git a/ayushma/utils/langchain.py b/ayushma/utils/langchain.py
@@ -2,14 +2,14 @@
 
 import openai
 from django.conf import settings
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
 from langchain.callbacks.manager import AsyncCallbackManager
+from langchain.chains import LLMChain
 from langchain.chat_models import ChatOpenAI
 from langchain.llms import AzureOpenAI
 from langchain.prompts import (
     ChatPromptTemplate,
     MessagesPlaceholder,
+    PromptTemplate,
     SystemMessagePromptTemplate,
 )
 from langchain.prompts.chat import BaseStringMessagePromptTemplate

diff --git a/ayushma/utils/openaiapi.py b/ayushma/utils/openaiapi.py
@@ -122,7 +122,7 @@ def get_reference(text, openai_key, namespace, top_k):
                 raise Exception(
                     "[Reference] Error generating embeddings for split text"
                 )
-    # find similar embeddings from vector index for each embedding    
+    # find similar embeddings from vector index for each embedding
 
     flat_embeddings = [item for sublist in embeddings for item in sublist]
     vdb = VectorDB()

diff --git a/ayushma/utils/upsert.py b/ayushma/utils/upsert.py
@@ -94,7 +94,7 @@ def upsert(
         ]  # remove blank lines
 
         embeds = get_embedding(lines_batch)
-        partition_name=str(external_id).replace("-", "_")
+        partition_name = str(external_id).replace("-", "_")
 
         VectorDB().insert(
             vectors=embeds,

diff --git a/ayushma/utils/vectordb.py b/ayushma/utils/vectordb.py
@@ -1,8 +1,9 @@
-from abc import ABC, abstractmethod
 import json
+from abc import ABC, abstractmethod
+
 from django.conf import settings
-from pymilvus import MilvusClient
 from pinecone import Pinecone
+from pymilvus import MilvusClient
 
 
 class AbstractVectorDB(ABC):
@@ -17,15 +18,15 @@ def initialize(self) -> None:
         pass
 
     @abstractmethod
-    def get_or_create_partition(self, partition_name : str):
+    def get_or_create_partition(self, partition_name: str):
         pass
 
     @abstractmethod
     def insert(self, vectors, texts, subject, partition_name):
         pass
 
     @abstractmethod
-    def get_or_create_collection(self, collection_name : str = None):
+    def get_or_create_collection(self, collection_name: str = None):
         pass
 
     @abstractmethod
@@ -41,36 +42,42 @@ def delete_subject(self, subject, partition_name):
         pass
 
     @abstractmethod
-    def search(self, embeddings, partition_name, limit = None):
+    def search(self, embeddings, partition_name, limit=None):
         pass
 
+
 class MilvusVectorDB(AbstractVectorDB):
     collection_name = settings.MILVUS_COLLECTION
-    
+
     def initialize(self) -> None:
         self.client = MilvusClient(
             uri=settings.MILVUS_URL,
         )
         self.get_or_create_collection()
-    
-    def get_or_create_partition(self, partition_name : str):
+
+    def get_or_create_partition(self, partition_name: str):
         partitions = self.client.list_partitions(collection_name=self.collection_name)
         if partition_name not in partitions:
-            self.client.create_partition(collection_name=self.collection_name, partition_name=partition_name)
+            self.client.create_partition(
+                collection_name=self.collection_name, partition_name=partition_name
+            )
 
     def insert(self, vectors, texts, subject, partition_name):
 
         self.get_or_create_partition(partition_name)
 
-        data = [{"id": i, "vector": vectors[i], "text": texts[i], "subject": subject} for i in range(len(vectors))]
+        data = [
+            {"id": i, "vector": vectors[i], "text": texts[i], "subject": subject}
+            for i in range(len(vectors))
+        ]
 
         self.client.insert(
             collection_name=self.collection_name,
             data=data,
             partition_name=partition_name,
         )
-    
-    def get_or_create_collection(self, collection_name : str = None):
+
+    def get_or_create_collection(self, collection_name: str = None):
         if collection_name is None:
             collection_name = self.collection_name
         if not self.client.has_collection(collection_name=collection_name):
@@ -79,7 +86,7 @@ def get_or_create_collection(self, collection_name : str = None):
                 dimension=self.dimensions,
             )
 
-    def search(self, embeddings, partition_name, limit = None):
+    def search(self, embeddings, partition_name, limit=None):
         self.get_or_create_partition(partition_name)
 
         results = self.client.search(
@@ -90,7 +97,7 @@ def search(self, embeddings, partition_name, limit = None):
             output_fields=["text", "subject"],
         )
         return results[0]
-    
+
     def sanitize(self, references):
         sanitized_reference = {}
 
@@ -107,19 +114,23 @@ def sanitize(self, references):
                 pass
 
         return json.dumps(sanitized_reference)
-    
+
     def delete_partition(self, partition_name):
-        self.client.drop_partition(collection_name=self.collection_name, partition_name=partition_name)
+        self.client.drop_partition(
+            collection_name=self.collection_name, partition_name=partition_name
+        )
 
     def delete_subject(self, subject, partition_name):
         self.client.delete(
             collection_name=self.collection_name,
             partition_name=partition_name,
-            filter='subject in ["' + str(subject)+ '"]',
+            filter='subject in ["' + str(subject) + '"]',
         )
 
+
 class PineconeVectorDB(AbstractVectorDB):
     collection_name = settings.PINECONE_INDEX
+
     def initialize(self):
         self.client = Pinecone(
             api_key=settings.PINECONE_API_KEY,
@@ -130,9 +141,7 @@ def get_or_create_partition(self, partition_name):
         pass
 
     def insert(self, vectors, texts, subject, partition_name):
-        meta = [
-            {"text": texts[i], "document": subject} for i in range(len(vectors))
-        ]
+        meta = [{"text": texts[i], "document": subject} for i in range(len(vectors))]
         ids = [str(i) for i in range(len(vectors))]
         data = zip(ids, vectors, meta)
 
@@ -144,7 +153,7 @@ def insert(self, vectors, texts, subject, partition_name):
     def get_or_create_collection(self, collection_name=None):
         if collection_name is None:
             collection_name = self.collection_name
-        indexes = self.client.list_indexes().get("indexes",[])
+        indexes = self.client.list_indexes().get("indexes", [])
         print("Indexes", indexes)
         index_names = [index["name"] for index in indexes]
         if collection_name not in index_names:
@@ -179,7 +188,6 @@ def sanitize(self, references):
 
         return json.dumps(sanitized_reference)
 
-
     def delete_partition(self, partition_name):
         index = self.client.Index(self.collection_name)
         index.delete(namespace=partition_name, deleteAll=True)
@@ -190,18 +198,18 @@ def delete_subject(self, subject, partition_name):
             namespace=partition_name,
             filter={"document": subject},
         )
-
+
+
 class VectorDB:
     def __init__(self):
         vector_db_type = settings.VECTOR_DB.lower()
-        if vector_db_type == 'milvus':
+        if vector_db_type == "milvus":
             self.vector_db = MilvusVectorDB()
-        elif vector_db_type == 'pinecone':
+        elif vector_db_type == "pinecone":
             self.vector_db = PineconeVectorDB()
         else:
             raise ValueError(f"Unsupported VECTOR_DB type: {settings.VECTOR_DB}")
         self.vector_db.initialize()
 
     def __getattr__(self, name):
         return getattr(self.vector_db, name)
-
diff --git a/ayushma/views/chat.py b/ayushma/views/chat.py
@@ -104,7 +104,9 @@ def perform_create(self, serializer):
     )
     @action(detail=True, methods=["get"])
     def feedbacks(self, *args, **kwarg):
-        q = ChatFeedback.objects.filter(chat_message__chat__external_id=kwarg["external_id"])
+        q = ChatFeedback.objects.filter(
+            chat_message__chat__external_id=kwarg["external_id"]
+        )
         serialized_data = ChatFeedbackSerializer(q, many=True).data
         return Response(
             {"data": serialized_data},

diff --git a/ayushma/views/document.py b/ayushma/views/document.py
@@ -17,6 +17,7 @@
 from utils.views.base import BaseModelViewSet
 from utils.views.mixins import PartialUpdateModelMixin
 
+
 class ProjectDocumentViewSet(
     BaseModelViewSet,
     PartialUpdateModelMixin,
@@ -63,12 +64,11 @@ def perform_create(self, serializer):
             print(e)
             pass
 
-
         upsert_doc.delay(document.external_id, doc_url)
 
     def perform_destroy(self, instance):
         # delete namespace from vectorDB
-        
+
         try:
             VectorDB().delete_subject(
                 partition_name=self.kwargs["project_external_id"].replace("-", "_"),

diff --git a/core/settings/base.py b/core/settings/base.py
@@ -6,7 +6,6 @@
 
 import environ
 import openai
-import pinecone
 from corsheaders.defaults import default_headers
 from django.urls import reverse_lazy
 
@@ -351,7 +350,7 @@
 PINECONE_API_KEY = env("PINECONE_API_KEY", default="")
 PINECONE_INDEX = env("PINECONE_INDEX", default="")
 
-VECTOR_DB=env("VECTOR_DB", default="milvus")
+VECTOR_DB = env("VECTOR_DB", default="milvus")
 
 # Milvus
 MILVUS_URL = env("MILVUS_URL", default="http://milvus:19530")

diff --git a/docker-compose.local.yaml b/docker-compose.local.yaml
@@ -69,7 +69,7 @@ services:
       - seccomp:unconfined
     networks:
       - default
-    
+
 networks:
   default:
-    name: ayushma_local
\ No newline at end of file
+    name: ayushma_local
diff --git a/docker-compose.production.yaml b/docker-compose.production.yaml
@@ -67,7 +67,7 @@ services:
       - seccomp:unconfined
     networks:
       - default
-    
+
 networks:
   default:
-    name: ayushma_production
\ No newline at end of file
+    name: ayushma_production
diff --git a/prompt-example.txt b/prompt-example.txt
@@ -9,5 +9,5 @@ references = {reference}
 
 Output Format (follow the below format strictly and you must provide the references ids array in all your responses after the result. Do not mention about the references anywhere else):
 
-Ayushma: <Your response here> 
-References: <array of reference_ids (in array format: uuid[] (example : [uuid1, uuid2]) "include all the reference uuids in this array that are relevant and from which you formed the result">
\ No newline at end of file
+Ayushma: <Your response here>
+References: <array of reference_ids (in array format: uuid[] (example : [uuid1, uuid2]) "include all the reference uuids in this array that are relevant and from which you formed the result">