Commit 6fd95b4
updated model for embeddings
ayyubibrahimi committed Nov 8, 2023
1 parent 991dffc commit 6fd95b4
Showing 6 changed files with 33 additions and 33 deletions.
2 changes: 1 addition & 1 deletion packages/backend/src/cache/faiss_index_general.dvc
@@ -1,5 +1,5 @@
 outs:
-- md5: 71ba40a724f16e67f72d45805b5782c1.dir
+- md5: 1da4b1d88045a2adbc87e5d11c0a6af8.dir
   size: 85685475
   nfiles: 2
   hash: md5
2 changes: 1 addition & 1 deletion packages/backend/src/cache/faiss_index_in_depth.dvc
@@ -1,5 +1,5 @@
 outs:
-- md5: 9ebc1797364f0107c0e944eb9f1c5dbc.dir
+- md5: 038f50d4fd4c158b138d626f0fe19c2e.dir
   size: 85685475
   nfiles: 2
   hash: md5
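
Note: these .dvc files are DVC pointer files. Each records only the md5 directory checksum, total size, and file count of the FAISS index directory it tracks; the index data itself lives in the DVC cache/remote, not in git. Updating the checksum here is what signals that the indexes were regenerated with the new embeddings.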
24 changes: 11 additions & 13 deletions packages/backend/src/preprocessor.py
@@ -1,7 +1,6 @@
 import logging
 import os
 from langchain.document_loaders import (
-    Docx2txtLoader,
     JSONLoader,
 )
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -13,13 +12,12 @@
 from pathlib import Path
 import shutil


 logger = logging.getLogger(__name__)
 dir = Path(__file__).parent.absolute()


 def create_embeddings():
-    # llm = ChatOpenAI()
     llm = ChatOpenAI(model="gpt-4-1106-preview")

     base_embeddings = OpenAIEmbeddings()

@@ -38,18 +36,18 @@ def create_embeddings():
         input_variables=["user_query"], template=in_depth_prompt_template
     )

-    # llm_chain_general = LLMChain(llm=llm, prompt=general_prompt)
-    # llm_chain_in_depth = LLMChain(llm=llm, prompt=in_depth_prompt)
+    llm_chain_general = LLMChain(llm=llm, prompt=general_prompt)
+    llm_chain_in_depth = LLMChain(llm=llm, prompt=in_depth_prompt)

-    # general_embeddings = HypotheticalDocumentEmbedder(
-    #     llm_chain=llm_chain_general,
-    #     base_embeddings=base_embeddings,
-    # )
-    # in_depth_embeddings = HypotheticalDocumentEmbedder(
-    #     llm_chain=llm_chain_in_depth, base_embeddings=base_embeddings
-    # )
+    general_embeddings = HypotheticalDocumentEmbedder(
+        llm_chain=llm_chain_general,
+        base_embeddings=base_embeddings,
+    )
+    in_depth_embeddings = HypotheticalDocumentEmbedder(
+        llm_chain=llm_chain_in_depth, base_embeddings=base_embeddings
+    )

-    return base_embeddings, base_embeddings
+    return general_embeddings, in_depth_embeddings


 def metadata_func_minutes_and_agendas(record: dict, metadata: dict) -> dict:
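
For context on what this re-enables: HypotheticalDocumentEmbedder implements HyDE, where the LLM first drafts a hypothetical answer to the query and the base embedding model then embeds that draft, so the query vector lands nearer to real transcript passages than the raw query would. A minimal usage sketch, assuming the LangChain APIs imported in this file; the index path and query string below are illustrative, not taken from this commit:

from langchain.vectorstores import FAISS

# Build the HyDE embedders exactly as the updated function returns them.
general_embeddings, in_depth_embeddings = create_embeddings()

# embed_query() runs the LLM chain to draft a hypothetical answer, then
# embeds that draft with the base OpenAIEmbeddings.
query = "What did the council discuss about short-term rentals?"  # illustrative
vector = general_embeddings.embed_query(query)

# Search a prebuilt FAISS index with the HyDE vector (path is an assumption).
db = FAISS.load_local("src/cache/faiss_index_general", general_embeddings)
docs = db.similarity_search_by_vector(vector, k=5)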
(fourth changed file; path not shown in this view)
@@ -1,5 +1,5 @@
 outs:
-- md5: 71ba40a724f16e67f72d45805b5782c1.dir
+- md5: 1da4b1d88045a2adbc87e5d11c0a6af8.dir
   size: 85685475
   nfiles: 2
   hash: md5
(fifth changed file; path not shown in this view)
@@ -1,5 +1,5 @@
 outs:
-- md5: 9ebc1797364f0107c0e944eb9f1c5dbc.dir
+- md5: 038f50d4fd4c158b138d626f0fe19c2e.dir
   size: 85685475
   nfiles: 2
   hash: md5
34 changes: 18 additions & 16 deletions packages/googlecloud/functions/getanswer/helper.py
@@ -37,34 +37,36 @@ def get_dbs():


 def create_embeddings():
+    llm = ChatOpenAI(model="gpt-4-1106-preview")
+
+    base_embeddings = OpenAIEmbeddings()
+
     general_prompt_template = """
-    As an AI assistant tasked with generating brief general summaries, your role is to provide succinct, balanced information from the transcripts of New Orleans City Council meetings in response to the question "{question}". The response should not exceed one paragraph in length. If the available information from the transcripts is insufficient to accurately summarize the issue, please respond with 'Insufficient information available.' If the question extends beyond the scope of information contained in the transcripts, state 'I don't know.'
+    As an AI assistant, your role is to provide concise, balanced summaries from the transcripts of New Orleans City Council meetings in response to the user's query "{user_query}". Your response should not exceed one paragraph in length. If the available information from the transcripts is insufficient to accurately summarize the issue, respond with 'Insufficient information available.' If the user's query extends beyond the scope of information contained in the transcripts, state 'I don't know.'
     Answer:"""

     in_depth_prompt_template = """
-    As an AI assistant tasked with providing in-depth dialogical summaries, your role is to provide comprehensive information from the transcripts of New Orleans City Council meetings. Your response should mimic the structure of a real conversation, often involving more than two exchanges between the parties. The dialogue should recreate the actual exchanges that occurred between city council members and external stakeholders in response to the question "{question}". For specific queries related to any votes that took place, your response should include detailed information. This should cover the ordinance number, who moved and seconded the motion, how each council member voted, and the final outcome of the vote. For each statement, response, and voting action, provide a summary, followed by a direct quote from the meeting transcript to ensure the context and substance of the discussion is preserved. If a question is about the voting results on a particular initiative, include in your response how each council member voted, if they were present, and if there were any abstentions or recusals. Always refer back to the original transcript to ensure accuracy. If the available information from the transcripts is insufficient to accurately answer the question or recreate the dialogue, please respond with 'Insufficient information available.' If the question extends beyond the scope of information contained in the transcripts, state 'I don't know.'
+    As an AI assistant, use the New Orleans City Council transcript data that you were trained on to provide an in-depth and balanced response to the following query: "{user_query}"
     Answer:"""

     general_prompt = PromptTemplate(
-        input_variables=["question"], template=general_prompt_template
+        input_variables=["user_query"], template=general_prompt_template
     )
     in_depth_prompt = PromptTemplate(
-        input_variables=["question"], template=in_depth_prompt_template
+        input_variables=["user_query"], template=in_depth_prompt_template
     )

-    # llm_chain_general = LLMChain(llm=llm, prompt=general_prompt)
-    # llm_chain_in_depth = LLMChain(llm=llm, prompt=in_depth_prompt)
-
-    base_embeddings = OpenAIEmbeddings()
-
-    # general_embeddings = HypotheticalDocumentEmbedder(
-    #     llm_chain=llm_chain_general, base_embeddings=base_embeddings
-    # )
-    # in_depth_embeddings = HypotheticalDocumentEmbedder(
-    #     llm_chain=llm_chain_in_depth, base_embeddings=base_embeddings
-    # )
+    llm_chain_general = LLMChain(llm=llm, prompt=general_prompt)
+    llm_chain_in_depth = LLMChain(llm=llm, prompt=in_depth_prompt)

-    return base_embeddings, base_embeddings
+    general_embeddings = HypotheticalDocumentEmbedder(
+        llm_chain=llm_chain_general,
+        base_embeddings=base_embeddings,
+    )
+    in_depth_embeddings = HypotheticalDocumentEmbedder(
+        llm_chain=llm_chain_in_depth, base_embeddings=base_embeddings
+    )
+    return general_embeddings, in_depth_embeddings


 def sort_retrived_documents(doc_list):
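
This is the same switch from plain base embeddings to HyDE embedders as in preprocessor.py, with the prompts rewritten around a user_query input variable. A simplified illustration of the flow this wires up (for orientation only; not LangChain's exact implementation):

def hyde_embed_query(llm_chain, base_embeddings, user_query):
    # 1. LLMChain fills {user_query} into the prompt template and the
    #    gpt-4-1106-preview model drafts a hypothetical answer.
    hypothetical_doc = llm_chain.run(user_query=user_query)
    # 2. The base embedding model embeds the drafted answer; that vector
    #    is what gets compared against the FAISS indexes.
    return base_embeddings.embed_query(hypothetical_doc)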
