-
Notifications
You must be signed in to change notification settings - Fork 3
/
test.py
102 lines (78 loc) · 2.59 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import json
import os
from pydantic import BaseModel
from typing import List, Optional
import requests
import numpy as np
from predictionguard import PredictionGuard
# Module-level PredictionGuard client shared by embed() below.
# NOTE(review): presumably picks up its API credentials from the
# environment -- confirm against the predictionguard package docs.
client = PredictionGuard()
def cosine_similarity(a, b):
    """Calculate the cosine similarity between two vectors.

    Args:
        a: First vector (any array-like accepted by NumPy).
        b: Second vector, same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined; ``0.0`` is returned instead of NaN so that downstream
        max/min/mean aggregation stays meaningful.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard against 0/0, which would yield NaN plus a RuntimeWarning.
    if norm_product == 0:
        return 0.0
    return dot_product / norm_product
class RetrievalRequest(BaseModel):
    """Retrieval portion of an answer request.

    Serialized (via ``AnswerRequest``) into the JSON body that
    ``rag_answer`` posts to the RAG API.
    """

    # Name of the table to retrieve from.
    table: str
    # Query text to retrieve against.
    query: str
    # run_table_test constructs this model with ``hyde=True``. Without a
    # declared field, pydantic's default handling of extra keyword
    # arguments discards the value, so the flag never reached the payload.
    # NOTE(review): assumes the RAG API accepts a boolean ``hyde`` key and
    # treats False as "disabled" -- confirm against the server schema.
    hyde: bool = False
class LLMConfig(BaseModel):
    """Optional LLM settings carried in the ``llm`` field of an answer request."""

    # Presumably the cap on generated tokens; 500 when not supplied.
    max_tokens: Optional[int] = 500

    # Presumably the sampling temperature; 0.1 when not supplied.
    temperature: Optional[float] = 0.1
class AnswerRequest(BaseModel):
    """Request body for the answers endpoint: a retrieval spec plus LLM settings."""

    # What to retrieve (table and query); required.
    retrieval: RetrievalRequest

    # LLM settings; when omitted, an explicit 500-token / 0.1-temperature
    # configuration is used.
    llm: Optional[LLMConfig] = LLMConfig(max_tokens=500, temperature=0.1)
def rag_answer(answer_request):
    """POST an answer request to the RAG API and return the decoded JSON.

    The base URL is read from the ``RAG_API_URL`` environment variable and
    the request is sent to its ``/answers`` endpoint.

    Args:
        answer_request: An ``AnswerRequest``; its ``.dict()`` output is
            serialized as the JSON request body.

    Returns:
        The parsed JSON body of the response.

    Raises:
        RuntimeError: If ``RAG_API_URL`` is unset or empty.
        requests.HTTPError: If the API responds with an error status.
    """
    base_url = os.environ.get("RAG_API_URL")
    if not base_url:
        # Fail with a clear message instead of a TypeError from None + str.
        raise RuntimeError("RAG_API_URL environment variable is not set")

    # Tolerate a trailing slash in the configured base URL.
    url = base_url.rstrip('/') + '/answers'
    payload = json.dumps(answer_request.dict())
    headers = {'Content-Type': 'application/json'}
    response = requests.request("POST", url, headers=headers, data=payload)
    # Surface HTTP failures here rather than as a confusing KeyError or
    # JSON decode error further down the line.
    response.raise_for_status()
    return response.json()
def embed(query):
    """
    Request an embedding for the given query from the module-level client.

    The call uses the "multilingual-e5-large-instruct" model with
    truncation enabled.

    Args:
        query (str): The input text to be embedded.

    Returns:
        The ``embedding`` entry of the first item in the response's
        ``data`` list.
    """
    embedding_response = client.embeddings.create(
        input=query,
        model="multilingual-e5-large-instruct",
        truncate=True,
    )
    first_item = embedding_response["data"][0]
    return first_item["embedding"]
def run_table_test(table, table_name="testtxt"):
    """Compare RAG answers against reference texts by embedding similarity.

    For every entry in ``table`` the key is sent to the RAG API as the
    query, the value is embedded as the reference text (presumably the
    expected answer), and the cosine similarity between the reference
    embedding and the embedding of the returned answer is recorded. The
    max, min and mean similarity are printed.

    Args:
        table (dict): Mapping of query string to reference text.
        table_name (str): Name of the table the RAG API should retrieve
            from. Defaults to "testtxt".

    Returns:
        list: One similarity score per entry, in the iteration order of
        ``table``. Empty if ``table`` has no entries.
    """
    similarities = []
    for question, reference_text in table.items():
        reference_emb = embed(reference_text)

        # NOTE: ``hyde`` only reaches the API if RetrievalRequest declares
        # a ``hyde`` field; pydantic ignores undeclared keyword arguments
        # by default.
        answer_request = AnswerRequest(retrieval=RetrievalRequest(
            table=table_name,
            query=question,
            hyde=True))
        response = rag_answer(answer_request)
        answer_emb = embed(response['answer'])

        similarities.append(cosine_similarity(reference_emb, answer_emb))

    # max()/min() raise ValueError on an empty sequence, so bail out early.
    if not similarities:
        print("No entries in table; nothing to evaluate.")
        return similarities

    # Print out the max, min, and mean of the results
    print("Max:", max(similarities))
    print("Min:", min(similarities))
    print("Mean:", np.mean(similarities))
    return similarities
if __name__ == "__main__":
    # Load the test fixture: a JSON object whose entries are passed to
    # run_table_test. JSON is decoded explicitly as UTF-8 so non-ASCII
    # text does not depend on the platform's default encoding.
    with open('fixture.json', 'r', encoding='utf-8') as f:
        fixture = json.load(f)
    run_table_test(fixture)