-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
29 lines (24 loc) · 980 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from crawler import Crawler
from indexer import Indexer
from query_processor import QuerryProcessor
from document import Document
from time import sleep
if __name__ == '__main__':
    # Script entry point: crawl, index the crawled documents together with
    # the user's query, score everything, then print the matching links
    # ordered by similarity to the query.

    # Crawl starting from the seed URL. The remaining positional arguments
    # are forwarded to Crawler unchanged; their meaning is defined there.
    # NOTE(review): presumably crawl limits, a flag and the traversal
    # strategy ('BFS') -- confirm against the Crawler constructor.
    web_crawler = Crawler('https://www.in.gr', 20, 5, True, 'BFS')
    web_crawler.initializeCrawl()

    # The indexer is fed from the class-level ``Crawler.documents``
    # attribute (not from the instance created above).
    indexer = Indexer(Crawler.documents)

    # The search query is added to the index as one more document, under
    # the fixed identifier 'search_query'.
    search_text = input("Enter your search query:")
    query_document = Document('search_query', search_text)
    indexer.add_document(query_document)

    print('Building Indexer...')
    indexer.create_indexer()

    print('Calculating TF-IDFs. May take a while.')
    indexer.calculate_scores()

    processor = QuerryProcessor(
        indexer.inverted_index,
        len(indexer.documents),
    )

    # Each result is indexed as (document, score); order by the score in
    # position 1 (the cosine similarity), highest first.
    ranked = list(processor.compare_documents())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    print(f'Showing top results based on your query "{search_text}":')
    for entry in ranked:
        print(entry[0].link)