Kalebu · atugharajohn · Apr 8, 2024 · Apr 9, 2024 · Apr 12, 2024 · Apr 13, 2024
diff --git a/A_John.txt b/A_John.txt
@@ -0,0 +1,2 @@
+Believe in yourself and all that you are. 
+Know that there is something inside you that is greater than any obstacle.
diff --git a/README.md b/README.md
@@ -41,7 +41,6 @@ $ Plagiarism-checker-Python-> python3 app.py
 ('fatma.txt', 'juma.txt', 0.18643448370323362)
 
 ```
-
 ## A Python Library?
 
 If you would rather use a Python library to compare strings and documents without spending time writing the vectorizers yourself, take a look at [Pysimilar](https://github.com/Kalebu/pysimilar).
@@ -57,7 +56,7 @@ you can raise an issue.
 
 ## Pull Requests
 
-If you have something to add, I welcome pull requests on improvement; your helpful contribution will be merged as soon as possible.
+If you have something to add, I welcome pull requests with improvements; helpful contributions will be merged as soon as possible.
 
 ## Give it a Star
 

diff --git a/app.py b/app.py
@@ -15,20 +15,21 @@ def similarity(doc1, doc2): return cosine_similarity([doc1, doc2])
 s_vectors = list(zip(student_files, vectors))
 plagiarism_results = set()
 
+# check_plagiarism() only reports pairs whose similarity score is strictly above `threshold` (default 0.8).
 
-def check_plagiarism():
+def check_plagiarism(threshold=0.8):
     global s_vectors
     for student_a, text_vector_a in s_vectors:
         new_vectors = s_vectors.copy()
         current_index = new_vectors.index((student_a, text_vector_a))
         del new_vectors[current_index]
         for student_b, text_vector_b in new_vectors:
             sim_score = similarity(text_vector_a, text_vector_b)[0][1]
-            student_pair = sorted((student_a, student_b))
-            score = (student_pair[0], student_pair[1], sim_score)
-            plagiarism_results.add(score)
+            if sim_score > threshold:
+                student_pair = sorted((student_a, student_b))
+                score = (student_pair[0], student_pair[1], sim_score)
+                plagiarism_results.add(score)
     return plagiarism_results
 
-
 for data in check_plagiarism():
     print(data)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Believe in yourself and all that you are.
		Know that there is something inside you that is greater than any obstacle.