# sc.py - content-based movie recommender (TF-IDF features + cosine similarity)
#source code
#importing dependencies
import difflib

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Data collection and pre-processing.
# Load the movie data from the CSV file into a pandas DataFrame. The frame is
# bound to `movies_data` because that is the name every later step of this
# script reads.
movies_data = pd.read_csv('Data/movies.csv')
# Show the first 5 rows (an explicit print is needed outside a notebook).
print(movies_data.head())
# Number of rows and columns in the DataFrame.
print(movies_data.shape)
# Columns used to describe a movie for the recommendation step.
selected_features = ['genres', 'keywords', 'runtime', 'cast', 'director']
print(selected_features)
# Replace missing values with an empty string and coerce every selected column
# to str so the columns can be concatenated below.
# NOTE(review): 'runtime' is presumably numeric in the CSV - confirm. Without
# the str cast, adding it to a string column raises TypeError.
for feature in selected_features:
    movies_data[feature] = movies_data[feature].fillna('').astype(str)
# Join the selected columns row by row, separated by single spaces. The list
# above drives the join, so the two cannot drift apart.
combined_features = movies_data[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + movies_data[feature]
print(combined_features)
# Convert the combined text of each movie into a TF-IDF feature vector.
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors)
# Pairwise cosine similarity between all movies: row i holds the similarity of
# movie i (in `combined_features` order) to every movie.
similarity = cosine_similarity(feature_vectors)
print(similarity)
# Step-by-step walkthrough of one recommendation, printing every intermediate
# value along the way.
# Get the movie name from the user.
movie_name = input("Enter your favourite movie name:")
# All movie titles in the dataset, in row order.
list_of_all_titles = movies_data['title'].tolist()
print(list_of_all_titles)
# Closest titles to what the user typed (may be empty when nothing is similar
# enough).
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)
if not find_close_match:
    # Nothing to recommend from; report it instead of failing on [0] below.
    print("No close match found for:", movie_name)
else:
    close_match = find_close_match[0]
    print(close_match)
    # Row position of the matched title. `similarity` is built from the rows
    # of `movies_data` in order, so the positional row is the correct key into
    # it; this avoids depending on a separate 'index' column or on the
    # DataFrame's index labels.
    index_of_the_movie = int(
        np.flatnonzero((movies_data['title'] == close_match).to_numpy())[0]
    )
    print(index_of_the_movie)
    # (row position, similarity score) pairs for every movie in the dataset.
    similarity_score = list(enumerate(similarity[index_of_the_movie]))
    print(similarity_score)
    print(len(similarity_score))
    # Highest similarity first; ties keep dataset order (sorted() is stable).
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    print(sorted_similar_movies)
    # Print the titles of the 29 most similar movies, numbered from 1.
    print("Movies suggested for you: \n")
    for i, movie in enumerate(sorted_similar_movies[:29], start=1):
        index = movie[0]
        title_from_index = movies_data['title'].iloc[index]
        print(i, '.', title_from_index)
def recommend_movies(movie_name, movies_data, similarity, limit=29):
    """Return the titles most similar to the closest match of *movie_name*.

    Args:
        movie_name: Free-text movie name, e.g. as typed by the user.
        movies_data: DataFrame with a 'title' column.
        similarity: Square similarity matrix whose row ``i`` holds the scores
            of the ``i``-th row of ``movies_data`` against every row.
        limit: Maximum number of titles to return.

    Returns:
        A list of at most ``limit`` titles ordered from most to least similar
        (ties keep dataset order). The list is empty when no title is close
        enough to ``movie_name``.
    """
    titles = movies_data['title']
    matches = difflib.get_close_matches(movie_name, titles.tolist())
    if not matches:
        return []
    # Positional row of the best match; `similarity` is indexed by position.
    position = int(np.flatnonzero((titles == matches[0]).to_numpy())[0])
    ranked = sorted(
        enumerate(similarity[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [titles.iloc[row] for row, _score in ranked[:limit]]


# Consolidated recommender: ask for a movie and print the suggestions. The
# guard keeps the prompt from firing when this code is imported.
if __name__ == "__main__":
    movie_name = input("Enter your favourite movie name:")
    suggestions = recommend_movies(movie_name, movies_data, similarity)
    if not suggestions:
        print("No close match found for:", movie_name)
    else:
        print("Movies suggested for you: \n")
        for i, title_from_index in enumerate(suggestions, start=1):
            print(i, '.', title_from_index)