-
Notifications
You must be signed in to change notification settings - Fork 0
/
explore_song_lyrics.py
69 lines (47 loc) · 1.95 KB
/
explore_song_lyrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Explore song lyrics: download a weekly Spotify chart table to use as the
# list of songs whose lyrics are looked up below.

# pandas
import pandas as pd

# Chart page on kworb.net (the `cr` in the path is presumably the country
# code -- verify if another market is wanted).
url = 'http://kworb.net/spotify/country/cr_weekly.html'

# read_html returns a list of DataFrames parsed from the page's tables;
# the first one is used as the chart.
df = pd.read_html(url)
dfs = df[0]
# Genius lyrics client
import lyricsgenius as lg

# Replace the placeholder with the client access token of your Genius API app.
genius = lg.Genius("your token")

# Break 'Artist and Title' into two new columns, 'singer' and 'song_name',
# at its last ' - ' separator.
# NOTE(review): the split works from the right, so a title that itself
# contains ' - ' is cut at its final separator -- confirm this is intended.
artist_title = dfs['Artist and Title'].str.rsplit(' - ', n=1, expand=True)
dfs[['singer', 'song_name']] = artist_title
# Collect one entry per chart row: the lyrics text when Genius finds the
# song, or None when it does not.
lyrics_list = []
for artist, song_name in zip(dfs['singer'], dfs['song_name']):
    # Look the song up by title and artist.
    song = genius.search_song(song_name, artist)
    # Keep a None placeholder for songs that were not found.
    lyrics_list.append(song.lyrics if song is not None else None)
# Make the lyrics readable and merge them into one string:
#   1. drop entries that are not strings (songs that were not found are
#      stored as None in lyrics_list),
#   2. turn line breaks and backslashes into spaces,
#   3. remove anything in square brackets (presumably section markers such
#      as "[Chorus]" -- verify against the fetched lyrics).
import re

# Filter first: calling str methods on a None placeholder raises
# AttributeError, which aborted the script whenever a song was missing.
my_list = [s for s in lyrics_list if isinstance(s, str)]

# Replace "\n" with a space rather than an empty string; removing it
# outright fuses the last word of one line with the first word of the next.
my_list = [s.replace("\n", " ").replace("\\", " ") for s in my_list]
my_list = [re.sub(r'\[.*?\]', '', s) for s in my_list]

# Join full text
text = " ".join(my_list)
# Explore the lyrics in a word cloud
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Words passed to WordCloud as stopwords (Spanish function words and
# interjections, plus a bare dash).
# NOTE(review): accented/unaccented pairs such as 'tu'/'tú' and 'que'/'qué'
# are both listed, but 'mí' is not -- confirm whether it should be added.
sw = {
    '-', 'a', 'ante', 'bajo', 'cada', 'con', 'cómo', 'como', 'cuando',
    'de', 'del', 'desde', 'e', 'eh', 'el', 'en', 'entre', 'es', 'eso',
    'este', 'está', 'la', 'las', 'le', 'les', 'lo', 'los', 'más', 'mi',
    'me', 'muy', 'ni', 'no', 'o', 'oh', 'para', 'pero', 'por', 'que',
    'qué', 'se', 'sé', 'si', 'sin', 'sobre', 'su', 'sus', 'ti', 'te',
    'tu', 'tú', 'uh', 'un', 'una', 'uno', 'y', 'ya', 'yo',
}

# Build the cloud from the joined lyrics text.
cloud_builder = WordCloud(stopwords=sw)
wordcloud = cloud_builder.generate(text)

# Render the cloud as an image with the axes hidden.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()