-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapeLyrics.py
161 lines (115 loc) · 4.54 KB
/
scrapeLyrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import time
from bs4 import BeautifulSoup
import requests
import re
import tekore as tk
import unidecode
import os
# Comment out the code in getCurrentInfo() and paste the following code into the function.
# Run the program, accept the pop-up window, and paste the redirect link into the command window.
# Then delete the pasted code from getCurrentInfo() and uncomment the original code.
# This sets up the tekore.cfg file for the first time.
'''
client_id = 'ENTER YOUR CLIENT ID'
client_secret = 'ENTER YOUR CLIENT SECRET'
redirect_uri = 'https://example.com/callback'
user_token = tk.prompt_for_user_token(
client_id,
client_secret,
redirect_uri,
scope=tk.scope.every
)
spotify = tk.Spotify(user_token)
conf = (client_id, client_secret, redirect_uri, user_token.refresh_token)
tk.config_to_file('tekore.cfg', conf)
'''
def getCurrentInfo(lastSong):
    """Poll Spotify for the current track and print its lyrics when it changes.

    Reads the credentials from 'tekore.cfg', refreshes the user token and
    asks Spotify what is currently playing. When the track name differs from
    ``lastSong`` the console is cleared, a "song - artists" header is printed
    and the Genius lookup is started.

    Args:
        lastSong: Name of the track seen on the previous poll ('' on the
            first call).

    Returns:
        The name of the track that is playing now, or ``lastSong`` unchanged
        when Spotify reports no playback / no track item.
    """
    # initialise the client
    conf = tk.config_from_file('tekore.cfg', return_refresh=True)
    user_token = tk.refresh_user_token(*conf[:2], conf[3])
    spotify = tk.Spotify(user_token)

    # Query once: the name and the artists then come from the same snapshot,
    # and the request count per poll is halved.
    playback = spotify.playback_currently_playing(None, True)
    track = playback.item if playback is not None else None
    if track is None:
        # Nothing is playing (or no track item is available); keep the
        # previous state instead of failing on a missing attribute.
        return lastSong

    songName = track.name
    # Commas inside an artist's own name are dropped because ',' is used as
    # the separator between artists further down the pipeline.
    artistName = ','.join(artist.name.replace(',', '') for artist in track.artists)

    if lastSong != songName:
        # 'cls' is the Windows command; other platforms use 'clear'.
        os.system('cls' if os.name == 'nt' else 'clear')
        try:
            print(unidecode.unidecode(songName) + ' - ' + unidecode.unidecode(artistName) + '\n')
        except Exception:
            # Best-effort header: a failure to print it must not stop the
            # lyrics lookup (Ctrl+C is no longer swallowed here).
            print('Song name or artist name contains special character')
        getGeniusURL(artistName, songName)
    return songName
def getGeniusURL(songArtists, songName):
    """Build the Genius lyrics URL for a track and hand it to ScrapeLyrics.

    A "(feat. X)" suffix is removed from the song name and the featured
    artist is removed from the artist list; anything after a '-' in the song
    name is dropped as well. The URL has the form
    'https://genius.com/<artists>-<song>-lyrics'.

    Args:
        songArtists: Artist names joined with ','.
        songName: Track title as reported by Spotify.
    """
    url = 'https://genius.com/'
    featMatch = re.search(r'\s?\(feat\.?\s*(.*?)\)', songName)
    if featMatch:
        # The leading '.' consumes the separator in front of the featured
        # artist inside the comma-joined list. re.escape keeps characters
        # such as '$', '+' or '(' in an artist name from being interpreted
        # as regex syntax.
        artistMatch = re.search(
            '.' + re.escape(featMatch[1].lower()),
            songArtists.lower(),
        )
        if artistMatch:
            songArtists = songArtists.lower().replace(artistMatch[0], '')
            songName = songName.replace(featMatch[0], '')
        else:
            # Featured artist not found after a separator in the list:
            # leave both strings untouched.
            print('irregular pattern for feature artists')
    # Drop everything from the first '-' onwards.
    songName = re.sub(r'\s?-.*', '', songName)
    url = url + standardizeString(songArtists, 'songArtists') + '-'
    url = url + standardizeString(songName, 'songName') + '-lyrics'
    ScrapeLyrics(url)
def standardizeString(s, type):
    """Convert an artist list or song title into a URL path fragment.

    The text is transliterated to ASCII with unidecode, lower-cased, each
    whitespace character becomes '-', the characters ( ) ' . ! ? % are
    removed and '&' becomes 'and'.

    Args:
        s: Text to convert.
        type: 'songArtists' capitalizes the first letter and turns each ','
            into '-and-'; 'songName' removes commas. Any other value applies
            only the common steps. (The name shadows the builtin but is kept
            so existing callers keep working.)

    Returns:
        The converted string.
    """
    sdString = unidecode.unidecode(s).lower()
    # Raw strings: '\s' in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    sdString = re.sub(r'\s', '-', sdString)
    sdString = re.sub(r"[()'.!?%]", '', sdString)
    sdString = sdString.replace('&', 'and')
    if type == 'songArtists':
        sdString = sdString.capitalize().replace(',', '-and-')
    elif type == 'songName':
        sdString = sdString.replace(',', '')
    return sdString
# Calls the Spotify API directly and parses the JSON response into the song name and artist names.
# Note: this approach has an extremely low rate limit.
# The access token below is needed only when using the Spotify API directly.
# SPOTIFY_ACCESS_TOKEN = ''
'''
def getCurrentTrack(token):
currentTrack = requests.get(
'https://api.spotify.com/v1/me/player',
headers={
"Authorization" : f"Bearer {token}"
}
)
# Parse JSON and returns a JSON with desired information
SpotResponse = currentTrack.json()
pprint(SpotResponse, indent = 4)
trackID = SpotResponse['item']['id']
trackName = SpotResponse['item']['name']
trackLink = SpotResponse['item']['external_urls']['spotify']
artistList = [artist for artist in SpotResponse['item']['artists']]
trackArtists = ','.join([artist['name'] for artist in artistList])
TrackInfo = {
"id" : trackID,
"name" : trackName,
"artists" : trackArtists,
"link" : trackLink
}
return TrackInfo
'''
def ScrapeLyrics(url):
    """Download a lyrics page and print its lyrics as ASCII text.

    The first element matching 'div[class^="Lyrics__Container"]' or
    '.song_body-lyrics p' is read, bracketed tags such as "[Chorus]" are
    removed, and the result is transliterated with unidecode before
    printing. If the page cannot be fetched or no such element exists,
    'Cannot find lyrics for this song' is printed instead.

    Args:
        url: Address of the lyrics page.
    """
    try:
        # A timeout keeps the polling loop from hanging on a stalled server.
        source = requests.get(url, timeout=10).text
    except requests.RequestException:
        # Network failures are reported like a missing page rather than
        # crashing the caller.
        print('Cannot find lyrics for this song')
        return

    soup = BeautifulSoup(source, 'html.parser')
    # NOTE(review): select_one returns only the first matching element; if
    # the page splits the lyrics over several containers, only the first one
    # is printed - confirm against the current page layout.
    lyricsNode = soup.select_one('div[class^="Lyrics__Container"], .song_body-lyrics p')
    if lyricsNode is None:
        print('Cannot find lyrics for this song')
        return

    songLyrics = lyricsNode.get_text(separator="\n")
    songLyrics = re.sub(r'\[.*?\]', '', songLyrics).strip()
    print(unidecode.unidecode(songLyrics))
def main():
    """Poll the current track forever, pausing one second after each poll.

    The first call passes an empty string as the previous song; every later
    call passes the name returned by the call before it.
    """
    previousTitle = getCurrentInfo('')
    while True:
        previousTitle = getCurrentInfo(previousTitle)
        # Wait before asking again.
        time.sleep(1)
# Start the polling loop only when this file is executed as a script.
if __name__ == "__main__":
    main()