-
Notifications
You must be signed in to change notification settings - Fork 0
/
tagging.py
277 lines (241 loc) · 12.1 KB
/
tagging.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
import re
import os
import literals
from utility import subprocess_popen
from utility import slugify
class Tagging:
def __init__(self, config):
    """Store the run configuration and the initial tag-line encoding.

    Args:
        config: Configuration object; the other methods read ``args``,
            ``decoder``, ``other_tools`` and ``cuefile_encoding`` from it.
    """
    # Encoding tried first when decoding tag lines; it may be replaced
    # later by _detect_tag_line_encoding when another one succeeds.
    self.encoding = literals.utf_8
    self.config = config
def _fix_coding_issue(self, line):
try:
decoded_line = line.decode(self.encoding)
except:
decoded_line = line
finally:
decoded_line = decoded_line.replace('’', '\'', )
return decoded_line
def _titlecase(self, s):
return re.sub(r"[A-Za-z]+('[A-Za-z]+)?", lambda word: word.group(0).capitalize(), s)
def get_album_tags_from_cuefile(self, cuefile_obj):
# Build the tag dictionary of an album described by a cue sheet.
#
# The file named by cuefile_obj.cuefile is opened with
# config.cuefile_encoding and scanned line by line.  The returned tag_dict
# has the single top-level key 1, whose value maps the running track counter
# (incremented on every TRACK line) to a per-track tag dict.
# config.args.performer / genre / album / year, when set, are used instead of
# the values found in the cue sheet; the cover entry is config.args.cover or ''.
#
# Side effects: sets config.single_lossless_file, and
# config.single_lossless_file_name when exactly one FILE entry exists.
# Prints a message and calls exit(-1) when no FILE entry is found, or when
# several FILE entries are present but their count differs from the number
# of tracks.
#
# NOTE(review): this copy of the file has lost its indentation, so the exact
# nesting of the branches below should be confirmed against the original.
config = self.config
cuefile = cuefile_obj.cuefile
encoding = config.cuefile_encoding
tag_dict = dict()
tag_dict[1] = dict()
n_track = 0
n_lossless_file = 0
# Album-level defaults taken from the command line, title-cased with str.title().
global_artist = None if config.args.performer is None else config.args.performer.title()
global_genre = None if config.args.genre is None else config.args.genre.title()
album = None if config.args.album is None else config.args.album.title()
# Becomes True at the first TRACK line; before that, PERFORMER/GENRE/TITLE
# lines describe the album, afterwards they describe the current track.
first_track_detected = False
# detecting multiple lossless files
lossless_files = dict()
with open(cuefile, encoding=encoding) as cuefile_fd:
for cuefile_line in cuefile_fd.readlines():
# title is reset for every line read from the cue sheet.
title = None
# DATE / REM DATE is only consulted when no year was given on the command line.
if config.args.year is None:
year_match = re.match(r'^ *\t*(REM )?DATE *\t*["\']?([0-9]*)["\']? *$', cuefile_line, re.IGNORECASE)
if year_match is not None:
year = year_match.group(2)
continue
else:
year = config.args.year
# PERFORMER: read from the cue sheet unless overridden on the command line.
if config.args.performer is None:
artist_match = re.match(r'^ *\t*(REM )?PERFORMER *\t*["\'](.*)["\'] *$', cuefile_line, re.IGNORECASE)
if artist_match is not None:
artist = artist_match.group(2)
if first_track_detected:
track_tag_dict[literals.artist] = artist.title()
global_artist = artist.title()
continue
elif first_track_detected:
track_tag_dict[literals.artist] = global_artist
# GENRE: same scheme as PERFORMER.
if config.args.genre is None:
genre_match = re.match(r'^ *\t*(REM )?GENRE *\t*["\'](.*)["\'] *$', cuefile_line, re.IGNORECASE)
if genre_match is not None:
genre = genre_match.group(2)
if first_track_detected:
track_tag_dict[literals.genre] = genre.title()
global_genre = genre.title()
continue
elif first_track_detected:
track_tag_dict[literals.genre] = global_genre
# TITLE: a track title once a TRACK line has been seen, otherwise the album
# title (used only when no album was given on the command line).
title_match = re.match(r'^ *\t*(REM )?TITLE *\t*["\'](.*)["\'] *$', cuefile_line, re.IGNORECASE)
if title_match is not None:
if first_track_detected:
title = title_match.group(2)
track_tag_dict[literals.title] = title.title()
track_tag_dict[literals.outfile] = f'{n_track:02d} {slugify(title)}.m4a'
track_tag_dict[literals.album] = album
elif config.args.album is None:
album = title_match.group(2)
continue
# NOTE: despite its name, file_match matches TRACK lines here; each one
# opens a new per-track dict pre-filled with the album-level values.
file_match = re.match(r'^ *\t*(REM )?TRACK .*$', cuefile_line, re.IGNORECASE)
if file_match is not None:
first_track_detected = True
n_track += 1
tag_dict[1][n_track] = dict()
track_tag_dict = tag_dict[1][n_track]
track_tag_dict[literals.global_genre] = global_genre
track_tag_dict[literals.genre] = global_genre
# NOTE(review): year is only assigned above, from a DATE line or from
# config.args.year; presumably a DATE line always precedes the first
# TRACK line when no year is given — confirm.
track_tag_dict[literals.year] = year
track_tag_dict[literals.album] = album
track_tag_dict[literals.comment] = 'Generated by all new lossless2lossy.py!'
track_tag_dict[literals.disctotal] = '1'
track_tag_dict[literals.global_artist] = global_artist
track_tag_dict[literals.artist] = global_artist
track_tag_dict[literals.title] = ''
track_tag_dict[literals.infile] = ''
# NOTE(review): title appears to still be None at this point (reset at the
# top of the loop); this output name is presumably replaced when the
# track's TITLE line is read — confirm.
track_tag_dict[literals.outfile] = f'{n_track:02d} {slugify(title)}.m4a'
if config.args.cover is not None:
track_tag_dict[literals.cover] = config.args.cover
else:
track_tag_dict[literals.cover] = ''
continue
# FILE "name" [WAVE|FLAC|APE]: collect the referenced files, numbered from 1
# in order of appearance.
file_match = re.match(r'^ *\t*FILE *\t*"(.*)" *(WAVE)?(FLAC)?(APE)? *\t*$', cuefile_line, re.IGNORECASE)
if file_match is not None:
n_lossless_file += 1
lossless_files[n_lossless_file] = file_match.group(1)
# Decide between one file per track and a single file for the whole album.
lossless_files_len = len(lossless_files)
if lossless_files_len == 0:
print('malformed cuefile: no lossless file specified')
exit(-1)
elif lossless_files_len > 1:
config.single_lossless_file = False
if lossless_files_len != len(tag_dict[1]):
print('malformed cuefile: number of lossless files differs from number of tracks')
exit(-1)
# The n-th FILE entry is attached to track n; its path is resolved against
# the cue sheet's directory and infile is the same path with a .wav extension.
for track, lossless_file in lossless_files.items():
lossless_file = os.path.join(os.path.dirname(os.path.abspath(cuefile)), lossless_file)
filename, ext = os.path.splitext(lossless_file)
tag_dict[1][track][literals.losslessfile] = lossless_file
tag_dict[1][track][literals.infile] = filename + '.wav'
else:
# Exactly one FILE entry: every track gets a split-trackNN.wav input name.
config.single_lossless_file = True
config.single_lossless_file_name = lossless_files[1]
for track, track_dict in tag_dict[1].items():
track_dict[literals.infile] = f'split-track{track:02d}.wav'
return tag_dict
def get_album_tags_from_dir(self):
# Build the tag dictionary from the audio files of the current directory.
#
# Every file in os.getcwd() ending in .ape, .wv, .flac or .m4a is passed to
# the configured decoder (only literals.ffmpeg is supported; anything else
# raises NotImplementedError) and the tags are parsed from the lines the
# decoder writes to stderr.  Returns tag_dict, keyed by disc number and then
# by track number, each leaf being a per-track tag dict.
# config.args.performer / album / year / genre, when set, are used instead
# of the values reported by the decoder.
# Side effect: sets config.single_lossless_file to False.
#
# NOTE(review): this copy of the file has lost its indentation, so the exact
# nesting of the statements below should be confirmed against the original.
config = self.config
cwd = os.getcwd()
audio_source_files = [f for f in os.listdir(cwd) if f.endswith('.ape')
or f.endswith('.wv')
or f.endswith('.flac')
or f.endswith('.m4a')]
tag_dict = dict()
decode_stderr = list()
# First pass: run the decoder once per file and keep its stderr stream, in
# the same order as audio_source_files.
for track_file in audio_source_files:
if config.decoder == literals.ffmpeg:
# Command line: <configured tool arguments> -i <file> -y -f ffmetadata
decode_cmd = config.other_tools[config.decoder].copy()
decode_cmd.append('-i')
decode_cmd.append(track_file)
decode_cmd.append('-y')
decode_cmd.append('-f')
decode_cmd.append('ffmetadata')
decode_subprocess = subprocess_popen(decode_cmd)
# NOTE(review): if subprocess_popen pipes stderr, waiting before reading
# could block on large output — confirm against the helper.
decode_subprocess.wait()
decode_stderr.append(decode_subprocess.stderr)
else:
raise NotImplementedError()
# Second pass: parse the collected output; track_idx indexes decode_stderr.
track_idx = 0
for track_file in audio_source_files:
# Per-file defaults; disc is 1 unless a disc line says otherwise.
artist = None
album = None
year = None
disc = 1
disctotal = None
title = None
genre = None
track = None
filename, ext = os.path.splitext(track_file)
converted_filename = filename + '.wav'
if config.decoder == literals.ffmpeg:
decode_output = decode_stderr[track_idx]
for line in decode_output.readlines():
# Decode the raw line, then normalise typographic apostrophes.
decoded_line = self._detect_tag_line_encoding(line)
decoded_line = self._fix_coding_issue(decoded_line)
# Each tag is matched as "<spaces>NAME<spaces>: value", case-insensitively.
if config.args.performer is None:
artist_match = re.match(r'^ +ARTIST +: +(.*)$', decoded_line, re.IGNORECASE)
if artist_match is not None:
artist = artist_match.group(1)
continue
else:
artist = config.args.performer
if config.args.album is None:
album_match = re.match(r'^ +ALBUM +: +(.*)$', decoded_line, re.IGNORECASE)
if album_match is not None:
album = album_match.group(1)
continue
else:
album = config.args.album
if config.args.year is None:
# Only the leading four digits of the DATE value are kept.
year_match = re.match(r' +DATE +: +([0-9][0-9][0-9][0-9]).?[0-9]?.?[0-9]?', decoded_line,
re.IGNORECASE)
if year_match is not None:
year = int(year_match.group(1))
continue
else:
year = config.args.year
disc_match = re.match(r'^ +disc +: +([0-9]+)$', decoded_line, re.IGNORECASE)
if disc_match is not None:
disc = int(disc_match.group(1))
continue
disctotal_match = re.match(r'^ +DISCTOTAL +: +([0-9]+)$', decoded_line,
re.IGNORECASE)
if disctotal_match is not None:
disctotal = int(disctotal_match.group(1))
continue
title_match = re.match(r'^ +title +: +(.*)$', decoded_line, re.IGNORECASE)
if title_match is not None:
title = title_match.group(1)
continue
if config.args.genre is None:
genre_match = re.match(r'^ +GENRE +: +(.*)$', decoded_line, re.IGNORECASE)
if genre_match is not None:
genre = genre_match.group(1)
continue
else:
genre = config.args.genre
# Track number; an optional "/total" suffix is accepted and ignored.
track_match = re.match(r'^ +track +: +([0-9]+)/?[0-9]*$', decoded_line, re.IGNORECASE)
if track_match is not None:
track = int(track_match.group(1))
continue
else:
raise NotImplementedError()
# NOTE(review): artist, album, title, genre and track are still None when
# the corresponding tag was not found; .title() and the :02d format below
# would then raise — presumably every input file is expected to carry
# these tags; confirm.
track_tag_dict = dict()
track_tag_dict[literals.artist] = self._titlecase(artist.title())
track_tag_dict[literals.album] = self._titlecase(album.title())
track_tag_dict[literals.year] = year
track_tag_dict[literals.title] = self._titlecase(title.title())
track_tag_dict[literals.genre] = self._titlecase(genre.title())
track_tag_dict[literals.comment] = 'Generated by all new lossless2lossy.py!'
# disctotal is unused from now on because it is inferred from the size of the tag_dict dict
track_tag_dict[literals.disctotal] = disctotal
track_tag_dict[literals.losslessfile] = track_file
track_tag_dict[literals.infile] = converted_filename
track_tag_dict[literals.outfile] = f'{track:02d} {slugify(title)}.m4a'
# Tracks are grouped per disc: tag_dict[disc][track].
if disc not in tag_dict:
tag_dict[disc] = dict()
tag_dict[disc][track] = track_tag_dict
track_idx += 1
# NOTE(review): with the indentation lost it is unclear whether the cover is
# set for every track or only for the last track_tag_dict — confirm.
if config.args.cover is not None:
track_tag_dict[literals.cover] = config.args.cover
else:
track_tag_dict[literals.cover] = ''
config.single_lossless_file = False
return tag_dict
def _detect_tag_line_encoding(self, line):
try:
decoded_line = line.decode(self.encoding)
except UnicodeDecodeError:
encodings = (literals.utf_8, literals.cp1252, literals.windows_1252)
for enc in encodings:
try:
decoded_line = line.decode(enc)
self.encoding = enc
break
except UnicodeDecodeError:
continue
return decoded_line