-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnatc_of_musicgen_finetuner_n&tc.py
375 lines (309 loc) · 24 KB
/
natc_of_musicgen_finetuner_n&tc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# -*- coding: utf-8 -*-
"""NATC of musicgen-finetuner-N&TC.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/12I8sUqhTTlB2neG0cVdffeXIESlNKze4
"""
# @title connect your drive
# Mount the user's Google Drive at /content/drive; the checkpoint folder used
# later in this script lives under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')
import pip
# Pin the torch stack by running pip as a subprocess of the current
# interpreter. Each pip argument must be its own list element: "torch" and
# "-y" as one token would be read as a single (invalid) requirement name.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "pip", "uninstall", "-y", "torch"],
    check=True,
)
subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "torch==2.1.0", "torchvision==0.16.0", "torchaudio==2.1.0",
    ],
    check=True,
)
# @title run this first (fix 4 error)
#!pip uninstall torch -y
#!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0
# Commented out IPython magic to ensure Python compatibility.
# @title downloads and installs
# Installs the Python and system packages used by the rest of the script and
# downloads the Essentia model files that the autolabeller loads by file name.
import git
#!git clone https://github.com/facebookresearch/audiocraft.git
# %cd audiocraft
# NOTE(review): the clone and %cd lines above are commented out, so the
# editable install below targets whatever project is in the current
# directory -- confirm audiocraft is already checked out there.
!pip install -e .
!pip install dora-search
!pip install numba
# System libraries, then the Essentia build with TensorFlow support.
!sudo apt-get install build-essential libeigen3-dev libyaml-dev libfftw3-dev libtag1-dev libchromaprint-dev
!pip install -U essentia-tensorflow
# Model graphs: genre head, embedding extractor, mood/theme head, instrument head.
!curl https://essentia.upf.edu/models/classification-heads/genre_discogs400/genre_discogs400-discogs-effnet-1.pb --output genre_discogs400-discogs-effnet-1.pb
!curl https://essentia.upf.edu/models/feature-extractors/discogs-effnet/discogs-effnet-bs64-1.pb --output discogs-effnet-bs64-1.pb
!curl https://essentia.upf.edu/models/classification-heads/mtg_jamendo_moodtheme/mtg_jamendo_moodtheme-discogs-effnet-1.pb --output mtg_jamendo_moodtheme-discogs-effnet-1.pb
!curl https://essentia.upf.edu/models/classification-heads/mtg_jamendo_instrument/mtg_jamendo_instrument-discogs-effnet-1.pb --output mtg_jamendo_instrument-discogs-effnet-1.pb
# Upgrade the packaging tools, then install yt-dlp from its master branch.
!python -m pip install -U pip setuptools wheel
!python -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
# @title define the name of your model
# Create the folder that receives the uploaded audio and the folder that
# receives the split/labelled dataset.
!mkdir /content/dataset
!mkdir /content/dataset_split
# Name of the fine-tuned model; it is used as the name of the checkpoint
# folder created in Drive.
model_name = "oval" #@param {type:"string"}
# Commented out IPython magic to ensure Python compatibility.
# @title upload dataset
from google.colab import files
# %cd /content/dataset
# Open the Colab upload dialog; the returned mapping is keyed by file name.
uploaded = files.upload()
# Report each uploaded file together with the size of its content.
for filename in uploaded:
    print(f"Uploaded file '{filename}' with length {len(uploaded[filename])} bytes")
# @title prepare audios to train
import os
import json
import random
import librosa
from pydub import AudioSegment
import wave
# import yt_dlp
import re
import shutil
# params
# Folder holding the uploaded audio.
dataset_folder_link = "/content/dataset"
# Folder that receives the split audio plus the train/test manifests.
folder_to_save_dataset_in = "/content/dataset_split"
# os.makedirs(folder_to_save_dataset_in, exist_ok=True)
# Written into the "keywords" field of every dataset entry.
custom_model_keywords = "yeah"
# skip_scraping = youtube_playlist_link == ""
# Scratch copy of the dataset; the splitter deletes files from here, so the
# uploaded originals stay untouched.
temp_output_dir = "/content/temp_dataset"
os.makedirs(temp_output_dir, exist_ok=True)
for item in os.listdir(dataset_folder_link):
    src_path = os.path.join(dataset_folder_link, item)
    dst_path = os.path.join(temp_output_dir, item)
    if os.path.isdir(src_path):
        # dirs_exist_ok lets this cell be re-run after a partial run without
        # failing on directories that were already copied.
        shutil.copytree(src_path, dst_path, dirs_exist_ok=True)
    else:
        shutil.copy2(src_path, dst_path)
from functools import partial
from tqdm import tqdm
# Rebind tqdm so every progress bar created below gets position=0 and
# leave=True without repeating the arguments at each call site.
tqdm = partial(tqdm, position=0, leave=True)
# youtube scraper
# ydl_opts = {
# 'format': 'bestaudio/best',
# 'outtmpl': os.path.join(temp_output_dir, '%(title)s.%(ext)s'),
# 'postprocessors': [{
# 'key': 'FFmpegExtractAudio',
# 'preferredcodec': 'mp3',
# 'preferredquality': '128',
# }],
# 'quiet': True,
# 'extract_flat': True,
# # 'force_generic_extractor': True,
# }
# if not skip_scraping:
# with yt_dlp.YoutubeDL(ydl_opts) as ydl:
# info_dict = ydl.extract_info(youtube_playlist_link, download=False)
# if 'entries' in tqdm(info_dict):
# for i, entry in enumerate(info_dict['entries']):
# print(f"extracting {entry['title']} {entry['url']} ({i}/{len(info_dict['entries'])})")
# try:
# ydl.download([entry['url']])
# except:
# print(f"failed to download {entry['url']}")
# print(len(os.listdir(temp_output_dir)))
# splitter
# Cut every supported audio file staged in temp_output_dir into WAV chunks
# resampled to 44100 Hz under <folder_to_save_dataset_in>/split, then delete
# the staged copy of that file.
print('splitting and resampling...')
split_dir = os.path.join(folder_to_save_dataset_in, 'split')
os.makedirs(split_dir, exist_ok=True)
# Chunk length used for slicing; pydub indexes AudioSegment in milliseconds,
# which is also why the chunk label below divides the offset by 1000.
chunk_ms = 30000
for filename in tqdm(os.listdir(temp_output_dir)):
    # Compare case-insensitively so names such as "TRACK.MP3" are not skipped.
    if not filename.lower().endswith(('.mp3', '.wav', '.flac')):
        continue
    staged_path = os.path.join(temp_output_dir, filename)
    audio = AudioSegment.from_file(staged_path)
    audio = audio.set_frame_rate(44100)
    # splitext drops the whole extension whatever its length (".flac" has
    # five characters), so no stray "." ends up in the chunk name.
    safe_stem = re.sub(r'\W+', '_', os.path.splitext(filename)[0])
    for i in range(0, len(audio), chunk_ms):
        chunk = audio[i:i+chunk_ms]
        chunk.export(os.path.join(split_dir, safe_stem + f" - chunk{i//1000}.wav"), format="wav")
    os.remove(staged_path)
# autolabeller
genre_labels = ["Blues---Boogie Woogie", "Blues---Chicago Blues", "Blues---Country Blues", "Blues---Delta Blues", "Blues---Electric Blues", "Blues---Harmonica Blues", "Blues---Jump Blues", "Blues---Louisiana Blues", "Blues---Modern Electric Blues", "Blues---Piano Blues", "Blues---Rhythm & Blues", "Blues---Texas Blues", "Brass & Military---Brass Band", "Brass & Military---Marches", "Brass & Military---Military", "Children's---Educational", "Children's---Nursery Rhymes", "Children's---Story", "Classical---Baroque", "Classical---Choral", "Classical---Classical", "Classical---Contemporary", "Classical---Impressionist", "Classical---Medieval", "Classical---Modern", "Classical---Neo-Classical", "Classical---Neo-Romantic", "Classical---Opera", "Classical---Post-Modern", "Classical---Renaissance", "Classical---Romantic", "Electronic---Abstract", "Electronic---Acid", "Electronic---Acid House", "Electronic---Acid Jazz", "Electronic---Ambient", "Electronic---Bassline", "Electronic---Beatdown", "Electronic---Berlin-School", "Electronic---Big Beat", "Electronic---Bleep", "Electronic---Breakbeat", "Electronic---Breakcore", "Electronic---Breaks", "Electronic---Broken Beat", "Electronic---Chillwave", "Electronic---Chiptune", "Electronic---Dance-pop", "Electronic---Dark Ambient", "Electronic---Darkwave", "Electronic---Deep House", "Electronic---Deep Techno", "Electronic---Disco", "Electronic---Disco Polo", "Electronic---Donk", "Electronic---Downtempo", "Electronic---Drone", "Electronic---Drum n Bass", "Electronic---Dub", "Electronic---Dub Techno", "Electronic---Dubstep", "Electronic---Dungeon Synth", "Electronic---EBM", "Electronic---Electro", "Electronic---Electro House", "Electronic---Electroclash", "Electronic---Euro House", "Electronic---Euro-Disco", "Electronic---Eurobeat", "Electronic---Eurodance", "Electronic---Experimental", "Electronic---Freestyle", "Electronic---Future Jazz", "Electronic---Gabber", "Electronic---Garage House", "Electronic---Ghetto", "Electronic---Ghetto 
House", "Electronic---Glitch", "Electronic---Goa Trance", "Electronic---Grime", "Electronic---Halftime", "Electronic---Hands Up", "Electronic---Happy Hardcore", "Electronic---Hard House", "Electronic---Hard Techno", "Electronic---Hard Trance", "Electronic---Hardcore", "Electronic---Hardstyle", "Electronic---Hi NRG", "Electronic---Hip Hop", "Electronic---Hip-House", "Electronic---House", "Electronic---IDM", "Electronic---Illbient", "Electronic---Industrial", "Electronic---Italo House", "Electronic---Italo-Disco", "Electronic---Italodance", "Electronic---Jazzdance", "Electronic---Juke", "Electronic---Jumpstyle", "Electronic---Jungle", "Electronic---Latin", "Electronic---Leftfield", "Electronic---Makina", "Electronic---Minimal", "Electronic---Minimal Techno", "Electronic---Modern Classical", "Electronic---Musique Concrète", "Electronic---Neofolk", "Electronic---New Age", "Electronic---New Beat", "Electronic---New Wave", "Electronic---Noise", "Electronic---Nu-Disco", "Electronic---Power Electronics", "Electronic---Progressive Breaks", "Electronic---Progressive House", "Electronic---Progressive Trance", "Electronic---Psy-Trance", "Electronic---Rhythmic Noise", "Electronic---Schranz", "Electronic---Sound Collage", "Electronic---Speed Garage", "Electronic---Speedcore", "Electronic---Synth-pop", "Electronic---Synthwave", "Electronic---Tech House", "Electronic---Tech Trance", "Electronic---Techno", "Electronic---Trance", "Electronic---Tribal", "Electronic---Tribal House", "Electronic---Trip Hop", "Electronic---Tropical House", "Electronic---UK Garage", "Electronic---Vaporwave", "Folk, World, & Country---African", "Folk, World, & Country---Bluegrass", "Folk, World, & Country---Cajun", "Folk, World, & Country---Canzone Napoletana", "Folk, World, & Country---Catalan Music", "Folk, World, & Country---Celtic", "Folk, World, & Country---Country", "Folk, World, & Country---Fado", "Folk, World, & Country---Flamenco", "Folk, World, & Country---Folk", "Folk, World, & 
Country---Gospel", "Folk, World, & Country---Highlife", "Folk, World, & Country---Hillbilly", "Folk, World, & Country---Hindustani", "Folk, World, & Country---Honky Tonk", "Folk, World, & Country---Indian Classical", "Folk, World, & Country---Laïkó", "Folk, World, & Country---Nordic", "Folk, World, & Country---Pacific", "Folk, World, & Country---Polka", "Folk, World, & Country---Raï", "Folk, World, & Country---Romani", "Folk, World, & Country---Soukous", "Folk, World, & Country---Séga", "Folk, World, & Country---Volksmusik", "Folk, World, & Country---Zouk", "Folk, World, & Country---Éntekhno", "Funk / Soul---Afrobeat", "Funk / Soul---Boogie", "Funk / Soul---Contemporary R&B", "Funk / Soul---Disco", "Funk / Soul---Free Funk", "Funk / Soul---Funk", "Funk / Soul---Gospel", "Funk / Soul---Neo Soul", "Funk / Soul---New Jack Swing", "Funk / Soul---P.Funk", "Funk / Soul---Psychedelic", "Funk / Soul---Rhythm & Blues", "Funk / Soul---Soul", "Funk / Soul---Swingbeat", "Funk / Soul---UK Street Soul", "Hip Hop---Bass Music", "Hip Hop---Boom Bap", "Hip Hop---Bounce", "Hip Hop---Britcore", "Hip Hop---Cloud Rap", "Hip Hop---Conscious", "Hip Hop---Crunk", "Hip Hop---Cut-up/DJ", "Hip Hop---DJ Battle Tool", "Hip Hop---Electro", "Hip Hop---G-Funk", "Hip Hop---Gangsta", "Hip Hop---Grime", "Hip Hop---Hardcore Hip-Hop", "Hip Hop---Horrorcore", "Hip Hop---Instrumental", "Hip Hop---Jazzy Hip-Hop", "Hip Hop---Miami Bass", "Hip Hop---Pop Rap", "Hip Hop---Ragga HipHop", "Hip Hop---RnB/Swing", "Hip Hop---Screw", "Hip Hop---Thug Rap", "Hip Hop---Trap", "Hip Hop---Trip Hop", "Hip Hop---Turntablism", "Jazz---Afro-Cuban Jazz", "Jazz---Afrobeat", "Jazz---Avant-garde Jazz", "Jazz---Big Band", "Jazz---Bop", "Jazz---Bossa Nova", "Jazz---Contemporary Jazz", "Jazz---Cool Jazz", "Jazz---Dixieland", "Jazz---Easy Listening", "Jazz---Free Improvisation", "Jazz---Free Jazz", "Jazz---Fusion", "Jazz---Gypsy Jazz", "Jazz---Hard Bop", "Jazz---Jazz-Funk", "Jazz---Jazz-Rock", "Jazz---Latin Jazz", "Jazz---Modal", 
"Jazz---Post Bop", "Jazz---Ragtime", "Jazz---Smooth Jazz", "Jazz---Soul-Jazz", "Jazz---Space-Age", "Jazz---Swing", "Latin---Afro-Cuban", "Latin---Baião", "Latin---Batucada", "Latin---Beguine", "Latin---Bolero", "Latin---Boogaloo", "Latin---Bossanova", "Latin---Cha-Cha", "Latin---Charanga", "Latin---Compas", "Latin---Cubano", "Latin---Cumbia", "Latin---Descarga", "Latin---Forró", "Latin---Guaguancó", "Latin---Guajira", "Latin---Guaracha", "Latin---MPB", "Latin---Mambo", "Latin---Mariachi", "Latin---Merengue", "Latin---Norteño", "Latin---Nueva Cancion", "Latin---Pachanga", "Latin---Porro", "Latin---Ranchera", "Latin---Reggaeton", "Latin---Rumba", "Latin---Salsa", "Latin---Samba", "Latin---Son", "Latin---Son Montuno", "Latin---Tango", "Latin---Tejano", "Latin---Vallenato", "Non-Music---Audiobook", "Non-Music---Comedy", "Non-Music---Dialogue", "Non-Music---Education", "Non-Music---Field Recording", "Non-Music---Interview", "Non-Music---Monolog", "Non-Music---Poetry", "Non-Music---Political", "Non-Music---Promotional", "Non-Music---Radioplay", "Non-Music---Religious", "Non-Music---Spoken Word", "Pop---Ballad", "Pop---Bollywood", "Pop---Bubblegum", "Pop---Chanson", "Pop---City Pop", "Pop---Europop", "Pop---Indie Pop", "Pop---J-pop", "Pop---K-pop", "Pop---Kayōkyoku", "Pop---Light Music", "Pop---Music Hall", "Pop---Novelty", "Pop---Parody", "Pop---Schlager", "Pop---Vocal", "Reggae---Calypso", "Reggae---Dancehall", "Reggae---Dub", "Reggae---Lovers Rock", "Reggae---Ragga", "Reggae---Reggae", "Reggae---Reggae-Pop", "Reggae---Rocksteady", "Reggae---Roots Reggae", "Reggae---Ska", "Reggae---Soca", "Rock---AOR", "Rock---Acid Rock", "Rock---Acoustic", "Rock---Alternative Rock", "Rock---Arena Rock", "Rock---Art Rock", "Rock---Atmospheric Black Metal", "Rock---Avantgarde", "Rock---Beat", "Rock---Black Metal", "Rock---Blues Rock", "Rock---Brit Pop", "Rock---Classic Rock", "Rock---Coldwave", "Rock---Country Rock", "Rock---Crust", "Rock---Death Metal", "Rock---Deathcore", 
"Rock---Deathrock", "Rock---Depressive Black Metal", "Rock---Doo Wop", "Rock---Doom Metal", "Rock---Dream Pop", "Rock---Emo", "Rock---Ethereal", "Rock---Experimental", "Rock---Folk Metal", "Rock---Folk Rock", "Rock---Funeral Doom Metal", "Rock---Funk Metal", "Rock---Garage Rock", "Rock---Glam", "Rock---Goregrind", "Rock---Goth Rock", "Rock---Gothic Metal", "Rock---Grindcore", "Rock---Grunge", "Rock---Hard Rock", "Rock---Hardcore", "Rock---Heavy Metal", "Rock---Indie Rock", "Rock---Industrial", "Rock---Krautrock", "Rock---Lo-Fi", "Rock---Lounge", "Rock---Math Rock", "Rock---Melodic Death Metal", "Rock---Melodic Hardcore", "Rock---Metalcore", "Rock---Mod", "Rock---Neofolk", "Rock---New Wave", "Rock---No Wave", "Rock---Noise", "Rock---Noisecore", "Rock---Nu Metal", "Rock---Oi", "Rock---Parody", "Rock---Pop Punk", "Rock---Pop Rock", "Rock---Pornogrind", "Rock---Post Rock", "Rock---Post-Hardcore", "Rock---Post-Metal", "Rock---Post-Punk", "Rock---Power Metal", "Rock---Power Pop", "Rock---Power Violence", "Rock---Prog Rock", "Rock---Progressive Metal", "Rock---Psychedelic Rock", "Rock---Psychobilly", "Rock---Pub Rock", "Rock---Punk", "Rock---Rock & Roll", "Rock---Rockabilly", "Rock---Shoegaze", "Rock---Ska", "Rock---Sludge Metal", "Rock---Soft Rock", "Rock---Southern Rock", "Rock---Space Rock", "Rock---Speed Metal", "Rock---Stoner Rock", "Rock---Surf", "Rock---Symphonic Rock", "Rock---Technical Death Metal", "Rock---Thrash", "Rock---Twist", "Rock---Viking Metal", "Rock---Yé-Yé", "Stage & Screen---Musical", "Stage & Screen---Score", "Stage & Screen---Soundtrack", "Stage & Screen---Theme"]
mood_theme_classes = ["action", "adventure", "advertising", "background", "ballad", "calm", "children", "christmas", "commercial", "cool", "corporate", "dark", "deep", "documentary", "drama", "dramatic", "dream", "emotional", "energetic", "epic", "fast", "film", "fun", "funny", "game", "groovy", "happy", "heavy", "holiday", "hopeful", "inspiring", "love", "meditative", "melancholic", "melodic", "motivational", "movie", "nature", "party", "positive", "powerful", "relaxing", "retro", "romantic", "sad", "sexy", "slow", "soft", "soundscape", "space", "sport", "summer", "trailer", "travel", "upbeat", "uplifting"]
instrument_classes = ["accordion", "acousticbassguitar", "acousticguitar", "bass", "beat", "bell", "bongo", "brass", "cello", "clarinet", "classicalguitar", "computer", "doublebass", "drummachine", "drums", "electricguitar", "electricpiano", "flute", "guitar", "harmonica", "harp", "horn", "keyboard", "oboe", "orchestra", "organ", "pad", "percussion", "piano", "pipeorgan", "rhodes", "sampler", "saxophone", "strings", "synthesizer", "trombone", "trumpet", "viola", "violin", "voice"]
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D
import numpy as np
def filter_predictions(predictions, class_list, threshold=0.1):
    """Pick the classes whose mean score is above *threshold*.

    Args:
        predictions: 2-D array-like of scores; it is averaged over axis 0 and
            the remaining axis is indexed in parallel with *class_list*
            (presumably one row per analysed frame -- confirm with caller).
        class_list: Sequence of labels, one per score column.
        threshold: Strict lower bound a mean score must exceed to be kept.

    Returns:
        A ``(labels, values)`` pair of lists ordered by decreasing mean score.
    """
    mean_scores = np.mean(predictions, axis=0)
    labels = []
    values = []
    # argsort is ascending, so walk it reversed to visit best scores first.
    for idx in np.argsort(mean_scores)[::-1]:
        score = mean_scores[idx]
        if score > threshold:
            labels.append(class_list[idx])
            values.append(score)
    return labels, values
def make_comma_separated_unique(tags):
    """Return the tags as one ``', '``-separated string without duplicates.

    Entries that themselves contain ``', '`` are split into separate tags
    first. The first occurrence of each tag keeps its position.

    Args:
        tags: Iterable of tag strings.

    Returns:
        A single string with the unique tags joined by ``', '``.
    """
    flattened = ', '.join(tags).split(', ')
    # dict keys preserve insertion order, giving order-stable de-duplication.
    return ', '.join(dict.fromkeys(flattened))
def get_audio_features(audio_filename):
    """Tag one audio file with genres, moods and instruments.

    The file is loaded as mono at 16000 Hz, turned into embeddings by the
    discogs-effnet graph, and the embeddings are fed to three classifier
    graphs. All graph files are opened by relative file name.

    Args:
        audio_filename: Path of the audio file to analyse.

    Returns:
        dict with three keys: ``'genres'`` and ``'moods'`` hold a
        ``', '``-separated string of unique labels, ``'instruments'`` holds a
        list of labels.
    """
    waveform = MonoLoader(filename=audio_filename, sampleRate=16000, resampleQuality=4)()
    effnet = TensorflowPredictEffnetDiscogs(graphFilename="discogs-effnet-bs64-1.pb", output="PartitionedCall:1")
    embeddings = effnet(waveform)

    # Genres: labels look like "Parent---Style"; both halves become tags.
    genre_head = TensorflowPredict2D(graphFilename="genre_discogs400-discogs-effnet-1.pb", input="serving_default_model_Placeholder", output="PartitionedCall:0")
    genre_hits, _ = filter_predictions(genre_head(embeddings), genre_labels)
    genre_parts = ', '.join(genre_hits).replace("---", ", ").split(', ')
    genres = make_comma_separated_unique(genre_parts)

    # Moods/themes use a lower threshold than the other two heads.
    mood_head = TensorflowPredict2D(graphFilename="mtg_jamendo_moodtheme-discogs-effnet-1.pb")
    mood_hits, _ = filter_predictions(mood_head(embeddings), mood_theme_classes, threshold=0.05)
    moods = make_comma_separated_unique(mood_hits)

    # Instruments are returned as a list, not as a joined string.
    instrument_head = TensorflowPredict2D(graphFilename="mtg_jamendo_instrument-discogs-effnet-1.pb")
    instrument_hits, _ = filter_predictions(instrument_head(embeddings), instrument_classes)

    return {
        'genres': genres,
        'moods': moods,
        'instruments': instrument_hits,
    }
# split off from the previous cell (the original note is cut off after "to")
# Label every chunk in <folder_to_save_dataset_in>/split and write one JSON
# line per chunk to train.jsonl or test.jsonl.
print('autolabelling...')
dataset_path = os.path.join(folder_to_save_dataset_in, 'split')
# Names for the twelve chroma rows, in the order used to index the summed chroma.
pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
with open(os.path.join(folder_to_save_dataset_in, "train.jsonl"), "w") as train_file, \
        open(os.path.join(folder_to_save_dataset_in, "test.jsonl"), "w") as eval_file:
    dset = os.listdir(dataset_path)
    random.shuffle(dset)
    for filename in tqdm(dset):
        chunk_path = os.path.join(dataset_path, filename)
        try:
            result = get_audio_features(chunk_path)
        except Exception as exc:
            # Tagging is best-effort: keep the chunk with empty tags, but say
            # so instead of hiding the failure (and let Ctrl-C through).
            print(f"autolabelling failed for {filename}: {exc}")
            result = {"genres": [], "moods": [], "instruments": []}
        y, sr = librosa.load(chunk_path)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = np.round(tempo)
        # beat_track may hand back a scalar or a one-element array depending
        # on the librosa version; take the first element explicitly rather
        # than relying on float() of an array.
        bpm = float(np.atleast_1d(tempo)[0])
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        # The pitch class with the most summed chroma energy is used as key.
        key = pitch_classes[int(np.argmax(np.sum(chroma, axis=1)))]
        length = librosa.get_duration(y=y, sr=sr)
        entry = {"key": f"{key}",
                 "artist": "",
                 "sample_rate": 44100,
                 "file_extension": "wav",
                 "description": "",
                 "keywords": f"{custom_model_keywords}",
                 "duration": length,
                 "bpm": bpm,
                 "genre": result.get('genres', []),
                 "title": filename,
                 "name": "",
                 "instrument": result.get('instruments', []),
                 "moods": result.get('moods', []),
                 "path": chunk_path
                 }
        # Random ~85/15 split between the training and evaluation manifests.
        # NOTE(review): with very few chunks one of the two files can end up
        # empty -- confirm the trainer tolerates that.
        if random.random() < 0.85:
            train_file.write(json.dumps(entry) + '\n')
        else:
            eval_file.write(json.dumps(entry) + '\n')
# Reset the current CUDA device through numba (presumably to release GPU
# memory held by the tagging models before training -- confirm).
from numba import cuda
device = cuda.get_current_device()
device.reset()

# Write the dataset config that is later copied into audiocraft's
# config/dset/audio folder. The header line is assembled from package_str so
# that the literal "#@" + "package" text only appears in the written file.
config_path = os.path.join(folder_to_save_dataset_in, "train.yaml")
package_str = "package"
# The entries must be nested under "datasource"; written flush-left they
# would become top-level keys and leave "datasource" empty.
yaml_contents = f"""#@{package_str} __global__
datasource:
  max_channels: 2
  max_sample_rate: 44100
  evaluate: egs/eval
  generate: egs/train
  train: egs/train
  valid: egs/eval
"""
with open(config_path, 'w') as yaml_file:
    yaml_file.write(yaml_contents)
# Commented out IPython magic to ensure Python compatibility.
# @title finetuner
# %cd /content/audiocraft
# Folder that holds train.jsonl, test.jsonl and train.yaml.
folder_dataset_is_saved_in = "/content/dataset_split"
# @markdown # define the numbers of epochs:
# Inserted into the training command as optim.epochs.
num_epochs = 1 # @param {type:"number"}
# Checkpoint folder in the mounted Drive, named after the model.
folder_path = os.path.join('/content/drive/My Drive/', model_name)
# Function to check and create the folder
def create_folder(path):
    """Create the folder at *path* and report the outcome on stdout.

    Args:
        path: Directory to create; missing parent directories are created too.

    Returns:
        None. Prints a confirmation naming the last component of *path* when
        the folder was created, or an error message when something already
        exists at *path* (no exception is raised in that case).
    """
    # Take the displayed name from the argument rather than from a module
    # global, so the message always matches the folder actually created.
    folder_name = os.path.basename(os.path.normpath(path))
    try:
        # Attempt the creation directly instead of checking first; this
        # avoids a gap between the existence check and the creation.
        os.makedirs(path)
    except FileExistsError:
        print("Error: That model already exists in your Drive.")
    else:
        print(f"Folder '{folder_name}' created in your Drive.")
# Call the function
create_folder(folder_path)
# Exported checkpoints are written to the Drive folder created above.
folder_to_save_checkpoints_in = folder_path
# Selects between the stereo and the mono training command below.
train_in_stereo = True # @param {type:"boolean"}
# Place the manifests and the dataset config where the training command
# expects them: egs/train, egs/eval and config/dset/audio/train.yaml.
os.makedirs("/content/audiocraft/egs/train", exist_ok=True)
os.makedirs("/content/audiocraft/egs/eval", exist_ok=True)
!cp "$folder_dataset_is_saved_in/train.jsonl" /content/audiocraft/egs/train/data.jsonl
!cp "$folder_dataset_is_saved_in/test.jsonl" /content/audiocraft/egs/eval/data.jsonl
!cp "$folder_dataset_is_saved_in/train.yaml" /content/audiocraft/config/dset/audio/train.yaml
# %env USER=nobody
# Assemble the dora training command. The arguments shared by the mono and
# stereo runs are written once; only the pretrained checkpoint and the
# stereo-specific overrides depend on train_in_stereo.
shared_head = (
    "dora -P audiocraft run "
    " solver=musicgen/musicgen_base_32khz"
    " model/lm/model_scale=small"
)
shared_tail = (
    " conditioner=text2music"
    " dset=audio/train"
    " dataset.num_workers=2"
    " dataset.valid.num_samples=1"
    " dataset.batch_size=2"
    " schedule.cosine.warmup=8"
    " optim.optimizer=adamw"
    " optim.lr=1e-4"
    f" optim.epochs={num_epochs}"
    " optim.updates_per_epoch=1000"
    " optim.adam.weight_decay=0.01"
    " generate.lm.prompted_samples=False"
    " generate.lm.gen_gt_samples=True"
)
if train_in_stereo:
    checkpoint_arg = " continue_from=//pretrained/facebook/musicgen-stereo-small"
    # stereo configs
    stereo_args = (
        " channels=2"
        " interleave_stereo_codebooks.use=True"
        " transformer_lm.n_q=8"
        " transformer_lm.card=2048"
        " codebooks_pattern.delay.delays='[0, 0, 1, 1, 2, 2, 3, 3]'"
    )
else:
    checkpoint_arg = " continue_from=//pretrained/facebook/musicgen-small"
    stereo_args = ""
command = shared_head + checkpoint_arg + shared_tail + stereo_args
!{command}
# @title export checkpoint for inference
import os
# Folder that is scanned for experiment sub-folders.
# NOTE(review): the "audiocraft_nobody" part presumably depends on USER being
# "nobody" when training ran -- confirm, since the %env line is commented out.
root_dir = "/tmp/audiocraft_nobody/xps/"
# Collect the sub-directories of root_dir as full paths.
subfolders = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
joined_paths = [os.path.join(root_dir, subfolder) for subfolder in subfolders]
# Pick the most recently modified experiment folder; note that SIG holds its
# full path, not just the folder name.
SIG = max(joined_paths, key=os.path.getmtime)
from audiocraft.utils import export
from audiocraft import train
xp = train.main.get_xp_from_sig(SIG)
# Export the language-model checkpoint and the pretrained compression model
# into the checkpoint folder as state_dict.bin / compression_state_dict.bin.
export.export_lm(xp.folder / 'checkpoint.th', os.path.join(folder_to_save_checkpoints_in, 'state_dict.bin'))
export.export_pretrained_compression_model('facebook/encodec_32khz', os.path.join(folder_to_save_checkpoints_in, 'compression_state_dict.bin'))
# @title generator
# Folder the model is loaded from; defaults to the folder the export step wrote to.
folder_checkpoints_are_saved_in = folder_to_save_checkpoints_in
# folder_checkpoints_are_saved_in = "/content/drive/MyDrive/bill-avans-alone-again"
# Text prompt passed to the model for every generated track.
prompt = "beautiful melody" #@param {type:"string"}
# Passed as the duration generation parameter (presumably seconds -- confirm).
generate_length = 10 #@param {type:"number"}
from audiocraft.data.audio import audio_write
import IPython.display as ipd
from tempfile import NamedTemporaryFile
import gradio as gr
from google.colab import files
from audiocraft.data.audio import audio_write
from audiocraft.models import MusicGen
# Load the exported model from the checkpoint folder and generate num_tracks
# clips for the prompt. Each clip is saved, played inline and downloaded.
musicgen = MusicGen.get_pretrained(folder_checkpoints_are_saved_in)
musicgen.set_generation_params(duration=generate_length)
num_tracks = 3 #@param {type: "number"}
for i in range(num_tracks):  # number of tracks
    wavs = musicgen.generate([prompt])
    for idx, one_wav in enumerate(wavs):
        # The stem includes the track index; idx alone restarts at 0 for every
        # track, which would make each track overwrite the previous file.
        audio_write(f'{i}_{idx}', one_wav.cpu(), musicgen.sample_rate, strategy="loudness", loudness_compressor=True)
        # Play back at the model's own sample rate rather than a fixed value.
        ipd.display(ipd.Audio(one_wav.cpu(), rate=musicgen.sample_rate))
    output = wavs.detach().cpu().float()[0]
    # delete=False keeps the temporary file on disk after the handle closes,
    # so it can still be downloaded below.
    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
        audio_write(file.name, output, musicgen.sample_rate, strategy="loudness", add_suffix=False)
        waveform_video = gr.make_waveform(file.name)
    files.download(file.name)