-
Notifications
You must be signed in to change notification settings - Fork 111
/
dataset.py
88 lines (72 loc) · 2.8 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
Preprocesses MIDI files
"""
import numpy as np
import math
import random
from joblib import Parallel, delayed
import multiprocessing
from constants import *
from midi_util import load_midi
from util import *
def compute_beat(beat, notes_in_bar):
    """
    One-hot encode the position of `beat` inside its bar.

    The position is `beat` modulo `notes_in_bar`, and the encoding has
    `notes_in_bar` slots (as produced by the `one_hot` helper).
    """
    position_in_bar = beat % notes_in_bar
    return one_hot(position_in_bar, notes_in_bar)
def compute_completion(beat, len_melody):
    """
    Return how far through the melody `beat` is, as a one-element array.

    The single value is the ratio `beat / len_melody`.
    """
    fraction_done = beat / len_melody
    return np.array([fraction_done])
def compute_genre(genre_id):
    """
    Build the style vector that represents a whole genre.

    The result has NUM_STYLES entries, all zero except for the slots that
    belong to the styles of `genre_id` (looked up in the module-level
    `styles` grouping). Those slots share the weight equally, so they sum
    to one.
    """
    # Offset of this genre's first style = total number of styles in all
    # genres that come before it.
    offset = 0
    for index, group in enumerate(styles):
        if index >= genre_id:
            break
        offset += len(group)

    count = len(styles[genre_id])
    genre_vector = np.zeros((NUM_STYLES,))
    genre_vector[offset:offset + count] = 1 / count
    return genre_vector
def stagger(data, time_steps):
    """
    Cut a sequence into overlapping training windows and their targets.

    The sequence is first left-padded with `time_steps` zero events (shaped
    like the first element) so that the very first real event can be
    predicted. Windows of `time_steps` events are then taken every
    NOTES_PER_BAR steps; each target window is the matching input window
    shifted forward by one step.

    Returns a pair `(inputs, targets)` of equally long lists of windows.
    """
    # Buffer training for the first event.
    blank = np.zeros_like(data[0])
    padded = [blank for _ in range(time_steps)] + list(data)

    # One window per measure.
    window_starts = range(0, len(padded) - time_steps, NOTES_PER_BAR)
    inputs = [padded[start:start + time_steps] for start in window_starts]
    targets = [padded[start + 1:start + time_steps + 1] for start in window_starts]
    return inputs, targets
def load_all(styles, batch_size, time_steps):
    """
    Load every MIDI file of the given styles as piano-roll training data.
    (For Keras)

    `styles` is a nested collection (groups of styles); it is flattened and
    each style gets a one-hot id over NUM_STYLES. Sequences shorter than
    `time_steps` are skipped. `batch_size` is accepted but not used here.

    Returns `(inputs, outputs)` where `inputs` is
    `[note_data, note_target, beat_data, style_data]` and `outputs` is
    `[note_target]`, all as numpy arrays.
    """
    note_windows, beat_windows, style_windows, target_windows = [], [], [], []

    # TODO: Can speed this up with better parallel loading. Order guarantee.
    flat_styles = [style for group in styles for style in group]

    for style_id, style in enumerate(flat_styles):
        style_hot = one_hot(style_id, NUM_STYLES)

        # Load all files of this style concurrently into music sequences.
        loader = Parallel(n_jobs=multiprocessing.cpu_count(), backend='threading')
        seqs = loader(delayed(load_midi)(path) for path in get_all_files([style]))

        for seq in seqs:
            # Too short to fill even a single window.
            if len(seq) < time_steps:
                continue

            # Restrict the MIDI data to the supported note range.
            seq = clamp_midi(seq)
            seq_len = len(seq)

            # Note inputs and their one-step-ahead labels.
            train_data, label_data = stagger(seq, time_steps)
            note_windows.extend(train_data)
            target_windows.extend(label_data)

            # Beat position and style features, windowed the same way.
            beats = [compute_beat(step, NOTES_PER_BAR) for step in range(seq_len)]
            beat_windows.extend(stagger(beats, time_steps)[0])
            style_windows.extend(stagger([style_hot] * seq_len, time_steps)[0])

    note_data = np.array(note_windows)
    beat_data = np.array(beat_windows)
    style_data = np.array(style_windows)
    note_target = np.array(target_windows)
    return [note_data, note_target, beat_data, style_data], [note_target]
def clamp_midi(sequence):
    """
    Keep only the notes between MIN_NOTE (inclusive) and MAX_NOTE
    (exclusive) along the second axis of `sequence`.
    """
    note_window = slice(MIN_NOTE, MAX_NOTE)
    return sequence[:, note_window, :]
def unclamp_midi(sequence):
    """
    Undo `clamp_midi` so that note indices line up with MIDI note values
    again: MIN_NOTE zero-filled entries are inserted in front of the second
    axis. The other axes are left unchanged.
    """
    pad_widths = ((0, 0), (MIN_NOTE, 0), (0, 0))
    return np.pad(sequence, pad_widths, mode='constant')