Skip to content

Commit

Permalink
Make training of segnet, unet and classifiers easier by providing a s…
Browse files Browse the repository at this point in the history
…ingle entry point to all training steps (#54)

* Fixed 'TypeError: Cannot convert 4.999899999999999e-07 to EagerTensor of dtype int64' in training, fixes #39

https://stackoverflow.com/questions/76511182/tensorflow-custom-learning-rate-scheduler-gives-unexpected-eagertensor-type-erro

* --format was deprecated in ruff and replaced wtih --output-format

* Added a single entry point to train all models

* Added convenience wrapper for oemer

* Tried to figure out the definitions for the dense dataset and to document them in code

There is likely an official definition somewhere but I just couldn't find it. So I looked at example and tried to reconstruct the mapping. Unknown basically means that I just couldn't see the symbol on the picture.

* Decreased queue sizes as otherwise the training process crashed with an out of memory exception after it used up about 30GB of memory

* Added model outputs to git ignore

* Added checks for dataset folders

* Using default training params

* Added workarounds for removal of np.float

* Using dataset definitions

* Added type annotations

* Added a train_all_rests even if the resulting model is right now not used in oemer

* segnet and unet should now pick the correct model

* Changed label definitions from what appears to be used in oemer right now

* With this commit the resulting arch.json matches the one inside of oemer/ceckpoints/seg_net/arch.json

* Avoid that the OMR processes finishes prematurely (#53)

* Fixed typos in comments

* IndexError while scanning for a dot should not abort the whole process

* Bound check while getting the note label

* Added check if label is in the note_type_map

* Filter staffs instead of aborting with an exception

* Bound check during symbol extraction

* Marking notes as invalid instead of aborting with an exception

* Bound check

* Fixed type error

* Fixed TypeError at start of unet or segnet training (#52)

* Fixed 'TypeError: Cannot convert 4.999899999999999e-07 to EagerTensor of dtype int64' in training, fixes #39

https://stackoverflow.com/questions/76511182/tensorflow-custom-learning-rate-scheduler-gives-unexpected-eagertensor-type-erro

* --format was deprecated in ruff and replaced wtih --output-format

* HoughLinesP can return None if no lines are found

* Fixed error which happens if no rest bboxes were found

* Limited try/except block

* Fixed typo

* Use fixed versions for the linter dependencies to avoid that results are different for the same source code level on different test runs due to update of the dependencies

* Fixed type errors which came up with the recent version of cv2

* Going back to the newest version of ruff and mypy as the type errors were introduced by cv2

* Fix install from github command in README

---------

Co-authored-by: Yoyo <[email protected]>
  • Loading branch information
liebharc and BreezeWhite authored Feb 17, 2024
1 parent 681d5df commit 072b69e
Show file tree
Hide file tree
Showing 10 changed files with 376 additions and 124 deletions.
21 changes: 20 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,23 @@ checkpoints/

*.musicxml
*.mp3
*.swp
*.swp

# Model training datasets
/ds2_dense
/CvcMuscima-Distortions

# Model training checkpoints and outputs
/seg_unet
/test_data
/train_data
/*.model
/*.h5
/*.json

/segnet_*
/unet_*
/rests_*
/all_rests_*
/sfn_*
/clef_*
3 changes: 3 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from oemer import ete

ete.main()
23 changes: 10 additions & 13 deletions oemer/build_label.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import sys
import os
import random
from PIL import Image

import cv2
import numpy as np

from .constant_min import CLASS_CHANNEL_MAP
from .constant_min import CLASS_CHANNEL_MAP, CHANNEL_NUM
from .dense_dataset_definitions import DENSE_DATASET_DEFINITIONS as DEF


HALF_WHOLE_NOTE = [39, 41, 42, 43, 45, 46, 47, 49]
HALF_WHOLE_NOTE = DEF.NOTEHEADS_HOLLOW + DEF.NOTEHEADS_WHOLE + [42]


def fill_hole(gt, tar_color):
Expand Down Expand Up @@ -75,12 +77,12 @@ def build_label(seg_path):
color_set = set(np.unique(arr))
color_set.remove(0) # Remove background color from the candidates

total_chs = len(set(CLASS_CHANNEL_MAP.values())) + 2 # Plus 'background' and 'others' channel.
total_chs = CHANNEL_NUM
output = np.zeros(arr.shape + (total_chs,))

output[..., 0] = np.where(arr==0, 1, 0)
for color in color_set:
ch = CLASS_CHANNEL_MAP.get(color, -1)
ch = CLASS_CHANNEL_MAP.get(color, 0)
if (ch != 0) and color in HALF_WHOLE_NOTE:
note = fill_hole(arr, color)
output[..., ch] += note
Expand All @@ -101,12 +103,7 @@ def find_example(dataset_path: str, color: int, max_count=100, mark_value=200):


if __name__ == "__main__":
seg_folder = '/media/kohara/ADATA HV620S/dataset/ds2_dense/segmentation'
files = os.listdir(seg_folder)
path = os.path.join(seg_folder, random.choice(files))
#out = build_label(path)

color = 45
arr = find_example(color) # type: ignore
arr = np.where(arr==200, color, arr)
out = fill_hole(arr, color)
seg_folder = 'ds2_dense/segmentation'
color = int(sys.argv[1])
with_background, without_background = find_example(seg_folder, color)
cv2.imwrite("example.png", with_background)
52 changes: 51 additions & 1 deletion oemer/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def _collect(color, out_path, samples=100):
img = imaugs.resize(Image.fromarray(patch.astype(np.uint8)), width=tar_w, height=tar_h)

seed = random.randint(0, 1000)
np.float = float # Monkey patch to workaround removal of np.float
img = imaugs.perspective_transform(img, seed=seed, sigma=3)
img = np.where(np.array(img)>0, 255, 0)
Image.fromarray(img.astype(np.uint8)).save(out_path / f"{idx}.png")
Expand Down Expand Up @@ -118,10 +119,12 @@ def train(folders):
model.fit(train_x, train_y)
return model, class_map

def build_class_map(folders):
return {idx: Path(ff).name for idx, ff in enumerate(folders)}

def train_tf(folders):
import tensorflow as tf
class_map = {idx: Path(ff).name for idx, ff in enumerate(folders)}
class_map = build_class_map(folders)
train_x = []
train_y = []
samples = None
Expand Down Expand Up @@ -234,6 +237,53 @@ def predict(region, model_name):
pred = model.predict(np.array(region).reshape(1, -1))
return m_info['class_map'][pred[0]]

def train_rests_above8(filename = "rests_above8.model"):
folders = ["rest_8th", "rest_16th", "rest_32nd", "rest_64th"]
model, class_map = train_tf([f"train_data/{folder}" for folder in folders])
test_tf(model, [f"test_data/{folder}" for folder in folders])
output = {'model': model, 'w': TARGET_WIDTH, 'h': TARGET_HEIGHT, 'class_map': class_map}
pickle.dump(output, open(filename, "wb"))


def train_rests(filename = "rests.model"):
folders = ["rest_whole", "rest_quarter", "rest_8th"]
model, class_map = train_tf([f"train_data/{folder}" for folder in folders])
test_tf(model, [f"test_data/{folder}" for folder in folders])
output = {'model': model, 'w': TARGET_WIDTH, 'h': TARGET_HEIGHT, 'class_map': class_map}
pickle.dump(output, open(filename, "wb"))


def train_all_rests(filename = "all_rests.model"):
folders = ["rest_whole", "rest_quarter", "rest_8th", "rest_16th", "rest_32nd", "rest_64th"]
model, class_map = train_tf([f"train_data/{folder}" for folder in folders])
test_tf(model, [f"test_data/{folder}" for folder in folders])
output = {'model': model, 'w': TARGET_WIDTH, 'h': TARGET_HEIGHT, 'class_map': class_map}
pickle.dump(output, open(filename, "wb"))


def train_sfn(filename = "sfn.model"):
folders = ["sharp", "flat", "natural"]
model, class_map = train_tf([f"train_data/{folder}" for folder in folders])
test_tf(model, [f"test_data/{folder}" for folder in folders])
output = {'model': model, 'w': TARGET_WIDTH, 'h': TARGET_HEIGHT, 'class_map': class_map}
pickle.dump(output, open(filename, "wb"))


def train_clefs(filename = "clef.model"):
folders = ["gclef", "fclef"]
model, class_map = train_tf([f"train_data/{folder}" for folder in folders])
test_tf(model, [f"test_data/{folder}" for folder in folders])
output = {'model': model, 'w': TARGET_WIDTH, 'h': TARGET_HEIGHT, 'class_map': class_map}
pickle.dump(output, open(filename, "wb"))


def train_noteheads():
folders = ["notehead_solid", "notehead_hollow"]
model, class_map = train_tf([f"train_data/{folder}" for folder in folders])
test_tf(model, [f"test_data/{folder}" for folder in folders])
output = {'model': model, 'w': TARGET_WIDTH, 'h': TARGET_HEIGHT, 'class_map': class_map}
pickle.dump(output, open(f"notehead.model", "wb"))


if __name__ == "__main__":
samples = 400
Expand Down
30 changes: 16 additions & 14 deletions oemer/constant.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
from enum import Enum, auto

from oemer.dense_dataset_definitions import DENSE_DATASET_DEFINITIONS as DEF


CLASS_CHANNEL_LIST = [
[165, 2], # staff, ledgerLine
[35, 37, 38], # noteheadBlack
[39, 41, 42], # noteheadHalf
[43, 45, 46, 47, 49], # noteheadWhole
[64, 58, 59, 60, 66, 63, 69, 68, 61, 62, 67, 65], # flags
[146, 51], # beam, augmentationDot
[3, 52], # barline, stem
[74, 70, 72, 76], # accidentalSharp, accidentalFlat, accidentalNatural, accidentalDoubleSharp
[80, 78, 79], # keySharp, keyFlat, keyNatural
[97, 100, 99, 98, 101, 102, 103, 104, 96, 163], # rests
[136, 156, 137, 155, 152, 151, 153, 154, 149, 155], # tuplets
[145, 147], # slur, tie
[10, 13, 12, 19, 11, 20], # clefs
[25, 24, 29, 22, 23, 28, 27, 34, 30, 21, 33, 26], # timeSigs
DEF.STAFF + DEF.LEDGERLINE,
DEF.NOTEHEADS_SOLID + [38],
DEF.NOTEHEADS_HOLLOW + [42],
DEF.NOTEHEADS_WHOLE + [46],
DEF.FLAG_DOWN + DEF.FLAG_UP + [59, 65],
DEF.BEAM + DEF.DOT,
DEF.BARLINE_BETWEEN + DEF.STEM,
DEF.ALL_ACCIDENTALS,
DEF.ALL_KEYS,
DEF.ALL_RESTS + [163],
DEF.TUPETS,
DEF.SLUR_AND_TIE,
DEF.ALL_CLEFS + DEF.NUMBERS,
DEF.TIME_SIGNATURE_SUBSET
]

CLASS_CHANNEL_MAP = {
Expand Down
17 changes: 7 additions & 10 deletions oemer/constant_min.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from oemer.dense_dataset_definitions import DENSE_DATASET_DEFINITIONS as DEF


CLASS_CHANNEL_LIST = [
[165, 2], # staff, ledgerLine
[35, 37, 38, 39, 41, 42, 43, 45, 46, 47, 49, 52], # notehead, stem
[
64, 58, 60, 66, 63, 69, 68, 61, 62, 67, 65, 59, 146, # flags, beam
97, 100, 99, 98, 101, 102, 103, 104, 96, 163, # rests
80, 78, 79, 74, 70, 72, 76, 3, # sharp, flat, natural, barline
10, 13, 12, 19, 11, 20, 51, # clefs, augmentationDot,
25, 24, 29, 22, 23, 28, 27, 34, 30, 21, 33, 26, # timeSigs
]
DEF.STEM + DEF.ALL_RESTS_EXCEPT_LARGE + DEF.BARLINE_BETWEEN + DEF.BARLINE_END,
DEF.NOTEHEADS_ALL,
DEF.ALL_CLEFS + DEF.ALL_KEYS + DEF.ALL_ACCIDENTALS,
]

CLASS_CHANNEL_MAP = {
Expand All @@ -16,4 +13,4 @@
for color in colors
}

CHANNEL_NUM = len(CLASS_CHANNEL_LIST) + 2
CHANNEL_NUM = len(CLASS_CHANNEL_LIST) + 1 # Plus 'background' and 'others' channel.
78 changes: 78 additions & 0 deletions oemer/dense_dataset_definitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
class Symbols:
BACKGROUND = [0]
LEDGERLINE = [2]
BARLINE_BETWEEN = [3]
BARLINE_END = [4]
ALL_BARLINES = BARLINE_BETWEEN + BARLINE_END
REPEAT_DOTS = [7]
G_GLEF = [10]
C_CLEF = [11, 12]
F_CLEF = [13]
ALL_CLEFS = G_GLEF + C_CLEF + F_CLEF
NUMBERS = [19, 20]
TIME_SIGNATURE_SUBSET = [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34]
TIME_SIGNATURE = TIME_SIGNATURE_SUBSET + [31, 32] # Oemer hasn't used these in the past
NOTEHEAD_FULL_ON_LINE = [35]
UNKNOWN = [36, 38, 40, 128, 143, 144, 148, 150, 157, 159, 160, 161, 162, 163, 164, 167, 170, 171]
NOTEHEAD_FULL_BETWEEN_LINES = [37]
NOTEHEAD_HOLLOW_ON_LINE = [39]
NOTEHEAD_HOLLOW_BETWEEN_LINE = [41]
WHOLE_NOTE_ON_LINE = [43]
WHOLE_NOTE_BETWEEN_LINE = [45]
DOUBLE_WHOLE_NOTE_ON_LINE = [47]
DOUBLE_WHOLE_NOTE_BETWEEN_LINE = [49]
NOTEHEADS_SOLID = NOTEHEAD_FULL_ON_LINE + NOTEHEAD_FULL_BETWEEN_LINES
NOTEHEADS_HOLLOW = NOTEHEAD_HOLLOW_ON_LINE + NOTEHEAD_HOLLOW_BETWEEN_LINE
NOTEHEADS_WHOLE = WHOLE_NOTE_ON_LINE + WHOLE_NOTE_BETWEEN_LINE + DOUBLE_WHOLE_NOTE_ON_LINE + DOUBLE_WHOLE_NOTE_BETWEEN_LINE
NOTEHEADS_ALL = NOTEHEAD_FULL_ON_LINE + NOTEHEAD_FULL_BETWEEN_LINES + NOTEHEAD_HOLLOW_ON_LINE + NOTEHEAD_HOLLOW_BETWEEN_LINE + WHOLE_NOTE_ON_LINE + WHOLE_NOTE_BETWEEN_LINE + DOUBLE_WHOLE_NOTE_ON_LINE + DOUBLE_WHOLE_NOTE_BETWEEN_LINE
DOT = [51]
STEM = [52]
TREMOLO = [53, 54, 55, 56]
FLAG_DOWN = [58, 60, 61, 62, 63]
FLAG_UP = [64, 66, 67, 68, 69]
FLAT = [70]
NATURAL = [72]
SHARP = [74]
DOUBLE_SHARP = [76]
ALL_ACCIDENTALS = FLAT + NATURAL + SHARP + DOUBLE_SHARP
KEY_FLAT = [78]
KEY_NATURAL = [79]
KEY_SHARP = [80]
ALL_KEYS = KEY_FLAT + KEY_NATURAL + KEY_SHARP
ACCENT_ABOVE = [81]
ACCENT_BELOW = [82]
STACCATO_ABOVE = [83]
STACCATO_BELOW = [84]
TENUTO_ABOVE = [85]
TENUTO_BELOW = [86]
STACCATISSIMO_ABOVE = [87]
STACCATISSIMO_BELOW = [88]
MARCATO_ABOVE = [89]
MARCATO_BELOW = [90]
FERMATA_ABOVE = [91]
FERMATA_BELOW = [92]
BREATH_MARK = [93]
REST_LARGE = [95]
REST_LONG = [96]
REST_BREVE = [97]
REST_FULL = [98]
REST_QUARTER = [99]
REST_EIGHTH = [100]
REST_SIXTEENTH = [101]
REST_THIRTY_SECOND = [102]
REST_SIXTY_FOURTH = [103]
REST_ONE_HUNDRED_TWENTY_EIGHTH = [104]
ALL_RESTS_EXCEPT_LARGE = REST_LONG + REST_BREVE + REST_FULL + REST_QUARTER + REST_EIGHTH + REST_SIXTEENTH + REST_THIRTY_SECOND + REST_SIXTY_FOURTH + REST_ONE_HUNDRED_TWENTY_EIGHTH
ALL_RESTS = ALL_RESTS_EXCEPT_LARGE
TRILL = [127]
GRUPPETO = [129]
MORDENT = [130]
DOWN_BOW = [131]
UP_BOW = [132]
SYMBOL = [133, 134, 135, 138, 139, 141, 142]
TUPETS = [136, 137, 149, 151, 152, 153, 154, 155, 156]
SLUR_AND_TIE = [145, 147]
BEAM = [146]
STAFF = [165]

DENSE_DATASET_DEFINITIONS = Symbols()
46 changes: 29 additions & 17 deletions oemer/models/unet.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,19 @@ def my_conv_block(inp, kernels, kernel_size=(3, 3), strides=(1, 1)):
return out


def my_conv_small_block(inp, kernels, kernel_size=(3, 3), strides=(1, 1)):
inp = L.Conv2D(kernels, kernel_size, strides=strides, padding='same', dtype=tf.float32)(inp)
out = L.Activation("relu")(L.LayerNormalization()(inp))
out = L.Dropout(0.3)(out)
out = L.Add()([inp, out])
out = L.Activation("relu")(L.LayerNormalization()(out))
return out


def my_trans_conv_block(inp, kernels, kernel_size=(3, 3), strides=(1, 1)):
inp = L.Conv2DTranspose(kernels, kernel_size, strides=strides, padding='same', dtype=tf.float32)(inp)
out = L.Activation("relu")(L.LayerNormalization()(inp))
out = L.Conv2D(kernels, kernel_size, padding='same', dtype=tf.float32)(out)
#out = L.Activation("relu")(L.LayerNormalization()(inp))
out = L.Conv2D(kernels, kernel_size, padding='same', dtype=tf.float32)(inp)
out = L.Activation("relu")(L.LayerNormalization()(out))
out = L.Dropout(0.3)(out)
out = L.Add()([inp, out])
Expand All @@ -157,25 +166,25 @@ def u_net(win_size=288, out_class=3):
inp = L.Input(shape=(win_size, win_size, 3))
tensor = L.SeparableConv2D(128, (3, 3), activation="relu", padding='same')(inp)

l1 = my_conv_block(tensor, 64, (3, 3), strides=(2, 2)) # 128
l1 = my_conv_block(l1, 128, (3, 3))
l1 = my_conv_block(l1, 128, (3, 3))
l1 = my_conv_small_block(tensor, 64, (3, 3), strides=(2, 2))
l1 = my_conv_small_block(l1, 64, (3, 3))
l1 = my_conv_small_block(l1, 64, (3, 3))

skip = my_conv_block(l1, 128, (3, 3), strides=(2, 2)) # 64
l2 = my_conv_block(skip, 128, (3, 3))
l2 = my_conv_block(l2, 128, (3, 3))
l2 = my_conv_block(l2, 128, (3, 3))
l2 = my_conv_block(l2, 128, (3, 3))
skip = my_conv_small_block(l1, 128, (3, 3), strides=(2, 2))
l2 = my_conv_small_block(skip, 128, (3, 3))
l2 = my_conv_small_block(l2, 128, (3, 3))
l2 = my_conv_small_block(l2, 128, (3, 3))
l2 = my_conv_small_block(l2, 128, (3, 3))
l2 = L.Concatenate()([skip, l2])

l3 = my_conv_block(l2, 256, (3, 3))
l3 = my_conv_block(l3, 256, (3, 3))
l3 = my_conv_block(l3, 256, (3, 3))
l3 = my_conv_block(l3, 256, (3, 3))
l3 = my_conv_block(l3, 256, (3, 3))
l3 = my_conv_small_block(l2, 256, (3, 3))
l3 = my_conv_small_block(l3, 256, (3, 3))
l3 = my_conv_small_block(l3, 256, (3, 3))
l3 = my_conv_small_block(l3, 256, (3, 3))
l3 = my_conv_small_block(l3, 256, (3, 3))
l3 = L.Concatenate()([l2, l3])

bot = my_conv_block(l3, 256, (3, 3), strides=(2, 2)) # 32
bot = my_conv_small_block(l3, 256, (3, 3), strides=(2, 2)) # 32
st1 = L.SeparableConv2D(256, (3, 3), padding='same', dtype=tf.float32)(bot)
st1 = L.Activation("relu")(L.LayerNormalization()(st1))
st2 = L.SeparableConv2D(256, (3, 3), dilation_rate=(2, 2), padding='same', dtype=tf.float32)(bot)
Expand All @@ -189,20 +198,23 @@ def u_net(win_size=288, out_class=3):
norm = L.Activation("relu")(L.LayerNormalization()(st))
bot = my_trans_conv_block(norm, 256, (3, 3), strides=(2, 2)) # 64

tl3 = L.Conv2D(256, (3, 3), padding='same', dtype=tf.float32)(bot)
tl3 = L.Conv2D(128, (3, 3), padding='same', dtype=tf.float32)(bot)
tl3 = L.Activation("relu")(L.LayerNormalization()(tl3))
tl3 = L.Concatenate()([tl3, l3])
tl3 = my_conv_small_block(tl3, 128, (3, 3))
tl3 = my_trans_conv_block(tl3, 128, (3, 3))

# Head 1
tl2 = L.Conv2D(128, (3, 3), padding='same', dtype=tf.float32)(tl3)
tl2 = L.Activation("relu")(L.LayerNormalization()(tl2))
tl2 = L.Concatenate()([tl2, l2])
tl2 = my_conv_small_block(tl2, 128, (3, 3))
tl2 = my_trans_conv_block(tl2, 128, (3, 3), strides=(2, 2)) # 128

tl1 = L.Conv2D(128, (3, 3), padding='same', dtype=tf.float32)(tl2)
tl1 = L.Activation("relu")(L.LayerNormalization()(tl1))
tl1 = L.Concatenate()([tl1, l1])
tl1 = my_conv_small_block(tl1, 128, (3, 3))
tl1 = my_trans_conv_block(tl1, 128, (3, 3), strides=(2, 2)) # 256

out1 = L.Conv2D(out_class, (1, 1), activation='softmax', padding='same', dtype=tf.float32)(tl1)
Expand Down
Loading

0 comments on commit 072b69e

Please sign in to comment.