diff --git a/README.md b/README.md index 5bd7087..97047c6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # HEAR 2021 Baseline A simple DSP-based audio embedding consisting of a Mel-frequency spectrogram followed -by a random projection. Serves as the baseline model for the HEAR 2021 and implements +by a random projection. Serves as the naive baseline model for the HEAR 2021 competition and implements the [common API](https://neuralaudio.ai/hear2021-holistic-evaluation-of-audio-representations.html#common-api) required by the competition evaluation. @@ -26,6 +26,16 @@ git clone https://github.com/neuralaudio/hear-baseline.git python3 -m pip install ./hear-baseline ``` +### Naive Baseline Model +The naive baseline model produces log-scaled Mel-frequency spectrograms using a +256-band Mel filter. Each frame of the spectrogram is then projected to 4096 +dimensions using a random projection matrix. Weights for the projection matrix were +generated by sampling a normal distribution and are stored in this repository in the +file `saved_models/naive_baseline.pt`. + +Using a random projection is less efficient +than a CNN but is one of the simplest models to implement from a coding perspective. 
+ ### Usage Audio embeddings can be computed using one of two methods: 1) @@ -38,7 +48,7 @@ import torch import hearbaseline # Load model with weights - located in the root directory of this repo -model = hearbaseline.load_model("./baseline_weights.pt") +model = hearbaseline.load_model("saved_models/naive_baseline.pt") # Create a batch of 2 white noise clips that are 2-seconds long # and compute scene embeddings for each clip diff --git a/hearbaseline/__init__.py b/hearbaseline/__init__.py index e16518e..da5ad5d 100644 --- a/hearbaseline/__init__.py +++ b/hearbaseline/__init__.py @@ -1 +1 @@ -from .baseline import load_model, get_scene_embeddings, get_timestamp_embeddings +from .naive import load_model, get_scene_embeddings, get_timestamp_embeddings diff --git a/hearbaseline/baseline.py b/hearbaseline/naive.py similarity index 100% rename from hearbaseline/baseline.py rename to hearbaseline/naive.py diff --git a/baseline_weights.pt b/saved_models/naive_baseline.pt similarity index 100% rename from baseline_weights.pt rename to saved_models/naive_baseline.pt diff --git a/tests/test_baseline.py b/tests/test_baseline.py index 61b8be8..fac84c0 100644 --- a/tests/test_baseline.py +++ b/tests/test_baseline.py @@ -10,7 +10,7 @@ get_timestamp_embeddings, ) from hearbaseline.util import frame_audio -import hearbaseline.baseline as baseline +import hearbaseline.naive as baseline torch.backends.cudnn.deterministic = True