Skip to content

Commit

Permalink
add sidekit source code
Browse files Browse the repository at this point in the history
  • Loading branch information
pchampio committed Oct 8, 2021
1 parent 3bef4f5 commit 4e5d66f
Show file tree
Hide file tree
Showing 57 changed files with 21,337 additions and 24 deletions.
139 changes: 139 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,142 @@ kaldi
corpora
kaldifeat
*.egg-info/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/
43 changes: 19 additions & 24 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,28 @@ nj=$(nproc)

home=$PWD

# python/CONDA
# CUDA version
# Select the CUDA toolkit root and derive the toolkit version from `nvcc`.
# Sets:
#   CUDAROOT                - toolkit directory (overridden for the "lium" group)
#   cuda_version            - 5th space-separated field of nvcc's
#                             "Cuda compilation tools" line, trailing comma removed
#   cuda_version_witout_dot - cuda_version with its first dot removed
#                             (e.g. 10.2 -> 102); name kept as-is because the
#                             pip install step reads it
CUDAROOT=/usr/local/cuda
if [ "$(id -g --name)" == "lium" ]; then
  CUDAROOT=/opt/cuda/10.2 # LIUM Cluster
  echo "Using local \$CUDAROOT: $CUDAROOT"
fi

# Abort early when the toolkit directory is missing, reporting the path that
# was actually checked.
if [ ! -d "$CUDAROOT" ]; then
  echo "CUDAROOT: '$CUDAROOT' does not exist."
  exit 1
fi

cuda_version=$("$CUDAROOT/bin/nvcc" --version | grep "Cuda compilation tools" | cut -d" " -f5 | sed s/,//)
# xargs trims surrounding whitespace before the dot is stripped.
cuda_version_witout_dot=$(echo "$cuda_version" | xargs | sed 's/\.//')

# CONDA
conda_url=https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
conda_url=https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh

# PYTORCH version
# PYTORCH
torch_version=1.8.2
torchvision_version=0.9.2
torchaudio_version=0.8.2
torch_wheels="https://download.pytorch.org/whl/lts/1.8/torch_lts.html"

# CUDA version
cuda_version=10.2

torch_wheels="https://download.pytorch.org/whl/lts/1.8/torch_lts.html"

venv_dir=$PWD/venv

Expand All @@ -35,38 +44,24 @@ if [ ! -f $mark ]; then
. $venv_dir/bin/activate

echo "Installing conda dependencies"
yes | conda install -c conda-forge sox || exit 1
yes | conda install -c conda-forge libflac || exit 1
yes | conda install -c conda-forge cudatoolkit=$cuda_version || exit 1
yes | conda install -c conda-forge sox
yes | conda install -c conda-forge libflac
touch $mark
fi
source $venv_dir/bin/activate

exit 0

# CUDA version
CUDAROOT=/usr/local/cuda
if [ "$(id -g --name)" == "lium" ]; then
CUDAROOT=/opt/cuda/10.2 # LIUM Cluster
echo "Using local \$CUDAROOT: $CUDAROOT"
fi
_cuda_version=$($CUDAROOT/bin/nvcc --version | grep "Cuda compilation tools" | cut -d" " -f5 | sed s/,//)
if [[ $cuda_version != $_cuda_version ]]; then
echo "CUDA env not properly setup! (installed cuda v$cuda_version != in path cuda v$_cuda_version)"
exit 1
fi
cuda_version_witout_dot=$(echo $cuda_version | xargs | sed 's/\.//')

# Put the selected CUDA toolkit first on the executable and library search
# paths, add its headers to CFLAGS, and advertise its location through
# CUDA_HOME / CUDA_PATH for child processes.
export PATH=$CUDAROOT/bin:$PATH
export LD_LIBRARY_PATH=$CUDAROOT/lib64:$LD_LIBRARY_PATH
export CFLAGS="-I$CUDAROOT/include $CFLAGS"
export CUDA_HOME=$CUDAROOT
export CUDA_PATH=$CUDAROOT

# Generate env.sh (overwritten on every run). When sourced it activates the
# virtualenv if `which python` is not the venv's python, then exports CUDAROOT
# and LD_LIBRARY_PATH. Only \$(which python) is deferred to source time;
# $venv_dir, $CUDAROOT and $LD_LIBRARY_PATH are expanded now, so the file
# contains the values computed during this install.
echo "if [ \$(which python) != $venv_dir/bin/python ]; then source $venv_dir/bin/activate; fi; export CUDAROOT=$CUDAROOT; export LD_LIBRARY_PATH=$LD_LIBRARY_PATH;" > env.sh

mark=.done-pytorch
if [ ! -f $mark ]; then
echo " == Installing pytorch $torch_version for cuda $cuda_version =="
# pip3 install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html
pip3 install torch==$torch_version+cu$cuda_version_witout_dot torchvision==$torchvision_version+cu$cuda_version_witout_dot torchaudio==$torchaudio_version -f $torch_wheels
cd $home
touch $mark
Expand Down
3 changes: 3 additions & 0 deletions sidekit/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.pyc
*.DS_Store
docs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
log
model
.ipynb_checkpoints
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Dataset description


# General options
data_path: /lium/scratch/larcher/
data_file_extension: .wav
dataset_csv: list/vox12.csv

# DEV4S smaller dataset:
# awk -F, 'x[$1]<10;{x[$1]++}' vox12.csv > vox12_DEV.csv
# dataset_csv: list/vox12_DEV.csv

# DEV4S smaller dataset (with only long utterances):
# (head -n 2 list/vox12_DEV.csv ; cat list/vox12_DEV.csv | sort -t , -k 5,5 -rn | head -n 7204) > list/vox12_DEV_long.csv
# tail -n +2 list/vox12_DEV.csv | cut -d , -f 1 | sort | uniq > /tmp/len
# tail -n +2 list/vox12_DEV_long.csv | cut -d, -f2- > /tmp/vox12_DEV_long_side.csv
# paste -d',' /tmp/len /tmp/vox12_DEV_long_side.csv > /tmp/vox12_DEV_long_wrongspkid.csv
# (head -n 2 list/vox12_DEV.csv; cat /tmp/vox12_DEV_long_wrongspkid.csv) > list/vox12_DEV_long.csv
# dataset_csv: list/vox12_DEV_long_sorted.csv

sample_rate: 16000

validation_ratio: 0.02
batch_size: 512

# Training set
train:
duration: 3.
chunk_per_segment: -1
overlap: 3.

sampler:
examples_per_speaker: 1
samples_per_speaker: 100
augmentation_replica: 1

transform_number: 1

transformation:
pipeline: add_reverb,add_noise,filtering,phone_filtering,codec
spec_aug: 0.5
temp_aug: 0.5

add_noise:
noise_db_csv: /lium/raid01_c/larcher/data/musan_split.csv
data_path: /lium/scratch/larcher/musan_split/

add_reverb:
rir_db_csv: list/reverb.csv
data_path: /lium/scratch/amehrish/Database/REVERB_UPDATED/REVERB/RIRS_NOISES/

# Validation set
valid:
duration: 3.

transformation:
pipeline:
spec_aug: 0.5
temp_aug: 0.5

add_noise:
noise_db_csv: /lium/raid01_c/larcher/data/musan_split.csv
data_path: /lium/scratch/larcher/musan_split/

# Test set
test:
idmap: /lium/raid01_c/larcher/data/vox1_test_cleaned_idmap.h5
ndx: /lium/raid01_c/larcher/data/vox1_test_cleaned_ndx.h5
key: /lium/raid01_c/larcher/data/vox1_test_cleaned_key.h5
data_path: /lium/scratch/larcher/voxceleb1/test/wav
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Dataset description


# General options
data_path: /lium/scratch/larcher/
data_file_extension: .wav
dataset_csv: list/voxceleb2_dev.csv
sample_rate: 16000


validation_ratio: 0.02
batch_size: 256


# Training set
train:
duration: 4
chunk_per_segment: -1
overlap: 3.

sampler:
examples_per_speaker: 1
samples_per_speaker: 100
augmentation_replica: 1

transform_number: 1

transformation:
pipeline: add_reverb,add_noise,filtering,codec
spec_aug: 0.5
temp_aug: 0.5

add_noise:
noise_db_csv: /lium/raid01_c/larcher/data/musan_split.csv
data_path: /lium/scratch/larcher/musan_split/

add_reverb:
rir_db_csv: /lium/home/amehrish/Database/REVERB_UPDATED/REVERB/Reverb.csv
data_path: /lium/home/amehrish/Database/REVERB_UPDATED/REVERB/RIRS_NOISES/

# Validation set
valid:
duration: 4.

transformation:
pipeline:
spec_aug: 0.5
temp_aug: 0.5

add_noise:
noise_db_csv: /lium/raid01_c/larcher/data/musan_split.csv
data_path: /lium/scratch/larcher/musan_split/

# Test set
test:
idmap: /lium/raid01_c/larcher/data/vox1_test_cleaned_idmap.h5
ndx: /lium/raid01_c/larcher/data/vox1_test_cleaned_ndx.h5
key: /lium/raid01_c/larcher/data/vox1_test_cleaned_key.h5
data_path: /lium/scratch/larcher/voxceleb1/test/wav
Loading

0 comments on commit 4e5d66f

Please sign in to comment.