Skip to content

Commit

Permalink
Update tests workflow, drop support of GpyOpt, tests for smac does no…
Browse files Browse the repository at this point in the history
…t run
  • Loading branch information
noscode committed Nov 5, 2024
1 parent 1168503 commit 123cf03
Show file tree
Hide file tree
Showing 49 changed files with 12,818 additions and 442 deletions.
131 changes: 34 additions & 97 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-20.04, windows-2019, macos-latest]
python-version: [3.6, 3.8]
python-version: ["3.8", "3.10"]
include:
- os: windows-2019
triplet: x64-windows
env:
CONDA_ENV_NAME: gadma_env
defaults:
run:
shell: bash -el {0}

steps:
- name: Cancel previous runs.
Expand All @@ -31,120 +34,54 @@ jobs:
- name: Checkout.
uses: actions/checkout@v2

- name: Set up Python 3.
uses: actions/setup-python@v2
- name: Set up Conda environment.
uses: conda-incubator/setup-miniconda@v3
continue-on-error: true
with:
auto-update-conda: true
python-version: ${{ matrix.python-version }}

- name: Install minimal requirements before everything else
channels: conda-forge,bioconda
activate-environment: ${{ env.CONDA_ENV_NAME }}

- name: Install dadi and scikit-allel using conda.
run: |
pip3 install -r requirements/minimal.txt
# - name: Install conda (Windows)
# if: runner.os == 'Windows' && matrix.python-version == '3.8'
# uses: conda-incubator/setup-miniconda@v2
# with:
# python-version: ${{ matrix.python }}
# channels: conda-forge, r, bioconda
# #channel-priority: strict
# auto-update-conda: true
# use-only-tar-bz2: true
#
# # For future, to check msprime versions: conda search msprime --info
# - name: Install msprime via conda (Windows python3.8)
# if: runner.os == 'Windows' && matrix.python-version == '3.8'
# shell: bash -l {0} #We need a login shell to get conda
# run: |
# # We install msprime from conda to avoid GSL-related build issues
# # when pip installing without binary wheels (on Windows).
# # We install a special version of msprime 0.7.4=py36
# #conda activate ${{ env.CONDA_ENV_NAME }}
# conda create -n ${{ env.CONDA_ENV_NAME }} msprime python=${{ matrix.python-version }}

- name: Install hdf5, msprime and momi2 (MacOS).
conda install dadi scikit-allel
- name: Install dependencies for installation of moments and ignore numpy version in pyproject.toml for MacOS (python3.8).
run: |
brew install gsl homebrew/core/hdf5
pip3 install msprime
brew install libomp llvm
export LDFLAGS="-L/usr/local/opt/llvm/lib"
export CPPFLAGS="-I/usr/local/opt/llvm/include"
CC=$(brew --prefix llvm)/bin/clang pip install momi
if: matrix.os == 'macos-10.15'

# - name: Install GADMA and its dependencies (Windows python3.8).
# if: runner.os == 'Windows' && matrix.python-version == '3.8'
# shell: bash -l {0}
# run: |
# conda activate ${{ env.CONDA_ENV_NAME }}
# bash install
# pip3 install -r requirements/bayes_opt.txt
pip install Cython
pip install "setuptools_scm>=8"
pip install --no-build-isolation moments-popgen
if: runner.os == 'MacOS' && matrix.python-version == '3.8'

- name: Install GADMA and its dependencies.
run: |
bash install
pip3 install Cython==0.29.18 # otherwise ConfigSpace causes an error on macOS
# TypeError: Expected float, got numpy.float64 when using Float
pip3 install -r requirements/bayes_opt.txt
pip3 install -r requirements/bayes_opt_addit.txt
- name: Uninstall SMAC (Windows).
if: runner.os == 'Windows'
shell: bash -l {0}
run: |
pip3 uninstall -y smac
- name: Uninstall SMAC (MacOS py3.6)
if: runner.os == 'MacOS' && matrix.python-version == '3.6'
run: pip3 uninstall -y smac

- name: Install momentsLD engine.
pip install .
- name: Use old matplotlib for tests to enable moments pictures.
run: |
pip3 uninstall -y moments
pip3 install Cython==0.29.18 # the latest version causes errors on Windows and MacOS
pip3 install setuptools==58.2.0 # to install with setup.py
git clone --depth 1 --branch moments-1.1.11 https://bitbucket.org/simongravel/moments.git
cd moments
pip install -r requirements.txt
python3 setup.py build_ext --inplace
pip3 install .
cd ..
# For some reason the latest version of wheel causes an error for momi and dadi installation
# The error is the following: ModuleNotFoundError: No module named 'numpy'
- name: Use specific version of wheel for momi and dadi installation (Linux)
if: matrix.python-version == '3.8'
run: |
python3 -m pip install wheel==0.38.4
- name: Install dadi==2.2.0 for Python3.8
if: matrix.python-version == '3.8'
run: |
pip3 install dadi==2.2.0
- name: Install momi engine (Linux).
if: runner.os == 'Linux'
run: |
python3 -m pip install momi
conda install "matplotlib<3.5"
- name: Check installations of packages.
run: |
python3 -c "import numpy"
python3 -c "import scipy"
python3 -c "import dadi"
python3 -c "import moments"
python3 -c "import gadma"
python -c "import numpy"
python -c "import scipy"
python -c "import dadi"
python -c "import moments"
python -c "import gadma"
- name: Show versions of all installed packages.
run: python3 -m pip freeze
run: |
conda list
- name: Show available engines and optimizations in gadma.
run: |
python3 -c "import gadma;print(gadma.engines.engine._registered_engines.keys())"
python3 -c "import gadma;print(gadma.optimizers.global_optimizer._registered_global_optimizers.keys())"
python -c "import gadma;print(gadma.engines.engine._registered_engines.keys())"
python -c "import gadma;print(gadma.optimizers.global_optimizer._registered_global_optimizers.keys())"
- name: Install dependencies for tests.
run: |
pip3 install -r requirements/tests.txt
pip install -r requirements/tests.txt
- name: Run tests and codecov.
run: |
Expand All @@ -156,5 +93,5 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
flags: unittests
if: runner.os == 'Linux' && matrix.python-version == '3.6'
if: runner.os == 'Linux' && matrix.python-version == '3.8'

4 changes: 2 additions & 2 deletions gadma/code_generator/moments_ld_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def _print_momentsLD_load_data(engine, data_holder):

ret_str += "if preprocessed_data is not None:\n"
ret_str += " with open(preprocessed_data, \"rb\") as fin:\n"
ret_str += " region_stats = pickle.load(fin)\n"
ret_str += " region_stats, data = pickle.load(fin)\n"
ret_str += "else:\n"
ret_str += f" for bed_file in sorted(os.listdir(bed_files)):\n"
ret_str += " chrom = bed_file.split('_')[-2]\n"
Expand Down Expand Up @@ -266,7 +266,7 @@ def _print_momentsLD_load_data(engine, data_holder):
ret_str += " )\n"
ret_str += " })\n"
ret_str += " reg_num += 1\n"
ret_str += "data = moments.LD.Parsing.bootstrap_data(region_stats)\n\n"
ret_str += " data = moments.LD.Parsing.bootstrap_data(region_stats)\n\n"
return ret_str


Expand Down
2 changes: 1 addition & 1 deletion gadma/data/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,6 @@ def create_recombination_maps_from_rate(
for chrom in chrom2len:
rec_dist = chrom2len[chrom]*recombination_rate*100
with open(os.path.join(output_dir, f"auto_map_{chrom}.txt"), 'w') as f:
f.write("Pos\tMap(cM)\n")
f.write(f"Pos\t{chrom}\n")
f.write("0\t0\n")
f.write(f"{chrom2len[chrom]}\t{rec_dist}\n")
6 changes: 5 additions & 1 deletion gadma/engines/dadi_moments_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,11 +619,15 @@ def _read_fsc_data(module, data_holder):
if not data_holder.outgroup:
mask = np.arange(ndim)
mask = np.where(mask > ndim / 2, True, False)

data = module.Spectrum(total / n_observations,
pop_ids=data_holder.population_labels,
data_folded=not data_holder.outgroup,
mask=mask)
# we should mask first and last elements in data
# moments is failing to do so
# equivalent to the mask_corners() function
data.mask[tuple(np.array([0 for _ in data.mask.shape]))] = True
data.mask[tuple(np.array(data.mask.shape) - 1)] = True
if data_holder.projections:
data = data.project(data_holder.projections)

Expand Down
11 changes: 5 additions & 6 deletions gadma/engines/moments_ld_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def _read_data_one_job(args):
Function for reading data using multiprocessing
"""
reg_num, kwargs = args
print(reg_num)
results = {
str(reg_num):
moments.LD.Parsing.compute_ld_statistics(
Expand Down Expand Up @@ -157,7 +156,7 @@ def _get_region_stats(cls, data_holder):
data_holder.recombination_maps,
f"{prefix}_{chrom}.{extension}"
)
parsing_kwargs["map_name"] = chrom
parsing_kwargs["map_name"] = chrom
# Check for r_bins
if parsing_kwargs["r_bins"] is None:
parsing_kwargs["r_bins"] = cls.r_bins
Expand All @@ -173,7 +172,6 @@ def _get_region_stats(cls, data_holder):
all_kwargs.append([str(reg_num-1), parsing_kwargs])

create_h5_file(data_holder.filename)
print(len(all_kwargs))
n_processes = cls.n_processes
if n_processes == 1:
result = []
Expand All @@ -197,12 +195,13 @@ def _read_data(cls, data_holder):
# If we have no preprocessed data then we create it
if data_holder.preprocessed_data is None:
region_stats = cls._get_region_stats(data_holder)
data = moments.LD.Parsing.bootstrap_data(region_stats)
# remove created h5 file
os.remove(os.path.splitext(data_holder.filename)[0] + ".h5")
else:
print("Read preprocessed data")
with open(data_holder.preprocessed_data, "rb") as fin:
region_stats = pickle.load(fin)

data = moments.LD.Parsing.bootstrap_data(region_stats)
_, data = pickle.load(fin)
return data

@staticmethod
Expand Down
73 changes: 36 additions & 37 deletions gadma/get_confidence_intervals_for_ld.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,19 @@ def main():
hasattr(module, "model_func"),
hasattr(module, "rs"),
hasattr(module, "opt_params"),
hasattr(module, "rep_data_file"),
hasattr(module, "data_file"),
hasattr(module, "param_names")
]):
model_func = getattr(module, "model_func")
rs = getattr(module, "rs")
opt_params = getattr(module, "opt_params")
rep_data_file = getattr(module, "rep_data_file")
data_file = getattr(module, "data_file")
param_names = getattr(module, "param_names")
else:
raise ValueError("Data for CI evaluation is not valid! Check it!")

with open(rep_data_file, "rb") as file:
region_stats = pickle.load(file)
data = moments.LD.Parsing.bootstrap_data(region_stats)
with open(data_file, "rb") as file:
region_stats, data = pickle.load(file)

uncerts_fim = moments.LD.Godambe.FIM_uncert(
model_func,
Expand All @@ -65,40 +64,40 @@ def main():
lower_fim = opt_params - 1.96 * uncerts_fim
upper_fim = opt_params + 1.96 * uncerts_fim

num_boots = 100
norm_idx = 0
bootstrap_sets = moments.LD.Parsing.get_bootstrap_sets(
region_stats, num_bootstraps=num_boots, normalization=norm_idx)

uncerts_gim = moments.LD.Godambe.GIM_uncert(
model_func,
bootstrap_sets,
opt_params,
data["means"],
data["varcovs"],
r_edges=rs,
)

lower_gim = opt_params - 1.96 * uncerts_gim
upper_gim = opt_params + 1.96 * uncerts_gim
# num_boots = 100
# norm_idx = 0
# bootstrap_sets = moments.LD.Parsing.get_bootstrap_sets(
# region_stats, num_bootstraps=num_boots, normalization=norm_idx)
#
# uncerts_gim = moments.LD.Godambe.GIM_uncert(
# model_func,
# bootstrap_sets,
# opt_params,
# data["means"],
# data["varcovs"],
# r_edges=rs,
# )
#
# lower_gim = opt_params - 1.96 * uncerts_gim
# upper_gim = opt_params + 1.96 * uncerts_gim

lower_fim_phys_units = copy.deepcopy(lower_fim)
upper_fim_phys_units = copy.deepcopy(upper_fim)
lower_gim_phys_units = copy.deepcopy(lower_gim)
upper_gim_phys_units = copy.deepcopy(upper_gim)
# lower_gim_phys_units = copy.deepcopy(lower_gim)
# upper_gim_phys_units = copy.deepcopy(upper_gim)

phys_units_boundaries_list = [
lower_fim_phys_units,
upper_fim_phys_units,
lower_gim_phys_units,
upper_gim_phys_units
# lower_gim_phys_units,
# upper_gim_phys_units
]

gen_units_boundaries_list = [
lower_fim,
upper_fim,
lower_gim,
upper_gim
# lower_gim,
# upper_gim
]

for bound in gen_units_boundaries_list:
Expand All @@ -122,26 +121,26 @@ def main():
f"{lower_fim[num]} "
f"- {upper_fim[num]}" for num in range(len(param_names))
]
gim_bounds_list = [
f"{lower_gim[num]} "
f"- {upper_gim[num]}" for num in range(len(param_names))
]
# gim_bounds_list = [
# f"{lower_gim[num]} "
# f"- {upper_gim[num]}" for num in range(len(param_names))
# ]
fim_bounds_list_phys_units = [
f"{lower_fim_phys_units[num]} "
f"- {upper_fim_phys_units[num]}" for num in range(len(param_names))
]
gim_bounds_list_phys_units = [
f"{lower_gim_phys_units[num]} "
f"- {upper_gim_phys_units[num]}" for num in range(len(param_names))
]
# gim_bounds_list_phys_units = [
# f"{lower_gim_phys_units[num]} "
# f"- {upper_gim_phys_units[num]}" for num in range(len(param_names))
# ]

all_ci_data = {
"Param names": param_names,
"Opt params": opt_params,
"FIM": fim_bounds_list,
"GIM": gim_bounds_list,
# "GIM": gim_bounds_list,
"FIM phys units": fim_bounds_list_phys_units,
"GIM phys units": gim_bounds_list_phys_units
# "GIM phys units": gim_bounds_list_phys_units
}
results = os.path.join(os.path.dirname(filename), "ci_results.xlsx")
all_ci_dataframe = pd.DataFrame(data=all_ci_data)
Expand Down
2 changes: 1 addition & 1 deletion gadma/optimizers/smac_optim.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def __init__(self, api_config, config_space, parallel_setting="LS"):
prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rng),
)

cont_dims = np.array(np.where(np.array(types) == 0)[0], dtype=np.int)
cont_dims = np.array(np.where(np.array(types) == 0)[0], dtype=int)
cat_dims = np.where(np.array(types) != 0)[0]

if len(cont_dims) > 0:
Expand Down
Loading

0 comments on commit 123cf03

Please sign in to comment.