diff --git a/.travis.yml b/.travis.yml index 26c838a..a371311 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,14 +34,7 @@ install: - export CXX=g++ script: - - conda config --add channels https://conda.binstar.org/omnia - - conda build devtools/conda-recipe - - conda install --yes --use-local ensembler-dev - # - source devtools/travis-ci/test.sh - - conda install --yes nose - - pushd .; cd / - - nosetests ensembler -v --exe -a modeller - - popd + - source devtools/travis-ci/test.sh after_success: - echo "after_success" diff --git a/devtools/conda-recipe/meta.yaml b/devtools/conda-recipe/meta.yaml index 0a84453..c6af25f 100644 --- a/devtools/conda-recipe/meta.yaml +++ b/devtools/conda-recipe/meta.yaml @@ -18,12 +18,12 @@ requirements: run: - python - - modeller + - modeller ==9.15 - mdtraj - msmbuilder - biopython - openmm - - pdbfixer + - pdbfixer ==1.2 # - numpy - lxml - pyyaml diff --git a/devtools/travis-ci/test.sh b/devtools/travis-ci/test.sh index 2730423..9dfc961 100644 --- a/devtools/travis-ci/test.sh +++ b/devtools/travis-ci/test.sh @@ -1,10 +1,16 @@ -# This runs unit tests +# This installs the program and runs unit tests +set -e +conda config --add channels https://conda.anaconda.org/omnia +conda build devtools/conda-recipe +conda install --yes --use-local ensembler-dev +conda install --yes nose +pushd .; cd / +nosetests ensembler -v --exe -a unit if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]; then - conda install --yes nose - pushd .; cd / nosetests ensembler -v --exe -a modeller - popd else echo "This is a pull request. Secure environment variables are not available, so will not attempt to run Modeller tests." 
fi + +popd \ No newline at end of file diff --git a/ensembler/cli_commands/package_models.py b/ensembler/cli_commands/package_models.py index e24285e..da3d85b 100644 --- a/ensembler/cli_commands/package_models.py +++ b/ensembler/cli_commands/package_models.py @@ -54,6 +54,7 @@ helpstring = '\n\n'.join([helpstring_header, '\n\n'.join(helpstring_unique_options), '\n\n'.join(helpstring_nonunique_options)]) docopt_helpstring = '\n\n'.join(helpstring_unique_options) + def dispatch(args): if args['--package_for']: package_for = args['--package_for'] diff --git a/ensembler/cli_commands/refine_implicit.py b/ensembler/cli_commands/refine_implicit.py index b34fb1e..87a9338 100644 --- a/ensembler/cli_commands/refine_implicit.py +++ b/ensembler/cli_commands/refine_implicit.py @@ -122,6 +122,6 @@ def dispatch(args): retry_failed_runs=args['--retry_failed_runs'], ff=args['--ff'], implicit_water_model=args['--water_model'], - verbose=args['--verbose'], + loglevel=loglevel, **api_params ) \ No newline at end of file diff --git a/ensembler/core.py b/ensembler/core.py index 3e11446..878a8ba 100644 --- a/ensembler/core.py +++ b/ensembler/core.py @@ -212,6 +212,11 @@ class ManualOverrides: """ Reads in user-defined override data from a YAML file named "manual-overrides.yaml" + Parameters + ---------- + manual_overrides_filepath: str + In normal use, this should not need to be set. Defaults to 'manual-overrides.yaml' + Example file contents --------------------- @@ -231,17 +236,27 @@ class ManualOverrides: - 4Q2A - 4CTB - 4QOX + refinement: + ph: 8.0 + custom_residue_variants: + DDR1_HUMAN_D0_PROTONATED: + # keyed by 0-based residue index + 35: ASH + + Or see `ensembler/tests/example_project/manual_overrides.yaml` for an example file. 
""" - def __init__(self): - import yaml - if os.path.exists(manual_overrides_filename): - with open(manual_overrides_filename, 'r') as manual_overrides_file: + def __init__(self, manual_overrides_filepath=None): + if not manual_overrides_filepath: + manual_overrides_filepath = manual_overrides_filename + if os.path.exists(manual_overrides_filepath): + with open(manual_overrides_filepath, 'r') as manual_overrides_file: manual_overrides_yaml = yaml.load(manual_overrides_file, Loader=YamlLoader) else: manual_overrides_yaml = {} self.target = TargetManualOverrides(manual_overrides_yaml) self.template = TemplateManualOverrides(manual_overrides_yaml) + self.refinement = RefinementManualOverrides(manual_overrides_yaml) class TargetManualOverrides: @@ -258,7 +273,7 @@ class TargetManualOverrides: """ def __init__(self, manual_overrides_yaml): target_dict = manual_overrides_yaml.get('target-selection') - if target_dict != None: + if target_dict is not None: self.domain_spans = target_dict.get('domain-spans') else: self.domain_spans = {} @@ -282,7 +297,7 @@ class TemplateManualOverrides: """ def __init__(self, manual_overrides_yaml): template_dict = manual_overrides_yaml.get('template-selection') - if template_dict != None: + if template_dict is not None: self.min_domain_len = template_dict.get('min-domain-len') self.max_domain_len = template_dict.get('max-domain-len') self.domain_spans = template_dict.get('domain-spans') @@ -294,6 +309,29 @@ def __init__(self, manual_overrides_yaml): self.skip_pdbs = [] +class RefinementManualOverrides: + """ + Parameters + ---------- + manual_overrides_yaml: dict + + Attributes + ---------- + ph: float or NoneType + custom_residue_variants_by_targetid: dict or NoneType + dict with structure {`targetid`: {residue_index: residue_name}, ...} where + e.g. 
{'DDR1_HUMAN_D0_PROTONATED': {35: 'ASH'}} + """ + def __init__(self, manual_overrides_yaml): + refinement_dict = manual_overrides_yaml.get('refinement') + if refinement_dict is not None: + self.ph = refinement_dict.get('ph') + self.custom_residue_variants_by_targetid = refinement_dict.get('custom_residue_variants') + else: + self.ph = None + self.custom_residue_variants_by_targetid = {} + + def gen_metadata_filename(ensembler_stage, metadata_file_index): for modelling_stage in ['build_models', 'cluster_models', 'refine_implicit_md', 'solvate_models', 'determine_nwaters', 'refine_explicit_md']: if ensembler_stage == modelling_stage: diff --git a/ensembler/initproject.py b/ensembler/initproject.py index c11080b..2b9cadb 100644 --- a/ensembler/initproject.py +++ b/ensembler/initproject.py @@ -3,20 +3,19 @@ import sys import os import re - +import shutil from lxml import etree import Bio.SeqUtils import Bio.SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord - import ensembler import ensembler.version import ensembler.targetexplorer import ensembler.uniprot import ensembler.pdb -from ensembler.utils import file_exists_and_not_empty -from ensembler.core import mpistate, logger +from ensembler.utils import file_exists_and_not_empty, get_installed_resource_filename +from ensembler.core import mpistate, logger, manual_overrides_filename class TemplateData: @@ -43,6 +42,7 @@ def __init__(self, project_toplevel_dir, run_main=True): @ensembler.utils.notify_when_done def _init_project(self): self._create_project_dirs() + self._write_manual_overrides_file() self._write_init_metadata() def _create_project_dirs(self): @@ -56,6 +56,16 @@ def _create_project_dirs(self): ensembler.utils.create_dir(os.path.join(self.project_toplevel_dir, ensembler.core.default_project_dirnames.templates_structures_resolved)) ensembler.utils.create_dir(os.path.join(self.project_toplevel_dir, ensembler.core.default_project_dirnames.templates_structures_modeled_loops)) + def 
_write_manual_overrides_file(self): + if not os.path.exists(manual_overrides_filename): + template_manual_overrides_filepath = get_installed_resource_filename( + os.path.join('resources', 'template-manual-overrides.yaml') + ) + manual_overrides_filepath = os.path.join( + self.project_toplevel_dir, manual_overrides_filename + ) + shutil.copy(template_manual_overrides_filepath, manual_overrides_filepath) + def _write_init_metadata(self): project_metadata = ensembler.core.ProjectMetadata(project_stage='init', project_toplevel_dir=self.project_toplevel_dir) init_metadata = self._gen_init_metadata() diff --git a/ensembler/modeling.py b/ensembler/modeling.py index c9c9f72..e3045d2 100644 --- a/ensembler/modeling.py +++ b/ensembler/modeling.py @@ -135,7 +135,7 @@ def pdbfix_template(template_full_seq, overwrite_structures=False): template_full_seq.id + '.pdb' ) fixer = pdbfixer.PDBFixer(filename=template_filepath) - chainid = fixer.structureChains[0].chain_id + chainid = next(fixer.structure.iter_chains()).chain_id seq_obj = simtk.openmm.app.internal.pdbstructure.Sequence(chainid) for r in template_full_seq.seq: resi3 = Bio.SeqUtils.seq3(r).upper() @@ -179,6 +179,8 @@ def pdbfix_template(template_full_seq, overwrite_structures=False): 'MPI rank %d pdbfixer error for template %s - see logfile' % (mpistate.rank, template_full_seq.id) ) + logger.debug(e) + logger.debug(trbk) def remove_missing_residues_at_termini(fixer, len_full_seq): diff --git a/ensembler/packaging.py b/ensembler/packaging.py index 93c11c9..ec57aa1 100644 --- a/ensembler/packaging.py +++ b/ensembler/packaging.py @@ -4,7 +4,7 @@ from ensembler.core import get_targets_and_templates, select_templates_by_seqid_cutoff from ensembler.utils import set_loglevel, read_file_contents_gz_or_not import simtk.unit as unit -import simtk.openmm as openmm +import simtk.openmm as mm import mdtraj fah_projects_dir = os.path.join(default_project_dirnames.packaged_models, 'fah-projects') @@ -15,9 +15,9 @@ def 
package_for_fah(process_only_these_targets=None, template_seqid_cutoff=None, nclones=1, archive=False, openmm_platform='Reference', - timestep=2.0 * unit.femtoseconds, - collision_rate=1.0 / unit.picosecond, temperature=300.0 * unit.kelvin, + collision_rate=1.0 / unit.picosecond, + timestep=2.0 * unit.femtoseconds, loglevel=None): """ Create the input files and directory structure necessary to start a Folding@Home project. @@ -76,9 +76,9 @@ def package_for_fah(process_only_these_targets=None, system = setup_system_and_integrator_files( target, sorted_valid_templates[0], - timestep, + temperature, collision_rate, - temperature + timestep ) renumbered_resnums = get_renumbered_topol_resnums(target) @@ -110,7 +110,7 @@ def package_for_fah(process_only_these_targets=None, nclones, temperature, collision_rate, - temperature, + timestep, openmm_platform, renumbered_resnums, ) @@ -206,10 +206,11 @@ def create_target_project_dir(target): def setup_system_and_integrator_files(target, template, - timestep, + temperature, collision_rate, - temperature + timestep ): + logger.debug('Copying system and integrator files for template {}'.format(template.id)) models_target_dir = os.path.join(default_project_dirnames.models, target.id) template_dir = os.path.join(models_target_dir, template.id) target_project_dir = os.path.join(fah_projects_dir, target.id) @@ -218,10 +219,10 @@ def setup_system_and_integrator_files(target, dest_system_filepath = os.path.join(target_project_dir, 'system.xml') dest_integrator_filepath = os.path.join(target_project_dir, 'integrator.xml') - system = openmm.XmlSerializer.deserialize( + system = mm.XmlSerializer.deserialize( read_file_contents_gz_or_not(source_system_filepath) ) - state = openmm.XmlSerializer.deserialize( + state = mm.XmlSerializer.deserialize( read_file_contents_gz_or_not(source_state_filepath) ) @@ -230,17 +231,17 @@ def setup_system_and_integrator_files(target, system.setDefaultPeriodicBoxVectors(*box_vectors) # Create new integrator to 
use. - integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) + integrator = mm.LangevinIntegrator(temperature, collision_rate, timestep) # TODO: Make sure MonteCarloBarostat temperature matches set temperature. # Serialize System. with open(dest_system_filepath, 'w') as dest_system_file: - dest_system_file.write(openmm.XmlSerializer.serialize(system)) + dest_system_file.write(mm.XmlSerializer.serialize(system)) # Serialize Integrator with open(dest_integrator_filepath, 'w') as dest_integrator_file: - dest_integrator_file.write(openmm.XmlSerializer.serialize(integrator)) + dest_integrator_file.write(mm.XmlSerializer.serialize(integrator)) return system @@ -337,7 +338,7 @@ def generate_fah_run(target_project_dir, read_file_contents_gz_or_not(source_system_structure_filepath) ) - state = openmm.XmlSerializer.deserialize( + state = mm.XmlSerializer.deserialize( read_file_contents_gz_or_not(source_openmm_state_filepath) ) @@ -346,13 +347,12 @@ def generate_fah_run(target_project_dir, run_seqid_file.write(read_file_contents_gz_or_not(source_seqid_filepath)) # Create new integrator to use. - integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) + integrator = mm.LangevinIntegrator(temperature, collision_rate, timestep) # Create Context so we can randomize velocities. 
- platform = openmm.Platform.getPlatformByName(openmm_platform) - context = openmm.Context(system, integrator, platform) + platform = mm.Platform.getPlatformByName(openmm_platform) + context = mm.Context(system, integrator, platform) context.setPositions(state.getPositions()) - context.setVelocities(state.getVelocities()) box_vectors = state.getPeriodicBoxVectors() context.setPeriodicBoxVectors(*box_vectors) @@ -371,7 +371,7 @@ def generate_fah_run(target_project_dir, enforcePeriodicBox=True ) with open(state_filename, 'w') as state_file: - state_file.write(openmm.XmlSerializer.serialize(state)) + state_file.write(mm.XmlSerializer.serialize(state)) except Exception as e: import traceback diff --git a/ensembler/refinement.py b/ensembler/refinement.py index 0fdfa34..ddd2470 100644 --- a/ensembler/refinement.py +++ b/ensembler/refinement.py @@ -9,11 +9,12 @@ import warnings import socket from collections import deque +from copy import deepcopy import numpy as np import Bio import ensembler import ensembler.version -from ensembler.core import mpistate, logger +from ensembler.core import mpistate, logger, ManualOverrides import simtk.unit as unit import simtk.openmm as openmm import simtk.openmm.app as app @@ -23,8 +24,9 @@ def refine_implicit_md( openmm_platform=None, gpupn=1, process_only_these_targets=None, process_only_these_templates=None, template_seqid_cutoff=None, - verbose=False, write_trajectory=False, + write_trajectory=False, include_disulfide_bonds=False, + custom_residue_variants=None, ff='amber99sbildn', implicit_water_model='amber99_obc', sim_length=100.0 * unit.picoseconds, @@ -35,15 +37,30 @@ def refine_implicit_md( minimization_tolerance=10.0 * unit.kilojoules_per_mole / unit.nanometer, minimization_steps=20, nsteps_per_iteration=500, - ph=7.0, + ph=None, retry_failed_runs=False, - cpu_platform_threads=1): + cpu_platform_threads=1, + loglevel=None): # TODO - refactor - '''Run MD refinement in implicit solvent. 
+ """Run MD refinement in implicit solvent. MPI-enabled. - ''' + """ + ensembler.utils.set_loglevel(loglevel) gpuid = mpistate.rank % gpupn + manual_overrides = ManualOverrides() + if ph is None: + if manual_overrides.refinement.ph is not None: + ph = manual_overrides.refinement.ph + else: + ph = 7.0 + if custom_residue_variants is None: + custom_residue_variants = deepcopy( + manual_overrides.refinement.custom_residue_variants_by_targetid + ) + + if (sim_length / timestep) < nsteps_per_iteration: + nsteps_per_iteration = int(sim_length / timestep) models_dir = os.path.abspath(ensembler.core.default_project_dirnames.models) @@ -72,7 +89,7 @@ def refine_implicit_md( def simulate_implicit_md(): - if verbose: print("Reading model...") + logger.debug("Reading model...") with gzip.open(model_filename) as model_file: pdb = app.PDBFile(model_file) @@ -94,23 +111,23 @@ def simulate_implicit_md(): topology = modeller.getTopology() positions = modeller.getPositions() - if verbose: print("Constructing System object...") + logger.debug("Constructing System object...") if cutoff is None: system = forcefield.createSystem(topology, nonbondedMethod=app.NoCutoff, constraints=app.HBonds) else: system = forcefield.createSystem(topology, nonbondedMethod=app.CutoffNonPeriodic, nonbondedCutoff=cutoff, constraints=app.HBonds) - if verbose: print("Creating Context...") + logger.debug("Creating Context...") integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) context = openmm.Context(system, integrator, platform, platform_properties) context.setPositions(positions) - if verbose: print("Minimizing structure...") + logger.debug("Minimizing structure...") openmm.LocalEnergyMinimizer.minimize(context, minimization_tolerance, minimization_steps) if write_trajectory: # Open trajectory for writing. 
- if verbose: print("Opening trajectory for writing...") + logger.debug("Opening trajectory for writing...") trajectory_filename = os.path.join(model_dir, 'implicit-trajectory.pdb.gz') trajectory_outfile = gzip.open(trajectory_filename, 'w') app.PDBFile.writeHeader(topology, file=trajectory_outfile) @@ -120,7 +137,7 @@ def simulate_implicit_md(): energy_outfile = open(energy_filename, 'w') energy_outfile.write('# iteration | simulation time (ps) | potential_energy (kT) | kinetic_energy (kT) | ns per day\n') - if verbose: print("Running dynamics...") + logger.debug("Running dynamics...") import time initial_time = time.time() for iteration in range(niterations): @@ -134,7 +151,7 @@ def simulate_implicit_md(): final_time = time.time() elapsed_time = (final_time - initial_time) * unit.seconds ns_per_day = (simulation_time / elapsed_time) / (unit.nanoseconds / unit.day) - if verbose: print( + logger.debug( " %8.1f ps : potential %8.3f kT | kinetic %8.3f kT | %.3f ns/day | %.3f s remain" % ( simulation_time / unit.picoseconds, potential_energy / kT, kinetic_energy / kT, @@ -203,14 +220,16 @@ def simulate_implicit_md(): modeller = app.Modeller(reference_pdb.topology, reference_pdb.positions) reference_topology = modeller.topology reference_variants = modeller.addHydrogens(forcefield, pH=ph) - if verbose: - print("Reference variants extracted:") - if reference_variants != None: - for (residue_index, residue) in enumerate(reference_variants): - if residue != None: - print("%8d %s" % (residue_index+1, residue)) - print("") - else: print(reference_variants) + if target.id in custom_residue_variants: + apply_custom_residue_variants(reference_variants, custom_residue_variants[target.id]) + logger.debug("Reference variants extracted:") + if reference_variants is not None: + for (residue_index, residue) in enumerate(reference_variants): + if residue is not None: + logger.debug("%8d %s" % (residue_index+1, residue)) + logger.debug("") + else: + logger.debug(reference_variants) if 
template_seqid_cutoff: process_only_these_templates = ensembler.core.select_templates_by_seqid_cutoff(target.id, seqid_cutoff=template_seqid_cutoff) @@ -241,23 +260,26 @@ def simulate_implicit_md(): # Check to make sure the initial model file is present. model_filename = os.path.join(model_dir, 'model.pdb.gz') if not os.path.exists(model_filename): - if verbose: print('model.pdb.gz not present: target %s template %s rank %d gpuid %d' % (target.id, template.id, mpistate.rank, gpuid)) + logger.debug('model.pdb.gz not present: target %s template %s rank %d gpuid %d' % (target.id, template.id, mpistate.rank, gpuid)) continue pdb_filename = os.path.join(model_dir, 'implicit-refined.pdb.gz') - print("-------------------------------------------------------------------------") - print("Simulating %s => %s in implicit solvent for %.1f ps (MPI rank: %d, GPU ID: %d)" % (target.id, template.id, niterations * nsteps_per_iteration * timestep / unit.picoseconds, mpistate.rank, gpuid)) - print("-------------------------------------------------------------------------") + logger.info("-------------------------------------------------------------------------") + logger.info("Simulating %s => %s in implicit solvent for %.1f ps (MPI rank: %d, GPU ID: %d)" % (target.id, template.id, niterations * nsteps_per_iteration * timestep / unit.picoseconds, mpistate.rank, gpuid)) + logger.info("-------------------------------------------------------------------------") # Open log file log_data = { 'mpi_rank': mpistate.rank, 'gpuid': gpuid if 'CUDA_VISIBLE_DEVICES' not in os.environ else os.environ['CUDA_VISIBLE_DEVICES'], 'openmm_platform': openmm_platform, - 'sim_length': '%s' % sim_length, 'finished': False, - } + 'sim_length': str(sim_length), + 'timestep': str(timestep), + 'temperature': str(temperature), + 'ph': ph, + } log_file = ensembler.core.LogFile(log_filepath) log_file.log(new_log_data=log_data) @@ -269,7 +291,7 @@ def simulate_implicit_md(): 'finished': True, 'timing': timing, 
'successful': True, - } + } log_file.log(new_log_data=log_data) except Exception as e: trbk = traceback.format_exc() @@ -285,11 +307,10 @@ def simulate_implicit_md(): 'timing': timing, 'finished': True, 'successful': False, - } + } log_file.log(new_log_data=log_data) - if verbose: - print('Finished template loop: rank %d' % mpistate.rank) + logger.debug('Finished template loop: rank %d' % mpistate.rank) mpistate.comm.Barrier() @@ -303,12 +324,23 @@ def simulate_implicit_md(): metadata = { 'target_id': target.id, 'datestamp': datestamp, - 'template_seqid_cutoff': template_seqid_cutoff, + 'timing': ensembler.core.strf_timedelta(target_timedelta), + 'openmm_platform': openmm_platform, 'process_only_these_targets': process_only_these_targets, 'process_only_these_templates': process_only_these_templates, - 'timing': ensembler.core.strf_timedelta(target_timedelta), + 'template_seqid_cutoff': template_seqid_cutoff, + 'write_trajectory': write_trajectory, + 'include_disulfide_bonds': include_disulfide_bonds, + 'custom_residue_variants': custom_residue_variants, 'ff': ff, 'implicit_water_model': implicit_water_model, + 'sim_length': str(sim_length), + 'timestep': str(timestep), + 'temperature': str(temperature), + 'collision_rate': str(collision_rate), + 'cutoff': str(cutoff), + 'nsteps_per_iteration': nsteps_per_iteration, + 'ph': ph, 'nsuccessful_refinements': nsuccessful_refinements, 'python_version': sys.version.split('|')[0].strip(), 'python_full_version': ensembler.core.literal_str(sys.version), @@ -326,7 +358,7 @@ def simulate_implicit_md(): mpistate.comm.Barrier() if mpistate.rank == 0: - print('Done.') + logger.info('Done.') def auto_select_openmm_platform(available_platform_names=None): @@ -396,6 +428,29 @@ def remove_disulfide_bonds_from_topology(topology): [topology._bonds.pop(b) for b in remove_bond_indices] +def apply_custom_residue_variants(variants, custom_variants_dict): + """ + Applies custom residue names to a list of residue names. 
+ Acts on `variants` list in-place. + + Parameters + ---------- + variants: list of str + typically generated from openmm.app.modeller.addHydrogens + custom_variants_dict: dict + keyed by 0-based residue index. Values should be residue name string. + e.g. {35: 'HID'} + """ + for residue_index in custom_variants_dict: + if residue_index >= len(variants): + raise Exception( + 'Custom residue variant index ({}: \'{}\') out of range of variants (len: {})'.format( + residue_index, custom_variants_dict[residue_index], len(variants) + ) + ) + variants[residue_index] = custom_variants_dict[residue_index] + + def solvate_models(process_only_these_targets=None, process_only_these_templates=None, template_seqid_cutoff=None, ff='amber99sbildn', diff --git a/ensembler/resources/template-manual-overrides.yaml b/ensembler/resources/template-manual-overrides.yaml new file mode 100644 index 0000000..254d30f --- /dev/null +++ b/ensembler/resources/template-manual-overrides.yaml @@ -0,0 +1,22 @@ +# target-selection: +# domain-spans: +# ABL1_HUMAN_D0: 242-513 +# template-selection: +# min-domain-len: 0 +# max-domain-len: 350 +# domain-spans: +# ABL1_HUMAN_D0: 242-513 +# skip-pdbs: +# - 4CYJ +# - 4P41 +# - 4P2W +# - 4QTD +# - 4Q2A +# - 4CTB +# - 4QOX +# refinement: +# ph: 8.0 +# custom_residue_variants: +# DDR1_HUMAN_D0_PROTONATED: +# # keyed by 0-based residue index +# 35: ASH diff --git a/ensembler/tests/example_project/manual-overrides.yaml b/ensembler/tests/example_project/manual-overrides.yaml new file mode 100644 index 0000000..153c493 --- /dev/null +++ b/ensembler/tests/example_project/manual-overrides.yaml @@ -0,0 +1,22 @@ +target-selection: + domain-spans: + ABL1_HUMAN_D0: 242-513 +template-selection: + min-domain-len: 0 + max-domain-len: 350 + domain-spans: + ABL1_HUMAN_D0: 242-513 + skip-pdbs: + - 4CYJ + - 4P41 + - 4P2W + - 4QTD + - 4Q2A + - 4CTB + - 4QOX +refinement: + ph: 8.0 + custom_residue_variants: + EGFR_HUMAN_D0: + # keyed by 0-based residue index + 49: ASH diff 
--git a/ensembler/tests/integrationtest_utils.py b/ensembler/tests/integrationtest_utils.py index 1433a38..48ad307 100644 --- a/ensembler/tests/integrationtest_utils.py +++ b/ensembler/tests/integrationtest_utils.py @@ -5,7 +5,7 @@ import contextlib import ensembler.initproject from ensembler.core import default_project_dirnames -from ensembler.tests.utils import get_installed_resource_filename +from ensembler.utils import get_installed_resource_filename @contextlib.contextmanager @@ -32,13 +32,24 @@ def __init__(self, project_dir='.'): def init(self): ensembler.initproject.InitProject(self.project_dir) - shutil.copy(get_installed_resource_filename(os.path.join('example_project', 'meta0.yaml')), self.project_dir) + shutil.copy( + get_installed_resource_filename(os.path.join( + 'tests', 'example_project', 'meta0.yaml' + )), + self.project_dir + ) + shutil.copy( + get_installed_resource_filename(os.path.join( + 'tests', 'example_project', 'manual-overrides.yaml' + )), + self.project_dir + ) def targets(self): self.init() distutils.dir_util.copy_tree( get_installed_resource_filename( - os.path.join('example_project', default_project_dirnames.targets) + os.path.join('tests', 'example_project', default_project_dirnames.targets) ), os.path.join(self.project_dir, default_project_dirnames.targets) ) @@ -47,23 +58,23 @@ def templates_resolved(self): self.targets() shutil.copy( get_installed_resource_filename( - os.path.join('example_project', default_project_dirnames.templates, 'meta0.yaml') + os.path.join('tests', 'example_project', default_project_dirnames.templates, 'meta0.yaml') ), os.path.join(self.project_dir, default_project_dirnames.templates)) shutil.copy( get_installed_resource_filename( - os.path.join('example_project', default_project_dirnames.templates, 'templates-resolved-seq.fa') + os.path.join('tests', 'example_project', default_project_dirnames.templates, 'templates-resolved-seq.fa') ), os.path.join(self.project_dir, default_project_dirnames.templates)) 
shutil.copy( get_installed_resource_filename( - os.path.join('example_project', default_project_dirnames.templates, 'templates-full-seq.fa') + os.path.join('tests', 'example_project', default_project_dirnames.templates, 'templates-full-seq.fa') ), os.path.join(self.project_dir, default_project_dirnames.templates) ) distutils.dir_util.copy_tree( get_installed_resource_filename( - os.path.join('example_project', default_project_dirnames.templates_structures_resolved) + os.path.join('tests', 'example_project', default_project_dirnames.templates_structures_resolved) ), os.path.join(self.project_dir, default_project_dirnames.templates_structures_resolved) ) @@ -72,7 +83,7 @@ def templates_modeled_loops(self): self.templates_resolved() distutils.dir_util.copy_tree( get_installed_resource_filename( - os.path.join('example_project', default_project_dirnames.templates_structures_modeled_loops) + os.path.join('tests', 'example_project', default_project_dirnames.templates_structures_modeled_loops) ), os.path.join(self.project_dir, default_project_dirnames.templates_structures_modeled_loops) ) @@ -168,12 +179,22 @@ def _copy_modeling_files(self, target_level_files=None, template_level_files=Non for target in self.targets_list: for filename in target_level_files: shutil.copy( - get_installed_resource_filename(os.path.join('example_project', default_project_dirnames.models, target, filename)), + get_installed_resource_filename(os.path.join( + 'tests', + 'example_project', + default_project_dirnames.models, + target, filename + )), os.path.join(self.project_dir, default_project_dirnames.models, target) ) for template in self.templates_list: for filename in template_level_files: shutil.copy( - get_installed_resource_filename(os.path.join('example_project', default_project_dirnames.models, target, template, filename)), + get_installed_resource_filename(os.path.join( + 'tests', + 'example_project', + default_project_dirnames.models, + target, template, filename + )), 
os.path.join(self.project_dir, default_project_dirnames.models, target, template) - ) \ No newline at end of file + ) diff --git a/ensembler/tests/test_core.py b/ensembler/tests/test_core.py index 890ae67..e06e3b2 100644 --- a/ensembler/tests/test_core.py +++ b/ensembler/tests/test_core.py @@ -1,6 +1,9 @@ -import ensembler -import ensembler.param_parsers -import simtk.unit +import os +import shutil +from ensembler.core import ProjectMetadata, manual_overrides_filename, ManualOverrides +from ensembler.param_parsers import parse_api_params_string, eval_quantity_string +from simtk import unit +from ensembler.utils import enter_temp_dir, get_installed_resource_filename from nose.plugins.attrib import attr @@ -11,16 +14,37 @@ def test_import_ensembler_version(): @attr('unit') def test_project_metadata(): - ensembler.core.ProjectMetadata(project_stage='init') + ProjectMetadata(project_stage='init') @attr('unit') def test_parse_api_params_string(): - params_dict = ensembler.param_parsers.parse_api_params_string('{"a": 3.2 / picoseconds, "b": "x", "c": 2.4}') - assert params_dict == {'a': 3.2 / simtk.unit.picosecond, 'b': 'x', 'c': 2.4} + params_dict = parse_api_params_string('{"a": 3.2 / picoseconds, "b": "x", "c": 2.4}') + assert params_dict == {'a': 3.2 / unit.picosecond, 'b': 'x', 'c': 2.4} @attr('unit') def test_eval_quantity_string(): - quantity = ensembler.param_parsers.eval_quantity_string('2 picoseconds') - assert quantity == 2 * simtk.unit.picosecond \ No newline at end of file + quantity = eval_quantity_string('2 picoseconds') + assert quantity == 2 * unit.picosecond + + +@attr('unit') +def test_manual_overrides_file(): + with enter_temp_dir(): + ref_manual_overrides_file = get_installed_resource_filename( + os.path.join('tests', 'example_project', 'manual-overrides.yaml') + ) + shutil.copy(ref_manual_overrides_file, manual_overrides_filename) + manual_overrides = ManualOverrides() + assert manual_overrides.target.domain_spans == {'ABL1_HUMAN_D0': '242-513'} + 
assert manual_overrides.template.min_domain_len == 0 + assert manual_overrides.template.max_domain_len == 350 + assert manual_overrides.template.domain_spans == {'ABL1_HUMAN_D0': '242-513'} + assert manual_overrides.template.skip_pdbs == [ + '4CYJ', '4P41', '4P2W', '4QTD', '4Q2A', '4CTB', '4QOX' + ] + assert manual_overrides.refinement.ph == 8.0 + assert manual_overrides.refinement.custom_residue_variants_by_targetid == { + 'EGFR_HUMAN_D0': {49: 'ASH'} + } diff --git a/ensembler/tests/test_gather_templates.py b/ensembler/tests/test_gather_templates.py index dfa615c..5bb9973 100644 --- a/ensembler/tests/test_gather_templates.py +++ b/ensembler/tests/test_gather_templates.py @@ -4,7 +4,7 @@ from io import StringIO else: from StringIO import StringIO -from ensembler.tests.utils import get_installed_resource_filename +from ensembler.utils import get_installed_resource_filename import ensembler.pdb from mock import Mock from nose.plugins.attrib import attr @@ -13,7 +13,7 @@ @attr('unit') def test_extract_residues_by_resnum_from_4CFE(): # 4CFE contains a 'TPO' residue - pdb_input_filepath = get_installed_resource_filename(os.path.join('resources', '4CFE.pdb.gz')) + pdb_input_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', '4CFE.pdb.gz')) template = Mock() template.chainid= 'A' template.resolved_pdbresnums = [str(x) for x in range(16, 269)] @@ -25,7 +25,7 @@ def test_extract_residues_by_resnum_from_4CFE(): @attr('unit') def test_extract_residues_by_resnum_from_3HLL(): # 3HLL contains resnums '56A' and '93B' - pdb_input_filepath = get_installed_resource_filename(os.path.join('resources', '3HLL.pdb.gz')) + pdb_input_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', '3HLL.pdb.gz')) template = Mock() template.chainid = 'A' template.resolved_pdbresnums = [str(x) for x in range(24, 172) + range(183, 309)] @@ -38,7 +38,7 @@ def test_extract_residues_by_resnum_from_3HLL(): @attr('unit') def 
test_extract_residues_by_resnum_output(): - pdb_input_filepath = get_installed_resource_filename(os.path.join('resources', '3HLL.pdb.gz')) + pdb_input_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', '3HLL.pdb.gz')) template = Mock() template.chainid = 'A' template.resolved_pdbresnums = [str(x) for x in range(24, 172) + range(183, 309)] diff --git a/ensembler/tests/test_initproject.py b/ensembler/tests/test_initproject.py index 7ff3cfc..335e9cc 100644 --- a/ensembler/tests/test_initproject.py +++ b/ensembler/tests/test_initproject.py @@ -1,8 +1,6 @@ import os - from lxml import etree from nose.plugins.attrib import attr - import ensembler import ensembler.cli import ensembler.cli_commands @@ -10,9 +8,7 @@ import ensembler.tests import ensembler.core import ensembler.uniprot -from ensembler.utils import enter_temp_dir - -from ensembler.tests.utils import get_installed_resource_filename +from ensembler.utils import enter_temp_dir, get_installed_resource_filename from ensembler.tests.integrationtest_utils import integrationtest_context @@ -30,6 +26,7 @@ def test_initproject(): assert os.path.exists(ensembler.core.default_project_dirnames.templates_structures_resolved) assert os.path.exists(ensembler.core.default_project_dirnames.templates_structures_modeled_loops) assert os.path.exists('meta0.yaml') + assert os.path.exists(ensembler.core.manual_overrides_filename) @attr('unit') @@ -103,7 +100,7 @@ def test_extract_targets_from_targetexplorer_json(): @attr('unit') def test_attempt_symlink_structure_files(): pdbid = '4CFE' - structure_paths = [get_installed_resource_filename(os.path.join('resources'))] + structure_paths = [get_installed_resource_filename(os.path.join('tests', 'resources'))] with enter_temp_dir(): os.mkdir('pdb') project_pdb_filepath = os.path.join('pdb', pdbid + '.pdb.gz') @@ -116,7 +113,7 @@ def test_attempt_symlink_structure_files(): def test_log_unique_domain_names(): with open( get_installed_resource_filename( - 
os.path.join('resources', 'uniprot-CK1-kinases.xml') + os.path.join('tests', 'resources', 'uniprot-CK1-kinases.xml') ) ) as uniprotxml_file: uniprotxml_string = ensembler.uniprot.remove_uniprot_xmlns(uniprotxml_file.read()) diff --git a/ensembler/tests/test_loopmodel.py b/ensembler/tests/test_loopmodel.py index 778d83c..ca6777c 100644 --- a/ensembler/tests/test_loopmodel.py +++ b/ensembler/tests/test_loopmodel.py @@ -5,14 +5,33 @@ from nose.plugins.attrib import attr import ensembler import ensembler.initproject -from ensembler.tests.utils import get_installed_resource_filename +from ensembler.core import get_templates_full_seq +from ensembler.utils import get_installed_resource_filename, set_loglevel from ensembler.tests.integrationtest_utils import integrationtest_context from ensembler.modeling import pdbfix_templates, pdbfix_template, loopmodel_template @attr('unit') def test_pdbfix_KC1D_HUMAN_D0_4KB8_D(): - template_pdb_gz_filepath = get_installed_resource_filename(os.path.join('resources', 'KC1D_HUMAN_D0_4KB8_D.pdb.gz')) + set_loglevel('debug') + with integrationtest_context(set_up_project_stage='templates_resolved'): + template = Mock() + template.id = 'KC1D_HUMAN_D0_4KB8_D' + template.seq = 'LRVGNRYRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLFHRQGFSYDYVFDWNMLKFGASRAADDAERERRDREERLRH' + missing_residues = pdbfix_template(template) + + assert (0, 278) not in missing_residues + assert missing_residues == { + (0, 8): ['SER', 'GLY', 'SER', 'PHE', 'GLY'], + (0, 28): ['VAL', 'LYS', 'THR', 'LYS', 'HIS'], + (0, 141): ['ARG', 'THR', 'HIS'], + } + + +@attr('unit') +def test_pdbfix_KC1D_HUMAN_D0_4KB8_D_old(): + set_loglevel('debug') + template_pdb_gz_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', 
'KC1D_HUMAN_D0_4KB8_D.pdb.gz')) template_pdb_filepath = os.path.join(ensembler.core.default_project_dirnames.templates_structures_resolved, 'KC1D_HUMAN_D0_4KB8_D.pdb') with ensembler.utils.enter_temp_dir(): ensembler.utils.create_dir(ensembler.core.default_project_dirnames.templates_structures_resolved) @@ -39,7 +58,8 @@ def test_pdbfix_KC1D_HUMAN_D0_4KB8_D(): @attr('unit') def test_pdbfix_ABL1_HUMAN_D0_2E2B_B(): - template_pdb_gz_filepath = get_installed_resource_filename(os.path.join('resources', 'ABL1_HUMAN_D0_2E2B_B.pdb.gz')) + set_loglevel('debug') + template_pdb_gz_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', 'ABL1_HUMAN_D0_2E2B_B.pdb.gz')) template_pdb_filepath = os.path.join(ensembler.core.default_project_dirnames.templates_structures_resolved, 'ABL1_HUMAN_D0_2E2B_B.pdb') with ensembler.utils.enter_temp_dir(): ensembler.utils.create_dir(ensembler.core.default_project_dirnames.templates_structures_resolved) @@ -65,9 +85,10 @@ def test_pdbfix_ABL1_HUMAN_D0_2E2B_B(): @attr('unit') def test_pdbfix_templates(): - template1_pdb_gz_filepath = get_installed_resource_filename(os.path.join('resources', 'KC1D_HUMAN_D0_4KB8_D.pdb.gz')) + set_loglevel('debug') + template1_pdb_gz_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', 'KC1D_HUMAN_D0_4KB8_D.pdb.gz')) template1_pdb_filepath = os.path.join(ensembler.core.default_project_dirnames.templates_structures_resolved, 'KC1D_HUMAN_D0_4KB8_D.pdb') - template2_pdb_gz_filepath = get_installed_resource_filename(os.path.join('resources', 'KC1D_HUMAN_D0_3UYS_D.pdb.gz')) + template2_pdb_gz_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', 'KC1D_HUMAN_D0_3UYS_D.pdb.gz')) template2_pdb_filepath = os.path.join(ensembler.core.default_project_dirnames.templates_structures_resolved, 'KC1D_HUMAN_D0_3UYS_D.pdb') with ensembler.utils.enter_temp_dir(): ensembler.utils.create_dir(ensembler.core.default_project_dirnames.templates_structures_resolved) 
diff --git a/ensembler/tests/test_modeling.py b/ensembler/tests/test_modeling.py index 0c03b69..d11da55 100644 --- a/ensembler/tests/test_modeling.py +++ b/ensembler/tests/test_modeling.py @@ -6,10 +6,9 @@ import ensembler import ensembler.tests import ensembler.modeling -from ensembler.tests.utils import get_installed_resource_filename from ensembler.tests.integrationtest_utils import integrationtest_context import ensembler.cli_commands -from ensembler.utils import enter_temp_dir +from ensembler.utils import enter_temp_dir, get_installed_resource_filename @attr('modeller') @@ -20,8 +19,8 @@ def test_import_modeller(): @attr('modeller') def test_build_model(): - template_filepath = get_installed_resource_filename(os.path.join('resources', 'mock_template.pdb')) - aln_filepath = get_installed_resource_filename(os.path.join('resources', 'mock_template-alignment.pir')) + template_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', 'mock_template.pdb')) + aln_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', 'mock_template-alignment.pir')) with enter_temp_dir(): target = Mock() @@ -60,7 +59,7 @@ def test_build_model(): @attr('unit') def test_align_command(): - ref_resources_dirpath = get_installed_resource_filename('example_project') + ref_resources_dirpath = get_installed_resource_filename(os.path.join('tests', 'example_project')) with integrationtest_context(set_up_project_stage='templates_modeled_loops'): targets = ['KC1D_HUMAN_D0', 'EGFR_HUMAN_D0'] templates = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A'] diff --git a/ensembler/tests/test_packaging.py b/ensembler/tests/test_packaging.py index cfeb4ee..47879ce 100644 --- a/ensembler/tests/test_packaging.py +++ b/ensembler/tests/test_packaging.py @@ -1,11 +1,15 @@ import os -from nose.plugins.attrib import attr +import gzip +import simtk.openmm as mm from ensembler.packaging import package_for_fah from ensembler.core import default_project_dirnames +from 
ensembler.utils import get_installed_resource_filename from ensembler.tests.integrationtest_utils import integrationtest_context +from nose.plugins.attrib import attr @attr('unit') +@attr('slow') def test_package_for_fah(): with integrationtest_context(set_up_project_stage='refined_explicit'): package_for_fah( @@ -55,3 +59,19 @@ def test_package_for_fah(): 'RUN{}'.format(run_id), run_filename )) + + # test whether kinetic energy in new state file is reasonable + test_state_filepath = os.path.join(packaged_project_base_path, 'RUN0', 'state0.xml') + with open(test_state_filepath) as test_state_file: + test_state = mm.XmlSerializer.deserialize(test_state_file.read()) + ref_state_filepath = get_installed_resource_filename(os.path.join( + 'tests', 'example_project', 'models', + 'EGFR_HUMAN_D0', 'KC1D_HUMAN_D0_4HNF_A', 'explicit-state.xml.gz' + )) + with gzip.open(ref_state_filepath) as ref_state_file: + ref_state = mm.XmlSerializer.deserialize(ref_state_file.read()) + test_state_kinetic_energy = test_state.getKineticEnergy() + ref_state_kinetic_energy = ref_state.getKineticEnergy() + assert abs( + test_state_kinetic_energy - ref_state_kinetic_energy + ) < ref_state_kinetic_energy diff --git a/ensembler/tests/test_refinement.py b/ensembler/tests/test_refinement.py new file mode 100644 index 0000000..dccd95b --- /dev/null +++ b/ensembler/tests/test_refinement.py @@ -0,0 +1,56 @@ +import os +import yaml +import mdtraj +from ensembler.refinement import refine_implicit_md +from simtk import unit +from ensembler.core import default_project_dirnames +from ensembler.tests.integrationtest_utils import integrationtest_context +from nose.plugins.attrib import attr + + +@attr('unit') +@attr('slow') +def test_refine_implicit_md_short(): + with integrationtest_context(set_up_project_stage='clustered'): + targetid = 'EGFR_HUMAN_D0' + templateid = 'KC1D_HUMAN_D0_4KB8_D' + refine_implicit_md( + process_only_these_targets=[targetid], + process_only_these_templates=[templateid], + 
sim_length=2.0*unit.femtosecond, + nsteps_per_iteration=1, + loglevel='debug' + ) + implicit_metadata_filepath = os.path.join( + default_project_dirnames.models, targetid, 'refine_implicit_md-meta0.yaml' + ) + implicit_model_filepath = os.path.join( + default_project_dirnames.models, targetid, templateid, 'implicit-refined.pdb.gz' + ) + implicit_energies_filepath = os.path.join( + default_project_dirnames.models, targetid, templateid, 'implicit-energies.txt' + ) + implicit_log_filepath = os.path.join( + default_project_dirnames.models, targetid, templateid, 'implicit-log.yaml' + ) + + assert all(map( + os.path.exists, + [implicit_model_filepath, implicit_energies_filepath, implicit_log_filepath] + )) + with open(implicit_log_filepath) as implicit_log_file: + implicit_log = yaml.load(implicit_log_file) + assert implicit_log.get('finished') is True + assert implicit_log.get('successful') is True + assert implicit_log.get('ph') == 8.0 + assert os.path.exists(implicit_metadata_filepath) + with open(implicit_metadata_filepath) as implicit_metadata_file: + implicit_metadata = yaml.load(implicit_metadata_file) + assert implicit_metadata.get('refine_implicit_md').get('custom_residue_variants') == { + 'EGFR_HUMAN_D0': {49: 'ASH'} + } + implicit_model_traj = mdtraj.load_pdb(implicit_model_filepath) + resis = [resi for resi in implicit_model_traj.top.residues] + resi49 = resis[49] + resi49_atom_strings = [str(atom) for atom in resi49.atoms] + assert 'ASP50-HD2' in resi49_atom_strings diff --git a/ensembler/tests/test_tools.py b/ensembler/tests/test_tools.py index b2f9843..c6d8c01 100644 --- a/ensembler/tests/test_tools.py +++ b/ensembler/tests/test_tools.py @@ -1,8 +1,14 @@ from ensembler.tools.inspect import LoopmodelLogs +from ensembler.tools.mktraj import MkTrajImplicitStart from ensembler.tests.integrationtest_utils import integrationtest_context def test_loopmodel_logs(): with integrationtest_context(set_up_project_stage='templates_modeled_loops'): loopmodel_logs = 
LoopmodelLogs() - loopmodel_logs.add_missing_resis_data() \ No newline at end of file + loopmodel_logs.add_missing_resis_data() + + +def test_mktraj_implicit_start(): + with integrationtest_context(set_up_project_stage='refined_explicit'): + MkTrajImplicitStart(targetid='EGFR_HUMAN_D0', loglevel='debug') diff --git a/ensembler/tests/utils.py b/ensembler/tests/utils.py index f15e02c..c5112da 100644 --- a/ensembler/tests/utils.py +++ b/ensembler/tests/utils.py @@ -1,8 +1,5 @@ -import ensembler.tests import functools import nose -import os -from pkg_resources import resource_filename def expected_failure(test): @@ -17,32 +14,3 @@ def inner(*args, **kwargs): 'A failure was expected, but this test appeared to pass. You may want to remove the expected_failure decorator.' ) return inner - - -def get_installed_resource_filename(relative_path): - """Get the full path to one of the reference files shipped for testing. - In the source distribution, these files are in ``ensembler/tests/resources`` and - ``ensembler/tests/example_project``, but on installation, they're moved to somewhere - in the user's python site-packages directory. - This function uses the pkg_resources package to find the file within the installation directory - structure. - - Parameters - ---------- - name : str - Name of the file to load (with respect to the ``ensembler/tests`` folder). - - Examples - -------- - get_installed_resource_filename('example_project/meta0.yaml') - """ - - fn = resource_filename(ensembler.tests.__name__, relative_path) - - if not os.path.exists(fn): - raise ValueError( - "Sorry! {0} does not exist." 
- "If you just added it, you'll have to re-install".format(relative_path) - ) - - return fn \ No newline at end of file diff --git a/ensembler/tools/mktraj.py b/ensembler/tools/mktraj.py index c57a10b..ad05fb2 100644 --- a/ensembler/tools/mktraj.py +++ b/ensembler/tools/mktraj.py @@ -37,6 +37,10 @@ def __init__(self, targetid, ensembler_stage=None, traj_filepath=None, topol_fil traj : mdtraj.Trajectory df : pandas.DataFrame models data (e.g. sequence identities): + + Examples + -------- + >>> MkTraj(targetid='EGFR_HUMAN_D0') """ ensembler.utils.set_loglevel(loglevel) ensembler.core.check_project_toplevel_dir() @@ -148,7 +152,16 @@ class MkTrajImplicitStart(MkTraj): def __init__(self, targetid, traj_filepath=None, topol_filepath=None, models_data_filepath=None, process_only_these_templates=None, loglevel=None, run_main=True): - """Quick hack. + """ + Makes trajectory of the model files with added hydrogens, but prior to any refinement. + For the specified target, makes a single topology pdb file, a single trajectory xtc file, + and individual pdb files for each model. + + See docs on `MkTraj` for further info on parameters.
+ Examples + -------- + MkTrajImplicitStart(targetid='EGFR_HUMAN_D0') """ ensembler.utils.set_loglevel(loglevel) ensembler.core.check_project_toplevel_dir() @@ -255,4 +268,4 @@ def _gen_implicit_start_models( except Exception as e: print 'Error for model {0}: {1}'.format(templateid, e) continue - # import ipdb; ipdb.set_trace() \ No newline at end of file + # import ipdb; ipdb.set_trace() diff --git a/ensembler/utils.py b/ensembler/utils.py index cbc4399..30c0751 100644 --- a/ensembler/utils.py +++ b/ensembler/utils.py @@ -5,7 +5,9 @@ import functools import shutil import tempfile +from pkg_resources import resource_filename from ensembler.core import logger, mpistate +import ensembler def nonefn(): @@ -135,3 +137,32 @@ def read_file_contents_gz_or_not(base_filepath): raise IOError('File {} not found'.format(base_filepath)) return contents + + +def get_installed_resource_filename(relative_path): + """ + Returns the installation path of a resource file shipped with the code. + + Parameters + ---------- + relative_path: str + Name of the file to load (relative to the `ensembler` main code directory). + + Returns + ------- + installed_filepath: str + absolute path of the installed file + + Examples + -------- + >>> get_installed_resource_filename('tests/example_project/meta0.yaml') + """ + installed_filepath = resource_filename(ensembler.__name__, relative_path) + + if not os.path.exists(installed_filepath): + raise ValueError( + "Sorry! {0} does not exist."
+ "If you just added it, you'll have to re-install".format(relative_path) + ) + + return installed_filepath diff --git a/setup.py b/setup.py index 87ca3c6..f06a354 100644 --- a/setup.py +++ b/setup.py @@ -89,19 +89,13 @@ def write_version_py(filename=ensembler_version_filepath): ########################## -def find_package_data(): +def find_package_data(dir_to_search=None): package_data = [] - basepath = os.path.join('ensembler', 'tests') - dirs_to_search = [ - os.path.join('ensembler', 'tests', 'resources'), - os.path.join('ensembler', 'tests', 'example_project') - ] - for dir_to_search in dirs_to_search: - for dir, subdirs, files in os.walk(dir_to_search): - for file in files: - if file[0] != '.': - filepath = os.path.join(dir, file).replace(basepath + os.path.sep, '') - package_data.append(filepath) + for dir, subdirs, files in os.walk(dir_to_search): + for file in files: + if file[0] != '.': + filepath = os.path.join(dir, file).replace(dir_to_search + os.path.sep, '') + package_data.append(filepath) return package_data @@ -124,7 +118,10 @@ def find_package_data(): 'ensembler.tools', 'ensembler.tests', ], - package_data = {'ensembler.tests': find_package_data()}, + package_data = { + 'ensembler': find_package_data(dir_to_search='ensembler'), + 'ensembler.tests': find_package_data(dir_to_search=os.path.join('ensembler', 'tests')), + }, entry_points = {'console_scripts': [