diff --git a/metatlas/untargeted/run_untargeted_pipeline.py b/metatlas/untargeted/run_untargeted_pipeline.py index 21b2ad40..4d1956aa 100644 --- a/metatlas/untargeted/run_untargeted_pipeline.py +++ b/metatlas/untargeted/run_untargeted_pipeline.py @@ -27,7 +27,7 @@ def main(): logging.info(f'Arguments used: {args}') ##### Step 1/7: Syncing LIMS and NERSC to identify new projects with raw data that are not yet in the untargeted task list - new_projects = mzm.update_new_untargeted_tasks(validate_names=args.validate_names, \ + new_projects = mzm.update_new_untargeted_tasks(validate_names=args.validate_names, mzmine_batch_params=args.mzmine_batch_params, \ output_dir=args.output_dir, raw_data_dir=args.raw_data_dir, raw_data_subdir=args.raw_data_subdir, \ background_designator=args.background_designator,skip_sync=step_bools[0]) @@ -82,6 +82,7 @@ def add_arguments(parser): parser.add_argument('--overwrite_fbmn', action='store_true', help='Overwrite existing fbmn results files that are already in the output directory') ## Step 1 only parser.add_argument('--validate_names', action='store_true', help='Validate filenames and project names') + parser.add_argument('--mzmine_batch_params', type=str, default=None, help='Add custom mzmine batch parameters xml') ## Step 1.5 only parser.add_argument('--hard_raw_data_mirror', action='store_true', help='Run the raw data mirror to GNPS2 before proceeding with pipeline') ## Step 2 only diff --git a/metatlas/untargeted/tools.py b/metatlas/untargeted/tools.py index 7e2d962f..27e29309 100644 --- a/metatlas/untargeted/tools.py +++ b/metatlas/untargeted/tools.py @@ -21,7 +21,7 @@ import shutil from typing import List, Dict, Union, Optional -BATCH_FILE_PATH = '/global/common/software/m2650/mzmine_parameters/batch_files/' +BATCH_FILE_PATH = '/global/common/software/m2650/mzmine_parameters/batch_files' BINARY_PATH = '/global/common/software/m2650/mzmine_parameters/MZmine' key_file = '/global/cfs/cdirs/metatlas/labkey_user.txt' @@ -52,8 +52,10 @@ #SBATCH -t 3:00:00 """ -mzine_batch_params_file = "/global/common/software/m2650/mzmine_parameters/batch_files/mzmine-3.7.2-batchparams.xml" -mzine_batch_params_file_iqx = "/global/common/software/m2650/mzmine_parameters/batch_files/IQX-mzmine-3.7.2-batchparams.xml" +mzine_batch_params_file = f"{BATCH_FILE_PATH}/mzmine-3.7.2-batchparams.xml" +mzine_batch_params_file_iqx = f"{BATCH_FILE_PATH}/IQX-mzmine-3.7.2-batchparams.xml" +mzine_batch_params_file_pos = f"{BATCH_FILE_PATH}/POS-mzmine-3.7.2-batchparams.xml" +mzine_batch_params_file_neg = f"{BATCH_FILE_PATH}/NEG-mzmine-3.7.2-batchparams.xml" def call_logger(log_filename: str, log_level: str, log_format: str): logging.basicConfig(filename=log_filename, level=log_level, format=log_format, filemode='a') @@ -1992,6 +1994,7 @@ def write_metadata_per_new_project( def update_new_untargeted_tasks( background_designator: List[str], validate_names: bool, + mzmine_batch_params: str, skip_sync: bool, output_dir: str, raw_data_dir: str, @@ -2081,17 +2084,21 @@ def update_new_untargeted_tasks( lims_untargeted_table_updater['output_dir'] = output_dir _, validate_machine_name, _ = vfn.field_exists(PurePath(project_name), field_num=6) logging.info(tab_print("Inferred machine name: %s"%(validate_machine_name), 2)) - if validate_machine_name is None: # Assume more lenient parameters if machine name cannot be validated - mzmine_running_parameters = mzine_batch_params_file_iqx - mzmine_parameter = 5 - elif any(substring in validate_machine_name.lower() for substring in ("iqx", "idx")): - mzmine_running_parameters = mzine_batch_params_file_iqx - mzmine_parameter = 5 - elif any(substring in validate_machine_name.lower() for substring in ("exp", "exploris", "qe")): - mzmine_running_parameters = mzine_batch_params_file - mzmine_parameter = 2 - else: # Assume more lenient parameters if machine name cannot be validated - mzmine_running_parameters = mzine_batch_params_file_iqx + if mzmine_batch_params is None: + if validate_machine_name is None: # Assume more lenient parameters if machine name cannot be validated + mzmine_running_parameters = mzine_batch_params_file_iqx + mzmine_parameter = 5 + elif any(substring in validate_machine_name.lower() for substring in ("iqx", "idx")): + mzmine_running_parameters = mzine_batch_params_file_iqx + mzmine_parameter = 5 + elif any(substring in validate_machine_name.lower() for substring in ("exp", "exploris", "qe")): + mzmine_running_parameters = mzine_batch_params_file + mzmine_parameter = 2 + else: # Assume more lenient parameters if machine name cannot be validated + mzmine_running_parameters = mzine_batch_params_file_iqx + mzmine_parameter = 5 + else: + mzmine_running_parameters = mzmine_batch_params mzmine_parameter = 5 logging.info(tab_print("Using MZmine parameters: %s"%(os.path.basename(mzmine_running_parameters)), 2)) lims_untargeted_table_updater['mzmine_parameter_sheet'] = mzmine_running_parameters