4dn-dcic · clarabakker · Apr 13, 2021 · Apr 13, 2021 · Apr 13, 2021 · Apr 15, 2021
diff --git a/chalicelib/check_setup.json b/chalicelib/check_setup.json
@@ -1921,6 +1921,18 @@
             }
         }
     },
+    "cut_and_run_status" : {
+        "title": "CUT&RUN Pipeline",
+        "group": "Pipeline checks",
+        "schedule": {
+            "hourly_checks": {
+                "webdev": {
+                    "kwargs": {"primary": true},
+                    "dependencies": []
+                }
+            }
+        }
+    },
     "scale_down_elasticsearch_production": {
         "title": "Scale down production ElasticSearch Cluster",
         "group": "System checks",

diff --git a/chalicelib/checks/helpers/wfr_utils.py b/chalicelib/checks/helpers/wfr_utils.py
@@ -147,6 +147,18 @@
                 "run_time": 200,
                 "accepted_versions": ['v1.2']
     },
+    'cut_and_run_workflow': {
+        "run_time": 200,
+        "accepted_versions": ['v1']
+    },
+    'cut_and_run_ctl_workflow': {
+        "run_time": 200,
+        "accepted_versions": ['v1']
+    },
+    'cut_and_run_peaks': {
+        "run_time": 200,
+        "accepted_versions": ['v1']
+    },
     'mcoolQC': {
                 "run_time": 200,
                 "accepted_versions": ['v1']
@@ -209,7 +221,7 @@
     'DNA SPRITE': [''],
     'RNA-DNA SPRITE': [''],
     'GAM': [''],
-    'CUT&RUN': [''],
+    'CUT&RUN': ['CUT_AND_RUN_v1'],
     'TRIP': ['']
     }
 
@@ -442,7 +454,7 @@ def stepper(library, keep,
     problematic_run = keep['problematic_run']
     missing_run = keep['missing_run']
 
-    # Lets get the repoinse from one of the input files that will be used in this step
+    # Let's get the response from one of the input files that will be used in this step
     # if it is a list take the first item, if not use it as is
     # new_step_input_file must be the @id
     # also check for qc status
@@ -607,7 +619,7 @@ def get_wfr_out(emb_file, wfr_name, key=None, all_wfrs=None, versions=None,
         if len(same_type_wfrs) >= error_at_failed_runs:
             return {'status': "no complete run, too many errors"}
 
-        return {'status': "no complete run, errrored"}
+        return {'status': "no complete run, errored"}
     # if other statuses, started running
     elif run_duration < run:
         return {'status': "running"}
@@ -704,6 +716,7 @@ def extract_file_info(obj_id, arg_name, additional_parameters, auth, env, rename
                     my_bucket = raw_bucket
                 buckets.append(my_bucket)
         # check bucket consistency
+        print("Buckets: ", buckets)
         assert len(list(set(buckets))) == 1
         template['uuid'] = uuid
         if rename:
@@ -1408,7 +1421,8 @@ def patch_complete_data(patch_data, pipeline_type, auth, move_to_pc=False, pc_ap
               'margi': "iMARGI Processing Pipeline - Preliminary Files",
               'rnaseq': "ENCODE RNA-Seq Pipeline - Preliminary Files",
               'insulation_scores_and_boundaries': "Insulation scores and boundaries calls - Preliminary Files",
-              'compartments': "Compartments Signals - Preliminary Files"}
+              'compartments': "Compartments Signals - Preliminary Files",
+              "cutnrun": "CUT&RUN Pipeline - Preliminary Files"}
     """move files to other processed_files field."""
     if not patch_data.get('patch_opf'):
         return ['no content in patch_opf, skipping']
@@ -1543,7 +1557,8 @@ def run_missing_wfr(input_json, input_files_and_params, run_name, auth, env, fs_
 def start_missing_run(run_info, auth, env, fs_env):
     attr_keys = ['fastq1', 'fastq', 'input_pairs', 'input_bams', 'input_fastqs',
                  'fastq_R1', 'input_bam', 'rna.fastqs_R1', 'mad_qc.quantfiles', 'mcoolfile',
-                 'chip.ctl_fastqs', 'chip.fastqs', 'chip.tas', 'atac.fastqs', 'atac.tas']
+                 'chip.ctl_fastqs', 'chip.fastqs', 'chip.tas', 'atac.fastqs', 'atac.tas',
+                 'input_fastqs_R1', 'input_fastqs_R2', 'input_bedpe']
     run_settings = run_info[1]
     inputs = run_info[2]
     name_tag = run_info[3]
@@ -1568,7 +1583,7 @@ def start_missing_run(run_info, auth, env, fs_env):
     if not attr_file:
         possible_keys = [i for i in inputs.keys() if i != 'additional_file_parameters']
         error_message = ('one of these argument names {} which carry the input file -not the references-'
-                         ' should be added to att_keys dictionary on foursight cgap_utils.py function start_missing_run').format(possible_keys)
+                         ' should be added to att_keys dictionary on foursight wfr_utils.py function start_missing_run').format(possible_keys)
         raise ValueError(error_message)
     attributions = get_attribution(ff_utils.get_metadata(attr_file, auth))
     settings = wfrset_utils.step_settings(run_settings[0], run_settings[1], attributions, run_settings[2])

diff --git a/chalicelib/checks/helpers/wfrset_utils.py b/chalicelib/checks/helpers/wfrset_utils.py
@@ -421,6 +421,72 @@ def step_settings(step_name, my_organism, attribution, overwrite=None):
         "overwrite_input_extra": False,
         "config": {"ebs_size": 10, "instance_type": "c5ad.2xlarge"}
     },
+    {
+        "app_name": "cut_and_run_workflow",
+        "workflow_uuid": "c5db38be-f139-4157-9832-398bda2c62d2",
+        "parameters": {
+            "nthreads_trim": 4,
+            "nthreads_aln": 4
+            },
+        "config": {'mem': 8, 'cpu': 4, 'ebs_size': 28},
+        "custom_pf_fields": {
+            "out_bam": {
+                "genome_assembly": genome,
+                "file_type": "read positions",
+                "description": "Alignment output file from CUT&RUN"
+                },
+            "out_bedpe": {
+                "genome_assembly": genome,
+                "file_type": "intermediate file",
+                "description": "Filtered reads, output file from CUT&RUN"
+            }
+        }
+    },
+    {
+        "app_name": "cut_and_run_ctl_workflow",
+        "workflow_uuid": "04895a25-b609-4fc8-b0d5-9dd9e45d9237",
+        "parameters": {
+            "nthreads_trim": 4,
+            "nthreads_aln": 4
+        },
+        "config": {'mem': 8, 'cpu': 4, 'ebs_size': 20},
+        "custom_pf_fields": {
+            "out_bam": {
+                "genome_assembly": genome,
+                "file_type": "read positions",
+                "description": "Alignment output file from CUT&RUN",
+                'disable_wfr_inputs': True
+                },
+            "out_bedpe": {
+                "genome_assembly": genome,
+                "file_type": "intermediate file",
+                "description": "Filtered reads, output file from CUT&RUN",
+                'disable_wfr_inputs': True
+            }
+        }
+    },
+    {
+        "app_name": "cut_and_run_peaks",
+        "workflow_uuid": "b43bcc4e-d566-4fbf-a0bb-375a2ad517d8",
+        "config": {'mem': 32, 'cpu': 8, 'ebs_size': 50},
+        'custom_pf_fields': {
+            "out_bedg": {
+                "genome_assembly": genome,
+                "file_type": "peaks",
+                "description": "Peaks output file from CUT&RUN"
+                },
+            "out_narrow_bed": {
+                "genome_assembly": genome,
+                "file_type": "peaks",
+                "description": "Location of max signal in peaks output file from CUT&RUN"
+                },
+            "out_bw": {
+                "genome_assembly": genome,
+                "file_type": "signal fold change",
+                "description": "Signal track from CUT&RUN"
+            }
+        }
+    },
     # temp
     {
         "app_name": "",