Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CUT&RUN to wfr_checks #465

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
4ea8ae0
set up
clarabakker Apr 13, 2021
4dc2853
original
clarabakker Apr 13, 2021
222f0f4
Resolved merge conflict by temporarily eliminating new wfr check
clarabakker Apr 13, 2021
b314060
temp rm wfr_checks
clarabakker Apr 15, 2021
24cee6d
replaced file, added new fn
clarabakker Apr 15, 2021
05f1373
still building check body, added setup
clarabakker May 7, 2021
b8095d5
temporarily removing to handle rebase issue
clarabakker May 7, 2021
35a5494
Merge branch 'master' into cut_and_run
clarabakker May 7, 2021
729c8c2
merged in master, resolved fn
clarabakker May 7, 2021
c8fbff8
check main body, settings
clarabakker Jun 7, 2021
8592f22
full check
clarabakker Jun 15, 2021
bba798c
configuration tweaks, typo fix in error message
clarabakker Jun 17, 2021
45cfbc9
added EC2 config
clarabakker Jun 29, 2021
469dac3
merge in master and resolve conflicts (additions for CUT&RUN)
clarabakker Jun 29, 2021
83b0dcd
fix for yml dict error (actually just a nested array) and more cpu/me…
clarabakker Jul 7, 2021
e234757
added missing patch data
clarabakker Jul 9, 2021
105d8be
patch data fix testing, adding files to exp_id instead of set_acc
clarabakker Jul 13, 2021
f753bd7
Merge branch 'master' into cut_and_run
clarabakker Jul 27, 2021
25045e5
adjustments to processed files' description, order, and patch data
clarabakker Aug 13, 2021
51a460b
fixed comments, removed extra print statements
clarabakker Aug 17, 2021
0a84b15
modified query to ignore expsets with completed wfrs and added dict f…
clarabakker Aug 24, 2021
63283cf
added workflow settings and step for control files
clarabakker Sep 8, 2021
5932b3b
Merge remote-tracking branch 'origin/use_tibanna' into cut_tib
clarabakker Sep 17, 2021
934179e
added memory for testing w/ Picard
clarabakker Sep 17, 2021
4a32a2f
Merge branch 'use_tibanna' of https://github.com/4dn-dcic/foursight i…
SooLee Sep 17, 2021
cd4ea6c
Merge branch 'master' into cut_tib
clarabakker Sep 24, 2021
416784c
added ebs_size to CUT&RUN (increased default)
clarabakker Sep 24, 2021
66d6458
Merge branch 'sfn_fix_multistep' into cut_tib
clarabakker Sep 24, 2021
4061c16
updated cut&run only for fs_env in start_tasks
clarabakker Sep 24, 2021
1d60186
replaced bg with bedpe for step1 outputs/step2 input
clarabakker Oct 12, 2021
777f3e7
Restructured CUT&RUN step2 (postaln) for bio-replicate merging, proce…
clarabakker Oct 21, 2021
53aa5af
added bw to CUT&RUN step2 output
clarabakker Oct 21, 2021
d47fb37
shifted CUT&RUN memory requirements for new workflows
clarabakker Nov 3, 2021
f1414d0
delete misc comments, make bg2bw skippable, bump version
clarabakker Nov 12, 2021
ad96008
Merge branch 'master' into cut_tib
clarabakker Nov 12, 2021
4d8f42e
Update wfrset_utils.py
clarabakker Nov 12, 2021
9c65429
Update wfrset_utils.py
clarabakker Nov 12, 2021
3932da4
simplify chrom.sizes selection
clarabakker Dec 3, 2021
8894cd2
Merge branch 'cut_tib' of https://github.com/4dn-dcic/foursight into …
clarabakker Dec 3, 2021
dfa8e81
add third output for bed narrower peak region (max signal)
clarabakker Dec 4, 2021
1db9811
(part 2) add third output for bed narrower peaks region (max signal)
clarabakker Dec 4, 2021
c8d7257
output file specification and ebs size increase
clarabakker Jan 4, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions chalicelib/check_setup.json
Original file line number Diff line number Diff line change
Expand Up @@ -1921,6 +1921,18 @@
}
}
},
"cut_and_run_status" : {
"title": "CUT&RUN Pipeline",
"group": "Pipeline checks",
"schedule": {
"hourly_checks": {
"webdev": {
"kwargs": {"primary": true},
"dependencies": []
}
}
}
},
"scale_down_elasticsearch_production": {
"title": "Scale down production ElasticSearch Cluster",
"group": "System checks",
Expand Down
27 changes: 21 additions & 6 deletions chalicelib/checks/helpers/wfr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,18 @@
"run_time": 200,
"accepted_versions": ['v1.2']
},
'cut_and_run_workflow': {
"run_time": 200,
"accepted_versions": ['v1']
},
'cut_and_run_ctl_workflow': {
"run_time": 200,
"accepted_versions": ['v1']
},
'cut_and_run_peaks': {
"run_time": 200,
"accepted_versions": ['v1']
},
'mcoolQC': {
"run_time": 200,
"accepted_versions": ['v1']
Expand Down Expand Up @@ -209,7 +221,7 @@
'DNA SPRITE': [''],
'RNA-DNA SPRITE': [''],
'GAM': [''],
'CUT&RUN': [''],
'CUT&RUN': ['CUT_AND_RUN_v1'],
'TRIP': ['']
}

Expand Down Expand Up @@ -442,7 +454,7 @@ def stepper(library, keep,
problematic_run = keep['problematic_run']
missing_run = keep['missing_run']

# Lets get the repoinse from one of the input files that will be used in this step
# Let's get the response from one of the input files that will be used in this step
# if it is a list take the first item, if not use it as is
# new_step_input_file must be the @id
# also check for qc status
Expand Down Expand Up @@ -607,7 +619,7 @@ def get_wfr_out(emb_file, wfr_name, key=None, all_wfrs=None, versions=None,
if len(same_type_wfrs) >= error_at_failed_runs:
return {'status': "no complete run, too many errors"}

return {'status': "no complete run, errrored"}
return {'status': "no complete run, errored"}
# if other statuses, started running
elif run_duration < run:
return {'status': "running"}
Expand Down Expand Up @@ -704,6 +716,7 @@ def extract_file_info(obj_id, arg_name, additional_parameters, auth, env, rename
my_bucket = raw_bucket
buckets.append(my_bucket)
# check bucket consistency
print("Buckets: ", buckets)
assert len(list(set(buckets))) == 1
template['uuid'] = uuid
if rename:
Expand Down Expand Up @@ -1408,7 +1421,8 @@ def patch_complete_data(patch_data, pipeline_type, auth, move_to_pc=False, pc_ap
'margi': "iMARGI Processing Pipeline - Preliminary Files",
'rnaseq': "ENCODE RNA-Seq Pipeline - Preliminary Files",
'insulation_scores_and_boundaries': "Insulation scores and boundaries calls - Preliminary Files",
'compartments': "Compartments Signals - Preliminary Files"}
'compartments': "Compartments Signals - Preliminary Files",
"cutnrun": "CUT&RUN Pipeline - Preliminary Files"}
"""move files to other processed_files field."""
if not patch_data.get('patch_opf'):
return ['no content in patch_opf, skipping']
Expand Down Expand Up @@ -1543,7 +1557,8 @@ def run_missing_wfr(input_json, input_files_and_params, run_name, auth, env, fs_
def start_missing_run(run_info, auth, env, fs_env):
attr_keys = ['fastq1', 'fastq', 'input_pairs', 'input_bams', 'input_fastqs',
'fastq_R1', 'input_bam', 'rna.fastqs_R1', 'mad_qc.quantfiles', 'mcoolfile',
'chip.ctl_fastqs', 'chip.fastqs', 'chip.tas', 'atac.fastqs', 'atac.tas']
'chip.ctl_fastqs', 'chip.fastqs', 'chip.tas', 'atac.fastqs', 'atac.tas',
'input_fastqs_R1', 'input_fastqs_R2', 'input_bedpe']
run_settings = run_info[1]
inputs = run_info[2]
name_tag = run_info[3]
Expand All @@ -1568,7 +1583,7 @@ def start_missing_run(run_info, auth, env, fs_env):
if not attr_file:
possible_keys = [i for i in inputs.keys() if i != 'additional_file_parameters']
error_message = ('one of these argument names {} which carry the input file -not the references-'
' should be added to att_keys dictionary on foursight cgap_utils.py function start_missing_run').format(possible_keys)
' should be added to att_keys dictionary on foursight wfr_utils.py function start_missing_run').format(possible_keys)
raise ValueError(error_message)
attributions = get_attribution(ff_utils.get_metadata(attr_file, auth))
settings = wfrset_utils.step_settings(run_settings[0], run_settings[1], attributions, run_settings[2])
Expand Down
66 changes: 66 additions & 0 deletions chalicelib/checks/helpers/wfrset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,72 @@ def step_settings(step_name, my_organism, attribution, overwrite=None):
"overwrite_input_extra": False,
"config": {"ebs_size": 10, "instance_type": "c5ad.2xlarge"}
},
{
"app_name": "cut_and_run_workflow",
"workflow_uuid": "c5db38be-f139-4157-9832-398bda2c62d2",
"parameters": {
"nthreads_trim": 4,
"nthreads_aln": 4
},
"config": {'mem': 8, 'cpu': 4, 'ebs_size': 28},
"custom_pf_fields": {
"out_bam": {
"genome_assembly": genome,
"file_type": "read positions",
"description": "Alignment output file from CUT&RUN"
},
"out_bedpe": {
"genome_assembly": genome,
"file_type": "intermediate file",
"description": "Filtered reads, output file from CUT&RUN"
}
}
},
{
"app_name": "cut_and_run_ctl_workflow",
"workflow_uuid": "04895a25-b609-4fc8-b0d5-9dd9e45d9237",
"parameters": {
"nthreads_trim": 4,
"nthreads_aln": 4
},
"config": {'mem': 8, 'cpu': 4, 'ebs_size': 20},
"custom_pf_fields": {
"out_bam": {
"genome_assembly": genome,
"file_type": "read positions",
"description": "Alignment output file from CUT&RUN",
'disable_wfr_inputs': True
},
"out_bedpe": {
"genome_assembly": genome,
"file_type": "intermediate file",
"description": "Filtered reads, output file from CUT&RUN",
'disable_wfr_inputs': True
}
}
},
{
"app_name": "cut_and_run_peaks",
"workflow_uuid": "b43bcc4e-d566-4fbf-a0bb-375a2ad517d8",
"config": {'mem': 32, 'cpu': 8, 'ebs_size': 50},
'custom_pf_fields': {
"out_bedg": {
"genome_assembly": genome,
"file_type": "peaks",
"description": "Peaks output file from CUT&RUN"
},
"out_narrow_bed": {
"genome_assembly": genome,
"file_type": "peaks",
"description": "Location of max signal in peaks output file from CUT&RUN"
},
"out_bw": {
"genome_assembly": genome,
"file_type": "signal fold change",
"description": "Signal track from CUT&RUN"
}
}
},
# temp
{
"app_name": "",
Expand Down
Loading