Skip to content

Commit

Permalink
Fix public release (#375)
Browse files (browse the repository at this point in the history)
* Add public data guide template

* Add public data guide

* Don't copy over samples to retract

* Reorder maf file
  • Loading branch information
thomasyu888 authored Jan 12, 2021
1 parent ae48e0d commit 4b23ce1
Show file tree
Hide file tree
Showing 4 changed files with 515 additions and 8 deletions.
14 changes: 8 additions & 6 deletions bin/consortium_to_public.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ def generate_dashboard_html(genie_version, staging=False,
genie_pass: GENIE synapse password
"""
markdown_render_cmd = ['Rscript',
os.path.join(PWD, '../genie/dashboard_markdown_generator.R'),
genie_version,
'--template_path',
os.path.join(PWD, '../genie/dashboardTemplate.Rmd')]
markdown_render_cmd = [
'Rscript', os.path.join(PWD, '../R/dashboard_markdown_generator.R'),
genie_version, '--template_path',
os.path.join(PWD, '../templates/dashboardTemplate.Rmd')
]

if genie_user is not None and genie_pass is not None:
markdown_render_cmd.extend(['--syn_user', genie_user,
Expand All @@ -51,7 +51,9 @@ def generate_data_guide(genie_version, oncotree_version=None,
genie_pass=None):
"""Generates the GENIE data guide"""

template_path = os.path.join(PWD, '../data_guide/data_guide_template.Rnw')
template_path = os.path.join(
PWD, '../templates/public_data_guide_template.Rnw'
)
with open(template_path, 'r') as template_file:
template_str = template_file.read()

Expand Down
5 changes: 3 additions & 2 deletions genie/consortium_to_public.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,8 @@ def consortiumToPublic(syn, processingDate, genie_version,
"data_gene_matrix.txt",
"data_clinical_patient.txt",
"data_guide.pdf",
"release_notes.pdf"]):
"release_notes.pdf",
"samples_to_retract.csv"]):
# data_gene_matrix was processed above because it had to be
# used for generating caselists
continue
Expand Down Expand Up @@ -211,7 +212,7 @@ def consortiumToPublic(syn, processingDate, genie_version,
elif "mutation" in entName:
mutation = syn.get(entId, followLink=True)
mutationDf = pd.read_csv(mutation.path, sep="\t", comment="#")
mutationDf = commonVariantFilter(mutationDf)
# mutationDf = commonVariantFilter(mutationDf)
mutationDf['FILTER'] = "PASS"
mutationDf = mutationDf[
mutationDf['Tumor_Sample_Barcode'].isin(publicReleaseSamples)]
Expand Down
7 changes: 7 additions & 0 deletions genie/database_to_staging.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,11 @@ def store_maf_files(syn,
with open(MUTATIONS_CENTER_PATH % center, 'w'):
pass
used_entities = []
# Use the first MAF's header as the column order, because the columns
# cannot be assumed to be in the same order in every MAF file
maf_ent = syn.get(centerMafSynIdsDf.id[0])
headerdf = pd.read_csv(maf_ent.path, sep="\t", comment="#", nrows=0)
column_order = headerdf.columns

for _, mafSynId in enumerate(centerMafSynIdsDf.id):
maf_ent = syn.get(mafSynId)
logger.info(maf_ent.path)
Expand All @@ -682,6 +687,8 @@ def store_maf_files(syn,
chunksize=100000)

for mafchunk in mafchunks:
# Reorder this chunk's columns to match column_order
mafchunk = mafchunk[column_order]
# Get center for center staging maf
# Configure maf
configured_mafdf = configure_maf(
Expand Down
Loading

0 comments on commit 4b23ce1

Please sign in to comment.