-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #107 from 4dn-dcic/add_opf2eset_nb
Add opf2eset nb
- Loading branch information
Showing
3 changed files
with
134 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
127 changes: 127 additions & 0 deletions
127
notebooks/useful_notebooks/15_add_opf_collections_to_sets.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Uses information from a submitter PF spreadsheet to add files to the appropriate place in indicated linked items\n", | ||
"### Currently for ExperimentSets opfs but should be extended to Experiments, Pubs and possibly pages\n", | ||
"\n", | ||
"#### Setup\n", | ||
"\n", | ||
"- Provide a title and description to use for the opf section.\n", | ||
"- Indicate the path to the workbook." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from dcicutils import ff_utils\n", | ||
"from functions.notebook_functions import *\n", | ||
"from functions.wfr import *\n", | ||
"\n", | ||
"# get key from keypairs.json\n", | ||
"my_key = get_key('andyprod')\n", | ||
"\n", | ||
"# set title here\n", | ||
"opf_type = 'supplementary'\n", | ||
"opf_title = \"Analysis results provided by the data submitters - the Diao Lab\"\n", | ||
"opf_desc = \"The results were generated by the Diao lab using the code available at https://github.com/jianhong/hicar/releases/tag/2.0.0rc\"\n", | ||
"\n", | ||
"# location of excel processed file sheet\n", | ||
"xcel_file = '/Users/andrew/Documents/work/4DN_Metadata/Diao_Yarui_lab/HiCAR_RNAseq_myoblast_diff/230830_hicar_processed_results_ajs_upd.xlsx'\n", | ||
"xcel, sheets = digest_xlsx(xcel_file)\n", | ||
"xcel_data = reader(xcel)\n", | ||
"\n", | ||
"fieldnames = next(xcel_data)\n", | ||
"\n", | ||
"# create a dict mapping each dataset to its processed files\n", | ||
"dset2opfs = {}\n", | ||
"for row in xcel_data:\n", | ||
" if row[0].startswith('#'):\n", | ||
" continue\n", | ||
" data = dict(zip(fieldnames, row))\n", | ||
" data = {k: v for k, v in data.items() if v}\n", | ||
" file_alias = data.get('aliases')\n", | ||
" dataset = data.get('# linked_datasets')\n", | ||
" dset2opfs.setdefault(dataset, []).append(file_alias)\n", | ||
" \n", | ||
"print(dset2opfs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# go through the dictionary and create a patch for the dataset \n", | ||
"# need to check and maintain other opfs\n", | ||
"\n", | ||
"# if True do the action, if false just report\n", | ||
"action = True\n", | ||
"\n", | ||
"for ds, opfs in dset2opfs.items():\n", | ||
" opf_obj = {'type': opf_type, 'title': opf_title, 'files': opfs}\n", | ||
" if opf_desc:\n", | ||
" opf_obj['description'] = opf_desc\n", | ||
" \n", | ||
" dset = ff_utils.get_metadata(ds, my_key, add_on='frame=raw')\n", | ||
" curr_opfs = dset.get('other_processed_files', [])\n", | ||
" if curr_opfs:\n", | ||
" curr_titles = [i.get('title') for i in curr_opfs]\n", | ||
" if opf_title in curr_titles:\n", | ||
" print('ERROR: {} has been used as a title already for {} - NO GO!'.format(opf_title, ds))\n", | ||
" continue\n", | ||
" # in this specific case we want to insert the new one into\n", | ||
" # the first position of the list\n", | ||
" new_opf_grps = [opf_obj] + curr_opfs\n", | ||
" \n", | ||
" patch_data = {'other_processed_files': new_opf_grps}\n", | ||
" if action:\n", | ||
" res = ff_utils.patch_metadata(patch_data, dset['uuid'], key = my_key)\n", | ||
" print(res)\n", | ||
" else:\n", | ||
" print(\"\\n\")\n", | ||
" print(\"{}\\t{}\".format(dset.get('uuid'), dset.get('accession')))\n", | ||
" print(opf_obj)\n", | ||
" print(\"\\n\\n\")\n", | ||
" print(patch_data)\n", | ||
" \n", | ||
" \n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"anaconda-cloud": {}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "dcicwrangling" | ||
version = "2.3.0" | ||
version = "2.4.0" | ||
description = "Scripts and Jupyter notebooks for 4DN wrangling" | ||
authors = ["4DN-DCIC Team <[email protected]>"] | ||
license = "MIT" | ||
|