Skip to content

Commit

Permalink
Merge pull request #107 from 4dn-dcic/add_opf2eset_nb
Browse files Browse the repository at this point in the history
Add opf2eset nb
  • Loading branch information
aschroed authored Sep 6, 2023
2 parents e6a22e4 + 20f3191 commit 467f628
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ dcicwrangling
Change Log
----------

2.4.0
=====

`PR:107 add useful notebook #15 to add opf collections to esets <https://github.com/4dn-dcic/dcicwrangling/pull/107>`_

* added a new useful notebook that lets you use a lab-submitted processed file sheet to link replicate sets to the other processed file collections for each set

2.3.0
=====
Expand Down
127 changes: 127 additions & 0 deletions notebooks/useful_notebooks/15_add_opf_collections_to_sets.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Uses information from a submitter PF spreadsheet to add files to the appropriate place in indicated linked items\n",
"### Currently for ExperimentSets opfs but should be extend to Experiments, Pubs and possibly pages\n",
"\n",
"#### Setup\n",
"\n",
"- Provide a title and description to use for the opf section.\n",
"- indicate path to the workbook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from dcicutils import ff_utils\n",
"from functions.notebook_functions import *\n",
"from functions.wfr import *\n",
"\n",
"# get key from keypairs.json\n",
"my_key = get_key('andyprod')\n",
"\n",
"# set title here\n",
"opf_type = 'supplementary'\n",
"opf_title = \"Analysis results provided by the data submitters - the Diao Lab\"\n",
"opf_desc = \"The results were generated by the Diao lab using the code available at https://github.com/jianhong/hicar/releases/tag/2.0.0rc\"\n",
"\n",
"# location of excel processed file sheet\n",
"xcel_file = '/Users/andrew/Documents/work/4DN_Metadata/Diao_Yarui_lab/HiCAR_RNAseq_myoblast_diff/230830_hicar_processed_results_ajs_upd.xlsx'\n",
"xcel, sheets = digest_xlsx(xcel_file)\n",
"xcel_data = reader(xcel)\n",
"\n",
"fieldnames = next(xcel_data)\n",
"\n",
"# create a dict with dataset 2 processd files \n",
"dset2opfs = {}\n",
"for row in xcel_data:\n",
" if row[0].startswith('#'):\n",
" continue\n",
" data = dict(zip(fieldnames, row))\n",
" data = {k: v for k, v in data.items() if v}\n",
" file_alias = data.get('aliases')\n",
" dataset = data.get('# linked_datasets')\n",
" dset2opfs.setdefault(dataset, []).append(file_alias)\n",
" \n",
"print(dset2opfs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# go through the dictionary and create a patch for the dataset \n",
"# need to check and maintain other opfs\n",
"\n",
"# if True do the action, if false just report\n",
"action = True\n",
"\n",
"for ds, opfs in dset2opfs.items():\n",
" opf_obj = {'type': opf_type, 'title': opf_title, 'files': opfs}\n",
" if opf_desc:\n",
" opf_obj['description'] = opf_desc\n",
" \n",
" dset = ff_utils.get_metadata(ds, my_key, add_on='frame=raw')\n",
" curr_opfs = dset.get('other_processed_files', [])\n",
" if curr_opfs:\n",
" curr_titles = [i.get('title') for i in curr_opfs]\n",
" if opf_title in curr_titles:\n",
" print('ERROR: {} has been used as a title already for {} - NO GO!'.format(ds, opf_title))\n",
" continue\n",
" # in this specific case we want to insert the new one into\n",
" # the first position of the list\n",
" new_opf_grps = [opf_obj] + curr_opfs\n",
" \n",
" patch_data = {'other_processed_files': new_opf_grps}\n",
" if action:\n",
" res = ff_utils.patch_metadata(patch_data, dset['uuid'], key = my_key)\n",
" print(res)\n",
" else:\n",
" print(\"\\n\")\n",
" print(\"{}\\t{}\".format(dset.get('uuid'), dset.get('accession')))\n",
" print(opf_obj)\n",
" print(\"\\n\\n\")\n",
" print(patch_data)\n",
" \n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicwrangling"
version = "2.3.0"
version = "2.4.0"
description = "Scripts and Jupyter notebooks for 4DN wrangling"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 467f628

Please sign in to comment.