-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
100 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"###\n", | ||
"\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"outfile = \"dogbreeds_merged.tsv\"\n", | ||
"template_url=\"https://docs.google.com/spreadsheets/d/e/2PACX-1vSuwLXikgq08frK7d8yFSdWTS8P1erx5bS_QiLdHhfKV4ulJlRrqkVaVhC7b3O6Z8ixrvJgoCBy8YLq/pub?gid=1655315858&single=true&output=tsv\"\n", | ||
"term_to_obsolete = \"VBO:0200238\"\n", | ||
"term_to_merge_into = \"VBO:0200239\"\n", | ||
"issue = 123" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 31, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "KeyError", | ||
"evalue": "\"['obsolesence_reason'] not in index\"", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", | ||
"Cell \u001b[0;32mIn[31], line 42\u001b[0m\n\u001b[1;32m 40\u001b[0m out \u001b[38;5;241m=\u001b[39m merge(df, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVBO:0200238\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVBO:0200239\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 41\u001b[0m out\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdogbreeds_merged.tsv\u001b[39m\u001b[38;5;124m\"\u001b[39m,sep\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 42\u001b[0m \u001b[43mout\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mobsolesence_reason\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mobsolete\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mterm_label\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvbo_id\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreplacement_term\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreplacement_label\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGH_issue\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mobsoletion_type\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcontributors\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msynonym_label_from_merged_term\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msource_for_merged_term\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msynonym_type_most_common_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/grainyhead/lib/python3.11/site-packages/pandas/core/frame.py:4096\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4094\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[1;32m 4095\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 4096\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 4098\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[1;32m 4099\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/grainyhead/lib/python3.11/site-packages/pandas/core/indexes/base.py:6200\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 6197\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6198\u001b[0m keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 6200\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6202\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m 6203\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[1;32m 6204\u001b[0m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/grainyhead/lib/python3.11/site-packages/pandas/core/indexes/base.py:6252\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 6249\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6251\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[0;32m-> 6252\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", | ||
"\u001b[0;31mKeyError\u001b[0m: \"['obsolesence_reason'] not in index\"" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"def merge(df, term_to_obsolete, term_to_merge_into, issue=1):\n", | ||
" df_term_to_merge_into = df[df[\"vbo_id\"]==term_to_merge_into].copy()\n", | ||
" df_term_to_obsolete = df[df[\"vbo_id\"]==term_to_obsolete].copy()\n", | ||
" df_term_to_obsolete2 = df_term_to_obsolete.copy()\n", | ||
" \n", | ||
" term_to_merge_into_label = df_term_to_merge_into[\"term_label\"].values[0]\n", | ||
" term_to_obsolete_label = df_term_to_obsolete[\"term_label\"].values[0]\n", | ||
" \n", | ||
" list_of_columns_to_keep = [\"term_label\", \"vbo_id\", \"contributor\"]\n", | ||
" for column in df_term_to_obsolete.columns:\n", | ||
" if column not in list_of_columns_to_keep:\n", | ||
" df_term_to_obsolete[column] = ''\n", | ||
" \n", | ||
" df_term_to_obsolete['term_label'] = \"obsolete \" + df_term_to_obsolete['term_label']\n", | ||
" df_term_to_obsolete['obsoletion_type'] = \"owl:Class\"\n", | ||
" df_term_to_obsolete['obsolete'] = \"true\"\n", | ||
" df_term_to_obsolete['contributors'] = \"\"\n", | ||
" df_term_to_obsolete['replacement_term'] = term_to_merge_into\n", | ||
" df_term_to_obsolete['replacement_label'] = term_to_merge_into_label\n", | ||
" df_term_to_obsolete['obsolescence_reason'] = \"terms merged\"\n", | ||
" df_term_to_obsolete['GH_issue'] = \"https://github.com/monarch-initiative/vertebrate-breed-ontology/issues/{issue}\".format(issue=issue)\n", | ||
" \n", | ||
" # Update duplicated\n", | ||
" df_term_to_obsolete2['vbo_id'] = term_to_merge_into\n", | ||
" df_term_to_obsolete2['term_label'] = ''\n", | ||
" df_term_to_obsolete2['synonym_label_from_merged_term'] = term_to_obsolete_label\n", | ||
" df_term_to_obsolete2['source_for_merged_term'] = term_to_obsolete\n", | ||
" df_term_to_obsolete2['synonym_type_most_common_name'] = ''\n", | ||
" df_term_to_obsolete2['GH_issue'] = \"https://github.com/monarch-initiative/vertebrate-breed-ontology/issues/{issue}\".format(issue=issue) \n", | ||
"\n", | ||
" return pd.concat([df_term_to_obsolete, df_term_to_obsolete2, df_term_to_merge_into])\n", | ||
"\n", | ||
"df=pd.read_csv(template_url,sep=\"\\t\")\n", | ||
"out = merge(df, term_to_obsolete, term_to_merge_into,issue=issue)\n", | ||
"out.to_csv(outfile,sep=\"\\t\",index=False)\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |