From 6d07cbae6626ff35c356172189ef73d09a53f40a Mon Sep 17 00:00:00 2001 From: Jaclyn Beck Date: Tue, 19 Nov 2024 16:14:03 -0800 Subject: [PATCH 1/4] Added notebook to fetch Pharos class information for genes in Agora --- .../AG-1563_Preprocess_Pharos_Class.ipynb | 564 ++++++++++++++++++ 1 file changed, 564 insertions(+) create mode 100644 data_analysis/agora/notebooks/AG-1563_Preprocess_Pharos_Class.ipynb diff --git a/data_analysis/agora/notebooks/AG-1563_Preprocess_Pharos_Class.ipynb b/data_analysis/agora/notebooks/AG-1563_Preprocess_Pharos_Class.ipynb new file mode 100644 index 00000000..74544eb9 --- /dev/null +++ b/data_analysis/agora/notebooks/AG-1563_Preprocess_Pharos_Class.ipynb @@ -0,0 +1,564 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a Pharos class input file\n", + "\n", + "This notebook creates a file that contains gene symbols, Ensembl IDs, UniProt IDs, and Pharos class for each gene. The steps are:\n", + "1. Query Pharos for all targets in their database\n", + "2. Parse the JSON response as a data frame\n", + "3. Merge this data with Agora's existing UniProt -> Ensembl ID map file\n", + "\n", + "Step 3 simultaneously maps the IDs given by Pharos (UniProt IDs and gene symbols only) to the corresponding Ensembl IDs, and narrows the data down to only genes that exist in Agora's data files." + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import pandas as pd\n", + "import synapseclient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query Pharos for all targets in the database\n", + "\n", + "Pharos uses GraphQL for its API, so the query is formatted as JSON that matches their \"DownloadResult\" schema. The lone \"data\" value inside signifies to return the data only, not any status or metadata about the request." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = \"https://pharos-api.ncats.io/graphql\"\n", + "\n", + "query_body = \"\"\"\n", + "{\n", + " download(model: \"Targets\", fields: [\"UniProt\", \"Symbol\", \"Target Development Level\"], sqlOnly: false) {\n", + " data\n", + " }\n", + "}\n", + "\"\"\"\n", + "\n", + "response = requests.post(url=url, json={\"query\": query_body})\n", + "\n", + "if not response.ok:\n", + " print(\"Error querying Pharos\")\n", + " response.raise_for_status()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Turn the response into a DataFrame\n", + "\n", + "The response is a JSON string with the following structure:\n", + "\n", + "```\n", + "data {\n", + " download {\n", + " data {\n", + " [list of dictionary items with fields \"id\", \"UniProt\", \"Symbol\", and \"Target Development Level\"]\n", + " }\n", + " }\n", + "}\n", + "```\n", + "\n", + "Calling `json_normalize` on the inner \"data\" item will turn that list into a data frame. " + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uniprot_idhgnc_symbolpharos_class
0P32929CTHTchem
1A4D0Y5C7orf77Tdark
2Q49A92C8orf34Tbio
3Q9UFW8CGGBP1Tbio
4Q96K31C8orf76Tdark
............
20407P49286MTNR1BTclin
20408P84157MXRA7Tdark
20409Q9NP71MLXIPLTbio
20410Q9BWT6MND1Tbio
20411Q8IVL6P3H3Tbio
\n", + "

20412 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " uniprot_id hgnc_symbol pharos_class\n", + "0 P32929 CTH Tchem\n", + "1 A4D0Y5 C7orf77 Tdark\n", + "2 Q49A92 C8orf34 Tbio\n", + "3 Q9UFW8 CGGBP1 Tbio\n", + "4 Q96K31 C8orf76 Tdark\n", + "... ... ... ...\n", + "20407 P49286 MTNR1B Tclin\n", + "20408 P84157 MXRA7 Tdark\n", + "20409 Q9NP71 MLXIPL Tbio\n", + "20410 Q9BWT6 MND1 Tbio\n", + "20411 Q8IVL6 P3H3 Tbio\n", + "\n", + "[20412 rows x 3 columns]" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res_str = json.loads(response.content)\n", + "pharos_df = pd.json_normalize(res_str[\"data\"][\"download\"][\"data\"])\n", + "\n", + "pharos_df = pharos_df.rename(\n", + " columns={\n", + " \"UniProt\": \"uniprot_id\",\n", + " \"Symbol\": \"hgnc_symbol\",\n", + " \"Target Development Level\": \"pharos_class\",\n", + " }\n", + ").drop(columns=\"id\")\n", + "\n", + "pharos_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Map UniProt IDs to Ensembl IDs\n", + "\n", + "Uses the UniProt -> Ensembl ID Agora source file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "UPGRADE AVAILABLE\n", + "\n", + "A more recent version of the Synapse Client (4.6.0) is available. Your version (4.0.0) can be upgraded by typing:\n", + " pip install --upgrade synapseclient\n", + "\n", + "Python Synapse Client version 4.6.0 release notes\n", + "\n", + "https://python-docs.synapse.org/news/\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
UniProtKB_accessionRESOURCE_IDENTIFIER
0A0A075B6I4ENSG00000211642
1Q13641ENSG00000146242
2Q6PCB7ENSG00000130304
3Q7Z591ENSG00000106948
4Q5SZD1ENSG00000197261
.........
18456Q6ZUI0ENSG00000188001
18457O43747ENSG00000166747
18458Q9UBU2ENSG00000155011
18459Q86VY9ENSG00000164484
18460P02655ENSG00000234906
\n", + "

18461 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " UniProtKB_accession RESOURCE_IDENTIFIER\n", + "0 A0A075B6I4 ENSG00000211642\n", + "1 Q13641 ENSG00000146242\n", + "2 Q6PCB7 ENSG00000130304\n", + "3 Q7Z591 ENSG00000106948\n", + "4 Q5SZD1 ENSG00000197261\n", + "... ... ...\n", + "18456 Q6ZUI0 ENSG00000188001\n", + "18457 O43747 ENSG00000166747\n", + "18458 Q9UBU2 ENSG00000155011\n", + "18459 Q86VY9 ENSG00000164484\n", + "18460 P02655 ENSG00000234906\n", + "\n", + "[18461 rows x 2 columns]" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Temporarily hard-coded until this file is in the config.yaml file\n", + "uniprot_syn_id = \"syn54113663\"\n", + "\n", + "syn = synapseclient.Synapse()\n", + "syn.login(silent=True)\n", + "\n", + "uniprot_path = syn.get(uniprot_syn_id)\n", + "uniprot_df = pd.read_table(uniprot_path.path, sep=\"\\t\")\n", + "\n", + "uniprot_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ensembl_gene_iduniprot_idhgnc_symbolpharos_class
8825ENSG00000000003O43657TSPAN6Tbio
4568ENSG00000000005Q9H2S6TNMDTbio
194ENSG00000000419O60762DPM1Tbio
3186ENSG00000000457Q8IZE3SCYL3Tbio
11997ENSG00000000460Q9NSG2C1orf112Tbio
...............
7879ENSG00000288705P35504UGT1A5Tbio
1485ENSG00000288722P23610F8A1Tbio
1682ENSG00000288784Q8IX94CTAGE4Tdark
2959ENSG00000289721W6CW81PYDC5Tbio
7278ENSG00000291237P04179SOD2Tbio
\n", + "

18361 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ensembl_gene_id uniprot_id hgnc_symbol pharos_class\n", + "8825 ENSG00000000003 O43657 TSPAN6 Tbio\n", + "4568 ENSG00000000005 Q9H2S6 TNMD Tbio\n", + "194 ENSG00000000419 O60762 DPM1 Tbio\n", + "3186 ENSG00000000457 Q8IZE3 SCYL3 Tbio\n", + "11997 ENSG00000000460 Q9NSG2 C1orf112 Tbio\n", + "... ... ... ... ...\n", + "7879 ENSG00000288705 P35504 UGT1A5 Tbio\n", + "1485 ENSG00000288722 P23610 F8A1 Tbio\n", + "1682 ENSG00000288784 Q8IX94 CTAGE4 Tdark\n", + "2959 ENSG00000289721 W6CW81 PYDC5 Tbio\n", + "7278 ENSG00000291237 P04179 SOD2 Tbio\n", + "\n", + "[18361 rows x 4 columns]" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df = pd.merge(\n", + " left=pharos_df,\n", + " right=uniprot_df,\n", + " how=\"inner\",\n", + " left_on=\"uniprot_id\",\n", + " right_on=\"UniProtKB_accession\",\n", + ").rename(columns={\"RESOURCE_IDENTIFIER\": \"ensembl_gene_id\"})\n", + "\n", + "final_df = final_df[\n", + " [\"ensembl_gene_id\", \"uniprot_id\", \"hgnc_symbol\", \"pharos_class\"]\n", + "].sort_values(by=\"ensembl_gene_id\")\n", + "\n", + "final_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save to a file" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [], + "source": [ + "final_df.to_csv(\"../output/pharos_classes.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "agora-data-tools-ywFp1Gf9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 494405a67fb9317cadbc40bb3158b89e7c379355 Mon Sep 17 00:00:00 2001 From: Jaclyn Beck Date: Tue, 19 Nov 2024 16:35:36 -0800 
Subject: [PATCH 2/4] Moved the Pharos notebook to the correct location in the folder structure --- .../AG-1563_Preprocess_Pharos_Class.ipynb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) rename data_analysis/agora/notebooks/{ => preprocessing}/AG-1563_Preprocess_Pharos_Class.ipynb (98%) diff --git a/data_analysis/agora/notebooks/AG-1563_Preprocess_Pharos_Class.ipynb b/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb similarity index 98% rename from data_analysis/agora/notebooks/AG-1563_Preprocess_Pharos_Class.ipynb rename to data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb index 74544eb9..fb8acbfa 100644 --- a/data_analysis/agora/notebooks/AG-1563_Preprocess_Pharos_Class.ipynb +++ b/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -199,7 +199,7 @@ "[20412 rows x 3 columns]" ] }, - "execution_count": 128, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -352,7 +352,7 @@ "[18461 rows x 2 columns]" ] }, - "execution_count": 127, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -372,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -502,7 +502,7 @@ "[18361 rows x 4 columns]" ] }, - "execution_count": 135, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -532,7 +532,7 @@ 
}, { "cell_type": "code", - "execution_count": 137, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ From 8e2325b45d1b34aed29874bd9d43a2c025d40ef5 Mon Sep 17 00:00:00 2001 From: Jaclyn Beck Date: Tue, 19 Nov 2024 16:44:56 -0800 Subject: [PATCH 3/4] Fixed output file path and addressed SonarCloud issue --- .../preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb b/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb index fb8acbfa..f9015ad3 100644 --- a/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb +++ b/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb @@ -514,6 +514,7 @@ " how=\"inner\",\n", " left_on=\"uniprot_id\",\n", " right_on=\"UniProtKB_accession\",\n", + " validate=\"one_to_many\",\n", ").rename(columns={\"RESOURCE_IDENTIFIER\": \"ensembl_gene_id\"})\n", "\n", "final_df = final_df[\n", @@ -536,7 +537,7 @@ "metadata": {}, "outputs": [], "source": [ - "final_df.to_csv(\"../output/pharos_classes.csv\", index=False)" + "final_df.to_csv(\"../../output/pharos_classes.csv\", index=False)" ] } ], From 9c2044de77631d04bb302071ead2f09e37e031c9 Mon Sep 17 00:00:00 2001 From: Jaclyn Beck Date: Fri, 22 Nov 2024 13:13:33 -0800 Subject: [PATCH 4/4] Cleared outputs from notebook --- .../AG-1563_Preprocess_Pharos_Class.ipynb | 394 +----------------- 1 file changed, 9 insertions(+), 385 deletions(-) diff --git a/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb b/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb index f9015ad3..0fbf48d5 100644 --- a/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb +++ b/data_analysis/agora/notebooks/preprocessing/AG-1563_Preprocess_Pharos_Class.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": 
"code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -81,129 +81,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
uniprot_idhgnc_symbolpharos_class
0P32929CTHTchem
1A4D0Y5C7orf77Tdark
2Q49A92C8orf34Tbio
3Q9UFW8CGGBP1Tbio
4Q96K31C8orf76Tdark
............
20407P49286MTNR1BTclin
20408P84157MXRA7Tdark
20409Q9NP71MLXIPLTbio
20410Q9BWT6MND1Tbio
20411Q8IVL6P3H3Tbio
\n", - "

20412 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " uniprot_id hgnc_symbol pharos_class\n", - "0 P32929 CTH Tchem\n", - "1 A4D0Y5 C7orf77 Tdark\n", - "2 Q49A92 C8orf34 Tbio\n", - "3 Q9UFW8 CGGBP1 Tbio\n", - "4 Q96K31 C8orf76 Tdark\n", - "... ... ... ...\n", - "20407 P49286 MTNR1B Tclin\n", - "20408 P84157 MXRA7 Tdark\n", - "20409 Q9NP71 MLXIPL Tbio\n", - "20410 Q9BWT6 MND1 Tbio\n", - "20411 Q8IVL6 P3H3 Tbio\n", - "\n", - "[20412 rows x 3 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "res_str = json.loads(response.content)\n", "pharos_df = pd.json_normalize(res_str[\"data\"][\"download\"][\"data\"])\n", @@ -230,133 +110,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "UPGRADE AVAILABLE\n", - "\n", - "A more recent version of the Synapse Client (4.6.0) is available. Your version (4.0.0) can be upgraded by typing:\n", - " pip install --upgrade synapseclient\n", - "\n", - "Python Synapse Client version 4.6.0 release notes\n", - "\n", - "https://python-docs.synapse.org/news/\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
UniProtKB_accessionRESOURCE_IDENTIFIER
0A0A075B6I4ENSG00000211642
1Q13641ENSG00000146242
2Q6PCB7ENSG00000130304
3Q7Z591ENSG00000106948
4Q5SZD1ENSG00000197261
.........
18456Q6ZUI0ENSG00000188001
18457O43747ENSG00000166747
18458Q9UBU2ENSG00000155011
18459Q86VY9ENSG00000164484
18460P02655ENSG00000234906
\n", - "

18461 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " UniProtKB_accession RESOURCE_IDENTIFIER\n", - "0 A0A075B6I4 ENSG00000211642\n", - "1 Q13641 ENSG00000146242\n", - "2 Q6PCB7 ENSG00000130304\n", - "3 Q7Z591 ENSG00000106948\n", - "4 Q5SZD1 ENSG00000197261\n", - "... ... ...\n", - "18456 Q6ZUI0 ENSG00000188001\n", - "18457 O43747 ENSG00000166747\n", - "18458 Q9UBU2 ENSG00000155011\n", - "18459 Q86VY9 ENSG00000164484\n", - "18460 P02655 ENSG00000234906\n", - "\n", - "[18461 rows x 2 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Temporarily hard-coded until this file is in the config.yaml file\n", "uniprot_syn_id = \"syn54113663\"\n", @@ -372,141 +128,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ensembl_gene_iduniprot_idhgnc_symbolpharos_class
8825ENSG00000000003O43657TSPAN6Tbio
4568ENSG00000000005Q9H2S6TNMDTbio
194ENSG00000000419O60762DPM1Tbio
3186ENSG00000000457Q8IZE3SCYL3Tbio
11997ENSG00000000460Q9NSG2C1orf112Tbio
...............
7879ENSG00000288705P35504UGT1A5Tbio
1485ENSG00000288722P23610F8A1Tbio
1682ENSG00000288784Q8IX94CTAGE4Tdark
2959ENSG00000289721W6CW81PYDC5Tbio
7278ENSG00000291237P04179SOD2Tbio
\n", - "

18361 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ensembl_gene_id uniprot_id hgnc_symbol pharos_class\n", - "8825 ENSG00000000003 O43657 TSPAN6 Tbio\n", - "4568 ENSG00000000005 Q9H2S6 TNMD Tbio\n", - "194 ENSG00000000419 O60762 DPM1 Tbio\n", - "3186 ENSG00000000457 Q8IZE3 SCYL3 Tbio\n", - "11997 ENSG00000000460 Q9NSG2 C1orf112 Tbio\n", - "... ... ... ... ...\n", - "7879 ENSG00000288705 P35504 UGT1A5 Tbio\n", - "1485 ENSG00000288722 P23610 F8A1 Tbio\n", - "1682 ENSG00000288784 Q8IX94 CTAGE4 Tdark\n", - "2959 ENSG00000289721 W6CW81 PYDC5 Tbio\n", - "7278 ENSG00000291237 P04179 SOD2 Tbio\n", - "\n", - "[18361 rows x 4 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "final_df = pd.merge(\n", " left=pharos_df,\n", @@ -533,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [