From 5b9ba37e084933255aee01509b2ba6a567320b24 Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Fri, 5 Apr 2024 17:55:22 +0300 Subject: [PATCH] Update release to merged axiom annotations --- src/ontology/vbo.Makefile | 9 ++++ src/scripts/dadis_client/vbo_sandbox.ipynb | 59 ++++++++++++++-------- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/ontology/vbo.Makefile b/src/ontology/vbo.Makefile index d07ea2f..2c87ee1 100644 --- a/src/ontology/vbo.Makefile +++ b/src/ontology/vbo.Makefile @@ -93,3 +93,12 @@ dadis-local-sync: $(COMPONENTSDIR)/dadisbreedcountry.owl .PHONY: dadistransbound dadis-transboundary-sync: $(COMPONENTSDIR)/dadistransbound.owl + +########################################### +##### Release preprocessing ############### +########################################### + +$(EDIT_PREPROCESSED): $(SRC) + $(ROBOT) merge --input $< --output $@.merged.owl + owltools --use-catalog $@.merged.owl --merge-axiom-annotations -o -f owl $@.normalised.owl + $(ROBOT) convert --input $@.normalised.owl --format ofn --output $@ diff --git a/src/scripts/dadis_client/vbo_sandbox.ipynb b/src/scripts/dadis_client/vbo_sandbox.ipynb index f901406..c12e417 100644 --- a/src/scripts/dadis_client/vbo_sandbox.ipynb +++ b/src/scripts/dadis_client/vbo_sandbox.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -19,24 +19,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "\"['obsolesence_reason'] not in index\"", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[31], line 42\u001b[0m\n\u001b[1;32m 40\u001b[0m out \u001b[38;5;241m=\u001b[39m merge(df, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVBO:0200238\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVBO:0200239\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 41\u001b[0m out\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdogbreeds_merged.tsv\u001b[39m\u001b[38;5;124m\"\u001b[39m,sep\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\t\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 42\u001b[0m \u001b[43mout\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mobsolesence_reason\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mobsolete\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mterm_label\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvbo_id\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreplacement_term\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreplacement_label\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGH_issue\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mobsoletion_type\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcontributors\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msynonym_label_from_merged_term\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msource_for_merged_term\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msynonym_type_most_common_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\n", - "File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/grainyhead/lib/python3.11/site-packages/pandas/core/frame.py:4096\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4094\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[1;32m 4095\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 4096\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 4098\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[1;32m 4099\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/grainyhead/lib/python3.11/site-packages/pandas/core/indexes/base.py:6200\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 6197\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6198\u001b[0m keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 6200\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6202\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m 6203\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[1;32m 6204\u001b[0m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/grainyhead/lib/python3.11/site-packages/pandas/core/indexes/base.py:6252\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 6249\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6251\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[0;32m-> 6252\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mKeyError\u001b[0m: \"['obsolesence_reason'] not in index\"" - ] - } - ], + "outputs": [], "source": [ "def merge(df, term_to_obsolete, term_to_merge_into, issue=1):\n", " df_term_to_merge_into = df[df[\"vbo_id\"]==term_to_merge_into].copy()\n", @@ -70,9 +55,41 @@ "\n", " return pd.concat([df_term_to_obsolete, df_term_to_obsolete2, df_term_to_merge_into])\n", "\n", - "df=pd.read_csv(template_url,sep=\"\\t\")\n", - "out = merge(df, term_to_obsolete, term_to_merge_into,issue=issue)\n", - "out.to_csv(outfile,sep=\"\\t\",index=False)\n" + "\n", + "def obsolete_and_write(template_url, term_to_obsolete, term_to_merge_into, issue, outfile):\n", + " df=pd.read_csv(template_url,sep=\"\\t\")\n", + " out = merge(df, term_to_obsolete, term_to_merge_into,issue=issue)\n", + " out.to_csv(outfile,sep=\"\\t\",index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obsolete_and_write(template_url=\"dogbreeds_merged.tsv\",\n", + " template_url=\"https://docs.google.com/spreadsheets/d/e/2PACX-1vSuwLXikgq08frK7d8yFSdWTS8P1erx5bS_QiLdHhfKV4ulJlRrqkVaVhC7b3O6Z8ixrvJgoCBy8YLq/pub?gid=1655315858&single=true&output=tsv\",\n", + " term_to_obsolete = \"VBO:0200238\",\n", + " term_to_merge_into = \"VBO:0200239\",\n", + " issue = 123,\n", + " outfile = \"dogbreeds_merged.tsv\"\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "obsolete_and_write(template_url=\"dogbreeds_merged.tsv\",\n", + " template_url=\"https://docs.google.com/spreadsheets/d/e/2PACX-1vSuwLXikgq08frK7d8yFSdWTS8P1erx5bS_QiLdHhfKV4ulJlRrqkVaVhC7b3O6Z8ixrvJgoCBy8YLq/pub?gid=1655315858&single=true&output=tsv\",\n", + " term_to_obsolete = \"VBO:0200238\",\n", + " term_to_merge_into = \"VBO:0200239\",\n", + " issue = 123,\n", + " outfile = \"dogbreeds_merged.tsv\"\n", + ")" ] } ],