From f57b7698e9ad83871401ac2c5a8dffbc583220a2 Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Thu, 12 Nov 2020 20:06:25 -0800 Subject: [PATCH] utils_extra check_value url quote on n3 fail if we receive a string that we think should be a url then check to see if rdflib can safely serialize it, on failure, url quote it this is not efficient but it is at least somewhat safe note that uri dealiasing has to be done to be able to do a full comparison between uris to determine if they even have a chance of being the same uri, so this is just another layer in that where the escape sequences have to be undone as one step along the way to finding the canonical representation of the uri --- pyontutils/utils_extra.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pyontutils/utils_extra.py b/pyontutils/utils_extra.py index e6e01358..92c336bf 100644 --- a/pyontutils/utils_extra.py +++ b/pyontutils/utils_extra.py @@ -2,6 +2,7 @@ Reused utilties that depend on packages outside the python standard library. """ import hashlib +from urllib.parse import quote as url_quote import rdflib @@ -15,7 +16,18 @@ def check_value(v): if isinstance(v, rdflib.Literal) or isinstance(v, rdflib.URIRef): return v elif isinstance(v, str) and v.startswith('http'): - return rdflib.URIRef(v) + # FIXME this is dumb and dangerous but whatever + uri = rdflib.URIRef(v) + try: + uri.n3() + except: + # dois allow ... non-url and non-identifier chars + # that must be escaped or we have to use strings + # FIXME this WILL induce an aliasing problem if + # another process quotes using a different rule + uri = rdflib.URIRef(url_quote(v, ':/;()')) + + return uri else: return rdflib.Literal(v)