-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from HeardLibrary/v1-4
v1.4 release
- Loading branch information
Showing
5 changed files
with
317 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 43, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import requests # best library to manage HTTP transactions\n", | ||
"from bs4 import BeautifulSoup # web-scraping library\n", | ||
"import json\n", | ||
"from time import sleep\n", | ||
"import csv\n", | ||
"import math\n", | ||
"from fuzzywuzzy import fuzz # fuzzy logic matching\n", | ||
"from fuzzywuzzy import process\n", | ||
"import xml.etree.ElementTree as et # library to traverse XML tree\n", | ||
"import urllib\n", | ||
"import datetime\n", | ||
"import string\n", | ||
"from pathlib import Path\n", | ||
"\n", | ||
"# ---------------\n", | ||
"# Configuration data\n", | ||
"# ---------------\n", | ||
"\n", | ||
"graph_name = 'http://nursing'\n", | ||
"accept_media_type = 'text/turtle'\n", | ||
"sparql_endpoint = \"https://sparql.vanderbilt.edu/sparql\"\n", | ||
"request_header_dictionary = {\n", | ||
" #'Content-Type': 'application/sparql-query',\n", | ||
" 'Accept' : accept_media_type\n", | ||
"}\n", | ||
"\n", | ||
"# Load endpoint password from file in home directory\n", | ||
"directory = 'home'\n", | ||
"filename = 'sparql_vanderbilt_edu_password.txt'\n", | ||
"pwd = load_credential(filename, directory)\n", | ||
"\n", | ||
"# ---------------\n", | ||
"# Function definitions\n", | ||
"# ---------------\n", | ||
"\n", | ||
"# Load password from local drive\n", | ||
"# value of directory should be either 'home' or 'working'\n", | ||
"def load_credential(filename, directory):\n", | ||
" cred = ''\n", | ||
" # to change the script to look for the credential in the working directory, change the value of home to empty string\n", | ||
" if directory == 'home':\n", | ||
" home = str(Path.home()) #gets path to home directory; supposed to work for Win and Mac\n", | ||
" credential_path = home + '/' + filename\n", | ||
" else:\n", | ||
" directory = 'working'\n", | ||
" credential_path = filename\n", | ||
" try:\n", | ||
" with open(credential_path, 'rt', encoding='utf-8') as file_object:\n", | ||
" cred = file_object.read()\n", | ||
" except:\n", | ||
" print(filename + ' file not found - is it in your ' + directory + ' directory?')\n", | ||
" exit()\n", | ||
" return(cred)\n", | ||
"\n", | ||
"def retrieve_direct_statements(sparql_endpoint):\n", | ||
" query = '''\n", | ||
"construct {?item ?directProp ?value.}\n", | ||
"from <''' + graph_name + '''>\n", | ||
"where {\n", | ||
" ?item ?p ?statement.\n", | ||
" ?statement ?ps ?value.\n", | ||
" filter(substr(str(?ps),1,39)=\"http://www.wikidata.org/prop/statement/\")\n", | ||
" bind(substr(str(?ps),40) as ?id)\n", | ||
" bind(substr(str(?p),30) as ?id)\n", | ||
" bind(iri(concat(\"http://www.wikidata.org/prop/direct/\", ?id)) as ?directProp)\n", | ||
" }\n", | ||
"'''\n", | ||
" results = []\n", | ||
" r = requests.get(sparql_endpoint, params={'query' : query}, headers=request_header_dictionary)\n", | ||
" return r.text\n", | ||
"\n", | ||
"def perform_sparql_update(sparql_endpoint, pwd, update_command):\n", | ||
" # SPARQL Update requires HTTP POST\n", | ||
" hdr = {'Content-Type' : 'application/sparql-update'}\n", | ||
" r = requests.post(sparql_endpoint, auth=('admin', pwd), headers=hdr, data = update_command)\n", | ||
" print(str(r.status_code) + ' ' + r.url)\n", | ||
" print(r.text)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 44, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# ---------------\n", | ||
"# Construct the direct property statements entailed by the Wikibase model and retrieve from endpoint \n", | ||
"# ---------------\n", | ||
"\n", | ||
"graph_text = retrieve_direct_statements(sparql_endpoint)\n", | ||
"#print(graph_text)\n", | ||
"print('constructed triples retrieved')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 45, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# remove prefixes from response Turtle, which are not necessary since IRIs are unabbreviated\n", | ||
"graph_text_list = graph_text.split('\\n')\n", | ||
"# print(graph_text_list)\n", | ||
"graph_text = ''\n", | ||
"for line in graph_text_list:\n", | ||
" try:\n", | ||
" if line[0] != '@':\n", | ||
" graph_text += line + '\\n'\n", | ||
" except:\n", | ||
" pass\n", | ||
"#print()\n", | ||
"#print(graph_text)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 46, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"200 https://sparql.vanderbilt.edu/sparql\n", | ||
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\"><html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\"><title>blazegraph™ by SYSTAP</title\n", | ||
"></head\n", | ||
"><body<p>totalElapsed=1ms, elapsed=1ms, connFlush=0ms, batchResolve=0, whereClause=0ms, deleteClause=0ms, insertClause=0ms</p\n", | ||
"><hr><p>COMMIT: totalElapsed=356ms, commitTime=1596944443099, mutationCount=776</p\n", | ||
"></html\n", | ||
">\n", | ||
"\n", | ||
"done\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Send SPARQL 1.1 UPDATE to endpoint to add the constructed triples into the graph\n", | ||
"\n", | ||
"update_command = '''INSERT DATA\n", | ||
"{ GRAPH <''' + graph_name + '''> { \n", | ||
"''' + graph_text + '''\n", | ||
"}}'''\n", | ||
"\n", | ||
"#print(update_command)\n", | ||
"\n", | ||
"perform_sparql_update(sparql_endpoint, pwd, update_command)\n", | ||
"\n", | ||
"print()\n", | ||
"print('done')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import requests # best library to manage HTTP transactions | ||
import json | ||
from time import sleep | ||
import csv | ||
import math | ||
import urllib | ||
import datetime | ||
import string | ||
from pathlib import Path | ||
|
||
# ---------------
# Configuration data
# ---------------

# Named graph in the triplestore that holds the Wikibase-model data.
graph_name = 'http://nursing'
# Serialization requested from the endpoint for CONSTRUCT results.
accept_media_type = 'text/turtle'
sparql_endpoint = "https://sparql.vanderbilt.edu/sparql"
request_header_dictionary = {
    #'Content-Type': 'application/sparql-query',
    'Accept' : accept_media_type
}

# Load endpoint password from file in home directory
directory = 'home'
filename = 'sparql_vanderbilt_edu_password.txt'
|
||
# --------------- | ||
# Function definitions | ||
# --------------- | ||
|
||
def load_credential(filename, directory):
    """Read a credential (e.g. a password) from a local text file.

    Parameters
    ----------
    filename : str
        Name of (or path to) the credential file.
    directory : str
        'home' to look in the user's home directory; any other value means
        the current working directory.

    Returns
    -------
    str
        The entire contents of the credential file.

    Exits the interpreter with a message if the file cannot be read.
    """
    if directory == 'home':
        # Path.home() resolves the home directory on both Windows and Mac/Linux
        credential_path = str(Path.home()) + '/' + filename
    else:
        directory = 'working'  # normalize the label used in the error message
        credential_path = filename
    try:
        with open(credential_path, 'rt', encoding='utf-8') as file_object:
            cred = file_object.read()
    except OSError:
        # Only file-access problems should trigger the exit; the original
        # bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        print(filename + ' file not found - is it in your ' + directory + ' directory?')
        exit()
    return cred
|
||
def retrieve_direct_statements(sparql_endpoint):
    """CONSTRUCT the direct-property triples entailed by the Wikibase model.

    For each reified statement (?item ?p ?statement / ?statement ?ps ?value)
    whose ?ps predicate is in the http://www.wikidata.org/prop/statement/
    namespace, emits the shortcut triple ?item wdt:Pnnn ?value in the
    http://www.wikidata.org/prop/direct/ namespace.

    Uses the module-level graph_name and request_header_dictionary; the
    Accept header requests Turtle, so the return value is the endpoint's
    Turtle-serialized response body.

    NOTE(review): the original query bound ?id twice (once from ?ps, again
    from ?p). SPARQL 1.1 forbids BINDing a variable that is already in
    scope, so the redundant second bind was removed; both derivations yield
    the same Pnnn property identifier.
    """
    query = '''
construct {?item ?directProp ?value.}
from <''' + graph_name + '''>
where {
  ?item ?p ?statement.
  ?statement ?ps ?value.
  filter(substr(str(?ps),1,39)="http://www.wikidata.org/prop/statement/")
  bind(substr(str(?ps),40) as ?id)
  bind(iri(concat("http://www.wikidata.org/prop/direct/", ?id)) as ?directProp)
  }
'''
    r = requests.get(sparql_endpoint, params={'query': query}, headers=request_header_dictionary)
    return r.text
|
||
def perform_sparql_update(sparql_endpoint, pwd, update_command):
    """POST a SPARQL 1.1 UPDATE command to the endpoint and print the response.

    Parameters
    ----------
    sparql_endpoint : str
        URL of the SPARQL endpoint.
    pwd : str
        Password for the endpoint's 'admin' user (sent via HTTP Basic auth).
    update_command : str
        The SPARQL UPDATE request body (e.g. an INSERT DATA command).

    Prints the HTTP status code, effective URL, and response body so the
    caller can see whether the update was committed.
    """
    # SPARQL 1.1 Update requires HTTP POST with this media type
    hdr = {'Content-Type': 'application/sparql-update'}
    r = requests.post(sparql_endpoint, auth=('admin', pwd), headers=hdr, data=update_command)
    print(str(r.status_code) + ' ' + r.url)
    print(r.text)
|
||
# ---------------
# Construct the direct property statements entailed by the Wikibase model and retrieve from endpoint
# ---------------

graph_text = retrieve_direct_statements(sparql_endpoint)
print('constructed triples retrieved')

# Remove @prefix declarations from the response Turtle. They are unnecessary
# because the CONSTRUCTed IRIs are unabbreviated, and they would not be valid
# inside the INSERT DATA block built below. The truthiness test also skips
# empty lines (the original used try/except IndexError for that).
graph_text_list = graph_text.split('\n')
graph_text = ''.join(
    line + '\n' for line in graph_text_list if line and line[0] != '@'
)

# Send SPARQL 1.1 UPDATE to endpoint to add the constructed triples into the graph

update_command = '''INSERT DATA
{ GRAPH <''' + graph_name + '''> { 
''' + graph_text + '''
}}'''

# Load the password only now, just before it is needed for the update.
pwd = load_credential(filename, directory)
perform_sparql_update(sparql_endpoint, pwd, update_command)

print()
print('done')
Oops, something went wrong.