Add UTF-8 encoding to all requests.post() HTTP POST bodies
Steve Baskauf committed Jan 27, 2021
1 parent d610240 commit 7cdd3d7
Showing 4 changed files with 29 additions and 12 deletions.
8 changes: 6 additions & 2 deletions vanderbot/README.md
@@ -32,7 +32,7 @@ Here are some queries that can be run to explore the data:

[Number of clinical trials at Vanderbilt by principal investigator](https://w.wiki/XKK)

-The current release is [v1.6.3](https://github.com/HeardLibrary/linked-data/releases/tag/v1.6.3).
+The current release is [v1.6.4](https://github.com/HeardLibrary/linked-data/releases/tag/v1.6.4).

## How it works

@@ -179,5 +179,9 @@ Version 1.6.3 is a minor upgrade that adds an updated version of the HTML, Javas

The upgrade now supports monolingual string values and the complex value types globecoordinate and quantity. Other scripts were not affected.

+## Release v1.6.4 (2021-01-27)
+
+Version 1.6.4 contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings sent as part of a SPARQL query contained non-Latin characters.
+
----
-Revised 2021-01-26
+Revised 2021-01-27
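To see why the one-line change matters: when `requests.post()` is handed a `str` body, the standard library's `http.client` ends up encoding it as Latin-1, so any character outside that range raises a `UnicodeEncodeError` before the request is even sent. Encoding the body to UTF-8 bytes up front avoids this. A minimal sketch of the failure and the fix (not part of the commit; the header dictionary is an assumption, since the scripts' actual `requestHeaderDictionary` is defined outside the hunks shown below):

```python
import requests

endpoint_url = 'https://query.wikidata.org/sparql'  # Wikidata Query Service

# Assumed headers; application/sparql-query is the MIME type for a raw query body.
request_headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/sparql-query'
}

# A query whose literal contains non-Latin characters (here, Japanese).
query = '''select distinct ?item where {
  ?item rdfs:label "ヴァンダービルト大学"@ja .
  }'''

# Before the fix (fails): the str body is Latin-1-encoded inside http.client,
# raising UnicodeEncodeError on the Japanese characters.
# r = requests.post(endpoint_url, data=query, headers=request_headers)

# After the fix (works): the UTF-8 bytes are sent on the wire verbatim.
r = requests.post(endpoint_url, data=query.encode('utf-8'), headers=request_headers)
print(r.json()['results']['bindings'])
```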
12 changes: 8 additions & 4 deletions vanderbot/vb3_match_wikidata.py
@@ -24,6 +24,10 @@
# -----------------------------------------
# Version 1.5 change notes (2020-09-08):
# - no changes
+# -----------------------------------------
+# Version 1.6.4 change notes (2021-01-27):
+# - Contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings
+#   sent as part of a SPARQL query contained non-Latin characters.

import requests # best library to manage HTTP transactions
from bs4 import BeautifulSoup # web-scraping library
@@ -228,7 +232,7 @@ def searchNameAtWikidata(name):
#print('searching for ', name)
results = []
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
try:
data = r.json()
statements = data['results']['bindings']
@@ -264,7 +268,7 @@ def searchWikidataDescription(qId):
}'''
#print(query)
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
try:
data = r.json()
statements = data['results']['bindings']
@@ -310,7 +314,7 @@ def searchWikidataArticle(qId):
}'''
#print(query)
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
try:
data = r.json()
statements = data['results']['bindings']
@@ -1073,7 +1077,7 @@ def identifiedInCrossref(doi, employee):

# The endpoint defaults to returning XML, so the Accept: header is required
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers={'Accept' : 'application/json'})
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)

data = r.json()
#print(json.dumps(data,indent = 2))
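The four calls patched above (in `searchNameAtWikidata`, `searchWikidataDescription`, `searchWikidataArticle`, and `identifiedInCrossref`) all follow the same build-query, POST, parse-JSON pattern. A trimmed, hypothetical distillation of that shared pattern (the function name is illustrative, not the script's exact code):

```python
import requests

def query_endpoint(query, endpoint_url, request_headers):
    """POST a SPARQL query as UTF-8 bytes and return the JSON result bindings."""
    r = requests.post(endpoint_url, data=query.encode('utf-8'), headers=request_headers)
    try:
        return r.json()['results']['bindings']
    except ValueError:
        # Endpoint errors (timeouts, malformed queries) come back as non-JSON text.
        print(r.text)
        return []
```

Each caller then pulls what it needs from the bindings, e.g. the Q ID from the tail of an item IRI.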
6 changes: 5 additions & 1 deletion vanderbot/vb6_upload_wikidata.py
@@ -83,6 +83,10 @@
# -----------------------------------------
# Version 1.6.2 change notes (2020-12-01):
# - Fixes a bug where an error was raised when a reference property did not have a value.
+# -----------------------------------------
+# Version 1.6.4 change notes (2021-01-27):
+# - Contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings
+#   sent as part of a SPARQL query contained non-Latin characters.

import json
import requests
@@ -203,7 +207,7 @@ def searchLabelsDescriptionsAtWikidata(qIds, labelType, language):

returnValue = []
# r = requests.get(endpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(endpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(endpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
data = r.json()
results = data['results']['bindings']
for result in results:
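`searchLabelsDescriptionsAtWikidata()` fetches labels or descriptions for a batch of Q IDs; the query text itself lies outside the hunk shown. One plausible shape for such a batch query, sketched with a `VALUES` clause (an assumption, not the script's actual query):

```python
import requests

def labels_for_qids(qids, language, endpoint_url, request_headers):
    """Fetch one language's labels for a list of Q IDs in a single query."""
    # The Wikidata Query Service predefines the wd: and rdfs: prefixes.
    values = ' '.join('wd:' + qid for qid in qids)
    query = '''select ?id ?string where {
      values ?id { ''' + values + ''' }
      ?id rdfs:label ?string .
      filter(lang(?string) = "''' + language + '''")
      }'''
    # Encoding every body, even one that happens to be pure ASCII,
    # keeps all of the POST calls uniform and safe.
    r = requests.post(endpoint_url, data=query.encode('utf-8'), headers=request_headers)
    results = []
    for binding in r.json()['results']['bindings']:
        qid = binding['id']['value'].rpartition('/')[2]  # strip the IRI prefix
        results.append({'qid': qid, 'string': binding['string']['value']})
    return results
```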
15 changes: 10 additions & 5 deletions vanderbot/vb_common_code.py
@@ -22,6 +22,11 @@
# -----------------------------------------
# Version 1.5 change notes (2020-09-08):
# - no changes
+# -----------------------------------------
+# Version 1.6.4 change notes (2021-01-27):
+# - Contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings
+#   sent as part of a SPARQL query contained non-Latin characters.


import requests # best library to manage HTTP transactions
from bs4 import BeautifulSoup # web-scraping library
@@ -237,7 +242,7 @@ def searchWikidataForQIdByOrcid(orcid, wikidataEndpointUrl, sparqlSleep):
results = []
acceptMediaType = 'application/json'
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers = generateHeaderDictionary(acceptMediaType))
-r = requests.post(wikidataEndpointUrl, data=query, headers = generateHeaderDictionary(acceptMediaType))
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers = generateHeaderDictionary(acceptMediaType))
try:
data = r.json()
statements = data['results']['bindings']
@@ -317,7 +322,7 @@ def __init__(self, **kwargs):
# send a generic query and return a list of Q IDs
def generic_query(self, query):
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
results_list = []
try:
#if 1==1: # replace try: to let errors occur, also comment out the except: clause
@@ -357,7 +362,7 @@ def single_property_values_for_item(self, qid):
}'''
#print(query)
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
results_list = []
try:
#if 1==1: # replace try: to let errors occur, also comment out the except: clause
@@ -423,7 +428,7 @@ def labels_descriptions(self, qids):

results_list = []
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
data = r.json()
results = data['results']['bindings']
for result in results:
@@ -482,7 +487,7 @@ def search_statement(self, qids, reference_property_list):

results_list = []
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
data = r.json()
results = data['results']['bindings']
# NOTE: There may be more than one reference per statement.
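The calls in this file take their headers from `generateHeaderDictionary()` or `self.requestheader`, neither of which appears in the hunks above. A guess at what such a helper returns, based on how the scripts use it (the `Content-Type` and `User-Agent` values are assumptions):

```python
def generate_header_dictionary(accept_media_type):
    """Build HTTP headers for POSTing a raw SPARQL query."""
    return {
        'Accept': accept_media_type,  # e.g. 'application/json'
        'Content-Type': 'application/sparql-query',  # body is the bare query text
        'User-Agent': 'VanderBot (mailto:contact@example.org)'  # placeholder contact
    }
```

The media-type registration for `application/sparql-query` specifies UTF-8 as its encoding, which is consistent with the UTF-8 bytes this commit now sends.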
