Add UTF-8 encoding to all requests.post() HTTP POST bodies
Steve Baskauf committed Jan 27, 2021
1 parent d610240 commit 7cdd3d7
Showing 4 changed files with 29 additions and 12 deletions.
8 changes: 6 additions & 2 deletions vanderbot/README.md
@@ -32,7 +32,7 @@ Here are some queries that can be run to explore the data:

[Number of clinical trials at Vanderbilt by principal investigator](https://w.wiki/XKK)

-The current release is [v1.6.3](https://github.com/HeardLibrary/linked-data/releases/tag/v1.6.3).
+The current release is [v1.6.4](https://github.com/HeardLibrary/linked-data/releases/tag/v1.6.4).

## How it works

@@ -179,5 +179,9 @@ Version 1.6.3 is a minor upgrade that adds an updated version of the HTML, Javas

The upgrade now supports monolingual string values and the complex value types globecoordinate and quantity. Other scripts were not affected.

+## Release v1.6.4 (2021-01-27)
+
+Version 1.6.4 contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings sent as part of a SPARQL query contained non-Latin characters.
+
----
-Revised 2021-01-26
+Revised 2021-01-27
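To see why the one-line change matters: when `requests.post()` is handed a `str` body, the standard library's `http.client` ends up encoding it as Latin-1, so any character outside that range raises a `UnicodeEncodeError` before the request is even sent. Encoding the body to UTF-8 bytes up front avoids this. A minimal sketch of the failure and the fix (not part of the commit; the header dictionary is an assumption, since the scripts' actual `requestHeaderDictionary` is defined outside the hunks shown below):

```python
import requests

endpoint_url = 'https://query.wikidata.org/sparql'  # Wikidata Query Service

# Assumed headers; application/sparql-query is the MIME type for a raw query body.
request_headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/sparql-query'
}

# A query whose literal contains non-Latin characters (here, Japanese).
query = '''select distinct ?item where {
  ?item rdfs:label "ヴァンダービルト大学"@ja .
  }'''

# Before the fix (fails): the str body is Latin-1-encoded inside http.client,
# raising UnicodeEncodeError on the Japanese characters.
# r = requests.post(endpoint_url, data=query, headers=request_headers)

# After the fix (works): the UTF-8 bytes are sent on the wire verbatim.
r = requests.post(endpoint_url, data=query.encode('utf-8'), headers=request_headers)
print(r.json()['results']['bindings'])
```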
12 changes: 8 additions & 4 deletions vanderbot/vb3_match_wikidata.py
@@ -24,6 +24,10 @@
# -----------------------------------------
# Version 1.5 change notes (2020-09-08):
# - no changes
+# -----------------------------------------
+# Version 1.6.4 change notes (2021-01-27):
+# - Contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings
+#   sent as part of a SPARQL query contained non-Latin characters.

import requests # best library to manage HTTP transactions
from bs4 import BeautifulSoup # web-scraping library
@@ -228,7 +232,7 @@ def searchNameAtWikidata(name):
#print('searching for ', name)
results = []
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
try:
data = r.json()
statements = data['results']['bindings']
@@ -264,7 +268,7 @@ def searchWikidataDescription(qId):
}'''
#print(query)
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
try:
data = r.json()
statements = data['results']['bindings']
@@ -310,7 +314,7 @@ def searchWikidataArticle(qId):
}'''
#print(query)
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
try:
data = r.json()
statements = data['results']['bindings']
@@ -1073,7 +1077,7 @@ def identifiedInCrossref(doi, employee):

# The endpoint defaults to returning XML, so the Accept: header is required
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers={'Accept' : 'application/json'})
-r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)

data = r.json()
#print(json.dumps(data,indent = 2))
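The four calls patched above (in `searchNameAtWikidata`, `searchWikidataDescription`, `searchWikidataArticle`, and `identifiedInCrossref`) all follow the same build-query, POST, parse-JSON pattern. A trimmed, hypothetical distillation of that shared pattern (the function name is illustrative, not the script's exact code):

```python
import requests

def query_endpoint(query, endpoint_url, request_headers):
    """POST a SPARQL query as UTF-8 bytes and return the JSON result bindings."""
    r = requests.post(endpoint_url, data=query.encode('utf-8'), headers=request_headers)
    try:
        return r.json()['results']['bindings']
    except ValueError:
        # Endpoint errors (timeouts, malformed queries) come back as non-JSON text.
        print(r.text)
        return []
```

Each caller then pulls what it needs from the bindings, e.g. the Q ID from the tail of an item IRI.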
6 changes: 5 additions & 1 deletion vanderbot/vb6_upload_wikidata.py
@@ -83,6 +83,10 @@
# -----------------------------------------
# Version 1.6.2 change notes (2020-12-01):
# - Fixes a bug where an error was raised when a reference property did not have a value.
+# -----------------------------------------
+# Version 1.6.4 change notes (2021-01-27):
+# - Contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings
+#   sent as part of a SPARQL query contained non-Latin characters.

import json
import requests
@@ -203,7 +207,7 @@ def searchLabelsDescriptionsAtWikidata(qIds, labelType, language):

returnValue = []
# r = requests.get(endpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
-r = requests.post(endpointUrl, data=query, headers=requestHeaderDictionary)
+r = requests.post(endpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
data = r.json()
results = data['results']['bindings']
for result in results:
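`searchLabelsDescriptionsAtWikidata()` fetches labels or descriptions for a batch of Q IDs; the query text itself lies outside the hunk shown. One plausible shape for such a batch query, sketched with a `VALUES` clause (an assumption, not the script's actual query):

```python
import requests

def labels_for_qids(qids, language, endpoint_url, request_headers):
    """Fetch one language's labels for a list of Q IDs in a single query."""
    # The Wikidata Query Service predefines the wd: and rdfs: prefixes.
    values = ' '.join('wd:' + qid for qid in qids)
    query = '''select ?id ?string where {
      values ?id { ''' + values + ''' }
      ?id rdfs:label ?string .
      filter(lang(?string) = "''' + language + '''")
      }'''
    # Encoding every body, even one that happens to be pure ASCII,
    # keeps all of the POST calls uniform and safe.
    r = requests.post(endpoint_url, data=query.encode('utf-8'), headers=request_headers)
    results = []
    for binding in r.json()['results']['bindings']:
        qid = binding['id']['value'].rpartition('/')[2]  # strip the IRI prefix
        results.append({'qid': qid, 'string': binding['string']['value']})
    return results
```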
15 changes: 10 additions & 5 deletions vanderbot/vb_common_code.py
@@ -22,6 +22,11 @@
# -----------------------------------------
# Version 1.5 change notes (2020-09-08):
# - no changes
+# -----------------------------------------
+# Version 1.6.4 change notes (2021-01-27):
+# - Contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, requests failed when strings
+#   sent as part of a SPARQL query contained non-Latin characters.


import requests # best library to manage HTTP transactions
from bs4 import BeautifulSoup # web-scraping library
@@ -237,7 +242,7 @@ def searchWikidataForQIdByOrcid(orcid, wikidataEndpointUrl, sparqlSleep):
results = []
acceptMediaType = 'application/json'
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers = generateHeaderDictionary(acceptMediaType))
-r = requests.post(wikidataEndpointUrl, data=query, headers = generateHeaderDictionary(acceptMediaType))
+r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers = generateHeaderDictionary(acceptMediaType))
try:
data = r.json()
statements = data['results']['bindings']
@@ -317,7 +322,7 @@ def __init__(self, **kwargs):
# send a generic query and return a list of Q IDs
def generic_query(self, query):
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
results_list = []
try:
#if 1==1: # replace try: to let errors occur, also comment out the except: clause
@@ -357,7 +362,7 @@ def single_property_values_for_item(self, qid):
}'''
#print(query)
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
results_list = []
try:
#if 1==1: # replace try: to let errors occur, also comment out the except: clause
@@ -423,7 +428,7 @@ def labels_descriptions(self, qids):

results_list = []
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
data = r.json()
results = data['results']['bindings']
for result in results:
@@ -482,7 +487,7 @@ def search_statement(self, qids, reference_property_list):

results_list = []
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
-r = requests.post(self.endpoint, data=query, headers=self.requestheader)
+r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
data = r.json()
results = data['results']['bindings']
# NOTE: There may be more than one reference per statement.
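The calls in this file take their headers from `generateHeaderDictionary()` or `self.requestheader`, neither of which appears in the hunks above. A guess at what such a helper returns, based on how the scripts use it (the `Content-Type` and `User-Agent` values are assumptions):

```python
def generate_header_dictionary(accept_media_type):
    """Build HTTP headers for POSTing a raw SPARQL query."""
    return {
        'Accept': accept_media_type,  # e.g. 'application/json'
        'Content-Type': 'application/sparql-query',  # body is the bare query text
        'User-Agent': 'VanderBot (mailto:contact@example.org)'  # placeholder contact
    }
```

The media-type registration for `application/sparql-query` specifies UTF-8 as its encoding, which is consistent with the UTF-8 bytes this commit now sends.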
