Skip to content

Commit

Permalink
Revised attempt to escape quotes in content, using xml.sax.saxutils.escape.
Browse files Browse the repository at this point in the history
Results for existing configs are valid XML.
  • Loading branch information
tucotuco committed Jul 9, 2023
1 parent e879849 commit a2dc19a
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions build/buildxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
# One extension can be generated per run of the script, with the extension's name and
# destination file as parameters (see main() for syntax).
#
__version__ = '2023-07-09T12:46-03:00'
__version__ = '2023-07-09T20:21-03:00'

import csv
import sys
import argparse
from xml.sax.saxutils import escape

class CSVtoXMLConverter:
'''
Expand Down Expand Up @@ -566,13 +567,17 @@ def get_xml(self, extension_name):
xml += ' xmlns:dc="http://purl.org/dc/terms/"\n'
xml += ' xsi:schemaLocation="http://rs.gbif.org/extension/ http://rs.gbif.org/schema/extension.xsd"\n'
xml += f' dc:title="{extension.get("title")}"\n'
xml += f' name="{extension_name}" namespace="{extension.get("namespace")}" rowType="{extension.get("rowType")}"\n'
xml += f' name="{extension_name}"\n'
xml += f' namespace="{extension.get("namespace")}"\n'
xml += f' rowType="{extension.get("rowType")}"\n'
xml += f' dc:issued="{extension.get("dc:issued")}"\n'
subject = extension.get("dc:subject")
if subject is not None:
xml += f' dc:subject="{extension.get("dc:subject")}"\n'
xml += f' dc:relation="{extension.get("dc:relation")}"\n'
xml += f' dc:description="{extension.get("dc:description")}">\n'
description = extension.get("dc:description")
description = escape(description, {'"':'"'})
xml += f' dc:description="{description}">\n'
xml += '\n'
with open(self.csv_file_path, 'r') as csv_file:
reader = csv.reader(csv_file)
Expand Down Expand Up @@ -603,18 +608,21 @@ def get_xml(self, extension_name):
description = row_dict["definition"]
if row_dict.get("comments") is not None and len(row_dict.get("comments"))>0:
description += f' {row_dict["comments"]}'
description = escape(description, {'"':'"'})
term_xml += f'dc:description="{description}" '
examples = row_dict.get("examples") or ""
examples = escape(examples, {'"':'"'})
term_xml += f'examples="{examples}" '
if row_dict["term_localName"] in extension.get("required"):
term_xml += f'required="true"/>'
else:
term_xml += f'required="false"/>'
xml += f' {term_xml}\n'
for addition in extension.get("gbif_additions"):
addition = escape(addition,{'"':'"'})
xml += f' {addition}'
xml += "</extension>"
return encoded_quotes(xml)
return xml

def write_xml(self, extension_name, filename):
'''
Expand All @@ -624,9 +632,6 @@ def write_xml(self, extension_name, filename):
with open(filename, 'w') as xml_file:
xml_file.write(self.get_xml(extension_name))

def encoded_quotes(s):
    '''
    Return a copy of the string s in which every double-quote character (")
    is replaced by the entity reference &quot;.

    Only the double quote is rewritten; all other characters, including
    & < > and single quotes, are returned unchanged.

    parameters:
        s - the string to process
    returns:
        the string with all double quotes replaced by &quot;
    '''
    # NOTE(review): every double quote is replaced, so passing a whole XML
    # document would also rewrite the quotes that delimit attribute values.
    return s.replace('"', '&quot;')

def _getoptions():
''' Parse command line options and return them.'''
parser = argparse.ArgumentParser()
Expand Down

0 comments on commit a2dc19a

Please sign in to comment.