-
Notifications
You must be signed in to change notification settings - Fork 1
/
iiif_tei_additions.py
39 lines (32 loc) · 1.43 KB
/
iiif_tei_additions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import csv
import xml.etree.ElementTree as eT
import os
import argparse
# Column positions (0-based) read from each row of the IIIF CSV.
IMAGE_NAME_COL = 7  # compared with the `source` attribute of each TEI <pb>
IMAGE_URI_COL = 8   # IIIF image URI, stored (truncated) in <pb facs="...">
MANIFEST_COL = 9    # IIIF manifest URI, stored in <facsimile facs="...">

# Number of leading characters dropped from the image URI before it is stored.
# NOTE(review): presumably the length of a fixed URL prefix -- confirm against
# the CSV contents.
IMAGE_URI_SKIP = 29

TEI_NS = 'http://www.tei-c.org/ns/1.0'
NS = {'tei': TEI_NS}


def _read_rows(csv_path):
    """Return every row of the CSV file at *csv_path* as a list of string lists."""
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for file objects passed to csv.reader.
    with open(csv_path, newline='') as csv_file:
        return list(csv.reader(csv_file))


def _link_iiif(root, rows):
    """Copy IIIF URIs from *rows* onto the TEI elements found under *root*.

    Rows are processed in order. When the image name of a row equals the
    ``source`` attribute of one or more ``<pb>`` elements, each of those
    elements gets ``facs`` set to the row's image URI (minus its first
    ``IMAGE_URI_SKIP`` characters), and the first ``<facsimile>`` element, if
    any, gets ``facs`` set to the row's manifest URI. A later matching row
    overwrites the values written by an earlier one.

    Rows too short to hold the image columns (blank lines, for instance) are
    skipped instead of raising ``IndexError``.
    """
    # Index the page breaks once so that each row is a dictionary lookup
    # instead of a fresh scan of the whole document.
    page_breaks = {}
    for pb in root.findall('.//tei:pb', NS):
        page_breaks.setdefault(pb.get('source'), []).append(pb)
    facsimile = root.find('.//tei:facsimile', NS)

    for row in rows:
        if len(row) <= IMAGE_URI_COL:
            continue
        matches = page_breaks.get(row[IMAGE_NAME_COL])
        if not matches:
            continue
        image_ref = row[IMAGE_URI_COL][IMAGE_URI_SKIP:]
        for pb in matches:
            pb.set('facs', image_ref)
        # The manifest is only taken from rows that matched a page of this
        # document, so a file never receives another document's manifest.
        if facsimile is not None and len(row) > MANIFEST_COL:
            facsimile.set('facs', row[MANIFEST_COL])


def main(argv=None):
    """Add IIIF references to every ``.xml`` file of the ``--xml`` directory.

    *argv* is the list of command-line arguments; ``None`` (the default) makes
    argparse read ``sys.argv[1:]``. Each XML file is parsed, updated from the
    rows of the ``--csv`` file and written back in place.
    """
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--xml', default='../Encodage/Moreno-TEI-files/', type=str, help="Le chemin du dossier des fichiers xml à charger.")
    parser.add_argument('--csv', default='../pliegos_iiif.csv', type=str, help="Le chemin du fichier CSV contenant les URI IIIF")
    args = parser.parse_args(argv)

    # Conversion csv > list
    rows = _read_rows(args.csv)

    # Serialize TEI elements in the default namespace (no "ns0:" prefixes).
    eT.register_namespace('', TEI_NS)

    # Parsing TEI files
    for file_name in os.listdir(args.xml):
        if not file_name.endswith('.xml'):
            continue
        xml_path = os.path.join(args.xml, file_name)
        tree = eT.parse(xml_path)
        _link_iiif(tree.getroot(), rows)
        # NOTE(review): every .xml file is rewritten, matched or not, and
        # ElementTree's default parser skips comments, processing
        # instructions and doctype declarations, so those are not preserved.
        tree.write(xml_path, encoding="UTF-8", xml_declaration=True)


if __name__ == '__main__':
    main()