-
Notifications
You must be signed in to change notification settings - Fork 0
/
parselc.php
103 lines (93 loc) · 3.32 KB
/
parselc.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
<?php
/**
* LoC DGT Parsing Tool
*
* Parses the XML version of the MARC, as transformed by Daniel.
*
* @author Robbie Hott
* @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause
* @copyright 2018 the Rector and Visitors of the University of Virginia
*/
// ---------------------------------------------------------------------------
// Main script: read a MARCXML file, build a vocabulary keyed by record id
// (controlfield 001), and print it as pretty-printed JSON followed by a notice
// line for every term that carries more than one category (datafield 072).
// ---------------------------------------------------------------------------
if ($argc < 2) {
    die("Usage: php parselc.php marcxmlfile.xml\n");
}

// Fail with a clear message instead of a fatal "children() on bool" error
// when the file is missing/unreadable or does not contain well-formed XML.
$contents = file_get_contents($argv[1]);
if ($contents === false) {
    fwrite(STDERR, "Error: unable to read {$argv[1]}\n");
    exit(1);
}
$xml = simplexml_load_string($contents);
if ($xml === false) {
    fwrite(STDERR, "Error: {$argv[1]} could not be parsed as XML\n");
    exit(1);
}

$vocab = [];
foreach ($xml->children() as $record) {
    // These three lists are always present in the output, even when empty;
    // "id", "preferred" and "alternate" are only added when found.
    $local = [
        "categories" => [],
        "broader" => [],
        "related" => [],
    ];

    foreach ($record->children() as $name => $c) {
        // Tags are compared as exact strings so that e.g. "72" never
        // matches "072" through PHP's numeric-string comparison.
        $tag = (string) hasAttribute($c, "tag");

        if ($name === "controlfield") {
            // get id
            if ($tag === "001") {
                $local["id"] = (string) $c;
            }
        } elseif ($name === "datafield") {
            if ($tag === "072") {
                // get type: every $a subfield is a category
                foreach (subfieldValues($c, "a") as $category) {
                    $local["categories"][] = $category;
                }
            } elseif ($tag === "150") {
                // get preferred: if $a repeats, the last occurrence wins
                $preferred = subfieldValues($c, "a");
                if (count($preferred) > 0) {
                    $local["preferred"] = end($preferred);
                }
            } elseif ($tag === "450") {
                // get alternate: the key is created on the first hit only
                foreach (subfieldValues($c, "a") as $alternate) {
                    $local["alternate"][] = $alternate;
                }
            } elseif ($tag === "550") {
                // get related: a $w subfield equal to "g" marks the link as
                // broader; the linked term is the last $0 subfield.
                $isBroader = in_array("g", subfieldValues($c, "w"), true);
                $targets = subfieldValues($c, "0");
                if (count($targets) > 0) {
                    $target = end($targets);
                    if ($isBroader) {
                        $local["broader"][] = $target;
                    } else {
                        $local["related"][] = $target;
                    }
                }
            }
        }
    }

    // Records without a 001 control field cannot be keyed and are skipped.
    if (isset($local["id"])) {
        $vocab[$local["id"]] = $local;
    }
}

// Print out a JSON object containing all the vocabulary
echo json_encode($vocab, JSON_PRETTY_PRINT);

// Print out notices if any of the vocabulary are contained in multiple categories
foreach ($vocab as $id => $term) {
    if (count($term["categories"]) > 1) {
        // A record may lack a 150 field; print an empty label rather than
        // raising an undefined-index warning.
        $label = isset($term["preferred"]) ? $term["preferred"] : "";
        echo "\n$id ($label) had multiple categories";
    }
}
echo "\n";

/**
 * Collect the text of every <subfield> child of a datafield whose "code"
 * attribute equals $code exactly, in document order.
 *
 * @param SimpleXMLElement $datafield the datafield element to scan
 * @param string           $code      the subfield code to match (e.g. "a", "0")
 * @return string[] matching subfield values; empty array when none match
 */
function subfieldValues(SimpleXMLElement $datafield, $code) {
    $values = [];
    foreach ($datafield->children() as $name => $subfield) {
        // hasAttribute() yields false for a missing attribute, which casts
        // to "" and therefore never equals a real code.
        if ($name === "subfield" && (string) hasAttribute($subfield, "code") === (string) $code) {
            $values[] = (string) $subfield;
        }
    }
    return $values;
}
/**
 * Helper function to make getting attribute values easier.
 *
 * Called with only $att, it returns the attribute node itself (cast it to
 * string to get the value), or false when the element has no such attribute.
 * Called with $val, it returns a boolean: true only when the attribute
 * exists and its text is exactly equal to $val.
 *
 * @param SimpleXMLElement $xml element to inspect (taken by reference for
 *                              backward compatibility; it is never modified)
 * @param string           $att attribute name to look for
 * @param string|int|null  $val expected value, or null to fetch the attribute
 * @return SimpleXMLElement|bool
 */
function hasAttribute(&$xml, $att, $val=null) {
    foreach ($xml->attributes() as $k => $v) {
        if ($k === (string) $att) {
            // Strict null check: a caller passing 0 or "" is asking for a
            // comparison, not for the attribute node.
            if ($val !== null) {
                // Compare as strings. A loose == between numeric strings
                // would make "72" match "072" and "00" match "0".
                return (string) $v === (string) $val;
            }
            return $v;
        }
    }
    return false;
}