-
Notifications
You must be signed in to change notification settings - Fork 4
/
xpath_select.xsl
161 lines (140 loc) · 6.92 KB
/
xpath_select.xsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="2.0"
xmlns:eac="urn:isbn:1-931666-33-4"
xmlns:xlink="http://www.w3.org/1999/xlink"
exclude-result-prefixes="#all"
>
<!--
Return results from an XPath select.
Leftover attribute, not currently set on xsl:stylesheet: extension-element-prefixes="date exsl"
Example command line using find:
find lds_cpf -type f -exec saxon.sh {} xpath_select.xsl \; >> xpath.log 2>&1 &
-->
<!-- Serialize the result as indented XML. -->
<xsl:output method="xml" indent="yes"/>
<!--
    A single line feed (U+000A). It is written as a character reference because a literal line break
    inside an attribute value is normalized to a space by the XML parser before XSLT ever sees it.
    Not referenced by the template in this stylesheet.
-->
<xsl:variable name="cr" select="'&#10;'"/>
<!-- Optional stylesheet parameter; not referenced by the template in this stylesheet. -->
<xsl:param name="search"/>
<!--
This is a small script just to run an XPath on some XML. The Linux xpath utility doesn't quite give
useful output, although a Perl script similar to xpath, based on XML::LibXML or XML::XPath, would be
good and very fast. And it would have dynamic XPath execution.
There is no dynamic XPath in XSLT 2.0, so you'll just have to copy the XPath from the for-each into the
container/@xpath attribute.
When creating a new XPath, copy the most similar working xsl:when branch, give the copy test="true()", and
change the old branch's test to "false()" in order to save it for future reference.
-->
<xsl:template match="/">
    <!--
        Single entry point. xsl:choose runs only the first xsl:when whose test is true, so exactly one
        branch below is active (test="true()"); the others are kept with test="false()" as saved
        queries. Each branch writes one or more <container> elements whose @xpath attribute records
        the expression that was run, with one <found> child per selected node.
    -->
    <xsl:choose>
        <xsl:when test="true()">
            <!--
                jun 29 2015 a real, working example
                saxon.sh /data/source/nara/archive/nara_corp.xml xpath_select.xsl > xpath.log 2>&1 &
                > grep found xpath.log | wc -l
                54986

                Note: this branch writes two sibling <container> elements, so the result has no
                single root element.
            -->
            <container xpath="XMLExport/das_items/organization">
                <xsl:for-each select="XMLExport/das_items/organization">
                    <found><xsl:value-of select="naId"/></found>
                </xsl:for-each>
            </container>
            <container xpath="XMLExport/organization">
                <xsl:for-each select="XMLExport/organization">
                    <found><xsl:value-of select="naId"/></found>
                </xsl:for-each>
            </container>
        </xsl:when>
        <xsl:when test="false()">
            <!--
                Find examples of existDates that have a <date> child; each hit is copied in full.
            -->
            <container xpath="eac:eac-cpf/eac:cpfDescription/eac:description/eac:existDates[eac:date]">
                <xsl:for-each select="eac:eac-cpf/eac:cpfDescription/eac:description/eac:existDates[eac:date]">
                    <found>
                        <xsl:copy-of select="."/>
                    </found>
                </xsl:for-each>
            </container>
        </xsl:when>
        <xsl:when test="false()">
            <!--
                Report the string length of each bioghist under /ead/archdesc/descgrp.
            -->
            <container xpath="/ead/archdesc/descgrp/bioghist">
                <xsl:for-each select="/ead/archdesc/descgrp/bioghist">
                    <found>
                        <xsl:value-of select="concat('len: ', string-length(.))"/>
                    </found>
                </xsl:for-each>
            </container>
        </xsl:when>
        <xsl:when test="false()">
            <!--
                Count person records: one <found> per person, carrying its naId and name.
            -->
            <container xpath="(/XMLExport/person|XMLExport/das_items/person)">
                <xsl:for-each select="(/XMLExport/person|XMLExport/das_items/person)">
                    <found naId="{naId}">
                        <xsl:value-of select="name"/>
                    </found>
                </xsl:for-each>
            </container>
        </xsl:when>
        <xsl:when test="false()">
            <!--
                Count person records with a descriptionReference that has title, but no titleHierarchy.
                The outer for-each selects a <person> record so we can get the naId. The inner for-each
                is mostly used to set the context, but also serves as an xsl:if: it runs zero times
                when the person has no qualifying descriptionReference.
                Only one hit per <person> is needed in order to count it as needing a fix, so the whole
                filtered sequence is parenthesized and then indexed with [1]. Writing position() = 1
                inside the predicate of the // step would instead test the position among the
                descriptionReference children of each parent element, which can miss a person whose
                first descriptionReference does not qualify, or report several hits for one person.
            -->
            <container xpath="(/XMLExport/person|XMLExport/das_items/person)//descriptionReference[title and not(titleHierarchy)]">
                <xsl:for-each select="(/XMLExport/person|XMLExport/das_items/person)">
                    <xsl:variable name="naId" select="naId"/>
                    <xsl:for-each select="(.//descriptionReference[title and not(titleHierarchy)])[1]">
                        <found naId="{$naId}">
                            <xsl:copy-of select="."/>
                        </found>
                    </xsl:for-each>
                </xsl:for-each>
            </container>
        </xsl:when>
        <xsl:when test="false()">
            <!-- List the xlink:arcrole value of every eac:cpfRelation. -->
            <container xpath="//eac:cpfRelation/@xlink:arcrole">
                <xsl:for-each select="//eac:cpfRelation/@xlink:arcrole">
                    <found>
                        <xsl:value-of select="."/>
                    </found>
                </xsl:for-each>
            </container>
        </xsl:when>
        <xsl:when test="false()">
            <!-- List the termName of every contributorType found under a person. -->
            <container xpath="//person/contributorTypeArray/contributorType/termName">
                <xsl:for-each select="//person/contributorTypeArray/contributorType/termName">
                    <found>
                        <xsl:value-of select="concat('contributorType: ', .)"/>
                    </found>
                </xsl:for-each>
            </container>
        </xsl:when>
        <xsl:when test="false()">
            <!--
                For every person whose linkCounts/totalDescriptionLinkCount is 0, write a catalog URL
                built from its naId. The commented-out lines are the organizationName variants of the
                same query.
            -->
            <container xpath="(//person)[linkCounts/totalDescriptionLinkCount = 0]/naId">
                <!-- <xsl:copy-of select="(//organizationName)[linkCounts/totalDescriptionLinkCount = 0]"/> -->
                <!-- <xsl:for-each select="(//organizationName)[linkCounts/totalDescriptionLinkCount = 0]/naId"> -->
                <xsl:for-each select="(//person)[linkCounts/totalDescriptionLinkCount = 0]/naId">
                    <found>
                        <!-- value-of, like the other branches: the selected value is a plain string -->
                        <xsl:value-of select="concat('https://catalog.archives.gov/id/', .)"/>
                    </found>
                </xsl:for-each>
            </container>
        </xsl:when>
    </xsl:choose>
</xsl:template>
</xsl:stylesheet>