-
Notifications
You must be signed in to change notification settings - Fork 1
/
report_cpf_entity_part.pl
executable file
·101 lines (80 loc) · 2.78 KB
/
report_cpf_entity_part.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/perl
# Author: Tom Laudeman
# The Institute for Advanced Technology in the Humanities
# Copyright 2014 University of Virginia. Licensed under the Educational Community License, Version 2.0
# (the "License"); you may not use this file except in compliance with the License. You may obtain a
# copy of the License at
# http://www.osedu.org/licenses/ECL-2.0
# http://opensource.org/licenses/ECL-2.0
# Unless required by applicable law or agreed to in writing, software distributed under the License is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing permissions and limitations under the
# License.
# ./report_cpf_entity_part.pl dir=lc_cpf_final/ > tmp.txt
use strict;
# use session_lib qw(:all);
use XML::XPath;
use CGI; # Handles command line name=value pairs.
use Time::HiRes qw(usleep nanosleep);
# The usage text must be assigned BEFORE main() is called: main() reads
# $usage in its die() messages, and nothing after exit() is ever executed.
my $usage = "Usage $0 dir=somedirectory";
main();
exit();
# main()
#
# Report the entityType and nameEntry/part values of every file found under
# the directory given on the command line as dir=somedirectory.
#
# Arguments: none. The directory is read from the name=value pairs that CGI
#            picks up from the command line.
# Output:    progress lines plus one "et: ..." / "part: ..." entry per matching
#            node, printed to stdout.
# Dies:      when dir= is missing, when it is not a directory, or when the
#            find command cannot be started. Errors raised by XML::XPath while
#            reading a file are not caught and end the run.
sub main
{
    $| = 1; # Unbuffer stdout so progress is visible while the scan runs.

    # Yes, this is a command line app, but Perl's CGI module gives us
    # named args (name=value) which is about as simple as it gets.
    my $qq = CGI->new();
    my %ch = $qq->Vars();

    if (! exists($ch{dir}))
    {
        die "No directory specified.\n$usage\n";
    }

    # -d is false for a path that does not exist, so it covers both cases.
    if (! -d $ch{dir})
    {
        die "Specified does not exist or is not a directory.\n$usage\n";
    }

    # The linux find command will not descend into a symlinked dir that lacks
    # a trailing slash, so add one when it is missing.
    if ($ch{dir} !~ m/\/$/)
    {
        $ch{dir} = "$ch{dir}/";
    }

    print "Scanning: $ch{dir}\n";

    $XML::XPath::Namespaces = 0;

    # Run find through a list-form pipe open so that the directory name is
    # passed as a single argument and never interpreted by a shell. Spaces or
    # shell metacharacters in the path are therefore harmless.
    open(my $find_fh, '-|', 'find', $ch{dir}, '-type', 'f')
        or die "Cannot run find on $ch{dir}: $!\n";
    my @files = <$find_fh>;
    # A non-zero exit from find (e.g. an unreadable subdirectory) is reported
    # but does not stop the report for the files that were found.
    close($find_fh)
        or warn "find did not finish cleanly on $ch{dir} (status $?)\n";
    chomp(@files);

    print "Find done. File count: ". scalar(@files) . "\n";

    # Each entry: XPath expression, output label, text printed after the value.
    my @reports = (
        ['/eac-cpf/cpfDescription/identity/entityType',     'et',   ' '],
        ['/eac-cpf/cpfDescription/identity/nameEntry/part', 'part', "\n\n"],
    );

    foreach my $file (@files)
    {
        print " file: $file\n";
        my $xpath = XML::XPath->new(filename => $file);

        foreach my $report (@reports)
        {
            my ($path, $label, $terminator) = @{$report};
            my $nodes = $xpath->find($path);
            next if (! $nodes);

            foreach my $node ($nodes->get_nodelist())
            {
                my $val = $node->string_value();
                print "$label: $val$terminator";
            }
        }
    }
}