From 7ac2e8cade963732da95fefeb4b5bb070ebef699 Mon Sep 17 00:00:00 2001 From: Johanna Date: Thu, 20 Jul 2017 12:09:41 -0300 Subject: [PATCH 1/4] Replaces simplexml with dom. --- includes/utilities.inc | 68 +++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/includes/utilities.inc b/includes/utilities.inc index 3f19eae..85a0fa5 100644 --- a/includes/utilities.inc +++ b/includes/utilities.inc @@ -177,20 +177,20 @@ function islandora_newspaper_group_issues(array $issues) { */ function islandora_newspaper_get_date_issued_from_mods(AbstractDatastream $datastream) { $out = FALSE; - $file = file_create_filename("{$datastream->parent->id}_{$datastream->id}.xml", 'temporary://'); - $datastream->getContent($file); - @$doc = simplexml_load_file($file); - if ($doc) { - $doc->registerXPathNamespace('ns', 'http://www.loc.gov/mods/v3'); + $dom = new DOMDocument(); + $dom->loadXML($datastream->content); + $xpath = new DomXPath($dom); + if ($dom) { + $xpath->registerNamespace('ns', 'http://www.loc.gov/mods/v3'); // Assumes the canonical date issued exists in the first mods document under // origin info and is not specified as a point, additional logic could be // added to process different encodings. 
- $dates = $doc->xpath('//ns:mods[1]/ns:originInfo/ns:dateIssued[not(@point)][1]'); - $result = (string) reset($dates); + $dates = $xpath->query('//ns:mods/ns:originInfo/ns:dateIssued[not(@point)]')->item(0); try { - if (empty($result)) { + if (empty($dates)) { throw new Exception('mods:dateIssued element was empty.'); } + $result = $dates->nodeValue; $out = new DateTime($result); } catch (Exception $e) { @@ -199,7 +199,6 @@ function islandora_newspaper_get_date_issued_from_mods(AbstractDatastream $datas watchdog_exception('islandora_newspaper', $e, $msg, $vars, WATCHDOG_ERROR); } } - file_unmanaged_delete($file); return $out; } @@ -310,17 +309,16 @@ EOQ; */ function islandora_newspaper_set_mods_date_issued(AbstractDatastream $datastream, DateTime $date) { $out = FALSE; - $file = file_create_filename("{$datastream->parent->id}_{$datastream->id}.xml", 'temporary://'); - $datastream->getContent($file); - @$doc = simplexml_load_file($file); - if ($doc) { - $doc->registerXPathNamespace('ns', 'http://www.loc.gov/mods/v3'); + $dom = new DOMDocument(); + $dom->loadXML($datastream->content); + $xpath = new DomXPath($dom); + if ($dom) { + $xpath->registerNamespace('ns', 'http://www.loc.gov/mods/v3'); // Assumes the canonical date issued exists in the first mods document under // origin info and is not specified as a point, additional logic could be // added to process different encodings. - $parent = FALSE; - $dates = $doc->xpath('//ns:mods[1]/ns:originInfo/ns:dateIssued[not(@point)][1]'); - if (is_array($dates) && count($dates) > 0) { + $dates = $xpath->query('//ns:mods/ns:originInfo/ns:dateIssued[not(@point)]'); + if ($dates->length > 0) { $removal = array(); // XX: Because you can't remove elements in a foreach loop // we collect them. 
Then we can add the new one and remove @@ -329,32 +327,28 @@ function islandora_newspaper_set_mods_date_issued(AbstractDatastream $datastream $removal[] = $d; } foreach ($removal as $r) { - $dom = dom_import_simplexml($r); - $dom->parentNode->removeChild($dom); + $r->parentNode->removeChild($r); } } - $origin = $doc->xpath('//ns:mods[1]/ns:originInfo'); - if ($origin) { - $parent = reset($origin); + $mods = $xpath->query('//ns:mods')->item(0); + $prefix = ($mods->prefix) ? ($mods->prefix) . ':' : ''; + $new_date = $dom->createElement($prefix . 'dateIssued', $date->format("Y-m-d")); + $new_date->setAttribute('encoding', 'iso8601'); + $origin = $xpath->query('ns:originInfo', $mods)->item(0); + if (!$origin) { + $origin = $dom->createElement($prefix . 'originInfo'); + $origin = $mods->appendChild($origin); } - if ($parent) { - $new_date = $parent->addChild('dateIssued', $date->format("Y-m-d"), 'http://www.loc.gov/mods/v3'); - $new_date->addAttribute('encoding', 'iso8601'); - try { - $datastream->setContentFromString($doc->asXML()); - $out = TRUE; - } - catch (Exception $e) { - $msg = 'Failed to get save MODS datastream for @pid'; - $vars = array('@pid' => $datastream->parent->id); - watchdog_exception('islandora_newspaper', $e, $msg, $vars, WATCHDOG_ERROR); - } + $new_date = $origin->appendChild($new_date); + try { + $datastream->setContentFromString($dom->saveXML()); + $out = TRUE; } - else { + catch (Exception $e) { + $msg = 'Failed to get save MODS datastream for @pid'; $vars = array('@pid' => $datastream->parent->id); - watchdog('islandora_newspaper', 'Failed to get originInfo from MODS for @pid', $vars, WATCHDOG_ERROR); + watchdog_exception('islandora_newspaper', $e, $msg, $vars, WATCHDOG_ERROR); } } - file_unmanaged_delete($file); return $out; } From be023e4ba173763ab2081077ff724e4c13ee97d8 Mon Sep 17 00:00:00 2001 From: Johanna Date: Fri, 21 Jul 2017 14:36:12 -0300 Subject: [PATCH 2/4] Get rid of extra loop and clean things up. 
Implementing suggestions from code review. --- includes/utilities.inc | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/includes/utilities.inc b/includes/utilities.inc index 85a0fa5..dcffb21 100644 --- a/includes/utilities.inc +++ b/includes/utilities.inc @@ -319,25 +319,16 @@ function islandora_newspaper_set_mods_date_issued(AbstractDatastream $datastream // added to process different encodings. $dates = $xpath->query('//ns:mods/ns:originInfo/ns:dateIssued[not(@point)]'); if ($dates->length > 0) { - $removal = array(); - // XX: Because you can't remove elements in a foreach loop - // we collect them. Then we can add the new one and remove - // the old. foreach ($dates as $d) { - $removal[] = $d; - } - foreach ($removal as $r) { - $r->parentNode->removeChild($r); + $d->parentNode->removeChild($d); } } - $mods = $xpath->query('//ns:mods')->item(0); - $prefix = ($mods->prefix) ? ($mods->prefix) . ':' : ''; - $new_date = $dom->createElement($prefix . 'dateIssued', $date->format("Y-m-d")); + $new_date = $dom->createElementNS($dom->firstChild->namespaceURI, 'dateIssued', $date->format("Y-m-d")); $new_date->setAttribute('encoding', 'iso8601'); - $origin = $xpath->query('ns:originInfo', $mods)->item(0); + $origin = $xpath->query('//ns:mods/ns:originInfo')->item(0); if (!$origin) { - $origin = $dom->createElement($prefix . 'originInfo'); - $origin = $mods->appendChild($origin); + $origin = $dom->createElementNS($dom->firstChild->namespaceURI, 'originInfo'); + $origin = $dom->firstChild->appendChild($origin); } $new_date = $origin->appendChild($new_date); try { From d34b9ee1f37ce99aace635c82a09bcae2702d9e1 Mon Sep 17 00:00:00 2001 From: Johanna Date: Mon, 24 Jul 2017 13:22:47 -0300 Subject: [PATCH 3/4] Get rid of redundant 'if'.
--- includes/utilities.inc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/includes/utilities.inc b/includes/utilities.inc index dcffb21..48e78b7 100644 --- a/includes/utilities.inc +++ b/includes/utilities.inc @@ -318,10 +318,8 @@ function islandora_newspaper_set_mods_date_issued(AbstractDatastream $datastream // origin info and is not specified as a point, additional logic could be // added to process different encodings. $dates = $xpath->query('//ns:mods/ns:originInfo/ns:dateIssued[not(@point)]'); - if ($dates->length > 0) { - foreach ($dates as $d) { - $d->parentNode->removeChild($d); - } + foreach ($dates as $d) { + $d->parentNode->removeChild($d); } $new_date = $dom->createElementNS($dom->firstChild->namespaceURI, 'dateIssued', $date->format("Y-m-d")); $new_date->setAttribute('encoding', 'iso8601'); From 13d2df7f4f2ba37281a575bcd7aabb23604c3d8a Mon Sep 17 00:00:00 2001 From: Johanna Date: Mon, 24 Jul 2017 16:27:47 -0300 Subject: [PATCH 4/4] Fix the check for loaded MODS datastream. 
--- includes/utilities.inc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/includes/utilities.inc b/includes/utilities.inc index 48e78b7..deeaeba 100644 --- a/includes/utilities.inc +++ b/includes/utilities.inc @@ -178,9 +178,8 @@ function islandora_newspaper_group_issues(array $issues) { function islandora_newspaper_get_date_issued_from_mods(AbstractDatastream $datastream) { $out = FALSE; $dom = new DOMDocument(); - $dom->loadXML($datastream->content); - $xpath = new DomXPath($dom); - if ($dom) { + if ($dom->loadXML($datastream->content)) { + $xpath = new DomXPath($dom); $xpath->registerNamespace('ns', 'http://www.loc.gov/mods/v3'); // Assumes the canonical date issued exists in the first mods document under // origin info and is not specified as a point, additional logic could be @@ -310,9 +309,8 @@ EOQ; function islandora_newspaper_set_mods_date_issued(AbstractDatastream $datastream, DateTime $date) { $out = FALSE; $dom = new DOMDocument(); - $dom->loadXML($datastream->content); - $xpath = new DomXPath($dom); - if ($dom) { + if ($dom->loadXML($datastream->content)) { + $xpath = new DomXPath($dom); $xpath->registerNamespace('ns', 'http://www.loc.gov/mods/v3'); // Assumes the canonical date issued exists in the first mods document under // origin info and is not specified as a point, additional logic could be