From ef14070e33f2b8535c2fa65c4ce72a07fa41104d Mon Sep 17 00:00:00 2001 From: Uwe Hartwig Date: Wed, 29 Nov 2023 12:05:00 +0100 Subject: [PATCH] [app][fix] closes #56 --- pom.xml | 2 +- .../derivans/data/MetadataHandler.java | 19 +- .../de/ulb/digital/derivans/TestDerivans.java | 4 +- .../derivans/TestDerivansExportKitodo2.java | 7 + .../TestDerivansExportKitodo3Issue.java | 84 + .../derivans/TestDerivansFulltextODEM.java | 7 + .../de/ulb/digital/derivans/TestHelper.java | 45 +- .../de/ulb/digital/derivans/TestResource.java | 9 +- .../config/TestDerivansConfiguration.java | 6 +- .../config/TestDerivansPathResolver.java | 2 +- .../derivate/TestImageDerivateerJPG.java | 4 +- ...680621.xml => zd2-1021634069-18680621.xml} | 0 .../mets/kitodo3/zd2-253780594-18920720.xml | 261 +++ src/test/resources/mets/mets_1_12.xsd | 1854 +++++++++++++++++ 14 files changed, 2288 insertions(+), 16 deletions(-) create mode 100644 src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo3Issue.java rename src/test/resources/mets/kitodo3/{zd2-1021634069-18680621/1021634069-18680621.xml => zd2-1021634069-18680621.xml} (100%) create mode 100755 src/test/resources/mets/kitodo3/zd2-253780594-18920720.xml create mode 100644 src/test/resources/mets/mets_1_12.xsd diff --git a/pom.xml b/pom.xml index d7ec7fc..d7ac06c 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 de.ulb digital-derivans - 1.8.4 + 1.8.5 Universität- und Landesbibliothek Sachsen-Anhalt diff --git a/src/main/java/de/ulb/digital/derivans/data/MetadataHandler.java b/src/main/java/de/ulb/digital/derivans/data/MetadataHandler.java index c5e909f..7f0a4a7 100644 --- a/src/main/java/de/ulb/digital/derivans/data/MetadataHandler.java +++ b/src/main/java/de/ulb/digital/derivans/data/MetadataHandler.java @@ -133,10 +133,27 @@ public boolean write() { return true; } + /** + * + * 2023-11-29 + * + * Due conflicts with Kitodo3 Export XML ensure + * that agent is inserted as very first element + * if no other agent tags present or as 
last + * agent entry + * + * @param fileId + * @return + */ public String enrichAgent(String fileId) { Element agent = createAgentSection(fileId); Element hdrSection = getMetsHdr(); - hdrSection.addContent(agent); + var agents = hdrSection.getChildren("agent", NS_METS); + if (agents.isEmpty()) { + hdrSection.addContent(0, agent); + } else { + hdrSection.addContent(agents.size()-1, agent); + } return agent.getChildText("note", NS_METS); } diff --git a/src/test/java/de/ulb/digital/derivans/TestDerivans.java b/src/test/java/de/ulb/digital/derivans/TestDerivans.java index 895fc29..4c795d1 100644 --- a/src/test/java/de/ulb/digital/derivans/TestDerivans.java +++ b/src/test/java/de/ulb/digital/derivans/TestDerivans.java @@ -98,7 +98,7 @@ void testDerivatesOnlyWithPath(@TempDir Path tempDir) throws Exception { Path pathTarget = tempDir.resolve("only_images"); Path pathImageMax = pathTarget.resolve("MAX"); Files.createDirectories(pathImageMax); - TestHelper.generateJpgs(pathImageMax, 1240, 1754, 6); + TestHelper.generateImages(pathImageMax, 1240, 1754, 6, "%04d.jpg"); // act DerivansParameter dp = new DerivansParameter(); @@ -154,7 +154,7 @@ void testDerivatesWithCLIsetImages(@TempDir Path tempDir) throws Exception { Path pathTarget = tempDir.resolve("conf_images"); Path pathImageMax = pathTarget.resolve(imgDir); Files.createDirectories(pathImageMax); - TestHelper.generateJpgs(pathImageMax, 620, 877, 6); + TestHelper.generateImages(pathImageMax, 620, 877, 6, "%04d.jpg"); DerivansParameter dp = new DerivansParameter(); dp.setPathInput(pathTarget); dp.setImages(imgDir); diff --git a/src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo2.java b/src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo2.java index 305a8d8..5feab5c 100644 --- a/src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo2.java +++ b/src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo2.java @@ -126,6 +126,13 @@ void testMETSUpdateSuccess() throws Exception { } + @Test + 
void testResultXMLvalid() throws Exception { + var resultXML = workDir.resolve("058141367.xml"); + var pathMETSXSD = TestResource.METS_1_12_XSD.get(); + assertTrue(TestHelper.validateXML(resultXML, pathMETSXSD)); + } + public static Path fixtureMetadataTIFK2(Path tempDir, Path srcMets) throws IOException { Path pathTarget = tempDir.resolve("058141367"); if (Files.exists(pathTarget)) { diff --git a/src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo3Issue.java b/src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo3Issue.java new file mode 100644 index 0000000..2a064bf --- /dev/null +++ b/src/test/java/de/ulb/digital/derivans/TestDerivansExportKitodo3Issue.java @@ -0,0 +1,84 @@ +package de.ulb.digital.derivans; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.file.Files; +import java.nio.file.Path; + +import org.jdom2.Document; +import org.jdom2.Element; +import org.jdom2.xpath.XPathExpression; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import de.ulb.digital.derivans.config.DerivansConfiguration; +import de.ulb.digital.derivans.config.DerivansParameter; + +/** + * + * Kitodo3 exporter setup for newspaper issue + * with TIF images and METS/MODS metadata + * Ensure resulting METS conforms to METS XSD + * + * used config: src/test/resources/config/derivans.ini + * + * @author hartwig + * + */ +public class TestDerivansExportKitodo3Issue { + + @TempDir + static Path tempDir; + + static Path workDir; + + static int nExpectedImages = 17; + + static String issueLabel = "253780594-18920720"; + + @BeforeAll + public static void setupBeforeClass() throws Exception { + + workDir = tempDir.resolve(issueLabel); + Path pathTargetMets = workDir.resolve(issueLabel + ".xml"); + // usually Kitodo3 has it's images in "images/max" + // but due export workflow images are moved to "MAX" + var 
pathImageDir = workDir.resolve("MAX"); + Files.createDirectories(pathImageDir); + var pathRes = TestResource.K3_ZD2_253780594.get(); + Files.copy(pathRes, pathTargetMets); + TestHelper.generateImages(pathImageDir, 120, 200, 7, "%08d.tif"); + DerivansParameter dp = new DerivansParameter(); + dp.setPathInput(pathTargetMets); + DerivansConfiguration dc = new DerivansConfiguration(dp); + Derivans derivans = new Derivans(dc); + + // act + derivans.create(); + } + + @Test + void testPDFWritten() throws Exception { + Path pdfWritten = workDir.resolve("25378059418920720.pdf"); + assertTrue(Files.exists(pdfWritten)); + } + + @Test + void testPDFFilePointerWritten() throws Exception { + Document doc = TestHelper.readXMLDocument(workDir.resolve(issueLabel + ".xml")); + var xprFilePtr = ".//mets:fptr[@FILEID='PDF_25378059418920720']"; + XPathExpression xpath = TestHelper.generateXpression(xprFilePtr); + Element el = xpath.evaluateFirst(doc); + assertNotNull(el); + } + + @Test + void testResultXMLvalid() throws Exception { + var resultXML = workDir.resolve(issueLabel + ".xml"); + var pathMETSXSD = TestResource.METS_1_12_XSD.get(); + assertTrue(TestHelper.validateXML(resultXML, pathMETSXSD)); + } + +} diff --git a/src/test/java/de/ulb/digital/derivans/TestDerivansFulltextODEM.java b/src/test/java/de/ulb/digital/derivans/TestDerivansFulltextODEM.java index 9d01731..cbbfaab 100644 --- a/src/test/java/de/ulb/digital/derivans/TestDerivansFulltextODEM.java +++ b/src/test/java/de/ulb/digital/derivans/TestDerivansFulltextODEM.java @@ -128,4 +128,11 @@ void testPage07HasCertainLength() throws Exception { var textPage07 = TestHelper.getTextAsSingleLine(pdfPath, 7); assertEquals(1328, textPage07.length()); } + + @Test + void testResultXMLvalid() throws Exception { + var resultXML = workDir.resolve("mets.xml"); + var pathMETSXSD = TestResource.METS_1_12_XSD.get(); + assertTrue(TestHelper.validateXML(resultXML, pathMETSXSD)); + } } diff --git 
a/src/test/java/de/ulb/digital/derivans/TestHelper.java b/src/test/java/de/ulb/digital/derivans/TestHelper.java index 6bc3217..69bd7c8 100644 --- a/src/test/java/de/ulb/digital/derivans/TestHelper.java +++ b/src/test/java/de/ulb/digital/derivans/TestHelper.java @@ -14,6 +14,16 @@ import java.util.List; import javax.imageio.ImageIO; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.Source; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamSource; +import javax.xml.validation.Schema; +import javax.xml.validation.SchemaFactory; +import javax.xml.validation.Validator; import org.jdom2.Document; import org.jdom2.Element; @@ -23,6 +33,7 @@ import org.jdom2.xpath.XPathBuilder; import org.jdom2.xpath.XPathExpression; import org.jdom2.xpath.XPathFactory; +import org.xml.sax.SAXException; import de.ulb.digital.derivans.data.IMetadataStore; import de.ulb.digital.derivans.derivate.PDFInspector; @@ -35,15 +46,17 @@ */ public class TestHelper { - public static void generateJpgs(Path imageDir, int width, int height, int number) throws IOException { + public static void generateImages(Path imageDir, int width, int height, int number, String labelFormat) + throws IOException { if (Files.exists(imageDir)) { Files.delete(imageDir); } Files.createDirectory(imageDir); + var imageFormat = labelFormat.substring(labelFormat.lastIndexOf('.') + 1).toUpperCase(); for (int i = 1; i <= number; i++) { - String imagePath = String.format("%04d.jpg", i); - Path jpgFile = imageDir.resolve(imagePath); - writeImage(jpgFile, width, height, BufferedImage.TYPE_3BYTE_BGR, "JPG"); + String imageLabel = String.format(labelFormat, i); + Path imagePath = imageDir.resolve(imageLabel); + writeImage(imagePath, width, height, BufferedImage.TYPE_3BYTE_BGR, imageFormat); } } @@ -135,4 +148,28 @@ public static String 
getTextAsSingleLine(Path writtenData, int pageNr) throws Ex PDFInspector inspector = new PDFInspector(writtenData); return inspector.getPageTextLinebreaksReplaced(pageNr); } + + /** + * + * In case of invalid Documents {@link org.xml.sax.SAXParseException} + * will be thrown. + * In case of valid Documents, nothing is thrown but "true" returned. + * + */ + public static boolean validateXML(Path xmlPath, Path xsdPath) throws SAXException, IOException, + ParserConfigurationException { + var dbf = DocumentBuilderFactory.newInstance(); + String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; + String METS_SCHEMA = "http://www.loc.gov/METS/"; + dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, METS_SCHEMA); + dbf.setNamespaceAware(true); + DocumentBuilder parser = dbf.newDocumentBuilder(); + org.w3c.dom.Document document = parser.parse(xmlPath.toFile()); + SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + Source schemaFile = new StreamSource(xsdPath.toFile()); + Schema schema = factory.newSchema(schemaFile); + Validator validator = schema.newValidator(); + validator.validate(new DOMSource(document)); + return true; + } } diff --git a/src/test/java/de/ulb/digital/derivans/TestResource.java b/src/test/java/de/ulb/digital/derivans/TestResource.java index 6be3f22..c3a5865 100644 --- a/src/test/java/de/ulb/digital/derivans/TestResource.java +++ b/src/test/java/de/ulb/digital/derivans/TestResource.java @@ -67,7 +67,8 @@ public enum TestResource { /** * Kitodo 3 */ - K3_ZD2_1021634069(Path.of("src/test/resources/mets/kitodo3/zd2-1021634069-18680621/1021634069-18680621.xml")), + K3_ZD2_1021634069(Path.of("src/test/resources/mets/kitodo3/zd2-1021634069-18680621.xml")), + K3_ZD2_253780594(Path.of("src/test/resources/mets/kitodo3/zd2-253780594-18920720.xml")), /** * Image Processing @@ -103,6 +104,12 @@ public enum TestResource { */ 
METS_ZD_ISSUE_182327845018001101(Path.of("src/test/resources/mets/zdp-182327845018001101.xml")), + /** + * + * Test XSDs + * + */ + METS_1_12_XSD(Path.of("src/test/resources/mets/mets_1_12.xsd")), ; private Path path; diff --git a/src/test/java/de/ulb/digital/derivans/config/TestDerivansConfiguration.java b/src/test/java/de/ulb/digital/derivans/config/TestDerivansConfiguration.java index 1adcd66..52bcf4c 100644 --- a/src/test/java/de/ulb/digital/derivans/config/TestDerivansConfiguration.java +++ b/src/test/java/de/ulb/digital/derivans/config/TestDerivansConfiguration.java @@ -148,7 +148,7 @@ void testDefaultLocalConfiguration(@TempDir Path tempDir) throws Exception { Path pathInput = tempDir.resolve("default_local"); Path pathImageMax = pathInput.resolve(imgDir); Files.createDirectories(pathImageMax); - TestHelper.generateJpgs(pathImageMax, 620, 877, 6); + TestHelper.generateImages(pathImageMax, 620, 877, 6,"%04d.jpg"); var params = new DerivansParameter(); params.setPathInput(pathInput); @@ -191,7 +191,7 @@ void testConfigurationRelativeImageDir(@TempDir Path tempDir) throws Exception { Path pathInput = tempDir.resolve("default_local"); Path pathImageMax = pathInput.resolve(customImageSubDir); Files.createDirectories(pathImageMax); - TestHelper.generateJpgs(pathImageMax, 620, 877, 6); + TestHelper.generateImages(pathImageMax, 620, 877, 6, "%04d.jpg"); var params = new DerivansParameter(); params.setPathInput(pathInput); params.setImages(customImageSubDir); @@ -233,7 +233,7 @@ void testConfigurationAbsoluteImageDir(@TempDir Path tempDir) throws Exception { Files.createDirectory(pathInput); Path pathImageMax = tempDir.resolve(customImageSubDir); Files.createDirectories(pathImageMax); - TestHelper.generateJpgs(pathImageMax, 620, 877, 6); + TestHelper.generateImages(pathImageMax, 620, 877, 6, "%04d.jpg"); var params = new DerivansParameter(); params.setPathInput(pathInput); params.setImages(pathImageMax.toString()); diff --git 
a/src/test/java/de/ulb/digital/derivans/config/TestDerivansPathResolver.java b/src/test/java/de/ulb/digital/derivans/config/TestDerivansPathResolver.java index 6776f81..654a2bc 100644 --- a/src/test/java/de/ulb/digital/derivans/config/TestDerivansPathResolver.java +++ b/src/test/java/de/ulb/digital/derivans/config/TestDerivansPathResolver.java @@ -28,7 +28,7 @@ void testPathResolverDefaultConf(@TempDir Path tempDir) throws Exception { Path pathTarget = tempDir.resolve("only_images"); Path pathImageMax = pathTarget.resolve("MAX"); Files.createDirectories(pathImageMax); - TestHelper.generateJpgs(pathImageMax, 1240, 1754, 6); + TestHelper.generateImages(pathImageMax, 1240, 1754, 6, "%04d.jpg"); DerivansParameter dp = new DerivansParameter(); dp.setPathInput(pathTarget); DerivansConfiguration dc = new DerivansConfiguration(dp); diff --git a/src/test/java/de/ulb/digital/derivans/derivate/TestImageDerivateerJPG.java b/src/test/java/de/ulb/digital/derivans/derivate/TestImageDerivateerJPG.java index 4dc3872..6dc3be8 100644 --- a/src/test/java/de/ulb/digital/derivans/derivate/TestImageDerivateerJPG.java +++ b/src/test/java/de/ulb/digital/derivans/derivate/TestImageDerivateerJPG.java @@ -37,12 +37,10 @@ class TestImageDerivateerJPG { @BeforeAll public static void setupBeforeClass() throws IOException { Path imageDir = sharedTempDir.resolve("IMAGE"); - int width = 3500; int height = 5500; int number = 8; - - TestHelper.generateJpgs(imageDir, width, height, number); + TestHelper.generateImages(imageDir, width, height, number, "%04d.jpg"); } @Test diff --git a/src/test/resources/mets/kitodo3/zd2-1021634069-18680621/1021634069-18680621.xml b/src/test/resources/mets/kitodo3/zd2-1021634069-18680621.xml similarity index 100% rename from src/test/resources/mets/kitodo3/zd2-1021634069-18680621/1021634069-18680621.xml rename to src/test/resources/mets/kitodo3/zd2-1021634069-18680621.xml diff --git a/src/test/resources/mets/kitodo3/zd2-253780594-18920720.xml 
b/src/test/resources/mets/kitodo3/zd2-253780594-18920720.xml new file mode 100755 index 0000000..3076990 --- /dev/null +++ b/src/test/resources/mets/kitodo3/zd2-253780594-18920720.xml @@ -0,0 +1,261 @@ + + + 31527 + + + + + + text + + Angerstein + 1892-07-20 + + + Universitäts- und Landesbibliothek Sachsen-Anhalt + + Halle (Saale) + + 2023 + + urn:nbn:de:gbv:3:3-171133730-25378059418920720-17 + + 25378059418920720 + 28-01-99 + 13-04-23 + + + Der Harz-Bote + + + + 782174345 + + 2764718-3 + + 1460486-3 + + 253780594 + + + + Der Harz-Bote + + + + ger + Latf + + + AZ + 1516514412012/159341 + + + Universitäts- und Landesbibliothek Sachsen-Anhalt + Pon Ya 4795, 2° + + + + 1892. + + + Nr. 58. + + + Public Domain Mark 1.0 + + + + + + + + + + + + + + + + + Universitäts- und Landesbibliothek Sachsen-Anhalt + Share_it + http://www.bibliothek.uni-halle.de + mailto:auskunft@bibliothek.uni-halle.de + + + + + + + + + https://lhhal.gbv.de/DB=1/XMLPRS=N/PPN?PPN=253780594 + https://opendata.uni-halle.de/simple-search?query=25378059418920720 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/resources/mets/mets_1_12.xsd b/src/test/resources/mets/mets_1_12.xsd new file mode 100644 index 0000000..b723111 --- /dev/null +++ b/src/test/resources/mets/mets_1_12.xsd @@ -0,0 +1,1854 @@ + + + + + + + + + METS: Metadata Encoding and Transmission Standard + + This document is available under the Creative Commons CC0 1.0 Universal Public Domain Dedication (http://creativecommons.org/publicdomain/zero/1.0/). + The Digital Library Federation, as creator of this document, has waived all rights to it worldwide under copyright law, including + all related and neighboring rights, to the extent allowed by law. For the full text see http://creativecommons.org/publicdomain/zero/1.0/legalcode. 
+ + Prepared for the Digital Library Federation by Jerome McDonough, New York University, + with the assistance of Michael Alexander (British Library), Joachim Bauer (Content Conversion Specialists, Germany), + Rick Beaubien (University of California), Terry Catapano (Columbia University), Morgan Cundiff (Library of Congress), + Susan Dahl (University of Alberta), Markus Enders (State and University Library, Göttingen/British Library), + Richard Gartner (Bodleian Library at Oxford/King's College, London), Thomas Habing (University of Illinois at Urbana-Champaign), + Nancy Hoebelheinrich (Stanford University/Knowledge Motifs LLC), Arwen Hutt (U.C. San Diego), + Mark Kornbluh (Michigan State University), Cecilia Preston (Preston & Lynch), Merrilee Proffitt (Research Libraries Group), + Clay Redding (Library of Congress), Jenn Riley (Indiana University), Richard Rinehart (Berkeley Art Museum/Pacific Film Archive), + Mackenzie Smith (Massachusetts Institute of Technology), Tobias Steinke (German National Library), + Taylor Surface (OCLC), Brian Tingle (California Digital Library) and Robin Wendler (Harvard University), + Robert Wolfe (Massachusetts Institute of Technology), Patrick Yott (Brown University). + + October, 2019 + Version 1.12.1 + + + + Change History + + April 23, 2001: Alpha Draft completed + June 7, 2001: Beta completed + + 6/7/2001 Beta Changes: + + 1. add 'Time' as a possible time code value, as well as TCF. + 2. Make dmdSec ID attribute required; make ID attribute optional on MDRef/MDWrap. + 3. Add 'Label' attribute to StructMap, along with 'Type'. + 4. Add DDI and FGDC as potential metadata schemes to enumeration. + 5. Enable an "otherMDtype" attribute for MDWrap/MDRef and any other element where + there's an 'other' in the enumerated possibilities. + 6. Add a "profile" attribute to METS element. + 7. Revised mptr declaration so that it's like FLocat/MDRef (and not like XLink) + 8. Extend internal documentation of <area> attributes. + 9. 
Add "other" to the possible set of LOCTYPEs. + 10. Change ADMIDS to ADMID on FileGrp. + 11. Change "N" to "Order" on <div> element. + 12. Change "Number" to "order label" on <div> element + 13. Add createdate and lastmoddate attributes to mets element. + 14. Allow <div> and <area> elements to link to administrative metadata sections. + 15. Normalize attribute pointing facilities for file element and mdRef. + 16. Provide a LOCTYPE of "other" and an "otherloctype" attribute for pointing to external files. + 17. Drop PDI from enumeration of LOCTYPES. + 18. Make MDTYPE required in mdRef and mdWrap. + 19. Rename preservationMD to digiprovMD. + 20. Add optional CHECKSUM attribute to FContent element. + 21. Modularize declarations of fileGrpType and mdSecType attributes and enumerations to + simplify maintenance. + 22. Add TYPE attribute to structMap. + 23. Declare structMap element using structMapType rather than direct declaration. + 24. Add area element as possible subelement to <div>, along with par and seq. + 25. Change mdSec model to ALL, to enable differing order of mdRef/mdWrap elements. + 26. Extend documentation on <par> and <seq> elements. + + + + October 22, 2001: Gamma completed + + 10/22/2001 Gamma changes: + 1. Added optional fileSec element beneath METS root element to contain fileGrps. + 2. Created subsidiary schema file xlink.xsd for XLink attributes, restored XLink attributes + to mptr element, and added XLink support to mdRef and FLocat. + 3. Created new element metsHdr to handle metadata regarding METS document + itself (analogous to TEI Header). Moved CREATEDATE and LASTMODDATE attributes + to metsHdr, and added new RECORDSTATUS attribute. Added new subsidiary elements + agent and altRecordID to metsHdr. + 4. Made CREATEDATE and LASTMODDATE attributes type xsd:dateTime to allow more precise + recording of when work was done. + 5. Changed all attributes using data type of xsd:binary to xsd:base64Binary to conform to final + W3C schema recommendations. 
+ 6. Cleaned up annotations/documentation. + + + + December 19, 2001: Epsilon and PROTOFINAL completed + + 12/19/2001 Epsilon changes: + + 1. Changed sequence operator for StructMap so that only 1 root div element is permitted. + 2. Add new roles to agent element's role attribute and support for extensible 'other' role. + 3. Add support for extensible 'other' type attribute on agent element. + 4. Yet more documentation clean up. + 5. Relocate CHECKSUM attribute from FContent to File element. + 6. Change the file element's CREATED attribute and fileGroup's VERSDATE attribute to + a type of xsd:dateTime + 7. Change attribute name DMD for div element to DMDID for consistency's sake. + 8. Added new behaviorSec for support of referencing executable code from METS object + + + + February 8, 2002: Zeta bug fix to final + + 2/8/2002 Zeta changes + + 1. Eliminated redundant VRA in metadata type enumeration. + 2. Changed mdWrap content model, adding xmlData element to eliminate + ambiguous content model + + + + June 3, 2002: Version 1.1 + + 6/3/2002 v1.1 changes: + + 1. Add new structLink section for recording hyperlinks between media represented by structMap nodes. + 2. Allow a <par> element to contain a <seq> + + + + Dec. 27, 2002: Version 1.2 + + 12/27/2002 v1.2 changes: + 1. Add “USE” attribute to FileGrp, File, FLocat and FContent; + 2. Make FLocat repeatable; + 3. Have FContent mimic mdWrap in using separate binData/xmlData sections; + 4. Copyright statement added; + 5. Allow both FLocat and Fcontent in single file element; + 6. Allow behaviorSec elements to group through GROUPID attribute; + 7. allow descriptive and administrative metadata sections to be grouped through GROUPID attribute; + 8. allow <file> element to point to descriptive metadata via DMDID attribute; + 9. allow descriptive metadata and all forms of administrative metadata to point to administrative metadata via ADMID attribute; + 10. CREATED and STATUS attributes added to all desc. and adm. 
metadata sections; and + 11. clean up documentation in elements to reflect reality. + + + + + May 8, 2003: Version 1.3 + + 05/05/2003 v1.3 changes: + + 1. Change “2. OBJID: a primary identifier assigned to the original source document” to “2. OBJID: a primary identifier assigned to the METS object.” + 2. Add MODS to MDTYPEs. + 3. Modify <file> attributes so that instead of just CHECKSUM we have CHECKSUM and CHECKSUMTYPE, where CHECKSUMTYPE is a controlled vocabulary as follows: + HAVAL, MD5, SHA-1, SHA-256, SHA-384, SHA-512, TIGER, WHIRLPOOL + 4. Alter BehaviorSec to make it recursive, and add a new behavior element to wrap mechanism and interfaceDef elements. + + + + May 1, 2004: Version 1.4 + + 05/01/2004 v1.4 changes: + + 1. Moved attribute documentation out of element documentation + (thank you, Brian Tingle). + 2. New CONTENTIDS attribute (and URIs simpleType) added to div, fptr, + mptr and area elements for mapping MPEG21 DII Identifier values + 3. XLink namespace URI changed to conform with XLink recommendation. + 4. ID Attribute added to FContent. + 5. ID Attribute added to structLink. + 6. ID Attribute added to smLink. + 7. "LOM" added as metadata type. + + + + April 12, 2005: Version 1.5 + + 04/12/2005 v1.5 changes: + + 1. Made file element recursive to deal with PREMIS Onion Layer model and + support XFDU-ish unpacking specification. + 2. Add <stream> element beneath <file> to allow linking of metadata to + subfile structures. + 3. Modify structLink TO and FROM attributes to put them in XLink namespace. + 4. Make processContents "lax" for all xsd:any elements. + + + + October 18, 2006: Version 1.6 + + 10/18/2006 v1.6 changes: + + 1. add ID to stream and transformFile + 2. add ADMID to metsHdr + 3. make smLink/@xlink:to and smLink/@xlink:from required + + + + October 16, 2007/ Jan 20, 2008: Version 1.7 + + 10/16/2007 01/30/2008 v 1.7 changes: + + 1. create parType complex type to allow a seq to contain a par + 2. 
create FILECORE attribute group with MIMETYPE, SIZE, CHECKSUM, CHECKSUMTYPE; + change fileType, mdWrapType and mdRefType to use the attribute group, so mdWrap and mdRef end + up with new SIZE, CHECKSUM, and CHECKSUMTYPE attributes (file does not change) + 20080130 + 2a. CREATED added to FILECORE + 3. PREMIS:OBJECT PREMIS:AGENT PREMIS:RIGHTS PREMIS:EVENT added to MDTYPE value enumeration + + + + April 2009: Version 1.8 + + Version 1.8 changes: + 1. Add CRC32, Adler-32, MNP to the enumerated values constraining CHECKSUMTYPE to align with MIX messageDigestAlgorithm constraints. + 2. Add TEXTMD and METSRIGHTS to the enumeration values constraining MDTYPE. + 3. Add an MDTYPEVERSION attribute as a companion to the MDTYPE attribute in the mdRef and mdWrap elements. + 4. ID and STRUCTID attributes on the behavior element made optional. Depending on whether the behavior applies to a transformFile element or div elements in the structMap, only one or the other of the attributes would pertain. + 5. Documentation aligned with the METS Primer, and corrected. + 6. xml:lang="en" attribute value added to every <documentation> element + 7. xlink:extendedLink support added to the <structLink> element by means of a new <smLinkGrp> element, and its child <smLocatorLink> and <smArcLink> elements. + + + + 2010: Version 1.9 + + Version 1.9 Changes: + 1. Added a <metsDocumentID> element to the <metsHdr> for recording a unique identifier for the METS document itself where this is different from the OBJID, the identifier for the entire digital object represented by the METS document. + 2. Added "ISO 19115:2003 NAP" to the enumerated values for the MDTYPE attribute in the METADATA attribute group. + 3. Added "XPTR" to the enumerated values for the BETYPE attribute on the areaType data type + 4. Added BEGIN, END and BETYPE attributes to the <file> and <stream> elements for specifying the location of a nested file or a stream within its parent file. 
+ + + + March 2012: Version 1.9.1 + + Version 1.9.1 Changes: + 1. Added 'EAC-CPF' as potential metadata scheme to MDTYPE enumeration + EAC-CPF = Encoded Archival Context - Corporate Bodies, Persons, and Families + http://eac.staatsbibliothek-berlin.de/eac-cpf-schema.html + + + + July 2013: Version 1.10 + + Version 1.10 Changes: + 1. Added 'LIDO' as potential metadata scheme to MDTYPE enumeration + LIDO = Lightweight Information Describing Objects + http://network.icom.museum/cidoc/working-groups/data-harvesting-and-interchange/lido-technical/specification/ + 2. Added xsd:anyAttribute with namespace ##other and processContents lax to these METS elements: + mets + metsHdr + dmdSec + amdSec + techMD + rightsMD + sourceMD + digiprovMD + fileSec + fileGrp + file + structMap + fptr + structLink + behaviorSec + This will allow arbitrary new attributes to be added to these elements to support local needs. + + + + January 2015: Version 1.10.1 + + Version 1.10.1 Changes: + 1. Fixed bug: The anyAttribute declaration was inadvertently added to the FLocat element when it should have been on the file element. This + has been corrected in this version. + + + + May 2015: Version 1.11 + + Version 1.11 Changes: + 1. Added new attributes, ORDER, ORDERLABEL, and LABEL, to these METS elements: + par + seq + area + 2. Also added xsd:anyAttribute with namespace ##other and processContents lax to these elements. This will allow arbitrary new attributes to be added to these elements to support local needs. + + + + May 2018: Version 1.12 + + Version 1.12 Changes: + + 1. Added xsd:anyAttribute with namespace ##other and processContents lax to these elements: + agent/note + + This will allow arbitrary new attributes to be added to this element to support local needs. The original use case was to identify the type of the note. + + + October 2019: Version 1.12.1 + + Version 1.12.1 Changes: + + 1. 
Move comments with version history to xsd:documentation elements + + This allows tools that generate documentation from schemas to include the change history in the generated documentation. + + + + + METS: Metadata Encoding and Transmission Standard. + METS is intended to provide a standardized XML format for transmission of complex digital library objects between systems. As such, it can be seen as filling a role similar to that defined for the Submission Information Package (SIP), Archival Information Package (AIP) and Dissemination Information Package (DIP) in the Reference Model for an Open Archival Information System. The root element <mets> establishes the container for the information being stored and/or transmitted by the standard. + + + + + + + + + + + metsType: Complex Type for METS Sections + A METS document consists of seven possible subsidiary sections: metsHdr (METS document header), dmdSec (descriptive metadata section), amdSec (administrative metadata section), fileGrp (file inventory group), structLink (structural map linking), structMap (structural map) and behaviorSec (behaviors section). + + + + + + + The mets header element <metsHdr> captures metadata about the METS document itself, not the digital object the METS document encodes. Although it records a more limited set of metadata, it is very similar in function and purpose to the headers employed in other schema such as the Text Encoding Initiative (TEI) or in the Encoded Archival Description (EAD). + + + + + + + agent: + The agent element <agent> provides for various parties and their roles with respect to the METS record to be documented. + + + + + + + + The element <name> can be used to record the full name of the document agent. + + + + + + + The <note> element can be used to record any additional information regarding the agent's activities with respect to the METS document. 
+ + + + + + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + ROLE (string/R): Specifies the function of the agent with respect to the METS record. The allowed values are: +CREATOR: The person(s) or institution(s) responsible for the METS document. +EDITOR: The person(s) or institution(s) that prepares the metadata for encoding. +ARCHIVIST: The person(s) or institution(s) responsible for the document/collection. +PRESERVATION: The person(s) or institution(s) responsible for preservation functions. +DISSEMINATOR: The person(s) or institution(s) responsible for dissemination functions. +CUSTODIAN: The person(s) or institution(s) charged with the oversight of a document/collection. +IPOWNER: Intellectual Property Owner: The person(s) or institution holding copyright, trade or service marks or other intellectual property rights for the object. +OTHER: Use OTHER if none of the preceding values pertains and clarify the type and location specifier being used in the OTHERROLE attribute (see below). + + + + + + + + + + + + + + + + + + OTHERROLE (string/O): Denotes a role not contained in the allowed values set if OTHER is indicated in the ROLE attribute. + + + + + + TYPE (string/O): is used to specify the type of AGENT. It must be one of the following values: +INDIVIDUAL: Use if an individual has served as the agent. +ORGANIZATION: Use if an institution, corporate body, association, non-profit enterprise, government, religious body, etc. has served as the agent. 
+OTHER: Use OTHER if none of the preceding values pertain and clarify the type of agent specifier being used in the OTHERTYPE attribute + + + + + + + + + + + + + OTHERTYPE (string/O): Specifies the type of agent when the value OTHER is indicated in the TYPE attribute. + + + + + + + + + The alternative record identifier element <altRecordID> allows one to use alternative record identifier values for the digital object represented by the METS document; the primary record identifier is stored in the OBJID attribute in the root <mets> element. + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + TYPE (string/O): A description of the identifier type (e.g., OCLC record number, LCCN, etc.). + + + + + + + + + + + The metsDocument identifier element <metsDocumentID> allows a unique identifier to be assigned to the METS document itself. This may be different from the OBJID attribute value in the root <mets> element, which uniquely identifies the entire digital object represented by the METS document. + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + TYPE (string/O): A description of the identifier type. + + + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. 
For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + ADMID (IDREFS/O): Contains the ID attribute values of the <techMD>, <sourceMD>, <rightsMD> and/or <digiprovMD> elements within the <amdSec> of the METS document that contain administrative metadata pertaining to the METS document itself. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + CREATEDATE (dateTime/O): Records the date/time the METS document was created. + + + + + + LASTMODDATE (dateTime/O): Is used to indicate the date/time the METS document was last modified. + + + + + + RECORDSTATUS (string/O): Specifies the status of the METS document. It is used for internal processing purposes. + + + + + + + + + + A descriptive metadata section <dmdSec> records descriptive metadata pertaining to the METS object as a whole or one of its components. The <dmdSec> element conforms to same generic datatype as the <techMD>, <rightsMD>, <sourceMD> and <digiprovMD> elements, and supports the same sub-elements and attributes. A descriptive metadata element can either wrap the metadata (mdWrap) or reference it in an external location (mdRef) or both. METS allows multiple <dmdSec> elements; and descriptive metadata can be associated with any METS element that supports a DMDID attribute. Descriptive metadata can be expressed according to many current description standards (i.e., MARC, MODS, Dublin Core, TEI Header, EAD, VRA, FGDC, DDI) or a locally produced XML schema. + + + + + + + The administrative metadata section <amdSec> contains the administrative metadata pertaining to the digital object, its components and any original source material from which the digital object is derived. 
The <amdSec> is separated into four sub-sections that accommodate technical metadata (techMD), intellectual property rights (rightsMD), analog/digital source metadata (sourceMD), and digital provenance metadata (digiprovMD). Each of these subsections can either wrap the metadata (mdWrap) or reference it in an external location (mdRef) or both. Multiple instances of the <amdSec> element can occur within a METS document and multiple instances of its subsections can occur in one <amdSec> element. This allows considerable flexibility in the structuring of the administrative metadata. METS does not define a vocabulary or syntax for encoding administrative metadata. Administrative metadata can be expressed within the amdSec sub-elements according to many current community defined standards, or locally produced XML schemas. + + + + + + The overall purpose of the content file section element <fileSec> is to provide an inventory of and the location for the content files that comprise the digital object being described in the METS document. + + + + + + + + A sequence of file group elements <fileGrp> can be used to group the digital files comprising the content of a METS object either into a flat arrangement or, because each file group element can itself contain one or more file group elements, into a nested (hierarchical) arrangement. 
In the case where the content files are images of different formats and resolutions, for example, one could group the image content files by format and create a separate <fileGrp> for each image format/resolution such as: +-- one <fileGrp> for the thumbnails of the images +-- one <fileGrp> for the higher resolution JPEGs of the image +-- one <fileGrp> for the master archival TIFFs of the images +For a text resource with a variety of content file types one might group the content files at the highest level by type, and then use the <fileGrp> element’s nesting capabilities to subdivide a <fileGrp> by format within the type, such as: +-- one <fileGrp> for all of the page images with nested <fileGrp> elements for each image format/resolution (tiff, jpeg, gif) +-- one <fileGrp> for a PDF version of all the pages of the document +-- one <fileGrp> for a TEI encoded XML version of the entire document or each of its pages. +A <fileGrp> may contain zero or more <fileGrp> elements and or <file> elements. + + + + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + + + + The structural map section <structMap> is the heart of a METS document. It provides a means for organizing the digital content represented by the <file> elements in the <fileSec> of the METS document into a coherent hierarchical structure. Such a hierarchical structure can be presented to users to facilitate their comprehension and navigation of the digital content. It can further be applied to any purpose requiring an understanding of the structural relationship of the content files or parts of the content files. The organization may be specified to any level of granularity (intellectual and or physical) that is desired. 
Since the <structMap> element is repeatable, more than one organization can be applied to the digital content represented by the METS document. The hierarchical structure specified by a <structMap> is encoded as a tree of nested <div> elements. A <div> element may directly point to content via child file pointer <fptr> elements (if the content is represented in the <fileSec>) or child METS pointer <mptr> elements (if the content is represented by an external METS document). The <fptr> element may point to a single whole <file> element that manifests its parent <div>, or to part of a <file> that manifests its <div>. It can also point to multiple files or parts of files that must be played/displayed either in sequence or in parallel to reveal its structural division. In addition to providing a means for organizing content, the <structMap> provides a mechanism for linking content at any hierarchical level with relevant descriptive and administrative metadata. + + + + + + + The structural link section element <structLink> allows for the specification of hyperlinks between the different components of a METS structure that are delineated in a structural map. This element is a container for a single, repeatable element, <smLink> which indicates a hyperlink between two nodes in the structural map. The <structLink> section in the METS document is identified using its XML ID attributes. + + + + + + + + + + + + A behavior section element <behaviorSec> associates executable behaviors with content in the METS document by means of a repeatable behavior <behavior> element. This element has an interface definition <interfaceDef> element that represents an abstract definition of the set of behaviors represented by a particular behavior section. A <behavior> element also has a <mechanism> element which is used to point to a module of executable code that implements and runs the behavior defined by the interface definition. 
The <behaviorSec> element, which is repeatable as well as nestable, can be used to group individual behaviors within the structure of the METS document. Such grouping can be useful for organizing families of behaviors together or to indicate other relationships between particular behaviors. + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + OBJID (string/O): Is the primary identifier assigned to the METS object as a whole. Although this attribute is not required, it is strongly recommended. This identifier is used to tag the entire METS object to external systems, in contrast with the ID identifier. + + + + + + LABEL (string/O): Is a simple title string used to identify the object/entity being described in the METS document for the user. + + + + + + TYPE (string/O): Specifies the class or type of the object, e.g.: book, journal, stereograph, dataset, video, etc. + + + + + + PROFILE (string/O): Indicates to which of the registered profile(s) the METS document conforms. For additional information about PROFILES see Chapter 5 of the METS Primer. + + + + + + + + amdSecType: Complex Type for Administrative Metadata Sections + The administrative metadata section consists of four possible subsidiary sections: techMD (technical metadata for text/image/audio/video files), rightsMD (intellectual property rights metadata), sourceMD (analog/digital source metadata), and digiprovMD (digital provenance metadata, that is, the history of migrations/translations performed on a digital library object from its original digital capture/encoding). + + + + + + + A technical metadata element <techMD> records technical metadata about a component of the METS object, such as a digital content file. 
The <techMD> element conforms to the same generic datatype as the <dmdSec>, <rightsMD>, <sourceMD> and <digiprovMD> elements, and supports the same sub-elements and attributes. A technical metadata element can either wrap the metadata (mdWrap) or reference it in an external location (mdRef) or both. METS allows multiple <techMD> elements; and technical metadata can be associated with any METS element that supports an ADMID attribute. Technical metadata can be expressed according to many current technical description standards (such as MIX and textMD) or a locally produced XML schema. + + + + + + + An intellectual property rights metadata element <rightsMD> records information about copyright and licensing pertaining to a component of the METS object. The <rightsMD> element conforms to the same generic datatype as the <dmdSec>, <techMD>, <sourceMD> and <digiprovMD> elements, and supports the same sub-elements and attributes. A rights metadata element can either wrap the metadata (mdWrap) or reference it in an external location (mdRef) or both. METS allows multiple <rightsMD> elements; and rights metadata can be associated with any METS element that supports an ADMID attribute. Rights metadata can be expressed according to current rights description standards (such as CopyrightMD and rightsDeclarationMD) or a locally produced XML schema. + + + + + + + A source metadata element <sourceMD> records descriptive and administrative metadata about the source format or media of a component of the METS object such as a digital content file. It is often used for discovery, data administration or preservation of the digital object. The <sourceMD> element conforms to the same generic datatype as the <dmdSec>, <techMD>, <rightsMD>, and <digiprovMD> elements, and supports the same sub-elements and attributes. A source metadata element can either wrap the metadata (mdWrap) or reference it in an external location (mdRef) or both. 
METS allows multiple <sourceMD> elements; and source metadata can be associated with any METS element that supports an ADMID attribute. Source metadata can be expressed according to current source description standards (such as PREMIS) or a locally produced XML schema. + + + + + + + A digital provenance metadata element <digiprovMD> can be used to record any preservation-related actions taken on the various files which comprise a digital object (e.g., those subsequent to the initial digitization of the files such as transformation or migrations) or, in the case of born digital materials, the files’ creation. In short, digital provenance should be used to record information that allows both archival/library staff and scholars to understand what modifications have been made to a digital object and/or its constituent parts during its life cycle. This information can then be used to judge how those processes might have altered or corrupted the object’s ability to accurately represent the original item. One might, for example, record master derivative relationships and the process by which those derivations have been created. Or the <digiprovMD> element could contain information regarding the migration/transformation of a file from its original digitization (e.g., OCR, TEI, etc.) to its current incarnation as a digital object (e.g., JPEG2000). The <digiprovMD> element conforms to the same generic datatype as the <dmdSec>, <techMD>, <rightsMD>, and <sourceMD> elements, and supports the same sub-elements and attributes. A digital provenance metadata element can either wrap the metadata (mdWrap) or reference it in an external location (mdRef) or both. METS allows multiple <digiprovMD> elements; and digital provenance metadata can be associated with any METS element that supports an ADMID attribute. Digital provenance metadata can be expressed according to current digital provenance description standards (such as PREMIS) or a locally produced XML schema. 
+ + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + + fileGrpType: Complex Type for File Groups + The file group is used to cluster all of the digital files composing a digital library object in a hierarchical arrangement (fileGrp is recursively defined to enable the creation of the hierarchy). Any file group may contain zero or more file elements. File elements in turn can contain one or more FLocat elements (a pointer to a file containing content for this object) and/or a FContent element (the contents of the file, in either XML or Base64 encoding). + + + + + + + + The file element <file> provides access to the content files for the digital object being described by the METS document. A <file> element may contain one or more <FLocat> elements which provide pointers to a content file and/or a <FContent> element which wraps an encoded version of the file. Embedding files using <FContent> can be a valuable feature for exchanging digital objects between repositories or for archiving versions of digital objects for off-site storage. All <FLocat> and <FContent> elements should identify and/or contain identical copies of a single file. The <file> element is recursive, thus allowing sub-files or component files of a larger file to be listed in the inventory. Alternatively, by using the <stream> element, a smaller component of a file or of a related file can be placed within a <file> element. Finally, by using the <transformFile> element, it is possible to include within a <file> element a different version of a file that has undergone a transformation for some reason, such as format migration. 
+ + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + VERSDATE (dateTime/O): An optional dateTime attribute specifying the date this version/fileGrp of the digital object was created. + + + + + + ADMID (IDREF/O): Contains the ID attribute values of the <techMD>, <sourceMD>, <rightsMD> and/or <digiprovMD> elements within the <amdSec> of the METS document applicable to all of the files in a particular file group. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + USE (string/O): A tagging attribute to indicate the intended use of files within this file group (e.g., master, reference, thumbnails for image files). A USE attribute can be expressed at the <fileGrp> level, the <file> level, the <FLocat> level and/or the <FContent> level. A USE attribute value at the <fileGrp> level should pertain to all of the files in the <fileGrp>. A USE attribute at the <file> level should pertain to all copies of the file as represented by subsidiary <FLocat> and/or <FContent> elements. A USE attribute at the <FLocat> or <FContent> level pertains to the particular copy of the file that is either referenced (<FLocat>) or wrapped (<FContent>). + + + + + + + + structMapType: Complex Type for Structural Maps + The structural map (structMap) outlines a hierarchical structure for the original object being encoded, using a series of nested div elements. + + + + + + + The structural divisions of the hierarchical organization provided by a <structMap> are represented by division <div> elements, which can be nested to any depth. Each <div> element can represent either an intellectual (logical) division or a physical division. 
Every <div> node in the structural map hierarchy may be connected (via subsidiary <mptr> or <fptr> elements) to content files which represent that div's portion of the whole document. + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + TYPE (string/O): Identifies the type of structure represented by the <structMap>. For example, a <structMap> that represented a purely logical or intellectual structure could be assigned a TYPE value of “logical” whereas a <structMap> that represented a purely physical structure could be assigned a TYPE value of “physical”. However, the METS schema neither defines nor requires a common vocabulary for this attribute. A METS profile, however, may well constrain the values for the <structMap> TYPE. + + + + + + LABEL (string/O): Describes the <structMap> to viewers of the METS document. This would be useful primarily where more than one <structMap> is provided for a single object. A descriptive LABEL value, in that case, could clarify to users the purpose of each of the available structMaps. + + + + + + + + + divType: Complex Type for Divisions + The METS standard represents a document structurally as a series of nested div elements, that is, as a hierarchy (e.g., a book, which is composed of chapters, which are composed of subchapters, which are composed of text). Every div node in the structural map hierarchy may be connected (via subsidiary mptr or fptr elements) to content files which represent that div's portion of the whole document. 
+ +SPECIAL NOTE REGARDING DIV ATTRIBUTE VALUES: +to clarify the differences between the ORDER, ORDERLABEL, and LABEL attributes for the <div> element, imagine a text with 10 roman numbered pages followed by 10 arabic numbered pages. Page iii would have an ORDER of "3", an ORDERLABEL of "iii" and a LABEL of "Page iii", while page 3 would have an ORDER of "13", an ORDERLABEL of "3" and a LABEL of "Page 3". + + + + + + + Like the <fptr> element, the METS pointer element <mptr> represents digital content that manifests its parent <div> element. Unlike the <fptr>, which either directly or indirectly points to content represented in the <fileSec> of the parent METS document, the <mptr> element points to content represented by an external METS document. Thus, this element allows multiple discrete and separate METS documents to be organized at a higher level by a separate METS document. For example, METS documents representing the individual issues in the series of a journal could be grouped together and organized by a higher level METS document that represents the entire journal series. Each of the <div> elements in the <structMap> of the METS document representing the journal series would point to a METS document representing an issue. It would do so via a child <mptr> element. Thus the <mptr> element gives METS users considerable flexibility in managing the depth of the <structMap> hierarchy of individual METS documents. The <mptr> element points to an external METS document by means of an xlink:href attribute and associated XLink attributes. + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. 
+ + + + + + + + CONTENTIDS (URI/O): Content IDs for the content represented by the <mptr> (equivalent to DIDL DII or Digital Item Identifier, a unique external ID). + + + + + + + + + The <fptr> or file pointer element represents digital content that manifests its parent <div> element. The content represented by an <fptr> element must consist of integral files or parts of files that are represented by <file> elements in the <fileSec>. Via its FILEID attribute, an <fptr> may point directly to a single integral <file> element that manifests a structural division. However, an <fptr> element may also govern an <area> element, a <par>, or a <seq> which in turn would point to the relevant file or files. A child <area> element can point to part of a <file> that manifests a division, while the <par> and <seq> elements can point to multiple files or parts of files that together manifest a division. More than one <fptr> element can be associated with a <div> element. Typically sibling <fptr> elements represent alternative versions, or manifestations, of the same content + + + + + + + + The <par> or parallel files element aggregates pointers to files, parts of files, and/or sequences of files or parts of files that must be played or displayed simultaneously to manifest a block of digital content represented by an <fptr> element. This might be the case, for example, with multi-media content, where a still image might have an accompanying audio track that comments on the still image. In this case, a <par> element would aggregate two <area> elements, one of which pointed to the image file and one of which pointed to the audio file that must be played in conjunction with the image. 
The <area> element associated with the image could be further qualified with SHAPE and COORDS attributes if only a portion of the image file was pertinent and the <area> element associated with the audio file could be further qualified with BETYPE, BEGIN, EXTTYPE, and EXTENT attributes if only a portion of the associated audio file should be played in conjunction with the image. + + + + + + + The sequence of files element <seq> aggregates pointers to files, parts of files and/or parallel sets of files or parts of files that must be played or displayed sequentially to manifest a block of digital content. This might be the case, for example, if the parent <div> element represented a logical division, such as a diary entry, that spanned multiple pages of a diary and, hence, multiple page image files. In this case, a <seq> element would aggregate multiple, sequentially arranged <area> elements, each of which pointed to one of the image files that must be presented sequentially to manifest the entire diary entry. If the diary entry started in the middle of a page, then the first <area> element (representing the page on which the diary entry starts) might be further qualified, via its SHAPE and COORDS attributes, to specify the specific, pertinent area of the associated image file. + + + + + + + The area element <area> typically points to content consisting of just a portion or area of a file represented by a <file> element in the <fileSec>. In some contexts, however, the <area> element can also point to content represented by an integral file. A single <area> element would appear as the direct child of a <fptr> element when only a portion of a <file>, rather than an integral <file>, manifested the digital content represented by the <fptr>. Multiple <area> elements would appear as the direct children of a <par> element or a <seq> element when multiple files or parts of files manifested the digital content represented by an <fptr> element. 
When used in the context of a <par> or <seq> element an <area> element can point either to an integral file or to a segment of a file as necessary. + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + FILEID (IDREF/O): An optional attribute that provides the XML ID identifying the <file> element that links to and/or contains the digital content represented by the <fptr>. A <fptr> element should only have a FILEID attribute value if it does not have a child <area>, <par> or <seq> element. If it has a child element, then the responsibility for pointing to the relevant content falls to this child element or its descendants. + + + + + + CONTENTIDS (URI/O): Content IDs for the content represented by the <fptr> (equivalent to DIDL DII or Digital Item Identifier, a unique external ID). + + + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + DMDID (IDREFS/O): Contains the ID attribute values identifying the <dmdSec> elements in the METS document that contain or link to descriptive metadata pertaining to the structural division represented by the current <div> element. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. 
+ + + + + + ADMID (IDREFS/O): Contains the ID attribute values identifying the <rightsMD>, <sourceMD>, <techMD> and/or <digiprovMD> elements within the <amdSec> of the METS document that contain or link to administrative metadata pertaining to the structural division represented by the <div> element. Typically the <div> ADMID attribute would be used to identify the <rightsMD> element or elements that pertain to the <div>, but it could be used anytime there was a need to link a <div> with pertinent administrative metadata. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + TYPE (string/O): An attribute that specifies the type of structural division that the <div> element represents. Possible <div> TYPE attribute values include: chapter, article, page, track, segment, section etc. METS places no constraints on the possible TYPE values. Suggestions for controlled vocabularies for TYPE may be found on the METS website. + + + + + + CONTENTIDS (URI/O): Content IDs for the content represented by the <div> (equivalent to DIDL DII or Digital Item Identifier, a unique external ID). + + + + + + xlink:label - an xlink label to be referred to by an smLink element + + + + + + parType: Complex Type for Parallel Files + The <par> or parallel files element aggregates pointers to files, parts of files, and/or sequences of files or parts of files that must be played or displayed simultaneously to manifest a block of digital content represented by an <fptr> element. + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. 
+ + + + + + + + + seqType: Complex Type for Sequences of Files + The seq element should be used to link a div to a set of content files when those files should be played/displayed sequentially to deliver content to a user. Individual <area> subelements within the seq element provide the links to the files or portions thereof. + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + + + areaType: Complex Type for Area Linking + The area element provides for more sophisticated linking between a div element and content files representing that div, be they text, image, audio, or video files. An area element can link a div to a point within a file, to a one-dimension segment of a file (e.g., text segment, image line, audio/video clip), or a two-dimensional section of a file (e.g., subsection of an image, or a subsection of the video display of a video file). The area element has no content; all information is recorded within its various attributes. + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + FILEID (IDREF/R): An attribute which provides the XML ID value that identifies the <file> element in the <fileSec> that then points to and/or contains the digital content represented by the <area> element. It must contain an ID value represented in an ID attribute associated with a <file> element in the <fileSec> element in the same METS document. 
+ + + + + + SHAPE (string/O): An attribute that can be used as in HTML to define the shape of the relevant area within the content file pointed to by the <area> element. Typically this would be used with image content (still image or video frame) when only a portion of an integal image map pertains. If SHAPE is specified then COORDS must also be present. SHAPE should be used in conjunction with COORDS in the manner defined for the shape and coords attributes on an HTML4 <area> element. SHAPE must contain one of the following values: +RECT +CIRCLE +POLY + + + + + + + + + + + + + COORDS (string/O): Specifies the coordinates in an image map for the shape of the pertinent area as specified in the SHAPE attribute. While technically optional, SHAPE and COORDS must both appear together to define the relevant area of image content. COORDS should be used in conjunction with SHAPE in the manner defined for the COORDs and SHAPE attributes on an HTML4 <area> element. COORDS must be a comma delimited string of integer value pairs representing coordinates (plus radius in the case of CIRCLE) within an image map. Number of coordinates pairs depends on shape: RECT: x1, y1, x2, y2; CIRC: x1, y1; POLY: x1, y1, x2, y2, x3, y3 . . . + + + + + + BEGIN (string/O): An attribute that specifies the point in the content file where the relevant section of content begins. It can be used in conjunction with either the END attribute or the EXTENT attribute as a means of defining the relevant portion of the referenced file precisely. It can only be interpreted meaningfully in conjunction with the BETYPE or EXTTYPE, which specify the kind of beginning/ending point values or beginning/extent values that are being used. The BEGIN attribute can be used with or without a companion END or EXTENT element. In this case, the end of the content file is assumed to be the end point. 
+ + + + + + END (string/O): An attribute that specifies the point in the content file where the relevant section of content ends. It can only be interpreted meaningfully in conjunction with the BETYPE, which specifies the kind of ending point values being used. Typically the END attribute would only appear in conjunction with a BEGIN element. + + + + + + BETYPE: Begin/End Type. + BETYPE (string/O): An attribute that specifies the kind of BEGIN and/or END values that are being used. For example, if BYTE is specified, then the BEGIN and END point values represent the byte offsets into a file. If IDREF is specified, then the BEGIN element specifies the ID value that identifies the element in a structured text file where the relevant section of the file begins; and the END value (if present) would specify the ID value that identifies the element with which the relevant section of the file ends. Must be one of the following values: +BYTE +IDREF +SMIL +MIDI +SMPTE-25 +SMPTE-24 +SMPTE-DF30 +SMPTE-NDF30 +SMPTE-DF29.97 +SMPTE-NDF29.97 +TIME +TCF +XPTR + + + + + + + + + + + + + + + + + + + + + + + EXTENT (string/O): An attribute that specifies the extent of the relevant section of the content file. Can only be interpreted meaningfully in conjunction with the EXTTYPE which specifies the kind of value that is being used. Typically the EXTENT attribute would only appear in conjunction with a BEGIN element and would not be used if the BEGIN point represents an IDREF. + + + + + + EXTTYPE (string/O): An attribute that specifies the kind of EXTENT values that are being used. For example if BYTE is specified then EXTENT would represent a byte count. If TIME is specified the EXTENT would represent a duration of time. EXTTYPE must be one of the following values: +BYTE +SMIL +MIDI +SMPTE-25 +SMPTE-24 +SMPTE-DF30 +SMPTE-NDF30 +SMPTE-DF29.97 +SMPTE-NDF29.97 +TIME +TCF. 
+ + + + + + + + + + + + + + + + + + + + + ADMID (IDREFS/O): Contains the ID attribute values identifying the <rightsMD>, <sourceMD>, <techMD> and/or <digiprovMD> elements within the <amdSec> of the METS document that contain or link to administrative metadata pertaining to the content represented by the <area> element. Typically the <area> ADMID attribute would be used to identify the <rightsMD> element or elements that pertain to the <area>, but it could be used anytime there was a need to link an <area> with pertinent administrative metadata. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer + + + + + + CONTENTIDS (URI/O): Content IDs for the content represented by the <area> (equivalent to DIDL DII or Digital Item Identifier, a unique external ID). + + + + + + + + + structLinkType: Complex Type for Structural Map Linking + The Structural Map Linking section allows for the specification of hyperlinks between different components of a METS structure delineated in a structural map. structLink contains a single, repeatable element, smLink. Each smLink element indicates a hyperlink between two nodes in the structMap. The structMap nodes recorded in smLink are identified using their XML ID attribute values. + + + + + + + The Structural Map Link element <smLink> identifies a hyperlink between two nodes in the structural map. You would use <smLink>, for instance, to note the existence of hypertext links between web pages, if you wished to record those links within METS. NOTE: <smLink> is an empty element. The location of the <smLink> element to which the <smLink> element is pointing MUST be stored in the xlink:href attribute. + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. 
For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + xlink:arcrole - the role of the link, as per the xlink specification. See http://www.w3.org/TR/xlink/ + + + + + + + xlink:title - a title for the link (if needed), as per the xlink specification. See http://www.w3.org/TR/xlink/ + + + + + + + xlink:show - see the xlink specification at http://www.w3.org/TR/xlink/ + + + + + + + xlink:actuate - see the xlink specification at http://www.w3.org/TR/xlink/ + + + + + + + xlink:to - the value of the label for the element in the structMap you are linking to. + + + + + + + xlink:from - the value of the label for the element in the structMap you are linking from. + + + + + + + + + The structMap link group element <smLinkGrp> provides an implementation of xlink:extendLink, and provides xlink compliant mechanisms for establishing xlink:arcLink type links between 2 or more <div> elements in <structMap> element(s) occurring within the same METS document or different METS documents. The smLinkGrp could be used as an alternative to the <smLink> element to establish a one-to-one link between <div> elements in the same METS document in a fully xlink compliant manner. However, it can also be used to establish one-to-many or many-to-many links between <div> elements. For example, if a METS document contains two <structMap> elements, one of which represents a purely logical structure and one of which represents a purely physical structure, the <smLinkGrp> element would provide a means of mapping a <div> representing a logical entity (for example, a newspaper article) with multiple <div> elements in the physical <structMap> representing the physical areas that together comprise the logical entity (for example, the <div> elements representing the page areas that together comprise the newspaper article). + + + + + + + + The structMap locator link element <smLocatorLink> is of xlink:type "locator". 
It provides a means of identifying a <div> element that will participate in one or more of the links specified by means of <smArcLink> elements within the same <smLinkGrp>. The participating <div> element that is represented by the <smLocatorLink> is identified by means of a URI in the associated xlink:href attribute. The lowest level of this xlink:href URI value should be a fragment identifier that references the ID value that identifies the relevant <div> element. For example, "xlink:href='#div20'" where "div20" is the ID value that identifies the pertinent <div> in the current METS document. Although not required by the xlink specification, an <smLocatorLink> element will typically include an xlink:label attribute in this context, as the <smArcLink> elements will reference these labels to establish the from and to sides of each arc link. + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + + + + The structMap arc link element <smArcLink> is of xlink:type "arc". It can be used to establish a traversal link between two <div> elements as identified by <smLocatorLink> elements within the same smLinkGrp element. The associated xlink:from and xlink:to attributes identify the from and to sides of the arc link by referencing the xlink:label attribute values on the participating smLocatorLink elements. + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. 
+ + + + + + ARCTYPE (string/O): The ARCTYPE attribute provides a means of specifying the relationship between the <div> elements participating in the arc link, and hence the purpose or role of the link. While it can be considered analogous to the xlink:arcrole attribute, its type is a simple string, rather than anyURI. ARCTYPE has no xlink specified meaning, and the xlink:arcrole attribute should be used instead of or in addition to the ARCTYPE attribute when full xlink compliance is desired with respect to specifying the role or purpose of the arc link. + + + + + + ADMID (IDREFS/O): Contains the ID attribute values identifying the <sourceMD>, <techMD>, <digiprovMD> and/or <rightsMD> elements within the <amdSec> of the METS document that contain or link to administrative metadata pertaining to <smArcLink>. Typically the <smArcLink> ADMID attribute would be used to identify one or more <sourceMD> and/or <techMD> elements that refine or clarify the relationship between the xlink:from and xlink:to sides of the arc. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + + + + + ARCLINKORDER (enumerated string/O): ARCLINKORDER is used to indicate whether the order of the smArcLink elements aggregated by the smLinkGrp element is significant. If the order is significant, then a value of "ordered" should be supplied. Value defaults to "unordered". Note that the ARCLINKORDER attribute has no xlink specified meaning. + + + + + + + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. 
+ + + + + + + + behaviorSecType: Complex Type for Behavior Sections + Behaviors are executable code which can be associated with parts of a METS object. The behaviorSec element is used to group individual behaviors within a hierarchical structure. Such grouping can be useful to organize families of behaviors together or to indicate other relationships between particular behaviors. + + + + + + + + A behavior element <behavior> can be used to associate executable behaviors with content in the METS document. This element has an interface definition <interfaceDef> element that represents an abstract definition of a set of behaviors represented by a particular behavior. A <behavior> element also has a behavior mechanism <mechanism> element, a module of executable code that implements and runs the behavior defined abstractly by the interface definition. + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + CREATED (dateTime/O): Specifies the date and time of creation for the <behaviorSec>. + + + + + + LABEL (string/O): A text description of the behavior section. + + + + + + + + behaviorType: Complex Type for Behaviors + A behavior can be used to associate executable behaviors with content in the METS object. A behavior element has an interface definition element that represents an abstract definition of the set of behaviors represented by a particular behavior. A behavior element also has a behavior mechanism which is a module of executable code that implements and runs the behavior defined abstractly by the interface definition. 
+ + + + + + + The interface definition <interfaceDef> element contains a pointer to an abstract definition of a single behavior or a set of related behaviors that are associated with the content of a METS object. The interface definition object to which the <interfaceDef> element points using xlink:href could be another digital object, or some other entity, such as a text file which describes the interface or a Web Services Description Language (WSDL) file. Ideally, an interface definition object contains metadata that describes a set of behaviors or methods. It may also contain files that describe the intended usage of the behaviors, and possibly files that represent different expressions of the interface definition. + + + + + + + A mechanism element <mechanism> contains a pointer to an executable code module that implements a set of behaviors defined by an interface definition. The <mechanism> element will be a pointer to another object (a mechanism object). A mechanism object could be another METS object, or some other entity (e.g., a WSDL file). A mechanism object should contain executable code, pointers to executable code, or specifications for binding to network services (e.g., web services). + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. In the case of a <behavior> element that applies to a <transformFile> element, the ID value must be present and would be referenced from the transformFile/@TRANSFORMBEHAVIOR attribute. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + STRUCTID (IDREFS/O): An XML IDREFS attribute used to link a <behavior> to one or more <div> elements within a <structMap> in the METS document. 
The content to which the STRUCTID points is considered input to the executable behavior mechanism defined for the behavior. If the <behavior> applies to one or more <div> elements, then the STRUCTID attribute must be present. + + + + + + BTYPE (string/O): The behavior type provides a means of categorizing the related behavior. + + + + + CREATED (dateTime/O): The dateTime of creation for the behavior. + + + + + + LABEL (string/O): A text description of the behavior. + + + + + + GROUPID (string/O): An identifier that establishes a correspondence between the given behavior and other behaviors, typically used to facilitate versions of behaviors. + + + + + + ADMID (IDREFS/O): An optional attribute listing the XML ID values of administrative metadata sections within the METS document pertaining to this behavior. + + + + + + + objectType: complexType for interfaceDef and mechanism elements + The mechanism and behavior elements point to external objects--an interface definition object or an executable code object respectively--which together constitute a behavior that can be applied to one or more <div> elements in a <structMap>. + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + LABEL (string/O): A text description of the entity represented. + + + + + + + + + mdSecType: Complex Type for Metadata Sections + A generic framework for pointing to/including metadata within a METS document, a la Warwick Framework. + + + + + + + The metadata reference element <mdRef> element is a generic element used throughout the METS schema to provide a pointer to metadata which resides outside the METS document. NB: <mdRef> is an empty element. 
The location of the metadata must be recorded in the xlink:href attribute, supplemented by the XPTR attribute as needed. + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + + + + LABEL (string/O): Provides a label to display to the viewer of the METS document that identifies the associated metadata. + + + + + + XPTR (string/O): Locates the point within a file to which the <mdRef> element refers, if applicable. + + + + + + + + + A metadata wrapper element <mdWrap> provides a wrapper around metadata embedded within a METS document. The element is repeatable. Such metadata can be in one of two forms: 1) XML-encoded metadata, with the XML-encoding identifying itself as belonging to a namespace other than the METS document namespace. 2) Any arbitrary binary or textual form, PROVIDED that the metadata is Base64 encoded and wrapped in a <binData> element within the internal descriptive metadata element. + + + + + + + + The binary data wrapper element <binData> is used to contain Base64 encoded metadata. + + + + + + The xml data wrapper element <xmlData> is used to contain XML encoded metadata. The content of an <xmlData> element can be in any namespace or in no namespace. As permitted by the XML Schema Standard, the processContents attribute value for the metadata in an <xmlData> is set to “lax”. Therefore, if the source schema and its location are identified by means of an XML schemaLocation attribute, then an XML processor will validate the elements for which it can find declarations. 
If a source schema is not identified, or cannot be found at the specified schemaLocation, then an XML validator will check for well-formedness, but otherwise skip over the elements appearing in the <xmlData> element. + + + + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + + LABEL: an optional string attribute providing a label to display to the viewer of the METS document identifying the metadata. + + + + + + + + + ID (ID/R): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. The ID attribute on the <dmdSec>, <techMD>, <sourceMD>, <rightsMD> and <digiprovMD> elements (which are all of mdSecType) is required, and its value should be referenced from one or more DMDID attributes (when the ID identifies a <dmdSec> element) or ADMID attributes (when the ID identifies a <techMD>, <sourceMD>, <rightsMD> or <digiprovMD> element) that are associated with other elements in the METS document. The following elements support references to a <dmdSec> via a DMDID attribute: <file>, <stream>, <div>. The following elements support references to <techMD>, <sourceMD>, <rightsMD> and <digiprovMD> elements via an ADMID attribute: <metsHdr>, <dmdSec>, <techMD>, <sourceMD>, <rightsMD>, <digiprovMD>, <fileGrp>, <file>, <stream>, <div>, <area>, <behavior>. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + GROUPID (string/O): This identifier is used to indicate that different metadata sections may be considered as part of a group. 
Two metadata sections with the same GROUPID value are to be considered part of the same group. For example this facility might be used to group changed versions of the same metadata if previous versions are maintained in a file for tracking purposes. + + + + + + ADMID (IDREFS/O): Contains the ID attribute values of the <digiprovMD>, <techMD>, <sourceMD> and/or <rightsMD> elements within the <amdSec> of the METS document that contain administrative metadata pertaining to the current mdSecType element. Typically used in this context to reference preservation metadata (digiprovMD) which applies to the current metadata. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + CREATED (dateTime/O): Specifies the date and time of creation for the metadata. + + + + + + STATUS (string/O): Indicates the status of this metadata (e.g., superseded, current, etc.). + + + + + + + + fileType: Complex Type for Files + The file element provides access to content files for a METS object. A file element may contain one or more FLocat elements, which provide pointers to a content file, and/or an FContent element, which wraps an encoded version of the file. Note that ALL FLocat and FContent elements underneath a single file element should identify/contain identical copies of a single file. + + + + + + + + The file location element <FLocat> provides a pointer to the location of a content file. It uses the XLink reference syntax to provide linking information indicating the actual location of the content file, along with other attributes specifying additional linking information. NOTE: <FLocat> is an empty element. The location of the resource pointed to MUST be stored in the xlink:href attribute. + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. 
For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + + USE (string/O): A tagging attribute to indicate the intended use of the specific copy of the file represented by the <FLocat> element (e.g., service master, archive master). A USE attribute can be expressed at the<fileGrp> level, the <file> level, the <FLocat> level and/or the <FContent> level. A USE attribute value at the <fileGrp> level should pertain to all of the files in the <fileGrp>. A USE attribute at the <file> level should pertain to all copies of the file as represented by subsidiary <FLocat> and/or <FContent> elements. A USE attribute at the <FLocat> or <FContent> level pertains to the particular copy of the file that is either referenced (<FLocat>) or wrapped (<FContent>). + + + + + + + + + + The file content element <FContent> is used to identify a content file contained internally within a METS document. The content file must be either Base64 encoded and contained within the subsidiary <binData> wrapper element, or consist of XML information and be contained within the subsidiary <xmlData> wrapper element. + + + + + + + + A binary data wrapper element <binData> is used to contain a Base64 encoded file. + + + + + + + An xml data wrapper element <xmlData> is used to contain an XML encoded file. The content of an <xmlData> element can be in any namespace or in no namespace. As permitted by the XML Schema Standard, the processContents attribute value for the metadata in an <xmlData> element is set to “lax”. Therefore, if the source schema and its location are identified by means of an xsi:schemaLocation attribute, then an XML processor will validate the elements for which it can find declarations. If a source schema is not identified, or cannot be found at the specified schemaLocation, then an XML validator will check for well-formedness, but otherwise skip over the elements appearing in the <xmlData> element. 
+ + + + + + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + USE (string/O): A tagging attribute to indicate the intended use of the specific copy of the file represented by the <FContent> element (e.g., service master, archive master). A USE attribute can be expressed at the<fileGrp> level, the <file> level, the <FLocat> level and/or the <FContent> level. A USE attribute value at the <fileGrp> level should pertain to all of the files in the <fileGrp>. A USE attribute at the <file> level should pertain to all copies of the file as represented by subsidiary <FLocat> and/or <FContent> elements. A USE attribute at the <FLocat> or <FContent> level pertains to the particular copy of the file that is either referenced (<FLocat>) or wrapped (<FContent>). + + + + + + + + + A component byte stream element <stream> may be composed of one or more subsidiary streams. An MPEG4 file, for example, might contain separate audio and video streams, each of which is associated with technical metadata. The repeatable <stream> element provides a mechanism to record the existence of separate data streams within a particular file, and the opportunity to associate <dmdSec> and <amdSec> with those subsidiary data streams if desired. + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + streamType (string/O): The IANA MIME media type for the bytestream. 
+ + + + + OWNERID (string/O): Used to provide a unique identifier (which could include a URI) assigned to the file. This identifier may differ from the URI used to retrieve the file. + + + + + + ADMID (IDREFS/O): Contains the ID attribute values of the <techMD>, <sourceMD>, <rightsMD> and/or <digiprovMD> elements within the <amdSec> of the METS document that contain administrative metadata pertaining to the bytestream. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + DMDID (IDREFS/O): Contains the ID attribute values identifying the <dmdSec>, elements in the METS document that contain or link to descriptive metadata pertaining to the content file stream represented by the current <stream> element. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + BEGIN (string/O): An attribute that specifies the point in the parent <file> where the current <stream> begins. It can be used in conjunction with the END attribute as a means of defining the location of the stream within its parent file. However, the BEGIN attribute can be used with or without a companion END attribute. When no END attribute is specified, the end of the parent file is assumed also to be the end point of the stream. The BEGIN and END attributes can only be interpreted meaningfully in conjunction with a BETYPE attribute, which specifies the kind of beginning/ending point values that are being used. + + + + + + END (string/O): An attribute that specifies the point in the parent <file> where the <stream> ends. It can only be interpreted meaningfully in conjunction with the BETYPE, which specifies the kind of ending point values being used. Typically the END attribute would only appear in conjunction with a BEGIN attribute. + + + + + + BETYPE: Begin/End Type. 
+ BETYPE (string/O): An attribute that specifies the kind of BEGIN and/or END values that are being used. Currently BYTE is the only valid value that can be used in conjunction with nested <file> or <stream> elements. + + + + + + + + + + + + + + + + The transform file element <transformFile> provides a means to access any subsidiary files listed below a <file> element by indicating the steps required to "unpack" or transform the subsidiary files. This element is repeatable and might provide a link to a <behavior> in the <behaviorSec> that performs the transformation. + + + + + + + ID (ID/O): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + TRANSFORMTYPE (string/R): Is used to indicate the type of transformation needed to render content of a file accessible. This may include unpacking a file into subsidiary files/streams. The controlled value constraints for this XML string include “decompression” and “decryption”. Decompression is defined as the action of reversing data compression, i.e., the process of encoding information using fewer bits than an unencoded representation would use by means of specific encoding schemas. Decryption is defined as the process of restoring data that has been obscured to make it unreadable without special knowledge (encrypted data) to its original form. + + + + + + + + + + + TRANSFORM-ALGORITHM (string/R): Specifies the decompression or decryption routine used to access the contents of the file. Algorithms for compression can be either loss-less or lossy. + + + + + TRANSFORMKEY (string/O): A key to be used with the transform algorithm for accessing the file’s contents. + + + + + TRANSFORMBEHAVIOR (string/O): An IDREF to a behavior element for this transformation. 
+ + + + + TRANSFORMORDER (positive-integer/R): The order in which the instructions must be followed in order to unpack or transform the container file. + + + + + + + + + + + ID (ID/R): This attribute uniquely identifies the element within the METS document, and would allow the element to be referenced unambiguously from another element or document via an IDREF or an XPTR. Typically, the ID attribute value on a <file> element would be referenced from one or more FILEID attributes (which are of type IDREF) on <fptr> and/or <area> elements within the <structMap>. Such references establish links between structural divisions (<div> elements) and the specific content files or parts of content files that manifest them. For more information on using ID attributes for internal and external linking see Chapter 4 of the METS Primer. + + + + + + SEQ (integer/O): Indicates the sequence of this <file> relative to the others in its <fileGrp>. + + + + + + + OWNERID (string/O): A unique identifier assigned to the file by its owner. This may be a URI which differs from the URI used to retrieve the file. + + + + + + ADMID (IDREFS/O): Contains the ID attribute values of the <techMD>, <sourceMD>, <rightsMD> and/or <digiprovMD> elements within the <amdSec> of the METS document that contain administrative metadata pertaining to the file. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + DMDID (IDREFS/O): Contains the ID attribute values identifying the <dmdSec> elements in the METS document that contain or link to descriptive metadata pertaining to the content file represented by the current <file> element. For more information on using METS IDREFS and IDREF type attributes for internal linking, see Chapter 4 of the METS Primer. + + + + + + GROUPID (string/O): An identifier that establishes a correspondence between this file and files in other file groups. 
Typically, this will be used to associate a master file in one file group with the derivative files made from it in other file groups. + + + + + + USE (string/O): A tagging attribute to indicate the intended use of all copies of the file aggregated by the <file> element (e.g., master, reference, thumbnails for image files). A USE attribute can be expressed at the<fileGrp> level, the <file> level, the <FLocat> level and/or the <FContent> level. A USE attribute value at the <fileGrp> level should pertain to all of the files in the <fileGrp>. A USE attribute at the <file> level should pertain to all copies of the file as represented by subsidiary <FLocat> and/or <FContent> elements. A USE attribute at the <FLocat> or <FContent> level pertains to the particular copy of the file that is either referenced (<FLocat>) or wrapped (<FContent>). + + + + + + BEGIN (string/O): An attribute that specifies the point in the parent <file> where the current <file> begins. When used in conjunction with a <file> element, this attribute is only meaningful when this element is nested, and its parent <file> element represents a container file. It can be used in conjunction with the END attribute as a means of defining the location of the current file within its parent file. However, the BEGIN attribute can be used with or without a companion END attribute. When no END attribute is specified, the end of the parent file is assumed also to be the end point of the current file. The BEGIN and END attributes can only be interpreted meaningfully in conjunction with a BETYPE attribute, which specifies the kind of beginning/ending point values that are being used. + + + + + + END (string/O): An attribute that specifies the point in the parent <file> where the current, nested <file> ends. It can only be interpreted meaningfully in conjunction with the BETYPE, which specifies the kind of ending point values being used. 
Typically the END attribute would only appear in conjunction with a BEGIN attribute. + + + + + + BETYPE: Begin/End Type. + BETYPE (string/O): An attribute that specifies the kind of BEGIN and/or END values that are being used. Currently BYTE is the only valid value that can be used in conjunction with nested <file> or <stream> elements. + + + + + + + + + + + + + + + + + + + ORDER (integer/O): A representation of the element's order among its siblings (e.g., its absolute, numeric sequence). For an example, and clarification of the distinction between ORDER and ORDERLABEL, see the description of the ORDERLABEL attribute. + + + + + + ORDERLABEL (string/O): A representation of the element's order among its siblings (e.g., “xii”), or of any non-integer native numbering system. It is presumed that this value will still be machine actionable (e.g., it would support ‘go to page ___’ function), and it should not be used as a replacement/substitute for the LABEL attribute. To understand the differences between ORDER, ORDERLABEL and LABEL, imagine a text with 10 roman numbered pages followed by 10 arabic numbered pages. Page iii would have an ORDER of “3”, an ORDERLABEL of “iii” and a LABEL of “Page iii”, while page 3 would have an ORDER of “13”, an ORDERLABEL of “3” and a LABEL of “Page 3”. + + + + + + LABEL (string/O): An attribute used, for example, to identify a <div> to an end user viewing the document. Thus a hierarchical arrangement of the <div> LABEL values could provide a table of contents to the digital content represented by a METS document and facilitate the users’ navigation of the digital object. Note that a <div> LABEL should be specific to its level in the structural map. In the case of a book with chapters, the book <div> LABEL should have the book title and the chapter <div> LABELs should have the individual chapter titles, rather than having the chapter <div> LABELs combine both book title and chapter title. 
For further discussion of the distinction between LABEL and ORDERLABEL see the description of the ORDERLABEL attribute. + + + + + + + + + MDTYPE (string/R): Is used to indicate the type of the associated metadata. It must have one of the following values: +MARC: any form of MARC record +MODS: metadata in the Library of Congress MODS format +EAD: Encoded Archival Description finding aid +DC: Dublin Core +NISOIMG: NISO Technical Metadata for Digital Still Images +LC-AV: technical metadata specified in the Library of Congress A/V prototyping project +VRA: Visual Resources Association Core +TEIHDR: Text Encoding Initiative Header +DDI: Data Documentation Initiative +FGDC: Federal Geographic Data Committee metadata +LOM: Learning Object Model +PREMIS: PREservation Metadata: Implementation Strategies +PREMIS:OBJECT: PREMIS Object entity +PREMIS:AGENT: PREMIS Agent entity +PREMIS:RIGHTS: PREMIS Rights entity +PREMIS:EVENT: PREMIS Event entity +TEXTMD: textMD Technical metadata for text +METSRIGHTS: Rights Declaration Schema +ISO 19115:2003 NAP: North American Profile of ISO 19115:2003 descriptive metadata +EAC-CPF: Encoded Archival Context - Corporate Bodies, Persons, and Families +LIDO: Lightweight Information Describing Objects +OTHER: metadata in a format not specified above + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OTHERMDTYPE (string/O): Specifies the form of metadata in use when the value OTHER is indicated in the MDTYPE attribute. + + + + + + MDTYPEVERSION (string/O): Provides a means for recording the version of the type of metadata (as recorded in the MDTYPE or OTHERMDTYPE attribute) that is being used. This may represent the version of the underlying data dictionary or metadata model rather than a schema version. + + + + + + + LOCTYPE (string/R): Specifies the locator type used in the xlink:href attribute. 
Valid values for LOCTYPE are: + ARK + URN + URL + PURL + HANDLE + DOI + OTHER + + + + + + + + + + + + + + + + + OTHERLOCTYPE (string/O): Specifies the locator type when the value OTHER is used in the LOCTYPE attribute. Although optional, it is strongly recommended when OTHER is used. + + + + + + + + MIMETYPE (string/O): The IANA MIME media type for the associated file or wrapped content. Some values for this attribute can be found on the IANA website. + + + + + + SIZE (long/O): Specifies the size in bytes of the associated file or wrapped content. + + + + + + CREATED (dateTime/O): Specifies the date and time of creation for the associated file or wrapped content. + + + + + + CHECKSUM (string/O): Provides a checksum value for the associated file or wrapped content. + + + + + + CHECKSUMTYPE (enumerated string/O): Specifies the checksum algorithm used to produce the value contained in the CHECKSUM attribute. CHECKSUMTYPE must contain one of the following values: + Adler-32 + CRC32 + HAVAL + MD5 + MNP + SHA-1 + SHA-256 + SHA-384 + SHA-512 + TIGER + WHIRLPOOL + + + + + + + + + + + + + + + + + + + +