diff --git a/src/main/java/gov/loc/repository/bagit/creator/BagCreator.java b/src/main/java/gov/loc/repository/bagit/creator/BagCreator.java index cbe0c76ac..a0d24394c 100644 --- a/src/main/java/gov/loc/repository/bagit/creator/BagCreator.java +++ b/src/main/java/gov/loc/repository/bagit/creator/BagCreator.java @@ -18,6 +18,7 @@ import gov.loc.repository.bagit.domain.Version; import gov.loc.repository.bagit.hash.Hasher; import gov.loc.repository.bagit.hash.SupportedAlgorithm; +import gov.loc.repository.bagit.util.PathUtils; import gov.loc.repository.bagit.writer.BagitFileWriter; import gov.loc.repository.bagit.writer.ManifestWriter; @@ -26,6 +27,8 @@ */ public final class BagCreator { private static final Logger logger = LoggerFactory.getLogger(BagCreator.class); + private static final int LATEST_MAJOR_VERSION = 0; + private static final int LATEST_MINOR_VERSION = 97; private BagCreator(){} @@ -42,11 +45,11 @@ private BagCreator(){} * @return a {@link Bag} object representing the newly created bagit bag */ public static Bag bagInPlace(final Path root, final Collection algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{ - final Bag bag = new Bag(new Version(0, 97)); + final Bag bag = new Bag(new Version(LATEST_MAJOR_VERSION, LATEST_MINOR_VERSION)); bag.setRootDir(root); logger.info("Creating a bag with version: [{}] in directory: [{}]", bag.getVersion(), root); - final Path dataDir = root.resolve("data"); + final Path dataDir = PathUtils.getDataDir(bag); Files.createDirectory(dataDir); final DirectoryStream directoryStream = Files.newDirectoryStream(root); for(final Path path : directoryStream){ @@ -61,7 +64,7 @@ public static Bag bagInPlace(final Path root, final Collection getPayLoadManifests() { - return payLoadManifests; - } - - public void setPayLoadManifests(final Set payLoadManifests) { - this.payLoadManifests = payLoadManifests; - } - - public Set getTagManifests() { - return tagManifests; - } - - public void 
setTagManifests(final Set tagManifests) { - this.tagManifests = tagManifests; - } - - public List getItemsToFetch() { - return itemsToFetch; - } - - public void setItemsToFetch(final List itemsToFetch) { - this.itemsToFetch = itemsToFetch; - } - - public List> getMetadata() { - return metadata; - } - - public void setMetadata(final List> metadata) { - this.metadata = metadata; - } - - public Charset getFileEncoding() { - return fileEncoding; - } - - public void setFileEncoding(final Charset fileEncoding) { - this.fileEncoding = fileEncoding; - } - @Override public String toString() { final StringBuilder sb = new StringBuilder(95); @@ -155,6 +111,50 @@ public boolean equals(final Object obj) { Objects.equals(this.itemsToFetch, other.getItemsToFetch()) && Objects.equals(this.metadata, other.getMetadata()); } + + public Version getVersion(){ + return version; + } + + public Set getPayLoadManifests() { + return payLoadManifests; + } + + public void setPayLoadManifests(final Set payLoadManifests) { + this.payLoadManifests = payLoadManifests; + } + + public Set getTagManifests() { + return tagManifests; + } + + public void setTagManifests(final Set tagManifests) { + this.tagManifests = tagManifests; + } + + public List getItemsToFetch() { + return itemsToFetch; + } + + public void setItemsToFetch(final List itemsToFetch) { + this.itemsToFetch = itemsToFetch; + } + + public List> getMetadata() { + return metadata; + } + + public void setMetadata(final List> metadata) { + this.metadata = metadata; + } + + public Charset getFileEncoding() { + return fileEncoding; + } + + public void setFileEncoding(final Charset fileEncoding) { + this.fileEncoding = fileEncoding; + } public Path getRootDir() { return rootDir; @@ -167,4 +167,5 @@ public void setRootDir(final Path rootDir) { public void setVersion(final Version version) { this.version = version; } + } diff --git a/src/main/java/gov/loc/repository/bagit/exceptions/PayloadOxumDoesNotExistException.java 
b/src/main/java/gov/loc/repository/bagit/exceptions/PayloadOxumDoesNotExistException.java index 476f532c6..b371a99a6 100644 --- a/src/main/java/gov/loc/repository/bagit/exceptions/PayloadOxumDoesNotExistException.java +++ b/src/main/java/gov/loc/repository/bagit/exceptions/PayloadOxumDoesNotExistException.java @@ -5,6 +5,7 @@ /** * The {@link Bag} object should contain the Payload-Oxum metadata key value pair, * this class represents the error when trying to calculate the payload-oxum and it doesn't exist on the bag object. + * Or if the payload-byte-count and payload-file-count don't exist for versions 1.0+ */ public class PayloadOxumDoesNotExistException extends RuntimeException { private static final long serialVersionUID = 1L; diff --git a/src/main/java/gov/loc/repository/bagit/reader/BagitFileValues.java b/src/main/java/gov/loc/repository/bagit/reader/BagitFileValues.java new file mode 100644 index 000000000..b8c3207a0 --- /dev/null +++ b/src/main/java/gov/loc/repository/bagit/reader/BagitFileValues.java @@ -0,0 +1,35 @@ +package gov.loc.repository.bagit.reader; + +import java.nio.charset.Charset; + +import gov.loc.repository.bagit.domain.Version; + +/** + * A simple data object for passing around all the bagit.txt file values + */ +public class BagitFileValues { + private final Version version; + private final Charset encoding; + private final Long payloadByteCount; + private final Long payloadFileCount; + + public BagitFileValues(final Version version, final Charset encoding, final Long payloadByteCount, final Long payloadFileCount){ + this.version = version; + this.encoding = encoding; + this.payloadByteCount = payloadByteCount; + this.payloadFileCount = payloadFileCount; + } + + public Version getVersion() { + return version; + } + public Charset getEncoding() { + return encoding; + } + public Long getPayloadByteCount() { + return payloadByteCount; + } + public Long getPayloadFileCount() { + return payloadFileCount; + } +} diff --git
a/src/main/java/gov/loc/repository/bagit/reader/BagitTextFileReader.java b/src/main/java/gov/loc/repository/bagit/reader/BagitTextFileReader.java index d7129e4b3..7bb50cd8b 100644 --- a/src/main/java/gov/loc/repository/bagit/reader/BagitTextFileReader.java +++ b/src/main/java/gov/loc/repository/bagit/reader/BagitTextFileReader.java @@ -4,8 +4,8 @@ import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Path; -import java.util.List; import java.util.AbstractMap.SimpleImmutableEntry; +import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,23 +32,67 @@ private BagitTextFileReader(){ * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec */ public static SimpleImmutableEntry readBagitTextFile(final Path bagitFile) throws IOException, UnparsableVersionException, InvalidBagMetadataException{ + final BagitFileValues values = parseValues(bagitFile); + + return new SimpleImmutableEntry(values.getVersion(), values.getEncoding()); + } + + /** + * Read the Payload-Byte-Count and Payload-File-Count from the bagit.txt file + * @since bagit specification 1.0 + * + * @param bagitFile the bagit.txt file to read + * + * @return the payload byte count, payload file count (in that order) + * + * @throws IOException if there is a problem reading a file + * @throws UnparsableVersionException if there is a problem parsing the bagit version number + * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec + */ + public static SimpleImmutableEntry readPayloadByteAndFileCount(final Path bagitFile) throws UnparsableVersionException, IOException, InvalidBagMetadataException{ + final BagitFileValues values = parseValues(bagitFile); + + return new SimpleImmutableEntry(values.getPayloadByteCount(), values.getPayloadFileCount()); + } + + /** + * Read version, file encoding, and (possibly) payload byte and file count + * + * @param bagitFile the
bagit.txt file to read + * + * @return all the possible bagit.txt file field values + * + * @throws IOException if there is a problem reading a file + * @throws UnparsableVersionException if there is a problem parsing the bagit version number + * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec + */ + public static BagitFileValues parseValues(final Path bagitFile) throws UnparsableVersionException, IOException, InvalidBagMetadataException{ logger.debug("Reading [{}] file", bagitFile); final List> pairs = KeyValueReader.readKeyValuesFromFile(bagitFile, ":", StandardCharsets.UTF_8); - String version = ""; + + Version version = null; Charset encoding = StandardCharsets.UTF_8; + Long payloadByteCount = null; + Long payloadFileCount = null; + for(final SimpleImmutableEntry pair : pairs){ if("BagIt-Version".equals(pair.getKey())){ - version = pair.getValue(); - logger.debug("BagIt-Version is [{}]", version); + version = parseVersion(pair.getValue()); } if("Tag-File-Character-Encoding".equals(pair.getKey())){ encoding = Charset.forName(pair.getValue()); - logger.debug("Tag-File-Character-Encoding is [{}]", encoding); } + if("Payload-Byte-Count".equals(pair.getKey())){ //assume version is 1.0+ + payloadByteCount = Long.valueOf(pair.getValue()); + } + if("Payload-File-Count".equals(pair.getKey())){ //assume version is 1.0+ + payloadFileCount = Long.valueOf(pair.getValue()); + } + logger.debug("[{}] is [{}]", pair.getKey(), pair.getValue()); } - return new SimpleImmutableEntry(parseVersion(version), encoding); + return new BagitFileValues(version, encoding, payloadByteCount, payloadFileCount); } /* diff --git a/src/main/java/gov/loc/repository/bagit/util/PathUtils.java b/src/main/java/gov/loc/repository/bagit/util/PathUtils.java index a14f73dfe..f0040dc7c 100644 --- a/src/main/java/gov/loc/repository/bagit/util/PathUtils.java +++ b/src/main/java/gov/loc/repository/bagit/util/PathUtils.java @@ -67,9 +67,6 @@ public static boolean 
isHidden(final Path path) throws IOException{ return Files.isHidden(path); } - /* - * Get the directory that contains the payload files. - */ /** * With bagit version 2.0 (.bagit) * payload files are no longer in the "data" directory. This method accounts for this diff --git a/src/main/java/gov/loc/repository/bagit/verify/BagVerifier.java b/src/main/java/gov/loc/repository/bagit/verify/BagVerifier.java index f5ac12ad4..6dfd56f40 100644 --- a/src/main/java/gov/loc/repository/bagit/verify/BagVerifier.java +++ b/src/main/java/gov/loc/repository/bagit/verify/BagVerifier.java @@ -18,6 +18,7 @@ import gov.loc.repository.bagit.domain.Manifest; import gov.loc.repository.bagit.exceptions.CorruptChecksumException; import gov.loc.repository.bagit.exceptions.FileNotInPayloadDirectoryException; +import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException; import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException; import gov.loc.repository.bagit.exceptions.InvalidPayloadOxumException; import gov.loc.repository.bagit.exceptions.MaliciousPathException; @@ -25,6 +26,7 @@ import gov.loc.repository.bagit.exceptions.MissingPayloadDirectoryException; import gov.loc.repository.bagit.exceptions.MissingPayloadManifestException; import gov.loc.repository.bagit.exceptions.PayloadOxumDoesNotExistException; +import gov.loc.repository.bagit.exceptions.UnparsableVersionException; import gov.loc.repository.bagit.exceptions.UnsupportedAlgorithmException; import gov.loc.repository.bagit.exceptions.VerificationException; import gov.loc.repository.bagit.hash.BagitAlgorithmNameToSupportedAlgorithmMapping; @@ -61,8 +63,12 @@ public BagVerifier(final ExecutorService executor, final BagitAlgorithmNameToSup * * @param bag the {@link Bag} object you wish to check * @return true if the bag can be quickly verified + * + * @throws IOException if there is a problem reading a file + * @throws UnparsableVersionException if there is a problem parsing the bagit version number + * 
@throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec */ - public boolean canQuickVerify(final Bag bag){ + public boolean canQuickVerify(final Bag bag) throws UnparsableVersionException, IOException, InvalidBagMetadataException{ return QuickVerifier.canQuickVerify(bag); } @@ -72,13 +78,15 @@ public boolean canQuickVerify(final Bag bag){ * @param bag the bag to verify by payload-oxum * @param ignoreHiddenFiles ignore hidden files found in payload directory * - * @throws IOException if there is an error reading a file * @throws InvalidPayloadOxumException if either the total bytes or the number of files * calculated for the payload directory of the bag is different than the supplied values + * @throws IOException if there is a problem reading a file + * @throws UnparsableVersionException if there is a problem parsing the bagit version number + * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec * @throws PayloadOxumDoesNotExistException if the bag does not contain a payload-oxum. * To check, run {@link BagVerifier#canQuickVerify} */ - public void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, InvalidPayloadOxumException{ + public void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, InvalidPayloadOxumException, UnparsableVersionException, InvalidBagMetadataException{ QuickVerifier.quicklyVerify(bag, ignoreHiddenFiles); } @@ -150,7 +158,7 @@ void checkHashes(final Manifest manifest) throws CorruptChecksumException, Inter *
  • every element is present *
  • every file in the payload manifest(s) is present *
  • every file in the tag manifest(s) is present. Tag files not listed in a tag manifest may be present. - *
  • every file in the data directory must be listed in at least one payload manifest + *
  • every file in the payload directory must be listed in at least one payload manifest *
  • each element must comply with the bagit spec * * diff --git a/src/main/java/gov/loc/repository/bagit/verify/MandatoryVerifier.java b/src/main/java/gov/loc/repository/bagit/verify/MandatoryVerifier.java index 297c1377b..93fa67573 100644 --- a/src/main/java/gov/loc/repository/bagit/verify/MandatoryVerifier.java +++ b/src/main/java/gov/loc/repository/bagit/verify/MandatoryVerifier.java @@ -33,7 +33,7 @@ private MandatoryVerifier(){ } /** - * make sure all the fetch items exist in the data directory + * make sure all the fetch items exist in the payload directory * * @param items the items that needed to be fetched for the bag to be complete * @param bagDir the root directory of the bag @@ -76,8 +76,8 @@ public static void checkBagitFileExists(final Path rootDir, final Version versio * @throws MissingPayloadDirectoryException if the bag does not contain the payload directory */ public static void checkPayloadDirectoryExists(final Bag bag) throws MissingPayloadDirectoryException{ - logger.info("Checking if special payload directory exists (only for version 0.97 and earlier)"); final Path dataDir = PathUtils.getDataDir(bag); + logger.info("Checking if payload directory [{}] exists", dataDir); if(!Files.exists(dataDir)){ throw new MissingPayloadDirectoryException("File [" + dataDir + "] should exist but it doesn't"); diff --git a/src/main/java/gov/loc/repository/bagit/verify/QuickVerifier.java b/src/main/java/gov/loc/repository/bagit/verify/QuickVerifier.java index 318be1dd7..b64272bdf 100644 --- a/src/main/java/gov/loc/repository/bagit/verify/QuickVerifier.java +++ b/src/main/java/gov/loc/repository/bagit/verify/QuickVerifier.java @@ -9,63 +9,152 @@ import org.slf4j.LoggerFactory; import gov.loc.repository.bagit.domain.Bag; +import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException; import gov.loc.repository.bagit.exceptions.InvalidPayloadOxumException; import gov.loc.repository.bagit.exceptions.PayloadOxumDoesNotExistException; +import 
gov.loc.repository.bagit.exceptions.UnparsableVersionException; +import gov.loc.repository.bagit.reader.BagitFileValues; +import gov.loc.repository.bagit.reader.BagitTextFileReader; import gov.loc.repository.bagit.util.PathUtils; /** - * responsible for all things related to quick verification. Quick verification does not - * mean that a Bag is valid, only that a cursory check has been made. For a full verification - * see {@link BagVerifier} + * responsible for all things related to quick verification. Quick verification + * does not mean that a Bag is valid, only that a cursory check has been made. + * For a full verification see {@link BagVerifier} */ public final class QuickVerifier { private static final Logger logger = LoggerFactory.getLogger(QuickVerifier.class); private static final String PAYLOAD_OXUM_REGEX = "\\d+\\.\\d+"; - - private QuickVerifier(){ - //intentionally left empty + + private QuickVerifier() { + // intentionally left empty } - + /** - * Determine if we can quickly verify by comparing the number of files and the total number of bytes expected + * Determine if we can quickly verify by comparing the number of files and the + * total number of bytes expected * - * @param bag the {@link Bag} object you wish to check + * @param bag + * the {@link Bag} object you wish to check + * * @return true if the bag can be quickly verified + * + * @throws IOException if there is a problem reading a file + * @throws UnparsableVersionException if there is a problem parsing the bagit version number + * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec */ - public static boolean canQuickVerify(final Bag bag){ + public static boolean canQuickVerify(final Bag bag) throws UnparsableVersionException, IOException, InvalidBagMetadataException { + boolean payloadInfoExists = false; final String payloadOxum = getPayloadOxum(bag); - logger.debug("Found payload-oxum [{}] for bag [{}]", payloadOxum, bag.getRootDir()); - return 
payloadOxum != null && payloadOxum.matches(PAYLOAD_OXUM_REGEX) && bag.getItemsToFetch().size() == 0; + final BagitFileValues bagitValues = BagitTextFileReader.parseValues(bag.getRootDir().resolve("bagit.txt")); + + if (bagitValues.getPayloadByteCount() != null && bagitValues.getPayloadFileCount() != null) { + logger.debug("Found payload byte and file count, using that instead of payload-oxum"); + if(payloadOxum != null){ + comparePayloadOxumWithByteAndFileCount(payloadOxum, bagitValues.getPayloadByteCount(), bagitValues.getPayloadFileCount()); + } + payloadInfoExists = true; + } + + if (payloadOxum != null && payloadOxum.matches(PAYLOAD_OXUM_REGEX)) { + logger.debug("Found payload-oxum [{}] for bag [{}]", payloadOxum, bag.getRootDir()); + payloadInfoExists = true; + } + + return payloadInfoExists && bag.getItemsToFetch().size() == 0; } - + /* * Get the Payload-Oxum value from the key value pairs */ - private static String getPayloadOxum(final Bag bag){ - for(final SimpleImmutableEntry keyValue : bag.getMetadata()){ - if("Payload-Oxum".equals(keyValue.getKey())){ + private static String getPayloadOxum(final Bag bag) { + for (final SimpleImmutableEntry keyValue : bag.getMetadata()) { + if ("Payload-Oxum".equals(keyValue.getKey())) { return keyValue.getValue(); } } return null; } - + + private static void comparePayloadOxumWithByteAndFileCount(final String payloadOxum, final Long payloadByteCount, + final Long payloadFileCount) { + final SimpleImmutableEntry payloadOxumValues = parsePayloadOxum(payloadOxum); + + if(!payloadOxumValues.getKey().equals(payloadByteCount)){ + logger.warn("Payload-Oxum byte count [{}] does not match Payload-Byte-Count [{}]!", payloadOxumValues.getKey(), payloadByteCount); + } + + if(!payloadOxumValues.getValue().equals(payloadFileCount)){ + logger.warn("Payload-Oxum file count [{}] does not match Payload-File-Count [{}]!", payloadOxumValues.getValue(), payloadFileCount); + } + } + /** - * Quickly verify by comparing the number of files and 
the total number of bytes expected + * Quickly verify by comparing the number of files and the total number of + * bytes expected * - * @param bag the bag to verify by payload-oxum + * @param bag the bag to quickly verify * @param ignoreHiddenFiles ignore hidden files found in payload directory * * @throws IOException if there is an error reading a file - * @throws InvalidPayloadOxumException if either the total bytes or the number of files - * calculated for the payload directory of the bag is different than the supplied values - * @throws PayloadOxumDoesNotExistException if the bag does not contain a payload-oxum. - * To check, run {@link BagVerifier#canQuickVerify} + * @throws InvalidPayloadOxumException + * if either the total bytes or the number of files calculated for + * the payload directory of the bag is different than the supplied + * values + * @throws UnparsableVersionException if there is a problem parsing the bagit version number + * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec + * @throws PayloadOxumDoesNotExistException + * if the bag does not contain a payload-oxum. 
To check, run + * {@link BagVerifier#canQuickVerify} */ - public static void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, InvalidPayloadOxumException{ + public static void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) + throws IOException, InvalidPayloadOxumException, UnparsableVersionException, InvalidBagMetadataException { + final SimpleImmutableEntry byteAndFileCount = getByteAndFileCount(bag); + + final Path payloadDir = PathUtils.getDataDir(bag); + final FileCountAndTotalSizeVistor vistor = new FileCountAndTotalSizeVistor(ignoreHiddenFiles); + Files.walkFileTree(payloadDir, vistor); + logger.info("supplied payload-oxum: [{}.{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]", + byteAndFileCount.getKey(), byteAndFileCount.getValue(), vistor.getTotalSize(), vistor.getCount(), payloadDir); + + if (byteAndFileCount.getKey() != vistor.getTotalSize()) { + throw new InvalidPayloadOxumException( + "Invalid total size. Expected " + byteAndFileCount.getKey() + " but calculated " + vistor.getTotalSize()); + } + if (byteAndFileCount.getValue() != vistor.getCount()) { + throw new InvalidPayloadOxumException( + "Invalid file count. 
Expected " + byteAndFileCount.getValue() + " but found " + vistor.getCount() + " files"); + } + } + + /** + * get either the payload-oxum values or the payload-byte-count and + * payload-file-count + * + * @param bag + * the bag to get the payload info from + * + * @return the byte count, the file count + * + * @throws IOException if there is a problem reading a file + * @throws UnparsableVersionException if there is a problem parsing the bagit version number + * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec + */ + private static SimpleImmutableEntry getByteAndFileCount(final Bag bag) throws UnparsableVersionException, IOException, InvalidBagMetadataException { + final BagitFileValues bagitValues = BagitTextFileReader.parseValues(bag.getRootDir().resolve("bagit.txt")); + + if (bagitValues.getPayloadByteCount() != null && bagitValues.getPayloadFileCount() != null) { + return new SimpleImmutableEntry(bagitValues.getPayloadByteCount(), bagitValues.getPayloadFileCount()); + } + final String payloadOxum = getPayloadOxum(bag); - if(payloadOxum == null || !payloadOxum.matches(PAYLOAD_OXUM_REGEX)){ - throw new PayloadOxumDoesNotExistException("Payload-Oxum does not exist in bag."); + return parsePayloadOxum(payloadOxum); + } + + private static SimpleImmutableEntry parsePayloadOxum(final String payloadOxum){ + if (payloadOxum == null || !payloadOxum.matches(PAYLOAD_OXUM_REGEX)) { + throw new PayloadOxumDoesNotExistException( + "Payload-Oxum or payload-byte-count and payload-file-count does not exist in bag."); } final String[] parts = payloadOxum.split("\\."); @@ -74,16 +163,6 @@ public static void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) logger.debug("Parsing [{}] for the number of files to find in the payload directory", parts[1]); final long numberOfFiles = Long.parseLong(parts[1]); - final Path payloadDir = PathUtils.getDataDir(bag); - final FileCountAndTotalSizeVistor vistor = new 
FileCountAndTotalSizeVistor(ignoreHiddenFiles); - Files.walkFileTree(payloadDir, vistor); - logger.info("supplied payload-oxum: [{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]", payloadOxum, vistor.getTotalSize(), vistor.getCount(), payloadDir); - - if(totalSize != vistor.getTotalSize()){ - throw new InvalidPayloadOxumException("Invalid total size. Expected " + totalSize + "but calculated " + vistor.getTotalSize()); - } - if(numberOfFiles != vistor.getCount()){ - throw new InvalidPayloadOxumException("Invalid file count. Expected " + numberOfFiles + "but found " + vistor.getCount() + " files"); - } + return new SimpleImmutableEntry<>(totalSize, numberOfFiles); } } diff --git a/src/main/java/gov/loc/repository/bagit/writer/BagWriter.java b/src/main/java/gov/loc/repository/bagit/writer/BagWriter.java index 8232046fb..f185022e6 100644 --- a/src/main/java/gov/loc/repository/bagit/writer/BagWriter.java +++ b/src/main/java/gov/loc/repository/bagit/writer/BagWriter.java @@ -5,7 +5,10 @@ import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.AbstractMap.SimpleImmutableEntry; +import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import java.util.Map.Entry; import java.util.Set; @@ -21,6 +24,7 @@ */ public final class BagWriter { private static final Logger logger = LoggerFactory.getLogger(BagWriter.class); + private static final String PAYLOAD_OXUM_KEY = "Payload-Oxum"; private BagWriter(){ //intentionally left empty @@ -41,9 +45,14 @@ private BagWriter(){ public static void write(final Bag bag, final Path outputDir) throws IOException, NoSuchAlgorithmException{ logger.debug("writing payload files"); final Path bagitDir = PayloadWriter.writeVersionDependentPayloadFiles(bag, outputDir); + + logger.debug("Calculating the payload oxum"); + final SimpleImmutableEntry payloadByteAndFileCount = calculatePayloadByteAndFileCount(bag.getPayLoadManifests()); + 
updateMetadataWithPayloadInfo(bag, payloadByteAndFileCount.getKey(), payloadByteAndFileCount.getValue()); logger.debug("writing the bagit.txt file"); - BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), bagitDir); + BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), payloadByteAndFileCount.getKey(), + payloadByteAndFileCount.getValue(), bagitDir); logger.debug("writing the payload manifest(s)"); ManifestWriter.writePayloadManifests(bag.getPayLoadManifests(), bagitDir, bag.getRootDir(), bag.getFileEncoding()); @@ -65,7 +74,33 @@ public static void write(final Bag bag, final Path outputDir) throws IOException } } + private static SimpleImmutableEntry calculatePayloadByteAndFileCount(final Set manifests) throws IOException{ + long byteCount = 0l; + + final Set uniquePaths = new HashSet<>(); + for(final Manifest manifest : manifests){ + uniquePaths.addAll(manifest.getFileToChecksumMap().keySet()); + } + + for(final Path file : uniquePaths){ + byteCount += Files.size(file); + } + + return new SimpleImmutableEntry<>(byteCount, Long.valueOf(uniquePaths.size())); + } + private static void updateMetadataWithPayloadInfo(final Bag bag, final long byteCount, final long fileCount){ + final List> updatedMetadata = new ArrayList<>(); + + for(final SimpleImmutableEntry metadata : bag.getMetadata()){ + if(!PAYLOAD_OXUM_KEY.equals(metadata.getKey())){ + updatedMetadata.add(metadata); + } + } + + updatedMetadata.add(new SimpleImmutableEntry<>(PAYLOAD_OXUM_KEY, byteCount + "." 
+ fileCount)); + bag.setMetadata(updatedMetadata); + } /* * Update the tag manifest cause the checksum of the other tag files will have changed since we just wrote them out to disk diff --git a/src/main/java/gov/loc/repository/bagit/writer/BagitFileWriter.java b/src/main/java/gov/loc/repository/bagit/writer/BagitFileWriter.java index 917bc258d..d56ae0391 100644 --- a/src/main/java/gov/loc/repository/bagit/writer/BagitFileWriter.java +++ b/src/main/java/gov/loc/repository/bagit/writer/BagitFileWriter.java @@ -15,31 +15,45 @@ public final class BagitFileWriter { private static final Logger logger = LoggerFactory.getLogger(BagitFileWriter.class); + private static final Version ONE_DOT_ZERO = new Version(1, 0); + private BagitFileWriter(){ //intentionally left empty } /** - * Write the bagit.txt file in required UTF-8 encoding. + * Write the bagit.txt file in required UTF-8 encoding for versions 1.0+ * * @param version the version of the bag to write out * @param encoding the encoding of the tag files + * @param payloadByteCount the total number of bytes for all files in the payload directory + * @param payloadFileCount the total number of files in the payload directory * @param outputDir the root of the bag * * @throws IOException if there was a problem writing the file */ - public static void writeBagitFile(final Version version, final Charset encoding, final Path outputDir) throws IOException{ + public static void writeBagitFile(final Version version, final Charset encoding, final Long payloadByteCount, + final Long payloadFileCount, final Path outputDir) throws IOException{ + writeBagitFileInternal(version, encoding, payloadByteCount, payloadFileCount, outputDir); + } + + private static void writeBagitFileInternal(final Version version, final Charset encoding, final Long payloadByteCount, + final Long payloadFileCount, final Path outputDir) throws IOException{ final Path bagitPath = outputDir.resolve("bagit.txt"); logger.debug("Writing bagit.txt file to [{}]", 
outputDir); + final StringBuilder sb = new StringBuilder(100); - final String firstLine = "BagIt-Version : " + version + System.lineSeparator(); - logger.debug("Writing line [{}] to [{}]", firstLine, bagitPath); - Files.write(bagitPath, firstLine.getBytes(StandardCharsets.UTF_8), - StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE); + sb.append("BagIt-Version : ").append(version).append(System.lineSeparator()) + .append("Tag-File-Character-Encoding : ").append(encoding).append(System.lineSeparator()); + + logger.debug("Writing [{}] to [{}]", sb.toString(), bagitPath); + if(version.compareTo(ONE_DOT_ZERO) >= 0 && payloadByteCount != null && payloadFileCount != null){ //if it is 1.0 or greater + sb.append("Payload-Byte-Count : ").append(payloadByteCount).append(System.lineSeparator()) + .append("Payload-File-Count : ").append(payloadFileCount).append(System.lineSeparator()); + } - final String secondLine = "Tag-File-Character-Encoding : " + encoding + System.lineSeparator(); - logger.debug("Writing line [{}] to [{}]", secondLine, bagitPath); - Files.write(bagitPath, secondLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.APPEND); + Files.write(bagitPath, sb.toString().getBytes(StandardCharsets.UTF_8), + StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE); } } diff --git a/src/main/java/gov/loc/repository/bagit/writer/PayloadWriter.java b/src/main/java/gov/loc/repository/bagit/writer/PayloadWriter.java index 84a106a89..59fd634f6 100644 --- a/src/main/java/gov/loc/repository/bagit/writer/PayloadWriter.java +++ b/src/main/java/gov/loc/repository/bagit/writer/PayloadWriter.java @@ -12,6 +12,7 @@ import gov.loc.repository.bagit.domain.Bag; import gov.loc.repository.bagit.domain.Manifest; import gov.loc.repository.bagit.domain.Version; +import gov.loc.repository.bagit.util.PathUtils; public final class PayloadWriter { private static final Logger logger = 
LoggerFactory.getLogger(PayloadWriter.class); @@ -35,7 +36,7 @@ static Path writeVersionDependentPayloadFiles(final Bag bag, final Path outputDi else{ final Path dataDir = outputDir.resolve("data"); Files.createDirectories(dataDir); - writePayloadFiles(bag.getPayLoadManifests(), dataDir, bag.getRootDir().resolve("data")); + writePayloadFiles(bag.getPayLoadManifests(), dataDir, PathUtils.getDataDir(bag)); } return bagitDir; @@ -45,8 +46,8 @@ static Path writeVersionDependentPayloadFiles(final Bag bag, final Path outputDi * Write the payload file(s) to the output directory * * @param payloadManifests the set of objects representing the payload manifests - * @param outputDir the data directory of the bag - * @param bagDataDir the data directory of the bag + * @param outputDir the payload directory of the bag + * @param bagDataDir the payload directory of the bag * * @throws IOException if there was a problem writing a file */ diff --git a/src/test/java/gov/loc/repository/bagit/verify/QuickVerifierTest.java b/src/test/java/gov/loc/repository/bagit/verify/QuickVerifierTest.java index a557110b4..b1f21e51d 100644 --- a/src/test/java/gov/loc/repository/bagit/verify/QuickVerifierTest.java +++ b/src/test/java/gov/loc/repository/bagit/verify/QuickVerifierTest.java @@ -36,13 +36,21 @@ public void testCanQuickVerify() throws Exception{ } @Test - public void testQuickVerify() throws Exception{ + public void testQuickVerifyUsingPayloadOxum() throws Exception{ Path passingRootDir = Paths.get(new File("src/test/resources/bags/v0_94/bag").toURI()); Bag bag = reader.read(passingRootDir); QuickVerifier.quicklyVerify(bag, true); } + @Test + public void testQuickVerifyUsingPayloadByteAndFileCount() throws Exception{ + Path passingRootDir = Paths.get(new File("src/test/resources/bags/v1_0/bag").toURI()); + Bag bag = reader.read(passingRootDir); + + QuickVerifier.quicklyVerify(bag, true); + } + @Test(expected=PayloadOxumDoesNotExistException.class) public void 
testExceptionIsThrownWhenPayloadOxumDoesntExist() throws Exception{ Bag bag = reader.read(rootDir); diff --git a/src/test/java/gov/loc/repository/bagit/writer/BagitFileWriterTest.java b/src/test/java/gov/loc/repository/bagit/writer/BagitFileWriterTest.java index 52d1ec0dc..f26b4b6ba 100644 --- a/src/test/java/gov/loc/repository/bagit/writer/BagitFileWriterTest.java +++ b/src/test/java/gov/loc/repository/bagit/writer/BagitFileWriterTest.java @@ -31,16 +31,41 @@ public void testWriteBagitFile() throws Exception{ Path bagit = rootDirPath.resolve("bagit.txt"); assertFalse(Files.exists(bagit)); - BagitFileWriter.writeBagitFile(new Version(0, 97), StandardCharsets.UTF_8, rootDirPath); + BagitFileWriter.writeBagitFile(new Version(0, 97), StandardCharsets.UTF_8, 0l, 0l, rootDirPath); assertTrue(Files.exists(bagit)); //test truncating existing long originalModified = Files.getLastModifiedTime(bagit).toMillis(); long size = Files.size(bagit); - BagitFileWriter.writeBagitFile(new Version(0, 97), StandardCharsets.UTF_8, rootDirPath); + BagitFileWriter.writeBagitFile(new Version(0, 97), StandardCharsets.UTF_8, 0l, 0l, rootDirPath); assertTrue(Files.exists(bagit)); assertTrue(Files.getLastModifiedTime(bagit) + " should be >= " + originalModified, Files.getLastModifiedTime(bagit).toMillis() >= originalModified); assertEquals(size, Files.size(bagit)); } + + @Test + public void testBagitFileWritesOptionalLines() throws Exception{ + File rootDir = folder.newFolder(); + Path rootDirPath = Paths.get(rootDir.toURI()); + Path bagit = rootDirPath.resolve("bagit.txt"); + + assertFalse(Files.exists(bagit)); + BagitFileWriter.writeBagitFile(new Version(1, 0), StandardCharsets.UTF_8, 5l, 5l, rootDirPath); + assertTrue(Files.exists(bagit)); + assertEquals(4, Files.readAllLines(bagit).size()); + } + + @Test //should not write payload byte and file count lines for version older than 1.0 + public void testBagitFileDoesntWritesOptionalLines() throws Exception{ + File rootDir = folder.newFolder(); 
+ Path rootDirPath = Paths.get(rootDir.toURI()); + Path bagit = rootDirPath.resolve("bagit.txt"); + + assertFalse(Files.exists(bagit)); + BagitFileWriter.writeBagitFile(new Version(0, 97), StandardCharsets.UTF_8, 5l, 5l, rootDirPath); + assertTrue(Files.exists(bagit)); + assertEquals(2, Files.readAllLines(bagit).size()); + } + } diff --git a/src/test/resources/bags/v1_0/bag/bag-info.txt b/src/test/resources/bags/v1_0/bag/bag-info.txt new file mode 100644 index 000000000..1f9f175e0 --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/bag-info.txt @@ -0,0 +1,15 @@ +Source-Organization: Spengler University +Organization-Address: 1400 Elm St., Cupertino, California, 95014 +Contact-Name: Edna Janssen +Contact-Phone: +1 408-555-1212 +Contact-Email: ej@spengler.edu +External-Description: Uncompressed greyscale TIFF images from the + Yoshimuri papers collection. +Bagging-Date: 2008-01-15 +External-Identifier: spengler_yoshimuri_001 +Bag-Size: 260 GB +Bag-Group-Identifier: spengler_yoshimuri +Bag-Count: 1 of 15 +Internal-Sender-Identifier: /storage/images/yoshimuri +Internal-Sender-Description: Uncompressed greyscale TIFFs created from + microfilm. 
diff --git a/src/test/resources/bags/v1_0/bag/bagit.txt b/src/test/resources/bags/v1_0/bag/bagit.txt new file mode 100644 index 000000000..fa4bb7150 --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/bagit.txt @@ -0,0 +1,4 @@ +BagIt-Version: 1.0 +Tag-File-Character-Encoding: UTF-8 +Payload-Byte-Count: 25 +Payload-File-Count: 5 \ No newline at end of file diff --git a/src/test/resources/bags/v1_0/bag/data/dir1/test3.txt b/src/test/resources/bags/v1_0/bag/data/dir1/test3.txt new file mode 100644 index 000000000..29f446afe --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/data/dir1/test3.txt @@ -0,0 +1 @@ +test3 \ No newline at end of file diff --git a/src/test/resources/bags/v1_0/bag/data/dir2/dir3/test5.txt b/src/test/resources/bags/v1_0/bag/data/dir2/dir3/test5.txt new file mode 100644 index 000000000..4f1bd3350 --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/data/dir2/dir3/test5.txt @@ -0,0 +1 @@ +test5 \ No newline at end of file diff --git a/src/test/resources/bags/v1_0/bag/data/dir2/test4.txt b/src/test/resources/bags/v1_0/bag/data/dir2/test4.txt new file mode 100644 index 000000000..747522145 --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/data/dir2/test4.txt @@ -0,0 +1 @@ +test4 \ No newline at end of file diff --git a/src/test/resources/bags/v1_0/bag/data/test1.txt b/src/test/resources/bags/v1_0/bag/data/test1.txt new file mode 100644 index 000000000..f079749c4 --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/data/test1.txt @@ -0,0 +1 @@ +test1 \ No newline at end of file diff --git a/src/test/resources/bags/v1_0/bag/data/test2.txt b/src/test/resources/bags/v1_0/bag/data/test2.txt new file mode 100644 index 000000000..d606037cb --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/data/test2.txt @@ -0,0 +1 @@ +test2 \ No newline at end of file diff --git a/src/test/resources/bags/v1_0/bag/manifest-md5.txt b/src/test/resources/bags/v1_0/bag/manifest-md5.txt new file mode 100644 index 000000000..63164c5a8 --- /dev/null +++ 
b/src/test/resources/bags/v1_0/bag/manifest-md5.txt @@ -0,0 +1,5 @@ +8ad8757baa8564dc136c1e07507f4a98 data/dir1/test3.txt +e3d704f3542b44a621ebed70dc0efe13 data/dir2/dir3/test5.txt +86985e105f79b95d6bc918fb45ec7727 data/dir2/test4.txt +5a105e8b9d40e1329780d62ea2265d8a data/test1.txt +ad0234829205b9033196ba818f7a872b data/test2.txt diff --git a/src/test/resources/bags/v1_0/bag/tagmanifest-md5.txt b/src/test/resources/bags/v1_0/bag/tagmanifest-md5.txt new file mode 100644 index 000000000..e8407cebc --- /dev/null +++ b/src/test/resources/bags/v1_0/bag/tagmanifest-md5.txt @@ -0,0 +1,4 @@ +41b89090f32a9ef33226b48f1b98dddf bagit.txt +ba8644f8c8b7adb3d5cf3ad4245606e8 manifest-md5.txt +68b1dabaea8770a0e9411dc5d99341f9 bag-info.txt +e9bae3ce1d7ac00b0b1aa2fbddc50cfb addl_tags/tag1.txt \ No newline at end of file