diff --git a/dspace-api/src/main/java/org/dspace/administer/FileDownloader.java b/dspace-api/src/main/java/org/dspace/administer/FileDownloader.java new file mode 100644 index 000000000000..fb592627adef --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/administer/FileDownloader.java @@ -0,0 +1,229 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.administer; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.sql.SQLException; +import java.util.List; +import java.util.UUID; +import java.util.stream.Stream; + +import org.apache.commons.cli.ParseException; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.Bitstream; +import org.dspace.content.BitstreamFormat; +import org.dspace.content.Bundle; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.BitstreamFormatService; +import org.dspace.content.service.BitstreamService; +import org.dspace.content.service.ItemService; +import org.dspace.content.service.WorkspaceItemService; +import org.dspace.core.Context; +import org.dspace.eperson.EPerson; +import org.dspace.eperson.factory.EPersonServiceFactory; +import org.dspace.eperson.service.EPersonService; +import org.dspace.identifier.IdentifierNotFoundException; +import org.dspace.identifier.IdentifierNotResolvableException; +import org.dspace.identifier.factory.IdentifierServiceFactory; +import org.dspace.identifier.service.IdentifierService; +import org.dspace.scripts.DSpaceRunnable; +import org.dspace.scripts.configuration.ScriptConfiguration; +import org.dspace.utils.DSpace; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class FileDownloader extends DSpaceRunnable { + + private static final Logger log = LoggerFactory.getLogger(FileDownloader.class); + private boolean help = false; + private UUID itemUUID; + private int workspaceID; + private String pid; + private URI uri; + private String epersonMail; + private String bitstreamName; + private EPersonService epersonService; + private ItemService itemService; + private WorkspaceItemService workspaceItemService; + private IdentifierService identifierService; + private BitstreamService bitstreamService; + private BitstreamFormatService bitstreamFormatService; + private final HttpClient httpClient = HttpClient.newBuilder() + .followRedirects(HttpClient.Redirect.NORMAL) + .build(); + + /** + * This method will return the Configuration that the implementing DSpaceRunnable uses + * + * @return The {@link ScriptConfiguration} that this implementing DspaceRunnable uses + */ + @Override + public FileDownloaderConfiguration getScriptConfiguration() { + return new DSpace().getServiceManager().getServiceByName("file-downloader", + FileDownloaderConfiguration.class); + } + + /** + * This method has to be included in every script and handles the setup of the script by parsing the CommandLine + * and setting the variables + * + * @throws ParseException If something goes wrong + */ + @Override + public void setup() throws ParseException { + log.debug("Setting up {}", FileDownloader.class.getName()); + if (commandLine.hasOption("h")) { + help = true; + return; + } + + if (!commandLine.hasOption("u")) { + throw new ParseException("No URL option has been provided"); + } + + if (!commandLine.hasOption("i") && !commandLine.hasOption("w") && !commandLine.hasOption("p")) { + throw new ParseException("No item id option has been provided"); + } + + if (getEpersonIdentifier() == null && !commandLine.hasOption("e")) { + throw new ParseException("No eperson option has been provided"); + } + + + this.epersonService = EPersonServiceFactory.getInstance().getEPersonService(); + this.itemService = ContentServiceFactory.getInstance().getItemService(); + this.workspaceItemService = ContentServiceFactory.getInstance().getWorkspaceItemService(); + this.bitstreamService = ContentServiceFactory.getInstance().getBitstreamService(); + this.bitstreamFormatService = ContentServiceFactory.getInstance().getBitstreamFormatService(); + this.identifierService = IdentifierServiceFactory.getInstance().getIdentifierService(); + + try { + uri = new URI(commandLine.getOptionValue("u")); + } catch (URISyntaxException e) { + throw new ParseException("The provided URL is not a valid URL"); + } + + if (commandLine.hasOption("i")) { + itemUUID = UUID.fromString(commandLine.getOptionValue("i")); + } else if (commandLine.hasOption("w")) { + workspaceID = Integer.parseInt(commandLine.getOptionValue("w")); + } else if (commandLine.hasOption("p")) { + pid = commandLine.getOptionValue("p"); + } + + epersonMail = commandLine.getOptionValue("e"); + + if (commandLine.hasOption("n")) { + bitstreamName = commandLine.getOptionValue("n"); + } + } + + /** + * This method has to be included in every script and this will be the main execution block for the script that'll + * contain all the logic needed + * + * @throws Exception If something goes wrong + */ + @Override + public void internalRun() throws Exception { + log.debug("Running {}", FileDownloader.class.getName()); + if (help) { + printHelp(); + return; + } + + Context context = new Context(); + context.setCurrentUser(getEperson(context)); + + //find the item by the given id + Item item = findItem(context); + if (item == null) { + throw new IllegalArgumentException("No item found for the given ID"); + } + + HttpRequest request = HttpRequest.newBuilder() + .uri(uri) + .build(); + + HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + + if (response.statusCode() >= 400) { + throw new IllegalArgumentException("The provided URL returned a status code of " + response.statusCode()); + } + + //use the provided value, the content-disposition header, the last part of the uri + if (bitstreamName == null) { + bitstreamName = response.headers().firstValue("Content-Disposition") + .filter(value -> value.contains("filename=")).flatMap(value -> Stream.of(value.split(";")) + .filter(v -> v.contains("filename=")) + .findFirst() + .map(fvalue -> fvalue.replaceFirst("filename=", "").replaceAll("\"", ""))) + .orElse(uri.getPath().substring(uri.getPath().lastIndexOf('/') + 1)); + } + + try (InputStream is = response.body()) { + saveFileToItem(context, item, is, bitstreamName); + } + + context.commit(); + } + + private Item findItem(Context context) throws SQLException { + if (itemUUID != null) { + return itemService.find(context, itemUUID); + } else if (workspaceID != 0) { + return workspaceItemService.find(context, workspaceID).getItem(); + } else { + try { + DSpaceObject dso = identifierService.resolve(context, pid); + if (dso instanceof Item) { + return (Item) dso; + } else { + throw new IllegalArgumentException("The provided identifier does not resolve to an item"); + } + } catch (IdentifierNotFoundException | IdentifierNotResolvableException e) { + throw new IllegalArgumentException(e); + } + } + } + + private void saveFileToItem(Context context, Item item, InputStream is, String name) + throws SQLException, AuthorizeException, IOException { + log.debug("Saving file to item {}", item.getID()); + List originals = item.getBundles("ORIGINAL"); + Bitstream b; + if (originals.isEmpty()) { + b = itemService.createSingleBitstream(context, is, item); + } else { + Bundle bundle = originals.get(0); + b = bitstreamService.create(context, bundle, is); + } + b.setName(context, name); + //now guess format of the bitstream + BitstreamFormat bf = bitstreamFormatService.guessFormat(context, b); + b.setFormat(context, bf); + } + + private EPerson getEperson(Context context) throws SQLException { + if (getEpersonIdentifier() != null) { + return epersonService.find(context, getEpersonIdentifier()); + } else { + return epersonService.findByEmail(context, epersonMail); + } + } +} + diff --git a/dspace-api/src/main/java/org/dspace/administer/FileDownloaderConfiguration.java b/dspace-api/src/main/java/org/dspace/administer/FileDownloaderConfiguration.java new file mode 100644 index 000000000000..848b2d99f7c0 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/administer/FileDownloaderConfiguration.java @@ -0,0 +1,73 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.administer; + +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.dspace.scripts.configuration.ScriptConfiguration; + +public class FileDownloaderConfiguration extends ScriptConfiguration { + + private Class dspaceRunnableClass; + + /** + * Generic getter for the dspaceRunnableClass + * + * @return the dspaceRunnableClass value of this ScriptConfiguration + */ + @Override + public Class getDspaceRunnableClass() { + return dspaceRunnableClass; + } + + /** + * Generic setter for the dspaceRunnableClass + * + * @param dspaceRunnableClass The dspaceRunnableClass to be set on this IndexDiscoveryScriptConfiguration + */ + @Override + public void setDspaceRunnableClass(Class dspaceRunnableClass) { + this.dspaceRunnableClass = dspaceRunnableClass; + } + + /** + * The getter for the options of the Script + * + * @return the options value of this ScriptConfiguration + */ + @Override + public Options getOptions() { + if (options == null) { + + Options options = new Options(); + OptionGroup ids = new OptionGroup(); + + options.addOption("h", "help", false, "help"); + + options.addOption("u", "url", true, "source url"); + options.getOption("u").setRequired(true); + + options.addOption("i", "uuid", true, "item uuid"); + options.addOption("w", "wsid", true, "workspace id"); + options.addOption("p", "pid", true, "item pid (e.g. handle or doi)"); + ids.addOption(options.getOption("i")); + ids.addOption(options.getOption("w")); + ids.addOption(options.getOption("p")); + ids.setRequired(true); + + options.addOption("e", "eperson", true, "eperson email"); + options.getOption("e").setRequired(false); + + options.addOption("n", "name", true, "name of the file/bitstream"); + options.getOption("n").setRequired(false); + + super.options = options; + } + return options; + } +} diff --git a/dspace-api/src/test/data/dspaceFolder/config/spring/api/scripts.xml b/dspace-api/src/test/data/dspaceFolder/config/spring/api/scripts.xml index e388065b68fd..738e11f7b432 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/spring/api/scripts.xml +++ b/dspace-api/src/test/data/dspaceFolder/config/spring/api/scripts.xml @@ -91,4 +91,9 @@ + + + + + diff --git a/dspace-api/src/test/java/org/dspace/administer/FileDownloaderIT.java b/dspace-api/src/test/java/org/dspace/administer/FileDownloaderIT.java new file mode 100644 index 000000000000..ee75fddc57e4 --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/administer/FileDownloaderIT.java @@ -0,0 +1,110 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.administer; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.mockserver.model.HttpRequest.request; +import static org.mockserver.model.HttpResponse.response; + +import java.util.List; + +import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.content.Bitstream; +import org.dspace.content.Collection; +import org.dspace.content.Community; +import org.dspace.content.Item; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.BitstreamService; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.mockserver.junit.MockServerRule; + + +public class FileDownloaderIT extends AbstractIntegrationTestWithDatabase { + + @Rule + public MockServerRule mockServerRule = new MockServerRule(this); + + private Item item; + + //Prepare a community and a collection before the test + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + context.setCurrentUser(admin); + Community community = CommunityBuilder.createCommunity(context).build(); + Collection collection = CollectionBuilder.createCollection(context, community).build(); + item = ItemBuilder.createItem(context, collection).withTitle("FileDownloaderIT Item").build(); + + mockServerRule.getClient().when(request() + .withMethod("GET") + .withPath("/test400") + ).respond( + response() + .withStatusCode(400) + .withBody("test") + ); + + mockServerRule.getClient().when(request() + .withMethod("GET") + .withPath("/test") + ).respond( + response() + .withStatusCode(200) + .withHeader("Content-Disposition", "attachment; filename=\"test.txt\"") + .withBody("test") + ); + } + + //Test that when an error occurs no bitstream is actually added to the item + @Test() + public void testDownloadFileError() throws Exception { + + + BitstreamService bitstreamService = ContentServiceFactory.getInstance().getBitstreamService(); + int oldBitCount = bitstreamService.countTotal(context); + + int port = mockServerRule.getPort(); + String[] args = new String[]{"file-downloader", "-i", item.getID().toString(), + "-u", String.format("http://localhost:%s/test400", port), "-e", "admin@email.com"}; + try { + runDSpaceScript(args); + } catch (IllegalArgumentException e) { + assertEquals(0, item.getBundles().size()); + int newBitCount = bitstreamService.countTotal(context); + assertEquals(oldBitCount, newBitCount); + return; + } + assertEquals("Not expecting to get here", 0, 1); + } + + + //Test that FileDownlaoder actually adds the bitstream to the item + @Test + public void testDownloadFile() throws Exception { + + int port = mockServerRule.getPort(); + String[] args = new String[] {"file-downloader", "-i", item.getID().toString(), + "-u", String.format("http://localhost:%s/test", port), "-e", "admin@email.com"}; + runDSpaceScript(args); + + + assertEquals(1, item.getBundles().size()); + List bs = item.getBundles().get(0).getBitstreams(); + assertEquals(1, bs.size()); + assertNotNull("Expecting name to be defined", bs.get(0).getName()); + + } + +} \ No newline at end of file diff --git a/dspace/config/spring/rest/scripts.xml b/dspace/config/spring/rest/scripts.xml index 5fe487ddcc5d..a8ef279383ae 100644 --- a/dspace/config/spring/rest/scripts.xml +++ b/dspace/config/spring/rest/scripts.xml @@ -69,4 +69,9 @@ + + + + +