forked from DSpace/DSpace
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cherry-pick clarin v7 into dtq dev (#820)
* cherry-picked DataCite related changes from customer/uk * Add a script that adds a file from url to an item intended for large file workflows * Add ways to influence the bitstream name * add more options to specify an item * Expose resourceId (#1134) A BE part of #1127 - this exposes the resourceId so it can be used in the handle mgmt table * fixes #1135 - findEpersonByNetId should stop searching when it finds an eperson - moved the `return eperson` inside the for cycle (after eperson non null check). - removed the eperson param (both callers were passing in `null`)
- Loading branch information
Showing
12 changed files
with
455 additions
and
11 deletions.
There are no files selected for viewing
229 changes: 229 additions & 0 deletions
229
dspace-api/src/main/java/org/dspace/administer/FileDownloader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
/** | ||
* The contents of this file are subject to the license and copyright | ||
* detailed in the LICENSE and NOTICE files at the root of the source | ||
* tree and available online at | ||
* | ||
* http://www.dspace.org/license/ | ||
*/ | ||
package org.dspace.administer; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.net.http.HttpClient; | ||
import java.net.http.HttpRequest; | ||
import java.net.http.HttpResponse; | ||
import java.sql.SQLException; | ||
import java.util.List; | ||
import java.util.UUID; | ||
import java.util.stream.Stream; | ||
|
||
import org.apache.commons.cli.ParseException; | ||
import org.dspace.authorize.AuthorizeException; | ||
import org.dspace.content.Bitstream; | ||
import org.dspace.content.BitstreamFormat; | ||
import org.dspace.content.Bundle; | ||
import org.dspace.content.DSpaceObject; | ||
import org.dspace.content.Item; | ||
import org.dspace.content.factory.ContentServiceFactory; | ||
import org.dspace.content.service.BitstreamFormatService; | ||
import org.dspace.content.service.BitstreamService; | ||
import org.dspace.content.service.ItemService; | ||
import org.dspace.content.service.WorkspaceItemService; | ||
import org.dspace.core.Context; | ||
import org.dspace.eperson.EPerson; | ||
import org.dspace.eperson.factory.EPersonServiceFactory; | ||
import org.dspace.eperson.service.EPersonService; | ||
import org.dspace.identifier.IdentifierNotFoundException; | ||
import org.dspace.identifier.IdentifierNotResolvableException; | ||
import org.dspace.identifier.factory.IdentifierServiceFactory; | ||
import org.dspace.identifier.service.IdentifierService; | ||
import org.dspace.scripts.DSpaceRunnable; | ||
import org.dspace.scripts.configuration.ScriptConfiguration; | ||
import org.dspace.utils.DSpace; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
||
public class FileDownloader extends DSpaceRunnable<FileDownloaderConfiguration> { | ||
|
||
private static final Logger log = LoggerFactory.getLogger(FileDownloader.class); | ||
private boolean help = false; | ||
private UUID itemUUID; | ||
private int workspaceID; | ||
private String pid; | ||
private URI uri; | ||
private String epersonMail; | ||
private String bitstreamName; | ||
private EPersonService epersonService; | ||
private ItemService itemService; | ||
private WorkspaceItemService workspaceItemService; | ||
private IdentifierService identifierService; | ||
private BitstreamService bitstreamService; | ||
private BitstreamFormatService bitstreamFormatService; | ||
private final HttpClient httpClient = HttpClient.newBuilder() | ||
.followRedirects(HttpClient.Redirect.NORMAL) | ||
.build(); | ||
|
||
/** | ||
* This method will return the Configuration that the implementing DSpaceRunnable uses | ||
* | ||
* @return The {@link ScriptConfiguration} that this implementing DspaceRunnable uses | ||
*/ | ||
@Override | ||
public FileDownloaderConfiguration getScriptConfiguration() { | ||
return new DSpace().getServiceManager().getServiceByName("file-downloader", | ||
FileDownloaderConfiguration.class); | ||
} | ||
|
||
/** | ||
* This method has to be included in every script and handles the setup of the script by parsing the CommandLine | ||
* and setting the variables | ||
* | ||
* @throws ParseException If something goes wrong | ||
*/ | ||
@Override | ||
public void setup() throws ParseException { | ||
log.debug("Setting up {}", FileDownloader.class.getName()); | ||
if (commandLine.hasOption("h")) { | ||
help = true; | ||
return; | ||
} | ||
|
||
if (!commandLine.hasOption("u")) { | ||
throw new ParseException("No URL option has been provided"); | ||
} | ||
|
||
if (!commandLine.hasOption("i") && !commandLine.hasOption("w") && !commandLine.hasOption("p")) { | ||
throw new ParseException("No item id option has been provided"); | ||
} | ||
|
||
if (getEpersonIdentifier() == null && !commandLine.hasOption("e")) { | ||
throw new ParseException("No eperson option has been provided"); | ||
} | ||
|
||
|
||
this.epersonService = EPersonServiceFactory.getInstance().getEPersonService(); | ||
this.itemService = ContentServiceFactory.getInstance().getItemService(); | ||
this.workspaceItemService = ContentServiceFactory.getInstance().getWorkspaceItemService(); | ||
this.bitstreamService = ContentServiceFactory.getInstance().getBitstreamService(); | ||
this.bitstreamFormatService = ContentServiceFactory.getInstance().getBitstreamFormatService(); | ||
this.identifierService = IdentifierServiceFactory.getInstance().getIdentifierService(); | ||
|
||
try { | ||
uri = new URI(commandLine.getOptionValue("u")); | ||
} catch (URISyntaxException e) { | ||
throw new ParseException("The provided URL is not a valid URL"); | ||
} | ||
|
||
if (commandLine.hasOption("i")) { | ||
itemUUID = UUID.fromString(commandLine.getOptionValue("i")); | ||
} else if (commandLine.hasOption("w")) { | ||
workspaceID = Integer.parseInt(commandLine.getOptionValue("w")); | ||
} else if (commandLine.hasOption("p")) { | ||
pid = commandLine.getOptionValue("p"); | ||
} | ||
|
||
epersonMail = commandLine.getOptionValue("e"); | ||
|
||
if (commandLine.hasOption("n")) { | ||
bitstreamName = commandLine.getOptionValue("n"); | ||
} | ||
} | ||
|
||
/** | ||
* This method has to be included in every script and this will be the main execution block for the script that'll | ||
* contain all the logic needed | ||
* | ||
* @throws Exception If something goes wrong | ||
*/ | ||
@Override | ||
public void internalRun() throws Exception { | ||
log.debug("Running {}", FileDownloader.class.getName()); | ||
if (help) { | ||
printHelp(); | ||
return; | ||
} | ||
|
||
Context context = new Context(); | ||
context.setCurrentUser(getEperson(context)); | ||
|
||
//find the item by the given id | ||
Item item = findItem(context); | ||
if (item == null) { | ||
throw new IllegalArgumentException("No item found for the given ID"); | ||
} | ||
|
||
HttpRequest request = HttpRequest.newBuilder() | ||
.uri(uri) | ||
.build(); | ||
|
||
HttpResponse<InputStream> response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); | ||
|
||
if (response.statusCode() >= 400) { | ||
throw new IllegalArgumentException("The provided URL returned a status code of " + response.statusCode()); | ||
} | ||
|
||
//use the provided value, the content-disposition header, the last part of the uri | ||
if (bitstreamName == null) { | ||
bitstreamName = response.headers().firstValue("Content-Disposition") | ||
.filter(value -> value.contains("filename=")).flatMap(value -> Stream.of(value.split(";")) | ||
.filter(v -> v.contains("filename=")) | ||
.findFirst() | ||
.map(fvalue -> fvalue.replaceFirst("filename=", "").replaceAll("\"", ""))) | ||
.orElse(uri.getPath().substring(uri.getPath().lastIndexOf('/') + 1)); | ||
} | ||
|
||
try (InputStream is = response.body()) { | ||
saveFileToItem(context, item, is, bitstreamName); | ||
} | ||
|
||
context.commit(); | ||
} | ||
|
||
private Item findItem(Context context) throws SQLException { | ||
if (itemUUID != null) { | ||
return itemService.find(context, itemUUID); | ||
} else if (workspaceID != 0) { | ||
return workspaceItemService.find(context, workspaceID).getItem(); | ||
} else { | ||
try { | ||
DSpaceObject dso = identifierService.resolve(context, pid); | ||
if (dso instanceof Item) { | ||
return (Item) dso; | ||
} else { | ||
throw new IllegalArgumentException("The provided identifier does not resolve to an item"); | ||
} | ||
} catch (IdentifierNotFoundException | IdentifierNotResolvableException e) { | ||
throw new IllegalArgumentException(e); | ||
} | ||
} | ||
} | ||
|
||
private void saveFileToItem(Context context, Item item, InputStream is, String name) | ||
throws SQLException, AuthorizeException, IOException { | ||
log.debug("Saving file to item {}", item.getID()); | ||
List<Bundle> originals = item.getBundles("ORIGINAL"); | ||
Bitstream b; | ||
if (originals.isEmpty()) { | ||
b = itemService.createSingleBitstream(context, is, item); | ||
} else { | ||
Bundle bundle = originals.get(0); | ||
b = bitstreamService.create(context, bundle, is); | ||
} | ||
b.setName(context, name); | ||
//now guess format of the bitstream | ||
BitstreamFormat bf = bitstreamFormatService.guessFormat(context, b); | ||
b.setFormat(context, bf); | ||
} | ||
|
||
private EPerson getEperson(Context context) throws SQLException { | ||
if (getEpersonIdentifier() != null) { | ||
return epersonService.find(context, getEpersonIdentifier()); | ||
} else { | ||
return epersonService.findByEmail(context, epersonMail); | ||
} | ||
} | ||
} | ||
|
73 changes: 73 additions & 0 deletions
73
dspace-api/src/main/java/org/dspace/administer/FileDownloaderConfiguration.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/** | ||
* The contents of this file are subject to the license and copyright | ||
* detailed in the LICENSE and NOTICE files at the root of the source | ||
* tree and available online at | ||
* | ||
* http://www.dspace.org/license/ | ||
*/ | ||
package org.dspace.administer; | ||
|
||
import org.apache.commons.cli.OptionGroup; | ||
import org.apache.commons.cli.Options; | ||
import org.dspace.scripts.configuration.ScriptConfiguration; | ||
|
||
public class FileDownloaderConfiguration extends ScriptConfiguration<FileDownloader> { | ||
|
||
private Class<FileDownloader> dspaceRunnableClass; | ||
|
||
/** | ||
* Generic getter for the dspaceRunnableClass | ||
* | ||
* @return the dspaceRunnableClass value of this ScriptConfiguration | ||
*/ | ||
@Override | ||
public Class<FileDownloader> getDspaceRunnableClass() { | ||
return dspaceRunnableClass; | ||
} | ||
|
||
/** | ||
* Generic setter for the dspaceRunnableClass | ||
* | ||
* @param dspaceRunnableClass The dspaceRunnableClass to be set on this IndexDiscoveryScriptConfiguration | ||
*/ | ||
@Override | ||
public void setDspaceRunnableClass(Class<FileDownloader> dspaceRunnableClass) { | ||
this.dspaceRunnableClass = dspaceRunnableClass; | ||
} | ||
|
||
/** | ||
* The getter for the options of the Script | ||
* | ||
* @return the options value of this ScriptConfiguration | ||
*/ | ||
@Override | ||
public Options getOptions() { | ||
if (options == null) { | ||
|
||
Options options = new Options(); | ||
OptionGroup ids = new OptionGroup(); | ||
|
||
options.addOption("h", "help", false, "help"); | ||
|
||
options.addOption("u", "url", true, "source url"); | ||
options.getOption("u").setRequired(true); | ||
|
||
options.addOption("i", "uuid", true, "item uuid"); | ||
options.addOption("w", "wsid", true, "workspace id"); | ||
options.addOption("p", "pid", true, "item pid (e.g. handle or doi)"); | ||
ids.addOption(options.getOption("i")); | ||
ids.addOption(options.getOption("w")); | ||
ids.addOption(options.getOption("p")); | ||
ids.setRequired(true); | ||
|
||
options.addOption("e", "eperson", true, "eperson email"); | ||
options.getOption("e").setRequired(false); | ||
|
||
options.addOption("n", "name", true, "name of the file/bitstream"); | ||
options.getOption("n").setRequired(false); | ||
|
||
super.options = options; | ||
} | ||
return options; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.