From 27683b3cef2cb3f0dae3cb9ff3ab06a3a1201d33 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 10 Jul 2022 21:25:28 -0400 Subject: [PATCH] Some more cleanup for the harvesting subsystem migration to gdcc/xoai (#8372) --- .../server/web/servlet/OAIServlet.java | 43 ++++++------------- .../xoai/DataverseXoaiItemRepository.java | 24 ++++------- .../xoai/DataverseXoaiSetRepository.java | 5 +-- 3 files changed, 23 insertions(+), 49 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 7fc03dc90d6..8cbde519446 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -71,17 +71,21 @@ public class OAIServlet extends HttpServlet { SystemConfig systemConfig; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.web.servlet.OAIServlet"); - protected HashMap attributesMap = new HashMap(); // If we are going to stick with this solution - of providing a minimalist // xml record containing a link to the proprietary json metadata API for // "dataverse json harvesting", we'll probably want to create minimalistic, // but valid schemas for this format as well. - // (although the more I'm thinking about this... these records just don't seem - // needed at all) + // UPDATE: we are keeping this hack on the server side purely for backward + // compatibility with older (pre v6) Dataverses who may still be using the + // format. Once v6 has been around for a while, we will get rid of it completely. 
+ // Starting with this version, harvesting clients will not be making GetRecord + // calls at all when using harvesting dataverse_json; instead they will only + // be calling ListIdentifiers, and then making direct calls to the export + // API of the remote Dataverse, to obtain the records in native json. This + // is how we should have implemented this in the first place, really. private static final String DATAVERSE_EXTENDED_METADATA_FORMAT = "dataverse_json"; - private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = ""; - private static final String DATAVERSE_EXTENDED_METADATA_SCHEMA = ""; - + private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = "Custom Dataverse metadata in JSON format (Dataverse4 to Dataverse4 harvesting only)"; + private static final String DATAVERSE_EXTENDED_METADATA_SCHEMA = "JSON schema pending"; private Context xoaiContext; private SetRepository setRepository; @@ -98,7 +102,6 @@ public void init(ServletConfig config) throws ServletException { if (isDataverseOaiExtensionsSupported()) { xoaiContext = addDataverseJsonMetadataFormat(xoaiContext); } - //addMetadataFormatConditions(xoaiContext); setRepository = new DataverseXoaiSetRepository(setService); itemRepository = new DataverseXoaiItemRepository(recordService, datasetService, systemConfig.getDataverseSiteUrl()); @@ -108,7 +111,6 @@ public void init(ServletConfig config) throws ServletException { xoaiRepository = new Repository() .withSetRepository(setRepository) .withItemRepository(itemRepository) - //.withResumptionTokenFormatter(new SimpleResumptionTokenFormat()) .withConfiguration(repositoryConfiguration); dataProvider = new DataProvider(getXoaiContext(), getXoaiRepository()); @@ -158,15 +160,6 @@ private Context addDataverseJsonMetadataFormat(Context context) { return context; } - /* No longer needed after the modifications on the gdcc/xoai side - private void addMetadataFormatConditions(Context context) { - for (MetadataFormat metadataFormat : 
context.getMetadataFormats()) { - UsePregeneratedMetadataFormat condition = new UsePregeneratedMetadataFormat(); - condition.withMetadataFormat(metadataFormat); - metadataFormat.withCondition(condition); - } - }*/ - private boolean isDataverseOaiExtensionsSupported() { return true; } @@ -185,6 +178,7 @@ private RepositoryConfiguration createRepositoryConfiguration() { InternetAddress systemEmailAddress = MailUtil.parseSystemAddress(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail)); RepositoryConfiguration repositoryConfiguration = RepositoryConfiguration.defaults() + .withEnableMetadataAttributes(true) .withRepositoryName(repositoryName) .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") .withCompression("gzip") @@ -193,8 +187,7 @@ private RepositoryConfiguration createRepositoryConfiguration() { .withDeleteMethod(DeletedRecord.TRANSIENT) .withMaxListIdentifiers(systemConfig.getOaiServerMaxIdentifiers()) .withMaxListRecords(systemConfig.getOaiServerMaxRecords()) - .withMaxListSets(systemConfig.getOaiServerMaxSets()) - .withEnableMetadataAttributes(true); + .withMaxListSets(systemConfig.getOaiServerMaxSets()); return repositoryConfiguration; } @@ -239,21 +232,13 @@ private void processRequest(HttpServletRequest httpServletRequest, HttpServletRe "Sorry. 
OAI Service is disabled on this Dataverse node."); return; } - - //OAIRequestParametersBuilder parametersBuilder = newXoaiRequest(); + RawRequest rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); - /*for (Object p : httpServletRequest.getParameterMap().keySet()) { - String parameterName = (String)p; - String parameterValue = httpServletRequest.getParameter(parameterName); - parametersBuilder = parametersBuilder.with(parameterName, parameterValue); - - }*/ - OAIPMH handle = dataProvider.handle(rawRequest); response.setContentType("text/xml;charset=UTF-8"); - XmlWriter xmlWriter = new XmlWriter(response.getOutputStream()); + XmlWriter xmlWriter = new XmlWriter(response.getOutputStream(), repositoryConfiguration); xmlWriter.write(handle); xmlWriter.flush(); xmlWriter.close(); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java index 6f246309593..c9dfe65d722 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java @@ -1,10 +1,7 @@ package edu.harvard.iq.dataverse.harvest.server.xoai; import io.gdcc.xoai.dataprovider.exceptions.handler.IdDoesNotExistException; -import io.gdcc.xoai.exceptions.OAIException; import io.gdcc.xoai.dataprovider.filter.ScopedFilter; -//import io.gdcc.xoai.dataprovider.handlers.results.ListItemIdentifiersResult; -//import io.gdcc.xoai.dataprovider.handlers.results.ListItemsResults; import io.gdcc.xoai.dataprovider.model.Item; import io.gdcc.xoai.dataprovider.model.ItemIdentifier; import io.gdcc.xoai.dataprovider.model.Set; @@ -19,7 +16,6 @@ import edu.harvard.iq.dataverse.util.StringUtil; import io.gdcc.xoai.dataprovider.exceptions.handler.HandlerException; import 
io.gdcc.xoai.dataprovider.exceptions.handler.NoMetadataFormatsException; -import io.gdcc.xoai.dataprovider.filter.Scope; import io.gdcc.xoai.dataprovider.repository.ResultsPage; import io.gdcc.xoai.model.oaipmh.ResumptionToken; import io.gdcc.xoai.model.oaipmh.results.record.Metadata; @@ -40,7 +36,7 @@ */ public class DataverseXoaiItemRepository implements ItemRepository { - private static Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.xoai.XitemRepository"); + private static Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.xoai.DataverseXoaiItemRepository"); private OAIRecordServiceBean recordService; private DatasetServiceBean datasetService; @@ -60,7 +56,7 @@ public DataverseXoaiItemRepository (OAIRecordServiceBean recordService, DatasetS public Item getItem(String identifier) throws IdDoesNotExistException { // I'm assuming we don't want to use this version of getItem // that does not specify the requested metadata format, ever - // in our implementation - ? + // in our implementation - ? (L.A.) 
throw new IdDoesNotExistException("Metadata Format is Required"); } @@ -153,8 +149,6 @@ public ResultsPage getItemIdentifiers(List filters List oaiRecords = recordService.findOaiRecordsBySetName(setSpec, from, until); - //logger.fine("total " + oaiRecords.size() + " returned"); - List xoaiItems = new ArrayList<>(); if (oaiRecords != null && !oaiRecords.isEmpty()) { @@ -169,7 +163,6 @@ public ResultsPage getItemIdentifiers(List filters addExtraSets(xoaiItems, setSpec, from, until); boolean hasMore = offset + maxResponseLength < oaiRecords.size(); - //ListItemIdentifiersResult result = new ListItemIdentifiersResult(hasMore, xoaiItems); ResultsPage result = new ResultsPage(resumptionToken, hasMore, xoaiItems, oaiRecords.size()); logger.fine("returning result with " + xoaiItems.size() + " items."); return result; @@ -178,9 +171,6 @@ public ResultsPage getItemIdentifiers(List filters return new ResultsPage(resumptionToken, false, xoaiItems, 0); } - /* ResultsPage getItems( - final List filters, final MetadataFormat metadataFormat, final int maxResponseLength, - final ResumptionToken.Value resumptionToken) throws HandlerException; */ @Override public ResultsPage getItems(List filters, MetadataFormat metadataFormat, int maxResponseLength, ResumptionToken.Value resumptionToken) throws HandlerException { int offset = Long.valueOf(resumptionToken.getOffset()).intValue(); @@ -194,7 +184,8 @@ public ResultsPage getItems(List filters, MetadataFormat met + ", from=" + from + ", until=" + until); - // this is not needed, is it? + // this is not needed, is it? 
(the parameters should be pre-validated + // on the gdcc/xoai side by this point) if (metadataFormat == null) { throw new NoMetadataFormatsException("Metadata Format is Required"); } @@ -252,7 +243,6 @@ public ResultsPage getItems(List filters, MetadataFormat met addExtraSets(xoaiItems, setSpec, from, until); boolean hasMore = offset + maxResponseLength < oaiRecords.size(); - //ListItemsResults result = new ListItemsResults(hasMore, xoaiItems); ResultsPage result = new ResultsPage(resumptionToken, hasMore, xoaiItems, oaiRecords.size()); logger.fine("returning result with " + xoaiItems.size() + " items."); return result; @@ -300,8 +290,10 @@ private Metadata getDatasetMetadata(Dataset dataset, String metadataPrefix) thro // Solely for backward compatibility, for older Dataverse harvesting clients // that may still be relying on harvesting "dataverse_json"; // we will want to eventually get rid of this hack! - String apiUrl = customDataverseJsonApiUri(dataset.getGlobalId().asString()); - metadata = new Metadata(new EchoElement("custom metadata")).withAttribute("directApiCall", apiUrl); + // @Deprecated(since = "5.0") + metadata = new Metadata( + new EchoElement("custom metadata")) + .withAttribute("directApiCall", customDataverseJsonApiUri(dataset.getGlobalId().asString())); } else { InputStream pregeneratedMetadataStream; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java index f1a0ecf185f..edbe1da8d69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java @@ -2,7 +2,6 @@ import io.gdcc.xoai.model.xoai.Element; import io.gdcc.xoai.dataprovider.repository.SetRepository; -//import io.gdcc.xoai.dataprovider.handlers.results.ListSetsResult; import io.gdcc.xoai.dataprovider.model.Set; 
import io.gdcc.xoai.model.xoai.XOAIMetadata; import edu.harvard.iq.dataverse.harvest.server.OAISet; @@ -37,7 +36,6 @@ public void setSetService(OAISetServiceBean setService) { @Override public boolean supportSets() { - logger.fine("calling supportSets()"); List dataverseOAISets = setService.findAllNamedSets(); if (dataverseOAISets == null || dataverseOAISets.isEmpty()) { @@ -47,7 +45,7 @@ public boolean supportSets() { } @Override - public List getSets() { //int offset, int length) { + public List getSets() { logger.fine("calling retrieveSets()"); List dataverseOAISets = setService.findAllNamedSets(); List XOAISets = new ArrayList(); @@ -71,7 +69,6 @@ public List getSets() { //int offset, int length) { @Override public boolean exists(String setSpec) { - // return true; return setService.setExists(setSpec); }