From 52ca8a94aba382e69928e18455eeadf15a5568e3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 21 Jun 2024 16:32:35 -0400 Subject: [PATCH 01/11] fix getProviderType --- .../pidproviders/doi/datacite/DataCiteDOIProvider.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java index cd765933796..b1dc49b7c6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java @@ -223,8 +223,7 @@ protected String getProviderKeyName() { @Override public String getProviderType() { - // TODO Auto-generated method stub - return null; + return TYPE; } public String getMdsUrl() { From 1f87aa3cd9a1bbaebebc7e8df9cc4b6f246fbf08 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 21 Jun 2024 19:17:08 -0400 Subject: [PATCH 02/11] citation/case-insensitive DOI fixes --- .../iq/dataverse/api/MakeDataCountApi.java | 23 +++++++++++++++---- .../DatasetExternalCitationsServiceBean.java | 12 ++++++---- .../pidproviders/AbstractPidProvider.java | 13 ++++++++--- .../pidproviders/doi/AbstractDOIProvider.java | 22 +++++++++++++++++- 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index 1f2f1039327..306b863c9e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -19,6 +19,9 @@ import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.util.Iterator; import java.util.List; @@ -152,10 +155,17 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE // DataCite wants "doi=", not "doi:". String authorityPlusIdentifier = persistentId.replaceFirst("doi:", ""); // Request max page size and then loop to handle multiple pages - URL url = new URL(JvmSettings.DATACITE_REST_API_URL.lookup() + + URL url = null; + try { + url = new URI(JvmSettings.DATACITE_REST_API_URL.lookup(pidProvider.getId()) + "/events?doi=" + authorityPlusIdentifier + - "&source=crossref&page[size]=1000"); + "&source=crossref&page[size]=1000").toURL(); + } catch (URISyntaxException e) { + //Nominally this means a config error/ bad DATACITE_REST_API_URL for this provider + logger.warning("Unable to create URL for " + persistentId + ", pidProvider " + pidProvider.getId()); + return error(Status.INTERNAL_SERVER_ERROR, "Unable to create DataCite URL to retrieve citations."); + } logger.fine("Retrieving Citations from " + url.toString()); boolean nextPage = true; JsonArrayBuilder dataBuilder = Json.createArrayBuilder(); @@ -178,7 +188,12 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE dataBuilder.add(iter.next()); } if (links.containsKey("next")) { - url = new URL(links.getString("next")); + try { + url = new URI(links.getString("next")).toURL(); + } catch (URISyntaxException e) { + logger.warning("Unable to create URL from DataCite response: " + links.getString("next")); + return error(Status.INTERNAL_SERVER_ERROR, "Unable to retrieve all results from DataCite"); + } } else { nextPage = false; } @@ -187,7 +202,7 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE JsonArray allData = dataBuilder.build(); List datasetExternalCitations = datasetExternalCitationsService.parseCitations(allData); /* - * ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopuate them. + * ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopulate them. * As is, this call doesn't remove old citations if there are now none (legacy issue if we decide to stop counting certain types of citation * as we've done for 'hasPart'). * If there are some, this call individually checks each one and if a matching item exists, it removes it and adds it back. Faster and better to delete all and diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java index 50c24274bb2..9f930240edd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java @@ -7,6 +7,9 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -59,12 +62,11 @@ public List parseCitations(JsonArray citations) { if (inboundRelationships.contains(relationship)) { Dataset localDs = null; if (objectUri.contains("doi")) { - String globalId = objectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:"); - localDs = datasetService.findByGlobalId(globalId); + localDs = datasetService.findByGlobalId(objectUri); exCit.setDataset(localDs); } exCit.setCitedByUrl(subjectUri); - + if (localDs != null && !exCit.getCitedByUrl().isEmpty()) { datasetExternalCitations.add(exCit); } @@ -72,9 +74,9 @@ public List parseCitations(JsonArray citations) { if (outboundRelationships.contains(relationship)) { Dataset localDs = null; if (subjectUri.contains("doi")) { - String globalId = subjectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:"); - localDs = datasetService.findByGlobalId(globalId); + localDs = datasetService.findByGlobalId(subjectUri); exCit.setDataset(localDs); + } exCit.setCitedByUrl(objectUri); diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java index f6d142aac96..d1a1331c348 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java @@ -36,9 +36,9 @@ public abstract class AbstractPidProvider implements PidProvider { private String datafilePidFormat = null; - private HashSet managedSet; + protected HashSet managedSet; - private HashSet excludedSet; + protected HashSet excludedSet; private String id; private String label; @@ -313,10 +313,17 @@ protected GlobalId parsePersistentId(String protocol, String identifierString) { } public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + return parsePersistentId(protocol, authority, identifier, false); + } + + public GlobalId parsePersistentId(String protocol, String authority, String identifier, boolean isCaseInsensitive) { logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getId()); if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) { return null; } + if(isCaseInsensitive) { + identifier = identifier.toUpperCase(); + } // Check authority/identifier if this is a provider that manages specific // identifiers // /is not one of the unmanaged providers that has null authority @@ -333,7 +340,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden logger.fine("managed in " + getId() + ": " + getManagedSet().contains(cleanIdentifier)); logger.fine("excluded from " + getId() + ": " + getExcludedSet().contains(cleanIdentifier)); - if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder())) + if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder().toUpperCase())) || getManagedSet().contains(cleanIdentifier)) && !getExcludedSet().contains(cleanIdentifier))) { return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index 43e34e74c59..532ea9e1fbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.pidproviders.doi; import java.util.Arrays; +import java.util.HashSet; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; @@ -26,6 +27,25 @@ public abstract class AbstractDOIProvider extends AbstractPidProvider { public AbstractDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) { super(id, label, DOI_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + //Create case insensitive (converted toUpperCase) managedSet and excludedSet + HashSet cleanManagedSet = new HashSet(); + for(String entry: managedSet) { + if(entry.startsWith(DOI_PROTOCOL)) { + cleanManagedSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase()); + } else { + logger.warning("Non-DOI found in managedSet of pidProvider id: " + getId() + ": " + entry + ". Entry is being dropped."); + } + } + managedSet = cleanManagedSet; + HashSet cleanExcludedSet = new HashSet(); + for(String entry: excludedSet) { + if(entry.startsWith(DOI_PROTOCOL)) { + cleanExcludedSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase()); + } else { + logger.warning("Non-DOI found in excludedSet of pidProvider id: " + getId() + ": " + entry + ". Entry is being dropped."); + } + } + excludedSet = cleanExcludedSet; } //For Unmanged provider @@ -67,7 +87,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden if (!DOI_PROTOCOL.equals(protocol)) { return null; } - return super.parsePersistentId(protocol, authority, identifier); + return super.parsePersistentId(protocol, authority, identifier, true); } public String getUrlPrefix() { From b1cc199d5d47fb3ebc8426b6dbb524ad19622d7a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 2 Jul 2024 14:32:56 -0400 Subject: [PATCH 03/11] avoid having an empty entry & log warning in managed/excluded lists --- .../iq/dataverse/pidproviders/AbstractPidProvider.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java index d1a1331c348..e26f7f3acc1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java @@ -60,8 +60,12 @@ protected AbstractPidProvider(String id, String label, String protocol, String a this.shoulder = shoulder; this.identifierGenerationStyle = identifierGenerationStyle; this.datafilePidFormat = datafilePidFormat; - this.managedSet = new HashSet(Arrays.asList(managedList.split(",\\s"))); - this.excludedSet = new HashSet(Arrays.asList(excludedList.split(",\\s"))); + if(!managedList.isEmpty()) { + this.managedSet = new HashSet(Arrays.asList(managedList.split(",\\s"))); + } + if(!excludedList.isEmpty()) { + this.excludedSet = new HashSet(Arrays.asList(excludedList.split(",\\s"))); + } if (logger.isLoggable(Level.FINE)) { Iterator iter = managedSet.iterator(); while (iter.hasNext()) { From 08595ec646c48dd92781975a6e44c4b873ffd2ef Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 2 Jul 2024 14:47:10 -0400 Subject: [PATCH 04/11] avoid nulls --- .../iq/dataverse/pidproviders/AbstractPidProvider.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java index e26f7f3acc1..250eae7e5fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java @@ -36,9 +36,9 @@ public abstract class AbstractPidProvider implements PidProvider { private String datafilePidFormat = null; - protected HashSet managedSet; + protected HashSet managedSet = new HashSet(); - protected HashSet excludedSet; + protected HashSet excludedSet = new HashSet(); private String id; private String label; @@ -47,8 +47,6 @@ protected AbstractPidProvider(String id, String label, String protocol) { this.id = id; this.label = label; this.protocol = protocol; - this.managedSet = new HashSet(); - this.excludedSet = new HashSet(); } protected AbstractPidProvider(String id, String label, String protocol, String authority, String shoulder, @@ -61,10 +59,10 @@ protected AbstractPidProvider(String id, String label, String protocol, String a this.identifierGenerationStyle = identifierGenerationStyle; this.datafilePidFormat = datafilePidFormat; if(!managedList.isEmpty()) { - this.managedSet = new HashSet(Arrays.asList(managedList.split(",\\s"))); + this.managedSet.addAll(Arrays.asList(managedList.split(",\\s"))); } if(!excludedList.isEmpty()) { - this.excludedSet = new HashSet(Arrays.asList(excludedList.split(",\\s"))); + this.excludedSet.addAll(Arrays.asList(excludedList.split(",\\s"))); } if (logger.isLoggable(Level.FINE)) { Iterator iter = managedSet.iterator(); From e98a91f41adadcf1550852800db1fba8631c4e1b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 20 Jul 2024 15:22:41 -0400 Subject: [PATCH 05/11] add is-supplement-to relation --- .../makedatacount/DatasetExternalCitationsServiceBean.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java index 9f930240edd..fa56432cc3c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java @@ -43,7 +43,8 @@ public class DatasetExternalCitationsServiceBean implements java.io.Serializable Arrays.asList( "cites", "references", - "supplements")); + "supplements", + "is-supplement-to")); static ArrayList outboundRelationships = new ArrayList( Arrays.asList( "is-cited-by", From 62fb0e83e5701a0683aabebaf10e360056b72297 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 20 Jul 2024 15:34:52 -0400 Subject: [PATCH 06/11] release note --- .../10708 - MDC Citation and DOI parsing improvements.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/10708 - MDC Citation and DOI parsing improvements.md diff --git a/doc/release-notes/10708 - MDC Citation and DOI parsing improvements.md b/doc/release-notes/10708 - MDC Citation and DOI parsing improvements.md new file mode 100644 index 00000000000..1dcd293df77 --- /dev/null +++ b/doc/release-notes/10708 - MDC Citation and DOI parsing improvements.md @@ -0,0 +1,3 @@ +MDC Citation retrieval with the PID settings has been fixed. +DOI parsing in Dataverse is case insensitive, improving interaction with services that may change the case. +Warnings related to managed/excluded PID lists for PID providers have been reduced From 1cd9b7ce3d3e56bc2b0e1c2451215aaf5ff3430e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 20 Jul 2024 15:43:37 -0400 Subject: [PATCH 07/11] test tweak for case insensitivity --- .../edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java index cffac741c78..b58c057c25c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java @@ -237,7 +237,8 @@ public void testDOIParsing() throws IOException { assertEquals(pid1String, pid3.asString()); assertEquals("dc1", pid3.getProviderId()); - String pid4String = "doi:10.5072/FK3ABCDEF"; + //Also test case insensitive + String pid4String = "doi:10.5072/fk3ABCDEF"; GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String); assertEquals(pid4String, pid4.asString()); assertEquals("dc2", pid4.getProviderId()); From bd500cfb2f01f6fc21e7f6fbb9a59d5219326bcc Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 3 Sep 2024 17:45:37 -0400 Subject: [PATCH 08/11] test fix --- .../edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java index b58c057c25c..b041b24dd29 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java @@ -240,7 +240,9 @@ public void testDOIParsing() throws IOException { //Also test case insensitive String pid4String = "doi:10.5072/fk3ABCDEF"; GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String); - assertEquals(pid4String, pid4.asString()); + // Lower case is recognized by converting to upper case internally, so we need to test vs. the upper case string + // I.e. we are verifying that the lower case string is parsed the same as the upper case string, buth give an internal upper case PID representation + assertEquals(pid4String.toUpperCase(), pid4.asString()); assertEquals("dc2", pid4.getProviderId()); String pid5String = "doi:10.5072/FK2ABCDEF"; From 7a0ee7fc208f12531ab00261eb78ecf156851c95 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 3 Sep 2024 18:00:07 -0400 Subject: [PATCH 09/11] keep doi: lower case --- .../edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java index b041b24dd29..9cb9e086ccd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java @@ -240,9 +240,9 @@ public void testDOIParsing() throws IOException { //Also test case insensitive String pid4String = "doi:10.5072/fk3ABCDEF"; GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String); - // Lower case is recognized by converting to upper case internally, so we need to test vs. the upper case string - // I.e. we are verifying that the lower case string is parsed the same as the upper case string, buth give an internal upper case PID representation - assertEquals(pid4String.toUpperCase(), pid4.asString()); + // Lower case is recognized by converting to upper case internally, so we need to test vs. the upper case identifier + // I.e. we are verifying that the lower case string is parsed the same as the upper case string, both give an internal upper case PID representation + assertEquals("doi:10.5072/FK3ABCDEF", pid4.asString()); assertEquals("dc2", pid4.getProviderId()); String pid5String = "doi:10.5072/FK2ABCDEF"; From 8a6744fc808eaa7abaf5ac28d6405333f4d7dc2e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 22:27:42 +0000 Subject: [PATCH 10/11] Bump actions/download-artifact from 3 to 4.1.7 in /.github/workflows Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4.1.7. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4.1.7) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- .github/workflows/deploy_beta_testing.yml | 2 +- .github/workflows/maven_unit_test.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_beta_testing.yml b/.github/workflows/deploy_beta_testing.yml index 028f0140cc9..efe3e0d8621 100644 --- a/.github/workflows/deploy_beta_testing.yml +++ b/.github/workflows/deploy_beta_testing.yml @@ -45,7 +45,7 @@ jobs: - uses: actions/checkout@v3 - name: Download war artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4.1.7 with: name: built-app path: ./ diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml index 4ad4798bc64..fe335f5f45a 100644 --- a/.github/workflows/maven_unit_test.yml +++ b/.github/workflows/maven_unit_test.yml @@ -105,7 +105,7 @@ jobs: cache: maven # Get the build output from the unit test job - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4.1.7 with: name: java-artifacts - run: | @@ -137,7 +137,7 @@ jobs: cache: maven # Get the build output from the integration test job - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4.1.7 with: name: java-reportdir - run: tar -xvf java-reportdir.tar From 01f0bd3ed8428b946a394dcbcf94c61207341d5d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 27 Sep 2024 16:14:25 -0400 Subject: [PATCH 11/11] abstract cleaning to separate method --- .../pidproviders/doi/AbstractDOIProvider.java | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index fae40ddb77e..70ce1ec4c14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -28,27 +28,23 @@ public abstract class AbstractDOIProvider extends AbstractPidProvider { public AbstractDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) { super(id, label, DOI_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); //Create case insensitive (converted toUpperCase) managedSet and excludedSet - HashSet cleanManagedSet = new HashSet(); - for(String entry: managedSet) { - if(entry.startsWith(DOI_PROTOCOL)) { - cleanManagedSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase()); - } else { - logger.warning("Non-DOI found in managedSet of pidProvider id: " + getId() + ": " + entry + ". Entry is being dropped."); - } - } - managedSet = cleanManagedSet; - HashSet cleanExcludedSet = new HashSet(); - for(String entry: excludedSet) { + managedSet = clean(managedSet, "managed"); + excludedSet = clean(excludedSet, "excluded"); + } + + private HashSet clean(HashSet originalSet, String setName) { + HashSet cleanSet = new HashSet(); + for(String entry: originalSet) { if(entry.startsWith(DOI_PROTOCOL)) { - cleanExcludedSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase()); + cleanSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase()); } else { - logger.warning("Non-DOI found in excludedSet of pidProvider id: " + getId() + ": " + entry + ". Entry is being dropped."); + logger.warning("Non-DOI found in " + setName + " set of pidProvider id: " + getId() + ": " + entry + ". Entry is being dropped."); } } - excludedSet = cleanExcludedSet; + return cleanSet; } - //For Unmanged provider + //For Unmanaged provider public AbstractDOIProvider(String name, String label) { super(name, label, DOI_PROTOCOL); }