From 3f7ca7e0725dae04a0252b157eca4aa6f4f54a6e Mon Sep 17 00:00:00 2001 From: Jozef Misutka <332350+vidiecan@users.noreply.github.com> Date: Fri, 9 Feb 2024 13:50:41 +0100 Subject: [PATCH] remove discofeed related fetching from obsolete discojuice servers (#508) * remove discofeed related fetching from obsolete discojuice servers * check style, turned off in testing, turned off by default * fix failing test --------- Co-authored-by: jm --- .../test/data/dspaceFolder/config/local.cfg | 5 +- .../ClarinDiscoJuiceFeedsDownloadService.java | 88 ++++++------------- .../ClarinDiscoJuiceFeedsUpdateScheduler.java | 7 +- .../ClarinDiscoJuiceFeedsControllerIT.java | 16 +++- dspace/config/clarin-dspace.cfg | 2 - 5 files changed, 44 insertions(+), 74 deletions(-) diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index 85f5b1234fe5..5843e677968b 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -220,12 +220,9 @@ featured.service.teitok.description = A web-based platform for viewing, creating ##### Shibboleth ##### # Turn off the discofeed, it is allowed by default -shibboleth.discofeed.allowed = true +shibboleth.discofeed.allowed = false # File where is DiscoJuiceFeed response shibboleth.discofeed.url = TEST:/org/dspace/app/rest/discofeedResponse.json - -# Configuration properties for DiscoJuice -discojuice.feeds = edugain, dfn, cesnet, surfnet2, haka, kalmar # CRON job refresh time definition - default is refresh in every 2 hours. discojuice.refresh = 0 */2 * * * ? 
# Comma separated list of entityIDs; we try to guess country on these diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsDownloadService.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsDownloadService.java index e4d5435f62bb..b069fb1ec3ad 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsDownloadService.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsDownloadService.java @@ -7,7 +7,6 @@ */ package org.dspace.app.rest; -import static org.apache.commons.lang.StringUtils.isBlank; import static org.apache.commons.lang.StringUtils.isNotBlank; import java.io.BufferedInputStream; @@ -55,10 +54,9 @@ public class ClarinDiscoJuiceFeedsDownloadService implements InitializingBean { protected static Logger log = org.apache.logging.log4j.LogManager.getLogger( ClarinDiscoJuiceFeedsDownloadService.class); - private static final String DISCOJUICE_URL = "https://static.discojuice.org/feeds/"; /** - * contains entityIDs of idps we wish to set the country to something different than discojuice feeds suggests + * contains entityIDs of idps we wish to set the country to something different then discovery feeds suggests **/ private Set rewriteCountries; protected static DatabaseReader locationService; @@ -94,84 +92,50 @@ public void afterPropertiesSet() throws Exception { } for (String country : propRewriteCountries) { - country = country.trim(); - rewriteCountries.add(country); + rewriteCountries.add(country.trim()); } } public String createFeedsContent() { - log.debug("Going to create feeds content."); - String[] feedsConfig = configurationService.getArrayProperty("discojuice.feeds"); - String shibbolethDiscoFeedUrl = configurationService.getProperty("shibboleth.discofeed.url"); + log.debug("Starting to create feeds content."); - if (StringUtils.isEmpty(shibbolethDiscoFeedUrl)) { - throw new RuntimeException("Cannot load the property 
`shibboleth.discofeed.url` from the configuration " + - "file, maybe it is not set in the configuration file"); - } + String shibbolethDiscoFeedUrl = configurationService.getProperty("shibboleth.discofeed.url"); - if (ArrayUtils.isEmpty(feedsConfig)) { - throw new RuntimeException("Cannot load the property `discojuice.feeds` from the configuration " + + if (StringUtils.isBlank(shibbolethDiscoFeedUrl)) { + throw new IllegalStateException( + "Cannot load the property `shibboleth.discofeed.url` from the configuration " + "file, maybe it is not set in the configuration file"); } - String old_value = System.getProperty("jsse.enableSNIExtension"); + String origSniVal = System.getProperty("jsse.enableSNIExtension"); System.setProperty("jsse.enableSNIExtension", "false"); + try { - final Map shibDiscoEntities = toMap(shrink( - ClarinDiscoJuiceFeedsDownloadService.downloadJSON(shibbolethDiscoFeedUrl))); - - //true is the default http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html - old_value = (old_value == null) ? 
"true" : old_value; - System.setProperty("jsse.enableSNIExtension", old_value); + final Map shibDiscoEntities = toMap(shrink( + ClarinDiscoJuiceFeedsDownloadService.downloadJSON(shibbolethDiscoFeedUrl))); - String feedsContent = ""; - Set processedEntities = new HashSet<>(); - //loop through disco cdn feeds - for (String feed : feedsConfig) { - Map feedMap = toMap( - ClarinDiscoJuiceFeedsDownloadService.downloadJSON(DISCOJUICE_URL + feed.trim())); - //loop through entities in one feed - for (Map.Entry entry: feedMap.entrySet()) { - String entityID = entry.getKey(); - JSONObject cdnEntity = entry.getValue(); - //keep only entities from shibboleth, add only once, but copy geo, icon, country - if (shibDiscoEntities.containsKey(entityID) && !processedEntities.contains(entityID)) { - JSONObject geo = (JSONObject) cdnEntity.get("geo"); - String icon = (String) cdnEntity.get("icon"); - String country = (String) cdnEntity.get("country"); - JSONObject shibEntity = shibDiscoEntities.get(entityID); - if (geo != null) { - shibEntity.put("geo", geo); - } - if (icon != null) { - shibEntity.put("icon", icon); - } - if (country != null) { - shibEntity.put("country", country); - } - processedEntities.add(entityID); + // iterate through the entities to update countries as needed + shibDiscoEntities.forEach((entityId, shibEntity) -> { + if (rewriteCountries.contains(entityId) || StringUtils.isBlank((String) shibEntity.get("country"))) { + String oldCountry = (String) shibEntity.remove("country"); + String newCountry = guessCountry(shibEntity); + shibEntity.put("country", newCountry); + log.debug("Changed country for {} from {} to {}", entityId, oldCountry, newCountry); } - } - } + }); - //loop through shib entities, we show these... 
- for (JSONObject shibEntity : shibDiscoEntities.values()) { - //rewrite or guess countries - if (rewriteCountries.contains(shibEntity.get("entityID")) || isBlank((String)shibEntity.get("country"))) { - String old_country = (String)shibEntity.remove("country"); - String new_country = guessCountry(shibEntity); - shibEntity.put("country", new_country); - log.debug(String.format("For %s changed country from %s to %s", shibEntity.get("entityID"), - old_country, new_country)); + if (shibDiscoEntities.isEmpty()) { + return null; } - } - if (shibDiscoEntities.isEmpty()) { - return null; - } else { JSONArray ret = new JSONArray(); ret.addAll(shibDiscoEntities.values()); return ret.toJSONString(); + + } finally { + // true is the default http://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html + origSniVal = (origSniVal == null) ? "true" : origSniVal; + System.setProperty("jsse.enableSNIExtension", origSniVal); } } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsUpdateScheduler.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsUpdateScheduler.java index 1e95adbb5c92..b86bf65ee9d6 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsUpdateScheduler.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsUpdateScheduler.java @@ -49,17 +49,18 @@ public void afterPropertiesSet() throws Exception { */ @Scheduled(cron = "${discojuice.refresh:-}") public void cronJobSch() { - boolean isAllowed = configurationService.getBooleanProperty("shibboleth.discofeed.allowed", true); + // 2024/02 - unless explicitly turned on, do not use discofeed + boolean isAllowed = configurationService.getBooleanProperty("shibboleth.discofeed.allowed", false); if (!isAllowed) { return; } - log.debug("CRON Job - going to download the discojuice feeds."); + log.debug("CRON Job - going to download the discovery feeds."); String 
newFeedsContent = clarinDiscoJuiceFeedsDownloadService.createFeedsContent(); if (isNotBlank(newFeedsContent)) { feedsContent = newFeedsContent; } else { - log.error("Failed to obtain discojuice feeds!"); + log.error("Failed to obtain additional discovery feeds!"); } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsControllerIT.java index 52cccb0d777c..ef09947dd766 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoJuiceFeedsControllerIT.java @@ -17,6 +17,7 @@ import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; + /** * Test class for the controller ClarinDiscoJuiceFeedsController * @@ -27,13 +28,21 @@ public class ClarinDiscoJuiceFeedsControllerIT extends AbstractControllerIntegra @Autowired ConfigurationService configurationService; + @Autowired + ClarinDiscoJuiceFeedsUpdateScheduler clarinDiscoJuiceFeedsUpdateScheduler; + @Test public void getDiscoFeeds() throws Exception { String authTokenAdmin = getAuthToken(eperson.getEmail(), password); + String configKey = "shibboleth.discofeed.allowed"; + boolean origVal = configurationService.getBooleanProperty(configKey); + configurationService.setProperty(configKey, true); + clarinDiscoJuiceFeedsUpdateScheduler.afterPropertiesSet(); + // Expected response created from the test file: `discofeedResponse.json` // Wrapped to the `callback` string = `dj_md_1` - String responseString = "dj_md_1([{\"country\":\"CZ\",\"keywords\":[\"Identity Provider for employees and " + + String expStr = "dj_md_1([{\"country\":\"CZ\",\"keywords\":[\"Identity Provider for employees and " + "readers of the Archiepiscopal Gymnasium in Kromeriz - Library\",\"Identity Provider pro zamstnance " + "a tene knihovny Arcibiskupskho gymnzia v 
Kromi\",\"Arcibiskupsk gymnzium v Kromi - " + "Knihovna\"],\"entityID\":\"https:\\/\\/agkm.cz\\/idp\\/shibboleth\",\"title\":\"Archiepiscopal " + @@ -47,12 +56,13 @@ public void getDiscoFeeds() throws Exception { "\"Studijn a vdeck knihovna v Hradci Krlov\"],\"entityID\":\"https:\\/\\/aleph.svkhk.cz\\" + "/idp\\/shibboleth\",\"title\":\"The Research Library in Hradec Krlov\"}])"; - // Load bitstream from the item. // Request with callback getClient(authTokenAdmin).perform(get("/api/discojuice/feeds?callback=dj_md_1")) .andExpect(status().isOk()) .andExpect(content().contentType(APPLICATION_JAVASCRIPT_UTF8)) - .andExpect(content().string(responseString)); + .andExpect(content().string(expStr)); + + configurationService.setProperty(configKey, origVal); } } diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index c3223c48dc83..89bc0d12795d 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -139,8 +139,6 @@ featured.service.teitok.description = A web-based platform for viewing, creating # File where is DiscoJuiceFeed response shibboleth.discofeed.url = https://lindat.mff.cuni.cz/Shibboleth.sso/DiscoFeed -# Configuration properties for DiscoJuice -discojuice.feeds = edugain, dfn, cesnet, surfnet2, haka, kalmar # CRON job refresh time definition - default is refresh in every 2 hours. discojuice.refresh = 0 0 */2 * * ? # Comma separated list of entityIDs; we try to guess country on these