From c01af03e588ec447bc0fd10b21391cbb1f52e9ea Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 6 Dec 2023 07:30:58 +0100 Subject: [PATCH] ufal/be-shibboleth-headers-encoding (#464) * givenname and last name values are converted to UTF-8 encoding * Delete eperson in tests --- .../clarin/ClarinShibAuthentication.java | 8 +-- .../dspace/authenticate/clarin/Headers.java | 58 +++++++++++++++--- .../ClarinShibbolethLoginFilterIT.java | 61 +++++++++++++++++-- dspace/config/clarin-dspace.cfg | 4 ++ 4 files changed, 115 insertions(+), 16 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java b/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java index a10c291ff10f..d553d67308c6 100644 --- a/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java +++ b/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java @@ -695,8 +695,8 @@ protected EPerson registerNewEPerson(Context context, HttpServletRequest request // Header values String netid = Util.formatNetId(findSingleAttribute(request, netidHeader), org); String email = findSingleAttribute(request, emailHeader); - String fname = findSingleAttribute(request, fnameHeader); - String lname = findSingleAttribute(request, lnameHeader); + String fname = Headers.updateValueByCharset(findSingleAttribute(request, fnameHeader)); + String lname = Headers.updateValueByCharset(findSingleAttribute(request, lnameHeader)); // If the values are not in the request headers try to retrieve it from `shibheaders`. if (StringUtils.isEmpty(netid)) { @@ -817,8 +817,8 @@ protected void updateEPerson(Context context, HttpServletRequest request, EPerso String netid = Util.formatNetId(findSingleAttribute(request, netidHeader), shibheaders.get_idp()); String email = findSingleAttribute(request, emailHeader); - String fname = findSingleAttribute(request, fnameHeader); - String lname = findSingleAttribute(request, lnameHeader); + String fname = Headers.updateValueByCharset(findSingleAttribute(request, fnameHeader)); + String lname = Headers.updateValueByCharset(findSingleAttribute(request, lnameHeader)); // If the values are not in the request headers try to retrieve it from `shibheaders`. if (StringUtils.isEmpty(netid)) { diff --git a/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java b/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java index e683ac2e140d..3305661d194f 100644 --- a/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java +++ b/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java @@ -9,6 +9,7 @@ package org.dspace.authenticate.clarin; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Enumeration; import java.util.HashMap; @@ -16,8 +17,11 @@ import java.util.Map; import javax.servlet.http.HttpServletRequest; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.services.ConfigurationService; +import org.dspace.utils.DSpace; /** * Helper class for request headers. @@ -29,9 +33,11 @@ public class Headers { private static final Logger log = LogManager.getLogger(org.dspace.authenticate.clarin.Headers.class); // variables // + private static ConfigurationService configurationService = new DSpace().getConfigurationService(); private HashMap> headers_ = new HashMap>(); private String header_separator_ = null; + private static String EMPTY_STRING = ""; // ctors @@ -157,17 +163,53 @@ private List header2values(String header) { /** - * Convert ISO header value to UTF-8 - * @param value ISO header value String - * @return + * Convert ISO header value to UTF-8 or return UTF-8 value if it is not ISO. + * @param value ISO/UTF-8 header value String + * @return Converted ISO value to UTF-8 or UTF-8 value from input */ - private String updateValueByCharset(String value) { + public static String updateValueByCharset(String value) { + String inputEncoding = configurationService.getProperty("shibboleth.name.conversion.inputEncoding", + "ISO-8859-1"); + String outputEncoding = configurationService.getProperty("shibboleth.name.conversion.outputEncoding", + "UTF-8"); + + if (StringUtils.isBlank(value)) { + value = EMPTY_STRING; + } + + // If the value is not ISO-8859-1, then it is already UTF-8 + if (!isISOType(value)) { + return value; + } + try { - return new String(value.getBytes("ISO-8859-1"), "UTF-8"); + // Encode the string to UTF-8 + return new String(value.getBytes(inputEncoding), outputEncoding); } catch (UnsupportedEncodingException ex) { - log.warn("Failed to reconvert shibboleth attribute with value (" - + value + ").", ex); + log.warn("Cannot convert the value: " + value + " from " + inputEncoding + " to " + outputEncoding + + " because of: " + ex.getMessage()); + return value; + } + } + + /** + * Check if the value is ISO-8859-1 encoded. + * @param value String to check + * @return true if the value is ISO-8859-1 encoded, false otherwise + */ + private static boolean isISOType(String value) { + try { + // Encode the string to ISO-8859-1 + byte[] iso8859Bytes = value.getBytes(StandardCharsets.ISO_8859_1); + + // Decode the bytes back to a string using ISO-8859-1 + String decodedString = new String(iso8859Bytes, StandardCharsets.ISO_8859_1); + + // Compare the original string with the decoded string + return StringUtils.equals(value, decodedString); + } catch (Exception e) { + // An exception occurred, so the input is not ISO-8859-1 + return false; } - return value; } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java index ba793b45a9c9..8b62e95bed79 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java @@ -446,15 +446,18 @@ public void testRedirectToGivenUntrustedUrl() throws Exception { } @Test - public void testUTF8ShibHeaders() throws Exception { + public void testISOShibHeaders() throws Exception { + String testMail = "test@email.edu"; + String testIdp = IDP_TEST_EPERSON + "test"; + String testNetId = NET_ID_TEST_EPERSON + "000"; // NOTE: The initial call to /shibboleth comes *from* an external Shibboleth site. So, it is always // unauthenticated, but it must include some expected SHIB attributes. // SHIB-MAIL attribute is the default email header sent from Shibboleth after a successful login. // In this test we are simply mocking that behavior by setting it to an existing EPerson. String token = getClient().perform(get("/api/authn/shibboleth") - .header("SHIB-MAIL", clarinEperson.getEmail()) - .header("Shib-Identity-Provider", IDP_TEST_EPERSON) - .header("SHIB-NETID", NET_ID_TEST_EPERSON) + .header("SHIB-MAIL", testMail) + .header("Shib-Identity-Provider", testIdp) + .header("SHIB-NETID", testNetId) .header("SHIB-GIVENNAME", "knihovna KůÅ\u0088 test ŽluÅ¥ouÄ\u008Dký")) .andExpect(status().is3xxRedirection()) .andExpect(redirectedUrl("http://localhost:4000")) @@ -466,6 +469,56 @@ public void testUTF8ShibHeaders() throws Exception { .andExpect(jsonPath("$.authenticated", is(true))) .andExpect(jsonPath("$.authenticationMethod", is("shibboleth"))); + // Check if was created a user with such email and netid. + EPerson ePerson = ePersonService.findByNetid(context, Util.formatNetId(testNetId, testIdp)); + assertTrue(Objects.nonNull(ePerson)); + assertEquals(ePerson.getEmail(), testMail); + assertEquals(ePerson.getFirstName(), "knihovna Kůň test Žluťoučký"); + + EPersonBuilder.deleteEPerson(ePerson.getID()); + + getClient(token).perform( + get("/api/authz/authorizations/search/object") + .param("embed", "feature") + .param("feature", feature) + .param("uri", utils.linkToSingleResource(ePersonRest, "self").getHref())) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.page.totalElements", is(0))) + .andExpect(jsonPath("$._embedded").doesNotExist()); + } + + @Test + public void testUTF8ShibHeaders() throws Exception { + String testMail = "test@email.edu"; + String testIdp = IDP_TEST_EPERSON + "test"; + String testNetId = NET_ID_TEST_EPERSON + "000"; + // NOTE: The initial call to /shibboleth comes *from* an external Shibboleth site. So, it is always + // unauthenticated, but it must include some expected SHIB attributes. + // SHIB-MAIL attribute is the default email header sent from Shibboleth after a successful login. + // In this test we are simply mocking that behavior by setting it to an existing EPerson. + String token = getClient().perform(get("/api/authn/shibboleth") + .header("SHIB-MAIL", testMail) + .header("Shib-Identity-Provider", testIdp) + .header("SHIB-NETID", testNetId) + .header("SHIB-GIVENNAME", "knihovna Kůň test Žluťoučký")) + .andExpect(status().is3xxRedirection()) + .andExpect(redirectedUrl("http://localhost:4000")) + .andReturn().getResponse().getHeader("Authorization"); + + + getClient(token).perform(get("/api/authn/status")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.authenticated", is(true))) + .andExpect(jsonPath("$.authenticationMethod", is("shibboleth"))); + + // Check if was created a user with such email and netid. + EPerson ePerson = ePersonService.findByNetid(context, Util.formatNetId(testNetId, testIdp)); + assertTrue(Objects.nonNull(ePerson)); + assertEquals(ePerson.getEmail(), testMail); + assertEquals(ePerson.getFirstName(), "knihovna Kůň test Žluťoučký"); + + EPersonBuilder.deleteEPerson(ePerson.getID()); + getClient(token).perform( get("/api/authz/authorizations/search/object") .param("embed", "feature") diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index 2c50811ec00a..3d317c77b41a 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -224,3 +224,7 @@ themed.by.company.name = dataquest s.r.o. #### Authority configuration `authority.cfg` ## dc.relation authority is configured only because of correct item importing, but it is not used anymore. authority.controlled.dc.relation = true + +#nameConversion +shibboleth.name.conversion.inputEncoding = ISO-8859-1 +shibboleth.name.conversion.outputEncoding = UTF-8