Skip to content

Commit

Permalink
Break out base64 test and url test methods to util classes
Browse files Browse the repository at this point in the history
  • Loading branch information
pieter-edelman-nictiz committed Jun 21, 2024
1 parent 944db4a commit 4d64736
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 117 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,12 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWIS
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
Expand All @@ -57,7 +55,6 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWIS

import javax.annotation.Nonnull;

import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.fhir.ucum.Decimal;
Expand Down Expand Up @@ -2835,20 +2832,20 @@ else if (Utilities.isAllWhitespace(e.primitiveValue()))
if (type.equals("base64Binary")) {
String encoded = e.primitiveValue();
if (isNotBlank(encoded)) {
boolean bok = isValidBase64(encoded);
boolean bok = Base64Util.isValidBase64(encoded);
if (!bok) {
String value = encoded.length() < 100 ? encoded : "(snip)";
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, false, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_VALID, value) && ok;
} else {
boolean wsok = !base64HasWhitespace(encoded);
boolean wsok = !Base64Util.base64HasWhitespace(encoded);
if (VersionUtilities.isR5VerOrLater(this.context.getVersion())) {
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, wsok, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_NO_WS_ERROR) && ok;
} else {
warning(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, wsok, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_NO_WS_WARNING);
}
}
if (bok && context.hasExtension(ToolingExtensions.EXT_MAX_SIZE)) {
int size = countBase64DecodedBytes(encoded);
int size = Base64Util.countBase64DecodedBytes(encoded);
long def = Long.parseLong(ToolingExtensions.readStringExtension(context, ToolingExtensions.EXT_MAX_SIZE));
ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, e.line(), e.col(), path, size <= def, I18nConstants.TYPE_SPECIFIC_CHECKS_DT_BASE64_TOO_LONG, size, def) && ok;
}
Expand Down Expand Up @@ -3267,70 +3264,6 @@ private boolean containsHtmlTags(String cnt) {
return false;
}

/**
* Technically this is not bulletproof as some invalid base64 won't be caught,
* but I think it's good enough. The original code used Java8 Base64 decoder
* but I've replaced it with a regex for 2 reasons:
* 1. This code will run on any version of Java
* 2. This code doesn't actually decode, which is much easier on memory use for big payloads
*/
private boolean isValidBase64(String theEncoded) {
if (theEncoded == null) {
return false;
}
int charCount = 0;
boolean ok = true;
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
continue;
}
if (Character.isLetterOrDigit(nextChar)) {
charCount++;
}
if (nextChar == '/' || nextChar == '=' || nextChar == '+') {
charCount++;
}
}

if (charCount > 0 && charCount % 4 != 0) {
ok = false;
}
return ok;
}

private boolean base64HasWhitespace(String theEncoded) {
if (theEncoded == null) {
return false;
}
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
return true;
}
}
return false;

}


private int countBase64DecodedBytes(String theEncoded) {
Base64InputStream inputStream = new Base64InputStream(new ByteArrayInputStream(theEncoded.getBytes(StandardCharsets.UTF_8)));
try {
try {
for (int counter = 0; ; counter++) {
if (inputStream.read() == -1) {
return counter;
}
}
} finally {
inputStream.close();
}
} catch (IOException e) {
throw new IllegalStateException(e); // should not happen
}
}

private boolean isDefinitionURL(String url) {
return Utilities.existsInList(url,

Expand Down Expand Up @@ -3485,10 +3418,10 @@ private boolean checkUrls(List<ValidationMessage> errors, Element e, String path
for (XhtmlNode node : list) {
if (node.getNodeType() == NodeType.Element) {
if ("a".equals(node.getName())) {
String msg = checkValidUrl(node.getAttribute("href"));
String msg = UrlUtil.checkValidUrl(node.getAttribute("href"), context);
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("href"), msg) && ok;
} else if ("img".equals(node.getName())) {
String msg = checkValidUrl(node.getAttribute("src"));
String msg = UrlUtil.checkValidUrl(node.getAttribute("src"), context);
ok = rule(errors, NO_RULE_DATE, IssueType.INVALID, e.line(), e.col(), path, msg == null, I18nConstants.XHTML_URL_INVALID, node.getAttribute("src"), msg) && ok;
}
ok = checkUrls(errors, e, path, node.getChildNodes()) && ok;
Expand All @@ -3497,49 +3430,6 @@ private boolean checkUrls(List<ValidationMessage> errors, Element e, String path
return ok;
}

private String checkValidUrl(String value) {
if (value == null) {
return null;
}
if (Utilities.noString(value)) {
return context.formatMessage(I18nConstants.XHTML_URL_EMPTY);
}

if (value.startsWith("data:")) {
String[] p = value.substring(5).split("\\,");
if (p.length < 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_NO_DATA, value);
} else if (p.length > 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID_COMMA, value);
} else if (!p[0].endsWith(";base64") || !isValidBase64(p[1])) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID, value);
} else {
if (p[0].startsWith(" ")) {
p[0] = Utilities.trimWS(p[0]);
}
String mMsg = MimeTypeUtil.checkValidMimeType(p[0].substring(0, p[0].lastIndexOf(";")));
if (mMsg != null) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_MIMETYPE, value, mMsg);
}
}
return null;
} else {
Set<Character> invalidChars = new HashSet<>();
int c = 0;
for (char ch : value.toCharArray()) {
if (!(Character.isDigit(ch) || Character.isAlphabetic(ch) || Utilities.existsInList(ch, ';', '?', ':', '@', '&', '=', '+', '$', '.', ',', '/', '%', '-', '_', '~', '#', '[', ']', '!', '\'', '(', ')', '*', '|' ))) {
c++;
invalidChars.add(ch);
}
}
if (invalidChars.isEmpty()) {
return null;
} else {
return context.formatMessagePlural(c, I18nConstants.XHTML_URL_INVALID_CHARS, invalidChars.toString());
}
}
}

private boolean checkInnerNS(List<ValidationMessage> errors, Element e, String path, List<XhtmlNode> list) {
boolean ok = true;
for (XhtmlNode node : list) {
Expand Down Expand Up @@ -3806,9 +3696,9 @@ private boolean checkAttachment(List<ValidationMessage> errors, String path, Ele
String b64 = element.getChildValue("data");
// Note: If the value isn't valid, we're not adding an error here, as the test to the
// child Base64Binary will catch it and we don't want to log it twice
boolean bok = isValidBase64(b64);
boolean bok = Base64Util.isValidBase64(b64);
if (bok && element.hasChild("size", false)) {
size = countBase64DecodedBytes(b64);
size = Base64Util.countBase64DecodedBytes(b64);
String sz = element.getChildValue("size");
ok = rule(errors, NO_RULE_DATE, IssueType.STRUCTURE, element.line(), element.col(), path, Long.toString(size).equals(sz), I18nConstants.TYPE_SPECIFIC_CHECKS_DT_ATT_SIZE_CORRECT, sz, size) && ok;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package org.hl7.fhir.validation.instance.utils;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.commons.codec.binary.Base64InputStream;

import org.hl7.fhir.utilities.Utilities;

public class Base64Util {
/**
* Technically this is not bulletproof as some invalid base64 won't be caught,
* but I think it's good enough. The original code used Java8 Base64 decoder
* but I've replaced it with a regex for 2 reasons:
* 1. This code will run on any version of Java
* 2. This code doesn't actually decode, which is much easier on memory use for big payloads
*/
public static boolean isValidBase64(String theEncoded) {
if (theEncoded == null) {
return false;
}
int charCount = 0;
boolean ok = true;
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
continue;
}
if (Character.isLetterOrDigit(nextChar)) {
charCount++;
}
if (nextChar == '/' || nextChar == '=' || nextChar == '+') {
charCount++;
}
}

if (charCount > 0 && charCount % 4 != 0) {
ok = false;
}
return ok;
}

public static boolean base64HasWhitespace(String theEncoded) {
if (theEncoded == null) {
return false;
}
for (int i = 0; i < theEncoded.length(); i++) {
char nextChar = theEncoded.charAt(i);
if (Utilities.isWhitespace(nextChar)) {
return true;
}
}
return false;
}

public static int countBase64DecodedBytes(String theEncoded) {
Base64InputStream inputStream = new Base64InputStream(new ByteArrayInputStream(theEncoded.getBytes(StandardCharsets.UTF_8)));
try {
try {
for (int counter = 0; ; counter++) {
if (inputStream.read() == -1) {
return counter;
}
}
} finally {
inputStream.close();
}
} catch (IOException e) {
throw new IllegalStateException(e); // should not happen
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.hl7.fhir.validation.instance.utils;

import java.util.HashSet;
import java.util.Set;

import org.hl7.fhir.r5.context.IWorkerContext;
import org.hl7.fhir.utilities.i18n.I18nConstants;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.validation.instance.utils.Base64Util;

public class UrlUtil {
public static String checkValidUrl(String value, IWorkerContext context) {
if (value == null) {
return null;
}
if (Utilities.noString(value)) {
return context.formatMessage(I18nConstants.XHTML_URL_EMPTY);
}

if (value.startsWith("data:")) {
String[] p = value.substring(5).split("\\,");
if (p.length < 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_NO_DATA, value);
} else if (p.length > 2) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID_COMMA, value);
} else if (p[0].endsWith(";base64") && !Base64Util.isValidBase64(p[1])) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_DATA_INVALID, value);
} else {
if (p[0].startsWith(" ")) {
p[0] = Utilities.trimWS(p[0]);
}
String mimetype = (p[0].endsWith(";base64")) ? p[0].substring(0, p[0].lastIndexOf(";")) : p[0];
if (!mimetype.trim().isEmpty()) {
String mMsg = MimeTypeUtil.checkValidMimeType(mimetype);
if (mMsg != null) {
return context.formatMessage(I18nConstants.XHTML_URL_DATA_MIMETYPE, value, mMsg);
}
}
}
return null;
} else {
Set<Character> invalidChars = new HashSet<>();
int c = 0;
for (char ch : value.toCharArray()) {
if (!(Character.isDigit(ch) || Character.isAlphabetic(ch) || Utilities.existsInList(ch, ';', '?', ':', '@', '&', '=', '+', '$', '.', ',', '/', '%', '-', '_', '~', '#', '[', ']', '!', '\'', '(', ')', '*', '|' ))) {
c++;
invalidChars.add(ch);
}
}
if (invalidChars.isEmpty()) {
return null;
} else {
return context.formatMessagePlural(c, I18nConstants.XHTML_URL_INVALID_CHARS, invalidChars.toString());
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package org.hl7.fhir.validation.instance.utils;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.junit.jupiter.params.provider.CsvSource;

import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertEquals;

public class Base64UtilTest {

@ParameterizedTest
@ValueSource(strings = {
"AAAA",
"AAAABBBB",
"AA AA BB BB ",
"ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789 /+===="
})
public void testValidBase64(String base64) {
assertTrue(Base64Util.isValidBase64(base64));
}

@ParameterizedTest
@ValueSource(strings = {
"A==", // Improperly padded
"A(B=", // Invalid character in the defualt encoding
})
public void testInvalidBase64(String base64) {
assertFalse(Base64Util.isValidBase64(base64));
}

@ParameterizedTest
@ValueSource(strings = {
"AA AA",
"AA AA",
"AAAA ",
"AA\nAA",
})
public void testBase64WithWhitespace(String base64) {
assertTrue(Base64Util.base64HasWhitespace(base64));
}

public void testBase64WithoutWhitespace(String base64) {
assertFalse(Base64Util.base64HasWhitespace("AAAA"));
}

@ParameterizedTest
@CsvSource({
"AAAA, 3",
"AAAABBBB, 6",
"AAA=, 2",
})
public void testValidBase64(String base64, int length) {
assertEquals(Base64Util.countBase64DecodedBytes(base64), length);
}

}
Loading

0 comments on commit 4d64736

Please sign in to comment.