diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java index 772ff738..a50b03b0 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java @@ -1,5 +1,6 @@ package org.aim42.htmlsanitycheck; +import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Getter; @@ -32,11 +33,18 @@ public class Configuration { File sourceDir; File checkingResultsDir; File junitResultsDir; - Boolean consoleReport; - Boolean failOnErrors; - Integer httpConnectionTimeout; - Boolean ignoreLocalhost; - Boolean ignoreIPAddresses; + @Builder.Default + Boolean consoleReport = false; + @Builder.Default + Boolean failOnErrors = false; + @Builder.Default + Integer httpConnectionTimeout = 5000; + @Getter(AccessLevel.NONE) + @Builder.Default + Boolean ignoreLocalhost = false; + @Getter(AccessLevel.NONE) + @Builder.Default + Boolean ignoreIPAddresses = false; /* * Explanation for configuring http status codes: * The standard http status codes are defined in class @link NetUtil and can @@ -155,4 +163,12 @@ public void validate() throws MisconfigurationException { throw new MisconfigurationException("checks to execute have to be a non-empty list"); } } + + public boolean isIgnoreLocalhost() { + return ignoreLocalhost != null && ignoreLocalhost; + } + + public boolean isIgnoreIPAddresses() { + return ignoreIPAddresses != null && ignoreIPAddresses; + } } diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java index e7addd3a..5ff2ad56 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/BrokenHttpLinksChecker.java @@ -24,6 +24,10 @@ @Slf4j class BrokenHttpLinksChecker extends Checker { + static { + TrustAllCertificates.install(); + } + // get the (configured) statusCodes, just syntactic sugar... private final Set successCodes; private final Set warningCodes; @@ -97,92 +101,85 @@ private void checkAllHttpLinks() { protected void doubleCheckSingleHttpLink(String href) { - - - // to create appropriate error messages - String problem; - // bookkeeping: getCheckingResults().incNrOfChecks(); try { URL url = new URL(href); - - // check if localhost-URL checkIfLocalhostURL(url, href); - - // check if (numerical) IP address checkIfIPAddress(url, href); + checkHttpLinkWithRetry(url, href); + } catch (MalformedURLException exception) { + Finding malformedURLFinding = new Finding("malformed URL exception with href=" + href); + getCheckingResults().addFinding(malformedURLFinding); + } + } + + private void checkHttpLinkWithRetry(URL url, String href) { + String problem; + try { + HttpURLConnection firstConnection = getNewURLConnection(url); - try { - HttpURLConnection firstConnection = getNewURLConnection(url); + // try to connect + firstConnection.connect(); + int responseCode = firstConnection.getResponseCode(); - // try to connect - firstConnection.connect(); - int responseCode = firstConnection.getResponseCode(); + // issue 218 and 219: some web servers respond with 403 or 405 + // when given HEAD requests. Therefore, try to GET + if (successCodes.contains(responseCode)) { + return; + } + // issue 244: special case for redirects + // thanks to https://stackoverflow.com/questions/39718059/read-from-url-in-groovy-with-redirect + else if (Web.HTTP_REDIRECT_CODES.contains(responseCode)) { + String newLocation; + if (firstConnection.getHeaderField("Location") != null) { + newLocation = firstConnection.getHeaderField("Location"); + + problem = String.format("Warning: %s returned statuscode %d, new location: %s", href, responseCode, newLocation); + getCheckingResults().addFinding(new Finding(problem)); - // issue 218 and 219: some web servers respond with 403 or 405 - // when given HEAD requests. Therefore, try GET - if (successCodes.contains(responseCode)) { + } + } + // in case of errors or warnings, + // try again with GET. + else { + HttpURLConnection secondConnection = getNewURLConnection(url); + secondConnection.setRequestMethod("GET"); + int finalResponseCode = secondConnection.getResponseCode(); + secondConnection.disconnect(); + + if (successCodes.contains(finalResponseCode)) { return; + } else if (warningCodes.contains(finalResponseCode)) { + problem = "Warning:"; + } else if (errorCodes.contains(finalResponseCode)) { + problem = "Error:"; + } else { + problem = "Error: Unknown or unclassified response code:"; } - // issue 244: special case for redirects - // thanks to https://stackoverflow.com/questions/39718059/read-from-url-in-groovy-with-redirect - else if (Web.HTTP_REDIRECT_CODES.contains(responseCode)) { - String newLocation; - if (firstConnection.getHeaderField("Location") != null) { - newLocation = firstConnection.getHeaderField("Location"); - problem = String.format("Warning: %s returned statuscode %d, new location: %s", href, responseCode, newLocation); - getCheckingResults().addFinding(new Finding(problem)); + problem += String.format(" %s returned statuscode %d.", href, responseCode); - } - } - // in case of errors or warnings, - // try again with GET. - else { - HttpURLConnection secondConnection = getNewURLConnection(url); - secondConnection.setRequestMethod("GET"); - int finalResponseCode = secondConnection.getResponseCode(); - secondConnection.disconnect(); - - if (successCodes.contains(finalResponseCode)) { - return; - } else if (warningCodes.contains(finalResponseCode)) { - problem = "Warning:"; - } else if (errorCodes.contains(finalResponseCode)) { - problem = "Error:"; - } else { - problem = "Error: Unknown or unclassified response code:"; - } - - problem += String.format(" %s returned statuscode %d.", href, responseCode); + getCheckingResults().addFinding(new Finding(problem)); - getCheckingResults().addFinding(new Finding(problem)); + } // else - } // else + // cleanup firstConnection + firstConnection.disconnect(); - // cleanup firstConnection - firstConnection.disconnect(); - - } catch (UnknownHostException exception) { - Finding unknownHostFinding = new Finding("Unknown host with href=" + href); - getCheckingResults().addFinding(unknownHostFinding); - } catch (IOException exception) { - Finding someException = new Finding("exception " + exception + " with href=" + href); - getCheckingResults().addFinding(someException); - } - } catch (MalformedURLException exception) { - Finding malformedURLFinding = new Finding("malformed URL exception with href=" + href); - getCheckingResults().addFinding(malformedURLFinding); + } catch (UnknownHostException exception) { + Finding unknownHostFinding = new Finding("Unknown host with href=" + href); + getCheckingResults().addFinding(unknownHostFinding); + } catch (IOException exception) { + Finding someException = new Finding("exception " + exception + " with href=" + href); + getCheckingResults().addFinding(someException); } } private HttpURLConnection getNewURLConnection(URL url) throws IOException { - TrustAllCertificates.install(); - HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod("HEAD"); @@ -205,7 +202,7 @@ private HttpURLConnection getNewURLConnection(URL url) throws IOException { // if configured, ip addresses in URLs yield warnings private void checkIfIPAddress(URL url, String href) { - if (!getMyConfig().getIgnoreIPAddresses()) { + if (!getMyConfig().isIgnoreIPAddresses()) { String host = url.getHost(); if (host.matches("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")) { @@ -217,7 +214,7 @@ private void checkIfIPAddress(URL url, String href) { // if configured ,localhost-URLs yield warnings! private void checkIfLocalhostURL(URL url, String href) { - if (!getMyConfig().getIgnoreLocalhost()) { + if (!getMyConfig().isIgnoreLocalhost()) { String host = url.getHost(); if (("localhost".equals(host)) || host.startsWith("127.0.0")) { Finding localhostWarning = new Finding("Warning: localhost urls indicates suspicious environment dependency: href=" + href); diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/DuplicateIdChecker.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/DuplicateIdChecker.java index b0553676..4d9972d4 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/DuplicateIdChecker.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/DuplicateIdChecker.java @@ -11,7 +11,6 @@ @Slf4j public class DuplicateIdChecker extends Checker { - private Set idStringsSet; private List idStringsList; public DuplicateIdChecker(Configuration pConfig) { @@ -27,6 +26,7 @@ protected void initCheckingResultsDescription() { @Override protected SingleCheckResults check(final HtmlPage pageToCheck) { + Set idStringsSet; log.trace("Checking '{}'", pageToCheck.getFile()); //get list of all tagsWithId '<... id="XYZ"...' in html file @@ -55,20 +55,4 @@ private void checkForDuplicateDefinition(final String idString) { } } - - public Set getIdStringsSet() { - return idStringsSet; - } - - public void setIdStringsSet(Set idStringsSet) { - this.idStringsSet = idStringsSet; - } - - public List getIdStringsList() { - return idStringsList; - } - - public void setIdStringsList(List idStringsList) { - this.idStringsList = idStringsList; - } } diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/ImageMapChecker.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/ImageMapChecker.java index db04f626..41a5164e 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/ImageMapChecker.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/ImageMapChecker.java @@ -91,8 +91,10 @@ private void checkDuplicateMapNames() { Set mapNameSet = new HashSet<>(mapNames); mapNameSet.stream() - .peek(a -> getCheckingResults().incNrOfChecks()) - .filter(name -> mapNames.stream().filter(name2 -> name2.equals(name)).count() > 1) + .filter(name -> { + getCheckingResults().incNrOfChecks(); + return mapNames.stream().filter(name2 -> name2.equals(name)).count() > 1; + }) .forEach(mapName -> getCheckingResults().addFinding( new Finding(mapNames.stream().filter(name2 -> name2.equals(mapName)).count() + " imagemaps with identical name \"" + mapName + "\" exist."))); @@ -138,8 +140,10 @@ private void checkAreaHrefsForMapName(String mapName) { // TODO replace checkEmptyMaps with additional check here areaHrefs.stream() - .peek(a -> getCheckingResults().incNrOfChecks()) - .filter(Web::isCrossReference) + .filter(href -> { + getCheckingResults().incNrOfChecks(); + return Web.isCrossReference(href); + }) .forEach(href -> checkLocalHref(href, mapName, areaHrefs)); } diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingAltInImageTagsChecker.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingAltInImageTagsChecker.java index 593f95bd..6cd4a316 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingAltInImageTagsChecker.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingAltInImageTagsChecker.java @@ -31,8 +31,8 @@ protected SingleCheckResults check(final HtmlPage pageToCheck) { getCheckingResults().setNrOfChecks(pageToCheck.getAllImageTags().size()); // see HtmlPageSpec for behavior: missing or empty alt-attributes are included... - pageToCheck.getAllImageTagsWithMissingAltAttribute().stream() - .forEach(element -> reportSingleImageTagWithMissingAlt(element)); + pageToCheck.getAllImageTagsWithMissingAltAttribute() + .forEach(this::reportSingleImageTagWithMissingAlt); return getCheckingResults(); diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingImageFilesChecker.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingImageFilesChecker.java index a3940468..19f37ff0 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingImageFilesChecker.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/MissingImageFilesChecker.java @@ -45,7 +45,7 @@ protected SingleCheckResults check(final HtmlPage pageToCheck) { //get list of all image-tags " localResourcesSet = new HashSet<>(localResourcesList); - logger.debug("local resources set: " + localResourcesSet); + logger.debug("local resources set: {}", localResourcesSet); final File file1 = pageToCheck.getFile(); final File file = (file1 == null ? null : file1.getParentFile()); @@ -82,7 +83,7 @@ private void checkAllLocalResources(Set localResources) { private void checkSingleLocalResource(String localResource) { // the localResource is either path+filename or filename or directory - logger.debug("single resource to be checked: + " + localResource); + logger.debug("single resource to be checked: {}", localResource); // bookkeeping: getCheckingResults().incNrOfChecks(); @@ -92,13 +93,12 @@ private void checkSingleLocalResource(String localResource) { try { localResourcePath = new URI(localResource).getPath(); } catch (URISyntaxException e) { - throw new RuntimeException(e); + throw new InvalidUriSyntaxException(e); } if (localResourcePath == null) { // For example, javascript:; return; - } diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/UnknownCheckerException.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/UnknownCheckerException.java index 7fbc2125..7d754a82 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/UnknownCheckerException.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/check/UnknownCheckerException.java @@ -4,8 +4,4 @@ public class UnknownCheckerException extends RuntimeException { public UnknownCheckerException(String message) { super(message); } - - public UnknownCheckerException(String message, String checkerName) { - super(message + ": " + checkerName); - } } diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/tools/InvalidUriSyntaxException.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/tools/InvalidUriSyntaxException.java new file mode 100644 index 00000000..fb03bdaa --- /dev/null +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/tools/InvalidUriSyntaxException.java @@ -0,0 +1,26 @@ +package org.aim42.htmlsanitycheck.tools; + +public class InvalidUriSyntaxException extends RuntimeException { + public InvalidUriSyntaxException(Throwable cause) { + super(cause); + } +} + +/************************************************************************* + * This is free software - without ANY guarantee! + * + * Copyright Dr. Gernot Starke, arc42.org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + *********************************************************************** */ \ No newline at end of file diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/tools/Web.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/tools/Web.java index e90a4e36..b1ae241e 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/tools/Web.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/tools/Web.java @@ -47,6 +47,7 @@ private static Set extendedByRedirects(Set first) { return Collections.unmodifiableSet(result); } + @SuppressWarnings("squid:S2386") // The Set is computed public static final Set HTTP_WARNING_CODES = extendedByRedirects(new HashSet<>(Arrays.asList( // tag::HTTP_WARNING_CODES[] 100, 101, 102 @@ -67,6 +68,7 @@ private static Set extendedByRedirects(Set first) { // end::HTTP_ERROR_CODES[] ))); + @SuppressWarnings("squid:S2386") // The Set is computed public static final Set POSSIBLE_EXTENSIONS = initExtensions(); private static final Pattern httpPattern = Pattern.compile("^https?:"); @@ -237,12 +239,6 @@ public static boolean isLocalResource(String link) { } } - public static class InvalidUriSyntaxException extends RuntimeException { - public InvalidUriSyntaxException(Throwable cause) { - super(cause); - } - } - /** * helper to identify "file scheme" */ diff --git a/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/tools/WebSpec.groovy b/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/tools/WebSpec.groovy index 1ea9866e..a3a44221 100644 --- a/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/tools/WebSpec.groovy +++ b/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/tools/WebSpec.groovy @@ -116,7 +116,7 @@ class WebSpec extends Specification { Web.isLocalResource(invalidUri) then: - thrown(Web.InvalidUriSyntaxException) + thrown(InvalidUriSyntaxException) where: invalidUri << [