From 0de53bc3f2ec830ac1c46e4b41a7152f872aae36 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Thu, 24 May 2018 00:42:44 +0200 Subject: [PATCH 01/28] Replace arrays with lists --- pom.xml | 10 +++--- .../serritor/api/HttpHeadResponse.java | 34 +++++-------------- 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/pom.xml b/pom.xml index 7467534..85de756 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.github.peterbencze serritor - 1.3.1 + 1.3.2 jar Serritor @@ -54,17 +54,17 @@ org.seleniumhq.selenium selenium-java - 3.11.0 + 3.12.0 org.seleniumhq.selenium htmlunit-driver - 2.29.3 + 2.30.0 com.google.guava guava - 24.1-jre + 25.0-jre junit @@ -75,7 +75,7 @@ org.mockito mockito-core - 2.18.0 + 2.18.3 test diff --git a/src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java b/src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java index 93f2aed..d1cd6f7 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java +++ b/src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java @@ -15,9 +15,10 @@ */ package com.github.peterbencze.serritor.api; +import java.util.Arrays; +import java.util.List; import java.util.Locale; import org.apache.http.Header; -import org.apache.http.HeaderIterator; import org.apache.http.HttpResponse; import org.apache.http.ProtocolVersion; import org.apache.http.StatusLine; @@ -48,10 +49,10 @@ public boolean containsHeader(final String name) { /** * Returns all the headers of this response. * - * @return The array of headers + * @return The list of all the headers */ - public Header[] getAllHeaders() { - return response.getAllHeaders(); + public List
<Header> getAllHeaders() { + return Arrays.asList(response.getAllHeaders()); } /** @@ -68,10 +69,10 @@ public Header getFirstHeader(final String name) { * Returns all the headers with a specified name of this response. * * @param name The name of the headers - * @return The array of headers + * @return The list of headers with a specified name */ - public Header[] getHeaders(final String name) { - return response.getHeaders(name); + public List<Header>
getHeaders(final String name) { + return Arrays.asList(response.getHeaders(name)); } /** @@ -93,25 +94,6 @@ public ProtocolVersion getProtocolVersion() { return response.getProtocolVersion(); } - /** - * Returns an iterator of all the headers. - * - * @return An iterator of all the headers - */ - public HeaderIterator headerIterator() { - return response.headerIterator(); - } - - /** - * Returns an iterator of the headers with a given name. - * - * @param name The name of the headers - * @return An iterator of the headers with a given name - */ - public HeaderIterator headerIterator(final String name) { - return response.headerIterator(name); - } - /** * Obtains the locale of this response. * From 56945858ecf5bf8884976e0cba37e6b0ee430093 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 27 May 2018 11:35:36 +0200 Subject: [PATCH 02/28] Add cookie store update mechanism for the HTTP client --- .../peterbencze/serritor/api/BaseCrawler.java | 53 +++++++++++++++---- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index a35f72e..d004e2a 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -32,13 +32,17 @@ import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.commons.lang3.SerializationUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import org.apache.http.Header; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpHead; import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.impl.client.BasicCookieStore; import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.cookie.BasicClientCookie; +import org.openqa.selenium.Cookie; import org.openqa.selenium.JavascriptExecutor; import org.openqa.selenium.TimeoutException; import org.openqa.selenium.WebDriver; @@ -54,19 +58,12 @@ public abstract class BaseCrawler { private final CrawlerConfiguration config; - // Indicates if the crawler is currently running or not private boolean isStopped; - - // Indicates if the crawling should be stopped (used for cancelling the loop in the run method) private boolean stopCrawling; - - // Used for sending HTTP HEAD requests and receiving associate responses + private BasicCookieStore cookieStore; private HttpClient httpClient; - private WebDriver webDriver; - private CrawlFrontier crawlFrontier; - private CrawlDelayMechanism crawlDelayMechanism; protected BaseCrawler(final CrawlerConfiguration config) { @@ -105,7 +102,10 @@ private void start(final WebDriver driver, final CrawlFrontier frontierToUse) { Validate.validState(isStopped, "The crawler is already started."); isStopped = false; - httpClient = HttpClientBuilder.create().build(); + cookieStore = new BasicCookieStore(); + httpClient = HttpClientBuilder.create() + .setDefaultCookieStore(cookieStore) + .build(); webDriver = Validate.notNull(driver, "The webdriver cannot be null."); crawlFrontier = frontierToUse; crawlDelayMechanism = createCrawlDelayMechanism(); @@ -263,6 +263,9 @@ private void run() { } else { onResponseTimeout(htmlResponse); } + + // Update the client's cookie store, so it will have the same state as the browser. 
+ updateClientCookieStore(); } else { // URLs that point to non-HTML content should not be opened in the browser @@ -339,6 +342,38 @@ private void performDelay() { } } + /** + * Adds all the browser cookies for the current domain to the HTTP client's + * cookie store, replacing any existing equivalent ones. + */ + private void updateClientCookieStore() { + webDriver.manage() + .getCookies() + .stream() + .map(BaseCrawler::convertBrowserCookie) + .forEach(cookieStore::addCookie); + } + + /** + * Converts a browser cookie to a HTTP client one. + * + * @param browserCookie The browser cookie to be converted + * @return The converted HTTP client cookie + */ + private static BasicClientCookie convertBrowserCookie(final Cookie browserCookie) { + BasicClientCookie clientCookie = new BasicClientCookie(browserCookie.getName(), browserCookie.getValue()); + clientCookie.setDomain(browserCookie.getDomain()); + clientCookie.setPath(browserCookie.getPath()); + clientCookie.setExpiryDate(browserCookie.getExpiry()); + clientCookie.setSecure(browserCookie.isSecure()); + + if (browserCookie.isHttpOnly()) { + clientCookie.setAttribute("httponly", StringUtils.EMPTY); + } + + return clientCookie; + } + /** * Called when the crawler is about to begin its operation. */ From 72486419a229f150b87086f8b185ba0ff18b3ac3 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 27 May 2018 13:21:28 +0200 Subject: [PATCH 03/28] Modify functional interface of validator --- .../serritor/api/helper/UrlFinder.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java index 24ca816..4015764 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java +++ b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java @@ -24,7 +24,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.function.Function; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -44,7 +44,7 @@ public final class UrlFinder { private final Set urlPatterns; private final Set locatingMechanisms; private final Set attributes; - private final Function validator; + private final Predicate validator; private UrlFinder(final UrlFinderBuilder builder) { urlPatterns = builder.urlPatterns; @@ -97,7 +97,7 @@ private List findUrlsInAttributeValue(final String attributeValue) { while (urlPatternMatcher.find()) { String foundUrl = urlPatternMatcher.group().trim(); - if (validator.apply(foundUrl)) { + if (validator.test(foundUrl)) { foundUrls.add(foundUrl); } } @@ -110,13 +110,13 @@ public static final class UrlFinderBuilder { private static final Set DEFAULT_LOCATING_MECHANISMS = Sets.newHashSet(By.tagName("a")); private static final Set DEFAULT_ATTRIBUTES = Sets.newHashSet("href"); - private static final Function DEFAULT_VALIDATOR = UrlFinderBuilder::isValidUrl; + private static final Predicate DEFAULT_VALIDATOR = UrlFinderBuilder::isValidUrl; private final Set urlPatterns; private Set locatingMechanisms; private Set attributes; - private Function validator; + private Predicate validator; /** * Constructs a UrlFinderBuilder instance that can be used @@ -195,12 +195,12 @@ public UrlFinderBuilder setAttribute(final String attribute) { } /** - * Sets a function to be used for validating found URLs. 
+ * Sets a predicate to be used for validating found URLs. * - * @param validator The validator function + * @param validator The validator predicate * @return The UrlFinderBuilder instance */ - public UrlFinderBuilder setValidator(final Function validator) { + public UrlFinderBuilder setValidator(final Predicate validator) { Validate.notNull(validator, "The validator function cannot be null."); this.validator = validator; From cf77bb81b8680ae2eca0143ddd88b84e8c6d3a5f Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 27 May 2018 20:41:41 +0200 Subject: [PATCH 04/28] Refactor run method --- .../peterbencze/serritor/api/BaseCrawler.java | 94 +++++++++---------- 1 file changed, 42 insertions(+), 52 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index d004e2a..c31bf81 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -206,75 +206,65 @@ private void run() { CrawlCandidate currentCandidate = crawlFrontier.getNextCandidate(); URI currentCandidateUrl = currentCandidate.getCandidateUrl(); - String currentRequestUrlAsString = currentCandidateUrl.toString(); - - HttpHeadResponse httpHeadResponse; URI responseUrl = currentCandidateUrl; + HttpClientContext context = HttpClientContext.create(); - try { - HttpClientContext context = HttpClientContext.create(); + // Update the client's cookie store, so it will have the same state as the browser. + updateClientCookieStore(); + try { // Send an HTTP HEAD request to the current URL to determine its availability and content type - httpHeadResponse = getHttpHeadResponse(currentCandidateUrl, context); + HttpHeadResponse httpHeadResponse = getHttpHeadResponse(currentCandidateUrl, context); // If the request has been redirected, get the final URL List redirectLocations = context.getRedirectLocations(); if (redirectLocations != null) { responseUrl = redirectLocations.get(redirectLocations.size() - 1); } - } catch (IOException ex) { - UnsuccessfulRequest unsuccessfulRequest = new UnsuccessfulRequestBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), - currentCandidate.getCrawlRequest()) - .setException(ex) - .build(); - - onUnsuccessfulRequest(unsuccessfulRequest); - continue; - } - - // If the request has been redirected, a new crawl request should be created for the redirected URL - if (!responseUrl.toString().equals(currentRequestUrlAsString)) { - CrawlRequest redirectedCrawlRequest = new CrawlRequestBuilder(responseUrl).setPriority(currentCandidate.getPriority()).build(); - crawlFrontier.feedRequest(redirectedCrawlRequest, false); - - continue; - } - - // Check if the content of the response is HTML - if (isContentHtml(httpHeadResponse)) { - boolean timedOut = false; - - try { - // Open the URL in the browser - webDriver.get(currentRequestUrlAsString); - } catch (TimeoutException ex) { - timedOut = true; - } - HtmlResponse htmlResponse = new HtmlResponseBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), - currentCandidate.getCrawlRequest()) - .setHttpHeadResponse(httpHeadResponse) - .setWebDriver(webDriver) - .build(); - - // Check if the request has timed out - if (!timedOut) { - onResponseComplete(htmlResponse); + if (!responseUrl.equals(currentCandidateUrl)) { + // If the request has been redirected, a new crawl request should be created for the redirected URL + + CrawlRequest 
redirectedCrawlRequest = new CrawlRequestBuilder(responseUrl).setPriority(currentCandidate.getPriority()).build(); + crawlFrontier.feedRequest(redirectedCrawlRequest, false); + } else if (isContentHtml(httpHeadResponse)) { + boolean isTimedOut = false; + + try { + // Open the URL in the browser + webDriver.get(currentCandidateUrl.toString()); + } catch (TimeoutException exception) { + isTimedOut = true; + } + + HtmlResponse htmlResponse = new HtmlResponseBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), + currentCandidate.getCrawlRequest()) + .setHttpHeadResponse(httpHeadResponse) + .setWebDriver(webDriver) + .build(); + + if (!isTimedOut) { + onResponseComplete(htmlResponse); + } else { + onResponseTimeout(htmlResponse); + } } else { - onResponseTimeout(htmlResponse); - } + // URLs that point to non-HTML content should not be opened in the browser - // Update the client's cookie store, so it will have the same state as the browser. - updateClientCookieStore(); - } else { - // URLs that point to non-HTML content should not be opened in the browser + NonHtmlResponse nonHtmlResponse = new NonHtmlResponseBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), + currentCandidate.getCrawlRequest()) + .setHttpHeadResponse(httpHeadResponse) + .build(); - NonHtmlResponse nonHtmlResponse = new NonHtmlResponseBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), + onNonHtmlResponse(nonHtmlResponse); + } + } catch (IOException exception) { + UnsuccessfulRequest unsuccessfulRequest = new UnsuccessfulRequestBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), currentCandidate.getCrawlRequest()) - .setHttpHeadResponse(httpHeadResponse) + .setException(exception) .build(); - onNonHtmlResponse(nonHtmlResponse); + onUnsuccessfulRequest(unsuccessfulRequest); } performDelay(); From 808990285ba7e0bb589344ac6bf24ab53b7c3854 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 27 May 2018 22:43:35 +0200 Subject: [PATCH 05/28] Remove http head response from callback parameters, refact --- .../peterbencze/serritor/api/BaseCrawler.java | 71 +++++------ .../serritor/api/HtmlResponse.java | 48 ++------ .../serritor/api/HttpHeadResponse.java | 114 ------------------ .../serritor/api/NonHtmlResponse.java | 39 ++---- .../serritor/api/UnsuccessfulRequest.java | 34 ++---- .../serritor/internal/CallbackParameter.java | 30 ++--- .../serritor/internal/CrawlCandidate.java | 10 ++ 7 files changed, 77 insertions(+), 269 deletions(-) delete mode 100644 src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index c31bf81..2e12f06 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -16,9 +16,6 @@ package com.github.peterbencze.serritor.api; import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder; -import com.github.peterbencze.serritor.api.HtmlResponse.HtmlResponseBuilder; -import com.github.peterbencze.serritor.api.NonHtmlResponse.NonHtmlResponseBuilder; -import com.github.peterbencze.serritor.api.UnsuccessfulRequest.UnsuccessfulRequestBuilder; import com.github.peterbencze.serritor.internal.AdaptiveCrawlDelayMechanism; import com.github.peterbencze.serritor.internal.CrawlCandidate; import com.github.peterbencze.serritor.internal.CrawlDelayMechanism; @@ 
-202,69 +199,56 @@ private void run() { onBegin(); while (!stopCrawling && crawlFrontier.hasNextCandidate()) { - // Get the next crawl candidate from the queue CrawlCandidate currentCandidate = crawlFrontier.getNextCandidate(); - - URI currentCandidateUrl = currentCandidate.getCandidateUrl(); - URI responseUrl = currentCandidateUrl; + URI candidateUrl = currentCandidate.getCandidateUrl(); + URI refererUrl = currentCandidate.getRefererUrl(); + int crawlDepth = currentCandidate.getCrawlDepth(); + CrawlRequest crawlRequest = currentCandidate.getCrawlRequest(); + URI responseUrl = candidateUrl; HttpClientContext context = HttpClientContext.create(); + HttpResponse httpHeadResponse = null; + boolean isUnsuccessfulRequest = false; // Update the client's cookie store, so it will have the same state as the browser. updateClientCookieStore(); try { // Send an HTTP HEAD request to the current URL to determine its availability and content type - HttpHeadResponse httpHeadResponse = getHttpHeadResponse(currentCandidateUrl, context); + httpHeadResponse = getHttpHeadResponse(candidateUrl, context); + } catch (IOException exception) { + onUnsuccessfulRequest(new UnsuccessfulRequest(refererUrl, crawlDepth, crawlRequest, exception)); + isUnsuccessfulRequest = true; + } - // If the request has been redirected, get the final URL + if (!isUnsuccessfulRequest) { List redirectLocations = context.getRedirectLocations(); if (redirectLocations != null) { + // If the request has been redirected, get the final URL responseUrl = redirectLocations.get(redirectLocations.size() - 1); } - if (!responseUrl.equals(currentCandidateUrl)) { + if (!responseUrl.equals(candidateUrl)) { // If the request has been redirected, a new crawl request should be created for the redirected URL + + CrawlRequestBuilder builder = new CrawlRequestBuilder(responseUrl).setPriority(currentCandidate.getPriority()); + currentCandidate.getMetadata().ifPresent(builder::setMetadata); - CrawlRequest redirectedCrawlRequest = new CrawlRequestBuilder(responseUrl).setPriority(currentCandidate.getPriority()).build(); - crawlFrontier.feedRequest(redirectedCrawlRequest, false); + crawlFrontier.feedRequest(builder.build(), false); } else if (isContentHtml(httpHeadResponse)) { - boolean isTimedOut = false; + HtmlResponse response = new HtmlResponse(refererUrl, crawlDepth, crawlRequest, webDriver); try { // Open the URL in the browser - webDriver.get(currentCandidateUrl.toString()); + webDriver.get(candidateUrl.toString()); } catch (TimeoutException exception) { - isTimedOut = true; + onResponseTimeout(response); } - HtmlResponse htmlResponse = new HtmlResponseBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), - currentCandidate.getCrawlRequest()) - .setHttpHeadResponse(httpHeadResponse) - .setWebDriver(webDriver) - .build(); - - if (!isTimedOut) { - onResponseComplete(htmlResponse); - } else { - onResponseTimeout(htmlResponse); - } + onResponseComplete(response); } else { // URLs that point to non-HTML content should not be opened in the browser - - NonHtmlResponse nonHtmlResponse = new NonHtmlResponseBuilder(currentCandidate.getRefererUrl(), currentCandidate.getCrawlDepth(), - currentCandidate.getCrawlRequest()) - .setHttpHeadResponse(httpHeadResponse) - .build(); - - onNonHtmlResponse(nonHtmlResponse); + onNonHtmlResponse(new NonHtmlResponse(refererUrl, crawlDepth, crawlRequest)); } - } catch (IOException exception) { - UnsuccessfulRequest unsuccessfulRequest = new UnsuccessfulRequestBuilder(currentCandidate.getRefererUrl(), 
currentCandidate.getCrawlDepth(), - currentCandidate.getCrawlRequest()) - .setException(exception) - .build(); - - onUnsuccessfulRequest(unsuccessfulRequest); } performDelay(); @@ -279,10 +263,9 @@ private void run() { * @param destinationUrl The URL to crawl * @return The HTTP HEAD response */ - private HttpHeadResponse getHttpHeadResponse(final URI destinationUrl, final HttpClientContext context) throws IOException { + private HttpResponse getHttpHeadResponse(final URI destinationUrl, final HttpClientContext context) throws IOException { HttpHead headRequest = new HttpHead(destinationUrl.toString()); - HttpResponse response = httpClient.execute(headRequest, context); - return new HttpHeadResponse(response); + return httpClient.execute(headRequest, context); } /** @@ -292,7 +275,7 @@ private HttpHeadResponse getHttpHeadResponse(final URI destinationUrl, final Htt * @return true if the content is HTML, false * otherwise */ - private static boolean isContentHtml(final HttpHeadResponse httpHeadResponse) { + private static boolean isContentHtml(final HttpResponse httpHeadResponse) { Header contentTypeHeader = httpHeadResponse.getFirstHeader("Content-Type"); return contentTypeHeader != null && contentTypeHeader.getValue().contains("text/html"); } diff --git a/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java b/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java index 442d493..563fa83 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java +++ b/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java @@ -26,23 +26,20 @@ */ public final class HtmlResponse extends CallbackParameter { - private final HttpHeadResponse httpHeadResponse; private final WebDriver webDriver; - private HtmlResponse(final HtmlResponseBuilder builder) { - super(builder); - - httpHeadResponse = builder.httpHeadResponse; - webDriver = builder.webDriver; - } - /** - * Returns the HTTP HEAD response. - * - * @return The HTTP HEAD response + * Constructs a HtmlResponse instance. 
+ * + * @param refererUrl The referer URL + * @param crawlDepth The current crawl depth + * @param crawlRequest The processed crawl request + * @param webDriver The WebDriver instance */ - public HttpHeadResponse getHttpHeadResponse() { - return httpHeadResponse; + public HtmlResponse(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest, final WebDriver webDriver) { + super(refererUrl, crawlDepth, crawlRequest); + + this.webDriver = webDriver; } /** @@ -53,29 +50,4 @@ public HttpHeadResponse getHttpHeadResponse() { public WebDriver getWebDriver() { return webDriver; } - - public static final class HtmlResponseBuilder extends CallbackParameterBuilder { - - private HttpHeadResponse httpHeadResponse; - private WebDriver webDriver; - - public HtmlResponseBuilder(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { - super(refererUrl, crawlDepth, crawlRequest); - } - - public HtmlResponseBuilder setHttpHeadResponse(final HttpHeadResponse httpHeadResponse) { - this.httpHeadResponse = httpHeadResponse; - return this; - } - - public HtmlResponseBuilder setWebDriver(final WebDriver webDriver) { - this.webDriver = webDriver; - return this; - } - - @Override - public HtmlResponse build() { - return new HtmlResponse(this); - } - } } diff --git a/src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java b/src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java deleted file mode 100644 index d1cd6f7..0000000 --- a/src/main/java/com/github/peterbencze/serritor/api/HttpHeadResponse.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2017 Peter Bencze. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.github.peterbencze.serritor.api; - -import java.util.Arrays; -import java.util.List; -import java.util.Locale; -import org.apache.http.Header; -import org.apache.http.HttpResponse; -import org.apache.http.ProtocolVersion; -import org.apache.http.StatusLine; - -/** - * Represents a response of a HTTP HEAD request. - * - * @author Peter Bencze - */ -public final class HttpHeadResponse { - - private final HttpResponse response; - - public HttpHeadResponse(final HttpResponse response) { - this.response = response; - } - - /** - * Checks if a certain header is present in this message. - * - * @param name The name of the header - * @return true if present, false otherwise - */ - public boolean containsHeader(final String name) { - return response.containsHeader(name); - } - - /** - * Returns all the headers of this response. - * - * @return The list of all the headers - */ - public List
<Header> getAllHeaders() { - return Arrays.asList(response.getAllHeaders()); - } - - /** - * Returns the first header with a specified name of this response. - * - * @param name The name of the header - * @return The first header with the specified name - */ - public Header getFirstHeader(final String name) { - return response.getFirstHeader(name); - } - - /** - * Returns all the headers with a specified name of this response. - * - * @param name The name of the headers - * @return The list of headers with a specified name - */ - public List<Header>
getHeaders(final String name) { - return Arrays.asList(response.getHeaders(name)); - } - - /** - * Returns the last header with a specified name of this response. - * - * @param name The name of the header - * @return The last header with a specified name - */ - public Header getLastHeader(final String name) { - return response.getLastHeader(name); - } - - /** - * Returns the protocol version this response is compatible with. - * - * @return The compatible protocol version - */ - public ProtocolVersion getProtocolVersion() { - return response.getProtocolVersion(); - } - - /** - * Obtains the locale of this response. - * - * @return The locale of this response - */ - public Locale getLocale() { - return response.getLocale(); - } - - /** - * Obtains the status line of this response. - * - * @return The status line of this response - */ - public StatusLine getStatusLine() { - return response.getStatusLine(); - } -} diff --git a/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java b/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java index 0d3e6cf..7de2862 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java +++ b/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java @@ -28,21 +28,15 @@ */ public final class NonHtmlResponse extends CallbackParameter { - private final HttpHeadResponse httpHeadResponse; - - private NonHtmlResponse(final NonHtmlResponseBuilder builder) { - super(builder); - - httpHeadResponse = builder.httpHeadResponse; - } - /** - * Returns the HTTP HEAD response. - * - * @return The HTTP HEAD response + * Constructs a NonHtmlResponse instance. + * + * @param refererUrl The referer URL + * @param crawlDepth The current crawl depth + * @param crawlRequest The processed crawl request */ - public HttpHeadResponse getHttpHeadResponse() { - return httpHeadResponse; + public NonHtmlResponse(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { + super(refererUrl, crawlDepth, crawlRequest); } /** @@ -54,23 +48,4 @@ public HttpHeadResponse getHttpHeadResponse() { public void downloadFile(final File destination) throws IOException { FileUtils.copyURLToFile(getCrawlRequest().getRequestUrl().toURL(), destination); } - - public static final class NonHtmlResponseBuilder extends CallbackParameterBuilder { - - private HttpHeadResponse httpHeadResponse; - - public NonHtmlResponseBuilder(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { - super(refererUrl, crawlDepth, crawlRequest); - } - - public NonHtmlResponseBuilder setHttpHeadResponse(final HttpHeadResponse httpHeadResponse) { - this.httpHeadResponse = httpHeadResponse; - return this; - } - - @Override - public NonHtmlResponse build() { - return new NonHtmlResponse(this); - } - } } diff --git a/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java b/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java index 7d379d5..97ca2cb 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java +++ b/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java @@ -28,10 +28,19 @@ public final class UnsuccessfulRequest extends CallbackParameter { private final IOException exception; - private UnsuccessfulRequest(final UnsuccessfulRequestBuilder builder) { - super(builder); + /** + * Constructs a UnsuccessfulRequest instance. 
+ * + * @param refererUrl The referer URL + * @param crawlDepth The current crawl depth + * @param crawlRequest The processed crawl request + * @param exception The exception that was thrown while trying to fulfill + * the request + */ + public UnsuccessfulRequest(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest, final IOException exception) { + super(refererUrl, crawlDepth, crawlRequest); - exception = builder.exception; + this.exception = exception; } /** @@ -43,23 +52,4 @@ private UnsuccessfulRequest(final UnsuccessfulRequestBuilder builder) { public IOException getException() { return exception; } - - public static final class UnsuccessfulRequestBuilder extends CallbackParameterBuilder { - - private IOException exception; - - public UnsuccessfulRequestBuilder(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { - super(refererUrl, crawlDepth, crawlRequest); - } - - public UnsuccessfulRequestBuilder setException(final IOException exception) { - this.exception = exception; - return this; - } - - @Override - public UnsuccessfulRequest build() { - return new UnsuccessfulRequest(this); - } - } } diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java b/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java index cb6ae0b..61f47df 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java @@ -30,10 +30,17 @@ public abstract class CallbackParameter { private final URI refererUrl; private final CrawlRequest crawlRequest; - protected CallbackParameter(final CallbackParameterBuilder builder) { - crawlDepth = builder.crawlDepth; - refererUrl = builder.refererUrl; - crawlRequest = builder.crawlRequest; + /** + * Base constructor for the callback parameters. 
+ * + * @param refererUrl The referer URL + * @param crawlDepth The current crawl depth + * @param crawlRequest The processed crawl request + */ + protected CallbackParameter(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { + this.refererUrl = refererUrl; + this.crawlDepth = crawlDepth; + this.crawlRequest = crawlRequest; } /** @@ -62,19 +69,4 @@ public final int getCrawlDepth() { public final CrawlRequest getCrawlRequest() { return crawlRequest; } - - public static abstract class CallbackParameterBuilder { - - private final URI refererUrl; - private final int crawlDepth; - private final CrawlRequest crawlRequest; - - public CallbackParameterBuilder(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { - this.refererUrl = refererUrl; - this.crawlDepth = crawlDepth; - this.crawlRequest = crawlRequest; - } - - public abstract CallbackParameter build(); - } } diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlCandidate.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlCandidate.java index b5041b9..1639570 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlCandidate.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlCandidate.java @@ -19,6 +19,7 @@ import com.google.common.net.InternetDomainName; import java.io.Serializable; import java.net.URI; +import java.util.Optional; /** * Represents a candidate for crawling that will be surely processed by the @@ -82,6 +83,15 @@ public int getCrawlDepth() { public int getPriority() { return crawlRequest.getPriority(); } + + /** + * Returns metadata associated with the request. + * + * @return The request's metadata + */ + public Optional getMetadata() { + return crawlRequest.getMetadata(); + } /** * Returns the crawl request from which this candidate was constructed. 
From 4f615bd9ef51d7385d542e3ef429c3c9d7a5b39a Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 27 May 2018 22:51:22 +0200 Subject: [PATCH 06/28] Add null check for metadata setter --- .../java/com/github/peterbencze/serritor/api/CrawlRequest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java index 4188a54..8036ac8 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.net.URI; import java.util.Optional; +import org.apache.commons.lang3.Validate; /** * Represents a crawl request that might be processed by the crawler in the @@ -138,7 +139,7 @@ public CrawlRequestBuilder setPriority(final int priority) { * @return The CrawlRequestBuilder instance */ public CrawlRequestBuilder setMetadata(final Serializable metadata) { - this.metadata = metadata; + this.metadata = Validate.notNull(metadata, "The metadata cannot be null."); return this; } From 5b99e06dc5cf93fd5c4c9d0897a713526c3e6e4c Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Mon, 28 May 2018 23:58:52 +0200 Subject: [PATCH 07/28] Add the possibility of accessing crawl candidate in callback parameters --- .../peterbencze/serritor/api/BaseCrawler.java | 12 ++-- .../{internal => api}/CrawlCandidate.java | 55 ++++++++++++------- .../serritor/api/CrawlRequest.java | 52 +++++++++--------- .../serritor/api/HtmlResponse.java | 19 +++---- .../serritor/api/NonHtmlResponse.java | 24 ++++---- .../serritor/api/UnsuccessfulRequest.java | 15 ++--- .../serritor/internal/CallbackParameter.java | 48 ++++------------ .../serritor/internal/CrawlFrontier.java | 3 +- .../serritor/internal/CrawlFrontierTest.java | 1 + 9 files changed, 103 insertions(+), 126 deletions(-) rename src/main/java/com/github/peterbencze/serritor/{internal => api}/CrawlCandidate.java (66%) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 2e12f06..bb9841b 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -17,7 +17,6 @@ import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder; import com.github.peterbencze.serritor.internal.AdaptiveCrawlDelayMechanism; -import com.github.peterbencze.serritor.internal.CrawlCandidate; import com.github.peterbencze.serritor.internal.CrawlDelayMechanism; import com.github.peterbencze.serritor.internal.CrawlFrontier; import com.github.peterbencze.serritor.internal.FixedCrawlDelayMechanism; @@ -201,9 +200,6 @@ private void run() { while (!stopCrawling && crawlFrontier.hasNextCandidate()) { CrawlCandidate currentCandidate = crawlFrontier.getNextCandidate(); URI candidateUrl = currentCandidate.getCandidateUrl(); - URI refererUrl = currentCandidate.getRefererUrl(); - int crawlDepth = currentCandidate.getCrawlDepth(); - CrawlRequest crawlRequest = currentCandidate.getCrawlRequest(); URI responseUrl = candidateUrl; HttpClientContext context = HttpClientContext.create(); HttpResponse httpHeadResponse = null; @@ -216,7 +212,7 @@ private void run() { // Send an HTTP HEAD request to the current URL to determine its availability and content type httpHeadResponse = getHttpHeadResponse(candidateUrl, context); } catch 
(IOException exception) { - onUnsuccessfulRequest(new UnsuccessfulRequest(refererUrl, crawlDepth, crawlRequest, exception)); + onUnsuccessfulRequest(new UnsuccessfulRequest(currentCandidate, exception)); isUnsuccessfulRequest = true; } @@ -232,10 +228,10 @@ private void run() { CrawlRequestBuilder builder = new CrawlRequestBuilder(responseUrl).setPriority(currentCandidate.getPriority()); currentCandidate.getMetadata().ifPresent(builder::setMetadata); - + crawlFrontier.feedRequest(builder.build(), false); } else if (isContentHtml(httpHeadResponse)) { - HtmlResponse response = new HtmlResponse(refererUrl, crawlDepth, crawlRequest, webDriver); + HtmlResponse response = new HtmlResponse(currentCandidate, webDriver); try { // Open the URL in the browser @@ -247,7 +243,7 @@ private void run() { onResponseComplete(response); } else { // URLs that point to non-HTML content should not be opened in the browser - onNonHtmlResponse(new NonHtmlResponse(refererUrl, crawlDepth, crawlRequest)); + onNonHtmlResponse(new NonHtmlResponse(currentCandidate)); } } diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlCandidate.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java similarity index 66% rename from src/main/java/com/github/peterbencze/serritor/internal/CrawlCandidate.java rename to src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java index 1639570..43571c8 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlCandidate.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java @@ -13,17 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.internal; +package com.github.peterbencze.serritor.api; -import com.github.peterbencze.serritor.api.CrawlRequest; import com.google.common.net.InternetDomainName; import java.io.Serializable; import java.net.URI; import java.util.Optional; /** - * Represents a candidate for crawling that will be surely processed by the - * crawler. + * Represents a candidate to be crawled by the crawler. * * @author Peter Bencze */ @@ -33,14 +31,14 @@ public final class CrawlCandidate implements Serializable { private final int crawlDepth; private final CrawlRequest crawlRequest; - public CrawlCandidate(final CrawlCandidateBuilder builder) { + private CrawlCandidate(final CrawlCandidateBuilder builder) { this.crawlRequest = builder.crawlRequest; this.refererUrl = builder.refererUrl; this.crawlDepth = builder.crawlDepth; } /** - * Returns the referer's URL. + * Returns the referer URL. * * @return The URL of the referer */ @@ -49,7 +47,7 @@ public URI getRefererUrl() { } /** - * Returns the candidate's URL. + * Returns the candidate URL. * * @return The URL of the candidate */ @@ -58,7 +56,7 @@ public URI getCandidateUrl() { } /** - * Returns the domain of the candidate's URL. + * Returns the domain of the candidate URL. * * @return The domain of the candidate URL */ @@ -69,7 +67,7 @@ public InternetDomainName getDomain() { /** * Returns the crawl depth of the candidate. * - * @return The crawl depth + * @return The crawl depth of the candidate */ public int getCrawlDepth() { return crawlDepth; @@ -78,30 +76,24 @@ public int getCrawlDepth() { /** * Returns the priority of the candidate. * - * @return The priority + * @return The priority of the candidate */ public int getPriority() { return crawlRequest.getPriority(); } - + /** - * Returns metadata associated with the request. 
+ * Returns the metadata associated with the candidate. * - * @return The request's metadata + * @return The metadata associated with the candidate */ public Optional getMetadata() { return crawlRequest.getMetadata(); } /** - * Returns the crawl request from which this candidate was constructed. - * - * @return The CrawlRequest instance + * Builds crawl candidates to be crawled by the crawler. */ - public CrawlRequest getCrawlRequest() { - return crawlRequest; - } - public static final class CrawlCandidateBuilder { private final CrawlRequest crawlRequest; @@ -109,20 +101,43 @@ public static final class CrawlCandidateBuilder { private URI refererUrl; private int crawlDepth; + /** + * Creates a {@link CrawlCandidateBuilder} instance. + * + * @param request The {@link CrawlRequest} instance from which this + * candidate is built + */ public CrawlCandidateBuilder(final CrawlRequest request) { crawlRequest = request; } + /** + * Sets the referer URL. + * + * @param refererUrl The referer URL + * @return The {@link CrawlCandidateBuilder} instance + */ public CrawlCandidateBuilder setRefererUrl(final URI refererUrl) { this.refererUrl = refererUrl; return this; } + /** + * Sets the crawl depth of the candidate. + * + * @param crawlDepth The crawl depth of the candidate + * @return The {@link CrawlCandidateBuilder} instance + */ public CrawlCandidateBuilder setCrawlDepth(final int crawlDepth) { this.crawlDepth = crawlDepth; return this; } + /** + * Builds the configured {@link CrawlCandidate} instance. + * + * @return The configured {@link CrawlCandidate} instance + */ public CrawlCandidate build() { return new CrawlCandidate(this); } diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java index 8036ac8..3f0f7a7 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java @@ -24,9 +24,8 @@ import org.apache.commons.lang3.Validate; /** - * Represents a crawl request that might be processed by the crawler in the - * future. The reason why it is not sure that it will be processed is because it - * might get filtered out by one of the enabled filters. + * Represents a crawl request that may be completed by the crawler. If request + * filtering is enabled, it could get filtered out. * * @author Peter Bencze */ @@ -35,7 +34,7 @@ public final class CrawlRequest implements Serializable { private final URI requestUrl; private final int priority; private final Serializable metadata; - + private transient InternetDomainName domain; private CrawlRequest(final CrawlRequestBuilder builder) { @@ -46,16 +45,16 @@ private CrawlRequest(final CrawlRequestBuilder builder) { } /** - * Returns the request's URL. + * Returns the request URL. * - * @return The URL of the request + * @return The request URL */ public URI getRequestUrl() { return requestUrl; } /** - * Returns the domain of the request's URL. + * Returns the domain of the request URL. * * @return The domain of the request URL */ @@ -64,7 +63,7 @@ public InternetDomainName getDomain() { } /** - * Returns the request's priority. + * Returns the priority of the request. * * @return The priority of the request */ @@ -73,30 +72,31 @@ public int getPriority() { } /** - * Returns metadata associated with the request. + * Returns the metadata associated with the request. 
* - * @return The request's metadata + * @return The metadata associated with the request */ public Optional getMetadata() { return Optional.ofNullable(metadata); } + /** + * Builds crawl requests which can be fed to the crawler. + */ public static final class CrawlRequestBuilder { private static final int DEFAULT_PRIORITY = 0; private final URI requestUrl; private final InternetDomainName domain; - + private int priority; private Serializable metadata; /** - * Constructs a CrawlRequestBuilder instance that can be - * used to create CrawRequest instances. + * Creates a {@link CrawlRequestBuilder} instance. * - * @param requestUrl The request's URL given as a URL - * instance + * @param requestUrl The request URL */ public CrawlRequestBuilder(final URI requestUrl) { this.requestUrl = requestUrl; @@ -109,22 +109,20 @@ public CrawlRequestBuilder(final URI requestUrl) { } /** - * Constructs a CrawlRequestBuilder instance that can be - * used to create CrawRequest instances. + * Creates a {@link CrawlRequestBuilder} instance. * - * @param requestUrl The request's URL given as a String - * instance + * @param requestUrl The request URL */ public CrawlRequestBuilder(final String requestUrl) { this(URI.create(requestUrl)); } /** - * Sets the request's priority. + * Sets the priority of the request. * * @param priority The priority of the request (higher number means * higher priority) - * @return The CrawlRequestBuilder instance + * @return The {@link CrawlRequestBuilder} instance */ public CrawlRequestBuilder setPriority(final int priority) { this.priority = priority; @@ -132,11 +130,11 @@ public CrawlRequestBuilder setPriority(final int priority) { } /** - * Sets additional metadata for the request which can be later accessed - * when the crawler processed the request. + * Sets the metadata of the request which can be later accessed when the + * crawler completed the request. * * @param metadata The metadata associated with the request - * @return The CrawlRequestBuilder instance + * @return The {@link CrawlRequestBuilder} instance */ public CrawlRequestBuilder setMetadata(final Serializable metadata) { this.metadata = Validate.notNull(metadata, "The metadata cannot be null."); @@ -144,9 +142,9 @@ public CrawlRequestBuilder setMetadata(final Serializable metadata) { } /** - * Builds the configured CrawlRequest instance. + * Builds the configured {@link CrawlRequest} instance. * - * @return The configured CrawlRequest instance + * @return The configured {@link CrawlRequest} instance */ public CrawlRequest build() { return new CrawlRequest(this); @@ -155,7 +153,7 @@ public CrawlRequest build() { private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); - + domain = InternetDomainName.from(requestUrl.getHost()); } } diff --git a/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java b/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java index 563fa83..20d4a6e 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java +++ b/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java @@ -16,7 +16,6 @@ package com.github.peterbencze.serritor.api; import com.github.peterbencze.serritor.internal.CallbackParameter; -import java.net.URI; import org.openqa.selenium.WebDriver; /** @@ -29,23 +28,21 @@ public final class HtmlResponse extends CallbackParameter { private final WebDriver webDriver; /** - * Constructs a HtmlResponse instance. 
- * - * @param refererUrl The referer URL - * @param crawlDepth The current crawl depth - * @param crawlRequest The processed crawl request - * @param webDriver The WebDriver instance + * Creates an {@link HtmlResponse} instance. + * + * @param crawlCandidate The crawled {@link CrawlCandidate} instance + * @param webDriver The {@link WebDriver} instance */ - public HtmlResponse(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest, final WebDriver webDriver) { - super(refererUrl, crawlDepth, crawlRequest); + public HtmlResponse(final CrawlCandidate crawlCandidate, final WebDriver webDriver) { + super(crawlCandidate); this.webDriver = webDriver; } /** - * Returns the WebDriver instance for the browser. + * Returns the {@link WebDriver} instance of the browser. * - * @return The WebDriver instance + * @return The {@link WebDriver} instance of the browser */ public WebDriver getWebDriver() { return webDriver; diff --git a/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java b/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java index 7de2862..e2cbedb 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java +++ b/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java @@ -18,7 +18,6 @@ import com.github.peterbencze.serritor.internal.CallbackParameter; import java.io.File; import java.io.IOException; -import java.net.URI; import org.apache.commons.io.FileUtils; /** @@ -29,23 +28,22 @@ public final class NonHtmlResponse extends CallbackParameter { /** - * Constructs a NonHtmlResponse instance. - * - * @param refererUrl The referer URL - * @param crawlDepth The current crawl depth - * @param crawlRequest The processed crawl request + * Creates a {@link NonHtmlResponse} instance. + * + * @param crawlCandidate The crawled {@link CrawlCandidate} instance */ - public NonHtmlResponse(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { - super(refererUrl, crawlDepth, crawlRequest); + public NonHtmlResponse(final CrawlCandidate crawlCandidate) { + super(crawlCandidate); } - + /** * Downloads the file specified by the request URL. - * - * @param destination The destination File instance - * @throws IOException If the URL cannot be opened or I/O error occurs while downloading the file + * + * @param destination The destination {@link File} instance + * @throws IOException If the URL cannot be opened or I/O error occurs while + * downloading the file */ public void downloadFile(final File destination) throws IOException { - FileUtils.copyURLToFile(getCrawlRequest().getRequestUrl().toURL(), destination); + FileUtils.copyURLToFile(getCrawlCandidate().getCandidateUrl().toURL(), destination); } } diff --git a/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java b/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java index 97ca2cb..a55c970 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java +++ b/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java @@ -17,7 +17,6 @@ import com.github.peterbencze.serritor.internal.CallbackParameter; import java.io.IOException; -import java.net.URI; /** * Represents an unsuccessful request. @@ -29,25 +28,23 @@ public final class UnsuccessfulRequest extends CallbackParameter { private final IOException exception; /** - * Constructs a UnsuccessfulRequest instance. + * Creates an {@link UnsuccessfulRequest} instance. 
* - * @param refererUrl The referer URL - * @param crawlDepth The current crawl depth - * @param crawlRequest The processed crawl request + * @param crawlCandidate The crawled {@link CrawlCandidate} instance * @param exception The exception that was thrown while trying to fulfill * the request */ - public UnsuccessfulRequest(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest, final IOException exception) { - super(refererUrl, crawlDepth, crawlRequest); + public UnsuccessfulRequest(final CrawlCandidate crawlCandidate, final IOException exception) { + super(crawlCandidate); this.exception = exception; } /** - * Returns the exception that was thrown while trying to fulfill the + * Returns the exception which was thrown while trying to fulfill the * request. * - * @return The IOException instance + * @return The thrown {@link IOException} instance */ public IOException getException() { return exception; diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java b/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java index 61f47df..881e3d6 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java @@ -15,58 +15,32 @@ */ package com.github.peterbencze.serritor.internal; -import com.github.peterbencze.serritor.api.CrawlRequest; -import java.net.URI; -import java.util.Optional; +import com.github.peterbencze.serritor.api.CrawlCandidate; /** - * The base class from which all callback parameters inherit from. + * Base class from which all callback parameters inherit from. * * @author Peter Bencze */ public abstract class CallbackParameter { - private final int crawlDepth; - private final URI refererUrl; - private final CrawlRequest crawlRequest; + private final CrawlCandidate crawlCandidate; /** - * Base constructor for the callback parameters. - * - * @param refererUrl The referer URL - * @param crawlDepth The current crawl depth - * @param crawlRequest The processed crawl request - */ - protected CallbackParameter(final URI refererUrl, final int crawlDepth, final CrawlRequest crawlRequest) { - this.refererUrl = refererUrl; - this.crawlDepth = crawlDepth; - this.crawlRequest = crawlRequest; - } - - /** - * Returns the referer URL. - * - * @return The referer URL - */ - public final Optional getRefererUrl() { - return Optional.ofNullable(refererUrl); - } - - /** - * Returns the current crawl depth. + * Base constructor of callback parameters. * - * @return The current crawl depth + * @param crawlCandidate The crawled {@link CrawlCandidate} instance */ - public final int getCrawlDepth() { - return crawlDepth; + protected CallbackParameter(final CrawlCandidate crawlCandidate) { + this.crawlCandidate = crawlCandidate; } /** - * Returns the crawl request that was processed by the crawler. + * Returns the crawl candidate which was crawled by the crawler. 
* - * @return The processed CrawlRequest instance + * @return The crawled {@link CrawlCandidate} instance */ - public final CrawlRequest getCrawlRequest() { - return crawlRequest; + public final CrawlCandidate getCrawlCandidate() { + return crawlCandidate; } } diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java index bdcf569..c38cd3e 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java @@ -15,9 +15,10 @@ */ package com.github.peterbencze.serritor.internal; +import com.github.peterbencze.serritor.api.CrawlCandidate; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import com.github.peterbencze.serritor.api.CrawlRequest; -import com.github.peterbencze.serritor.internal.CrawlCandidate.CrawlCandidateBuilder; +import com.github.peterbencze.serritor.api.CrawlCandidate.CrawlCandidateBuilder; import java.io.Serializable; import java.net.URI; import java.util.Arrays; diff --git a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java index 79c5131..6e38a26 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java @@ -15,6 +15,7 @@ */ package com.github.peterbencze.serritor.internal; +import com.github.peterbencze.serritor.api.CrawlCandidate; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import com.github.peterbencze.serritor.api.CrawlRequest; import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder; From b88b15870d0dc9d54a3977c6f8a587ab486d41fe Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 3 Jun 2018 18:31:55 +0200 Subject: [PATCH 08/28] Modify event handling --- .../peterbencze/serritor/api/BaseCrawler.java | 76 ++++++++++++------- .../NonHtmlContentEvent.java} | 21 ++--- .../PageLoadEvent.java} | 21 ++--- .../api/event/PageLoadTimeoutEvent.java | 52 +++++++++++++ .../RequestErrorEvent.java} | 23 +++--- .../api/event/RequestRedirectEvent.java | 51 +++++++++++++ .../serritor/api/helper/UrlFinder.java | 12 +-- ...allbackParameter.java => EventObject.java} | 14 ++-- .../serritor/api/helper/UrlFinderTest.java | 12 +-- 9 files changed, 202 insertions(+), 80 deletions(-) rename src/main/java/com/github/peterbencze/serritor/api/{NonHtmlResponse.java => event/NonHtmlContentEvent.java} (59%) rename src/main/java/com/github/peterbencze/serritor/api/{HtmlResponse.java => event/PageLoadEvent.java} (55%) create mode 100644 src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java rename src/main/java/com/github/peterbencze/serritor/api/{UnsuccessfulRequest.java => event/RequestErrorEvent.java} (54%) create mode 100644 src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java rename src/main/java/com/github/peterbencze/serritor/internal/{CallbackParameter.java => EventObject.java} (67%) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index bb9841b..09c88ce 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -16,6 +16,11 @@ package 
com.github.peterbencze.serritor.api; import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder; +import com.github.peterbencze.serritor.api.event.NonHtmlContentEvent; +import com.github.peterbencze.serritor.api.event.PageLoadEvent; +import com.github.peterbencze.serritor.api.event.PageLoadTimeoutEvent; +import com.github.peterbencze.serritor.api.event.RequestErrorEvent; +import com.github.peterbencze.serritor.api.event.RequestRedirectEvent; import com.github.peterbencze.serritor.internal.AdaptiveCrawlDelayMechanism; import com.github.peterbencze.serritor.internal.CrawlDelayMechanism; import com.github.peterbencze.serritor.internal.CrawlFrontier; @@ -195,7 +200,7 @@ protected final void crawl(final List requests) { * Defines the workflow of the crawler. */ private void run() { - onBegin(); + onStart(); while (!stopCrawling && crawlFrontier.hasNextCandidate()) { CrawlCandidate currentCandidate = crawlFrontier.getNextCandidate(); @@ -212,7 +217,7 @@ private void run() { // Send an HTTP HEAD request to the current URL to determine its availability and content type httpHeadResponse = getHttpHeadResponse(candidateUrl, context); } catch (IOException exception) { - onUnsuccessfulRequest(new UnsuccessfulRequest(currentCandidate, exception)); + onRequestError(new RequestErrorEvent(currentCandidate, exception)); isUnsuccessfulRequest = true; } @@ -225,32 +230,39 @@ private void run() { if (!responseUrl.equals(candidateUrl)) { // If the request has been redirected, a new crawl request should be created for the redirected URL - CrawlRequestBuilder builder = new CrawlRequestBuilder(responseUrl).setPriority(currentCandidate.getPriority()); currentCandidate.getMetadata().ifPresent(builder::setMetadata); + CrawlRequest redirectedRequest = builder.build(); - crawlFrontier.feedRequest(builder.build(), false); + crawlFrontier.feedRequest(redirectedRequest, false); + onRequestRedirect(new RequestRedirectEvent(currentCandidate, redirectedRequest)); } else if (isContentHtml(httpHeadResponse)) { - HtmlResponse response = new HtmlResponse(currentCandidate, webDriver); + boolean isTimedOut = false; + TimeoutException exception = null; try { // Open the URL in the browser webDriver.get(candidateUrl.toString()); - } catch (TimeoutException exception) { - onResponseTimeout(response); + } catch (TimeoutException exc) { + isTimedOut = true; + exception = exc; } - onResponseComplete(response); + if (!isTimedOut) { + onPageLoad(new PageLoadEvent(currentCandidate, webDriver)); + } else { + onPageLoadTimeout(new PageLoadTimeoutEvent(currentCandidate, exception)); + } } else { // URLs that point to non-HTML content should not be opened in the browser - onNonHtmlResponse(new NonHtmlResponse(currentCandidate)); + onNonHtmlContent(new NonHtmlContentEvent(currentCandidate)); } } performDelay(); } - onFinish(); + onStop(); } /** @@ -344,49 +356,55 @@ private static BasicClientCookie convertBrowserCookie(final Cookie browserCookie } /** - * Called when the crawler is about to begin its operation. + * Callback which gets called when the crawler is started. + */ + protected void onStart() { + } + + /** + * Callback which gets called when the browser loads the page. + * + * @param event The {@link PageLoadEvent} instance */ - protected void onBegin() { + protected void onPageLoad(final PageLoadEvent event) { } /** - * Called after the browser loads the given URL. + * Callback which gets called when the content type is not HTML. 
* - * @param response The HTML response + * @param event The {@link NonHtmlContentEvent} instance */ - protected void onResponseComplete(final HtmlResponse response) { + protected void onNonHtmlContent(final NonHtmlContentEvent event) { } /** - * Called when the loading of the given URL times out in the browser. Use - * this callback with caution: the page might be half-loaded or not loaded - * at all. + * Callback which gets called when a request error occurs. * - * @param response The HTML response + * @param event The {@link RequestErrorEvent} instance */ - protected void onResponseTimeout(final HtmlResponse response) { + protected void onRequestError(final RequestErrorEvent event) { } /** - * Called when getting a non-HTML response. + * Callback which gets called when a request is redirected. * - * @param response The non-HTML response + * @param event The {@link RequestRedirectEvent} instance */ - protected void onNonHtmlResponse(final NonHtmlResponse response) { + protected void onRequestRedirect(final RequestRedirectEvent event) { } /** - * Called when an exception occurs while sending an initial HEAD request to - * the given URL. + * Callback which gets called when the page does not load in the browser + * within the timeout period. * - * @param request The unsuccessful request + * @param event The {@link PageLoadTimeoutEvent} instance */ - protected void onUnsuccessfulRequest(final UnsuccessfulRequest request) { + protected void onPageLoadTimeout(final PageLoadTimeoutEvent event) { } /** - * Called when the crawler successfully finishes its operation. + * Callback which gets called when the crawler is stopped. */ - protected void onFinish() { + protected void onStop() { } } diff --git a/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java b/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java similarity index 59% rename from src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java rename to src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java index e2cbedb..7fc4670 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/NonHtmlResponse.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java @@ -13,34 +13,35 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.api; +package com.github.peterbencze.serritor.api.event; -import com.github.peterbencze.serritor.internal.CallbackParameter; +import com.github.peterbencze.serritor.api.CrawlCandidate; +import com.github.peterbencze.serritor.internal.EventObject; import java.io.File; import java.io.IOException; import org.apache.commons.io.FileUtils; /** - * Represents a non-HTML response. + * Event which gets delivered when the content type is not HTML. * * @author Peter Bencze */ -public final class NonHtmlResponse extends CallbackParameter { +public final class NonHtmlContentEvent extends EventObject { /** - * Creates a {@link NonHtmlResponse} instance. + * Creates a {@link NonHtmlContentEvent} instance. * - * @param crawlCandidate The crawled {@link CrawlCandidate} instance + * @param crawlCandidate the current crawl candidate */ - public NonHtmlResponse(final CrawlCandidate crawlCandidate) { + public NonHtmlContentEvent(final CrawlCandidate crawlCandidate) { super(crawlCandidate); } /** - * Downloads the file specified by the request URL. + * Downloads the file specified by the URL. 
* - * @param destination The destination {@link File} instance - * @throws IOException If the URL cannot be opened or I/O error occurs while + * @param destination the destination file + * @throws IOException if the URL cannot be opened or I/O error occurs while * downloading the file */ public void downloadFile(final File destination) throws IOException { diff --git a/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadEvent.java similarity index 55% rename from src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java rename to src/main/java/com/github/peterbencze/serritor/api/event/PageLoadEvent.java index 20d4a6e..cd3726b 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/HtmlResponse.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadEvent.java @@ -13,36 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.api; +package com.github.peterbencze.serritor.api.event; -import com.github.peterbencze.serritor.internal.CallbackParameter; +import com.github.peterbencze.serritor.api.CrawlCandidate; +import com.github.peterbencze.serritor.internal.EventObject; import org.openqa.selenium.WebDriver; /** - * Represents an HTML response. + * Event which gets delivered when the browser loads the page. * * @author Peter Bencze */ -public final class HtmlResponse extends CallbackParameter { +public final class PageLoadEvent extends EventObject { private final WebDriver webDriver; /** - * Creates an {@link HtmlResponse} instance. + * Creates a {@link PageLoadEvent} instance. * - * @param crawlCandidate The crawled {@link CrawlCandidate} instance - * @param webDriver The {@link WebDriver} instance + * @param crawlCandidate the current crawl candidate + * @param webDriver the WebDriver to control the browser */ - public HtmlResponse(final CrawlCandidate crawlCandidate, final WebDriver webDriver) { + public PageLoadEvent(final CrawlCandidate crawlCandidate, final WebDriver webDriver) { super(crawlCandidate); this.webDriver = webDriver; } /** - * Returns the {@link WebDriver} instance of the browser. + * Returns the WebDriver to control the browser. * - * @return The {@link WebDriver} instance of the browser + * @return the WebDriver to control the browser */ public WebDriver getWebDriver() { return webDriver; diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java new file mode 100644 index 0000000..a2b88b8 --- /dev/null +++ b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java @@ -0,0 +1,52 @@ +/* + * Copyright 2018 Peter Bencze. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.github.peterbencze.serritor.api.event; + +import com.github.peterbencze.serritor.api.CrawlCandidate; +import com.github.peterbencze.serritor.internal.EventObject; +import org.openqa.selenium.TimeoutException; + +/** + * Event which gets delivered when a page does not load in the browser within + * the timeout period. + * + * @author Peter Bencze + */ +public final class PageLoadTimeoutEvent extends EventObject { + + private final TimeoutException exception; + + /** + * Creates a {@link PageLoadTimeoutEvent} instance. + * + * @param crawlCandidate the current crawl candidate + * @param exception the thrown exception + */ + public PageLoadTimeoutEvent(final CrawlCandidate crawlCandidate, final TimeoutException exception) { + super(crawlCandidate); + + this.exception = exception; + } + + /** + * Returns the thrown exception. + * + * @return the thrown exception + */ + public TimeoutException getException() { + return exception; + } +} diff --git a/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java b/src/main/java/com/github/peterbencze/serritor/api/event/RequestErrorEvent.java similarity index 54% rename from src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java rename to src/main/java/com/github/peterbencze/serritor/api/event/RequestErrorEvent.java index a55c970..305840b 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/UnsuccessfulRequest.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/RequestErrorEvent.java @@ -13,38 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.api; +package com.github.peterbencze.serritor.api.event; -import com.github.peterbencze.serritor.internal.CallbackParameter; +import com.github.peterbencze.serritor.api.CrawlCandidate; +import com.github.peterbencze.serritor.internal.EventObject; import java.io.IOException; /** - * Represents an unsuccessful request. + * Event which gets delivered when a request error occurs. * * @author Peter Bencze */ -public final class UnsuccessfulRequest extends CallbackParameter { +public final class RequestErrorEvent extends EventObject { private final IOException exception; /** - * Creates an {@link UnsuccessfulRequest} instance. + * Creates a {@link RequestErrorEvent} instance. * - * @param crawlCandidate The crawled {@link CrawlCandidate} instance - * @param exception The exception that was thrown while trying to fulfill - * the request + * @param crawlCandidate the current crawl candidate + * @param exception the thrown exception */ - public UnsuccessfulRequest(final CrawlCandidate crawlCandidate, final IOException exception) { + public RequestErrorEvent(final CrawlCandidate crawlCandidate, final IOException exception) { super(crawlCandidate); this.exception = exception; } /** - * Returns the exception which was thrown while trying to fulfill the - * request. + * Returns the thrown exception. * - * @return The thrown {@link IOException} instance + * @return the thrown exception */ public IOException getException() { return exception; diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java new file mode 100644 index 0000000..d142d0a --- /dev/null +++ b/src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java @@ -0,0 +1,51 @@ +/* + * Copyright 2018 Peter Bencze. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.github.peterbencze.serritor.api.event; + +import com.github.peterbencze.serritor.api.CrawlCandidate; +import com.github.peterbencze.serritor.api.CrawlRequest; +import com.github.peterbencze.serritor.internal.EventObject; + +/** + * Event which gets delivered when a request is redirected. + * + * @author Peter Bencze + */ +public final class RequestRedirectEvent extends EventObject { + + private final CrawlRequest redirectedCrawlRequest; + + /** + * Creates a {@link RequestRedirectEvent} instance. + * + * @param crawlCandidate the current crawl candidate + * @param redirectedCrawlRequest the crawl request for the redirected URL + */ + public RequestRedirectEvent(final CrawlCandidate crawlCandidate, final CrawlRequest redirectedCrawlRequest) { + super(crawlCandidate); + + this.redirectedCrawlRequest = redirectedCrawlRequest; + } + + /** + * Returns the crawl request for the redirected URL. + * + * @return the crawl request for the redirected URL + */ + public CrawlRequest getRedirectedCrawlRequest() { + return redirectedCrawlRequest; + } +} diff --git a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java index 4015764..8347654 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java +++ b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java @@ -15,7 +15,7 @@ */ package com.github.peterbencze.serritor.api.helper; -import com.github.peterbencze.serritor.api.HtmlResponse; +import com.github.peterbencze.serritor.api.event.PageLoadEvent; import com.google.common.collect.Sets; import com.google.common.net.InternetDomainName; import java.net.URI; @@ -54,17 +54,17 @@ private UrlFinder(final UrlFinderBuilder builder) { } /** - * Returns a list of validated URLs found in the response's HTML source. + * Returns a list of validated URLs found in the page's HTML source. 
* - * @param response The HtmlResponse instance - * @return The list of found URLs + * @param event the {@link PageLoadEvent} instance + * @return the list of found URLs in the page's HTML source */ - public List findUrlsInResponse(final HtmlResponse response) { + public List findUrlsInPage(final PageLoadEvent event) { Set foundUrls = new HashSet<>(); // Find elements using the specified locating mechanisms Set extractedElements = locatingMechanisms.stream() - .map(response.getWebDriver()::findElements) + .map(event.getWebDriver()::findElements) .flatMap(List::stream) .collect(Collectors.toSet()); diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java b/src/main/java/com/github/peterbencze/serritor/internal/EventObject.java similarity index 67% rename from src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java rename to src/main/java/com/github/peterbencze/serritor/internal/EventObject.java index 881e3d6..89d6c33 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CallbackParameter.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/EventObject.java @@ -18,27 +18,27 @@ import com.github.peterbencze.serritor.api.CrawlCandidate; /** - * Base class from which all callback parameters inherit from. + * Base class from which all event objects shall be derived. * * @author Peter Bencze */ -public abstract class CallbackParameter { +public abstract class EventObject { private final CrawlCandidate crawlCandidate; /** - * Base constructor of callback parameters. + * Base constructor of all event objects. * - * @param crawlCandidate The crawled {@link CrawlCandidate} instance + * @param crawlCandidate the current crawl candidate */ - protected CallbackParameter(final CrawlCandidate crawlCandidate) { + protected EventObject(final CrawlCandidate crawlCandidate) { this.crawlCandidate = crawlCandidate; } /** - * Returns the crawl candidate which was crawled by the crawler. + * Returns the current crawl candidate. 
* - * @return The crawled {@link CrawlCandidate} instance + * @return the current crawl candidate */ public final CrawlCandidate getCrawlCandidate() { return crawlCandidate; diff --git a/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java b/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java index 86e5fa6..9412b5b 100644 --- a/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java +++ b/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java @@ -15,7 +15,7 @@ */ package com.github.peterbencze.serritor.api.helper; -import com.github.peterbencze.serritor.api.HtmlResponse; +import com.github.peterbencze.serritor.api.event.PageLoadEvent; import com.github.peterbencze.serritor.api.helper.UrlFinder.UrlFinderBuilder; import java.util.Arrays; import java.util.List; @@ -43,7 +43,7 @@ public final class UrlFinderTest { private static final String URL_WITH_INVALID_DOMAIN = "http://invalid.domain"; private WebDriver mockedDriver; - private HtmlResponse mockedResponse; + private PageLoadEvent mockedEvent; private WebElement mockedElementWithValidUrl; private WebElement mockedElementWithInvalidUrlFormat; private WebElement mockedElementWithInvalidDomain; @@ -51,10 +51,10 @@ public final class UrlFinderTest { @Before public void initialize() { - mockedResponse = Mockito.mock(HtmlResponse.class); + mockedEvent = Mockito.mock(PageLoadEvent.class); mockedDriver = Mockito.mock(WebDriver.class); - Mockito.when(mockedResponse.getWebDriver()) + Mockito.when(mockedEvent.getWebDriver()) .thenReturn(mockedDriver); mockedElementWithValidUrl = Mockito.mock(WebElement.class); @@ -77,7 +77,7 @@ public void initialize() { } @Test - public void testFindUrlsInResponse() { - Assert.assertEquals(Arrays.asList(VALID_URL), urlFinder.findUrlsInResponse(mockedResponse)); + public void testFindUrlsInPage() { + Assert.assertEquals(Arrays.asList(VALID_URL), urlFinder.findUrlsInPage(mockedEvent)); } } From ed60d9f50f462580268d2ba55bb65ff430aec977 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 3 Jun 2018 23:40:44 +0200 Subject: [PATCH 09/28] Rewrite javadocs --- .../peterbencze/serritor/api/BaseCrawler.java | 97 ++++++++++--------- .../serritor/api/CrawlCandidate.java | 42 ++++---- .../serritor/api/CrawlDelayStrategy.java | 3 +- .../serritor/api/CrawlRequest.java | 29 +++--- .../serritor/api/CrawlStrategy.java | 3 +- .../serritor/api/CrawlerConfiguration.java | 88 +++++++++-------- .../serritor/api/helper/UrlFinder.java | 57 ++++++----- .../internal/AdaptiveCrawlDelayMechanism.java | 12 +-- .../internal/CrawlDelayMechanism.java | 12 +-- .../serritor/internal/CrawlDomain.java | 12 +-- .../serritor/internal/CrawlFrontier.java | 46 ++++----- .../internal/FixedCrawlDelayMechanism.java | 8 +- .../internal/RandomCrawlDelayMechanism.java | 8 +- .../serritor/api/helper/UrlFinderTest.java | 26 ++--- .../AdaptiveCrawlDelayMechanismTest.java | 33 +++---- .../serritor/internal/CrawlDomainTest.java | 24 ++--- .../serritor/internal/CrawlFrontierTest.java | 4 +- .../FixedCrawlDelayMechanismTest.java | 10 +- 18 files changed, 262 insertions(+), 252 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 09c88ce..83b0498 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -67,6 +67,11 @@ public abstract class BaseCrawler { private 
CrawlFrontier crawlFrontier; private CrawlDelayMechanism crawlDelayMechanism; + /** + * Base constructor of all crawlers. + * + * @param config the configuration of the crawler + */ protected BaseCrawler(final CrawlerConfiguration config) { this.config = config; @@ -82,23 +87,23 @@ public final void start() { } /** - * Starts the crawler using the browser specified by the + * Starts the crawler using the browser specified by the given * WebDriver instance. * - * @param driver The WebDriver instance that will be used by - * the crawler + * @param webDriver the WebDriver instance to control the + * browser */ - public final void start(final WebDriver driver) { - start(driver, new CrawlFrontier(config)); + public final void start(final WebDriver webDriver) { + start(webDriver, new CrawlFrontier(config)); } /** - * Constructs all the necessary objects and runs the crawler. + * Initializes and runs the crawler. * - * @param frontierToUse The CrawlFrontier instance to be used - * by the crawler. + * @param crawlFrontier the CrawlFrontier instance to be used + * by the crawler to manage crawl requests */ - private void start(final WebDriver driver, final CrawlFrontier frontierToUse) { + private void start(final WebDriver webDriver, final CrawlFrontier crawlFrontier) { try { Validate.validState(isStopped, "The crawler is already started."); @@ -107,8 +112,8 @@ private void start(final WebDriver driver, final CrawlFrontier frontierToUse) { httpClient = HttpClientBuilder.create() .setDefaultCookieStore(cookieStore) .build(); - webDriver = Validate.notNull(driver, "The webdriver cannot be null."); - crawlFrontier = frontierToUse; + this.webDriver = Validate.notNull(webDriver, "The webdriver cannot be null."); + this.crawlFrontier = crawlFrontier; crawlDelayMechanism = createCrawlDelayMechanism(); run(); @@ -122,9 +127,9 @@ private void start(final WebDriver driver, final CrawlFrontier frontierToUse) { } /** - * Saves the current state of the crawler to the specified output stream. + * Saves the current state of the crawler to the given output stream. * - * @param out The OutputStream instance to use + * @param out the output stream */ public final void saveState(final OutputStream out) { // Check if the crawler has been started at least once, otherwise we have nothing to save @@ -137,25 +142,25 @@ public final void saveState(final OutputStream out) { /** * Resumes a previously saved state using HtmlUnit headless browser. * - * @param in The InputStream instance to use + * @param in the input stream from which the state should be loaded */ public final void resumeState(final InputStream in) { resumeState(new HtmlUnitDriver(true), in); } /** - * Resumes a previously saved state using the browser specified by the - * WebDriver instance. + * Resumes a previously saved state using the browser specified by the given + * WebDriver instance. 
* - * @param driver The WebDriver instance to be used by the - * crawler - * @param in The InputStream instance to use + * @param webDriver the WebDriver instance to control the + * browser + * @param in the input stream from which the state should be loaded */ - public final void resumeState(final WebDriver driver, final InputStream in) { + public final void resumeState(final WebDriver webDriver, final InputStream in) { // Re-create crawl frontier from the saved state - CrawlFrontier frontierToUse = SerializationUtils.deserialize(in); + CrawlFrontier deserializedCrawlFrontier = SerializationUtils.deserialize(in); - start(driver, frontierToUse); + start(webDriver, deserializedCrawlFrontier); } /** @@ -170,12 +175,10 @@ public final void stop() { } /** - * Passes a crawl request to the crawl frontier. The crawler must be - * running, otherwise use - * {@link CrawlerConfiguration.CrawlerConfigurationBuilder#addCrawlSeed(com.github.peterbencze.serritor.api.CrawlRequest)} - * for adding crawl seeds. + * Feeds a crawl request to the crawler. The crawler should be running, + * otherwise the request has to be added as a crawl seed instead. * - * @param request The CrawlRequest instance + * @param request the crawl request */ protected final void crawl(final CrawlRequest request) { Validate.notNull(request, "The request cannot be null."); @@ -185,12 +188,10 @@ protected final void crawl(final CrawlRequest request) { } /** - * Passes multiple crawl requests to the crawl frontier. The crawler must be - * running, otherwise use - * {@link CrawlerConfiguration.CrawlerConfigurationBuilder#addCrawlSeeds(java.util.List)} - * for adding crawl seeds. + * Feeds multiple crawl requests to the crawler. The crawler should be + * running, otherwise the requests have to be added as crawl seeds instead. * - * @param requests The list of CrawlRequest instances + * @param requests the list of crawl requests */ protected final void crawl(final List requests) { requests.forEach(this::crawl); @@ -266,10 +267,12 @@ private void run() { } /** - * Returns a HTTP HEAD response for the given URL. + * Sends an HTTP HEAD request to the given URL and returns the response. * - * @param destinationUrl The URL to crawl - * @return The HTTP HEAD response + * @param destinationUrl the destination URL + * @throws IOException if an error occurs while trying to fulfill the + * request + * @return the HTTP HEAD response */ private HttpResponse getHttpHeadResponse(final URI destinationUrl, final HttpClientContext context) throws IOException { HttpHead headRequest = new HttpHead(destinationUrl.toString()); @@ -277,10 +280,10 @@ private HttpResponse getHttpHeadResponse(final URI destinationUrl, final HttpCli } /** - * Indicates if the content of the response is HTML or not. + * Indicates if the response's content type is HTML. * - * @param httpHeadResponse The HTTP HEAD response - * @return true if the content is HTML, false + * @param httpHeadResponse the HTTP HEAD response + * @return true if the content type is HTML, false * otherwise */ private static boolean isContentHtml(final HttpResponse httpHeadResponse) { @@ -289,9 +292,9 @@ private static boolean isContentHtml(final HttpResponse httpHeadResponse) { } /** - * Constructs the crawl delay mechanism specified in the configuration. + * Creates the crawl delay mechanism according to the configuration. 
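
The lifecycle methods documented above (start, saveState, resumeState) can be combined roughly as in the following sketch. The file name, the seed URL, and the minimal nested subclass are example values, not part of the library; the sketch assumes the crawl is allowed to finish before the state is saved.

    import com.github.peterbencze.serritor.api.BaseCrawler;
    import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder;
    import com.github.peterbencze.serritor.api.CrawlerConfiguration;
    import com.github.peterbencze.serritor.api.CrawlerConfiguration.CrawlerConfigurationBuilder;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    public final class CrawlerLifecycleExample {

        // Minimal BaseCrawler subclass used only for demonstration purposes
        private static final class MyCrawler extends BaseCrawler {

            private MyCrawler(final CrawlerConfiguration config) {
                super(config);
            }
        }

        public static void main(final String[] args) throws IOException {
            CrawlerConfiguration config = new CrawlerConfigurationBuilder()
                    .addAllowedCrawlDomain("example.com")
                    .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build())
                    .build();

            MyCrawler crawler = new MyCrawler(config);

            // Runs the crawl using the default HtmlUnit headless browser
            crawler.start();

            // The crawl frontier can be persisted to an output stream...
            try (OutputStream out = new FileOutputStream("crawler-state.bin")) {
                crawler.saveState(out);
            }

            // ...and restored later (possibly in a separate run) to resume crawling
            try (InputStream in = new FileInputStream("crawler-state.bin")) {
                crawler.resumeState(in);
            }
        }
    }
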
* - * @return The crawl delay mechanism + * @return the created crawl delay mechanism */ private CrawlDelayMechanism createCrawlDelayMechanism() { switch (config.getCrawlDelayStrategy()) { @@ -338,8 +341,8 @@ private void updateClientCookieStore() { /** * Converts a browser cookie to a HTTP client one. * - * @param browserCookie The browser cookie to be converted - * @return The converted HTTP client cookie + * @param browserCookie the browser cookie to be converted + * @return the converted HTTP client cookie */ private static BasicClientCookie convertBrowserCookie(final Cookie browserCookie) { BasicClientCookie clientCookie = new BasicClientCookie(browserCookie.getName(), browserCookie.getValue()); @@ -364,7 +367,7 @@ protected void onStart() { /** * Callback which gets called when the browser loads the page. * - * @param event The {@link PageLoadEvent} instance + * @param event the PageLoadEvent instance */ protected void onPageLoad(final PageLoadEvent event) { } @@ -372,7 +375,7 @@ protected void onPageLoad(final PageLoadEvent event) { /** * Callback which gets called when the content type is not HTML. * - * @param event The {@link NonHtmlContentEvent} instance + * @param event the NonHtmlContentEvent instance */ protected void onNonHtmlContent(final NonHtmlContentEvent event) { } @@ -380,7 +383,7 @@ protected void onNonHtmlContent(final NonHtmlContentEvent event) { /** * Callback which gets called when a request error occurs. * - * @param event The {@link RequestErrorEvent} instance + * @param event the RequestErrorEvent instance */ protected void onRequestError(final RequestErrorEvent event) { } @@ -388,7 +391,7 @@ protected void onRequestError(final RequestErrorEvent event) { /** * Callback which gets called when a request is redirected. * - * @param event The {@link RequestRedirectEvent} instance + * @param event the RequestRedirectEvent instance */ protected void onRequestRedirect(final RequestRedirectEvent event) { } @@ -397,7 +400,7 @@ protected void onRequestRedirect(final RequestRedirectEvent event) { * Callback which gets called when the page does not load in the browser * within the timeout period. * - * @param event The {@link PageLoadTimeoutEvent} instance + * @param event the PageLoadTimeoutEvent instance */ protected void onPageLoadTimeout(final PageLoadTimeoutEvent event) { } diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java index 43571c8..c219c0e 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java @@ -21,7 +21,7 @@ import java.util.Optional; /** - * Represents a candidate to be crawled by the crawler. + * Represents a candidate for crawling. * * @author Peter Bencze */ @@ -40,59 +40,59 @@ private CrawlCandidate(final CrawlCandidateBuilder builder) { /** * Returns the referer URL. * - * @return The URL of the referer + * @return the URL of the referer */ public URI getRefererUrl() { return refererUrl; } /** - * Returns the candidate URL. + * Returns the request URL. * - * @return The URL of the candidate + * @return the URL of the request */ public URI getCandidateUrl() { return crawlRequest.getRequestUrl(); } /** - * Returns the domain of the candidate URL. + * Returns the domain of the request URL. 
* - * @return The domain of the candidate URL + * @return the domain of the request URL */ public InternetDomainName getDomain() { return crawlRequest.getDomain(); } /** - * Returns the crawl depth of the candidate. + * Returns the crawl depth of the request. * - * @return The crawl depth of the candidate + * @return the crawl depth of the request */ public int getCrawlDepth() { return crawlDepth; } /** - * Returns the priority of the candidate. + * Returns the priority of the request. * - * @return The priority of the candidate + * @return the priority of the request */ public int getPriority() { return crawlRequest.getPriority(); } /** - * Returns the metadata associated with the candidate. + * Returns the metadata associated with the request. * - * @return The metadata associated with the candidate + * @return the metadata associated with the request */ public Optional getMetadata() { return crawlRequest.getMetadata(); } /** - * Builds crawl candidates to be crawled by the crawler. + * Builds {@link CrawlCandidate} instances. */ public static final class CrawlCandidateBuilder { @@ -104,7 +104,7 @@ public static final class CrawlCandidateBuilder { /** * Creates a {@link CrawlCandidateBuilder} instance. * - * @param request The {@link CrawlRequest} instance from which this + * @param request the CrawlRequest instance from which this * candidate is built */ public CrawlCandidateBuilder(final CrawlRequest request) { @@ -114,8 +114,8 @@ public CrawlCandidateBuilder(final CrawlRequest request) { /** * Sets the referer URL. * - * @param refererUrl The referer URL - * @return The {@link CrawlCandidateBuilder} instance + * @param refererUrl the referer URL + * @return the CrawlCandidateBuilder instance */ public CrawlCandidateBuilder setRefererUrl(final URI refererUrl) { this.refererUrl = refererUrl; @@ -123,10 +123,10 @@ public CrawlCandidateBuilder setRefererUrl(final URI refererUrl) { } /** - * Sets the crawl depth of the candidate. + * Sets the crawl depth of the request. * - * @param crawlDepth The crawl depth of the candidate - * @return The {@link CrawlCandidateBuilder} instance + * @param crawlDepth the crawl depth of the request + * @return the CrawlCandidateBuilder instance */ public CrawlCandidateBuilder setCrawlDepth(final int crawlDepth) { this.crawlDepth = crawlDepth; @@ -134,9 +134,9 @@ public CrawlCandidateBuilder setCrawlDepth(final int crawlDepth) { } /** - * Builds the configured {@link CrawlCandidate} instance. + * Builds the configured CrawlCandidate instance. * - * @return The configured {@link CrawlCandidate} instance + * @return the configured CrawlCandidate instance */ public CrawlCandidate build() { return new CrawlCandidate(this); diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java index 0c10e7b..a7bc47b 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java @@ -16,7 +16,8 @@ package com.github.peterbencze.serritor.api; /** - * Available crawl delay strategies that can be used by the crawler. + * Available crawl delay strategies which define how the delay between each + * request is determined. 
* * @author Peter Bencze */ diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java index 3f0f7a7..08038f2 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java @@ -47,7 +47,7 @@ private CrawlRequest(final CrawlRequestBuilder builder) { /** * Returns the request URL. * - * @return The request URL + * @return the request URL */ public URI getRequestUrl() { return requestUrl; @@ -56,7 +56,7 @@ public URI getRequestUrl() { /** * Returns the domain of the request URL. * - * @return The domain of the request URL + * @return the domain of the request URL */ public InternetDomainName getDomain() { return domain; @@ -65,7 +65,7 @@ public InternetDomainName getDomain() { /** * Returns the priority of the request. * - * @return The priority of the request + * @return the priority of the request */ public int getPriority() { return priority; @@ -74,14 +74,14 @@ public int getPriority() { /** * Returns the metadata associated with the request. * - * @return The metadata associated with the request + * @return the metadata associated with the request */ public Optional getMetadata() { return Optional.ofNullable(metadata); } /** - * Builds crawl requests which can be fed to the crawler. + * Builds {@link CrawlRequest} instances. */ public static final class CrawlRequestBuilder { @@ -96,7 +96,7 @@ public static final class CrawlRequestBuilder { /** * Creates a {@link CrawlRequestBuilder} instance. * - * @param requestUrl The request URL + * @param requestUrl the request URL */ public CrawlRequestBuilder(final URI requestUrl) { this.requestUrl = requestUrl; @@ -111,7 +111,7 @@ public CrawlRequestBuilder(final URI requestUrl) { /** * Creates a {@link CrawlRequestBuilder} instance. * - * @param requestUrl The request URL + * @param requestUrl the request URL */ public CrawlRequestBuilder(final String requestUrl) { this(URI.create(requestUrl)); @@ -120,9 +120,9 @@ public CrawlRequestBuilder(final String requestUrl) { /** * Sets the priority of the request. * - * @param priority The priority of the request (higher number means + * @param priority the priority of the request (higher number means * higher priority) - * @return The {@link CrawlRequestBuilder} instance + * @return the CrawlRequestBuilder instance */ public CrawlRequestBuilder setPriority(final int priority) { this.priority = priority; @@ -130,11 +130,10 @@ public CrawlRequestBuilder setPriority(final int priority) { } /** - * Sets the metadata of the request which can be later accessed when the - * crawler completed the request. + * Sets the metadata associated with the request. * - * @param metadata The metadata associated with the request - * @return The {@link CrawlRequestBuilder} instance + * @param metadata the metadata associated with the request + * @return the CrawlRequestBuilder instance */ public CrawlRequestBuilder setMetadata(final Serializable metadata) { this.metadata = Validate.notNull(metadata, "The metadata cannot be null."); @@ -142,9 +141,9 @@ public CrawlRequestBuilder setMetadata(final Serializable metadata) { } /** - * Builds the configured {@link CrawlRequest} instance. + * Builds the configured CrawlRequest instance. 
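
A hypothetical request built with a priority and attached metadata, as described by the builder above; the URL and the metadata value are made up for the example.

    import com.github.peterbencze.serritor.api.CrawlRequest;
    import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder;

    final class CrawlRequestExample {

        static CrawlRequest createProductPageRequest() {
            return new CrawlRequestBuilder("http://example.com/products")
                    // Higher number means higher priority
                    .setPriority(10)
                    // Any Serializable value can be attached and read back via getMetadata()
                    .setMetadata("product-listing")
                    .build();
        }
    }
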
* - * @return The configured {@link CrawlRequest} instance + * @return the configured CrawlRequest instance */ public CrawlRequest build() { return new CrawlRequest(this); diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java index c88435b..c505932 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java @@ -16,7 +16,8 @@ package com.github.peterbencze.serritor.api; /** - * Available strategies that can be used while crawling. + * Available crawl strategies that define the order in which crawl requests are + * processed. * * @author Peter Bencze */ diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java index 8cdaa71..bcc2d25 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java @@ -25,7 +25,7 @@ import org.apache.commons.lang3.Validate; /** - * This class contains the settings of the crawler. + * Contains the settings of the crawler. * * @author Peter Bencze */ @@ -58,7 +58,7 @@ private CrawlerConfiguration(final CrawlerConfigurationBuilder builder) { /** * Returns the set of allowed crawl domains. * - * @return The set of allowed crawl domains + * @return the set of allowed crawl domains */ public Set getAllowedCrawlDomains() { return allowedCrawlDomains; @@ -67,7 +67,7 @@ public Set getAllowedCrawlDomains() { /** * Returns the set of crawl seeds. * - * @return The set of crawl seeds + * @return the set of crawl seeds */ public Set getCrawlSeeds() { return crawlSeeds; @@ -76,14 +76,14 @@ public Set getCrawlSeeds() { /** * Returns the crawl strategy of the crawler. * - * @return The crawl strategy + * @return the crawl strategy of the crawler */ public CrawlStrategy getCrawlStrategy() { return crawlStrategy; } /** - * Indicates if duplicate request filtering is enabled or not. + * Indicates if duplicate request filtering is enabled. * * @return true if enabled, false otherwise */ @@ -92,7 +92,7 @@ public boolean isDuplicateRequestFilteringEnabled() { } /** - * Indicates if offsite request filtering is enabled or not. + * Indicates if offsite request filtering is enabled. * * @return true if enabled, false otherwise */ @@ -101,18 +101,18 @@ public boolean isOffsiteRequestFilteringEnabled() { } /** - * Returns the maximum possible crawl depth. + * Returns the maximum crawl depth. * - * @return The maximum crawl depth + * @return the maximum crawl depth */ public int getMaximumCrawlDepth() { return maxCrawlDepth; } /** - * Returns the crawl delay strategy used by the crawler. + * Returns the crawl delay strategy of the crawler. * - * @return The crawl delay strategy + * @return the crawl delay strategy of the crawler */ public CrawlDelayStrategy getCrawlDelayStrategy() { return crawlDelayStrategy; @@ -121,7 +121,7 @@ public CrawlDelayStrategy getCrawlDelayStrategy() { /** * Returns the exact duration of delay between each request. * - * @return The duration of delay in milliseconds + * @return the duration of delay in milliseconds */ public long getFixedCrawlDelayDurationInMillis() { return fixedCrawlDelayDurationInMillis; @@ -130,7 +130,7 @@ public long getFixedCrawlDelayDurationInMillis() { /** * Returns the minimum duration of delay between each request. 
* - * @return The minimum duration of delay in milliseconds + * @return the minimum duration of delay in milliseconds */ public long getMinimumCrawlDelayDurationInMillis() { return minCrawlDelayDurationInMillis; @@ -139,12 +139,15 @@ public long getMinimumCrawlDelayDurationInMillis() { /** * Returns the maximum duration of delay between each request. * - * @return The maximum duration of delay in milliseconds + * @return the maximum duration of delay in milliseconds */ public long getMaximumCrawlDelayDurationInMillis() { return maxCrawlDelayDurationInMillis; } + /** + * Builds {@link CrawlerConfiguration} instances. + */ public static final class CrawlerConfigurationBuilder { private static final CrawlStrategy DEFAULT_CRAWL_STRATEGY = CrawlStrategy.BREADTH_FIRST; @@ -168,6 +171,9 @@ public static final class CrawlerConfigurationBuilder { private long minCrawlDelayDurationInMillis; private long maxCrawlDelayDurationInMillis; + /** + * Creates a {@link CrawlerConfigurationBuilder} instance. + */ public CrawlerConfigurationBuilder() { // Initialize with default values allowedCrawlDomains = new HashSet<>(); @@ -185,8 +191,8 @@ public CrawlerConfigurationBuilder() { /** * Appends an internet domain to the list of allowed crawl domains. * - * @param allowedCrawlDomain A well-formed internet domain name - * @return The CrawlerConfigurationBuilder instance + * @param allowedCrawlDomain a well-formed internet domain name + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder addAllowedCrawlDomain(final String allowedCrawlDomain) { InternetDomainName domain = InternetDomainName.from(allowedCrawlDomain); @@ -201,9 +207,9 @@ public CrawlerConfigurationBuilder addAllowedCrawlDomain(final String allowedCra * Appends a list of internet domains to the list of allowed crawl * domains. * - * @param allowedCrawlDomains A list of well-formed internet domain + * @param allowedCrawlDomains a list of well-formed internet domain * names - * @return The CrawlerConfigurationBuilder instance + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder addAllowedCrawlDomains(final List allowedCrawlDomains) { allowedCrawlDomains.forEach(this::addAllowedCrawlDomain); @@ -213,9 +219,8 @@ public CrawlerConfigurationBuilder addAllowedCrawlDomains(final List all /** * Appends a crawl request to the set of crawl seeds. * - * @param request The CrawlRequest instance which - * represents the crawl seed - * @return The CrawlerConfigurationBuilder instance + * @param request the crawl request which represents a crawl seed + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder addCrawlSeed(final CrawlRequest request) { Validate.notNull(request, "The request cannot be null."); @@ -227,9 +232,9 @@ public CrawlerConfigurationBuilder addCrawlSeed(final CrawlRequest request) { /** * Appends a list of crawl requests to the set of crawl seeds. * - * @param requests The list of CrawlRequest instances which - * represent the crawl seeds - * @return The CrawlerConfigurationBuilder instance + * @param requests the list of crawl requests which represent crawl + * seeds + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder addCrawlSeeds(final List requests) { requests.forEach(this::addCrawlSeed); @@ -241,8 +246,8 @@ public CrawlerConfigurationBuilder addCrawlSeeds(final List reques * strategy orders crawl requests by the lowest crawl depth, whereas * depth-first orders them by the highest crawl depth. 
* - * @param strategy The crawl strategy - * @return The CrawlerConfigurationBuilder instance + * @param strategy the crawl strategy + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setCrawlStrategy(final CrawlStrategy strategy) { Validate.notNull(strategy, "The strategy cannot be null."); @@ -256,7 +261,7 @@ public CrawlerConfigurationBuilder setCrawlStrategy(final CrawlStrategy strategy * * @param filterDuplicateRequests true means enabled, * false means disabled - * @return The CrawlerConfigurationBuilder instance + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setDuplicateRequestFiltering(final boolean filterDuplicateRequests) { this.filterDuplicateRequests = filterDuplicateRequests; @@ -268,7 +273,7 @@ public CrawlerConfigurationBuilder setDuplicateRequestFiltering(final boolean fi * * @param filterOffsiteRequests true means enabled, * false means disabled - * @return The CrawlerConfigurationBuilder instance + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setOffsiteRequestFiltering(final boolean filterOffsiteRequests) { this.filterOffsiteRequests = filterOffsiteRequests; @@ -276,11 +281,11 @@ public CrawlerConfigurationBuilder setOffsiteRequestFiltering(final boolean filt } /** - * Sets the maximum possible crawl depth. It should be a non-negative - * number where 0 means there is no limit. + * Sets the maximum crawl depth. It should be a non-negative + * number (0 means no limit). * - * @param maxCrawlDepth The maximum crawl depth - * @return The CrawlerConfigurationBuilder instance + * @param maxCrawlDepth the maximum crawl depth + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setMaximumCrawlDepth(final int maxCrawlDepth) { Validate.isTrue(maxCrawlDepth >= 0, "The maximum crawl depth cannot be negative."); @@ -290,10 +295,11 @@ public CrawlerConfigurationBuilder setMaximumCrawlDepth(final int maxCrawlDepth) } /** - * Sets the crawl delay strategy to be used by the crawler. + * Sets the crawl delay strategy to be used by the crawler. This + * strategy defines how the delay between each request is determined. * - * @param strategy The crawl delay strategy - * @return The CrawlerConfigurationBuilder instance + * @param strategy the crawl delay strategy + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setCrawlDelayStrategy(final CrawlDelayStrategy strategy) { Validate.notNull(strategy, "The strategy cannot be null."); @@ -305,8 +311,8 @@ public CrawlerConfigurationBuilder setCrawlDelayStrategy(final CrawlDelayStrateg /** * Sets the exact duration of delay between each request. * - * @param fixedCrawlDelayDuration The duration of delay - * @return The CrawlerConfigurationBuilder instance + * @param fixedCrawlDelayDuration the duration of delay + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setFixedCrawlDelayDuration(final Duration fixedCrawlDelayDuration) { Validate.notNull(fixedCrawlDelayDuration, "The duration cannot be null."); @@ -318,8 +324,8 @@ public CrawlerConfigurationBuilder setFixedCrawlDelayDuration(final Duration fix /** * Sets the minimum duration of delay between each request. 
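
Putting the configuration options documented above together, a configuration might be built as in this sketch. The domain, seed URL, depth limit, and delay duration are example values only; the delay duration applies when the fixed crawl delay strategy is in effect.

    import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder;
    import com.github.peterbencze.serritor.api.CrawlStrategy;
    import com.github.peterbencze.serritor.api.CrawlerConfiguration;
    import com.github.peterbencze.serritor.api.CrawlerConfiguration.CrawlerConfigurationBuilder;
    import java.time.Duration;

    final class ConfigurationExample {

        static CrawlerConfiguration createConfiguration() {
            return new CrawlerConfigurationBuilder()
                    .addAllowedCrawlDomain("example.com")
                    .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build())
                    // Process crawl requests in breadth-first order (the default)
                    .setCrawlStrategy(CrawlStrategy.BREADTH_FIRST)
                    // Skip already crawled URLs and URLs outside the allowed domains
                    .setDuplicateRequestFiltering(true)
                    .setOffsiteRequestFiltering(true)
                    // 0 would mean no depth limit
                    .setMaximumCrawlDepth(3)
                    // One second delay between requests
                    .setFixedCrawlDelayDuration(Duration.ofSeconds(1))
                    .build();
        }
    }
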
* - * @param minCrawlDelayDuration The minimum duration of delay - * @return The CrawlerConfigurationBuilder instance + * @param minCrawlDelayDuration the minimum duration of delay + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setMinimumCrawlDelayDuration(final Duration minCrawlDelayDuration) { Validate.notNull(minCrawlDelayDuration, "The duration cannot be null."); @@ -336,8 +342,8 @@ public CrawlerConfigurationBuilder setMinimumCrawlDelayDuration(final Duration m /** * Sets the maximum duration of delay between each request. * - * @param maxCrawlDelayDuration The maximum duration of delay - * @return The CrawlerConfigurationBuilder instance + * @param maxCrawlDelayDuration the maximum duration of delay + * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setMaximumCrawlDelayDuration(final Duration maxCrawlDelayDuration) { Validate.notNull(maxCrawlDelayDuration, "The duration cannot be null."); @@ -353,7 +359,7 @@ public CrawlerConfigurationBuilder setMaximumCrawlDelayDuration(final Duration m /** * Builds the configured CrawlerConfiguration instance. * - * @return The configured CrawlerConfiguration instance + * @return the configured CrawlerConfiguration instance */ public CrawlerConfiguration build() { return new CrawlerConfiguration(this); diff --git a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java index 8347654..33946e2 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java +++ b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java @@ -34,8 +34,7 @@ import org.openqa.selenium.WebElement; /** - * A helper class which can be used to find URLs in HTML sources using regular - * expressions. + * Finds URLs in HTML page sources using regular expressions. * * @author Peter Bencze */ @@ -56,8 +55,8 @@ private UrlFinder(final UrlFinderBuilder builder) { /** * Returns a list of validated URLs found in the page's HTML source. * - * @param event the {@link PageLoadEvent} instance - * @return the list of found URLs in the page's HTML source + * @param event the PageLoadEvent instance + * @return the list of found URLs */ public List findUrlsInPage(final PageLoadEvent event) { Set foundUrls = new HashSet<>(); @@ -85,8 +84,8 @@ public List findUrlsInPage(final PageLoadEvent event) { /** * Returns a list of validated URLs found in the attribute's value. * - * @param attributeValue The value of the attribute - * @return The list of found URLs + * @param attributeValue the value of the attribute + * @return the list of found URLs */ private List findUrlsInAttributeValue(final String attributeValue) { List foundUrls = new ArrayList<>(); @@ -106,8 +105,11 @@ private List findUrlsInAttributeValue(final String attributeValue) { return foundUrls; } + /** + * Builds {@link UrlFinder} instances. + */ public static final class UrlFinderBuilder { - + private static final Set DEFAULT_LOCATING_MECHANISMS = Sets.newHashSet(By.tagName("a")); private static final Set DEFAULT_ATTRIBUTES = Sets.newHashSet("href"); private static final Predicate DEFAULT_VALIDATOR = UrlFinderBuilder::isValidUrl; @@ -119,21 +121,18 @@ public static final class UrlFinderBuilder { private Predicate validator; /** - * Constructs a UrlFinderBuilder instance that can be used - * to create UrlFinder instances. + * Creates a {@link UrlFinderBuilder} instance. 
* - * @param urlPattern The pattern which will be used to find URLs + * @param urlPattern the pattern to use to find URLs */ public UrlFinderBuilder(final Pattern urlPattern) { this(Arrays.asList(urlPattern)); } /** - * Constructs a UrlFinderBuilder instance that can be used - * to create UrlFinder instances. It + * Creates a {@link UrlFinderBuilder} instance. * - * @param urlPatterns The list of patterns which will be used to find - * URLs + * @param urlPatterns the list of patterns to use to find URLs */ public UrlFinderBuilder(final List urlPatterns) { Validate.noNullElements(urlPatterns, "URL patterns cannot be null."); @@ -148,9 +147,9 @@ public UrlFinderBuilder(final List urlPatterns) { * Sets the locating mechanism used by the finder. Only elements matched * by the locator will be considered when searching for URLs. * - * @param locatingMechanism The By locating mechanism + * @param locatingMechanism the By locating mechanism * instance - * @return The UrlFinderBuilder instance + * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setLocatingMechanism(final By locatingMechanism) { return setLocatingMechanisms(Arrays.asList(locatingMechanism)); @@ -160,9 +159,9 @@ public UrlFinderBuilder setLocatingMechanism(final By locatingMechanism) { * Sets the locating mechanisms used by the finder. Only elements * matched by the locators will be considered when searching for URLs. * - * @param locatingMechanisms The list of By locating + * @param locatingMechanisms the list of By locating * mechanism instances - * @return The UrlFinderBuilder instance + * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setLocatingMechanisms(final List locatingMechanisms) { Validate.noNullElements(locatingMechanisms, "Locating mechanisms cannot be null."); @@ -172,10 +171,10 @@ public UrlFinderBuilder setLocatingMechanisms(final List locatingMechanisms) } /** - * Sets which attributes to search for URLs. + * Sets the list of attribute names to search for URLs. * - * @param attributes The list of attribute names - * @return The UrlFinderBuilder instance + * @param attributes the list of attribute names + * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setAttributes(final List attributes) { Validate.noNullElements(attributes, "Attributes cannot be null."); @@ -185,10 +184,10 @@ public UrlFinderBuilder setAttributes(final List attributes) { } /** - * Sets which attribute to search for URLs. + * Sets the attribute name to search for URLs. * - * @param attribute The name of the attribute - * @return The UrlFinderBuilder instance + * @param attribute the attribute name + * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setAttribute(final String attribute) { return setAttributes(Arrays.asList(attribute)); @@ -197,8 +196,8 @@ public UrlFinderBuilder setAttribute(final String attribute) { /** * Sets a predicate to be used for validating found URLs. * - * @param validator The validator predicate - * @return The UrlFinderBuilder instance + * @param validator the validator predicate + * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setValidator(final Predicate validator) { Validate.notNull(validator, "The validator function cannot be null."); @@ -208,9 +207,9 @@ public UrlFinderBuilder setValidator(final Predicate validator) { } /** - * Builds the configured URL finder. + * Builds the configured UrlFinder instance. 
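
A typical use of UrlFinder is to feed the URLs found on a loaded page back to the crawler from the onPageLoad callback, roughly as sketched below. The class name and the URL pattern are illustrative; the found URLs are returned as strings and can be turned into new crawl requests.

    import com.github.peterbencze.serritor.api.BaseCrawler;
    import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder;
    import com.github.peterbencze.serritor.api.CrawlerConfiguration;
    import com.github.peterbencze.serritor.api.event.PageLoadEvent;
    import com.github.peterbencze.serritor.api.helper.UrlFinder;
    import com.github.peterbencze.serritor.api.helper.UrlFinder.UrlFinderBuilder;
    import java.util.regex.Pattern;

    public final class LinkFollowingCrawler extends BaseCrawler {

        // Only URLs matching this pattern are extracted from loaded pages
        private final UrlFinder urlFinder =
                new UrlFinderBuilder(Pattern.compile("https?://example\\.com/.*")).build();

        public LinkFollowingCrawler(final CrawlerConfiguration config) {
            super(config);
        }

        @Override
        protected void onPageLoad(final PageLoadEvent event) {
            // Extract matching URLs from the page and feed them back to the crawler
            urlFinder.findUrlsInPage(event).stream()
                    .map(url -> new CrawlRequestBuilder(url).build())
                    .forEach(this::crawl);
        }
    }
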
* - * @return The configured UrlFinder instance + * @return the configured UrlFinder instance */ public UrlFinder build() { return new UrlFinder(this); @@ -219,7 +218,7 @@ public UrlFinder build() { /** * The default URL validator function. * - * @param url The URL to be validated + * @param url the URL to validate * @return true if the URL is valid, false * otherwise */ diff --git a/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java index dfedfdb..06b0df1 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java @@ -32,12 +32,12 @@ public final class AdaptiveCrawlDelayMechanism implements CrawlDelayMechanism { private final JavascriptExecutor jsExecutor; /** - * Constructs a new AdaptiveCrawlDelayMechanism instance. + * Creates an {@link AdaptiveCrawlDelayMechanism} instance. * - * @param config The CrawlerConfiguration instance which - * specifies the minimum and maximum delay. - * @param jsExecutor The WebDriver instance which is capable of - * executing JavaScript. + * @param config the crawler configuration which specifies the minimum and + * maximum delay + * @param jsExecutor the {@link WebDriver} instance which is capable of + * executing JavaScript */ public AdaptiveCrawlDelayMechanism(final CrawlerConfiguration config, final JavascriptExecutor jsExecutor) { minDelayInMillis = config.getMinimumCrawlDelayDurationInMillis(); @@ -61,7 +61,7 @@ public boolean isBrowserCompatible() { * than the minimum, it returns the minimum delay. If the calculated delay * is higher than the maximum, it returns the maximum delay. * - * @return The delay in milliseconds + * @return the delay in milliseconds */ @Override public long getDelay() { diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlDelayMechanism.java index 34317b1..cbb4634 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlDelayMechanism.java @@ -16,16 +16,16 @@ package com.github.peterbencze.serritor.internal; /** - * An interface that every crawl delay mechanism should implement. - * + * An interface which should be implemented by every crawl delay mechanism. + * * @author Peter Bencze */ public interface CrawlDelayMechanism { - + /** - * Returns the delay that should pass between each request. - * - * @return The duration of delay in milliseconds + * Returns the delay which should pass between each request. + * + * @return the duration of delay in milliseconds */ long getDelay(); } diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java index 89bba42..7fb007e 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java @@ -29,19 +29,19 @@ public final class CrawlDomain implements Serializable { private final ImmutableList parts; /** - * Constructs a new CrawlDomain instance. + * Creates a CrawlDomain instance. 
* - * @param domain An immutable well-formed internet domain name + * @param domain an immutable well-formed internet domain name */ public CrawlDomain(final InternetDomainName domain) { parts = domain.parts(); } /** - * Indicates if two CrawlDomain instances are equal or not. + * Indicates if two CrawlDomain instances are equal. * Crawl domains with the same domain name are considered equal. * - * @param obj A CrawlDomain instance + * @param obj a CrawlDomain instance * @return true if equal, false otherwise */ @Override @@ -62,7 +62,7 @@ public boolean equals(final Object obj) { * Calculates the hash code from the individual components of the domain * name. * - * @return The hash code for the crawl domain + * @return the hash code for the crawl domain */ @Override public int hashCode() { @@ -72,7 +72,7 @@ public int hashCode() { /** * Indicates if this crawl domain contains the specific internet domain. * - * @param domain An immutable well-formed internet domain name + * @param domain an immutable well-formed internet domain name * @return true if belongs, false otherwise */ public boolean contains(final InternetDomainName domain) { diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java index c38cd3e..af7c839 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java @@ -16,9 +16,9 @@ package com.github.peterbencze.serritor.internal; import com.github.peterbencze.serritor.api.CrawlCandidate; -import com.github.peterbencze.serritor.api.CrawlerConfiguration; -import com.github.peterbencze.serritor.api.CrawlRequest; import com.github.peterbencze.serritor.api.CrawlCandidate.CrawlCandidateBuilder; +import com.github.peterbencze.serritor.api.CrawlRequest; +import com.github.peterbencze.serritor.api.CrawlerConfiguration; import java.io.Serializable; import java.net.URI; import java.util.Arrays; @@ -32,8 +32,7 @@ import org.apache.commons.codec.digest.DigestUtils; /** - * Provides an interface for the crawler to manage crawl requests while - * crawling. + * Manages crawl requests and provides crawl candidates to the crawler. * * @author Peter Bencze */ @@ -48,11 +47,16 @@ public final class CrawlFrontier implements Serializable { private CrawlCandidate currentCandidate; + /** + * Creates a {@link CrawlFrontier} instance. + * + * @param config the crawler configuration + */ public CrawlFrontier(final CrawlerConfiguration config) { this.config = config; allowedCrawlDomains = config.getAllowedCrawlDomains(); - + urlFingerprints = new HashSet<>(); // Construct a priority queue according to the crawl strategy specified in the configuration @@ -68,23 +72,22 @@ public CrawlFrontier(final CrawlerConfiguration config) { /** * Feeds a crawl request to the frontier. 
* - * @param request The CrawlRequest instance to be fed - * @param isCrawlSeed true if the request is a crawl seed, - * false otherwise + * @param request the crawl request + * @param isCrawlSeed indicates if the request is a crawl seed */ public void feedRequest(final CrawlRequest request, final boolean isCrawlSeed) { if (config.isOffsiteRequestFilteringEnabled()) { // Check if the request's domain is in the allowed crawl domains - + boolean inCrawlDomain = false; - + for (CrawlDomain allowedCrawlDomain : allowedCrawlDomains) { if (allowedCrawlDomain.contains(request.getDomain())) { inCrawlDomain = true; break; } } - + if (!inCrawlDomain) { return; } @@ -92,10 +95,9 @@ public void feedRequest(final CrawlRequest request, final boolean isCrawlSeed) { if (config.isDuplicateRequestFilteringEnabled()) { // Check if the URL has already been crawled - + String urlFingerprint = createFingerprintForUrl(request.getRequestUrl()); - if (urlFingerprints.contains(urlFingerprint)) { return; } @@ -135,9 +137,9 @@ public boolean hasNextCandidate() { } /** - * Gets the next candidate from the queue. + * Returns the next crawl candidate from the queue. * - * @return The next CrawlCandidate instance + * @return the next crawl candidate from the queue */ public CrawlCandidate getNextCandidate() { currentCandidate = candidates.poll(); @@ -147,11 +149,11 @@ public CrawlCandidate getNextCandidate() { /** * Creates the fingerprint of the given URL. * - * @param url The URL that the fingerprint will be created for - * @return The fingerprint of the URL + * @param url the URL for which the fingerprint is created + * @return the fingerprint of the URL */ private static String createFingerprintForUrl(final URI url) { - // First, we start off with the host only + // We start off with the host only StringBuilder truncatedUrl = new StringBuilder(url.getHost()); // If there is a path in the URL, we append it after the host @@ -174,15 +176,15 @@ private static String createFingerprintForUrl(final URI url) { .forEachOrdered(truncatedUrl::append); } - // Finally, create the SHA-256 hash return DigestUtils.sha256Hex(truncatedUrl.toString()); } /** - * Creates a new priority queue using the specified strategy. + * Creates a priority queue using the strategy specified in the + * configuration. * - * @return The PriorityQueue instance for crawl requests using - * the given comparator + * @return the priority queue using the strategy specified in the + * configuration */ private PriorityQueue createPriorityQueue() { switch (config.getCrawlStrategy()) { diff --git a/src/main/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanism.java index a3f84c8..2dcfa95 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanism.java @@ -28,10 +28,10 @@ public final class FixedCrawlDelayMechanism implements CrawlDelayMechanism { private final long delayInMillis; /** - * Constructs a new FixedCrawlDelayMechanism instance. + * Creates a {@link FixedCrawlDelayMechanism} instance. * - * @param config The CrawlerConfiguration instance which - * specifies the fixed delay duration. 
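The fingerprinting walked through above normalises a URL to "host + path + alphabetically sorted query parameters" before hashing, so that two URLs differing only in parameter order collapse to the same fingerprint (for example http://example.com/search?b=2&a=1 and http://example.com/search?a=1&b=2). A condensed restatement of that idea follows; it is illustrative and may differ in detail from the private createFingerprintForUrl method.

    import java.net.URI;
    import java.util.Arrays;
    import org.apache.commons.codec.digest.DigestUtils;

    final class FingerprintSketch {

        // Builds the canonical form of the URL and hashes it with SHA-256.
        static String fingerprint(final URI url) {
            StringBuilder canonical = new StringBuilder(url.getHost());
            if (url.getPath() != null) {
                canonical.append(url.getPath());
            }
            if (url.getQuery() != null) {
                Arrays.stream(url.getQuery().split("&"))
                        .sorted()
                        .forEachOrdered(canonical::append);
            }
            return DigestUtils.sha256Hex(canonical.toString());
        }
    }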
+ * @param config the crawler configuration which specifies the fixed delay + * duration */ public FixedCrawlDelayMechanism(final CrawlerConfiguration config) { this.delayInMillis = config.getFixedCrawlDelayDurationInMillis(); @@ -40,7 +40,7 @@ public FixedCrawlDelayMechanism(final CrawlerConfiguration config) { /** * Returns the fixed delay specified in the configuration. * - * @return The delay in milliseconds + * @return the delay in milliseconds */ @Override public long getDelay() { diff --git a/src/main/java/com/github/peterbencze/serritor/internal/RandomCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/RandomCrawlDelayMechanism.java index f8a7446..6353884 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/RandomCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/RandomCrawlDelayMechanism.java @@ -30,10 +30,10 @@ public final class RandomCrawlDelayMechanism implements CrawlDelayMechanism { private final long upperLimit; /** - * Constructs a new RandomCrawlDelayMechanism instance. + * Creates a {@link RandomCrawlDelayMechanism} instance. * - * @param config The CrawlerConfiguration instance which - * specifies the minimum and maximum delay. + * @param config the crawler configuration which specifies the minimum and + * maximum delay. */ public RandomCrawlDelayMechanism(final CrawlerConfiguration config) { lowerLimit = config.getMinimumCrawlDelayDurationInMillis(); @@ -44,7 +44,7 @@ public RandomCrawlDelayMechanism(final CrawlerConfiguration config) { * Returns a random delay between the minimum and maximum range specified in * the configuration. * - * @return The delay in milliseconds + * @return the delay in milliseconds */ @Override public long getDelay() { diff --git a/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java b/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java index 9412b5b..1758b17 100644 --- a/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java +++ b/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java @@ -29,50 +29,50 @@ import org.openqa.selenium.WebElement; /** - * Test cases for UrlFinder. + * Test cases for {@link UrlFinder}. 
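The random strategy above simply draws a uniform value between the configured minimum and maximum. A one-method sketch; whether the class treats the maximum as inclusive is not visible in this hunk, so the sketch assumes it is.

    import java.util.concurrent.ThreadLocalRandom;

    final class RandomDelaySketch {

        // Returns a uniformly random delay in [minDelayInMillis, maxDelayInMillis].
        static long randomDelay(final long minDelayInMillis, final long maxDelayInMillis) {
            return ThreadLocalRandom.current().nextLong(minDelayInMillis, maxDelayInMillis + 1);
        }
    }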
* * @author Peter Bencze */ public final class UrlFinderTest { - + private static final Pattern URL_PATTERN = Pattern.compile(".+valid-url.+"); private static final String ATTRIBUTE = "href"; private static final String TAG_NAME = "a"; private static final String VALID_URL = "http://valid-url.com"; private static final String INVALID_URL = "invalid-url"; private static final String URL_WITH_INVALID_DOMAIN = "http://invalid.domain"; - + private WebDriver mockedDriver; private PageLoadEvent mockedEvent; private WebElement mockedElementWithValidUrl; private WebElement mockedElementWithInvalidUrlFormat; - private WebElement mockedElementWithInvalidDomain; + private WebElement mockedElementWithInvalidDomain; private UrlFinder urlFinder; @Before public void initialize() { mockedEvent = Mockito.mock(PageLoadEvent.class); - + mockedDriver = Mockito.mock(WebDriver.class); Mockito.when(mockedEvent.getWebDriver()) - .thenReturn(mockedDriver); - + .thenReturn(mockedDriver); + mockedElementWithValidUrl = Mockito.mock(WebElement.class); Mockito.when(mockedElementWithValidUrl.getAttribute(Mockito.eq(ATTRIBUTE))) - .thenReturn(VALID_URL); - + .thenReturn(VALID_URL); + mockedElementWithInvalidUrlFormat = Mockito.mock(WebElement.class); Mockito.when(mockedElementWithInvalidUrlFormat.getAttribute(Mockito.eq(ATTRIBUTE))) - .thenReturn(INVALID_URL); - + .thenReturn(INVALID_URL); + mockedElementWithInvalidDomain = Mockito.mock(WebElement.class); Mockito.when(mockedElementWithInvalidDomain.getAttribute(Mockito.eq(ATTRIBUTE))) .thenReturn(URL_WITH_INVALID_DOMAIN); - List elementList = Arrays.asList(mockedElementWithValidUrl, mockedElementWithInvalidUrlFormat, mockedElementWithInvalidDomain); + List elementList = Arrays.asList(mockedElementWithValidUrl, mockedElementWithInvalidUrlFormat, mockedElementWithInvalidDomain); Mockito.when(mockedDriver.findElements(By.tagName(TAG_NAME))) .thenReturn(elementList); - + urlFinder = new UrlFinderBuilder(URL_PATTERN).build(); } diff --git a/src/test/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanismTest.java b/src/test/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanismTest.java index 60d5b3e..aef99c0 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanismTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanismTest.java @@ -15,7 +15,6 @@ */ package com.github.peterbencze.serritor.internal; - import com.github.peterbencze.serritor.api.CrawlerConfiguration; import java.time.Duration; import org.junit.Assert; @@ -25,61 +24,61 @@ import org.openqa.selenium.JavascriptExecutor; /** - * Test cases for AdaptiveCrawlDelayMechanism. - * + * Test cases for {@link AdaptiveCrawlDelayMechanism}. 
+ * * @author Peter Bencze */ public final class AdaptiveCrawlDelayMechanismTest { - + private static final long LOWER_DELAY_DURATION_IN_MILLIS = Duration.ZERO.toMillis(); - private static final long MINIMUM_DELAY_DURATION_IN_MILLIS = Duration.ofSeconds(1).toMillis(); + private static final long MINIMUM_DELAY_DURATION_IN_MILLIS = Duration.ofSeconds(1).toMillis(); private static final long IN_RANGE_DELAY_DURATION_IN_MILLIS = Duration.ofSeconds(2).toMillis(); private static final long MAXIMUM_DELAY_DURATION_IN_MILLIS = Duration.ofSeconds(3).toMillis(); private static final long HIGHER_DELAY_DURATION_IN_MILLIS = Duration.ofSeconds(4).toMillis(); - + private CrawlerConfiguration mockedConfig; - private JavascriptExecutor mockedJsExecutor; + private JavascriptExecutor mockedJsExecutor; private AdaptiveCrawlDelayMechanism crawlDelayMechanism; - + @Before public void initialize() { mockedConfig = Mockito.mock(CrawlerConfiguration.class); Mockito.when(mockedConfig.getMinimumCrawlDelayDurationInMillis()) - .thenReturn(MINIMUM_DELAY_DURATION_IN_MILLIS); + .thenReturn(MINIMUM_DELAY_DURATION_IN_MILLIS); Mockito.when(mockedConfig.getMaximumCrawlDelayDurationInMillis()) .thenReturn(MAXIMUM_DELAY_DURATION_IN_MILLIS); - + mockedJsExecutor = Mockito.mock(JavascriptExecutor.class); - + crawlDelayMechanism = new AdaptiveCrawlDelayMechanism(mockedConfig, mockedJsExecutor); } - + @Test public void testDelayLowerThanMinimum() { // Return a delay which is lower than the predefined minimum Mockito.when(mockedJsExecutor.executeScript(Mockito.anyString())) .thenReturn(LOWER_DELAY_DURATION_IN_MILLIS); - + // The minimum delay should be returned Assert.assertEquals(mockedConfig.getMinimumCrawlDelayDurationInMillis(), crawlDelayMechanism.getDelay()); } - + @Test public void testDelayHigherThanMaximum() { // Return a delay which is higher than the predefined maximum Mockito.when(mockedJsExecutor.executeScript(Mockito.anyString())) .thenReturn(HIGHER_DELAY_DURATION_IN_MILLIS); - + // The maximum delay should be returned Assert.assertEquals(mockedConfig.getMaximumCrawlDelayDurationInMillis(), crawlDelayMechanism.getDelay()); } - + @Test public void testDelayBetweenRange() { // Return an in range delay Mockito.when(mockedJsExecutor.executeScript(Mockito.anyString())) .thenReturn(IN_RANGE_DELAY_DURATION_IN_MILLIS); - + // The in range delay should be returned Assert.assertEquals(IN_RANGE_DELAY_DURATION_IN_MILLIS, crawlDelayMechanism.getDelay()); } diff --git a/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java b/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java index 8226d10..81d390d 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java @@ -20,46 +20,46 @@ import org.junit.Test; /** - * Test cases for CrawlDomain. - * + * Test cases for {@link CrawlDomain}. 
+ * * @author Peter Bencze */ public final class CrawlDomainTest { - + private static final InternetDomainName DOMAIN = InternetDomainName.from("test.com"); private static final InternetDomainName SUBDOMAIN = InternetDomainName.from("sub.test.com"); - + private static final int DOMAIN_PARTS_HASHCODE = DOMAIN.parts().hashCode(); - + private static final CrawlDomain CRAWL_DOMAIN_0 = new CrawlDomain(DOMAIN); private static final CrawlDomain CRAWL_DOMAIN_1 = new CrawlDomain(DOMAIN); private static final CrawlDomain CRAWL_DOMAIN_2 = new CrawlDomain(SUBDOMAIN); - + @Test public void testEquals() { // A crawl domain should be equal with itself Assert.assertEquals(CRAWL_DOMAIN_0, CRAWL_DOMAIN_0); - + // Crawl domains with the same domain should be equal Assert.assertEquals(CRAWL_DOMAIN_0, CRAWL_DOMAIN_1); - + // Crawl domains with different domains should not be equal Assert.assertNotEquals(CRAWL_DOMAIN_0, CRAWL_DOMAIN_2); } - + @Test public void testHashCode() { Assert.assertEquals(DOMAIN_PARTS_HASHCODE, CRAWL_DOMAIN_0.hashCode()); } - + @Test public void testContains() { // A crawl domain should contain its own domain Assert.assertTrue(CRAWL_DOMAIN_0.contains(DOMAIN)); - + // A crawl domain should contain its own domain's subdomain Assert.assertTrue(CRAWL_DOMAIN_0.contains(SUBDOMAIN)); - + // A crawl domain should not contain a domain different from its own domain Assert.assertFalse(CRAWL_DOMAIN_2.contains(DOMAIN)); } diff --git a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java index 6e38a26..0413ffa 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java @@ -16,10 +16,10 @@ package com.github.peterbencze.serritor.internal; import com.github.peterbencze.serritor.api.CrawlCandidate; -import com.github.peterbencze.serritor.api.CrawlerConfiguration; import com.github.peterbencze.serritor.api.CrawlRequest; import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder; import com.github.peterbencze.serritor.api.CrawlStrategy; +import com.github.peterbencze.serritor.api.CrawlerConfiguration; import com.github.peterbencze.serritor.api.CrawlerConfiguration.CrawlerConfigurationBuilder; import java.net.URI; import java.util.Arrays; @@ -31,7 +31,7 @@ import org.mockito.Mockito; /** - * Test cases for CrawlFrontier. + * Test cases for {@link CrawlFrontier}. * * @author Peter Bencze */ diff --git a/src/test/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanismTest.java b/src/test/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanismTest.java index b2955bc..6ac7ed4 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanismTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanismTest.java @@ -23,21 +23,21 @@ import org.mockito.Mockito; /** - * Test cases for FixedCrawlDelayMechanism. - * + * Test cases for {@link FixedCrawlDelayMechanism}. 
+ * * @author Peter Bencze */ public class FixedCrawlDelayMechanismTest { - + private CrawlerConfiguration config; private FixedCrawlDelayMechanism crawlDelayMechanism; - + @Before public void initialize() { config = Mockito.spy(new CrawlerConfigurationBuilder().build()); crawlDelayMechanism = new FixedCrawlDelayMechanism(config); } - + @Test public void testGetDelay() { // The delay should be the same as in the configuration From 024d6c0570d158dbceebf8c3086f0b682da8966e Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 3 Jun 2018 23:43:46 +0200 Subject: [PATCH 10/28] Fix javadoc link --- .../serritor/internal/AdaptiveCrawlDelayMechanism.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java index 06b0df1..1c95712 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java @@ -36,8 +36,8 @@ public final class AdaptiveCrawlDelayMechanism implements CrawlDelayMechanism { * * @param config the crawler configuration which specifies the minimum and * maximum delay - * @param jsExecutor the {@link WebDriver} instance which is capable of - * executing JavaScript + * @param jsExecutor the {@link org.openqa.selenium.WebDriver} instance + * which is capable of executing JavaScript */ public AdaptiveCrawlDelayMechanism(final CrawlerConfiguration config, final JavascriptExecutor jsExecutor) { minDelayInMillis = config.getMinimumCrawlDelayDurationInMillis(); From d658886dc114bf618d402750431cac554ef26263 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sun, 3 Jun 2018 23:56:56 +0200 Subject: [PATCH 11/28] Refactor crawl delay mechanisms to a separate package --- .../com/github/peterbencze/serritor/api/BaseCrawler.java | 8 ++++---- .../AdaptiveCrawlDelayMechanism.java | 2 +- .../{ => crawldelaymechanism}/CrawlDelayMechanism.java | 2 +- .../FixedCrawlDelayMechanism.java | 2 +- .../RandomCrawlDelayMechanism.java | 2 +- .../AdaptiveCrawlDelayMechanismTest.java | 2 +- .../FixedCrawlDelayMechanismTest.java | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) rename src/main/java/com/github/peterbencze/serritor/internal/{ => crawldelaymechanism}/AdaptiveCrawlDelayMechanism.java (97%) rename src/main/java/com/github/peterbencze/serritor/internal/{ => crawldelaymechanism}/CrawlDelayMechanism.java (92%) rename src/main/java/com/github/peterbencze/serritor/internal/{ => crawldelaymechanism}/FixedCrawlDelayMechanism.java (95%) rename src/main/java/com/github/peterbencze/serritor/internal/{ => crawldelaymechanism}/RandomCrawlDelayMechanism.java (96%) rename src/test/java/com/github/peterbencze/serritor/internal/{ => crawldelaymechanism}/AdaptiveCrawlDelayMechanismTest.java (97%) rename src/test/java/com/github/peterbencze/serritor/internal/{ => crawldelaymechanism}/FixedCrawlDelayMechanismTest.java (95%) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 83b0498..008f7d8 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -21,11 +21,11 @@ import com.github.peterbencze.serritor.api.event.PageLoadTimeoutEvent; import com.github.peterbencze.serritor.api.event.RequestErrorEvent; 
import com.github.peterbencze.serritor.api.event.RequestRedirectEvent; -import com.github.peterbencze.serritor.internal.AdaptiveCrawlDelayMechanism; -import com.github.peterbencze.serritor.internal.CrawlDelayMechanism; +import com.github.peterbencze.serritor.internal.crawldelaymechanism.AdaptiveCrawlDelayMechanism; +import com.github.peterbencze.serritor.internal.crawldelaymechanism.CrawlDelayMechanism; import com.github.peterbencze.serritor.internal.CrawlFrontier; -import com.github.peterbencze.serritor.internal.FixedCrawlDelayMechanism; -import com.github.peterbencze.serritor.internal.RandomCrawlDelayMechanism; +import com.github.peterbencze.serritor.internal.crawldelaymechanism.FixedCrawlDelayMechanism; +import com.github.peterbencze.serritor.internal.crawldelaymechanism.RandomCrawlDelayMechanism; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; diff --git a/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanism.java similarity index 97% rename from src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java rename to src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanism.java index 1c95712..6d3926d 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanism.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.internal; +package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import org.openqa.selenium.JavascriptExecutor; diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/CrawlDelayMechanism.java similarity index 92% rename from src/main/java/com/github/peterbencze/serritor/internal/CrawlDelayMechanism.java rename to src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/CrawlDelayMechanism.java index cbb4634..d788ece 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/CrawlDelayMechanism.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.internal; +package com.github.peterbencze.serritor.internal.crawldelaymechanism; /** * An interface which should be implemented by every crawl delay mechanism. 
diff --git a/src/main/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanism.java similarity index 95% rename from src/main/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanism.java rename to src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanism.java index 2dcfa95..f287e8a 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanism.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.internal; +package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; diff --git a/src/main/java/com/github/peterbencze/serritor/internal/RandomCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/RandomCrawlDelayMechanism.java similarity index 96% rename from src/main/java/com/github/peterbencze/serritor/internal/RandomCrawlDelayMechanism.java rename to src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/RandomCrawlDelayMechanism.java index 6353884..cd2b035 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/RandomCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/RandomCrawlDelayMechanism.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.internal; +package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import java.util.concurrent.ThreadLocalRandom; diff --git a/src/test/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanismTest.java b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanismTest.java similarity index 97% rename from src/test/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanismTest.java rename to src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanismTest.java index aef99c0..98340d3 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/AdaptiveCrawlDelayMechanismTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanismTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.github.peterbencze.serritor.internal; +package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import java.time.Duration; diff --git a/src/test/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanismTest.java b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanismTest.java similarity index 95% rename from src/test/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanismTest.java rename to src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanismTest.java index 6ac7ed4..d0a96ce 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/FixedCrawlDelayMechanismTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanismTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.github.peterbencze.serritor.internal; +package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import com.github.peterbencze.serritor.api.CrawlerConfiguration.CrawlerConfigurationBuilder; From a76d4d4947850f7cfd810a92624769c2242331ba Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Mon, 4 Jun 2018 00:53:13 +0200 Subject: [PATCH 12/28] Add handling for JavaScript redirects --- .../peterbencze/serritor/api/BaseCrawler.java | 105 ++++++++++-------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 008f7d8..3be6c69 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -21,9 +21,9 @@ import com.github.peterbencze.serritor.api.event.PageLoadTimeoutEvent; import com.github.peterbencze.serritor.api.event.RequestErrorEvent; import com.github.peterbencze.serritor.api.event.RequestRedirectEvent; +import com.github.peterbencze.serritor.internal.CrawlFrontier; import com.github.peterbencze.serritor.internal.crawldelaymechanism.AdaptiveCrawlDelayMechanism; import com.github.peterbencze.serritor.internal.crawldelaymechanism.CrawlDelayMechanism; -import com.github.peterbencze.serritor.internal.CrawlFrontier; import com.github.peterbencze.serritor.internal.crawldelaymechanism.FixedCrawlDelayMechanism; import com.github.peterbencze.serritor.internal.crawldelaymechanism.RandomCrawlDelayMechanism; import java.io.IOException; @@ -205,8 +205,7 @@ private void run() { while (!stopCrawling && crawlFrontier.hasNextCandidate()) { CrawlCandidate currentCandidate = crawlFrontier.getNextCandidate(); - URI candidateUrl = currentCandidate.getCandidateUrl(); - URI responseUrl = candidateUrl; + String candidateUrl = currentCandidate.getCandidateUrl().toString(); HttpClientContext context = HttpClientContext.create(); HttpResponse httpHeadResponse = null; boolean isUnsuccessfulRequest = false; @@ -223,32 +222,34 @@ private void run() { } if (!isUnsuccessfulRequest) { + String responseUrl = candidateUrl; List redirectLocations = context.getRedirectLocations(); if (redirectLocations != null) { - // If the request has been redirected, get the final URL - responseUrl = redirectLocations.get(redirectLocations.size() - 1); + // If the request was redirected, get the 
final URL + responseUrl = redirectLocations.get(redirectLocations.size() - 1).toString(); } if (!responseUrl.equals(candidateUrl)) { - // If the request has been redirected, a new crawl request should be created for the redirected URL - CrawlRequestBuilder builder = new CrawlRequestBuilder(responseUrl).setPriority(currentCandidate.getPriority()); - currentCandidate.getMetadata().ifPresent(builder::setMetadata); - CrawlRequest redirectedRequest = builder.build(); - - crawlFrontier.feedRequest(redirectedRequest, false); - onRequestRedirect(new RequestRedirectEvent(currentCandidate, redirectedRequest)); + // If the request was redirected, a new crawl request should be created for the redirected URL + handleRequestRedirect(currentCandidate, responseUrl); } else if (isContentHtml(httpHeadResponse)) { boolean isTimedOut = false; TimeoutException exception = null; try { // Open the URL in the browser - webDriver.get(candidateUrl.toString()); + webDriver.get(candidateUrl); } catch (TimeoutException exc) { isTimedOut = true; exception = exc; } + String loadedPageUrl = webDriver.getCurrentUrl(); + if (!loadedPageUrl.equals(candidateUrl)) { + // If the request was redirected (using JavaScript), a new crawl request should be created for the redirected URL + handleRequestRedirect(currentCandidate, loadedPageUrl); + } + if (!isTimedOut) { onPageLoad(new PageLoadEvent(currentCandidate, webDriver)); } else { @@ -266,6 +267,29 @@ private void run() { onStop(); } + /** + * Creates the crawl delay mechanism according to the configuration. + * + * @return the created crawl delay mechanism + */ + private CrawlDelayMechanism createCrawlDelayMechanism() { + switch (config.getCrawlDelayStrategy()) { + case FIXED: + return new FixedCrawlDelayMechanism(config); + case RANDOM: + return new RandomCrawlDelayMechanism(config); + case ADAPTIVE: + AdaptiveCrawlDelayMechanism adaptiveCrawlDelay = new AdaptiveCrawlDelayMechanism(config, (JavascriptExecutor) webDriver); + if (!adaptiveCrawlDelay.isBrowserCompatible()) { + throw new UnsupportedOperationException("The Navigation Timing API is not supported by the browser."); + } + + return adaptiveCrawlDelay; + } + + throw new IllegalArgumentException("Unsupported crawl delay strategy."); + } + /** * Sends an HTTP HEAD request to the given URL and returns the response. * @@ -274,8 +298,8 @@ private void run() { * request * @return the HTTP HEAD response */ - private HttpResponse getHttpHeadResponse(final URI destinationUrl, final HttpClientContext context) throws IOException { - HttpHead headRequest = new HttpHead(destinationUrl.toString()); + private HttpResponse getHttpHeadResponse(final String destinationUrl, final HttpClientContext context) throws IOException { + HttpHead headRequest = new HttpHead(destinationUrl); return httpClient.execute(headRequest, context); } @@ -292,38 +316,19 @@ private static boolean isContentHtml(final HttpResponse httpHeadResponse) { } /** - * Creates the crawl delay mechanism according to the configuration. + * Creates a crawl request for the redirected URL, feeds it to the crawler + * and calls the appropriate event callback. 
* - * @return the created crawl delay mechanism + * @param currentCrawlCandidate the current crawl candidate + * @param redirectedUrl the URL of the redirected request */ - private CrawlDelayMechanism createCrawlDelayMechanism() { - switch (config.getCrawlDelayStrategy()) { - case FIXED: - return new FixedCrawlDelayMechanism(config); - case RANDOM: - return new RandomCrawlDelayMechanism(config); - case ADAPTIVE: - AdaptiveCrawlDelayMechanism adaptiveCrawlDelay = new AdaptiveCrawlDelayMechanism(config, (JavascriptExecutor) webDriver); - if (!adaptiveCrawlDelay.isBrowserCompatible()) { - throw new UnsupportedOperationException("The Navigation Timing API is not supported by the browser."); - } - - return adaptiveCrawlDelay; - } + private void handleRequestRedirect(final CrawlCandidate currentCrawlCandidate, final String redirectedUrl) { + CrawlRequestBuilder builder = new CrawlRequestBuilder(redirectedUrl).setPriority(currentCrawlCandidate.getPriority()); + currentCrawlCandidate.getMetadata().ifPresent(builder::setMetadata); + CrawlRequest redirectedRequest = builder.build(); - throw new IllegalArgumentException("Unsupported crawl delay strategy."); - } - - /** - * Delays the next request. - */ - private void performDelay() { - try { - TimeUnit.MILLISECONDS.sleep(crawlDelayMechanism.getDelay()); - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - stopCrawling = true; - } + crawlFrontier.feedRequest(redirectedRequest, false); + onRequestRedirect(new RequestRedirectEvent(currentCrawlCandidate, redirectedRequest)); } /** @@ -358,6 +363,18 @@ private static BasicClientCookie convertBrowserCookie(final Cookie browserCookie return clientCookie; } + /** + * Delays the next request. + */ + private void performDelay() { + try { + TimeUnit.MILLISECONDS.sleep(crawlDelayMechanism.getDelay()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + stopCrawling = true; + } + } + /** * Callback which gets called when the crawler is started. 
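To see how the redirect handling and the event callbacks touched in this patch surface to users of the library, a minimal subclass might look like the following. MyCrawler and the printed messages are invented for illustration; only the overridden callback signatures come from the API itself.

    import com.github.peterbencze.serritor.api.BaseCrawler;
    import com.github.peterbencze.serritor.api.CrawlerConfiguration;
    import com.github.peterbencze.serritor.api.event.PageLoadEvent;
    import com.github.peterbencze.serritor.api.event.RequestRedirectEvent;

    public final class MyCrawler extends BaseCrawler {

        public MyCrawler(final CrawlerConfiguration config) {
            super(config);
        }

        @Override
        protected void onPageLoad(final PageLoadEvent event) {
            // Called when a page has loaded successfully; the browser is exposed
            // through the event.
            System.out.println("Loaded: " + event.getWebDriver().getCurrentUrl());
        }

        @Override
        protected void onRequestRedirect(final RequestRedirectEvent event) {
            // After this patch, fired for both HTTP and JavaScript redirects.
            System.out.println("Request was redirected.");
        }
    }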
*/ From 9a3c1020bc4fd41ee870c0cb51675dc7d45cfa22 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Mon, 4 Jun 2018 23:51:24 +0200 Subject: [PATCH 13/28] Call page load timeout event callback in the catch clause --- .../peterbencze/serritor/api/BaseCrawler.java | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 3be6c69..da2be12 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -233,27 +233,19 @@ private void run() { // If the request was redirected, a new crawl request should be created for the redirected URL handleRequestRedirect(currentCandidate, responseUrl); } else if (isContentHtml(httpHeadResponse)) { - boolean isTimedOut = false; - TimeoutException exception = null; - try { - // Open the URL in the browser + // Open URL in browser webDriver.get(candidateUrl); - } catch (TimeoutException exc) { - isTimedOut = true; - exception = exc; + } catch (TimeoutException exception) { + onPageLoadTimeout(new PageLoadTimeoutEvent(currentCandidate, exception)); } - + String loadedPageUrl = webDriver.getCurrentUrl(); if (!loadedPageUrl.equals(candidateUrl)) { // If the request was redirected (using JavaScript), a new crawl request should be created for the redirected URL handleRequestRedirect(currentCandidate, loadedPageUrl); - } - - if (!isTimedOut) { - onPageLoad(new PageLoadEvent(currentCandidate, webDriver)); } else { - onPageLoadTimeout(new PageLoadTimeoutEvent(currentCandidate, exception)); + onPageLoad(new PageLoadEvent(currentCandidate, webDriver)); } } else { // URLs that point to non-HTML content should not be opened in the browser From c7887a3c19e7f47526a4e91082a82f00d0a907b9 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Tue, 5 Jun 2018 23:13:46 +0200 Subject: [PATCH 14/28] Fix NPE when the host is undefined --- .../com/github/peterbencze/serritor/api/helper/UrlFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java index 33946e2..eaafc4a 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java +++ b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java @@ -225,7 +225,7 @@ public UrlFinder build() { private static boolean isValidUrl(final String url) { try { return InternetDomainName.isValid(URI.create(url).getHost()); - } catch (IllegalArgumentException e) { + } catch (IllegalArgumentException | NullPointerException exc) { return false; } } From f878f8f8aa89460473493d4da1d2b3cd223309aa Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Tue, 5 Jun 2018 23:14:50 +0200 Subject: [PATCH 15/28] Update dependency and plugin versions --- pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 85de756..6501efe 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.github.peterbencze serritor - 1.3.2 + 1.4.0 jar Serritor @@ -59,12 +59,12 @@ org.seleniumhq.selenium htmlunit-driver - 2.30.0 + 2.31.0 com.google.guava guava - 25.0-jre + 25.1-jre junit @@ -98,7 +98,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.10.4 + 3.0.1 attach-javadoc From cfc83fd1ab0ed037ccf69898f57e7e7766c3220c Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Tue, 5 Jun 2018 
23:30:54 +0200 Subject: [PATCH 16/28] Rename getCandidateUrl to getRequestUrl --- .../peterbencze/serritor/api/BaseCrawler.java | 2 +- .../serritor/api/CrawlCandidate.java | 2 +- .../api/event/NonHtmlContentEvent.java | 2 +- .../serritor/internal/CrawlFrontier.java | 2 +- .../serritor/internal/CrawlFrontierTest.java | 24 +++++++++---------- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index da2be12..971b196 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -205,7 +205,7 @@ private void run() { while (!stopCrawling && crawlFrontier.hasNextCandidate()) { CrawlCandidate currentCandidate = crawlFrontier.getNextCandidate(); - String candidateUrl = currentCandidate.getCandidateUrl().toString(); + String candidateUrl = currentCandidate.getRequestUrl().toString(); HttpClientContext context = HttpClientContext.create(); HttpResponse httpHeadResponse = null; boolean isUnsuccessfulRequest = false; diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java index c219c0e..48ed647 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java @@ -51,7 +51,7 @@ public URI getRefererUrl() { * * @return the URL of the request */ - public URI getCandidateUrl() { + public URI getRequestUrl() { return crawlRequest.getRequestUrl(); } diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java index 7fc4670..3fe89e6 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java @@ -45,6 +45,6 @@ public NonHtmlContentEvent(final CrawlCandidate crawlCandidate) { * downloading the file */ public void downloadFile(final File destination) throws IOException { - FileUtils.copyURLToFile(getCrawlCandidate().getCandidateUrl().toURL(), destination); + FileUtils.copyURLToFile(getCrawlCandidate().getRequestUrl().toURL(), destination); } } diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java index af7c839..0c47bb8 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java @@ -116,7 +116,7 @@ public void feedRequest(final CrawlRequest request, final boolean isCrawlSeed) { return; } - builder = new CrawlCandidateBuilder(request).setRefererUrl(currentCandidate.getCandidateUrl()) + builder = new CrawlCandidateBuilder(request).setRefererUrl(currentCandidate.getRequestUrl()) .setCrawlDepth(nextCrawlDepth); } else { builder = new CrawlCandidateBuilder(request); diff --git a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java index 0413ffa..a477d51 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java @@ -182,7 +182,7 @@ 
public void testDisabledDuplicateRequestFiltering() { Assert.assertTrue(frontier.hasNextCandidate()); // Check if the URLs match - Assert.assertEquals(DUPLICATE_ROOT_URL_0, frontier.getNextCandidate().getCandidateUrl()); + Assert.assertEquals(DUPLICATE_ROOT_URL_0, frontier.getNextCandidate().getRequestUrl()); } @Test @@ -212,7 +212,7 @@ public void testDisabledOffsiteRequestFiltering() { Assert.assertTrue(frontier.hasNextCandidate()); // Check if the URLs match - Assert.assertEquals(OFFSITE_URL.toString(), frontier.getNextCandidate().getCandidateUrl().toString()); + Assert.assertEquals(OFFSITE_URL.toString(), frontier.getNextCandidate().getRequestUrl().toString()); } @Test @@ -221,7 +221,7 @@ public void testGetNextCandidateUsingBreadthFirstCrawlStrategy() { CrawlCandidate nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be root URL 1. - Assert.assertEquals(ROOT_URL_1, nextCandidate.getCandidateUrl()); + Assert.assertEquals(ROOT_URL_1, nextCandidate.getRequestUrl()); // Check the crawl depth of this candidate, it should be 0 because it is a root URL. Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -236,7 +236,7 @@ public void testGetNextCandidateUsingBreadthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be root URL 0. - Assert.assertEquals(ROOT_URL_0, nextCandidate.getCandidateUrl()); + Assert.assertEquals(ROOT_URL_0, nextCandidate.getRequestUrl()); // Check the crawl depth of this candidate, it should be 0 again because it is also a root URL. Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -252,7 +252,7 @@ public void testGetNextCandidateUsingBreadthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be child URL 2. - Assert.assertEquals(CHILD_URL_2.toString(), nextCandidate.getCandidateUrl().toString()); + Assert.assertEquals(CHILD_URL_2.toString(), nextCandidate.getRequestUrl().toString()); // Check the crawl depth of this candidate, it should be 1 because it is a child URL that comes from root URL 1. Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -265,7 +265,7 @@ public void testGetNextCandidateUsingBreadthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this request, it should be a child URL. - Assert.assertTrue(nextCandidate.getCandidateUrl().toString().contains(CHILD_URL_PATH)); + Assert.assertTrue(nextCandidate.getRequestUrl().toString().contains(CHILD_URL_PATH)); // Check the crawl depth of this candidate, it should be 1 again because it is a child URL that comes from root URL 0. Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -277,7 +277,7 @@ public void testGetNextCandidateUsingBreadthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be a child URL. - Assert.assertTrue(nextCandidate.getCandidateUrl().toString().contains(CHILD_URL_PATH)); + Assert.assertTrue(nextCandidate.getRequestUrl().toString().contains(CHILD_URL_PATH)); // Check the crawl depth of this candidate, it should be 1 again becaise it is another child URL that also comes from root URL 0. 
Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -301,7 +301,7 @@ public void testGetNextCandidateUsingDepthFirstCrawlStrategy() { CrawlCandidate nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be root URL 1 - Assert.assertEquals(ROOT_URL_1, nextCandidate.getCandidateUrl()); + Assert.assertEquals(ROOT_URL_1, nextCandidate.getRequestUrl()); // Check the crawl depth of this candidate, it should be 0 because it is a root URL Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -317,7 +317,7 @@ public void testGetNextCandidateUsingDepthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be a child URL - Assert.assertTrue(nextCandidate.getCandidateUrl().toString().contains(CHILD_URL_PATH)); + Assert.assertTrue(nextCandidate.getRequestUrl().toString().contains(CHILD_URL_PATH)); // Check the crawl depth of this candidate, it should be 1 because it is a child URL that comes from root URL 1 Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -329,7 +329,7 @@ public void testGetNextCandidateUsingDepthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be root URL 0 - Assert.assertEquals(ROOT_URL_0, nextCandidate.getCandidateUrl()); + Assert.assertEquals(ROOT_URL_0, nextCandidate.getRequestUrl()); // Check the crawl depth of this candidate, it should be 0 again because it is also a root URL Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -345,7 +345,7 @@ public void testGetNextCandidateUsingDepthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be child URL 0 - Assert.assertEquals(CHILD_URL_0.toString(), nextCandidate.getCandidateUrl().toString()); + Assert.assertEquals(CHILD_URL_0.toString(), nextCandidate.getRequestUrl().toString()); // Check the crawl depth of this candidate, it should be 1 again because it is a child URL that comes from root URL 0 Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); @@ -357,7 +357,7 @@ public void testGetNextCandidateUsingDepthFirstCrawlStrategy() { nextCandidate = frontier.getNextCandidate(); // Check the URL of this candidate, it should be child URL 1 - Assert.assertEquals(CHILD_URL_1.toString(), nextCandidate.getCandidateUrl().toString()); + Assert.assertEquals(CHILD_URL_1.toString(), nextCandidate.getRequestUrl().toString()); // Check the crawl depth of this candidate, it should be 1 again becaise it is a child URL that also comes from root URL 0 Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); From 34eaaa7bdae0e42ae3a7b45cda21bdc2b2393b9f Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Mon, 11 Jun 2018 22:07:13 +0200 Subject: [PATCH 17/28] Add checkstyle plugin, reformat code to comply with the style guide --- checkstyle.xml | 255 ++++++++++++++++++ pom.xml | 23 ++ .../peterbencze/serritor/api/BaseCrawler.java | 95 ++++--- .../serritor/api/CrawlCandidate.java | 8 +- .../serritor/api/CrawlDelayStrategy.java | 10 +- .../serritor/api/CrawlRequest.java | 12 +- .../serritor/api/CrawlStrategy.java | 6 +- .../serritor/api/CrawlerConfiguration.java | 88 +++--- .../api/event/NonHtmlContentEvent.java | 8 +- .../serritor/api/event/PageLoadEvent.java | 5 +- .../api/event/PageLoadTimeoutEvent.java | 21 +- .../serritor/api/event/RequestErrorEvent.java | 5 +- 
.../api/event/RequestRedirectEvent.java | 8 +- .../serritor/api/helper/UrlFinder.java | 28 +- .../serritor/internal/CrawlDomain.java | 12 +- .../serritor/internal/CrawlFrontier.java | 55 ++-- .../serritor/internal/EventObject.java | 3 +- .../AdaptiveCrawlDelayMechanism.java | 38 +-- .../CrawlDelayMechanism.java | 3 +- .../FixedCrawlDelayMechanism.java | 10 +- .../RandomCrawlDelayMechanism.java | 13 +- .../serritor/api/helper/UrlFinderTest.java | 7 +- .../serritor/internal/CrawlDomainTest.java | 13 +- .../serritor/internal/CrawlFrontierTest.java | 156 +++-------- .../AdaptiveCrawlDelayMechanismTest.java | 15 +- .../FixedCrawlDelayMechanismTest.java | 7 +- 26 files changed, 573 insertions(+), 331 deletions(-) create mode 100644 checkstyle.xml diff --git a/checkstyle.xml b/checkstyle.xml new file mode 100644 index 0000000..52ef575 --- /dev/null +++ b/checkstyle.xml @@ -0,0 +1,255 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index 6501efe..d57489c 100644 --- a/pom.xml +++ b/pom.xml @@ -108,6 +108,29 @@ + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.0.0 + + + com.puppycrawl.tools + checkstyle + 8.10.1 + + + + true + checkstyle.xml + + + + + check + + + + org.apache.maven.plugins maven-gpg-plugin diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 971b196..e200a84 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api; import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder; @@ -50,8 +51,8 @@ import org.openqa.selenium.htmlunit.HtmlUnitDriver; /** - * Provides a skeletal implementation of a crawler to minimize the effort for - * users to implement their own. + * Provides a skeletal implementation of a crawler to minimize the effort for users to implement + * their own. * * @author Peter Bencze */ @@ -87,11 +88,9 @@ public final void start() { } /** - * Starts the crawler using the browser specified by the given - * WebDriver instance. + * Starts the crawler using the browser specified by the given WebDriver instance. * - * @param webDriver the WebDriver instance to control the - * browser + * @param webDriver the WebDriver instance to control the browser */ public final void start(final WebDriver webDriver) { start(webDriver, new CrawlFrontier(config)); @@ -100,8 +99,8 @@ public final void start(final WebDriver webDriver) { /** * Initializes and runs the crawler. 
* - * @param crawlFrontier the CrawlFrontier instance to be used - * by the crawler to manage crawl requests + * @param crawlFrontier the CrawlFrontier instance to be used by the crawler to + * manage crawl requests */ private void start(final WebDriver webDriver, final CrawlFrontier crawlFrontier) { try { @@ -133,7 +132,8 @@ private void start(final WebDriver webDriver, final CrawlFrontier crawlFrontier) */ public final void saveState(final OutputStream out) { // Check if the crawler has been started at least once, otherwise we have nothing to save - Validate.validState(crawlFrontier != null, "Cannot save state at this point. The crawler should be started first."); + Validate.validState(crawlFrontier != null, + "Cannot save state at this point. The crawler should be started first."); // Save the crawl frontier's current state SerializationUtils.serialize(crawlFrontier, out); @@ -152,9 +152,8 @@ public final void resumeState(final InputStream in) { * Resumes a previously saved state using the browser specified by the given * WebDriver instance. * - * @param webDriver the WebDriver instance to control the - * browser - * @param in the input stream from which the state should be loaded + * @param webDriver the WebDriver instance to control the browser + * @param in the input stream from which the state should be loaded */ public final void resumeState(final WebDriver webDriver, final InputStream in) { // Re-create crawl frontier from the saved state @@ -175,21 +174,22 @@ public final void stop() { } /** - * Feeds a crawl request to the crawler. The crawler should be running, - * otherwise the request has to be added as a crawl seed instead. + * Feeds a crawl request to the crawler. The crawler should be running, otherwise the request + * has to be added as a crawl seed instead. * * @param request the crawl request */ protected final void crawl(final CrawlRequest request) { Validate.notNull(request, "The request cannot be null."); - Validate.validState(!isStopped, "The crawler is not started. Maybe you meant to add this request as a crawl seed?"); + Validate.validState(!isStopped, + "The crawler is not started. Maybe you meant to add this request as a crawl seed?"); crawlFrontier.feedRequest(request, false); } /** - * Feeds multiple crawl requests to the crawler. The crawler should be - * running, otherwise the requests have to be added as crawl seeds instead. + * Feeds multiple crawl requests to the crawler. The crawler should be running, otherwise the + * requests have to be added as crawl seeds instead. * * @param requests the list of crawl requests */ @@ -210,11 +210,11 @@ private void run() { HttpResponse httpHeadResponse = null; boolean isUnsuccessfulRequest = false; - // Update the client's cookie store, so it will have the same state as the browser. 
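The saveState and resumeState pair reformatted above is what makes the serialized CrawlFrontier useful across runs; a usage sketch follows. The helper class and the crawler-state.bin file name are placeholders, and saveState requires the crawler to have been started at least once.

    import com.github.peterbencze.serritor.api.BaseCrawler;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    final class StatePersistenceSketch {

        // Saves the crawl frontier to disk, then resumes a later crawl from it.
        static void saveAndResume(final BaseCrawler crawler) throws IOException {
            try (OutputStream out = new FileOutputStream("crawler-state.bin")) {
                crawler.saveState(out);
            }
            try (InputStream in = new FileInputStream("crawler-state.bin")) {
                crawler.resumeState(in);
            }
        }
    }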
+ // Update the client's cookie store, so it will have the same state as the browser updateClientCookieStore(); try { - // Send an HTTP HEAD request to the current URL to determine its availability and content type + // Send an HTTP HEAD request to determine its availability and content type httpHeadResponse = getHttpHeadResponse(candidateUrl, context); } catch (IOException exception) { onRequestError(new RequestErrorEvent(currentCandidate, exception)); @@ -230,7 +230,7 @@ private void run() { } if (!responseUrl.equals(candidateUrl)) { - // If the request was redirected, a new crawl request should be created for the redirected URL + // Create a new crawl request for the redirected URL handleRequestRedirect(currentCandidate, responseUrl); } else if (isContentHtml(httpHeadResponse)) { try { @@ -239,10 +239,10 @@ private void run() { } catch (TimeoutException exception) { onPageLoadTimeout(new PageLoadTimeoutEvent(currentCandidate, exception)); } - + String loadedPageUrl = webDriver.getCurrentUrl(); if (!loadedPageUrl.equals(candidateUrl)) { - // If the request was redirected (using JavaScript), a new crawl request should be created for the redirected URL + // Create a new crawl request for the redirected URL (JavaScript redirect) handleRequestRedirect(currentCandidate, loadedPageUrl); } else { onPageLoad(new PageLoadEvent(currentCandidate, webDriver)); @@ -264,6 +264,7 @@ private void run() { * * @return the created crawl delay mechanism */ + @SuppressWarnings("checkstyle:MissingSwitchDefault") private CrawlDelayMechanism createCrawlDelayMechanism() { switch (config.getCrawlDelayStrategy()) { case FIXED: @@ -271,12 +272,14 @@ private CrawlDelayMechanism createCrawlDelayMechanism() { case RANDOM: return new RandomCrawlDelayMechanism(config); case ADAPTIVE: - AdaptiveCrawlDelayMechanism adaptiveCrawlDelay = new AdaptiveCrawlDelayMechanism(config, (JavascriptExecutor) webDriver); - if (!adaptiveCrawlDelay.isBrowserCompatible()) { - throw new UnsupportedOperationException("The Navigation Timing API is not supported by the browser."); + AdaptiveCrawlDelayMechanism mechanism + = new AdaptiveCrawlDelayMechanism(config, (JavascriptExecutor) webDriver); + if (!mechanism.isBrowserCompatible()) { + throw new UnsupportedOperationException("The Navigation Timing API is not " + + "supported by the browser."); } - return adaptiveCrawlDelay; + return mechanism; } throw new IllegalArgumentException("Unsupported crawl delay strategy."); @@ -286,11 +289,14 @@ private CrawlDelayMechanism createCrawlDelayMechanism() { * Sends an HTTP HEAD request to the given URL and returns the response. * * @param destinationUrl the destination URL - * @throws IOException if an error occurs while trying to fulfill the - * request + * * @return the HTTP HEAD response + * + * @throws IOException if an error occurs while trying to fulfill the request */ - private HttpResponse getHttpHeadResponse(final String destinationUrl, final HttpClientContext context) throws IOException { + private HttpResponse getHttpHeadResponse( + final String destinationUrl, + final HttpClientContext context) throws IOException { HttpHead headRequest = new HttpHead(destinationUrl); return httpClient.execute(headRequest, context); } @@ -299,8 +305,8 @@ private HttpResponse getHttpHeadResponse(final String destinationUrl, final Http * Indicates if the response's content type is HTML. 
* * @param httpHeadResponse the HTTP HEAD response - * @return true if the content type is HTML, false - * otherwise + * + * @return true if the content type is HTML, false otherwise */ private static boolean isContentHtml(final HttpResponse httpHeadResponse) { Header contentTypeHeader = httpHeadResponse.getFirstHeader("Content-Type"); @@ -308,14 +314,17 @@ private static boolean isContentHtml(final HttpResponse httpHeadResponse) { } /** - * Creates a crawl request for the redirected URL, feeds it to the crawler - * and calls the appropriate event callback. + * Creates a crawl request for the redirected URL, feeds it to the crawler and calls the + * appropriate event callback. * * @param currentCrawlCandidate the current crawl candidate - * @param redirectedUrl the URL of the redirected request + * @param redirectedUrl the URL of the redirected request */ - private void handleRequestRedirect(final CrawlCandidate currentCrawlCandidate, final String redirectedUrl) { - CrawlRequestBuilder builder = new CrawlRequestBuilder(redirectedUrl).setPriority(currentCrawlCandidate.getPriority()); + private void handleRequestRedirect( + final CrawlCandidate currentCrawlCandidate, + final String redirectedUrl) { + CrawlRequestBuilder builder = new CrawlRequestBuilder(redirectedUrl) + .setPriority(currentCrawlCandidate.getPriority()); currentCrawlCandidate.getMetadata().ifPresent(builder::setMetadata); CrawlRequest redirectedRequest = builder.build(); @@ -324,8 +333,8 @@ private void handleRequestRedirect(final CrawlCandidate currentCrawlCandidate, f } /** - * Adds all the browser cookies for the current domain to the HTTP client's - * cookie store, replacing any existing equivalent ones. + * Adds all the browser cookies for the current domain to the HTTP client's cookie store, + * replacing any existing equivalent ones. */ private void updateClientCookieStore() { webDriver.manage() @@ -339,10 +348,12 @@ private void updateClientCookieStore() { * Converts a browser cookie to a HTTP client one. * * @param browserCookie the browser cookie to be converted + * * @return the converted HTTP client cookie */ private static BasicClientCookie convertBrowserCookie(final Cookie browserCookie) { - BasicClientCookie clientCookie = new BasicClientCookie(browserCookie.getName(), browserCookie.getValue()); + BasicClientCookie clientCookie + = new BasicClientCookie(browserCookie.getName(), browserCookie.getValue()); clientCookie.setDomain(browserCookie.getDomain()); clientCookie.setPath(browserCookie.getPath()); clientCookie.setExpiryDate(browserCookie.getExpiry()); @@ -406,8 +417,8 @@ protected void onRequestRedirect(final RequestRedirectEvent event) { } /** - * Callback which gets called when the page does not load in the browser - * within the timeout period. + * Callback which gets called when the page does not load in the browser within the timeout + * period. * * @param event the PageLoadTimeoutEvent instance */ diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java index 48ed647..9b238d7 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlCandidate.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package com.github.peterbencze.serritor.api; import com.google.common.net.InternetDomainName; @@ -104,8 +105,7 @@ public static final class CrawlCandidateBuilder { /** * Creates a {@link CrawlCandidateBuilder} instance. * - * @param request the CrawlRequest instance from which this - * candidate is built + * @param request the CrawlRequest instance from which this candidate is built */ public CrawlCandidateBuilder(final CrawlRequest request) { crawlRequest = request; @@ -115,6 +115,7 @@ public CrawlCandidateBuilder(final CrawlRequest request) { * Sets the referer URL. * * @param refererUrl the referer URL + * * @return the CrawlCandidateBuilder instance */ public CrawlCandidateBuilder setRefererUrl(final URI refererUrl) { @@ -126,6 +127,7 @@ public CrawlCandidateBuilder setRefererUrl(final URI refererUrl) { * Sets the crawl depth of the request. * * @param crawlDepth the crawl depth of the request + * * @return the CrawlCandidateBuilder instance */ public CrawlCandidateBuilder setCrawlDepth(final int crawlDepth) { diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java index a7bc47b..4a80d8b 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlDelayStrategy.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api; /** - * Available crawl delay strategies which define how the delay between each - * request is determined. - * + * Available crawl delay strategies which define how the delay between each request is determined. + * * @author Peter Bencze */ public enum CrawlDelayStrategy { - + FIXED, ADAPTIVE, RANDOM diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java index 08038f2..f1c6e4a 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlRequest.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api; import com.google.common.net.InternetDomainName; @@ -24,8 +25,8 @@ import org.apache.commons.lang3.Validate; /** - * Represents a crawl request that may be completed by the crawler. If request - * filtering is enabled, it could get filtered out. + * Represents a crawl request that may be completed by the crawler. If request filtering is enabled, + * it could get filtered out. * * @author Peter Bencze */ @@ -120,8 +121,8 @@ public CrawlRequestBuilder(final String requestUrl) { /** * Sets the priority of the request. * - * @param priority the priority of the request (higher number means - * higher priority) + * @param priority the priority of the request (higher number means higher priority) + * * @return the CrawlRequestBuilder instance */ public CrawlRequestBuilder setPriority(final int priority) { @@ -133,6 +134,7 @@ public CrawlRequestBuilder setPriority(final int priority) { * Sets the metadata associated with the request. 
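To make the builder API in this hunk concrete, here is a minimal sketch of constructing a prioritized crawl request. It assumes CrawlRequestBuilder is the nested builder of CrawlRequest, as the test code later in this series suggests; the URL is a placeholder.

import com.github.peterbencze.serritor.api.CrawlRequest;
import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder;

public final class CrawlRequestSketch {

    public static void main(final String[] args) {
        // Higher numbers mean higher priority, as the builder's Javadoc states.
        CrawlRequest request = new CrawlRequestBuilder("https://example.com")
                .setPriority(1)
                .build();

        System.out.println(request.getRequestUrl());
    }
}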
* * @param metadata the metadata associated with the request + * * @return the CrawlRequestBuilder instance */ public CrawlRequestBuilder setMetadata(final Serializable metadata) { diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java index c505932..e449892 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlStrategy.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api; /** - * Available crawl strategies that define the order in which crawl requests are - * processed. + * Available crawl strategies that define the order in which crawl requests are processed. * * @author Peter Bencze */ diff --git a/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java b/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java index bcc2d25..d5aef15 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java +++ b/src/main/java/com/github/peterbencze/serritor/api/CrawlerConfiguration.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api; import com.github.peterbencze.serritor.internal.CrawlDomain; @@ -155,9 +156,12 @@ public static final class CrawlerConfigurationBuilder { private static final boolean FILTER_OFFSITE_REQUESTS_BY_DEFAULT = false; private static final int DEFAULT_MAX_CRAWL_DEPTH = 0; private static final CrawlDelayStrategy DEFAULT_CRAWL_DELAY = CrawlDelayStrategy.FIXED; - private static final long DEFAULT_FIXED_CRAWL_DELAY_IN_MILLIS = Duration.ZERO.toMillis(); - private static final long DEFAULT_MIN_CRAWL_DELAY_IN_MILLIS = Duration.ofSeconds(1).toMillis(); - private static final long DEFAULT_MAX_CRAWL_DELAY_IN_MILLIS = Duration.ofMinutes(1).toMillis(); + private static final long DEFAULT_FIXED_CRAWL_DELAY_IN_MILLIS + = Duration.ZERO.toMillis(); + private static final long DEFAULT_MIN_CRAWL_DELAY_IN_MILLIS + = Duration.ofSeconds(1).toMillis(); + private static final long DEFAULT_MAX_CRAWL_DELAY_IN_MILLIS + = Duration.ofMinutes(1).toMillis(); private final Set allowedCrawlDomains; private final Set crawlSeeds; @@ -192,26 +196,29 @@ public CrawlerConfigurationBuilder() { * Appends an internet domain to the list of allowed crawl domains. * * @param allowedCrawlDomain a well-formed internet domain name + * * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder addAllowedCrawlDomain(final String allowedCrawlDomain) { InternetDomainName domain = InternetDomainName.from(allowedCrawlDomain); - Validate.isTrue(domain.isUnderPublicSuffix(), String.format("The domain (\"%s\") is not under public suffix.", allowedCrawlDomain)); + Validate.isTrue(domain.isUnderPublicSuffix(), + String.format("The domain (\"%s\") is not under public suffix.", + allowedCrawlDomain)); allowedCrawlDomains.add(new CrawlDomain(domain)); return this; } /** - * Appends a list of internet domains to the list of allowed crawl - * domains. 
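The addAllowedCrawlDomain method above rejects names that are not under a public suffix. The following stand-alone snippet illustrates the Guava check it relies on; "example.com" is a placeholder domain.

import com.google.common.net.InternetDomainName;
import org.apache.commons.lang3.Validate;

public final class DomainValidationSketch {

    public static void main(final String[] args) {
        // "example.com" is under the public suffix "com", so it passes;
        // a bare suffix such as "com" would fail the isUnderPublicSuffix check.
        InternetDomainName domain = InternetDomainName.from("example.com");
        Validate.isTrue(domain.isUnderPublicSuffix(),
                String.format("The domain (\"%s\") is not under public suffix.", "example.com"));

        System.out.println(domain.topPrivateDomain());
    }
}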
+ * Appends a list of internet domains to the list of allowed crawl domains. + * + * @param allowedCrawlDomains a list of well-formed internet domain names * - * @param allowedCrawlDomains a list of well-formed internet domain - * names * @return the CrawlerConfigurationBuilder instance */ - public CrawlerConfigurationBuilder addAllowedCrawlDomains(final List allowedCrawlDomains) { + public CrawlerConfigurationBuilder addAllowedCrawlDomains( + final List allowedCrawlDomains) { allowedCrawlDomains.forEach(this::addAllowedCrawlDomain); return this; } @@ -220,6 +227,7 @@ public CrawlerConfigurationBuilder addAllowedCrawlDomains(final List all * Appends a crawl request to the set of crawl seeds. * * @param request the crawl request which represents a crawl seed + * * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder addCrawlSeed(final CrawlRequest request) { @@ -232,8 +240,8 @@ public CrawlerConfigurationBuilder addCrawlSeed(final CrawlRequest request) { /** * Appends a list of crawl requests to the set of crawl seeds. * - * @param requests the list of crawl requests which represent crawl - * seeds + * @param requests the list of crawl requests which represent crawl seeds + * * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder addCrawlSeeds(final List requests) { @@ -242,11 +250,12 @@ public CrawlerConfigurationBuilder addCrawlSeeds(final List reques } /** - * Sets the crawl strategy to be used by the crawler. Breadth-first - * strategy orders crawl requests by the lowest crawl depth, whereas - * depth-first orders them by the highest crawl depth. + * Sets the crawl strategy to be used by the crawler. Breadth-first strategy orders crawl + * requests by the lowest crawl depth, whereas depth-first orders them by the highest crawl + * depth. * * @param strategy the crawl strategy + * * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setCrawlStrategy(final CrawlStrategy strategy) { @@ -259,11 +268,13 @@ public CrawlerConfigurationBuilder setCrawlStrategy(final CrawlStrategy strategy /** * Enables or disables duplicate request filtering. * - * @param filterDuplicateRequests true means enabled, - * false means disabled + * @param filterDuplicateRequests true means enabled, false means + * disabled + * * @return the CrawlerConfigurationBuilder instance */ - public CrawlerConfigurationBuilder setDuplicateRequestFiltering(final boolean filterDuplicateRequests) { + public CrawlerConfigurationBuilder setDuplicateRequestFiltering( + final boolean filterDuplicateRequests) { this.filterDuplicateRequests = filterDuplicateRequests; return this; } @@ -271,20 +282,22 @@ public CrawlerConfigurationBuilder setDuplicateRequestFiltering(final boolean fi /** * Enables or disables offsite request filtering. * - * @param filterOffsiteRequests true means enabled, - * false means disabled + * @param filterOffsiteRequests true means enabled, false means + * disabled + * * @return the CrawlerConfigurationBuilder instance */ - public CrawlerConfigurationBuilder setOffsiteRequestFiltering(final boolean filterOffsiteRequests) { + public CrawlerConfigurationBuilder setOffsiteRequestFiltering( + final boolean filterOffsiteRequests) { this.filterOffsiteRequests = filterOffsiteRequests; return this; } /** - * Sets the maximum crawl depth. It should be a non-negative - * number (0 means no limit). + * Sets the maximum crawl depth. It should be a non-negative number (0 means no limit). 
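Putting the builder methods from this hunk together, a configuration might be assembled roughly as follows. The nested-class imports and the final build() call are assumptions based on the builder pattern used throughout the project, not something visible in this hunk; the domain, seed URL, and depth are placeholders.

import com.github.peterbencze.serritor.api.CrawlRequest;
import com.github.peterbencze.serritor.api.CrawlRequest.CrawlRequestBuilder;
import com.github.peterbencze.serritor.api.CrawlStrategy;
import com.github.peterbencze.serritor.api.CrawlerConfiguration;
import com.github.peterbencze.serritor.api.CrawlerConfiguration.CrawlerConfigurationBuilder;

public final class ConfigurationSketch {

    public static void main(final String[] args) {
        CrawlRequest seed = new CrawlRequestBuilder("https://example.com").build();

        CrawlerConfiguration config = new CrawlerConfigurationBuilder()
                .addAllowedCrawlDomain("example.com")   // offsite filtering uses this set
                .addCrawlSeed(seed)
                .setCrawlStrategy(CrawlStrategy.BREADTH_FIRST)
                .setOffsiteRequestFiltering(true)
                .setDuplicateRequestFiltering(true)
                .setMaximumCrawlDepth(2)                // 0 would mean no limit
                .build();

        System.out.println(config.getMaximumCrawlDepth());
    }
}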
* * @param maxCrawlDepth the maximum crawl depth + * * @return the CrawlerConfigurationBuilder instance */ public CrawlerConfigurationBuilder setMaximumCrawlDepth(final int maxCrawlDepth) { @@ -295,13 +308,15 @@ public CrawlerConfigurationBuilder setMaximumCrawlDepth(final int maxCrawlDepth) } /** - * Sets the crawl delay strategy to be used by the crawler. This - * strategy defines how the delay between each request is determined. + * Sets the crawl delay strategy to be used by the crawler. This strategy defines how the + * delay between each request is determined. * * @param strategy the crawl delay strategy + * * @return the CrawlerConfigurationBuilder instance */ - public CrawlerConfigurationBuilder setCrawlDelayStrategy(final CrawlDelayStrategy strategy) { + public CrawlerConfigurationBuilder setCrawlDelayStrategy( + final CrawlDelayStrategy strategy) { Validate.notNull(strategy, "The strategy cannot be null."); crawlDelayStrategy = strategy; @@ -312,9 +327,11 @@ public CrawlerConfigurationBuilder setCrawlDelayStrategy(final CrawlDelayStrateg * Sets the exact duration of delay between each request. * * @param fixedCrawlDelayDuration the duration of delay + * * @return the CrawlerConfigurationBuilder instance */ - public CrawlerConfigurationBuilder setFixedCrawlDelayDuration(final Duration fixedCrawlDelayDuration) { + public CrawlerConfigurationBuilder setFixedCrawlDelayDuration( + final Duration fixedCrawlDelayDuration) { Validate.notNull(fixedCrawlDelayDuration, "The duration cannot be null."); fixedCrawlDelayDurationInMillis = fixedCrawlDelayDuration.toMillis(); @@ -325,15 +342,19 @@ public CrawlerConfigurationBuilder setFixedCrawlDelayDuration(final Duration fix * Sets the minimum duration of delay between each request. * * @param minCrawlDelayDuration the minimum duration of delay + * * @return the CrawlerConfigurationBuilder instance */ - public CrawlerConfigurationBuilder setMinimumCrawlDelayDuration(final Duration minCrawlDelayDuration) { + public CrawlerConfigurationBuilder setMinimumCrawlDelayDuration( + final Duration minCrawlDelayDuration) { Validate.notNull(minCrawlDelayDuration, "The duration cannot be null."); - Validate.isTrue(!minCrawlDelayDuration.isNegative(), "The minimum crawl delay cannot be negative."); + Validate.isTrue(!minCrawlDelayDuration.isNegative(), + "The minimum crawl delay cannot be negative."); long minDelayDurationInMillis = minCrawlDelayDuration.toMillis(); - Validate.isTrue(minDelayDurationInMillis < maxCrawlDelayDurationInMillis, "The minimum crawl delay should be less than the maximum."); + Validate.isTrue(minDelayDurationInMillis < maxCrawlDelayDurationInMillis, + "The minimum crawl delay should be less than the maximum."); minCrawlDelayDurationInMillis = minDelayDurationInMillis; return this; @@ -343,14 +364,17 @@ public CrawlerConfigurationBuilder setMinimumCrawlDelayDuration(final Duration m * Sets the maximum duration of delay between each request. 
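The delay-related setters above validate that the minimum stays below the maximum (defaults are one second and one minute), so when both limits change the call order matters. A hedged sketch with placeholder durations, again assuming a conventional build() method:

import com.github.peterbencze.serritor.api.CrawlDelayStrategy;
import com.github.peterbencze.serritor.api.CrawlerConfiguration;
import com.github.peterbencze.serritor.api.CrawlerConfiguration.CrawlerConfigurationBuilder;
import java.time.Duration;

public final class DelayConfigurationSketch {

    public static void main(final String[] args) {
        // Raising the minimum to 2s first is fine because the default maximum is 1 minute;
        // only then is the maximum lowered to 10s.
        CrawlerConfiguration config = new CrawlerConfigurationBuilder()
                .setCrawlDelayStrategy(CrawlDelayStrategy.RANDOM)
                .setMinimumCrawlDelayDuration(Duration.ofSeconds(2))
                .setMaximumCrawlDelayDuration(Duration.ofSeconds(10))
                .build();

        System.out.println(config.getMinimumCrawlDelayDurationInMillis());
    }
}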
* * @param maxCrawlDelayDuration the maximum duration of delay + * * @return the CrawlerConfigurationBuilder instance */ - public CrawlerConfigurationBuilder setMaximumCrawlDelayDuration(final Duration maxCrawlDelayDuration) { + public CrawlerConfigurationBuilder setMaximumCrawlDelayDuration( + final Duration maxCrawlDelayDuration) { Validate.notNull(maxCrawlDelayDuration, "The duration cannot be null."); long maxDelayDurationInMillis = maxCrawlDelayDuration.toMillis(); - Validate.isTrue(maxDelayDurationInMillis > minCrawlDelayDurationInMillis, "The maximum crawl delay should be higher than the minimum."); + Validate.isTrue(maxDelayDurationInMillis > minCrawlDelayDurationInMillis, + "The maximum crawl delay should be higher than the minimum."); maxCrawlDelayDurationInMillis = maxDelayDurationInMillis; return this; diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java index 3fe89e6..932df11 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/NonHtmlContentEvent.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api.event; import com.github.peterbencze.serritor.api.CrawlCandidate; @@ -41,8 +42,9 @@ public NonHtmlContentEvent(final CrawlCandidate crawlCandidate) { * Downloads the file specified by the URL. * * @param destination the destination file - * @throws IOException if the URL cannot be opened or I/O error occurs while - * downloading the file + * + * @throws IOException if the URL cannot be opened or I/O error occurs while downloading the + * file */ public void downloadFile(final File destination) throws IOException { FileUtils.copyURLToFile(getCrawlCandidate().getRequestUrl().toURL(), destination); diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadEvent.java index cd3726b..d83e394 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadEvent.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadEvent.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api.event; import com.github.peterbencze.serritor.api.CrawlCandidate; @@ -32,7 +33,7 @@ public final class PageLoadEvent extends EventObject { * Creates a {@link PageLoadEvent} instance. 
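NonHtmlContentEvent.downloadFile, shown above, copies the resource behind the request URL to a local file. A small usage sketch; the destination path is a placeholder, and the crawler callback in which this would typically run is not part of this hunk.

import com.github.peterbencze.serritor.api.event.NonHtmlContentEvent;
import java.io.File;
import java.io.IOException;

final class DownloadSketch {

    private DownloadSketch() {
    }

    // Saves the non-HTML resource (e.g. a PDF) pointed to by the event's request URL.
    static void saveResource(final NonHtmlContentEvent event) throws IOException {
        event.downloadFile(new File("downloads/resource.bin"));
    }
}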
* * @param crawlCandidate the current crawl candidate - * @param webDriver the WebDriver to control the browser + * @param webDriver the WebDriver to control the browser */ public PageLoadEvent(final CrawlCandidate crawlCandidate, final WebDriver webDriver) { super(crawlCandidate); diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java index a2b88b8..e6c3e16 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/PageLoadTimeoutEvent.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api.event; import com.github.peterbencze.serritor.api.CrawlCandidate; @@ -20,30 +21,30 @@ import org.openqa.selenium.TimeoutException; /** - * Event which gets delivered when a page does not load in the browser within - * the timeout period. + * Event which gets delivered when a page does not load in the browser within the timeout period. * * @author Peter Bencze */ public final class PageLoadTimeoutEvent extends EventObject { - + private final TimeoutException exception; - + /** * Creates a {@link PageLoadTimeoutEvent} instance. - * + * * @param crawlCandidate the current crawl candidate - * @param exception the thrown exception + * @param exception the thrown exception */ - public PageLoadTimeoutEvent(final CrawlCandidate crawlCandidate, final TimeoutException exception) { + public PageLoadTimeoutEvent(final CrawlCandidate crawlCandidate, + final TimeoutException exception) { super(crawlCandidate); - + this.exception = exception; } /** * Returns the thrown exception. - * + * * @return the thrown exception */ public TimeoutException getException() { diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/RequestErrorEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/RequestErrorEvent.java index 305840b..bbce9b3 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/event/RequestErrorEvent.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/RequestErrorEvent.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api.event; import com.github.peterbencze.serritor.api.CrawlCandidate; @@ -32,7 +33,7 @@ public final class RequestErrorEvent extends EventObject { * Creates a {@link RequestErrorEvent} instance. 
* * @param crawlCandidate the current crawl candidate - * @param exception the thrown exception + * @param exception the thrown exception */ public RequestErrorEvent(final CrawlCandidate crawlCandidate, final IOException exception) { super(crawlCandidate); diff --git a/src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java b/src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java index d142d0a..188ba3e 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java +++ b/src/main/java/com/github/peterbencze/serritor/api/event/RequestRedirectEvent.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api.event; import com.github.peterbencze.serritor.api.CrawlCandidate; @@ -31,10 +32,11 @@ public final class RequestRedirectEvent extends EventObject { /** * Creates a {@link RequestRedirectEvent} instance. * - * @param crawlCandidate the current crawl candidate + * @param crawlCandidate the current crawl candidate * @param redirectedCrawlRequest the crawl request for the redirected URL */ - public RequestRedirectEvent(final CrawlCandidate crawlCandidate, final CrawlRequest redirectedCrawlRequest) { + public RequestRedirectEvent(final CrawlCandidate crawlCandidate, + final CrawlRequest redirectedCrawlRequest) { super(crawlCandidate); this.redirectedCrawlRequest = redirectedCrawlRequest; diff --git a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java index eaafc4a..65ec266 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java +++ b/src/main/java/com/github/peterbencze/serritor/api/helper/UrlFinder.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.api.helper; import com.github.peterbencze.serritor.api.event.PageLoadEvent; @@ -56,6 +57,7 @@ private UrlFinder(final UrlFinderBuilder builder) { * Returns a list of validated URLs found in the page's HTML source. * * @param event the PageLoadEvent instance + * * @return the list of found URLs */ public List findUrlsInPage(final PageLoadEvent event) { @@ -85,6 +87,7 @@ public List findUrlsInPage(final PageLoadEvent event) { * Returns a list of validated URLs found in the attribute's value. * * @param attributeValue the value of the attribute + * * @return the list of found URLs */ private List findUrlsInAttributeValue(final String attributeValue) { @@ -144,11 +147,11 @@ public UrlFinderBuilder(final List urlPatterns) { } /** - * Sets the locating mechanism used by the finder. Only elements matched - * by the locator will be considered when searching for URLs. + * Sets the locating mechanism used by the finder. Only elements matched by the locator will + * be considered when searching for URLs. 
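These events are delivered through protected callbacks on BaseCrawler (onRequestRedirect is visible earlier in this patch; the sketch assumes onPageLoad and onRequestError follow the same pattern and that the events expose their crawl candidate publicly). A minimal subclass that only logs:

import com.github.peterbencze.serritor.api.BaseCrawler;
import com.github.peterbencze.serritor.api.CrawlerConfiguration;
import com.github.peterbencze.serritor.api.event.PageLoadEvent;
import com.github.peterbencze.serritor.api.event.RequestErrorEvent;
import com.github.peterbencze.serritor.api.event.RequestRedirectEvent;

public final class LoggingCrawler extends BaseCrawler {

    public LoggingCrawler(final CrawlerConfiguration config) {
        super(config);
    }

    @Override
    protected void onPageLoad(final PageLoadEvent event) {
        // Called when the browser successfully loaded an HTML page.
        System.out.println("Loaded: " + event.getCrawlCandidate().getRequestUrl());
    }

    @Override
    protected void onRequestRedirect(final RequestRedirectEvent event) {
        System.out.println("Redirected: " + event.getCrawlCandidate().getRequestUrl());
    }

    @Override
    protected void onRequestError(final RequestErrorEvent event) {
        System.err.println("Request failed: " + event.getCrawlCandidate().getRequestUrl());
    }
}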
+ * + * @param locatingMechanism the By locating mechanism instance * - * @param locatingMechanism the By locating mechanism - * instance * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setLocatingMechanism(final By locatingMechanism) { @@ -156,11 +159,11 @@ public UrlFinderBuilder setLocatingMechanism(final By locatingMechanism) { } /** - * Sets the locating mechanisms used by the finder. Only elements - * matched by the locators will be considered when searching for URLs. + * Sets the locating mechanisms used by the finder. Only elements matched by the locators + * will be considered when searching for URLs. + * + * @param locatingMechanisms the list of By locating mechanism instances * - * @param locatingMechanisms the list of By locating - * mechanism instances * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setLocatingMechanisms(final List locatingMechanisms) { @@ -174,6 +177,7 @@ public UrlFinderBuilder setLocatingMechanisms(final List locatingMechanisms) * Sets the list of attribute names to search for URLs. * * @param attributes the list of attribute names + * * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setAttributes(final List attributes) { @@ -187,6 +191,7 @@ public UrlFinderBuilder setAttributes(final List attributes) { * Sets the attribute name to search for URLs. * * @param attribute the attribute name + * * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setAttribute(final String attribute) { @@ -197,6 +202,7 @@ public UrlFinderBuilder setAttribute(final String attribute) { * Sets a predicate to be used for validating found URLs. * * @param validator the validator predicate + * * @return the UrlFinderBuilder instance */ public UrlFinderBuilder setValidator(final Predicate validator) { @@ -219,8 +225,8 @@ public UrlFinder build() { * The default URL validator function. * * @param url the URL to validate - * @return true if the URL is valid, false - * otherwise + * + * @return true if the URL is valid, false otherwise */ private static boolean isValidUrl(final String url) { try { diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java index 7fb007e..3fec9fa 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlDomain.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal; import com.google.common.collect.ImmutableList; @@ -38,10 +39,11 @@ public CrawlDomain(final InternetDomainName domain) { } /** - * Indicates if two CrawlDomain instances are equal. - * Crawl domains with the same domain name are considered equal. + * Indicates if two CrawlDomain instances are equal. Crawl domains with the same + * domain name are considered equal. * * @param obj a CrawlDomain instance + * * @return true if equal, false otherwise */ @Override @@ -59,8 +61,7 @@ public boolean equals(final Object obj) { } /** - * Calculates the hash code from the individual components of the domain - * name. + * Calculates the hash code from the individual components of the domain name. 
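A hedged usage sketch for the UrlFinder builder shown above: it assumes the builder is a nested class of UrlFinder and that the URL patterns are java.util.regex.Pattern instances; the pattern, tag name, and attribute are placeholders.

import com.github.peterbencze.serritor.api.event.PageLoadEvent;
import com.github.peterbencze.serritor.api.helper.UrlFinder;
import com.github.peterbencze.serritor.api.helper.UrlFinder.UrlFinderBuilder;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.openqa.selenium.By;

final class UrlFinderSketch {

    // Only anchor elements are inspected, and only "href" values matching the pattern
    // (and passing the URL validator) are returned.
    private static final UrlFinder URL_FINDER =
            new UrlFinderBuilder(Arrays.asList(Pattern.compile("https?://example\\.com/.*")))
                    .setLocatingMechanism(By.tagName("a"))
                    .setAttribute("href")
                    .build();

    private UrlFinderSketch() {
    }

    static List<String> extractLinks(final PageLoadEvent event) {
        return URL_FINDER.findUrlsInPage(event);
    }
}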
* * @return the hash code for the crawl domain */ @@ -73,6 +74,7 @@ public int hashCode() { * Indicates if this crawl domain contains the specific internet domain. * * @param domain an immutable well-formed internet domain name + * * @return true if belongs, false otherwise */ public boolean contains(final InternetDomainName domain) { diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java index 0c47bb8..f18207b 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal; import com.github.peterbencze.serritor.api.CrawlCandidate; @@ -54,15 +55,10 @@ public final class CrawlFrontier implements Serializable { */ public CrawlFrontier(final CrawlerConfiguration config) { this.config = config; - allowedCrawlDomains = config.getAllowedCrawlDomains(); - urlFingerprints = new HashSet<>(); - - // Construct a priority queue according to the crawl strategy specified in the configuration candidates = createPriorityQueue(); - // Feed initial crawl requests (seeds) config.getCrawlSeeds() .forEach((CrawlRequest request) -> { feedRequest(request, true); @@ -72,13 +68,11 @@ public CrawlFrontier(final CrawlerConfiguration config) { /** * Feeds a crawl request to the frontier. * - * @param request the crawl request + * @param request the crawl request * @param isCrawlSeed indicates if the request is a crawl seed */ public void feedRequest(final CrawlRequest request, final boolean isCrawlSeed) { if (config.isOffsiteRequestFilteringEnabled()) { - // Check if the request's domain is in the allowed crawl domains - boolean inCrawlDomain = false; for (CrawlDomain allowedCrawlDomain : allowedCrawlDomains) { @@ -94,8 +88,6 @@ public void feedRequest(final CrawlRequest request, final boolean isCrawlSeed) { } if (config.isDuplicateRequestFilteringEnabled()) { - // Check if the URL has already been crawled - String urlFingerprint = createFingerprintForUrl(request.getRequestUrl()); if (urlFingerprints.contains(urlFingerprint)) { @@ -111,26 +103,24 @@ public void feedRequest(final CrawlRequest request, final boolean isCrawlSeed) { int crawlDepthLimit = config.getMaximumCrawlDepth(); int nextCrawlDepth = currentCandidate.getCrawlDepth() + 1; - // If a crawl depth limit is set, check if the candidate's crawl depth is less than or equal to the limit if (crawlDepthLimit != 0 && nextCrawlDepth > crawlDepthLimit) { return; } - builder = new CrawlCandidateBuilder(request).setRefererUrl(currentCandidate.getRequestUrl()) + builder = new CrawlCandidateBuilder(request) + .setRefererUrl(currentCandidate.getRequestUrl()) .setCrawlDepth(nextCrawlDepth); } else { builder = new CrawlCandidateBuilder(request); } - // Finally, add constructed candidate to the queue candidates.add(builder.build()); } /** * Indicates if there are any candidates left in the queue. 
* - * @return true if there are candidates in the queue, - * false otherwise + * @return true if there are candidates in the queue, false otherwise */ public boolean hasNextCandidate() { return !candidates.isEmpty(); @@ -147,23 +137,21 @@ public CrawlCandidate getNextCandidate() { } /** - * Creates the fingerprint of the given URL. + * Creates the fingerprint of the given URL. If the URL contains query parameters, it sorts + * them. This way URLs with different order of query parameters get the same fingerprint. * * @param url the URL for which the fingerprint is created + * * @return the fingerprint of the URL */ private static String createFingerprintForUrl(final URI url) { - // We start off with the host only StringBuilder truncatedUrl = new StringBuilder(url.getHost()); - // If there is a path in the URL, we append it after the host String path = url.getPath(); if (path != null && !"/".equals(path)) { truncatedUrl.append(path); } - // If there are any query params, we sort and append them to what we got so far - // This is required in order to detect already crawled URLs with different order of query params String query = url.getQuery(); if (query != null) { truncatedUrl.append("?"); @@ -180,20 +168,29 @@ private static String createFingerprintForUrl(final URI url) { } /** - * Creates a priority queue using the strategy specified in the - * configuration. + * Creates a priority queue using the strategy specified in the configuration. * - * @return the priority queue using the strategy specified in the - * configuration + * @return the priority queue using the strategy specified in the configuration */ + @SuppressWarnings("checkstyle:MissingSwitchDefault") private PriorityQueue createPriorityQueue() { + Function crawlDepthGetter + = (Function & Serializable) CrawlCandidate::getCrawlDepth; + Function priorityGetter + = (Function & Serializable) CrawlCandidate::getPriority; + switch (config.getCrawlStrategy()) { case BREADTH_FIRST: - return new PriorityQueue<>(Comparator.comparing((Function & Serializable) CrawlCandidate::getCrawlDepth) - .thenComparing((Function & Serializable) CrawlCandidate::getPriority, Comparator.reverseOrder())); + Comparator breadthFirstComparator = Comparator.comparing(crawlDepthGetter) + .thenComparing(priorityGetter, Comparator.reverseOrder()); + + return new PriorityQueue<>(breadthFirstComparator); case DEPTH_FIRST: - return new PriorityQueue<>(Comparator.comparing((Function & Serializable) CrawlCandidate::getCrawlDepth, Comparator.reverseOrder()) - .thenComparing((Function & Serializable) CrawlCandidate::getPriority, Comparator.reverseOrder())); + Comparator depthFirstComparator + = Comparator.comparing(crawlDepthGetter, Comparator.reverseOrder()) + .thenComparing(priorityGetter, Comparator.reverseOrder()); + + return new PriorityQueue<>(depthFirstComparator); } throw new IllegalArgumentException("Unsupported crawl strategy."); diff --git a/src/main/java/com/github/peterbencze/serritor/internal/EventObject.java b/src/main/java/com/github/peterbencze/serritor/internal/EventObject.java index 89d6c33..05e5898 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/EventObject.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/EventObject.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
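The fingerprinting hunk above only shows part of the normalization. Based on its Javadoc (query parameters are sorted so that differently ordered parameters yield the same fingerprint) and the visible host/path handling, the normalized form could be produced as below. The split/sort/join details are an assumption, and the hashing step the real method presumably applies afterwards is omitted.

import java.net.URI;
import java.util.Arrays;

final class UrlFingerprintSketch {

    private UrlFingerprintSketch() {
    }

    // Builds the normalized form used for duplicate detection: host, then the path
    // (unless it is just "/"), then the query parameters in sorted order.
    static String normalizedForm(final URI url) {
        StringBuilder truncatedUrl = new StringBuilder(url.getHost());

        String path = url.getPath();
        if (path != null && !"/".equals(path)) {
            truncatedUrl.append(path);
        }

        String query = url.getQuery();
        if (query != null) {
            truncatedUrl.append("?");

            String[] queryParams = query.split("&");
            Arrays.sort(queryParams);
            truncatedUrl.append(String.join("&", queryParams));
        }

        return truncatedUrl.toString();
    }

    public static void main(final String[] args) {
        // Both variants normalize to "example.com?a=1&b=2".
        System.out.println(normalizedForm(URI.create("http://example.com?b=2&a=1")));
        System.out.println(normalizedForm(URI.create("https://example.com/?a=1&b=2")));
    }
}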
*/ + package com.github.peterbencze.serritor.internal; import com.github.peterbencze.serritor.api.CrawlCandidate; diff --git a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanism.java index 6d3926d..13e3484 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanism.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,20 +13,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import org.openqa.selenium.JavascriptExecutor; /** - * A crawl delay mechanism, in which case the delay corresponds to the page - * loading time, if it is between the specified range, otherwise the minimum or - * maximum duration is used. + * A crawl delay mechanism, in which case the delay corresponds to the page loading time, if it is + * between the specified range, otherwise the minimum or maximum duration is used. * * @author Peter Bencze */ public final class AdaptiveCrawlDelayMechanism implements CrawlDelayMechanism { + private static final String BROWSER_COMPATIBILITY_JS = "return ('performance' in window) && " + + "('timing' in window.performance)"; + private static final String DELAY_CALCULATION_JS = "return performance.timing.loadEventEnd - " + + "performance.timing.navigationStart;"; + private final long minDelayInMillis; private final long maxDelayInMillis; private final JavascriptExecutor jsExecutor; @@ -34,12 +39,13 @@ public final class AdaptiveCrawlDelayMechanism implements CrawlDelayMechanism { /** * Creates an {@link AdaptiveCrawlDelayMechanism} instance. * - * @param config the crawler configuration which specifies the minimum and - * maximum delay - * @param jsExecutor the {@link org.openqa.selenium.WebDriver} instance - * which is capable of executing JavaScript + * @param config the crawler configuration which specifies the minimum and maximum delay + * @param jsExecutor the {@link org.openqa.selenium.WebDriver} instance which is capable of + * executing JavaScript */ - public AdaptiveCrawlDelayMechanism(final CrawlerConfiguration config, final JavascriptExecutor jsExecutor) { + public AdaptiveCrawlDelayMechanism( + final CrawlerConfiguration config, + final JavascriptExecutor jsExecutor) { minDelayInMillis = config.getMinimumCrawlDelayDurationInMillis(); maxDelayInMillis = config.getMaximumCrawlDelayDurationInMillis(); this.jsExecutor = jsExecutor; @@ -48,24 +54,22 @@ public AdaptiveCrawlDelayMechanism(final CrawlerConfiguration config, final Java /** * Checks if the browser supports the Navigation Timing API. * - * @return true if the browser is compatible, - * false otherwise + * @return true if the browser is compatible, false otherwise */ public boolean isBrowserCompatible() { - return (boolean) jsExecutor.executeScript("return ('performance' in window) && ('timing' in window.performance)"); + return (boolean) jsExecutor.executeScript(BROWSER_COMPATIBILITY_JS); } /** - * Calculates the page loading time and returns the delay accordingly, - * between the specified min-max range. 
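The two scripts extracted into constants above can be exercised directly against any JavascriptExecutor-capable driver. A sketch using the HtmlUnit driver from the pom follows; note that HtmlUnit may well report the API as unsupported, which is precisely the case isBrowserCompatible guards against. The URL is a placeholder.

import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;

public final class NavigationTimingSketch {

    public static void main(final String[] args) {
        WebDriver driver = new HtmlUnitDriver(true);
        try {
            driver.get("https://example.com");

            JavascriptExecutor jsExecutor = (JavascriptExecutor) driver;

            // Same scripts the adaptive mechanism uses: first check support, then read
            // how long the last page load took according to the Navigation Timing API.
            boolean supported = (boolean) jsExecutor.executeScript(
                    "return ('performance' in window) && ('timing' in window.performance)");
            if (supported) {
                long loadTimeMillis = (long) jsExecutor.executeScript(
                        "return performance.timing.loadEventEnd - "
                                + "performance.timing.navigationStart;");
                System.out.println("Page load took " + loadTimeMillis + " ms");
            }
        } finally {
            driver.quit();
        }
    }
}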
If the calculated delay is smaller - * than the minimum, it returns the minimum delay. If the calculated delay - * is higher than the maximum, it returns the maximum delay. + * Calculates the page loading time and returns the delay accordingly, between the specified + * min-max range. If the calculated delay is smaller than the minimum, it returns the minimum + * delay. If the calculated delay is higher than the maximum, it returns the maximum delay. * * @return the delay in milliseconds */ @Override public long getDelay() { - long delayInMillis = (long) jsExecutor.executeScript("return performance.timing.loadEventEnd - performance.timing.navigationStart;"); + long delayInMillis = (long) jsExecutor.executeScript(DELAY_CALCULATION_JS); if (delayInMillis < minDelayInMillis) { return minDelayInMillis; diff --git a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/CrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/CrawlDelayMechanism.java index d788ece..4f1d34d 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/CrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/CrawlDelayMechanism.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal.crawldelaymechanism; /** diff --git a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanism.java index f287e8a..9713f8b 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanism.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,13 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; /** - * A crawl delay mechanism, in which case the delay is constant and equals to - * the duration specified in the configuration. + * A crawl delay mechanism, in which case the delay is constant and equals to the duration specified + * in the configuration. * * @author Peter Bencze */ @@ -30,8 +31,7 @@ public final class FixedCrawlDelayMechanism implements CrawlDelayMechanism { /** * Creates a {@link FixedCrawlDelayMechanism} instance. 
* - * @param config the crawler configuration which specifies the fixed delay - * duration + * @param config the crawler configuration which specifies the fixed delay duration */ public FixedCrawlDelayMechanism(final CrawlerConfiguration config) { this.delayInMillis = config.getFixedCrawlDelayDurationInMillis(); diff --git a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/RandomCrawlDelayMechanism.java b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/RandomCrawlDelayMechanism.java index cd2b035..a457da3 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/RandomCrawlDelayMechanism.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/RandomCrawlDelayMechanism.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,14 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; import java.util.concurrent.ThreadLocalRandom; /** - * A crawl delay mechanism in which case the duration is randomized between the - * specified minimum and maximum range. + * A crawl delay mechanism in which case the duration is randomized between the specified minimum + * and maximum range. * * @author Peter Bencze */ @@ -32,8 +33,7 @@ public final class RandomCrawlDelayMechanism implements CrawlDelayMechanism { /** * Creates a {@link RandomCrawlDelayMechanism} instance. * - * @param config the crawler configuration which specifies the minimum and - * maximum delay. + * @param config the crawler configuration which specifies the minimum and maximum delay. */ public RandomCrawlDelayMechanism(final CrawlerConfiguration config) { lowerLimit = config.getMinimumCrawlDelayDurationInMillis(); @@ -41,8 +41,7 @@ public RandomCrawlDelayMechanism(final CrawlerConfiguration config) { } /** - * Returns a random delay between the minimum and maximum range specified in - * the configuration. + * Returns a random delay between the minimum and maximum range specified in the configuration. * * @return the delay in milliseconds */ diff --git a/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java b/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java index 1758b17..d33da36 100644 --- a/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java +++ b/src/test/java/com/github/peterbencze/serritor/api/helper/UrlFinderTest.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
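The random mechanism's getDelay body is not visible in this hunk; one way to produce a random delay with ThreadLocalRandom (which the class uses) is sketched below. Treat the +1 upper bound, which makes the range inclusive, as an assumption about the intended behaviour.

import java.util.concurrent.ThreadLocalRandom;

final class RandomDelaySketch {

    private RandomDelaySketch() {
    }

    // Returns a uniformly random delay in the inclusive range [minMillis, maxMillis].
    // ThreadLocalRandom#nextLong treats the upper bound as exclusive, hence the +1.
    static long randomDelay(final long minMillis, final long maxMillis) {
        return ThreadLocalRandom.current().nextLong(minMillis, maxMillis + 1);
    }

    public static void main(final String[] args) {
        System.out.println(randomDelay(1_000L, 10_000L));
    }
}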
*/ + package com.github.peterbencze.serritor.api.helper; import com.github.peterbencze.serritor.api.event.PageLoadEvent; @@ -69,7 +70,9 @@ public void initialize() { Mockito.when(mockedElementWithInvalidDomain.getAttribute(Mockito.eq(ATTRIBUTE))) .thenReturn(URL_WITH_INVALID_DOMAIN); - List elementList = Arrays.asList(mockedElementWithValidUrl, mockedElementWithInvalidUrlFormat, mockedElementWithInvalidDomain); + List elementList + = Arrays.asList(mockedElementWithValidUrl, mockedElementWithInvalidUrlFormat, + mockedElementWithInvalidDomain); Mockito.when(mockedDriver.findElements(By.tagName(TAG_NAME))) .thenReturn(elementList); diff --git a/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java b/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java index 81d390d..4bdb829 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/CrawlDomainTest.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal; import com.google.common.net.InternetDomainName; @@ -37,13 +38,8 @@ public final class CrawlDomainTest { @Test public void testEquals() { - // A crawl domain should be equal with itself Assert.assertEquals(CRAWL_DOMAIN_0, CRAWL_DOMAIN_0); - - // Crawl domains with the same domain should be equal Assert.assertEquals(CRAWL_DOMAIN_0, CRAWL_DOMAIN_1); - - // Crawl domains with different domains should not be equal Assert.assertNotEquals(CRAWL_DOMAIN_0, CRAWL_DOMAIN_2); } @@ -54,13 +50,8 @@ public void testHashCode() { @Test public void testContains() { - // A crawl domain should contain its own domain Assert.assertTrue(CRAWL_DOMAIN_0.contains(DOMAIN)); - - // A crawl domain should contain its own domain's subdomain Assert.assertTrue(CRAWL_DOMAIN_0.contains(SUBDOMAIN)); - - // A crawl domain should not contain a domain different from its own domain Assert.assertFalse(CRAWL_DOMAIN_2.contains(DOMAIN)); } } diff --git a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java index a477d51..6ddf172 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/CrawlFrontierTest.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2017 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package com.github.peterbencze.serritor.internal; import com.github.peterbencze.serritor.api.CrawlCandidate; @@ -40,11 +41,14 @@ public final class CrawlFrontierTest { // Allowed crawl domains private static final String ALLOWED_CRAWL_DOMAIN_0 = "root-url-0.com"; private static final String ALLOWED_CRAWL_DOMAIN_1 = "root-url-1.com"; - private static final List ALLOWED_CRAWL_DOMAINS = Arrays.asList(ALLOWED_CRAWL_DOMAIN_0, ALLOWED_CRAWL_DOMAIN_1); + private static final List ALLOWED_CRAWL_DOMAINS + = Arrays.asList(ALLOWED_CRAWL_DOMAIN_0, ALLOWED_CRAWL_DOMAIN_1); // Root URLs - private static final URI ROOT_URL_0 = URI.create("http://root-url-0.com?param1=foo¶m2=bar#fragment"); - private static final URI DUPLICATE_ROOT_URL_0 = URI.create("https://root-url-0.com?param2=bar¶m1=foo"); + private static final URI ROOT_URL_0 + = URI.create("http://root-url-0.com?param1=foo¶m2=bar#fragment"); + private static final URI DUPLICATE_ROOT_URL_0 + = URI.create("https://root-url-0.com?param2=bar¶m1=foo"); private static final URI ROOT_URL_1 = URI.create("http://root-url-1.com"); // Root URL crawl depth @@ -55,18 +59,25 @@ public final class CrawlFrontierTest { private static final int ROOT_URL_1_PRIORITY = 1; // Root URL crawl requests - private static final CrawlRequest ROOT_URL_0_CRAWL_REQUEST = new CrawlRequestBuilder(ROOT_URL_0).setPriority(ROOT_URL_0_PRIORITY).build(); - private static final CrawlRequest DUPLICATE_ROOT_URL_0_CRAWL_REQUEST = new CrawlRequestBuilder(DUPLICATE_ROOT_URL_0).build(); - private static final CrawlRequest ROOT_URL_1_CRAWL_REQUEST = new CrawlRequestBuilder(ROOT_URL_1).setPriority(ROOT_URL_1_PRIORITY).build(); - private static final List CRAWL_SEEDS = Arrays.asList(ROOT_URL_0_CRAWL_REQUEST, ROOT_URL_1_CRAWL_REQUEST); + private static final CrawlRequest ROOT_URL_0_CRAWL_REQUEST + = new CrawlRequestBuilder(ROOT_URL_0).setPriority(ROOT_URL_0_PRIORITY).build(); + private static final CrawlRequest DUPLICATE_ROOT_URL_0_CRAWL_REQUEST + = new CrawlRequestBuilder(DUPLICATE_ROOT_URL_0).build(); + private static final CrawlRequest ROOT_URL_1_CRAWL_REQUEST + = new CrawlRequestBuilder(ROOT_URL_1).setPriority(ROOT_URL_1_PRIORITY).build(); + private static final List CRAWL_SEEDS + = Arrays.asList(ROOT_URL_0_CRAWL_REQUEST, ROOT_URL_1_CRAWL_REQUEST); // Child URL path private static final String CHILD_URL_PATH = "/child"; // Child URLs - private static final URI CHILD_URL_0 = URI.create(String.format("http://root-url-0.com%s-0", CHILD_URL_PATH)); - private static final URI CHILD_URL_1 = URI.create(String.format("http://root-url-0.com%s-1", CHILD_URL_PATH)); - private static final URI CHILD_URL_2 = URI.create(String.format("http://root-url-1.com%s-0", CHILD_URL_PATH)); + private static final URI CHILD_URL_0 + = URI.create(String.format("http://root-url-0.com%s-0", CHILD_URL_PATH)); + private static final URI CHILD_URL_1 + = URI.create(String.format("http://root-url-0.com%s-1", CHILD_URL_PATH)); + private static final URI CHILD_URL_2 + = URI.create(String.format("http://root-url-1.com%s-0", CHILD_URL_PATH)); // Child URL crawl depth private static final int CHILD_URL_CRAWL_DEPTH = 1; @@ -76,10 +87,13 @@ public final class CrawlFrontierTest { private static final int CHILD_URL_1_PRIORITY = CHILD_URL_0_PRIORITY; private static final int CHILD_URL_2_PRIORITY = 1; - // Child URL crawl requests - private static final CrawlRequest CHILD_URL_0_CRAWL_REQUEST = new CrawlRequestBuilder(CHILD_URL_0).setPriority(CHILD_URL_0_PRIORITY).build(); - private static final CrawlRequest CHILD_URL_1_CRAWL_REQUEST 
= new CrawlRequestBuilder(CHILD_URL_1).setPriority(CHILD_URL_1_PRIORITY).build(); - private static final CrawlRequest CHILD_URL_2_CRAWL_REQUEST = new CrawlRequestBuilder(CHILD_URL_2).setPriority(CHILD_URL_2_PRIORITY).build(); + // Child URL crawl requests + private static final CrawlRequest CHILD_URL_0_CRAWL_REQUEST + = new CrawlRequestBuilder(CHILD_URL_0).setPriority(CHILD_URL_0_PRIORITY).build(); + private static final CrawlRequest CHILD_URL_1_CRAWL_REQUEST + = new CrawlRequestBuilder(CHILD_URL_1).setPriority(CHILD_URL_1_PRIORITY).build(); + private static final CrawlRequest CHILD_URL_2_CRAWL_REQUEST + = new CrawlRequestBuilder(CHILD_URL_2).setPriority(CHILD_URL_2_PRIORITY).build(); // Offsite URL private static final URI OFFSITE_URL = URI.create("http://offsite-url.com"); @@ -88,7 +102,8 @@ public final class CrawlFrontierTest { private static final int OFFSITE_URL_PRIORITY = 0; // Offsite URL crawl request - private static final CrawlRequest OFFSITE_URL_CRAWL_REQUEST = new CrawlRequestBuilder(OFFSITE_URL).setPriority(OFFSITE_URL_PRIORITY).build(); + private static final CrawlRequest OFFSITE_URL_CRAWL_REQUEST + = new CrawlRequestBuilder(OFFSITE_URL).setPriority(OFFSITE_URL_PRIORITY).build(); // Max crawl depth private static final int MAX_CRAWL_DEPTH = 1; @@ -108,38 +123,27 @@ public void initialize() { @Test public void testHasNextCandidateWithCandidatesInQueue() { - // Check if there are any candidates in the queue, the method should return true Assert.assertTrue(frontier.hasNextCandidate()); - // Get the next candidate from the queue frontier.getNextCandidate(); - // Check if there are any candidates in the queue, the method should return true again Assert.assertTrue(frontier.hasNextCandidate()); - // Get the next candidate from the queue frontier.getNextCandidate(); - // Check if there are any candidates in the queue, the method should return false at this point Assert.assertFalse(frontier.hasNextCandidate()); - // Feed child crawl requests frontier.feedRequest(CHILD_URL_0_CRAWL_REQUEST, false); frontier.feedRequest(CHILD_URL_1_CRAWL_REQUEST, false); - // Check if there are any candidates in the queue, the method should return true Assert.assertTrue(frontier.hasNextCandidate()); - // Get the next candidate from the queue frontier.getNextCandidate(); - // Check if there are any candidates in the queue, the method should return true once again Assert.assertTrue(frontier.hasNextCandidate()); - // Get the next candidate from the queue frontier.getNextCandidate(); - // Finally, check if there are any candidates in the queue, the method should return false at this point Assert.assertFalse(frontier.hasNextCandidate()); } @@ -148,10 +152,9 @@ public void testHasNextCandidateWithEmptyQueue() { Mockito.when(config.getCrawlSeeds()) .thenReturn(Collections.EMPTY_SET); - // Create frontier without any crawl seeds + // Create crawl frontier without crawl seeds frontier = new CrawlFrontier(config); - // Check if there are any candidates in the queue, the method should return false Assert.assertFalse(frontier.hasNextCandidate()); } @@ -159,10 +162,8 @@ public void testHasNextCandidateWithEmptyQueue() { public void testEnabledDuplicateRequestFiltering() { clearCrawlCandidateQueue(); - // Feed a duplicate crawl request frontier.feedRequest(DUPLICATE_ROOT_URL_0_CRAWL_REQUEST, false); - // Check if the candidate was added to the queue, the method should return false Assert.assertFalse(frontier.hasNextCandidate()); } @@ -172,16 +173,11 @@ public void testDisabledDuplicateRequestFiltering() { 
Mockito.when(config.isDuplicateRequestFilteringEnabled()) .thenReturn(false); - // Clear the crawl candidate queue of the frontier clearCrawlCandidateQueue(); - // Feed a duplicate crawl request frontier.feedRequest(DUPLICATE_ROOT_URL_0_CRAWL_REQUEST, true); - // Check if the candidates was added to the queue, the method should return true Assert.assertTrue(frontier.hasNextCandidate()); - - // Check if the URLs match Assert.assertEquals(DUPLICATE_ROOT_URL_0, frontier.getNextCandidate().getRequestUrl()); } @@ -189,10 +185,8 @@ public void testDisabledDuplicateRequestFiltering() { public void testEnabledOffsiteRequestFiltering() { clearCrawlCandidateQueue(); - // Feed an offsite request frontier.feedRequest(OFFSITE_URL_CRAWL_REQUEST, false); - // Check if the candidate was added to the queue, the method should return false Assert.assertFalse(frontier.hasNextCandidate()); } @@ -202,90 +196,53 @@ public void testDisabledOffsiteRequestFiltering() { Mockito.when(config.isOffsiteRequestFilteringEnabled()) .thenReturn(false); - // Clear the crawl candidate queue of the frontier clearCrawlCandidateQueue(); - // Feed an offsite request frontier.feedRequest(OFFSITE_URL_CRAWL_REQUEST, false); - // Check if the candidates was added to the queue, the method should return true Assert.assertTrue(frontier.hasNextCandidate()); - - // Check if the URLs match - Assert.assertEquals(OFFSITE_URL.toString(), frontier.getNextCandidate().getRequestUrl().toString()); + Assert.assertEquals(OFFSITE_URL.toString(), + frontier.getNextCandidate().getRequestUrl().toString()); } @Test public void testGetNextCandidateUsingBreadthFirstCrawlStrategy() { - // Get the crawl candidate of root URL 1. CrawlCandidate nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be root URL 1. Assert.assertEquals(ROOT_URL_1, nextCandidate.getRequestUrl()); - - // Check the crawl depth of this candidate, it should be 0 because it is a root URL. Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 1. Assert.assertEquals(ROOT_URL_1_PRIORITY, nextCandidate.getPriority()); - // Feed a child request that come from root URL 1. frontier.feedRequest(CHILD_URL_2_CRAWL_REQUEST, false); - // Get the crawl candidate of root URL 0. nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be root URL 0. Assert.assertEquals(ROOT_URL_0, nextCandidate.getRequestUrl()); - - // Check the crawl depth of this candidate, it should be 0 again because it is also a root URL. Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 0. Assert.assertEquals(ROOT_URL_0_PRIORITY, nextCandidate.getPriority()); - // Feed 2 child requests that come from root URL 0. frontier.feedRequest(CHILD_URL_0_CRAWL_REQUEST, false); frontier.feedRequest(CHILD_URL_1_CRAWL_REQUEST, false); - // Get the crawl candidate of child URL 2. nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be child URL 2. Assert.assertEquals(CHILD_URL_2.toString(), nextCandidate.getRequestUrl().toString()); - - // Check the crawl depth of this candidate, it should be 1 because it is a child URL that comes from root URL 1. Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 1. 
Assert.assertEquals(CHILD_URL_2_PRIORITY, nextCandidate.getPriority()); - // Get the crawl candidate of a child URL. - // Note: a priority queue does not ensure FIFO order when elements have the same depth and priority + // a priority queue doesn't ensure FIFO order when elements have the same depth and priority nextCandidate = frontier.getNextCandidate(); - // Check the URL of this request, it should be a child URL. Assert.assertTrue(nextCandidate.getRequestUrl().toString().contains(CHILD_URL_PATH)); - - // Check the crawl depth of this candidate, it should be 1 again because it is a child URL that comes from root URL 0. Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - // Get the priority of this candidate int previousChildCandidatePriority = nextCandidate.getPriority(); - // Get the crawl candidate of the next child URL. nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be a child URL. Assert.assertTrue(nextCandidate.getRequestUrl().toString().contains(CHILD_URL_PATH)); - - // Check the crawl depth of this candidate, it should be 1 again becaise it is another child URL that also comes from root URL 0. Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Compare the priority of this candidate to the previous candidate's priority. Assert.assertEquals(previousChildCandidatePriority, nextCandidate.getPriority()); - - // There should be no more candidates left at this point. Assert.assertFalse(frontier.hasNextCandidate()); } @@ -297,75 +254,41 @@ public void testGetNextCandidateUsingDepthFirstCrawlStrategy() { // Create frontier with depth-first crawl strategy frontier = new CrawlFrontier(config); - // Get the crawl candidate of root URL 1 CrawlCandidate nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be root URL 1 Assert.assertEquals(ROOT_URL_1, nextCandidate.getRequestUrl()); - - // Check the crawl depth of this candidate, it should be 0 because it is a root URL Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 1 Assert.assertEquals(ROOT_URL_1_PRIORITY, nextCandidate.getPriority()); - // Feed a child request that comes from root URL 1 frontier.feedRequest(CHILD_URL_2_CRAWL_REQUEST, false); - // Get the crawl candidate of a child URL - // Note: a priority queue does not ensure FIFO order when elements have the same depth and priority + // a priority queue doesn't ensure FIFO order when elements have the same depth and priority nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be a child URL Assert.assertTrue(nextCandidate.getRequestUrl().toString().contains(CHILD_URL_PATH)); - - // Check the crawl depth of this candidate, it should be 1 because it is a child URL that comes from root URL 1 Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 1 Assert.assertEquals(CHILD_URL_2_PRIORITY, nextCandidate.getPriority()); - // Get the crawl candidate of root URL 0. 
nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be root URL 0 Assert.assertEquals(ROOT_URL_0, nextCandidate.getRequestUrl()); - - // Check the crawl depth of this candidate, it should be 0 again because it is also a root URL Assert.assertEquals(ROOT_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 0 Assert.assertEquals(ROOT_URL_0_PRIORITY, nextCandidate.getPriority()); - // Feed 2 child requests that come from root URL 0 frontier.feedRequest(CHILD_URL_0_CRAWL_REQUEST, false); frontier.feedRequest(CHILD_URL_1_CRAWL_REQUEST, false); - // Get the crawl candidate of child URL 0 nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be child URL 0 Assert.assertEquals(CHILD_URL_0.toString(), nextCandidate.getRequestUrl().toString()); - - // Check the crawl depth of this candidate, it should be 1 again because it is a child URL that comes from root URL 0 Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 0 Assert.assertEquals(CHILD_URL_0_PRIORITY, nextCandidate.getPriority()); - // Get the crawl candidate of child URL 1 nextCandidate = frontier.getNextCandidate(); - // Check the URL of this candidate, it should be child URL 1 Assert.assertEquals(CHILD_URL_1.toString(), nextCandidate.getRequestUrl().toString()); - - // Check the crawl depth of this candidate, it should be 1 again becaise it is a child URL that also comes from root URL 0 Assert.assertEquals(CHILD_URL_CRAWL_DEPTH, nextCandidate.getCrawlDepth()); - - // Check the priority of this candidate, it should be 0 Assert.assertEquals(CHILD_URL_1_PRIORITY, nextCandidate.getPriority()); - - // There should be no more candidates left at this point Assert.assertFalse(frontier.hasNextCandidate()); } @@ -374,27 +297,20 @@ public void testCrawlDepthLimitation() { Mockito.when(config.getMaximumCrawlDepth()) .thenReturn(MAX_CRAWL_DEPTH); - // Clear the crawl candidate queue of the frontier clearCrawlCandidateQueue(); - // Feed a child request, its crawl depth will be 1 frontier.feedRequest(CHILD_URL_0_CRAWL_REQUEST, false); - // Get the crawl candidate of the previously added child URL CrawlCandidate nextCandidate = frontier.getNextCandidate(); - // Check its crawl depth, it should be less than or equal to the limit Assert.assertTrue(nextCandidate.getCrawlDepth() <= MAX_CRAWL_DEPTH); - // Feed another child request, its crawl depth will be 2 which is above the limit frontier.feedRequest(CHILD_URL_1_CRAWL_REQUEST, false); - // There should be no more candidates at this point Assert.assertFalse(frontier.hasNextCandidate()); } private void clearCrawlCandidateQueue() { - // Loop until there are no remaining candidates in the queue while (frontier.hasNextCandidate()) { frontier.getNextCandidate(); } diff --git a/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanismTest.java b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanismTest.java index 98340d3..166df00 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanismTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/AdaptiveCrawlDelayMechanismTest.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; @@ -55,31 +56,27 @@ public void initialize() { @Test public void testDelayLowerThanMinimum() { - // Return a delay which is lower than the predefined minimum Mockito.when(mockedJsExecutor.executeScript(Mockito.anyString())) .thenReturn(LOWER_DELAY_DURATION_IN_MILLIS); - // The minimum delay should be returned - Assert.assertEquals(mockedConfig.getMinimumCrawlDelayDurationInMillis(), crawlDelayMechanism.getDelay()); + Assert.assertEquals(mockedConfig.getMinimumCrawlDelayDurationInMillis(), + crawlDelayMechanism.getDelay()); } @Test public void testDelayHigherThanMaximum() { - // Return a delay which is higher than the predefined maximum Mockito.when(mockedJsExecutor.executeScript(Mockito.anyString())) .thenReturn(HIGHER_DELAY_DURATION_IN_MILLIS); - // The maximum delay should be returned - Assert.assertEquals(mockedConfig.getMaximumCrawlDelayDurationInMillis(), crawlDelayMechanism.getDelay()); + Assert.assertEquals(mockedConfig.getMaximumCrawlDelayDurationInMillis(), + crawlDelayMechanism.getDelay()); } @Test public void testDelayBetweenRange() { - // Return an in range delay Mockito.when(mockedJsExecutor.executeScript(Mockito.anyString())) .thenReturn(IN_RANGE_DELAY_DURATION_IN_MILLIS); - // The in range delay should be returned Assert.assertEquals(IN_RANGE_DELAY_DURATION_IN_MILLIS, crawlDelayMechanism.getDelay()); } } diff --git a/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanismTest.java b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanismTest.java index d0a96ce..535f5f4 100644 --- a/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanismTest.java +++ b/src/test/java/com/github/peterbencze/serritor/internal/crawldelaymechanism/FixedCrawlDelayMechanismTest.java @@ -1,4 +1,4 @@ -/* +/* * Copyright 2018 Peter Bencze. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package com.github.peterbencze.serritor.internal.crawldelaymechanism; import com.github.peterbencze.serritor.api.CrawlerConfiguration; @@ -40,7 +41,7 @@ public void initialize() { @Test public void testGetDelay() { - // The delay should be the same as in the configuration - Assert.assertEquals(config.getFixedCrawlDelayDurationInMillis(), crawlDelayMechanism.getDelay()); + Assert.assertEquals(config.getFixedCrawlDelayDurationInMillis(), + crawlDelayMechanism.getDelay()); } } From 24e76a09a2c728699b367309d9852852f7d11b31 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Tue, 12 Jun 2018 23:44:15 +0200 Subject: [PATCH 18/28] Publish Javadoc to GitHub Pages --- pom.xml | 55 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index d57489c..8b9e42d 100644 --- a/pom.xml +++ b/pom.xml @@ -5,18 +5,18 @@ serritor 1.4.0 jar - + Serritor An open source web crawler framework built upon Selenium and written in Java https://github.com/peterbencze/serritor - + Apache License, Version 2.0 https://www.apache.org/licenses/LICENSE-2.0 - + Peter Bencze @@ -26,13 +26,13 @@ - + scm:git:git://github.com/peterbencze/serritor.git scm:git:https://github.com/peterbencze/serritor.git https://github.com/peterbencze/serritor/tree/master - + ossrh @@ -43,13 +43,13 @@ https://oss.sonatype.org/service/local/staging/deploy/maven2/ - + UTF-8 1.8 1.8 - + org.seleniumhq.selenium @@ -79,7 +79,7 @@ test - + @@ -106,6 +106,12 @@ jar + + + javadoc + + site + @@ -156,6 +162,37 @@ true + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + true + true + + + + org.apache.maven.plugins + maven-scm-publish-plugin + 3.0.0 + + github + ${project.scm.developerConnection} + gh-pages + Update Javadoc via Maven + ${project.reporting.outputDirectory}/apidocs + UTF-8 + true + + + + + publish-scm + + site + + + - \ No newline at end of file + From ce1ad97987fecefe004773142a4d7990270c3d51 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Wed, 13 Jun 2018 01:04:16 +0200 Subject: [PATCH 19/28] Fix fingerprint creation for URL with single slash path --- .../com/github/peterbencze/serritor/internal/CrawlFrontier.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java index f18207b..c2c337c 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java @@ -148,7 +148,7 @@ private static String createFingerprintForUrl(final URI url) { StringBuilder truncatedUrl = new StringBuilder(url.getHost()); String path = url.getPath(); - if (path != null && !"/".equals(path)) { + if (path != null) { truncatedUrl.append(path); } From 0ef0eab7b09c2507639eb4f8a902517f9e7f5dd5 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Wed, 13 Jun 2018 23:31:03 +0200 Subject: [PATCH 20/28] Add logging --- .../peterbencze/serritor/api/BaseCrawler.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index e200a84..1ac57e4 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -33,6 +33,8 @@ import java.net.URI; import java.util.List; import 
java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; import org.apache.commons.lang3.SerializationUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; @@ -58,6 +60,8 @@ */ public abstract class BaseCrawler { + private static final Logger LOGGER = Logger.getLogger(BaseCrawler.class.getName()); + private final CrawlerConfiguration config; private boolean isStopped; @@ -382,6 +386,7 @@ private void performDelay() { * Callback which gets called when the crawler is started. */ protected void onStart() { + LOGGER.info("onStart"); } /** @@ -390,6 +395,7 @@ protected void onStart() { * @param event the PageLoadEvent instance */ protected void onPageLoad(final PageLoadEvent event) { + LOGGER.log(Level.INFO, "onPageLoad: {0}", event.getCrawlCandidate().getRequestUrl()); } /** @@ -398,6 +404,7 @@ protected void onPageLoad(final PageLoadEvent event) { * @param event the NonHtmlContentEvent instance */ protected void onNonHtmlContent(final NonHtmlContentEvent event) { + LOGGER.log(Level.INFO, "onNonHtmlContent: {0}", event.getCrawlCandidate().getRequestUrl()); } /** @@ -406,6 +413,7 @@ protected void onNonHtmlContent(final NonHtmlContentEvent event) { * @param event the RequestErrorEvent instance */ protected void onRequestError(final RequestErrorEvent event) { + LOGGER.log(Level.INFO, "onRequestError: {0}", event.getCrawlCandidate().getRequestUrl()); } /** @@ -414,6 +422,11 @@ protected void onRequestError(final RequestErrorEvent event) { * @param event the RequestRedirectEvent instance */ protected void onRequestRedirect(final RequestRedirectEvent event) { + LOGGER.log(Level.INFO, "onRequestRedirect: {0} -> {1}", + new Object[]{ + event.getCrawlCandidate().getRequestUrl(), + event.getRedirectedCrawlRequest().getRequestUrl() + }); } /** @@ -423,11 +436,13 @@ protected void onRequestRedirect(final RequestRedirectEvent event) { * @param event the PageLoadTimeoutEvent instance */ protected void onPageLoadTimeout(final PageLoadTimeoutEvent event) { + LOGGER.log(Level.INFO, "onPageLoadTimeout: {0}", event.getCrawlCandidate().getRequestUrl()); } /** * Callback which gets called when the crawler is stopped. */ protected void onStop() { + LOGGER.info("onStop"); } } From 1560599948116e528ad0570cc57f10423657273b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Bencze?= Date: Wed, 13 Jun 2018 23:55:09 +0200 Subject: [PATCH 21/28] Update README --- README.md | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 1bf7e74..0cc726d 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,37 @@ Serritor ======== -Serritor is an open source web crawler framework built upon [Selenium](http://www.seleniumhq.org/) and written in Java. Crawling dynamic web pages is no longer a problem! +Serritor is an open source web crawler framework built upon [Selenium](http://www.seleniumhq.org/) and written in Java. It can be used to crawl dynamic web pages that use JavaScript. 
-## Installation -### Using Maven +## Using Serritor in your build +### Maven Add the following dependency to your pom.xml: ```xml com.github.peterbencze serritor - 1.3.1 + 1.4.0 ``` -### Without Maven +### Gradle + +Add the following dependency to your build.gradle: +```groovy +compile group: 'com.github.peterbencze', name: 'serritor', version: '1.4.0' +``` + +### Manual dependencies The standalone JAR files are available on the [releases](https://github.com/peterbencze/serritor/releases) page. ## Documentation -See the [Wiki](https://github.com/peterbencze/serritor/wiki) page. +* The [Wiki](https://github.com/peterbencze/serritor/wiki) contains usage information and examples +* The Javadoc is available [here](https://peterbencze.github.io/serritor/) ## Quickstart -_BaseCrawler_ provides a skeletal implementation of a crawler to minimize the effort to create your own. First, create a class that extends _BaseCrawler_. In this class, you can implement the behavior of your crawler. There are callbacks available for every stage of crawling. Below you can find an example: +The _BaseCrawler_ abstract class provides a skeletal implementation of a crawler to minimize the effort to create your own. The extending class should define the logic of the crawler. Below you can find a simple example that is enough to get you started: ```java public class MyCrawler extends BaseCrawler { @@ -37,31 +45,24 @@ public class MyCrawler extends BaseCrawler { } @Override - protected void onResponseComplete(final HtmlResponse response) { + protected void onPageLoad(final PageLoadEvent event) { // Crawl every URL that match the given pattern - urlFinder.findUrlsInResponse(response) + urlFinder.findUrlsInPage(event) .stream() .map(CrawlRequestBuilder::new) .map(CrawlRequestBuilder::build) .forEach(this::crawl); - } - - @Override - protected void onNonHtmlResponse(final NonHtmlResponse response) { - System.out.println("Received a non-HTML response from: " + response.getCrawlRequest().getRequestUrl()); - } - - @Override - protected void onUnsuccessfulRequest(final UnsuccessfulRequest request) { - System.out.println("Could not get response from: " + request.getCrawlRequest().getRequestUrl()); + + // ... 
} } ``` By default, the crawler uses [HtmlUnit headless browser](http://htmlunit.sourceforge.net/): ```java -public static void main(String[] args) { +public static void main(final String[] args) { // Create the configuration - CrawlerConfiguration config = new CrawlerConfigurationBuilder().setOffsiteRequestFiltering(true) + CrawlerConfiguration config = new CrawlerConfigurationBuilder() + .setOffsiteRequestFiltering(true) .addAllowedCrawlDomain("example.com") .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build()) .build(); @@ -73,11 +74,12 @@ public static void main(String[] args) { crawler.start(); } ``` -Of course, you can also use any other browsers by specifying a corresponding _WebDriver_ instance: +Of course, you can also use any other browsers by specifying a corresponding `WebDriver` instance: ```java -public static void main(String[] args) { +public static void main(final String[] args) { // Create the configuration - CrawlerConfiguration config = new CrawlerConfigurationBuilder().setOffsiteRequestFiltering(true) + CrawlerConfiguration config = new CrawlerConfigurationBuilder() + .setOffsiteRequestFiltering(true) .addAllowedCrawlDomain("example.com") .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build()) .build(); @@ -90,7 +92,7 @@ public static void main(String[] args) { } ``` -That's it! In just a few lines you can make a crawler that crawls every link it finds, while filtering duplicate and offsite requests. You also get access to the _WebDriver_ instance, so you can use all the features that are provided by Selenium. +That's it! In just a few lines you can create a crawler that crawls every link it finds, while filtering duplicate and offsite requests. You also get access to the `WebDriver` instance, so you can use all the features that are provided by Selenium. ## License The source code of Serritor is made available under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). From 23a18129ce7f579ac02010bd0788191ab068cd4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Bencze?= Date: Sat, 16 Jun 2018 18:12:24 +0200 Subject: [PATCH 22/28] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0cc726d..a3b4f4d 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ The standalone JAR files are available on the [releases](https://github.com/pete * The Javadoc is available [here](https://peterbencze.github.io/serritor/) ## Quickstart -The _BaseCrawler_ abstract class provides a skeletal implementation of a crawler to minimize the effort to create your own. The extending class should define the logic of the crawler. Below you can find a simple example that is enough to get you started: +The `BaseCrawler` abstract class provides a skeletal implementation of a crawler to minimize the effort to create your own. The extending class should define the logic of the crawler. 
Below you can find a simple example that is enough to get you started: ```java public class MyCrawler extends BaseCrawler { From 7337ef8b3d6eef3d9f3dd889550a0530bee5ed66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Bencze?= Date: Sun, 17 Jun 2018 00:48:57 +0200 Subject: [PATCH 23/28] Update README --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a3b4f4d..b4f25c8 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,9 @@ The standalone JAR files are available on the [releases](https://github.com/pete * The Javadoc is available [here](https://peterbencze.github.io/serritor/) ## Quickstart -The `BaseCrawler` abstract class provides a skeletal implementation of a crawler to minimize the effort to create your own. The extending class should define the logic of the crawler. Below you can find a simple example that is enough to get you started: +The `BaseCrawler` abstract class provides a skeletal implementation of a crawler to minimize the effort to create your own. The extending class should define the logic of the crawler. + +Below you can find a simple example that is enough to get you started: ```java public class MyCrawler extends BaseCrawler { From 6e5d3bc9ec0dbfed0b12efcea71b711881d2735d Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Tue, 19 Jun 2018 22:03:27 +0200 Subject: [PATCH 24/28] Remove unnecessary field --- .../github/peterbencze/serritor/internal/CrawlFrontier.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java index c2c337c..d3fb6e0 100644 --- a/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java +++ b/src/main/java/com/github/peterbencze/serritor/internal/CrawlFrontier.java @@ -40,10 +40,7 @@ public final class CrawlFrontier implements Serializable { private final CrawlerConfiguration config; - - private final Set allowedCrawlDomains; private final Set urlFingerprints; - private final Queue candidates; private CrawlCandidate currentCandidate; @@ -55,7 +52,6 @@ public final class CrawlFrontier implements Serializable { */ public CrawlFrontier(final CrawlerConfiguration config) { this.config = config; - allowedCrawlDomains = config.getAllowedCrawlDomains(); urlFingerprints = new HashSet<>(); candidates = createPriorityQueue(); @@ -75,7 +71,7 @@ public void feedRequest(final CrawlRequest request, final boolean isCrawlSeed) { if (config.isOffsiteRequestFilteringEnabled()) { boolean inCrawlDomain = false; - for (CrawlDomain allowedCrawlDomain : allowedCrawlDomains) { + for (CrawlDomain allowedCrawlDomain : config.getAllowedCrawlDomains()) { if (allowedCrawlDomain.contains(request.getDomain())) { inCrawlDomain = true; break; From 3a727e10d76988a6307b569c87714b8156c22d7b Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Thu, 21 Jun 2018 00:48:45 +0200 Subject: [PATCH 25/28] Fix possible inconsistent state when resuming crawls --- .../peterbencze/serritor/api/BaseCrawler.java | 73 ++++++++++++------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 1ac57e4..76470c3 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -30,7 +30,9 @@ import java.io.IOException; import 
java.io.InputStream; import java.io.OutputStream; +import java.io.Serializable; import java.net.URI; +import java.util.HashMap; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.logging.Level; @@ -62,15 +64,15 @@ public abstract class BaseCrawler { private static final Logger LOGGER = Logger.getLogger(BaseCrawler.class.getName()); - private final CrawlerConfiguration config; - - private boolean isStopped; - private boolean stopCrawling; + private CrawlerConfiguration config; + private CrawlFrontier crawlFrontier; private BasicCookieStore cookieStore; private HttpClient httpClient; private WebDriver webDriver; - private CrawlFrontier crawlFrontier; private CrawlDelayMechanism crawlDelayMechanism; + private boolean isStopped; + private boolean isStopping; + private boolean canSaveState; /** * Base constructor of all crawlers. @@ -82,6 +84,9 @@ protected BaseCrawler(final CrawlerConfiguration config) { // Indicate that the crawler is not running isStopped = true; + + // Cannot save state until the crawler has been started at least once + canSaveState = false; } /** @@ -97,34 +102,38 @@ public final void start() { * @param webDriver the WebDriver instance to control the browser */ public final void start(final WebDriver webDriver) { - start(webDriver, new CrawlFrontier(config)); + start(webDriver, false); } /** - * Initializes and runs the crawler. + * Performs initialization and runs the crawler. * - * @param crawlFrontier the CrawlFrontier instance to be used by the crawler to - * manage crawl requests + * @param isResuming indicates if a previously saved state is to be resumed */ - private void start(final WebDriver webDriver, final CrawlFrontier crawlFrontier) { + private void start(final WebDriver webDriver, final boolean isResuming) { try { - Validate.validState(isStopped, "The crawler is already started."); + Validate.validState(isStopped, "The crawler is already running."); + + this.webDriver = Validate.notNull(webDriver, "The webdriver cannot be null."); + + if (!isResuming) { + cookieStore = new BasicCookieStore(); + crawlFrontier = new CrawlFrontier(config); + } - isStopped = false; - cookieStore = new BasicCookieStore(); httpClient = HttpClientBuilder.create() .setDefaultCookieStore(cookieStore) .build(); - this.webDriver = Validate.notNull(webDriver, "The webdriver cannot be null."); - this.crawlFrontier = crawlFrontier; crawlDelayMechanism = createCrawlDelayMechanism(); + isStopped = false; + canSaveState = true; run(); } finally { // Always close the browser webDriver.quit(); - stopCrawling = false; + isStopping = false; isStopped = true; } } @@ -135,12 +144,15 @@ private void start(final WebDriver webDriver, final CrawlFrontier crawlFrontier) * @param out the output stream */ public final void saveState(final OutputStream out) { - // Check if the crawler has been started at least once, otherwise we have nothing to save - Validate.validState(crawlFrontier != null, - "Cannot save state at this point. The crawler should be started first."); + Validate.validState(canSaveState, + "Cannot save state at this point. 
The crawler should be started at least once."); + + HashMap, Serializable> stateObjects = new HashMap<>(); + stateObjects.put(config.getClass(), config); + stateObjects.put(crawlFrontier.getClass(), crawlFrontier); + stateObjects.put(cookieStore.getClass(), cookieStore); - // Save the crawl frontier's current state - SerializationUtils.serialize(crawlFrontier, out); + SerializationUtils.serialize(stateObjects, out); } /** @@ -160,10 +172,15 @@ public final void resumeState(final InputStream in) { * @param in the input stream from which the state should be loaded */ public final void resumeState(final WebDriver webDriver, final InputStream in) { - // Re-create crawl frontier from the saved state - CrawlFrontier deserializedCrawlFrontier = SerializationUtils.deserialize(in); + HashMap, Serializable> stateObjects + = SerializationUtils.deserialize(in); + + config = (CrawlerConfiguration) stateObjects.get(CrawlerConfiguration.class); + crawlFrontier = (CrawlFrontier) stateObjects.get(CrawlFrontier.class); + cookieStore = (BasicCookieStore) stateObjects.get(BasicCookieStore.class); - start(webDriver, deserializedCrawlFrontier); + // Resume crawling + start(webDriver, true); } /** @@ -171,10 +188,10 @@ public final void resumeState(final WebDriver webDriver, final InputStream in) { */ public final void stop() { Validate.validState(!isStopped, "The crawler is not started."); - Validate.validState(!stopCrawling, "The stop method has already been called."); + Validate.validState(!isStopping, "The stop method has already been called."); // Indicate that the crawling should be stopped - stopCrawling = true; + isStopping = true; } /** @@ -207,7 +224,7 @@ protected final void crawl(final List requests) { private void run() { onStart(); - while (!stopCrawling && crawlFrontier.hasNextCandidate()) { + while (!isStopping && crawlFrontier.hasNextCandidate()) { CrawlCandidate currentCandidate = crawlFrontier.getNextCandidate(); String candidateUrl = currentCandidate.getRequestUrl().toString(); HttpClientContext context = HttpClientContext.create(); @@ -378,7 +395,7 @@ private void performDelay() { TimeUnit.MILLISECONDS.sleep(crawlDelayMechanism.getDelay()); } catch (InterruptedException ex) { Thread.currentThread().interrupt(); - stopCrawling = true; + isStopping = true; } } From a7c6c018e6db851f8e5915536fdd8194bc2b0224 Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Thu, 21 Jun 2018 22:06:50 +0200 Subject: [PATCH 26/28] Fix incorrect event handling --- .../peterbencze/serritor/api/BaseCrawler.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 76470c3..75fd471 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -254,19 +254,24 @@ private void run() { // Create a new crawl request for the redirected URL handleRequestRedirect(currentCandidate, responseUrl); } else if (isContentHtml(httpHeadResponse)) { + boolean isTimedOut = false; + try { // Open URL in browser webDriver.get(candidateUrl); } catch (TimeoutException exception) { + isTimedOut = true; onPageLoadTimeout(new PageLoadTimeoutEvent(currentCandidate, exception)); } - String loadedPageUrl = webDriver.getCurrentUrl(); - if (!loadedPageUrl.equals(candidateUrl)) { - // Create a new crawl request for the redirected URL (JavaScript redirect) - 
handleRequestRedirect(currentCandidate, loadedPageUrl); - } else { - onPageLoad(new PageLoadEvent(currentCandidate, webDriver)); + if (!isTimedOut) { + String loadedPageUrl = webDriver.getCurrentUrl(); + if (!loadedPageUrl.equals(candidateUrl)) { + // Create a new crawl request for the redirected URL (JS redirect) + handleRequestRedirect(currentCandidate, loadedPageUrl); + } else { + onPageLoad(new PageLoadEvent(currentCandidate, webDriver)); + } } } else { // URLs that point to non-HTML content should not be opened in the browser From 107c4159a1ad369836713081fcb8278d3b9b12e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Bencze?= Date: Thu, 21 Jun 2018 23:39:03 +0200 Subject: [PATCH 27/28] Update README --- README.md | 52 ++++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index b4f25c8..66d4da1 100644 --- a/README.md +++ b/README.md @@ -61,37 +61,33 @@ public class MyCrawler extends BaseCrawler { ``` By default, the crawler uses [HtmlUnit headless browser](http://htmlunit.sourceforge.net/): ```java -public static void main(final String[] args) { - // Create the configuration - CrawlerConfiguration config = new CrawlerConfigurationBuilder() - .setOffsiteRequestFiltering(true) - .addAllowedCrawlDomain("example.com") - .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build()) - .build(); - - // Create the crawler using the configuration above - MyCrawler crawler = new MyCrawler(config); - - // Start it - crawler.start(); -} +// Create the configuration +CrawlerConfiguration config = new CrawlerConfigurationBuilder() + .setOffsiteRequestFiltering(true) + .addAllowedCrawlDomain("example.com") + .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build()) + .build(); + +// Create the crawler using the configuration above +MyCrawler crawler = new MyCrawler(config); + +// Start it +crawler.start(); ``` Of course, you can also use any other browsers by specifying a corresponding `WebDriver` instance: ```java -public static void main(final String[] args) { - // Create the configuration - CrawlerConfiguration config = new CrawlerConfigurationBuilder() - .setOffsiteRequestFiltering(true) - .addAllowedCrawlDomain("example.com") - .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build()) - .build(); - - // Create the crawler using the configuration above - MyCrawler crawler = new MyCrawler(config); - - // Start it - crawler.start(new ChromeDriver()); -} +// Create the configuration +CrawlerConfiguration config = new CrawlerConfigurationBuilder() + .setOffsiteRequestFiltering(true) + .addAllowedCrawlDomain("example.com") + .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build()) + .build(); + +// Create the crawler using the configuration above +MyCrawler crawler = new MyCrawler(config); + +// Start it +crawler.start(new ChromeDriver()); ``` That's it! In just a few lines you can create a crawler that crawls every link it finds, while filtering duplicate and offsite requests. You also get access to the `WebDriver` instance, so you can use all the features that are provided by Selenium. 
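The event-handling fix in patch 26 means a page-load timeout now fires only the dedicated callback; onPageLoad and the JavaScript-redirect handling are skipped for that candidate. For illustration, a minimal sketch of a crawler subclass that reacts to these events, using only the callback signatures shown in patch 20 (the class name is illustrative, and the Serritor imports are omitted as in the README examples):

```java
public class TimeoutAwareCrawler extends BaseCrawler {

    public TimeoutAwareCrawler(final CrawlerConfiguration config) {
        super(config);
    }

    @Override
    protected void onPageLoadTimeout(final PageLoadTimeoutEvent event) {
        // After patch 26, this is the only callback invoked when the browser times out on a page.
        System.err.println("Page load timed out: " + event.getCrawlCandidate().getRequestUrl());
    }

    @Override
    protected void onRequestRedirect(final RequestRedirectEvent event) {
        // Fired for both HTTP and JavaScript redirects; the event also carries the crawl
        // request that was created for the redirected URL.
        System.out.println(event.getCrawlCandidate().getRequestUrl()
                + " -> " + event.getRedirectedCrawlRequest().getRequestUrl());
    }
}
```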
From 2264bdd4444b595d07b281e18ad3d8796f59d9ab Mon Sep 17 00:00:00 2001 From: Peter Bencze Date: Sat, 23 Jun 2018 15:40:08 +0200 Subject: [PATCH 28/28] Change exception message --- .../java/com/github/peterbencze/serritor/api/BaseCrawler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java index 75fd471..75bb6d2 100644 --- a/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java +++ b/src/main/java/com/github/peterbencze/serritor/api/BaseCrawler.java @@ -188,7 +188,7 @@ public final void resumeState(final WebDriver webDriver, final InputStream in) { */ public final void stop() { Validate.validState(!isStopped, "The crawler is not started."); - Validate.validState(!isStopping, "The stop method has already been called."); + Validate.validState(!isStopping, "The crawler is already stopping."); // Indicate that the crawling should be stopped isStopping = true;
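Taken together, patches 25–28 persist the crawler's state as a single serialized map holding the configuration, the crawl frontier, and the cookie store, guarded by the canSaveState flag. Below is a minimal sketch of how the save/resume methods shown above might be used; the file name and try-with-resources handling are illustrative, MyCrawler is the crawler class from the README example, and the Serritor imports are omitted as in the README:

```java
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public final class SaveAndResumeExample {

    public static void main(final String[] args) throws IOException {
        CrawlerConfiguration config = new CrawlerConfigurationBuilder()
                .setOffsiteRequestFiltering(true)
                .addAllowedCrawlDomain("example.com")
                .addCrawlSeed(new CrawlRequestBuilder("http://example.com").build())
                .build();

        MyCrawler crawler = new MyCrawler(config);
        crawler.start();

        // Serialize the configuration, crawl frontier and cookie store once the crawl has run.
        try (OutputStream out = new FileOutputStream("crawl-state.ser")) {
            crawler.saveState(out);
        }

        // Later (possibly in a new process), restore the saved state and continue crawling;
        // a browser can also be supplied via resumeState(webDriver, in).
        try (InputStream in = new FileInputStream("crawl-state.ser")) {
            crawler.resumeState(in);
        }
    }
}
```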