From 0340d1995f8270704b725d7f3e9bde24125d58c9 Mon Sep 17 00:00:00 2001
From: John Hooks <bitmachina@outlook.com>
Date: Thu, 14 Dec 2023 16:42:16 -0800
Subject: [PATCH] feat: add stemmers from tntsearch

---
 bin/local-wp-shell.sh                         |    3 +
 phpstan.neon.dist                             |    4 +
 .../WP_HTML_Tag_Processor.php                 |   44 +-
 src/Exceptions/Exception.php                  |    5 -
 src/Exceptions/IndexNotFoundException.php     |    5 +
 src/Exceptions/IndexedSearchException.php     |    7 +
 src/Exceptions/TokenizationException.php      |    5 +
 src/Helpers/Str.php                           |   30 +-
 src/Index/IndexRepository.php                 |    8 +
 src/Stemmer/ArabicStemmer.php                 |  180 +++
 src/Stemmer/CroatianStemmer.php               |  340 +++++
 src/Stemmer/FrenchStemmer.php                 |  720 ++++++++++
 src/Stemmer/GermanStemmer.php                 |  258 ++++
 src/Stemmer/ItalianStemmer.php                |  463 ++++++
 src/Stemmer/LatvianStemmer.php                |  212 +++
 src/Stemmer/NoStemmer.php                     |   11 +
 src/Stemmer/PolishStemmer.php                 |  163 +++
 src/Stemmer/PorterStemmer.php                 |  403 ++++++
 src/Stemmer/PortugeseStemmer.php              |  766 ++++++++++
 src/Stemmer/RussianStemmer.php                |  112 ++
 src/Stemmer/Stemmer.php                       |    8 +
 src/Stemmer/UkrainianStemmer.php              |  113 ++
 src/Stopwords/croatian.json                   |  182 +++
 src/Stopwords/english.json                    |  187 +++
 src/Stopwords/french.json                     |  159 ++
 src/Stopwords/german.json                     |  234 +++
 src/Stopwords/italian.json                    |  281 ++++
 src/Stopwords/latvian.json                    |  165 +++
 src/Stopwords/russian.json                    |  153 ++
 src/Stopwords/spanish.json                    |  315 ++++
 src/Stopwords/ukrainian.json                  | 1279 +++++++++++++++++
 src/Tokenizer/HtmlTokenizer.php               |   90 ++
 src/Tokenizer/Tokenizer.php                   |    4 +-
 33 files changed, 6890 insertions(+), 19 deletions(-)
 create mode 100755 bin/local-wp-shell.sh
 rename src/{Tokenizer => Compat}/WP_HTML_Tag_Processor.php (98%)
 delete mode 100644 src/Exceptions/Exception.php
 create mode 100644 src/Exceptions/IndexNotFoundException.php
 create mode 100644 src/Exceptions/IndexedSearchException.php
 create mode 100644 src/Exceptions/TokenizationException.php
 create mode 100644 src/Index/IndexRepository.php
 create mode 100644 src/Stemmer/ArabicStemmer.php
 create mode 100644 src/Stemmer/CroatianStemmer.php
 create mode 100644 src/Stemmer/FrenchStemmer.php
 create mode 100644 src/Stemmer/GermanStemmer.php
 create mode 100644 src/Stemmer/ItalianStemmer.php
 create mode 100644 src/Stemmer/LatvianStemmer.php
 create mode 100644 src/Stemmer/NoStemmer.php
 create mode 100644 src/Stemmer/PolishStemmer.php
 create mode 100644 src/Stemmer/PorterStemmer.php
 create mode 100644 src/Stemmer/PortugeseStemmer.php
 create mode 100644 src/Stemmer/RussianStemmer.php
 create mode 100644 src/Stemmer/Stemmer.php
 create mode 100644 src/Stemmer/UkrainianStemmer.php
 create mode 100644 src/Stopwords/croatian.json
 create mode 100644 src/Stopwords/english.json
 create mode 100644 src/Stopwords/french.json
 create mode 100644 src/Stopwords/german.json
 create mode 100644 src/Stopwords/italian.json
 create mode 100644 src/Stopwords/latvian.json
 create mode 100644 src/Stopwords/russian.json
 create mode 100644 src/Stopwords/spanish.json
 create mode 100644 src/Stopwords/ukrainian.json
 create mode 100644 src/Tokenizer/HtmlTokenizer.php

diff --git a/bin/local-wp-shell.sh b/bin/local-wp-shell.sh
new file mode 100755
index 0000000..652f377
--- /dev/null
+++ b/bin/local-wp-shell.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Runs "vendor/bin/wp --allow-root shell" in the "wordpress" compose service; docker-compose.yml and .env are relative paths, so run this from the directory that holds them.
+docker compose --file docker-compose.yml --env-file .env exec -w /var/www/html/wp-content/plugins/indexed-search wordpress bash -c "vendor/bin/wp --allow-root shell"
diff --git a/phpstan.neon.dist b/phpstan.neon.dist
index af0014e..41ae88d 100644
--- a/phpstan.neon.dist
+++ b/phpstan.neon.dist
@@ -13,3 +13,7 @@ parameters:
     scanDirectories:
         - tests
         - vendor-prod
+    excludePaths:
+        - src/Compat/WP_HTML_Tag_Processor.php
+        - src/Stemmer/*
+        - tests/*/_wordpress/*
diff --git a/src/Tokenizer/WP_HTML_Tag_Processor.php b/src/Compat/WP_HTML_Tag_Processor.php
similarity index 98%
rename from src/Tokenizer/WP_HTML_Tag_Processor.php
rename to src/Compat/WP_HTML_Tag_Processor.php
index 34d9e0d..cdc8a2c 100644
--- a/src/Tokenizer/WP_HTML_Tag_Processor.php
+++ b/src/Compat/WP_HTML_Tag_Processor.php
@@ -1,6 +1,6 @@
 <?php
 
-namespace WpBlocks\Search\Tokenizer;
+namespace WpBlocks\Search\Compat;
 
 use WP_HTML_Attribute_Token;
 use WP_HTML_Span;
@@ -53,7 +53,7 @@
  *    no-ambiguous-ampersand rule, and it improperly handles the way semicolons may
  *    or may not terminate a character reference.
  *
- * @package WordPress
+ * @package    WordPress
  * @subpackage HTML-API
  *
  * @since 6.2.0
@@ -715,7 +715,9 @@ public function next_tag($query = null)
      */
     public function class_list()
     {
-        /** @var string $class contains the string value of the class attribute, with character references decoded. */
+        /**
+         * @var string $class contains the string value of the class attribute, with character references decoded.
+         */
         $class = $this->get_attribute('class');
 
         if (!is_string($class)) {
@@ -912,7 +914,7 @@ public function release_bookmark($name)
      * Skips contents of generic rawtext elements.
      *
      * @since 6.3.2
-     * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
+     * @see   https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
      *
      * @param string $tag_name The uppercase tag name which will close the RAWTEXT region.
      *
@@ -932,7 +934,7 @@ private function skip_rawtext($tag_name)
      * Skips contents of RCDATA elements, namely title and textarea tags.
      *
      * @since 6.2.0
-     * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
+     * @see   https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
      *
      * @param string $tag_name The uppercase tag name which will close the RCDATA region.
      *
@@ -1518,8 +1520,8 @@ private function after_tag()
      * (they are accumulated in different data formats for performance).
      *
      * @since 6.2.0
-     * @see WP_HTML_Tag_Processor::$lexical_updates
-     * @see WP_HTML_Tag_Processor::$classname_updates
+     * @see   WP_HTML_Tag_Processor::$lexical_updates
+     * @see   WP_HTML_Tag_Processor::$classname_updates
      */
     private function class_name_updates_to_attributes_updates()
     {
@@ -1551,7 +1553,6 @@ private function class_name_updates_to_attributes_updates()
          * attribute, skipping removed classes on the way, and then appending
          * added classes at the end. Only when finished processing will the
          * value contain the final new value.
-
          *
          * @var string $class
          */
@@ -1999,7 +2000,7 @@ public function get_attribute($name)
      *     $p->get_attribute_names_with_prefix( 'data-' ) === null;
      *
      * @since 6.2.0
-     * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
+     * @see   https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
      *
      * @param string $prefix Prefix of requested attribute names.
      *
@@ -2374,7 +2375,7 @@ public function remove_class($class_name)
      * Returns the string representation of the HTML Tag Processor.
      *
      * @since 6.2.0
-     * @see WP_HTML_Tag_Processor::get_updated_html()
+     * @see   WP_HTML_Tag_Processor::get_updated_html()
      *
      * @return string The processed HTML.
      */
@@ -2567,4 +2568,27 @@ private function matches()
 
         return true;
     }
+
+    /**
+     * Modifications by WpBlocks\Search: public helpers exposing internal parser state follow.
+     */
+    public function get_token_starts_at()
+    {
+        return $this->token_starts_at; // hands out the token_starts_at property unchanged
+    }
+
+    public function get_token_ends_at()
+    {
+        return $this->token_starts_at + $this->token_length; // start offset plus length, i.e. the offset just past the token
+    }
+
+    public function get_is_closing_tag()
+    {
+        return $this->is_closing_tag; // hands out the is_closing_tag property unchanged
+    }
+
+    public function substr(int $offset, ?int $length = null)
+    {
+        return substr($this->html, $offset, $length ?? strlen($this->html)); // before PHP 8.0 a null length is read as 0, so default to "through the end"
+    }
 }
diff --git a/src/Exceptions/Exception.php b/src/Exceptions/Exception.php
deleted file mode 100644
index 37931fb..0000000
--- a/src/Exceptions/Exception.php
+++ /dev/null
@@ -1,5 +0,0 @@
-<?php
-
-namespace WpBlocks\Search\Exception;
-
-interface Exception {}
diff --git a/src/Exceptions/IndexNotFoundException.php b/src/Exceptions/IndexNotFoundException.php
new file mode 100644
index 0000000..5eff6f5
--- /dev/null
+++ b/src/Exceptions/IndexNotFoundException.php
@@ -0,0 +1,5 @@
+<?php
+
+namespace WpBlocks\Search\Exceptions;
+
+class IndexNotFoundException extends IndexedSearchException {} // NOTE(review): presumably thrown when a named index is missing; no throw site is visible here, confirm
diff --git a/src/Exceptions/IndexedSearchException.php b/src/Exceptions/IndexedSearchException.php
new file mode 100644
index 0000000..c33a03b
--- /dev/null
+++ b/src/Exceptions/IndexedSearchException.php
@@ -0,0 +1,7 @@
+<?php
+
+namespace WpBlocks\Search\Exceptions;
+
+use Exception;
+
+class IndexedSearchException extends Exception {} // common parent of IndexNotFoundException and TokenizationException
diff --git a/src/Exceptions/TokenizationException.php b/src/Exceptions/TokenizationException.php
new file mode 100644
index 0000000..7fa9629
--- /dev/null
+++ b/src/Exceptions/TokenizationException.php
@@ -0,0 +1,5 @@
+<?php
+
+namespace WpBlocks\Search\Exceptions;
+
+class TokenizationException extends IndexedSearchException {} // thrown by the Str helper (src/Helpers/Str.php) when preg_split() returns false
diff --git a/src/Helpers/Str.php b/src/Helpers/Str.php
index ea66918..f56ee1e 100644
--- a/src/Helpers/Str.php
+++ b/src/Helpers/Str.php
@@ -5,10 +5,36 @@
 class Str
 {
     /**
-     * @return string[]
+     * Delimiter for splitWords(): any run of characters that are not letters, numbers, connector or dash punctuation, or "@". Inspired by TNTSearch:
+     * https://github.com/teamtnt/tntsearch/blob/c8863c626a47bcb73f860abfe8eed9fb3cde3be8/src/Support/Tokenizer.php
+     */
+    protected static string $splitWordsPattern = '/[^\p{L}\p{N}\p{Pc}\p{Pd}@]+/u';
+
+    /**
+     * @return array<int,string> The non-empty pieces of $text between whitespace runs; throws TokenizationException if preg_split() fails.
      */
     public static function splitOnWhitespace(string $text): array
     {
-        return preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
+        $result = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
+        // preg_split() returns false on a PCRE error; surface the PCRE error code instead of an anonymous exception.
+        if ($result === false) {
+            throw new \WpBlocks\Search\Exceptions\TokenizationException('Failed to split text on whitespace.', preg_last_error());
+        }
+
+        return $result;
+    }
+
+    /**
+     * @return array<int,string> The non-empty pieces of $text between delimiter runs (see $splitWordsPattern); throws TokenizationException if preg_split() fails.
+     */
+    public static function splitWords(string $text): array
+    {
+        $result = preg_split(self::$splitWordsPattern, $text, -1, PREG_SPLIT_NO_EMPTY);
+        // With the /u modifier preg_split() returns false for malformed UTF-8 input; report the PCRE error code.
+        if ($result === false) {
+            throw new \WpBlocks\Search\Exceptions\TokenizationException('Failed to split text into words.', preg_last_error());
+        }
+
+        return $result;
     }
 }
diff --git a/src/Index/IndexRepository.php b/src/Index/IndexRepository.php
new file mode 100644
index 0000000..ba1bfb8
--- /dev/null
+++ b/src/Index/IndexRepository.php
@@ -0,0 +1,8 @@
+<?php
+
+namespace WpBlocks\Search\Index;
+
+interface IndexRepository
+{
+    public function create(string $name): int; // NOTE(review): the meaning of the returned int (new index id?) is not visible here, confirm
+}
diff --git a/src/Stemmer/ArabicStemmer.php b/src/Stemmer/ArabicStemmer.php
new file mode 100644
index 0000000..30b2248
--- /dev/null
+++ b/src/Stemmer/ArabicStemmer.php
@@ -0,0 +1,180 @@
+<?php
+
+/*
+ * This is a reimplementation of AR-PHP Arabic stemmer.
+ * The original author is Khaled Al-Sham'aa <khaled@ar-php.org>
+ *
+ * Simple stemmer for arabic language rewritten by Nenad Tičarić
+ *
+ * @link https://github.com/teamtnt/tntsearch
+ *
+ * Copyright (c) 2016 Nenad Tičarić nticaric@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+namespace WpBlocks\Search\Stemmer;
+
+class ArabicStemmer implements Stemmer
+{
+    private static $_verbPre  = 'وأسفلي'; // characters roughStem() may strip from the start of a verb
+
+    private static $_verbPost = 'ومكانيه'; // characters roughStem() may strip from the end of a verb
+
+    private static $_verbMay; // $_verbPre . $_verbPost, assigned in the constructor
+
+    private static $_verbMaxPre  = 4; // upper bound on stripped leading characters (verbs)
+
+    private static $_verbMaxPost = 6; // upper bound on stripped trailing characters (verbs)
+
+    private static $_verbMinStem = 2; // a verb stem is kept only if last index - first index >= this
+
+    private static $_nounPre  = 'ابفكلوأ'; // characters roughStem() may strip from the start of a noun
+
+    private static $_nounPost = 'اتةكمنهوي'; // characters roughStem() may strip from the end of a noun
+
+    private static $_nounMay; // $_nounPre . $_nounPost, assigned in the constructor
+
+    private static $_nounMaxPre  = 4; // upper bound on stripped leading characters (nouns)
+
+    private static $_nounMaxPost = 6; // upper bound on stripped trailing characters (nouns)
+
+    private static $_nounMinStem = 2; // a noun stem is kept only if last index - first index >= this
+
+    /**
+     * Fills the static $_verbMay and $_nounMay strings by concatenating each prefix set with its suffix set.
+     *
+     * @ignore
+     */
+    public function __construct()
+    {
+        self::$_verbMay = self::$_verbPre . self::$_verbPost;
+        self::$_nounMay = self::$_nounPre . self::$_nounPost;
+    }
+
+    /**
+     * Get rough stem of the given Arabic word: the shorter of its noun-rule and verb-rule stems
+     *
+     * @param string $word Arabic word you would like to get its stem
+     *
+     * @return string|null Arabic stem of the word; null whenever either rough stem is null
+     *
+     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
+     */
+    public static function stem($word)
+    {
+        $nounStem = self::roughStem(
+            $word,
+            self::$_nounMay ?? (self::$_nounPre . self::$_nounPost), // still null if no instance was ever constructed
+            self::$_nounPre,
+            self::$_nounPost,
+            self::$_nounMaxPre,
+            self::$_nounMaxPost,
+            self::$_nounMinStem
+        );
+        $verbStem = self::roughStem(
+            $word,
+            self::$_verbMay ?? (self::$_verbPre . self::$_verbPost), // same fallback for the verb alphabet
+            self::$_verbPre,
+            self::$_verbPost,
+            self::$_verbMaxPre,
+            self::$_verbMaxPost,
+            self::$_verbMinStem
+        );
+        // A null stem counts as length 0; the casts avoid passing null to mb_strlen() (deprecated since PHP 8.1).
+        if (mb_strlen((string) $nounStem, 'UTF-8') < mb_strlen((string) $verbStem, 'UTF-8')) {
+            $stem = $nounStem;
+        } else {
+            $stem = $verbStem;
+        }
+
+        return $stem;
+    }
+
+    /**
+     * Strips a bounded prefix and suffix from an Arabic word and returns what is left, or null if too little remains
+     *
+     * @param string  $word      Arabic word to stem
+     * @param string  $notChars  Affix alphabet: characters NOT in this string mark the core of the stem
+     * @param string  $preChars  Characters that may be stripped from the start of the word
+     * @param string  $postChars Characters that may be stripped from the end of the word
+     * @param integer $maxPre    Maximum number of leading characters to strip
+     * @param integer $maxPost   Maximum number of trailing characters to strip
+     * @param integer $minStem   Minimum distance between the stem's first and last character index
+     *
+     * @return string|null The stem, or null when last index - first index is below $minStem
+     *
+     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
+     */
+    protected static function roughStem(
+        $word,
+        $notChars,
+        $preChars,
+        $postChars,
+        $maxPre,
+        $maxPost,
+        $minStem
+    ) {
+        $right = -1; // index of the first character not in $notChars; ends up as the stem's start offset
+        $left  = -1; // index of the last character not in $notChars; ends up as the stem's end offset
+        $max   = mb_strlen($word, 'UTF-8');
+
+        for ($i = 0; $i < $max; $i++) {
+            $needle = mb_substr($word, $i, 1, 'UTF-8');
+            if (mb_strpos($notChars, $needle, 0, 'UTF-8') === false) {
+                if ($right == -1) {
+                    $right = $i;
+                }
+                $left = $i;
+            }
+        }
+
+        if ($right > $maxPre) { // never strip more than $maxPre leading characters
+            $right = $maxPre;
+        }
+
+        if ($max - $left - 1 > $maxPost) { // never strip more than $maxPost trailing characters
+            $left = $max - $maxPost - 1;
+        }
+
+        for ($i = 0; $i < $right; $i++) { // the prefix ends at the first character that is not in $preChars
+            $needle = mb_substr($word, $i, 1, 'UTF-8');
+            if (mb_strpos($preChars, $needle, 0, 'UTF-8') === false) {
+                $right = $i;
+                break;
+            }
+        }
+
+        for ($i = $max - 1; $i > $left; $i--) { // scanning from the end, the stem ends at the first character not in $postChars
+            $needle = mb_substr($word, $i, 1, 'UTF-8');
+            if (mb_strpos($postChars, $needle, 0, 'UTF-8') === false) {
+                $left = $i;
+                break;
+            }
+        }
+
+        if ($left - $right >= $minStem) {
+            $stem = mb_substr($word, $right, $left - $right + 1, 'UTF-8');
+        } else {
+            $stem = null; // too little left after stripping
+        }
+
+        return $stem;
+    }
+}
diff --git a/src/Stemmer/CroatianStemmer.php b/src/Stemmer/CroatianStemmer.php
new file mode 100644
index 0000000..d44cae0
--- /dev/null
+++ b/src/Stemmer/CroatianStemmer.php
@@ -0,0 +1,340 @@
+<?php
+
+/*
+ * This is a reimplementation in PHP of a simple rule-based stemmer for Croatian
+ * @link http://nlp.ffzg.hr/resources/tools/stemmer-for-croatian/ (Python).
+ * The original author is Ivan Pandžić.
+ *
+ * Simple stemmer for croatian language rewritten by Nenad Tičarić
+ *
+ * @link https://github.com/teamtnt/tntsearch
+ *
+ * Copyright (c) 2016 Nenad Tičarić nticaric@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+namespace WpBlocks\Search\Stemmer;
+
+class CroatianStemmer implements Stemmer
+{
+    protected static $stop = ['biti', 'jesam', 'budem', 'sam', 'jesi', 'budeš', 'si', 'jesmo', 'budemo',
+        'smo', 'jeste', 'budete', 'ste', 'jesu', 'budu', 'su', 'bih', 'bijah', 'bjeh',
+        'bijaše', 'bi', 'bje', 'bješe', 'bijasmo', 'bismo', 'bjesmo', 'bijaste', 'biste',
+        'bjeste', 'bijahu', 'biste', 'bjeste', 'bijahu', 'bi', 'biše', 'bjehu', 'bješe',
+        'bio', 'bili', 'budimo', 'budite', 'bila', 'bilo', 'bile', 'ću', 'ćeš', 'će',
+        'ćemo', 'ćete', 'želim', 'želiš', 'želi', 'želimo', 'želite', 'žele', 'moram',
+        'moraš', 'mora', 'moramo', 'morate', 'moraju', 'trebam', 'trebaš', 'treba',
+        'trebamo', 'trebate', 'trebaju', 'mogu', 'možeš', 'može', 'možemo', 'možete'];
+
+    public static function stem($token)
+    {
+        if (in_array($token, self::$stop, true)) { // words listed in self::$stop are returned untouched (strict match)
+            return $token;
+        }
+        return self::korjenuj(self::transformiraj($token)); // rewrite the ending first, then strip the suffix
+    }
+
+    public static function istakniSlogotvornoR($niz)
+    {
+        return preg_replace('/(^|[^aeiou])r($|[^aeiou])/', '\1R\2', $niz); // upper-case an "r" that has no a/e/i/o/u directly before or after it
+    }
+
+    public static function imaSamoglasnik($niz)
+    {
+        // True when the string contains a, e, i, o, u or an "R"; istakniSlogotvornoR()
+        // upper-cases every "r" that is not next to one of those vowels.
+        $oznaceno = self::istakniSlogotvornoR($niz);
+        $pogoci   = [];
+        preg_match('/[aeiouR]/', $oznaceno, $pogoci);
+
+        return count($pogoci) > 0;
+    }
+
+    public static function transformiraj($pojavnica)
+    {
+        foreach (self::$transformations as $trazi => $zamijeni) { // suffix => replacement; the first match in array order wins
+            if (self::endsWith($pojavnica, $trazi)) {
+                return substr($pojavnica, 0, -1 * strlen($trazi)) . $zamijeni; // cut the suffix (byte-wise) and append the replacement
+            }
+        }
+        return $pojavnica; // no suffix matched: the word is returned unchanged
+    }
+
+    public static function korjenuj($pojavnica)
+    {
+        // Rule format: "<stem pattern> <suffix alternatives>"; return the first captured stem that has a vowel and is over one byte long.
+        foreach (self::$rules as $pravilo) {
+            [$osnova, $nastavak] = explode(' ', $pravilo);
+            $uzorak = '/^(' . $osnova . ')(' . $nastavak . ')$/';
+            if (!preg_match($uzorak, $pojavnica, $dioba)) {
+                continue;
+            }
+            if (self::imaSamoglasnik($dioba[1]) && strlen($dioba[1]) > 1) {
+                return $dioba[1];
+            }
+        }
+        return $pojavnica;
+    }
+
+    public static function endsWith($haystack, $needle)
+    {
+        // An empty needle always matches; otherwise compare the haystack's tail with the needle byte-for-byte.
+        return $needle === '' || (strlen($needle) <= strlen($haystack) && substr($haystack, -strlen($needle)) === $needle);
+    }
+
+    protected static $transformations = [
+        'lozi'     => 'loga',
+        'lozima'   => 'loga',
+        'pjesi'    => 'pjeh',
+        'pjesima'  => 'pjeh',
+        'vojci'    => 'vojka',
+        'bojci'    => 'bojka',
+        'jaci'     => 'jak',
+        'jacima'   => 'jak',
+        'čajan'    => 'čajni',
+        'ijeran'   => 'ijerni',
+        'laran'    => 'larni',
+        'ijesan'   => 'ijesni',
+        'anjac'    => 'anjca',
+        'ajac'     => 'ajca',
+        'ajaca'    => 'ajca',
+        'ljaca'    => 'ljca',
+        'ljac'     => 'ljca',
+        'ejac'     => 'ejca',
+        'ejaca'    => 'ejca',
+        'ojac'     => 'ojca',
+        'ojaca'    => 'ojca',
+        'ajaka'    => 'ajka',
+        'ojaka'    => 'ojka',
+        'šaca'     => 'šca',
+        'šac'      => 'šca',
+        'inzima'   => 'ing',
+        'inzi'     => 'ing',
+        'tvenici'  => 'tvenik',
+        'tetici'   => 'tetika',
+        'teticima' => 'tetika',
+        'nstava'   => 'nstva',
+        'nicima'   => 'nik',
+        'ticima'   => 'tik',
+        'zicima'   => 'zik',
+        'snici'    => 'snik',
+        'kuse'     => 'kusi',
+        'kusan'    => 'kusni',
+        'kustava'  => 'kustva',
+        'dušan'    => 'dušni',
+        'antan'    => 'antni',
+        'bilan'    => 'bilni',
+        'tilan'    => 'tilni',
+        'avilan'   => 'avilni',
+        'silan'    => 'silni',
+        'gilan'    => 'gilni',
+        'rilan'    => 'rilni',
+        'nilan'    => 'nilni',
+        'alan'     => 'alni',
+        'ozan'     => 'ozni',
+        'rave'     => 'ravi',
+        'stavan'   => 'stavni',
+        'pravan'   => 'pravni',
+        'tivan'    => 'tivni',
+        'sivan'    => 'sivni',
+        'atan'     => 'atni',
+        'cenata'   => 'centa',
+        'denata'   => 'denta',
+        'genata'   => 'genta',
+        'lenata'   => 'lenta',
+        'menata'   => 'menta',
+        'jenata'   => 'jenta',
+        'venata'   => 'venta',
+        'tetan'    => 'tetni',
+        'pletan'   => 'pletni',
+        'šave'     => 'šavi',
+        'manata'   => 'manta',
+        'tanata'   => 'tanta',
+        'lanata'   => 'lanta',
+        'sanata'   => 'santa',
+        'ačak'     => 'ačka',
+        'ačaka'    => 'ačka',
+        'ušak'     => 'uška',
+        'atak'     => 'atka',
+        'ataka'    => 'atka',
+        'atci'     => 'atka',
+        'atcima'   => 'atka',
+        'etak'     => 'etka',
+        'etaka'    => 'etka',
+        'itak'     => 'itka',
+        'itaka'    => 'itka',
+        'itci'     => 'itka',
+        'otak'     => 'otka',
+        'otaka'    => 'otka',
+        'utak'     => 'utka',
+        'utaka'    => 'utka',
+        'utci'     => 'utka',
+        'utcima'   => 'utka',
+        'eskan'    => 'eskna',
+        'tičan'    => 'tični',
+        'ojsci'    => 'ojska',
+        'esama'    => 'esma',
+        'metara'   => 'metra',
+        'centar'   => 'centra',
+        'centara'  => 'centra',
+        'istara'   => 'istra',
+        'istar'    => 'istra',
+        'ošću'     => 'osti',
+        'daba'     => 'dba',
+        'čcima'    => 'čka',
+        'čci'      => 'čka',
+        'mac'      => 'mca',
+        'maca'     => 'mca',
+        'naca'     => 'nca',
+        'nac'      => 'nca',
+        'voljan'   => 'voljni',
+        'anaka'    => 'anki',
+        'vac'      => 'vca',
+        'vaca'     => 'vca',
+        'saca'     => 'sca',
+        'sac'      => 'sca',
+        'naca'     => 'nca',
+        'nac'      => 'nca',
+        'raca'     => 'rca',
+        'rac'      => 'rca',
+        'aoca'     => 'alca',
+        'alaca'    => 'alca',
+        'alac'     => 'alca',
+        'elaca'    => 'elca',
+        'elac'     => 'elca',
+        'olaca'    => 'olca',
+        'olac'     => 'olca',
+        'olce'     => 'olca',
+        'njac'     => 'njca',
+        'njaca'    => 'njca',
+        'ekata'    => 'ekta',
+        'ekat'     => 'ekta',
+        'izam'     => 'izma',
+        'izama'    => 'izma',
+        'jebe'     => 'jebi',
+        'baci'     => 'baci',
+        'ašan'     => 'ašni',
+    ];
+
+    protected static $rules = [
+        '.+(s|š)k ijima|ijega|ijemu|ijem|ijim|ijih|ijoj|ijeg|iji|ije|ija|oga|ome|omu|ima|og|om|im|ih|oj|i|e|o|a|u',
+        '.+(s|š)tv ima|om|o|a|u',
+        // N
+        '.+(t|m|p|r|g)anij ama|ima|om|a|u|e|i| ',
+        '.+an inom|ina|inu|ine|ima|in|om|u|i|a|e| ',
+        '.+in ima|ama|om|a|e|i|u|o| ',
+        '.+on ovima|ova|ove|ovi|ima|om|a|e|i|u| ',
+        '.+n ijima|ijega|ijemu|ijeg|ijem|ijim|ijih|ijoj|iji|ije|ija|iju|ima|ome|omu|oga|oj|om|ih|im|og|o|e|a|u|i| ',
+        // Ć
+        '.+(a|e|u)ć oga|ome|omu|ega|emu|ima|oj|ih|om|eg|em|og|uh|im|e|a',
+        // G
+        '.+ugov ima|i|e|a',
+        '.+ug ama|om|a|e|i|u|o',
+        '.+log ama|om|a|u|e| ',
+        '.+[^eo]g ovima|ama|ovi|ove|ova|om|a|e|i|u|o| ',
+        // I
+        '.+(rrar|ott|ss|ll)i jem|ja|ju|o| ',
+        // J
+        '.+uj ući|emo|ete|mo|em|eš|e|u| ',
+        '.+(c|č|ć|đ|l|r)aj evima|evi|eva|eve|ama|ima|em|a|e|i|u| ',
+        '.+(b|c|d|l|n|m|ž|g|f|p|r|s|t|z)ij ima|ama|om|a|e|i|u|o| ',
+        // L
+        //.+al inom|ina|inu|ine|ima|om|in|i|a|e
+        //.+[^(lo|ž)]il ima|om|a|e|u|i|
+        '.+[^z]nal ima|ama|om|a|e|i|u|o| ',
+        '.+ijal ima|ama|om|a|e|i|u|o| ',
+        '.+ozil ima|om|a|e|u|i| ',
+        '.+olov ima|i|a|e',
+        '.+ol ima|om|a|u|e|i| ',
+        // M
+        '.+lem ama|ima|om|a|e|i|u|o| ',
+        '.+ram ama|om|a|e|i|u|o',
+        //.+(es|e|u)m ama|om|a|e|i|u|o
+        // R
+        //.+(a|d|e|o|u)r ama|ima|om|u|a|e|i|
+        '.+(a|d|e|o)r ama|ima|om|u|a|e|i| ',
+        // S
+        '.+(e|i)s ima|om|e|a|u',
+        // Š
+        '.+(t|n|j|k|j|t|b|g|v)aš ama|ima|om|em|a|u|i|e| ',
+        '.+(e|i)š ima|ama|om|em|i|e|a|u| ',
+        // T
+        '.+ikat ima|om|a|e|i|u|o| ',
+        '.+lat ima|om|a|e|i|u|o| ',
+        '.+et ama|ima|om|a|e|i|u|o| ',
+        //.+ot ama|ima|om|a|u|e|i|
+        '.+(e|i|k|o)st ima|ama|om|a|e|i|u|o| ',
+        '.+išt ima|em|a|e|u',
+        //.+ut ovima|evima|ove|ovi|ova|eve|evi|eva|ima|om|a|u|e|i|
+        // V
+        '.+ova smo|ste|hu|ti|še|li|la|le|lo|t|h|o',
+        '.+(a|e|i)v ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|ama|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o| ',
+        '.+[^dkml]ov ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o| ',
+        '.+(m|l)ov ima|om|a|u|e|i| ',
+        // PRIDJEVI
+        '.+el ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o| ',
+        '.+(a|e|š)nj ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|ega|emu|eg|em|im|ih|oj|om|og|a|e|i|o|u',
+        '.+čin ama|ome|omu|oga|ima|og|om|im|ih|oj|a|u|i|o|e| ',
+        '.+roši vši|smo|ste|še|mo|te|ti|li|la|lo|le|m|š|t|h|o',
+        '.+oš ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e| ',
+        '.+(e|o)vit ijima|ijega|ijemu|ijem|ijim|ijih|ijoj|ijeg|iji|ije|ija|oga|ome|omu|ima|og|om|im|ih|oj|i|e|o|a|u| ',
+        //.+tit ijima|ijega|ijemu|ijem|ijim|ijih|ijoj|ijeg|iji|ije|ija|oga|ome|omu|ima|og|om|im|ih|oj|e|o|a|u|i|
+        '.+ast ijima|ijega|ijemu|ijem|ijim|ijih|ijoj|ijeg|iji|ije|ija|oga|ome|omu|ima|og|om|im|ih|oj|i|e|o|a|u| ',
+        '.+k ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o| ',
+        // GLAGOLI
+        '.+(e|a|i|u)va jući|smo|ste|jmo|jte|ju|la|le|li|lo|mo|na|ne|ni|no|te|ti|še|hu|h|j|m|n|o|t|v|š| ',
+        '.+ir ujemo|ujete|ujući|ajući|ivat|ujem|uješ|ujmo|ujte|avši|asmo|aste|ati|amo|ate|aju|aše|ahu|ala|alo|ali|ale|uje|uju|uj|al|an|am|aš|at|ah|ao',
+        '.+ač ismo|iste|iti|imo|ite|iše|eći|ila|ilo|ili|ile|ena|eno|eni|ene|io|im|iš|it|ih|en|i|e',
+        '.+ača vši|smo|ste|smo|ste|hu|ti|mo|te|še|la|lo|li|le|ju|na|no|ni|ne|o|m|š|t|h|n',
+        //.+ači smo|ste|ti|li|la|lo|le|mo|te|še|m|š|t|h|o|
+        // Druga_vrsta
+        '.+n uvši|usmo|uste|ući|imo|ite|emo|ete|ula|ulo|ule|uli|uto|uti|uta|em|eš|uo|ut|e|u|i',
+        '.+ni vši|smo|ste|ti|mo|te|mo|te|la|lo|le|li|m|š|o',
+        // A
+        '.+((a|r|i|p|e|u)st|[^o]g|ik|uc|oj|aj|lj|ak|ck|čk|šk|uk|nj|im|ar|at|et|št|it|ot|ut|zn|zv)a jući|vši|smo|ste|jmo|jte|jem|mo|te|je|ju|ti|še|hu|la|li|le|lo|na|no|ni|ne|t|h|o|j|n|m|š',
+        '.+ur ajući|asmo|aste|ajmo|ajte|amo|ate|aju|ati|aše|ahu|ala|ali|ale|alo|ana|ano|ani|ane|al|at|ah|ao|aj|an|am|aš',
+        '.+(a|i|o)staj asmo|aste|ahu|ati|emo|ete|aše|ali|ući|ala|alo|ale|mo|ao|em|eš|at|ah|te|e|u| ',
+        '.+(b|c|č|ć|d|e|f|g|j|k|n|r|t|u|v)a lama|lima|lom|lu|li|la|le|lo|l',
+        '.+(t|č|j|ž|š)aj evima|evi|eva|eve|ama|ima|em|a|e|i|u| ',
+        //.+(e|j|k|r|u|v)al ama|ima|om|u|i|a|e|o|
+        //.+(e|j|k|r|t|u|v)al ih|im
+        '.+([^o]m|ič|nč|uč|b|c|ć|d|đ|h|j|k|l|n|p|r|s|š|v|z|ž)a jući|vši|smo|ste|jmo|jte|mo|te|ju|ti|še|hu|la|li|le|lo|na|no|ni|ne|t|h|o|j|n|m|š',
+        '.+(a|i|o)sta dosmo|doste|doše|nemo|demo|nete|dete|nimo|nite|nila|vši|nem|dem|neš|deš|doh|de|ti|ne|nu|du|la|li|lo|le|t|o',
+        '.+ta smo|ste|jmo|jte|vši|ti|mo|te|ju|še|la|lo|le|li|na|no|ni|ne|n|j|o|m|š|t|h',
+        '.+inj asmo|aste|ati|emo|ete|ali|ala|alo|ale|aše|ahu|em|eš|at|ah|ao',
+        '.+as temo|tete|timo|tite|tući|tem|teš|tao|te|li|ti|la|lo|le',
+        // I
+        '.+(elj|ulj|tit|ac|ič|od|oj|et|av|ov)i vši|eći|smo|ste|še|mo|te|ti|li|la|lo|le|m|š|t|h|o',
+        '.+(tit|jeb|ar|ed|uš|ič)i jemo|jete|jem|ješ|smo|ste|jmo|jte|vši|mo|še|te|ti|ju|je|la|lo|li|le|t|m|š|h|j|o',
+        '.+(b|č|d|l|m|p|r|s|š|ž)i jemo|jete|jem|ješ|smo|ste|jmo|jte|vši|mo|lu|še|te|ti|ju|je|la|lo|li|le|t|m|š|h|j|o',
+        '.+luč ujete|ujući|ujemo|ujem|uješ|ismo|iste|ujmo|ujte|uje|uju|iše|iti|imo|ite|ila|ilo|ili|ile|ena|eno|eni|ene|uj|io|en|im|iš|it|ih|e|i',
+        '.+jeti smo|ste|še|mo|te|ti|li|la|lo|le|m|š|t|h|o',
+        '.+e lama|lima|lom|lu|li|la|le|lo|l',
+        '.+i lama|lima|lom|lu|li|la|le|lo|l',
+        // Pridjev_t
+        '.+at ijega|ijemu|ijima|ijeg|ijem|ijih|ijim|ima|oga|ome|omu|iji|ije|ija|iju|oj|og|om|im|ih|a|u|i|e|o| ',
+        // Pridjev
+        '.+et avši|ući|emo|imo|em|eš|e|u|i',
+        '.+ ajući|alima|alom|avši|asmo|aste|ajmo|ajte|ivši|amo|ate|aju|ati|aše|ahu|ali|ala|ale|alo|ana|ano|ani|ane|am|aš|at|ah|ao|aj|an',
+        '.+ anje|enje|anja|enja|enom|enoj|enog|enim|enih|anom|anoj|anog|anim|anih|eno|ovi|ova|oga|ima|ove|enu|anu|ena|ama',
+        '.+ nijega|nijemu|nijima|nijeg|nijem|nijim|nijih|nima|niji|nije|nija|niju|noj|nom|nog|nim|nih|an|na|nu|ni|ne|no',
+        '.+ om|og|im|ih|em|oj|an|u|o|i|e|a',
+    ];
+}
diff --git a/src/Stemmer/FrenchStemmer.php b/src/Stemmer/FrenchStemmer.php
new file mode 100644
index 0000000..45d0897
--- /dev/null
+++ b/src/Stemmer/FrenchStemmer.php
@@ -0,0 +1,720 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ * @link http://snowball.tartarus.org/algorithms/french/stemmer.html
+ *
+ * The original author is wamania
+ *
+ * Simple stemmer for french language rewritten by Nenad Tičarić
+ *
+ * @link https://github.com/teamtnt/tntsearch
+ *
+ * Copyright (c) 2016 Nenad Tičarić nticaric@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+class FrenchStemmer implements Stemmer
+{
+    /**
+     * All french vowels
+     */
+    protected static $vowels = ['a', 'e', 'i', 'o', 'u', 'y', 'â', 'à', 'ë', 'é', 'ê', 'è', 'ï', 'î', 'ô', 'û', 'ù'];
+
+    protected $word;
+
+    /**
+     * Helper, contains stringified list of vowels
+     *
+     * @var string
+     */
+    protected $plainVowels;
+
+    /**
+     * The word as it stands after step 0, used to check whether step 1, 2a or 2b modified it
+     *
+     * @var string
+     */
+    protected $originalWord;
+
+    /**
+     * RV value
+     *
+     * @var string
+     */
+    protected $rv;
+
+    /**
+     * RV index (based on the beginning of the word)
+     *
+     * @var int
+     */
+    protected $rvIndex;
+
+    /**
+     * R1 value
+     *
+     * @var string
+     */
+    protected $r1;
+
+    /**
+     * R1 index (based on the beginning of the word)
+     *
+     * @var int
+     */
+    protected $r1Index;
+
+    /**
+     * R2 value
+     *
+     * @var string
+     */
+    protected $r2;
+
+    /**
+     * R2 index (based on the beginning of the word)
+     *
+     * @var int
+     */
+    protected $r2Index;
+
+    public static function stem($word)
+    {
+        return (new static())->analyze($word); // fresh instance per call, so analyze()'s per-word state is never shared
+    }
+
+    public function analyze($word)
+    {
+        // Stems $word and returns the stem. Work happens on a lower-cased copy; the vowel list is flattened for regex classes.
+        $this->word = mb_strtolower($word);
+        $this->plainVowels = implode('', static::$vowels);
+
+        // Step 0 upper-cases some u/i/y (upper-case letters are absent from $vowels); then RV, R1 and R2 are located.
+        $this->step0();
+        $this->rv();
+        $this->r1();
+        $this->r2();
+
+        // to know if step1, 2a or 2b have altered the word
+        $this->originalWord = $this->word;
+
+        // step1() returns 2 when no suffix matched or when amment, emment, ment or ments was found, 3 otherwise.
+        $nextStep = $this->step1();
+
+        // Do step 2a if either no ending was removed by step 1, or if one of endings amment, emment, ment, ments was found.
+        // Step 2b only runs when step 2a removed nothing.
+        if (($nextStep === 2) || ($this->originalWord === $this->word)) {
+            if (!$this->step2a()) {
+                $this->step2b();
+            }
+        }
+
+        // Step 3 if steps 1-2b changed the word, step 4 otherwise. Strict !== keeps this a plain string comparison.
+        if ($this->word !== $this->originalWord) {
+            $this->step3();
+        } else {
+            $this->step4();
+        }
+
+        $this->step5();
+        $this->step6();
+        $this->finish();
+        return $this->word;
+    }
+
+    /**
+     * Step 0: put into upper case the u/i/y that must not count as vowels (the word is assumed lower case).
+     * In order: u after q (quand -> qUand); y preceded, then followed, by a vowel (yeux -> Yeux);
+     * u, then i, between two vowels (jouer -> joUer, ennuie -> ennuIe).
+     */
+    private function step0()
+    {
+        $v = '[' . $this->plainVowels . ']';
+        $rules = [
+            '#([q])u#u'        => '$1U',
+            "#({$v})y#u"       => '$1Y',
+            "#y({$v})#u"       => 'Y$1',
+            "#({$v})u({$v})#u" => '$1U$2',
+            "#({$v})i({$v})#u" => '$1I$2',
+        ];
+        $this->word = preg_replace(array_keys($rules), array_values($rules), $this->word);
+    }
+
+    /**
+     * Step 1: standard suffix removal
+     * Search for the longest among the following suffixes, and perform the action indicated.
+     *
+     * @return int 2 when no suffix matched or amment, emment, ment or ments was found, 3 for any other suffix
+     */
+    private function step1()
+    {
+        // ance   iqUe   isme   able   iste   eux   ances   iqUes   ismes   ables   istes
+        //     delete if in R2
+        if (($position = $this->search([
+            'ances', 'iqUes', 'ismes', 'ables', 'istes', 'ance', 'iqUe', 'isme', 'able', 'iste', 'eux',
+        ])) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = mb_substr($this->word, 0, $position);
+            }
+
+            return 3;
+        }
+
+        // atrice   ateur   ation   atrices   ateurs   ations
+        //      delete if in R2
+        //      if preceded by ic, delete if in R2, else replace by iqU
+        if (($position = $this->search(['atrices', 'ateurs', 'ations', 'atrice', 'ateur', 'ation'])) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = mb_substr($this->word, 0, $position);
+
+                if (($position2 = $this->searchIfInR2(['ic'])) !== false) {
+                    $this->word = mb_substr($this->word, 0, $position2);
+                } else {
+                    $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
+                }
+            }
+
+            return 3;
+        }
+
+        // logie   logies
+        //      replace with log if in R2
+        if (($position = $this->search(['logies', 'logie'])) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(logies|logie)$#u', 'log', $this->word);
+            }
+
+            return 3;
+        }
+
+        // usion   ution   usions   utions
+        //      replace with u if in R2
+        if (($position = $this->search(['usions', 'utions', 'usion', 'ution'])) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(usion|ution|usions|utions)$#u', 'u', $this->word);
+            }
+
+            return 3;
+        }
+
+        // ence   ences
+        //      replace with ent if in R2
+        if (($position = $this->search(['ences', 'ence'])) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(ence|ences)$#u', 'ent', $this->word);
+            }
+
+            return 3;
+        }
+
+        // issement   issements
+        //      delete if in R1 and preceded by a non-vowel (strict !== so a match at position 0 still counts as found)
+        if (($position = $this->search(['issements', 'issement'])) !== false) {
+            if ($this->inR1($position)) {
+                $before = $position - 1;
+                $letter = mb_substr($this->word, $before, 1);
+
+                if (!in_array($letter, static::$vowels)) {
+                    $this->word = mb_substr($this->word, 0, $position);
+                }
+            }
+
+            return 3;
+        }
+
+        // ement   ements
+        //      delete if in RV
+        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+        //      if preceded by eus, delete if in R2, else replace by eux if in R1, otherwise,
+        //      if preceded by abl or iqU, delete if in R2, otherwise,
+        //      if preceded by ièr or Ièr, replace by i if in RV
+        if (($position = $this->search(['ements', 'ement'])) !== false) {
+            if ($this->inRv($position)) {
+                $this->word = mb_substr($this->word, 0, $position);
+            }
+
+            if (($position = $this->searchIfInR2(['iv'])) !== false) {
+                $this->word = mb_substr($this->word, 0, $position);
+
+                if (($position2 = $this->searchIfInR2(['at'])) !== false) {
+                    $this->word = mb_substr($this->word, 0, $position2);
+                }
+            } elseif (($position = $this->search(['eus'])) !== false) {
+                if ($this->inR2($position)) {
+                    $this->word = mb_substr($this->word, 0, $position);
+                } elseif ($this->inR1($position)) {
+                    $this->word = preg_replace('#(eus)$#u', 'eux', $this->word);
+                }
+            } elseif (($position = $this->searchIfInR2(['abl', 'iqU'])) !== false) {
+                $this->word = mb_substr($this->word, 0, $position);
+            } elseif (($this->searchIfInRv(['ièr', 'Ièr'])) !== false) {
+                $this->word = preg_replace('#(ièr|Ièr)$#u', 'i', $this->word);
+            }
+
+            return 3;
+        }
+
+        // ité   ités
+        //      delete if in R2
+        //      if preceded by abil, delete if in R2, else replace by abl, otherwise,
+        //      if preceded by ic, delete if in R2, else replace by iqU, otherwise,
+        //      if preceded by iv, delete if in R2
+        if (($position = $this->search(['ités', 'ité'])) !== false) {
+            // delete if in R2
+            if ($this->inR2($position)) {
+                $this->word = mb_substr($this->word, 0, $position);
+            }
+
+            // if preceded by abil, delete if in R2, else replace by abl, otherwise,
+            if (($position = $this->search(['abil'])) !== false) {
+                if ($this->inR2($position)) {
+                    $this->word = mb_substr($this->word, 0, $position);
+                } else {
+                    $this->word = preg_replace('#(abil)$#u', 'abl', $this->word);
+                }
+
+            // if preceded by ic, delete if in R2, else replace by iqU, otherwise,
+            } elseif (($position = $this->search(['ic'])) !== false) {
+                if ($this->inR2($position)) {
+                    $this->word = mb_substr($this->word, 0, $position);
+                } else {
+                    $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
+                }
+
+            // if preceded by iv, delete if in R2
+            } elseif (($position = $this->searchIfInR2(['iv'])) !== false) {
+                $this->word = mb_substr($this->word, 0, $position);
+            }
+
+            return 3;
+        }
+
+        // if   ive   ifs   ives
+        //      delete if in R2
+        //      if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2, else replace by iqU)
+        if (($position = $this->search(['ifs', 'ives', 'if', 'ive'])) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = mb_substr($this->word, 0, $position);
+            }
+
+            if (($position = $this->searchIfInR2(['at'])) !== false) {
+                $this->word = mb_substr($this->word, 0, $position);
+
+                if (($position2 = $this->search(['ic'])) !== false) {
+                    if ($this->inR2($position2)) {
+                        $this->word = mb_substr($this->word, 0, $position2);
+                    } else {
+                        $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
+                    }
+                }
+            }
+
+            return 3;
+        }
+
+        // eaux
+        //      replace with eau
+        if (($this->search(['eaux'])) !== false) {
+            $this->word = preg_replace('#(eaux)$#u', 'eau', $this->word);
+
+            return 3;
+        }
+
+        // aux
+        //      replace with al if in R1
+        if (($position = $this->search(['aux'])) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(aux)$#u', 'al', $this->word);
+            }
+
+            return 3;
+        }
+
+        // euse   euses
+        //      delete if in R2, else replace by eux if in R1
+        if (($position = $this->search(['euses', 'euse'])) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = mb_substr($this->word, 0, $position);
+            } elseif ($this->inR1($position)) {
+                $this->word = preg_replace('#(euses|euse)$#u', 'eux', $this->word);
+            }
+
+            return 3;
+        }
+
+        // amment
+        //      replace with ant if in RV
+        if (($position = $this->search(['amment'])) !== false) {
+            if ($this->inRv($position)) {
+                $this->word = preg_replace('#(amment)$#u', 'ant', $this->word);
+            }
+            return 2;
+        }
+
+        // emment
+        //      replace with ent if in RV
+        if (($position = $this->search(['emment'])) !== false) {
+            if ($this->inRv($position)) {
+                $this->word = preg_replace('#(emment)$#u', 'ent', $this->word);
+            }
+
+            return 2;
+        }
+
+        // ment   ments
+        //      delete if preceded by a vowel in RV (strict !== so a match at position 0 still counts as found)
+        if (($position = $this->search(['ments', 'ment'])) !== false) {
+            $before = $position - 1;
+            $letter = mb_substr($this->word, $before, 1);
+
+            if ($this->inRv($before) && (in_array($letter, static::$vowels))) {
+                $this->word = mb_substr($this->word, 0, $position);
+            }
+
+            return 2;
+        }
+
+        return 2;
+    }
+
+    /**
+     * Step 2a: Verb suffixes beginning i
+     *  Every test in this step is confined to the RV region.
+     *  The first listed suffix that ends the word is deleted when the letter before it is a non-vowel
+     *  that itself lies in RV. Longer suffixes are listed before the shorter ones they end with.
+     *
+     * @return bool true when a suffix was removed
+     */
+    private function step2a()
+    {
+        $suffixes = [
+            'îmes', 'îtes', 'ît', 'ies', 'ie', 'iraIent', 'irais', 'irait', 'irai', 'iras', 'ira', 'irent', 'irez', 'iriez',
+            'irions', 'irons', 'iront', 'ir', 'issaIent', 'issais', 'issait', 'issant', 'issantes', 'issante', 'issants',
+            'issent', 'isses', 'issez', 'isse', 'issiez', 'issions', 'issons', 'is', 'it', 'i',
+        ];
+
+        $start = $this->searchIfInRv($suffixes);
+        if ($start === false) {
+            return false;
+        }
+        $previous = mb_substr($this->word, $start - 1, 1);
+        if (!$this->inRv($start - 1) || in_array($previous, static::$vowels)) {
+            return false;
+        }
+        $this->word = mb_substr($this->word, 0, $start);
+        return true;
+    }
+
+    /**
+     * Step 2b: Other verb suffixes (run when step 2a removed nothing).
+     * All tests are confined to the RV region; within each list the first suffix that ends the word is used.
+     *
+     * @return bool true when one of the three suffix groups matched, even if "ions" was then left in place
+     */
+    private function step2b()
+    {
+        // é   ée   ées   és   èrent   er   era   erai   eraIent   erais   erait   eras   erez   eriez   erions   erons   eront   ez   iez
+        //      delete
+        $cut = $this->searchIfInRv([
+            'ées', 'èrent', 'erais', 'erait', 'erai', 'eraIent', 'eras', 'erez', 'eriez',
+            'erions', 'erons', 'eront', 'era', 'er', 'iez', 'ez', 'és', 'ée', 'é']);
+        if ($cut !== false) {
+            $this->word = mb_substr($this->word, 0, $cut);
+
+            return true;
+        }
+
+        // âmes   ât   âtes   a   ai   aIent   ais   ait   ant   ante   antes   ants   as   asse   assent   asses   assiez   assions
+        //      delete
+        //      if preceded by e, delete
+        $cut = $this->searchIfInRv([
+            'âmes', 'âtes', 'ât', 'aIent', 'ais', 'ait', 'antes', 'ante', 'ants', 'ant',
+            'assent', 'asses', 'assiez', 'assions', 'asse', 'as', 'ai', 'a']);
+        if ($cut !== false) {
+            // Also drop a preceding e, provided that e is itself inside RV.
+            $dropE = $this->inRv($cut - 1) && (mb_substr($this->word, $cut - 1, 1) === 'e');
+            $this->word = mb_substr($this->word, 0, $dropE ? $cut - 1 : $cut);
+
+            return true;
+        }
+
+        // ions
+        //      delete if in R2
+        $cut = $this->searchIfInRv(['ions']);
+        if ($cut === false) {
+            return false;
+        }
+
+        if ($this->inR2($cut)) {
+            $this->word = mb_substr($this->word, 0, $cut);
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 3: a trailing Y becomes i, a trailing ç becomes c.
+     */
+    private function step3()
+    {
+        // One call, two rules applied in order; at most one can fire because both test the same final letter.
+        $this->word = preg_replace(['#(Y)$#u', '#(ç)$#u'], ['i', 'c'], $this->word);
+    }
+
+    /**
+     * Step 4: Residual suffix
+     *
+     * @return bool true when one of the ion, ier/ière, e or guë rules was selected, false otherwise
+     */
+    private function step4()
+    {
+        //If the word ends s, not preceded by a, i, o, u, è or s, delete it.
+        // The u modifier makes PCRE treat è as one character rather than as two separate bytes.
+        if (preg_match('#[^aiouès]s$#u', $this->word)) {
+            $this->word = mb_substr($this->word, 0, -1);
+        }
+
+        // In the rest of step 4, all tests are confined to the RV region.
+        // ion
+        //      delete if in R2 and preceded by s or t
+        if ((($position = $this->searchIfInRv(['ion'])) !== false) && ($this->inR2($position))) {
+            $before = $position - 1;
+            $letter = mb_substr($this->word, $before, 1);
+
+            if ($this->inRv($before) && (($letter === 's') || ($letter === 't'))) {
+                $this->word = mb_substr($this->word, 0, $position);
+            }
+
+            return true;
+        }
+
+        // ier   ière   Ier   Ière
+        //      replace with i
+        if (($this->searchIfInRv(['ier', 'ière', 'Ier', 'Ière'])) !== false) {
+            $this->word = preg_replace('#(ier|ière|Ier|Ière)$#u', 'i', $this->word);
+            return true;
+        }
+
+        // e
+        //      delete
+        if (($this->searchIfInRv(['e'])) !== false) {
+            $this->word = mb_substr($this->word, 0, -1);
+            return true;
+        }
+
+        // ë
+        //      if preceded by gu, delete
+        if (($position = $this->searchIfInRv(['guë'])) !== false) {
+            if ($this->inRv($position + 2)) {
+                $this->word = mb_substr($this->word, 0, -1);
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Step 5: Undouble
+     * A word ending enn, onn, ett, ell or eill loses its last letter; no region restriction applies.
+     */
+    private function step5()
+    {
+        $doubled = $this->search(['enn', 'onn', 'ett', 'ell', 'eill']);
+
+        $this->word = ($doubled === false) ? $this->word : mb_substr($this->word, 0, -1);
+    }
+
+    /**
+     * Step 6: Un-accent
+     * If the word ends é or è followed by one or more characters that are not in $plainVowels, replace that é/è by e.
+     */
+    private function step6()
+    {
+        $this->word = preg_replace('#(é|è)([^'.$this->plainVowels.']+)$#u', 'e$2', $this->word);
+    }
+
+    /**
+     * And finally:
+     * Restore to lower case every I, U and Y still left in the word.
+     */
+    private function finish()
+    {
+        $this->word = strtr($this->word, 'IUY', 'iuy');
+    }
+
+    /**
+     *  Computes RV and stores it in $rv (the region text) and $rvIndex (its start offset in the word).
+     *  If the word begins with two vowels, RV is the region after the third letter,
+     *  otherwise the region after the first vowel not at the beginning of the word,
+     *  or the end of the word if these positions cannot be found.
+     *  (Exceptionally, par, col or tap, at the beginning of a word is also taken to define RV as the region to their right.)
+     *  Words shorter than three letters always get the empty region at the end of the word.
+     *
+     *  @return bool false only when the word has three or more letters and no vowel after the first letter
+     */
+    protected function rv()
+    {
+        $length = mb_strlen($this->word);
+
+        // Default: empty region located at the end of the word.
+        $this->rv = '';
+        $this->rvIndex = $length;
+
+        // Short words keep the default region.
+        if ($length < 3) {
+            return true;
+        }
+
+        // Both special cases place RV right after the third letter.
+        $startsWithTwoVowels = in_array(mb_substr($this->word, 0, 1), static::$vowels)
+            && in_array(mb_substr($this->word, 1, 1), static::$vowels);
+        $hasSpecialPrefix = in_array(mb_substr($this->word, 0, 3), ['par', 'col', 'tap']);
+
+        if ($startsWithTwoVowels || $hasSpecialPrefix) {
+            $start = 3;
+        } else {
+            // First vowel that is not the first letter; RV begins right after it.
+            $start = null;
+            for ($i = 1; $i < $length; ++$i) {
+                if (in_array(mb_substr($this->word, $i, 1), static::$vowels)) {
+                    $start = $i + 1;
+
+                    break;
+                }
+            }
+
+            if ($start === null) {
+                return false;
+            }
+        }
+
+        // Record the region that was found.
+        $this->rv = mb_substr($this->word, $start);
+        $this->rvIndex = $start;
+
+        return true;
+    }
+
+    protected function inRv($position)
+    {
+        return ($position >= $this->rvIndex); // inclusive: the character at $rvIndex is the first one inside RV
+    }
+
+    protected function inR1($position)
+    {
+        return ($position >= $this->r1Index); // inclusive: the character at $r1Index is the first one inside R1
+    }
+
+    protected function inR2($position)
+    {
+        return ($position >= $this->r2Index); // inclusive: the character at $r2Index is the first one inside R2
+    }
+
+    protected function searchIfInRv($suffixes)
+    {
+        return $this->search($suffixes, $this->rvIndex); // same as search(), with $rvIndex passed as the search offset
+    }
+
+    protected function searchIfInR2($suffixes)
+    {
+        return $this->search($suffixes, $this->r2Index); // same as search(), with $r2Index passed as the search offset
+    }
+
+    protected function search($suffixes, $offset = 0)
+    {
+        // Returns the start position of the first listed suffix that ends the word, or false when none does.
+        // Only occurrences that mb_strrpos() finds when searching from $offset are considered.
+        $wordLength = mb_strlen($this->word);
+        if ($offset > $wordLength) {
+            return false;
+        }
+        foreach ($suffixes as $candidate) {
+            $start = mb_strrpos($this->word, $candidate, $offset);
+            if (($start !== false) && ($start + mb_strlen($candidate) === $wordLength)) {
+                return $start;
+            }
+        }
+
+        return false;
+    }
+
+    /** R1 is the region after the first non-vowel following a vowel, or the end of the word if there is no such non-vowel. */
+    protected function r1()
+    {
+        $region = $this->rx($this->word);
+        $this->r1Index = $region[0];
+        $this->r1 = $region[1];
+    }
+
+    /**
+     * R2 is the region after the first non-vowel following a vowel in R1, or the end of the word if there is no such non-vowel.
+     * rx() reports an offset relative to R1, so it is shifted by $r1Index to make $r2Index relative to the whole word.
+     */
+    protected function r2()
+    {
+        [$offsetInR1, $this->r2] = $this->rx($this->r1);
+
+        $this->r2Index = $this->r1Index + $offsetInR1;
+    }
+
+    /**
+     * Common function for R1 and R2
+     * Search the region after the first non-vowel following a vowel in $in, or the end of $in if there is no such non-vowel.
+     * R1 : $in = $this->word
+     * R2 : $in = R1
+     *
+     * Example with the vowels of this class: rx('bateau') gives [3, 'eau'] (a is followed by the non-vowel t),
+     * and rx('eau') gives [3, ''] because no vowel of "eau" is followed by a non-vowel.
+     *
+     * @param string $in text to scan
+     *
+     * @return array [int $index, string $value]: start offset of the region inside $in, and the region text;
+     *               $index never exceeds the length of $in
+     */
+    protected function rx($in)
+    {
+        $length = mb_strlen($in);
+
+        // defaults: empty region at the end of $in
+        $value = '';
+        $index = $length;
+
+        // Stop one letter short of the end: the last letter has no successor, so it cannot be "a vowel followed by
+        // a non-vowel". Reading past the end used to yield '' (taken as a non-vowel) and an index of $length + 1.
+        for ($i = 0; $i < $length - 1; ++$i) {
+            if (!in_array(mb_substr($in, $i, 1), static::$vowels)) {
+                continue;
+            }
+
+            if (!in_array(mb_substr($in, $i + 1, 1), static::$vowels)) {
+                // The region starts right after the non-vowel at $i + 1.
+                $index = $i + 2;
+                $value = mb_substr($in, $index);
+
+                break;
+            }
+        }
+
+        return [$index, $value];
+    }
+}
diff --git a/src/Stemmer/GermanStemmer.php b/src/Stemmer/GermanStemmer.php
new file mode 100644
index 0000000..e79f6a0
--- /dev/null
+++ b/src/Stemmer/GermanStemmer.php
@@ -0,0 +1,258 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ * Copyright (c) 2013 Aris Buzachis (buzachis.aris@gmail.com)
+ *
+ * All rights reserved.
+ *
+ * This script is free software.
+ *
+ * DISCLAIMER:
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Takes a word and reduces it to its German stem using the Porter stemmer algorithm.
+ *
+ * References:
+ *  - http://snowball.tartarus.org/algorithms/porter/stemmer.html
+ *  - http://snowball.tartarus.org/algorithms/german/stemmer.html
+ *
+ * Usage:
+ *  $stem = GermanStemmer::stem($word);
+ *
+ * @author Aris Buzachis <buzachis.aris@gmail.com>
+ * @author Pascal Landau <kontakt@myseosolution.de>
+ */
+
+class GermanStemmer implements Stemmer
+{
+    /**
+     *  R1 and R2 regions (see the Porter algorithm)
+     */
+    private static $R1;
+
+    private static $R2;
+
+    private static $cache = [];
+
+    private static $vowels    = ['a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö', 'ü'];
+
+    private static $s_ending  = ['b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 'r', 't'];
+
+    private static $st_ending = ['b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't'];
+
+    /**
+     * Gets the stem of $word.
+     *
+     * @param string $word Word to stem; it is lower-cased first, and results are memoised per lower-cased word.
+     *
+     * @return string
+     *
+     * @throws \InvalidArgumentException when preg_last_error() reports an error after a /u match against the word
+     */
+    public static function stem($word)
+    {
+        $word = mb_strtolower($word);
+
+        //check for invalid characters
+        preg_match('#.#u', $word);
+        $pcreError = preg_last_error();
+        if ($pcreError !== 0) {
+            throw new \InvalidArgumentException("Word '$word' seems to be errornous. Error code from preg_last_error(): " . $pcreError);
+        }
+
+        return self::$cache[$word] ?? (self::$cache[$word] = self::getStem($word));
+    }
+
+    /**
+     * Runs the stemming pipeline on an already lower-cased word.
+     *
+     * @param string $word
+     *
+     * @return string
+     */
+    private static function getStem($word)
+    {
+        // Order matters: step0a marks U/Y first, step0b undoes the marking (and replaces umlauts) last.
+        $marked   = self::step0a($word);
+        $stripped = self::step3(self::step2(self::step1($marked)));
+
+        return self::step0b($stripped);
+    }
+
+    /**
+     * Upper-cases u, then y, where they stand between two vowels, so that later steps do not read them as vowels.
+     *
+     * @param string $word
+     *
+     * @return string
+     */
+    private static function step0a($word)
+    {
+        $vowel = '([' . implode('', self::$vowels) . '])';
+        $find  = ["#{$vowel}u{$vowel}#u", "#{$vowel}y{$vowel}#u"];
+
+        // The two patterns are applied one after the other, each on the result of the previous one.
+        return preg_replace($find, ['$1U$2', '$1Y$2'], $word);
+    }
+
+    /**
+     * Undoes the U/Y marking of step0a and additionally replaces ä, ö, ü by a, o, u.
+     *
+     * @param string $word
+     *
+     * @return string
+     */
+    private static function step0b($word)
+    {
+        $map = ['ä' => 'a', 'ö' => 'o', 'ü' => 'u', 'U' => 'u', 'Y' => 'y'];
+
+        return str_replace(array_keys($map), array_values($map), $word);
+    }
+
+    /**
+     * Step 1: replaces ß by ss, then strips em/ern/er and en/es/e endings that end R1, and finally a trailing s
+     * that follows one of the letters in $s_ending. Each ending is tried in turn against the current R1.
+     */
+    private static function step1($word)
+    {
+        $word = str_replace('ß', 'ss', $word);
+
+        self::getR($word);
+
+        // [ending, whether a resulting trailing "niss" is shortened to "nis"]
+        $endings = [['em', false], ['ern', false], ['er', false], ['en', true], ['es', true], ['e', true]];
+        foreach ($endings as [$ending, $fixNiss]) {
+            $removed  = 0;
+            $pattern  = '#' . $ending . '$#u';
+            self::$R1 = preg_replace($pattern, '', self::$R1, -1, $removed);
+            if ($removed === 0) {
+                continue;
+            }
+
+            $word = preg_replace($pattern, '', $word);
+            if ($fixNiss) {
+                $word = preg_replace('#niss$#u', 'nis', $word);
+            }
+        }
+
+        $sEndings = implode('', self::$s_ending);
+        return preg_replace('/([' . $sEndings . '])s$/u', '$1', $word);
+    }
+
+    /**
+     * Step 2: strips est/er/en endings that end R1, then a trailing st that follows a letter of $st_ending
+     * which is itself preceded by at least three characters (only attempted when R1 contains "st").
+     */
+    private static function step2($word)
+    {
+        self::getR($word);
+
+        foreach (['est', 'er', 'en'] as $ending) {
+            $removed  = 0;
+            $pattern  = '#' . $ending . '$#u';
+            self::$R1 = preg_replace($pattern, '', self::$R1, -1, $removed);
+            $word     = ($removed > 0) ? preg_replace($pattern, '', $word) : $word;
+        }
+
+        if (strpos(self::$R1, 'st') === false) {
+            return $word;
+        }
+        self::$R1 = preg_replace('#st$#u', '', self::$R1);
+        return preg_replace('#(...[' . implode('', self::$st_ending) . '])st$#u', '$1', $word);
+    }
+
+    /**
+     * Step 3: derivational endings. Recomputes R1/R2, then in order:
+     *  - end, ung, isch, ik, ig: removed from the word when they end R2 and the letter before them is not e;
+     *  - lich, heit: removed when they end R2; otherwise, when they end R1, removed only after er or en;
+     *  - keit: removed when it ends R2.
+     * self::$R1 / self::$R2 are trimmed alongside the word so later endings are tested against the shortened regions.
+     */
+    private static function step3($word)
+    {
+        self::getR($word);
+
+        foreach (['end', 'ung', 'isch', 'ik', 'ig'] as $ending) {
+            $atEnd = '#' . $ending . '$#u';
+            if (!preg_match($atEnd, self::$R2)) {
+                continue;
+            }
+
+            $removed = 0;
+            $word    = preg_replace('#([^e])' . $ending . '$#u', '$1', $word, -1, $removed);
+            if ($removed > 0) {
+                self::$R2 = preg_replace($atEnd, '', self::$R2);
+            }
+        }
+
+        foreach (['lich', 'heit'] as $ending) {
+            $atEnd   = '#' . $ending . '$#u';
+            $removed = 0;
+
+            self::$R2 = preg_replace($atEnd, '', self::$R2, -1, $removed);
+            if ($removed > 0) {
+                $word = preg_replace($atEnd, '', $word);
+
+                continue;
+            }
+
+            if (preg_match($atEnd, self::$R1)) {
+                $word = preg_replace('#(er|en)' . $ending . '$#u', '$1', $word, -1, $removed);
+                if ($removed > 0) {
+                    self::$R1 = preg_replace($atEnd, '', self::$R1);
+                }
+            }
+        }
+
+        $removed  = 0;
+        self::$R2 = preg_replace('#keit$#u', '', self::$R2, -1, $removed);
+        if ($removed > 0) {
+            $word = preg_replace('#keit$#u', '', $word);
+        }
+
+        return $word;
+    }
+
+    /**
+     * Find R1 and R2
+     *
+     * Stores both regions in self::$R1 / self::$R2; each stays '' when its defining vowel + non-vowel pair is missing.
+     *
+     * R2 is searched for inside R1 with the same pattern that is used to find R1 inside the word.
+     *
+     * @param string $word
+     */
+    private static function getR($word)
+    {
+        self::$R1 = '';
+        self::$R2 = '';
+
+        $letters  = implode('', self::$vowels);
+        $vowel    = '[' . $letters . ']';
+        $nonVowel = '[^' . $letters . ']';
+        // <rest> lazily consumes up to and including the first vowel + non-vowel pair, <r> is everything after it.
+        $pattern = '#(?P<rest>.*?' . $vowel . $nonVowel . ')(?P<r>.*)#u';
+
+        // R1 is the region after the first non-vowel following a vowel, or is the null region at the end of the word if there is no such non-vowel.
+        if (preg_match($pattern, $word, $match)) {
+            // [...], but then R1 is adjusted so that the region before it contains at least 3 letters.
+            $missing  = 3 - mb_strlen($match['rest']);
+            self::$R1 = ($missing > 0) ? mb_substr($match['r'], $missing) : $match['r'];
+        }
+
+        //R2 is the region after the first non-vowel following a vowel in R1, or is the null region at the end of the word if there is no such non-vowel.
+        if (preg_match($pattern, self::$R1, $match)) {
+            self::$R2 = $match['r'];
+        }
+    }
diff --git a/src/Stemmer/ItalianStemmer.php b/src/Stemmer/ItalianStemmer.php
new file mode 100644
index 0000000..40b8bb6
--- /dev/null
+++ b/src/Stemmer/ItalianStemmer.php
@@ -0,0 +1,463 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ *  The following code, downloaded from <https://www.drupal.org/project/italianstemmer>,
+ *  was originally written by Roberto Mirizzi (<roberto.mirizzi@gmail.com>,
+ *  <http://sisinflab.poliba.it/mirizzi/>) in February 2007. It was the PHP5 implementation
+ *  of Martin Porter's stemming algorithm for Italian language. This algorithm can be found
+ *  at the address: <http://snowball.tartarus.org/algorithms/italian/stemmer.html>.
+ *
+ *  It was rewritten in March 2017 for TNTSearch by GaspariLab S.r.l., <dev@gasparilab.it>.
+ */
+
+/*
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+class ItalianStemmer implements Stemmer
+{
+    private static $cache = [];
+
+    private static $vocali = ['a', 'e', 'i', 'o', 'u', 'à', 'è', 'ì', 'ò', 'ù'];
+
+    private static $consonanti = [
+        'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z',
+        'I', 'U',
+    ];
+
+    private static $accenti_acuti = ['á', 'é', 'í', 'ó', 'ú'];
+
+    private static $accenti_gravi = ['à', 'è', 'ì', 'ò', 'ù'];
+
+    private static $suffissi_step0 = [
+        'ci', 'gli', 'la', 'le', 'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi', 'sene',
+        'gliela', 'gliele', 'glieli', 'glielo', 'gliene', 'mela', 'mele', 'meli', 'melo', 'mene', 'tela', 'tele',
+        'teli', 'telo', 'tene', 'cela', 'cele', 'celi', 'celo', 'cene', 'vela', 'vele', 'veli', 'velo', 'vene',
+    ];
+
+    private static $suffissi_step1_a = [
+        'anza', 'anze', 'ico', 'ici', 'ica', 'ice', 'iche', 'ichi', 'ismo', 'ismi', 'abile', 'abili', 'ibile',
+        'ibili', 'ista', 'iste', 'isti', 'istà', 'istè', 'istì', 'oso', 'osi', 'osa', 'ose', 'mente', 'atrice',
+        'atrici', 'ante', 'anti',
+    ];
+
+    private static $suffissi_step1_b = ['azione', 'azioni', 'atore', 'atori'];
+
+    private static $suffissi_step1_c = ['logia', 'logie'];
+
+    private static $suffissi_step1_d = ['uzione', 'uzioni', 'usione', 'usioni'];
+
+    private static $suffissi_step1_e = ['enza', 'enze'];
+
+    private static $suffissi_step1_f = ['amento', 'amenti', 'imento', 'imenti'];
+
+    private static $suffissi_step1_g = ['amente'];
+
+    private static $suffissi_step1_h = ['ità'];
+
+    private static $suffissi_step1_i = ['ivo', 'ivi', 'iva', 'ive'];
+
+    private static $suffissi_step2 = [
+        'ammo', 'ando', 'ano', 'are', 'arono', 'asse', 'assero', 'assi', 'assimo', 'ata', 'ate', 'ati', 'ato', 'ava',
+        'avamo', 'avano', 'avate', 'avi', 'avo', 'emmo', 'enda', 'ende', 'endi', 'endo', 'erà', 'erai', 'eranno',
+        'ere', 'erebbe', 'erebbero', 'erei', 'eremmo', 'eremo', 'ereste', 'eresti', 'erete', 'erò', 'erono', 'essero',
+        'ete', 'eva', 'evamo', 'evano', 'evate', 'evi', 'evo', 'Yamo', 'iamo', 'immo', 'irà', 'irai', 'iranno', 'ire',
+        'irebbe', 'irebbero', 'irei', 'iremmo', 'iremo', 'ireste', 'iresti', 'irete', 'irò', 'irono', 'isca',
+        'iscano', 'isce', 'isci', 'isco', 'iscono', 'issero', 'ita', 'ite', 'iti', 'ito', 'iva', 'ivamo', 'ivano',
+        'ivate', 'ivi', 'ivo', 'ono', 'uta', 'ute', 'uti', 'uto', 'ar', 'ir',
+    ];
+
+    private static $ante_suff_a = ['ando', 'endo'];
+
+    private static $ante_suff_b = ['ar', 'er', 'ir'];
+
+    public function __construct()
+    {
+        usort(self::$suffissi_step0, function ($a, $b) { return mb_strlen($b) <=> mb_strlen($a); }); // longest first
+        usort(self::$suffissi_step1_a, function ($a, $b) { return mb_strlen($b) <=> mb_strlen($a); }); // 0 on ties,
+        usort(self::$suffissi_step2, function ($a, $b) { return mb_strlen($b) <=> mb_strlen($a); }); // as usort() expects
+    }
+
+    /**
+     * Returns the stem of $word, memoising the result per lower-cased input.
+     *
+     * @param string $word
+     *
+     * @return string
+     */
+    public static function stem($word)
+    {
+        $word = mb_strtolower($word);
+
+        // A /u match fails on malformed UTF-8; the outcome is read from preg_last_error().
+        preg_match('#.#u', $word);
+        $pregError = preg_last_error();
+        if ($pregError !== PREG_NO_ERROR) {
+            throw new \InvalidArgumentException('Word "'.$word.'" seems to be errornous.
+                Error code from preg_last_error(): '.$pregError);
+        }
+
+        if (isset(self::$cache[$word])) {
+            return self::$cache[$word];
+        }
+
+        return self::$cache[$word] = self::getStem($word);
+    }
+
+    /**
+     * Runs the prelude and steps 0-4 on $word; the suffix tables are sorted on first use.
+     * @param string $word
+     * @return string
+     */
+    private static function getStem($word)
+    {
+        // stem() is static and may run before any instance exists, so the constructor cannot be
+        // relied on: order the tables longest-first here, once (first-match lookups need it).
+        static $sorted = false;
+        if (!$sorted) {
+            $byLengthDesc = function ($a, $b) { return mb_strlen($b) <=> mb_strlen($a); };
+            usort(self::$suffissi_step0, $byLengthDesc);
+            usort(self::$suffissi_step1_a, $byLengthDesc);
+            usort(self::$suffissi_step2, $byLengthDesc);
+            $sorted = true;
+        }
+        $str = self::replaceAccAcuti(self::toLower(self::trim($word)));
+        $step0 = self::step0(self::IUBetweenVowToUpper(self::putUAfterQToUpper($str)));
+        return self::step4(self::step3b(self::step3a(self::step2($step0, self::step1($step0)))));
+    }
+
+    private static function trim($str)
+    {
+        return trim($str); // delegates to PHP's global trim() with its default character list
+    }
+
+    private static function toLower($str)
+    {
+        return mb_strtolower($str); // multibyte-safe; strtolower() is byte based and locale dependent before PHP 8.2
+    }
+
+    private static function replaceAccAcuti($str)
+    {
+        return str_replace(self::$accenti_acuti, self::$accenti_gravi, $str); // á é í ó ú -> à è ì ò ù, pairwise
+    }
+
+    private static function putUAfterQToUpper($str)
+    {
+        return str_replace('qu', 'qU', $str); // marks the u of every "qu" by upper-casing it
+    }
+
+    private static function IUBetweenVowToUpper($str)
+    {
+        // Only the 'i'/'u' between two vowels is marked; the vowels themselves stay lower case.
+        $pattern = '/([aeiouàèìòù])([iu])([aeiouàèìòù])/u';
+        return preg_replace_callback($pattern, function ($matches) {
+            return $matches[1].strtoupper($matches[2]).$matches[3];
+        }, $str);
+    }
+
+    private static function returnRV($str)
+    {
+        /*
+        If the second letter is a consonant, RV is the region after the next following vowel,
+        or if the first two letters are vowels, RV is the region after the next consonant, and otherwise
+        (consonant-vowel case) RV is the region after the third letter.
+        But RV is the end of the word if these positions cannot be found. Example:
+        m a c h o [ho]     o l i v a [va]     t r a b a j o [bajo]     á u r e o [eo] prezzo sprezzante
+        */
+        // Letters are read with mb_substr(): byte offsets such as $str[1] would split accented letters.
+        $first = mb_substr($str, 0, 1);
+        $second = mb_substr($str, 1, 1);
+        $vowels = implode(self::$vocali);
+        $consonants = implode(self::$consonanti);
+        if ($second === '') {
+            return '';
+        } elseif (in_array($second, self::$consonanti, true)) {
+            $pattern = '/^..[^'.$vowels.']*['.$vowels.'](.*)$/us';
+        } elseif (in_array($first, self::$vocali, true) && in_array($second, self::$vocali, true)) {
+            $pattern = '/^[^'.$consonants.']*['.$consonants.'](.*)$/us';
+        } elseif (in_array($first, self::$consonanti, true) && in_array($second, self::$vocali, true)) {
+            return mb_substr($str, 3);
+        } else {
+            return ''; // no rule applies; this path used to end without a return value (null)
+        }
+        return preg_match($pattern, $str, $found) === 1 ? $found[1] : '';
+    }
+
+    private static function returnR1($str)
+    {
+        /*
+        R1 is the region after the first non-vowel following a vowel, or is the null region at the end
+        of the word if there is no such non-vowel. Example:
+        beautiful [iful]	beauty [y]	beau [NULL]	animadversion [imadversion]	sprinkled [kled]	eucharist [harist]
+        */
+
+        // The /u flag makes the classes match whole characters; without it 'à' etc. are matched per byte.
+        $pattern = '/['.implode(self::$vocali).']+['.implode(self::$consonanti).'](.*)/u';
+
+        return preg_match($pattern, $str, $matches) === 1 ? $matches[1] : '';
+    }
+
+    private static function returnR2($str)
+    {
+        /*
+        R2 is the region after the first non-vowel following a vowel in R1, or is the null region at the end
+        of the word if there is no such non-vowel. Example:
+        beautiful [ul]	beauty [NULL]	beau [NULL]	animadversion [adversion]	sprinkled [NULL]	eucharist [ist]
+        */
+
+        // R2 relates to R1 exactly as R1 relates to the whole word, so the R1 rule is
+        // simply applied a second time to its own result.
+        $firstRegion = self::returnR1($str);
+        $secondRegion = self::returnR1($firstRegion);
+
+        return $secondRegion;
+    }
+
+    private static function step0($str)
+    {
+        // Step 0: Attached pronoun
+        // Always do step 0
+        // Only the RV region is searched; $str_len - $rv_len is the offset of RV inside $str.
+        $str_len = mb_strlen($str);
+        $rv = self::returnRV($str);
+        $rv_len = mb_strlen($rv);
+        // Look for a pronoun suffix that ends RV; $pos is its offset in RV (0 or false if none).
+        $pos = 0;
+        foreach (self::$suffissi_step0 as $suff) {
+            if ($rv_len - mb_strlen($suff) < 0) {
+                continue;
+            }
+            $pos = mb_strpos($rv, $suff, $rv_len - mb_strlen($suff));
+            if ($pos !== false) {
+                break;
+            }
+        }
+        // $ante_suff is the part of RV in front of the pronoun (empty when none was found).
+        $ante_suff = mb_substr($rv, 0, $pos);
+        $ante_suff_len = mb_strlen($ante_suff);
+        // Preceded by 'ando'/'endo': the pronoun is simply cut off.
+        foreach (self::$ante_suff_a as $ante_a) {
+            if ($ante_suff_len - mb_strlen($ante_a) < 0) {
+                continue;
+            }
+            $pos_a = mb_strpos($ante_suff, $ante_a, $ante_suff_len - mb_strlen($ante_a));
+            if ($pos_a !== false) {
+                return mb_substr($str, 0, $pos + $str_len - $rv_len);
+            }
+        }
+        // Preceded by 'ar'/'er'/'ir': the pronoun is cut off and an 'e' is appended.
+        foreach (self::$ante_suff_b as $ante_b) {
+            if ($ante_suff_len - mb_strlen($ante_b) < 0) {
+                continue;
+            }
+            $pos_b = mb_strpos($ante_suff, $ante_b, $ante_suff_len - mb_strlen($ante_b));
+            if ($pos_b !== false) {
+                return mb_substr($str, 0, $pos + $str_len - $rv_len).'e';
+            }
+        }
+        // No qualifying pronoun: the word is returned untouched.
+        return $str;
+    }
+
+    private static function deleteStuff($arr_suff, $str, $str_len, $where, $ovunque = false)
+    {
+        if ($where === 'r2') {
+            $r = self::returnR2($str);
+        } elseif ($where === 'rv') {
+            $r = self::returnRV($str);
+        } elseif ($where === 'r1') {
+            $r = self::returnR1($str);
+        }
+        // $where selects the region ('r2', 'rv' or 'r1'); any other value leaves $r undefined.
+        $r_len = mb_strlen($r);
+        // $ovunque: match the suffix at the end of $str, but delete it only if it also ends the region.
+        if ($ovunque) {
+            foreach ($arr_suff as $suff) {
+                if ($str_len - mb_strlen($suff) < 0) {
+                    continue;
+                }
+                $pos = mb_strpos($str, $suff, $str_len - mb_strlen($suff));
+                if ($pos !== false) {
+                    $pattern = '/'.$suff.'$/';
+                    $ret_str = preg_match($pattern, $r) ? mb_substr($str, 0, $pos) : '';
+                    if ($ret_str !== '') {
+                        return $ret_str;
+                    }
+                    break; // suffix found but not deletable: stop searching, the result is null
+                }
+            }
+        } else {
+            foreach ($arr_suff as $suff) {
+                if ($r_len - mb_strlen($suff) < 0) {
+                    continue;
+                }
+                $pos = mb_strpos($r, $suff, $r_len - mb_strlen($suff));
+                if ($pos !== false) {
+                    return mb_substr($str, 0, $pos + $str_len - $r_len); // $pos is relative to the region
+                }
+            }
+        }
+    }
+
+    private static function step1($str)
+    {
+        // Step 1: Standard suffix removal
+        // Always do step 1
+        // Each suffix group is tried in turn; the first group that removes something ends the step.
+        $str_len = mb_strlen($str);
+        // deleteStuff() returns null when nothing was removed, which is what the !empty() tests detect.
+        // Delete if in R1, if preceded by 'iv', delete if in R2 (and if further preceded by 'at', delete if in R2),
+        // otherwise, if preceded by 'os', 'ic' or 'abil', delete if in R2
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_g, $str, $str_len, 'r1'))) {
+            if (!empty($ret_str1 = self::deleteStuff(['iv'], $ret_str, mb_strlen($ret_str), 'r2'))) {
+                if (!empty($ret_str2 = self::deleteStuff(['at'], $ret_str1, mb_strlen($ret_str1), 'r2'))) {
+                    return $ret_str2;
+                } else {
+                    return $ret_str1;
+                }
+            } elseif (!empty(
+                $ret_str1 = self::deleteStuff(['os', 'ic', 'abil'], $ret_str, mb_strlen($ret_str), 'r2')
+            )) {
+                return $ret_str1;
+            } else {
+                return $ret_str;
+            }
+        }
+
+        // Delete if in R2
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_a, $str, $str_len, 'r2', true))) {
+            return $ret_str;
+        }
+
+        // Delete if in R2, if preceded by 'ic', delete if in R2
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_b, $str, $str_len, 'r2'))) {
+            if (!empty($ret_str1 = self::deleteStuff(['ic'], $ret_str, mb_strlen($ret_str), 'r2'))) {
+                return $ret_str1;
+            } else {
+                return $ret_str;
+            }
+        }
+
+        // Replace with 'log' if in R2
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_c, $str, $str_len, 'r2'))) {
+            return $ret_str.'log';
+        }
+
+        // Replace with 'u' if in R2
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_d, $str, $str_len, 'r2'))) {
+            return $ret_str.'u';
+        }
+
+        // Replace with 'ente' if in R2
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_e, $str, $str_len, 'r2'))) {
+            return $ret_str.'ente';
+        }
+
+        // Delete if in RV
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_f, $str, $str_len, 'rv'))) {
+            return $ret_str;
+        }
+
+        // Delete if in R2, if preceded by 'abil', 'ic' or 'iv', delete if in R2
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_h, $str, $str_len, 'r2'))) {
+            if (!empty($ret_str1 = self::deleteStuff(['abil', 'ic', 'iv'], $ret_str, mb_strlen($ret_str), 'r2'))) {
+                return $ret_str1;
+            } else {
+                return $ret_str;
+            }
+        }
+
+        // Delete if in R2, if preceded by 'at', delete if in R2 (and if further preceded by 'ic', delete if in R2)
+        if (!empty($ret_str = self::deleteStuff(self::$suffissi_step1_i, $str, $str_len, 'r2'))) {
+            if (!empty($ret_str1 = self::deleteStuff(['at'], $ret_str, mb_strlen($ret_str), 'r2'))) {
+                if (!empty($ret_str2 = self::deleteStuff(['ic'], $ret_str1, mb_strlen($ret_str1), 'r2'))) {
+                    return $ret_str2;
+                } else {
+                    return $ret_str1;
+                }
+            } else {
+                return $ret_str;
+            }
+        }
+        // No group matched: the word leaves step 1 unchanged (step2() relies on this to run).
+        return $str;
+    }
+
+    private static function step2($str, $str_step1)
+    {
+        // Step 2: Verb suffixes
+        // Do step 2 only if no ending was removed by step 1. The comparison is strict so that
+        // numeric-looking tokens (e.g. "1e3" and "1000") are never treated as equal.
+        if ($str !== $str_step1) {
+            return $str_step1;
+        }
+
+        // deleteStuff() yields null when no verb suffix ends the RV region.
+        $ret_str = self::deleteStuff(self::$suffissi_step2, $str, mb_strlen($str), 'rv');
+        if (!empty($ret_str)) {
+            return $ret_str;
+        }
+
+        return $str;
+    }
+
+    private static function step3a($str)
+    {
+        // Step 3a: Delete a final 'a', 'e', 'i', 'o',' à', 'è', 'ì' or 'ò' if it is in RV,
+        // and a preceding 'i' if it is in RV ('crocchi' -> 'crocch', 'crocchio' -> 'crocch')
+        // Always do steps 3a
+
+        $finalVowels = ['a', 'e', 'i', 'o', 'à', 'è', 'ì', 'ò'];
+
+        $withoutVowel = self::deleteStuff($finalVowels, $str, mb_strlen($str), 'rv');
+        if (empty($withoutVowel)) {
+            // No final vowel inside RV (or nothing would be left): the word stays as it is.
+            return $str;
+        }
+
+        // With the vowel gone, an 'i' that now ends the word is dropped as well when it lies
+        // in RV; otherwise the result of the first deletion is kept.
+        $withoutI = self::deleteStuff(['i'], $withoutVowel, mb_strlen($withoutVowel), 'rv');
+
+        return empty($withoutI) ? $withoutVowel : $withoutI;
+    }
+
+    private static function step3b($str)
+    {
+        // Step 3b: Replace final 'ch' (or 'gh') with 'c' (or 'g') if in 'RV' ('crocch' -> 'crocc')
+        // Always do steps 3b
+
+        // The cast turns a missing region (null) into an empty string for the calls below.
+        $rv = (string) self::returnRV($str);
+
+        // Part of the word in front of RV; it is never modified by this step.
+        $head = mb_substr($str, 0, mb_strlen($str) - mb_strlen($rv));
+
+        // '$1' keeps only the captured 'c' or 'g'. The earlier callback returned the complete
+        // match, which wrote the 'h' straight back and turned the whole step into a no-op.
+        $pattern = '/([cg])h$/';
+        $tail = preg_replace($pattern, '$1', $rv);
+
+        return $head.$tail;
+    }
+
+    private static function step4($str)
+    {
+        // Step 4: Finally, turn I and U back into lower case
+        // mb_strtolower() is used because strtolower() is byte based and locale dependent before PHP 8.2.
+        return mb_strtolower($str);
+    }
+}
diff --git a/src/Stemmer/LatvianStemmer.php b/src/Stemmer/LatvianStemmer.php
new file mode 100644
index 0000000..4ec249b
--- /dev/null
+++ b/src/Stemmer/LatvianStemmer.php
@@ -0,0 +1,212 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ * Light stemmer for Latvian.
+ *
+ * Original Java code can be found in https://github.com/apache/lucene-solr
+ * Ported to Python by Rihards Krišlauks with minor modifications
+ *
+ * Ported to PHP from https://github.com/rihardsk/LatvianStemmer
+ *
+ * Simple stemmer for latvian language rewritten by Nenad Tičarić
+ *
+ * @link https://github.com/teamtnt/tntsearch
+ *
+ * Copyright (c) 2016 Nenad Tičarić nticaric@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Light stemmer for Latvian.
+ * <p>
+ * This is a light version of the algorithm in Karlis Kreslin's PhD thesis
+ * <i>A stemming algorithm for Latvian</i> with the following modifications:
+ * <ul>
+ *   <li>Only explicitly stems noun and adjective morphology
+ *   <li>Stricter length/vowel checks for the resulting stems (verb etc suffix stripping is removed)
+ *   <li>Removes only the primary inflectional suffixes: case and number for nouns
+ *       case, number, gender, and definitiveness for adjectives.
+ *   <li>Palatalization is only handled when a declension II,V,VI noun suffix is removed.
+ * </ul>
+ */
+
+class LatvianStemmer implements Stemmer
+{
+    private static $affixes = [
+        ['ajiem', 3, false],
+        ['ajai', 3, false],
+        ['ajam', 2, false],
+        ['ajām', 2, false],
+        ['ajos', 2, false],
+        ['ajās', 2, false],
+        ['iem', 2, true],
+        ['ajā', 2, false],
+        ['ais', 2, false],
+        ['ai', 2, false],
+        ['ei', 2, false],
+        ['ām', 1, false],
+        ['am', 1, false],
+        ['ēm', 1, false],
+        ['īm', 1, false],
+        ['im', 1, false],
+        ['um', 1, false],
+        ['us', 1, true],
+        ['as', 1, false],
+        ['ās', 1, false],
+        ['es', 1, false],
+        ['os', 1, true],
+        ['ij', 1, false],
+        ['īs', 1, false],
+        ['ēs', 1, false],
+        ['is', 1, false],
+        ['ie', 1, false],
+        ['u', 1, true],
+        ['a', 1, true],
+        ['i', 1, true],
+        ['e', 1, false],
+        ['ā', 1, false],
+        ['ē', 1, false],
+        ['ī', 1, false],
+        ['ū', 1, false],
+        ['o', 1, false],
+        ['s', 0, false],
+        ['š', 0, false],
+    ];
+
+    private static $VOWELS = 'aāeēiīouū';
+
+    /**
+     * Lower-cases $word and strips the first affix from the table that is allowed to be removed.
+     *
+     * @param string $word
+     *
+     * @return string
+     */
+    public static function stem($word)
+    {
+        $chars = mb_str_split(mb_strtolower($word));
+        $charCount = count($chars);
+        $vowelCount = self::numVowels($chars);
+
+        foreach (self::$affixes as [$suffix, $minVowels, $palatalizes]) {
+            $suffixLength = mb_strlen($suffix);
+            // An affix is only removed when the word has more vowels than the affix demands
+            // and when at least three characters would remain in front of it.
+            if ($vowelCount <= $minVowels || $charCount < $suffixLength + 3) {
+                continue;
+            }
+            if (!self::endswith($chars, $charCount, $suffix)) {
+                continue;
+            }
+            $stemLength = $charCount - $suffixLength;
+            $stem = $palatalizes ? self::unPalatalize($chars, $stemLength) : array_slice($chars, 0, $stemLength);
+            return implode('', $stem);
+        }
+        return implode('', $chars);
+    }
+
+    /**
+     * Counts the characters of $s that occur in the vowel list self::$VOWELS.
+     *
+     * @param array<string> $s
+     *
+     * @return int
+     */
+    private static function numVowels($s)
+    {
+        $isVowel = function ($char) {
+            return mb_substr_count(self::$VOWELS, $char) > 0;
+        };
+
+        return count(array_filter($s, $isVowel));
+    }
+
+    /**
+     * Tells whether the first $length characters of $s, joined together, end with $suffix.
+     * @param array<string> $s
+     * @param int           $length
+     * @param string        $suffix
+     * @return bool
+     */
+    public static function endswith($s, $length, $suffix)
+    {
+        return str_ends_with(implode('', array_slice($s, 0, $length)), $suffix);
+    }
+
+    /**
+     * Reverts palatalisation at the end of the stem and returns the stem characters.
+     *
+     * @param array<string> $s      characters of the word
+     * @param int           $length number of leading characters that form the stem
+     *
+     * @return array
+     */
+    public static function unPalatalize($s, $length)
+    {
+        // $s[$length] is the first character of the removed ending. If it is -u the word is
+        // 2,5, or 6 gen pl., and only then can these two rules apply.
+        if ($s[$length] === 'u') {
+            // kš -> kst (the stem grows by one character, which overwrites the removed 'u')
+            if (self::endswith($s, $length, 'kš')) {
+                $s[$length - 1] = 's';
+                $s[$length] = 't';
+                return array_slice($s, 0, $length + 1);
+            }
+            // ņņ -> nn
+            if (self::endswith($s, $length, 'ņņ')) {
+                $s[$length - 2] = 'n';
+                $s[$length - 1] = 'n';
+                return array_slice($s, 0, $length);
+            }
+        }
+
+        // pj, bj, mj, vj: the j is dropped.
+        foreach (['pj', 'bj', 'mj', 'vj'] as $cluster) {
+            if (self::endswith($s, $length, $cluster)) {
+                return array_slice($s, 0, $length - 1);
+            }
+        }
+
+        // Remaining rules, tried in this order: palatalised ending => plain replacement.
+        $replacements = [
+            'šņ' => 'sn',
+            'žņ' => 'zn',
+            'šļ' => 'sl',
+            'žļ' => 'zl',
+            'ļņ' => 'ln',
+            'ļļ' => 'll',
+            'č'  => 'c',
+            'ļ'  => 'l',
+            'ņ'  => 'n',
+        ];
+        foreach ($replacements as $ending => $plain) {
+            if (!self::endswith($s, $length, $ending)) {
+                continue;
+            }
+            // Both sides have the same number of letters, so the positions line up one to one.
+            array_splice($s, $length - strlen($plain), strlen($plain), str_split($plain));
+            break;
+        }
+
+        return array_slice($s, 0, $length);
+    }
+}
diff --git a/src/Stemmer/NoStemmer.php b/src/Stemmer/NoStemmer.php
new file mode 100644
index 0000000..150ee38
--- /dev/null
+++ b/src/Stemmer/NoStemmer.php
@@ -0,0 +1,11 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+class NoStemmer implements Stemmer
+{
+    public static function stem($word)
+    {
+        return $word; // pass-through: the word is returned exactly as it was given
+    }
+}
diff --git a/src/Stemmer/PolishStemmer.php b/src/Stemmer/PolishStemmer.php
new file mode 100644
index 0000000..1337290
--- /dev/null
+++ b/src/Stemmer/PolishStemmer.php
@@ -0,0 +1,163 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ * @link https://github.com/Tutanchamon/pl_stemmer
+ *
+ * Simple stemmer for polish language based on pl_stemmer by Błażej Kubiński.
+ * Originally written by Nenad Tičarić
+ *
+ * @link https://github.com/teamtnt/tntsearch
+ *
+ * Copyright (c) 2016 Nenad Tičarić nticaric@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+class PolishStemmer implements Stemmer
+{
+    /**
+     * Applies the first rule whose suffix list contains the ending of $word.
+     *
+     * A rule is [suffixes, minimum length (exclusive), number of characters to strip]; every
+     * suffix in one rule has the same length. Lengths are measured in characters rather than
+     * bytes, so words with Polish diacritics are held to the same thresholds as ASCII words.
+     *
+     * @param string $word
+     * @param array  $rules
+     *
+     * @return string the shortened word, or $word unchanged when no rule matches
+     */
+    private static function applyRules($word, array $rules)
+    {
+        $length = mb_strlen($word);
+        foreach ($rules as [$suffixes, $minLength, $strip]) {
+            $ending = mb_substr($word, -mb_strlen($suffixes[0]));
+            if ($length > $minLength && in_array($ending, $suffixes, true)) {
+                return mb_substr($word, 0, -$strip);
+            }
+        }
+        return $word;
+    }
+
+    /** Strips noun endings such as -zacja, -acja, -anie or -ach. */
+    public static function removeNouns($word)
+    {
+        return self::applyRules($word, [
+            [['zacja', 'zacją', 'zacji'], 7, 4],
+            [['acja', 'acji', 'acją', 'tach', 'anie', 'enie', 'eniu', 'aniu'], 6, 4],
+            [['tyka'], 6, 2],
+            [['ach', 'ami', 'nia', 'niu', 'cia', 'ciu'], 5, 3],
+            [['cji', 'cja', 'cją'], 5, 2],
+            [['ce', 'ta'], 5, 2],
+        ]);
+    }
+
+    /** Strips diminutive endings; for -enek/-ejek/-erek only the trailing "ek" is removed. */
+    public static function removeDiminutive($word)
+    {
+        return self::applyRules($word, [
+            [['eczek', 'iczek', 'iszek', 'aszek', 'uszek'], 6, 5],
+            [['enek', 'ejek', 'erek'], 6, 2],
+            [['ek', 'ak'], 4, 2],
+        ]);
+    }
+
+    /** Strips adjective endings; superlatives additionally lose their "naj" prefix. */
+    public static function removeAdjectiveEnds($word)
+    {
+        if (mb_strlen($word) > 7 && mb_substr($word, 0, 3) === 'naj') {
+            if (in_array(mb_substr($word, -3), ['sze', 'szy'], true)) {
+                return mb_substr($word, 3, -3);
+            }
+            // The ending must be tested here: the word was previously checked for *starting*
+            // with "szych", which can never hold for a word that starts with "naj".
+            if (mb_substr($word, -5) === 'szych') {
+                return mb_substr($word, 3, -5);
+            }
+        }
+        return self::applyRules($word, [
+            [['czny'], 6, 4],
+            [['owy', 'owa', 'owe', 'ych', 'ego'], 5, 3],
+            [['ej'], 5, 2],
+        ]);
+    }
+
+    /**
+     * Strips verb endings.
+     *
+     * NOTE: the third rule originally also listed the two-letter endings "eć" and "ać", which
+     * can never equal a three-letter ending; "ać" is covered by the last rule, "eć" is not
+     * stripped at all - confirm whether that is intended.
+     */
+    public static function removeVerbsEnds($word)
+    {
+        return self::applyRules($word, [
+            [['bym'], 5, 3],
+            [['esz', 'asz', 'cie', 'eść', 'aść', 'łem', 'amy', 'emy'], 5, 3],
+            [['esz', 'asz', 'eść', 'aść'], 3, 2],
+            [['aj'], 3, 1],
+            [['ać', 'em', 'am', 'ał', 'ił', 'ić', 'ąc'], 3, 2],
+        ]);
+    }
+
+    /** Strips the final two letters of the adverb endings -nie, -wie and -rze. */
+    public static function removeAdverbsEnds($word)
+    {
+        return self::applyRules($word, [
+            [['nie', 'wie', 'rze'], 4, 2],
+        ]);
+    }
+
+    /** Strips the plural endings -ów, -om and -ami. */
+    public static function removePluralForms($word)
+    {
+        return self::applyRules($word, [
+            [['ów', 'om'], 4, 2],
+            [['ami'], 4, 3],
+        ]);
+    }
+
+    /**
+     * Strips a final -ia/-ie, or one trailing letter from a fixed list. Endings are compared as
+     * characters, not bytes; byte-wise the two-byte letters "ą", "ę" and "ł" could never match.
+     */
+    public static function removeGeneralEnds($word)
+    {
+        return self::applyRules($word, [
+            [['ia', 'ie'], 4, 2],
+            [['u', 'ą', 'i', 'a', 'ę', 'y', 'ł'], 4, 1],
+        ]);
+    }
+
+    /** Lower-cases $word and runs every suffix-stripping pass over it, in a fixed order. */
+    public static function stem($word)
+    {
+        $stem = mb_strtolower($word);
+        $stem = self::removeNouns($stem);
+        $stem = self::removeDiminutive($stem);
+        $stem = self::removeAdjectiveEnds($stem);
+        $stem = self::removeVerbsEnds($stem);
+        $stem = self::removeAdverbsEnds($stem);
+        $stem = self::removePluralForms($stem);
+        $stem = self::removeGeneralEnds($stem);
+        return $stem;
+    }
+}
diff --git a/src/Stemmer/PorterStemmer.php b/src/Stemmer/PorterStemmer.php
new file mode 100644
index 0000000..a7646a9
--- /dev/null
+++ b/src/Stemmer/PorterStemmer.php
@@ -0,0 +1,403 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ * Copyright (c) 2005 Richard Heyes (http://www.phpguru.org/)
+ *
+ * All rights reserved.
+ *
+ * This script is free software.
+ */
+
+/**
+ * PHP5 Implementation of the Porter Stemmer algorithm. Certain elements
+ * were borrowed from the (broken) implementation by Jon Abernathy.
+ *
+ * Usage:
+ *
+ *  $stem = PorterStemmer::Stem($word);
+ *
+ * How easy is that?
+ */
+
+class PorterStemmer implements Stemmer
+{
+    /**
+     * Regex for matching a consonant
+     *
+     * @var string
+     */
+    private static $regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';
+
+    /**
+     * Regex for matching a vowel
+     *
+     * @var string
+     */
+    private static $regex_vowel = '(?:[aeiou]|(?<![aeiou])y)';
+
+    /**
+     * Reduce a word to its Porter stem.
+     *
+     * Words of one or two bytes are returned as they are; longer words pass
+     * through steps 1ab, 1c, 2, 3, 4 and 5 in that order.
+     *
+     * @param string $word Word to stem
+     * @return string Stemmed word
+     */
+    public static function stem($word)
+    {
+        if (strlen($word) > 2) {
+            $word = self::step1ab($word);
+            $word = self::step1c($word);
+            $word = self::step2($word);
+            $word = self::step3($word);
+            $word = self::step4($word);
+            $word = self::step5($word);
+        }
+
+        return $word;
+    }
+
+    /**
+     * Step 1: run part A, then part B on its result.
+     *
+     * @param string $word Word to process
+     *
+     * @return string Word after both parts
+     */
+    private static function step1ab($word)
+    {
+        // Part B works on the output of part A, so the order is fixed.
+        $afterPartA = self::doPartA($word);
+
+        return self::doPartB($afterPartA);
+    }
+
+    /**
+     * Step 1a: sses -> ss, ies -> i, ss -> ss, s -> "" (the first ending that matches wins).
+     */
+    private static function doPartA($word)
+    {
+        $rules = substr($word, -1) == 's' ? ['sses' => 'ss', 'ies' => 'i', 'ss' => 'ss', 's' => ''] : [];
+        foreach ($rules as $ending => $replacement) {
+            if (self::replace($word, $ending, $replacement)) {
+                break;
+            }
+        }
+        return $word;
+    }
+
+    private static function doPartB($word)
+    {
+        if (substr($word, -2, 1) != 'e' || !self::replace($word, 'eed', 'ee', 0)) {
+            // Step 1b. Reached only when the word does not end in "eed" ("eed" -> "ee" needs m() > 0).
+            $v = self::$regex_vowel;
+
+            // Strip "ing" / "ed" when the word minus its last 3 / 2 characters contains a vowel.
+            if (preg_match("#$v+#", substr($word, 0, -3)) && self::replace($word, 'ing', '')
+                || preg_match("#$v+#", substr($word, 0, -2)) && self::replace($word, 'ed', '')) {
+                // && binds tighter than ||, so each vowel test guards only its own suffix.
+                // replace() reports a match, so this body runs once "ing" or "ed" was removed.
+                // Tidy the stem: at -> ate, bl -> ble, iz -> ize; the first match ends the tidy-up.
+                if (!self::replace($word, 'at', 'ate')
+                    && !self::replace($word, 'bl', 'ble')
+                    && !self::replace($word, 'iz', 'ize')) {
+                    // Otherwise drop one letter of a doubled final consonant (not ll, ss, zz), or append "e" when m() == 1 and the word ends CVC.
+                    if (self::doubleConsonant($word)
+                        && substr($word, -2) != 'll'
+                        && substr($word, -2) != 'ss'
+                        && substr($word, -2) != 'zz') {
+                        $word = substr($word, 0, -1);
+                    } elseif (self::m($word) == 1 && self::cvc($word)) {
+                        $word .= 'e';
+                    }
+                }
+            }
+        }
+        return $word;
+    }
+
+    /**
+     * Step 1c: turn a final "y" into "i" when the rest of the word contains a vowel.
+     *
+     * @param string $word Word to stem
+     */
+    private static function step1c($word)
+    {
+        if (substr($word, -1) != 'y') {
+            return $word;
+        }
+
+        $stem = substr($word, 0, -1);
+
+        return preg_match('#' . self::$regex_vowel . '+#', $stem) ? $stem . 'i' : $word;
+    }
+
+    /**
+     * Step 2: rewrite a longer suffix (e.g. "ational" -> "ate") when the stem before it has m() > 0; the penultimate letter selects which suffixes are tried.
+     * @param string $word Word to stem
+     * @return string Word after step 2
+     */
+    private static function step2($word)
+    {
+        switch (substr($word, -2, 1)) {
+            case 'a':
+                self::replace($word, 'ational', 'ate', 0)
+                || self::replace($word, 'tional', 'tion', 0);
+                break;
+            case 'c':
+                self::replace($word, 'enci', 'ence', 0)
+                || self::replace($word, 'anci', 'ance', 0);
+                break;
+            case 'e':
+                self::replace($word, 'izer', 'ize', 0);
+                break;
+            case 'g':
+                self::replace($word, 'logi', 'log', 0);
+                break;
+            case 'l':
+                self::replace($word, 'entli', 'ent', 0)
+                || self::replace($word, 'ousli', 'ous', 0)
+                || self::replace($word, 'alli', 'al', 0)
+                || self::replace($word, 'bli', 'ble', 0)
+                || self::replace($word, 'eli', 'e', 0);
+                break;
+            case 'o':
+                self::replace($word, 'ization', 'ize', 0)
+                || self::replace($word, 'ation', 'ate', 0)
+                || self::replace($word, 'ator', 'ate', 0);
+                break;
+            case 's':
+                self::replace($word, 'iveness', 'ive', 0)
+                || self::replace($word, 'fulness', 'ful', 0)
+                || self::replace($word, 'ousness', 'ous', 0)
+                || self::replace($word, 'alism', 'al', 0);
+                break;
+            case 't':
+                self::replace($word, 'biliti', 'ble', 0)
+                || self::replace($word, 'aliti', 'al', 0)
+                || self::replace($word, 'iviti', 'ive', 0);
+                break;
+        }
+
+        return $word;
+    }
+
+    /**
+     * Step 3: shorten or drop a further suffix when the stem before it has m() > 0.
+     *
+     * The penultimate letter of the word selects the candidate suffixes; within a
+     * group the first suffix that matches ends the search, replaced or not.
+     *
+     * @param string $word String to stem
+     *
+     * @return string Word after step 3
+     */
+    private static function step3($word)
+    {
+        // penultimate letter => [suffix => replacement]
+        $rules = [
+            'a' => ['ical' => 'ic'],
+            's' => ['ness' => ''],
+            't' => ['icate' => 'ic', 'iciti' => 'ic'],
+            'u' => ['ful' => ''],
+            'v' => ['ative' => ''],
+            'z' => ['alize' => 'al'],
+        ];
+        $penultimate = substr($word, -2, 1);
+
+        foreach ($rules[$penultimate] ?? [] as $suffix => $replacement) {
+            if (self::replace($word, $suffix, $replacement, 0)) {
+                break;
+            }
+        }
+
+        return $word;
+    }
+
+    /**
+     * Step 4: delete a remaining suffix when the stem before it has m() > 1; "ion" is only tried when the word ends in "tion" or "sion".
+     * @param string $word Word to stem
+     * @return string Word after step 4
+     */
+    private static function step4($word)
+    {
+        switch (substr($word, -2, 1)) {
+            case 'a':
+                self::replace($word, 'al', '', 1);
+                break;
+            case 'c':
+                self::replace($word, 'ance', '', 1)
+                || self::replace($word, 'ence', '', 1);
+                break;
+            case 'e':
+                self::replace($word, 'er', '', 1);
+                break;
+            case 'i':
+                self::replace($word, 'ic', '', 1);
+                break;
+            case 'l':
+                self::replace($word, 'able', '', 1)
+                || self::replace($word, 'ible', '', 1);
+                break;
+            case 'n':
+                self::replace($word, 'ant', '', 1)
+                || self::replace($word, 'ement', '', 1)
+                || self::replace($word, 'ment', '', 1)
+                || self::replace($word, 'ent', '', 1);
+                break;
+            case 'o':
+                if (substr($word, -4) == 'tion' || substr($word, -4) == 'sion') {
+                    self::replace($word, 'ion', '', 1);
+                } else {
+                    self::replace($word, 'ou', '', 1);
+                }
+                break;
+            case 's':
+                self::replace($word, 'ism', '', 1);
+                break;
+            case 't':
+                self::replace($word, 'ate', '', 1)
+                || self::replace($word, 'iti', '', 1);
+                break;
+            case 'u':
+                self::replace($word, 'ous', '', 1);
+                break;
+            case 'v':
+                self::replace($word, 'ive', '', 1);
+                break;
+            case 'z':
+                self::replace($word, 'ize', '', 1);
+                break;
+        }
+
+        return $word;
+    }
+
+    /**
+     * Step 5: final clean-up of a trailing "e" and of a doubled "l".
+     *
+     * @param string $word Word to stem
+     */
+    private static function step5($word)
+    {
+        // Part a: drop a final "e" when the rest has m() > 1, or m() == 1 and
+        // does not end consonant-vowel-consonant.
+        if (substr($word, -1) == 'e') {
+            $stem    = substr($word, 0, -1);
+            $measure = self::m($stem);
+
+            if ($measure > 1 || ($measure == 1 && !self::cvc($stem))) {
+                $word = $stem;
+            }
+        }
+
+        // Part b: "ll" -> "l" when m() > 1.
+        if (self::m($word) > 1 && self::doubleConsonant($word) && substr($word, -1) == 'l') {
+            $word = substr($word, 0, -1);
+        }
+
+        return $word;
+    }
+
+    /**
+     * Swap the ending $check of $str for $repl.
+     *
+     * When $m is given, the swap only happens if the part of $str before the
+     * ending has a measure greater than $m.
+     *
+     * @param string   $str   Word to edit in place (by reference)
+     * @param string   $check Ending to look for
+     * @param string   $repl  Replacement for the ending
+     * @param int|null $m     Optional measure the preceding part must exceed
+     *
+     * @return bool True when $str ends with $check, replaced or not; false otherwise
+     */
+    private static function replace(&$str, $check, $repl, $m = null)
+    {
+        $cut = -strlen($check);
+
+        if (substr($str, $cut) != $check) {
+            return false;
+        }
+
+        $stem = substr($str, 0, $cut);
+        if ($m === null || self::m($stem) > $m) {
+            $str = $stem . $repl;
+        }
+
+        return true;
+    }
+
+    /**
+     * Porter "measure" of a string, the quantity replace() compares against
+     * its optional threshold.
+     *
+     * With c a consonant sequence, v a vowel sequence and <..> marking an
+     * optional part, the measure is the number of vc pairs:
+     *
+     * <c><v>       gives 0
+     * <c>vc<v>     gives 1
+     * <c>vcvc<v>   gives 2
+     * <c>vcvcvc<v> gives 3
+     *
+     * @param string $str The string to measure
+     *
+     * @return int The m count
+     */
+    private static function m($str)
+    {
+        $c = self::$regex_consonant;
+        $v = self::$regex_vowel;
+
+        // Remove the optional leading consonant run, then the optional trailing
+        // vowel run; what is left is counted as vowel-run + consonant-run pairs.
+        $core = preg_replace("#$v+$#", '', preg_replace("#^$c+#", '', $str));
+        preg_match_all("#($v+$c+)#", $core, $pairs);
+
+        return count($pairs[1]);
+    }
+
+    /**
+     * Tell whether the string ends in two consonants that are the same letter.
+     *
+     * @param string $str String to check
+     *
+     * @return bool Result
+     */
+    private static function doubleConsonant($str)
+    {
+        if (!preg_match('#' . self::$regex_consonant . '{2}$#', $str, $pair)) {
+            return false;
+        }
+        return $pair[0][0] == $pair[0][1];
+    }
+
+    /**
+     * Check for a consonant-vowel-consonant ending whose final consonant is
+     * not w, x or y.
+     *
+     * @param string $str String to check
+     *
+     * @return bool Result
+     */
+    private static function cvc($str)
+    {
+        $c = self::$regex_consonant;
+        $v = self::$regex_vowel;
+
+        if (!preg_match("#($c$v$c)$#", $str, $found)) {
+            return false;
+        }
+
+        $ending = $found[1];
+        // The match must be exactly three bytes long.
+        if (strlen($ending) != 3) {
+            return false;
+        }
+
+        return !in_array($ending[2], ['w', 'x', 'y']);
+    }
+}
diff --git a/src/Stemmer/PortugueseStemmer.php b/src/Stemmer/PortugueseStemmer.php
new file mode 100644
index 0000000..956bab4
--- /dev/null
+++ b/src/Stemmer/PortugueseStemmer.php
@@ -0,0 +1,766 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * This is a reimplementation of the Porter Stemmer Algorithm for Portuguese.
+ * This script is based on the implementation found on <https://github.com/wamania/php-stemmer>
+ * and has been rewritten to work with TNTSearch by Lucas Padilha <https://github.com/LucasPadilha>
+ *
+ * Takes a word and reduces it to its Portuguese stem using the Porter stemmer algorithm.
+ *
+ * References:
+ *  - http://snowball.tartarus.org/algorithms/porter/stemmer.html
+ *  - http://snowball.tartarus.org/algorithms/portuguese/stemmer.html
+ *
+ * Usage:
+ *  $stem = PortugueseStemmer::stem($word);
+ *
+ * @author Lucas Padilha <https://github.com/LucasPadilha>
+ */
+
+class PortugueseStemmer implements Stemmer
+{
+    /**
+     * UTF-8 Case lookup table
+     *
+     * This lookup table maps each lower case letter (key) to its corresponding
+     * upper case letter (value), both given as Unicode code points
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    private static $utf8_lower_to_upper = [
+        0x0061 => 0x0041, 0x03C6 => 0x03A6, 0x0163 => 0x0162, 0x00E5 => 0x00C5, 0x0062 => 0x0042,
+        0x013A => 0x0139, 0x00E1 => 0x00C1, 0x0142 => 0x0141, 0x03CD => 0x038E, 0x0101 => 0x0100,
+        0x0491 => 0x0490, 0x03B4 => 0x0394, 0x015B => 0x015A, 0x0064 => 0x0044, 0x03B3 => 0x0393,
+        0x00F4 => 0x00D4, 0x044A => 0x042A, 0x0439 => 0x0419, 0x0113 => 0x0112, 0x043C => 0x041C,
+        0x015F => 0x015E, 0x0144 => 0x0143, 0x00EE => 0x00CE, 0x045E => 0x040E, 0x044F => 0x042F,
+        0x03BA => 0x039A, 0x0155 => 0x0154, 0x0069 => 0x0049, 0x0073 => 0x0053, 0x1E1F => 0x1E1E,
+        0x0135 => 0x0134, 0x0447 => 0x0427, 0x03C0 => 0x03A0, 0x0438 => 0x0418, 0x00F3 => 0x00D3,
+        0x0440 => 0x0420, 0x0454 => 0x0404, 0x0435 => 0x0415, 0x0449 => 0x0429, 0x014B => 0x014A,
+        0x0431 => 0x0411, 0x0459 => 0x0409, 0x1E03 => 0x1E02, 0x00F6 => 0x00D6, 0x00F9 => 0x00D9,
+        0x006E => 0x004E, 0x0451 => 0x0401, 0x03C4 => 0x03A4, 0x0443 => 0x0423, 0x015D => 0x015C,
+        0x0453 => 0x0403, 0x03C8 => 0x03A8, 0x0159 => 0x0158, 0x0067 => 0x0047, 0x00E4 => 0x00C4,
+        0x03AC => 0x0386, 0x03AE => 0x0389, 0x0167 => 0x0166, 0x03BE => 0x039E, 0x0165 => 0x0164,
+        0x0117 => 0x0116, 0x0109 => 0x0108, 0x0076 => 0x0056, 0x00FE => 0x00DE, 0x0157 => 0x0156,
+        0x00FA => 0x00DA, 0x1E61 => 0x1E60, 0x1E83 => 0x1E82, 0x00E2 => 0x00C2, 0x0119 => 0x0118,
+        0x0146 => 0x0145, 0x0070 => 0x0050, 0x0151 => 0x0150, 0x044E => 0x042E, 0x0129 => 0x0128,
+        0x03C7 => 0x03A7, 0x013E => 0x013D, 0x0442 => 0x0422, 0x007A => 0x005A, 0x0448 => 0x0428,
+        0x03C1 => 0x03A1, 0x1E81 => 0x1E80, 0x016D => 0x016C, 0x00F5 => 0x00D5, 0x0075 => 0x0055,
+        0x0177 => 0x0176, 0x00FC => 0x00DC, 0x1E57 => 0x1E56, 0x03C3 => 0x03A3, 0x043A => 0x041A,
+        0x006D => 0x004D, 0x016B => 0x016A, 0x0171 => 0x0170, 0x0444 => 0x0424, 0x00EC => 0x00CC,
+        0x0169 => 0x0168, 0x03BF => 0x039F, 0x006B => 0x004B, 0x00F2 => 0x00D2, 0x00E0 => 0x00C0,
+        0x0434 => 0x0414, 0x03C9 => 0x03A9, 0x1E6B => 0x1E6A, 0x00E3 => 0x00C3, 0x044D => 0x042D,
+        0x0436 => 0x0416, 0x01A1 => 0x01A0, 0x010D => 0x010C, 0x011D => 0x011C, 0x00F0 => 0x00D0,
+        0x013C => 0x013B, 0x045F => 0x040F, 0x045A => 0x040A, 0x00E8 => 0x00C8, 0x03C5 => 0x03A5,
+        0x0066 => 0x0046, 0x00FD => 0x00DD, 0x0063 => 0x0043, 0x021B => 0x021A, 0x00EA => 0x00CA,
+        0x03B9 => 0x0399, 0x017A => 0x0179, 0x00EF => 0x00CF, 0x01B0 => 0x01AF, 0x0065 => 0x0045,
+        0x03BB => 0x039B, 0x03B8 => 0x0398, 0x03BC => 0x039C, 0x045C => 0x040C, 0x043F => 0x041F,
+        0x044C => 0x042C, 0x00FE => 0x00DE, 0x00F0 => 0x00D0, 0x1EF3 => 0x1EF2, 0x0068 => 0x0048,
+        0x00EB => 0x00CB, 0x0111 => 0x0110, 0x0433 => 0x0413, 0x012F => 0x012E, 0x00E6 => 0x00C6,
+        0x0078 => 0x0058, 0x0161 => 0x0160, 0x016F => 0x016E, 0x03B1 => 0x0391, 0x0457 => 0x0407,
+        0x0173 => 0x0172, 0x00FF => 0x0178, 0x006F => 0x004F, 0x043B => 0x041B, 0x03B5 => 0x0395,
+        0x0445 => 0x0425, 0x0121 => 0x0120, 0x017E => 0x017D, 0x017C => 0x017B, 0x03B6 => 0x0396,
+        0x03B2 => 0x0392, 0x03AD => 0x0388, 0x1E85 => 0x1E84, 0x0175 => 0x0174, 0x0071 => 0x0051,
+        0x0437 => 0x0417, 0x1E0B => 0x1E0A, 0x0148 => 0x0147, 0x0105 => 0x0104, 0x0458 => 0x0408,
+        0x014D => 0x014C, 0x00ED => 0x00CD, 0x0079 => 0x0059, 0x010B => 0x010A, 0x03CE => 0x038F,
+        0x0072 => 0x0052, 0x0430 => 0x0410, 0x0455 => 0x0405, 0x0452 => 0x0402, 0x0127 => 0x0126,
+        0x0137 => 0x0136, 0x012B => 0x012A, 0x03AF => 0x038A, 0x044B => 0x042B, 0x006C => 0x004C,
+        0x03B7 => 0x0397, 0x0125 => 0x0124, 0x0219 => 0x0218, 0x00FB => 0x00DB, 0x011F => 0x011E,
+        0x043E => 0x041E, 0x1E41 => 0x1E40, 0x03BD => 0x039D, 0x0107 => 0x0106, 0x03CB => 0x03AB,
+        0x0446 => 0x0426, 0x00FE => 0x00DE, 0x00E7 => 0x00C7, 0x03CA => 0x03AA, 0x0441 => 0x0421,
+        0x0432 => 0x0412, 0x010F => 0x010E, 0x00F8 => 0x00D8, 0x0077 => 0x0057, 0x011B => 0x011A,
+        0x0074 => 0x0054, 0x006A => 0x004A, 0x045B => 0x040B, 0x0456 => 0x0406, 0x0103 => 0x0102,
+        0x03BB => 0x039B, 0x00F1 => 0x00D1, 0x043D => 0x041D, 0x03CC => 0x038C, 0x00E9 => 0x00C9,
+        0x00F0 => 0x00D0, 0x0457 => 0x0407, 0x0123 => 0x0122,
+    ];
+
+    private static $vowels = ['a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'â', 'ê', 'ô'];
+
+    /**
+     * Reduce a Portuguese word to its stem.
+     * @param string $word Word to stem
+     * @return string Stemmed word
+     * @throws \Exception When $word fails the UTF-8 check
+     */
+    public static function stem($word)
+    {
+        if (!self::check($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        // Lower-case, and write the nasal vowels as two characters until finish().
+        $word = self::strtolower($word);
+        $word = self::str_replace(['ã', 'õ'], ['a~', 'o~'], $word);
+
+        // rv() leaves its outputs untouched when it cannot locate RV, and RV is then
+        // the end of the word: start from the word length (an int), not from ''.
+        $rv      = '';
+        $rvIndex = self::strlen($word);
+        self::rv($word, $rv, $rvIndex);
+
+        $r1 = $r1Index = '';
+        self::r1($word, $r1, $r1Index);
+        $r2 = $r2Index = '';
+        self::r2($r1, $r1Index, $r2, $r2Index);
+
+        $initialWord = $word;
+        self::step1($word, $r1Index, $r2Index, $rvIndex);
+        if ($initialWord === $word) {
+            self::step2($word, $rvIndex);
+        }
+        if ($initialWord !== $word) {
+            self::step3($word, $rvIndex);
+        } else {
+            self::step4($word, $rvIndex);
+        }
+        self::step5($word, $rvIndex);
+        self::finish($word);
+
+        return $word;
+    }
+
+    /**
+     * Compute R1 (the region after the first non-vowel following a vowel, else the end of the word) into $r1 / $r1Index; always returns true.
+     */
+    private static function r1($word, &$r1, &$r1Index)
+    {
+        // rx() returns [index, region]; both are handed back through the by-reference arguments.
+        $found   = self::rx($word);
+        $r1Index = $found[0];
+        $r1      = $found[1];
+
+        return true;
+    }
+
+    /**
+     * Compute R2 (the same rule as R1, applied inside R1) into $r2 / $r2Index; always returns true.
+     */
+    private static function r2($r1, $r1Index, &$r2, &$r2Index)
+    {
+        // rx() indexes relative to R1, so R1's own start is added to get the stored index.
+        $found   = self::rx($r1);
+        $r2Index = $r1Index + $found[0];
+        $r2      = $found[1];
+
+        return true;
+    }
+
+    /**
+     * Shared search behind r1() and r2().
+     *
+     * Scans $in from the left for the first vowel whose following letter is
+     * not a vowel; the region is everything after that following letter.
+     *
+     * R1 : $in is the word
+     * R2 : $in is R1
+     *
+     * @param string $in Text to scan
+     *
+     * @return array Two elements, in this order:
+     *               - start index of the region inside $in
+     *               - the region itself
+     *               When $in contains no vowel the result is
+     *               [length of $in, ''].
+     */
+    private static function rx($in)
+    {
+        $length = self::strlen($in);
+
+        for ($i = 0; $i < $length; $i++) {
+            $letter = self::substr($in, $i, 1);
+            if (!in_array($letter, static::$vowels)) {
+                continue;
+            }
+
+            // $letter is a vowel: the region opens one letter past its follower,
+            // provided the follower is not itself a vowel.
+            $follower = self::substr($in, $i + 1, 1);
+            if (!in_array($follower, static::$vowels)) {
+                return [$i + 2, self::substr($in, $i + 2)];
+            }
+        }
+
+        // Defaults: no qualifying vowel, so the region is empty and starts at the end.
+        return [$length, ''];
+    }
+
+    /**
+     * Locate RV (rule used by Spanish, Italian, Portuguese, etc., but not French) and write
+     * the region text to $rv and its start offset to $rvIndex:
+     *  - second letter a consonant: RV is the region after the next following vowel;
+     *  - first two letters vowels: RV is the region after the next consonant;
+     *  - otherwise (consonant-vowel case): RV is the region after the third letter.
+     * Returns true early for words shorter than 3 letters and false when no position is found; in both cases $rv and $rvIndex are left untouched.
+     */
+    private static function rv($word, &$rv, &$rvIndex)
+    {
+        $length = self::strlen($word);
+
+        if ($length < 3) {
+            return true;
+        }
+
+        $first  = self::substr($word, 0, 1);
+        $second = self::substr($word, 1, 1);
+
+        // If the second letter is a consonant, RV is the region after the next following vowel,
+        if (!in_array($second, static::$vowels)) {
+            for ($i = 2; $i < $length; $i++) {
+                $letter = self::substr($word, $i, 1);
+
+                if (in_array($letter, static::$vowels)) {
+                    $rv      = self::substr($word, ($i + 1));
+                    $rvIndex = $i + 1;
+
+                    return true;
+                }
+            }
+        }
+
+        // or if the first two letters are vowels, RV is the region after the next consonant,
+        if ((in_array($first, static::$vowels)) && (in_array($second, static::$vowels))) {
+            for ($i = 2; $i < $length; $i++) {
+                $letter = self::substr($word, $i, 1);
+
+                if (!in_array($letter, static::$vowels)) {
+                    $rv      = self::substr($word, ($i + 1));
+                    $rvIndex = $i + 1;
+
+                    return true;
+                }
+            }
+        }
+
+        // and otherwise (consonant-vowel case) RV is the region after the third letter.
+        if ((!in_array($first, static::$vowels)) && (in_array($second, static::$vowels))) {
+            $rv      = self::substr($word, 3);
+            $rvIndex = 3;
+
+            return true;
+        }
+
+        return false;
+    }
+
+    private static function inRv($position, $rvIndex)
+    {
+        return $rvIndex <= $position; // true when $position is at or after the RV start index
+    }
+
+    private static function inR1($position, $r1Index)
+    {
+        return $r1Index <= $position; // true when $position is at or after the R1 start index
+    }
+
+    private static function inR2($position, $r2Index)
+    {
+        return $r2Index <= $position; // true when $position is at or after the R2 start index
+    }
+
+    private static function searchIfInRv($word, $suffixes, $rvIndex)
+    {
+        return self::search($word, $suffixes, $rvIndex); // thin wrapper: the RV start index is passed as search()'s offset
+    }
+
+    private static function searchIfInR2($word, $suffixes, $r2Index)
+    {
+        return self::search($word, $suffixes, $r2Index); // thin wrapper: the R2 start index is passed as search()'s offset
+    }
+
+    /** Return the position of the first listed suffix that ends $word, passing $offset to strrpos(); false when none does. */
+    private static function search($word, $suffixes, $offset = 0)
+    {
+        $length = self::strlen($word);
+        if ($offset > $length) {
+            return false;
+        }
+        foreach ($suffixes as $suffix) {
+            $position = self::strrpos($word, $suffix, $offset);
+            if ($position !== false && self::strlen($suffix) + $position == $length) {
+                return $position;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Step 1: standard suffix removal. Only the first suffix group that matches is handled; returns true when a group matched (even if nothing was removed), false otherwise.
+     */
+    private static function step1(&$word, $r1Index, $r2Index, $rvIndex)
+    {
+        // delete if in R2
+        if (($position = self::search($word, ['amentos', 'imentos', 'adoras', 'adores', 'amento', 'imento', 'adora', 'istas', 'ismos', 'antes', 'ância', 'ezas', 'eza', 'icos', 'icas', 'ismo', 'ável', 'ível', 'ista', 'oso', 'osos', 'osas', 'osa', 'ico', 'ica', 'ador', 'aça~o', 'aço~es', 'ante'])) !== false) {
+            if (self::inR2($position, $r2Index)) {
+                $word = self::substr($word, 0, $position);
+            }
+
+            return true;
+        }
+
+        // replace with log if in R2 (NOTE(review): "logía" looks like a Spanish spelling; confirm against the Snowball Portuguese suffix list)
+        if (($position = self::search($word, ['logías', 'logía'])) !== false) {
+            if (self::inR2($position, $r2Index)) {
+                $word = preg_replace('#(logías|logía)$#u', 'log', $word);
+            }
+
+            return true;
+        }
+
+        // replace with u if in R2 (NOTE(review): "ución" looks like a Spanish spelling; confirm against the Snowball Portuguese suffix list)
+        if (($position = self::search($word, ['uciones', 'ución'])) !== false) {
+            if (self::inR2($position, $r2Index)) {
+                $word = preg_replace('#(uciones|ución)$#u', 'u', $word);
+            }
+
+            return true;
+        }
+
+        // replace with ente if in R2
+        if (($position = self::search($word, ['ências', 'ência'])) !== false) {
+            if (self::inR2($position, $r2Index)) {
+                $word = preg_replace('#(ências|ência)$#u', 'ente', $word);
+            }
+
+            return true;
+        }
+
+        // delete if in R1
+        // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+        // if preceded by os, ic or ad, delete if in R2
+        if (($position = self::search($word, ['amente'])) !== false) {
+            // delete if in R1
+            if (self::inR1($position, $r1Index)) {
+                $word = self::substr($word, 0, $position);
+            }
+
+            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+            if (($position2 = self::searchIfInR2($word, ['iv'], $r2Index)) !== false) {
+                $word = self::substr($word, 0, $position2);
+
+                if (($position3 = self::searchIfInR2($word, ['at'], $r2Index)) !== false) {
+                    $word = self::substr($word, 0, $position3);
+                }
+
+            // if preceded by os, ic or ad, delete if in R2
+            } elseif (($position4 = self::searchIfInR2($word, ['os', 'ic', 'ad'], $r2Index)) !== false) {
+                $word = self::substr($word, 0, $position4);
+            }
+
+            return true;
+        }
+
+        // delete if in R2
+        // if preceded by ante, avel or ível, delete if in R2
+        if (($position = self::search($word, ['mente'])) !== false) {
+            // delete if in R2
+            if (self::inR2($position, $r2Index)) {
+                $word = self::substr($word, 0, $position);
+            }
+
+            // if preceded by ante, avel or ível, delete if in R2 (strict check: a position of 0 is a hit, not a miss)
+            if (($position2 = self::searchIfInR2($word, ['ante', 'avel', 'ível'], $r2Index)) !== false) {
+                $word = self::substr($word, 0, $position2);
+            }
+
+            return true;
+        }
+
+        // delete if in R2
+        // if preceded by abil, ic or iv, delete if in R2
+        if (($position = self::search($word, ['idades', 'idade'])) !== false) {
+            // delete if in R2
+            if (self::inR2($position, $r2Index)) {
+                $word = self::substr($word, 0, $position);
+            }
+
+            // if preceded by abil, ic or iv, delete if in R2
+            if (($position2 = self::searchIfInR2($word, ['abil', 'ic', 'iv'], $r2Index)) !== false) {
+                $word = self::substr($word, 0, $position2);
+            }
+
+            return true;
+        }
+
+        // delete if in R2
+        // if preceded by at, delete if in R2
+        if (($position = self::search($word, ['ivas', 'ivos', 'iva', 'ivo'])) !== false) {
+            // delete if in R2
+            if (self::inR2($position, $r2Index)) {
+                $word = self::substr($word, 0, $position);
+            }
+
+            // if preceded by at, delete if in R2
+            if (($position2 = self::searchIfInR2($word, ['at'], $r2Index)) !== false) {
+                $word = self::substr($word, 0, $position2);
+            }
+
+            return true;
+        }
+
+        // replace with ir if in RV and preceded by e
+        if (($position = self::search($word, ['iras', 'ira'])) !== false) {
+            if (self::inRv($position, $rvIndex)) {
+                $before = $position - 1;
+                $letter = self::substr($word, $before, 1);
+
+                if ($letter == 'e') {
+                    $word = preg_replace('#(iras|ira)$#u', 'ir', $word);
+                }
+            }
+
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Step 2: verb suffixes. The first suffix in list order that search() finds in RV is deleted
+     * (the list is written roughly longest-first). Returns true when a suffix was deleted, false otherwise.
+     */
+    private static function step2(&$word, $rvIndex)
+    {
+        if (($position = self::searchIfInRv($word, ['aríamos', 'eríamos', 'iríamos', 'ássemos', 'êssemos', 'íssemos', 'aríeis', 'eríeis', 'iríeis', 'ásseis', 'ésseis', 'ísseis', 'áramos', 'éramos', 'íramos', 'ávamos', 'aremos', 'eremos', 'iremos', 'ariam', 'eriam', 'iriam', 'assem', 'essem', 'issem', 'arias', 'erias', 'irias', 'ardes', 'erdes', 'irdes', 'asses', 'esses', 'isses', 'astes', 'estes', 'istes', 'áreis', 'areis', 'éreis', 'ereis', 'íreis', 'ireis', 'áveis', 'íamos', 'armos', 'ermos', 'irmos', 'aria', 'eria', 'iria', 'asse', 'esse', 'isse', 'aste', 'este', 'iste', 'arei', 'erei', 'irei', 'adas', 'idas', 'aram', 'eram', 'iram', 'avam', 'arem', 'erem', 'irem', 'ando', 'endo', 'indo', 'ara~o', 'era~o', 'ira~o', 'arás', 'aras', 'erás', 'eras', 'irás', 'avas', 'ares', 'eres', 'ires', 'íeis', 'ados', 'idos', 'ámos', 'amos', 'emos', 'imos', 'iras', 'ada', 'ida', 'ará', 'ara', 'erá', 'era', 'irá', 'ava', 'iam', 'ado', 'ido', 'ias', 'ais', 'eis', 'ira', 'ia', 'ei', 'am', 'em', 'ar', 'er', 'ir', 'as', 'es', 'is', 'eu', 'iu', 'ou'], $rvIndex)) !== false) {
+            $word = self::substr($word, 0, $position);
+
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Step 3: delete a final "i" when it lies in RV and is preceded by "c".
+     *
+     * @return bool True when the word ends with an "i" in RV (deleted or not),
+     *              false otherwise
+     */
+    private static function step3(&$word, $rvIndex)
+    {
+        if (self::searchIfInRv($word, ['i'], $rvIndex) === false) {
+            return false;
+        }
+
+        if (self::substr($word, -2, 1) == 'c') {
+            $word = self::substr($word, 0, -1);
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 4: delete a final os, a, i, o, á, í or ó when it is found in RV.
+     *
+     * @return bool True when a suffix was deleted, false otherwise
+     */
+    private static function step4(&$word, $rvIndex)
+    {
+        $position = self::searchIfInRv($word, ['os', 'a', 'i', 'o', 'á', 'í', 'ó'], $rvIndex);
+        if ($position === false) {
+            return false;
+        }
+        $word = self::substr($word, 0, $position);
+        return true;
+    }
+
+    /**
+     * Step 5: delete a final e, é or ê in RV (plus the u of "gu" / i of "ci" before it when that letter is in RV); otherwise turn a final ç into c.
+     */
+    private static function step5(&$word, $rvIndex)
+    {
+        if (self::searchIfInRv($word, ['e', 'é', 'ê'], $rvIndex) !== false) {
+            $word = self::substr($word, 0, -1);
+
+            $position2 = self::search($word, ['gu', 'ci']);
+            if ($position2 !== false && self::inRv($position2 + 1, $rvIndex)) {
+                $word = self::substr($word, 0, -1);
+            }
+
+            return true;
+        }
+
+        if (self::search($word, ['ç']) === false) {
+            return false;
+        }
+
+        $word = preg_replace('#(ç)$#u', 'c', $word);
+
+        return true;
+    }
+
+    private static function finish(&$word)
+    {
+        // Replace the two-character sequences "a~" and "o~" with the single letters "ã" and "õ" (in place).
+        $word = self::str_replace(['a~', 'o~'], ['ã', 'õ'], $word);
+    }
+
+    /**
+     * Tries to detect whether a string is encoded as UTF-8
+     *
+     * @author <bmorel@ssi.fr>
+     *
+     * @link   http://www.php.net/manual/en/function.utf8-encode.php
+     */
+    private static function check($str)
+    {
+        for ($i = 0; $i < strlen($str); $i++) {
+            if (ord($str[$i]) < 0x80) { # 0bbbbbbb: single-byte (ASCII) character, nothing more to check
+                continue;
+            }
+            # 110bbbbb: lead byte of a 2-byte sequence (1 continuation byte expected)
+            elseif ((ord($str[$i]) & 0xE0) == 0xC0) {
+                $n = 1;
+            }
+            # 1110bbbb: lead byte of a 3-byte sequence
+            elseif ((ord($str[$i]) & 0xF0) == 0xE0) {
+                $n = 2;
+            }
+            # 11110bbb: lead byte of a 4-byte sequence
+            elseif ((ord($str[$i]) & 0xF8) == 0xF0) {
+                $n = 3;
+            }
+            # 111110bb: lead byte of a legacy 5-byte sequence (not valid in modern UTF-8, accepted here)
+            elseif ((ord($str[$i]) & 0xFC) == 0xF8) {
+                $n = 4;
+            }
+            # 1111110b: lead byte of a legacy 6-byte sequence (not valid in modern UTF-8, accepted here)
+            elseif ((ord($str[$i]) & 0xFE) == 0xFC) {
+                $n = 5;
+            }
+            # Does not match any model (e.g. a continuation byte without a lead byte)
+            else {
+                return false;
+            }
+            # The lead byte must be followed by exactly $n continuation bytes
+            for ($j = 0; $j < $n; $j++) {
+                # Fail if the string ends early or the next byte is not of the form 10bbbbbb
+                if ((++$i == strlen($str)) || ((ord($str[$i]) & 0xC0) != 0x80)) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Unicode aware replacement for strlen()
+     *
+     * Returns the number of characters (not bytes) in the string by
+     * delegating to mb_strlen() with the encoding fixed to UTF-8, so
+     * the result does not depend on the mbstring internal encoding.
+     *
+     * @author <chernyshevsky at hotmail dot com>
+     *
+     * @see    strlen()
+     * @see    mb_strlen()
+     */
+    private static function strlen($string)
+    {
+        return mb_strlen($string, 'UTF-8'); // length in characters; encoding is fixed to UTF-8
+    }
+
+    /**
+     * Unicode aware replacement for substr()
+     *
+     * @author lmak at NOSPAM dot iti dot gr
+     *
+     * @link   http://www.php.net/manual/en/function.substr.php
+     * @see    substr()
+     */
+    private static function substr($str, $start, $length = null)
+    {
+        // Split into UTF-8 characters so $start/$length count characters, not bytes.
+        // Note: "." does not match "\n", so newline characters are not part of the result.
+        $chars = [];
+        preg_match_all('/./u', $str, $chars);
+
+        // array_slice() reads a null length as "up to the end" and a length of 0 as "nothing".
+        // The former loose "!= null" test confused the two and returned the whole tail for 0.
+        return join('', array_slice($chars[0], $start, $length));
+    }
+
+    /**
+     * Unicode aware replacement for str_replace()
+     *
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     *
+     * @see    str_replace()
+     */
+    private static function str_replace($s, $r, $str)
+    {
+        // Each literal needle becomes a "!"-delimited pattern with the "u" (UTF-8) modifier.
+        // A single needle yields one pattern; an array yields one pattern per entry, keys kept.
+        $toPattern = static function ($needle) {
+            return '!' . preg_quote($needle, '!') . '!u';
+        };
+        $s = is_array($s) ? array_map($toPattern, $s) : $toPattern($s);
+
+        return preg_replace($s, $r, $str);
+    }
+
+    /**
+     * This is a unicode aware replacement for strtolower()
+     *
+     * Uses mb_string extension if available
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @see    strtolower()
+     * @see    utf8_strtoupper()
+     */
+    private static function strtolower($string)
+    {
+        if (!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower')) {
+            return mb_strtolower($string, 'utf-8');
+        }
+
+        // Fallback without mbstring: translate code points through the flipped lower->upper table.
+        $utf8_upper_to_lower = array_flip(self::$utf8_lower_to_upper);
+        $uni                 = self::utf8_to_unicode($string);
+        foreach ($uni as $i => $codePoint) {
+            // Code points without an entry are left untouched; isset() avoids an undefined-key warning.
+            if (isset($utf8_upper_to_lower[$codePoint])) {
+                $uni[$i] = $utf8_upper_to_lower[$codePoint];
+            }
+        }
+        return self::unicode_to_utf8($uni);
+    }
+
+    /**
+     * This function returns any UTF-8 encoded text as a list of
+     * Unicode values:
+     *
+     * @author Scott Michael Reynen <scott@randomchaos.com>
+     *
+     * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
+     * @see    unicode_to_utf8()
+     */
+    private static function utf8_to_unicode(&$str)
+    {
+        $unicode     = []; // decoded code points, in input order (this is the return value)
+        $values      = []; // bytes gathered so far for the current multi-byte character
+        $looking_for = 1; // number of bytes the current character is expected to have
+
+        for ($i = 0; $i < strlen($str); $i++) {
+            $this_value = ord($str[$i]);
+            if ($this_value < 128) {
+                $unicode[] = $this_value; // ASCII byte: the byte value is the code point
+            } else {
+                if (count($values) == 0) {
+                    $looking_for = ($this_value < 224) ? 2 : 3; // only 2- and 3-byte sequences are told apart; a 4-byte lead is read as 3-byte
+                }
+                // Collect the byte; once the sequence is complete, combine the payload bits into one code point.
+                $values[] = $this_value;
+                if (count($values) == $looking_for) {
+                    $number = ($looking_for == 3) ?
+                    (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64) :
+                    (($values[0] % 32) * 64) + ($values[1] % 64);
+                    $unicode[]   = $number;
+                    $values      = [];
+                    $looking_for = 1;
+                }
+            }
+        }
+        return $unicode;
+    }
+
+    /**
+     * This function converts a Unicode array back to its UTF-8 representation
+     *
+     * @author Scott Michael Reynen <scott@randomchaos.com>
+     *
+     * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
+     * @see    utf8_to_unicode()
+     */
+    private static function unicode_to_utf8(&$str)
+    {
+        if (!is_array($str)) { // despite its name, $str must be an array of code points
+            return '';
+        }
+
+        $utf8 = '';
+        foreach ($str as $unicode) {
+            if ($unicode < 128) { // one byte: the code point itself
+                $utf8 .= chr($unicode);
+            } elseif ($unicode < 2048) { // two bytes: 110xxxxx 10xxxxxx
+                $utf8 .= chr(192 + (($unicode - ($unicode % 64)) / 64));
+                $utf8 .= chr(128 + ($unicode % 64));
+            } else { // three bytes: 1110xxxx 10xxxxxx 10xxxxxx; used for every value >= 2048, there is no 4-byte branch
+                $utf8 .= chr(224 + (($unicode - ($unicode % 4096)) / 4096));
+                $utf8 .= chr(128 + ((($unicode % 4096) - ($unicode % 64)) / 64));
+                $utf8 .= chr(128 + ($unicode % 64));
+            }
+        }
+        return $utf8;
+    }
+
+    /**
+     * This is a Unicode aware replacement for strrpos()
+     *
+     * Uses mb_string extension if available
+     *
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     *
+     * @see    strrpos()
+     */
+    private static function strrpos($haystack, $needle, $offset = 0)
+    {
+        // Preferred path: let mbstring do the work unless it is missing or explicitly disabled.
+        $useMbstring = !defined('UTF8_NOMBSTRING') && function_exists('mb_strrpos');
+        if ($useMbstring) {
+            return mb_strrpos($haystack, $needle, $offset, 'utf-8');
+        }
+
+        if ($offset) {
+            if (!is_int($offset)) {
+                trigger_error('Offset must be an integer', E_USER_WARNING);
+                return false;
+            }
+
+            // Search only the part after $offset, then shift the hit back into haystack coordinates.
+            $found = self::strrpos(self::substr($haystack, $offset), $needle);
+            return ($found !== false) ? $found + $offset : false;
+        }
+
+        // No offset: split on the needle; the length of the last piece locates the final occurrence.
+        $pieces = self::explode($needle, $haystack);
+        $total  = count($pieces);
+        if ($total <= 1) {
+            return false;
+        }
+        return self::strlen($haystack) - self::strlen($pieces[($total - 1)]) - self::strlen($needle);
+    }
+
+    /**
+     * Unicode aware replacement for explode
+     *
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     *
+     * @see    explode();
+     */
+    private static function explode($sep, $str)
+    {
+        if ($sep != '') {
+            return preg_split('!' . preg_quote($sep, '!') . '!u', $str);
+        }
+        // An empty separator cannot be split on: raise a warning and signal failure.
+        trigger_error('Empty delimiter', E_USER_WARNING);
+        return false;
+    }
+}
diff --git a/src/Stemmer/RussianStemmer.php b/src/Stemmer/RussianStemmer.php
new file mode 100644
index 0000000..a1e174a
--- /dev/null
+++ b/src/Stemmer/RussianStemmer.php
@@ -0,0 +1,112 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ * Simple stemmer for russian language originally written by Nenad Tičarić
+ *
+ * @link https://github.com/teamtnt/tntsearch
+ *
+ * Copyright (c) 2016 Nenad Tičarić nticaric@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+class RussianStemmer implements Stemmer
+{
+    private static $VOWEL = '/аеиоуыэюя/u'; // not referenced anywhere else in this class
+    // Perfective gerund endings; the second group (в|вши|вшись) only matches directly after "а" or "я".
+    private static $PERFECTIVEGROUND = '/((ив|ивши|ившись|ыв|ывши|ывшись)|((?<=[ая])(в|вши|вшись)))$/u';
+    // Reflexive endings "ся" / "сь".
+    private static $REFLEXIVE = '/(с[яь])$/u';
+    // Adjective endings.
+    private static $ADJECTIVE = '/(ее|ие|ые|ое|ими|ыми|ей|ий|ый|ой|ем|им|ым|ом|его|ого|ему|ому|их|ых|ую|юю|ая|яя|ою|ею)$/u';
+    // Participle endings; the second group only matches directly after "а" or "я".
+    private static $PARTICIPLE = '/((ивш|ывш|ующ)|((?<=[ая])(ем|нн|вш|ющ|щ)))$/u';
+    // Verb endings; the second group only matches directly after "а" or "я".
+    private static $VERB = '/((ила|ыла|ена|ейте|уйте|ите|или|ыли|ей|уй|ил|ыл|им|ым|ен|ило|ыло|ено|ят|ует|уют|ит|ыт|ены|ить|ыть|ишь|ую|ю)|((?<=[ая])(ла|на|ете|йте|ли|й|л|ем|н|ло|но|ет|ют|ны|ть|ешь|нно)))$/u';
+    // Noun endings.
+    private static $NOUN = '/(а|ев|ов|ие|ье|е|иями|ями|ами|еи|ии|и|ией|ей|ой|ий|й|иям|ям|ием|ем|ам|ом|о|у|ах|иях|ях|ы|ь|ию|ью|ю|ия|ья|я)$/u';
+    // Splits a word into group 1 (everything up to and including the first vowel) and group 2 (the rest, "RV").
+    private static $RVRE = '/^(.*?[аеиоуыэюя])(.*)$/u';
+    // Matches text that contains consonant, vowel(s), consonant(s), vowel and ends in "ост" or "ость".
+    private static $DERIVATIONAL = '/[^аеиоуыэюя][аеиоуыэюя]+[^аеиоуыэюя]+[аеиоуыэюя].*(?<=о)сть?$/u';
+
+    private static function s(&$s, $re, $to)
+    {
+        $orig = $s;
+        $s    = preg_replace($re, $to, $s); // rewrites the caller's variable ($s is passed by reference)
+        return $orig !== $s; // true when the replacement changed the string
+    }
+
+    private static function m($s, $re)
+    {
+        return preg_match($re, $s); // preg_match() result: 1 on match, 0 on no match, false on error
+    }
+
+    public static function stem($word)
+    {
+        $word = mb_strtolower($word); // no encoding argument: relies on mbstring's internal encoding
+        $word = str_replace('ё', 'е', $word);
+
+        $stem = $word; // returned unchanged when the word has no vowel or nothing follows the first vowel
+
+        do { // executed once; "break" serves as an early exit
+            if (!preg_match(self::$RVRE, $word, $p)) {
+                break;
+            }
+            $start = $p[1];
+            $RV    = $p[2];
+            if (!$RV) {
+                break;
+            }
+
+            // Step 1: strip a perfective gerund ending; failing that, strip a reflexive ending, then an adjective (plus participle) ending, else a verb ending, else a noun ending
+            if (!self::s($RV, self::$PERFECTIVEGROUND, '')) {
+                self::s($RV, self::$REFLEXIVE, '');
+
+                if (self::s($RV, self::$ADJECTIVE, '')) {
+                    self::s($RV, self::$PARTICIPLE, '');
+                } else {
+                    if (!self::s($RV, self::$VERB, '')) {
+                        self::s($RV, self::$NOUN, '');
+                    }
+                }
+            }
+
+            // Step 2: remove a trailing "и"
+            self::s($RV, '/и$/u', '');
+
+            // Step 3: when RV matches the derivational pattern, remove a trailing "ост" / "ость"
+            if (self::m($RV, self::$DERIVATIONAL)) {
+                self::s($RV, '/ость?$/u', '');
+            }
+
+            // Step 4: remove a trailing "ь"; otherwise remove "ейш"/"ейше" and reduce a final "нн" to "н". NOTE(review): '/ейше?/u' has no "$" anchor, so it matches anywhere in RV - confirm this is intended
+            if (!self::s($RV, '/ь$/u', '')) {
+                self::s($RV, '/ейше?/u', '');
+                self::s($RV, '/нн$/u', 'н');
+            }
+
+            $stem = $start . $RV; // re-attach the untouched prefix to the reduced RV
+        } while (false);
+
+        return $stem;
+    }
+}
diff --git a/src/Stemmer/Stemmer.php b/src/Stemmer/Stemmer.php
new file mode 100644
index 0000000..9c19558
--- /dev/null
+++ b/src/Stemmer/Stemmer.php
@@ -0,0 +1,8 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+interface Stemmer
+{
+    public static function stem($word); // takes a single word; the implementations in this package return its stemmed form
+}
diff --git a/src/Stemmer/UkrainianStemmer.php b/src/Stemmer/UkrainianStemmer.php
new file mode 100644
index 0000000..9ac04a3
--- /dev/null
+++ b/src/Stemmer/UkrainianStemmer.php
@@ -0,0 +1,113 @@
+<?php
+
+namespace WpBlocks\Search\Stemmer;
+
+/*
+ * Simple stemmer for ukrainian language originally written by Nenad Tičarić
+ *
+ * @link https://github.com/teamtnt/tntsearch
+ *
+ * Copyright (c) 2016 Nenad Tičarić nticaric@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+class UkrainianStemmer implements Stemmer
+{
+    private static $VOWEL = '/аеиоуюяіїє/u'; // not referenced anywhere else in this class
+
+    /* Vowels: http://uk.wikipedia.org/wiki/Голосний_звук */
+    // var $PERFECTIVEGROUND = '/((ив|ивши|ившись|ыв|ывши|ывшись((?<=[ая])(в|вши|вшись)))$/';
+    private static $PERFECTIVEGROUND = '/(ив|ивши|ившись|ів|івши|івшись((?<=[ая|я])(в|вши|вшись)))$/u';
+    // NOTE(review) on $PERFECTIVEGROUND: no "|" separates "івшись" from the look-behind group, so that alternative needs "а", "я" or "|" right after "ь" and appears unable to match; "[ая|я]" is a character class that also contains a literal "|" - confirm intent.
+    private static $REFLEXIVE = '/(с[яьи])$/u'; // http://uk.wikipedia.org/wiki/Рефлексивне_дієслово
+
+    private static $ADJECTIVE = '/(ими|ій|ий|а|е|ова|ове|ів|є|їй|єє|еє|я|ім|ем|им|ім|их|іх|ою|йми|іми|у|ю|ого|ому|ої)$/u'; //http://uk.wikipedia.org/wiki/Прикметник + http://wapedia.mobi/uk/Прикметник
+
+    private static $PARTICIPLE = '/(ий|ого|ому|им|ім|а|ій|у|ою|ій|і|их|йми|их)$/u'; //http://uk.wikipedia.org/wiki/Дієприкметник
+
+    private static $VERB = '/(сь|ся|ив|ать|ять|у|ю|ав|али|учи|ячи|вши|ши|е|ме|ати|яти|є)$/u'; //http://uk.wikipedia.org/wiki/Дієслово
+
+    private static $NOUN = '/(а|ев|ов|е|ями|ами|еи|и|ей|ой|ий|й|иям|ям|ием|ем|ам|ом|о|у|ах|иях|ях|ы|ь|ию|ью|ю|ия|ья|я|і|ові|ї|ею|єю|ою|є|еві|ем|єм|ів|їв|\'ю)$/u'; //http://uk.wikipedia.org/wiki/Іменник
+    // Splits a word into group 1 (everything up to and including the first vowel) and group 2 (the rest, "RV").
+    private static $RVRE = '/^(.*?[аеиоуюяіїє])(.*)$/u';
+    // Matches text that contains consonant, vowel(s), consonant(s), vowel and ends in "ост" or "ость".
+    private static $DERIVATIONAL = '/[^аеиоуюяіїє][аеиоуюяіїє]+[^аеиоуюяіїє]+[аеиоуюяіїє].*(?<=о)сть?$/u';
+
+    private static function s(&$s, $re, $to)
+    {
+        $orig = $s;
+        $s    = preg_replace($re, $to, $s); // rewrites the caller's variable ($s is passed by reference)
+        return $orig !== $s; // true when the replacement changed the string
+    }
+
+    private static function m($s, $re)
+    {
+        return preg_match($re, $s); // preg_match() result: 1 on match, 0 on no match, false on error
+    }
+
+    public static function stem($word)
+    {
+        $word = mb_strtolower($word); // no encoding argument: relies on mbstring's internal encoding
+
+        $stem = $word; // returned unchanged when the word has no vowel or nothing follows the first vowel
+
+        do { // executed once; "break" serves as an early exit
+            if (!preg_match(self::$RVRE, $word, $p)) {
+                break;
+            }
+            $start = $p[1];
+            $RV    = $p[2];
+            if (!$RV) {
+                break;
+            }
+
+            // Step 1: strip a perfective gerund ending; failing that, strip a reflexive ending, then an adjective (plus participle) ending, else a verb ending, else a noun ending
+            if (!self::s($RV, self::$PERFECTIVEGROUND, '')) {
+                self::s($RV, self::$REFLEXIVE, '');
+
+                if (self::s($RV, self::$ADJECTIVE, '')) {
+                    self::s($RV, self::$PARTICIPLE, '');
+                } else {
+                    if (!self::s($RV, self::$VERB, '')) {
+                        self::s($RV, self::$NOUN, '');
+                    }
+                }
+            }
+
+            // Step 2: remove one trailing character from the class below. NOTE(review): inside [...] the "|" is a literal, and the second letter looks like Latin "i" rather than Cyrillic "і" - confirm
+            self::s($RV, '/[и|i]$/u', '');
+
+            // Step 3: when RV matches the derivational pattern, remove the trailing "ст" / "сть" (the preceding "о" is kept)
+            if (self::m($RV, self::$DERIVATIONAL)) {
+                self::s($RV, '/сть?$/u', '');
+            }
+
+            // Step 4: remove a trailing "ь"; otherwise remove "ейш"/"ейше" and reduce a final "нн" to "н". NOTE(review): '/ейше?/u' has no "$" anchor, so it matches anywhere in RV - confirm this is intended
+            if (!self::s($RV, '/ь$/u', '')) {
+                self::s($RV, '/ейше?/u', '');
+                self::s($RV, '/нн$/u', 'н');
+            }
+
+            $stem = $start . $RV; // re-attach the untouched prefix to the reduced RV
+        } while (false);
+
+        return $stem;
+    }
+}
diff --git a/src/Stopwords/croatian.json b/src/Stopwords/croatian.json
new file mode 100644
index 0000000..a92a934
--- /dev/null
+++ b/src/Stopwords/croatian.json
@@ -0,0 +1,182 @@
+[
+  "a",
+  "ako",
+  "ali",
+  "bi",
+  "bih",
+  "bila",
+  "bili",
+  "bilo",
+  "bio",
+  "bismo",
+  "biste",
+  "biti",
+  "bumo",
+  "da",
+  "do",
+  "duž",
+  "ga",
+  "hoće",
+  "hoćemo",
+  "hoćete",
+  "hoćeš",
+  "hoću",
+  "i",
+  "iako",
+  "ih",
+  "ili",
+  "iz",
+  "ja",
+  "je",
+  "jedna",
+  "jedne",
+  "jedno",
+  "jer",
+  "jesam",
+  "jesi",
+  "jesmo",
+  "jest",
+  "jeste",
+  "jesu",
+  "jim",
+  "joj",
+  "još",
+  "ju",
+  "kada",
+  "kako",
+  "kao",
+  "koja",
+  "koje",
+  "koji",
+  "kojima",
+  "koju",
+  "kroz",
+  "li",
+  "me",
+  "mene",
+  "meni",
+  "mi",
+  "mimo",
+  "moj",
+  "moja",
+  "moje",
+  "mu",
+  "na",
+  "nad",
+  "nakon",
+  "nam",
+  "nama",
+  "nas",
+  "naš",
+  "naša",
+  "naše",
+  "našeg",
+  "ne",
+  "nego",
+  "neka",
+  "neki",
+  "nekog",
+  "neku",
+  "nema",
+  "netko",
+  "neće",
+  "nećemo",
+  "nećete",
+  "nećeš",
+  "neću",
+  "nešto",
+  "ni",
+  "nije",
+  "nikoga",
+  "nikoje",
+  "nikoju",
+  "nisam",
+  "nisi",
+  "nismo",
+  "niste",
+  "nisu",
+  "njega",
+  "njegov",
+  "njegova",
+  "njegovo",
+  "njemu",
+  "njezin",
+  "njezina",
+  "njezino",
+  "njih",
+  "njihov",
+  "njihova",
+  "njihovo",
+  "njim",
+  "njima",
+  "njoj",
+  "nju",
+  "no",
+  "o",
+  "od",
+  "odmah",
+  "on",
+  "ona",
+  "oni",
+  "ono",
+  "ova",
+  "pa",
+  "pak",
+  "po",
+  "pod",
+  "pored",
+  "prije",
+  "s",
+  "sa",
+  "sam",
+  "samo",
+  "se",
+  "sebe",
+  "sebi",
+  "si",
+  "smo",
+  "ste",
+  "su",
+  "sve",
+  "svi",
+  "svog",
+  "svoj",
+  "svoja",
+  "svoje",
+  "svom",
+  "ta",
+  "tada",
+  "taj",
+  "tako",
+  "te",
+  "tebe",
+  "tebi",
+  "ti",
+  "to",
+  "toj",
+  "tome",
+  "tu",
+  "tvoj",
+  "tvoja",
+  "tvoje",
+  "u",
+  "uz",
+  "vam",
+  "vama",
+  "vas",
+  "vaš",
+  "vaša",
+  "vaše",
+  "već",
+  "vi",
+  "vrlo",
+  "za",
+  "zar",
+  "će",
+  "ćemo",
+  "ćete",
+  "ćeš",
+  "ću",
+  "što",
+  "tijekom"
+]
diff --git a/src/Stopwords/english.json b/src/Stopwords/english.json
new file mode 100644
index 0000000..04718ed
--- /dev/null
+++ b/src/Stopwords/english.json
@@ -0,0 +1,187 @@
+[
+  "one",
+  "also",
+  "lets",
+  "get",
+  "still",
+  "vs",
+  "re",
+  "our",
+  "their",
+  "couldn",
+  "hadn't",
+  "for",
+  "these",
+  "not",
+  "themselves",
+  "your",
+  "won't",
+  "which",
+  "just",
+  "o",
+  "you're",
+  "can",
+  "shouldn't",
+  "we",
+  "at",
+  "had",
+  "and",
+  "myself",
+  "but",
+  "you've",
+  "having",
+  "my",
+  "was",
+  "ve",
+  "during",
+  "it",
+  "y",
+  "she",
+  "how",
+  "haven't",
+  "other",
+  "aren't",
+  "there",
+  "doesn't",
+  "he",
+  "do",
+  "you'll",
+  "d",
+  "where",
+  "a",
+  "hers",
+  "are",
+  "both",
+  "i",
+  "or",
+  "itself",
+  "while",
+  "over",
+  "have",
+  "me",
+  "him",
+  "ain",
+  "haven",
+  "that",
+  "down",
+  "theirs",
+  "shan",
+  "what",
+  "shan't",
+  "them",
+  "all",
+  "mightn",
+  "from",
+  "when",
+  "won",
+  "then",
+  "most",
+  "wouldn",
+  "now",
+  "again",
+  "why",
+  "only",
+  "by",
+  "too",
+  "don't",
+  "herself",
+  "wasn't",
+  "with",
+  "each",
+  "above",
+  "whom",
+  "ll",
+  "until",
+  "her",
+  "so",
+  "who",
+  "needn't",
+  "ours",
+  "after",
+  "m",
+  "isn't",
+  "they",
+  "weren't",
+  "aren",
+  "will",
+  "doesn",
+  "the",
+  "any",
+  "hasn't",
+  "isn",
+  "were",
+  "his",
+  "up",
+  "yourself",
+  "on",
+  "out",
+  "as",
+  "off",
+  "below",
+  "own",
+  "s",
+  "into",
+  "some",
+  "t",
+  "hasn",
+  "between",
+  "here",
+  "should",
+  "of",
+  "in",
+  "being",
+  "mightn't",
+  "mustn",
+  "ourselves",
+  "shouldn",
+  "does",
+  "an",
+  "than",
+  "mustn't",
+  "yourselves",
+  "to",
+  "no",
+  "about",
+  "its",
+  "more",
+  "hadn",
+  "himself",
+  "further",
+  "you",
+  "is",
+  "against",
+  "once",
+  "this",
+  "should've",
+  "nor",
+  "did",
+  "wasn",
+  "she's",
+  "weren",
+  "has",
+  "those",
+  "been",
+  "wouldn't",
+  "don",
+  "yours",
+  "if",
+  "few",
+  "didn",
+  "be",
+  "needn",
+  "couldn't",
+  "that'll",
+  "didn't",
+  "same",
+  "before",
+  "ma",
+  "because",
+  "it's",
+  "such",
+  "very",
+  "you'd",
+  "doing",
+  "through",
+  "under",
+  "am"
+]
diff --git a/src/Stopwords/french.json b/src/Stopwords/french.json
new file mode 100644
index 0000000..66e7972
--- /dev/null
+++ b/src/Stopwords/french.json
@@ -0,0 +1,159 @@
+[
+  "auront",
+  "votre",
+  "ils",
+  "\u00e9tions",
+  "et",
+  "\u00e9tais",
+  "avec",
+  "elle",
+  "nos",
+  "\u00e9taient",
+  "\u00e9tait",
+  "soyez",
+  "seront",
+  "sommes",
+  "eussions",
+  "eus",
+  "eurent",
+  "aient",
+  "ont",
+  "ai",
+  "tu",
+  "aurais",
+  "e\u00fbmes",
+  "serais",
+  "eu",
+  "avait",
+  "ce",
+  "aie",
+  "ayant",
+  "avez",
+  "aurez",
+  "je",
+  "serons",
+  "sont",
+  "aurons",
+  "s",
+  "\u00e9t\u00e9es",
+  "soit",
+  "\u00eates",
+  "e\u00fbtes",
+  "par",
+  "qui",
+  "y",
+  "avaient",
+  "ne",
+  "vos",
+  "auriez",
+  "tes",
+  "serai",
+  "seraient",
+  "\u00e9tiez",
+  "te",
+  "fus",
+  "\u00e9tant",
+  "fussions",
+  "\u00e9t\u00e9s",
+  "mon",
+  "e\u00fbt",
+  "d",
+  "ayants",
+  "avions",
+  "f\u00fbmes",
+  "eues",
+  "eusses",
+  "la",
+  "n",
+  "c",
+  "lui",
+  "est",
+  "ayantes",
+  "nous",
+  "aies",
+  "que",
+  "aurions",
+  "ces",
+  "avons",
+  "mes",
+  "un",
+  "le",
+  "sa",
+  "fusse",
+  "aura",
+  "leur",
+  "eut",
+  "eussent",
+  "se",
+  "les",
+  "m",
+  "ton",
+  "\u00e9tantes",
+  "serait",
+  "ses",
+  "t",
+  "\u00e9t\u00e9",
+  "une",
+  "f\u00fbt",
+  "fusses",
+  "pas",
+  "aux",
+  "vous",
+  "ayez",
+  "ayons",
+  "\u00e9tants",
+  "es",
+  "m\u00eame",
+  "fut",
+  "auraient",
+  "eusse",
+  "toi",
+  "suis",
+  "aviez",
+  "aurai",
+  "ayante",
+  "seras",
+  "ta",
+  "sois",
+  "f\u00fbtes",
+  "auras",
+  "qu",
+  "\u00e9tante",
+  "serions",
+  "seriez",
+  "pour",
+  "ma",
+  "on",
+  "dans",
+  "serez",
+  "\u00e0",
+  "son",
+  "\u00e9t\u00e9e",
+  "furent",
+  "des",
+  "l",
+  "fussent",
+  "ait",
+  "notre",
+  "sera",
+  "me",
+  "soyons",
+  "il",
+  "mais",
+  "du",
+  "en",
+  "sur",
+  "fussiez",
+  "as",
+  "ou",
+  "avais",
+  "de",
+  "soient",
+  "eue",
+  "eux",
+  "aurait",
+  "eussiez",
+  "au",
+  "moi",
+  "j"
+]
diff --git a/src/Stopwords/german.json b/src/Stopwords/german.json
new file mode 100644
index 0000000..88c8644
--- /dev/null
+++ b/src/Stopwords/german.json
@@ -0,0 +1,234 @@
+[
+  "anderer",
+  "unseres",
+  "keinem",
+  "jener",
+  "jenes",
+  "keiner",
+  "jedem",
+  "anders",
+  "da",
+  "nichts",
+  "sehr",
+  "unseren",
+  "den",
+  "kein",
+  "wie",
+  "zu",
+  "meine",
+  "sondern",
+  "ihm",
+  "bei",
+  "einige",
+  "wollen",
+  "denn",
+  "ihres",
+  "werde",
+  "viel",
+  "wenn",
+  "eines",
+  "uns",
+  "welchem",
+  "habe",
+  "k\u00f6nnen",
+  "mich",
+  "und",
+  "euren",
+  "anderr",
+  "dazu",
+  "jedes",
+  "kann",
+  "an",
+  "wir",
+  "diesem",
+  "was",
+  "eure",
+  "ihre",
+  "wieder",
+  "dann",
+  "unser",
+  "eurer",
+  "in",
+  "deine",
+  "doch",
+  "ist",
+  "um",
+  "demselben",
+  "nach",
+  "waren",
+  "weil",
+  "manchen",
+  "dem",
+  "ihn",
+  "anderes",
+  "ohne",
+  "einen",
+  "wollte",
+  "jenem",
+  "einiger",
+  "seinen",
+  "dessen",
+  "jede",
+  "mir",
+  "keinen",
+  "dasselbe",
+  "k\u00f6nnte",
+  "es",
+  "hat",
+  "oder",
+  "\u00fcber",
+  "deines",
+  "ihr",
+  "wird",
+  "desselben",
+  "vor",
+  "meiner",
+  "seines",
+  "manchem",
+  "hatten",
+  "einigem",
+  "anderen",
+  "einmal",
+  "diese",
+  "meines",
+  "ich",
+  "also",
+  "derselben",
+  "hinter",
+  "solchem",
+  "war",
+  "damit",
+  "einem",
+  "deiner",
+  "aus",
+  "seinem",
+  "aller",
+  "anderm",
+  "sein",
+  "nur",
+  "einig",
+  "dieselbe",
+  "solchen",
+  "weg",
+  "haben",
+  "hin",
+  "deinen",
+  "dass",
+  "einigen",
+  "da\u00df",
+  "solche",
+  "alle",
+  "diesen",
+  "im",
+  "einiges",
+  "du",
+  "nicht",
+  "zwischen",
+  "w\u00fcrden",
+  "das",
+  "andere",
+  "jenen",
+  "sind",
+  "die",
+  "jetzt",
+  "so",
+  "dein",
+  "vom",
+  "bist",
+  "dieser",
+  "am",
+  "dies",
+  "des",
+  "manche",
+  "ihnen",
+  "w\u00e4hrend",
+  "allem",
+  "indem",
+  "aber",
+  "musste",
+  "dieselben",
+  "eures",
+  "gewesen",
+  "ihrer",
+  "welcher",
+  "derselbe",
+  "euer",
+  "andern",
+  "seiner",
+  "dich",
+  "denselben",
+  "sie",
+  "welchen",
+  "dieses",
+  "eurem",
+  "unserem",
+  "bis",
+  "hier",
+  "allen",
+  "mancher",
+  "wo",
+  "einer",
+  "auch",
+  "gegen",
+  "alles",
+  "weiter",
+  "nun",
+  "keines",
+  "keine",
+  "meinen",
+  "werden",
+  "zwar",
+  "der",
+  "warst",
+  "zur",
+  "eine",
+  "wirst",
+  "ihren",
+  "auf",
+  "dir",
+  "soll",
+  "anderem",
+  "als",
+  "deinem",
+  "durch",
+  "von",
+  "meinem",
+  "jene",
+  "ein",
+  "mit",
+  "unter",
+  "zum",
+  "bin",
+  "hab",
+  "derer",
+  "jeden",
+  "sollte",
+  "w\u00fcrde",
+  "welches",
+  "ander",
+  "er",
+  "etwas",
+  "sich",
+  "manches",
+  "welche",
+  "seine",
+  "jeder",
+  "ins",
+  "f\u00fcr",
+  "solcher",
+  "solches",
+  "ihrem",
+  "unsere",
+  "will",
+  "ob",
+  "dort",
+  "hatte",
+  "mein",
+  "sonst",
+  "man",
+  "muss",
+  "noch",
+  "machen",
+  "selbst",
+  "euch"
+]
diff --git a/src/Stopwords/italian.json b/src/Stopwords/italian.json
new file mode 100644
index 0000000..e420c1b
--- /dev/null
+++ b/src/Stopwords/italian.json
@@ -0,0 +1,281 @@
+[
+  "gli",
+  "dove",
+  "a",
+  "fossero",
+  "stiano",
+  "alle",
+  "avevano",
+  "hanno",
+  "mie",
+  "sar\u00f2",
+  "suoi",
+  "stai",
+  "questo",
+  "un",
+  "nei",
+  "anche",
+  "facessimo",
+  "starebbe",
+  "stemmo",
+  "questa",
+  "stesse",
+  "sua",
+  "dov",
+  "o",
+  "dallo",
+  "ero",
+  "dell",
+  "starei",
+  "stando",
+  "negl",
+  "fossi",
+  "all",
+  "sarai",
+  "di",
+  "suo",
+  "far\u00f2",
+  "tu",
+  "si",
+  "stavate",
+  "facciano",
+  "degli",
+  "vostra",
+  "avreste",
+  "foste",
+  "avranno",
+  "ha",
+  "facevo",
+  "quelli",
+  "sareste",
+  "loro",
+  "in",
+  "degl",
+  "come",
+  "stanno",
+  "ad",
+  "lo",
+  "avremo",
+  "facciate",
+  "avessi",
+  "dalla",
+  "vostro",
+  "coi",
+  "sugl",
+  "con",
+  "una",
+  "quelle",
+  "avuti",
+  "eri",
+  "eravamo",
+  "eravate",
+  "sono",
+  "fanno",
+  "stessero",
+  "abbiamo",
+  "chi",
+  "sia",
+  "alla",
+  "nello",
+  "tra",
+  "nostra",
+  "nostre",
+  "avemmo",
+  "sar\u00e0",
+  "saremmo",
+  "col",
+  "al",
+  "dei",
+  "da",
+  "facevano",
+  "faceste",
+  "mi",
+  "facesse",
+  "i",
+  "avete",
+  "\u00e8",
+  "siate",
+  "dai",
+  "tuoi",
+  "dal",
+  "avevo",
+  "farete",
+  "avute",
+  "allo",
+  "avr\u00e0",
+  "avuto",
+  "farei",
+  "io",
+  "tua",
+  "avevate",
+  "negli",
+  "l",
+  "la",
+  "faremo",
+  "vostri",
+  "saresti",
+  "stette",
+  "stavo",
+  "avendo",
+  "sarete",
+  "stavamo",
+  "fosse",
+  "faranno",
+  "perch\u00e9",
+  "staremo",
+  "voi",
+  "delle",
+  "noi",
+  "stareste",
+  "stava",
+  "dagl",
+  "se",
+  "avrete",
+  "quanto",
+  "della",
+  "nella",
+  "sull",
+  "sulle",
+  "vi",
+  "facesti",
+  "li",
+  "faceva",
+  "facciamo",
+  "miei",
+  "sul",
+  "fui",
+  "avrai",
+  "avessero",
+  "avuta",
+  "stiamo",
+  "del",
+  "stavi",
+  "agl",
+  "avevi",
+  "erano",
+  "uno",
+  "abbiate",
+  "stessi",
+  "quanta",
+  "staresti",
+  "fosti",
+  "sue",
+  "stettero",
+  "faremmo",
+  "vostre",
+  "nostri",
+  "avevamo",
+  "avrei",
+  "abbia",
+  "sulla",
+  "le",
+  "sarebbero",
+  "quale",
+  "quante",
+  "quella",
+  "ed",
+  "nell",
+  "tue",
+  "far\u00e0",
+  "fossimo",
+  "farebbero",
+  "siano",
+  "aveste",
+  "siamo",
+  "saranno",
+  "star\u00e0",
+  "feci",
+  "sugli",
+  "lui",
+  "fummo",
+  "fai",
+  "stetti",
+  "ebbi",
+  "ebbero",
+  "furono",
+  "ne",
+  "non",
+  "farai",
+  "faccio",
+  "pi\u00f9",
+  "dagli",
+  "avrebbe",
+  "mio",
+  "avesse",
+  "era",
+  "stia",
+  "questi",
+  "starai",
+  "su",
+  "il",
+  "ho",
+  "dalle",
+  "nelle",
+  "sui",
+  "tutto",
+  "ti",
+  "star\u00f2",
+  "fareste",
+  "dello",
+  "stesti",
+  "facessero",
+  "tuo",
+  "aveva",
+  "avessimo",
+  "siete",
+  "essendo",
+  "staranno",
+  "nostro",
+  "ma",
+  "c",
+  "avresti",
+  "stiate",
+  "per",
+  "queste",
+  "stavano",
+  "ci",
+  "ebbe",
+  "sto",
+  "starete",
+  "starebbero",
+  "cui",
+  "nel",
+  "facevate",
+  "fecero",
+  "facendo",
+  "e",
+  "farebbe",
+  "avr\u00f2",
+  "quello",
+  "avrebbero",
+  "dall",
+  "saremo",
+  "ai",
+  "avremmo",
+  "fu",
+  "fece",
+  "stessimo",
+  "contro",
+  "sarebbe",
+  "facevamo",
+  "steste",
+  "avesti",
+  "faccia",
+  "facessi",
+  "agli",
+  "quanti",
+  "abbiano",
+  "facevi",
+  "sta",
+  "facemmo",
+  "faresti",
+  "hai",
+  "sei",
+  "staremmo",
+  "sullo",
+  "mia",
+  "sarei",
+  "lei",
+  "che",
+  "tutti"
+]
diff --git a/src/Stopwords/latvian.json b/src/Stopwords/latvian.json
new file mode 100644
index 0000000..a0a41ba
--- /dev/null
+++ b/src/Stopwords/latvian.json
@@ -0,0 +1,165 @@
+[
+  "aiz",
+  "ap",
+  "ar",
+  "apakš",
+  "ārpus",
+  "augšpus",
+  "bez",
+  "caur",
+  "dēļ",
+  "gar",
+  "iekš",
+  "iz",
+  "kopš",
+  "labad",
+  "lejpus",
+  "līdz",
+  "no",
+  "otrpus",
+  "pa",
+  "par",
+  "pār",
+  "pēc",
+  "pie",
+  "pirms",
+  "pret",
+  "priekš",
+  "starp",
+  "šaipus",
+  "uz",
+  "viņpus",
+  "virs",
+  "virspus",
+  "zem",
+  "apakšpus",
+  "un",
+  "bet",
+  "jo",
+  "ja",
+  "ka",
+  "lai",
+  "tomēr",
+  "tikko",
+  "turpretī",
+  "arī",
+  "kaut",
+  "gan",
+  "tādēļ",
+  "tā",
+  "ne",
+  "tikvien",
+  "vien",
+  "kā",
+  "ir",
+  "te",
+  "vai",
+  "kamēr",
+  "ar",
+  "diezin",
+  "droši",
+  "diemžēl",
+  "nebūt",
+  "ik",
+  "it",
+  "taču",
+  "nu",
+  "pat",
+  "tiklab",
+  "iekšpus",
+  "nedz",
+  "tik",
+  "nevis",
+  "turpretim",
+  "jeb",
+  "iekam",
+  "iekām",
+  "iekāms",
+  "kolīdz",
+  "līdzko",
+  "tiklīdz",
+  "jebšu",
+  "tālab",
+  "tāpēc",
+  "nekā",
+  "itin",
+  "jā",
+  "jau",
+  "jel",
+  "nē",
+  "nezin",
+  "tad",
+  "tikai",
+  "vis",
+  "tak",
+  "iekams",
+  "vien",
+  "būt",
+  "biju",
+  "biji",
+  "bija",
+  "bijām",
+  "bijāt",
+  "esmu",
+  "esi",
+  "esam",
+  "esat",
+  "būšu",
+  "būsi",
+  "būs",
+  "būsim",
+  "būsiet",
+  "tikt",
+  "tiku",
+  "tiki",
+  "tika",
+  "tikām",
+  "tikāt",
+  "tieku",
+  "tiec",
+  "tiek",
+  "tiekam",
+  "tiekat",
+  "tikšu",
+  "tiks",
+  "tiksim",
+  "tiksiet",
+  "tapt",
+  "tapi",
+  "tapāt",
+  "topat",
+  "tapšu",
+  "tapsi",
+  "taps",
+  "tapsim",
+  "tapsiet",
+  "kļūt",
+  "kļuvu",
+  "kļuvi",
+  "kļuva",
+  "kļuvām",
+  "kļuvāt",
+  "kļūstu",
+  "kļūsti",
+  "kļūst",
+  "kļūstam",
+  "kļūstat",
+  "kļūšu",
+  "kļūsi",
+  "kļūs",
+  "kļūsim",
+  "kļūsiet",
+  "varēt",
+  "varēju",
+  "varējām",
+  "varēšu",
+  "varēsim",
+  "var",
+  "varēji",
+  "varējāt",
+  "varēsi",
+  "varēsiet",
+  "varat",
+  "varēja",
+  "varēs"
+]
diff --git a/src/Stopwords/russian.json b/src/Stopwords/russian.json
new file mode 100644
index 0000000..a59fecb
--- /dev/null
+++ b/src/Stopwords/russian.json
@@ -0,0 +1,153 @@
+[
+  "больше",
+  "может",
+  "много",
+  "более",
+  "ее",
+  "со",
+  "она",
+  "к",
+  "потому",
+  "и",
+  "хорошо",
+  "надо",
+  "не",
+  "же",
+  "по",
+  "есть",
+  "раз",
+  "конечно",
+  "у",
+  "нельзя",
+  "быть",
+  "кто",
+  "под",
+  "в",
+  "во",
+  "об",
+  "лучше",
+  "какой",
+  "даже",
+  "ему",
+  "до",
+  "я",
+  "почти",
+  "тем",
+  "вдруг",
+  "как",
+  "вы",
+  "них",
+  "да",
+  "но",
+  "вас",
+  "вам",
+  "сам",
+  "свою",
+  "там",
+  "нее",
+  "один",
+  "то",
+  "было",
+  "ну",
+  "эту",
+  "два",
+  "того",
+  "никогда",
+  "этот",
+  "чтобы",
+  "чего",
+  "нет",
+  "всего",
+  "меня",
+  "при",
+  "впрочем",
+  "этого",
+  "такой",
+  "после",
+  "нас",
+  "что",
+  "перед",
+  "ни",
+  "ведь",
+  "когда",
+  "им",
+  "ним",
+  "между",
+  "ж",
+  "а",
+  "из",
+  "наконец",
+  "вот",
+  "нибудь",
+  "куда",
+  "чуть",
+  "иногда",
+  "все",
+  "с",
+  "тогда",
+  "ты",
+  "тоже",
+  "ничего",
+  "себе",
+  "так",
+  "уже",
+  "они",
+  "тут",
+  "был",
+  "над",
+  "эти",
+  "какая",
+  "опять",
+  "этой",
+  "можно",
+  "совсем",
+  "него",
+  "ней",
+  "была",
+  "на",
+  "чем",
+  "для",
+  "еще",
+  "без",
+  "от",
+  "моя",
+  "потом",
+  "их",
+  "сейчас",
+  "этом",
+  "он",
+  "другой",
+  "про",
+  "здесь",
+  "три",
+  "были",
+  "будто",
+  "разве",
+  "только",
+  "всегда",
+  "уж",
+  "или",
+  "всех",
+  "мы",
+  "том",
+  "чтоб",
+  "если",
+  "где",
+  "за",
+  "тот",
+  "хоть",
+  "ей",
+  "зачем",
+  "через",
+  "о",
+  "себя",
+  "бы",
+  "мне",
+  "ли",
+  "всю",
+  "будет",
+  "мой",
+  "теперь",
+  "тебя",
+  "его"
+]
diff --git a/src/Stopwords/spanish.json b/src/Stopwords/spanish.json
new file mode 100644
index 0000000..24d64de
--- /dev/null
+++ b/src/Stopwords/spanish.json
@@ -0,0 +1,315 @@
+[
+  "hubierais",
+  "sentido",
+  "suya",
+  "fu\u00e9semos",
+  "estuvi\u00e9semos",
+  "estar\u00e1s",
+  "fuerais",
+  "ha",
+  "estar\u00e1n",
+  "tuvi\u00e9ramos",
+  "t\u00fa",
+  "estuvi\u00e9ramos",
+  "tuviesen",
+  "habido",
+  "hube",
+  "os",
+  "pero",
+  "sentida",
+  "habr\u00e9",
+  "hayan",
+  "otros",
+  "sin",
+  "suyos",
+  "estuviste",
+  "tanto",
+  "tendr\u00eda",
+  "tuvieron",
+  "tuya",
+  "lo",
+  "hubieras",
+  "que",
+  "fueran",
+  "estar\u00edamos",
+  "sobre",
+  "qu\u00e9",
+  "se\u00e1is",
+  "m\u00ed",
+  "haya",
+  "vosotras",
+  "tuvierais",
+  "\u00e9l",
+  "tenidas",
+  "ser\u00edas",
+  "poco",
+  "quien",
+  "m\u00edas",
+  "ti",
+  "esto",
+  "tiene",
+  "hay\u00e1is",
+  "otro",
+  "estar\u00eda",
+  "seremos",
+  "suyas",
+  "como",
+  "ser\u00e9is",
+  "me",
+  "ni",
+  "habr\u00e1",
+  "tu",
+  "algo",
+  "una",
+  "tenemos",
+  "hab\u00edamos",
+  "ten\u00edan",
+  "estuvisteis",
+  "sean",
+  "hubieran",
+  "la",
+  "tuvieran",
+  "tuvo",
+  "soy",
+  "era",
+  "estadas",
+  "estar\u00e1",
+  "mucho",
+  "tendr\u00e1",
+  "estuviera",
+  "fuiste",
+  "fuese",
+  "tendr\u00e9",
+  "estos",
+  "fu\u00e9ramos",
+  "no",
+  "ellas",
+  "cual",
+  "todo",
+  "durante",
+  "para",
+  "est\u00e9",
+  "los",
+  "hemos",
+  "habr\u00e9is",
+  "contra",
+  "habr\u00edas",
+  "fuera",
+  "ten\u00e9is",
+  "estuvo",
+  "con",
+  "habr\u00eda",
+  "cuando",
+  "estad",
+  "las",
+  "estamos",
+  "a",
+  "tendr\u00e1s",
+  "est\u00e1is",
+  "nosotros",
+  "estada",
+  "esa",
+  "tuvieseis",
+  "hubisteis",
+  "tened",
+  "estaremos",
+  "vuestras",
+  "habr\u00edais",
+  "fuesen",
+  "te",
+  "yo",
+  "habr\u00edamos",
+  "hubiesen",
+  "habr\u00e1s",
+  "y",
+  "nosotras",
+  "estuvieses",
+  "tendr\u00e9is",
+  "fueras",
+  "m\u00edos",
+  "vuestra",
+  "estar\u00e9",
+  "quienes",
+  "tengas",
+  "tuvi\u00e9semos",
+  "entre",
+  "mi",
+  "hubiese",
+  "desde",
+  "tuviera",
+  "ser\u00e1",
+  "tendremos",
+  "hubieron",
+  "son",
+  "estuvieseis",
+  "estuvieras",
+  "estando",
+  "has",
+  "tenido",
+  "este",
+  "teng\u00e1is",
+  "muy",
+  "un",
+  "ten\u00eda",
+  "est\u00e9is",
+  "habr\u00edan",
+  "tuve",
+  "fui",
+  "ten\u00edamos",
+  "por",
+  "tuvieras",
+  "tuvieses",
+  "estuvieran",
+  "vuestro",
+  "ser\u00e1n",
+  "tambi\u00e9n",
+  "porque",
+  "nuestro",
+  "ser\u00edan",
+  "estuvimos",
+  "fuimos",
+  "estados",
+  "se",
+  "donde",
+  "nuestra",
+  "hubiera",
+  "fueron",
+  "somos",
+  "est\u00e1n",
+  "habidas",
+  "sentidas",
+  "m\u00edo",
+  "todos",
+  "esta",
+  "fueses",
+  "hayas",
+  "tuvimos",
+  "sois",
+  "hab\u00edais",
+  "algunos",
+  "hubieses",
+  "es",
+  "hab\u00e9is",
+  "de",
+  "ella",
+  "hab\u00edas",
+  "teniendo",
+  "del",
+  "est\u00e1s",
+  "ese",
+  "est\u00e9n",
+  "tus",
+  "otra",
+  "tuviese",
+  "nada",
+  "nuestras",
+  "sus",
+  "habr\u00e1n",
+  "e",
+  "hasta",
+  "fue",
+  "otras",
+  "estuvierais",
+  "est\u00e9s",
+  "esas",
+  "hubiste",
+  "tienen",
+  "ten\u00edas",
+  "uno",
+  "estemos",
+  "tuyo",
+  "tendr\u00edas",
+  "su",
+  "estuviese",
+  "tendr\u00edais",
+  "estaba",
+  "eras",
+  "fuisteis",
+  "habiendo",
+  "tuvisteis",
+  "siente",
+  "estuvieron",
+  "vosotros",
+  "tenidos",
+  "estar\u00edais",
+  "tengamos",
+  "tendr\u00edamos",
+  "hayamos",
+  "hab\u00eda",
+  "tenga",
+  "estar\u00e9is",
+  "estar",
+  "mis",
+  "hay",
+  "tuyos",
+  "sentidos",
+  "ante",
+  "estar\u00edan",
+  "estuviesen",
+  "tendr\u00edan",
+  "habremos",
+  "vuestros",
+  "eso",
+  "tengan",
+  "estabas",
+  "hubimos",
+  "fueseis",
+  "seamos",
+  "hubieseis",
+  "esos",
+  "tenida",
+  "sea",
+  "el",
+  "hubi\u00e9ramos",
+  "en",
+  "habidos",
+  "he",
+  "hubo",
+  "hab\u00edan",
+  "al",
+  "estar\u00edas",
+  "unos",
+  "tuviste",
+  "sintiendo",
+  "ser\u00eda",
+  "tendr\u00e1n",
+  "ten\u00edais",
+  "algunas",
+  "estuve",
+  "s\u00ed",
+  "nuestros",
+  "o",
+  "est\u00e1bamos",
+  "eres",
+  "habida",
+  "nos",
+  "hubi\u00e9semos",
+  "antes",
+  "estaban",
+  "eran",
+  "m\u00e1s",
+  "han",
+  "\u00e9ramos",
+  "estabais",
+  "tuyas",
+  "seas",
+  "les",
+  "ser\u00e1s",
+  "tengo",
+  "ellos",
+  "ser\u00e9",
+  "sentid",
+  "ser\u00edamos",
+  "estas",
+  "muchos",
+  "erais",
+  "estoy",
+  "suyo",
+  "est\u00e1",
+  "tienes",
+  "le",
+  "ser\u00edais",
+  "estado",
+  "m\u00eda",
+  "ya"
+]
diff --git a/src/Stopwords/ukrainian.json b/src/Stopwords/ukrainian.json
new file mode 100644
index 0000000..72908f1
--- /dev/null
+++ b/src/Stopwords/ukrainian.json
@@ -0,0 +1,1279 @@
+[
+  "а",
+  "аби",
+  "абиде",
+  "абиким",
+  "абикого",
+  "абиколи",
+  "абикому",
+  "абикуди",
+  "абихто",
+  "абичий",
+  "абичийого",
+  "абичийому",
+  "абичим",
+  "абичию",
+  "абичия",
+  "абичиє",
+  "абичиєму",
+  "абичиєю",
+  "абичиєї",
+  "абичиї",
+  "абичиїй",
+  "абичиїм",
+  "абичиїми",
+  "абичиїх",
+  "абичого",
+  "абичому",
+  "абищо",
+  "абияка",
+  "абияке",
+  "абиякий",
+  "абияким",
+  "абиякими",
+  "абияких",
+  "абиякого",
+  "абиякому",
+  "абиякою",
+  "абиякої",
+  "абияку",
+  "абиякі",
+  "абиякій",
+  "абиякім",
+  "або",
+  "абощо",
+  "авжеж",
+  "авось",
+  "ага",
+  "адже",
+  "аж",
+  "ажень",
+  "але",
+  "амінь",
+  "ану",
+  "ані",
+  "аніде",
+  "аніж",
+  "анізащо",
+  "аніким",
+  "анікого",
+  "анікогісінько",
+  "аніколи",
+  "анікому",
+  "аніскільки",
+  "аніхто",
+  "анічим",
+  "анічого",
+  "анічогісінько",
+  "анічому",
+  "аніщо",
+  "аніяка",
+  "аніяке",
+  "аніякий",
+  "аніяким",
+  "аніякими",
+  "аніяких",
+  "аніякого",
+  "аніякому",
+  "аніякою",
+  "аніякої",
+  "аніяку",
+  "аніякі",
+  "аніякій",
+  "аніякім",
+  "аніякісенька",
+  "аніякісеньке",
+  "аніякісенький",
+  "аніякісеньким",
+  "аніякісенькими",
+  "аніякісеньких",
+  "аніякісенького",
+  "аніякісенькому",
+  "аніякісенькою",
+  "аніякісенької",
+  "аніякісеньку",
+  "аніякісенькі",
+  "аніякісенькій",
+  "аніякісенькім",
+  "аніякісінька",
+  "аніякісіньке",
+  "аніякісінький",
+  "аніякісіньким",
+  "аніякісінькими",
+  "аніякісіньких",
+  "аніякісінького",
+  "аніякісінькому",
+  "аніякісінькою",
+  "аніякісінької",
+  "аніякісіньку",
+  "аніякісінькі",
+  "аніякісінькій",
+  "аніякісінькім",
+  "ато",
+  "атож",
+  "ач",
+  "ачей",
+  "аякже",
+  "б",
+  "ба",
+  "багато",
+  "багатьма",
+  "багатьом",
+  "багатьох",
+  "без",
+  "би",
+  "бо",
+  "бодай",
+  "був",
+  "буде",
+  "будем",
+  "будемо",
+  "будете",
+  "будеш",
+  "буду",
+  "будуть",
+  "будь",
+  "будьмо",
+  "будьте",
+  "була",
+  "були",
+  "було",
+  "бути",
+  "буцім",
+  "буцімто",
+  "більш",
+  "біля",
+  "в",
+  "вам",
+  "вами",
+  "вас",
+  "ваш",
+  "ваша",
+  "ваше",
+  "вашим",
+  "вашими",
+  "ваших",
+  "вашого",
+  "вашому",
+  "вашою",
+  "вашої",
+  "вашу",
+  "ваші",
+  "вашій",
+  "вашім",
+  "ввесь",
+  "вві",
+  "весь",
+  "вздовж",
+  "ви",
+  "власне",
+  "властиво",
+  "внаслідок",
+  "вниз",
+  "внизу",
+  "вона",
+  "вони",
+  "воно",
+  "вподовж",
+  "впоперек",
+  "впродовж",
+  "все",
+  "всередині",
+  "вслід",
+  "всупереч",
+  "всього",
+  "всьому",
+  "всю",
+  "всюди",
+  "вся",
+  "всяк",
+  "всяка",
+  "всяке",
+  "всякий",
+  "всяким",
+  "всякими",
+  "всяких",
+  "всякого",
+  "всякому",
+  "всякою",
+  "всякої",
+  "всяку",
+  "всякі",
+  "всякій",
+  "всякім",
+  "всі",
+  "всій",
+  "всіляка",
+  "всіляке",
+  "всілякий",
+  "всіляким",
+  "всілякими",
+  "всіляких",
+  "всілякого",
+  "всілякому",
+  "всілякою",
+  "всілякої",
+  "всіляку",
+  "всілякі",
+  "всілякій",
+  "всілякім",
+  "всім",
+  "всіма",
+  "всіх",
+  "всією",
+  "всієї",
+  "втім",
+  "ві",
+  "від",
+  "відколи",
+  "відповідно",
+  "відтепер",
+  "відтоді",
+  "він",
+  "віщо",
+  "віщось",
+  "гаразд",
+  "ге",
+  "геть",
+  "да",
+  "давай",
+  "давати",
+  "де",
+  "дедалі",
+  "деким",
+  "декого",
+  "деколи",
+  "декому",
+  "декотра",
+  "декотре",
+  "декотрий",
+  "декотрим",
+  "декотрими",
+  "декотрих",
+  "декотрого",
+  "декотрому",
+  "декотрою",
+  "декотрої",
+  "декотру",
+  "декотрі",
+  "декотрій",
+  "декотрім",
+  "декілька",
+  "декільком",
+  "декількома",
+  "декількох",
+  "декім",
+  "десь",
+  "дехто",
+  "дечий",
+  "дечийого",
+  "дечийому",
+  "дечим",
+  "дечию",
+  "дечия",
+  "дечиє",
+  "дечиєму",
+  "дечиєю",
+  "дечиєї",
+  "дечиї",
+  "дечиїй",
+  "дечиїм",
+  "дечиїми",
+  "дечиїх",
+  "дечого",
+  "дечому",
+  "дечім",
+  "дещо",
+  "деяка",
+  "деяке",
+  "деякий",
+  "деяким",
+  "деякими",
+  "деяких",
+  "деякого",
+  "деякому",
+  "деякою",
+  "деякої",
+  "деяку",
+  "деякі",
+  "деякій",
+  "деякім",
+  "деінде",
+  "для",
+  "до",
+  "довкола",
+  "доки",
+  "допоки",
+  "допіру",
+  "досі",
+  "дотепер",
+  "доти",
+  "еге",
+  "ж",
+  "же",
+  "жоден",
+  "жодна",
+  "жодне",
+  "жодний",
+  "жодним",
+  "жодними",
+  "жодних",
+  "жодного",
+  "жодному",
+  "жодною",
+  "жодної",
+  "жодну",
+  "жодні",
+  "жодній",
+  "жоднім",
+  "жоднісінька",
+  "жоднісіньке",
+  "жоднісінький",
+  "жоднісіньким",
+  "жоднісінькими",
+  "жоднісіньких",
+  "жоднісінького",
+  "жоднісінькому",
+  "жоднісінькою",
+  "жоднісінької",
+  "жоднісіньку",
+  "жоднісінькі",
+  "жоднісінькій",
+  "жоднісінькім",
+  "з",
+  "за",
+  "завгодно",
+  "завдяки",
+  "завжди",
+  "завше",
+  "задля",
+  "залежно",
+  "замість",
+  "заради",
+  "зараз",
+  "зате",
+  "зверху",
+  "звідки",
+  "звідкилясь",
+  "звідкись",
+  "звідкіль",
+  "звідкіля",
+  "звідкілясь",
+  "звідси",
+  "звідсіль",
+  "звідсіля",
+  "звідти",
+  "звідтіль",
+  "звідтіля",
+  "звідусюди",
+  "звідусіль",
+  "звідціля",
+  "здовж",
+  "ззаду",
+  "зо",
+  "зсередини",
+  "зі",
+  "ич",
+  "й",
+  "його",
+  "йому",
+  "ким",
+  "кимось",
+  "кимсь",
+  "кого",
+  "когось",
+  "кожен",
+  "кожна",
+  "кожне",
+  "кожний",
+  "кожним",
+  "кожними",
+  "кожних",
+  "кожного",
+  "кожному",
+  "кожною",
+  "кожної",
+  "кожну",
+  "кожні",
+  "кожній",
+  "кожнім",
+  "кожнісінька",
+  "кожнісіньке",
+  "кожнісінький",
+  "кожнісіньким",
+  "кожнісінькими",
+  "кожнісіньких",
+  "кожнісінького",
+  "кожнісінькому",
+  "кожнісінькою",
+  "кожнісінької",
+  "кожнісіньку",
+  "кожнісінькі",
+  "кожнісінькій",
+  "кожнісінькім",
+  "коли",
+  "колись",
+  "коло",
+  "кому",
+  "комусь",
+  "котра",
+  "котрась",
+  "котре",
+  "котресь",
+  "котрий",
+  "котрийсь",
+  "котрим",
+  "котрими",
+  "котримись",
+  "котримось",
+  "котримсь",
+  "котрих",
+  "котрихось",
+  "котрихсь",
+  "котрого",
+  "котрогось",
+  "котрому",
+  "котромусь",
+  "котрою",
+  "котроюсь",
+  "котрої",
+  "котроїсь",
+  "котру",
+  "котрусь",
+  "котрі",
+  "котрій",
+  "котрійсь",
+  "котрім",
+  "котрімсь",
+  "котрісь",
+  "край",
+  "круг",
+  "кругом",
+  "крізь",
+  "крім",
+  "куди",
+  "кудись",
+  "кудою",
+  "кілька",
+  "кільком",
+  "кількома",
+  "кількох",
+  "кім",
+  "кімось",
+  "кімсь",
+  "кінець",
+  "ледве",
+  "ледь",
+  "лиш",
+  "лише",
+  "лишень",
+  "майже",
+  "мене",
+  "мені",
+  "мерсі",
+  "ми",
+  "мною",
+  "мов",
+  "мовби",
+  "мовбито",
+  "могла",
+  "могли",
+  "могло",
+  "мого",
+  "могти",
+  "може",
+  "можем",
+  "можемо",
+  "можете",
+  "можеш",
+  "можна",
+  "можу",
+  "можуть",
+  "можіть",
+  "мою",
+  "моя",
+  "моє",
+  "моєму",
+  "моєю",
+  "моєї",
+  "мої",
+  "моїй",
+  "моїм",
+  "моїми",
+  "моїх",
+  "міг",
+  "між",
+  "мій",
+  "на",
+  "навколо",
+  "навкруг",
+  "навпаки",
+  "навперейми",
+  "навпроти",
+  "навіть",
+  "навіщо",
+  "навіщось",
+  "над",
+  "надо",
+  "наді",
+  "нам",
+  "нами",
+  "наперед",
+  "напередодні",
+  "наперекір",
+  "напереріз",
+  "наприкінці",
+  "напроти",
+  "нас",
+  "насеред",
+  "насупроти",
+  "нате",
+  "наче",
+  "начеб",
+  "начебто",
+  "наш",
+  "наша",
+  "наше",
+  "нашим",
+  "нашими",
+  "наших",
+  "нашого",
+  "нашому",
+  "нашою",
+  "нашої",
+  "нашу",
+  "наші",
+  "нашій",
+  "нашім",
+  "не",
+  "неабичим",
+  "неабичого",
+  "неабичому",
+  "неабищо",
+  "небагато",
+  "небагатьма",
+  "небагатьом",
+  "небагатьох",
+  "невважаючи",
+  "невже",
+  "незважаючи",
+  "немов",
+  "немовби",
+  "немовбито",
+  "неначе",
+  "неначебто",
+  "нехай",
+  "нею",
+  "неї",
+  "нижче",
+  "ним",
+  "ними",
+  "них",
+  "но",
+  "ну",
+  "нумо",
+  "нумте",
+  "нього",
+  "ньому",
+  "ні",
+  "ніби",
+  "нібито",
+  "ніде",
+  "ніж",
+  "нізащо",
+  "нізвідки",
+  "нізвідкіля",
+  "ній",
+  "ніким",
+  "нікого",
+  "нікогісінько",
+  "ніколи",
+  "нікому",
+  "нікотра",
+  "нікотре",
+  "нікотрий",
+  "нікотрим",
+  "нікотрими",
+  "нікотрих",
+  "нікотрого",
+  "нікотрому",
+  "нікотрою",
+  "нікотрої",
+  "нікотру",
+  "нікотрі",
+  "нікотрій",
+  "нікотрім",
+  "нікуди",
+  "нім",
+  "нінащо",
+  "ніскільки",
+  "ніхто",
+  "нічий",
+  "нічийна",
+  "нічийне",
+  "нічийний",
+  "нічийним",
+  "нічийними",
+  "нічийних",
+  "нічийного",
+  "нічийному",
+  "нічийною",
+  "нічийної",
+  "нічийну",
+  "нічийні",
+  "нічийній",
+  "нічийнім",
+  "нічийого",
+  "нічийому",
+  "нічим",
+  "нічию",
+  "нічия",
+  "нічиє",
+  "нічиєму",
+  "нічиєю",
+  "нічиєї",
+  "нічиї",
+  "нічиїй",
+  "нічиїм",
+  "нічиїми",
+  "нічиїх",
+  "нічого",
+  "нічому",
+  "ніщо",
+  "ніяк",
+  "ніяка",
+  "ніяке",
+  "ніякий",
+  "ніяким",
+  "ніякими",
+  "ніяких",
+  "ніякого",
+  "ніякому",
+  "ніякою",
+  "ніякої",
+  "ніяку",
+  "ніякі",
+  "ніякій",
+  "ніякім",
+  "ніякісінька",
+  "ніякісіньке",
+  "ніякісінький",
+  "ніякісіньким",
+  "ніякісінькими",
+  "ніякісіньких",
+  "ніякісінького",
+  "ніякісінькому",
+  "ніякісінькою",
+  "ніякісінької",
+  "ніякісіньку",
+  "ніякісінькі",
+  "ніякісінькій",
+  "ніякісінькім",
+  "о",
+  "об",
+  "обабіч",
+  "обік",
+  "обіч",
+  "од",
+  "один",
+  "одна",
+  "однак",
+  "одначе",
+  "одне",
+  "одним",
+  "одними",
+  "одних",
+  "одно",
+  "одного",
+  "одному",
+  "одною",
+  "одної",
+  "одну",
+  "одні",
+  "одній",
+  "однім",
+  "однією",
+  "однієї",
+  "окрай",
+  "округ",
+  "округи",
+  "окрім",
+  "он",
+  "онде",
+  "онно",
+  "оно",
+  "опріч",
+  "опісля",
+  "осе",
+  "оскільки",
+  "ось",
+  "осісьо",
+  "от",
+  "ота",
+  "отак",
+  "отака",
+  "отаке",
+  "отакий",
+  "отаким",
+  "отакими",
+  "отаких",
+  "отакого",
+  "отакому",
+  "отакою",
+  "отакої",
+  "отаку",
+  "отакі",
+  "отакій",
+  "отакім",
+  "отакісінька",
+  "отакісіньке",
+  "отакісінький",
+  "отакісіньким",
+  "отакісінькими",
+  "отакісіньких",
+  "отакісінького",
+  "отакісінькому",
+  "отакісінькою",
+  "отакісінької",
+  "отакісіньку",
+  "отакісінькі",
+  "отакісінькій",
+  "отакісінькім",
+  "отам",
+  "оте",
+  "отже",
+  "отим",
+  "отими",
+  "отих",
+  "ото",
+  "отого",
+  "отож",
+  "отой",
+  "отому",
+  "отою",
+  "отої",
+  "отсе",
+  "оттак",
+  "отто",
+  "оту",
+  "отут",
+  "оті",
+  "отій",
+  "отім",
+  "отією",
+  "отієї",
+  "оце",
+  "оцей",
+  "оцим",
+  "оцими",
+  "оцих",
+  "оцього",
+  "оцьому",
+  "оцю",
+  "оця",
+  "оці",
+  "оцій",
+  "оцім",
+  "оцією",
+  "оцієї",
+  "пак",
+  "перед",
+  "перетака",
+  "перетаке",
+  "перетакий",
+  "перетаким",
+  "перетакими",
+  "перетаких",
+  "перетакого",
+  "перетакому",
+  "перетакою",
+  "перетакої",
+  "перетаку",
+  "перетакі",
+  "перетакій",
+  "перетакім",
+  "по",
+  "поблизу",
+  "побік",
+  "побіч",
+  "поверх",
+  "повз",
+  "повздовж",
+  "повсюди",
+  "повсюдно",
+  "подекуди",
+  "подеяка",
+  "подеяке",
+  "подеякий",
+  "подеяким",
+  "подеякими",
+  "подеяких",
+  "подеякого",
+  "подеякому",
+  "подеякою",
+  "подеякої",
+  "подеяку",
+  "подеякі",
+  "подеякій",
+  "подеякім",
+  "подовж",
+  "поза",
+  "позад",
+  "позаду",
+  "позата",
+  "позате",
+  "позатим",
+  "позатими",
+  "позатих",
+  "позатого",
+  "позатой",
+  "позатому",
+  "позатою",
+  "позатої",
+  "позату",
+  "позаті",
+  "позатій",
+  "позатім",
+  "позатією",
+  "позатієї",
+  "позаяк",
+  "поздовж",
+  "поки",
+  "покрай",
+  "покіль",
+  "поміж",
+  "понад",
+  "понадо",
+  "понижче",
+  "поперед",
+  "попереду",
+  "поперек",
+  "попліч",
+  "попри",
+  "попросту",
+  "попід",
+  "поруч",
+  "поряд",
+  "посеред",
+  "посередині",
+  "потім",
+  "поуз",
+  "прецінь",
+  "при",
+  "притому",
+  "причому",
+  "причім",
+  "про",
+  "проміж",
+  "просто",
+  "проте",
+  "проти",
+  "протягом",
+  "під",
+  "підо",
+  "після",
+  "раз",
+  "раніше",
+  "сам",
+  "сама",
+  "саме",
+  "сами",
+  "самий",
+  "самим",
+  "самими",
+  "самих",
+  "само",
+  "самого",
+  "самому",
+  "самою",
+  "самої",
+  "саму",
+  "самі",
+  "самій",
+  "самім",
+  "свого",
+  "свою",
+  "своя",
+  "своє",
+  "своєму",
+  "своєю",
+  "своєї",
+  "свої",
+  "своїй",
+  "своїм",
+  "своїми",
+  "своїх",
+  "свій",
+  "се",
+  "себе",
+  "себто",
+  "серед",
+  "сиріч",
+  "скрізь",
+  "скільки",
+  "скількись",
+  "скільком",
+  "скількома",
+  "скількомась",
+  "скількомось",
+  "скількомсь",
+  "скількох",
+  "скількохось",
+  "скількохсь",
+  "собою",
+  "собі",
+  "спереду",
+  "справді",
+  "стільки",
+  "стільком",
+  "стількома",
+  "стількох",
+  "супроти",
+  "супротив",
+  "суть",
+  "сюди",
+  "сяка",
+  "сяке",
+  "сякий",
+  "сяким",
+  "сякими",
+  "сяких",
+  "сякого",
+  "сякому",
+  "сякою",
+  "сякої",
+  "сяку",
+  "сякі",
+  "сякій",
+  "сякім",
+  "та",
+  "так",
+  "така",
+  "таке",
+  "такенна",
+  "такенне",
+  "такенний",
+  "такенним",
+  "такенними",
+  "такенних",
+  "такенного",
+  "такенному",
+  "такенною",
+  "такенної",
+  "такенну",
+  "такенні",
+  "такенній",
+  "такеннім",
+  "таки",
+  "такий",
+  "таким",
+  "такими",
+  "таких",
+  "такого",
+  "також",
+  "такому",
+  "такою",
+  "такої",
+  "таку",
+  "такі",
+  "такій",
+  "такім",
+  "такісінька",
+  "такісіньке",
+  "такісінький",
+  "такісіньким",
+  "такісінькими",
+  "такісіньких",
+  "такісінького",
+  "такісінькому",
+  "такісінькою",
+  "такісінької",
+  "такісіньку",
+  "такісінькі",
+  "такісінькій",
+  "такісінькім",
+  "там",
+  "тамки",
+  "тамта",
+  "тамте",
+  "тамтим",
+  "тамтими",
+  "тамтих",
+  "тамтого",
+  "тамтой",
+  "тамтому",
+  "тамтою",
+  "тамтої",
+  "тамту",
+  "тамті",
+  "тамтій",
+  "тамтім",
+  "тамтією",
+  "тамтієї",
+  "твого",
+  "твою",
+  "твоя",
+  "твоє",
+  "твоєму",
+  "твоєю",
+  "твоєї",
+  "твої",
+  "твоїй",
+  "твоїм",
+  "твоїми",
+  "твоїх",
+  "твій",
+  "те",
+  "тебе",
+  "теж",
+  "тепер",
+  "теперечки",
+  "теє",
+  "ти",
+  "тим",
+  "тими",
+  "тих",
+  "то",
+  "тобою",
+  "тобто",
+  "тобі",
+  "того",
+  "тоді",
+  "тож",
+  "той",
+  "тому",
+  "тощо",
+  "тою",
+  "тої",
+  "ту",
+  "туди",
+  "тудою",
+  "тут",
+  "тутеньки",
+  "тутечки",
+  "тутки",
+  "ті",
+  "тій",
+  "тільки",
+  "тім",
+  "тією",
+  "тієї",
+  "у",
+  "ув",
+  "увесь",
+  "уві",
+  "угу",
+  "уздовж",
+  "унаслідок",
+  "уподовж",
+  "упоперек",
+  "упродовж",
+  "усе",
+  "услід",
+  "усупереч",
+  "усього",
+  "усьому",
+  "усю",
+  "усюди",
+  "уся",
+  "усяк",
+  "усяка",
+  "усяке",
+  "усякий",
+  "усяким",
+  "усякими",
+  "усяких",
+  "усякого",
+  "усякому",
+  "усякою",
+  "усякої",
+  "усяку",
+  "усякі",
+  "усякій",
+  "усякім",
+  "усі",
+  "усій",
+  "усіляка",
+  "усіляке",
+  "усілякий",
+  "усіляким",
+  "усілякими",
+  "усіляких",
+  "усілякого",
+  "усілякому",
+  "усілякою",
+  "усілякої",
+  "усіляку",
+  "усілякі",
+  "усілякій",
+  "усілякім",
+  "усім",
+  "усіма",
+  "усіх",
+  "усією",
+  "усієї",
+  "утім",
+  "хай",
+  "хоч",
+  "хоча",
+  "хто",
+  "хтось",
+  "хіба",
+  "це",
+  "цебто",
+  "цей",
+  "цим",
+  "цими",
+  "цих",
+  "цього",
+  "цьому",
+  "цю",
+  "ця",
+  "ці",
+  "цій",
+  "цім",
+  "цією",
+  "цієї",
+  "чень",
+  "через",
+  "чи",
+  "чий",
+  "чийого",
+  "чийогось",
+  "чийому",
+  "чийомусь",
+  "чийсь",
+  "чим",
+  "чимось",
+  "чимсь",
+  "чию",
+  "чиюсь",
+  "чия",
+  "чиясь",
+  "чиє",
+  "чиєму",
+  "чиємусь",
+  "чиєсь",
+  "чиєю",
+  "чиєюсь",
+  "чиєї",
+  "чиєїсь",
+  "чиї",
+  "чиїй",
+  "чиїйсь",
+  "чиїм",
+  "чиїми",
+  "чиїмись",
+  "чиїмось",
+  "чиїмсь",
+  "чиїсь",
+  "чиїх",
+  "чиїхось",
+  "чиїхсь",
+  "чого",
+  "чогось",
+  "чому",
+  "чомусь",
+  "чортзна",
+  "чім",
+  "чімось",
+  "чімсь",
+  "шляхом",
+  "ще",
+  "що",
+  "щоб",
+  "щоби",
+  "щодо",
+  "щойно",
+  "щоправда",
+  "щось",
+  "я",
+  "як",
+  "яка",
+  "якась",
+  "якби",
+  "яке",
+  "якесь",
+  "який",
+  "якийсь",
+  "яким",
+  "якими",
+  "якимись",
+  "якимось",
+  "якимсь",
+  "яких",
+  "якихось",
+  "якихсь",
+  "якого",
+  "якогось",
+  "якому",
+  "якомусь",
+  "якось",
+  "якою",
+  "якоюсь",
+  "якої",
+  "якоїсь",
+  "якраз",
+  "яку",
+  "якусь",
+  "якщо",
+  "які",
+  "якій",
+  "якійсь",
+  "якім",
+  "якімсь",
+  "якісь",
+  "є",
+  "єси",
+  "і",
+  "ібн",
+  "із",
+  "ізсередини",
+  "інакша",
+  "інакше",
+  "інакший",
+  "інакшим",
+  "інакшими",
+  "інакших",
+  "інакшого",
+  "інакшому",
+  "інакшою",
+  "інакшої",
+  "інакшу",
+  "інакші",
+  "інакшій",
+  "інакшім",
+  "інколи",
+  "іноді",
+  "інша",
+  "інше",
+  "інший",
+  "іншим",
+  "іншими",
+  "інших",
+  "іншого",
+  "іншому",
+  "іншою",
+  "іншої",
+  "іншу",
+  "інші",
+  "іншій",
+  "іншім",
+  "іще",
+  "їй",
+  "їм",
+  "їх",
+  "їхнього",
+  "їхньому",
+  "їхньою",
+  "їхньої",
+  "їхню",
+  "їхня",
+  "їхнє",
+  "їхні",
+  "їхній",
+  "їхнім",
+  "їхніми",
+  "їхніх",
+  "її"
+]
diff --git a/src/Tokenizer/HtmlTokenizer.php b/src/Tokenizer/HtmlTokenizer.php
new file mode 100644
index 0000000..00ce400
--- /dev/null
+++ b/src/Tokenizer/HtmlTokenizer.php
@@ -0,0 +1,90 @@
+<?php
+
+namespace WpBlocks\Search\Tokenizer;
+
+use WpBlocks\Search\Compat\WP_HTML_Tag_Processor;
+use WpBlocks\Search\Helpers\Str;
+
+/**
+ * Streams the tags of an HTML string, and the words found between them, as tokens.
+ *
+ * Tokens come out in document order: the words of a text run are returned before the
+ * tag that follows them, and text before the first tag or after the last one is kept.
+ */
+class HtmlTokenizer
+{
+    private WP_HTML_Tag_Processor $processor;
+
+    // NOTE(review): never advanced in this class, so every token is created with 0 — confirm.
+    private int $cursor = 0;
+
+    /** Offset of the last byte of the previous tag; -1 so leading text starts at offset 0. */
+    private int $lastTokenEndsAt = -1;
+
+    /** Byte length of the document, used to find text after the last tag. */
+    private int $length;
+
+    /**
+     * Tokens already produced but not yet handed out by nextToken().
+     *
+     * @var Token[]
+     */
+    private array $buffer = [];
+
+    public function __construct(string $html)
+    {
+        $this->processor = new WP_HTML_Tag_Processor($html);
+        $this->length = strlen($html);
+    }
+
+    /**
+     * Returns the next word or tag token, or null once the document is exhausted.
+     */
+    public function nextToken(): ?Token
+    {
+        // Keep reading tags until one queues a token or there is nothing left to read.
+        while ($this->buffer === [] && $this->readNextTag()) {
+            continue;
+        }
+
+        // array_shift() yields null on an empty queue, which is the end-of-input signal.
+        return array_shift($this->buffer);
+    }
+
+    /** Moves the processor to the next tag, visiting closing tags as well as openers. */
+    private function nextTag(): bool
+    {
+        return $this->processor->next_tag(['tag_closers' => 'visit']);
+    }
+
+    /**
+     * Advances to the next tag and queues the words before it followed by the tag itself.
+     * When no tag is left, queues the words between the last tag and the end of the input.
+     *
+     * @return bool Whether a tag was found.
+     */
+    private function readNextTag(): bool
+    {
+        $found = $this->nextTag();
+        $textStartsAt = $this->lastTokenEndsAt + 1;
+        $textEndsAt = $found ? $this->processor->get_token_starts_at() : $this->length;
+
+        // A gap between the previous tag and this position is text to split into words.
+        if ($textEndsAt > $textStartsAt) {
+            $chunk = $this->processor->substr($textStartsAt, $textEndsAt - $textStartsAt);
+            foreach (Str::splitWords($chunk) as $word) {
+                $this->buffer[] = new Token($this->cursor, $word);
+            }
+        }
+
+        $tag = $found ? $this->processor->get_tag() : null;
+        if ($tag !== null) {
+            $this->buffer[] = new Token($this->cursor, $tag, true);
+        }
+
+        // After the last tag, mark the whole input consumed so the tail is queued only once.
+        $this->lastTokenEndsAt = $found ? $this->processor->get_token_ends_at() : $this->length - 1;
+
+        return $found;
+    }
+}
diff --git a/src/Tokenizer/Tokenizer.php b/src/Tokenizer/Tokenizer.php
index a1ded39..d62f1ec 100644
--- a/src/Tokenizer/Tokenizer.php
+++ b/src/Tokenizer/Tokenizer.php
@@ -10,10 +10,10 @@ class Tokenizer
 
     private int $cursor = 0;
 
-    public function __construct($text)
+    public function __construct(string $text)
     {
-        Str::splitOnWhitespace($text);
         $this->text = $text;
+        Str::splitWords($this->text); // NOTE(review): the return value is discarded here — confirm this call is still needed.
     }
 
     public function getToken(): Token