diff --git a/composer.json b/composer.json index d78bec7..ca07f32 100644 --- a/composer.json +++ b/composer.json @@ -12,5 +12,8 @@ "psr-4": { "Talmp\\Phputils\\": "src/" } + }, + "require": { + "voku/portable-ascii": "^2.0" } } diff --git a/composer.lock b/composer.lock index ea99336..a28e477 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,83 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "b397ea7d033393337c327b277a69ed5c", - "packages": [], + "content-hash": "593a89abdaffe1149e9528679c010bb2", + "packages": [ + { + "name": "voku/portable-ascii", + "version": "2.0.1", + "source": { + "type": "git", + "url": "https://github.com/voku/portable-ascii.git", + "reference": "b56450eed252f6801410d810c8e1727224ae0743" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/voku/portable-ascii/zipball/b56450eed252f6801410d810c8e1727224ae0743", + "reference": "b56450eed252f6801410d810c8e1727224ae0743", + "shasum": "" + }, + "require": { + "php": ">=7.0.0" + }, + "require-dev": { + "phpunit/phpunit": "~6.0 || ~7.0 || ~9.0" + }, + "suggest": { + "ext-intl": "Use Intl for transliterator_transliterate() support" + }, + "type": "library", + "autoload": { + "psr-4": { + "voku\\": "src/voku/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Lars Moelleken", + "homepage": "http://www.moelleken.org/" + } + ], + "description": "Portable ASCII library - performance optimized (ascii) string functions for php.", + "homepage": "https://github.com/voku/portable-ascii", + "keywords": [ + "ascii", + "clean", + "php" + ], + "support": { + "issues": "https://github.com/voku/portable-ascii/issues", + "source": "https://github.com/voku/portable-ascii/tree/2.0.1" + }, + "funding": [ + { + "url": "https://www.paypal.me/moelleken", + "type": "custom" + }, + { + "url": "https://github.com/voku", + "type": "github" + }, + { + "url": "https://opencollective.com/portable-ascii", + "type": "open_collective" + }, + { + "url": "https://www.patreon.com/voku", + "type": "patreon" + }, + { + "url": "https://tidelift.com/funding/github/packagist/voku/portable-ascii", + "type": "tidelift" + } + ], + "time": "2022-03-08T17:03:00+00:00" + } + ], "packages-dev": [ { "name": "doctrine/instantiator", @@ -2978,5 +3053,5 @@ "prefer-lowest": false, "platform": [], "platform-dev": [], - "plugin-api-version": "2.2.0" + "plugin-api-version": "2.3.0" } diff --git a/src/StrUtil.php b/src/StrUtil.php index 2e03740..0e7ebb9 100644 --- a/src/StrUtil.php +++ b/src/StrUtil.php @@ -2,8 +2,64 @@ namespace Talmp\Phputils; +use voku\helper\ASCII; + class StrUtil { + public static function toSearchablePhrases( + string $string, + string $separator = ' ', + int $limit = PHP_INT_MAX, + int $min_length = 1, + ): array { + $explode_arr = explode($separator, $string, $limit); + + $result = []; + + // in case input string is not ascii + // eg: léon + // and with min_length = 1 + // we will split it into + // ['l', 'é', 'e', 'o', 'n', 'lé', 'le', 'éo', 'eo', 'on', 'léo', 'leo', 'éon', 'eon', 'léon', 'leon'] + + foreach ($explode_arr as $sub_string) { + $mb_str_split = mb_str_split($sub_string); + + $sub_string_length = count($mb_str_split); + $pointer = 0; + + while (true) { + if ($pointer > $sub_string_length - 1) { + break; + } + + $length = 1; + + while ($pointer + $length < $sub_string_length + 1) { + $result[implode('', array_slice($mb_str_split, $pointer, $length))] = true; + $result[static::ascii(implode('', array_slice($mb_str_split, $pointer, $length)))] = true; + $length += 1; + } + + $pointer += 1; + } + } + + return array_keys($result); + } + + /** + * Transliterate a UTF-8 value to ASCII. + * + * @param string $value + * @param string $language + * @return string + */ + public static function ascii(string $value, string $language = 'en') + { + return ASCII::to_ascii($value, $language); + } + public static function replaceOnceIndex( array $searches, array $indexes, diff --git a/tests/StrUtilTest.php b/tests/StrUtilTest.php index f3b7395..55694b7 100644 --- a/tests/StrUtilTest.php +++ b/tests/StrUtilTest.php @@ -5,6 +5,13 @@ class StrUtilTest extends TestCase { + public function test_to_searchable_phrases() + { + $this->assertEquals(16, count(StrUtil::toSearchablePhrases('léon'))); + + $this->assertEquals(32, count(StrUtil::toSearchablePhrases('amélie'))); + } + public function test_replace_once() { // case 0