Skip to content

Commit

Permalink
Merge pull request #73 from alexander-nitsche/feature-handle-hyphenations-case-insensitive
Browse files (browse the repository at this point in the history)

FEATURE: Use `\hyphenations` case-insensitive (like `\patterns`)
  • Loading branch information
vanderlee authored May 25, 2023
2 parents d47fe93 + 774ad9a commit 8ee452b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 5 deletions.
42 changes: 37 additions & 5 deletions src/Syllable.php
Original file line number Diff line number Diff line change
Expand Up @@ -748,19 +748,51 @@ private function parseWord($word)
{
$wordLength = mb_strlen($word);

// Is this word smaller than the minimal length requirement?
if ($wordLength < $this->minHyphenLeft + $this->minHyphenRight
|| $wordLength < $this->minWordLength) {
return [$word];
}

// Is it a pre-hyphenated word?
if (isset($this->hyphenation[$word])) {
return mb_split('-', $this->hyphenation[$word]);
$wordLowerCased = mb_strtolower($word);

if (isset($this->hyphenation[$wordLowerCased])) {
return $this->parseWordByHyphenation($word, $wordLowerCased);
} else {
return $this->parseWordByPatterns($word, $wordLength, $wordLowerCased);
}
}

/**
 * Split a word at the break points of its pre-hyphenated dictionary entry.
 *
 * The entry is looked up in $this->hyphenation under the lower-cased word,
 * but the returned parts are cut from the original $word, so the casing of
 * the input is preserved: an entry 'as-so-ciate' applied to 'ASSOCIATE'
 * yields ['AS', 'SO', 'CIATE'].
 *
 * @param string      $word           Word as it appears in the text.
 * @param string|null $wordLowerCased Lower-cased form of $word; computed here when omitted.
 *
 * @return string[] Parts of $word, in order.
 */
private function parseWordByHyphenation($word, $wordLowerCased = null)
{
    $wordLowerCased = $wordLowerCased ?: mb_strtolower($word);

    $hyphenation = $this->hyphenation[$wordLowerCased];
    $hyphenationLength = mb_strlen($hyphenation);

    $parts = [];
    $part = '';
    // $i walks the hyphenated entry, $j walks the original word. $j only
    // advances on non-hyphen characters, so both stay aligned on the same letter.
    for ($i = 0, $j = 0; $i < $hyphenationLength; $i++) {
        if (mb_substr($hyphenation, $i, 1) !== '-') {
            $part .= mb_substr($word, $j++, 1);
        } else {
            $parts[] = $part;
            $part = '';
        }
    }
    // Compare against '' explicitly: empty('0') is true in PHP, so a trailing
    // part consisting solely of "0" would otherwise be silently discarded.
    if ($part !== '') {
        $parts[] = $part;
    }

    return $parts;
}

private function parseWordByPatterns($word, $wordLength = 0, $wordLowerCased = null)
{
$wordLength = $wordLength > 0 ? $wordLength : mb_strlen($word);
$wordLowerCased = $wordLowerCased ?: mb_strtolower($word);

// Convenience array
$text = '.'.mb_strtolower($word).'.';
$text = '.'.$wordLowerCased.'.';
$textLength = $wordLength + 2;
$patternLength = $this->maxPattern < $textLength
? $this->maxPattern
Expand Down
5 changes: 5 additions & 0 deletions tests/src/SyllableTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,13 @@ public function testCaseInsensitivity()
{
$this->object->setHyphen('-');

// Patterns
$this->assertEquals(['IN', 'EX', 'PLIC', 'A', 'BLE'], $this->object->splitText('INEXPLICABLE'));
$this->assertEquals(['in', 'ex', 'plic', 'a', 'ble'], $this->object->splitText('inexplicable'));

// Hyphenations
$this->assertEquals(['as', 'so', 'ciate'], $this->object->splitText('associate'));
$this->assertEquals(['AS', 'SO', 'CIATE'], $this->object->splitText('ASSOCIATE'));
}

/**
Expand Down

0 comments on commit 8ee452b

Please sign in to comment.