diff --git a/src/Syllable.php b/src/Syllable.php index 7985661..2d3f7bd 100644 --- a/src/Syllable.php +++ b/src/Syllable.php @@ -62,6 +62,11 @@ class Syllable private $excludes = array(); private $includes = array(); + /** + * @var int + */ + private $libxmlOptions = 0; + /** * Create a new Syllable class, with defaults * @@ -195,6 +200,16 @@ public function getMinWordLength() return $this->min_word_length; } + /** + * Bitmask of LIBXML_* options passed to DOMDocument::loadHTML() when parsing HTML in hyphenateHtml() + * @param int $libxmlOptions + * @see https://www.php.net/manual/en/libxml.constants.php + */ + public function setLibxmlOptions($libxmlOptions) + { + $this->libxmlOptions = $libxmlOptions; + } + private static function initEncoding() { if (self::$encoding) { @@ -436,7 +451,7 @@ public function hyphenateHtml($html) { $dom = new \DOMDocument(); $dom->resolveExternals = true; - $dom->loadHTML($html); + $dom->loadHTML($html, $this->libxmlOptions); // filter excludes $xpath = new \DOMXPath($dom); diff --git a/tests/SyllableTest.php b/tests/SyllableTest.php index 86c72d2..1c03d42 100644 --- a/tests/SyllableTest.php +++ b/tests/SyllableTest.php @@ -222,6 +222,10 @@ public function testHyphenateHtml() $this->assertEquals('' . "\n" . '
Ridicu-lous-ly com-pli-cat-ed meta-text
' . "\n", $this->object->hyphenateHtml('Ridiculously complicated metatext')); + + $this->object->setLibxmlOptions(LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + $this->assertEquals('Ridicu-lous-ly com-pli-cat-ed meta-text
' + . "\n", $this->object->hyphenateHtml('Ridiculously complicated metatext')); } public function testCaseInsensitivity()