Skip to content

Commit

Permalink
Merge pull request #36 from yellowtree/libxmlOptions
Browse files Browse the repository at this point in the history
Possibility to omit Doctype for hyphenateHtml()
  • Loading branch information
vanderlee authored Dec 10, 2019
2 parents c8123b5 + df8e0c0 commit 1e12a7c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
17 changes: 16 additions & 1 deletion src/Syllable.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ class Syllable
private $excludes = array();
private $includes = array();

/**
* Bitmask of libxml option constants handed to DOMDocument::loadHTML()
* when hyphenateHtml() parses its input. Defaults to 0.
*
* @var int
*/
private $libxmlOptions = 0;

/**
* Create a new Syllable class, with defaults
*
Expand Down Expand Up @@ -195,6 +200,16 @@ public function getMinWordLength()
return $this->min_word_length;
}

/**
* Set the libxml option flags used when parsing HTML.
*
* The value is stored on this instance and passed as the options argument
* to DOMDocument::loadHTML() by hyphenateHtml(). For example,
* LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD makes hyphenateHtml() return
* the markup without the doctype and the implied html/body wrapper elements.
*
* @param int $libxmlOptions Bitmask of LIBXML_* constants.
* @see https://www.php.net/manual/en/libxml.constants.php
*/
public function setLibxmlOptions($libxmlOptions)
{
$this->libxmlOptions = $libxmlOptions;
}

private static function initEncoding()
{
if (self::$encoding) {
Expand Down Expand Up @@ -436,7 +451,7 @@ public function hyphenateHtml($html)
{
$dom = new \DOMDocument();
$dom->resolveExternals = true;
$dom->loadHTML($html);
$dom->loadHTML($html, $this->libxmlOptions);

// filter excludes
$xpath = new \DOMXPath($dom);
Expand Down
4 changes: 4 additions & 0 deletions tests/SyllableTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ public function testHyphenateHtml()
$this->assertEquals('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">'
. "\n" . '<html><body><p>Ridicu-lous-ly <b class="unsplittable">com-pli-cat-ed</b> meta-text</p></body></html>'
. "\n", $this->object->hyphenateHtml('Ridiculously <b class="unsplittable">complicated</b> metatext'));

$this->object->setLibxmlOptions(LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
$this->assertEquals('<p>Ridicu-lous-ly <b class="unsplittable">com-pli-cat-ed</b> meta-text</p>'
. "\n", $this->object->hyphenateHtml('Ridiculously <b class="unsplittable">complicated</b> metatext'));
}

public function testCaseInsensitivity()
Expand Down

0 comments on commit 1e12a7c

Please sign in to comment.