From 22ab9093112ce51ec799aeac72f7be7813c2b6bc Mon Sep 17 00:00:00 2001 From: Daniel Kurowski Date: Thu, 18 May 2023 10:14:00 +0200 Subject: [PATCH 1/3] update readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 6889b07..91e2d7d 100755 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +# This fork + +The original repo is not maintained anymore. This fork is intended to keep the codebase up to date with current PHP versions. + PHP Html Parser ========================== From 026b468ea49a54e9077d9eed192ec9f555fe4f1e Mon Sep 17 00:00:00 2001 From: Daniel Kurowski Date: Thu, 18 May 2023 10:18:31 +0200 Subject: [PATCH 2/3] cs --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 166886f..6b978b9 100755 --- a/composer.json +++ b/composer.json @@ -31,7 +31,7 @@ "friendsofphp/php-cs-fixer": "^2.16" }, "autoload": { - "psr-4": { + "psr-4": { "PHPHtmlParser\\": "src/PHPHtmlParser" } } From 3c36dea6779885e3134f26aab8e328244b87e3e9 Mon Sep 17 00:00:00 2001 From: Daniel Kurowski Date: Thu, 18 May 2023 11:08:29 +0200 Subject: [PATCH 3/3] wip --- composer.json | 2 +- src/PHPHtmlParser/DTO/Tag/AttributeDTO.php | 11 ++++++----- src/PHPHtmlParser/Dom/Node/AbstractNode.php | 7 ++++--- src/PHPHtmlParser/Dom/Node/InnerNode.php | 5 +++-- src/PHPHtmlParser/Dom/Node/TextNode.php | 4 ++-- src/PHPHtmlParser/Dom/Parser.php | 20 +++++++++++--------- src/PHPHtmlParser/Dom/Tag.php | 16 ++++++++++------ tests/Node/TextTest.php | 9 +++++---- 8 files changed, 42 insertions(+), 32 deletions(-) diff --git a/composer.json b/composer.json index 6b978b9..903a093 100755 --- a/composer.json +++ b/composer.json @@ -17,7 +17,7 @@ "ext-mbstring": "*", "ext-zlib": "*", "ext-curl": "*", - "paquettg/string-encode": "~1.0.0", + "paquettg/string-encode": "^2.1", "php-http/httplug": "^2.1", "guzzlehttp/guzzle": "^7.0", "guzzlehttp/psr7": "^1.6", diff --git a/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php 
b/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php index 3e7e182..277a5ec 100755 --- a/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php +++ b/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php @@ -4,8 +4,9 @@ namespace PHPHtmlParser\DTO\Tag; -use stringEncode\Encode; -use stringEncode\Exception; +use StringEncoder\Contracts\EncoderInterface; +use StringEncoder\Exceptions\InvalidEncodingException; + final class AttributeDTO { @@ -51,10 +52,10 @@ public function htmlspecialcharsDecode(): void } /** - * @throws Exception + * @throws InvalidEncodingException */ - public function encodeValue(Encode $encode) + public function encodeValue(EncoderInterface $encode) { - $this->value = $encode->convert($this->value); + $this->value = $encode->convert()->fromString($this->value)->toString(); } } diff --git a/src/PHPHtmlParser/Dom/Node/AbstractNode.php b/src/PHPHtmlParser/Dom/Node/AbstractNode.php index 897445b..eb14a0d 100644 --- a/src/PHPHtmlParser/Dom/Node/AbstractNode.php +++ b/src/PHPHtmlParser/Dom/Node/AbstractNode.php @@ -12,7 +12,8 @@ use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException; use PHPHtmlParser\Finder; use PHPHtmlParser\Selector\Selector; -use stringEncode\Encode; +use StringEncoder\Contracts\EncoderInterface; + /** * Dom node object. @@ -57,7 +58,7 @@ abstract class AbstractNode /** * The encoding class used to encode strings. 
* - * @var mixed + * @var EncoderInterface */ protected $encode; @@ -206,7 +207,7 @@ public function delete() * * @return void */ - public function propagateEncoding(Encode $encode) + public function propagateEncoding(EncoderInterface $encode) { $this->encode = $encode; $this->tag->setEncoding($encode); diff --git a/src/PHPHtmlParser/Dom/Node/InnerNode.php b/src/PHPHtmlParser/Dom/Node/InnerNode.php index 448057a..084cbaa 100644 --- a/src/PHPHtmlParser/Dom/Node/InnerNode.php +++ b/src/PHPHtmlParser/Dom/Node/InnerNode.php @@ -8,7 +8,8 @@ use PHPHtmlParser\Exceptions\ChildNotFoundException; use PHPHtmlParser\Exceptions\CircularException; use PHPHtmlParser\Exceptions\LogicalException; -use stringEncode\Encode; +use StringEncoder\Contracts\EncoderInterface; + /** * Inner node of the html tree, might have children. @@ -33,7 +34,7 @@ abstract class InnerNode extends ArrayNode * Sets the encoding class to this node and propagates it * to all its children. */ - public function propagateEncoding(Encode $encode): void + public function propagateEncoding(EncoderInterface $encode): void { $this->encode = $encode; $this->tag->setEncoding($encode); diff --git a/src/PHPHtmlParser/Dom/Node/TextNode.php b/src/PHPHtmlParser/Dom/Node/TextNode.php index 1c8b646..6b705c0 100644 --- a/src/PHPHtmlParser/Dom/Node/TextNode.php +++ b/src/PHPHtmlParser/Dom/Node/TextNode.php @@ -89,7 +89,7 @@ public function text(): string // we already know the converted value return $this->convertedText; } - $text = $this->encode->convert($text); + $text = $this->encode->convert()->fromString($text)->toString(); // remember the conversion $this->convertedText = $text; @@ -109,7 +109,7 @@ public function setText(string $text): void { $this->text = $text; if (!\is_null($this->encode)) { - $text = $this->encode->convert($text); + $text = $this->encode->convert()->fromString($text)->toString(); // remember the conversion $this->convertedText = $text; diff --git a/src/PHPHtmlParser/Dom/Parser.php 
b/src/PHPHtmlParser/Dom/Parser.php index 7ed310c..49dd049 100644 --- a/src/PHPHtmlParser/Dom/Parser.php +++ b/src/PHPHtmlParser/Dom/Parser.php @@ -17,7 +17,9 @@ use PHPHtmlParser\Exceptions\LogicalException; use PHPHtmlParser\Exceptions\StrictException; use PHPHtmlParser\Options; -use stringEncode\Encode; +use StringEncoder\Contracts\EncoderInterface; +use StringEncoder\Encoder; + class Parser implements ParserInterface { @@ -104,15 +106,15 @@ public function parse(Options $options, Content $content, int $size): AbstractNo public function detectCharset(Options $options, string $defaultCharset, AbstractNode $root): bool { // set the default - $encode = new Encode(); - $encode->from($defaultCharset); - $encode->to($defaultCharset); + $encode = new Encoder(); + $encode->setSourceEncoding($defaultCharset); + $encode->setTargetEncoding($defaultCharset); $enforceEncoding = $options->getEnforceEncoding(); if ($enforceEncoding !== null) { // they want to enforce the given encoding - $encode->from($enforceEncoding); - $encode->to($enforceEncoding); + $encode->setSourceEncoding($enforceEncoding); + $encode->setTargetEncoding($enforceEncoding); return false; } @@ -138,7 +140,7 @@ public function detectCharset(Options $options, string $defaultCharset, Abstract } $matches = []; if (\preg_match('/charset=([^;]+)/', $content, $matches)) { - $encode->from(\trim($matches[1])); + $encode->setSourceEncoding(\trim($matches[1])); $root->propagateEncoding($encode); return true; @@ -233,7 +235,7 @@ private function parseTag(Options $options, Content $content, int $size): TagDTO /** * @throws ChildNotFoundException */ - private function detectHTML5Charset(Encode $encode, AbstractNode $root): bool + private function detectHTML5Charset(EncoderInterface $encode, AbstractNode $root): bool { /** @var AbstractNode|null $meta */ $meta = $root->find('meta[charset]', 0); @@ -241,7 +243,7 @@ private function detectHTML5Charset(Encode $encode, AbstractNode $root): bool return false; } - 
$encode->from(\trim($meta->getAttribute('charset'))); + $encode->setSourceEncoding(\trim($meta->getAttribute('charset'))); $root->propagateEncoding($encode); return true; diff --git a/src/PHPHtmlParser/Dom/Tag.php b/src/PHPHtmlParser/Dom/Tag.php index 2aeb6aa..bfa5958 100644 --- a/src/PHPHtmlParser/Dom/Tag.php +++ b/src/PHPHtmlParser/Dom/Tag.php @@ -6,7 +6,9 @@ use PHPHtmlParser\DTO\Tag\AttributeDTO; use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException; -use stringEncode\Encode; +use StringEncoder\Contracts\EncoderInterface; +use StringEncoder\Exceptions\InvalidEncodingException; + /** * Class Tag. @@ -49,7 +51,7 @@ class Tag /** * The encoding class to... encode the tags. * - * @var Encode|null + * @var EncoderInterface|null */ protected $encode; @@ -135,7 +137,7 @@ public function isSelfClosing(): bool /** * Sets the encoding type to be used. */ - public function setEncoding(Encode $encode): void + public function setEncoding(EncoderInterface $encode): void { $this->encode = $encode; } @@ -263,7 +265,7 @@ public function setAttributes(array $attr) /** * Returns all attributes of this tag. * - * @throws \stringEncode\Exception + * @throws InvalidEncodingException * * @return AttributeDTO[] */ @@ -286,7 +288,7 @@ public function getAttributes(): array * Returns an attribute by the key. * * @throws AttributeNotFoundException - * @throws \stringEncode\Exception + * @throws InvalidEncodingException */ public function getAttribute(string $key): AttributeDTO { @@ -332,12 +334,14 @@ public function makeOpeningTag() } catch (\TypeError $e) { $val = null; } - $val = $attributeDTO->getValue(); + if (\is_null($val)) { $return .= ' ' . $key; } elseif ($attributeDTO->isDoubleQuote()) { + $val = $attributeDTO->getValue(); $return .= ' ' . $key . '="' . $val . '"'; } else { + $val = $attributeDTO->getValue(); $return .= ' ' . $key . '=\'' . $val . 
'\''; } } diff --git a/tests/Node/TextTest.php b/tests/Node/TextTest.php index f94c496..c21d6a0 100755 --- a/tests/Node/TextTest.php +++ b/tests/Node/TextTest.php @@ -6,7 +6,8 @@ use PHPHtmlParser\Dom\Node\TextNode; use PHPHtmlParser\Options; use PHPUnit\Framework\TestCase; -use stringEncode\Encode; +use StringEncoder\Encoder; + class NodeTextTest extends TestCase { @@ -66,9 +67,9 @@ public function testSetText() public function testSetTextEncoded() { - $encode = new Encode(); - $encode->from('UTF-8'); - $encode->to('UTF-8'); + $encode = new Encoder(); + $encode->setSourceEncoding('UTF-8'); + $encode->setTargetEncoding('UTF-8'); $node = new TextNode('foo bar'); $node->propagateEncoding($encode);