From 467924823f344650ccb61e84b597ae368ebd9514 Mon Sep 17 00:00:00 2001 From: Greg Bowler Date: Thu, 20 Jul 2023 15:27:40 +0100 Subject: [PATCH] fix: use mb_encode_numericentity for #424 (#439) * fix: use mb_encode_numericentity for #424 * test: add copyright-trademark test to Element * fix: use mb_encode_numericentity for #424 * test: add copyright-trademark test to Element --- phpmd.xml | 2 +- src/Element.php | 11 ++++++++--- test/phpunit/ElementTest.php | 11 +++++++++++ test/phpunit/HTMLDocumentTest.php | 14 ++++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/phpmd.xml b/phpmd.xml index 63c4a821..dd377312 100644 --- a/phpmd.xml +++ b/phpmd.xml @@ -45,7 +45,7 @@ - + diff --git a/src/Element.php b/src/Element.php index 31e42538..d568c8c9 100644 --- a/src/Element.php +++ b/src/Element.php @@ -162,10 +162,15 @@ protected function __prop_set_innerHTML(string $innerHTML):void { $child->parentNode->removeChild($child); } - $innerHTML = mb_convert_encoding( + if($innerHTML === "") { + return; + } + + $conversionMap = [0x80, 0x10FFFF, 0, 0x1FFFFF]; + $innerHTML = mb_encode_numericentity( $innerHTML, - "HTML-ENTITIES", - "utf-8" + $conversionMap, + "UTF-8" ); $tempDocument = new HTMLDocument(); diff --git a/test/phpunit/ElementTest.php b/test/phpunit/ElementTest.php index d198e037..e576d1a4 100644 --- a/test/phpunit/ElementTest.php +++ b/test/phpunit/ElementTest.php @@ -639,4 +639,15 @@ public function testRemoveAttributeNS():void { self::assertFalse( $sut->hasAttributeNS($ns, "foo")); } + + public function testConstruct_copyrightTrademark():void { + $copyright = "©"; + $trademark = "™"; + $text = "Copyright $copyright PHP.Gt, DOM$trademark"; + $sut = new HTMLDocument("

<h1>PHP.Gt/Dom Unit Test</h1>

"); + $h1 = $sut->querySelector("h1"); + $h1->innerHTML = $text; + self::assertStringContainsString($copyright, $h1->innerHTML); + self::assertStringContainsString($trademark, $h1->innerHTML); + } } diff --git a/test/phpunit/HTMLDocumentTest.php b/test/phpunit/HTMLDocumentTest.php index 1f65cc56..3eefd728 100644 --- a/test/phpunit/HTMLDocumentTest.php +++ b/test/phpunit/HTMLDocumentTest.php @@ -781,4 +781,18 @@ public function testEscapedCharacters_entireDom():void { self::assertStringNotContainsString($needle, $domString); } } + + public function testConstruct_copyrightTrademark():void { + $copyright = "©"; + $trademark = "™"; + $text = "Copyright $copyright PHP.Gt, DOM$trademark"; + $sut = new HTMLDocument("

<h1>$text</h1>

"); + $h1 = $sut->querySelector("h1"); + self::assertStringContainsString($copyright, $h1->innerHTML); + self::assertStringContainsString($trademark, $h1->innerHTML); + + $sut->documentElement->innerHTML = $text; + self::assertStringContainsString($copyright, $sut->documentElement->innerHTML); + self::assertStringContainsString($trademark, $sut->documentElement->innerHTML); + } }