From 148909300c823f82922bf7259191186f3a7637d4 Mon Sep 17 00:00:00 2001 From: Adrien Crivelli Date: Sun, 20 May 2018 19:52:53 +0900 Subject: [PATCH] `Helper\Html` support UTF-8 HTML input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assume UTF-8 encoding. Not assuming UTF-8 would mangle text such as "русский" Fixes #444 --- CHANGELOG.md | 1 + src/PhpSpreadsheet/Helper/Html.php | 11 +++++-- tests/PhpSpreadsheetTests/Helper/HtmlTest.php | 33 +++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Helper/HtmlTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 78c62598..7a619519 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Fixed - Subtotal 9 in a group that has other subtotals 9 exclude the totals of the other subtotals in the range - [#332](https://github.com/PHPOffice/PhpSpreadsheet/issues/332) +- `Helper\Html` support UTF-8 HTML input - [#444](https://github.com/PHPOffice/PhpSpreadsheet/issues/444) ## [1.2.1] - 2018-04-10 diff --git a/src/PhpSpreadsheet/Helper/Html.php b/src/PhpSpreadsheet/Helper/Html.php index 61c66a84..eaf73028 100644 --- a/src/PhpSpreadsheet/Helper/Html.php +++ b/src/PhpSpreadsheet/Helper/Html.php @@ -603,6 +603,13 @@ class Html $this->stringData = ''; } + /** + * Parse HTML formatting and return the resulting RichText. + * + * @param string $html + * + * @return RichText + */ public function toRichTextObject($html) { $this->initialise(); @@ -611,8 +618,8 @@ class Html $dom = new DOMDocument(); // Load the HTML file into the DOM object // Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup - @$dom->loadHTML($html); - + $prefix = '<?xml encoding="UTF-8">'; + @$dom->loadHTML($prefix . 
$html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); // Discard excess white space $dom->preserveWhiteSpace = false; diff --git a/tests/PhpSpreadsheetTests/Helper/HtmlTest.php b/tests/PhpSpreadsheetTests/Helper/HtmlTest.php new file mode 100644 index 00000000..b15a7285 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Helper/HtmlTest.php @@ -0,0 +1,33 @@ +toRichTextObject($input); + + self::assertSame($expected, $actual->getPlainText()); + } + + public function providerUtf8EncodingSupport() + { + return [ + ['foo', 'foo'], + ['können', 'können'], + ['русский', 'русский'], + ["foo\nbar", '<p>foo</p><p>bar</p>'], + ]; + } +}