`Helper\Html` supports UTF-8 HTML input

Assume UTF-8 encoding. Without this assumption, non-ASCII text such as "русский" would be mangled.

Fixes #444
This commit is contained in:
Adrien Crivelli 2018-05-20 19:52:53 +09:00
parent 38638268d8
commit 148909300c
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
3 changed files with 43 additions and 2 deletions

View File

@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Fixed ### Fixed
- Subtotal 9 in a group that has other subtotals 9 exclude the totals of the other subtotals in the range - [#332](https://github.com/PHPOffice/PhpSpreadsheet/issues/332) - Subtotal 9 in a group that has other subtotals 9 exclude the totals of the other subtotals in the range - [#332](https://github.com/PHPOffice/PhpSpreadsheet/issues/332)
- `Helper\Html` supports UTF-8 HTML input - [#444](https://github.com/PHPOffice/PhpSpreadsheet/issues/444)
## [1.2.1] - 2018-04-10 ## [1.2.1] - 2018-04-10

View File

@ -603,6 +603,13 @@ class Html
$this->stringData = ''; $this->stringData = '';
} }
/**
* Parse HTML formatting and return the resulting RichText.
*
* @param string $html
*
* @return RichText
*/
public function toRichTextObject($html) public function toRichTextObject($html)
{ {
$this->initialise(); $this->initialise();
@ -611,8 +618,8 @@ class Html
$dom = new DOMDocument(); $dom = new DOMDocument();
// Load the HTML file into the DOM object // Load the HTML file into the DOM object
// Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup // Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
@$dom->loadHTML($html); $prefix = '<?xml encoding="UTF-8">';
@$dom->loadHTML($prefix . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
// Discard excess white space // Discard excess white space
$dom->preserveWhiteSpace = false; $dom->preserveWhiteSpace = false;

View File

@ -0,0 +1,33 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Helper;
use PhpOffice\PhpSpreadsheet\Helper\Html;
use PHPUnit\Framework\TestCase;
class HtmlTest extends TestCase
{
    /**
     * Converts an HTML fragment to rich text and checks that the extracted
     * plain text matches, including multi-byte UTF-8 characters.
     *
     * @dataProvider providerUtf8EncodingSupport
     *
     * @param string $expected plain text expected from the resulting rich text
     * @param string $input HTML fragment handed to the helper
     */
    public function testUtf8EncodingSupport($expected, $input)
    {
        $html = new Html();
        $actual = $html->toRichTextObject($input);

        self::assertSame($expected, $actual->getPlainText());
    }

    /**
     * Data sets for testUtf8EncodingSupport().
     *
     * Each set is keyed by a short description so that a failing case is
     * identified by name rather than by numeric index. The provider is static
     * because newer PHPUnit releases (10+) expect data providers to be static;
     * older releases accept a static provider as well.
     *
     * @return array<string, string[]> each entry is [expected plain text, HTML input]
     */
    public static function providerUtf8EncodingSupport()
    {
        return [
            'plain ASCII' => ['foo', 'foo'],
            'Latin with umlaut' => ['können', 'können'],
            'Cyrillic' => ['русский', 'русский'],
            'paragraphs joined by newline' => ["foo\nbar", '<p>foo</p><p>bar</p>'],
        ];
    }
}