From 95c8bb9918b43a1f088cab54c4684e1ef40caaaa Mon Sep 17 00:00:00 2001 From: Nathanael Noblet Date: Wed, 14 Aug 2019 10:04:21 -0600 Subject: [PATCH] Allow HTML Reader to load from string We often want to export a table as an excel sheet. The system renders the html and it seems like a waste of time to write it to the file system to use the reader. This allows us to render the html and then just pass it to a reader Closes #1136 --- CHANGELOG.md | 1 + docs/topics/reading-and-writing-to-file.md | 28 +++++++++ src/PhpSpreadsheet/Reader/Html.php | 58 +++++++++++++++---- tests/PhpSpreadsheetTests/Reader/HtmlTest.php | 30 ++++++++++ 4 files changed, 106 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20d26f9e..3891842e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). - HLookup needs an ordered list even if range_lookup is set to false [Issue #1055](https://github.com/PHPOffice/PhpSpreadsheet/issues/1055) and [PR #1076](https://github.com/PHPOffice/PhpSpreadsheet/pull/1076) - Improve performance of IF function calls via ranch pruning to avoid resolution of every branches [#844](https://github.com/PHPOffice/PhpSpreadsheet/pull/844) - MATCH function supports `*?~` Excel functionality, when match_type=0 - [Issue #1116](https://github.com/PHPOffice/PhpSpreadsheet/issues/1116) +- Allow HTML Reader to accept HTML as a string [Issue #1136](https://github.com/PHPOffice/PhpSpreadsheet/pull/1136) ### Fixed diff --git a/docs/topics/reading-and-writing-to-file.md b/docs/topics/reading-and-writing-to-file.md index 0b27f8c1..b26cc6a9 100644 --- a/docs/topics/reading-and-writing-to-file.md +++ b/docs/topics/reading-and-writing-to-file.md @@ -875,3 +875,31 @@ $writer->save('write.xls'); ``` Notice that it is ok to load an xlsx file and generate an xls file. 
+ +## Generating Excel files from HTML content + +If you are generating an Excel file from pre-rendered HTML content you can do so +automatically using the HTML Reader. This is most useful when you are generating +Excel files from web application content that would be downloaded/sent to a user. + +For example: + +```php +$htmlString = '<table> + <tr> + <td>Hello World</td> + </tr> + <tr> + <td>Hello<br />World</td> + </tr> + <tr> + <td>Hello<br>World</td> + </tr> + </table>'; + +$reader = new \PhpOffice\PhpSpreadsheet\Reader\Html(); +$spreadsheet = $reader->loadFromString($htmlString); + +$writer = \PhpOffice\PhpSpreadsheet\IOFactory::createWriter($spreadsheet, 'Xls'); +$writer->save('write.xls'); +``` diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index ff2c909e..bf9c6038 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -592,28 +592,64 @@ class Html extends BaseReader throw new Exception($pFilename . ' is an Invalid HTML file.'); } - // Create new sheet - while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { - $spreadsheet->createSheet(); - } - $spreadsheet->setActiveSheetIndex($this->sheetIndex); - - // Create a new DOM object + // Create a new DOM object $dom = new DOMDocument(); - // Reload the HTML file into the DOM object + // Reload the HTML file into the DOM object $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); if ($loaded === false) { throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document'); } - // Discard white space - $dom->preserveWhiteSpace = false; + return $this->loadDocument($dom, $spreadsheet); + } + + /** + * Spreadsheet from content. + * + * @param string $content + * + * @throws Exception + * + * @return Spreadsheet + */ + public function loadFromString($content): Spreadsheet + { + // Create a new DOM object + $dom = new DOMDocument(); + // Reload the HTML file into the DOM object + $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8')); + if ($loaded === false) { + throw new Exception('Failed to load content as a DOM Document'); + } + + return $this->loadDocument($dom, new Spreadsheet()); + } + + /** + * Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance. 
+ * + * @param DOMDocument $document + * @param Spreadsheet $spreadsheet + * + * @throws \PhpOffice\PhpSpreadsheet\Exception + * + * @return Spreadsheet + */ + private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet + { + while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { + $spreadsheet->createSheet(); + } + $spreadsheet->setActiveSheetIndex($this->sheetIndex); + + // Discard white space + $document->preserveWhiteSpace = false; $row = 0; $column = 'A'; $content = ''; $this->rowspan = []; - $this->processDomElement($dom, $spreadsheet->getActiveSheet(), $row, $column, $content); + $this->processDomElement($document, $spreadsheet->getActiveSheet(), $row, $column, $content); // Return return $spreadsheet; diff --git a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php b/tests/PhpSpreadsheetTests/Reader/HtmlTest.php index e8b00f1a..e9dd207f 100644 --- a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php +++ b/tests/PhpSpreadsheetTests/Reader/HtmlTest.php @@ -299,6 +299,36 @@ class HtmlTest extends TestCase unlink($filename); } + public function testCanLoadFromString() + { + $html = '<table> + <tr> + <td>Hello World</td> + </tr> + <tr> + <td>Hello<br />World</td> + </tr> + <tr> + <td>Hello<br>World</td> + </tr> + </table>'; + $spreadsheet = (new Html())->loadFromString($html); + $firstSheet = $spreadsheet->getSheet(0); + + $cellStyle = $firstSheet->getStyle('A1'); + self::assertFalse($cellStyle->getAlignment()->getWrapText()); + + $cellStyle = $firstSheet->getStyle('A2'); + self::assertTrue($cellStyle->getAlignment()->getWrapText()); + $cellValue = $firstSheet->getCell('A2')->getValue(); + $this->assertContains("\n", $cellValue); + + $cellStyle = $firstSheet->getStyle('A3'); + self::assertTrue($cellStyle->getAlignment()->getWrapText()); + $cellValue = $firstSheet->getCell('A3')->getValue(); + $this->assertContains("\n", $cellValue); + } + /** * @param string $html *