Allow HTML Reader to load from string

We often want to export a table as an Excel sheet. The system renders the
HTML, and it seems like a waste of time to write it to the file system just to
use the reader. This allows us to render the HTML and then pass it directly to
a reader.

Closes #1136
This commit is contained in:
Nathanael Noblet 2019-08-14 10:04:21 -06:00 committed by Adrien Crivelli
parent 34675bdf5d
commit 95c8bb9918
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
4 changed files with 106 additions and 11 deletions

View File

@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- HLookup needs an ordered list even if range_lookup is set to false [Issue #1055](https://github.com/PHPOffice/PhpSpreadsheet/issues/1055) and [PR #1076](https://github.com/PHPOffice/PhpSpreadsheet/pull/1076) - HLookup needs an ordered list even if range_lookup is set to false [Issue #1055](https://github.com/PHPOffice/PhpSpreadsheet/issues/1055) and [PR #1076](https://github.com/PHPOffice/PhpSpreadsheet/pull/1076)
- Improve performance of IF function calls via branch pruning to avoid resolution of every branch [#844](https://github.com/PHPOffice/PhpSpreadsheet/pull/844) - Improve performance of IF function calls via branch pruning to avoid resolution of every branch [#844](https://github.com/PHPOffice/PhpSpreadsheet/pull/844)
- MATCH function supports `*?~` Excel functionality, when match_type=0 - [Issue #1116](https://github.com/PHPOffice/PhpSpreadsheet/issues/1116) - MATCH function supports `*?~` Excel functionality, when match_type=0 - [Issue #1116](https://github.com/PHPOffice/PhpSpreadsheet/issues/1116)
- Allow HTML Reader to accept HTML as a string [PR #1136](https://github.com/PHPOffice/PhpSpreadsheet/pull/1136)
### Fixed ### Fixed

View File

@ -875,3 +875,31 @@ $writer->save('write.xls');
``` ```
Notice that it is ok to load an xlsx file and generate an xls file. Notice that it is ok to load an xlsx file and generate an xls file.
## Generating Excel files from HTML content
If you are generating an Excel file from pre-rendered HTML content you can do so
automatically using the HTML Reader. This is most useful when you are generating
Excel files from web application content that would be downloaded/sent to a user.
For example:
```php
$htmlString = '<table>
<tr>
<td>Hello World</td>
</tr>
<tr>
<td>Hello<br />World</td>
</tr>
<tr>
<td>Hello<br>World</td>
</tr>
</table>';
$reader = new \PhpOffice\PhpSpreadsheet\Reader\Html();
$spreadsheet = $reader->loadFromString($htmlString);
$writer = \PhpOffice\PhpSpreadsheet\IOFactory::createWriter($spreadsheet, 'Xls');
$writer->save('write.xls');
```

View File

@ -592,28 +592,64 @@ class Html extends BaseReader
throw new Exception($pFilename . ' is an Invalid HTML file.'); throw new Exception($pFilename . ' is an Invalid HTML file.');
} }
// Create new sheet // Create a new DOM object
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
$spreadsheet->createSheet();
}
$spreadsheet->setActiveSheetIndex($this->sheetIndex);
// Create a new DOM object
$dom = new DOMDocument(); $dom = new DOMDocument();
// Reload the HTML file into the DOM object // Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
if ($loaded === false) { if ($loaded === false) {
throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document'); throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
} }
// Discard white space return $this->loadDocument($dom, $spreadsheet);
$dom->preserveWhiteSpace = false; }
/**
 * Builds a spreadsheet from HTML markup held in memory instead of in a file.
 *
 * The markup is run through the reader's security scanner, parsed into a
 * DOMDocument and then handed to loadDocument(), which fills the sheet
 * selected by $this->sheetIndex.
 *
 * @param string $content HTML markup to read
 * @param null|Spreadsheet $spreadsheet Existing workbook to load into; a new one is created when omitted
 *
 * @throws Exception when the markup cannot be parsed into a DOM document
 *
 * @return Spreadsheet
 */
public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spreadsheet
{
    // Scan the raw markup first, then re-encode it from UTF-8 to HTML entities
    // (presumably so loadHTML() does not misread multi-byte characters).
    $html = mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8');

    // Parse the in-memory markup into a DOM tree
    $dom = new DOMDocument();
    if ($dom->loadHTML($html) === false) {
        throw new Exception('Failed to load content as a DOM Document');
    }

    // Fall back to a fresh workbook when the caller did not supply one
    return $this->loadDocument($dom, $spreadsheet ?? new Spreadsheet());
}
/**
* Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
*
* @param DOMDocument $document
* @param Spreadsheet $spreadsheet
*
* @throws \PhpOffice\PhpSpreadsheet\Exception
*
* @return Spreadsheet
*/
private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
{
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
$spreadsheet->createSheet();
}
$spreadsheet->setActiveSheetIndex($this->sheetIndex);
// Discard white space
$document->preserveWhiteSpace = false;
$row = 0; $row = 0;
$column = 'A'; $column = 'A';
$content = ''; $content = '';
$this->rowspan = []; $this->rowspan = [];
$this->processDomElement($dom, $spreadsheet->getActiveSheet(), $row, $column, $content); $this->processDomElement($document, $spreadsheet->getActiveSheet(), $row, $column, $content);
// Return // Return
return $spreadsheet; return $spreadsheet;

View File

@ -299,6 +299,36 @@ class HtmlTest extends TestCase
unlink($filename); unlink($filename);
} }
/**
 * The reader must accept HTML passed as a string: a plain text cell stays
 * unwrapped, while cells containing a <br> become wrapped, multi-line values.
 */
public function testCanLoadFromString()
{
    $markup = '<table>
<tr>
<td>Hello World</td>
</tr>
<tr>
<td>Hello<br />World</td>
</tr>
<tr>
<td>Hello<br>World</td>
</tr>
</table>';

    $sheet = (new Html())->loadFromString($markup)->getSheet(0);

    // A1 holds plain text, so wrapping must stay off
    self::assertFalse($sheet->getStyle('A1')->getAlignment()->getWrapText());

    // A2 uses <br />, A3 uses <br>; both must wrap and carry a line feed
    foreach (['A2', 'A3'] as $coordinate) {
        self::assertTrue($sheet->getStyle($coordinate)->getAlignment()->getWrapText());
        self::assertContains("\n", $sheet->getCell($coordinate)->getValue());
    }
}
/** /**
* @param string $html * @param string $html
* *