Fix crash when reading HTML files

This commit is contained in:
Adrien Crivelli 2016-08-25 11:14:03 +09:00
parent 7a6c5b0d1b
commit ba70544846
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
3 changed files with 28 additions and 16 deletions

View File

@ -37,6 +37,7 @@
"suggest": { "suggest": {
"ext-zip": "*", "ext-zip": "*",
"ext-gd": "*", "ext-gd": "*",
"ext-dom": "Option to read and write HTML files",
"mpdf/mpdf": "Option for rendering PDF with PDF Writer", "mpdf/mpdf": "Option for rendering PDF with PDF Writer",
"dompdf/dompdf": "Option for rendering PDF with PDF Writer", "dompdf/dompdf": "Option for rendering PDF with PDF Writer",
"tecnick.com/tcpdf": "Option for rendering PDF with PDF Writer", "tecnick.com/tcpdf": "Option for rendering PDF with PDF Writer",

View File

@ -2,6 +2,11 @@
namespace PhpSpreadsheet\Helper; namespace PhpSpreadsheet\Helper;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMText;
/** /**
* Copyright (c) 2006 - 2016 PhpSpreadsheet * Copyright (c) 2006 - 2016 PhpSpreadsheet
* *
@ -612,7 +617,7 @@ class HTML
$this->initialise(); $this->initialise();
// Create a new DOM object // Create a new DOM object
$dom = new \DOMDocument(); $dom = new DOMDocument();
// Load the HTML file into the DOM object // Load the HTML file into the DOM object
// Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup // Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
$loaded = @$dom->loadHTML($html); $loaded = @$dom->loadHTML($html);
@ -786,7 +791,7 @@ class HTML
$this->stringData .= "\n"; $this->stringData .= "\n";
} }
protected function parseTextNode(\DOMText $textNode) protected function parseTextNode(DOMText $textNode)
{ {
$domText = preg_replace( $domText = preg_replace(
'/\s+/u', '/\s+/u',
@ -807,7 +812,7 @@ class HTML
} }
} }
protected function parseElementNode(\DOMElement $element) protected function parseElementNode(DOMElement $element)
{ {
$callbackTag = strtolower($element->nodeName); $callbackTag = strtolower($element->nodeName);
$this->stack[] = $callbackTag; $this->stack[] = $callbackTag;
@ -820,12 +825,12 @@ class HTML
$this->handleCallback($element, $callbackTag, $this->endTagCallbacks); $this->handleCallback($element, $callbackTag, $this->endTagCallbacks);
} }
protected function parseElements(\DOMNode $element) protected function parseElements(DOMNode $element)
{ {
foreach ($element->childNodes as $child) { foreach ($element->childNodes as $child) {
if ($child instanceof \DOMText) { if ($child instanceof DOMText) {
$this->parseTextNode($child); $this->parseTextNode($child);
} elseif ($child instanceof \DOMElement) { } elseif ($child instanceof DOMElement) {
$this->parseElementNode($child); $this->parseElementNode($child);
} }
} }

View File

@ -2,6 +2,12 @@
namespace PhpSpreadsheet\Reader; namespace PhpSpreadsheet\Reader;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMText;
use PhpSpreadsheet\Spreadsheet;
/** /**
* Copyright (c) 2006 - 2016 PhpSpreadsheet * Copyright (c) 2006 - 2016 PhpSpreadsheet
* *
@ -131,16 +137,16 @@ class HTML extends BaseReader implements IReader
} }
/** /**
* Loads PhpSpreadsheet from file * Loads Spreadsheet from file
* *
* @param string $pFilename * @param string $pFilename
* @throws Exception * @throws Exception
* @return PhpSpreadsheet * @return Spreadsheet
*/ */
public function load($pFilename) public function load($pFilename)
{ {
// Create new PhpSpreadsheet // Create new Spreadsheet
$spreadsheet = new PhpSpreadsheet(); $spreadsheet = new Spreadsheet();
// Load into this instance // Load into this instance
return $this->loadIntoExisting($pFilename, $spreadsheet); return $this->loadIntoExisting($pFilename, $spreadsheet);
@ -168,7 +174,7 @@ class HTML extends BaseReader implements IReader
return $this->inputEncoding; return $this->inputEncoding;
} }
// Data Array used for testing only, should write to PhpSpreadsheet object on completion of tests // Data Array used for testing only, should write to Spreadsheet object on completion of tests
protected $dataArray = []; protected $dataArray = [];
protected $tableLevel = 0; protected $tableLevel = 0;
protected $nestedColumn = ['A']; protected $nestedColumn = ['A'];
@ -458,11 +464,11 @@ class HTML extends BaseReader implements IReader
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance * Loads PhpSpreadsheet from file into PhpSpreadsheet instance
* *
* @param string $pFilename * @param string $pFilename
* @param \PhpSpreadsheet\Spreadsheet $spreadsheet * @param Spreadsheet $spreadsheet
* @throws Exception * @throws Exception
* @return \PhpSpreadsheet\Spreadsheet * @return Spreadsheet
*/ */
public function loadIntoExisting($pFilename, \PhpSpreadsheet\Spreadsheet $spreadsheet) public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{ {
// Open file to validate // Open file to validate
$this->openFile($pFilename); $this->openFile($pFilename);
@ -473,14 +479,14 @@ class HTML extends BaseReader implements IReader
// Close after validating // Close after validating
fclose($this->fileHandle); fclose($this->fileHandle);
// Create new PhpSpreadsheet // Create new sheet
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
$spreadsheet->createSheet(); $spreadsheet->createSheet();
} }
$spreadsheet->setActiveSheetIndex($this->sheetIndex); $spreadsheet->setActiveSheetIndex($this->sheetIndex);
// Create a new DOM object // Create a new DOM object
$dom = new domDocument(); $dom = new DOMDocument();
// Reload the HTML file into the DOM object // Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
if ($loaded === false) { if ($loaded === false) {