Fix crash when reading HTML files

This commit is contained in:
Adrien Crivelli 2016-08-25 11:14:03 +09:00
parent 7a6c5b0d1b
commit ba70544846
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
3 changed files with 28 additions and 16 deletions

View File

@@ -37,6 +37,7 @@
"suggest": {
"ext-zip": "*",
"ext-gd": "*",
"ext-dom": "Option to read and write HTML files",
"mpdf/mpdf": "Option for rendering PDF with PDF Writer",
"dompdf/dompdf": "Option for rendering PDF with PDF Writer",
"tecnick.com/tcpdf": "Option for rendering PDF with PDF Writer",

View File

@@ -2,6 +2,11 @@
namespace PhpSpreadsheet\Helper;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMText;
/**
* Copyright (c) 2006 - 2016 PhpSpreadsheet
*
@@ -612,7 +617,7 @@ class HTML
$this->initialise();
// Create a new DOM object
$dom = new \DOMDocument();
$dom = new DOMDocument();
// Load the HTML file into the DOM object
// Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
$loaded = @$dom->loadHTML($html);
@@ -786,7 +791,7 @@ class HTML
$this->stringData .= "\n";
}
protected function parseTextNode(\DOMText $textNode)
protected function parseTextNode(DOMText $textNode)
{
$domText = preg_replace(
'/\s+/u',
@@ -807,7 +812,7 @@ class HTML
}
}
protected function parseElementNode(\DOMElement $element)
protected function parseElementNode(DOMElement $element)
{
$callbackTag = strtolower($element->nodeName);
$this->stack[] = $callbackTag;
@@ -820,12 +825,12 @@ class HTML
$this->handleCallback($element, $callbackTag, $this->endTagCallbacks);
}
protected function parseElements(\DOMNode $element)
protected function parseElements(DOMNode $element)
{
foreach ($element->childNodes as $child) {
if ($child instanceof \DOMText) {
if ($child instanceof DOMText) {
$this->parseTextNode($child);
} elseif ($child instanceof \DOMElement) {
} elseif ($child instanceof DOMElement) {
$this->parseElementNode($child);
}
}

View File

@@ -2,6 +2,12 @@
namespace PhpSpreadsheet\Reader;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMText;
use PhpSpreadsheet\Spreadsheet;
/**
* Copyright (c) 2006 - 2016 PhpSpreadsheet
*
@@ -131,16 +137,16 @@ class HTML extends BaseReader implements IReader
}
/**
* Loads PhpSpreadsheet from file
* Loads Spreadsheet from file
*
* @param string $pFilename
* @throws Exception
* @return PhpSpreadsheet
* @return Spreadsheet
*/
public function load($pFilename)
{
// Create new PhpSpreadsheet
$spreadsheet = new PhpSpreadsheet();
// Create new Spreadsheet
$spreadsheet = new Spreadsheet();
// Load into this instance
return $this->loadIntoExisting($pFilename, $spreadsheet);
@@ -168,7 +174,7 @@ class HTML extends BaseReader implements IReader
return $this->inputEncoding;
}
// Data Array used for testing only, should write to PhpSpreadsheet object on completion of tests
// Data Array used for testing only, should write to Spreadsheet object on completion of tests
protected $dataArray = [];
protected $tableLevel = 0;
protected $nestedColumn = ['A'];
@@ -458,11 +464,11 @@ class HTML extends BaseReader implements IReader
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance
*
* @param string $pFilename
* @param \PhpSpreadsheet\Spreadsheet $spreadsheet
* @param Spreadsheet $spreadsheet
* @throws Exception
* @return \PhpSpreadsheet\Spreadsheet
* @return Spreadsheet
*/
public function loadIntoExisting($pFilename, \PhpSpreadsheet\Spreadsheet $spreadsheet)
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
// Open file to validate
$this->openFile($pFilename);
@@ -473,14 +479,14 @@ class HTML extends BaseReader implements IReader
// Close after validating
fclose($this->fileHandle);
// Create new PhpSpreadsheet
// Create new sheet
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
$spreadsheet->createSheet();
}
$spreadsheet->setActiveSheetIndex($this->sheetIndex);
// Create a new DOM object
$dom = new domDocument();
$dom = new DOMDocument();
// Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
if ($loaded === false) {