Fix crash when reading HTML files
This commit is contained in:
parent
7a6c5b0d1b
commit
ba70544846
|
@ -37,6 +37,7 @@
|
||||||
"suggest": {
|
"suggest": {
|
||||||
"ext-zip": "*",
|
"ext-zip": "*",
|
||||||
"ext-gd": "*",
|
"ext-gd": "*",
|
||||||
|
"ext-dom": "Option to read and write HTML files",
|
||||||
"mpdf/mpdf": "Option for rendering PDF with PDF Writer",
|
"mpdf/mpdf": "Option for rendering PDF with PDF Writer",
|
||||||
"dompdf/dompdf": "Option for rendering PDF with PDF Writer",
|
"dompdf/dompdf": "Option for rendering PDF with PDF Writer",
|
||||||
"tecnick.com/tcpdf": "Option for rendering PDF with PDF Writer",
|
"tecnick.com/tcpdf": "Option for rendering PDF with PDF Writer",
|
||||||
|
|
|
@ -2,6 +2,11 @@
|
||||||
|
|
||||||
namespace PhpSpreadsheet\Helper;
|
namespace PhpSpreadsheet\Helper;
|
||||||
|
|
||||||
|
use DOMDocument;
|
||||||
|
use DOMElement;
|
||||||
|
use DOMNode;
|
||||||
|
use DOMText;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copyright (c) 2006 - 2016 PhpSpreadsheet
|
* Copyright (c) 2006 - 2016 PhpSpreadsheet
|
||||||
*
|
*
|
||||||
|
@ -612,7 +617,7 @@ class HTML
|
||||||
$this->initialise();
|
$this->initialise();
|
||||||
|
|
||||||
// Create a new DOM object
|
// Create a new DOM object
|
||||||
$dom = new \DOMDocument();
|
$dom = new DOMDocument();
|
||||||
// Load the HTML file into the DOM object
|
// Load the HTML file into the DOM object
|
||||||
// Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
|
// Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
|
||||||
$loaded = @$dom->loadHTML($html);
|
$loaded = @$dom->loadHTML($html);
|
||||||
|
@ -786,7 +791,7 @@ class HTML
|
||||||
$this->stringData .= "\n";
|
$this->stringData .= "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseTextNode(\DOMText $textNode)
|
protected function parseTextNode(DOMText $textNode)
|
||||||
{
|
{
|
||||||
$domText = preg_replace(
|
$domText = preg_replace(
|
||||||
'/\s+/u',
|
'/\s+/u',
|
||||||
|
@ -807,7 +812,7 @@ class HTML
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseElementNode(\DOMElement $element)
|
protected function parseElementNode(DOMElement $element)
|
||||||
{
|
{
|
||||||
$callbackTag = strtolower($element->nodeName);
|
$callbackTag = strtolower($element->nodeName);
|
||||||
$this->stack[] = $callbackTag;
|
$this->stack[] = $callbackTag;
|
||||||
|
@ -820,12 +825,12 @@ class HTML
|
||||||
$this->handleCallback($element, $callbackTag, $this->endTagCallbacks);
|
$this->handleCallback($element, $callbackTag, $this->endTagCallbacks);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseElements(\DOMNode $element)
|
protected function parseElements(DOMNode $element)
|
||||||
{
|
{
|
||||||
foreach ($element->childNodes as $child) {
|
foreach ($element->childNodes as $child) {
|
||||||
if ($child instanceof \DOMText) {
|
if ($child instanceof DOMText) {
|
||||||
$this->parseTextNode($child);
|
$this->parseTextNode($child);
|
||||||
} elseif ($child instanceof \DOMElement) {
|
} elseif ($child instanceof DOMElement) {
|
||||||
$this->parseElementNode($child);
|
$this->parseElementNode($child);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,12 @@
|
||||||
|
|
||||||
namespace PhpSpreadsheet\Reader;
|
namespace PhpSpreadsheet\Reader;
|
||||||
|
|
||||||
|
use DOMDocument;
|
||||||
|
use DOMElement;
|
||||||
|
use DOMNode;
|
||||||
|
use DOMText;
|
||||||
|
use PhpSpreadsheet\Spreadsheet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copyright (c) 2006 - 2016 PhpSpreadsheet
|
* Copyright (c) 2006 - 2016 PhpSpreadsheet
|
||||||
*
|
*
|
||||||
|
@ -131,16 +137,16 @@ class HTML extends BaseReader implements IReader
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads PhpSpreadsheet from file
|
* Loads Spreadsheet from file
|
||||||
*
|
*
|
||||||
* @param string $pFilename
|
* @param string $pFilename
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
* @return PhpSpreadsheet
|
* @return Spreadsheet
|
||||||
*/
|
*/
|
||||||
public function load($pFilename)
|
public function load($pFilename)
|
||||||
{
|
{
|
||||||
// Create new PhpSpreadsheet
|
// Create new Spreadsheet
|
||||||
$spreadsheet = new PhpSpreadsheet();
|
$spreadsheet = new Spreadsheet();
|
||||||
|
|
||||||
// Load into this instance
|
// Load into this instance
|
||||||
return $this->loadIntoExisting($pFilename, $spreadsheet);
|
return $this->loadIntoExisting($pFilename, $spreadsheet);
|
||||||
|
@ -168,7 +174,7 @@ class HTML extends BaseReader implements IReader
|
||||||
return $this->inputEncoding;
|
return $this->inputEncoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Data Array used for testing only, should write to PhpSpreadsheet object on completion of tests
|
// Data Array used for testing only, should write to Spreadsheet object on completion of tests
|
||||||
protected $dataArray = [];
|
protected $dataArray = [];
|
||||||
protected $tableLevel = 0;
|
protected $tableLevel = 0;
|
||||||
protected $nestedColumn = ['A'];
|
protected $nestedColumn = ['A'];
|
||||||
|
@ -458,11 +464,11 @@ class HTML extends BaseReader implements IReader
|
||||||
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance
|
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance
|
||||||
*
|
*
|
||||||
* @param string $pFilename
|
* @param string $pFilename
|
||||||
* @param \PhpSpreadsheet\Spreadsheet $spreadsheet
|
* @param Spreadsheet $spreadsheet
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
* @return \PhpSpreadsheet\Spreadsheet
|
* @return Spreadsheet
|
||||||
*/
|
*/
|
||||||
public function loadIntoExisting($pFilename, \PhpSpreadsheet\Spreadsheet $spreadsheet)
|
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
|
||||||
{
|
{
|
||||||
// Open file to validate
|
// Open file to validate
|
||||||
$this->openFile($pFilename);
|
$this->openFile($pFilename);
|
||||||
|
@ -473,14 +479,14 @@ class HTML extends BaseReader implements IReader
|
||||||
// Close after validating
|
// Close after validating
|
||||||
fclose($this->fileHandle);
|
fclose($this->fileHandle);
|
||||||
|
|
||||||
// Create new PhpSpreadsheet
|
// Create new sheet
|
||||||
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
|
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
|
||||||
$spreadsheet->createSheet();
|
$spreadsheet->createSheet();
|
||||||
}
|
}
|
||||||
$spreadsheet->setActiveSheetIndex($this->sheetIndex);
|
$spreadsheet->setActiveSheetIndex($this->sheetIndex);
|
||||||
|
|
||||||
// Create a new DOM object
|
// Create a new DOM object
|
||||||
$dom = new domDocument();
|
$dom = new DOMDocument();
|
||||||
// Reload the HTML file into the DOM object
|
// Reload the HTML file into the DOM object
|
||||||
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
|
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
|
||||||
if ($loaded === false) {
|
if ($loaded === false) {
|
||||||
|
|
Loading…
Reference in New Issue