From 0477e6fcfeac00e71a9ef140ee7113ee6a8315a1 Mon Sep 17 00:00:00 2001 From: GreatHumorist Date: Wed, 20 Sep 2017 13:20:12 +0800 Subject: [PATCH] In Xml reader throw exception in case of invalid XML (#222) When the xml file is not a standard xml file, the `simplexml_load_string` will return false, this will cause an error on "$xml->getNamespaces(true);" . So instead of showing the error, we throw an exception. --- CHANGELOG.md | 1 + src/PhpSpreadsheet/Reader/Xml.php | 45 ++++++++++++------- .../Reader/XEEValidatorTest.php | 26 ++++++++++- .../Reader/XEE/XEETestInvalidSimpleXML.xml | 8 ++++ 4 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 tests/data/Reader/XEE/XEETestInvalidSimpleXML.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index e409ce8c..ad37f9fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Changed - Merge data-validations to reduce written worksheet size - @billblume [#131](https://github.com/PHPOffice/PhpSpreadSheet/issues/131) +- Throws exception if a XML file is invalid - @GreatHumorist [#222](https://github.com/PHPOffice/PhpSpreadsheet/pull/222) ### Fixed diff --git a/src/PhpSpreadsheet/Reader/Xml.php b/src/PhpSpreadsheet/Reader/Xml.php index d9368d02..b52637f8 100644 --- a/src/PhpSpreadsheet/Reader/Xml.php +++ b/src/PhpSpreadsheet/Reader/Xml.php @@ -117,6 +117,30 @@ class Xml extends BaseReader implements IReader return $valid; } + /** + * Check if the file is a valid SimpleXML. + * + * @param string $pFilename + * + * @throws Exception + * + * @return false|\SimpleXMLElement + */ + public function trySimpleXMLLoadString($pFilename) + { + try { + $xml = simplexml_load_string( + $this->securityScan(file_get_contents($pFilename)), + 'SimpleXMLElement', + Settings::getLibXmlLoaderOptions() + ); + } catch (\Exception $e) { + throw new Exception('Cannot load invalid XML file: ' . $pFilename, 0, $e); + } + + return $xml; + } + /** * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object. * @@ -133,11 +157,8 @@ class Xml extends BaseReader implements IReader $worksheetNames = []; - $xml = simplexml_load_string( - $this->securityScan(file_get_contents($pFilename)), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() - ); + $xml = $this->trySimpleXMLLoadString($pFilename); + $namespaces = $xml->getNamespaces(true); $xml_ss = $xml->children($namespaces['ss']); @@ -162,11 +183,8 @@ class Xml extends BaseReader implements IReader $worksheetInfo = []; - $xml = simplexml_load_string( - $this->securityScan(file_get_contents($pFilename)), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() - ); + $xml = $this->trySimpleXMLLoadString($pFilename); + $namespaces = $xml->getNamespaces(true); $worksheetID = 1; @@ -339,11 +357,8 @@ class Xml extends BaseReader implements IReader throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); } - $xml = simplexml_load_string( - $this->securityScan(file_get_contents($pFilename)), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() - ); + $xml = $this->trySimpleXMLLoadString($pFilename); + $namespaces = $xml->getNamespaces(true); $docProps = $spreadsheet->getProperties(); diff --git a/tests/PhpSpreadsheetTests/Reader/XEEValidatorTest.php b/tests/PhpSpreadsheetTests/Reader/XEEValidatorTest.php index 372e51bb..13ca9576 100644 --- a/tests/PhpSpreadsheetTests/Reader/XEEValidatorTest.php +++ b/tests/PhpSpreadsheetTests/Reader/XEEValidatorTest.php @@ -3,6 +3,8 @@ namespace PhpOffice\PhpSpreadsheetTests\Reader; use PhpOffice\PhpSpreadsheet\Reader\BaseReader; +use PhpOffice\PhpSpreadsheet\Reader\Exception; +use PhpOffice\PhpSpreadsheet\Reader\Xml; use PHPUnit_Framework_TestCase; class XEEValidatorTest extends PHPUnit_Framework_TestCase @@ -24,7 +26,29 @@ class XEEValidatorTest extends PHPUnit_Framework_TestCase public function providerInvalidXML() { $tests = []; - foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalid*.xml') as $file) { + foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalidUTF*.xml') as $file) { + $tests[basename($file)] = [realpath($file)]; + } + + return $tests; + } + + /** + * @dataProvider providerInvalidSimpleXML + * @expectedException \PhpOffice\PhpSpreadsheet\Reader\Exception + * + * @param $filename + */ + public function testInvalidSimpleXML($filename) + { + $xmlReader = new Xml(); + $xmlReader->trySimpleXMLLoadString($filename); + } + + public function providerInvalidSimpleXML() + { + $tests = []; + foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalidSimpleXML*.xml') as $file) { $tests[basename($file)] = [realpath($file)]; } diff --git a/tests/data/Reader/XEE/XEETestInvalidSimpleXML.xml b/tests/data/Reader/XEE/XEETestInvalidSimpleXML.xml new file mode 100644 index 00000000..9a382a76 --- /dev/null +++ b/tests/data/Reader/XEE/XEETestInvalidSimpleXML.xml @@ -0,0 +1,8 @@ + + + R&d + R + R>d + R'd + R"d +