From a4d97ba8960ca505af775780a4805cfa8ebad290 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Mon, 19 Nov 2018 22:47:34 +0100 Subject: [PATCH] Clean handle charset in XXE scanner --- src/PhpSpreadsheet/Reader/BaseReader.php | 33 ------- src/PhpSpreadsheet/Reader/Gnumeric.php | 13 ++- src/PhpSpreadsheet/Reader/Html.php | 26 ++---- src/PhpSpreadsheet/Reader/Ods.php | 17 ++-- .../Reader/Security/XmlScanner.php | 86 +++++++++++++++++++ src/PhpSpreadsheet/Reader/Xlsx.php | 81 +++++++++-------- src/PhpSpreadsheet/Reader/Xml.php | 9 +- .../Reader/Security/XmlScannerTest.php | 57 ++++++++++++ tests/PhpSpreadsheetTests/Reader/XmlTest.php | 48 ----------- 9 files changed, 225 insertions(+), 145 deletions(-) create mode 100644 src/PhpSpreadsheet/Reader/Security/XmlScanner.php create mode 100644 tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php diff --git a/src/PhpSpreadsheet/Reader/BaseReader.php b/src/PhpSpreadsheet/Reader/BaseReader.php index 0a092d36..59a5a9d8 100644 --- a/src/PhpSpreadsheet/Reader/BaseReader.php +++ b/src/PhpSpreadsheet/Reader/BaseReader.php @@ -221,37 +221,4 @@ abstract class BaseReader implements IReader throw new Exception('Could not open file ' . $pFilename . ' for reading.'); } } - - /** - * Scan theXML for use of securityScan(file_get_contents($filestream)); - } } diff --git a/src/PhpSpreadsheet/Reader/Gnumeric.php b/src/PhpSpreadsheet/Reader/Gnumeric.php index 228fb717..4d7db953 100644 --- a/src/PhpSpreadsheet/Reader/Gnumeric.php +++ b/src/PhpSpreadsheet/Reader/Gnumeric.php @@ -2,6 +2,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\NamedRange; @@ -30,6 +31,11 @@ class Gnumeric extends BaseReader private $referenceHelper; + /** + * @var XmlScanner + */ + private $securityScanner; + /** * Create a new Gnumeric. */ @@ -37,6 +43,7 @@ class Gnumeric extends BaseReader { $this->readFilter = new DefaultReadFilter(); $this->referenceHelper = ReferenceHelper::getInstance(); + $this->securityScanner = new XmlScanner(); } /** @@ -77,7 +84,7 @@ class Gnumeric extends BaseReader File::assertFile($pFilename); $xml = new XMLReader(); - $xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); + $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); $xml->setParserProperty(2, true); $worksheetNames = []; @@ -106,7 +113,7 @@ class Gnumeric extends BaseReader File::assertFile($pFilename); $xml = new XMLReader(); - $xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); + $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); $xml->setParserProperty(2, true); $worksheetInfo = []; @@ -196,7 +203,7 @@ class Gnumeric extends BaseReader $gFileData = $this->gzfileGetContents($pFilename); - $xml = simplexml_load_string($this->securityScan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); + $xml = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); $namespacesMeta = $xml->getNamespaces(true); $gnmXML = $xml->children($namespacesMeta['gnm']); diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index b570fa4a..62153926 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -6,6 +6,7 @@ use DOMDocument; use DOMElement; use DOMNode; use DOMText; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Spreadsheet; use PhpOffice\PhpSpreadsheet\Style\Border; @@ -16,6 +17,11 @@ use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; /** PhpSpreadsheet root directory */ class Html extends BaseReader { + /** + * @var XmlScannerscan + */ + private $securityScanner; + /** * Sample size to read to determine if it's HTML or not. */ @@ -105,6 +111,7 @@ class Html extends BaseReader public function __construct() { $this->readFilter = new DefaultReadFilter(); + $this->securityScanner = new XmlScanner('loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); + $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); if ($loaded === false) { throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document'); } @@ -585,23 +592,6 @@ class Html extends BaseReader return $this; } - /** - * Scan theXML for use of readFilter = new DefaultReadFilter(); + $this->securityScanner = new XmlScanner(); } /** @@ -52,7 +59,7 @@ class Ods extends BaseReader $mimeType = $zip->getFromName($stat['name']); } elseif ($stat = $zip->statName('META-INF/manifest.xml')) { $xml = simplexml_load_string( - $this->securityScan($zip->getFromName('META-INF/manifest.xml')), + $this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -100,7 +107,7 @@ class Ods extends BaseReader $xml = new XMLReader(); $xml->xml( - $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'), + $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), null, Settings::getLibXmlLoaderOptions() ); @@ -154,7 +161,7 @@ class Ods extends BaseReader $xml = new XMLReader(); $xml->xml( - $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'), + $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), null, Settings::getLibXmlLoaderOptions() ); @@ -267,7 +274,7 @@ class Ods extends BaseReader // Meta $xml = simplexml_load_string( - $this->securityScan($zip->getFromName('meta.xml')), + $this->securityScanner->scan($zip->getFromName('meta.xml')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -367,7 +374,7 @@ class Ods extends BaseReader $dom = new \DOMDocument('1.01', 'UTF-8'); $dom->loadXML( - $this->securityScan($zip->getFromName('content.xml')), + $this->securityScanner->scan($zip->getFromName('content.xml')), Settings::getLibXmlLoaderOptions() ); diff --git a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php new file mode 100644 index 00000000..7bd21402 --- /dev/null +++ b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php @@ -0,0 +1,86 @@ +pattern = $pattern; + $this->libxmlDisableEntityLoader = $this->IdentifyLibxmlDisableEntityLoaderAvailability(); + + if ($this->libxmlDisableEntityLoader) { + libxml_disable_entity_loader(false); + } + } + + private function IdentifyLibxmlDisableEntityLoaderAvailability() + { + if (PHP_MAJOR_VERSION > 7) { + switch (PHP_MINOR_VERSION) { + case 2: + return (PHP_RELEASE_VERSION >= 1); + case 1: + return (PHP_RELEASE_VERSION >= 13); + case 0: + return (PHP_RELEASE_VERSION >= 27); + } + return true; + } + + return false; + } + + /** + * Scan the XML for use of libxmlDisableEntityLoader) { + $pattern = '/\\0?' . implode('\\0?', str_split($this->pattern)) . '\\0?/'; + if (preg_match($pattern, $xml)) { + throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks'); + } + } + + return $xml; + } + + /** + * Scan theXML for use of scan(file_get_contents($filestream)); + } +} diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index 30b6b6cf..91a4804c 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -2,6 +2,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Hyperlink; use PhpOffice\PhpSpreadsheet\Document\Properties; @@ -46,6 +47,11 @@ class Xlsx extends BaseReader */ private static $theme = null; + /** + * @var XmlScanner + */ + private $securityScanner; + /** * Create a new Xlsx Reader instance. */ @@ -53,6 +59,7 @@ class Xlsx extends BaseReader { $this->readFilter = new DefaultReadFilter(); $this->referenceHelper = ReferenceHelper::getInstance(); + $this->securityScanner = new XmlScanner(); } /** @@ -74,7 +81,7 @@ class Xlsx extends BaseReader if ($zip->open($pFilename) === true) { // check if it is an OOXML archive $rels = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, '_rels/.rels') ), 'SimpleXMLElement', @@ -119,14 +126,14 @@ class Xlsx extends BaseReader // The files we're looking at here are small enough that simpleXML is more efficient than XMLReader //~ http://schemas.openxmlformats.org/package/2006/relationships"); $rels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')) + $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')) ); foreach ($rels->Relationship as $rel) { switch ($rel['Type']) { case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument': //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlWorkbook = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")) + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")) ); if ($xmlWorkbook->sheets) { @@ -163,7 +170,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $rels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -173,7 +180,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorkbook = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels') ), 'SimpleXMLElement', @@ -190,7 +197,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlWorkbook = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, "{$rel['Target']}") ), 'SimpleXMLElement', @@ -212,7 +219,7 @@ class Xlsx extends BaseReader $xml = new XMLReader(); $xml->xml( - $this->securityScanFile( + $this->securityScanner->scanFile( 'zip://' . File::realpath($pFilename) . '#' . "$dir/$fileWorksheet" ), null, @@ -403,7 +410,7 @@ class Xlsx extends BaseReader // Read the theme first, because we need the colour scheme when reading the styles //~ http://schemas.openxmlformats.org/package/2006/relationships" $wbRels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -414,7 +421,7 @@ class Xlsx extends BaseReader $themeOrderAdditional = count($themeOrderArray); $xmlTheme = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -450,7 +457,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $rels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -458,7 +465,7 @@ class Xlsx extends BaseReader switch ($rel['Type']) { case 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties': $xmlCore = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -481,7 +488,7 @@ class Xlsx extends BaseReader break; case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties': $xmlCore = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -498,7 +505,7 @@ class Xlsx extends BaseReader break; case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties': $xmlCore = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -532,7 +539,7 @@ class Xlsx extends BaseReader $dir = dirname($rel['Target']); //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorkbook = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -542,7 +549,7 @@ class Xlsx extends BaseReader $xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings']")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlStrings = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -589,7 +596,7 @@ class Xlsx extends BaseReader $xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlStyles = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -700,7 +707,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlWorkbook = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -752,7 +759,7 @@ class Xlsx extends BaseReader $fileWorksheet = $worksheets[(string) self::getArrayItem($eleSheet->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')]; //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlSheet = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1315,7 +1322,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1364,7 +1371,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1385,7 +1392,7 @@ class Xlsx extends BaseReader // Load comments file $relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath); $commentsFile = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $relPath)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1415,7 +1422,7 @@ class Xlsx extends BaseReader // Load VML comments file $relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath); $vmlCommentsFile = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $relPath)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1489,7 +1496,7 @@ class Xlsx extends BaseReader $unparsedVmlDrawing[$rId] = []; $unparsedVmlDrawing[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $relPath); $unparsedVmlDrawing[$rId]['relFilePath'] = $relPath; - $unparsedVmlDrawing[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath'])); + $unparsedVmlDrawing[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath'])); unset($unparsedVmlDrawing); } } @@ -1499,7 +1506,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1517,7 +1524,7 @@ class Xlsx extends BaseReader // Fetch linked images //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsVML = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname($vmlRelationship) . '/_rels/' . basename($vmlRelationship) . '.rels') ), 'SimpleXMLElement', @@ -1532,7 +1539,7 @@ class Xlsx extends BaseReader // Fetch VML document $vmlDrawing = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $vmlRelationship)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $vmlRelationship)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1580,7 +1587,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1597,7 +1604,7 @@ class Xlsx extends BaseReader $fileDrawing = $drawings[(string) self::getArrayItem($drawing->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')]; //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsDrawing = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname($fileDrawing) . '/_rels/' . basename($fileDrawing) . '.rels') ), 'SimpleXMLElement', @@ -1623,7 +1630,7 @@ class Xlsx extends BaseReader } } $xmlDrawing = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $fileDrawing)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() )->children('http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing'); @@ -1762,7 +1769,7 @@ class Xlsx extends BaseReader // unparsed drawing AlternateContent $xmlAltDrawing = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $fileDrawing)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() )->children('http://schemas.openxmlformats.org/markup-compatibility/2006'); @@ -1981,7 +1988,7 @@ class Xlsx extends BaseReader if (!$this->readDataOnly) { $contentTypes = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, '[Content_Types].xml') ), 'SimpleXMLElement', @@ -2005,7 +2012,7 @@ class Xlsx extends BaseReader if ($this->includeCharts) { $chartEntryRef = ltrim($contentType['PartName'], '/'); $chartElements = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, $chartEntryRef) ), 'SimpleXMLElement', @@ -2292,7 +2299,7 @@ class Xlsx extends BaseReader if ($dataRels) { // exists and not empty if the ribbon have some pictures (other than internal MSO) $UIRels = simplexml_load_string( - $this->securityScan($dataRels), + $this->securityScanner->scan($dataRels), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -2429,7 +2436,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -2448,7 +2455,7 @@ class Xlsx extends BaseReader $unparsedCtrlProps[$rId] = []; $unparsedCtrlProps[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $ctrlProp['Target']); $unparsedCtrlProps[$rId]['relFilePath'] = (string) $ctrlProp['Target']; - $unparsedCtrlProps[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath'])); + $unparsedCtrlProps[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath'])); } unset($unparsedCtrlProps); } @@ -2461,7 +2468,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -2480,7 +2487,7 @@ class Xlsx extends BaseReader $unparsedPrinterSettings[$rId] = []; $unparsedPrinterSettings[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $printerSettings['Target']); $unparsedPrinterSettings[$rId]['relFilePath'] = (string) $printerSettings['Target']; - $unparsedPrinterSettings[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath'])); + $unparsedPrinterSettings[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath'])); } unset($unparsedPrinterSettings); } diff --git a/src/PhpSpreadsheet/Reader/Xml.php b/src/PhpSpreadsheet/Reader/Xml.php index 65babc38..7704232f 100644 --- a/src/PhpSpreadsheet/Reader/Xml.php +++ b/src/PhpSpreadsheet/Reader/Xml.php @@ -2,6 +2,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\Document\Properties; @@ -35,12 +36,18 @@ class Xml extends BaseReader */ protected $charSet = 'UTF-8'; + /** + * @var XmlScanner + */ + private $securityScanner; + /** * Create a new Excel2003XML Reader instance. */ public function __construct() { $this->readFilter = new DefaultReadFilter(); + $this->securityScanner = new XmlScanner(); } /** @@ -109,7 +116,7 @@ class Xml extends BaseReader { try { $xml = simplexml_load_string( - $this->securityScan(file_get_contents($pFilename)), + $this->securityScanner->scan(file_get_contents($pFilename)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); diff --git a/tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php b/tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php new file mode 100644 index 00000000..8819b35d --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php @@ -0,0 +1,57 @@ +scanFile($filename); + self::assertEquals($expectedResult, $result); + } + + public function providerValidXML() + { + $tests = []; + foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestValid*.xml') as $file) { + $tests[basename($file)] = [realpath($file), file_get_contents($file)]; + } + + return $tests; + } + + + /** + * @dataProvider providerInvalidXML + * + * @param mixed $filename + */ + public function testInvalidXML($filename) + { + $this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class); + + $reader = new XmlScanner(); + $expectedResult = 'FAILURE: Should throw an Exception rather than return a value'; + $result = $reader->scanFile($filename); + self::assertEquals($expectedResult, $result); + } + + public function providerInvalidXML() + { + $tests = []; + foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestInvalidUTF*.xml') as $file) { + $tests[basename($file)] = [realpath($file)]; + } + + return $tests; + } +} \ No newline at end of file diff --git a/tests/PhpSpreadsheetTests/Reader/XmlTest.php b/tests/PhpSpreadsheetTests/Reader/XmlTest.php index 43240481..e275735e 100644 --- a/tests/PhpSpreadsheetTests/Reader/XmlTest.php +++ b/tests/PhpSpreadsheetTests/Reader/XmlTest.php @@ -9,31 +9,6 @@ use PHPUnit\Framework\TestCase; class XmlTest extends TestCase { - /** - * @dataProvider providerInvalidXML - * - * @param mixed $filename - */ - public function testInvalidXML($filename) - { - $this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class); - - $reader = $this->getMockForAbstractClass(BaseReader::class); - $expectedResult = 'FAILURE: Should throw an Exception rather than return a value'; - $result = $reader->securityScanFile($filename); - self::assertEquals($expectedResult, $result); - } - - public function providerInvalidXML() - { - $tests = []; - foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestInvalidUTF*.xml') as $file) { - $tests[basename($file)] = [realpath($file)]; - } - - return $tests; - } - /** * @dataProvider providerInvalidSimpleXML * @@ -57,29 +32,6 @@ class XmlTest extends TestCase return $tests; } - /** - * @dataProvider providerValidXML - * - * @param mixed $filename - * @param mixed $expectedResult - */ - public function testValidXML($filename, $expectedResult) - { - $reader = $this->getMockForAbstractClass(BaseReader::class); - $result = $reader->securityScanFile($filename); - self::assertEquals($expectedResult, $result); - } - - public function providerValidXML() - { - $tests = []; - foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestValid*.xml') as $file) { - $tests[basename($file)] = [realpath($file), file_get_contents($file)]; - } - - return $tests; - } - /** * Check if it can read XML Hyperlink correctly. */