From 0f8f071e24ee8b114d894ac172f77dc250e5bfa4 Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Tue, 20 Nov 2018 20:39:13 +0100 Subject: [PATCH 1/3] WIP: Xxe (#780) Changes to the xml security scanner to use libxml_disable_entity_loader() when cleanly supported and thread-safe, and to handle UTF-7 charset which otherwise permits an XXE exploit --- composer.lock | 2 +- src/PhpSpreadsheet/Reader/BaseReader.php | 33 ------- src/PhpSpreadsheet/Reader/Gnumeric.php | 13 ++- src/PhpSpreadsheet/Reader/Html.php | 26 ++---- src/PhpSpreadsheet/Reader/Ods.php | 17 ++-- .../Reader/Security/XmlScanner.php | 87 +++++++++++++++++++ src/PhpSpreadsheet/Reader/Xlsx.php | 81 +++++++++-------- src/PhpSpreadsheet/Reader/Xml.php | 9 +- .../Reader/Security/XmlScannerTest.php | 57 ++++++++++++ tests/PhpSpreadsheetTests/Reader/XmlTest.php | 49 ----------- tests/data/Reader/Xml/XEETestInvalidUTF-7.xml | 2 + 11 files changed, 229 insertions(+), 147 deletions(-) create mode 100644 src/PhpSpreadsheet/Reader/Security/XmlScanner.php create mode 100644 tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php create mode 100644 tests/data/Reader/Xml/XEETestInvalidUTF-7.xml diff --git a/composer.lock b/composer.lock index a43e910c..20d4273d 100644 --- a/composer.lock +++ b/composer.lock @@ -3137,4 +3137,4 @@ "ext-zlib": "*" }, "platform-dev": [] -} +} \ No newline at end of file diff --git a/src/PhpSpreadsheet/Reader/BaseReader.php b/src/PhpSpreadsheet/Reader/BaseReader.php index 0a092d36..59a5a9d8 100644 --- a/src/PhpSpreadsheet/Reader/BaseReader.php +++ b/src/PhpSpreadsheet/Reader/BaseReader.php @@ -221,37 +221,4 @@ abstract class BaseReader implements IReader throw new Exception('Could not open file ' . $pFilename . ' for reading.'); } } - - /** - * Scan theXML for use of securityScan(file_get_contents($filestream)); - } } diff --git a/src/PhpSpreadsheet/Reader/Gnumeric.php b/src/PhpSpreadsheet/Reader/Gnumeric.php index 228fb717..a726599e 100644 --- a/src/PhpSpreadsheet/Reader/Gnumeric.php +++ b/src/PhpSpreadsheet/Reader/Gnumeric.php @@ -5,6 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\NamedRange; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\ReferenceHelper; use PhpOffice\PhpSpreadsheet\RichText\RichText; use PhpOffice\PhpSpreadsheet\Settings; @@ -30,6 +31,11 @@ class Gnumeric extends BaseReader private $referenceHelper; + /** + * @var XmlScanner + */ + private $securityScanner; + /** * Create a new Gnumeric. */ @@ -37,6 +43,7 @@ class Gnumeric extends BaseReader { $this->readFilter = new DefaultReadFilter(); $this->referenceHelper = ReferenceHelper::getInstance(); + $this->securityScanner = new XmlScanner(); } /** @@ -77,7 +84,7 @@ class Gnumeric extends BaseReader File::assertFile($pFilename); $xml = new XMLReader(); - $xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); + $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); $xml->setParserProperty(2, true); $worksheetNames = []; @@ -106,7 +113,7 @@ class Gnumeric extends BaseReader File::assertFile($pFilename); $xml = new XMLReader(); - $xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); + $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); $xml->setParserProperty(2, true); $worksheetInfo = []; @@ -196,7 +203,7 @@ class Gnumeric extends BaseReader $gFileData = $this->gzfileGetContents($pFilename); - $xml = simplexml_load_string($this->securityScan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); + $xml = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); $namespacesMeta = $xml->getNamespaces(true); $gnmXML = $xml->children($namespacesMeta['gnm']); diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index b570fa4a..fc09f1be 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -7,6 +7,7 @@ use DOMElement; use DOMNode; use DOMText; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Spreadsheet; use PhpOffice\PhpSpreadsheet\Style\Border; use PhpOffice\PhpSpreadsheet\Style\Color; @@ -16,6 +17,11 @@ use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; /** PhpSpreadsheet root directory */ class Html extends BaseReader { + /** + * @var XmlScanner + */ + private $securityScanner; + /** * Sample size to read to determine if it's HTML or not. */ @@ -105,6 +111,7 @@ class Html extends BaseReader public function __construct() { $this->readFilter = new DefaultReadFilter(); + $this->securityScanner = new XmlScanner('loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); + $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); if ($loaded === false) { throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document'); } @@ -585,23 +592,6 @@ class Html extends BaseReader return $this; } - /** - * Scan theXML for use of readFilter = new DefaultReadFilter(); + $this->securityScanner = new XmlScanner(); } /** @@ -52,7 +59,7 @@ class Ods extends BaseReader $mimeType = $zip->getFromName($stat['name']); } elseif ($stat = $zip->statName('META-INF/manifest.xml')) { $xml = simplexml_load_string( - $this->securityScan($zip->getFromName('META-INF/manifest.xml')), + $this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -100,7 +107,7 @@ class Ods extends BaseReader $xml = new XMLReader(); $xml->xml( - $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'), + $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), null, Settings::getLibXmlLoaderOptions() ); @@ -154,7 +161,7 @@ class Ods extends BaseReader $xml = new XMLReader(); $xml->xml( - $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'), + $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'), null, Settings::getLibXmlLoaderOptions() ); @@ -267,7 +274,7 @@ class Ods extends BaseReader // Meta $xml = simplexml_load_string( - $this->securityScan($zip->getFromName('meta.xml')), + $this->securityScanner->scan($zip->getFromName('meta.xml')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -367,7 +374,7 @@ class Ods extends BaseReader $dom = new \DOMDocument('1.01', 'UTF-8'); $dom->loadXML( - $this->securityScan($zip->getFromName('content.xml')), + $this->securityScanner->scan($zip->getFromName('content.xml')), Settings::getLibXmlLoaderOptions() ); diff --git a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php new file mode 100644 index 00000000..eefd7c7d --- /dev/null +++ b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php @@ -0,0 +1,87 @@ +pattern = $pattern; + $this->libxmlDisableEntityLoader = $this->identifyLibxmlDisableEntityLoaderAvailability(); + + if ($this->libxmlDisableEntityLoader) { + libxml_disable_entity_loader(true); + } + } + + private function identifyLibxmlDisableEntityLoaderAvailability() + { + if (PHP_MAJOR_VERSION == 7) { + switch (PHP_MINOR_VERSION) { + case 2: + return PHP_RELEASE_VERSION >= 1; + case 1: + return PHP_RELEASE_VERSION >= 13; + case 0: + return PHP_RELEASE_VERSION >= 27; + } + + return true; + } + + return false; + } + + /** + * Scan the XML for use of pattern)) . '\\0?/'; + if (preg_match($pattern, $xml)) { + throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks'); + } + + return $xml; + } + + /** + * Scan theXML for use of scan(file_get_contents($filestream)); + } +} diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index 30b6b6cf..b1e076a8 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -6,6 +6,7 @@ use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Hyperlink; use PhpOffice\PhpSpreadsheet\Document\Properties; use PhpOffice\PhpSpreadsheet\NamedRange; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Reader\Xlsx\Chart; use PhpOffice\PhpSpreadsheet\ReferenceHelper; use PhpOffice\PhpSpreadsheet\RichText\RichText; @@ -46,6 +47,11 @@ class Xlsx extends BaseReader */ private static $theme = null; + /** + * @var XmlScanner + */ + private $securityScanner; + /** * Create a new Xlsx Reader instance. */ @@ -53,6 +59,7 @@ class Xlsx extends BaseReader { $this->readFilter = new DefaultReadFilter(); $this->referenceHelper = ReferenceHelper::getInstance(); + $this->securityScanner = new XmlScanner(); } /** @@ -74,7 +81,7 @@ class Xlsx extends BaseReader if ($zip->open($pFilename) === true) { // check if it is an OOXML archive $rels = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, '_rels/.rels') ), 'SimpleXMLElement', @@ -119,14 +126,14 @@ class Xlsx extends BaseReader // The files we're looking at here are small enough that simpleXML is more efficient than XMLReader //~ http://schemas.openxmlformats.org/package/2006/relationships"); $rels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')) + $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')) ); foreach ($rels->Relationship as $rel) { switch ($rel['Type']) { case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument': //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlWorkbook = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")) + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")) ); if ($xmlWorkbook->sheets) { @@ -163,7 +170,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $rels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -173,7 +180,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorkbook = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels') ), 'SimpleXMLElement', @@ -190,7 +197,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlWorkbook = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, "{$rel['Target']}") ), 'SimpleXMLElement', @@ -212,7 +219,7 @@ class Xlsx extends BaseReader $xml = new XMLReader(); $xml->xml( - $this->securityScanFile( + $this->securityScanner->scanFile( 'zip://' . File::realpath($pFilename) . '#' . "$dir/$fileWorksheet" ), null, @@ -403,7 +410,7 @@ class Xlsx extends BaseReader // Read the theme first, because we need the colour scheme when reading the styles //~ http://schemas.openxmlformats.org/package/2006/relationships" $wbRels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -414,7 +421,7 @@ class Xlsx extends BaseReader $themeOrderAdditional = count($themeOrderArray); $xmlTheme = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -450,7 +457,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $rels = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -458,7 +465,7 @@ class Xlsx extends BaseReader switch ($rel['Type']) { case 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties': $xmlCore = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -481,7 +488,7 @@ class Xlsx extends BaseReader break; case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties': $xmlCore = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -498,7 +505,7 @@ class Xlsx extends BaseReader break; case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties': $xmlCore = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -532,7 +539,7 @@ class Xlsx extends BaseReader $dir = dirname($rel['Target']); //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorkbook = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -542,7 +549,7 @@ class Xlsx extends BaseReader $xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings']")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlStrings = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -589,7 +596,7 @@ class Xlsx extends BaseReader $xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']")); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlStyles = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -700,7 +707,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlWorkbook = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -752,7 +759,7 @@ class Xlsx extends BaseReader $fileWorksheet = $worksheets[(string) self::getArrayItem($eleSheet->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')]; //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" $xmlSheet = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")), + $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1315,7 +1322,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1364,7 +1371,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1385,7 +1392,7 @@ class Xlsx extends BaseReader // Load comments file $relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath); $commentsFile = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $relPath)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1415,7 +1422,7 @@ class Xlsx extends BaseReader // Load VML comments file $relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath); $vmlCommentsFile = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $relPath)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1489,7 +1496,7 @@ class Xlsx extends BaseReader $unparsedVmlDrawing[$rId] = []; $unparsedVmlDrawing[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $relPath); $unparsedVmlDrawing[$rId]['relFilePath'] = $relPath; - $unparsedVmlDrawing[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath'])); + $unparsedVmlDrawing[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath'])); unset($unparsedVmlDrawing); } } @@ -1499,7 +1506,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1517,7 +1524,7 @@ class Xlsx extends BaseReader // Fetch linked images //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsVML = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname($vmlRelationship) . '/_rels/' . basename($vmlRelationship) . '.rels') ), 'SimpleXMLElement', @@ -1532,7 +1539,7 @@ class Xlsx extends BaseReader // Fetch VML document $vmlDrawing = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $vmlRelationship)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $vmlRelationship)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -1580,7 +1587,7 @@ class Xlsx extends BaseReader if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -1597,7 +1604,7 @@ class Xlsx extends BaseReader $fileDrawing = $drawings[(string) self::getArrayItem($drawing->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')]; //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsDrawing = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname($fileDrawing) . '/_rels/' . basename($fileDrawing) . '.rels') ), 'SimpleXMLElement', @@ -1623,7 +1630,7 @@ class Xlsx extends BaseReader } } $xmlDrawing = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $fileDrawing)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() )->children('http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing'); @@ -1762,7 +1769,7 @@ class Xlsx extends BaseReader // unparsed drawing AlternateContent $xmlAltDrawing = simplexml_load_string( - $this->securityScan($this->getFromZipArchive($zip, $fileDrawing)), + $this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() )->children('http://schemas.openxmlformats.org/markup-compatibility/2006'); @@ -1981,7 +1988,7 @@ class Xlsx extends BaseReader if (!$this->readDataOnly) { $contentTypes = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, '[Content_Types].xml') ), 'SimpleXMLElement', @@ -2005,7 +2012,7 @@ class Xlsx extends BaseReader if ($this->includeCharts) { $chartEntryRef = ltrim($contentType['PartName'], '/'); $chartElements = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, $chartEntryRef) ), 'SimpleXMLElement', @@ -2292,7 +2299,7 @@ class Xlsx extends BaseReader if ($dataRels) { // exists and not empty if the ribbon have some pictures (other than internal MSO) $UIRels = simplexml_load_string( - $this->securityScan($dataRels), + $this->securityScanner->scan($dataRels), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); @@ -2429,7 +2436,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -2448,7 +2455,7 @@ class Xlsx extends BaseReader $unparsedCtrlProps[$rId] = []; $unparsedCtrlProps[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $ctrlProp['Target']); $unparsedCtrlProps[$rId]['relFilePath'] = (string) $ctrlProp['Target']; - $unparsedCtrlProps[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath'])); + $unparsedCtrlProps[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath'])); } unset($unparsedCtrlProps); } @@ -2461,7 +2468,7 @@ class Xlsx extends BaseReader //~ http://schemas.openxmlformats.org/package/2006/relationships" $relsWorksheet = simplexml_load_string( - $this->securityScan( + $this->securityScanner->scan( $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') ), 'SimpleXMLElement', @@ -2480,7 +2487,7 @@ class Xlsx extends BaseReader $unparsedPrinterSettings[$rId] = []; $unparsedPrinterSettings[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $printerSettings['Target']); $unparsedPrinterSettings[$rId]['relFilePath'] = (string) $printerSettings['Target']; - $unparsedPrinterSettings[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath'])); + $unparsedPrinterSettings[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath'])); } unset($unparsedPrinterSettings); } diff --git a/src/PhpSpreadsheet/Reader/Xml.php b/src/PhpSpreadsheet/Reader/Xml.php index 65babc38..9e982f2a 100644 --- a/src/PhpSpreadsheet/Reader/Xml.php +++ b/src/PhpSpreadsheet/Reader/Xml.php @@ -5,6 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\Document\Properties; +use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\RichText\RichText; use PhpOffice\PhpSpreadsheet\Settings; use PhpOffice\PhpSpreadsheet\Shared\Date; @@ -35,12 +36,18 @@ class Xml extends BaseReader */ protected $charSet = 'UTF-8'; + /** + * @var XmlScanner + */ + private $securityScanner; + /** * Create a new Excel2003XML Reader instance. */ public function __construct() { $this->readFilter = new DefaultReadFilter(); + $this->securityScanner = new XmlScanner(); } /** @@ -109,7 +116,7 @@ class Xml extends BaseReader { try { $xml = simplexml_load_string( - $this->securityScan(file_get_contents($pFilename)), + $this->securityScanner->scan(file_get_contents($pFilename)), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); diff --git a/tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php b/tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php new file mode 100644 index 00000000..b914de08 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php @@ -0,0 +1,57 @@ +scanFile($filename); + self::assertEquals($expectedResult, $result); + } + + public function providerValidXML() + { + $tests = []; + foreach (glob(__DIR__ . '/../../../data/Reader/Xml/XEETestValid*.xml') as $file) { + $tests[basename($file)] = [realpath($file), file_get_contents($file)]; + } + + return $tests; + } + + /** + * @dataProvider providerInvalidXML + * + * @param mixed $filename + */ + public function testInvalidXML($filename) + { + $this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class); + + $reader = new XmlScanner(); + $expectedResult = 'FAILURE: Should throw an Exception rather than return a value'; + $result = $reader->scanFile($filename); + self::assertEquals($expectedResult, $result); + } + + public function providerInvalidXML() + { + $tests = []; + foreach (glob(__DIR__ . '/../../../data/Reader/Xml/XEETestInvalidUTF*.xml') as $file) { + $tests[basename($file)] = [realpath($file)]; + } + + return $tests; + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/XmlTest.php b/tests/PhpSpreadsheetTests/Reader/XmlTest.php index 43240481..f085f437 100644 --- a/tests/PhpSpreadsheetTests/Reader/XmlTest.php +++ b/tests/PhpSpreadsheetTests/Reader/XmlTest.php @@ -3,37 +3,11 @@ namespace PhpOffice\PhpSpreadsheetTests\Reader; use PhpOffice\PhpSpreadsheet\Cell\DataType; -use PhpOffice\PhpSpreadsheet\Reader\BaseReader; use PhpOffice\PhpSpreadsheet\Reader\Xml; use PHPUnit\Framework\TestCase; class XmlTest extends TestCase { - /** - * @dataProvider providerInvalidXML - * - * @param mixed $filename - */ - public function testInvalidXML($filename) - { - $this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class); - - $reader = $this->getMockForAbstractClass(BaseReader::class); - $expectedResult = 'FAILURE: Should throw an Exception rather than return a value'; - $result = $reader->securityScanFile($filename); - self::assertEquals($expectedResult, $result); - } - - public function providerInvalidXML() - { - $tests = []; - foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestInvalidUTF*.xml') as $file) { - $tests[basename($file)] = [realpath($file)]; - } - - return $tests; - } - /** * @dataProvider providerInvalidSimpleXML * @@ -57,29 +31,6 @@ class XmlTest extends TestCase return $tests; } - /** - * @dataProvider providerValidXML - * - * @param mixed $filename - * @param mixed $expectedResult - */ - public function testValidXML($filename, $expectedResult) - { - $reader = $this->getMockForAbstractClass(BaseReader::class); - $result = $reader->securityScanFile($filename); - self::assertEquals($expectedResult, $result); - } - - public function providerValidXML() - { - $tests = []; - foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestValid*.xml') as $file) { - $tests[basename($file)] = [realpath($file), file_get_contents($file)]; - } - - return $tests; - } - /** * Check if it can read XML Hyperlink correctly. */ diff --git a/tests/data/Reader/Xml/XEETestInvalidUTF-7.xml b/tests/data/Reader/Xml/XEETestInvalidUTF-7.xml new file mode 100644 index 00000000..0b791d42 --- /dev/null +++ b/tests/data/Reader/Xml/XEETestInvalidUTF-7.xml @@ -0,0 +1,2 @@ + ++ADwAIQ-DOCTYPE xmlrootname +AFsAPAAh-ENTITY +ACU aaa SYSTEM +ACI-http://127.0.0.1:8080/ext.dtd+ACIAPgAl-aaa+ADsAJQ-ccc+ADsAJQ-ddd+ADsAXQA+ \ No newline at end of file From 7f46932b2fabbaaf5a70e67cacc472a50b95cbb9 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Tue, 20 Nov 2018 20:51:42 +0100 Subject: [PATCH 2/3] Update Changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60f3ef63..e1323f66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com) and this project adheres to [Semantic Versioning](https://semver.org). -## [Unreleased] +## [1.5.1] - 2018-11-20 + +### Security + +- Fix and improve XXE security scanning for XML-based and HTML Readers - [#771](https://github.com/PHPOffice/PhpSpreadsheet/issues/771) ### Added From f5d1f03e94973654078140a4a35838cc94be6827 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Tue, 20 Nov 2018 20:57:38 +0100 Subject: [PATCH 3/3] Update Changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1323f66..ddcd4860 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com) and this project adheres to [Semantic Versioning](https://semver.org). +## [Unreleased] + ## [1.5.1] - 2018-11-20 ### Security