Clean handle charset in XXE scanner

This commit is contained in:
MarkBaker 2018-11-19 22:47:34 +01:00
parent 3bea6f516b
commit a4d97ba896
9 changed files with 225 additions and 145 deletions

View File

@ -221,37 +221,4 @@ abstract class BaseReader implements IReader
throw new Exception('Could not open file ' . $pFilename . ' for reading.'); throw new Exception('Could not open file ' . $pFilename . ' for reading.');
} }
} }
/**
 * Reject XML that contains a document type declaration.
 *
 * The payload is searched for the literal marker "<!DOCTYPE"; an optional NUL byte is
 * tolerated before, between and after each character of the marker (presumably so that
 * payloads in a multi-byte encoding such as UTF-16 are still recognised).
 *
 * @param string $xml Raw XML to inspect
 *
 * @throws Exception When the marker is found in the payload
 *
 * @return string The unmodified XML when nothing suspicious was found
 */
public function securityScan($xml)
{
    // Build "/\0?<\0?!\0?D ... E\0?/" one character at a time.
    $regex = '/\\0?';
    foreach (str_split('<!DOCTYPE') as $character) {
        $regex .= $character . '\\0?';
    }
    $regex .= '/';

    $suspicious = (bool) preg_match($regex, $xml);
    if ($suspicious) {
        throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
    }

    return $xml;
}
/**
 * Read a file or stream wrapper and run its contents through securityScan().
 *
 * @param string $filestream Path or stream URI handed to file_get_contents()
 *
 * @throws Exception Propagated from securityScan() when the contents are rejected
 *
 * @return string The scanned contents, exactly as returned by securityScan()
 */
public function securityScanFile($filestream)
{
    $contents = file_get_contents($filestream);

    return $this->securityScan($contents);
}
} }

View File

@ -2,6 +2,7 @@
namespace PhpOffice\PhpSpreadsheet\Reader; namespace PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\NamedRange; use PhpOffice\PhpSpreadsheet\NamedRange;
@ -30,6 +31,11 @@ class Gnumeric extends BaseReader
private $referenceHelper; private $referenceHelper;
/**
* @var XmlScanner
*/
private $securityScanner;
/** /**
* Create a new Gnumeric. * Create a new Gnumeric.
*/ */
@ -37,6 +43,7 @@ class Gnumeric extends BaseReader
{ {
$this->readFilter = new DefaultReadFilter(); $this->readFilter = new DefaultReadFilter();
$this->referenceHelper = ReferenceHelper::getInstance(); $this->referenceHelper = ReferenceHelper::getInstance();
$this->securityScanner = new XmlScanner();
} }
/** /**
@ -77,7 +84,7 @@ class Gnumeric extends BaseReader
File::assertFile($pFilename); File::assertFile($pFilename);
$xml = new XMLReader(); $xml = new XMLReader();
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->setParserProperty(2, true); $xml->setParserProperty(2, true);
$worksheetNames = []; $worksheetNames = [];
@ -106,7 +113,7 @@ class Gnumeric extends BaseReader
File::assertFile($pFilename); File::assertFile($pFilename);
$xml = new XMLReader(); $xml = new XMLReader();
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions()); $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->setParserProperty(2, true); $xml->setParserProperty(2, true);
$worksheetInfo = []; $worksheetInfo = [];
@ -196,7 +203,7 @@ class Gnumeric extends BaseReader
$gFileData = $this->gzfileGetContents($pFilename); $gFileData = $this->gzfileGetContents($pFilename);
$xml = simplexml_load_string($this->securityScan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); $xml = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
$namespacesMeta = $xml->getNamespaces(true); $namespacesMeta = $xml->getNamespaces(true);
$gnmXML = $xml->children($namespacesMeta['gnm']); $gnmXML = $xml->children($namespacesMeta['gnm']);

View File

@ -6,6 +6,7 @@ use DOMDocument;
use DOMElement; use DOMElement;
use DOMNode; use DOMNode;
use DOMText; use DOMText;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Spreadsheet; use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Style\Border; use PhpOffice\PhpSpreadsheet\Style\Border;
@ -16,6 +17,11 @@ use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
/** PhpSpreadsheet root directory */ /** PhpSpreadsheet root directory */
class Html extends BaseReader class Html extends BaseReader
{ {
/**
* @var XmlScanner
*/
private $securityScanner;
/** /**
* Sample size to read to determine if it's HTML or not. * Sample size to read to determine if it's HTML or not.
*/ */
@ -105,6 +111,7 @@ class Html extends BaseReader
public function __construct() public function __construct()
{ {
$this->readFilter = new DefaultReadFilter(); $this->readFilter = new DefaultReadFilter();
$this->securityScanner = new XmlScanner('<!ENTITY');
} }
/** /**
@ -543,7 +550,7 @@ class Html extends BaseReader
// Create a new DOM object // Create a new DOM object
$dom = new DOMDocument(); $dom = new DOMDocument();
// Reload the HTML file into the DOM object // Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
if ($loaded === false) { if ($loaded === false) {
throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document'); throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
} }
@ -585,23 +592,6 @@ class Html extends BaseReader
return $this; return $this;
} }
/**
 * Reject markup that contains an entity declaration.
 *
 * The payload is searched for the literal marker "<!ENTITY"; an optional NUL byte is
 * tolerated before, between and after each character of the marker (presumably so that
 * payloads in a multi-byte encoding such as UTF-16 are still recognised).
 *
 * @param string $xml Raw markup to inspect
 *
 * @throws Exception When the marker is found in the payload
 *
 * @return string The unmodified markup when nothing suspicious was found
 */
public function securityScan($xml)
{
    // Build "/\0?<\0?!\0?E ... Y\0?/" one character at a time.
    $regex = '/\\0?';
    foreach (str_split('<!ENTITY') as $character) {
        $regex .= $character . '\\0?';
    }
    $regex .= '/';

    $suspicious = (bool) preg_match($regex, $xml);
    if ($suspicious) {
        throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
    }

    return $xml;
}
/** /**
* Apply inline css inline style. * Apply inline css inline style.
* *

View File

@ -4,6 +4,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader;
use DateTime; use DateTime;
use DateTimeZone; use DateTimeZone;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation; use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\Cell\DataType;
@ -19,12 +20,18 @@ use ZipArchive;
class Ods extends BaseReader class Ods extends BaseReader
{ {
/**
* @var XmlScanner
*/
private $securityScanner;
/** /**
* Create a new Ods Reader instance. * Create a new Ods Reader instance.
*/ */
public function __construct() public function __construct()
{ {
$this->readFilter = new DefaultReadFilter(); $this->readFilter = new DefaultReadFilter();
$this->securityScanner = new XmlScanner();
} }
/** /**
@ -52,7 +59,7 @@ class Ods extends BaseReader
$mimeType = $zip->getFromName($stat['name']); $mimeType = $zip->getFromName($stat['name']);
} elseif ($stat = $zip->statName('META-INF/manifest.xml')) { } elseif ($stat = $zip->statName('META-INF/manifest.xml')) {
$xml = simplexml_load_string( $xml = simplexml_load_string(
$this->securityScan($zip->getFromName('META-INF/manifest.xml')), $this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -100,7 +107,7 @@ class Ods extends BaseReader
$xml = new XMLReader(); $xml = new XMLReader();
$xml->xml( $xml->xml(
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'), $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
null, null,
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -154,7 +161,7 @@ class Ods extends BaseReader
$xml = new XMLReader(); $xml = new XMLReader();
$xml->xml( $xml->xml(
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'), $this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
null, null,
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -267,7 +274,7 @@ class Ods extends BaseReader
// Meta // Meta
$xml = simplexml_load_string( $xml = simplexml_load_string(
$this->securityScan($zip->getFromName('meta.xml')), $this->securityScanner->scan($zip->getFromName('meta.xml')),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -367,7 +374,7 @@ class Ods extends BaseReader
$dom = new \DOMDocument('1.01', 'UTF-8'); $dom = new \DOMDocument('1.01', 'UTF-8');
$dom->loadXML( $dom->loadXML(
$this->securityScan($zip->getFromName('content.xml')), $this->securityScanner->scan($zip->getFromName('content.xml')),
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );

View File

@ -0,0 +1,86 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
use PhpOffice\PhpSpreadsheet\Reader\Exception;
class XmlScanner
{
    /**
     * Identifies whether the thread-safe libxml_disable_entity_loader() function is available.
     *
     * When true, the constructor has disabled external entity loading in libxml and
     * scan() skips its regex check; when false, scan() relies on the regex check.
     *
     * @var bool
     */
    private $libxmlDisableEntityLoader = false;

    /**
     * Literal marker that scan() searches for (for example '<!DOCTYPE' or '<!ENTITY').
     *
     * @var string
     */
    private $pattern;

    /**
     * Create a scanner for the given marker.
     *
     * @param string $pattern Literal text whose presence in the XML aborts the load
     */
    public function __construct($pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;
        $this->libxmlDisableEntityLoader = $this->identifyLibxmlDisableEntityLoaderAvailability();

        if ($this->libxmlDisableEntityLoader) {
            // The argument means "disable": it must be true, otherwise external entity
            // loading is switched ON while scan() simultaneously skips its regex check.
            libxml_disable_entity_loader(true);
        }
    }

    /**
     * Decide from the running PHP version whether libxml_disable_entity_loader() may be used.
     *
     * The thresholds are PHP 7 releases: 7.0.27+, 7.1.13+, 7.2.1+ and any later 7.x minor.
     * Every other major version returns false, so scan() falls back to the regex check
     * (the function is documented as deprecated from PHP 8.0).
     *
     * @return bool
     */
    private function identifyLibxmlDisableEntityLoaderAvailability()
    {
        if (PHP_MAJOR_VERSION !== 7) {
            return false;
        }

        switch (PHP_MINOR_VERSION) {
            case 2:
                return PHP_RELEASE_VERSION >= 1;
            case 1:
                return PHP_RELEASE_VERSION >= 13;
            case 0:
                return PHP_RELEASE_VERSION >= 27;
        }

        return true;
    }

    /**
     * Scan the XML for the configured marker to prevent XXE/XEE attacks.
     *
     * If the XML carries an encoding="..." declaration naming anything other than
     * 'UTF-8', the payload is first converted to UTF-8 with mb_convert_encoding().
     * The marker check only runs when the entity loader could not be disabled.
     *
     * @param string $xml
     *
     * @throws Exception When the marker is found in the XML
     *
     * @return string The XML, converted to UTF-8 when a different charset was declared
     */
    public function scan($xml)
    {
        $pattern = '/encoding="(.*?)"/';
        $result = preg_match($pattern, $xml, $matches);
        $charset = $result ? $matches[1] : 'UTF-8';

        if ($charset !== 'UTF-8') {
            $xml = mb_convert_encoding($xml, 'UTF-8', $charset);
        }

        if (!$this->libxmlDisableEntityLoader) {
            // Tolerate an optional NUL byte around every character of the marker.
            $pattern = '/\\0?' . implode('\\0?', str_split($this->pattern)) . '\\0?/';
            if (preg_match($pattern, $xml)) {
                throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
            }
        }

        return $xml;
    }

    /**
     * Read a file or stream wrapper and scan its contents with scan().
     *
     * @param string $filestream Path or stream URI handed to file_get_contents()
     *
     * @throws Exception When the contents cannot be read, or when scan() rejects them
     *
     * @return string
     */
    public function scanFile($filestream)
    {
        $contents = file_get_contents($filestream);
        if ($contents === false) {
            // Do not hand a boolean to scan(); report the unreadable source instead.
            throw new Exception('Could not read ' . $filestream . ' for XML security scanning.');
        }

        return $this->scan($contents);
    }
}

View File

@ -2,6 +2,7 @@
namespace PhpOffice\PhpSpreadsheet\Reader; namespace PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\Hyperlink; use PhpOffice\PhpSpreadsheet\Cell\Hyperlink;
use PhpOffice\PhpSpreadsheet\Document\Properties; use PhpOffice\PhpSpreadsheet\Document\Properties;
@ -46,6 +47,11 @@ class Xlsx extends BaseReader
*/ */
private static $theme = null; private static $theme = null;
/**
* @var XmlScanner
*/
private $securityScanner;
/** /**
* Create a new Xlsx Reader instance. * Create a new Xlsx Reader instance.
*/ */
@ -53,6 +59,7 @@ class Xlsx extends BaseReader
{ {
$this->readFilter = new DefaultReadFilter(); $this->readFilter = new DefaultReadFilter();
$this->referenceHelper = ReferenceHelper::getInstance(); $this->referenceHelper = ReferenceHelper::getInstance();
$this->securityScanner = new XmlScanner();
} }
/** /**
@ -74,7 +81,7 @@ class Xlsx extends BaseReader
if ($zip->open($pFilename) === true) { if ($zip->open($pFilename) === true) {
// check if it is an OOXML archive // check if it is an OOXML archive
$rels = simplexml_load_string( $rels = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, '_rels/.rels') $this->getFromZipArchive($zip, '_rels/.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -119,14 +126,14 @@ class Xlsx extends BaseReader
// The files we're looking at here are small enough that simpleXML is more efficient than XMLReader // The files we're looking at here are small enough that simpleXML is more efficient than XMLReader
//~ http://schemas.openxmlformats.org/package/2006/relationships"); //~ http://schemas.openxmlformats.org/package/2006/relationships");
$rels = simplexml_load_string( $rels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')) $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels'))
); );
foreach ($rels->Relationship as $rel) { foreach ($rels->Relationship as $rel) {
switch ($rel['Type']) { switch ($rel['Type']) {
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument': case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument':
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlWorkbook = simplexml_load_string( $xmlWorkbook = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")) $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}"))
); );
if ($xmlWorkbook->sheets) { if ($xmlWorkbook->sheets) {
@ -163,7 +170,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$rels = simplexml_load_string( $rels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')), $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -173,7 +180,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorkbook = simplexml_load_string( $relsWorkbook = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels') $this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -190,7 +197,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlWorkbook = simplexml_load_string( $xmlWorkbook = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, "{$rel['Target']}") $this->getFromZipArchive($zip, "{$rel['Target']}")
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -212,7 +219,7 @@ class Xlsx extends BaseReader
$xml = new XMLReader(); $xml = new XMLReader();
$xml->xml( $xml->xml(
$this->securityScanFile( $this->securityScanner->scanFile(
'zip://' . File::realpath($pFilename) . '#' . "$dir/$fileWorksheet" 'zip://' . File::realpath($pFilename) . '#' . "$dir/$fileWorksheet"
), ),
null, null,
@ -403,7 +410,7 @@ class Xlsx extends BaseReader
// Read the theme first, because we need the colour scheme when reading the styles // Read the theme first, because we need the colour scheme when reading the styles
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$wbRels = simplexml_load_string( $wbRels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')), $this->securityScanner->scan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -414,7 +421,7 @@ class Xlsx extends BaseReader
$themeOrderAdditional = count($themeOrderArray); $themeOrderAdditional = count($themeOrderArray);
$xmlTheme = simplexml_load_string( $xmlTheme = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")), $this->securityScanner->scan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -450,7 +457,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$rels = simplexml_load_string( $rels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')), $this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -458,7 +465,7 @@ class Xlsx extends BaseReader
switch ($rel['Type']) { switch ($rel['Type']) {
case 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties': case 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties':
$xmlCore = simplexml_load_string( $xmlCore = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -481,7 +488,7 @@ class Xlsx extends BaseReader
break; break;
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties': case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties':
$xmlCore = simplexml_load_string( $xmlCore = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -498,7 +505,7 @@ class Xlsx extends BaseReader
break; break;
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties': case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties':
$xmlCore = simplexml_load_string( $xmlCore = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -532,7 +539,7 @@ class Xlsx extends BaseReader
$dir = dirname($rel['Target']); $dir = dirname($rel['Target']);
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorkbook = simplexml_load_string( $relsWorkbook = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')), $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -542,7 +549,7 @@ class Xlsx extends BaseReader
$xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings']")); $xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings']"));
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlStrings = simplexml_load_string( $xmlStrings = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -589,7 +596,7 @@ class Xlsx extends BaseReader
$xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']")); $xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']"));
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlStyles = simplexml_load_string( $xmlStyles = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")), $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -700,7 +707,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlWorkbook = simplexml_load_string( $xmlWorkbook = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")), $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -752,7 +759,7 @@ class Xlsx extends BaseReader
$fileWorksheet = $worksheets[(string) self::getArrayItem($eleSheet->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')]; $fileWorksheet = $worksheets[(string) self::getArrayItem($eleSheet->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')];
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main" //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlSheet = simplexml_load_string( $xmlSheet = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")), $this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -1315,7 +1322,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string( $relsWorksheet = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -1364,7 +1371,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string( $relsWorksheet = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -1385,7 +1392,7 @@ class Xlsx extends BaseReader
// Load comments file // Load comments file
$relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath); $relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath);
$commentsFile = simplexml_load_string( $commentsFile = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $relPath)), $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -1415,7 +1422,7 @@ class Xlsx extends BaseReader
// Load VML comments file // Load VML comments file
$relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath); $relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath);
$vmlCommentsFile = simplexml_load_string( $vmlCommentsFile = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $relPath)), $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -1489,7 +1496,7 @@ class Xlsx extends BaseReader
$unparsedVmlDrawing[$rId] = []; $unparsedVmlDrawing[$rId] = [];
$unparsedVmlDrawing[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $relPath); $unparsedVmlDrawing[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $relPath);
$unparsedVmlDrawing[$rId]['relFilePath'] = $relPath; $unparsedVmlDrawing[$rId]['relFilePath'] = $relPath;
$unparsedVmlDrawing[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath'])); $unparsedVmlDrawing[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath']));
unset($unparsedVmlDrawing); unset($unparsedVmlDrawing);
} }
} }
@ -1499,7 +1506,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string( $relsWorksheet = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -1517,7 +1524,7 @@ class Xlsx extends BaseReader
// Fetch linked images // Fetch linked images
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsVML = simplexml_load_string( $relsVML = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname($vmlRelationship) . '/_rels/' . basename($vmlRelationship) . '.rels') $this->getFromZipArchive($zip, dirname($vmlRelationship) . '/_rels/' . basename($vmlRelationship) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -1532,7 +1539,7 @@ class Xlsx extends BaseReader
// Fetch VML document // Fetch VML document
$vmlDrawing = simplexml_load_string( $vmlDrawing = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $vmlRelationship)), $this->securityScanner->scan($this->getFromZipArchive($zip, $vmlRelationship)),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -1580,7 +1587,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) { if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string( $relsWorksheet = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -1597,7 +1604,7 @@ class Xlsx extends BaseReader
$fileDrawing = $drawings[(string) self::getArrayItem($drawing->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')]; $fileDrawing = $drawings[(string) self::getArrayItem($drawing->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')];
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsDrawing = simplexml_load_string( $relsDrawing = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname($fileDrawing) . '/_rels/' . basename($fileDrawing) . '.rels') $this->getFromZipArchive($zip, dirname($fileDrawing) . '/_rels/' . basename($fileDrawing) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -1623,7 +1630,7 @@ class Xlsx extends BaseReader
} }
} }
$xmlDrawing = simplexml_load_string( $xmlDrawing = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $fileDrawing)), $this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
)->children('http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing'); )->children('http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing');
@ -1762,7 +1769,7 @@ class Xlsx extends BaseReader
// unparsed drawing AlternateContent // unparsed drawing AlternateContent
$xmlAltDrawing = simplexml_load_string( $xmlAltDrawing = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $fileDrawing)), $this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
)->children('http://schemas.openxmlformats.org/markup-compatibility/2006'); )->children('http://schemas.openxmlformats.org/markup-compatibility/2006');
@ -1981,7 +1988,7 @@ class Xlsx extends BaseReader
if (!$this->readDataOnly) { if (!$this->readDataOnly) {
$contentTypes = simplexml_load_string( $contentTypes = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, '[Content_Types].xml') $this->getFromZipArchive($zip, '[Content_Types].xml')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -2005,7 +2012,7 @@ class Xlsx extends BaseReader
if ($this->includeCharts) { if ($this->includeCharts) {
$chartEntryRef = ltrim($contentType['PartName'], '/'); $chartEntryRef = ltrim($contentType['PartName'], '/');
$chartElements = simplexml_load_string( $chartElements = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, $chartEntryRef) $this->getFromZipArchive($zip, $chartEntryRef)
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -2292,7 +2299,7 @@ class Xlsx extends BaseReader
if ($dataRels) { if ($dataRels) {
// exists and not empty if the ribbon have some pictures (other than internal MSO) // exists and not empty if the ribbon have some pictures (other than internal MSO)
$UIRels = simplexml_load_string( $UIRels = simplexml_load_string(
$this->securityScan($dataRels), $this->securityScanner->scan($dataRels),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );
@ -2429,7 +2436,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string( $relsWorksheet = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -2448,7 +2455,7 @@ class Xlsx extends BaseReader
$unparsedCtrlProps[$rId] = []; $unparsedCtrlProps[$rId] = [];
$unparsedCtrlProps[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $ctrlProp['Target']); $unparsedCtrlProps[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $ctrlProp['Target']);
$unparsedCtrlProps[$rId]['relFilePath'] = (string) $ctrlProp['Target']; $unparsedCtrlProps[$rId]['relFilePath'] = (string) $ctrlProp['Target'];
$unparsedCtrlProps[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath'])); $unparsedCtrlProps[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath']));
} }
unset($unparsedCtrlProps); unset($unparsedCtrlProps);
} }
@ -2461,7 +2468,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships" //~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string( $relsWorksheet = simplexml_load_string(
$this->securityScan( $this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels') $this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
), ),
'SimpleXMLElement', 'SimpleXMLElement',
@ -2480,7 +2487,7 @@ class Xlsx extends BaseReader
$unparsedPrinterSettings[$rId] = []; $unparsedPrinterSettings[$rId] = [];
$unparsedPrinterSettings[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $printerSettings['Target']); $unparsedPrinterSettings[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $printerSettings['Target']);
$unparsedPrinterSettings[$rId]['relFilePath'] = (string) $printerSettings['Target']; $unparsedPrinterSettings[$rId]['relFilePath'] = (string) $printerSettings['Target'];
$unparsedPrinterSettings[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath'])); $unparsedPrinterSettings[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath']));
} }
unset($unparsedPrinterSettings); unset($unparsedPrinterSettings);
} }

View File

@ -2,6 +2,7 @@
namespace PhpOffice\PhpSpreadsheet\Reader; namespace PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Document\Properties; use PhpOffice\PhpSpreadsheet\Document\Properties;
@ -35,12 +36,18 @@ class Xml extends BaseReader
*/ */
protected $charSet = 'UTF-8'; protected $charSet = 'UTF-8';
/**
* @var XmlScanner
*/
private $securityScanner;
/** /**
* Create a new Excel2003XML Reader instance. * Create a new Excel2003XML Reader instance.
*/ */
public function __construct() public function __construct()
{ {
$this->readFilter = new DefaultReadFilter(); $this->readFilter = new DefaultReadFilter();
$this->securityScanner = new XmlScanner();
} }
/** /**
@ -109,7 +116,7 @@ class Xml extends BaseReader
{ {
try { try {
$xml = simplexml_load_string( $xml = simplexml_load_string(
$this->securityScan(file_get_contents($pFilename)), $this->securityScanner->scan(file_get_contents($pFilename)),
'SimpleXMLElement', 'SimpleXMLElement',
Settings::getLibXmlLoaderOptions() Settings::getLibXmlLoaderOptions()
); );

View File

@ -0,0 +1,57 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Security;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PHPUnit\Framework\TestCase;
/**
 * Tests for the XML security scanner: fixture files matching XEETestValid*
 * must be returned unchanged by scanFile(), while fixture files matching
 * XEETestInvalidUTF* must cause a Reader\Exception to be thrown.
 */
class XmlScannerTest extends TestCase
{
    /**
     * A valid fixture must be returned by the scanner exactly as read from disk.
     *
     * @dataProvider providerValidXML
     *
     * @param mixed $filename
     * @param mixed $expectedResult
     */
    public function testValidXML($filename, $expectedResult)
    {
        $reader = new XmlScanner();
        $result = $reader->scanFile($filename);
        self::assertEquals($expectedResult, $result);
    }

    /**
     * Builds one data set per XEETestValid*.xml fixture, keyed by file name:
     * [absolute path, raw file contents].
     *
     * @return array
     */
    public function providerValidXML()
    {
        $tests = [];
        // glob() returns false on error; fall back to an empty list so the
        // foreach never iterates over a non-array.
        $files = glob(__DIR__ . '/../../data/Reader/Xml/XEETestValid*.xml') ?: [];
        foreach ($files as $file) {
            $tests[basename($file)] = [realpath($file), file_get_contents($file)];
        }

        return $tests;
    }

    /**
     * An invalid fixture must make the scanner throw a Reader\Exception.
     *
     * @dataProvider providerInvalidXML
     *
     * @param mixed $filename
     */
    public function testInvalidXML($filename)
    {
        $this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class);

        $reader = new XmlScanner();
        $expectedResult = 'FAILURE: Should throw an Exception rather than return a value';
        $result = $reader->scanFile($filename);
        // Only reached when scanFile() did not throw; the comparison against
        // the sentinel string then reports the unexpected return value.
        self::assertEquals($expectedResult, $result);
    }

    /**
     * Builds one data set per XEETestInvalidUTF*.xml fixture, keyed by file
     * name: [absolute path].
     *
     * @return array
     */
    public function providerInvalidXML()
    {
        $tests = [];
        // glob() returns false on error; fall back to an empty list so the
        // foreach never iterates over a non-array.
        $files = glob(__DIR__ . '/../../data/Reader/Xml/XEETestInvalidUTF*.xml') ?: [];
        foreach ($files as $file) {
            $tests[basename($file)] = [realpath($file)];
        }

        return $tests;
    }
}

View File

@ -9,31 +9,6 @@ use PHPUnit\Framework\TestCase;
class XmlTest extends TestCase class XmlTest extends TestCase
{ {
/**
* @dataProvider providerInvalidXML
*
* @param mixed $filename
*/
public function testInvalidXML($filename)
{
$this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class);
$reader = $this->getMockForAbstractClass(BaseReader::class);
$expectedResult = 'FAILURE: Should throw an Exception rather than return a value';
$result = $reader->securityScanFile($filename);
self::assertEquals($expectedResult, $result);
}
public function providerInvalidXML()
{
$tests = [];
foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestInvalidUTF*.xml') as $file) {
$tests[basename($file)] = [realpath($file)];
}
return $tests;
}
/** /**
* @dataProvider providerInvalidSimpleXML * @dataProvider providerInvalidSimpleXML
* *
@ -57,29 +32,6 @@ class XmlTest extends TestCase
return $tests; return $tests;
} }
/**
* @dataProvider providerValidXML
*
* @param mixed $filename
* @param mixed $expectedResult
*/
public function testValidXML($filename, $expectedResult)
{
$reader = $this->getMockForAbstractClass(BaseReader::class);
$result = $reader->securityScanFile($filename);
self::assertEquals($expectedResult, $result);
}
public function providerValidXML()
{
$tests = [];
foreach (glob(__DIR__ . '/../../data/Reader/Xml/XEETestValid*.xml') as $file) {
$tests[basename($file)] = [realpath($file), file_get_contents($file)];
}
return $tests;
}
/** /**
* Check if it can read XML Hyperlink correctly. * Check if it can read XML Hyperlink correctly.
*/ */