Changes to the xml security scanner to use libxml_disable_entity_loader() when cleanly supported and thread-safe, and to handle UTF-7 charset which otherwise permits an XXE exploit
This commit is contained in:
Mark Baker 2018-11-20 20:39:13 +01:00 committed by GitHub
parent 3bea6f516b
commit 0f8f071e24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 229 additions and 147 deletions

2
composer.lock generated
View File

@ -3137,4 +3137,4 @@
"ext-zlib": "*"
},
"platform-dev": []
}
}

View File

@ -221,37 +221,4 @@ abstract class BaseReader implements IReader
throw new Exception('Could not open file ' . $pFilename . ' for reading.');
}
}
/**
* Scan the XML for use of <!DOCTYPE to prevent XXE/XEE attacks.
*
* @param string $xml
*
* @throws Exception
*
* @return string
*/
public function securityScan($xml)
{
    // "\0?" lets an optional NUL byte sit before, between and after the
    // characters of the marker, so a NUL-padded marker is detected as well
    // (presumably aimed at 16-bit encodings - confirm).
    $nulTolerantMarker = implode('\\0?', str_split('<!DOCTYPE'));
    $markerFound = preg_match('/\\0?' . $nulTolerantMarker . '\\0?/', $xml);

    if (!$markerFound) {
        // Nothing suspicious: hand the payload back untouched
        return $xml;
    }

    throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
/**
* Read the given file or stream and scan its XML for use of <!DOCTYPE to prevent XXE/XEE attacks.
*
* @param string $filestream
*
* @throws Exception
*
* @return string
*/
public function securityScanFile($filestream)
{
    // Load the complete payload first, then delegate to the string scanner
    $contents = file_get_contents($filestream);

    return $this->securityScan($contents);
}
}

View File

@ -5,6 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\NamedRange;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
@ -30,6 +31,11 @@ class Gnumeric extends BaseReader
private $referenceHelper;
/**
* @var XmlScanner
*/
private $securityScanner;
/**
* Create a new Gnumeric.
*/
@ -37,6 +43,7 @@ class Gnumeric extends BaseReader
{
$this->readFilter = new DefaultReadFilter();
$this->referenceHelper = ReferenceHelper::getInstance();
$this->securityScanner = new XmlScanner();
}
/**
@ -77,7 +84,7 @@ class Gnumeric extends BaseReader
File::assertFile($pFilename);
$xml = new XMLReader();
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->setParserProperty(2, true);
$worksheetNames = [];
@ -106,7 +113,7 @@ class Gnumeric extends BaseReader
File::assertFile($pFilename);
$xml = new XMLReader();
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->setParserProperty(2, true);
$worksheetInfo = [];
@ -196,7 +203,7 @@ class Gnumeric extends BaseReader
$gFileData = $this->gzfileGetContents($pFilename);
$xml = simplexml_load_string($this->securityScan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
$xml = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
$namespacesMeta = $xml->getNamespaces(true);
$gnmXML = $xml->children($namespacesMeta['gnm']);

View File

@ -7,6 +7,7 @@ use DOMElement;
use DOMNode;
use DOMText;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Style\Border;
use PhpOffice\PhpSpreadsheet\Style\Color;
@ -16,6 +17,11 @@ use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
/** PhpSpreadsheet root directory */
class Html extends BaseReader
{
/**
* @var XmlScanner
*/
private $securityScanner;
/**
* Sample size to read to determine if it's HTML or not.
*/
@ -105,6 +111,7 @@ class Html extends BaseReader
public function __construct()
{
// Start out with the default read filter
$this->readFilter = new DefaultReadFilter();
// Scan for '<!ENTITY' rather than XmlScanner's default '<!DOCTYPE' marker
// (presumably because HTML documents routinely carry a <!DOCTYPE declaration - confirm)
$this->securityScanner = new XmlScanner('<!ENTITY');
}
/**
@ -543,7 +550,7 @@ class Html extends BaseReader
// Create a new DOM object
$dom = new DOMDocument();
// Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
if ($loaded === false) {
throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
}
@ -585,23 +592,6 @@ class Html extends BaseReader
return $this;
}
/**
* Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param string $xml
*
* @return string
*/
public function securityScan($xml)
{
    // "\0?" lets an optional NUL byte sit before, between and after the
    // characters of the marker, so a NUL-padded marker is detected as well.
    $nulTolerantMarker = implode('\\0?', str_split('<!ENTITY'));
    $markerFound = preg_match('/\\0?' . $nulTolerantMarker . '\\0?/', $xml);

    if (!$markerFound) {
        // Nothing suspicious: hand the payload back untouched
        return $xml;
    }

    throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
/**
* Apply inline css inline style.
*

View File

@ -8,6 +8,7 @@ use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Document\Properties;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\Date;
@ -19,12 +20,18 @@ use ZipArchive;
class Ods extends BaseReader
{
/**
* @var XmlScanner
*/
private $securityScanner;
/**
* Create a new Ods Reader instance.
*/
public function __construct()
{
// Start out with the default read filter
$this->readFilter = new DefaultReadFilter();
// Scanner with its default marker; this reader passes XML payloads through it before parsing them
$this->securityScanner = new XmlScanner();
}
/**
@ -52,7 +59,7 @@ class Ods extends BaseReader
$mimeType = $zip->getFromName($stat['name']);
} elseif ($stat = $zip->statName('META-INF/manifest.xml')) {
$xml = simplexml_load_string(
$this->securityScan($zip->getFromName('META-INF/manifest.xml')),
$this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -100,7 +107,7 @@ class Ods extends BaseReader
$xml = new XMLReader();
$xml->xml(
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
$this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
null,
Settings::getLibXmlLoaderOptions()
);
@ -154,7 +161,7 @@ class Ods extends BaseReader
$xml = new XMLReader();
$xml->xml(
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
$this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
null,
Settings::getLibXmlLoaderOptions()
);
@ -267,7 +274,7 @@ class Ods extends BaseReader
// Meta
$xml = simplexml_load_string(
$this->securityScan($zip->getFromName('meta.xml')),
$this->securityScanner->scan($zip->getFromName('meta.xml')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -367,7 +374,7 @@ class Ods extends BaseReader
$dom = new \DOMDocument('1.01', 'UTF-8');
$dom->loadXML(
$this->securityScan($zip->getFromName('content.xml')),
$this->securityScanner->scan($zip->getFromName('content.xml')),
Settings::getLibXmlLoaderOptions()
);

View File

@ -0,0 +1,87 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
use PhpOffice\PhpSpreadsheet\Reader\Exception;
/**
 * Scans XML payloads for a forbidden marker (by default "<!DOCTYPE") before
 * they are handed to an XML parser, as a defence against XXE/XEE attacks.
 *
 * The payload is first normalised to UTF-8 according to the encoding named in
 * its XML declaration, so that a marker hidden behind another charset (for
 * example UTF-7) is still visible to the pattern match.
 */
class XmlScanner
{
    /**
     * Whether identifyLibxmlDisableEntityLoaderAvailability() considers
     * libxml_disable_entity_loader() usable on the running PHP version.
     *
     * @var bool
     */
    private $libxmlDisableEntityLoader = false;

    /**
     * Literal marker whose presence makes scan() reject the payload.
     *
     * @var string
     */
    private $pattern;

    /**
     * @param string $pattern Literal text (not a regular expression) to search for
     */
    public function __construct($pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;
        $this->libxmlDisableEntityLoader = $this->identifyLibxmlDisableEntityLoaderAvailability();

        if ($this->libxmlDisableEntityLoader) {
            // NOTE(review): this switches a libxml setting that is not scoped to this
            // object and it is never switched back here - confirm that is intended.
            libxml_disable_entity_loader(true);
        }
    }

    /**
     * Decide from the PHP version whether libxml_disable_entity_loader() should be used.
     *
     * Accepted: 7.0.27+, 7.1.13+, 7.2.1+ and every later 7.x minor release.
     * Any other major version is rejected.
     *
     * @return bool
     */
    private function identifyLibxmlDisableEntityLoaderAvailability()
    {
        if (PHP_MAJOR_VERSION === 7) {
            switch (PHP_MINOR_VERSION) {
                case 2:
                    return PHP_RELEASE_VERSION >= 1;
                case 1:
                    return PHP_RELEASE_VERSION >= 13;
                case 0:
                    return PHP_RELEASE_VERSION >= 27;
            }

            return true;
        }

        return false;
    }

    /**
     * Convert the payload to UTF-8 when its XML declaration names another encoding.
     *
     * The encoding pseudo-attribute is recognised with single or double quotes
     * and with whitespace around "=", and the charset name is compared
     * case-insensitively; otherwise a declaration such as encoding='UTF-7'
     * would skip the conversion and hide the marker from the scan.
     *
     * @param string $xml
     *
     * @throws Exception when the declared encoding cannot be converted
     *
     * @return string
     */
    private function toUtf8($xml)
    {
        $declared = 'UTF-8';
        if (preg_match('/encoding\\s*=\\s*(["\'])(.*?)\\1/', $xml, $matches)) {
            $declared = $matches[2];
        }

        $charset = strtoupper(trim($declared));
        if ($charset === 'UTF-8') {
            return $xml;
        }

        try {
            $converted = mb_convert_encoding($xml, 'UTF-8', $charset);
        } catch (\ValueError $e) {
            // PHP 8 throws for an unknown encoding; PHP 7 returns false instead
            $converted = false;
        }

        if (!is_string($converted)) {
            throw new Exception('Unsupported XML encoding "' . $charset . '", spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        return $converted;
    }

    /**
     * Scan the XML for use of the configured marker (by default <!DOCTYPE) to prevent XXE/XEE attacks.
     *
     * A value that is not a non-empty string (for example the false produced by
     * a failed read) is returned unchanged, as there is nothing to scan.
     *
     * @param mixed $xml
     *
     * @throws Exception when the marker is found or the declared encoding is unsupported
     *
     * @return mixed the payload, converted to UTF-8 if it declared another encoding
     */
    public function scan($xml)
    {
        if (!is_string($xml) || $xml === '') {
            return $xml;
        }

        $xml = $this->toUtf8($xml);

        // Don't rely purely on libxml_disable_entity_loader().
        // Every marker character is quoted so it is matched literally, and an
        // optional NUL byte is tolerated before, between and after the characters.
        $quoted = array_map(
            function ($character) {
                return preg_quote($character, '/');
            },
            str_split($this->pattern)
        );
        $pattern = '/\\0?' . implode('\\0?', $quoted) . '\\0?/';

        if (preg_match($pattern, $xml)) {
            throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        return $xml;
    }

    /**
     * Read a file or stream and scan its XML content to prevent XXE/XEE attacks.
     *
     * @param string $filestream Filename or stream-wrapper URL readable by file_get_contents()
     *
     * @throws Exception when the source cannot be read or scan() rejects the content
     *
     * @return string
     */
    public function scanFile($filestream)
    {
        $contents = file_get_contents($filestream);
        if ($contents === false) {
            throw new Exception('Could not read ' . $filestream . ' for XML security scanning.');
        }

        return $this->scan($contents);
    }
}

View File

@ -6,6 +6,7 @@ use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\Hyperlink;
use PhpOffice\PhpSpreadsheet\Document\Properties;
use PhpOffice\PhpSpreadsheet\NamedRange;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\Chart;
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
@ -46,6 +47,11 @@ class Xlsx extends BaseReader
*/
private static $theme = null;
/**
* @var XmlScanner
*/
private $securityScanner;
/**
* Create a new Xlsx Reader instance.
*/
@ -53,6 +59,7 @@ class Xlsx extends BaseReader
{
$this->readFilter = new DefaultReadFilter();
$this->referenceHelper = ReferenceHelper::getInstance();
$this->securityScanner = new XmlScanner();
}
/**
@ -74,7 +81,7 @@ class Xlsx extends BaseReader
if ($zip->open($pFilename) === true) {
// check if it is an OOXML archive
$rels = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, '_rels/.rels')
),
'SimpleXMLElement',
@ -119,14 +126,14 @@ class Xlsx extends BaseReader
// The files we're looking at here are small enough that simpleXML is more efficient than XMLReader
//~ http://schemas.openxmlformats.org/package/2006/relationships");
$rels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, '_rels/.rels'))
$this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels'))
);
foreach ($rels->Relationship as $rel) {
switch ($rel['Type']) {
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument':
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlWorkbook = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}"))
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}"))
);
if ($xmlWorkbook->sheets) {
@ -163,7 +170,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$rels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')),
$this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -173,7 +180,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorkbook = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')
),
'SimpleXMLElement',
@ -190,7 +197,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlWorkbook = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, "{$rel['Target']}")
),
'SimpleXMLElement',
@ -212,7 +219,7 @@ class Xlsx extends BaseReader
$xml = new XMLReader();
$xml->xml(
$this->securityScanFile(
$this->securityScanner->scanFile(
'zip://' . File::realpath($pFilename) . '#' . "$dir/$fileWorksheet"
),
null,
@ -403,7 +410,7 @@ class Xlsx extends BaseReader
// Read the theme first, because we need the colour scheme when reading the styles
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$wbRels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')),
$this->securityScanner->scan($this->getFromZipArchive($zip, 'xl/_rels/workbook.xml.rels')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -414,7 +421,7 @@ class Xlsx extends BaseReader
$themeOrderAdditional = count($themeOrderArray);
$xmlTheme = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "xl/{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -450,7 +457,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$rels = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, '_rels/.rels')),
$this->securityScanner->scan($this->getFromZipArchive($zip, '_rels/.rels')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -458,7 +465,7 @@ class Xlsx extends BaseReader
switch ($rel['Type']) {
case 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties':
$xmlCore = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -481,7 +488,7 @@ class Xlsx extends BaseReader
break;
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties':
$xmlCore = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -498,7 +505,7 @@ class Xlsx extends BaseReader
break;
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties':
$xmlCore = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -532,7 +539,7 @@ class Xlsx extends BaseReader
$dir = dirname($rel['Target']);
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorkbook = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')),
$this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/_rels/" . basename($rel['Target']) . '.rels')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -542,7 +549,7 @@ class Xlsx extends BaseReader
$xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings']"));
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlStrings = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -589,7 +596,7 @@ class Xlsx extends BaseReader
$xpath = self::getArrayItem($relsWorkbook->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']"));
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlStyles = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$xpath[Target]")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -700,7 +707,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlWorkbook = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "{$rel['Target']}")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -752,7 +759,7 @@ class Xlsx extends BaseReader
$fileWorksheet = $worksheets[(string) self::getArrayItem($eleSheet->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')];
//~ http://schemas.openxmlformats.org/spreadsheetml/2006/main"
$xmlSheet = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")),
$this->securityScanner->scan($this->getFromZipArchive($zip, "$dir/$fileWorksheet")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -1315,7 +1322,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
),
'SimpleXMLElement',
@ -1364,7 +1371,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
),
'SimpleXMLElement',
@ -1385,7 +1392,7 @@ class Xlsx extends BaseReader
// Load comments file
$relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath);
$commentsFile = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $relPath)),
$this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -1415,7 +1422,7 @@ class Xlsx extends BaseReader
// Load VML comments file
$relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . $relPath);
$vmlCommentsFile = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $relPath)),
$this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -1489,7 +1496,7 @@ class Xlsx extends BaseReader
$unparsedVmlDrawing[$rId] = [];
$unparsedVmlDrawing[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $relPath);
$unparsedVmlDrawing[$rId]['relFilePath'] = $relPath;
$unparsedVmlDrawing[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath']));
$unparsedVmlDrawing[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedVmlDrawing[$rId]['filePath']));
unset($unparsedVmlDrawing);
}
}
@ -1499,7 +1506,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
),
'SimpleXMLElement',
@ -1517,7 +1524,7 @@ class Xlsx extends BaseReader
// Fetch linked images
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsVML = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname($vmlRelationship) . '/_rels/' . basename($vmlRelationship) . '.rels')
),
'SimpleXMLElement',
@ -1532,7 +1539,7 @@ class Xlsx extends BaseReader
// Fetch VML document
$vmlDrawing = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $vmlRelationship)),
$this->securityScanner->scan($this->getFromZipArchive($zip, $vmlRelationship)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -1580,7 +1587,7 @@ class Xlsx extends BaseReader
if ($zip->locateName(dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')) {
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
),
'SimpleXMLElement',
@ -1597,7 +1604,7 @@ class Xlsx extends BaseReader
$fileDrawing = $drawings[(string) self::getArrayItem($drawing->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'id')];
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsDrawing = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname($fileDrawing) . '/_rels/' . basename($fileDrawing) . '.rels')
),
'SimpleXMLElement',
@ -1623,7 +1630,7 @@ class Xlsx extends BaseReader
}
}
$xmlDrawing = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $fileDrawing)),
$this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
)->children('http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing');
@ -1762,7 +1769,7 @@ class Xlsx extends BaseReader
// unparsed drawing AlternateContent
$xmlAltDrawing = simplexml_load_string(
$this->securityScan($this->getFromZipArchive($zip, $fileDrawing)),
$this->securityScanner->scan($this->getFromZipArchive($zip, $fileDrawing)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
)->children('http://schemas.openxmlformats.org/markup-compatibility/2006');
@ -1981,7 +1988,7 @@ class Xlsx extends BaseReader
if (!$this->readDataOnly) {
$contentTypes = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, '[Content_Types].xml')
),
'SimpleXMLElement',
@ -2005,7 +2012,7 @@ class Xlsx extends BaseReader
if ($this->includeCharts) {
$chartEntryRef = ltrim($contentType['PartName'], '/');
$chartElements = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, $chartEntryRef)
),
'SimpleXMLElement',
@ -2292,7 +2299,7 @@ class Xlsx extends BaseReader
if ($dataRels) {
// exists and not empty if the ribbon have some pictures (other than internal MSO)
$UIRels = simplexml_load_string(
$this->securityScan($dataRels),
$this->securityScanner->scan($dataRels),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
@ -2429,7 +2436,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
),
'SimpleXMLElement',
@ -2448,7 +2455,7 @@ class Xlsx extends BaseReader
$unparsedCtrlProps[$rId] = [];
$unparsedCtrlProps[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $ctrlProp['Target']);
$unparsedCtrlProps[$rId]['relFilePath'] = (string) $ctrlProp['Target'];
$unparsedCtrlProps[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath']));
$unparsedCtrlProps[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedCtrlProps[$rId]['filePath']));
}
unset($unparsedCtrlProps);
}
@ -2461,7 +2468,7 @@ class Xlsx extends BaseReader
//~ http://schemas.openxmlformats.org/package/2006/relationships"
$relsWorksheet = simplexml_load_string(
$this->securityScan(
$this->securityScanner->scan(
$this->getFromZipArchive($zip, dirname("$dir/$fileWorksheet") . '/_rels/' . basename($fileWorksheet) . '.rels')
),
'SimpleXMLElement',
@ -2480,7 +2487,7 @@ class Xlsx extends BaseReader
$unparsedPrinterSettings[$rId] = [];
$unparsedPrinterSettings[$rId]['filePath'] = self::dirAdd("$dir/$fileWorksheet", $printerSettings['Target']);
$unparsedPrinterSettings[$rId]['relFilePath'] = (string) $printerSettings['Target'];
$unparsedPrinterSettings[$rId]['content'] = $this->securityScan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath']));
$unparsedPrinterSettings[$rId]['content'] = $this->securityScanner->scan($this->getFromZipArchive($zip, $unparsedPrinterSettings[$rId]['filePath']));
}
unset($unparsedPrinterSettings);
}

View File

@ -5,6 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Document\Properties;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\Date;
@ -35,12 +36,18 @@ class Xml extends BaseReader
*/
protected $charSet = 'UTF-8';
/**
* @var XmlScanner
*/
private $securityScanner;
/**
* Create a new Excel2003XML Reader instance.
*/
public function __construct()
{
// Start out with the default read filter
$this->readFilter = new DefaultReadFilter();
// Scanner with its default marker; this reader passes XML payloads through it before parsing them
$this->securityScanner = new XmlScanner();
}
/**
@ -109,7 +116,7 @@ class Xml extends BaseReader
{
try {
$xml = simplexml_load_string(
$this->securityScan(file_get_contents($pFilename)),
$this->securityScanner->scan(file_get_contents($pFilename)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);

View File

@ -0,0 +1,57 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Security;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PHPUnit\Framework\TestCase;
/**
 * Exercises XmlScanner::scanFile() against the XEETest* fixture files.
 */
class XmlScannerTest extends TestCase
{
    /**
     * A harmless fixture must come back from the scanner unchanged.
     *
     * @dataProvider providerValidXML
     *
     * @param mixed $filename
     * @param mixed $expectedResult
     */
    public function testValidXML($filename, $expectedResult)
    {
        $scanner = new XmlScanner();

        self::assertEquals($expectedResult, $scanner->scanFile($filename));
    }

    /**
     * One data set per "XEETestValid*.xml" fixture, keyed by file name:
     * the resolved path and the raw content of the file.
     */
    public function providerValidXML()
    {
        $dataSets = [];
        $fixtures = glob(__DIR__ . '/../../../data/Reader/Xml/XEETestValid*.xml');
        foreach ($fixtures as $fixture) {
            $dataSets[basename($fixture)] = [realpath($fixture), file_get_contents($fixture)];
        }

        return $dataSets;
    }

    /**
     * A malicious fixture must make the scanner throw a Reader Exception.
     *
     * @dataProvider providerInvalidXML
     *
     * @param mixed $filename
     */
    public function testInvalidXML($filename)
    {
        $this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class);

        $scanner = new XmlScanner();

        // Only reached when scanFile() did not throw; the comparison is then meant to fail
        self::assertEquals(
            'FAILURE: Should throw an Exception rather than return a value',
            $scanner->scanFile($filename)
        );
    }

    /**
     * One data set per "XEETestInvalidUTF*.xml" fixture, keyed by file name:
     * the resolved path of the file.
     */
    public function providerInvalidXML()
    {
        $dataSets = [];
        $fixtures = glob(__DIR__ . '/../../../data/Reader/Xml/XEETestInvalidUTF*.xml');
        foreach ($fixtures as $fixture) {
            $dataSets[basename($fixture)] = [realpath($fixture)];
        }

        return $dataSets;
    }
}

View File

@ -3,37 +3,11 @@
namespace PhpOffice\PhpSpreadsheetTests\Reader;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Reader\BaseReader;
use PhpOffice\PhpSpreadsheet\Reader\Xml;
use PHPUnit\Framework\TestCase;
class XmlTest extends TestCase
{
/**
* @dataProvider providerInvalidXML
*
* @param mixed $filename
*/
public function testInvalidXML($filename)
{
    $this->expectException(\PhpOffice\PhpSpreadsheet\Reader\Exception::class);

    $baseReader = $this->getMockForAbstractClass(BaseReader::class);

    // Only reached when securityScanFile() did not throw; the comparison is then meant to fail
    self::assertEquals(
        'FAILURE: Should throw an Exception rather than return a value',
        $baseReader->securityScanFile($filename)
    );
}
}
public function providerInvalidXML()
{
    // One data set per "XEETestInvalidUTF*.xml" fixture, keyed by file name
    $dataSets = [];
    $fixtures = glob(__DIR__ . '/../../data/Reader/Xml/XEETestInvalidUTF*.xml');
    foreach ($fixtures as $fixture) {
        $dataSets[basename($fixture)] = [realpath($fixture)];
    }

    return $dataSets;
}
/**
* @dataProvider providerInvalidSimpleXML
*
@ -57,29 +31,6 @@ class XmlTest extends TestCase
return $tests;
}
/**
* @dataProvider providerValidXML
*
* @param mixed $filename
* @param mixed $expectedResult
*/
public function testValidXML($filename, $expectedResult)
{
    $baseReader = $this->getMockForAbstractClass(BaseReader::class);

    // A harmless fixture must come back from the scan unchanged
    self::assertEquals($expectedResult, $baseReader->securityScanFile($filename));
}
public function providerValidXML()
{
    // One data set per "XEETestValid*.xml" fixture, keyed by file name:
    // the resolved path and the raw content of the file
    $dataSets = [];
    $fixtures = glob(__DIR__ . '/../../data/Reader/Xml/XEETestValid*.xml');
    foreach ($fixtures as $fixture) {
        $dataSets[basename($fixture)] = [realpath($fixture), file_get_contents($fixture)];
    }

    return $dataSets;
}
/**
* Check if it can read XML Hyperlink correctly.
*/

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-7"?>
+ADwAIQ-DOCTYPE xmlrootname +AFsAPAAh-ENTITY +ACU aaa SYSTEM +ACI-http://127.0.0.1:8080/ext.dtd+ACIAPgAl-aaa+ADsAJQ-ccc+ADsAJQ-ddd+ADsAXQA+