Additional work on the Excel2003XML Reader

git-svn-id: https://phpexcel.svn.codeplex.com/svn/trunk@69733 2327b42d-5241-43d6-9e2a-de5ac946f064
This commit is contained in:
Mark Baker 2011-03-04 14:39:00 +00:00
parent f08f16c8ca
commit 34c4aa18ee
2 changed files with 47 additions and 16 deletions

View File

@ -72,6 +72,13 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
*/ */
private $_readFilter = null; private $_readFilter = null;
/**
* Character set used in the file
*
* @var string
*/
private $_charSet = 'UTF-8';
/** /**
* Read data only? * Read data only?
@ -197,6 +204,12 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
} }
} }
// Retrieve charset encoding
if(preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/um',$data,$matches)) {
$this->_charSet = strtoupper($matches[1]);
}
// echo 'Character Set is ',$this->_charSet,'<br />';
return $valid; return $valid;
} }
@ -212,6 +225,9 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
if (!file_exists($pFilename)) { if (!file_exists($pFilename)) {
throw new Exception("Could not open " . $pFilename . " for reading! File does not exist."); throw new Exception("Could not open " . $pFilename . " for reading! File does not exist.");
} }
if (!$this->canRead($pFilename)) {
throw new Exception($pFilename . " is an Invalid Spreadsheet file.");
}
$worksheetNames = array(); $worksheetNames = array();
@ -221,7 +237,7 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
$xml_ss = $xml->children($namespaces['ss']); $xml_ss = $xml->children($namespaces['ss']);
foreach($xml_ss->Worksheet as $worksheet) { foreach($xml_ss->Worksheet as $worksheet) {
$worksheet_ss = $worksheet->attributes($namespaces['ss']); $worksheet_ss = $worksheet->attributes($namespaces['ss']);
$worksheetNames[] = $worksheet_ss['Name']; $worksheetNames[] = self::_convertStringEncoding((string) $worksheet_ss['Name'],$this->_charSet);
} }
return $worksheetNames; return $worksheetNames;
@ -329,6 +345,10 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
throw new Exception("Could not open " . $pFilename . " for reading! File does not exist."); throw new Exception("Could not open " . $pFilename . " for reading! File does not exist.");
} }
if (!$this->canRead($pFilename)) {
throw new Exception($pFilename . " is an Invalid Spreadsheet file.");
}
$xml = simplexml_load_file($pFilename); $xml = simplexml_load_file($pFilename);
$namespaces = $xml->getNamespaces(true); $namespaces = $xml->getNamespaces(true);
@ -337,39 +357,39 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
foreach($xml->DocumentProperties[0] as $propertyName => $propertyValue) { foreach($xml->DocumentProperties[0] as $propertyName => $propertyValue) {
switch ($propertyName) { switch ($propertyName) {
case 'Title' : case 'Title' :
$docProps->setTitle($propertyValue); $docProps->setTitle(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'Subject' : case 'Subject' :
$docProps->setSubject($propertyValue); $docProps->setSubject(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'Author' : case 'Author' :
$docProps->setCreator($propertyValue); $docProps->setCreator(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'Created' : case 'Created' :
$creationDate = strtotime($propertyValue); $creationDate = strtotime($propertyValue);
$docProps->setCreated($creationDate); $docProps->setCreated($creationDate);
break; break;
case 'LastAuthor' : case 'LastAuthor' :
$docProps->setLastModifiedBy($propertyValue); $docProps->setLastModifiedBy(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'LastSaved' : case 'LastSaved' :
$lastSaveDate = strtotime($propertyValue); $lastSaveDate = strtotime($propertyValue);
$docProps->setModified($lastSaveDate); $docProps->setModified($lastSaveDate);
break; break;
case 'Company' : case 'Company' :
$docProps->setCompany($propertyValue); $docProps->setCompany(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'Category' : case 'Category' :
$docProps->setCategory($propertyValue); $docProps->setCategory(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'Manager' : case 'Manager' :
$docProps->setManager($propertyValue); $docProps->setManager(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'Keywords' : case 'Keywords' :
$docProps->setKeywords($propertyValue); $docProps->setKeywords(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
case 'Description' : case 'Description' :
$docProps->setDescription($propertyValue); $docProps->setDescription(self::_convertStringEncoding($propertyValue,$this->_charSet));
break; break;
} }
} }
@ -535,6 +555,7 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
$worksheetID = 0; $worksheetID = 0;
$xml_ss = $xml->children($namespaces['ss']); $xml_ss = $xml->children($namespaces['ss']);
foreach($xml_ss->Worksheet as $worksheet) { foreach($xml_ss->Worksheet as $worksheet) {
$worksheet_ss = $worksheet->attributes($namespaces['ss']); $worksheet_ss = $worksheet->attributes($namespaces['ss']);
@ -549,7 +570,7 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
$objPHPExcel->createSheet(); $objPHPExcel->createSheet();
$objPHPExcel->setActiveSheetIndex($worksheetID); $objPHPExcel->setActiveSheetIndex($worksheetID);
if (isset($worksheet_ss['Name'])) { if (isset($worksheet_ss['Name'])) {
$worksheetName = (string) $worksheet_ss['Name']; $worksheetName = self::_convertStringEncoding((string) $worksheet_ss['Name'],$this->_charSet);
$objPHPExcel->getActiveSheet()->setTitle($worksheetName); $objPHPExcel->getActiveSheet()->setTitle($worksheetName);
} }
@ -635,6 +656,7 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
const TYPE_ERROR = 'e'; const TYPE_ERROR = 'e';
*/ */
case 'String' : case 'String' :
$cellValue = self::_convertStringEncoding($cellValue,$this->_charSet);
$type = PHPExcel_Cell_DataType::TYPE_STRING; $type = PHPExcel_Cell_DataType::TYPE_STRING;
break; break;
case 'Number' : case 'Number' :
@ -735,7 +757,7 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
$annotation = strip_tags($node); $annotation = strip_tags($node);
// echo 'Annotation: ',$annotation,'<br />'; // echo 'Annotation: ',$annotation,'<br />';
$objPHPExcel->getActiveSheet()->getComment( $columnID.$rowID ) $objPHPExcel->getActiveSheet()->getComment( $columnID.$rowID )
->setAuthor( $author ) ->setAuthor(self::_convertStringEncoding($author ,$this->_charSet))
->setText($this->_parseRichText($annotation) ); ->setText($this->_parseRichText($annotation) );
} }
@ -776,10 +798,17 @@ class PHPExcel_Reader_Excel2003XML implements PHPExcel_Reader_IReader
return $objPHPExcel; return $objPHPExcel;
} }
/**
 * Convert a value from the supplied character set to UTF-8.
 *
 * A value whose character set is already 'UTF-8' is handed back untouched;
 * anything else is delegated to PHPExcel_Shared_String::ConvertEncoding().
 *
 * @param	mixed	$string		Value to convert (callers pass plain strings and values read from the XML document)
 * @param	string	$charset	Character set the value is assumed to be encoded in
 * @return	mixed	The unchanged value for 'UTF-8', otherwise whatever ConvertEncoding() returns
 */
private static function _convertStringEncoding($string,$charset) {
	// Already UTF-8: nothing to convert
	if ($charset == 'UTF-8') {
		return $string;
	}

	// NOTE(review): argument order is presumably (value, target, source) - confirm against PHPExcel_Shared_String
	return PHPExcel_Shared_String::ConvertEncoding($string,'UTF-8',$charset);
}
/**
 * Wrap a piece of text in a new PHPExcel_RichText object and return it.
 *
 * NOTE(review): every line below carries two renderings of the same statement
 * (this text looks like a side-by-side diff). In the second rendering $is is
 * passed through self::_convertStringEncoding() together with $this->_charSet
 * before it is added to the rich-text object; the first rendering adds $is as-is.
 *
 * @param	string	$is		Text to add to the rich-text object (defaults to an empty string)
 * @return	PHPExcel_RichText
 */
private function _parseRichText($is = '') { private function _parseRichText($is = '') {
$value = new PHPExcel_RichText(); $value = new PHPExcel_RichText();
$value->createText($is); $value->createText(self::_convertStringEncoding($is,$this->_charSet));
return $value; return $value;
} }

View File

@ -52,6 +52,8 @@ $objReader = PHPExcel_IOFactory::createReader('Excel2003XML');
$objPHPExcel = $objReader->load("Excel2003XMLTest.xml"); $objPHPExcel = $objReader->load("Excel2003XMLTest.xml");
$callEndTime = microtime(true); $callEndTime = microtime(true);
$callTime = $callEndTime - $callStartTime; $callTime = $callEndTime - $callStartTime;
echo '<br />Call time to read Workbook was '.sprintf('%.4f',$callTime)." seconds<br />\n"; echo '<br />Call time to read Workbook was '.sprintf('%.4f',$callTime)." seconds<br />\n";
@ -59,9 +61,9 @@ echo '<br />Call time to read Workbook was '.sprintf('%.4f',$callTime)." seconds
echo date('H:i:s').' Current memory usage: '.(memory_get_usage(true) / 1024 / 1024)." MB<br /><hr />\n"; echo date('H:i:s').' Current memory usage: '.(memory_get_usage(true) / 1024 / 1024)." MB<br /><hr />\n";
echo date('H:i:s') . " Write to Excel2007 format<br />"; echo date('H:i:s') . " Write to Excel5 format<br />";
$objWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel2007'); $objWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel5');
$objWriter->save(str_replace('.php', '.xlsx', __FILE__)); $objWriter->save(str_replace('.php', '.xls', __FILE__));
// Echo memory peak usage // Echo memory peak usage