Feature: (cifren/MBaker) Work Item GH-205 - Handling merge cells in HTML Reader

This commit is contained in:
MarkBaker 2014-12-07 14:45:55 +00:00
parent b2e82a0e11
commit d7ea3e2ab0
2 changed files with 420 additions and 372 deletions

View File

@ -1,4 +1,5 @@
<?php <?php
/** /**
* PHPExcel * PHPExcel
* *
@ -24,15 +25,13 @@
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
* @version ##VERSION##, ##DATE## * @version ##VERSION##, ##DATE##
*/ */
/** PHPExcel root directory */ /** PHPExcel root directory */
if (!defined('PHPEXCEL_ROOT')) { if (!defined('PHPEXCEL_ROOT')) {
/** /**
* @ignore * @ignore
*/ */
define('PHPEXCEL_ROOT', dirname(__FILE__) . '/../../'); define('PHPEXCEL_ROOT', dirname(__FILE__) . '/../../');
require(PHPEXCEL_ROOT . 'PHPExcel/Autoloader.php'); require(PHPEXCEL_ROOT . 'PHPExcel/Autoloader.php');
} }
/** /**
@ -44,426 +43,474 @@ if (!defined('PHPEXCEL_ROOT')) {
*/ */
class PHPExcel_Reader_HTML extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader class PHPExcel_Reader_HTML extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader
{ {
/**
* Input encoding
*
* @var string
*/
private $_inputEncoding = 'ANSI';
/** /**
* Sheet index to read * Input encoding
* *
* @var int * @var string
*/ */
private $_sheetIndex = 0; protected $_inputEncoding = 'ANSI';
/** /**
* Formats * Sheet index to read
* *
* @var array * @var int
*/ */
private $_formats = array( 'h1' => array( 'font' => array( 'bold' => true, protected $_sheetIndex = 0;
'size' => 24,
),
), // Bold, 24pt
'h2' => array( 'font' => array( 'bold' => true,
'size' => 18,
),
), // Bold, 18pt
'h3' => array( 'font' => array( 'bold' => true,
'size' => 13.5,
),
), // Bold, 13.5pt
'h4' => array( 'font' => array( 'bold' => true,
'size' => 12,
),
), // Bold, 12pt
'h5' => array( 'font' => array( 'bold' => true,
'size' => 10,
),
), // Bold, 10pt
'h6' => array( 'font' => array( 'bold' => true,
'size' => 7.5,
),
), // Bold, 7.5pt
'a' => array( 'font' => array( 'underline' => true,
'color' => array( 'argb' => PHPExcel_Style_Color::COLOR_BLUE,
),
),
), // Blue underlined
'hr' => array( 'borders' => array( 'bottom' => array( 'style' => PHPExcel_Style_Border::BORDER_THIN,
'color' => array( PHPExcel_Style_Color::COLOR_BLACK,
),
),
),
), // Bottom border
);
/**
 * Default style arrays applied for specific HTML tags, keyed by tag name.
 *
 * Each value is passed unchanged to applyFromArray() on the style of the
 * current cell when _processDomElement() meets the matching tag.
 *
 * @var array
 */
protected $_formats = array(
    'h1' => array(
        'font' => array(
            'bold' => true,
            'size' => 24,
        ),
    ), // Bold, 24pt
    'h2' => array(
        'font' => array(
            'bold' => true,
            'size' => 18,
        ),
    ), // Bold, 18pt
    'h3' => array(
        'font' => array(
            'bold' => true,
            'size' => 13.5,
        ),
    ), // Bold, 13.5pt
    'h4' => array(
        'font' => array(
            'bold' => true,
            'size' => 12,
        ),
    ), // Bold, 12pt
    'h5' => array(
        'font' => array(
            'bold' => true,
            'size' => 10,
        ),
    ), // Bold, 10pt
    'h6' => array(
        'font' => array(
            'bold' => true,
            'size' => 7.5,
        ),
    ), // Bold, 7.5pt
    'a' => array(
        'font' => array(
            'underline' => true,
            'color' => array(
                'argb' => PHPExcel_Style_Color::COLOR_BLUE,
            ),
        ),
    ), // Blue underlined
    'hr' => array(
        'borders' => array(
            'bottom' => array(
                'style' => PHPExcel_Style_Border::BORDER_THIN,
                // Keyed by 'argb' like the 'a' entry; an unkeyed list element names no colour component
                'color' => array(
                    'argb' => PHPExcel_Style_Color::COLOR_BLACK,
                ),
            ),
        ),
    ), // Bottom border
);
/** protected $rowspan = array();
* Create a new PHPExcel_Reader_HTML
*/
public function __construct() {
$this->_readFilter = new PHPExcel_Reader_DefaultReadFilter();
}
/** /**
* Validate that the current file is an HTML file * Create a new PHPExcel_Reader_HTML
* */
* @return boolean public function __construct()
*/ {
protected function _isValidFormat() $this->_readFilter = new PHPExcel_Reader_DefaultReadFilter();
{ }
// Reading 2048 bytes should be enough to validate that the format is HTML
$data = fread($this->_fileHandle, 2048);
if ((strpos($data, '<') !== FALSE) &&
(strlen($data) !== strlen(strip_tags($data)))) {
return TRUE;
}
return FALSE; /**
} * Validate that the current file is an HTML file
*
* @return boolean
*/
protected function _isValidFormat()
{
// Reading 2048 bytes should be enough to validate that the format is HTML
$data = fread($this->_fileHandle, 2048);
if ((strpos($data, '<') !== FALSE) &&
(strlen($data) !== strlen(strip_tags($data)))) {
return TRUE;
}
/** return FALSE;
* Loads PHPExcel from file }
*
* @param string $pFilename
* @return PHPExcel
* @throws PHPExcel_Reader_Exception
*/
public function load($pFilename)
{
// Create new PHPExcel
$objPHPExcel = new PHPExcel();
// Load into this instance /**
return $this->loadIntoExisting($pFilename, $objPHPExcel); * Loads PHPExcel from file
} *
* @param string $pFilename
* @return PHPExcel
* @throws PHPExcel_Reader_Exception
*/
public function load($pFilename)
{
// Create new PHPExcel
$objPHPExcel = new PHPExcel();
/** // Load into this instance
* Set input encoding return $this->loadIntoExisting($pFilename, $objPHPExcel);
* }
* @param string $pValue Input encoding
*/
public function setInputEncoding($pValue = 'ANSI')
{
$this->_inputEncoding = $pValue;
return $this;
}
/** /**
* Get input encoding * Set input encoding
* *
* @return string * @param string $pValue Input encoding
*/ */
public function getInputEncoding() public function setInputEncoding($pValue = 'ANSI')
{ {
return $this->_inputEncoding; $this->_inputEncoding = $pValue;
}
// Data Array used for testing only, should write to PHPExcel object on completion of tests return $this;
private $_dataArray = array(); }
private $_tableLevel = 0; /**
private $_nestedColumn = array('A'); * Get input encoding
*
* @return string
*/
public function getInputEncoding()
{
return $this->_inputEncoding;
}
private function _setTableStartColumn($column) { // Data Array used for testing only, should write to PHPExcel object on completion of tests
if ($this->_tableLevel == 0) protected $_dataArray = array();
$column = 'A'; protected $_tableLevel = 0;
++$this->_tableLevel; protected $_nestedColumn = array('A');
$this->_nestedColumn[$this->_tableLevel] = $column;
return $this->_nestedColumn[$this->_tableLevel]; protected function _setTableStartColumn($column)
} {
if ($this->_tableLevel == 0)
$column = 'A';
++$this->_tableLevel;
$this->_nestedColumn[$this->_tableLevel] = $column;
private function _getTableStartColumn() { return $this->_nestedColumn[$this->_tableLevel];
return $this->_nestedColumn[$this->_tableLevel]; }
}
private function _releaseTableStartColumn() { protected function _getTableStartColumn()
--$this->_tableLevel; {
return array_pop($this->_nestedColumn); return $this->_nestedColumn[$this->_tableLevel];
} }
private function _flushCell($sheet,$column,$row,&$cellContent) { protected function _releaseTableStartColumn()
if (is_string($cellContent)) { {
// Simple String content --$this->_tableLevel;
if (trim($cellContent) > '') {
// Only actually write it if there's content in the string return array_pop($this->_nestedColumn);
}
protected function _flushCell($sheet, $column, $row, &$cellContent)
{
if (is_string($cellContent)) {
// Simple String content
if (trim($cellContent) > '') {
// Only actually write it if there's content in the string
// echo 'FLUSH CELL: ' , $column , $row , ' => ' , $cellContent , '<br />'; // echo 'FLUSH CELL: ' , $column , $row , ' => ' , $cellContent , '<br />';
// Write to worksheet to be done here... // Write to worksheet to be done here...
// ... we return the cell so we can mess about with styles more easily // ... we return the cell so we can mess about with styles more easily
$cell = $sheet->setCellValue($column.$row,$cellContent,true); $sheet->setCellValue($column . $row, $cellContent, true);
$this->_dataArray[$row][$column] = $cellContent; $this->_dataArray[$row][$column] = $cellContent;
} }
} else { } else {
// We have a Rich Text run // We have a Rich Text run
// TODO // TODO
$this->_dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent; $this->_dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
} }
$cellContent = (string) ''; $cellContent = (string) '';
} }
private function _processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent){ protected function _processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent, $format = null)
foreach($element->childNodes as $child){ {
if ($child instanceof DOMText) { foreach ($element->childNodes as $child) {
$domText = preg_replace('/\s+/',' ',trim($child->nodeValue)); if ($child instanceof DOMText) {
if (is_string($cellContent)) { $domText = preg_replace('/\s+/', ' ', trim($child->nodeValue));
// simply append the text if the cell content is a plain text string if (is_string($cellContent)) {
$cellContent .= $domText; // simply append the text if the cell content is a plain text string
} else { $cellContent .= $domText;
// but if we have a rich text run instead, we need to append it correctly } else {
// TODO // but if we have a rich text run instead, we need to append it correctly
} // TODO
} elseif($child instanceof DOMElement) { }
} elseif ($child instanceof DOMElement) {
// echo '<b>DOM ELEMENT: </b>' , strtoupper($child->nodeName) , '<br />'; // echo '<b>DOM ELEMENT: </b>' , strtoupper($child->nodeName) , '<br />';
$attributeArray = array(); $attributeArray = array();
foreach($child->attributes as $attribute) { foreach ($child->attributes as $attribute) {
// echo '<b>ATTRIBUTE: </b>' , $attribute->name , ' => ' , $attribute->value , '<br />'; // echo '<b>ATTRIBUTE: </b>' , $attribute->name , ' => ' , $attribute->value , '<br />';
$attributeArray[$attribute->name] = $attribute->value; $attributeArray[$attribute->name] = $attribute->value;
} }
switch($child->nodeName) { switch ($child->nodeName) {
case 'meta' : case 'meta' :
foreach($attributeArray as $attributeName => $attributeValue) { foreach ($attributeArray as $attributeName => $attributeValue) {
switch($attributeName) { switch ($attributeName) {
case 'content': case 'content':
// TODO // TODO
// Extract character set, so we can convert to UTF-8 if required // Extract character set, so we can convert to UTF-8 if required
break; break;
} }
} }
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
break; break;
case 'title' : case 'title' :
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
$sheet->setTitle($cellContent); $sheet->setTitle($cellContent);
$cellContent = ''; $cellContent = '';
break; break;
case 'span' : case 'span' :
case 'div' : case 'div' :
case 'font' : case 'font' :
case 'i' : case 'i' :
case 'em' : case 'em' :
case 'strong': case 'strong':
case 'b' : case 'b' :
// echo 'STYLING, SPAN OR DIV<br />'; // echo 'STYLING, SPAN OR DIV<br />';
if ($cellContent > '') if ($cellContent > '')
$cellContent .= ' '; $cellContent .= ' ';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
if ($cellContent > '') if ($cellContent > '')
$cellContent .= ' '; $cellContent .= ' ';
// echo 'END OF STYLING, SPAN OR DIV<br />'; // echo 'END OF STYLING, SPAN OR DIV<br />';
break; break;
case 'hr' : case 'hr' :
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
++$row; ++$row;
if (isset($this->_formats[$child->nodeName])) { if (isset($this->_formats[$child->nodeName])) {
$sheet->getStyle($column.$row)->applyFromArray($this->_formats[$child->nodeName]); $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
} else { } else {
$cellContent = '----------'; $cellContent = '----------';
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
} }
++$row; ++$row;
case 'br' : case 'br' :
if ($this->_tableLevel > 0) { if ($this->_tableLevel > 0) {
// If we're inside a table, replace with a \n // If we're inside a table, replace with a \n
$cellContent .= "\n"; $cellContent .= "\n";
} else { } else {
// Otherwise flush our existing content and move the row cursor on // Otherwise flush our existing content and move the row cursor on
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
++$row; ++$row;
} }
// echo 'HARD LINE BREAK: ' , '<br />'; // echo 'HARD LINE BREAK: ' , '<br />';
break; break;
case 'a' : case 'a' :
// echo 'START OF HYPERLINK: ' , '<br />'; // echo 'START OF HYPERLINK: ' , '<br />';
foreach($attributeArray as $attributeName => $attributeValue) { foreach ($attributeArray as $attributeName => $attributeValue) {
switch($attributeName) { switch ($attributeName) {
case 'href': case 'href':
// echo 'Link to ' , $attributeValue , '<br />'; // echo 'Link to ' , $attributeValue , '<br />';
$sheet->getCell($column.$row)->getHyperlink()->setUrl($attributeValue); $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue);
if (isset($this->_formats[$child->nodeName])) { if (isset($this->_formats[$child->nodeName])) {
$sheet->getStyle($column.$row)->applyFromArray($this->_formats[$child->nodeName]); $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
} }
break; break;
} }
} }
$cellContent .= ' '; $cellContent .= ' ';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
// echo 'END OF HYPERLINK:' , '<br />'; // echo 'END OF HYPERLINK:' , '<br />';
break; break;
case 'h1' : case 'h1' :
case 'h2' : case 'h2' :
case 'h3' : case 'h3' :
case 'h4' : case 'h4' :
case 'h5' : case 'h5' :
case 'h6' : case 'h6' :
case 'ol' : case 'ol' :
case 'ul' : case 'ul' :
case 'p' : case 'p' :
if ($this->_tableLevel > 0) { if ($this->_tableLevel > 0) {
// If we're inside a table, replace with a \n // If we're inside a table, replace with a \n
$cellContent .= "\n"; $cellContent .= "\n";
// echo 'LIST ENTRY: ' , '<br />'; // echo 'LIST ENTRY: ' , '<br />';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
// echo 'END OF LIST ENTRY:' , '<br />'; // echo 'END OF LIST ENTRY:' , '<br />';
} else { } else {
if ($cellContent > '') { if ($cellContent > '') {
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
$row += 2; $row++;
} }
// echo 'START OF PARAGRAPH: ' , '<br />'; // echo 'START OF PARAGRAPH: ' , '<br />';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
// echo 'END OF PARAGRAPH:' , '<br />'; // echo 'END OF PARAGRAPH:' , '<br />';
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
if (isset($this->_formats[$child->nodeName])) { if (isset($this->_formats[$child->nodeName])) {
$sheet->getStyle($column.$row)->applyFromArray($this->_formats[$child->nodeName]); $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
} }
$row += 2; $row++;
$column = 'A'; $column = 'A';
} }
break; break;
case 'li' : case 'li' :
if ($this->_tableLevel > 0) { if ($this->_tableLevel > 0) {
// If we're inside a table, replace with a \n // If we're inside a table, replace with a \n
$cellContent .= "\n"; $cellContent .= "\n";
// echo 'LIST ENTRY: ' , '<br />'; // echo 'LIST ENTRY: ' , '<br />';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
// echo 'END OF LIST ENTRY:' , '<br />'; // echo 'END OF LIST ENTRY:' , '<br />';
} else { } else {
if ($cellContent > '') { if ($cellContent > '') {
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
} }
++$row; ++$row;
// echo 'LIST ENTRY: ' , '<br />'; // echo 'LIST ENTRY: ' , '<br />';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
// echo 'END OF LIST ENTRY:' , '<br />'; // echo 'END OF LIST ENTRY:' , '<br />';
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
$column = 'A'; $column = 'A';
} }
break; break;
case 'table' : case 'table' :
$this->_flushCell($sheet,$column,$row,$cellContent); $this->_flushCell($sheet, $column, $row, $cellContent);
$column = $this->_setTableStartColumn($column); $column = $this->_setTableStartColumn($column);
// echo 'START OF TABLE LEVEL ' , $this->_tableLevel , '<br />'; // echo 'START OF TABLE LEVEL ' , $this->_tableLevel , '<br />';
if ($this->_tableLevel > 1) if ($this->_tableLevel > 1)
--$row; --$row;
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
// echo 'END OF TABLE LEVEL ' , $this->_tableLevel , '<br />'; // echo 'END OF TABLE LEVEL ' , $this->_tableLevel , '<br />';
$column = $this->_releaseTableStartColumn(); $column = $this->_releaseTableStartColumn();
if ($this->_tableLevel > 1) { if ($this->_tableLevel > 1) {
++$column; ++$column;
} else { } else {
++$row; ++$row;
} }
break; break;
case 'thead' : case 'thead' :
case 'tbody' : case 'tbody' :
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
break; break;
case 'tr' : case 'tr' :
++$row; $column = $this->_getTableStartColumn();
$column = $this->_getTableStartColumn(); $cellContent = '';
$cellContent = '';
// echo 'START OF TABLE ' , $this->_tableLevel , ' ROW<br />'; // echo 'START OF TABLE ' , $this->_tableLevel , ' ROW<br />';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
++$row;
// echo 'END OF TABLE ' , $this->_tableLevel , ' ROW<br />'; // echo 'END OF TABLE ' , $this->_tableLevel , ' ROW<br />';
break; break;
case 'th' : case 'th' :
case 'td' : case 'td' :
// echo 'START OF TABLE ' , $this->_tableLevel , ' CELL<br />'; // echo 'START OF TABLE ' , $this->_tableLevel , ' CELL<br />';
$this->_processDomElement($child,$sheet,$row,$column,$cellContent); $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
// echo 'END OF TABLE ' , $this->_tableLevel , ' CELL<br />'; // echo 'END OF TABLE ' , $this->_tableLevel , ' CELL<br />';
$this->_flushCell($sheet,$column,$row,$cellContent);
++$column;
break;
case 'body' :
$row = 1;
$column = 'A';
$content = '';
$this->_tableLevel = 0;
$this->_processDomElement($child,$sheet,$row,$column,$cellContent);
break;
default:
$this->_processDomElement($child,$sheet,$row,$column,$cellContent);
}
}
}
}
/** while (isset($this->rowspan[$column . $row])) {
* Loads PHPExcel from file into PHPExcel instance ++$column;
* }
* @param string $pFilename
* @param PHPExcel $objPHPExcel
* @return PHPExcel
* @throws PHPExcel_Reader_Exception
*/
public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
{
// Open file to validate
$this->_openFile($pFilename);
if (!$this->_isValidFormat()) {
fclose ($this->_fileHandle);
throw new PHPExcel_Reader_Exception($pFilename . " is an Invalid HTML file.");
}
// Close after validating
fclose ($this->_fileHandle);
// Create new PHPExcel $this->_flushCell($sheet, $column, $row, $cellContent);
while ($objPHPExcel->getSheetCount() <= $this->_sheetIndex) {
$objPHPExcel->createSheet();
}
$objPHPExcel->setActiveSheetIndex( $this->_sheetIndex );
// Create a new DOM object if (isset($attributeArray['style']) && !empty($attributeArray['style'])) {
$dom = new DOMDocument; $styleAry = $this->getPhpExcelStyleArray($attributeArray['style']);
// Reload the HTML file into the DOM object
if ((version_compare(PHP_VERSION, '5.4.0') >= 0) && defined(LIBXML_DTDLOAD)) { if (!empty($styleAry)) {
$loaded = $dom->loadHTMLFile($pFilename, PHPExcel_Settings::getLibXmlLoaderOptions()); $sheet->getStyle($column . $row)->applyFromArray($styleAry);
} else { }
$loaded = $dom->loadHTMLFile($pFilename); }
//create merging rowspan
if (isset($attributeArray['rowspan']) && isset($attributeArray['colspan'])) {
$columnTo = $column;
for ($i = 0; $i < $attributeArray['colspan'] - 1; $i++) {
++$columnTo;
}
$range = $column . $row . ':' . $columnTo . ($row + $attributeArray['rowspan'] - 1);
foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
$this->rowspan[$value] = true;
}
$sheet->mergeCells($range);
} elseif (isset($attributeArray['rowspan'])) {
$range = $column . $row . ':' . $column . ($row + $attributeArray['rowspan'] - 1);
foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
$this->rowspan[$value] = true;
}
$sheet->mergeCells($range);
} elseif (isset($attributeArray['colspan'])) {
$columnTo = $column;
for ($i = 0; $i < $attributeArray['colspan'] - 1; $i++) {
++$columnTo;
}
$sheet->mergeCells($column . $row . ':' . $columnTo . $row);
$column = $columnTo;
}
++$column;
break;
case 'body' :
$row = 1;
$column = 'A';
$content = '';
$this->_tableLevel = 0;
$this->_processDomElement($child, $sheet, $row, $column, $cellContent);
break;
default:
$this->_processDomElement($child, $sheet, $row, $column, $cellContent);
}
}
} }
if ($loaded === FALSE) { }
throw new PHPExcel_Reader_Exception('Failed to load '. $pFilename. ' as a DOM Document');
}
// Discard white space /**
$dom->preserveWhiteSpace = false; * Loads PHPExcel from file into PHPExcel instance
*
* @param string $pFilename
* @param PHPExcel $objPHPExcel
* @return PHPExcel
* @throws PHPExcel_Reader_Exception
*/
public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
{
// Open file to validate
$this->_openFile($pFilename);
if (!$this->_isValidFormat()) {
fclose($this->_fileHandle);
throw new PHPExcel_Reader_Exception($pFilename . " is an Invalid HTML file.");
}
// Close after validating
fclose($this->_fileHandle);
// Create new PHPExcel
while ($objPHPExcel->getSheetCount() <= $this->_sheetIndex) {
$objPHPExcel->createSheet();
}
$objPHPExcel->setActiveSheetIndex($this->_sheetIndex);
$row = 0; // Create a new DOM object
$column = 'A'; $dom = new domDocument;
$content = ''; // Reload the HTML file into the DOM object
$this->_processDomElement($dom,$objPHPExcel->getActiveSheet(),$row,$column,$content); $loaded = $dom->loadHTMLFile($pFilename);
// Abort if the HTML file could not be loaded into the DOM object
if ($loaded === FALSE) {
    // Build one message string: comma-separated arguments would pass $pFilename
    // as the exception's $code parameter instead of including it in the message
    throw new PHPExcel_Reader_Exception('Failed to load ' . $pFilename . ' as a DOM Document');
}
// Discard white space
$dom->preserveWhiteSpace = false;
$row = 0;
$column = 'A';
$content = '';
$this->_processDomElement($dom, $objPHPExcel->getActiveSheet(), $row, $column, $content);
// Return // Return
return $objPHPExcel; return $objPHPExcel;
} }
/** /**
* Get sheet index * Get sheet index
* *
* @return int * @return int
*/ */
public function getSheetIndex() { public function getSheetIndex()
return $this->_sheetIndex; {
} return $this->_sheetIndex;
}
/** /**
* Set sheet index * Set sheet index
* *
* @param int $pValue Sheet index * @param int $pValue Sheet index
* @return PHPExcel_Reader_HTML * @return PHPExcel_Reader_HTML
*/ */
public function setSheetIndex($pValue = 0) { public function setSheetIndex($pValue = 0)
$this->_sheetIndex = $pValue; {
return $this; $this->_sheetIndex = $pValue;
}
return $this;
}
} }

View File

@ -49,6 +49,7 @@ Planned for v1.8.1
- Feature: (CQD) Work Item GH-389 - Additional Mac CJK codepage definitions - Feature: (CQD) Work Item GH-389 - Additional Mac CJK codepage definitions
- Feature: (bolovincev) Work Item GH-269 - Update Worksheet.php getStyleByColumnAndRow() to allow a range of cells rather than just a single cell - Feature: (bolovincev) Work Item GH-269 - Update Worksheet.php getStyleByColumnAndRow() to allow a range of cells rather than just a single cell
- Feature: (MBaker) - New methods added for testing cell status within merge groups - Feature: (MBaker) - New methods added for testing cell status within merge groups
- Feature: (cifren/MBaker) Work Item GH-205 - Handling merge cells in HTML Reader
2014-03-02 (v1.8.0): 2014-03-02 (v1.8.0):