From 6080c4561d69caafb71c1799b0ea77e04ad856fe Mon Sep 17 00:00:00 2001 From: Owen Leibman Date: Thu, 25 Jun 2020 22:42:38 -0700 Subject: [PATCH 1/3] Improve Coverage for HTML Reader Reader/Html is now covered except for 1 statement. There is some coverage of RichText when you know in advance that the html will expand into a single cell. It is a tougher nut, one that I have not yet cracked, to try to handle rich text while converting unknown html to multiple cells. The original author left this as a TODO, and so for now must I. It made sense to restructure some of the code. There are some changes. - Issue #1532 is fixed (links are now saved when using rowspan). - Colors can now be specified as html color name. To accomplish this, Helper/Html function colourNameLookup was changed from protected to public, and changed to static. - Superfluous empty lines were eliminated in a number of places, e.g. had formerly caused a wrapped cell to be created with 2 empty lines followed by A, B, and C on separate lines; it will now just have the 3 A/B/C lines, which seems like a more sensible interpretation. - Img alt tag, which had been cast to float, is now used as a string. Private member "encoding" is not used. Functions getInputEncoding and setInputEncoding have therefore been marked deprecated. In fact, I was unable to get SecurityScanner to pass *any* html which is not UTF-8. There are possibly ways of getting around this (in Reader/Html - I have no intention of messing with Security Scanner), as can be seen in my companion pull request for Excel2003 Xml Reader. Doing this would be easier for ASCII-compatible character sets (like ISO-8859-1), than for non-compatible charsets (like UTF-16). I am not convinced that the effort is worth it, but am willing to investigate further. I added a number of tests, creating an Html directory, and moving HtmlTest to that directory. 
--- samples/Basic/42_RichText.php | 2 +- src/PhpSpreadsheet/Helper/Html.php | 4 +- src/PhpSpreadsheet/Reader/BaseReader.php | 23 +- src/PhpSpreadsheet/Reader/Html.php | 661 ++++++++++-------- .../Reader/Html/HtmlBorderTest.php | 110 +++ .../Reader/Html/HtmlHelper.php | 28 + .../Reader/Html/HtmlImageTest.php | 84 +++ .../Reader/Html/HtmlLoadStringTest.php | 92 +++ .../Reader/Html/HtmlTagsTest.php | 236 +++++++ .../Reader/{ => Html}/HtmlTest.php | 251 ++----- tests/data/Reader/HTML/badhtml.html | 1 + 11 files changed, 978 insertions(+), 514 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Reader/Html/HtmlBorderTest.php create mode 100644 tests/PhpSpreadsheetTests/Reader/Html/HtmlHelper.php create mode 100644 tests/PhpSpreadsheetTests/Reader/Html/HtmlImageTest.php create mode 100644 tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php create mode 100644 tests/PhpSpreadsheetTests/Reader/Html/HtmlTagsTest.php rename tests/PhpSpreadsheetTests/Reader/{ => Html}/HtmlTest.php (53%) create mode 100644 tests/data/Reader/HTML/badhtml.html diff --git a/samples/Basic/42_RichText.php b/samples/Basic/42_RichText.php index 43b35a62..d5fa85b4 100644 --- a/samples/Basic/42_RichText.php +++ b/samples/Basic/42_RichText.php @@ -30,7 +30,7 @@ $html1 = ' while this block uses an underline.

-

+

I want to eat healthy food pizza. '; diff --git a/src/PhpSpreadsheet/Helper/Html.php b/src/PhpSpreadsheet/Helper/Html.php index 252e14a5..6c4cbf9b 100644 --- a/src/PhpSpreadsheet/Helper/Html.php +++ b/src/PhpSpreadsheet/Helper/Html.php @@ -694,9 +694,9 @@ class Html return implode('', $values[0]); } - protected function colourNameLookup($rgb) + public static function colourNameLookup(string $rgb): string { - return self::$colourMap[$rgb]; + return self::$colourMap[$rgb] ?? ''; } protected function startFontTag($tag): void diff --git a/src/PhpSpreadsheet/Reader/BaseReader.php b/src/PhpSpreadsheet/Reader/BaseReader.php index 77a6421b..eb0e3ba2 100644 --- a/src/PhpSpreadsheet/Reader/BaseReader.php +++ b/src/PhpSpreadsheet/Reader/BaseReader.php @@ -2,6 +2,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader; +use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException; use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Shared\File; @@ -133,11 +134,7 @@ abstract class BaseReader implements IReader public function getSecurityScanner() { - if (property_exists($this, 'securityScanner')) { - return $this->securityScanner; - } - - return null; + return $this->securityScanner; } /** @@ -147,12 +144,18 @@ abstract class BaseReader implements IReader */ protected function openFile($pFilename): void { - File::assertFile($pFilename); + if ($pFilename) { + File::assertFile($pFilename); - // Open file - $this->fileHandle = fopen($pFilename, 'rb'); - if ($this->fileHandle === false) { - throw new Exception('Could not open file ' . $pFilename . ' for reading.'); + // Open file + $fileHandle = fopen($pFilename, 'rb'); + } else { + $fileHandle = false; + } + if ($fileHandle !== false) { + $this->fileHandle = $fileHandle; + } else { + throw new ReaderException('Could not open file ' . $pFilename . 
' for reading.'); } } } diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index 2fe85b6f..7cb14f49 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -16,6 +16,7 @@ use PhpOffice\PhpSpreadsheet\Style\Font; use PhpOffice\PhpSpreadsheet\Style\Style; use PhpOffice\PhpSpreadsheet\Worksheet\Drawing; use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; +use Throwable; /** PhpSpreadsheet root directory */ class Html extends BaseReader @@ -219,9 +220,13 @@ class Html extends BaseReader /** * Set input encoding. * + * @deprecated no use is made of this property + * * @param string $pValue Input encoding, eg: 'ANSI' * * @return $this + * + * @codeCoverageIgnore */ public function setInputEncoding($pValue) { @@ -233,7 +238,11 @@ class Html extends BaseReader /** * Get input encoding. * + * @deprecated no use is made of this property + * * @return string + * + * @codeCoverageIgnore */ public function getInputEncoding() { @@ -289,12 +298,319 @@ class Html extends BaseReader $cellContent = (string) ''; } - /** - * @param int $row - * @param string $column - * @param string $cellContent - */ - protected function processDomElement(DOMNode $element, Worksheet $sheet, &$row, &$column, &$cellContent): void + private function processDomElementBody(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void + { + $attributeArray = []; + foreach ($child->attributes as $attribute) { + $attributeArray[$attribute->name] = $attribute->value; + } + + if ($child->nodeName === 'body') { + $row = 1; + $column = 'A'; + $cellContent = ''; + $this->tableLevel = 0; + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + } else { + $this->processDomElementTitle($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementTitle(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string 
&$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'title') { + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $sheet->setTitle($cellContent, true, false); + $cellContent = ''; + } else { + $this->processDomElementSpanEtc($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private static $spanEtc = ['span', 'div', 'font', 'i', 'em', 'strong', 'b']; + + private function processDomElementSpanEtc(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if (in_array($child->nodeName, self::$spanEtc)) { + if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') { + $sheet->getComment($column . $row) + ->getText() + ->createTextRun($child->textContent); + } + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + + if (isset($this->formats[$child->nodeName])) { + $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); + } + } else { + $this->processDomElementHr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementHr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'hr') { + $this->flushCell($sheet, $column, $row, $cellContent); + ++$row; + if (isset($this->formats[$child->nodeName])) { + $sheet->getStyle($column . 
$row)->applyFromArray($this->formats[$child->nodeName]); + } + ++$row; + } + // fall through to br + $this->processDomElementBr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + + private function processDomElementBr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'br' || $child->nodeName === 'hr') { + if ($this->tableLevel > 0) { + // If we're inside a table, replace with a \n and set the cell to wrap + $cellContent .= "\n"; + $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true); + } else { + // Otherwise flush our existing content and move the row cursor on + $this->flushCell($sheet, $column, $row, $cellContent); + ++$row; + } + } else { + $this->processDomElementA($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementA(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'a') { + foreach ($attributeArray as $attributeName => $attributeValue) { + switch ($attributeName) { + case 'href': + $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue); + if (isset($this->formats[$child->nodeName])) { + $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); + } + + break; + case 'class': + if ($attributeValue === 'comment-indicator') { + break; // Ignore - it's just a red square. 
+ } + } + } + // no idea why this should be needed + //$cellContent .= ' '; + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + } else { + $this->processDomElementH1Etc($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private static $h1Etc = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p']; + + private function processDomElementH1Etc(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if (in_array($child->nodeName, self::$h1Etc)) { + if ($this->tableLevel > 0) { + // If we're inside a table, replace with a \n + $cellContent .= $cellContent ? "\n" : ''; + $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true); + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + } else { + if ($cellContent > '') { + $this->flushCell($sheet, $column, $row, $cellContent); + ++$row; + } + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->flushCell($sheet, $column, $row, $cellContent); + + if (isset($this->formats[$child->nodeName])) { + $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); + } + + ++$row; + $column = 'A'; + } + } else { + $this->processDomElementLi($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementLi(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'li') { + if ($this->tableLevel > 0) { + // If we're inside a table, replace with a \n + $cellContent .= $cellContent ? 
"\n" : ''; + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + } else { + if ($cellContent > '') { + $this->flushCell($sheet, $column, $row, $cellContent); + } + ++$row; + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->flushCell($sheet, $column, $row, $cellContent); + $column = 'A'; + } + } else { + $this->processDomElementImg($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementImg(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'img') { + $this->insertImage($sheet, $column, $row, $attributeArray); + } else { + $this->processDomElementTable($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementTable(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'table') { + $this->flushCell($sheet, $column, $row, $cellContent); + $column = $this->setTableStartColumn($column); + if ($this->tableLevel > 1) { + --$row; + } + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $column = $this->releaseTableStartColumn(); + if ($this->tableLevel > 1) { + ++$column; + } else { + ++$row; + } + } else { + $this->processDomElementTr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementTr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName === 'tr') { + $column = $this->getTableStartColumn(); + $cellContent = ''; + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + + if (isset($attributeArray['height'])) { + 
$sheet->getRowDimension($row)->setRowHeight($attributeArray['height']); + } + + ++$row; + } else { + $this->processDomElementThTdOther($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementThTdOther(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + if ($child->nodeName !== 'td' && $child->nodeName !== 'th') { + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + } else { + $this->processDomElementThTd($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + } + } + + private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void + { + if (isset($attributeArray['bgcolor'])) { + $sheet->getStyle("$column$row")->applyFromArray( + [ + 'fill' => [ + 'fillType' => Fill::FILL_SOLID, + 'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])], + ], + ] + ); + } + } + + private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void + { + if (isset($attributeArray['width'])) { + $sheet->getColumnDimension($column)->setWidth($attributeArray['width']); + } + } + + private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void + { + if (isset($attributeArray['height'])) { + $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']); + } + } + + private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void + { + if (isset($attributeArray['align'])) { + $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray['align']); + } + } + + private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void + { + if (isset($attributeArray['valign'])) { + $sheet->getStyle($column . 
$row)->getAlignment()->setVertical($attributeArray['valign']); + } + } + + private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void + { + if (isset($attributeArray['data-format'])) { + $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray['data-format']); + } + } + + private function processDomElementThTd(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + { + while (isset($this->rowspan[$column . $row])) { + ++$column; + } + $this->processDomElement($child, $sheet, $row, $column, $cellContent); + + // apply inline style + $this->applyInlineStyle($sheet, $row, $column, $attributeArray); + + $this->flushCell($sheet, $column, $row, $cellContent); + + $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray); + $this->processDomElementWidth($sheet, $column, $attributeArray); + $this->processDomElementHeight($sheet, $row, $attributeArray); + $this->processDomElementAlign($sheet, $row, $column, $attributeArray); + $this->processDomElementVAlign($sheet, $row, $column, $attributeArray); + $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray); + + if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) { + //create merging rowspan and colspan + $columnTo = $column; + for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) { + ++$columnTo; + } + $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1); + foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) { + $this->rowspan[$value] = true; + } + $sheet->mergeCells($range); + $column = $columnTo; + } elseif (isset($attributeArray['rowspan'])) { + //create merging rowspan + $range = $column . $row . ':' . $column . 
($row + (int) $attributeArray['rowspan'] - 1); + foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) { + $this->rowspan[$value] = true; + } + $sheet->mergeCells($range); + } elseif (isset($attributeArray['colspan'])) { + //create merging colspan + $columnTo = $column; + for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) { + ++$columnTo; + } + $sheet->mergeCells($column . $row . ':' . $columnTo . $row); + $column = $columnTo; + } + + ++$column; + } + + protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void { foreach ($element->childNodes as $child) { if ($child instanceof DOMText) { @@ -306,267 +622,7 @@ class Html extends BaseReader // but if we have a rich text run instead, we need to append it correctly // TODO } elseif ($child instanceof DOMElement) { - $attributeArray = []; - foreach ($child->attributes as $attribute) { - $attributeArray[$attribute->name] = $attribute->value; - } - - switch ($child->nodeName) { - case 'meta': - foreach ($attributeArray as $attributeName => $attributeValue) { - // Extract character set, so we can convert to UTF-8 if required - if ($attributeName === 'charset') { - $this->setInputEncoding($attributeValue); - } - } - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - - break; - case 'title': - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $sheet->setTitle($cellContent, true, false); - $cellContent = ''; - - break; - case 'span': - case 'div': - case 'font': - case 'i': - case 'em': - case 'strong': - case 'b': - if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') { - $sheet->getComment($column . 
$row) - ->getText() - ->createTextRun($child->textContent); - - break; - } - - if ($cellContent > '') { - $cellContent .= ' '; - } - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - if ($cellContent > '') { - $cellContent .= ' '; - } - - if (isset($this->formats[$child->nodeName])) { - $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); - } - - break; - case 'hr': - $this->flushCell($sheet, $column, $row, $cellContent); - ++$row; - if (isset($this->formats[$child->nodeName])) { - $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); - } else { - $cellContent = '----------'; - $this->flushCell($sheet, $column, $row, $cellContent); - } - ++$row; - // Add a break after a horizontal rule, simply by allowing the code to dropthru - // no break - case 'br': - if ($this->tableLevel > 0) { - // If we're inside a table, replace with a \n and set the cell to wrap - $cellContent .= "\n"; - $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true); - } else { - // Otherwise flush our existing content and move the row cursor on - $this->flushCell($sheet, $column, $row, $cellContent); - ++$row; - } - - break; - case 'a': - foreach ($attributeArray as $attributeName => $attributeValue) { - switch ($attributeName) { - case 'href': - $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue); - if (isset($this->formats[$child->nodeName])) { - $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); - } - - break; - case 'class': - if ($attributeValue === 'comment-indicator') { - break; // Ignore - it's just a red square. 
- } - } - } - $cellContent .= ' '; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - - break; - case 'h1': - case 'h2': - case 'h3': - case 'h4': - case 'h5': - case 'h6': - case 'ol': - case 'ul': - case 'p': - if ($this->tableLevel > 0) { - // If we're inside a table, replace with a \n - $cellContent .= "\n"; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - } else { - if ($cellContent > '') { - $this->flushCell($sheet, $column, $row, $cellContent); - ++$row; - } - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $this->flushCell($sheet, $column, $row, $cellContent); - - if (isset($this->formats[$child->nodeName])) { - $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); - } - - ++$row; - $column = 'A'; - } - - break; - case 'li': - if ($this->tableLevel > 0) { - // If we're inside a table, replace with a \n - $cellContent .= "\n"; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - } else { - if ($cellContent > '') { - $this->flushCell($sheet, $column, $row, $cellContent); - } - ++$row; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $this->flushCell($sheet, $column, $row, $cellContent); - $column = 'A'; - } - - break; - case 'img': - $this->insertImage($sheet, $column, $row, $attributeArray); - - break; - case 'table': - $this->flushCell($sheet, $column, $row, $cellContent); - $column = $this->setTableStartColumn($column); - if ($this->tableLevel > 1) { - --$row; - } - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $column = $this->releaseTableStartColumn(); - if ($this->tableLevel > 1) { - ++$column; - } else { - ++$row; - } - - break; - case 'thead': - case 'tbody': - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - - break; - case 'tr': - $column = $this->getTableStartColumn(); - $cellContent = ''; - $this->processDomElement($child, $sheet, $row, $column, 
$cellContent); - - if (isset($attributeArray['height'])) { - $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']); - } - - ++$row; - - break; - case 'th': - case 'td': - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - - while (isset($this->rowspan[$column . $row])) { - ++$column; - } - - // apply inline style - $this->applyInlineStyle($sheet, $row, $column, $attributeArray); - - $this->flushCell($sheet, $column, $row, $cellContent); - - if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) { - //create merging rowspan and colspan - $columnTo = $column; - for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) { - ++$columnTo; - } - $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1); - foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) { - $this->rowspan[$value] = true; - } - $sheet->mergeCells($range); - $column = $columnTo; - } elseif (isset($attributeArray['rowspan'])) { - //create merging rowspan - $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1); - foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) { - $this->rowspan[$value] = true; - } - $sheet->mergeCells($range); - } elseif (isset($attributeArray['colspan'])) { - //create merging colspan - $columnTo = $column; - for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) { - ++$columnTo; - } - $sheet->mergeCells($column . $row . ':' . $columnTo . $row); - $column = $columnTo; - } elseif (isset($attributeArray['bgcolor'])) { - $sheet->getStyle($column . 
$row)->applyFromArray( - [ - 'fill' => [ - 'fillType' => Fill::FILL_SOLID, - 'color' => ['rgb' => $attributeArray['bgcolor']], - ], - ] - ); - } - - if (isset($attributeArray['width'])) { - $sheet->getColumnDimension($column)->setWidth($attributeArray['width']); - } - - if (isset($attributeArray['height'])) { - $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']); - } - - if (isset($attributeArray['align'])) { - $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray['align']); - } - - if (isset($attributeArray['valign'])) { - $sheet->getStyle($column . $row)->getAlignment()->setVertical($attributeArray['valign']); - } - - if (isset($attributeArray['data-format'])) { - $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray['data-format']); - } - - ++$column; - - break; - case 'body': - $row = 1; - $column = 'A'; - $cellContent = ''; - $this->tableLevel = 0; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - - break; - default: - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - } + $this->processDomElementBody($element, $sheet, $row, $column, $cellContent, $child); } } } @@ -588,7 +644,11 @@ class Html extends BaseReader // Create a new DOM object $dom = new DOMDocument(); // Reload the HTML file into the DOM object - $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); + try { + $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); + } catch (Throwable $e) { + $loaded = false; + } if ($loaded === false) { throw new Exception('Failed to load ' . $pFilename . 
' as a DOM Document'); } @@ -606,7 +666,11 @@ class Html extends BaseReader // Create a new DOM object $dom = new DOMDocument(); // Reload the HTML file into the DOM object - $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8')); + try { + $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8')); + } catch (Throwable $e) { + $loaded = false; + } if ($loaded === false) { throw new Exception('Failed to load content as a DOM Document'); } @@ -837,7 +901,7 @@ class Html extends BaseReader return substr($value, 1); } - return null; + return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup((string) $value); } /** @@ -853,7 +917,7 @@ class Html extends BaseReader $src = urldecode($attributes['src']); $width = isset($attributes['width']) ? (float) $attributes['width'] : null; $height = isset($attributes['height']) ? (float) $attributes['height'] : null; - $name = isset($attributes['alt']) ? (float) $attributes['alt'] : null; + $name = $attributes['alt'] ?? null; $drawing = new Drawing(); $drawing->setPath($src); @@ -884,6 +948,28 @@ class Html extends BaseReader ); } + private static $borderMappings = [ + 'dash-dot' => Border::BORDER_DASHDOT, + 'dash-dot-dot' => Border::BORDER_DASHDOTDOT, + 'dashed' => Border::BORDER_DASHED, + 'dotted' => Border::BORDER_DOTTED, + 'double' => Border::BORDER_DOUBLE, + 'hair' => Border::BORDER_HAIR, + 'medium' => Border::BORDER_MEDIUM, + 'medium-dashed' => Border::BORDER_MEDIUMDASHED, + 'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT, + 'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT, + 'none' => Border::BORDER_NONE, + 'slant-dash-dot' => Border::BORDER_SLANTDASHDOT, + 'solid' => Border::BORDER_THIN, + 'thick' => Border::BORDER_THICK, + ]; + + public static function getBorderMappings(): array + { + return self::$borderMappings; + } + /** * Map html border style to PhpSpreadsheet border style. 
* @@ -893,38 +979,7 @@ class Html extends BaseReader */ public function getBorderStyle($style) { - switch ($style) { - case 'solid': - return Border::BORDER_THIN; - case 'dashed': - return Border::BORDER_DASHED; - case 'dotted': - return Border::BORDER_DOTTED; - case 'medium': - return Border::BORDER_MEDIUM; - case 'thick': - return Border::BORDER_THICK; - case 'none': - return Border::BORDER_NONE; - case 'dash-dot': - return Border::BORDER_DASHDOT; - case 'dash-dot-dot': - return Border::BORDER_DASHDOTDOT; - case 'double': - return Border::BORDER_DOUBLE; - case 'hair': - return Border::BORDER_HAIR; - case 'medium-dash-dot': - return Border::BORDER_MEDIUMDASHDOT; - case 'medium-dash-dot-dot': - return Border::BORDER_MEDIUMDASHDOTDOT; - case 'medium-dashed': - return Border::BORDER_MEDIUMDASHED; - case 'slant-dash-dot': - return Border::BORDER_SLANTDASHDOT; - } - - return null; + return (array_key_exists($style, self::$borderMappings)) ? self::$borderMappings[$style] : null; } /** diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlBorderTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlBorderTest.php new file mode 100644 index 00000000..58a0b5d7 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlBorderTest.php @@ -0,0 +1,110 @@ + + + Thin border + Border bottom + Border top + Border left + Border right + + + '; + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + $firstSheet = $spreadsheet->getSheet(0); + $style = $firstSheet->getCell('A1')->getStyle(); + $borders = $style->getBorders(); + + /** @var Border $border */ + foreach ([$borders->getTop(), $borders->getBottom(), $borders->getLeft(), $borders->getRight()] as $border) { + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + } + + $style = $firstSheet->getCell('B1')->getStyle(); + $border = $style->getBorders()->getBottom(); + 
self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_DASHED, $border->getBorderStyle()); + self::assertEquals(Border::BORDER_NONE, $style->getBorders()->getTop()->getBorderStyle()); + + $style = $firstSheet->getCell('C1')->getStyle(); + $border = $style->getBorders()->getTop(); + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + self::assertEquals(Border::BORDER_NONE, $style->getBorders()->getBottom()->getBorderStyle()); + + $style = $firstSheet->getCell('D1')->getStyle(); + $border = $style->getBorders()->getLeft(); + self::assertEquals('00ff00', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + self::assertEquals(Border::BORDER_NONE, $style->getBorders()->getBottom()->getBorderStyle()); + + $style = $firstSheet->getCell('E1')->getStyle(); + $border = $style->getBorders()->getRight(); + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + self::assertEquals(Border::BORDER_NONE, $style->getBorders()->getBottom()->getBorderStyle()); + + $style = $firstSheet->getCell('F1')->getStyle(); + $borders = $style->getBorders(); + foreach ([$borders->getTop(), $borders->getBottom(), $borders->getLeft(), $borders->getRight()] as $border) { + self::assertEquals(Border::BORDER_NONE, $border->getBorderStyle()); + } + } + + /** + * @dataProvider providerBorderStyle + */ + public function testBorderStyle(string $style, string $expectedResult): void + { + $borders = Html::getBorderMappings(); + self::assertEquals($expectedResult, $borders[$style]); + } + + public function testBorderStyleCoverage(): void + { + $expected = Html::getBorderMappings(); + $covered = []; + foreach ($expected as $key => $val) { + $covered[$key] = 0; + } + $tests = $this->providerBorderStyle(); + foreach ($tests as $test) { + $covered[$test[0]] = 1; + 
} + foreach ($covered as $key => $val) { + self::assertEquals(1, $val, "Borderstyle $key not tested"); + } + } + + public function providerBorderStyle(): array + { + return [ + ['dash-dot', Border::BORDER_DASHDOT], + ['dash-dot-dot', Border::BORDER_DASHDOTDOT], + ['dashed', Border::BORDER_DASHED], + ['dotted', Border::BORDER_DOTTED], + ['double', Border::BORDER_DOUBLE], + ['hair', Border::BORDER_HAIR], + ['medium', Border::BORDER_MEDIUM], + ['medium-dashed', Border::BORDER_MEDIUMDASHED], + ['medium-dash-dot', Border::BORDER_MEDIUMDASHDOT], + ['medium-dash-dot-dot', Border::BORDER_MEDIUMDASHDOTDOT], + ['none', Border::BORDER_NONE], + ['slant-dash-dot', Border::BORDER_SLANTDASHDOT], + ['solid', Border::BORDER_THIN], + ['thick', Border::BORDER_THICK], + ]; + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlHelper.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlHelper.php new file mode 100644 index 00000000..c09902ff --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlHelper.php @@ -0,0 +1,28 @@ +load($filename); + if ($unlink) { + unlink($filename); + } + + return $spreadsheet; + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlImageTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlImageTest.php new file mode 100644 index 00000000..cf4157e3 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlImageTest.php @@ -0,0 +1,84 @@ + + + test image + + '; + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + $firstSheet = $spreadsheet->getSheet(0); + + /** @var Drawing $drawing */ + $drawing = $firstSheet->getDrawingCollection()[0]; + self::assertEquals($imagePath, $drawing->getPath()); + self::assertEquals('A1', $drawing->getCoordinates()); + self::assertEquals('test image', $drawing->getName()); + self::assertEquals('100', $drawing->getWidth()); + self::assertEquals('100', $drawing->getHeight()); + } + + public function testCanInsertImageWidth(): void + { + $imagePath 
= realpath(__DIR__ . '/../../../data/Reader/HTML/image.jpg'); + + $html = ' + + + +
test image
'; + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + $firstSheet = $spreadsheet->getSheet(0); + + /** @var Drawing $drawing */ + $drawing = $firstSheet->getDrawingCollection()[0]; + self::assertEquals('50', $drawing->getWidth()); + self::assertEquals('50', $drawing->getHeight()); + } + + public function testCanInsertImageHeight(): void + { + $imagePath = realpath(__DIR__ . '/../../../data/Reader/HTML/image.jpg'); + + $html = ' + + + +
'; + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + $firstSheet = $spreadsheet->getSheet(0); + + /** @var Drawing $drawing */ + $drawing = $firstSheet->getDrawingCollection()[0]; + self::assertEquals('', $drawing->getName()); + self::assertEquals('75', $drawing->getWidth()); + self::assertEquals('75', $drawing->getHeight()); + } + + public function testImageWithourSrc(): void + { + $html = ' + + + +
'; + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + $firstSheet = $spreadsheet->getSheet(0); + + self::assertCount(0, $firstSheet->getDrawingCollection()); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php new file mode 100644 index 00000000..e1041507 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php @@ -0,0 +1,92 @@ + + + Hello World + + + Hello
World + + + Hello
World + + '; + $spreadsheet = (new Html())->loadFromString($html); + $firstSheet = $spreadsheet->getSheet(0); + + $cellStyle = $firstSheet->getStyle('A1'); + self::assertFalse($cellStyle->getAlignment()->getWrapText()); + + $cellStyle = $firstSheet->getStyle('A2'); + self::assertTrue($cellStyle->getAlignment()->getWrapText()); + $cellValue = $firstSheet->getCell('A2')->getValue(); + self::assertStringContainsString("\n", $cellValue); + + $cellStyle = $firstSheet->getStyle('A3'); + self::assertTrue($cellStyle->getAlignment()->getWrapText()); + $cellValue = $firstSheet->getCell('A3')->getValue(); + self::assertStringContainsString("\n", $cellValue); + } + + public function testLoadInvalidString(): void + { + $this->expectException(ReaderException::class); + $html = ''; + $spreadsheet = (new Html())->loadFromString($html); + $firstSheet = $spreadsheet->getSheet(0); + $cellStyle = $firstSheet->getStyle('A1'); + self::assertFalse($cellStyle->getAlignment()->getWrapText()); + } + + public function testCanLoadFromStringIntoExistingSpreadsheet(): void + { + $html = ' + + + + + + + + + +
Hello World
Hello
World
Hello
World
'; + $reader = new Html(); + $spreadsheet = $reader->loadFromString($html); + $firstSheet = $spreadsheet->getSheet(0); + + $cellStyle = $firstSheet->getStyle('A1'); + self::assertFalse($cellStyle->getAlignment()->getWrapText()); + + $cellStyle = $firstSheet->getStyle('A2'); + self::assertTrue($cellStyle->getAlignment()->getWrapText()); + $cellValue = $firstSheet->getCell('A2')->getValue(); + self::assertStringContainsString("\n", $cellValue); + + $cellStyle = $firstSheet->getStyle('A3'); + self::assertTrue($cellStyle->getAlignment()->getWrapText()); + $cellValue = $firstSheet->getCell('A3')->getValue(); + self::assertStringContainsString("\n", $cellValue); + + $reader->setSheetIndex(1); + $html = ' + + + +
Goodbye World
'; + + self::assertEquals(1, $spreadsheet->getSheetCount()); + $spreadsheet = $reader->loadFromString($html, $spreadsheet); + self::assertEquals(2, $spreadsheet->getSheetCount()); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlTagsTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlTagsTest.php new file mode 100644 index 00000000..c0c206f9 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlTagsTest.php @@ -0,0 +1,236 @@ + +123 +hyperlink5


6 +789 +101112 + +
+ + + + + +
123
456
789
  • A
  • B
  • C
1112
+ +
+ + + + + + +
M + + + +
NO
PQ
+
R
STU
+EOF; + $robj = $reader->loadFromString($html1); + $sheet = $robj->getActiveSheet(); + + self::assertEquals('www.google.com', $sheet->getCell('A2')->getHyperlink()->getUrl()); + self::assertEquals('hyperlink', $sheet->getCell('A2')->getValue()); + self::assertEquals(-1, $sheet->getRowDimension(11)->getRowHeight()); + self::assertEquals(20, $sheet->getRowDimension(12)->getRowHeight()); + self::assertEquals(5, $sheet->getCell('B2')->getValue()); + self::assertEquals(Border::BORDER_THIN, $sheet->getCell('B3')->getStyle()->getBorders()->getBottom()->getBorderStyle()); + self::assertEquals(6, $sheet->getCell('C4')->getValue()); + self::assertEquals(Border::BORDER_THIN, $sheet->getCell('A9')->getStyle()->getBorders()->getBottom()->getBorderStyle()); + + self::assertEquals(2, $sheet->getCell('B11')->getValue()); + self::assertTrue($sheet->getCell('B11')->getStyle()->getFont()->getItalic()); + + // list within table + self::assertEquals("A\nB\nC", $sheet->getCell('A14')->getValue()); + self::assertTrue($sheet->getCell('A14')->getStyle()->getAlignment()->getWrapText()); + // list outside of table + self::assertEquals('D', $sheet->getCell('A17')->getValue()); + self::assertEquals('E', $sheet->getCell('A18')->getValue()); + self::assertEquals('F', $sheet->getCell('A19')->getValue()); + + // embedded table + self::assertEquals('M', $sheet->getCell('A21')->getValue()); + self::assertEquals('N', $sheet->getCell('B20')->getValue()); + self::assertEquals('O', $sheet->getCell('C20')->getValue()); + self::assertEquals('P', $sheet->getCell('B21')->getValue()); + self::assertEquals('Q', $sheet->getCell('C21')->getValue()); + self::assertEquals('R', $sheet->getCell('C23')->getValue()); + self::assertEquals('S', $sheet->getCell('A24')->getValue()); + } + + public static function testTagsRowColSpans(): void + { + $reader = new Html(); + $html1 = << + + Month + Savings + Expenses + + + January + $100 + $50 + + + February + $80 + + + Away in March + $30 + + + $40 + + +EOF; + $robj = 
$reader->loadFromString($html1); + $sheet = $robj->getActiveSheet(); + + self::assertEquals(['C2:C3' => 'C2:C3', 'A4:B5' => 'A4:B5'], $sheet->getMergeCells()); + self::assertEquals('Away in March', $sheet->getCell('A4')->getValue()); + self::assertEquals('00FFFF', $sheet->getCell('A4')->getStyle()->getFill()->getEndColor()->getRGB()); + } + + public static function testDoublyEmbeddedTable(): void + { + $reader = new Html(); + $html1 = << +123 +456 +789 + + + +M + + + + + + + +
N + + + +
1011
1213
+
Y
PQX
+ +R + +STU + +EOF; + $robj = $reader->loadFromString($html1); + $sheet = $robj->getActiveSheet(); + + self::assertEquals('1', $sheet->getCell('A1')->getValue()); + self::assertEquals('2', $sheet->getCell('B1')->getValue()); + self::assertEquals('3', $sheet->getCell('C1')->getValue()); + self::assertEquals('4', $sheet->getCell('A2')->getValue()); + self::assertEquals('5', $sheet->getCell('B2')->getValue()); + self::assertEquals('6', $sheet->getCell('C2')->getValue()); + self::assertEquals('7', $sheet->getCell('A3')->getValue()); + self::assertEquals('8', $sheet->getCell('B3')->getValue()); + self::assertEquals('9', $sheet->getCell('C3')->getValue()); + self::assertEquals('10', $sheet->getCell('C5')->getValue()); + self::assertEquals('11', $sheet->getCell('D5')->getValue()); + self::assertEquals('12', $sheet->getCell('C6')->getValue()); + self::assertEquals('13', $sheet->getCell('D6')->getValue()); + self::assertEquals('N', $sheet->getCell('B6')->getValue()); + self::assertEquals('M', $sheet->getCell('A7')->getValue()); + self::assertEquals('Y', $sheet->getCell('E7')->getValue()); + self::assertEquals('P', $sheet->getCell('B8')->getValue()); + self::assertEquals('Q', $sheet->getCell('C8')->getValue()); + self::assertEquals('X', $sheet->getCell('D8')->getValue()); + self::assertEquals('R', $sheet->getCell('C10')->getValue()); + self::assertEquals('S', $sheet->getCell('A11')->getValue()); + self::assertEquals('T', $sheet->getCell('B11')->getValue()); + self::assertEquals('U', $sheet->getCell('C11')->getValue()); + } + + public static function testTagsOutsideTable(): void + { + $reader = new Html(); + $html1 = <<Here comes a list +
    +
  1. Item 1
  2. +
  3. Item 2
  4. +
  5. Item 3
  6. +
  7. Item 4
  8. +
+And here's another +
    +
  • Item A
  • +
  • Item B
  • +
+
    +Content before list +
  1. Item I
  2. +
  3. Item II
  4. +
  5. This is rich text
  6. +
+ +EOF; + $robj = $reader->loadFromString($html1); + $sheet = $robj->getActiveSheet(); + + self::assertTrue($sheet->getCell('A1')->getStyle()->getFont()->getBold()); + self::assertEquals('Here comes a list', $sheet->getCell('A1')->getValue()); + self::assertEquals('Item 1', $sheet->getCell('A3')->getValue()); + self::assertEquals('Item 2', $sheet->getCell('A4')->getValue()); + self::assertEquals('Item 3', $sheet->getCell('A5')->getValue()); + self::assertEquals('Item 4', $sheet->getCell('A6')->getValue()); + self::assertEquals('And here\'s another', $sheet->getCell('A7')->getValue()); + self::assertEquals('Item A', $sheet->getCell('A9')->getValue()); + self::assertEquals('Item B', $sheet->getCell('A10')->getValue()); + self::assertEquals('Content before list', $sheet->getCell('A11')->getValue()); + self::assertEquals('Item I', $sheet->getCell('A12')->getValue()); + self::assertEquals('Item II', $sheet->getCell('A13')->getValue()); + // TODO Rich Text not yet supported + } + + public static function testHyperlinksWithRowspan(): void + { + $reader = new Html(); + $html1 = << + + Title + Link 1 + + + Link 2 + + + Link 3 + + +EOF; + $robj = $reader->loadFromString($html1); + $sheet = $robj->getActiveSheet(); + self::assertEquals('https://google.com', $sheet->getCell('B1')->getHyperlink()->getUrl()); + self::assertEquals('https://google.com', $sheet->getCell('B2')->getHyperlink()->getUrl()); + self::assertEquals('https://google.com', $sheet->getCell('B3')->getHyperlink()->getUrl()); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php similarity index 53% rename from tests/PhpSpreadsheetTests/Reader/HtmlTest.php rename to tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php index b0994a33..e87e636b 100644 --- a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php @@ -1,12 +1,11 @@ canRead($filename)); } + public function testBadHtml(): void + { + 
$this->expectException(ReaderException::class); + $filename = 'tests/data/Reader/HTML/badhtml.html'; + $reader = new Html(); + self::assertTrue($reader->canRead($filename)); + $spreadsheet = $reader->load($filename); + self::assertTrue(false); + } + + public function testNonHtml(): void + { + $this->expectException(ReaderException::class); + $filename = __FILE__; + $reader = new Html(); + self::assertFalse($reader->canRead($filename)); + $spreadsheet = $reader->load($filename); + self::assertTrue(false); + } + + public function testInvalidFilename(): void + { + $reader = new Html(); + self::assertEquals(0, $reader->getSheetIndex()); + self::assertFalse($reader->canRead('')); + } + public function providerCanReadVerySmallFile() { $padding = str_repeat('a', 2048); @@ -38,7 +64,7 @@ class HtmlTest extends TestCase */ public function testCanReadVerySmallFile($expected, $content): void { - $filename = $this->createHtml($content); + $filename = HtmlHelper::createHtml($content); $reader = new Html(); $actual = $reader->canRead($filename); @@ -51,63 +77,21 @@ class HtmlTest extends TestCase { $html = ' - + +
Blue backgroundBlue backgroundUnknown fore/background
'; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = $spreadsheet->getSheet(0); $style = $firstSheet->getCell('A1')->getStyle(); - self::assertEquals('FFFFFF', $style->getFont()->getColor()->getRGB()); - - unlink($filename); - } - - public function testCanApplyInlineBordersStyles(): void - { - $html = ' - - - - - - - -
Thin borderBorder bottomBorder topBorder leftBorder right
'; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); - $firstSheet = $spreadsheet->getSheet(0); - $style = $firstSheet->getCell('A1')->getStyle(); - $borders = $style->getBorders(); - - /** @var Border $border */ - foreach ([$borders->getTop(), $borders->getBottom(), $borders->getLeft(), $borders->getRight()] as $border) { - self::assertEquals('333333', $border->getColor()->getRGB()); - self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); - } - + self::assertEquals('0000FF', $style->getFill()->getStartColor()->getRGB()); + self::assertEquals('0000FF', $style->getFill()->getEndColor()->getRGB()); $style = $firstSheet->getCell('B1')->getStyle(); - $border = $style->getBorders()->getBottom(); - self::assertEquals('333333', $border->getColor()->getRGB()); - self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); - - $style = $firstSheet->getCell('C1')->getStyle(); - $border = $style->getBorders()->getTop(); - self::assertEquals('333333', $border->getColor()->getRGB()); - self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); - - $style = $firstSheet->getCell('D1')->getStyle(); - $border = $style->getBorders()->getLeft(); - self::assertEquals('333333', $border->getColor()->getRGB()); - self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); - - $style = $firstSheet->getCell('E1')->getStyle(); - $border = $style->getBorders()->getRight(); - self::assertEquals('333333', $border->getColor()->getRGB()); - self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); - - unlink($filename); + self::assertEquals('000000', $style->getFont()->getColor()->getRGB()); + self::assertEquals('000000', $style->getFill()->getEndColor()->getRGB()); + self::assertEquals('FFFFFF', $style->getFill()->getstartColor()->getRGB()); } public function testCanApplyInlineFontStyles(): void @@ -122,8 +106,8 @@ class HtmlTest extends TestCase Line through '; - $filename = 
$this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = $spreadsheet->getSheet(0); $style = $firstSheet->getCell('A1')->getStyle(); @@ -143,8 +127,6 @@ class HtmlTest extends TestCase $style = $firstSheet->getCell('F1')->getStyle(); self::assertTrue($style->getFont()->getStrikethrough()); - - unlink($filename); } public function testCanApplyInlineWidth(): void @@ -155,8 +137,8 @@ class HtmlTest extends TestCase 100px '; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = $spreadsheet->getSheet(0); $dimension = $firstSheet->getColumnDimension('A'); @@ -164,8 +146,6 @@ class HtmlTest extends TestCase $dimension = $firstSheet->getColumnDimension('B'); self::assertEquals(100, $dimension->getWidth()); - - unlink($filename); } public function testCanApplyInlineHeight(): void @@ -178,8 +158,8 @@ class HtmlTest extends TestCase 2 '; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = $spreadsheet->getSheet(0); $dimension = $firstSheet->getRowDimension(1); @@ -187,8 +167,6 @@ class HtmlTest extends TestCase $dimension = $firstSheet->getRowDimension(2); self::assertEquals(100, $dimension->getRowHeight()); - - unlink($filename); } public function testCanApplyAlignment(): void @@ -203,8 +181,8 @@ class HtmlTest extends TestCase Wraptext '; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = 
$spreadsheet->getSheet(0); $style = $firstSheet->getCell('A1')->getStyle(); @@ -224,8 +202,6 @@ class HtmlTest extends TestCase $style = $firstSheet->getCell('F1')->getStyle(); self::assertTrue($style->getAlignment()->getWrapText()); - - unlink($filename); } public function testCanApplyInlineDataFormat(): void @@ -235,35 +211,12 @@ class HtmlTest extends TestCase 2019-02-02 12:34:00 '; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = $spreadsheet->getSheet(0); $style = $firstSheet->getCell('A1')->getStyle(); self::assertEquals('mmm-yy', $style->getNumberFormat()->getFormatCode()); - - unlink($filename); - } - - public function testCanInsertImage(): void - { - $imagePath = realpath(__DIR__ . '/../../data/Reader/HTML/image.jpg'); - - $html = ' - - - -
'; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); - $firstSheet = $spreadsheet->getSheet(0); - - /** @var Drawing $drawing */ - $drawing = $firstSheet->getDrawingCollection()[0]; - self::assertEquals($imagePath, $drawing->getPath()); - self::assertEquals('A1', $drawing->getCoordinates()); - - unlink($filename); } public function testCanApplyCellWrapping(): void @@ -279,8 +232,8 @@ class HtmlTest extends TestCase Hello
World '; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = $spreadsheet->getSheet(0); $cellStyle = $firstSheet->getStyle('A1'); @@ -295,103 +248,6 @@ class HtmlTest extends TestCase self::assertTrue($cellStyle->getAlignment()->getWrapText()); $cellValue = $firstSheet->getCell('A3')->getValue(); self::assertStringContainsString("\n", $cellValue); - - unlink($filename); - } - - public function testCanLoadFromString(): void - { - $html = ' - - - - - - - - - -
Hello World
Hello
World
Hello
World
'; - $spreadsheet = (new Html())->loadFromString($html); - $firstSheet = $spreadsheet->getSheet(0); - - $cellStyle = $firstSheet->getStyle('A1'); - self::assertFalse($cellStyle->getAlignment()->getWrapText()); - - $cellStyle = $firstSheet->getStyle('A2'); - self::assertTrue($cellStyle->getAlignment()->getWrapText()); - $cellValue = $firstSheet->getCell('A2')->getValue(); - self::assertStringContainsString("\n", $cellValue); - - $cellStyle = $firstSheet->getStyle('A3'); - self::assertTrue($cellStyle->getAlignment()->getWrapText()); - $cellValue = $firstSheet->getCell('A3')->getValue(); - self::assertStringContainsString("\n", $cellValue); - } - - public function testCanLoadFromStringIntoExistingSpreadsheet(): void - { - $html = ' - - - - - - - - - -
Hello World
Hello
World
Hello
World
'; - $reader = new Html(); - $spreadsheet = $reader->loadFromString($html); - $firstSheet = $spreadsheet->getSheet(0); - - $cellStyle = $firstSheet->getStyle('A1'); - self::assertFalse($cellStyle->getAlignment()->getWrapText()); - - $cellStyle = $firstSheet->getStyle('A2'); - self::assertTrue($cellStyle->getAlignment()->getWrapText()); - $cellValue = $firstSheet->getCell('A2')->getValue(); - self::assertStringContainsString("\n", $cellValue); - - $cellStyle = $firstSheet->getStyle('A3'); - self::assertTrue($cellStyle->getAlignment()->getWrapText()); - $cellValue = $firstSheet->getCell('A3')->getValue(); - self::assertStringContainsString("\n", $cellValue); - - $reader->setSheetIndex(1); - $html = ' - - - -
Goodbye World
'; - - self::assertEquals(1, $spreadsheet->getSheetCount()); - $spreadsheet = $reader->loadFromString($html, $spreadsheet); - self::assertEquals(2, $spreadsheet->getSheetCount()); - } - - /** - * @param string $html - * - * @return string - */ - private function createHtml($html) - { - $filename = tempnam(sys_get_temp_dir(), 'html'); - file_put_contents($filename, $html); - - return $filename; - } - - /** - * @param $filename - * - * @return \PhpOffice\PhpSpreadsheet\Spreadsheet - */ - private function loadHtmlIntoSpreadsheet($filename) - { - return (new Html())->load($filename); } public function testRowspanInRendering(): void @@ -417,11 +273,10 @@ class HtmlTest extends TestCase Text Indent '; - $filename = $this->createHtml($html); - $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); $firstSheet = $spreadsheet->getSheet(0); $style = $firstSheet->getCell('C2')->getStyle(); self::assertEquals(10, $style->getAlignment()->getIndent()); - unlink($filename); } } diff --git a/tests/data/Reader/HTML/badhtml.html b/tests/data/Reader/HTML/badhtml.html new file mode 100644 index 00000000..6816fb08 --- /dev/null +++ b/tests/data/Reader/HTML/badhtml.html @@ -0,0 +1 @@ + From 752a0a5a6c00f89f8cea6f3ea3584bd5ebac4086 Mon Sep 17 00:00:00 2001 From: Owen Leibman Date: Thu, 25 Jun 2020 23:11:30 -0700 Subject: [PATCH 2/3] Scrutinizer Recommendations Two unneeded assignments in tests, one unused parameter in source code. 
--- src/PhpSpreadsheet/Reader/Html.php | 4 ++-- tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index 7cb14f49..a83ed807 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -506,7 +506,7 @@ class Html extends BaseReader if ($child->nodeName !== 'td' && $child->nodeName !== 'th') { $this->processDomElement($child, $sheet, $row, $column, $cellContent); } else { - $this->processDomElementThTd($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray); } } @@ -559,7 +559,7 @@ class Html extends BaseReader } } - private function processDomElementThTd(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { while (isset($this->rowspan[$column . 
$row])) { ++$column; diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php index e87e636b..91e60d3e 100644 --- a/tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php @@ -23,7 +23,7 @@ class HtmlTest extends TestCase $filename = 'tests/data/Reader/HTML/badhtml.html'; $reader = new Html(); self::assertTrue($reader->canRead($filename)); - $spreadsheet = $reader->load($filename); + $reader->load($filename); self::assertTrue(false); } @@ -33,7 +33,7 @@ class HtmlTest extends TestCase $filename = __FILE__; $reader = new Html(); self::assertFalse($reader->canRead($filename)); - $spreadsheet = $reader->load($filename); + $reader->load($filename); self::assertTrue(false); } From 9f1a33cc8a386b97d2c330ec8cef1f6250a1e7e0 Mon Sep 17 00:00:00 2001 From: Owen Leibman Date: Thu, 25 Jun 2020 23:39:28 -0700 Subject: [PATCH 3/3] Scrutinizer Again Cascading series of unused unused parameters. 
--- src/PhpSpreadsheet/Reader/Html.php | 48 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index a83ed807..1e34de0e 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -298,7 +298,7 @@ class Html extends BaseReader $cellContent = (string) ''; } - private function processDomElementBody(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void + private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void { $attributeArray = []; foreach ($child->attributes as $attribute) { @@ -312,24 +312,24 @@ class Html extends BaseReader $this->tableLevel = 0; $this->processDomElement($child, $sheet, $row, $column, $cellContent); } else { - $this->processDomElementTitle($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementTitle(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if ($child->nodeName === 'title') { $this->processDomElement($child, $sheet, $row, $column, $cellContent); $sheet->setTitle($cellContent, true, false); $cellContent = ''; } else { - $this->processDomElementSpanEtc($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray); } } private static $spanEtc = ['span', 'div', 'font', 'i', 'em', 'strong', 'b']; - private function processDomElementSpanEtc(DOMNode $element, Worksheet 
$sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if (in_array($child->nodeName, self::$spanEtc)) { if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') { @@ -343,11 +343,11 @@ class Html extends BaseReader $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); } } else { - $this->processDomElementHr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementHr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if ($child->nodeName === 'hr') { $this->flushCell($sheet, $column, $row, $cellContent); @@ -358,10 +358,10 @@ class Html extends BaseReader ++$row; } // fall through to br - $this->processDomElementBr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray); } - private function processDomElementBr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if ($child->nodeName === 'br' || $child->nodeName === 'hr') { if ($this->tableLevel > 0) { @@ -374,11 +374,11 @@ class Html extends BaseReader ++$row; } } else { - $this->processDomElementA($element, $sheet, $row, 
$column, $cellContent, $child, $attributeArray); + $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementA(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if ($child->nodeName === 'a') { foreach ($attributeArray as $attributeName => $attributeValue) { @@ -400,13 +400,13 @@ class Html extends BaseReader //$cellContent .= ' '; $this->processDomElement($child, $sheet, $row, $column, $cellContent); } else { - $this->processDomElementH1Etc($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray); } } private static $h1Etc = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p']; - private function processDomElementH1Etc(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if (in_array($child->nodeName, self::$h1Etc)) { if ($this->tableLevel > 0) { @@ -430,11 +430,11 @@ class Html extends BaseReader $column = 'A'; } } else { - $this->processDomElementLi($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementLi(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, 
array &$attributeArray): void { if ($child->nodeName === 'li') { if ($this->tableLevel > 0) { @@ -451,20 +451,20 @@ class Html extends BaseReader $column = 'A'; } } else { - $this->processDomElementImg($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementImg(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if ($child->nodeName === 'img') { $this->insertImage($sheet, $column, $row, $attributeArray); } else { - $this->processDomElementTable($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementTable(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if ($child->nodeName === 'table') { $this->flushCell($sheet, $column, $row, $cellContent); @@ -480,11 +480,11 @@ class Html extends BaseReader ++$row; } } else { - $this->processDomElementTr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementTr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, 
DOMElement $child, array &$attributeArray): void { if ($child->nodeName === 'tr') { $column = $this->getTableStartColumn(); @@ -497,11 +497,11 @@ class Html extends BaseReader ++$row; } else { - $this->processDomElementThTdOther($element, $sheet, $row, $column, $cellContent, $child, $attributeArray); + $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray); } } - private function processDomElementThTdOther(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void + private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void { if ($child->nodeName !== 'td' && $child->nodeName !== 'th') { $this->processDomElement($child, $sheet, $row, $column, $cellContent); @@ -622,7 +622,7 @@ class Html extends BaseReader // but if we have a rich text run instead, we need to append it correctly // TODO } elseif ($child instanceof DOMElement) { - $this->processDomElementBody($element, $sheet, $row, $column, $cellContent, $child); + $this->processDomElementBody($sheet, $row, $column, $cellContent, $child); } } }