diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c6f0414..49d361ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com) and this project adheres to [Semantic Versioning](https://semver.org). +## [Unreleased] - + +### Added + +- Added support for inline styles in Html reader (borders, alignment, width, height) + ## [1.6.0] - 2019-01-02 ### Added diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index d9f25a31..6a6ca73e 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -12,6 +12,9 @@ use PhpOffice\PhpSpreadsheet\Spreadsheet; use PhpOffice\PhpSpreadsheet\Style\Border; use PhpOffice\PhpSpreadsheet\Style\Color; use PhpOffice\PhpSpreadsheet\Style\Fill; +use PhpOffice\PhpSpreadsheet\Style\Font; +use PhpOffice\PhpSpreadsheet\Style\Style; +use PhpOffice\PhpSpreadsheet\Worksheet\Drawing; use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; /** PhpSpreadsheet root directory */ @@ -96,6 +99,26 @@ class Html extends BaseReader ], ], ], // Bottom border + 'strong' => [ + 'font' => [ + 'bold' => true, + ], + ], // Bold + 'b' => [ + 'font' => [ + 'bold' => true, + ], + ], // Bold + 'i' => [ + 'font' => [ + 'italic' => true, + ], + ], // Italic + 'em' => [ + 'font' => [ + 'italic' => true, + ], + ], // Italic ]; protected $rowspan = []; @@ -295,11 +318,9 @@ class Html extends BaseReader switch ($child->nodeName) { case 'meta': foreach ($attributeArray as $attributeName => $attributeValue) { - switch ($attributeName) { - case 'content': - // TODO - // Extract character set, so we can convert to UTF-8 if required - break; + // Extract character set, so we can convert to UTF-8 if required + if ($attributeName === 'charset') { + $this->setInputEncoding($attributeValue); } } $this->processDomElement($child, $sheet, $row, $column, $cellContent); @@ -334,6 +355,10 @@ 
class Html extends BaseReader $cellContent .= ' '; } + if (isset($this->formats[$child->nodeName])) { + $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); + } + break; case 'hr': $this->flushCell($sheet, $column, $row, $cellContent); @@ -423,6 +448,10 @@ class Html extends BaseReader $column = 'A'; } + break; + case 'img': + $this->insertImage($sheet, $column, $row, $attributeArray); + break; case 'table': $this->flushCell($sheet, $column, $row, $cellContent); @@ -448,6 +477,11 @@ class Html extends BaseReader $column = $this->getTableStartColumn(); $cellContent = ''; $this->processDomElement($child, $sheet, $row, $column, $cellContent); + + if (isset($attributeArray['height'])) { + $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']); + } + ++$row; break; @@ -501,6 +535,27 @@ class Html extends BaseReader ] ); } + + if (isset($attributeArray['width'])) { + $sheet->getColumnDimension($column)->setWidth($attributeArray['width']); + } + + if (isset($attributeArray['height'])) { + $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']); + } + + if (isset($attributeArray['align'])) { + $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray['align']); + } + + if (isset($attributeArray['valign'])) { + $sheet->getStyle($column . $row)->getAlignment()->setVertical($attributeArray['valign']); + } + + if (isset($attributeArray['data-format'])) { + $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray['data-format']); + } + ++$column; break; @@ -608,36 +663,271 @@ class Html extends BaseReader return; } - $supported_styles = ['background-color', 'color']; + $cellStyle = $sheet->getStyle($column . 
$row); // add color styles (background & text) from dom element,currently support : td & th, using ONLY inline css style with RGB color $styles = explode(';', $attributeArray['style']); foreach ($styles as $st) { $value = explode(':', $st); + $styleName = isset($value[0]) ? trim($value[0]) : null; + $styleValue = isset($value[1]) ? trim($value[1]) : null; - if (empty(trim($value[0])) || !in_array(trim($value[0]), $supported_styles)) { + if (!$styleName) { continue; } - //check if has #, so we can get clean hex - if (substr(trim($value[1]), 0, 1) == '#') { - $style_color = substr(trim($value[1]), 1); - } - - if (empty($style_color)) { - continue; - } - - switch (trim($value[0])) { + switch ($styleName) { + case 'background': case 'background-color': - $sheet->getStyle($column . $row)->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => "{$style_color}"]]]); + $styleColor = $this->getStyleColor($styleValue); + + if (!$styleColor) { + continue 2; + } + + $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]); break; case 'color': - $sheet->getStyle($column . 
$row)->applyFromArray(['font' => ['color' => ['rgb' => "{$style_color}"]]]);
+                    $styleColor = $this->getStyleColor($styleValue);
+
+                    if (!$styleColor) {
+                        // Not a "#hex" colour: skip this declaration and move on to the next one.
+                        continue 2;
+                    }
+
+                    $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
+
+                    break;
+
+                case 'border':
+                    $this->setBorderStyle($cellStyle, $styleValue, 'allBorders');
+
+                    break;
+
+                case 'border-top':
+                    $this->setBorderStyle($cellStyle, $styleValue, 'top');
+
+                    break;
+
+                case 'border-bottom':
+                    $this->setBorderStyle($cellStyle, $styleValue, 'bottom');
+
+                    break;
+
+                case 'border-left':
+                    $this->setBorderStyle($cellStyle, $styleValue, 'left');
+
+                    break;
+
+                case 'border-right':
+                    $this->setBorderStyle($cellStyle, $styleValue, 'right');
+
+                    break;
+
+                case 'font-size':
+                    // The float cast keeps the leading number and drops a trailing unit ("16px" => 16.0).
+                    $cellStyle->getFont()->setSize(
+                        (float) $styleValue
+                    );
+
+                    break;
+
+                case 'font-weight':
+                    // Compare numerically only when the value really is a number. For a keyword such
+                    // as "normal", `$styleValue >= 500` is a string-vs-int comparison whose result
+                    // differs between PHP 7 (false) and PHP 8 (true), which would make the cell bold.
+                    if ($styleValue === 'bold' || (is_numeric($styleValue) && $styleValue >= 500)) {
+                        $cellStyle->getFont()->setBold(true);
+                    }
+
+                    break;
+
+                case 'font-style':
+                    if ($styleValue === 'italic') {
+                        $cellStyle->getFont()->setItalic(true);
+                    }
+
+                    break;
+
+                case 'font-family':
+                    // Remove the single quotes CSS puts around multi-word family names.
+                    $cellStyle->getFont()->setName(str_replace('\'', '', $styleValue));
+
+                    break;
+
+                case 'text-decoration':
+                    switch ($styleValue) {
+                        case 'underline':
+                            $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);
+
+                            break;
+                        case 'line-through':
+                            $cellStyle->getFont()->setStrikethrough(true);
+
+                            break;
+                    }
+
+                    break;
+
+                case 'text-align':
+                    $cellStyle->getAlignment()->setHorizontal($styleValue);
+
+                    break;
+
+                case 'vertical-align':
+                    $cellStyle->getAlignment()->setVertical($styleValue);
+
+                    break;
+
+                case 'width':
+                    // Only the "px" suffix is stripped; the remaining text is passed through as-is.
+                    $sheet->getColumnDimension($column)->setWidth(
+                        str_replace('px', '', $styleValue)
+                    );
+
+                    break;
+
+                case 'height':
+                    // Only the "px" suffix is stripped; the remaining text is passed through as-is.
+                    $sheet->getRowDimension($row)->setRowHeight(
+                        str_replace('px', '', $styleValue)
+                    );
+
+                    break;
+
+                case 'word-wrap':
+                    // Any value other than "break-word" switches wrapping off.
+                    $cellStyle->getAlignment()->setWrapText(
+                        $styleValue === 'break-word'
+                    );
+
+                    break;
+
+                case 'text-indent':
+
$cellStyle->getAlignment()->setIndent( + (int) str_replace(['px'], '', $styleValue) + ); break; } } }
+
+    /**
+     * Extract the hex digits from a "#rrggbb" style colour value.
+     *
+     * Only values written with a leading "#" are recognised; the "#" is removed
+     * and the remainder is returned untouched (no validation of the digits).
+     *
+     * @param mixed $value Colour value taken from a style declaration; may be null
+     *
+     * @return null|string Text following the "#", or null when $value is not a string starting with "#"
+     */
+    public function getStyleColor($value)
+    {
+        // Guard against null / non-string input so strpos() is never handed a non-string haystack.
+        if (!is_string($value) || strpos($value, '#') !== 0) {
+            return null;
+        }
+
+        return substr($value, 1);
+    }
+
+    /**
+     * Create a Drawing for an image element and anchor it to the given cell.
+     *
+     * Nothing happens when the element has no "src" attribute. The "width", "height" and
+     * "alt" attributes are optional; "alt" becomes the drawing name. Afterwards the column
+     * width and row height of the anchor cell are derived from the drawing's dimensions.
+     *
+     * @param Worksheet $sheet
+     * @param string $column
+     * @param int $row
+     * @param array $attributes Attributes of the image element, keyed by attribute name
+     *
+     * @throws \PhpOffice\PhpSpreadsheet\Exception
+     */
+    private function insertImage(Worksheet $sheet, $column, $row, array $attributes)
+    {
+        if (!isset($attributes['src'])) {
+            return;
+        }
+
+        $src = urldecode($attributes['src']);
+        $width = isset($attributes['width']) ? (float) $attributes['width'] : null;
+        $height = isset($attributes['height']) ? (float) $attributes['height'] : null;
+        // The alt text is a label, not a number: casting it to float turned any textual
+        // alt into 0.0, so the name was silently dropped.
+        $name = isset($attributes['alt']) ? (string) $attributes['alt'] : null;
+
+        $drawing = new Drawing();
+        $drawing->setPath($src);
+        $drawing->setWorksheet($sheet);
+        $drawing->setCoordinates($column . $row);
+        $drawing->setOffsetX(0);
+        $drawing->setOffsetY(10);
+        $drawing->setResizeProportional(true);
+
+        if ($name !== null && $name !== '') {
+            $drawing->setName($name);
+        }
+
+        // A missing or zero dimension leaves the drawing's own size in place.
+        if ($width) {
+            $drawing->setWidth((int) $width);
+        }
+
+        if ($height) {
+            $drawing->setHeight((int) $height);
+        }
+
+        // Size the anchor cell from the final drawing dimensions, using fixed scale factors.
+        $sheet->getColumnDimension($column)->setWidth(
+            $drawing->getWidth() / 6
+        );
+
+        $sheet->getRowDimension($row)->setRowHeight(
+            $drawing->getHeight() * 0.9
+        );
+    }
+
+    /**
+     * Map html border style to PhpSpreadsheet border style.
+     *
+     * The keyword is compared against the fixed list in the switch below; anything
+     * that is not listed yields null.
+     *
+     * @param string $style Border style keyword, e.g. "solid" or "dashed"
+     *
+     * @return null|string One of the Border::BORDER_* constants, or null for an unlisted keyword
+     */
+    public function getBorderStyle($style)
+    {
+        switch ($style) {
+            case 'solid':
+                return Border::BORDER_THIN;
+            case 'dashed':
+                return Border::BORDER_DASHED;
+            case 'dotted':
+                return Border::BORDER_DOTTED;
+            case 'medium':
+                return Border::BORDER_MEDIUM;
+            case 'thick':
+                return Border::BORDER_THICK;
+            case 'none':
+                return Border::BORDER_NONE;
+            case 'dash-dot':
+                return Border::BORDER_DASHDOT;
+            case 'dash-dot-dot':
+                return Border::BORDER_DASHDOTDOT;
+            case 'double':
+                return Border::BORDER_DOUBLE;
+            case 'hair':
+                return Border::BORDER_HAIR;
+            case 'medium-dash-dot':
+                return Border::BORDER_MEDIUMDASHDOT;
+            case 'medium-dash-dot-dot':
+                return Border::BORDER_MEDIUMDASHDOTDOT;
+            case 'medium-dashed':
+                return Border::BORDER_MEDIUMDASHED;
+            case 'slant-dash-dot':
+                return Border::BORDER_SLANTDASHDOT;
+        }
+
+        return null;
+    }
+
+    /**
+     * Apply a border shorthand of the form "<width> <style> <color>" to one side of a cell style.
+     *
+     * The parts are read by position: the first token (width) is ignored, the second is
+     * mapped through getBorderStyle() and the third through getStyleColor(). Parts that are
+     * missing or cannot be resolved are left out instead of being applied as null; when
+     * nothing can be resolved the style is not touched.
+     *
+     * @param Style $cellStyle
+     * @param string $styleValue Value of the border declaration, e.g. "1px solid #333333"
+     * @param string $type Key inside the "borders" style array, e.g. "top" or "allBorders"
+     */
+    private function setBorderStyle(Style $cellStyle, $styleValue, $type)
+    {
+        // Split on runs of whitespace so repeated spaces do not shift the positions, and
+        // avoid undefined-offset notices for shorthands with fewer than three tokens.
+        $parts = preg_split('/\s+/', (string) $styleValue, -1, PREG_SPLIT_NO_EMPTY);
+
+        $borderStyle = isset($parts[1]) ? $this->getBorderStyle($parts[1]) : null;
+        $color = isset($parts[2]) ? $this->getStyleColor($parts[2]) : null;
+
+        $border = [];
+
+        if ($borderStyle !== null) {
+            $border['borderStyle'] = $borderStyle;
+        }
+
+        if ($color !== null) {
+            $border['color'] = ['rgb' => $color];
+        }
+
+        if (!$border) {
+            return;
+        }
+
+        $cellStyle->applyFromArray([
+            'borders' => [
+                $type => $border,
+            ],
+        ]);
+    }
 }
diff --git a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php b/tests/PhpSpreadsheetTests/Reader/HtmlTest.php index 060c1e60..f56030e0 100644 --- a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php +++ b/tests/PhpSpreadsheetTests/Reader/HtmlTest.php @@ -3,6 +3,10 @@ namespace PhpOffice\PhpSpreadsheetTests\Reader; use PhpOffice\PhpSpreadsheet\Reader\Html; +use PhpOffice\PhpSpreadsheet\Style\Alignment; +use PhpOffice\PhpSpreadsheet\Style\Border; +use PhpOffice\PhpSpreadsheet\Style\Font; +use PhpOffice\PhpSpreadsheet\Worksheet\Drawing; use PHPUnit\Framework\TestCase; class HtmlTest extends TestCase @@ -29,19 +33,18 @@ class HtmlTest extends TestCase /** * @dataProvider 
providerCanReadVerySmallFile * - * @param bool $expected + * @param bool $expected * @param string $content */ public function testCanReadVerySmallFile($expected, $content) { - $filename = tempnam(sys_get_temp_dir(), 'html'); - file_put_contents($filename, $content); - + $filename = $this->createHtml($content); $reader = new Html(); $actual = $reader->canRead($filename); - unlink($filename); self::assertSame($expected, $actual); + + unlink($filename); } public function testBackgroundColorInRanding() @@ -51,14 +54,238 @@ class HtmlTest extends TestCase Blue background '; - $filename = tempnam(sys_get_temp_dir(), 'html'); - file_put_contents($filename, $html); - $reader = new Html(); - $spreadsheet = $reader->load($filename); + $filename = $this->createHtml($html); + $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); $firstSheet = $spreadsheet->getSheet(0); $style = $firstSheet->getCell('A1')->getStyle(); self::assertEquals('FFFFFF', $style->getFont()->getColor()->getRGB()); + unlink($filename); } + + public function testCanApplyInlineBordersStyles() + { + $html = ' + + + + + + + +
Thin borderBorder bottomBorder topBorder leftBorder right
'; + $filename = $this->createHtml($html); + $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $firstSheet = $spreadsheet->getSheet(0); + $style = $firstSheet->getCell('A1')->getStyle(); + $borders = $style->getBorders(); + + /** @var Border $border */ + foreach ([$borders->getTop(), $borders->getBottom(), $borders->getLeft(), $borders->getRight()] as $border) { + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + } + + $style = $firstSheet->getCell('B1')->getStyle(); + $border = $style->getBorders()->getBottom(); + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + + $style = $firstSheet->getCell('C1')->getStyle(); + $border = $style->getBorders()->getTop(); + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + + $style = $firstSheet->getCell('D1')->getStyle(); + $border = $style->getBorders()->getLeft(); + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + + $style = $firstSheet->getCell('E1')->getStyle(); + $border = $style->getBorders()->getRight(); + self::assertEquals('333333', $border->getColor()->getRGB()); + self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle()); + + unlink($filename); + } + + public function testCanApplyInlineFontStyles() + { + $html = ' + + + + + + + + +
16pxTimes New RomanBoldItalicUnderlineLine through
'; + $filename = $this->createHtml($html); + $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $firstSheet = $spreadsheet->getSheet(0); + + $style = $firstSheet->getCell('A1')->getStyle(); + self::assertEquals(16, $style->getFont()->getSize()); + + $style = $firstSheet->getCell('B1')->getStyle(); + self::assertEquals('Times New Roman', $style->getFont()->getName()); + + $style = $firstSheet->getCell('C1')->getStyle(); + self::assertTrue($style->getFont()->getBold()); + + $style = $firstSheet->getCell('D1')->getStyle(); + self::assertTrue($style->getFont()->getItalic()); + + $style = $firstSheet->getCell('E1')->getStyle(); + self::assertEquals(Font::UNDERLINE_SINGLE, $style->getFont()->getUnderline()); + + $style = $firstSheet->getCell('F1')->getStyle(); + self::assertTrue($style->getFont()->getStrikethrough()); + + unlink($filename); + } + + public function testCanApplyInlineWidth() + { + $html = ' + + + + +
50px100px
'; + $filename = $this->createHtml($html); + $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $firstSheet = $spreadsheet->getSheet(0); + + $dimension = $firstSheet->getColumnDimension('A'); + self::assertEquals(50, $dimension->getWidth()); + + $dimension = $firstSheet->getColumnDimension('B'); + self::assertEquals(100, $dimension->getWidth()); + + unlink($filename); + } + + public function testCanApplyInlineHeight() + { + $html = ' + + + + + + +
1
2
'; + $filename = $this->createHtml($html); + $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $firstSheet = $spreadsheet->getSheet(0); + + $dimension = $firstSheet->getRowDimension(1); + self::assertEquals(50, $dimension->getRowHeight()); + + $dimension = $firstSheet->getRowDimension(2); + self::assertEquals(100, $dimension->getRowHeight()); + + unlink($filename); + } + + public function testCanApplyAlignment() + { + $html = ' + + + + + + + + +
Center alignCenter valignCenter alignCenter valignText indentWraptext
'; + $filename = $this->createHtml($html); + $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $firstSheet = $spreadsheet->getSheet(0); + + $style = $firstSheet->getCell('A1')->getStyle(); + self::assertEquals(Alignment::HORIZONTAL_CENTER, $style->getAlignment()->getHorizontal()); + + $style = $firstSheet->getCell('B1')->getStyle(); + self::assertEquals(Alignment::VERTICAL_CENTER, $style->getAlignment()->getVertical()); + + $style = $firstSheet->getCell('C1')->getStyle(); + self::assertEquals(Alignment::HORIZONTAL_CENTER, $style->getAlignment()->getHorizontal()); + + $style = $firstSheet->getCell('D1')->getStyle(); + self::assertEquals(Alignment::VERTICAL_CENTER, $style->getAlignment()->getVertical()); + + $style = $firstSheet->getCell('E1')->getStyle(); + self::assertEquals(10, $style->getAlignment()->getIndent()); + + $style = $firstSheet->getCell('F1')->getStyle(); + self::assertTrue($style->getAlignment()->getWrapText()); + + unlink($filename); + } + + public function testCanApplyInlineDataFormat() + { + $html = ' + + + +
2019-02-02 12:34:00
'; + $filename = $this->createHtml($html); + $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename); + $firstSheet = $spreadsheet->getSheet(0); + + $style = $firstSheet->getCell('A1')->getStyle(); + self::assertEquals('mmm-yy', $style->getNumberFormat()->getFormatCode()); + + unlink($filename); + } + + public function testCanInsertImage() + { + $imagePath = realpath(__DIR__ . '/../../data/Reader/HTML/image.jpg'); + + $html = ' + + + +
';
+        $filename = $this->createHtml($html);
+        $spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
+        $firstSheet = $spreadsheet->getSheet(0);
+
+        /** @var Drawing $drawing */
+        $drawing = $firstSheet->getDrawingCollection()[0];
+        self::assertEquals($imagePath, $drawing->getPath());
+        self::assertEquals('A1', $drawing->getCoordinates());
+
+        unlink($filename);
+    }
+
+    /**
+     * Write the given markup to a fresh temporary file and return its path.
+     *
+     * The file is not removed here; the calling test unlinks it when done.
+     *
+     * @param string $html
+     *
+     * @throws \RuntimeException when the temporary file cannot be created or written
+     *
+     * @return string
+     */
+    private function createHtml($html)
+    {
+        $filename = tempnam(sys_get_temp_dir(), 'html');
+
+        // Fail loudly here rather than letting a later load() fail on a bogus path.
+        if ($filename === false) {
+            throw new \RuntimeException('Unable to create a temporary file for the HTML fixture');
+        }
+
+        if (file_put_contents($filename, $html) === false) {
+            unlink($filename);
+
+            throw new \RuntimeException('Unable to write the HTML fixture to ' . $filename);
+        }
+
+        return $filename;
+    }
+
+    /**
+     * Load an HTML file through a new Html reader instance.
+     *
+     * @param string $filename Path of the HTML file to read
+     *
+     * @return \PhpOffice\PhpSpreadsheet\Spreadsheet
+     */
+    private function loadHtmlIntoSpreadsheet($filename)
+    {
+        return (new Html())->load($filename);
+    }
 }
diff --git a/tests/data/Reader/HTML/image.jpg b/tests/data/Reader/HTML/image.jpg new file mode 100644 index 00000000..48afbc55 Binary files /dev/null and b/tests/data/Reader/HTML/image.jpg differ