From b509b672e0fcc629e559a08f7b33423aa3f0cfd5 Mon Sep 17 00:00:00 2001 From: Dominik Date: Thu, 15 Mar 2018 11:29:18 +0100 Subject: [PATCH] Xlsx reader does not read rows and columns filtered out by the read filter at all. Set row and column dimensions only for the rows and columns allowed by the read filter. Fixes #370 Closes #421 --- CHANGELOG.md | 1 + src/PhpSpreadsheet/Reader/IReadFilter.php | 6 +- src/PhpSpreadsheet/Reader/Xlsx.php | 164 +++++++++++++----- .../Functional/ReadFilterTest.php | 124 +++++++++++++ 4 files changed, 251 insertions(+), 44 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Functional/ReadFilterTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ed3d06d..00e7748c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Subtotal 9 in a group that has other subtotals 9 exclude the totals of the other subtotals in the range - [#332](https://github.com/PHPOffice/PhpSpreadsheet/issues/332) - `Helper\Html` support UTF-8 HTML input - [#444](https://github.com/PHPOffice/PhpSpreadsheet/issues/444) - Xlsx loaded an extra empty comment for each real comment - [#375](https://github.com/PHPOffice/PhpSpreadsheet/issues/375) +- Xlsx reader do not read rows and columns filtered out in readFilter at all - [#370](https://github.com/PHPOffice/PhpSpreadsheet/issues/370) ## [1.2.1] - 2018-04-10 diff --git a/src/PhpSpreadsheet/Reader/IReadFilter.php b/src/PhpSpreadsheet/Reader/IReadFilter.php index 03c48db3..ccfe05ad 100644 --- a/src/PhpSpreadsheet/Reader/IReadFilter.php +++ b/src/PhpSpreadsheet/Reader/IReadFilter.php @@ -7,9 +7,9 @@ interface IReadFilter /** * Should this cell be read? 
* - * @param $column string Column address (as a string value like "A", or "IV") - * @param $row int Row number - * @param $worksheetName string Optional worksheet name + * @param string $column Column address (as a string value like "A", or "IV") + * @param int $row Row number + * @param string $worksheetName Optional worksheet name * * @return bool */ diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index e32be775..e9ac11bf 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -320,6 +320,60 @@ class Xlsx extends BaseReader return $contents; } + /** + * Set Worksheet column attributes by attributes array passed. + * + * @param Worksheet $docSheet + * @param string $column A, B, ... DX, ... + * @param array $columnAttributes array of attributes (indexes are attribute name, values are value) + * 'xfIndex', 'visible', 'collapsed', 'outlineLevel', 'width', ... ? + */ + private function setColumnAttributes(Worksheet $docSheet, $column, array $columnAttributes) + { + if (isset($columnAttributes['xfIndex'])) { + $docSheet->getColumnDimension($column)->setXfIndex($columnAttributes['xfIndex']); + } + if (isset($columnAttributes['visible'])) { + $docSheet->getColumnDimension($column)->setVisible($columnAttributes['visible']); + } + if (isset($columnAttributes['collapsed'])) { + $docSheet->getColumnDimension($column)->setCollapsed($columnAttributes['collapsed']); + } + if (isset($columnAttributes['outlineLevel'])) { + $docSheet->getColumnDimension($column)->setOutlineLevel($columnAttributes['outlineLevel']); + } + if (isset($columnAttributes['width'])) { + $docSheet->getColumnDimension($column)->setWidth($columnAttributes['width']); + } + } + + /** + * Set Worksheet row attributes by attributes array passed. + * + * @param Worksheet $docSheet + * @param int $row 1, 2, 3, ... 99, ... 
+ * @param array $rowAttributes array of attributes (indexes are attribute name, values are value) + * 'xfIndex', 'visible', 'collapsed', 'outlineLevel', 'rowHeight', ... ? + */ + private function setRowAttributes(Worksheet $docSheet, $row, array $rowAttributes) + { + if (isset($rowAttributes['xfIndex'])) { + $docSheet->getRowDimension($row)->setXfIndex($rowAttributes['xfIndex']); + } + if (isset($rowAttributes['visible'])) { + $docSheet->getRowDimension($row)->setVisible($rowAttributes['visible']); + } + if (isset($rowAttributes['collapsed'])) { + $docSheet->getRowDimension($row)->setCollapsed($rowAttributes['collapsed']); + } + if (isset($rowAttributes['outlineLevel'])) { + $docSheet->getRowDimension($row)->setOutlineLevel($rowAttributes['outlineLevel']); + } + if (isset($rowAttributes['rowHeight'])) { + $docSheet->getRowDimension($row)->setRowHeight($rowAttributes['rowHeight']); + } + } + /** * Loads Spreadsheet from file. * @@ -819,30 +873,6 @@ class Xlsx extends BaseReader } } - if (isset($xmlSheet->cols) && !$this->readDataOnly) { - foreach ($xmlSheet->cols->col as $col) { - for ($i = (int) ($col['min']); $i <= (int) ($col['max']); ++$i) { - if ($col['style'] && !$this->readDataOnly) { - $docSheet->getColumnDimension(Coordinate::stringFromColumnIndex($i))->setXfIndex((int) ($col['style'])); - } - if (self::boolean($col['hidden'])) { - $docSheet->getColumnDimension(Coordinate::stringFromColumnIndex($i))->setVisible(false); - } - if (self::boolean($col['collapsed'])) { - $docSheet->getColumnDimension(Coordinate::stringFromColumnIndex($i))->setCollapsed(true); - } - if ($col['outlineLevel'] > 0) { - $docSheet->getColumnDimension(Coordinate::stringFromColumnIndex($i))->setOutlineLevel((int) ($col['outlineLevel'])); - } - $docSheet->getColumnDimension(Coordinate::stringFromColumnIndex($i))->setWidth((float) ($col['width'])); - - if ((int) ($col['max']) == 16384) { - break; - } - } - } - } - if (isset($xmlSheet->printOptions) && !$this->readDataOnly) { if 
(self::boolean((string) $xmlSheet->printOptions['gridLinesSet'])) { $docSheet->setShowGridlines(true); @@ -858,25 +888,77 @@ class Xlsx extends BaseReader } } + $columnsAttributes = []; + $rowsAttributes = []; + if (isset($xmlSheet->cols) && !$this->readDataOnly) { + foreach ($xmlSheet->cols->col as $col) { + for ($i = (int) ($col['min']); $i <= (int) ($col['max']); ++$i) { + if ($col['style'] && !$this->readDataOnly) { + $columnsAttributes[Coordinate::stringFromColumnIndex($i)]['xfIndex'] = (int) $col['style']; + } + if (self::boolean($col['hidden'])) { + $columnsAttributes[Coordinate::stringFromColumnIndex($i)]['visible'] = false; + } + if (self::boolean($col['collapsed'])) { + $columnsAttributes[Coordinate::stringFromColumnIndex($i)]['collapsed'] = true; + } + if ($col['outlineLevel'] > 0) { + $columnsAttributes[Coordinate::stringFromColumnIndex($i)]['outlineLevel'] = (int) $col['outlineLevel']; + } + $columnsAttributes[Coordinate::stringFromColumnIndex($i)]['width'] = (float) $col['width']; + + if ((int) ($col['max']) == 16384) { + break; + } + } + } + } + + if ($xmlSheet && $xmlSheet->sheetData && $xmlSheet->sheetData->row) { + foreach ($xmlSheet->sheetData->row as $row) { + if ($row['ht'] && !$this->readDataOnly) { + $rowsAttributes[(int) $row['r']]['rowHeight'] = (float) $row['ht']; + } + if (self::boolean($row['hidden']) && !$this->readDataOnly) { + $rowsAttributes[(int) $row['r']]['visible'] = false; + } + if (self::boolean($row['collapsed'])) { + $rowsAttributes[(int) $row['r']]['collapsed'] = true; + } + if ($row['outlineLevel'] > 0) { + $rowsAttributes[(int) $row['r']]['outlineLevel'] = (int) $row['outlineLevel']; + } + if ($row['s'] && !$this->readDataOnly) { + $rowsAttributes[(int) $row['r']]['xfIndex'] = (int) $row['s']; + } + } + } + + // set columns/rows attributes + $columnsAttributesSet = []; + $rowsAttributesSet = []; + foreach ($columnsAttributes as $coordColumn => $columnAttributes) { + foreach ($rowsAttributes as $coordRow => $rowAttributes) 
{ + if ($this->getReadFilter() !== null) { + if (!$this->getReadFilter()->readCell($coordColumn, $coordRow, $docSheet->getTitle())) { + continue; + } + } + + if (!isset($columnsAttributesSet[$coordColumn])) { + $this->setColumnAttributes($docSheet, $coordColumn, $columnAttributes); + $columnsAttributesSet[$coordColumn] = true; + } + if (!isset($rowsAttributesSet[$coordRow])) { + $this->setRowAttributes($docSheet, $coordRow, $rowAttributes); + $rowsAttributesSet[$coordRow] = true; + } + } + } + if ($xmlSheet && $xmlSheet->sheetData && $xmlSheet->sheetData->row) { $cIndex = 1; // Cell Start from 1 foreach ($xmlSheet->sheetData->row as $row) { - if ($row['ht'] && !$this->readDataOnly) { - $docSheet->getRowDimension((int) ($row['r']))->setRowHeight((float) ($row['ht'])); - } - if (self::boolean($row['hidden']) && !$this->readDataOnly) { - $docSheet->getRowDimension((int) ($row['r']))->setVisible(false); - } - if (self::boolean($row['collapsed'])) { - $docSheet->getRowDimension((int) ($row['r']))->setCollapsed(true); - } - if ($row['outlineLevel'] > 0) { - $docSheet->getRowDimension((int) ($row['r']))->setOutlineLevel((int) ($row['outlineLevel'])); - } - if ($row['s'] && !$this->readDataOnly) { - $docSheet->getRowDimension((int) ($row['r']))->setXfIndex((int) ($row['s'])); - } - $rowIndex = 1; foreach ($row->c as $c) { $r = (string) $c['r']; @@ -891,7 +973,7 @@ class Xlsx extends BaseReader if ($this->getReadFilter() !== null) { $coordinates = Coordinate::coordinateFromString($r); - if (!$this->getReadFilter()->readCell($coordinates[0], $coordinates[1], $docSheet->getTitle())) { + if (!$this->getReadFilter()->readCell($coordinates[0], (int) $coordinates[1], $docSheet->getTitle())) { continue; } } diff --git a/tests/PhpSpreadsheetTests/Functional/ReadFilterTest.php b/tests/PhpSpreadsheetTests/Functional/ReadFilterTest.php new file mode 100644 index 00000000..288fc57e --- /dev/null +++ b/tests/PhpSpreadsheetTests/Functional/ReadFilterTest.php @@ -0,0 +1,124 @@ 
+getActiveSheet()->fromArray($arrayData, null, 'A1'); + + $reloadedSpreadsheet = $this->writeAndReload($spreadsheet, $format); + $sheet = $reloadedSpreadsheet->getSheet(0); + // test highest column (very specific num of columns because of some 3rd party software) + self::assertSame('J', $sheet->getHighestColumn()); + + // test highest row (very specific num of rows because of some 3rd party software) + self::assertEquals(10, $sheet->getHighestRow()); + + // test top left coordinate + $sortedCoordinates = $sheet->getCellCollection()->getSortedCoordinates(); + $coordinateTopLeft = reset($sortedCoordinates); + self::assertSame('A1', $coordinateTopLeft); + } + + /** + * Test load Xlsx file with many empty cells (and big max row number) with readfilter. + * + * @dataProvider providerCellsValues + * + * @param array $arrayData + * @param mixed $format + */ + public function testXlsxLoadWithReadFilter($format, array $arrayData) + { + $spreadsheet = new Spreadsheet(); + $spreadsheet->getActiveSheet()->fromArray($arrayData, null, 'A1'); + + $reloadedSpreadsheet = $this->writeAndReload($spreadsheet, $format, function ($reader) { + // Create a stub for the readFilter class. 
+ $readFilterStub = $this->createMock(IReadFilter::class); + $readFilterStub->method('readCell') + ->will($this->returnCallback([$this, 'readFilterReadCell'])); + // apply filter + $reader->setReadFilter($readFilterStub); + }); + $sheet = $reloadedSpreadsheet->getSheet(0); + // test highest column (very specific num of columns because of some 3rd party software) + self::assertSame('D', $sheet->getHighestColumn()); + + // test highest row (very specific num of rows because of some 3rd party software) + self::assertEquals(6, $sheet->getHighestRow()); + + // test top left coordinate + $sortedCoordinates = $sheet->getCellCollection()->getSortedCoordinates(); + $coordinateTopLeft = reset($sortedCoordinates); + self::assertSame('B2', $coordinateTopLeft); + } + + /** + * @see \PhpOffice\PhpSpreadsheet\Reader\IReadFilter::readCell() + * + * @param string $column Column address (as a string value like "A", or "IV") + * @param int $row Row number + * @param string $worksheetName Optional worksheet name + * + * @return bool + */ + public function readFilterReadCell($column, $row, $worksheetName = '') + { + // define filter range + $rowMin = 2; + $rowMax = 6; + $columnMin = 'B'; + $columnMax = 'D'; + + $r = (int) $row; + if ($r > $rowMax || $r < $rowMin) { + return false; + } + + $col = sprintf('%04s', $column); + if ($col > sprintf('%04s', $columnMax) || + $col < sprintf('%04s', $columnMin)) { + return false; + } + + return true; + } +}