From ce6ac1f0404ae5bd9155faeba4d1f5816b354e34 Mon Sep 17 00:00:00 2001 From: oleibman Date: Fri, 19 Jun 2020 11:28:57 -0700 Subject: [PATCH] Fix For #1509 (#1518) * Fix For #1509 User expected no CSV enclosures after $writer->setEnclosure(''), which had been changed to be consistent with $reader->setEnclosure(''). Writer will now omit enclosures after code above; no change to Reader. Tests have been added for this condition. * Add Option to Write CSV Enclosure Only When Required Allowing the user to specify no enclosure when writing a CSV can lead to a situation where PhpSpreadsheet (likewise Excel) will not read the resulting file as intended, e.g. if any cell contains a delimiter character. This is demonstrated in new test TestBadReread. No existing setting will rectify this situation. A better choice would be to add an option to write the enclosure only when it is needed, which is what Excel does. The RFC4180 spec at https://tools.ietf.org/html/rfc4180 states when it is needed - when the cell contains the delimiter, or the enclosure, or a newline. New test TestGoodReread demonstrates that the file is read as intended. The documentation has been updated to describe the new function, and to change the write example where the enclosure is set to null. * Scrutinizer Suggestions 3 minor changes, all in tests. --- docs/topics/reading-and-writing-to-file.md | 18 +- src/PhpSpreadsheet/Writer/Csv.php | 42 +++- .../Writer/Csv/CsvEnclosureTest.php | 216 ++++++++++++++++++ .../Writer/Csv/CsvWriteTest.php | 4 +- 4 files changed, 265 insertions(+), 15 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Writer/Csv/CsvEnclosureTest.php diff --git a/docs/topics/reading-and-writing-to-file.md b/docs/topics/reading-and-writing-to-file.md index 020e0634..f8ba084b 100644 --- a/docs/topics/reading-and-writing-to-file.md +++ b/docs/topics/reading-and-writing-to-file.md @@ -478,7 +478,7 @@ imports onto the 6th sheet: ```php $reader = new \PhpOffice\PhpSpreadsheet\Reader\Csv(); $reader->setDelimiter(';'); -$reader->setEnclosure(''); +$reader->setEnclosure('"'); $reader->setSheetIndex(5); $reader->loadIntoExisting("05featuredemo.csv", $spreadsheet); @@ -505,13 +505,26 @@ file: ```php $writer = new \PhpOffice\PhpSpreadsheet\Writer\Csv($spreadsheet); $writer->setDelimiter(';'); -$writer->setEnclosure(''); +$writer->setEnclosure('"'); $writer->setLineEnding("\r\n"); $writer->setSheetIndex(0); $writer->save("05featuredemo.csv"); ``` +#### CSV enclosures + +By default, all CSV fields are wrapped in the enclosure character, +which defaults to double-quote. +You can change to use the enclosure character only when required: + +``` php +$writer = new \PhpOffice\PhpSpreadsheet\Writer\Csv($spreadsheet); +$writer->setEnclosureRequired(false); + +$writer->save("05featuredemo.csv"); +``` + #### Write a specific worksheet CSV files can only contain one worksheet. Therefore, you can specify @@ -538,6 +551,7 @@ $writer->save("05featuredemo.csv"); CSV files are written in UTF-8. If they do not contain characters outside the ASCII range, nothing else need be done. However, if such characters are in the file, +or if the file starts with the 2 characters 'ID', it should explicitly include a BOM file header; if it doesn't, Excel will not interpret those characters correctly. This can be enabled by using the following code: diff --git a/src/PhpSpreadsheet/Writer/Csv.php b/src/PhpSpreadsheet/Writer/Csv.php index 414bdf90..74f28636 100644 --- a/src/PhpSpreadsheet/Writer/Csv.php +++ b/src/PhpSpreadsheet/Writer/Csv.php @@ -168,9 +168,9 @@ class Csv extends BaseWriter * * @return $this */ - public function setEnclosure($pValue) + public function setEnclosure($pValue = '"') { - $this->enclosure = $pValue ? $pValue : '"'; + $this->enclosure = $pValue; return $this; } @@ -296,6 +296,20 @@ class Csv extends BaseWriter return $this; } + private $enclosureRequired = true; + + public function setEnclosureRequired(bool $value): self + { + $this->enclosureRequired = $value; + + return $this; + } + + public function getEnclosureRequired(): bool + { + return $this->enclosureRequired; + } + /** * Write line to CSV file. * @@ -305,24 +319,28 @@ class Csv extends BaseWriter private function writeLine($pFileHandle, array $pValues): void { // No leading delimiter - $writeDelimiter = false; + $delimiter = ''; // Build the line $line = ''; foreach ($pValues as $element) { - // Escape enclosures - $element = str_replace($this->enclosure, $this->enclosure . $this->enclosure, $element); - // Add delimiter - if ($writeDelimiter) { - $line .= $this->delimiter; - } else { - $writeDelimiter = true; + $line .= $delimiter; + $delimiter = $this->delimiter; + // Escape enclosures + $enclosure = $this->enclosure; + if ($enclosure) { + // If enclosure is not required, use enclosure only if + // element contains newline, delimiter, or enclosure. + if (!$this->enclosureRequired && strpbrk($element, "$delimiter$enclosure\n") === false) { + $enclosure = ''; + } else { + $element = str_replace($enclosure, $enclosure . $enclosure, $element); + } } - // Add enclosed string - $line .= $this->enclosure . $element . $this->enclosure; + $line .= $enclosure . $element . $enclosure; } // Add line ending diff --git a/tests/PhpSpreadsheetTests/Writer/Csv/CsvEnclosureTest.php b/tests/PhpSpreadsheetTests/Writer/Csv/CsvEnclosureTest.php new file mode 100644 index 00000000..d048183c --- /dev/null +++ b/tests/PhpSpreadsheetTests/Writer/Csv/CsvEnclosureTest.php @@ -0,0 +1,216 @@ + '2020-06-03', + 'B1' => '000123', + 'C1' => '06.53', + 'D1' => '14.22', + 'A2' => '2020-06-04', + 'B2' => '000234', + 'C2' => '07.12', + 'D2' => '15.44', + ]; + + public function testNormalEnclosure(): void + { + $delimiter = ';'; + $enclosure = '"'; + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + foreach (self::$cellValues as $key => $value) { + $sheet->setCellValue($key, $value); + } + $writer = new CsvWriter($spreadsheet); + $writer->setDelimiter($delimiter); + $writer->setEnclosure($enclosure); + $filename = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test'); + $writer->save($filename); + $filedata = file_get_contents($filename); + $filedata = preg_replace('/\\r?\\n/', $delimiter, $filedata); + $reader = new CsvReader(); + $reader->setDelimiter($delimiter); + $reader->setEnclosure($enclosure); + $newspreadsheet = $reader->load($filename); + unlink($filename); + $sheet = $newspreadsheet->getActiveSheet(); + $expected = ''; + foreach (self::$cellValues as $key => $value) { + self::assertEquals($value, $sheet->getCell($key)->getValue()); + $expected .= "$enclosure$value$enclosure$delimiter"; + } + self::assertEquals($expected, $filedata); + } + + public function testNoEnclosure(): void + { + $delimiter = ';'; + $enclosure = ''; + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + foreach (self::$cellValues as $key => $value) { + $sheet->setCellValue($key, $value); + } + $writer = new CsvWriter($spreadsheet); + $writer->setDelimiter($delimiter); + $writer->setEnclosure($enclosure); + self::assertEquals('', $writer->getEnclosure()); + $filename = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test'); + $writer->save($filename); + $filedata = file_get_contents($filename); + $filedata = preg_replace('/\\r?\\n/', $delimiter, $filedata); + $reader = new CsvReader(); + $reader->setDelimiter($delimiter); + $reader->setEnclosure($enclosure); + self::assertEquals('"', $reader->getEnclosure()); + $newspreadsheet = $reader->load($filename); + unlink($filename); + $sheet = $newspreadsheet->getActiveSheet(); + $expected = ''; + foreach (self::$cellValues as $key => $value) { + self::assertEquals($value, $sheet->getCell($key)->getValue()); + $expected .= "$enclosure$value$enclosure$delimiter"; + } + self::assertEquals($expected, $filedata); + } + + public function testNotRequiredEnclosure1(): void + { + $delimiter = ';'; + $enclosure = '"'; + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + foreach (self::$cellValues as $key => $value) { + $sheet->setCellValue($key, $value); + } + $writer = new CsvWriter($spreadsheet); + self::assertTrue($writer->getEnclosureRequired()); + $writer->setEnclosureRequired(false)->setDelimiter($delimiter)->setEnclosure($enclosure); + $filename = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test'); + $writer->save($filename); + $filedata = file_get_contents($filename); + $filedata = preg_replace('/\\r?\\n/', $delimiter, $filedata); + $reader = new CsvReader(); + $reader->setDelimiter($delimiter); + $reader->setEnclosure($enclosure); + $newspreadsheet = $reader->load($filename); + unlink($filename); + $sheet = $newspreadsheet->getActiveSheet(); + $expected = ''; + foreach (self::$cellValues as $key => $value) { + self::assertEquals($value, $sheet->getCell($key)->getValue()); + $expected .= "$value$delimiter"; + } + self::assertEquals($expected, $filedata); + } + + public function testNotRequiredEnclosure2(): void + { + $cellValues2 = [ + 'A1' => '2020-06-03', + 'B1' => 'has,separator', + 'C1' => 'has;non-separator', + 'D1' => 'has"enclosure', + 'A2' => 'has space', + 'B2' => "has\nnewline", + 'C2' => '', + 'D2' => '15.44', + 'A3' => ' leadingspace', + 'B3' => 'trailingspace ', + 'C3' => '=D2*2', + 'D3' => ',leadingcomma', + 'A4' => 'trailingquote"', + 'B4' => 'unused', + 'C4' => 'unused', + 'D4' => 'unused', + ]; + $calcc3 = '30.88'; + $expected1 = '2020-06-03,"has,separator",has;non-separator,"has""enclosure"'; + $expected2 = 'has space,"has' . "\n" . 'newline",,15.44'; + $expected3 = ' leadingspace,trailingspace ,' . $calcc3 . ',",leadingcomma"'; + $expected4 = '"trailingquote""",unused,unused,unused'; + $expectedfile = "$expected1\n$expected2\n$expected3\n$expected4\n"; + $delimiter = ','; + $enclosure = '"'; + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + foreach ($cellValues2 as $key => $value) { + $sheet->setCellValue($key, $value); + } + $writer = new CsvWriter($spreadsheet); + self::assertTrue($writer->getEnclosureRequired()); + $writer->setEnclosureRequired(false)->setDelimiter($delimiter)->setEnclosure($enclosure); + $filename = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test'); + $writer->save($filename); + $filedata = file_get_contents($filename); + $filedata = preg_replace('/\\r/', '', $filedata); + $reader = new CsvReader(); + $reader->setDelimiter($delimiter); + $reader->setEnclosure($enclosure); + $newspreadsheet = $reader->load($filename); + unlink($filename); + $sheet = $newspreadsheet->getActiveSheet(); + foreach ($cellValues2 as $key => $value) { + self::assertEquals(($key === 'C3') ? $calcc3 : $value, $sheet->getCell($key)->getValue()); + } + self::assertEquals($expectedfile, $filedata); + } + + public function testGoodReread(): void + { + $delimiter = ','; + $enclosure = '"'; + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + $sheet->setCellValue('A1', '1'); + $sheet->setCellValue('B1', '2,3'); + $sheet->setCellValue('C1', '4'); + $writer = new CsvWriter($spreadsheet); + $writer->setEnclosureRequired(false)->setDelimiter($delimiter)->setEnclosure($enclosure); + $filename = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test'); + $writer->save($filename); + $reader = new CsvReader(); + $reader->setDelimiter($delimiter); + $reader->setEnclosure($enclosure); + $newspreadsheet = $reader->load($filename); + unlink($filename); + $sheet = $newspreadsheet->getActiveSheet(); + self::assertEquals('1', $sheet->getCell('A1')->getValue()); + self::assertEquals('2,3', $sheet->getCell('B1')->getValue()); + self::assertEquals('4', $sheet->getCell('C1')->getValue()); + } + + public function testBadReread(): void + { + $delimiter = ','; + $enclosure = ''; + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + $sheet->setCellValue('A1', '1'); + $sheet->setCellValue('B1', '2,3'); + $sheet->setCellValue('C1', '4'); + $writer = new CsvWriter($spreadsheet); + $writer->setDelimiter($delimiter)->setEnclosure($enclosure); + $filename = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test'); + $writer->save($filename); + $reader = new CsvReader(); + $reader->setDelimiter($delimiter); + $reader->setEnclosure($enclosure); + $newspreadsheet = $reader->load($filename); + unlink($filename); + $sheet = $newspreadsheet->getActiveSheet(); + self::assertEquals('1', $sheet->getCell('A1')->getValue()); + self::assertEquals('2', $sheet->getCell('B1')->getValue()); + self::assertEquals('3', $sheet->getCell('C1')->getValue()); + self::assertEquals('4', $sheet->getCell('D1')->getValue()); + } +} diff --git a/tests/PhpSpreadsheetTests/Writer/Csv/CsvWriteTest.php b/tests/PhpSpreadsheetTests/Writer/Csv/CsvWriteTest.php index 7252ecf9..7fe1902b 100644 --- a/tests/PhpSpreadsheetTests/Writer/Csv/CsvWriteTest.php +++ b/tests/PhpSpreadsheetTests/Writer/Csv/CsvWriteTest.php @@ -1,6 +1,6 @@ setEnclosure('\''); self::assertEquals('\'', $writer->getEnclosure()); $writer->setEnclosure(''); + self::assertEquals('', $writer->getEnclosure()); + $writer->setEnclosure(); self::assertEquals('"', $writer->getEnclosure()); self::assertEquals(PHP_EOL, $writer->getLineEnding()); self::assertFalse($writer->getUseBOM());