Improve Coverage for CSV (#1475)

I believe that both CSV Reader and Writer are 100% covered now.

There were some errors uncovered during development.

The reader specifically permits encodings other than UTF-8 to be used.
However, fgetcsv will not properly handle other encodings.
I tried replacing it with fgets/iconv/str_getcsv, but that could not
handle line breaks within a cell, even for UTF-8.
This is, I'm sure, a very rare use case.
I eventually handled it by using php://memory to hold the translated
file contents for non-UTF8. There were no tests for this situation,
and now there are (probably too many).

"Contiguous" read was not handled correctly. There is a file
in samples which uses it. It was designed to read a large sheet,
and split it into three. The first sheet was correct, but the
second and third were almost entirely empty. This has been corrected,
and the sample code was adapted into a formal test with assertions
to confirm that it works as designed.

I made a minor documentation change. Unlike HTML, where you never
need a BOM because you can declare the encoding in the file,
a CSV with non-ASCII characters must explicitly include a BOM
for Excel to handle it correctly. This was explained in the Reading CSV
section, but was glossed over in the Writing CSV section, which I
have updated.
This commit is contained in:
oleibman 2020-05-17 02:15:18 -07:00 committed by GitHub
parent 3090c1e73f
commit 7517cdd008
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 358 additions and 60 deletions

View File

@ -535,8 +535,12 @@ $writer->save("05featuredemo.csv");
#### Writing UTF-8 CSV files #### Writing UTF-8 CSV files
A CSV file can be marked as UTF-8 by writing a BOM file header. This can CSV files are written in UTF-8. If they do not contain characters
be enabled by using the following code: outside the ASCII range, nothing else need be done.
However, if such characters are in the file,
it should explicitly include a BOM file header;
if it doesn't, Excel will not interpret those characters correctly.
This can be enabled by using the following code:
``` php ``` php
$writer = new \PhpOffice\PhpSpreadsheet\Writer\Csv($spreadsheet); $writer = new \PhpOffice\PhpSpreadsheet\Writer\Csv($spreadsheet);

View File

@ -16,7 +16,7 @@ $spreadsheet->getActiveSheet()->setCellValue('A1', '=B1')
->setCellValue('B1', '=A1+1') ->setCellValue('B1', '=A1+1')
->setCellValue('B2', '=A2'); ->setCellValue('B2', '=A2');
Calculation::getInstance($spreadsheet)->cyclicFormulaCount = 100; Calculation::getInstance($spreadsheet)->cyclicFormulaCount = 15;
// Calculated data // Calculated data
$helper->log('Calculated data'); $helper->log('Calculated data');

View File

@ -43,13 +43,6 @@ class Csv extends BaseReader
*/ */
private $contiguous = false; private $contiguous = false;
/**
* Row counter for loading rows contiguously.
*
* @var int
*/
private $contiguousRow = -1;
/** /**
* The character that can escape the enclosure. * The character that can escape the enclosure.
* *
@ -101,28 +94,6 @@ class Csv extends BaseReader
fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ? fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ?
fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0); fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0);
break;
case 'UTF-16LE':
fgets($this->fileHandle, 3) == "\xFF\xFE" ?
fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
break;
case 'UTF-16BE':
fgets($this->fileHandle, 3) == "\xFE\xFF" ?
fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
break;
case 'UTF-32LE':
fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ?
fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
break;
case 'UTF-32BE':
fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ?
fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
break;
default:
break; break;
} }
} }
@ -275,10 +246,7 @@ class Csv extends BaseReader
public function listWorksheetInfo($pFilename) public function listWorksheetInfo($pFilename)
{ {
// Open file // Open file
if (!$this->canRead($pFilename)) { $this->openFileOrMemory($pFilename);
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
}
$this->openFile($pFilename);
$fileHandle = $this->fileHandle; $fileHandle = $this->fileHandle;
// Skip BOM, if any // Skip BOM, if any
@ -324,6 +292,24 @@ class Csv extends BaseReader
return $this->loadIntoExisting($pFilename, $spreadsheet); return $this->loadIntoExisting($pFilename, $spreadsheet);
} }
/**
 * Open the input for reading, transcoding it to UTF-8 in memory when needed.
 *
 * When the configured input encoding is UTF-8 the file handle opened by
 * openFile() is used as-is. For any other encoding the whole file is read,
 * converted to UTF-8, and written to a php://memory stream which then replaces
 * $this->fileHandle (rewound to the start), so that the CSV parsing that
 * follows always operates on UTF-8 data.
 *
 * @param string $pFilename
 *
 * @throws Exception if canRead() rejects the file, or its contents cannot be
 *                   loaded for conversion
 */
private function openFileOrMemory($pFilename)
{
    // NOTE(review): canRead() is only used as a truth value here — presumably it returns bool; confirm.
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
    }
    $this->openFile($pFilename);
    if ($this->inputEncoding === 'UTF-8') {
        return;
    }

    // The on-disk handle is not needed: the data is re-read in one piece and transcoded.
    fclose($this->fileHandle);
    $entireFile = file_get_contents($pFilename);
    if ($entireFile === false) {
        // Without this check a failed read would silently produce an empty worksheet.
        throw new Exception('Could not read ' . $pFilename . ' for encoding conversion.');
    }

    $memoryHandle = fopen('php://memory', 'r+');
    if ($memoryHandle === false) {
        throw new Exception('Could not open a memory stream for ' . $pFilename . '.');
    }
    $this->fileHandle = $memoryHandle;

    $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
    fwrite($this->fileHandle, $data);
    rewind($this->fileHandle);
}
/** /**
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance. * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
* *
@ -338,10 +324,7 @@ class Csv extends BaseReader
ini_set('auto_detect_line_endings', true); ini_set('auto_detect_line_endings', true);
// Open file // Open file
if (!$this->canRead($pFilename)) { $this->openFileOrMemory($pFilename);
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
}
$this->openFile($pFilename);
$fileHandle = $this->fileHandle; $fileHandle = $this->fileHandle;
// Skip BOM, if any // Skip BOM, if any
@ -357,22 +340,24 @@ class Csv extends BaseReader
// Set our starting row based on whether we're in contiguous mode or not // Set our starting row based on whether we're in contiguous mode or not
$currentRow = 1; $currentRow = 1;
if ($this->contiguous) { $outRow = 0;
$currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
}
// Loop through each line of the file in turn // Loop through each line of the file in turn
while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
$noOutputYet = true;
$columnLetter = 'A'; $columnLetter = 'A';
foreach ($rowData as $rowDatum) { foreach ($rowData as $rowDatum) {
if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) { if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
// Convert encoding if necessary if ($this->contiguous) {
if ($this->inputEncoding !== 'UTF-8') { if ($noOutputYet) {
$rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding); $noOutputYet = false;
++$outRow;
}
} else {
$outRow = $currentRow;
} }
// Set cell value // Set cell value
$sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum); $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
} }
++$columnLetter; ++$columnLetter;
} }
@ -382,10 +367,6 @@ class Csv extends BaseReader
// Close file // Close file
fclose($fileHandle); fclose($fileHandle);
if ($this->contiguous) {
$this->contiguousRow = $currentRow;
}
ini_set('auto_detect_line_endings', $lineEnding); ini_set('auto_detect_line_endings', $lineEnding);
// Return // Return
@ -477,9 +458,6 @@ class Csv extends BaseReader
public function setContiguous($contiguous) public function setContiguous($contiguous)
{ {
$this->contiguous = (bool) $contiguous; $this->contiguous = (bool) $contiguous;
if (!$contiguous) {
$this->contiguousRow = -1;
}
return $this; return $this;
} }
@ -530,7 +508,7 @@ class Csv extends BaseReader
// Check if file exists // Check if file exists
try { try {
$this->openFile($pFilename); $this->openFile($pFilename);
} catch (Exception $e) { } catch (\InvalidArgumentException $e) {
return false; return false;
} }

View File

@ -93,6 +93,8 @@ class Csv extends BaseWriter
// Open file // Open file
if (is_resource($pFilename)) { if (is_resource($pFilename)) {
$fileHandle = $pFilename; $fileHandle = $pFilename;
} elseif (!$pFilename) {
$fileHandle = false;
} else { } else {
$fileHandle = fopen($pFilename, 'wb+'); $fileHandle = fopen($pFilename, 'wb+');
} }
@ -176,10 +178,7 @@ class Csv extends BaseWriter
*/ */
public function setEnclosure($pValue) public function setEnclosure($pValue)
{ {
if ($pValue == '') { $this->enclosure = $pValue ? $pValue : '"';
$pValue = null;
}
$this->enclosure = $pValue;
return $this; return $this;
} }

View File

@ -0,0 +1,57 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader;
use PhpOffice\PhpSpreadsheet\Reader\IReadFilter;
/**
 * Row-based read filter used by the contiguous CSV reader tests.
 *
 * Filter type 0 (the default) keeps the heading row plus a configurable
 * window of rows; filter type 1 keeps the rows whose number modulo 100
 * is at most 10. The column and worksheet name are ignored.
 */
class CsvContiguousFilter implements IReadFilter
{
    private $startRow = 0;

    private $endRow = 0;

    private $filterType = 0;

    /**
     * Define the window of rows accepted by filter type 0.
     *
     * The window starts at $startRow (inclusive) and ends at
     * $startRow + $chunkSize (exclusive).
     *
     * @param mixed $startRow
     * @param mixed $chunkSize
     */
    public function setRows($startRow, $chunkSize)
    {
        $this->endRow = $startRow + $chunkSize;
        $this->startRow = $startRow;
    }

    /**
     * Select which filter readCell() applies: 1 for filter1(), anything else for filter0().
     *
     * @param mixed $type
     */
    public function setFilterType($type)
    {
        $this->filterType = $type;
    }

    /**
     * Accept rows 1-10, then 100-110, 200-210, and so on.
     *
     * @param mixed $row
     *
     * @return bool
     */
    public function filter1($row)
    {
        $positionInHundred = $row % 100;

        return $positionInHundred <= 10;
    }

    /**
     * Accept the heading row (row 1) and every row inside the window
     * configured through setRows().
     *
     * @param mixed $row
     *
     * @return bool
     */
    public function filter0($row)
    {
        $isHeading = ($row == 1);
        $isInsideWindow = ($row >= $this->startRow && $row < $this->endRow);

        return $isHeading || $isInsideWindow;
    }

    /**
     * Decide whether a cell should be read, based on its row number only.
     *
     * @param mixed $column
     * @param mixed $row
     * @param mixed $worksheetName
     *
     * @return bool
     */
    public function readCell($column, $row, $worksheetName = '')
    {
        return ($this->filterType == 1) ? $this->filter1($row) : $this->filter0($row);
    }
}

View File

@ -0,0 +1,81 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader;
use PhpOffice\PhpSpreadsheet\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PHPUnit\Framework\TestCase;
/**
 * Exercises the CSV reader's "contiguous" mode together with a read filter.
 */
class CsvContiguousTest extends TestCase
{
    private $inputFileName = __DIR__ . '/../../../samples/Reader/sampleData/example2.csv';

    /**
     * Load the sample file in three filtered passes, one worksheet per pass,
     * and check a cell on each resulting worksheet.
     */
    public function testContiguous()
    {
        $rowsPerChunk = 100;
        $rowFilter = new CsvContiguousFilter();
        $csvReader = new Csv();

        // Contiguous mode is off until explicitly enabled.
        self::assertFalse($csvReader->getContiguous());
        $csvReader->setReadFilter($rowFilter)
            ->setContiguous(true);

        $workbook = new Spreadsheet();

        // Passes start at rows 2, 102 and 202 (row 1 holds the headings and is
        // always accepted by the filter); pass k is loaded into sheet index k.
        foreach (range(2, 240, $rowsPerChunk) as $sheetIndex => $firstRow) {
            $rowFilter->setRows($firstRow, $rowsPerChunk);
            $csvReader->setSheetIndex($sheetIndex);
            $csvReader->loadIntoExisting($this->inputFileName, $workbook);
            // Titles are 1-based so the sheets can be looked up by name below.
            $workbook->getActiveSheet()->setTitle('Country Data #' . ($sheetIndex + 1));
        }

        $firstChunk = $workbook->getSheetByName('Country Data #1');
        self::assertEquals('Kabul', $firstChunk->getCell('A2')->getValue());

        $secondChunk = $workbook->getSheetByName('Country Data #2');
        self::assertEquals('Lesotho', $secondChunk->getCell('B4')->getValue());

        $thirdChunk = $workbook->getSheetByName('Country Data #3');
        self::assertEquals(-20.1, $thirdChunk->getCell('C6')->getValue());
    }

    /**
     * Load the sample file in a single pass using filter type 1 and check
     * two cells of the resulting worksheet.
     */
    public function testContiguous2()
    {
        $rowFilter = new CsvContiguousFilter();
        $rowFilter->setFilterType(1);

        $csvReader = new Csv();
        $csvReader->setReadFilter($rowFilter)
            ->setContiguous(true);

        $workbook = new Spreadsheet();
        $csvReader->loadIntoExisting($this->inputFileName, $workbook);

        $worksheet = $workbook->getActiveSheet();
        self::assertEquals('Kabul', $worksheet->getCell('A2')->getValue());
        self::assertEquals('Kuwait', $worksheet->getCell('B11')->getValue());
    }
}

View File

@ -3,6 +3,7 @@
namespace PhpOffice\PhpSpreadsheetTests\Reader; namespace PhpOffice\PhpSpreadsheetTests\Reader;
use PhpOffice\PhpSpreadsheet\Reader\Csv; use PhpOffice\PhpSpreadsheet\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
class CsvTest extends TestCase class CsvTest extends TestCase
@ -130,4 +131,113 @@ class CsvTest extends TestCase
$this->assertSame('"', $reader->getEscapeCharacter()); $this->assertSame('"', $reader->getEscapeCharacter());
$this->assertSame($expected, $worksheet->toArray()); $this->assertSame($expected, $worksheet->toArray());
} }
/**
 * Cell A1 of every encoding fixture must load as the same character,
 * whatever the declared input encoding.
 *
 * @dataProvider providerEncodings
 *
 * @param string $filename
 * @param string $encoding
 */
public function testEncodings($filename, $encoding)
{
    $csvReader = new Csv();
    $csvReader->setInputEncoding($encoding);

    $firstCell = $csvReader->load($filename)
        ->getActiveSheet()
        ->getCell('A1');

    self::assertEquals('Å', $firstCell->getValue());
}
/**
 * Asking for worksheet info with an empty file name must raise a reader exception.
 */
public function testInvalidWorkSheetInfo()
{
    $this->expectException(ReaderException::class);

    $csvReader = new Csv();
    $csvReader->listWorksheetInfo('');
}
/**
 * Worksheet info must report the same 2x2 dimensions for every encoding fixture.
 *
 * @dataProvider providerEncodings
 *
 * @param string $filename
 * @param string $encoding
 */
public function testWorkSheetInfo($filename, $encoding)
{
    $csvReader = new Csv();
    $csvReader->setInputEncoding($encoding);

    $allSheets = $csvReader->listWorksheetInfo($filename);
    $firstSheet = $allSheets[0];

    self::assertEquals('Worksheet', $firstSheet['worksheetName']);
    self::assertEquals('B', $firstSheet['lastColumnLetter']);
    self::assertEquals(1, $firstSheet['lastColumnIndex']);
    self::assertEquals(2, $firstSheet['totalRows']);
    self::assertEquals(2, $firstSheet['totalColumns']);
}
/**
 * Data provider: one [fixture path, input encoding] pair per encoding fixture.
 *
 * @return array
 */
public function providerEncodings()
{
    $cases = [];

    // Single-byte encoding.
    $cases[] = ['data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'];

    // UTF-8, without and with a byte order mark.
    $cases[] = ['data/Reader/CSV/encoding.utf8.csv', 'UTF-8'];
    $cases[] = ['data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'];

    // UTF-16, both byte orders.
    $cases[] = ['data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'];
    $cases[] = ['data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'];

    // UTF-32, both byte orders.
    $cases[] = ['data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'];
    $cases[] = ['data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'];

    return $cases;
}
/**
 * A UTF-16BE file whose cell contains line breaks must load that cell
 * as a single multi-line value.
 */
public function testUtf16LineBreak()
{
$reader = new Csv();
$reader->setInputEncoding('UTF-16BE');
$spreadsheet = $reader->load('data/Reader/CSV/utf16be.line_break_in_enclosure.csv');
$sheet = $spreadsheet->getActiveSheet();
// Expected content of B3: four lines of text held in one cell.
$expected = <<<EOF
This is a test
with line breaks
that breaks the
delimiters
EOF;
self::assertEquals($expected, $sheet->getCell('B3')->getValue());
}
/**
 * A file that starts with a "sep=;" line must switch the reader's delimiter
 * to ';', and loading with sheet index 3 must make sheet 3 the active sheet.
 */
public function testSeparatorLine()
{
    $csvReader = new Csv();
    $csvReader->setSheetIndex(3);
    $workbook = $csvReader->load('data/Reader/CSV/sep.csv');

    // Reader state after the load.
    self::assertEquals(';', $csvReader->getDelimiter());
    $worksheet = $workbook->getActiveSheet();
    self::assertEquals(3, $csvReader->getSheetIndex());
    self::assertEquals(3, $workbook->getActiveSheetIndex());

    // Cell contents, split on the detected delimiter.
    self::assertEquals('A', $worksheet->getCell('A1')->getValue());
    self::assertEquals(1, $worksheet->getCell('B1')->getValue());
    self::assertEquals(2, $worksheet->getCell('A2')->getValue());
    self::assertEquals(3, $worksheet->getCell('B2')->getValue());
}
/**
 * Checks the reader's defaults and that an empty enclosure falls back to '"'.
 */
public function testDefaultSettings()
{
    $csvReader = new Csv();

    self::assertEquals('UTF-8', $csvReader->getInputEncoding());
    self::assertEquals('"', $csvReader->getEnclosure());

    // An explicit enclosure is stored as given.
    $csvReader->setEnclosure('\'');
    self::assertEquals('\'', $csvReader->getEnclosure());

    // An empty enclosure reverts to the double quote.
    $csvReader->setEnclosure('');
    self::assertEquals('"', $csvReader->getEnclosure());
}
/**
 * Loading an empty file name must raise a reader exception.
 */
public function testReadEmptyFileName()
{
    $this->expectException(ReaderException::class);

    $emptyName = '';
    $csvReader = new Csv();
    $csvReader->load($emptyName);
}
/**
 * Loading a path that does not exist must raise a reader exception.
 */
public function testReadNonexistentFileName()
{
    $this->expectException(ReaderException::class);

    $csvReader = new Csv();
    $csvReader->load('data/Reader/CSV/encoding.utf8.csvxxx');
}
} }

View File

@ -0,0 +1,60 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Functional;
use PhpOffice\PhpSpreadsheet\Reader\Csv as CsvReader;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Writer\Csv as CsvWriter;
use PhpOffice\PhpSpreadsheet\Writer\Exception as WriterException;
use PhpOffice\PhpSpreadsheetTests\Functional;
/**
 * Functional tests for the CSV writer: sheet selection, invalid targets and defaults.
 */
class CsvWriteTest extends Functional\AbstractFunctional
{
    /**
     * Writing with sheet index 1 must export the second worksheet only; the
     * file read back has that content on its (single) active sheet at index 0.
     */
    public function testNotFirstSheet()
    {
        $workbook = new Spreadsheet();
        $workbook->getActiveSheet()->setCellValue('A1', 'First Sheet');
        $workbook->createSheet()->setCellValue('A1', 'Second Sheet');
        $workbook->createSheet()->setCellValue('A1', 'Third Sheet');

        $csvWriter = new CsvWriter($workbook);
        $csvWriter->setSheetIndex(1);
        self::assertEquals(1, $csvWriter->getSheetIndex());

        // Round-trip through a temporary file, removed as soon as it has been read back.
        $tempFile = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test');
        $csvWriter->save($tempFile);
        $csvReader = new CsvReader();
        $reloaded = $csvReader->load($tempFile);
        unlink($tempFile);

        $worksheet = $reloaded->getActiveSheet();
        self::assertEquals('Second Sheet', $worksheet->getCell('A1')->getValue());
        self::assertEquals(0, $reloaded->getActiveSheetIndex());
    }

    /**
     * Saving to an empty file name must raise a writer exception.
     */
    public function testWriteEmptyFileName()
    {
        $this->expectException(WriterException::class);

        $csvWriter = new CsvWriter(new Spreadsheet());
        $emptyName = '';
        $csvWriter->save($emptyName);
    }

    /**
     * Checks the writer's defaults and that an empty enclosure falls back to '"'.
     */
    public function testDefaultSettings()
    {
        $csvWriter = new CsvWriter(new Spreadsheet());

        // Enclosure: default, explicit value, then fallback for an empty value.
        self::assertEquals('"', $csvWriter->getEnclosure());
        $csvWriter->setEnclosure('\'');
        self::assertEquals('\'', $csvWriter->getEnclosure());
        $csvWriter->setEnclosure('');
        self::assertEquals('"', $csvWriter->getEnclosure());

        // Remaining defaults.
        self::assertEquals(PHP_EOL, $csvWriter->getLineEnding());
        self::assertFalse($csvWriter->getUseBOM());
        self::assertFalse($csvWriter->getIncludeSeparatorLine());
        self::assertFalse($csvWriter->getExcelCompatibility());
        self::assertEquals(0, $csvWriter->getSheetIndex());
    }
}

View File

@ -0,0 +1,2 @@
<EFBFBD>,1
2,3
1 1
2 2 3

Binary file not shown.
1 �� �1� �
2 �2� �3� �

Binary file not shown.
1 �,�1� �
2 �2�,�3� �
3

Binary file not shown.
1 ������ ���1��� ���
2 ���2��� ���3��� ���

Binary file not shown.
1 ���,���1��� ���
2 ���2���,���3��� ���
3 ���

View File

@ -0,0 +1,2 @@
Å,1
2,3
1 Å 1
2 2 3

View File

@ -0,0 +1,2 @@
Å,1
2,3
1 Å 1
2 2 3

View File

@ -0,0 +1,3 @@
sep=;
A;1
2;3
1 sep=
2 A 1
3 2 3

Binary file not shown.
1 Name Copy URL
2 Test This is a test with line breaks that breaks the delimiters http://google.com
3 Test This is a test with line breaks that breaks the delimiters http://google.com
4 Test This is a test with line breaks that breaks the delimiters http://google.com
5 Test This is a test with line breaks that breaks the delimiters http://google.com
6 Test This is a test http://google.com