diff --git a/docs/topics/reading-and-writing-to-file.md b/docs/topics/reading-and-writing-to-file.md index 3b6a037c..c0dc7c03 100644 --- a/docs/topics/reading-and-writing-to-file.md +++ b/docs/topics/reading-and-writing-to-file.md @@ -535,8 +535,12 @@ $writer->save("05featuredemo.csv"); #### Writing UTF-8 CSV files -A CSV file can be marked as UTF-8 by writing a BOM file header. This can -be enabled by using the following code: +CSV files are written in UTF-8. If they do not contain characters +outside the ASCII range, nothing else need be done. +However, if such characters are in the file, +it should explicitly include a BOM file header; +if it doesn't, Excel will not interpret those characters correctly. +This can be enabled by using the following code: ``` php $writer = new \PhpOffice\PhpSpreadsheet\Writer\Csv($spreadsheet); diff --git a/samples/Basic/13_CalculationCyclicFormulae.php b/samples/Basic/13_CalculationCyclicFormulae.php index 26e9784d..a446e56e 100644 --- a/samples/Basic/13_CalculationCyclicFormulae.php +++ b/samples/Basic/13_CalculationCyclicFormulae.php @@ -16,7 +16,7 @@ $spreadsheet->getActiveSheet()->setCellValue('A1', '=B1') ->setCellValue('B1', '=A1+1') ->setCellValue('B2', '=A2'); -Calculation::getInstance($spreadsheet)->cyclicFormulaCount = 100; +Calculation::getInstance($spreadsheet)->cyclicFormulaCount = 15; // Calculated data $helper->log('Calculated data'); diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index 4d104595..2e485109 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -43,13 +43,6 @@ class Csv extends BaseReader */ private $contiguous = false; - /** - * Row counter for loading rows contiguously. - * - * @var int - */ - private $contiguousRow = -1; - /** * The character that can escape the enclosure. * @@ -101,28 +94,6 @@ class Csv extends BaseReader fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ? fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0); - break; - case 'UTF-16LE': - fgets($this->fileHandle, 3) == "\xFF\xFE" ? - fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); - - break; - case 'UTF-16BE': - fgets($this->fileHandle, 3) == "\xFE\xFF" ? - fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); - - break; - case 'UTF-32LE': - fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ? - fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); - - break; - case 'UTF-32BE': - fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ? - fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); - - break; - default: break; } } @@ -275,10 +246,7 @@ class Csv extends BaseReader public function listWorksheetInfo($pFilename) { // Open file - if (!$this->canRead($pFilename)) { - throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); - } - $this->openFile($pFilename); + $this->openFileOrMemory($pFilename); $fileHandle = $this->fileHandle; // Skip BOM, if any @@ -324,6 +292,24 @@ class Csv extends BaseReader return $this->loadIntoExisting($pFilename, $spreadsheet); } + private function openFileOrMemory($pFilename) + { + // Open file + $fhandle = $this->canRead($pFilename); + if (!$fhandle) { + throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); + } + $this->openFile($pFilename); + if ($this->inputEncoding !== 'UTF-8') { + fclose($this->fileHandle); + $entireFile = file_get_contents($pFilename); + $this->fileHandle = fopen('php://memory', 'r+'); + $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding); + fwrite($this->fileHandle, $data); + rewind($this->fileHandle); + } + } + /** * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. * @@ -338,10 +324,7 @@ class Csv extends BaseReader ini_set('auto_detect_line_endings', true); // Open file - if (!$this->canRead($pFilename)) { - throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); - } - $this->openFile($pFilename); + $this->openFileOrMemory($pFilename); $fileHandle = $this->fileHandle; // Skip BOM, if any @@ -357,22 +340,24 @@ class Csv extends BaseReader // Set our starting row based on whether we're in contiguous mode or not $currentRow = 1; - if ($this->contiguous) { - $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow; - } + $outRow = 0; // Loop through each line of the file in turn while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { + $noOutputYet = true; $columnLetter = 'A'; foreach ($rowData as $rowDatum) { if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) { - // Convert encoding if necessary - if ($this->inputEncoding !== 'UTF-8') { - $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding); + if ($this->contiguous) { + if ($noOutputYet) { + $noOutputYet = false; + ++$outRow; + } + } else { + $outRow = $currentRow; } - // Set cell value - $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum); + $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum); } ++$columnLetter; } @@ -382,10 +367,6 @@ class Csv extends BaseReader // Close file fclose($fileHandle); - if ($this->contiguous) { - $this->contiguousRow = $currentRow; - } - ini_set('auto_detect_line_endings', $lineEnding); // Return @@ -477,9 +458,6 @@ class Csv extends BaseReader public function setContiguous($contiguous) { $this->contiguous = (bool) $contiguous; - if (!$contiguous) { - $this->contiguousRow = -1; - } return $this; } @@ -530,7 +508,7 @@ class Csv extends BaseReader // Check if file exists try { $this->openFile($pFilename); - } catch (Exception $e) { + } catch (\InvalidArgumentException $e) { return false; } diff --git a/src/PhpSpreadsheet/Writer/Csv.php b/src/PhpSpreadsheet/Writer/Csv.php index 142f15b8..cabfe450 100644 --- a/src/PhpSpreadsheet/Writer/Csv.php +++ b/src/PhpSpreadsheet/Writer/Csv.php @@ -93,6 +93,8 @@ class Csv extends BaseWriter // Open file if (is_resource($pFilename)) { $fileHandle = $pFilename; + } elseif (!$pFilename) { + $fileHandle = false; } else { $fileHandle = fopen($pFilename, 'wb+'); } @@ -176,10 +178,7 @@ class Csv extends BaseWriter */ public function setEnclosure($pValue) { - if ($pValue == '') { - $pValue = null; - } - $this->enclosure = $pValue; + $this->enclosure = $pValue ? $pValue : '"'; return $this; } diff --git a/tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php b/tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php new file mode 100644 index 00000000..95f7e787 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php @@ -0,0 +1,57 @@ +startRow = $startRow; + $this->endRow = $startRow + $chunkSize; + } + + public function setFilterType($type) + { + $this->filterType = $type; + } + + public function filter1($row) + { + // Include rows 1-10, followed by 100-110, etc. + return $row % 100 <= 10; + } + + public function filter0($row) + { + // Only read the heading row, and the rows that are configured in $this->_startRow and $this->_endRow + if (($row == 1) || ($row >= $this->startRow && $row < $this->endRow)) { + return true; + } + + return false; + } + + public function readCell($column, $row, $worksheetName = '') + { + if ($this->filterType == 1) { + return $this->filter1($row); + } + + return $this->filter0($row); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php b/tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php new file mode 100644 index 00000000..4fe2006d --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php @@ -0,0 +1,81 @@ +getContiguous()); + $reader->setReadFilter($chunkFilter) + ->setContiguous(true); + + // Instantiate a new PhpSpreadsheet object manually + $spreadsheet = new Spreadsheet(); + + // Set a sheet index + $sheet = 0; + // Loop to read our worksheet in "chunk size" blocks + /** $startRow is set to 2 initially because we always read the headings in row #1 * */ + for ($startRow = 2; $startRow <= 240; $startRow += $chunkSize) { + // Tell the Read Filter, the limits on which rows we want to read this iteration + $chunkFilter->setRows($startRow, $chunkSize); + + // Increment the worksheet index pointer for the Reader + $reader->setSheetIndex($sheet); + // Load only the rows that match our filter into a new worksheet in the PhpSpreadsheet Object + $reader->loadIntoExisting($this->inputFileName, $spreadsheet); + // Set the worksheet title (to reference the "sheet" of data that we've loaded) + // and increment the sheet index as well + $spreadsheet->getActiveSheet()->setTitle('Country Data #' . (++$sheet)); + } + + $sheet = $spreadsheet->getSheetByName('Country Data #1'); + self::assertEquals('Kabul', $sheet->getCell('A2')->getValue()); + $sheet = $spreadsheet->getSheetByName('Country Data #2'); + self::assertEquals('Lesotho', $sheet->getCell('B4')->getValue()); + $sheet = $spreadsheet->getSheetByName('Country Data #3'); + self::assertEquals(-20.1, $sheet->getCell('C6')->getValue()); + } + + public function testContiguous2() + { + // Create a new Reader of the type defined in $inputFileType + $reader = new Csv(); + + // Create a new Instance of our Read Filter + $chunkFilter = new CsvContiguousFilter(); + $chunkFilter->setFilterType(1); + + // Tell the Reader that we want to use the Read Filter that we've Instantiated + // and that we want to store it in contiguous rows/columns + $reader->setReadFilter($chunkFilter) + ->setContiguous(true); + + // Instantiate a new PhpSpreadsheet object manually + $spreadsheet = new Spreadsheet(); + + // Loop to read our worksheet in "chunk size" blocks + $reader->loadIntoExisting($this->inputFileName, $spreadsheet); + + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('Kabul', $sheet->getCell('A2')->getValue()); + self::assertEquals('Kuwait', $sheet->getCell('B11')->getValue()); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/CsvTest.php index be08f6a6..bb593ce8 100644 --- a/tests/PhpSpreadsheetTests/Reader/CsvTest.php +++ b/tests/PhpSpreadsheetTests/Reader/CsvTest.php @@ -3,6 +3,7 @@ namespace PhpOffice\PhpSpreadsheetTests\Reader; use PhpOffice\PhpSpreadsheet\Reader\Csv; +use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException; use PHPUnit\Framework\TestCase; class CsvTest extends TestCase @@ -130,4 +131,113 @@ class CsvTest extends TestCase $this->assertSame('"', $reader->getEscapeCharacter()); $this->assertSame($expected, $worksheet->toArray()); } + + /** + * @dataProvider providerEncodings + * + * @param string $filename + * @param string $encoding + */ + public function testEncodings($filename, $encoding) + { + $reader = new Csv(); + $reader->setInputEncoding($encoding); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('Ã…', $sheet->getCell('A1')->getValue()); + } + + public function testInvalidWorkSheetInfo() + { + $this->expectException(ReaderException::class); + $reader = new Csv(); + $reader->listWorksheetInfo(''); + } + + /** + * @dataProvider providerEncodings + * + * @param string $filename + * @param string $encoding + */ + public function testWorkSheetInfo($filename, $encoding) + { + $reader = new Csv(); + $reader->setInputEncoding($encoding); + $info = $reader->listWorksheetInfo($filename); + self::assertEquals('Worksheet', $info[0]['worksheetName']); + self::assertEquals('B', $info[0]['lastColumnLetter']); + self::assertEquals(1, $info[0]['lastColumnIndex']); + self::assertEquals(2, $info[0]['totalRows']); + self::assertEquals(2, $info[0]['totalColumns']); + } + + public function providerEncodings() + { + return [ + ['data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'], + ['data/Reader/CSV/encoding.utf8.csv', 'UTF-8'], + ['data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'], + ['data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'], + ['data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'], + ['data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'], + ['data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'], + ]; + } + + public function testUtf16LineBreak() + { + $reader = new Csv(); + $reader->setInputEncoding('UTF-16BE'); + $spreadsheet = $reader->load('data/Reader/CSV/utf16be.line_break_in_enclosure.csv'); + $sheet = $spreadsheet->getActiveSheet(); + $expected = <<getCell('B3')->getValue()); + } + + public function testSeparatorLine() + { + $reader = new Csv(); + $reader->setSheetIndex(3); + $spreadsheet = $reader->load('data/Reader/CSV/sep.csv'); + self::assertEquals(';', $reader->getDelimiter()); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals(3, $reader->getSheetIndex()); + self::assertEquals(3, $spreadsheet->getActiveSheetIndex()); + self::assertEquals('A', $sheet->getCell('A1')->getValue()); + self::assertEquals(1, $sheet->getCell('B1')->getValue()); + self::assertEquals(2, $sheet->getCell('A2')->getValue()); + self::assertEquals(3, $sheet->getCell('B2')->getValue()); + } + + public function testDefaultSettings() + { + $reader = new Csv(); + self::assertEquals('UTF-8', $reader->getInputEncoding()); + self::assertEquals('"', $reader->getEnclosure()); + $reader->setEnclosure('\''); + self::assertEquals('\'', $reader->getEnclosure()); + $reader->setEnclosure(''); + self::assertEquals('"', $reader->getEnclosure()); + } + + public function testReadEmptyFileName() + { + $this->expectException(ReaderException::class); + $reader = new Csv(); + $filename = ''; + $reader->load($filename); + } + + public function testReadNonexistentFileName() + { + $this->expectException(ReaderException::class); + $reader = new Csv(); + $reader->load('data/Reader/CSV/encoding.utf8.csvxxx'); + } } diff --git a/tests/PhpSpreadsheetTests/Writer/Csv/CsvWriteTest.php b/tests/PhpSpreadsheetTests/Writer/Csv/CsvWriteTest.php new file mode 100644 index 00000000..f55781da --- /dev/null +++ b/tests/PhpSpreadsheetTests/Writer/Csv/CsvWriteTest.php @@ -0,0 +1,60 @@ +getActiveSheet(); + $sheet->setCellValue('A1', 'First Sheet'); + $sheet = $spreadsheet->createSheet(); + $sheet->setCellValue('A1', 'Second Sheet'); + $sheet = $spreadsheet->createSheet(); + $sheet->setCellValue('A1', 'Third Sheet'); + $writer = new CsvWriter($spreadsheet); + $writer->setSheetIndex(1); + self::assertEquals(1, $writer->getSheetIndex()); + $filename = tempnam(File::sysGetTempDir(), 'phpspreadsheet-test'); + $writer->save($filename); + $reader = new CsvReader(); + $newspreadsheet = $reader->load($filename); + unlink($filename); + $sheet = $newspreadsheet->getActiveSheet(); + self::assertEquals('Second Sheet', $sheet->getCell('A1')->getValue()); + self::assertEquals(0, $newspreadsheet->getActiveSheetIndex()); + } + + public function testWriteEmptyFileName() + { + $this->expectException(WriterException::class); + $spreadsheet = new Spreadsheet(); + $writer = new CsvWriter($spreadsheet); + $filename = ''; + $writer->save($filename); + } + + public function testDefaultSettings() + { + $spreadsheet = new Spreadsheet(); + $writer = new CsvWriter($spreadsheet); + self::assertEquals('"', $writer->getEnclosure()); + $writer->setEnclosure('\''); + self::assertEquals('\'', $writer->getEnclosure()); + $writer->setEnclosure(''); + self::assertEquals('"', $writer->getEnclosure()); + self::assertEquals(PHP_EOL, $writer->getLineEnding()); + self::assertFalse($writer->getUseBOM()); + self::assertFalse($writer->getIncludeSeparatorLine()); + self::assertFalse($writer->getExcelCompatibility()); + self::assertEquals(0, $writer->getSheetIndex()); + } +} diff --git a/tests/data/Reader/CSV/encoding.iso88591.csv b/tests/data/Reader/CSV/encoding.iso88591.csv new file mode 100644 index 00000000..84ecfd91 --- /dev/null +++ b/tests/data/Reader/CSV/encoding.iso88591.csv @@ -0,0 +1,2 @@ +Å,1 +2,3 diff --git a/tests/data/Reader/CSV/encoding.utf16be.csv b/tests/data/Reader/CSV/encoding.utf16be.csv new file mode 100644 index 00000000..8c9665ea Binary files /dev/null and b/tests/data/Reader/CSV/encoding.utf16be.csv differ diff --git a/tests/data/Reader/CSV/encoding.utf16le.csv b/tests/data/Reader/CSV/encoding.utf16le.csv new file mode 100644 index 00000000..1299a8b9 Binary files /dev/null and b/tests/data/Reader/CSV/encoding.utf16le.csv differ diff --git a/tests/data/Reader/CSV/encoding.utf32be.csv b/tests/data/Reader/CSV/encoding.utf32be.csv new file mode 100644 index 00000000..eaf28f69 Binary files /dev/null and b/tests/data/Reader/CSV/encoding.utf32be.csv differ diff --git a/tests/data/Reader/CSV/encoding.utf32le.csv b/tests/data/Reader/CSV/encoding.utf32le.csv new file mode 100644 index 00000000..49fc7770 Binary files /dev/null and b/tests/data/Reader/CSV/encoding.utf32le.csv differ diff --git a/tests/data/Reader/CSV/encoding.utf8.csv b/tests/data/Reader/CSV/encoding.utf8.csv new file mode 100644 index 00000000..d107f261 --- /dev/null +++ b/tests/data/Reader/CSV/encoding.utf8.csv @@ -0,0 +1,2 @@ +Ã…,1 +2,3 diff --git a/tests/data/Reader/CSV/encoding.utf8bom.csv b/tests/data/Reader/CSV/encoding.utf8bom.csv new file mode 100644 index 00000000..ea6f8bc5 --- /dev/null +++ b/tests/data/Reader/CSV/encoding.utf8bom.csv @@ -0,0 +1,2 @@ +Å,1 +2,3 diff --git a/tests/data/Reader/CSV/sep.csv b/tests/data/Reader/CSV/sep.csv new file mode 100644 index 00000000..50127f30 --- /dev/null +++ b/tests/data/Reader/CSV/sep.csv @@ -0,0 +1,3 @@ +sep=; +A;1 +2;3 diff --git a/tests/data/Reader/CSV/utf16be.line_break_in_enclosure.csv b/tests/data/Reader/CSV/utf16be.line_break_in_enclosure.csv new file mode 100644 index 00000000..d1720d0d Binary files /dev/null and b/tests/data/Reader/CSV/utf16be.line_break_in_enclosure.csv differ