From 139d85d87435d0ff11d24a2070c5846f8b73a53e Mon Sep 17 00:00:00 2001 From: Adrien Crivelli Date: Thu, 28 Dec 2017 12:22:01 +0900 Subject: [PATCH] Better auto-detection of CSV separators Closes #305 --- CHANGELOG.md | 12 ++++- src/PhpSpreadsheet/Reader/Csv.php | 6 +++ tests/PhpSpreadsheetTests/Reader/CsvTest.php | 53 ++++++++++++++++++-- tests/data/Reader/CSV/enclosure.csv | 4 ++ 4 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 tests/data/Reader/CSV/enclosure.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a7e5e03..28563460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [Unreleased] + +### Added + +### Fixed + +- Better auto-detection of CSV separators - [#305](https://github.com/PHPOffice/PhpSpreadsheet/issues/305) + ## [1.0.0] - 2017-12-25 ### Added -- Support to write merged cells in ODS format [#287](https://github.com/PHPOffice/PhpSpreadsheet/issues/287) -- Able to set the `topLeftCell` in freeze panes [#261](https://github.com/PHPOffice/PhpSpreadsheet/pull/261) +- Support to write merged cells in ODS format - [#287](https://github.com/PHPOffice/PhpSpreadsheet/issues/287) +- Able to set the `topLeftCell` in freeze panes - [#261](https://github.com/PHPOffice/PhpSpreadsheet/pull/261) - Support `DateTimeImmutable` as cell value - Support migration of prefixed classes diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index 954b9e8d..2f13e368 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -155,6 +155,10 @@ class Csv extends BaseReader // Count how many times each of the potential delimiters appears in each line $numberLines = 0; while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) { + // Drop everything that is enclosed to avoid counting false positives in enclosures + $enclosure = preg_quote($this->enclosure, '/'); + $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line); + $countLine = []; for ($i = strlen($line) - 1; $i >= 0; --$i) { $char = $line[$i]; @@ -223,6 +227,8 @@ class Csv extends BaseReader * @param string $pFilename * * @throws Exception + * + * @return array */ public function listWorksheetInfo($pFilename) { diff --git a/tests/PhpSpreadsheetTests/Reader/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/CsvTest.php index b1811777..003478fa 100644 --- a/tests/PhpSpreadsheetTests/Reader/CsvTest.php +++ b/tests/PhpSpreadsheetTests/Reader/CsvTest.php @@ -7,17 +7,60 @@ use PHPUnit\Framework\TestCase; class CsvTest extends TestCase { - public function testDelimiterDetection() + /** + * @dataProvider providerDelimiterDetection + * + * @param string $filename + * @param string $expectedDelimiter + * @param string $cell + * @param float|int|string $expectedValue + */ + public function testDelimiterDetection($filename, $expectedDelimiter, $cell, $expectedValue) { $reader = new Csv(); self::assertNull($reader->getDelimiter()); - $filename = __DIR__ . '/../../data/Reader/CSV/semicolon_separated.csv'; $spreadsheet = $reader->load($filename); - self::assertSame(';', $reader->getDelimiter(), 'should be able to infer the delimiter'); + self::assertSame($expectedDelimiter, $reader->getDelimiter(), 'should be able to infer the delimiter'); - $actual = $spreadsheet->getActiveSheet()->getCell('C2')->getValue(); - self::assertSame('25,5', $actual, 'should be able to retrieve values with commas'); + $actual = $spreadsheet->getActiveSheet()->getCell($cell)->getValue(); + self::assertSame($expectedValue, $actual, 'should be able to retrieve correct value'); + } + + public function providerDelimiterDetection() + { + return [ + [ + __DIR__ . '/../../data/Reader/CSV/enclosure.csv', + ',', + 'C4', + 'username2', + ], + [ + __DIR__ . '/../../data/Reader/CSV/semicolon_separated.csv', + ';', + 'C2', + '25,5', + ], + [ + __DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv', + ',', + 'B1', + 'Number of items with weight <= 50kg', + ], + [ + __DIR__ . '/../../../samples/Reader/sampleData/example1.csv', + ',', + 'I4', + '100%', + ], + [ + __DIR__ . '/../../../samples/Reader/sampleData/example2.csv', + ',', + 'D8', + -58.373161, + ], + ]; } } diff --git a/tests/data/Reader/CSV/enclosure.csv b/tests/data/Reader/CSV/enclosure.csv new file mode 100644 index 00000000..a768c63b --- /dev/null +++ b/tests/data/Reader/CSV/enclosure.csv @@ -0,0 +1,4 @@ +"Date/Time","task","username" +"12/17/2017 14:35","viewpage","username1", +"12/17/2017 04:00","editpage","username5", +"11/29/2017 08:20","deletepage","username2",,