Fix CSV delimiter detection on line breaks

The CSV Reader can now correctly ignore line breaks inside
enclosures, which allows it to determine the delimiter
correctly.

Fixes #716
Fixes #717
This commit is contained in:
Paul Barton 2018-10-10 15:27:14 +01:00 committed by Adrien Crivelli
parent 54efe8824e
commit 813855b2b2
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
4 changed files with 62 additions and 5 deletions

View File

@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Xls file cause the exception during open by Xls reader - [#402](https://github.com/PHPOffice/PhpSpreadsheet/issues/402) - Xls file cause the exception during open by Xls reader - [#402](https://github.com/PHPOffice/PhpSpreadsheet/issues/402)
- Skip non numeric value in SUMIF - [#618](https://github.com/PHPOffice/PhpSpreadsheet/pull/618) - Skip non numeric value in SUMIF - [#618](https://github.com/PHPOffice/PhpSpreadsheet/pull/618)
- OFFSET should allow omitted height and width - [#561](https://github.com/PHPOffice/PhpSpreadsheet/issues/561) - OFFSET should allow omitted height and width - [#561](https://github.com/PHPOffice/PhpSpreadsheet/issues/561)
- Correctly determine delimiter when CSV contains line breaks inside enclosures - [#716](https://github.com/PHPOffice/PhpSpreadsheet/issues/716)
## [1.4.1] - 2018-09-30 ## [1.4.1] - 2018-09-30

View File

@ -163,11 +163,7 @@ class Csv extends BaseReader
// Count how many times each of the potential delimiters appears in each line // Count how many times each of the potential delimiters appears in each line
$numberLines = 0; $numberLines = 0;
while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) { while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) {
// Drop everything that is enclosed to avoid counting false positives in enclosures
$enclosure = preg_quote($this->enclosure, '/');
$line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);
$countLine = []; $countLine = [];
for ($i = strlen($line) - 1; $i >= 0; --$i) { for ($i = strlen($line) - 1; $i >= 0; --$i) {
$char = $line[$i]; $char = $line[$i];
@ -230,6 +226,42 @@ class Csv extends BaseReader
return $this->skipBOM(); return $this->skipBOM();
} }
/**
 * Get the next full line from the file.
 *
 * Reads physical lines from the file handle and joins them until no
 * enclosure character is left over, so that an enclosed value containing
 * line breaks is handled as part of a single logical line. Everything
 * between a pair of enclosure characters (the pair included) is removed
 * from the returned string.
 *
 * @param string $line Text read so far; the next physical line is appended to it
 *
 * @return bool|string The joined line with enclosed text removed, or false as soon as
 *                     fgets() has nothing more to return (this includes running out
 *                     of input while an enclosure is still open)
 */
private function getNextLine($line = '')
{
    $enclosure = preg_quote($this->enclosure, '/');

    // U: invert greediness so each enclosure pair is dropped on its own.
    // s: let "." match line breaks too. Without it an enclosed value that
    //    spans several physical lines is never removed, its enclosure
    //    characters stay in the line, and reading continues to the end of
    //    the file where false is returned and the record is lost.
    $enclosedPattern = '/(' . $enclosure . '.*' . $enclosure . ')/Us';
    $leftoverEnclosurePattern = '/(' . $enclosure . ')/';

    // Loop rather than recurse so the call depth does not grow with the
    // number of line breaks inside an enclosed value.
    do {
        // Get the next line in the file
        $newLine = fgets($this->fileHandle);

        // Return false if there is no next line
        if ($newLine === false) {
            return false;
        }

        // Add the new line to what has been read so far, then drop everything
        // that is enclosed to avoid counting false positives in enclosures
        $line = preg_replace($enclosedPattern, '', $line . $newLine);

        // If an enclosure character is still present, the enclosed value
        // continues on the next physical line, so keep reading
    } while (preg_match($leftoverEnclosurePattern, $line) === 1);

    return $line;
}
/** /**
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
* *

View File

@ -43,6 +43,12 @@ class CsvTest extends TestCase
'C2', 'C2',
'25,5', '25,5',
], ],
[
__DIR__ . '/../../data/Reader/CSV/line_break_in_enclosure.csv',
',',
'A3',
'Test',
],
[ [
__DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv', __DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv',
',', ',',

View File

@ -0,0 +1,18 @@
Name,Copy,URL
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test",http://google.com
1 Name Copy URL
2 Test This is a test with line breaks that breaks the delimiters http://google.com
3 Test This is a test with line breaks that breaks the delimiters http://google.com
4 Test This is a test with line breaks that breaks the delimiters http://google.com
5 Test This is a test with line breaks that breaks the delimiters http://google.com
6 Test This is a test http://google.com