Fix CSV delimiter detection on line breaks

The CSV reader now ignores line breaks inside enclosures,
which allows it to determine the delimiter
correctly.

Fixes 
Fixes 
This commit is contained in:
Paul Barton 2018-10-10 15:27:14 +01:00 committed by Adrien Crivelli
parent 54efe8824e
commit 813855b2b2
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
4 changed files with 62 additions and 5 deletions
CHANGELOG.md
src/PhpSpreadsheet/Reader
tests
PhpSpreadsheetTests/Reader
data/Reader/CSV

View File

@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Xls file causes an exception when opened by the Xls reader - [#402](https://github.com/PHPOffice/PhpSpreadsheet/issues/402)
- Skip non numeric value in SUMIF - [#618](https://github.com/PHPOffice/PhpSpreadsheet/pull/618)
- OFFSET should allow omitted height and width - [#561](https://github.com/PHPOffice/PhpSpreadsheet/issues/561)
- Correctly determine delimiter when CSV contains line breaks inside enclosures - [#716](https://github.com/PHPOffice/PhpSpreadsheet/issues/716)
## [1.4.1] - 2018-09-30

View File

@ -163,11 +163,7 @@ class Csv extends BaseReader
// Count how many times each of the potential delimiters appears in each line
$numberLines = 0;
while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) {
// Drop everything that is enclosed to avoid counting false positives in enclosures
$enclosure = preg_quote($this->enclosure, '/');
$line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);
while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) {
$countLine = [];
for ($i = strlen($line) - 1; $i >= 0; --$i) {
$char = $line[$i];
@ -230,6 +226,42 @@ class Csv extends BaseReader
return $this->skipBOM();
}
/**
 * Get the next full logical line (record) from the file.
 *
 * A record may span several physical lines when an enclosed value contains
 * line breaks. Physical lines are read and appended until no unmatched
 * enclosure character is left. Enclosed values are removed from the returned
 * text so that delimiter characters inside them are not counted.
 *
 * @param string $line Text already read for the current record; newly read lines are appended to it
 *
 * @return bool|string The record with all enclosed values stripped, or false when
 *                     the file has no further line to complete the record
 */
private function getNextLine($line = '')
{
    $enclosure = preg_quote($this->enclosure, '/');

    // Iterate instead of recursing so that a long run of lines with an
    // unbalanced enclosure cannot grow the call stack.
    do {
        // Get the next physical line in the file
        $newLine = fgets($this->fileHandle);

        // Return false if there is no next line
        if ($newLine === false) {
            return false;
        }

        // Drop everything that is enclosed to avoid counting false positives in enclosures.
        // The "s" modifier lets "." match line breaks, so an enclosed value spanning
        // several physical lines is removed once its closing enclosure has been read.
        // preg_replace() returns null on a PCRE error; the cast keeps $line a string.
        $line = (string) preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line . $newLine);

        // If an enclosure character is still left, the record continues on the
        // next physical line, so read that one as well.
    } while (preg_match('/' . $enclosure . '/', $line) === 1);

    return $line;
}
/**
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
*

View File

@ -43,6 +43,12 @@ class CsvTest extends TestCase
'C2',
'25,5',
],
[
__DIR__ . '/../../data/Reader/CSV/line_break_in_enclosure.csv',
',',
'A3',
'Test',
],
[
__DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv',
',',

View File

@ -0,0 +1,18 @@
Name,Copy,URL
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test
with line breaks
that breaks the
delimiters",http://google.com
Test,"This is a test",http://google.com
1 Name Copy URL
2 Test This is a test with line breaks that breaks the delimiters http://google.com
3 Test This is a test with line breaks that breaks the delimiters http://google.com
4 Test This is a test with line breaks that breaks the delimiters http://google.com
5 Test This is a test with line breaks that breaks the delimiters http://google.com
6 Test This is a test http://google.com