Fix CSV delimiter detection on line breaks
The CSV Reader can now correctly ignore line breaks inside enclosures, which allows it to determine the delimiter correctly. Fixes #716. Fixes #717.
This commit is contained in:
parent
54efe8824e
commit
813855b2b2
|
@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
|||
- Xls file causes an exception when opened by the Xls reader - [#402](https://github.com/PHPOffice/PhpSpreadsheet/issues/402)
|
||||
- Skip non numeric value in SUMIF - [#618](https://github.com/PHPOffice/PhpSpreadsheet/pull/618)
|
||||
- OFFSET should allow omitted height and width - [#561](https://github.com/PHPOffice/PhpSpreadsheet/issues/561)
|
||||
- Correctly determine delimiter when CSV contains line breaks inside enclosures - [#716](https://github.com/PHPOffice/PhpSpreadsheet/issues/716)
|
||||
|
||||
## [1.4.1] - 2018-09-30
|
||||
|
||||
|
|
|
@ -163,11 +163,7 @@ class Csv extends BaseReader
|
|||
|
||||
// Count how many times each of the potential delimiters appears in each line
|
||||
$numberLines = 0;
|
||||
while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) {
|
||||
// Drop everything that is enclosed to avoid counting false positives in enclosures
|
||||
$enclosure = preg_quote($this->enclosure, '/');
|
||||
$line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);
|
||||
|
||||
while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) {
|
||||
$countLine = [];
|
||||
for ($i = strlen($line) - 1; $i >= 0; --$i) {
|
||||
$char = $line[$i];
|
||||
|
@ -230,6 +226,42 @@ class Csv extends BaseReader
|
|||
return $this->skipBOM();
|
||||
}
|
||||
|
||||
/**
 * Get the next full logical line from the file, with enclosed text removed.
 *
 * Physical lines are read with fgets() and appended to $line. After each
 * read, every complete enclosure pair (and whatever lies between the two
 * enclosure characters, line breaks included) is dropped. As long as an
 * unmatched enclosure character is left over, the enclosed value continues
 * on the following physical line, so another line is read and appended.
 *
 * @param string $line Text read so far that the next physical line is appended to
 *
 * @return bool|string The accumulated line without enclosed text, or false
 *                     when the end of the file is reached and nothing was accumulated
 */
private function getNextLine($line = '')
{
    // The enclosure is constant for the whole call, so quote it only once
    $enclosure = preg_quote($this->enclosure, '/');

    do {
        // Get the next physical line in the file
        $newLine = fgets($this->fileHandle);

        if ($newLine === false) {
            // End of file: hand back what was accumulated (e.g. an enclosure
            // that was never closed) instead of discarding it; only report
            // false when there is really nothing left to return
            return $line === '' ? false : $line;
        }

        // Add the new physical line to what has been read so far
        $line .= $newLine;

        // Drop everything that is enclosed to avoid counting false positives in enclosures.
        // "U" makes the match ungreedy so each enclosure pair is removed separately;
        // "s" lets "." match line breaks so an enclosure spanning several physical
        // lines is removed as soon as its closing character has been read
        $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);

        // If an enclosure character is still left, the enclosed value is not
        // closed yet and we need to read the next physical line as well
    } while (preg_match('/(' . $enclosure . ')/', $line) > 0);

    return $line;
}
|
||||
|
||||
/**
|
||||
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
|
||||
*
|
||||
|
|
|
@ -43,6 +43,12 @@ class CsvTest extends TestCase
|
|||
'C2',
|
||||
'25,5',
|
||||
],
|
||||
[
|
||||
__DIR__ . '/../../data/Reader/CSV/line_break_in_enclosure.csv',
|
||||
',',
|
||||
'A3',
|
||||
'Test',
|
||||
],
|
||||
[
|
||||
__DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv',
|
||||
',',
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
Name,Copy,URL
|
||||
Test,"This is a test
|
||||
with line breaks
|
||||
that breaks the
|
||||
delimiters",http://google.com
|
||||
Test,"This is a test
|
||||
with line breaks
|
||||
that breaks the
|
||||
delimiters",http://google.com
|
||||
Test,"This is a test
|
||||
with line breaks
|
||||
that breaks the
|
||||
delimiters",http://google.com
|
||||
Test,"This is a test
|
||||
with line breaks
|
||||
that breaks the
|
||||
delimiters",http://google.com
|
||||
Test,"This is a test",http://google.com
|
|
Loading…
Reference in New Issue