Previously, the Xls reader did not decode comments properly if they used UTF-8.

References:

* [The Microsoft Excel File Format](https://www.openoffice.org/sc/excelfileformat.pdf): "From BIFF8 on, strings are always stored using UTF-16LE text encoding. The character array is a sequence of 16-bit values. Additionally it is possible to use a compressed format, which omits the high bytes of all characters, if they are all zero."
* [Apache POI](http://grepcode.com/file/repo1.maven.org/maven2/org.apache.poi/poi/3.7/org/apache/poi/util/StringUtil.java#StringUtil.readUnicodeString%28org.apache.poi.util.LittleEndianInput%29): see the handling of `is16BitFlag`.
This commit is contained in:
parent
7b90bb9394
commit
a1a03428e7
|
@ -1629,8 +1629,20 @@ class Xls extends BaseReader implements IReader
|
||||||
$cbRuns = self::getInt2d($recordData, 12);
|
$cbRuns = self::getInt2d($recordData, 12);
|
||||||
$text = $this->getSplicedRecordData();
|
$text = $this->getSplicedRecordData();
|
||||||
|
|
||||||
|
$textByte = $text['spliceOffsets'][1] - $text['spliceOffsets'][0] - 1;
|
||||||
|
$textStr = substr($text['recordData'], $text['spliceOffsets'][0] + 1, $textByte);
|
||||||
|
// get 1 byte
|
||||||
|
$is16Bit = ord($text['recordData'][0]);
|
||||||
|
// it is possible to use a compressed format,
|
||||||
|
// which omits the high bytes of all characters, if they are all zero
|
||||||
|
if (($is16Bit & 0x01) === 0) {
|
||||||
|
$textStr = \PhpOffice\PhpSpreadsheet\Shared\StringHelper::ConvertEncoding($textStr, 'UTF-8', 'ISO-8859-1');
|
||||||
|
} else {
|
||||||
|
$textStr = $this->decodeCodepage($textStr);
|
||||||
|
}
|
||||||
|
|
||||||
$this->textObjects[$this->textObjRef] = [
|
$this->textObjects[$this->textObjRef] = [
|
||||||
'text' => substr($text['recordData'], $text['spliceOffsets'][0] + 1, $cchText),
|
'text' => $textStr,
|
||||||
'format' => substr($text['recordData'], $text['spliceOffsets'][1], $cbRuns),
|
'format' => substr($text['recordData'], $text['spliceOffsets'][1], $cbRuns),
|
||||||
'alignment' => $grbitOpts,
|
'alignment' => $grbitOpts,
|
||||||
'rotation' => $rot,
|
'rotation' => $rot,
|
||||||
|
|
Loading…
Reference in New Issue