Previously, the Xls reader did not decode comments properly if they used UTF-8.

References:

* [The Microsoft Excel File Format](https://www.openoffice.org/sc/excelfileformat.pdf): From BIFF8 on, strings are always stored using UTF-16LE text encoding. The character array is a sequence of 16-bit values. Additionally, it is possible to use a compressed format, which omits the high bytes of all characters if they are all zero.
* [Apache POI](http://grepcode.com/file/repo1.maven.org/maven2/org.apache.poi/poi/3.7/org/apache/poi/util/StringUtil.java#StringUtil.readUnicodeString%28org.apache.poi.util.LittleEndianInput%29): see the `is16BitFlag` handling in `StringUtil.readUnicodeString`.
This commit is contained in:
parent
7b90bb9394
commit
a1a03428e7
|
@ -1629,8 +1629,20 @@ class Xls extends BaseReader implements IReader
|
|||
$cbRuns = self::getInt2d($recordData, 12);
|
||||
$text = $this->getSplicedRecordData();
|
||||
|
||||
$textByte = $text['spliceOffsets'][1] - $text['spliceOffsets'][0] - 1;
|
||||
$textStr = substr($text['recordData'], $text['spliceOffsets'][0] + 1, $textByte);
|
||||
// get 1 byte
|
||||
$is16Bit = ord($text['recordData'][0]);
|
||||
// it is possible to use a compressed format,
|
||||
// which omits the high bytes of all characters, if they are all zero
|
||||
if (($is16Bit & 0x01) === 0) {
|
||||
$textStr = \PhpOffice\PhpSpreadsheet\Shared\StringHelper::ConvertEncoding($textStr, 'UTF-8', 'ISO-8859-1');
|
||||
} else {
|
||||
$textStr = $this->decodeCodepage($textStr);
|
||||
}
|
||||
|
||||
$this->textObjects[$this->textObjRef] = [
|
||||
'text' => substr($text['recordData'], $text['spliceOffsets'][0] + 1, $cchText),
|
||||
'text' => $textStr,
|
||||
'format' => substr($text['recordData'], $text['spliceOffsets'][1], $cbRuns),
|
||||
'alignment' => $grbitOpts,
|
||||
'rotation' => $rot,
|
||||
|
|
Loading…
Reference in New Issue