From a1a03428e776584a99cdd8d53881e2ad4a92b7cc Mon Sep 17 00:00:00 2001 From: Mingc Date: Thu, 23 Feb 2017 10:42:52 +0800 Subject: [PATCH] Decode comment text in xls reader (#99) (#100) Previously, the xls reader did not decode comments properly if they used UTF-8. References: * [The Microsoft Excel File Format](https://www.openoffice.org/sc/excelfileformat.pdf): From BIFF8 on, strings are always stored using UTF-16LE text encoding. The character array is a sequence of 16-bit values. Additionally, it is possible to use a compressed format, which omits the high bytes of all characters, if they are all zero. * [Apache POI](http://grepcode.com/file/repo1.maven.org/maven2/org.apache.poi/poi/3.7/org/apache/poi/util/StringUtil.java#StringUtil.readUnicodeString%28org.apache.poi.util.LittleEndianInput%29): see is16BitFlag. --- src/PhpSpreadsheet/Reader/Xls.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/PhpSpreadsheet/Reader/Xls.php b/src/PhpSpreadsheet/Reader/Xls.php index 4e171ac3..af6def07 100644 --- a/src/PhpSpreadsheet/Reader/Xls.php +++ b/src/PhpSpreadsheet/Reader/Xls.php @@ -1629,8 +1629,20 @@ class Xls extends BaseReader implements IReader $cbRuns = self::getInt2d($recordData, 12); $text = $this->getSplicedRecordData(); + $textByte = $text['spliceOffsets'][1] - $text['spliceOffsets'][0] - 1; + $textStr = substr($text['recordData'], $text['spliceOffsets'][0] + 1, $textByte); + // get 1 byte + $is16Bit = ord($text['recordData'][0]); + // it is possible to use a compressed format, + // which omits the high bytes of all characters, if they are all zero + if (($is16Bit & 0x01) === 0) { + $textStr = \PhpOffice\PhpSpreadsheet\Shared\StringHelper::ConvertEncoding($textStr, 'UTF-8', 'ISO-8859-1'); + } else { + $textStr = $this->decodeCodepage($textStr); + } + $this->textObjects[$this->textObjRef] = [ - 'text' => substr($text['recordData'], $text['spliceOffsets'][0] + 1, $cchText), + 'text' => $textStr, 'format' => substr($text['recordData'], 
$text['spliceOffsets'][1], $cbRuns), 'alignment' => $grbitOpts, 'rotation' => $rot,