add ability to set codepage explicitly for BIFF5 (#1484)
If a BIFF5 (Excel 95) file doesn't have a codepage record, the default codepage CP1252 is used and can't be changed. This causes problems when decoding Cyrillic text.
This commit is contained in:
parent
93fbf8a938
commit
6caa0cb4f5
|
@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
|
|||
- Add support for IFS() logical function [#1442](https://github.com/PHPOffice/PhpSpreadsheet/pull/1442)
|
||||
- Add Cell Address Helper to provide conversions between the R1C1 and A1 address formats [#1558](https://github.com/PHPOffice/PhpSpreadsheet/pull/1558)
|
||||
- Add ability to edit Html/Pdf before saving [#1499](https://github.com/PHPOffice/PhpSpreadsheet/pull/1499)
|
||||
- Add ability to set codepage explicitly for BIFF5 [#1018](https://github.com/PHPOffice/PhpSpreadsheet/issues/1018)
|
||||
|
||||
### Fixed
|
||||
|
||||
|
|
|
@ -439,6 +439,15 @@ class Xls extends BaseReader
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Explicitly set the codepage stored on this reader.
 *
 * The name is accepted only if CodePage::validate() recognises it.
 *
 * @param string $codepage Codepage name, e.g. 'CP1252'
 *
 * @throws PhpSpreadsheetException if the codepage name is not recognised
 */
public function setCodepage(string $codepage): void
{
    if (CodePage::validate($codepage)) {
        $this->codepage = $codepage;

        return;
    }

    throw new PhpSpreadsheetException('Unknown codepage: ' . $codepage);
}
|
||||
|
||||
/**
|
||||
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
|
||||
*
|
||||
|
@ -640,7 +649,7 @@ class Xls extends BaseReader
|
|||
|
||||
// initialize
|
||||
$this->pos = 0;
|
||||
$this->codepage = 'CP1252';
|
||||
$this->codepage = $this->codepage ?: CodePage::DEFAULT_CODE_PAGE;
|
||||
$this->formats = [];
|
||||
$this->objFonts = [];
|
||||
$this->palette = [];
|
||||
|
|
|
@ -6,6 +6,8 @@ use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
|
|||
|
||||
class CodePage
|
||||
{
|
||||
public const DEFAULT_CODE_PAGE = 'CP1252';
|
||||
|
||||
private static $pageArray = [
|
||||
0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
|
||||
367 => 'ASCII', // ASCII
|
||||
|
@ -65,6 +67,11 @@ class CodePage
|
|||
65001 => 'UTF-8', // Unicode (UTF-8)
|
||||
];
|
||||
|
||||
/**
 * Check whether a codepage name is one of the names listed in self::$pageArray.
 *
 * The comparison is strict, so the name must match a listed value exactly.
 *
 * @param string $codePage Codepage name to check, e.g. 'CP1252'
 *
 * @return bool true if the name is listed, false otherwise
 */
public static function validate(string $codePage): bool
{
    $knownCodePages = self::$pageArray;

    return in_array($codePage, $knownCodePages, true);
}
|
||||
|
||||
/**
|
||||
* Convert Microsoft Code Page Identifier to Code Page Name which iconv
|
||||
* and mbstring understands.
|
||||
|
|
Loading…
Reference in New Issue