Replace mbStrReplace() with str_replace()
By design, UTF-8 allows any byte-oriented substring search algorithm to be used, since the sequence of bytes for a character cannot occur anywhere else ([source](https://en.wikipedia.org/wiki/UTF-8#Advantages_3)). So `str_replace()` also works for UTF-8-encoded strings, assuming that the input strings are valid UTF-8 strings. The previous implementation of mbStrReplace() did nothing to detect invalid strings. Also, `str_replace()` does not support [Unicode equivalence](https://en.wikipedia.org/wiki/Unicode_equivalence), but neither do the `mbstring` functions, nor does `=SUBSTITUTE()` in Excel (tested on Excel for Mac version 15.19.1, Excel 2016 for Windows and LibreOffice 5.1). Closes #109
This commit is contained in:
parent
85c3bd0154
commit
3560f11935
|
@ -493,7 +493,7 @@ class TextData
|
|||
$instance = floor(Functions::flattenSingleValue($instance));
|
||||
|
||||
if ($instance == 0) {
|
||||
return \PhpOffice\PhpSpreadsheet\Shared\StringHelper::mbStrReplace($fromText, $toText, $text);
|
||||
return str_replace($fromText, $toText, $text);
|
||||
}
|
||||
|
||||
$pos = -1;
|
||||
|
|
|
@ -550,44 +550,6 @@ class StringHelper
|
|||
return preg_split('/(?<!^)(?!$)/u', $string);
|
||||
}
|
||||
|
||||
/**
 * Replace every occurrence of the search string(s) in a UTF-8 subject.
 *
 * PHP doesn't ship a multibyte str_replace, so the replacement is done by hand with the
 * mb_* functions. The character set is hard-coded to UTF-8 because that is the only
 * encoding this helper is used with.
 *
 * @param string|string[] $search Needle(s); empty needles are skipped
 * @param string|string[] $replace Replacement(s); when this is an array, a needle whose key is
 *                                 missing from it is replaced with an empty string
 * @param string|string[] $subject Haystack, or an array of haystacks that is processed
 *                                 recursively with its keys preserved
 *
 * @return string|string[] The subject with replacements applied (an array when $subject is an array)
 */
public static function mbStrReplace($search, $replace, $subject)
{
    // An array subject is handled element by element, keeping the original keys.
    if (is_array($subject)) {
        $ret = [];
        foreach ($subject as $key => $val) {
            $ret[$key] = self::mbStrReplace($search, $replace, $val);
        }

        return $ret;
    }

    foreach ((array) $search as $key => $s) {
        // Skip empty needles. The "!== 0" guard keeps an integer 0 needle, which loosely
        // equals '' on PHP versions before 8.0.
        if ($s == '' && $s !== 0) {
            continue;
        }

        $r = !is_array($replace) ? $replace : (isset($replace[$key]) ? $replace[$key] : '');

        $pos = mb_strpos($subject, $s, 0, 'UTF-8');
        if ($pos === false) {
            continue;
        }

        // Both lengths are invariant for this needle, so measure them once instead of on
        // every match.
        $needleLength = mb_strlen($s, 'UTF-8');
        $replacementLength = mb_strlen($r, 'UTF-8');

        do {
            $subject = mb_substr($subject, 0, $pos, 'UTF-8')
                . $r
                . mb_substr($subject, $pos + $needleLength, null, 'UTF-8');

            // Resume searching after the inserted replacement so that text introduced by the
            // replacement itself is never rescanned for this needle.
            $pos = mb_strpos($subject, $s, $pos + $replacementLength, 'UTF-8');
        } while ($pos !== false);
    }

    return $subject;
}
|
||||
|
||||
/**
|
||||
* Reverse the case of a string, so that all uppercase characters become lowercase
|
||||
* and all lowercase characters become uppercase.
|
||||
|
|
|
@ -27,4 +27,16 @@ return [
|
|||
'a',
|
||||
1,
|
||||
],
|
||||
'Unicode equivalence is not supported' => [
|
||||
"\u{0061}\u{030A}",
|
||||
"\u{0061}\u{030A}",
|
||||
"\u{00E5}",
|
||||
'x',
|
||||
],
|
||||
'Multibytes are supported' => [
|
||||
'x',
|
||||
"\u{00E5}",
|
||||
"\u{00E5}",
|
||||
'x',
|
||||
],
|
||||
];
|
||||
|
|
Loading…
Reference in New Issue