Replace mbStrReplace() with str_replace()
By design, UTF-8 is safe to use with any byte-oriented substring search algorithm, since the byte sequence that encodes one character can never occur inside the encoding of another character ([source](https://en.wikipedia.org/wiki/UTF-8#Advantages_3)). So `str_replace()` also works for UTF-8-encoded strings, provided that the input strings are valid UTF-8. The previous implementation of `mbStrReplace()` did nothing to detect invalid strings. Also, `str_replace()` does not support [Unicode equivalence](https://en.wikipedia.org/wiki/Unicode_equivalence), but neither do the other `mbstring` functions, nor does `=SUBSTITUTE()` in Excel (tested on Excel for Mac version 15.19.1, Excel 2016 for Windows and LibreOffice 5.1). Closes #109
This commit is contained in:
parent
85c3bd0154
commit
3560f11935
|
@ -493,7 +493,7 @@ class TextData
|
||||||
$instance = floor(Functions::flattenSingleValue($instance));
|
$instance = floor(Functions::flattenSingleValue($instance));
|
||||||
|
|
||||||
if ($instance == 0) {
|
if ($instance == 0) {
|
||||||
return \PhpOffice\PhpSpreadsheet\Shared\StringHelper::mbStrReplace($fromText, $toText, $text);
|
return str_replace($fromText, $toText, $text);
|
||||||
}
|
}
|
||||||
|
|
||||||
$pos = -1;
|
$pos = -1;
|
||||||
|
|
|
@ -550,44 +550,6 @@ class StringHelper
|
||||||
return preg_split('/(?<!^)(?!$)/u', $string);
|
return preg_split('/(?<!^)(?!$)/u', $string);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Replace into multi-bytes string.
|
|
||||||
*
|
|
||||||
* Strangely, PHP doesn't have a mb_str_replace multibyte function
|
|
||||||
* As we'll only ever use this function with UTF-8 characters, we can simply "hard-code" the character set
|
|
||||||
*
|
|
||||||
* @param string|string[] $search
|
|
||||||
* @param string|string[] $replace
|
|
||||||
* @param string $subject
|
|
||||||
*
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
public static function mbStrReplace($search, $replace, $subject)
|
|
||||||
{
|
|
||||||
if (is_array($subject)) {
|
|
||||||
$ret = [];
|
|
||||||
foreach ($subject as $key => $val) {
|
|
||||||
$ret[$key] = self::mbStrReplace($search, $replace, $val);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ((array) $search as $key => $s) {
|
|
||||||
if ($s == '' && $s !== 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$r = !is_array($replace) ? $replace : (isset($replace[$key]) ? $replace[$key] : '');
|
|
||||||
$pos = mb_strpos($subject, $s, 0, 'UTF-8');
|
|
||||||
while ($pos !== false) {
|
|
||||||
$subject = mb_substr($subject, 0, $pos, 'UTF-8') . $r . mb_substr($subject, $pos + mb_strlen($s, 'UTF-8'), null, 'UTF-8');
|
|
||||||
$pos = mb_strpos($subject, $s, $pos + mb_strlen($r, 'UTF-8'), 'UTF-8');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $subject;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reverse the case of a string, so that all uppercase characters become lowercase
|
* Reverse the case of a string, so that all uppercase characters become lowercase
|
||||||
* and all lowercase characters become uppercase.
|
* and all lowercase characters become uppercase.
|
||||||
|
|
|
@ -27,4 +27,16 @@ return [
|
||||||
'a',
|
'a',
|
||||||
1,
|
1,
|
||||||
],
|
],
|
||||||
|
'Unicode equivalence is not supported' => [
|
||||||
|
"\u{0061}\u{030A}",
|
||||||
|
"\u{0061}\u{030A}",
|
||||||
|
"\u{00E5}",
|
||||||
|
'x',
|
||||||
|
],
|
||||||
|
'Multibytes are supported' => [
|
||||||
|
'x',
|
||||||
|
"\u{00E5}",
|
||||||
|
"\u{00E5}",
|
||||||
|
'x',
|
||||||
|
],
|
||||||
];
|
];
|
||||||
|
|
Loading…
Reference in New Issue