Replace mbStrReplace() with str_replace()

By design, UTF-8 is self-synchronizing: the byte sequence that encodes one
character never occurs inside the encoding of another character, so any
byte-oriented substring search algorithm only finds matches on character boundaries
([source](https://en.wikipedia.org/wiki/UTF-8#Advantages_3)).

So `str_replace()` also works for UTF-8-encoded strings, assuming that
the input strings are valid UTF-8 strings. The previous implementation
of mbStrReplace() did nothing to detect invalid strings.

Also, `str_replace()` does not support [Unicode equivalence](https://en.wikipedia.org/wiki/Unicode_equivalence),
but neither do the `mbstring` functions, nor does `=SUBSTITUTE()` in common spreadsheet applications
(tested on Excel for Mac version 15.19.1, Excel 2016 for Windows and LibreOffice 5.1).

Closes #109
This commit is contained in:
Christian Schmidt 2017-03-05 12:35:21 +01:00 committed by Adrien Crivelli
parent 85c3bd0154
commit 3560f11935
No known key found for this signature in database
GPG Key ID: B182FD79DC6DE92E
3 changed files with 13 additions and 39 deletions

View File

@ -493,7 +493,7 @@ class TextData
$instance = floor(Functions::flattenSingleValue($instance));
if ($instance == 0) {
return \PhpOffice\PhpSpreadsheet\Shared\StringHelper::mbStrReplace($fromText, $toText, $text);
return str_replace($fromText, $toText, $text);
}
$pos = -1;

View File

@ -550,44 +550,6 @@ class StringHelper
return preg_split('/(?<!^)(?!$)/u', $string);
}
/**
 * Multibyte-aware counterpart of str_replace(), hard-wired to UTF-8.
 *
 * PHP's mbstring extension offers no mb_str_replace(), so the replacement is
 * assembled from mb_strpos(), mb_substr() and mb_strlen().
 *
 * When $subject is an array, each element is processed recursively and the
 * result is an array with the same keys. Empty search values are skipped
 * (the integer 0 is still searched for). When $replace is an array that has
 * no entry for a given search key, the empty string is used as replacement.
 *
 * @param string|string[] $search value(s) to look for
 * @param string|string[] $replace replacement(s), matched to $search by key
 * @param string|string[] $subject text (or array of texts) to operate on
 *
 * @return string|string[] the subject with every occurrence replaced
 */
public static function mbStrReplace($search, $replace, $subject)
{
    // Array subjects: recurse per element, keeping the original keys.
    if (is_array($subject)) {
        $replaced = [];
        foreach ($subject as $index => $item) {
            $replaced[$index] = self::mbStrReplace($search, $replace, $item);
        }

        return $replaced;
    }

    foreach ((array) $search as $index => $needle) {
        // Nothing to look for when the needle is empty; integer 0 is a valid needle.
        $isSearchable = $needle === 0 || $needle != '';
        if (!$isSearchable) {
            continue;
        }

        // Pick the replacement that belongs to this needle.
        if (!is_array($replace)) {
            $substitute = $replace;
        } elseif (isset($replace[$index])) {
            $substitute = $replace[$index];
        } else {
            $substitute = '';
        }

        // After each substitution the search resumes just past the inserted
        // text, so the replacement itself is never scanned again.
        for (
            $offset = mb_strpos($subject, $needle, 0, 'UTF-8');
            $offset !== false;
            $offset = mb_strpos($subject, $needle, $offset + mb_strlen($substitute, 'UTF-8'), 'UTF-8')
        ) {
            $head = mb_substr($subject, 0, $offset, 'UTF-8');
            $tail = mb_substr($subject, $offset + mb_strlen($needle, 'UTF-8'), null, 'UTF-8');
            $subject = $head . $substitute . $tail;
        }
    }

    return $subject;
}
/**
* Reverse the case of a string, so that all uppercase characters become lowercase
* and all lowercase characters become uppercase.

View File

@ -27,4 +27,16 @@ return [
'a',
1,
],
'Unicode equivalence is not supported' => [
"\u{0061}\u{030A}",
"\u{0061}\u{030A}",
"\u{00E5}",
'x',
],
'Multibytes are supported' => [
'x',
"\u{00E5}",
"\u{00E5}",
'x',
],
];