From 3560f11935f8af970a4d077fcc56a7f050a39632 Mon Sep 17 00:00:00 2001 From: Christian Schmidt Date: Sun, 5 Mar 2017 12:35:21 +0100 Subject: [PATCH] Replace mbStrReplace() with str_replace() By design, UTF-8 allows any byte-oriented substring searching algorithm, since the sequence of bytes for a character cannot occur anywhere else ([source](https://en.wikipedia.org/wiki/UTF-8#Advantages_3)). So `str_replace()` also works for UTF-8-encoded strings, assuming that the input strings are valid UTF-8 strings. The previous implementation of mbStrReplace() did nothing to detect invalid strings. Also, `str_replace()` does not support [Unicode equivalence](https://en.wikipedia.org/wiki/Unicode_equivalence), but neither do PHP's `mbstring` functions, nor does `=SUBSTITUTE()` in Excel or LibreOffice (tested on Excel for Mac version 15.19.1, Excel 2016 for Windows and LibreOffice 5.1). Closes #109 --- src/PhpSpreadsheet/Calculation/TextData.php | 2 +- src/PhpSpreadsheet/Shared/StringHelper.php | 38 ------------------- .../data/Calculation/TextData/SUBSTITUTE.php | 12 ++++++ 3 files changed, 13 insertions(+), 39 deletions(-) diff --git a/src/PhpSpreadsheet/Calculation/TextData.php b/src/PhpSpreadsheet/Calculation/TextData.php index abf05d95..fd9cd7a9 100644 --- a/src/PhpSpreadsheet/Calculation/TextData.php +++ b/src/PhpSpreadsheet/Calculation/TextData.php @@ -493,7 +493,7 @@ class TextData $instance = floor(Functions::flattenSingleValue($instance)); if ($instance == 0) { - return \PhpOffice\PhpSpreadsheet\Shared\StringHelper::mbStrReplace($fromText, $toText, $text); + return str_replace($fromText, $toText, $text); } $pos = -1; diff --git a/src/PhpSpreadsheet/Shared/StringHelper.php b/src/PhpSpreadsheet/Shared/StringHelper.php index 299a9897..c4945540 100644 --- a/src/PhpSpreadsheet/Shared/StringHelper.php +++ b/src/PhpSpreadsheet/Shared/StringHelper.php @@ -550,44 +550,6 @@ class StringHelper return preg_split('/(? 
$val) { - $ret[$key] = self::mbStrReplace($search, $replace, $val); - } - - return $ret; - } - - foreach ((array) $search as $key => $s) { - if ($s == '' && $s !== 0) { - continue; - } - $r = !is_array($replace) ? $replace : (isset($replace[$key]) ? $replace[$key] : ''); - $pos = mb_strpos($subject, $s, 0, 'UTF-8'); - while ($pos !== false) { - $subject = mb_substr($subject, 0, $pos, 'UTF-8') . $r . mb_substr($subject, $pos + mb_strlen($s, 'UTF-8'), null, 'UTF-8'); - $pos = mb_strpos($subject, $s, $pos + mb_strlen($r, 'UTF-8'), 'UTF-8'); - } - } - - return $subject; - } - /** * Reverse the case of a string, so that all uppercase characters become lowercase * and all lowercase characters become uppercase. diff --git a/tests/data/Calculation/TextData/SUBSTITUTE.php b/tests/data/Calculation/TextData/SUBSTITUTE.php index 4c4dba1e..23f66a18 100644 --- a/tests/data/Calculation/TextData/SUBSTITUTE.php +++ b/tests/data/Calculation/TextData/SUBSTITUTE.php @@ -27,4 +27,16 @@ return [ 'a', 1, ], + 'Unicode equivalence is not supported' => [ + "\u{0061}\u{030A}", + "\u{0061}\u{030A}", + "\u{00E5}", + 'x', + ], + 'Multibytes are supported' => [ + 'x', + "\u{00E5}", + "\u{00E5}", + 'x', + ], ];