Replace mbStrReplace() with str_replace()

By design, UTF-8 allows any byte-oriented substring searching algorithm to be used, since the sequence of bytes for one character cannot occur as part of the sequence for another character ([source](https://en.wikipedia.org/wiki/UTF-8#Advantages_3)). So `str_replace()` also works for UTF-8-encoded strings, assuming that the input strings are valid UTF-8 strings. The previous implementation of mbStrReplace() did nothing to detect invalid strings. Also, `str_replace()` does not support [Unicode equivalence](https://en.wikipedia.org/wiki/Unicode_equivalence), but neither do the other `mbstring` functions, nor does `=SUBSTITUTE()` in Excel (tested on Excel for Mac version 15.19.1, Excel 2016 for Windows and LibreOffice 5.1). Closes #109
2017-03-05 12:35:21 +01:00 · 2017-03-05 12:35:21 +01:00 · 3560f11935
commit 3560f11935
parent 85c3bd0154
3 changed files with 13 additions and 39 deletions
--- a/src/PhpSpreadsheet/Calculation/TextData.php
+++ b/src/PhpSpreadsheet/Calculation/TextData.php
@ -493,7 +493,7 @@ class TextData
        $instance = floor(Functions::flattenSingleValue($instance));
        if ($instance == 0) {
-            return \PhpOffice\PhpSpreadsheet\Shared\StringHelper::mbStrReplace($fromText, $toText, $text);
+            return str_replace($fromText, $toText, $text);
        }
        $pos = -1;
--- a/src/PhpSpreadsheet/Shared/StringHelper.php
+++ b/src/PhpSpreadsheet/Shared/StringHelper.php
@ -550,44 +550,6 @@ class StringHelper
        return preg_split('/(?<!^)(?!$)/u', $string);
    }
    /**
     * Multibyte-aware counterpart of str_replace() for UTF-8 text.
     *
     * PHP ships no mb_str_replace(), so each occurrence is located and spliced
     * out with the mb_* functions, always passing the hard-coded 'UTF-8' encoding.
     * Empty search terms are skipped (the integer 0 is still searched for), and a
     * search term without a matching key in a $replace array is replaced by ''.
     * When $subject is an array, every element is processed recursively and the
     * array keys are kept.
     *
     * @param string|string[] $search Text, or list of texts, to look for
     * @param string|string[] $replace Replacement text, or replacements paired with $search by key
     * @param string|string[] $subject Text, or array of texts, to process
     *
     * @return string|string[] The processed text, or an array of them when $subject is an array
     */
    public static function mbStrReplace($search, $replace, $subject)
    {
        if (is_array($subject)) {
            // array_map() with a single input array preserves the original keys.
            return array_map(
                function ($element) use ($search, $replace) {
                    return self::mbStrReplace($search, $replace, $element);
                },
                $subject
            );
        }

        foreach ((array) $search as $index => $needle) {
            // Nothing to look for; the integer 0 is deliberately exempt from this skip.
            if ($needle == '' && $needle !== 0) {
                continue;
            }

            // Pick the replacement paired with this search term, defaulting to ''.
            if (is_array($replace)) {
                $substitute = isset($replace[$index]) ? $replace[$index] : '';
            } else {
                $substitute = $replace;
            }

            $at = mb_strpos($subject, $needle, 0, 'UTF-8');
            while ($at !== false) {
                $head = mb_substr($subject, 0, $at, 'UTF-8');
                $tail = mb_substr($subject, $at + mb_strlen($needle, 'UTF-8'), null, 'UTF-8');
                $subject = $head . $substitute . $tail;

                // Resume after the inserted text so the replacement itself is never rescanned.
                $at = mb_strpos($subject, $needle, $at + mb_strlen($substitute, 'UTF-8'), 'UTF-8');
            }
        }

        return $subject;
    }
    /**
     * Reverse the case of a string, so that all uppercase characters become lowercase
     * and all lowercase characters become uppercase.
--- a/tests/data/Calculation/TextData/SUBSTITUTE.php
+++ b/tests/data/Calculation/TextData/SUBSTITUTE.php
@ -27,4 +27,16 @@ return [
        'a',
        1,
    ],
    'Unicode equivalence is not supported' => [
        "\u{0061}\u{030A}",
        "\u{0061}\u{030A}",
        "\u{00E5}",
        'x',
    ],
    'Multibytes are supported' => [
        'x',
        "\u{00E5}",
        "\u{00E5}",
        'x',
    ],
 ];