Replace mbStrReplace() with str_replace()
By design, UTF-8 allows any byte-oriented substring search algorithm to be used, because the byte sequence that encodes one character can never occur as part of the encoding of a different character ([source](https://en.wikipedia.org/wiki/UTF-8#Advantages_3)). `str_replace()` therefore also works for UTF-8-encoded strings, provided that the input strings are valid UTF-8. The previous implementation of `mbStrReplace()` did nothing to detect invalid strings either, so no validation is lost by this change. Note that `str_replace()` does not support [Unicode equivalence](https://en.wikipedia.org/wiki/Unicode_equivalence), but neither do the `mbstring` functions, and neither does `=SUBSTITUTE()` in Excel or LibreOffice (tested on Excel for Mac version 15.19.1, Excel 2016 for Windows and LibreOffice 5.1). Closes #109
This commit is contained in:
		
							parent
							
								
									85c3bd0154
								
							
						
					
					
						commit
						3560f11935
					
				| @ -493,7 +493,7 @@ class TextData | |||||||
|         $instance = floor(Functions::flattenSingleValue($instance)); |         $instance = floor(Functions::flattenSingleValue($instance)); | ||||||
| 
 | 
 | ||||||
|         if ($instance == 0) { |         if ($instance == 0) { | ||||||
|             return \PhpOffice\PhpSpreadsheet\Shared\StringHelper::mbStrReplace($fromText, $toText, $text); |             return str_replace($fromText, $toText, $text); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         $pos = -1; |         $pos = -1; | ||||||
|  | |||||||
| @ -550,44 +550,6 @@ class StringHelper | |||||||
|         return preg_split('/(?<!^)(?!$)/u', $string); |         return preg_split('/(?<!^)(?!$)/u', $string); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /** |  | ||||||
|      * Replace into multi-bytes string. |  | ||||||
|      * |  | ||||||
|      * Strangely, PHP doesn't have a mb_str_replace multibyte function
 |  | ||||||
|      * As we'll only ever use this function with UTF-8 characters, we can simply "hard-code" the character set |  | ||||||
|      * |  | ||||||
|      * @param string|string[] $search |  | ||||||
|      * @param string|string[] $replace |  | ||||||
|      * @param string $subject |  | ||||||
|      * |  | ||||||
|      * @return string |  | ||||||
|      */ |  | ||||||
|     public static function mbStrReplace($search, $replace, $subject) |  | ||||||
|     { |  | ||||||
|         if (is_array($subject)) { |  | ||||||
|             $ret = []; |  | ||||||
|             foreach ($subject as $key => $val) { |  | ||||||
|                 $ret[$key] = self::mbStrReplace($search, $replace, $val); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             return $ret; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         foreach ((array) $search as $key => $s) { |  | ||||||
|             if ($s == '' && $s !== 0) { |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             $r = !is_array($replace) ? $replace : (isset($replace[$key]) ? $replace[$key] : ''); |  | ||||||
|             $pos = mb_strpos($subject, $s, 0, 'UTF-8'); |  | ||||||
|             while ($pos !== false) { |  | ||||||
|                 $subject = mb_substr($subject, 0, $pos, 'UTF-8') . $r . mb_substr($subject, $pos + mb_strlen($s, 'UTF-8'), null, 'UTF-8'); |  | ||||||
|                 $pos = mb_strpos($subject, $s, $pos + mb_strlen($r, 'UTF-8'), 'UTF-8'); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         return $subject; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /** |     /** | ||||||
|      * Reverse the case of a string, so that all uppercase characters become lowercase |      * Reverse the case of a string, so that all uppercase characters become lowercase | ||||||
|      * and all lowercase characters become uppercase. |      * and all lowercase characters become uppercase. | ||||||
|  | |||||||
| @ -27,4 +27,16 @@ return [ | |||||||
|         'a', |         'a', | ||||||
|         1, |         1, | ||||||
|     ], |     ], | ||||||
|  |     'Unicode equivalence is not supported' => [ | ||||||
|  |         "\u{0061}\u{030A}", | ||||||
|  |         "\u{0061}\u{030A}", | ||||||
|  |         "\u{00E5}", | ||||||
|  |         'x', | ||||||
|  |     ], | ||||||
|  |     'Multibytes are supported' => [ | ||||||
|  |         'x', | ||||||
|  |         "\u{00E5}", | ||||||
|  |         "\u{00E5}", | ||||||
|  |         'x', | ||||||
|  |     ], | ||||||
| ]; | ]; | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Christian Schmidt
						Christian Schmidt