%PDF- %PDF-
Direktori : /home/bitrix/www/bitrix/modules/main/lib/text/ |
Current File : //home/bitrix/www/bitrix/modules/main/lib/text/utfsafestring.php |
<?php /** * Bitrix Framework * @package bitrix * @subpackage main * @copyright 2001-2016 Bitrix */ namespace Bitrix\Main\Text; class UtfSafeString { public static function getLastPosition($haystack, $needle) { if (defined("BX_UTF")) { //mb_strrpos does not work on invalid UTF-8 strings $ln = strlen($needle); for ($i = strlen($haystack) - $ln; $i >= 0; $i--) { if (substr($haystack, $i, $ln) == $needle) { return $i; } } return false; } return strrpos($haystack, $needle); } public static function rtrimInvalidUtf($string) { //valid UTF-8 octet sequences //0xxxxxxx //110xxxxx 10xxxxxx //1110xxxx 10xxxxxx 10xxxxxx //11110xxx 10xxxxxx 10xxxxxx 10xxxxxx $last4bytes = \CUtil::binsubstr($string, -3); $reversed = array_reverse(unpack("C*", $last4bytes)); if (($reversed[0] & 0x80) === 0x00) //ASCII return $string; elseif (($reversed[0] & 0xC0) === 0xC0) //Start of utf seq (cut it!) return \CUtil::binsubstr($string, 0, -1); elseif (($reversed[1] & 0xE0) === 0xE0) //Start of utf seq (longer than 2 bytes) return \CUtil::binsubstr($string, 0, -2); elseif (($reversed[2] & 0xE0) === 0xF0) //Start of utf seq (longer than 3 bytes) return \CUtil::binsubstr($string, 0, -3); return $string; } /** * Escapes 4-bytes UTF sequences. * * @param $string * @return string */ public static function escapeInvalidUtf($string) { $escape = function($matches) { return (isset($matches[2])? '?' : $matches[1]); }; return preg_replace_callback('/([\x00-\x7F]+ |[\xC2-\xDF][\x80-\xBF] |\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]) |([\x80-\xFF])/x', $escape, $string ); } }