$p) {
            $str = preg_replace('/[' . $p . ']/u', $r, $str);
        }

        return $str;
    }

    /**
     * String to URL
     *
     * Transforms a string to a proper URL: the string is de-accented, every
     * character other than ASCII letters, digits, underscore, whitespace,
     * apostrophe, colon, slash, square brackets and dash is dropped, and the
     * result is passed through {@see tidyURL()}.
     *
     * @param string    $str            String to transform
     * @param boolean   $with_slashes   Keep slashes in URL
     * @return string
     */
    public static function str2URL($str, $with_slashes = true)
    {
        $str = self::deaccent($str);
        $str = preg_replace('/[^A-Za-z0-9_\s\'\:\/[\]-]/', '', $str);

        return self::tidyURL($str, $with_slashes);
    }

    /**
     * URL cleanup
     *
     * Strips tags, removes the characters ? & # = + < > " %, turns apostrophes
     * into spaces, collapses whitespace runs into a single space, optionally
     * turns slashes and/or spaces into dashes, collapses dash runs, then
     * removes a leading slash and any run of dots followed by a slash.
     *
     * @param string    $str            URL to tidy
     * @param boolean   $keep_slashes   Keep slashes in URL
     * @param boolean   $keep_spaces    Keep spaces in URL
     * @return string
     */
    public static function tidyURL($str, $keep_slashes = true, $keep_spaces = false)
    {
        $str = strip_tags($str);
        $str = str_replace(['?', '&', '#', '=', '+', '<', '>', '"', '%'], '', $str);
        $str = str_replace("'", ' ', $str);
        $str = preg_replace('/[\s]+/u', ' ', trim($str));

        if (!$keep_slashes) {
            $str = str_replace('/', '-', $str);
        }

        if (!$keep_spaces) {
            $str = str_replace(' ', '-', $str);
        }

        $str = preg_replace('/[-]+/', '-', $str);

        # Remove path changes in URL
        $str = preg_replace('%^/%', '', $str);
        $str = preg_replace('%\.+/%', '', $str);

        return $str;
    }

    /**
     * Cut string
     *
     * Returns the string cut on a whitespace boundary so that it is at most
     * $l characters long. If the first word alone is at least $l characters
     * long, it is truncated to exactly $l characters.
     *
     * @param string    $str        String to cut
     * @param integer   $l          Length to keep
     * @return string
     */
    public static function cutString($str, $l)
    {
        # Words and the whitespace runs between them, in order
        $s = preg_split('/([\s]+)/u', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        if ($s === false) {
            # The split failed (e.g. $str is not valid UTF-8): nothing to keep
            return '';
        }

        if (mb_strlen($s[0]) >= $l) {
            return mb_substr($s[0], 0, $l);
        }

        $res    = '';
        $length = 0;

        foreach ($s as $v) {
            $length += mb_strlen($v);

            if ($length > $l) {
                break;
            }

            $res .= $v;
        }

        return trim($res);
    }

    /**
     * Split words
     *
     * Returns an array of the lowercased words (runs of at least three
     * non-separator characters) found in a given string, after the string
     * has been passed through html::clean().
     *
     * @param string    $str        Words to split
     * @return array
     */
    public static function splitWords($str)
    {
        # ASCII control chars, punctuation and symbols, U+00A0-U+00BF, whitespace
        $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';

        if (preg_match_all('/([^' . $non_word . ']{3,})/msu', html::clean($str), $match)) {
            foreach ($match[1] as $i => $v) {
                $match[1][$i] = mb_strtolower($v);
            }

            return $match[1];
        }

        return [];
    }

    /**
     * Encoding detection
     *
     * Returns the encoding (in lowercase) of given $str, chosen among UTF-8
     * and a fixed list of ISO-8859 encodings.
     *
     * @param string    $str        String
     * @return string
     */
    public static function detectEncoding($str)
    {
        return strtolower(mb_detect_encoding($str . ' ',
            'UTF-8,ISO-8859-1,ISO-8859-2,ISO-8859-3,' .
            'ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,' .
            'ISO-8859-9,ISO-8859-10,ISO-8859-13,ISO-8859-14,ISO-8859-15'));
    }

    /**
     * UTF8 conversions
     *
     * Returns an UTF-8 converted string. If $encoding is not specified, the
     * function will try to detect encoding.
     *
     * @param string    $str        String to convert
     * @param string    $encoding   Optional "from" encoding
     * @return string
     */
    public static function toUTF8($str, $encoding = null)
    {
        if (!$encoding) {
            $encoding = self::detectEncoding($str);
        }

        if ($encoding != 'utf-8') {
            $str = iconv($encoding, 'UTF-8', $str);
        }

        return $str;
    }

    /**
     * Find bad UTF8 tokens
     *
     * Locates the first bad byte in a UTF-8 string, returning its
     * byte index in the string, or false when no bad byte is found.
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @copyright Harry Fuecks (http://phputf8.sourceforge.net GNU LGPL 2.1)
     *
     * @param string    $str        String to search
     * @return integer|false
     */
    public static function utf8badFind($str)
    {
        $UTF8_BAD =
        '([\x00-\x7F]' .                          # ASCII (including control chars)
        '|[\xC2-\xDF][\x80-\xBF]' .               # non-overlong 2-byte
        '|\xE0[\xA0-\xBF][\x80-\xBF]' .           # excluding overlongs
        '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' .    # straight 3-byte
        '|\xED[\x80-\x9F][\x80-\xBF]' .           # excluding surrogates
        '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .        # planes 1-3
        '|[\xF1-\xF3][\x80-\xBF]{3}' .            # planes 4-15
        '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .        # plane 16
        '|(.{1}))';                               # invalid byte

        $pos = 0;

        while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) {
            $bytes = strlen($matches[0]);

            # Group 2 is only set when the "invalid byte" alternative matched
            if (isset($matches[2])) {
                return $pos;
            }

            # Valid sequence: skip it and keep scanning the remainder
            $pos += $bytes;
            $str = substr($str, $bytes);
        }

        return false;
    }

    /**
     * UTF8 cleanup
     *
     * Replaces non utf8 bytes in $str by $repl, one bad byte at a time,
     * until utf8badFind() no longer reports any.
     *
     * @copyright Harry Fuecks (http://phputf8.sourceforge.net GNU LGPL 2.1)
     *
     * @param string    $str        String to clean
     * @param string    $repl       Replacement string
     * @return string
     */
    public static function cleanUTF8($str, $repl = '?')
    {
        while (($bad_index = self::utf8badFind($str)) !== false) {
            $str = substr_replace($str, $repl, $bad_index, 1);
        }

        return $str;
    }

    /**
     * BOM removal
     *
     * Removes every occurrence of the UTF-8 byte order mark (bytes EF BB BF)
     * from a string.
     *
     * @param string    $str        String to clean
     * @return string
     */
    public static function removeBOM($str)
    {
        # The BOM is written as an explicit escape: a literal BOM character is
        # invisible in source and is easily lost by editors, which would leave
        # an empty needle behind.
        return str_replace("\xEF\xBB\xBF", '', $str);
    }

    /**
     * Quoted printable conversion
     *
     * Encodes given str to quoted printable: the input is split on LF or
     * CRLF, every byte below 32, above 126 or equal to "=" is written as
     * =XX (uppercase hex), and each line is terminated with CRLF.
     *
     * @param string    $str        String to encode
     * @return string
     */
    public static function QPEncode($str)
    {
        $res = '';

        # Byte-wise split (no "u" modifier): the encoder works on raw bytes
        # and must not fail when $str is not valid UTF-8.
        foreach (preg_split("/\r?\n/ms", $str) as $line) {
            $l = '';
            preg_match_all('/./', $line, $m);

            foreach ($m[0] as $c) {
                $a = ord($c);

                if ($a < 32 || $a == 61 || $a > 126) {
                    $c = sprintf('=%02X', $a);
                }

                $l .= $c;
            }

            $res .= $l . "\r\n";
        }

        return $res;
    }
}