Skip to content
Snippets Groups Projects
Select Git revision
  • projects-task-8
  • master default
  • disable-new-requests
  • fix-bulletin-view-missing-notes-error
  • add-missing-queue-managers
  • projects-task-53
  • projects-task-51
  • projects-task-43
  • projects-task-24
  • projects-task-31
  • projects-task-32
  • project-setup-docs
  • projects-task-28
  • projects-task-27
  • projects-task-9
  • projects-task-7
  • mass-update-course-codes-in-sections
  • wdn-four
  • learning-outcomes
  • additional-bulletin-pages
  • svn-redesign
  • svn-popups
  • svn-trunk
  • svn-performance
  • svn-tim
25 results

String.php

Blame
  • String.php 26.96 KiB
    <?php
    /**
     * Provides static methods for charset and locale safe string manipulation.
     *
     * Copyright 2003-2013 Horde LLC (http://www.horde.org/)
     *
     * See the enclosed file COPYING for license information (LGPL). If you
     * did not receive this file, see http://www.horde.org/licenses/lgpl21.
     *
     * @author   Jan Schneider <jan@horde.org>
     * @category Horde
     * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
     * @package  Util
     */
    class Horde_String
    {
        /**
         * lower() cache.
         *
         * @var array
         */
        static protected $_lowers = array();
    
        /**
         * upper() cache.
         *
         * @var array
         */
        static protected $_uppers = array();
    
        /**
         * Converts a string from one charset to another.
         *
         * Uses the iconv or the mbstring extensions.
         * The original string is returned if conversion failed or none
         * of the extensions were available.
         *
         * @param mixed $input    The data to be converted. If $input is an an
         *                        array, the array's values get converted
         *                        recursively.
         * @param string $from    The string's current charset.
         * @param string $to      The charset to convert the string to.
         * @param boolean $force  Force conversion?
         *
         * @return mixed  The converted input data.
         */
        static public function convertCharset($input, $from, $to, $force = false)
        {
            /* Don't bother converting numbers. */
            if (is_numeric($input)) {
                return $input;
            }
    
            /* If the from and to character sets are identical, return now. */
            if (!$force && $from == $to) {
                return $input;
            }
            $from = self::lower($from);
            $to = self::lower($to);
            if (!$force && $from == $to) {
                return $input;
            }
    
            if (is_array($input)) {
                $tmp = array();
                reset($input);
                while (list($key, $val) = each($input)) {
                    $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
                }
                return $tmp;
            }
    
            if (is_object($input)) {
                // PEAR_Error/Exception objects are almost guaranteed to contain
                // recursion, which will cause a segfault in PHP. We should never
                // reach this line, but add a check.
                if (($input instanceof Exception) ||
                    ($input instanceof PEAR_Error)) {
                    return '';
                }
    
                $input = clone $input;
                $vars = get_object_vars($input);
                while (list($key, $val) = each($vars)) {
                    $input->$key = self::convertCharset($val, $from, $to, $force);
                }
                return $input;
            }
    
            if (!is_string($input)) {
                return $input;
            }
    
            return self::_convertCharset($input, $from, $to);
        }
    
        /**
         * Internal function used to do charset conversion.
         *
         * @param string $input  See self::convertCharset().
         * @param string $from   See self::convertCharset().
         * @param string $to     See self::convertCharset().
         *
         * @return string  The converted string.
         */
        static protected function _convertCharset($input, $from, $to)
        {
            /* Use utf8_[en|de]code() if possible and if the string isn't too
             * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
             * functions use more memory. */
            if (Horde_Util::extensionExists('xml') &&
                ((strlen($input) < 16777216) ||
                 !Horde_Util::extensionExists('iconv') ||
                 !Horde_Util::extensionExists('mbstring'))) {
                if (($to == 'utf-8') &&
                    in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                    return utf8_encode($input);
                }
    
                if (($from == 'utf-8') &&
                    in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                    return utf8_decode($input);
                }
            }
    
            /* Try UTF7-IMAP conversions. */
            if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
                try {
                    if ($from == 'utf7-imap') {
                        return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
                    } else {
                        if ($from == 'utf-8') {
                            $conv = $input;
                        } else {
                            $conv = self::convertCharset($input, $from, 'UTF-8');
                        }
                        return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
                    }
                } catch (Horde_Imap_Client_Exception $e) {
                    return $input;
                }
            }
    
            /* Try iconv with transliteration. */
            if (Horde_Util::extensionExists('iconv')) {
                unset($php_errormsg);
                ini_set('track_errors', 1);
                $out = @iconv($from, $to . '//TRANSLIT', $input);
                $errmsg = isset($php_errormsg);
                ini_restore('track_errors');
                if (!$errmsg) {
                    return $out;
                }
            }
    
            /* Try mbstring. */
            if (Horde_Util::extensionExists('mbstring')) {
                $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
                if (!empty($out)) {
                    return $out;
                }
            }
    
            return $input;
        }
    
        /**
         * Makes a string lowercase.
         *
         * @param string $string   The string to be converted.
         * @param boolean $locale  If true the string will be converted based on
         *                         a given charset, locale independent else.
         * @param string $charset  If $locale is true, the charset to use when
         *                         converting.
         *
         * @return string  The string with lowercase characters.
         */
        static public function lower($string, $locale = false, $charset = null)
        {
            if ($locale) {
                if (Horde_Util::extensionExists('mbstring')) {
                    if (is_null($charset)) {
                        throw new InvalidArgumentException('$charset argument must not be null');
                    }
                    $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
                    if (!empty($ret)) {
                        return $ret;
                    }
                }
                return strtolower($string);
            }
    
            if (!isset(self::$_lowers[$string])) {
                $language = setlocale(LC_CTYPE, 0);
                setlocale(LC_CTYPE, 'C');
                self::$_lowers[$string] = strtolower($string);
                setlocale(LC_CTYPE, $language);
            }
    
            return self::$_lowers[$string];
        }
    
        /**
         * Makes a string uppercase.
         *
         * @param string $string   The string to be converted.
         * @param boolean $locale  If true the string will be converted based on a
         *                         given charset, locale independent else.
         * @param string $charset  If $locale is true, the charset to use when
         *                         converting. If not provided the current charset.
         *
         * @return string  The string with uppercase characters.
         */
        static public function upper($string, $locale = false, $charset = null)
        {
            if ($locale) {
                if (Horde_Util::extensionExists('mbstring')) {
                    if (is_null($charset)) {
                        throw new InvalidArgumentException('$charset argument must not be null');
                    }
                    $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
                    if (!empty($ret)) {
                        return $ret;
                    }
                }
                return strtoupper($string);
            }
    
            if (!isset(self::$_uppers[$string])) {
                $language = setlocale(LC_CTYPE, 0);
                setlocale(LC_CTYPE, 'C');
                self::$_uppers[$string] = strtoupper($string);
                setlocale(LC_CTYPE, $language);
            }
    
            return self::$_uppers[$string];
        }
    
        /**
         * Returns a string with the first letter capitalized if it is
         * alphabetic.
         *
         * @param string $string   The string to be capitalized.
         * @param boolean $locale  If true the string will be converted based on a
         *                         given charset, locale independent else.
         * @param string $charset  The charset to use, defaults to current charset.
         *
         * @return string  The capitalized string.
         */
        static public function ucfirst($string, $locale = false, $charset = null)
        {
            if ($locale) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                $first = self::substr($string, 0, 1, $charset);
                if (self::isAlpha($first, $charset)) {
                    $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
                }
            } else {
                $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
            }
    
            return $string;
        }
    
        /**
         * Returns a string with the first letter of each word capitalized if it is
         * alphabetic.
         *
         * Sentences are splitted into words at whitestrings.
         *
         * @param string $string   The string to be capitalized.
         * @param boolean $locale  If true the string will be converted based on a
         *                         given charset, locale independent else.
         * @param string $charset  The charset to use, defaults to current charset.
         *
         * @return string  The capitalized string.
         */
        static public function ucwords($string, $locale = false, $charset = null)
        {
            $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
            for ($i = 0, $c = count($words); $i < $c; $i += 2) {
                $words[$i] = self::ucfirst($words[$i], $locale, $charset);
            }
            return implode('', $words);
        }
    
        /**
         * Returns part of a string.
         *
         * @param string $string   The string to be converted.
         * @param integer $start   The part's start position, zero based.
         * @param integer $length  The part's length.
         * @param string $charset  The charset to use when calculating the part's
         *                         position and length, defaults to current
         *                         charset.
         *
         * @return string  The string's part.
         */
        static public function substr($string, $start, $length = null,
                                      $charset = 'UTF-8')
        {
            if (is_null($length)) {
                $length = self::length($string, $charset) - $start;
            }
    
            if ($length == 0) {
                return '';
            }
    
            /* Try mbstring. */
            if (Horde_Util::extensionExists('mbstring')) {
                $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
    
                /* mb_substr() returns empty string on failure. */
                if (strlen($ret)) {
                    return $ret;
                }
            }
    
            /* Try iconv. */
            if (Horde_Util::extensionExists('iconv')) {
                $ret = @iconv_substr($string, $start, $length, $charset);
    
                /* iconv_substr() returns false on failure. */
                if ($ret !== false) {
                    return $ret;
                }
            }
    
            return substr($string, $start, $length);
        }
    
        /**
         * Returns the character (not byte) length of a string.
         *
         * @param string $string  The string to return the length of.
         * @param string $charset The charset to use when calculating the string's
         *                        length.
         *
         * @return integer  The string's length.
         */
        static public function length($string, $charset = 'UTF-8')
        {
            $charset = self::lower($charset);
    
            if ($charset == 'utf-8' || $charset == 'utf8') {
                return strlen(utf8_decode($string));
            }
    
            if (Horde_Util::extensionExists('mbstring')) {
                $ret = @mb_strlen($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            }
    
            return strlen($string);
        }
    
        /**
         * Returns the numeric position of the first occurrence of $needle
         * in the $haystack string.
         *
         * @param string $haystack  The string to search through.
         * @param string $needle    The string to search for.
         * @param integer $offset   Allows to specify which character in haystack
         *                          to start searching.
         * @param string $charset   The charset to use when searching for the
         *                          $needle string.
         *
         * @return integer  The position of first occurrence.
         */
        static public function pos($haystack, $needle, $offset = 0,
                                   $charset = 'UTF-8')
        {
            if (Horde_Util::extensionExists('mbstring')) {
                $track_errors = ini_set('track_errors', 1);
                $ret = @mb_strpos($haystack, $needle, $offset, self::_mbstringCharset($charset));
                ini_set('track_errors', $track_errors);
                if (!isset($php_errormsg)) {
                    return $ret;
                }
            }
    
            return strpos($haystack, $needle, $offset);
        }
    
        /**
         * Returns the numeric position of the last occurrence of $needle
         * in the $haystack string.
         *
         * @param string $haystack  The string to search through.
         * @param string $needle    The string to search for.
         * @param integer $offset   Allows to specify which character in haystack
         *                          to start searching.
         * @param string $charset   The charset to use when searching for the
         *                          $needle string.
         *
         * @return integer  The position of first occurrence.
         */
        static public function rpos($haystack, $needle, $offset = 0,
                                    $charset = 'UTF-8')
        {
            if (Horde_Util::extensionExists('mbstring')) {
                $track_errors = ini_set('track_errors', 1);
                $ret = @mb_strrpos($haystack, $needle, $offset, self::_mbstringCharset($charset));
                ini_set('track_errors', $track_errors);
                if (!isset($php_errormsg)) {
                    return $ret;
                }
            }
    
            return strrpos($haystack, $needle, $offset);
        }
    
        /**
         * Returns a string padded to a certain length with another string.
         * This method behaves exactly like str_pad() but is multibyte safe.
         *
         * @param string $input    The string to be padded.
         * @param integer $length  The length of the resulting string.
         * @param string $pad      The string to pad the input string with. Must
         *                         be in the same charset like the input string.
         * @param const $type      The padding type. One of STR_PAD_LEFT,
         *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
         * @param string $charset  The charset of the input and the padding
         *                         strings.
         *
         * @return string  The padded string.
         */
        static public function pad($input, $length, $pad = ' ',
                                   $type = STR_PAD_RIGHT, $charset = 'UTF-8')
        {
            $mb_length = self::length($input, $charset);
            $sb_length = strlen($input);
            $pad_length = self::length($pad, $charset);
    
            /* Return if we already have the length. */
            if ($mb_length >= $length) {
                return $input;
            }
    
            /* Shortcut for single byte strings. */
            if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
                return str_pad($input, $length, $pad, $type);
            }
    
            switch ($type) {
            case STR_PAD_LEFT:
                $left = $length - $mb_length;
                $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
                break;
    
            case STR_PAD_BOTH:
                $left = floor(($length - $mb_length) / 2);
                $right = ceil(($length - $mb_length) / 2);
                $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
                    $input .
                    self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
                break;
    
            case STR_PAD_RIGHT:
                $right = $length - $mb_length;
                $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
                break;
            }
    
            return $output;
        }
    
        /**
         * Wraps the text of a message.
         *
         * @param string $string         String containing the text to wrap.
         * @param integer $width         Wrap the string at this number of
         *                               characters.
         * @param string $break          Character(s) to use when breaking lines.
         * @param boolean $cut           Whether to cut inside words if a line
         *                               can't be wrapped.
         * @param boolean $line_folding  Whether to apply line folding rules per
         *                               RFC 822 or similar. The correct break
         *                               characters including leading whitespace
         *                               have to be specified too.
         *
         * @return string  String containing the wrapped text.
         */
        static public function wordwrap($string, $width = 75, $break = "\n",
                                        $cut = false, $line_folding = false)
        {
            $wrapped = '';
    
            while (self::length($string, 'UTF-8') > $width) {
                $line = self::substr($string, 0, $width, 'UTF-8');
                $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');
    
                // Make sure we didn't cut a word, unless we want hard breaks
                // anyway.
                if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
                    $line .= $match[1];
                    $string = $match[2];
                }
    
                // Wrap at existing line breaks.
                if (preg_match('/^(.*?)(\r?\n)(.*)$/su', $line, $match)) {
                    $wrapped .= $match[1] . $match[2];
                    $string = $match[3] . $string;
                    continue;
                }
    
                // Wrap at the last colon or semicolon followed by a whitespace if
                // doing line folding.
                if ($line_folding &&
                    preg_match('/^(.*?)(;|:)(\s+.*)$/u', $line, $match)) {
                    $wrapped .= $match[1] . $match[2] . $break;
                    $string = $match[3] . $string;
                    continue;
                }
    
                // Wrap at the last whitespace of $line.
                $sub = $line_folding
                    ? '(.+[^\s])'
                    : '(.*)';
    
                if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
                    $wrapped .= $match[1] . $break;
                    $string = ($line_folding ? $match[2] : '') . $match[3] . $string;
                    continue;
                }
    
                // Hard wrap if necessary.
                if ($cut) {
                    $wrapped .= $line . $break;
                    continue;
                }
    
                $wrapped .= $line;
            }
    
            return $wrapped . $string;
        }
    
        /**
         * Wraps the text of a message.
         *
         * @param string $text        String containing the text to wrap.
         * @param integer $length     Wrap $text at this number of characters.
         * @param string $break_char  Character(s) to use when breaking lines.
         * @param boolean $quote      Ignore lines that are wrapped with the '>'
         *                            character (RFC 2646)? If true, we don't
         *                            remove any padding whitespace at the end of
         *                            the string.
         *
         * @return string  String containing the wrapped text.
         */
        static public function wrap($text, $length = 80, $break_char = "\n",
                                    $quote = false)
        {
            $paragraphs = array();
    
            foreach (preg_split('/\r?\n/', $text) as $input) {
                if ($quote && (strpos($input, '>') === 0)) {
                    $line = $input;
                } else {
                    /* We need to handle the Usenet-style signature line
                     * separately; since the space after the two dashes is
                     * REQUIRED, we don't want to trim the line. */
                    if ($input != '-- ') {
                        $input = rtrim($input);
                    }
                    $line = self::wordwrap($input, $length, $break_char);
                }
    
                $paragraphs[] = $line;
            }
    
            return implode($break_char, $paragraphs);
        }
    
        /**
         * Return a truncated string, suitable for notifications.
         *
         * @param string $text     The original string.
         * @param integer $length  The maximum length.
         *
         * @return string  The truncated string, if longer than $length.
         */
        static public function truncate($text, $length = 100)
        {
            return (self::length($text) > $length)
                ? rtrim(self::substr($text, 0, $length - 3)) . '...'
                : $text;
        }
    
        /**
         * Return an abbreviated string, with characters in the middle of the
         * excessively long string replaced by '...'.
         *
         * @param string $text     The original string.
         * @param integer $length  The length at which to abbreviate.
         *
         * @return string  The abbreviated string, if longer than $length.
         */
        static public function abbreviate($text, $length = 20)
        {
            return (self::length($text) > $length)
                ? rtrim(self::substr($text, 0, round(($length - 3) / 2))) . '...' . ltrim(self::substr($text, (($length - 3) / 2) * -1))
                : $text;
        }
    
        /**
         * Returns the common leading part of two strings.
         *
         * @param string $str1  A string.
         * @param string $str2  Another string.
         *
         * @return string  The start of $str1 and $str2 that is identical in both.
         */
        static public function common($str1, $str2)
        {
            for ($result = '', $i = 0;
                 isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
                 $i++) {
                $result .= $str1[$i];
            }
            return $result;
        }
    
        /**
         * Returns true if the every character in the parameter is an alphabetic
         * character.
         *
         * @param string $string   The string to test.
         * @param string $charset  The charset to use when testing the string.
         *
         * @return boolean  True if the parameter was alphabetic only.
         */
        static public function isAlpha($string, $charset)
        {
            if (!Horde_Util::extensionExists('mbstring')) {
                return ctype_alpha($string);
            }
    
            $charset = self::_mbstringCharset($charset);
            $old_charset = mb_regex_encoding();
    
            if ($charset != $old_charset) {
                @mb_regex_encoding($charset);
            }
            $alpha = !@mb_ereg_match('[^[:alpha:]]', $string);
            if ($charset != $old_charset) {
                @mb_regex_encoding($old_charset);
            }
    
            return $alpha;
        }
    
        /**
         * Returns true if ever character in the parameter is a lowercase letter in
         * the current locale.
         *
         * @param string $string   The string to test.
         * @param string $charset  The charset to use when testing the string.
         *
         * @return boolean  True if the parameter was lowercase.
         */
        static public function isLower($string, $charset)
        {
            return ((self::lower($string, true, $charset) === $string) &&
                    self::isAlpha($string, $charset));
        }
    
        /**
         * Returns true if every character in the parameter is an uppercase letter
         * in the current locale.
         *
         * @param string $string   The string to test.
         * @param string $charset  The charset to use when testing the string.
         *
         * @return boolean  True if the parameter was uppercase.
         */
        static public function isUpper($string, $charset)
        {
            return ((self::upper($string, true, $charset) === $string) &&
                    self::isAlpha($string, $charset));
        }
    
        /**
         * Performs a multibyte safe regex match search on the text provided.
         *
         * @param string $text     The text to search.
         * @param array $regex     The regular expressions to use, without perl
         *                         regex delimiters (e.g. '/' or '|').
         * @param string $charset  The character set of the text.
         *
         * @return array  The matches array from the first regex that matches.
         */
        static public function regexMatch($text, $regex, $charset = null)
        {
            if (!empty($charset)) {
                $regex = self::convertCharset($regex, $charset, 'utf-8');
                $text = self::convertCharset($text, $charset, 'utf-8');
            }
    
            $matches = array();
            foreach ($regex as $val) {
                if (preg_match('/' . $val . '/u', $text, $matches)) {
                    break;
                }
            }
    
            if (!empty($charset)) {
                $matches = self::convertCharset($matches, 'utf-8', $charset);
            }
    
            return $matches;
        }
    
        /**
         * Check to see if a string is valid UTF-8.
         *
         * @param string $text  The text to check.
         *
         * @return boolean  True if valid UTF-8.
         */
        static public function validUtf8($text)
        {
            $text = strval($text);
    
            for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
                $c = ord($text[$i]);
    
                if ($c > 128) {
                    if ($c > 247) {
                        // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
                        return false;
                    } elseif ($c > 239) {
                        $j = 3;
                    } elseif ($c > 223) {
                        $j = 2;
                    } elseif ($c > 191) {
                        $j = 1;
                    } else {
                        return false;
                    }
    
                    if (($i + $j) > $len) {
                        return false;
                    }
    
                    do {
                        $c = ord($text[++$i]);
                        if (($c < 128) || ($c > 191)) {
                            return false;
                        }
                    } while (--$j);
                }
            }
    
            return true;
        }
    
        /**
         * Workaround charsets that don't work with mbstring functions.
         *
         * @param string $charset  The original charset.
         *
         * @return string  The charset to use with mbstring functions.
         */
        static protected function _mbstringCharset($charset)
        {
            /* mbstring functions do not handle the 'ks_c_5601-1987' &
             * 'ks_c_5601-1989' charsets. However, these charsets are used, for
             * example, by various versions of Outlook to send Korean characters.
             * Use UHC (CP949) encoding instead. See, e.g.,
             * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
            return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
                ? 'UHC'
                : $charset;
        }
    
        /**
         * Strip UTF-8 byte order mark (BOM) from string data.
         *
         * @param string $str  Input string (UTF-8).
         *
         * @return string  Stripped string (UTF-8).
         */
        static public function trimUtf8Bom($str)
        {
            return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
                ? substr($str, 3)
                : $str;
        }
    
    }