瀏覽代碼

[sluggable] made transliterator not dependent on output buffering

Gediminas Morkevicius 14 年之前
父節點
當前提交
b0f67d89c9
共有 1 個文件被更改,包括 15 次插入和 27 次删除
  1. 15 27
      lib/Gedmo/Sluggable/Util/Urlizer.php

+ 15 - 27
lib/Gedmo/Sluggable/Util/Urlizer.php

@@ -236,13 +236,7 @@ class Urlizer
         
         $len = strlen($str);
         $i = 0;
-        
-        # Use an output buffer to copy the transliterated string
-        # This is done for performance vs. string concatenation - on my system, drops
-        # the average request time for the example from ~0.46ms to 0.41ms
-        # See http://phplens.com/lens/php-book/optimizing-debugging-php.php
-        # Section  "High Return Code Optimizations"
-        ob_start();
+        $result = '';
         
         while ($i < $len) {
             $ord = NULL;
@@ -275,7 +269,6 @@ class Urlizer
                                 + ($ord2-128)*64 + ($ord3-128);
                             $increment = 4;
                         } else {
-                            ob_end_clean();
                             throw new \Gedmo\Exception\UnexpectedValueException('Unidentified ut8 character was present, pure utf8 required');
                         }
                     }
@@ -291,7 +284,6 @@ class Urlizer
                 if (file_exists($bankfile)) {
                     # Load the appropriate database
                     if (!include $bankfile) {
-                        ob_end_clean();
                         throw new \Gedmo\Exception\RuntimeException('Cannot find character bank file: ' . $bankfile);
                     }
                 } else {
@@ -302,16 +294,14 @@ class Urlizer
 
             $newchar = $ord & 255;            
             if (isset($UTF8_TO_ASCII[$bank]) && array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
-                echo $UTF8_TO_ASCII[$bank][$newchar];
+                $result .= $UTF8_TO_ASCII[$bank][$newchar];
             } else {
-                echo $unknown;
+                $result .= $unknown;
             }
             $i += $increment;
         }
         
-        $str = ob_get_contents();
-        ob_end_clean();
-        return $str;
+        return $result;
     }
     
     /**
@@ -356,8 +346,8 @@ class Urlizer
     * @return boolean true if valid
     * @see http://hsivonen.iki.fi/php-utf8/
     */
-    public static function validUtf8($str) {
-        
+    public static function validUtf8($str) 
+    {    
         $mState = 0;     // cached expected number of octets after the current octet
                          // until the beginning of the next UTF8 character sequence
         $mUcs4  = 0;     // cached Unicode character
@@ -365,11 +355,11 @@ class Urlizer
         
         $len = strlen($str);
         
-        for($i = 0; $i < $len; $i++) {
+        for ($i = 0; $i < $len; $i++) {
             
             $in = ord($str{$i});
             
-            if ( $mState == 0) {
+            if ($mState == 0) {
                 
                 // When mState is zero we expect either a US-ASCII character or a
                 // multi-octet sequence.
@@ -377,28 +367,28 @@ class Urlizer
                     // US-ASCII, pass straight through.
                     $mBytes = 1;
                     
-                } else if (0xC0 == (0xE0 & ($in))) {
+                } elseif (0xC0 == (0xE0 & ($in))) {
                     // First octet of 2 octet sequence
                     $mUcs4 = ($in);
                     $mUcs4 = ($mUcs4 & 0x1F) << 6;
                     $mState = 1;
                     $mBytes = 2;
                     
-                } else if (0xE0 == (0xF0 & ($in))) {
+                } elseif (0xE0 == (0xF0 & ($in))) {
                     // First octet of 3 octet sequence
                     $mUcs4 = ($in);
                     $mUcs4 = ($mUcs4 & 0x0F) << 12;
                     $mState = 2;
                     $mBytes = 3;
                     
-                } else if (0xF0 == (0xF8 & ($in))) {
+                } elseif (0xF0 == (0xF8 & ($in))) {
                     // First octet of 4 octet sequence
                     $mUcs4 = ($in);
                     $mUcs4 = ($mUcs4 & 0x07) << 18;
                     $mState = 3;
                     $mBytes = 4;
                     
-                } else if (0xF8 == (0xFC & ($in))) {
+                } elseif (0xF8 == (0xFC & ($in))) {
                     /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
@@ -412,7 +402,7 @@ class Urlizer
                     $mState = 4;
                     $mBytes = 5;
                     
-                } else if (0xFC == (0xFE & ($in))) {
+                } elseif (0xFC == (0xFE & ($in))) {
                     // First octet of 6 octet sequence, see comments for 5 octet sequence.
                     $mUcs4 = ($in);
                     $mUcs4 = ($mUcs4 & 1) << 30;
@@ -444,7 +434,6 @@ class Urlizer
                     * Unicode codepoint to be output
                     */
                     if (0 == --$mState) {
-                        
                         /*
                         * Check for illegal sequences and codepoints.
                         */
@@ -456,10 +445,9 @@ class Urlizer
                             // From Unicode 3.2, surrogate characters are illegal
                             (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                             // Codepoints outside the Unicode range are illegal
-                            ($mUcs4 > 0x10FFFF)) {
-                            
+                            ($mUcs4 > 0x10FFFF)
+                        ) {
                             return FALSE;
-                            
                         }
                         
                         //initialize UTF8 cache