Sfoglia il codice sorgente

Merge remote branch 'lewinski/yaml-escapes'

* lewinski/yaml-escapes:
  [Yaml] Improved support for double quoted values.
Fabien Potencier 14 anni fa
parent
commit
0a8730ab26

+ 88 - 0
src/Symfony/Component/Yaml/Escaper.php

@@ -0,0 +1,88 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ * (c) Fabien Potencier <fabien.potencier@symfony-project.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\Yaml;
+
+/**
+ * Escaper encapsulates escaping rules for single and double-quoted
+ * YAML strings.
+ *
+ * @author Matthew Lewinski <matthew@lewinski.org>
+ */
+class Escaper
+{
+    // Regex fragment matching the characters that force a dumped string to be double quoted.
+    const REGEX_CHARACTER_TO_ESCAPE = "[\\x00-\\x1f]|\xc2\x85|\xc2\xa0|\xe2\x80\xa8|\xe2\x80\xa9";
+
+    // Parallel search/replace tables for escaping a double quoted string. The
+    // backslash comes first because str_replace() applies the pairs one after
+    // another: escaping it later would double the backslashes added by the other
+    // replacements. This ordering avoids the use of strtr, which performs more slowly.
+    private static $escapees = array('\\', '"',
+                                     "\x00",  "\x01",  "\x02",  "\x03",  "\x04",  "\x05",  "\x06",  "\x07",
+                                     "\x08",  "\x09",  "\x0a",  "\x0b",  "\x0c",  "\x0d",  "\x0e",  "\x0f",
+                                     "\x10",  "\x11",  "\x12",  "\x13",  "\x14",  "\x15",  "\x16",  "\x17",
+                                     "\x18",  "\x19",  "\x1a",  "\x1b",  "\x1c",  "\x1d",  "\x1e",  "\x1f",
+                                     "\xc2\x85", "\xc2\xa0", "\xe2\x80\xa8", "\xe2\x80\xa9");
+    private static $escaped  = array('\\\\', '\\"',
+                                     "\\0",   "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a",
+                                     "\\b",   "\\t",   "\\n",   "\\v",   "\\f",   "\\r",   "\\x0e", "\\x0f",
+                                     "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
+                                     "\\x18", "\\x19", "\\x1a", "\\e",   "\\x1c", "\\x1d", "\\x1e", "\\x1f",
+                                     "\\N", "\\_", "\\L", "\\P");
+
+    /**
+     * Determines if a PHP value would require double quoting in YAML.
+     *
+     * @param string $value A PHP value
+     *
+     * @return Boolean True if the value matches REGEX_CHARACTER_TO_ESCAPE (false when preg_match() fails).
+     */
+    public static function requiresDoubleQuoting($value)
+    {
+        return (bool) preg_match('/'.self::REGEX_CHARACTER_TO_ESCAPE.'/u', $value);
+    }
+
+    /**
+     * Escapes and surrounds a PHP value with double quotes.
+     *
+     * @param string $value A PHP value
+     *
+     * @return string The quoted, escaped string
+     */
+    public static function escapeWithDoubleQuotes($value)
+    {
+        return sprintf('"%s"', str_replace(self::$escapees, self::$escaped, $value));
+    }
+
+    /**
+     * Determines if a PHP value would require single quoting in YAML.
+     *
+     * @param string $value A PHP value
+     * @return Boolean True if the value would require single quotes.
+     */
+    public static function requiresSingleQuoting($value)
+    {
+        // The hyphen is escaped: spaces stay literal inside a class under /x, so " - " would be a range.
+        return (bool) preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ \- ? | < > = ! % @ ` ]/x', $value);
+    }
+
+    /**
+     * Escapes and surrounds a PHP value with single quotes.
+     *
+     * @param string $value A PHP value
+     *
+     * @return string The quoted string, with every single quote doubled
+     */
+    public static function escapeWithSingleQuotes($value)
+    {
+        return sprintf("'%s'", str_replace('\'', '\'\'', $value));
+    }
+}

+ 7 - 8
src/Symfony/Component/Yaml/Inline.php

@@ -88,10 +88,10 @@ class Inline
                 return is_string($value) ? "'$value'" : (int) $value;
             case is_numeric($value):
                 return is_infinite($value) ? str_ireplace('INF', '.Inf', strval($value)) : (is_string($value) ? "'$value'" : $value);
-            case false !== strpos($value, "\n") || false !== strpos($value, "\r"):
-                return sprintf('"%s"', str_replace(array('"', "\n", "\r"), array('\\"', '\n', '\r'), $value));
-            case preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ - ? | < > = ! % @ ` ]/x', $value):
-                return sprintf("'%s'", str_replace('\'', '\'\'', $value));
+            case Escaper::requiresDoubleQuoting($value):
+                return Escaper::escapeWithDoubleQuotes($value);
+            case Escaper::requiresSingleQuoting($value):
+                return Escaper::escapeWithSingleQuotes($value);
             case '' == $value:
                 return "''";
             case preg_match(self::getTimestampRegex(), $value):
@@ -197,12 +197,11 @@ class Inline
 
         $output = substr($match[0], 1, strlen($match[0]) - 2);
 
+        $unescaper = new Unescaper();
         if ('"' == $scalar[$i]) {
-            // evaluate the string
-            $output = str_replace(array('\\"', '\\n', '\\r'), array('"', "\n", "\r"), $output);
+            $output = $unescaper->unescapeDoubleQuotedString($output);
         } else {
-            // unescape '
-            $output = str_replace('\'\'', '\'', $output);
+            $output = $unescaper->unescapeSingleQuotedString($output);
         }
 
         $i += strlen($match[0]);

+ 142 - 0
src/Symfony/Component/Yaml/Unescaper.php

@@ -0,0 +1,142 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ * (c) Fabien Potencier <fabien.potencier@symfony-project.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\Yaml;
+
+/**
+ * Unescaper encapsulates unescaping rules for single and double-quoted
+ * YAML strings.
+ *
+ * @author Matthew Lewinski <matthew@lewinski.org>
+ */
+class Unescaper
+{
+    // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
+    // must be converted to that encoding.
+    const ENCODING = 'UTF-8';
+
+    // Regex fragment that matches one escape sequence (leading backslash
+    // included) in a double quoted string.
+    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";
+
+    /**
+     * Unescapes a single quoted string.
+     *
+     * @param string $value A single quoted string.
+     *
+     * @return string The string with every doubled single quote collapsed to one.
+     */
+    public function unescapeSingleQuotedString($value)
+    {
+        return str_replace('\'\'', '\'', $value);
+    }
+
+    /**
+     * Unescapes a double quoted string.
+     *
+     * @param string $value A double quoted string.
+     *
+     * @return string The unescaped string.
+     */
+    public function unescapeDoubleQuotedString($value)
+    {
+        $self = $this;
+        $callback = function($match) use($self) {
+            return $self->unescapeCharacter($match[0]);
+        };
+
+        // replace every escape sequence with the character it stands for
+        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
+    }
+
+    /**
+     * Unescapes a character that was found in a double-quoted string
+     *
+     * @param string $value An escaped character, i.e. a backslash followed by the escape code
+     *
+     * @return string The unescaped character (null when the escape code is not recognised)
+     */
+    public function unescapeCharacter($value)
+    {
+        // $value[0] is the backslash, $value[1] the escape code ("{}" offsets are a parse error in PHP 8).
+        switch ($value[1]) {
+            case '0':
+                return "\x00";
+            case 'a':
+                return "\x07";
+            case 'b':
+                return "\x08";
+            case 't':
+            case "\t":
+                return "\t";
+            case 'n':
+                return "\n";
+            case 'v':
+                return "\x0b";
+            case 'f':
+                return "\x0c";
+            case 'r':
+                return "\x0d";
+            case 'e':
+                return "\x1b";
+            case ' ':
+                return ' ';
+            case '"':
+                return '"';
+            case '/':
+                return '/';
+            case '\\':
+                return '\\';
+            case 'N':
+                // U+0085 NEXT LINE
+                return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
+            case '_':
+                // U+00A0 NO-BREAK SPACE
+                return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
+            case 'L':
+                // U+2028 LINE SEPARATOR
+                return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
+            case 'P':
+                // U+2029 PARAGRAPH SEPARATOR
+                return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
+            case 'x':
+                $char = pack('n', hexdec(substr($value, 2, 2)));
+                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
+            case 'u':
+                $char = pack('n', hexdec(substr($value, 2, 4)));
+                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
+            case 'U':
+                $char = pack('N', hexdec(substr($value, 2, 8)));
+                return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
+        }
+    }
+
+    /**
+     * Convert a string from one encoding to another.
+     *
+     * @param string $value The string to convert
+     * @param string $to    The output (target) encoding
+     * @param string $from  The input (source) encoding
+     *
+     * @return string The string with the new encoding
+     *
+     * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
+     */
+    protected function convertEncoding($value, $to, $from)
+    {
+        if (function_exists('iconv')) {
+            return iconv($from, $to, $value);
+        } elseif (function_exists('mb_convert_encoding')) {
+            return mb_convert_encoding($value, $to, $from);
+        }
+
+        throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
+    }
+}

+ 139 - 0
tests/Symfony/Tests/Component/Yaml/Fixtures/escapedCharacters.yml

@@ -0,0 +1,139 @@
+test: outside double quotes
+yaml: |
+    \0 \ \a \b \n
+php: |
+    "\\0 \\ \\a \\b \\n"
+---
+test: null
+yaml: |
+    "\0"
+php: |
+    "\x00"
+---
+test: bell
+yaml: |
+    "\a"
+php: |
+    "\x07"
+---
+test: backspace
+yaml: |
+    "\b"
+php: |
+    "\x08"
+---
+test: horizontal tab (1)
+yaml: |
+    "\t"
+php: |
+    "\x09"
+---
+test: horizontal tab (2)
+yaml: |
+    "\	"
+php: |
+    "\x09"
+---
+test: line feed
+yaml: |
+    "\n"
+php: |
+    "\x0a"
+---
+test: vertical tab
+yaml: |
+    "\v"
+php: |
+    "\x0b"
+---
+test: form feed
+yaml: |
+    "\f"
+php: |
+    "\x0c"
+---
+test: carriage return
+yaml: |
+    "\r"
+php: |
+    "\x0d"
+---
+test: escape
+yaml: |
+    "\e"
+php: |
+   "\x1b"
+---
+test: space
+yaml: |
+    "\ "
+php: |
+    "\x20"
+---
+test: slash
+yaml: |
+    "\/"
+php: |
+    "\x2f"
+---
+test: backslash
+yaml: |
+    "\\"
+php: |
+    "\\"
+---
+test: Unicode next line
+yaml: |
+    "\N"
+php: |
+    "\xc2\x85"
+---
+test: Unicode non-breaking space
+yaml: |
+    "\_"
+php: |
+    "\xc2\xa0"
+---
+test: Unicode line separator
+yaml: |
+    "\L"
+php: |
+    "\xe2\x80\xa8"
+---
+test: Unicode paragraph separator
+yaml: |
+    "\P"
+php: |
+    "\xe2\x80\xa9"
+---
+test: Escaped 8-bit Unicode
+yaml: |
+    "\x42"
+php: |
+    "B"
+---
+test: Escaped 16-bit Unicode
+yaml: |
+    "\u20ac"
+php: |
+    "\xe2\x82\xac"
+---
+test: Escaped 32-bit Unicode
+yaml: |
+    "\U00000043"
+php: |
+    "C"
+---
+test: Example 5.13 Escaped Characters
+note: |
+    Currently throws an error parsing first line. Maybe Symfony Yaml doesn't support
+    continuation of string across multiple lines? Keeping test here but disabled.
+todo: true
+yaml: |
+    "Fun with \\
+    \" \a \b \e \f \
+    \n \r \t \v \0 \
+    \  \_ \N \L \P \
+    \x41 \u0041 \U00000041"
+php: |
+    "Fun with \x5C\n\x22 \x07 \x08 \x1B \x0C\n\x0A \x0D \x09 \x0B \x00\n\x20 \xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9\nA A A"

+ 1 - 0
tests/Symfony/Tests/Component/Yaml/Fixtures/index.yml

@@ -1,3 +1,4 @@
+- escapedCharacters
 - sfComments
 - sfCompact
 - sfTests