Parser.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. <?php
  2. namespace Symfony\Components\Yaml;
  3. /*
  4. * This file is part of the symfony package.
  5. * (c) Fabien Potencier <fabien.potencier@symfony-project.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. /**
  11. * Parser parses YAML strings to convert them to PHP arrays.
  12. *
  13. * @package symfony
  14. * @subpackage yaml
  15. * @author Fabien Potencier <fabien.potencier@symfony-project.com>
  16. */
  17. class Parser
  18. {
  /** @var array Values stored under YAML anchors (&name), keyed by anchor name; shared by reference with nested parsers */
  protected $refs = array();

  /** @var array The cleaned-up document split into lines */
  protected $lines = array();

  /** @var string Text of the line the parser is currently positioned on */
  protected $currentLine = '';

  /** @var integer Index into $lines of the current line (-1 before the first line has been read) */
  protected $currentLineNb = -1;

  /** @var integer Line offset added to $currentLineNb when reporting line numbers */
  protected $offset = 0;
  /**
   * Creates a parser.
   *
   * @param integer $offset Line offset of the YAML document; it is added to the
   *                        internal line index when line numbers are reported
   *                        in error messages
   */
  public function __construct($offset = 0)
  {
    $this->offset = $offset;
  }
  /**
   * Parses a YAML string to a PHP value.
   *
   * The cleaned-up document is walked line by line. Each non-empty line is
   * either a sequence item ("- ..."), a mapping entry ("key: ..."), or, when
   * the whole document is a single line, an inline value. Nested blocks are
   * parsed by a fresh Parser that shares this parser's reference table.
   *
   * @param string $value A YAML string
   *
   * @return mixed A PHP value (null when no data was collected)
   *
   * @throws ParserException If the YAML is not valid
   */
  public function parse($value)
  {
    $this->currentLineNb = -1;
    $this->currentLine = '';
    $this->lines = explode("\n", $this->cleanup($value));

    $data = array();
    while ($this->moveToNextLine())
    {
      if ($this->isCurrentLineEmpty())
      {
        continue;
      }

      // tab?
      if (preg_match('#^\t+#', $this->currentLine))
      {
        throw new ParserException(sprintf('A YAML file cannot contain tabs as indentation at line %d (%s).', $this->getRealCurrentLineNb() + 1, $this->currentLine));
      }

      // $isRef:       anchor name to register for the value parsed on this line
      // $isInPlace:   name of the reference that replaces $data ("<<: *name")
      // $isProcessed: merged array produced by a "<<" merge key
      $isRef = $isInPlace = $isProcessed = false;
      if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#', $this->currentLine, $values))
      {
        if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#', $values['value'], $matches))
        {
          $isRef = $matches['ref'];
          $values['value'] = $matches['value'];
        }

        // array
        if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#'))
        {
          // the offset argument is evaluated before getNextEmbedBlock() moves the cursor
          $data[] = $this->parseNested($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock());
        }
        else
        {
          if (preg_match('/^([^ ]+)\: +({.*?)$/', $values['value'], $matches))
          {
            $data[] = array($matches[1] => Inline::load($matches[2]));
          }
          elseif (isset($values['leadspaces'])
            && ' ' == $values['leadspaces']
            && preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{].*?) *\:(\s+(?P<value>.+?))?\s*$#', $values['value'], $matches))
          {
            // this is a compact notation element, add to next block and parse
            // (the offset is captured first: isNextLineIndented() may move the cursor)
            $c = $this->getRealCurrentLineNb();
            $block = $values['value'];
            if (!$this->isNextLineIndented())
            {
              $block .= "\n".$this->getNextEmbedBlock();
            }

            $data[] = $this->parseNested($c, $block);
          }
          else
          {
            $data[] = $this->parseValue($values['value']);
          }
        }
      }
      else if (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"].*?) *\:(\s+(?P<value>.+?))?\s*$#', $this->currentLine, $values))
      {
        $key = Inline::parseScalar($values['key']);

        if ('<<' === $key)
        {
          if (isset($values['value']) && '*' === substr($values['value'], 0, 1))
          {
            $isInPlace = substr($values['value'], 1);
            if (!array_key_exists($isInPlace, $this->refs))
            {
              throw new ParserException(sprintf('Reference "%s" does not exist at line %s (%s).', $isInPlace, $this->getRealCurrentLineNb() + 1, $this->currentLine));
            }
          }
          else
          {
            if (isset($values['value']) && $values['value'] !== '')
            {
              $value = $values['value'];
            }
            else
            {
              $value = $this->getNextEmbedBlock();
            }

            // here the offset is taken after the embedded block has been consumed
            $parsed = $this->parseNested($this->getRealCurrentLineNb() + 1, $value);

            $merged = array();
            if (!is_array($parsed))
            {
              throw new ParserException(sprintf("YAML merge keys used with a scalar value instead of an array at line %s (%s)", $this->getRealCurrentLineNb() + 1, $this->currentLine));
            }
            else if (isset($parsed[0]))
            {
              // Numeric array, merge individual elements
              foreach (array_reverse($parsed) as $parsedItem)
              {
                if (!is_array($parsedItem))
                {
                  throw new ParserException(sprintf("Merge items must be arrays at line %s (%s).", $this->getRealCurrentLineNb() + 1, $parsedItem));
                }
                $merged = array_merge($parsedItem, $merged);
              }
            }
            else
            {
              // Associative array, merge (the accumulator is $merged; the
              // previous code referenced an undefined $merge variable here)
              $merged = array_merge($merged, $parsed);
            }

            $isProcessed = $merged;
          }
        }
        else if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#', $values['value'], $matches))
        {
          $isRef = $matches['ref'];
          $values['value'] = $matches['value'];
        }

        if ($isProcessed)
        {
          // Merge keys
          $data = $isProcessed;
        }
        // hash
        else if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#'))
        {
          // if next line is less indented or equal, then it means that the current value is null
          // (isNextLineIndented() returns true in exactly that case)
          if ($this->isNextLineIndented())
          {
            $data[$key] = null;
          }
          else
          {
            $data[$key] = $this->parseNested($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock());
          }
        }
        else
        {
          if ($isInPlace)
          {
            $data = $this->refs[$isInPlace];
          }
          else
          {
            $data[$key] = $this->parseValue($values['value']);
          }
        }
      }
      else
      {
        // 1-liner followed by newline
        if (2 == count($this->lines) && empty($this->lines[1]))
        {
          $value = Inline::load($this->lines[0]);
          if (is_array($value))
          {
            $first = reset($value);
            // only a string element can be an alias; nested arrays must not reach substr()
            if (is_string($first) && '*' === substr($first, 0, 1))
            {
              $data = array();
              foreach ($value as $alias)
              {
                $data[] = $this->refs[substr($alias, 1)];
              }
              $value = $data;
            }
          }

          return $value;
        }

        // neither pattern matched: report a PCRE failure if there was one
        switch (preg_last_error())
        {
          case PREG_INTERNAL_ERROR:
            $error = 'Internal PCRE error on line';
            break;
          case PREG_BACKTRACK_LIMIT_ERROR:
            $error = 'pcre.backtrack_limit reached on line';
            break;
          case PREG_RECURSION_LIMIT_ERROR:
            $error = 'pcre.recursion_limit reached on line';
            break;
          case PREG_BAD_UTF8_ERROR:
            $error = 'Malformed UTF-8 data on line';
            break;
          case PREG_BAD_UTF8_OFFSET_ERROR:
            $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point on line';
            break;
          default:
            $error = 'Unable to parse line';
        }

        throw new ParserException(sprintf('%s %d (%s).', $error, $this->getRealCurrentLineNb() + 1, $this->currentLine));
      }

      if ($isRef)
      {
        // the anchor points at the value that was just added
        $this->refs[$isRef] = end($data);
      }
    }

    return empty($data) ? null : $data;
  }

  /**
   * Parses a YAML fragment with a new Parser that shares this parser's references.
   *
   * @param integer $offset The line offset given to the nested parser
   * @param string  $yaml   The YAML fragment to parse
   *
   * @return mixed A PHP value
   */
  private function parseNested($offset, $yaml)
  {
    $parser = new Parser($offset);
    $parser->refs =& $this->refs;

    return $parser->parse($yaml);
  }
  /**
   * Returns the index of the current line shifted by the parser's offset.
   *
   * @return integer The current line number, offset included
   */
  protected function getRealCurrentLineNb()
  {
    return $this->offset + $this->currentLineNb;
  }
  /**
   * Counts the space characters at the start of the current line.
   *
   * @return integer The number of leading spaces of the current line
   */
  protected function getCurrentLineIndentation()
  {
    // length of the initial run made only of spaces
    return strspn($this->currentLine, ' ');
  }
  /**
   * Returns the next embed block of YAML.
   *
   * Moves to the next line, takes its indentation as the block indentation and
   * collects that line and the following ones with the block indentation
   * stripped. Collection stops at the first non-blank line that has no
   * indentation at all; the parser is then stepped back so that line is read
   * again by the caller.
   *
   * @return string A YAML string
   *
   * @throws ParserException If the first line of the block is not empty and not
   *                         indented, or if a later line is indented less than
   *                         the block without being at column zero
   */
  protected function getNextEmbedBlock()
  {
    $this->moveToNextLine();

    $newIndent = $this->getCurrentLineIndentation();

    if (!$this->isCurrentLineEmpty() && 0 == $newIndent)
    {
      throw new ParserException(sprintf('Indentation problem at line %d (%s)', $this->getRealCurrentLineNb() + 1, $this->currentLine));
    }

    $data = array(substr($this->currentLine, $newIndent));

    while ($this->moveToNextLine())
    {
      // blank lines and lines at least as indented as the block belong to it
      // (a separate "only spaces" regex branch used to follow the blank check;
      // it could never be reached and has been dropped)
      if ($this->isCurrentLineBlank() || $this->getCurrentLineIndentation() >= $newIndent)
      {
        $data[] = substr($this->currentLine, $newIndent);

        continue;
      }

      if (0 == $this->getCurrentLineIndentation())
      {
        // back at column zero: the block is over, leave this line to the caller
        $this->moveToPreviousLine();

        break;
      }

      throw new ParserException(sprintf('Indentation problem at line %d (%s)', $this->getRealCurrentLineNb() + 1, $this->currentLine));
    }

    return implode("\n", $data);
  }
  /**
   * Advances the parser by one line.
   *
   * @return Boolean false when the parser already sits on the last line (nothing is changed), true otherwise
   */
  protected function moveToNextLine()
  {
    $lastIndex = count($this->lines) - 1;

    if ($this->currentLineNb < $lastIndex)
    {
      $this->currentLineNb += 1;
      $this->currentLine = $this->lines[$this->currentLineNb];

      return true;
    }

    return false;
  }
  /**
   * Steps the parser back by one line.
   *
   * No bounds check is performed: the caller must not be on the first line.
   */
  protected function moveToPreviousLine()
  {
    $this->currentLineNb -= 1;
    $this->currentLine = $this->lines[$this->currentLineNb];
  }
  /**
   * Parses a YAML value.
   *
   * Three forms are recognised: an alias ("*name", optionally followed by a
   * "#" comment) which is resolved against the reference table, a block scalar
   * header ("|" or ">" with optional modifiers) which is delegated to
   * parseFoldedScalar(), and anything else which is delegated to Inline::load().
   *
   * @param string $value A YAML value
   *
   * @return mixed A PHP value
   *
   * @throws ParserException If an alias refers to an unknown reference
   */
  protected function parseValue($value)
  {
    if ('*' === substr($value, 0, 1))
    {
      $pos = strpos($value, '#');
      if (false !== $pos)
      {
        // keep what lies between "*" and "#", whatever amount of whitespace
        // separates the alias from the comment
        $value = rtrim(substr($value, 1, $pos - 1), " \t");
      }
      else
      {
        $value = substr($value, 1);
      }

      if (!array_key_exists($value, $this->refs))
      {
        throw new ParserException(sprintf('Reference "%s" does not exist (%s).', $value, $this->currentLine));
      }

      return $this->refs[$value];
    }

    if (preg_match('/^(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?$/', $value, $matches))
    {
      $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

      // split the modifiers into the chomping indicator (+ / -) and the
      // indentation digits; abs() is avoided because it rejects non-numeric
      // strings such as '' or '+' on PHP 8
      $indicator = preg_replace('#\d+#', '', $modifiers);
      $indentation = (int) preg_replace('#\D+#', '', $modifiers);

      return $this->parseFoldedScalar($matches['separator'], $indicator, $indentation);
    }
    else
    {
      return Inline::load($value);
    }
  }
  /**
   * Parses a folded scalar.
   *
   * Reads the lines following the current one as the text of a block scalar.
   * Lines are joined with a newline for "|" and with a space for ">". Reading
   * stops at the first non-blank line indented less than the first text line;
   * the parser is stepped back so that line is read again by the caller.
   *
   * @param string  $separator   The separator that was used to begin this folded scalar (| or >)
   * @param string  $indicator   The indicator that was used to begin this folded scalar (+ or -)
   * @param integer $indentation The indentation that was used to begin this folded scalar
   *
   * @return string The text value ('' when no text line follows)
   */
  protected function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
  {
    $separator = '|' == $separator ? "\n" : ' ';
    $text = '';

    // leading blank lines each contribute a newline
    $notEOF = $this->moveToNextLine();
    while ($notEOF && $this->isCurrentLineBlank())
    {
      $text .= "\n";

      $notEOF = $this->moveToNextLine();
    }

    if (!$notEOF)
    {
      return '';
    }

    // the first text line fixes the indentation (explicit width, or any run of spaces)
    if (!preg_match('#^(?P<indent>'.($indentation ? str_repeat(' ', $indentation) : ' +').')(?P<text>.*)$#', $this->currentLine, $matches))
    {
      $this->moveToPreviousLine();

      return '';
    }

    $textIndent = $matches['indent'];
    // null means "no continuation line seen yet". The former initial value 0
    // was compared loosely against a string of spaces, which is "equal" before
    // PHP 8 but "different" from PHP 8 on; the sentinel keeps the pre-8 result.
    $previousIndent = null;

    $text .= $matches['text'].$separator;
    while ($this->currentLineNb + 1 < count($this->lines))
    {
      $this->moveToNextLine();

      if (preg_match('#^(?P<indent> {'.strlen($textIndent).',})(?P<text>.+)$#', $this->currentLine, $matches))
      {
        if (' ' === $separator && null !== $previousIndent && $previousIndent !== $matches['indent'])
        {
          // indentation changed between two continuation lines: turn the
          // trailing separator into a line break
          $text = substr($text, 0, -1)."\n";
        }
        $previousIndent = $matches['indent'];

        // extra indentation is kept, and such lines always end with a newline
        $diff = strlen($matches['indent']) - strlen($textIndent);
        $text .= str_repeat(' ', $diff).$matches['text'].($diff ? "\n" : $separator);
      }
      else if (preg_match('#^(?P<text> *)$#', $this->currentLine, $matches))
      {
        $text .= preg_replace('#^ {1,'.strlen($textIndent).'}#', '', $matches['text'])."\n";
      }
      else
      {
        $this->moveToPreviousLine();

        break;
      }
    }

    if (' ' === $separator)
    {
      // replace last separator by a newline
      $text = preg_replace('/ (\n*)$/', "\n$1", $text);
    }

    switch ($indicator)
    {
      case '':
        // default: keep a single trailing newline
        $text = preg_replace('#\n+$#s', "\n", $text);
        break;
      case '+':
        // keep all trailing newlines
        break;
      case '-':
        // strip all trailing newlines
        $text = preg_replace('#\n+$#s', '', $text);
        break;
    }

    return $text;
  }
  /**
   * Tells whether the next non-empty line is NOT indented deeper than the current one.
   *
   * Despite its name, this method returns true when the next line that is
   * neither blank nor a comment has an indentation lower than or equal to the
   * indentation of the current line.
   *
   * Cursor position afterwards: when such a line is found the parser is stepped
   * back exactly one line from it, so if blank or comment lines were skipped the
   * current line is the last skipped one. When the end of the document is
   * reached, the parser is left where the search stopped.
   *
   * @return Boolean true if the next non-empty line is at the same or a lower
   *                 indentation, false if it is indented deeper or if there is
   *                 no such line
   */
  protected function isNextLineIndented()
  {
    $referenceIndent = $this->getCurrentLineIndentation();

    // look for the next line that is neither blank nor a comment
    do
    {
      if (!$this->moveToNextLine())
      {
        return false;
      }
    }
    while ($this->isCurrentLineEmpty());

    $notDeeper = $this->getCurrentLineIndentation() <= $referenceIndent;

    $this->moveToPreviousLine();

    return $notDeeper;
  }
  /**
   * Tells whether the current line carries no data, i.e. is blank or is a comment line.
   *
   * @return Boolean true if the current line is blank or a comment line, false otherwise
   */
  protected function isCurrentLineEmpty()
  {
    if ($this->isCurrentLineBlank())
    {
      return true;
    }

    return $this->isCurrentLineComment();
  }
  /**
   * Tells whether the current line is empty or made of space characters only.
   *
   * @return Boolean true if the current line is blank, false otherwise
   */
  protected function isCurrentLineBlank()
  {
    // blank means the leading run of spaces covers the whole line
    return strspn($this->currentLine, ' ') === strlen($this->currentLine);
  }
  /**
   * Returns true if the current line is a comment line.
   *
   * A comment line is a line whose first character after the leading spaces is "#".
   *
   * @return Boolean Returns true if the current line is a comment line, false otherwise
   */
  protected function isCurrentLineComment()
  {
    //checking explicitly the first char of the trim is faster than loops or strpos
    $ltrimmedLine = ltrim($this->currentLine, ' ');

    // a blank line has no first character; guard before indexing the string
    return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
  }
  /**
   * Cleanups a YAML string to be parsed.
   *
   * Normalises line endings to "\n", makes sure the string ends with a newline,
   * then removes a leading %YAML header line and any leading comment or "---"
   * lines. The parser offset is increased by the number of removed lines.
   *
   * @param string $value The input YAML string
   *
   * @return string A cleaned up YAML string
   */
  protected function cleanup($value)
  {
    $value = str_replace(array("\r\n", "\r"), "\n", $value);

    if (!preg_match("#\n$#", $value))
    {
      $value .= "\n";
    }

    // strip YAML header
    // ([^\n]* keeps the match on the header line; the former ".*" with the "s"
    // modifier ran to the last newline and removed the whole document)
    $count = 0;
    $value = preg_replace('#^\%YAML[: ][\d\.]+[^\n]*\n#', '', $value, -1, $count);
    $this->offset += $count;

    // remove leading comments and/or ---
    $trimmedValue = preg_replace('#^((\#.*?\n)|(\-\-\-.*?\n))*#s', '', $value, -1, $count);
    if ($count == 1)
    {
      // items have been removed, update the offset
      $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
      $value = $trimmedValue;
    }

    return $value;
  }
  510. }