CssSelector.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector;
  11. use Symfony\Component\CssSelector\Exception\ParseException;
  12. /**
  13. * CssSelector is the main entry point of the component and can convert CSS
  14. * selectors to XPath expressions.
  15. *
  16. * $xpath = CssSelector::toXPath('h1.foo');
  17. *
  18. * This component is a port of the Python lxml library,
  19. * which is copyright Infrae and distributed under the BSD license.
  20. *
  21. * @author Fabien Potencier <fabien@symfony.com>
  22. *
  23. * @api
  24. */
  class CssSelector
  {
      /**
       * Translates a CSS expression to its XPath equivalent.
       *
       * Simple selectors ("tag", "tag#id", "#id", "tag.class", ".class") are
       * converted directly with regular expressions; anything else is run
       * through the parser. Optionally, a prefix can be added to the resulting
       * XPath expression with the $prefix parameter.
       *
       * @param mixed  $cssExpr The CSS expression: either a string, or an
       *                        already parsed node exposing toXpath().
       * @param string $prefix  An optional prefix for the XPath expression.
       *
       * @return string The XPath expression
       *
       * @throws ParseException When the expression cannot be parsed, or when
       *                        the parsed node yields an empty XPath expression
       *
       * @api
       */
      public static function toXPath($cssExpr, $prefix = 'descendant-or-self::')
      {
          if (is_string($cssExpr)) {
              // Truthiness test kept as-is for backward compatibility: both ''
              // and '0' are treated as "match any element".
              if (!$cssExpr) {
                  return $prefix.'*';
              }

              // Fast path: bare element name, e.g. "h1".
              if (preg_match('#^\w+\s*$#u', $cssExpr, $match)) {
                  return $prefix.trim($match[0]);
              }

              // Fast path: "#id" or "tag#id".
              if (preg_match('~^(\w*)#(\w+)\s*$~u', $cssExpr, $match)) {
                  return sprintf("%s%s[@id = '%s']", $prefix, $match[1] ? $match[1] : '*', $match[2]);
              }

              // Fast path: ".class" or "tag.class".
              if (preg_match('#^(\w*)\.(\w+)\s*$#u', $cssExpr, $match)) {
                  return sprintf("%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]", $prefix, $match[1] ? $match[1] : '*', $match[2]);
              }

              // Everything else goes through the full parser.
              $parser = new self();
              $cssExpr = $parser->parse($cssExpr);
          }

          $expr = $cssExpr->toXpath();

          // @codeCoverageIgnoreStart
          if (!$expr) {
              throw new ParseException(sprintf('Got None for xpath expression from %s.', $cssExpr));
          }
          // @codeCoverageIgnoreEnd

          if ($prefix) {
              $expr->addPrefix($prefix);
          }

          return (string) $expr;
      }

      /**
       * Parses an expression and returns the Node object that represents
       * the parsed expression.
       *
       * Any exception raised while parsing is re-thrown as a new exception of
       * the same class whose message also contains the tokens consumed so far
       * and the token at which parsing stopped; the original exception is
       * attached as the previous one.
       *
       * @param string $string The expression to parse
       *
       * @return Node\NodeInterface
       *
       * @throws \Exception When the tokenizer or the parser fails
       */
      public function parse($string)
      {
          $tokenizer = new Tokenizer();
          $stream = new TokenStream($tokenizer->tokenize($string), $string);

          try {
              return $this->parseSelectorGroup($stream);
          } catch (\Exception $e) {
              $class = get_class($e);

              // implode() must receive the separator first: the legacy
              // (array, separator) order is rejected by PHP 8.
              throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), implode('', $stream->getUsed()), $stream->peek()), 0, $e);
          }
      }

      /**
       * Parses a selector group (selectors separated by commas) contained in
       * $stream and returns the Node object that represents the expression.
       *
       * @param TokenStream $stream The stream to parse.
       *
       * @return Node\NodeInterface A single selector node, or an OrNode when
       *                            the group contains several selectors
       */
      private function parseSelectorGroup($stream)
      {
          $result = array();

          while (true) {
              $result[] = $this->parseSelector($stream);

              // Tokens are objects, so the comparison with ',' is loose on purpose.
              if ($stream->peek() == ',') {
                  $stream->next();
              } else {
                  break;
              }
          }

          if (count($result) == 1) {
              return $result[0];
          }

          return new Node\OrNode($result);
      }

      /**
       * Parses a selector contained in $stream and returns the Node
       * object that represents it.
       *
       * A selector is a chain of simple selectors joined by combinators
       * ("+", ">", "~", or the implicit descendant combinator " ").
       *
       * @param TokenStream $stream The stream containing the selector.
       *
       * @return Node\NodeInterface
       *
       * @throws ParseException When expected selector but got something else
       */
      private function parseSelector($stream)
      {
          $result = $this->parseSimpleSelector($stream);

          while (true) {
              $peek = $stream->peek();

              // End of this selector: next group member or end of input.
              if (',' == $peek || null === $peek) {
                  return $result;
              }

              // Loose in_array() on purpose: $peek is a token object.
              if (in_array($peek, array('+', '>', '~'))) {
                  // A combinator
                  $combinator = (string) $stream->next();
              } else {
                  $combinator = ' ';
              }

              // The right-hand side must consume at least one token, otherwise
              // the loop would never make progress.
              $consumed = count($stream->getUsed());
              $nextSelector = $this->parseSimpleSelector($stream);
              if ($consumed == count($stream->getUsed())) {
                  throw new ParseException(sprintf("Expected selector, got '%s'", $stream->peek()));
              }

              $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
          }
      }

      /**
       * Parses a simple selector (the current token) from $stream and returns
       * the resulting Node object.
       *
       * Handles an optional "namespace|element" head followed by any number of
       * "#id", ".class", "[attribute]" and ":pseudo" / ":function(...)" parts.
       *
       * @param TokenStream $stream The stream containing the selector.
       *
       * @return Node\NodeInterface
       *
       * @throws ParseException When expected symbol but got something else
       */
      private function parseSimpleSelector($stream)
      {
          $peek = $stream->peek();
          if (!$this->isElementToken($peek)) {
              // No explicit element (or end of input): match any element.
              $element = $namespace = '*';
          } else {
              $next = $stream->next();
              if (!$this->isElementToken($next)) {
                  throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
              }

              if ($stream->peek() == '|') {
                  $namespace = $next;
                  $stream->next();
                  $element = $stream->next();
                  // Validate the element itself, not the namespace token that
                  // was already checked above.
                  if (!$this->isElementToken($element)) {
                      throw new ParseException(sprintf("Expected symbol, got '%s'", $element));
                  }
              } else {
                  $namespace = '*';
                  $element = $next;
              }
          }

          $result = new Node\ElementNode($namespace, $element);
          $hasHash = false;

          while (true) {
              $peek = $stream->peek();

              if ('#' == $peek) {
                  if ($hasHash) {
                      /* You can't have two hashes
                         (FIXME: is there some more general rule I'm missing?) */
                      // @codeCoverageIgnoreStart
                      break;
                      // @codeCoverageIgnoreEnd
                  }

                  $stream->next();
                  $result = new Node\HashNode($result, $stream->next());
                  $hasHash = true;

                  continue;
              } elseif ('.' == $peek) {
                  $stream->next();
                  $result = new Node\ClassNode($result, $stream->next());

                  continue;
              } elseif ('[' == $peek) {
                  $stream->next();
                  $result = $this->parseAttrib($result, $stream);
                  $next = $stream->next();
                  if (']' != $next) {
                      throw new ParseException(sprintf("] expected, got '%s'", $next));
                  }

                  continue;
              } elseif (':' == $peek || '::' == $peek) {
                  $type = $stream->next();
                  $ident = $stream->next();
                  if (!$ident || !$ident->isType('Symbol')) {
                      throw new ParseException(sprintf("Expected symbol, got '%s'", $ident));
                  }

                  if ($stream->peek() == '(') {
                      $stream->next();
                      $peek = $stream->peek();

                      // $peek is null when the input ends right after "(";
                      // fall through so the missing ")" is reported below.
                      if (null !== $peek && $peek->isType('String')) {
                          $selector = $stream->next();
                      } elseif (null !== $peek && $peek->isType('Symbol') && is_int($peek)) {
                          // NOTE(review): is_int() on a token object is always
                          // false, so this branch is never taken and numeric
                          // arguments reach parseSimpleSelector() below. Kept
                          // unchanged; confirm the intent before altering it.
                          $selector = intval($stream->next());
                      } else {
                          // FIXME: parseSimpleSelector, or selector, or...?
                          $selector = $this->parseSimpleSelector($stream);
                      }

                      $next = $stream->next();
                      if (')' != $next) {
                          throw new ParseException(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
                      }

                      $result = new Node\FunctionNode($result, $type, $ident, $selector);
                  } else {
                      $result = new Node\PseudoNode($result, $type, $ident);
                  }

                  continue;
              } else {
                  // Swallow a single separating space before handing control
                  // back to the caller.
                  if (' ' == $peek) {
                      $stream->next();
                  }

                  break;
              }
              // FIXME: not sure what "negation" is
          }

          return $result;
      }

      /**
       * Parses an attribute from a selector contained in $stream and returns
       * the resulting AttribNode object.
       *
       * The opening "[" must already have been consumed; the closing "]" is
       * left in the stream for the caller to check.
       *
       * @param Node\NodeInterface $selector The selector object whose attribute
       *                                     is to be parsed.
       * @param TokenStream        $stream   The container token stream.
       *
       * @return Node\AttribNode
       *
       * @throws ParseException When encountered unexpected selector
       */
      private function parseAttrib($selector, $stream)
      {
          $attrib = $stream->next();
          if ($stream->peek() == '|') {
              $namespace = $attrib;
              $stream->next();
              $attrib = $stream->next();
          } else {
              $namespace = '*';
          }

          // "[attr]": presence test only.
          if ($stream->peek() == ']') {
              return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
          }

          $op = $stream->next();
          // Loose in_array() on purpose: $op is a token object (or null at end
          // of input, which matches none of the operators).
          if (!in_array($op, array('^=', '$=', '*=', '=', '~=', '|=', '!='))) {
              throw new ParseException(sprintf("Operator expected, got '%s'", $op));
          }

          $value = $stream->next();
          // $value is null when the input ends right after the operator.
          if (null === $value || (!$value->isType('Symbol') && !$value->isType('String'))) {
              throw new ParseException(sprintf("Expected string or symbol, got '%s'", $value));
          }

          return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
      }

      /**
       * Tells whether a token can start an element name: the universal
       * selector "*" or a Symbol token. Returns false for null (end of stream).
       *
       * @param mixed $token A token taken from the stream, or null
       *
       * @return Boolean
       */
      private function isElementToken($token)
      {
          if (null === $token) {
              return false;
          }

          // Loose comparison on purpose: tokens are objects compared to a string.
          return '*' == $token || $token->isType('Symbol');
      }
  }