<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Tests\Component\DomCrawler;

use Symfony\Component\DomCrawler\Crawler;

  12. class CrawlerTest extends \PHPUnit_Framework_TestCase
  13. {
  14. public function testConstructor()
  15. {
  16. $crawler = new Crawler();
  17. $this->assertEquals(0, count($crawler), '__construct() returns an empty crawler');
  18. $crawler = new Crawler(new \DOMNode());
  19. $this->assertEquals(1, count($crawler), '__construct() takes a node as a first argument');
  20. }
  21. /**
  22. * @covers Symfony\Component\DomCrawler\Crawler::add
  23. */
  24. public function testAdd()
  25. {
  26. $crawler = new Crawler();
  27. $crawler->add($this->createDomDocument());
  28. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from a \DOMDocument');
  29. $crawler = new Crawler();
  30. $crawler->add($this->createNodeList());
  31. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from a \DOMNodeList');
  32. foreach ($this->createNodeList() as $node) {
  33. $list[] = $node;
  34. }
  35. $crawler = new Crawler();
  36. $crawler->add($list);
  37. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from an array of nodes');
  38. $crawler = new Crawler();
  39. $crawler->add($this->createNodeList()->item(0));
  40. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from an \DOMNode');
  41. $crawler = new Crawler();
  42. $crawler->add('<html><body>Foo</body></html>');
  43. $this->assertEquals('Foo', $crawler->filter('body')->text(), '->add() adds nodes from a string');
  44. }
  45. /**
  46. * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
  47. */
  48. public function testAddHtmlContent()
  49. {
  50. $crawler = new Crawler();
  51. $crawler->addHtmlContent('<html><div class="foo"></html>', 'UTF-8');
  52. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addHtmlContent() adds nodes from an HTML string');
  53. $crawler->addHtmlContent('<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html>', 'UTF-8');
  54. $this->assertEquals('http://symfony.com', $crawler->filter('base')->attr('href'), '->addHtmlContent() adds nodes from an HTML string');
  55. $this->assertEquals('http://symfony.com/contact', $crawler->filter('a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string');
  56. }
  57. /**
  58. * @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
  59. */
  60. public function testAddXmlContent()
  61. {
  62. $crawler = new Crawler();
  63. $crawler->addXmlContent('<html><div class="foo"></div></html>', 'UTF-8');
  64. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addXmlContent() adds nodes from an XML string');
  65. }
  66. /**
  67. * @covers Symfony\Component\DomCrawler\Crawler::addContent
  68. */
  69. public function testAddContent()
  70. {
  71. $crawler = new Crawler();
  72. $crawler->addContent('<html><div class="foo"></html>', 'text/html; charset=UTF-8');
  73. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addContent() adds nodes from an HTML string');
  74. $crawler = new Crawler();
  75. $crawler->addContent('<html><div class="foo"></html>');
  76. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addContent() uses text/html as the default type');
  77. $crawler = new Crawler();
  78. $crawler->addContent('<html><div class="foo"></div></html>', 'text/xml; charset=UTF-8');
  79. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addContent() adds nodes from an XML string');
  80. $crawler = new Crawler();
  81. $crawler->addContent('<html><div class="foo"></div></html>', 'text/xml');
  82. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addContent() adds nodes from an XML string');
  83. $crawler = new Crawler();
  84. $crawler->addContent('foo bar', 'text/plain');
  85. $this->assertEquals(0, count($crawler), '->addContent() does nothing if the type is not (x|ht)ml');
  86. }
  87. /**
  88. * @covers Symfony\Component\DomCrawler\Crawler::addDocument
  89. */
  90. public function testAddDocument()
  91. {
  92. $crawler = new Crawler();
  93. $crawler->addDocument($this->createDomDocument());
  94. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addDocument() adds nodes from a \DOMDocument');
  95. }
  96. /**
  97. * @covers Symfony\Component\DomCrawler\Crawler::addNodeList
  98. */
  99. public function testAddNodeList()
  100. {
  101. $crawler = new Crawler();
  102. $crawler->addNodeList($this->createNodeList());
  103. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addNodeList() adds nodes from a \DOMNodeList');
  104. }
  105. /**
  106. * @covers Symfony\Component\DomCrawler\Crawler::addNodes
  107. */
  108. public function testAddNodes()
  109. {
  110. foreach ($this->createNodeList() as $node) {
  111. $list[] = $node;
  112. }
  113. $crawler = new Crawler();
  114. $crawler->addNodes($list);
  115. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addNodes() adds nodes from an array of nodes');
  116. }
  117. /**
  118. * @covers Symfony\Component\DomCrawler\Crawler::addNode
  119. */
  120. public function testAddNode()
  121. {
  122. $crawler = new Crawler();
  123. $crawler->addNode($this->createNodeList()->item(0));
  124. $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addNode() adds nodes from an \DOMNode');
  125. }
  126. public function testClear()
  127. {
  128. $crawler = new Crawler(new \DOMNode());
  129. $crawler->clear();
  130. $this->assertEquals(0, count($crawler), '->clear() removes all the nodes from the crawler');
  131. }
  132. public function testEq()
  133. {
  134. $crawler = $this->createTestCrawler()->filter('li');
  135. $this->assertNotSame($crawler, $crawler->eq(0), '->eq() returns a new instance of a crawler');
  136. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->eq() returns a new instance of a crawler');
  137. $this->assertEquals('Two', $crawler->eq(1)->text(), '->eq() returns the nth node of the list');
  138. $this->assertEquals(0, count($crawler->eq(100)), '->eq() returns an empty crawler if the nth node does not exist');
  139. }
  140. public function testEach()
  141. {
  142. $data = $this->createTestCrawler()->filter('ul.first li')->each(function ($node, $i)
  143. {
  144. return $i.'-'.$node->nodeValue;
  145. });
  146. $this->assertEquals(array('0-One', '1-Two', '2-Three'), $data, '->each() executes an anonymous function on each node of the list');
  147. }
  148. public function testReduce()
  149. {
  150. $crawler = $this->createTestCrawler()->filter('ul.first li');
  151. $nodes = $crawler->reduce(function ($node, $i)
  152. {
  153. return $i == 1 ? false : true;
  154. });
  155. $this->assertNotSame($nodes, $crawler, '->reduce() returns a new instance of a crawler');
  156. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $nodes, '->reduce() returns a new instance of a crawler');
  157. $this->assertEquals(2, count($nodes), '->reduce() filters the nodes in the list');
  158. }
  159. public function testAttr()
  160. {
  161. $this->assertEquals('first', $this->createTestCrawler()->filter('li')->attr('class'), '->attr() returns the attribute of the first element of the node list');
  162. try {
  163. $this->createTestCrawler()->filter('ol')->attr('class');
  164. $this->fail('->attr() throws an \InvalidArgumentException if the node list is empty');
  165. } catch (\InvalidArgumentException $e) {
  166. $this->assertTrue(true, '->attr() throws an \InvalidArgumentException if the node list is empty');
  167. }
  168. }
  169. public function testText()
  170. {
  171. $this->assertEquals('One', $this->createTestCrawler()->filter('li')->text(), '->text() returns the node value of the first element of the node list');
  172. try {
  173. $this->createTestCrawler()->filter('ol')->text();
  174. $this->fail('->text() throws an \InvalidArgumentException if the node list is empty');
  175. } catch (\InvalidArgumentException $e) {
  176. $this->assertTrue(true, '->text() throws an \InvalidArgumentException if the node list is empty');
  177. }
  178. }
  179. public function testExtract()
  180. {
  181. $crawler = $this->createTestCrawler()->filter('ul.first li');
  182. $this->assertEquals(array('One', 'Two', 'Three'), $crawler->extract('_text'), '->extract() returns an array of extracted data from the node list');
  183. $this->assertEquals(array(array('One', 'first'), array('Two', ''), array('Three', '')), $crawler->extract(array('_text', 'class')), '->extract() returns an array of extracted data from the node list');
  184. $this->assertEquals(array(), $this->createTestCrawler()->filter('lo')->extract('_text'), '->extract() returns an empty array if the node list is empty');
  185. }
  186. /**
  187. * @covers Symfony\Component\DomCrawler\Crawler::filterXPath
  188. */
  189. public function testFilterXPath()
  190. {
  191. $crawler = $this->createTestCrawler();
  192. $this->assertNotSame($crawler, $crawler->filterXPath('//li'), '->filterXPath() returns a new instance of a crawler');
  193. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler');
  194. $crawler = $this->createTestCrawler()->filter('ul');
  195. $this->assertEquals(6, count($crawler->filterXPath('//li')), '->filterXPath() filters the node list with the XPath expression');
  196. }
  197. /**
  198. * @covers Symfony\Component\DomCrawler\Crawler::filter
  199. */
  200. public function testFilter()
  201. {
  202. $crawler = $this->createTestCrawler();
  203. $this->assertNotSame($crawler, $crawler->filter('li'), '->filter() returns a new instance of a crawler');
  204. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filter() returns a new instance of a crawler');
  205. $crawler = $this->createTestCrawler()->filter('ul');
  206. $this->assertEquals(6, count($crawler->filter('li')), '->filter() filters the node list with the CSS selector');
  207. }
  208. public function testSelectLink()
  209. {
  210. $crawler = $this->createTestCrawler();
  211. $this->assertNotSame($crawler, $crawler->selectLink('Foo'), '->selectLink() returns a new instance of a crawler');
  212. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->selectLink() returns a new instance of a crawler');
  213. $this->assertEquals(1, count($crawler->selectLink('Fabien\'s Foo')), '->selectLink() selects links by the node values');
  214. $this->assertEquals(1, count($crawler->selectLink('Fabien\'s Bar')), '->selectLink() selects links by the alt attribute of a clickable image');
  215. $this->assertEquals(2, count($crawler->selectLink('Fabien"s Foo')), '->selectLink() selects links by the node values');
  216. $this->assertEquals(2, count($crawler->selectLink('Fabien"s Bar')), '->selectLink() selects links by the alt attribute of a clickable image');
  217. $this->assertEquals(1, count($crawler->selectLink('\' Fabien"s Foo')), '->selectLink() selects links by the node values');
  218. $this->assertEquals(1, count($crawler->selectLink('\' Fabien"s Bar')), '->selectLink() selects links by the alt attribute of a clickable image');
  219. $this->assertEquals(4, count($crawler->selectLink('Foo')), '->selectLink() selects links by the node values');
  220. $this->assertEquals(4, count($crawler->selectLink('Bar')), '->selectLink() selects links by the node values');
  221. }
  222. public function testSelectButton()
  223. {
  224. $crawler = $this->createTestCrawler();
  225. $this->assertNotSame($crawler, $crawler->selectButton('FooValue'), '->selectButton() returns a new instance of a crawler');
  226. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->selectButton() returns a new instance of a crawler');
  227. $this->assertEquals(1, $crawler->selectButton('FooValue')->count(), '->selectButton() selects buttons');
  228. $this->assertEquals(1, $crawler->selectButton('FooName')->count(), '->selectButton() selects buttons');
  229. $this->assertEquals(1, $crawler->selectButton('FooId')->count(), '->selectButton() selects buttons');
  230. $this->assertEquals(1, $crawler->selectButton('BarValue')->count(), '->selectButton() selects buttons');
  231. $this->assertEquals(1, $crawler->selectButton('BarName')->count(), '->selectButton() selects buttons');
  232. $this->assertEquals(1, $crawler->selectButton('BarId')->count(), '->selectButton() selects buttons');
  233. }
  234. public function testLink()
  235. {
  236. $crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
  237. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Link', $crawler->link(), '->link() returns a Link instance');
  238. $this->assertEquals('post', $crawler->link('post')->getMethod(), '->link() takes a method as its argument');
  239. $crawler = $this->createTestCrawler('http://example.com/bar')->selectLink('GetLink');
  240. $this->assertEquals('http://example.com/bar?get=param', $crawler->link()->getUri(), '->link() returns a Link instance');
  241. try {
  242. $this->createTestCrawler()->filter('ol')->link();
  243. $this->fail('->link() throws an \InvalidArgumentException if the node list is empty');
  244. } catch (\InvalidArgumentException $e) {
  245. $this->assertTrue(true, '->link() throws an \InvalidArgumentException if the node list is empty');
  246. }
  247. }
  248. public function testLinks()
  249. {
  250. $crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
  251. $this->assertInternalType('array', $crawler->links(), '->links() returns an array');
  252. $this->assertEquals(4, count($crawler->links()), '->links() returns an array');
  253. $links = $crawler->links();
  254. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Link', $links[0], '->links() returns an array of Link instances');
  255. $this->assertEquals(array(), $this->createTestCrawler()->filter('ol')->links(), '->links() returns an empty array if the node selection is empty');
  256. }
  257. public function testForm()
  258. {
  259. $crawler = $this->createTestCrawler('http://example.com/bar/')->selectButton('FooValue');
  260. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Form', $crawler->form(), '->form() returns a Form instance');
  261. $this->assertEquals(array('FooName' => 'FooBar'), $crawler->form(array('FooName' => 'FooBar'))->getValues(), '->form() takes an array of values to submit as its first argument');
  262. try {
  263. $this->createTestCrawler()->filter('ol')->form();
  264. $this->fail('->form() throws an \InvalidArgumentException if the node list is empty');
  265. } catch (\InvalidArgumentException $e) {
  266. $this->assertTrue(true, '->form() throws an \InvalidArgumentException if the node list is empty');
  267. }
  268. }
  269. public function testLast()
  270. {
  271. $crawler = $this->createTestCrawler()->filter('ul.first li');
  272. $this->assertNotSame($crawler, $crawler->last(), '->last() returns a new instance of a crawler');
  273. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->last() returns a new instance of a crawler');
  274. $this->assertEquals('Three', $crawler->last()->text());
  275. }
  276. public function testFirst()
  277. {
  278. $crawler = $this->createTestCrawler()->filter('li');
  279. $this->assertNotSame($crawler, $crawler->first(), '->first() returns a new instance of a crawler');
  280. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->first() returns a new instance of a crawler');
  281. $this->assertEquals('One', $crawler->first()->text());
  282. }
  283. public function testSiblings()
  284. {
  285. $crawler = $this->createTestCrawler()->filter('li')->eq(1);
  286. $this->assertNotSame($crawler, $crawler->siblings(), '->siblings() returns a new instance of a crawler');
  287. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->siblings() returns a new instance of a crawler');
  288. $nodes = $crawler->siblings();
  289. $this->assertEquals(2, $nodes->count());
  290. $this->assertEquals('One', $nodes->eq(0)->text());
  291. $this->assertEquals('Three', $nodes->eq(1)->text());
  292. $nodes = $this->createTestCrawler()->filter('li')->eq(0)->siblings();
  293. $this->assertEquals(2, $nodes->count());
  294. $this->assertEquals('Two', $nodes->eq(0)->text());
  295. $this->assertEquals('Three', $nodes->eq(1)->text());
  296. try {
  297. $this->createTestCrawler()->filter('ol')->siblings();
  298. $this->fail('->siblings() throws an \InvalidArgumentException if the node list is empty');
  299. } catch (\InvalidArgumentException $e) {
  300. $this->assertTrue(true, '->siblings() throws an \InvalidArgumentException if the node list is empty');
  301. }
  302. }
  303. public function testNextAll()
  304. {
  305. $crawler = $this->createTestCrawler()->filter('li')->eq(1);
  306. $this->assertNotSame($crawler, $crawler->nextAll(), '->nextAll() returns a new instance of a crawler');
  307. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->nextAll() returns a new instance of a crawler');
  308. $nodes = $crawler->nextAll();
  309. $this->assertEquals(1, $nodes->count());
  310. $this->assertEquals('Three', $nodes->eq(0)->text());
  311. try {
  312. $this->createTestCrawler()->filter('ol')->nextAll();
  313. $this->fail('->nextAll() throws an \InvalidArgumentException if the node list is empty');
  314. } catch (\InvalidArgumentException $e) {
  315. $this->assertTrue(true, '->nextAll() throws an \InvalidArgumentException if the node list is empty');
  316. }
  317. }
  318. public function testPreviousAll()
  319. {
  320. $crawler = $this->createTestCrawler()->filter('li')->eq(2);
  321. $this->assertNotSame($crawler, $crawler->previousAll(), '->previousAll() returns a new instance of a crawler');
  322. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->previousAll() returns a new instance of a crawler');
  323. $nodes = $crawler->previousAll();
  324. $this->assertEquals(2, $nodes->count());
  325. $this->assertEquals('Two', $nodes->eq(0)->text());
  326. try {
  327. $this->createTestCrawler()->filter('ol')->previousAll();
  328. $this->fail('->previousAll() throws an \InvalidArgumentException if the node list is empty');
  329. } catch (\InvalidArgumentException $e) {
  330. $this->assertTrue(true, '->previousAll() throws an \InvalidArgumentException if the node list is empty');
  331. }
  332. }
  333. public function testChildren()
  334. {
  335. $crawler = $this->createTestCrawler()->filter('ul');
  336. $this->assertNotSame($crawler, $crawler->children(), '->children() returns a new instance of a crawler');
  337. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->children() returns a new instance of a crawler');
  338. $nodes = $crawler->children();
  339. $this->assertEquals(3, $nodes->count());
  340. $this->assertEquals('One', $nodes->eq(0)->text());
  341. $this->assertEquals('Two', $nodes->eq(1)->text());
  342. $this->assertEquals('Three', $nodes->eq(2)->text());
  343. try {
  344. $this->createTestCrawler()->filter('ol')->children();
  345. $this->fail('->children() throws an \InvalidArgumentException if the node list is empty');
  346. } catch (\InvalidArgumentException $e) {
  347. $this->assertTrue(true, '->children() throws an \InvalidArgumentException if the node list is empty');
  348. }
  349. }
  350. public function testParents()
  351. {
  352. $crawler = $this->createTestCrawler()->filter('li:first-child');
  353. $this->assertNotSame($crawler, $crawler->parents(), '->parents() returns a new instance of a crawler');
  354. $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->parents() returns a new instance of a crawler');
  355. $nodes = $crawler->parents();
  356. $this->assertEquals(3, $nodes->count());
  357. $nodes = $this->createTestCrawler()->filter('html')->parents();
  358. $this->assertEquals(0, $nodes->count());
  359. try {
  360. $this->createTestCrawler()->filter('ol')->parents();
  361. $this->fail('->parents() throws an \InvalidArgumentException if the node list is empty');
  362. } catch (\InvalidArgumentException $e) {
  363. $this->assertTrue(true, '->parents() throws an \InvalidArgumentException if the node list is empty');
  364. }
  365. }
  366. public function createTestCrawler($uri = null)
  367. {
  368. $dom = new \DOMDocument();
  369. $dom->loadHTML('
  370. <html>
  371. <body>
  372. <a href="foo">Foo</a>
  373. <a href="/foo"> Fabien\'s Foo </a>
  374. <a href="/foo">Fabien"s Foo</a>
  375. <a href="/foo">\' Fabien"s Foo</a>
  376. <a href="/bar"><img alt="Bar"/></a>
  377. <a href="/bar"><img alt=" Fabien\'s Bar "/></a>
  378. <a href="/bar"><img alt="Fabien&quot;s Bar"/></a>
  379. <a href="/bar"><img alt="\' Fabien&quot;s Bar"/></a>
  380. <a href="?get=param">GetLink</a>
  381. <form action="foo">
  382. <input type="submit" value="FooValue" name="FooName" id="FooId" />
  383. <input type="button" value="BarValue" name="BarName" id="BarId" />
  384. <button value="ButtonValue" name="ButtonName" id="ButtonId" />
  385. </form>
  386. <ul class="first">
  387. <li class="first">One</li>
  388. <li>Two</li>
  389. <li>Three</li>
  390. </ul>
  391. <ul>
  392. <li>One Bis</li>
  393. <li>Two Bis</li>
  394. <li>Three Bis</li>
  395. </ul>
  396. </body>
  397. </html>
  398. ');
  399. return new Crawler($dom, $uri);
  400. }
  401. protected function createDomDocument()
  402. {
  403. $dom = new \DOMDocument();
  404. $dom->loadXML('<html><div class="foo"></div></html>');
  405. return $dom;
  406. }
  407. protected function createNodeList()
  408. {
  409. $dom = new \DOMDocument();
  410. $dom->loadXML('<html><div class="foo"></div></html>');
  411. $domxpath = new \DOMXPath($dom);
  412. return $domxpath->query('//div');
  413. }
  414. }