Url.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. <?php
  2. namespace Guzzle\Http;
  3. use Guzzle\Common\Exception\InvalidArgumentException;
  4. /**
  5. * Parses and generates URLs based on URL parts. In favor of performance, URL parts are not validated.
  6. */
  7. class Url
  8. {
  9. protected $scheme;
  10. protected $host;
  11. protected $port;
  12. protected $username;
  13. protected $password;
  14. protected $path = '';
  15. protected $fragment;
  16. /** @var QueryString Query part of the URL */
  17. protected $query;
  /**
   * Factory method to create a new URL from a URL string.
   *
   * The string is split with parse_url(). A non-empty query component is converted
   * with QueryString::fromString(); an absent or empty query is handed to the
   * constructor as null so that it creates an empty QueryString itself.
   *
   * @param string $url Full URL used to create a Url object
   *
   * @return Url
   * @throws InvalidArgumentException if parse_url() reports the URL as malformed
   */
  public static function factory($url)
  {
      static $defaults = array('scheme' => null, 'host' => null, 'path' => null, 'port' => null, 'query' => null,
          'user' => null, 'pass' => null, 'fragment' => null);

      if (false === ($parts = parse_url($url))) {
          throw new InvalidArgumentException('Was unable to parse malformed url: ' . $url);
      }

      // Fill in every component parse_url() left out so the lookups below are always defined
      $parts += $defaults;

      // Convert the query string into a QueryString object. Since PHP 8.0 parse_url() reports a
      // bare "?" as an empty string instead of omitting the key; the constructor only accepts a
      // QueryString or null, so an empty query must be normalised to null here.
      if (null !== $parts['query'] && '' !== $parts['query']) {
          $parts['query'] = QueryString::fromString($parts['query']);
      } else {
          $parts['query'] = null;
      }

      return new static($parts['scheme'], $parts['host'], $parts['user'],
          $parts['pass'], $parts['port'], $parts['path'], $parts['query'],
          $parts['fragment']);
  }
  /**
   * Build a URL from parse_url parts. The generated URL will be a relative URL if a scheme or host are not provided.
   *
   * Recognised keys are scheme, user, pass, host, port, path, query and fragment. User, password
   * and port are only emitted when a host is present, and the port is omitted when it is the
   * default for the scheme (80 for http, 443 for https).
   *
   * @param array $parts Array of parse_url parts
   *
   * @return string
   */
  public static function buildUrl(array $parts)
  {
      $url = $scheme = '';

      if (isset($parts['scheme'])) {
          $scheme = $parts['scheme'];
          $url .= $scheme . ':';
      }

      $hasHost = isset($parts['host']);
      if ($hasHost) {
          $url .= '//';
          if (isset($parts['user'])) {
              $url .= $parts['user'];
              if (isset($parts['pass'])) {
                  $url .= ':' . $parts['pass'];
              }
              $url .= '@';
          }

          $url .= $parts['host'];

          // Only include the port if it is not the default port of the scheme
          if (isset($parts['port'])
              && !(($scheme == 'http' && $parts['port'] == 80) || ($scheme == 'https' && $parts['port'] == 443))
          ) {
              $url .= ':' . $parts['port'];
          }
      }

      // Add the path component if present
      if (isset($parts['path']) && 0 !== strlen($parts['path'])) {
          // A path that follows a host must begin with '/'. A path that follows only a scheme
          // (e.g. "mailto:user@example.com") is rootless and must be emitted unchanged.
          if ($hasHost && $parts['path'][0] != '/' && substr($url, -1) != '/') {
              $url .= '/';
          }
          $url .= $parts['path'];
      }

      // Add the query string if present
      if (isset($parts['query'])) {
          $url .= '?' . $parts['query'];
      }

      // Add the fragment whenever the key is set to a non-null value
      if (isset($parts['fragment'])) {
          $url .= '#' . $parts['fragment'];
      }

      return $url;
  }
  /**
   * Create a new URL from URL parts
   *
   * The path is stored through setPath(), so spaces and question marks in it are encoded.
   * When no query is supplied an empty QueryString is created.
   *
   * @param string           $scheme   Scheme of the URL
   * @param string           $host     Host of the URL
   * @param string           $username Username of the URL
   * @param string           $password Password of the URL
   * @param int              $port     Port of the URL
   * @param string           $path     Path of the URL
   * @param QueryString|null $query    Query string of the URL
   * @param string           $fragment Fragment of the URL
   */
  public function __construct($scheme, $host, $username = null, $password = null, $port = null, $path = null, QueryString $query = null, $fragment = null)
  {
      $this->scheme = $scheme;
      $this->host = $host;
      $this->username = $username;
      $this->password = $password;
      $this->port = $port;
      $this->fragment = $fragment;

      if ($query) {
          $this->setQuery($query);
      } else {
          $this->query = new QueryString();
      }

      $this->setPath($path);
  }
  /**
   * Clone the URL, giving the copy its own clone of the query object
   * so the two URLs do not share one query instance.
   */
  public function __clone()
  {
      $queryCopy = clone $this->query;
      $this->query = $queryCopy;
  }
  /**
   * Returns the URL as a URL string, built by buildUrl() from getParts()
   *
   * @return string
   */
  public function __toString()
  {
      $parts = $this->getParts();

      return self::buildUrl($parts);
  }
  /**
   * Get the parts of the URL as an array
   *
   * The keys match those produced by parse_url(): scheme, user, pass, host, port, path,
   * query and fragment. The query is the string form of the query object, or null when
   * that string is empty.
   *
   * @return array
   */
  public function getParts()
  {
      $queryString = (string) $this->query;

      return array(
          'scheme'   => $this->scheme,
          'user'     => $this->username,
          'pass'     => $this->password,
          'host'     => $this->host,
          'port'     => $this->port,
          'path'     => $this->getPath(),
          'query'    => '' === $queryString ? null : $queryString,
          'fragment' => $this->fragment,
      );
  }
  /**
   * Set the host of the request.
   *
   * A trailing ":port" is split off and stored through setPort(). Bracketed IPv6
   * literals such as "[::1]" or "[::1]:8080" are kept intact: the colons inside the
   * brackets are part of the host, not a port separator.
   *
   * @param string $host Host to set (e.g. www.yahoo.com, yahoo.com, example.com:8080, [::1]:8080)
   *
   * @return Url
   */
  public function setHost($host)
  {
      if (strpos($host, ':') === false) {
          $this->host = $host;

          return $this;
      }

      // Bracketed IPv6 literal: the host runs up to and including the closing bracket
      if ($host[0] === '[' && false !== ($closing = strpos($host, ']'))) {
          $this->host = substr($host, 0, $closing + 1);
          $remainder = (string) substr($host, $closing + 1);
          if (strlen($remainder) > 1 && $remainder[0] === ':') {
              $this->setPort(substr($remainder, 1));
          }

          return $this;
      }

      list($hostname, $port) = explode(':', $host);
      $this->host = $hostname;
      $this->setPort($port);

      return $this;
  }
  /**
   * Get the host part of the URL as currently stored
   *
   * @return string
   */
  public function getHost()
  {
      $host = $this->host;

      return $host;
  }
  /**
   * Set the scheme part of the URL (http, https, ftp, etc)
   *
   * If the stored port is the default port of the scheme being replaced (80 for http,
   * 443 for https) it is cleared before the new scheme is stored, so getPort() can fall
   * back to the default of the new scheme.
   *
   * @param string $scheme Scheme to set
   *
   * @return Url
   */
  public function setScheme($scheme)
  {
      $onDefaultPort = ($this->scheme == 'http' && $this->port == 80)
          || ($this->scheme == 'https' && $this->port == 443);

      if ($onDefaultPort) {
          $this->port = null;
      }

      $this->scheme = $scheme;

      return $this;
  }
  /**
   * Get the scheme part of the URL as currently stored
   *
   * @return string
   */
  public function getScheme()
  {
      $scheme = $this->scheme;

      return $scheme;
  }
  /**
   * Set the port part of the URL. The value is stored as given, without validation.
   *
   * @param int $port Port to set
   *
   * @return Url
   */
  public function setPort($port)
  {
      $this->port = $port;

      return $this;
  }
  /**
   * Get the port part of the URL. Will return the default port for a given scheme if no port has been set.
   *
   * Defaults are 80 for http and 443 for https; any other scheme without an explicit
   * port yields null.
   *
   * @return int|null
   */
  public function getPort()
  {
      if (!$this->port) {
          if ($this->scheme == 'http') {
              return 80;
          }
          if ($this->scheme == 'https') {
              return 443;
          }

          return null;
      }

      return $this->port;
  }
  /**
   * Set the path part of the URL
   *
   * An array is joined with '/' and prefixed with a leading '/'. Spaces and question
   * marks in the resulting path are percent-encoded; nothing else is altered. A null
   * path (the constructor default) is stored as an empty string.
   *
   * @param array|string|null $path Path string or array of path segments
   *
   * @return Url
   */
  public function setPath($path)
  {
      static $pathReplace = array(' ' => '%20', '?' => '%3F');

      if (is_array($path)) {
          $path = '/' . implode('/', $path);
      }

      // Cast first: passing null to strtr() is deprecated since PHP 8.1, and the
      // constructor calls this method with null when no path is given.
      $this->path = strtr((string) $path, $pathReplace);

      return $this;
  }
  /**
   * Normalize the URL so that double slashes and relative paths are removed
   *
   * Empty segments and '.' segments are dropped and each '..' segment removes the
   * segment kept before it. An empty path, '/' and '*' are left untouched. A trailing
   * slash is restored when the original path ended with one.
   *
   * @return Url
   */
  public function normalizePath()
  {
      $current = $this->path;
      if (!$current || $current == '/' || $current == '*') {
          return $this;
      }

      $segments = $this->getPathSegments();
      $kept = array();
      foreach ($segments as $piece) {
          if ($piece == '..') {
              // Step back one level; popping an empty stack is a no-op
              array_pop($kept);
              continue;
          }
          if ($piece == '.' || $piece == '') {
              continue;
          }
          $kept[] = $piece;
      }

      // Combine the normalized parts and add the leading slash if needed
      $leading = $current[0] == '/' ? '/' : '';
      $normalized = $leading . implode('/', $kept);

      // Add the trailing slash if necessary
      if ($normalized != '/' && end($segments) == '') {
          $normalized .= '/';
      }

      $this->path = $normalized;

      return $this;
  }
  /**
   * Add a relative path to the currently set path.
   *
   * Nothing happens when the argument is '/', empty, or not a string. Otherwise the
   * argument is appended (with a leading '/' added if missing), every '//' in the
   * combined path is collapsed to '/', and the result is stored through setPath().
   *
   * @param string $relativePath Relative path to add
   *
   * @return Url
   */
  public function addPath($relativePath)
  {
      if ($relativePath == '/' || !is_string($relativePath) || strlen($relativePath) == 0) {
          return $this;
      }

      // Add a leading slash if needed
      $suffix = $relativePath[0] != '/' ? '/' . $relativePath : $relativePath;
      $joined = $this->path . $suffix;

      $this->setPath(str_replace('//', '/', $joined));

      return $this;
  }
  /**
   * Get the path part of the URL as currently stored
   *
   * @return string
   */
  public function getPath()
  {
      $path = $this->path;

      return $path;
  }
  /**
   * Get the path segments of the URL as an array
   *
   * The path is split on '/'. For a path that starts with '/', the empty element in
   * front of that leading slash is dropped; a relative path keeps all of its segments.
   * An empty path yields an empty array.
   *
   * @return array
   */
  public function getPathSegments()
  {
      $path = (string) $this->getPath();
      if ($path === '') {
          return array();
      }

      $segments = explode('/', $path);

      // Only a leading slash produces a meaningless empty first element. Dropping the
      // first element unconditionally would lose the first segment of a relative path.
      if ($path[0] === '/') {
          array_shift($segments);
      }

      return $segments;
  }
  /**
   * Set the password part of the URL. The value is stored as given.
   *
   * @param string $password Password to set
   *
   * @return Url
   */
  public function setPassword($password)
  {
      $this->password = $password;

      return $this;
  }
  /**
   * Get the password part of the URL as currently stored
   *
   * @return null|string
   */
  public function getPassword()
  {
      $password = $this->password;

      return $password;
  }
  /**
   * Set the username part of the URL. The value is stored as given.
   *
   * @param string $username Username to set
   *
   * @return Url
   */
  public function setUsername($username)
  {
      $this->username = $username;

      return $this;
  }
  /**
   * Get the username part of the URL as currently stored
   *
   * @return null|string
   */
  public function getUsername()
  {
      $username = $this->username;

      return $username;
  }
  /**
   * Get the query part of the URL as a QueryString object
   *
   * The stored instance itself is returned, not a copy.
   *
   * @return QueryString
   */
  public function getQuery()
  {
      $query = $this->query;

      return $query;
  }
  /**
   * Set the query part of the URL
   *
   * A QueryString instance is stored as is, an array is wrapped in a new QueryString,
   * and a string is first parsed with parse_str(). Any other type leaves the current
   * query unchanged.
   *
   * @param QueryString|string|array $query Query to set
   *
   * @return Url
   */
  public function setQuery($query)
  {
      if ($query instanceof QueryString) {
          $this->query = $query;
      } elseif (is_array($query)) {
          $this->query = new QueryString($query);
      } elseif (is_string($query)) {
          $parsed = null;
          parse_str($query, $parsed);
          $this->query = new QueryString($parsed);
      }

      return $this;
  }
  /**
   * Get the fragment part of the URL as currently stored
   *
   * @return null|string
   */
  public function getFragment()
  {
      $fragment = $this->fragment;

      return $fragment;
  }
  /**
   * Set the fragment part of the URL. The value is stored as given.
   *
   * @param string $fragment Fragment to set
   *
   * @return Url
   */
  public function setFragment($fragment)
  {
      $this->fragment = $fragment;

      return $this;
  }
  /**
   * Check if this is an absolute URL, i.e. both a scheme and a host are set
   *
   * @return bool
   */
  public function isAbsolute()
  {
      if (!$this->scheme) {
          return false;
      }

      return (bool) $this->host;
  }
  /**
   * Combine the URL with another URL. Follows the rules specified in RFC 3986 section 5.4.
   *
   * This object is modified in place and returned. If this URL is relative and the given
   * URL is absolute, the given URL is used as the base instead.
   *
   * @param string $url           Relative URL to combine with
   * @param bool   $strictRfc3986 Set to true to use strict RFC 3986 compliance when merging paths. When first
   *                              released, Guzzle used an incorrect algorithm for combining relative URL paths. In
   *                              order to not break users, we introduced this flag to allow the merging of URLs based
   *                              on strict RFC 3986 section 5.4.1. This means that "http://a.com/foo/baz" merged with
   *                              "bar" would become "http://a.com/foo/bar". When this value is set to false, it would
   *                              become "http://a.com/foo/baz/bar".
   * @return Url
   * @throws InvalidArgumentException
   * @link http://tools.ietf.org/html/rfc3986#section-5.4
   */
  public function combine($url, $strictRfc3986 = false)
  {
      $url = self::factory($url);

      // Use the more absolute URL as the base URL. The strictness flag is forwarded so the
      // swapped merge follows the same path rules the caller asked for.
      if (!$this->isAbsolute() && $url->isAbsolute()) {
          $url = $url->combine($this, $strictRfc3986);
      }

      // Passing a URL with a scheme overrides everything
      if ($buffer = $url->getScheme()) {
          $this->scheme = $buffer;
          $this->host = $url->getHost();
          $this->port = $url->getPort();
          $this->username = $url->getUsername();
          $this->password = $url->getPassword();
          $this->path = $url->getPath();
          $this->query = $url->getQuery();
          $this->fragment = $url->getFragment();

          return $this;
      }

      // Setting a host overrides the entire rest of the URL
      if ($buffer = $url->getHost()) {
          $this->host = $buffer;
          $this->port = $url->getPort();
          $this->username = $url->getUsername();
          $this->password = $url->getPassword();
          $this->path = $url->getPath();
          $this->query = $url->getQuery();
          $this->fragment = $url->getFragment();

          return $this;
      }

      $path = $url->getPath();
      $query = $url->getQuery();

      // Test for an empty path by length: a truthiness test would treat the valid
      // relative reference "0" as if no path had been given.
      if (0 === strlen((string) $path)) {
          // No path: keep the current path and only replace the query when one was given
          if (count($query)) {
              $this->addQuery($query, $strictRfc3986);
          }
      } else {
          if ($path[0] == '/') {
              // Absolute path replaces the current one
              $this->path = $path;
          } elseif ($strictRfc3986) {
              // Strict mode resolves against the parent of the last segment
              $this->path .= '/../' . $path;
          } else {
              $this->path .= '/' . $path;
          }
          $this->normalizePath();
          $this->addQuery($query, $strictRfc3986);
      }

      $this->fragment = $url->getFragment();

      return $this;
  }
  /**
   * Replace the current query with the given one.
   *
   * In non-strict mode the current query is first passed to the new query's merge()
   * method; in strict mode the new query replaces the current one outright.
   *
   * @param QueryString $new          Query taken from the URL being combined
   * @param bool        $strictRfc386 Whether strict RFC 3986 merging was requested
   */
  private function addQuery(QueryString $new, $strictRfc386)
  {
      if ($strictRfc386) {
          $this->query = $new;

          return;
      }

      $new->merge($this->query);
      $this->query = $new;
  }
  485. }