25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

1085 lines
28 KiB

  1. <?php
  2. /**
  3. * IRI parser/serialiser/normaliser
  4. *
  5. * @package Requests
  6. * @subpackage Utilities
  7. */
  8. /**
  9. * IRI parser/serialiser/normaliser
  10. *
  11. * Copyright (c) 2007-2010, Geoffrey Sneddon and Steve Minutillo.
  12. * All rights reserved.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * * Redistributions of source code must retain the above copyright notice,
  18. * this list of conditions and the following disclaimer.
  19. *
  20. * * Redistributions in binary form must reproduce the above copyright notice,
  21. * this list of conditions and the following disclaimer in the documentation
  22. * and/or other materials provided with the distribution.
  23. *
  24. * * Neither the name of the SimplePie Team nor the names of its contributors
  25. * may be used to endorse or promote products derived from this software
  26. * without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  29. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
  32. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  33. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  34. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  35. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  36. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  37. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. *
  40. * @package Requests
  41. * @subpackage Utilities
  42. * @author Geoffrey Sneddon
  43. * @author Steve Minutillo
  44. * @copyright 2007-2009 Geoffrey Sneddon and Steve Minutillo
  45. * @license http://www.opensource.org/licenses/bsd-license.php
  46. * @link http://hg.gsnedders.com/iri/
  47. *
  48. * @property string $iri IRI we're working with
  49. * @property-read string $uri IRI in URI form, {@see to_uri}
  50. * @property string $scheme Scheme part of the IRI
  51. * @property string $authority Authority part, formatted for a URI (userinfo + host + port)
  52. * @property string $iauthority Authority part of the IRI (userinfo + host + port)
  53. * @property string $userinfo Userinfo part, formatted for a URI (after '://' and before '@')
  54. * @property string $iuserinfo Userinfo part of the IRI (after '://' and before '@')
  55. * @property string $host Host part, formatted for a URI
  56. * @property string $ihost Host part of the IRI
  57. * @property string $port Port part of the IRI (after ':')
  58. * @property string $path Path part, formatted for a URI (after first '/')
  59. * @property string $ipath Path part of the IRI (after first '/')
  60. * @property string $query Query part, formatted for a URI (after '?')
  61. * @property string $iquery Query part of the IRI (after '?')
  62. * @property string $fragment Fragment, formatted for a URI (after '#')
  63. * @property string $ifragment Fragment part of the IRI (after '#')
  64. */
  class Requests_IRI {
      /**
       * Scheme, stored lowercased; null when the IRI has no scheme.
       *
       * @var string|null
       */
      protected $scheme = null;

      /**
       * User information (the part of the authority before '@').
       *
       * @var string|null
       */
      protected $iuserinfo = null;

      /**
       * Host part of the authority.
       *
       * @var string|null
       */
      protected $ihost = null;

      /**
       * Port; stored as an integer by set_port(), null when absent or equal
       * to the scheme's default port.
       *
       * @var int|null
       */
      protected $port = null;

      /**
       * Path. Never null: an absent path is the empty string.
       *
       * @var string
       */
      protected $ipath = '';

      /**
       * Query (the part after '?'), null when absent.
       *
       * @var string|null
       */
      protected $iquery = null;

      /**
       * Fragment (the part after '#'), null when absent.
       *
       * @var string|null
       */
      protected $ifragment = null;

      /**
       * Normalization database
       *
       * Each key is the scheme, each value is an array with each key as the IRI
       * part and value as the default value for that part. A part equal to its
       * default is dropped by scheme_normalization() and handed back by __get().
       *
       * @var array
       */
      protected $normalization = array(
          'acap' => array(
              'port' => 674
          ),
          'dict' => array(
              'port' => 2628
          ),
          'file' => array(
              'ihost' => 'localhost'
          ),
          'http' => array(
              'port' => 80,
          ),
          'https' => array(
              'port' => 443,
          ),
      );

      /**
       * Return the entire IRI when the object is read as a string.
       *
       * get_iri() returns false for an invalid IRI; the cast guarantees that a
       * string (the empty string in that case) is always returned, as
       * __toString() requires.
       *
       * @return string
       */
      public function __toString() {
          return (string) $this->get_iri();
      }

      /**
       * Overload __set() to provide access via properties
       *
       * A name with a matching set_<name>() method is routed to that method.
       * The "i"-prefixed IRI names (e.g. ihost) are routed to the setter of the
       * unprefixed name (set_host). Any other name is silently ignored.
       *
       * @param string $name Property name
       * @param mixed $value Property value
       */
      public function __set($name, $value) {
          if (method_exists($this, 'set_' . $name)) {
              call_user_func(array($this, 'set_' . $name), $value);
          }
          elseif (
              $name === 'iauthority'
              || $name === 'iuserinfo'
              || $name === 'ihost'
              || $name === 'ipath'
              || $name === 'iquery'
              || $name === 'ifragment'
          ) {
              call_user_func(array($this, 'set_' . substr($name, 1)), $value);
          }
      }

      /**
       * Overload __get() to provide access via properties
       *
       * Lookup order: computed values (iri, uri, iauthority, authority), then a
       * property of that exact name, then the name with an "i" prefix added
       * (host -> ihost), then the name with its first character removed
       * (ischeme -> scheme). When the resolved value is null and the current
       * scheme defines a default for that part, the default is returned.
       *
       * @param string $name Property name
       * @return mixed
       */
      public function __get($name) {
          // isset() returns false for null, which is a legitimate stored value
          // here, so array_key_exists() is used on the property list instead.
          $props = get_object_vars($this);

          if (
              $name === 'iri' ||
              $name === 'uri' ||
              $name === 'iauthority' ||
              $name === 'authority'
          ) {
              $method = 'get_' . $name;
              $return = $this->$method();
          }
          elseif (array_key_exists($name, $props)) {
              $return = $this->$name;
          }
          // host -> ihost
          elseif (($prop = 'i' . $name) && array_key_exists($prop, $props)) {
              $name = $prop;
              $return = $this->$prop;
          }
          // ischeme -> scheme
          elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
              $name = $prop;
              $return = $this->$prop;
          }
          else {
              trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
              $return = null;
          }

          // Fall back to the scheme's default. The scheme is checked for null
          // first so that null is never used as an array offset.
          if ($return === null && $this->scheme !== null && isset($this->normalization[$this->scheme][$name])) {
              return $this->normalization[$this->scheme][$name];
          }

          return $return;
      }

      /**
       * Overload __isset() to provide access via properties
       *
       * @param string $name Property name
       * @return bool
       */
      public function __isset($name) {
          return (method_exists($this, 'get_' . $name) || isset($this->$name));
      }

      /**
       * Overload __unset() to provide access via properties
       *
       * "Unsetting" a part passes the empty string to its setter; names
       * without a set_<name>() method are ignored.
       *
       * @param string $name Property name
       */
      public function __unset($name) {
          if (method_exists($this, 'set_' . $name)) {
              call_user_func(array($this, 'set_' . $name), '');
          }
      }

      /**
       * Create a new IRI object, from a specified string
       *
       * @param string|null $iri
       */
      public function __construct($iri = null) {
          $this->set_iri($iri);
      }

      /**
       * Create a new IRI object by resolving a relative IRI
       *
       * Returns false if $relative is invalid or if $base is not a valid
       * absolute IRI; otherwise returns a new IRI object.
       *
       * @param Requests_IRI|string $base (Absolute) Base IRI
       * @param Requests_IRI|string $relative Relative IRI
       * @return Requests_IRI|false
       */
      public static function absolutize($base, $relative) {
          if (!($relative instanceof Requests_IRI)) {
              $relative = new Requests_IRI($relative);
          }
          if (!$relative->is_valid()) {
              return false;
          }
          // A reference that carries its own scheme is already absolute.
          if ($relative->scheme !== null) {
              return clone $relative;
          }

          if (!($base instanceof Requests_IRI)) {
              $base = new Requests_IRI($base);
          }
          if ($base->scheme === null || !$base->is_valid()) {
              return false;
          }

          if ($relative->get_iri() === '') {
              // Empty reference: the base itself, minus its fragment.
              $target = clone $base;
              $target->ifragment = null;
          }
          elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
              // Network-path reference: only the scheme comes from the base.
              $target = clone $relative;
              $target->scheme = $base->scheme;
              // The path was parsed without a scheme, so its dot segments were
              // kept; the resolved (absolute) target must not contain any.
              $target->ipath = $target->remove_dot_segments($target->ipath);
          }
          else {
              $target = new Requests_IRI;
              $target->scheme = $base->scheme;
              $target->iuserinfo = $base->iuserinfo;
              $target->ihost = $base->ihost;
              $target->port = $base->port;

              if ($relative->ipath !== '') {
                  if ($relative->ipath[0] === '/') {
                      // Absolute path: replaces the base path outright.
                      $target->ipath = $relative->ipath;
                  }
                  elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
                      // Base has an authority but no path: root the reference.
                      $target->ipath = '/' . $relative->ipath;
                  }
                  elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
                      // Replace everything after the last '/' of the base path.
                      $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
                  }
                  else {
                      $target->ipath = $relative->ipath;
                  }
                  $target->ipath = $target->remove_dot_segments($target->ipath);
                  $target->iquery = $relative->iquery;
              }
              else {
                  // No path in the reference: keep the base path, and keep the
                  // base query unless the reference supplies its own.
                  $target->ipath = $base->ipath;
                  if ($relative->iquery !== null) {
                      $target->iquery = $relative->iquery;
                  }
                  elseif ($base->iquery !== null) {
                      $target->iquery = $base->iquery;
                  }
              }
              $target->ifragment = $relative->ifragment;
          }

          $target->scheme_normalization();
          return $target;
      }

      /**
       * Parse an IRI into scheme/authority/path/query/fragment segments
       *
       * The returned array is the raw preg_match() result with the named
       * entries 'scheme', 'authority', 'path', 'query' and 'fragment'
       * adjusted: a component that is absent is set to null (the path to '').
       *
       * @throws Requests_Exception If the string cannot be matched at all
       * @param string $iri
       * @return array
       */
      protected function parse_iri($iri) {
          // Strip leading/trailing space, tab, LF, FF and CR.
          $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
          $has_match = preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match);
          if (!$has_match) {
              throw new Requests_Exception('Cannot parse supplied IRI', 'iri.cannot_parse', $iri);
          }

          // The wrapping groups (1, 3, 6, 8) include the delimiter, so they are
          // empty or missing exactly when the component itself is absent; this
          // distinguishes "no query" from "empty query" ("?").
          if ($match[1] === '') {
              $match['scheme'] = null;
          }
          if (!isset($match[3]) || $match[3] === '') {
              $match['authority'] = null;
          }
          if (!isset($match[5])) {
              $match['path'] = '';
          }
          if (!isset($match[6]) || $match[6] === '') {
              $match['query'] = null;
          }
          if (!isset($match[8]) || $match[8] === '') {
              $match['fragment'] = null;
          }
          return $match;
      }

      /**
       * Remove dot segments from a path
       *
       * Works through the input buffer from the left, moving finished segments
       * to the output buffer, until no "." or ".." segment can remain.
       *
       * @param string $input
       * @return string
       */
      protected function remove_dot_segments($input) {
          $output = '';
          while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
              // A: If the input buffer begins with a prefix of "../" or "./",
              // then remove that prefix from the input buffer; otherwise,
              if (strpos($input, '../') === 0) {
                  $input = substr($input, 3);
              }
              elseif (strpos($input, './') === 0) {
                  $input = substr($input, 2);
              }
              // B: if the input buffer begins with a prefix of "/./" or "/.",
              // where "." is a complete path segment, then replace that prefix
              // with "/" in the input buffer; otherwise,
              elseif (strpos($input, '/./') === 0) {
                  $input = substr($input, 2);
              }
              elseif ($input === '/.') {
                  $input = '/';
              }
              // C: if the input buffer begins with a prefix of "/../" or "/..",
              // where ".." is a complete path segment, then replace that prefix
              // with "/" in the input buffer and remove the last segment and its
              // preceding "/" (if any) from the output buffer; otherwise,
              elseif (strpos($input, '/../') === 0) {
                  $input = substr($input, 3);
                  $cut = strrpos($output, '/');
                  // No "/" in the output means the whole output is one segment.
                  $output = ($cut === false) ? '' : substr($output, 0, $cut);
              }
              elseif ($input === '/..') {
                  $input = '/';
                  $cut = strrpos($output, '/');
                  $output = ($cut === false) ? '' : substr($output, 0, $cut);
              }
              // D: if the input buffer consists only of "." or "..", then remove
              // that from the input buffer; otherwise,
              elseif ($input === '.' || $input === '..') {
                  $input = '';
              }
              // E: move the first path segment in the input buffer to the end of
              // the output buffer, including the initial "/" character (if any)
              // and any subsequent characters up to, but not including, the next
              // "/" character or the end of the input buffer
              elseif (($pos = strpos($input, '/', 1)) !== false) {
                  $output .= substr($input, 0, $pos);
                  $input = substr_replace($input, '', 0, $pos);
              }
              else {
                  $output .= $input;
                  $input = '';
              }
          }
          return $output . $input;
      }

      /**
       * Replace invalid character with percent encoding
       *
       * First decodes percent-encoded sequences that do not need encoding,
       * escapes stray '%' signs, then scans the bytes that are not plain
       * allowed ASCII: each is decoded as UTF-8 and percent-encoded byte by
       * byte when it is malformed, overlong, or outside the permitted
       * codepoint ranges.
       *
       * @param string $string Input string
       * @param string $extra_chars Valid characters not in iunreserved or
       *                            iprivate (this is ASCII-only)
       * @param bool $iprivate Allow iprivate
       * @return string
       */
      protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false) {
          // Normalize as many pct-encoded sections as possible
          $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

          // Replace invalid percent characters
          $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

          // Add unreserved and % to $extra_chars (the latter is safe because all
          // pct-encoded sections are now valid).
          $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

          // Now replace any bytes that aren't allowed with their pct-encoded versions
          $position = 0;
          $strlen = strlen($string);
          // strspn() skips the run of allowed ASCII, so each iteration starts
          // on a byte that needs inspection.
          while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
              $value = ord($string[$position]);

              // Start position
              $start = $position;

              // By default we are valid
              $valid = true;

              // No one byte sequences are valid due to the while.
              // Two byte sequence:
              if (($value & 0xE0) === 0xC0) {
                  $character = ($value & 0x1F) << 6;
                  $length = 2;
                  $remaining = 1;
              }
              // Three byte sequence:
              elseif (($value & 0xF0) === 0xE0) {
                  $character = ($value & 0x0F) << 12;
                  $length = 3;
                  $remaining = 2;
              }
              // Four byte sequence:
              elseif (($value & 0xF8) === 0xF0) {
                  $character = ($value & 0x07) << 18;
                  $length = 4;
                  $remaining = 3;
              }
              // Invalid byte:
              else {
                  $valid = false;
                  $length = 1;
                  $remaining = 0;
              }

              if ($remaining) {
                  if ($position + $length <= $strlen) {
                      // On normal completion $position ends one past the last
                      // byte of the sequence.
                      for ($position++; $remaining; $position++) {
                          $value = ord($string[$position]);

                          // Check that the byte is valid, then add it to the character:
                          if (($value & 0xC0) === 0x80) {
                              $character |= ($value & 0x3F) << (--$remaining * 6);
                          }
                          // If it is invalid, count the sequence as invalid and reprocess the current byte:
                          else {
                              $valid = false;
                              $position--;
                              break;
                          }
                      }
                  }
                  else {
                      // Sequence runs past the end of the string: everything
                      // from $start to the final byte is invalid.
                      $position = $strlen - 1;
                      $valid = false;
                  }
              }

              // Percent encode anything invalid or not in ucschar
              // ($character is only read when $valid is true.)
              if (
                  // Invalid sequences
                  !$valid
                  // Non-shortest form sequences are invalid
                  || $length > 1 && $character <= 0x7F
                  || $length > 2 && $character <= 0x7FF
                  || $length > 3 && $character <= 0xFFFF
                  // Outside of range of ucschar codepoints
                  // Noncharacters
                  || ($character & 0xFFFE) === 0xFFFE
                  || $character >= 0xFDD0 && $character <= 0xFDEF
                  || (
                      // Everything else not in ucschar
                      $character > 0xD7FF && $character < 0xF900
                      || $character < 0xA0
                      || $character > 0xEFFFD
                  )
                  && (
                      // Everything not in iprivate, if it applies
                      !$iprivate
                      || $character < 0xE000
                      || $character > 0x10FFFD
                  )
              ) {
                  // If we were a character, pretend we weren't, but rather an error.
                  if ($valid) {
                      $position--;
                  }

                  // Each encoded byte grows the string by two characters, so
                  // the cursor, the end marker and the length all shift by 2.
                  for ($j = $start; $j <= $position; $j++) {
                      $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                      $j += 2;
                      $position += 2;
                      $strlen += 2;
                  }
              }
          }

          return $string;
      }

      /**
       * Callback function for preg_replace_callback.
       *
       * Removes sequences of percent encoded bytes that represent UTF-8
       * encoded characters in iunreserved. Sequences that stay encoded are
       * re-emitted with uppercase hex digits.
       *
       * @param array $match PCRE match
       * @return string Replacement
       */
      protected function remove_iunreserved_percent_encoded($match) {
          // As we just have valid percent encoded sequences we can just explode
          // and ignore the first member of the returned array (an empty string).
          $bytes = explode('%', $match[0]);

          // Initialize the new string (this is what will be returned) and that
          // there are no bytes remaining in the current sequence (unsurprising
          // at the first byte!).
          $string = '';
          $remaining = 0;

          // Loop over each and every byte, and set $value to its value
          for ($i = 1, $len = count($bytes); $i < $len; $i++) {
              $value = hexdec($bytes[$i]);

              // If we're the first byte of sequence:
              if (!$remaining) {
                  // Start position
                  $start = $i;

                  // By default we are valid
                  $valid = true;

                  // One byte sequence:
                  if ($value <= 0x7F) {
                      $character = $value;
                      $length = 1;
                  }
                  // Two byte sequence:
                  elseif (($value & 0xE0) === 0xC0) {
                      $character = ($value & 0x1F) << 6;
                      $length = 2;
                      $remaining = 1;
                  }
                  // Three byte sequence:
                  elseif (($value & 0xF0) === 0xE0) {
                      $character = ($value & 0x0F) << 12;
                      $length = 3;
                      $remaining = 2;
                  }
                  // Four byte sequence:
                  elseif (($value & 0xF8) === 0xF0) {
                      $character = ($value & 0x07) << 18;
                      $length = 4;
                      $remaining = 3;
                  }
                  // Invalid byte:
                  else {
                      $valid = false;
                      $remaining = 0;
                  }
              }
              // Continuation byte:
              else {
                  // Check that the byte is valid, then add it to the character:
                  if (($value & 0xC0) === 0x80) {
                      $remaining--;
                      $character |= ($value & 0x3F) << ($remaining * 6);
                  }
                  // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
                  else {
                      $valid = false;
                      $remaining = 0;
                      $i--;
                  }
              }

              // If we've reached the end of the current byte sequence, emit it
              if (!$remaining) {
                  // Percent encode anything invalid or not in iunreserved
                  // ($length and $character are only read when $valid is true.)
                  if (
                      // Invalid sequences
                      !$valid
                      // Non-shortest form sequences are invalid
                      || $length > 1 && $character <= 0x7F
                      || $length > 2 && $character <= 0x7FF
                      || $length > 3 && $character <= 0xFFFF
                      // Outside of range of iunreserved codepoints
                      || $character < 0x2D
                      || $character > 0xEFFFD
                      // Noncharacters
                      || ($character & 0xFFFE) === 0xFFFE
                      || $character >= 0xFDD0 && $character <= 0xFDEF
                      // Everything else not in iunreserved (this is all BMP)
                      || $character === 0x2F
                      || $character > 0x39 && $character < 0x41
                      || $character > 0x5A && $character < 0x61
                      || $character > 0x7A && $character < 0x7E
                      || $character > 0x7E && $character < 0xA0
                      || $character > 0xD7FF && $character < 0xF900
                  ) {
                      // Keep the sequence encoded, with uppercase hex digits.
                      for ($j = $start; $j <= $i; $j++) {
                          $string .= '%' . strtoupper($bytes[$j]);
                      }
                  }
                  else {
                      // Safe to decode: emit the raw bytes.
                      for ($j = $start; $j <= $i; $j++) {
                          $string .= chr(hexdec($bytes[$j]));
                      }
                  }
              }
          }

          // If we have any bytes left over they are invalid (i.e., we are
          // mid-way through a multi-byte sequence)
          if ($remaining) {
              for ($j = $start; $j < $len; $j++) {
                  $string .= '%' . strtoupper($bytes[$j]);
              }
          }

          return $string;
      }

      /**
       * Apply scheme-based normalization to the current parts.
       *
       * Every part that equals the default registered for the current scheme
       * is reset to its "absent" value. Independently of the scheme, an empty
       * path is replaced by '/' whenever a host is present.
       */
      protected function scheme_normalization() {
          // Defaults for the current scheme; empty when there is no scheme or
          // the scheme has no entry (null is never used as an array offset).
          $defaults = array();
          if ($this->scheme !== null && isset($this->normalization[$this->scheme])) {
              $defaults = $this->normalization[$this->scheme];
          }

          if (isset($defaults['iuserinfo']) && $this->iuserinfo === $defaults['iuserinfo']) {
              $this->iuserinfo = null;
          }
          if (isset($defaults['ihost']) && $this->ihost === $defaults['ihost']) {
              $this->ihost = null;
          }
          if (isset($defaults['port']) && $this->port === $defaults['port']) {
              $this->port = null;
          }
          if (isset($defaults['ipath']) && $this->ipath === $defaults['ipath']) {
              $this->ipath = '';
          }
          // Must follow the host/path defaults above, since it reads both.
          if (isset($this->ihost) && empty($this->ipath)) {
              $this->ipath = '/';
          }
          if (isset($defaults['iquery']) && $this->iquery === $defaults['iquery']) {
              $this->iquery = null;
          }
          if (isset($defaults['ifragment']) && $this->ifragment === $defaults['ifragment']) {
              $this->ifragment = null;
          }
      }

      /**
       * Check if the object represents a valid IRI. This needs to be done on each
       * call as some things change depending on another part of the IRI.
       *
       * Two combinations are rejected: an authority together with a non-empty
       * path that does not start with '/', and a scheme-less, authority-less
       * path whose first segment contains ':' (it would read as a scheme).
       *
       * @return bool
       */
      public function is_valid() {
          if ($this->ipath === '') {
              return true;
          }

          $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
          if ($isauthority) {
              return $this->ipath[0] === '/';
          }

          if ($this->scheme !== null) {
              return true;
          }

          $colon = strpos($this->ipath, ':');
          if ($colon === false) {
              return true;
          }

          // A colon is only acceptable after the first '/'.
          $slash = strpos($this->ipath, '/');
          return $slash !== false && $slash < $colon;
      }

      /**
       * Set the entire IRI. Returns true on success, false on failure (if there
       * are any invalid characters).
       *
       * Successful results are memoised per IRI string in a static cache
       * shared by all instances (never evicted). Failures are not cached: a
       * failed parse leaves the parts after the failing one at this object's
       * previous values, so its outcome is not a function of the string alone.
       *
       * @param string $iri
       * @return bool
       */
      protected function set_iri($iri) {
          static $cache;
          if (!$cache) {
              $cache = array();
          }

          if ($iri === null) {
              return true;
          }

          // Cast before the lookup so that every input maps to the cache key
          // of the string that is actually parsed.
          $iri = (string) $iri;

          if (isset($cache[$iri])) {
              list($this->scheme,
                   $this->iuserinfo,
                   $this->ihost,
                   $this->port,
                   $this->ipath,
                   $this->iquery,
                   $this->ifragment) = $cache[$iri];
              return true;
          }

          $parsed = $this->parse_iri($iri);

          // Order matters: later setters normalize against the scheme and
          // authority set by the earlier ones.
          $return = $this->set_scheme($parsed['scheme'])
              && $this->set_authority($parsed['authority'])
              && $this->set_path($parsed['path'])
              && $this->set_query($parsed['query'])
              && $this->set_fragment($parsed['fragment']);

          if ($return) {
              $cache[$iri] = array($this->scheme,
                                   $this->iuserinfo,
                                   $this->ihost,
                                   $this->port,
                                   $this->ipath,
                                   $this->iquery,
                                   $this->ifragment);
          }
          return $return;
      }

      /**
       * Set the scheme. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * A valid scheme is stored lowercased; an invalid one clears the scheme.
       *
       * @param string $scheme
       * @return bool
       */
      protected function set_scheme($scheme) {
          if ($scheme === null) {
              $this->scheme = null;
              return true;
          }

          if (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
              $this->scheme = null;
              return false;
          }

          $this->scheme = strtolower($scheme);
          return true;
      }

      /**
       * Set the authority. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * Splits the authority into userinfo (before the last '@'), host and
       * port (after the first ':' that follows any ']'), and hands each to its
       * setter. Successful results are memoised in a static cache shared by
       * all instances. Because the stored parts are already normalized for
       * the current scheme, the cache key includes the scheme.
       *
       * @param string $authority
       * @return bool
       */
      protected function set_authority($authority) {
          static $cache;
          if (!$cache) {
              $cache = array();
          }

          if ($authority === null) {
              $this->iuserinfo = null;
              $this->ihost = null;
              $this->port = null;
              return true;
          }

          $authority = (string) $authority;

          // A scheme never contains ':', so "<scheme>:<authority>" is an
          // unambiguous key (a null scheme contributes the empty string).
          $cache_key = $this->scheme . ':' . $authority;

          if (isset($cache[$cache_key])) {
              list($this->iuserinfo,
                   $this->ihost,
                   $this->port) = $cache[$cache_key];
              // The uncached path normalizes inside set_host(), which also
              // defaults an empty path to '/'; repeat that here so a cache hit
              // leaves the object in the same state.
              $this->scheme_normalization();
              return true;
          }

          $remaining = $authority;

          $iuserinfo_end = strrpos($remaining, '@');
          if ($iuserinfo_end !== false) {
              $iuserinfo = substr($remaining, 0, $iuserinfo_end);
              $remaining = substr($remaining, $iuserinfo_end + 1);
          }
          else {
              $iuserinfo = null;
          }

          // Start looking for the port separator at the closing bracket, if
          // any, so colons inside a bracketed host are skipped.
          $bracket = strpos($remaining, ']');
          $port_start = strpos($remaining, ':', ($bracket === false) ? 0 : $bracket);
          if ($port_start !== false) {
              $port = substr($remaining, $port_start + 1);
              if ($port === false || $port === '') {
                  $port = null;
              }
              $remaining = substr($remaining, 0, $port_start);
          }
          else {
              $port = null;
          }

          $return = $this->set_userinfo($iuserinfo) &&
                    $this->set_host($remaining) &&
                    $this->set_port($port);

          // Only successes are cached: on failure the parts after the failing
          // one still hold this object's previous values.
          if ($return) {
              $cache[$cache_key] = array($this->iuserinfo,
                                         $this->ihost,
                                         $this->port);
          }

          return $return;
      }

      /**
       * Set the iuserinfo.
       *
       * @param string $iuserinfo
       * @return bool Always true
       */
      protected function set_userinfo($iuserinfo) {
          if ($iuserinfo === null) {
              $this->iuserinfo = null;
              return true;
          }

          $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
          $this->scheme_normalization();
          return true;
      }

      /**
       * Set the ihost. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * A bracketed host is validated and compressed through Requests_IPv6
       * (defined outside this file); any other host is percent-encoded where
       * needed and lowercased.
       *
       * @param string $ihost
       * @return bool
       */
      protected function set_host($ihost) {
          if ($ihost === null) {
              $this->ihost = null;
              return true;
          }

          if (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') {
              $address = substr($ihost, 1, -1);
              if (!Requests_IPv6::check_ipv6($address)) {
                  $this->ihost = null;
                  return false;
              }
              $this->ihost = '[' . Requests_IPv6::compress($address) . ']';
          }
          else {
              $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

              // Lowercase, but ignore pct-encoded sections (as they should
              // remain uppercase). This must be done after the previous step
              // as that can add unescaped characters.
              $position = 0;
              $strlen = strlen($ihost);
              while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen) {
                  if ($ihost[$position] === '%') {
                      // Skip the '%' and its two hex digits.
                      $position += 3;
                  }
                  else {
                      $ihost[$position] = strtolower($ihost[$position]);
                      $position++;
                  }
              }

              $this->ihost = $ihost;
          }

          $this->scheme_normalization();

          return true;
      }

      /**
       * Set the port. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * null and the empty string both mean "no port" (set_authority() already
       * maps an empty port to null; this keeps unset($iri->port) from storing
       * port 0). A digits-only value is stored as an integer.
       *
       * @param string $port
       * @return bool
       */
      protected function set_port($port) {
          if ($port === null || $port === '') {
              $this->port = null;
              return true;
          }

          if (strspn($port, '0123456789') === strlen($port)) {
              $this->port = (int) $port;
              $this->scheme_normalization();
              return true;
          }

          $this->port = null;
          return false;
      }

      /**
       * Set the ipath.
       *
       * The path is percent-encoded where needed. Dot segments are removed
       * only when a scheme is set, so relative references keep theirs until
       * they are resolved. Both variants are memoised per input string in a
       * static cache shared by all instances.
       *
       * @param string $ipath
       * @return bool Always true
       */
      protected function set_path($ipath) {
          static $cache;
          if (!$cache) {
              $cache = array();
          }

          $ipath = (string) $ipath;

          if (!isset($cache[$ipath])) {
              $valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
              $removed = $this->remove_dot_segments($valid);
              // Index 0: dot segments kept; index 1: dot segments removed.
              $cache[$ipath] = array($valid, $removed);
          }

          $this->ipath = $cache[$ipath][($this->scheme !== null) ? 1 : 0];

          $this->scheme_normalization();
          return true;
      }

      /**
       * Set the iquery.
       *
       * Unlike the other parts, the query is encoded with the iprivate
       * codepoint range allowed.
       *
       * @param string $iquery
       * @return bool Always true
       */
      protected function set_query($iquery) {
          if ($iquery === null) {
              $this->iquery = null;
              return true;
          }

          $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
          $this->scheme_normalization();
          return true;
      }

      /**
       * Set the ifragment.
       *
       * @param string $ifragment
       * @return bool Always true
       */
      protected function set_fragment($ifragment) {
          if ($ifragment === null) {
              $this->ifragment = null;
              return true;
          }

          $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
          $this->scheme_normalization();
          return true;
      }

      /**
       * Convert an IRI to a URI (or parts thereof)
       *
       * Every byte in the range 0x80-0xFF is replaced by its percent-encoded
       * form; ASCII bytes are left untouched.
       *
       * @param string|bool $string IRI to convert (or false from {@see get_iri})
       * @return string|false URI if IRI is valid, false otherwise.
       */
      protected function to_uri($string) {
          if (!is_string($string)) {
              return false;
          }

          // All non-ASCII byte values, built once per process.
          static $non_ascii;
          if (!$non_ascii) {
              $non_ascii = '';
              for ($byte = 0x80; $byte <= 0xFF; $byte++) {
                  $non_ascii .= chr($byte);
              }
          }

          $position = 0;
          $strlen = strlen($string);
          // strcspn() jumps to the next non-ASCII byte, if any.
          while (($position += strcspn($string, $non_ascii, $position)) < $strlen) {
              $string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
              $position += 3;
              $strlen += 2;
          }

          return $string;
      }

      /**
       * Get the complete IRI
       *
       * @return string|false The serialised IRI, or false when the current
       *                      parts do not form a valid IRI
       */
      protected function get_iri() {
          if (!$this->is_valid()) {
              return false;
          }

          $iri = '';
          if ($this->scheme !== null) {
              $iri .= $this->scheme . ':';
          }

          $iauthority = $this->get_iauthority();
          if ($iauthority !== null) {
              $iri .= '//' . $iauthority;
          }

          $iri .= $this->ipath;

          if ($this->iquery !== null) {
              $iri .= '?' . $this->iquery;
          }
          if ($this->ifragment !== null) {
              $iri .= '#' . $this->ifragment;
          }

          return $iri;
      }

      /**
       * Get the complete URI
       *
       * @return string|false The IRI in URI form, or false when it is invalid
       */
      protected function get_uri() {
          return $this->to_uri($this->get_iri());
      }

      /**
       * Get the complete iauthority
       *
       * @return string|null userinfo '@' host ':' port, with absent parts
       *                     omitted; null when all three are absent
       */
      protected function get_iauthority() {
          if ($this->iuserinfo === null && $this->ihost === null && $this->port === null) {
              return null;
          }

          $iauthority = '';
          if ($this->iuserinfo !== null) {
              $iauthority .= $this->iuserinfo . '@';
          }
          if ($this->ihost !== null) {
              $iauthority .= $this->ihost;
          }
          if ($this->port !== null) {
              $iauthority .= ':' . $this->port;
          }
          return $iauthority;
      }

      /**
       * Get the complete authority
       *
       * @return string|null The authority in URI form, or null when there is
       *                     no authority
       */
      protected function get_authority() {
          $iauthority = $this->get_iauthority();
          if (is_string($iauthority)) {
              return $this->to_uri($iauthority);
          }

          return $iauthority;
      }
  }
  998. }