You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

1239 lines
28 KiB

  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @version 1.3.1
  37. * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38. * @author Ryan Parman
  39. * @author Geoffrey Sneddon
  40. * @author Ryan McCue
  41. * @link http://simplepie.org/ SimplePie
  42. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43. */
  /**
   * IRI parser/serialiser/normaliser
   *
   * Holds the components of an IRI (scheme, userinfo, host, port, path, query
   * and fragment), normalises them as they are set, and serialises them back
   * to an IRI or a URI. Components are exposed as virtual properties through
   * the __get()/__set()/__isset()/__unset() overloads.
   *
   * @package SimplePie
   * @subpackage HTTP
   * @author Geoffrey Sneddon
   * @author Steve Minutillo
   * @author Ryan McCue
   * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue
   * @license http://www.opensource.org/licenses/bsd-license.php
   */
  class SimplePie_IRI
  {
      /**
       * Scheme (always stored lower-case), or null when absent
       *
       * @var string|null
       */
      protected $scheme = null;

      /**
       * User Information, or null when absent
       *
       * @var string|null
       */
      protected $iuserinfo = null;

      /**
       * ihost, or null when absent
       *
       * @var string|null
       */
      protected $ihost = null;

      /**
       * Port, or null when absent (or equal to the scheme's default)
       *
       * @var int|null
       */
      protected $port = null;

      /**
       * ipath (never null; the empty string means "no path")
       *
       * @var string
       */
      protected $ipath = '';

      /**
       * iquery, or null when absent
       *
       * @var string|null
       */
      protected $iquery = null;

      /**
       * ifragment, or null when absent
       *
       * @var string|null
       */
      protected $ifragment = null;

      /**
       * Normalization database
       *
       * Each key is the scheme, each value is an array with each key as the IRI
       * part and value as the default value for that part.
       *
       * @var array
       */
      protected $normalization = array(
          'acap' => array(
              'port' => 674
          ),
          'dict' => array(
              'port' => 2628
          ),
          'file' => array(
              'ihost' => 'localhost'
          ),
          'http' => array(
              'port' => 80,
              'ipath' => '/'
          ),
          'https' => array(
              'port' => 443,
              'ipath' => '/'
          ),
      );

      /**
       * Return the entire IRI when you try and read the object as a string
       *
       * An invalid IRI serialises to the empty string.
       *
       * @return string
       */
      public function __toString()
      {
          // get_iri() yields false for an invalid IRI, but __toString() must
          // always return a string (anything else is a fatal error before
          // PHP 8), hence the cast.
          return (string) $this->get_iri();
      }

      /**
       * Overload __set() to provide access via properties
       *
       * Dispatches to the matching set_*() method. The "i"-prefixed names
       * (iauthority, iuserinfo, ihost, ipath, iquery, ifragment) are mapped to
       * the setter named without the prefix. Unknown names are ignored.
       *
       * @param string $name Property name
       * @param mixed $value Property value
       */
      public function __set($name, $value)
      {
          if (method_exists($this, 'set_' . $name))
          {
              call_user_func(array($this, 'set_' . $name), $value);
          }
          elseif (
              $name === 'iauthority'
              || $name === 'iuserinfo'
              || $name === 'ihost'
              || $name === 'ipath'
              || $name === 'iquery'
              || $name === 'ifragment'
          )
          {
              call_user_func(array($this, 'set_' . substr($name, 1)), $value);
          }
      }

      /**
       * Overload __get() to provide access via properties
       *
       * When the stored value is null and the normalization database has a
       * default for the current scheme and property, that default is returned.
       * Reading an unknown name raises an E_USER_NOTICE and yields null.
       *
       * @param string $name Property name
       * @return mixed
       */
      public function __get($name)
      {
          // isset() returns false for null, we don't want to do that
          // Also why we use array_key_exists below instead of isset()
          $props = get_object_vars($this);

          if (
              $name === 'iri' ||
              $name === 'uri' ||
              $name === 'iauthority' ||
              $name === 'authority'
          )
          {
              $return = $this->{"get_$name"}();
          }
          elseif (array_key_exists($name, $props))
          {
              $return = $this->$name;
          }
          // host -> ihost
          elseif (($prop = 'i' . $name) && array_key_exists($prop, $props))
          {
              $name = $prop;
              $return = $this->$prop;
          }
          // ischeme -> scheme
          elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props))
          {
              $name = $prop;
              $return = $this->$prop;
          }
          else
          {
              trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
              $return = null;
          }

          if ($return === null && isset($this->normalization[$this->scheme][$name]))
          {
              return $this->normalization[$this->scheme][$name];
          }
          else
          {
              return $return;
          }
      }

      /**
       * Overload __isset() to provide access via properties
       *
       * @param string $name Property name
       * @return bool True when a get_*() method exists for the name or the
       *              stored property is non-null
       */
      public function __isset($name)
      {
          if (method_exists($this, 'get_' . $name) || isset($this->$name))
          {
              return true;
          }
          else
          {
              return false;
          }
      }

      /**
       * Overload __unset() to provide access via properties
       *
       * Calls the matching set_*() method with an empty string; names without
       * such a setter are ignored.
       *
       * @param string $name Property name
       */
      public function __unset($name)
      {
          if (method_exists($this, 'set_' . $name))
          {
              call_user_func(array($this, 'set_' . $name), '');
          }
      }

      /**
       * Create a new IRI object, from a specified string
       *
       * @param string $iri
       */
      public function __construct($iri = null)
      {
          $this->set_iri($iri);
      }

      /**
       * Create a new IRI object by resolving a relative IRI
       *
       * Returns false if $base is not absolute (or either argument is
       * invalid), otherwise an IRI.
       *
       * @param SimplePie_IRI|string $base (Absolute) Base IRI
       * @param SimplePie_IRI|string $relative Relative IRI
       * @return SimplePie_IRI|false
       */
      public static function absolutize($base, $relative)
      {
          if (!($relative instanceof SimplePie_IRI))
          {
              $relative = new SimplePie_IRI($relative);
          }
          if (!$relative->is_valid())
          {
              return false;
          }
          elseif ($relative->scheme !== null)
          {
              // Already absolute: nothing to resolve.
              return clone $relative;
          }
          else
          {
              if (!($base instanceof SimplePie_IRI))
              {
                  $base = new SimplePie_IRI($base);
              }
              if ($base->scheme !== null && $base->is_valid())
              {
                  if ($relative->get_iri() !== '')
                  {
                      if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
                      {
                          // Network-path reference: only the scheme is inherited.
                          $target = clone $relative;
                          $target->scheme = $base->scheme;
                      }
                      else
                      {
                          $target = new SimplePie_IRI;
                          $target->scheme = $base->scheme;
                          $target->iuserinfo = $base->iuserinfo;
                          $target->ihost = $base->ihost;
                          $target->port = $base->port;
                          if ($relative->ipath !== '')
                          {
                              if ($relative->ipath[0] === '/')
                              {
                                  $target->ipath = $relative->ipath;
                              }
                              elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '')
                              {
                                  $target->ipath = '/' . $relative->ipath;
                              }
                              elseif (($last_segment = strrpos($base->ipath, '/')) !== false)
                              {
                                  // Merge: everything up to and including the
                                  // base's last "/" followed by the reference.
                                  $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
                              }
                              else
                              {
                                  $target->ipath = $relative->ipath;
                              }
                              $target->ipath = $target->remove_dot_segments($target->ipath);
                              $target->iquery = $relative->iquery;
                          }
                          else
                          {
                              $target->ipath = $base->ipath;
                              if ($relative->iquery !== null)
                              {
                                  $target->iquery = $relative->iquery;
                              }
                              elseif ($base->iquery !== null)
                              {
                                  $target->iquery = $base->iquery;
                              }
                          }
                          $target->ifragment = $relative->ifragment;
                      }
                  }
                  else
                  {
                      // Empty reference: the base without its fragment.
                      $target = clone $base;
                      $target->ifragment = null;
                  }
                  $target->scheme_normalization();
                  return $target;
              }
              else
              {
                  return false;
              }
          }
      }

      /**
       * Parse an IRI into scheme/authority/path/query/fragment segments
       *
       * Leading and trailing space, tab, LF, FF and CR are stripped first.
       * Absent components are reported as null (the path as '').
       *
       * @param string $iri
       * @return array|false The PCRE match array with the named keys
       *                     normalised, or false when the input does not match
       */
      protected function parse_iri($iri)
      {
          $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
          if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match))
          {
              // The numbered groups include the delimiter (":", "//", "?",
              // "#"), so they tell "absent" apart from "present but empty".
              if ($match[1] === '')
              {
                  $match['scheme'] = null;
              }
              if (!isset($match[3]) || $match[3] === '')
              {
                  $match['authority'] = null;
              }
              if (!isset($match[5]))
              {
                  $match['path'] = '';
              }
              if (!isset($match[6]) || $match[6] === '')
              {
                  $match['query'] = null;
              }
              if (!isset($match[8]) || $match[8] === '')
              {
                  $match['fragment'] = null;
              }
              return $match;
          }
          else
          {
              // This can occur when a paragraph is accidentally parsed as a URI
              return false;
          }
      }

      /**
       * Remove dot segments from a path
       *
       * The lettered comments below follow the steps of the
       * remove_dot_segments routine described in RFC 3986.
       *
       * @param string $input
       * @return string
       */
      protected function remove_dot_segments($input)
      {
          $output = '';
          while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
          {
              // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
              if (strpos($input, '../') === 0)
              {
                  $input = substr($input, 3);
              }
              elseif (strpos($input, './') === 0)
              {
                  $input = substr($input, 2);
              }
              // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
              elseif (strpos($input, '/./') === 0)
              {
                  $input = substr($input, 2);
              }
              elseif ($input === '/.')
              {
                  $input = '/';
              }
              // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
              elseif (strpos($input, '/../') === 0)
              {
                  $input = substr($input, 3);
                  // strrpos() is false when the output has no "/"; the cast
                  // makes the resulting "truncate from offset 0" explicit.
                  $output = substr_replace($output, '', (int) strrpos($output, '/'));
              }
              elseif ($input === '/..')
              {
                  $input = '/';
                  $output = substr_replace($output, '', (int) strrpos($output, '/'));
              }
              // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
              elseif ($input === '.' || $input === '..')
              {
                  $input = '';
              }
              // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
              elseif (($pos = strpos($input, '/', 1)) !== false)
              {
                  $output .= substr($input, 0, $pos);
                  $input = substr_replace($input, '', 0, $pos);
              }
              else
              {
                  $output .= $input;
                  $input = '';
              }
          }
          return $output . $input;
      }

      /**
       * Replace invalid character with percent encoding
       *
       * Existing percent-encoded runs are normalised first, stray "%" signs
       * become "%25", and then every byte sequence that is not an allowed
       * ASCII character or a well-formed UTF-8 encoding of an allowed code
       * point is percent-encoded byte by byte.
       *
       * @param string $string Input string
       * @param string $extra_chars Valid characters not in iunreserved or
       *                            iprivate (this is ASCII-only)
       * @param bool $iprivate Allow iprivate
       * @return string
       */
      protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
      {
          // Normalize as many pct-encoded sections as possible
          $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

          // Replace invalid percent characters
          $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

          // Add unreserved and % to $extra_chars (the latter is safe because all
          // pct-encoded sections are now valid).
          $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

          // Now replace any bytes that aren't allowed with their pct-encoded versions
          $position = 0;
          $strlen = strlen($string);
          while (($position += strspn($string, $extra_chars, $position)) < $strlen)
          {
              $value = ord($string[$position]);

              // Start position
              $start = $position;

              // By default we are valid
              $valid = true;

              // No one byte sequences are valid due to the while.
              // Two byte sequence:
              if (($value & 0xE0) === 0xC0)
              {
                  $character = ($value & 0x1F) << 6;
                  $length = 2;
                  $remaining = 1;
              }
              // Three byte sequence:
              elseif (($value & 0xF0) === 0xE0)
              {
                  $character = ($value & 0x0F) << 12;
                  $length = 3;
                  $remaining = 2;
              }
              // Four byte sequence:
              elseif (($value & 0xF8) === 0xF0)
              {
                  $character = ($value & 0x07) << 18;
                  $length = 4;
                  $remaining = 3;
              }
              // Invalid byte:
              else
              {
                  $valid = false;
                  $length = 1;
                  $remaining = 0;
              }

              if ($remaining)
              {
                  if ($position + $length <= $strlen)
                  {
                      for ($position++; $remaining; $position++)
                      {
                          $value = ord($string[$position]);

                          // Check that the byte is valid, then add it to the character:
                          if (($value & 0xC0) === 0x80)
                          {
                              $character |= ($value & 0x3F) << (--$remaining * 6);
                          }
                          // If it is invalid, count the sequence as invalid and reprocess the current byte:
                          else
                          {
                              $valid = false;
                              $position--;
                              break;
                          }
                      }
                  }
                  else
                  {
                      // Sequence is cut off by the end of the string: encode
                      // everything that is left.
                      $position = $strlen - 1;
                      $valid = false;
                  }
              }

              // Percent encode anything invalid or not in ucschar
              if (
                  // Invalid sequences
                  !$valid
                  // Non-shortest form sequences are invalid
                  || $length > 1 && $character <= 0x7F
                  || $length > 2 && $character <= 0x7FF
                  || $length > 3 && $character <= 0xFFFF
                  // Outside of range of ucschar codepoints
                  // Noncharacters
                  || ($character & 0xFFFE) === 0xFFFE
                  || $character >= 0xFDD0 && $character <= 0xFDEF
                  || (
                      // Everything else not in ucschar
                      $character > 0xD7FF && $character < 0xF900
                      || $character < 0xA0
                      || $character > 0xEFFFD
                  )
                  && (
                      // Everything not in iprivate, if it applies
                      !$iprivate
                      || $character < 0xE000
                      || $character > 0x10FFFD
                  )
              )
              {
                  // If we were a character, pretend we weren't, but rather an error.
                  // (After a complete sequence $position is one past its last
                  // byte; step back so the loop below covers exactly the sequence.)
                  if ($valid)
                  {
                      $position--;
                  }

                  for ($j = $start; $j <= $position; $j++)
                  {
                      // Each byte grows from one character to three ("%XX"),
                      // so every index and the length shift by two.
                      $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                      $j += 2;
                      $position += 2;
                      $strlen += 2;
                  }
              }
          }

          return $string;
      }

      /**
       * Callback function for preg_replace_callback.
       *
       * Removes sequences of percent encoded bytes that represent UTF-8
       * encoded characters in iunreserved; everything else stays
       * percent-encoded, with its hex digits upper-cased.
       *
       * @param array $match PCRE match
       * @return string Replacement
       */
      protected function remove_iunreserved_percent_encoded($match)
      {
          // As we just have valid percent encoded sequences we can just explode
          // and ignore the first member of the returned array (an empty string).
          $bytes = explode('%', $match[0]);

          // Initialize the new string (this is what will be returned) and that
          // there are no bytes remaining in the current sequence (unsurprising
          // at the first byte!).
          $string = '';
          $remaining = 0;

          // Loop over each and every byte, and set $value to its value
          for ($i = 1, $len = count($bytes); $i < $len; $i++)
          {
              $value = hexdec($bytes[$i]);

              // If we're the first byte of sequence:
              if (!$remaining)
              {
                  // Start position
                  $start = $i;

                  // By default we are valid
                  $valid = true;

                  // One byte sequence:
                  if ($value <= 0x7F)
                  {
                      $character = $value;
                      $length = 1;
                  }
                  // Two byte sequence:
                  elseif (($value & 0xE0) === 0xC0)
                  {
                      $character = ($value & 0x1F) << 6;
                      $length = 2;
                      $remaining = 1;
                  }
                  // Three byte sequence:
                  elseif (($value & 0xF0) === 0xE0)
                  {
                      $character = ($value & 0x0F) << 12;
                      $length = 3;
                      $remaining = 2;
                  }
                  // Four byte sequence:
                  elseif (($value & 0xF8) === 0xF0)
                  {
                      $character = ($value & 0x07) << 18;
                      $length = 4;
                      $remaining = 3;
                  }
                  // Invalid byte:
                  else
                  {
                      $valid = false;
                      $remaining = 0;
                  }
              }
              // Continuation byte:
              else
              {
                  // Check that the byte is valid, then add it to the character:
                  if (($value & 0xC0) === 0x80)
                  {
                      $remaining--;
                      $character |= ($value & 0x3F) << ($remaining * 6);
                  }
                  // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
                  else
                  {
                      $valid = false;
                      $remaining = 0;
                      $i--;
                  }
              }

              // If we've reached the end of the current byte sequence, append
              // it to the result (decoded or still percent-encoded)
              if (!$remaining)
              {
                  // Percent encode anything invalid or not in iunreserved
                  if (
                      // Invalid sequences
                      !$valid
                      // Non-shortest form sequences are invalid
                      || $length > 1 && $character <= 0x7F
                      || $length > 2 && $character <= 0x7FF
                      || $length > 3 && $character <= 0xFFFF
                      // Outside of range of iunreserved codepoints
                      || $character < 0x2D
                      || $character > 0xEFFFD
                      // Noncharacters
                      || ($character & 0xFFFE) === 0xFFFE
                      || $character >= 0xFDD0 && $character <= 0xFDEF
                      // Everything else not in iunreserved (this is all BMP)
                      || $character === 0x2F
                      || $character > 0x39 && $character < 0x41
                      || $character > 0x5A && $character < 0x61
                      || $character > 0x7A && $character < 0x7E
                      || $character > 0x7E && $character < 0xA0
                      || $character > 0xD7FF && $character < 0xF900
                  )
                  {
                      for ($j = $start; $j <= $i; $j++)
                      {
                          $string .= '%' . strtoupper($bytes[$j]);
                      }
                  }
                  else
                  {
                      for ($j = $start; $j <= $i; $j++)
                      {
                          $string .= chr(hexdec($bytes[$j]));
                      }
                  }
              }
          }

          // If we have any bytes left over they are invalid (i.e., we are
          // mid-way through a multi-byte sequence)
          if ($remaining)
          {
              for ($j = $start; $j < $len; $j++)
              {
                  $string .= '%' . strtoupper($bytes[$j]);
              }
          }

          return $string;
      }

      /**
       * Reset every component that equals the current scheme's default
       *
       * Components matching their entry in the normalization database are
       * cleared (null, or '' for the path) so that they are omitted on
       * serialisation.
       */
      protected function scheme_normalization()
      {
          if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo'])
          {
              $this->iuserinfo = null;
          }
          if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost'])
          {
              $this->ihost = null;
          }
          if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port'])
          {
              $this->port = null;
          }
          if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath'])
          {
              $this->ipath = '';
          }
          if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery'])
          {
              $this->iquery = null;
          }
          if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment'])
          {
              $this->ifragment = null;
          }
      }

      /**
       * Check if the object represents a valid IRI. This needs to be done on each
       * call as some things change depending on another part of the IRI.
       *
       * Rules applied to a non-empty path (RFC 3986, section 3.3):
       *  - with an authority, the path must begin with "/" (further empty
       *    segments such as "//foo" are allowed);
       *  - without an authority, the path must not begin with "//", since it
       *    would be read back as an authority;
       *  - without a scheme and an authority, the first path segment must not
       *    contain ":", since it would be read back as a scheme.
       *
       * @return bool
       */
      public function is_valid()
      {
          if ($this->ipath === '')
          {
              return true;
          }

          $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
          if ($isauthority)
          {
              return $this->ipath[0] === '/';
          }

          if (substr($this->ipath, 0, 2) === '//')
          {
              return false;
          }

          if ($this->scheme === null && ($colon = strpos($this->ipath, ':')) !== false)
          {
              $slash = strpos($this->ipath, '/');
              if ($slash === false || $colon < $slash)
              {
                  return false;
              }
          }

          return true;
      }

      /**
       * Set the entire IRI. Returns true on success, false on failure (if there
       * are any invalid characters).
       *
       * Results are memoised per IRI string in a static cache shared by all
       * instances. Passing null leaves the object untouched.
       *
       * @param string $iri
       * @return bool
       */
      public function set_iri($iri)
      {
          static $cache;
          if (!$cache)
          {
              $cache = array();
          }

          if ($iri === null)
          {
              return true;
          }

          // Cast before the value is used as an array key: objects are not
          // legal offsets, and false/0/'0' must not share a cache slot with
          // differently-parsed input.
          $iri = (string) $iri;

          if (isset($cache[$iri]))
          {
              list($this->scheme,
                   $this->iuserinfo,
                   $this->ihost,
                   $this->port,
                   $this->ipath,
                   $this->iquery,
                   $this->ifragment,
                   $return) = $cache[$iri];
              return $return;
          }
          else
          {
              $parsed = $this->parse_iri($iri);
              if (!$parsed)
              {
                  return false;
              }

              $return = $this->set_scheme($parsed['scheme'])
                  && $this->set_authority($parsed['authority'])
                  && $this->set_path($parsed['path'])
                  && $this->set_query($parsed['query'])
                  && $this->set_fragment($parsed['fragment']);

              $cache[$iri] = array($this->scheme,
                                   $this->iuserinfo,
                                   $this->ihost,
                                   $this->port,
                                   $this->ipath,
                                   $this->iquery,
                                   $this->ifragment,
                                   $return);
              return $return;
          }
      }

      /**
       * Set the scheme. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * @param string $scheme
       * @return bool
       */
      public function set_scheme($scheme)
      {
          if ($scheme === null)
          {
              $this->scheme = null;
          }
          elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme))
          {
              $this->scheme = null;
              return false;
          }
          else
          {
              $this->scheme = strtolower($scheme);
          }
          return true;
      }

      /**
       * Set the authority. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * Results are memoised in a static cache shared by all instances.
       *
       * @param string $authority
       * @return bool
       */
      public function set_authority($authority)
      {
          static $cache;
          if (!$cache)
          {
              $cache = array();
          }

          if ($authority === null)
          {
              $this->iuserinfo = null;
              $this->ihost = null;
              $this->port = null;
              return true;
          }

          // The stored components are scheme-normalised (e.g. port 80 is
          // dropped for http), so an entry is only reusable under the same
          // scheme. A scheme never contains NUL, which makes the first NUL an
          // unambiguous separator.
          $cache_key = $this->scheme . "\x00" . $authority;

          if (isset($cache[$cache_key]))
          {
              list($this->iuserinfo,
                   $this->ihost,
                   $this->port,
                   $return) = $cache[$cache_key];

              return $return;
          }
          else
          {
              $remaining = $authority;
              if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
              {
                  $iuserinfo = substr($remaining, 0, $iuserinfo_end);
                  $remaining = substr($remaining, $iuserinfo_end + 1);
              }
              else
              {
                  $iuserinfo = null;
              }

              // Start looking for the port separator after any closing "]" so
              // the colons of an IPv6 literal are skipped.
              if (($port_start = strpos($remaining, ':', (int) strpos($remaining, ']'))) !== false)
              {
                  // substr() gives false (PHP < 8) or '' (PHP >= 8) when
                  // nothing follows the colon; both mean "no port".
                  $port = substr($remaining, $port_start + 1);
                  if ($port === false || $port === '')
                  {
                      $port = null;
                  }
                  $remaining = substr($remaining, 0, $port_start);
              }
              else
              {
                  $port = null;
              }

              $return = $this->set_userinfo($iuserinfo) &&
                        $this->set_host($remaining) &&
                        $this->set_port($port);

              $cache[$cache_key] = array($this->iuserinfo,
                                         $this->ihost,
                                         $this->port,
                                         $return);

              return $return;
          }
      }

      /**
       * Set the iuserinfo.
       *
       * @param string $iuserinfo
       * @return bool Always true
       */
      public function set_userinfo($iuserinfo)
      {
          if ($iuserinfo === null)
          {
              $this->iuserinfo = null;
          }
          else
          {
              $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
              $this->scheme_normalization();
          }

          return true;
      }

      /**
       * Set the ihost. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * A host wrapped in "[...]" is checked and compressed via
       * SimplePie_Net_IPv6; any other host is percent-encoded where needed
       * and lower-cased.
       *
       * @param string $ihost
       * @return bool
       */
      public function set_host($ihost)
      {
          if ($ihost === null)
          {
              $this->ihost = null;
              return true;
          }
          elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']')
          {
              if (SimplePie_Net_IPv6::check_ipv6(substr($ihost, 1, -1)))
              {
                  $this->ihost = '[' . SimplePie_Net_IPv6::compress(substr($ihost, 1, -1)) . ']';
              }
              else
              {
                  $this->ihost = null;
                  return false;
              }
          }
          else
          {
              $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

              // Lowercase, but ignore pct-encoded sections (as they should
              // remain uppercase). This must be done after the previous step
              // as that can add unescaped characters.
              $position = 0;
              $strlen = strlen($ihost);
              while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen)
              {
                  if ($ihost[$position] === '%')
                  {
                      // Skip the whole "%XX" triplet.
                      $position += 3;
                  }
                  else
                  {
                      $ihost[$position] = strtolower($ihost[$position]);
                      $position++;
                  }
              }

              $this->ihost = $ihost;
          }

          $this->scheme_normalization();

          return true;
      }

      /**
       * Set the port. Returns true on success, false on failure (if there are
       * any invalid characters).
       *
       * Null and the empty string both mean "no port".
       *
       * @param string|int|null $port
       * @return bool
       */
      public function set_port($port)
      {
          // An empty port must not be stored as 0: "host:" designates the
          // scheme's default port, and unset($iri->port) passes '' here.
          if ($port === null || $port === '')
          {
              $this->port = null;
              return true;
          }
          elseif (strspn($port, '0123456789') === strlen($port))
          {
              $this->port = (int) $port;
              $this->scheme_normalization();
              return true;
          }
          else
          {
              $this->port = null;
              return false;
          }
      }

      /**
       * Set the ipath.
       *
       * Dot segments are only removed when a scheme is set; both variants are
       * memoised in a static cache shared by all instances.
       *
       * @param string $ipath
       * @return bool Always true
       */
      public function set_path($ipath)
      {
          static $cache;
          if (!$cache)
          {
              $cache = array();
          }

          $ipath = (string) $ipath;

          if (isset($cache[$ipath]))
          {
              // Index 0: encoded only; index 1: encoded and dot-free.
              $this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
          }
          else
          {
              $valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
              $removed = $this->remove_dot_segments($valid);

              $cache[$ipath] = array($valid, $removed);
              $this->ipath = ($this->scheme !== null) ? $removed : $valid;
          }

          $this->scheme_normalization();
          return true;
      }

      /**
       * Set the iquery.
       *
       * @param string $iquery
       * @return bool Always true
       */
      public function set_query($iquery)
      {
          if ($iquery === null)
          {
              $this->iquery = null;
          }
          else
          {
              $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
              $this->scheme_normalization();
          }
          return true;
      }

      /**
       * Set the ifragment.
       *
       * @param string $ifragment
       * @return bool Always true
       */
      public function set_fragment($ifragment)
      {
          if ($ifragment === null)
          {
              $this->ifragment = null;
          }
          else
          {
              $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
              $this->scheme_normalization();
          }
          return true;
      }

      /**
       * Convert an IRI to a URI (or parts thereof)
       *
       * Every byte in the range 0x80-0xFF is percent-encoded. A non-string
       * argument (such as the false that get_iri() gives for an invalid IRI)
       * is handed back unchanged.
       *
       * @param string $string
       * @return string|mixed
       */
      public function to_uri($string)
      {
          if (!is_string($string))
          {
              return $string;
          }

          static $non_ascii;
          if (!$non_ascii)
          {
              $non_ascii = implode('', range("\x80", "\xFF"));
          }

          $position = 0;
          $strlen = strlen($string);
          while (($position += strcspn($string, $non_ascii, $position)) < $strlen)
          {
              $string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
              $position += 3;
              $strlen += 2;
          }

          return $string;
      }

      /**
       * Get the complete IRI
       *
       * @return string|false The serialised IRI, or false when the object is
       *                      not a valid IRI
       */
      public function get_iri()
      {
          if (!$this->is_valid())
          {
              return false;
          }

          $iri = '';
          if ($this->scheme !== null)
          {
              $iri .= $this->scheme . ':';
          }
          if (($iauthority = $this->get_iauthority()) !== null)
          {
              $iri .= '//' . $iauthority;
          }
          if ($this->ipath !== '')
          {
              $iri .= $this->ipath;
          }
          elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '')
          {
              // The path was normalised away; restore the scheme's default.
              $iri .= $this->normalization[$this->scheme]['ipath'];
          }
          if ($this->iquery !== null)
          {
              $iri .= '?' . $this->iquery;
          }
          if ($this->ifragment !== null)
          {
              $iri .= '#' . $this->ifragment;
          }

          return $iri;
      }

      /**
       * Get the complete URI
       *
       * @return string|false The serialised URI, or false when the object is
       *                      not a valid IRI
       */
      public function get_uri()
      {
          return $this->to_uri($this->get_iri());
      }

      /**
       * Get the complete iauthority
       *
       * @return string|null Null when userinfo, host and port are all absent
       */
      protected function get_iauthority()
      {
          if ($this->iuserinfo !== null || $this->ihost !== null || $this->port !== null)
          {
              $iauthority = '';
              if ($this->iuserinfo !== null)
              {
                  $iauthority .= $this->iuserinfo . '@';
              }
              if ($this->ihost !== null)
              {
                  $iauthority .= $this->ihost;
              }
              if ($this->port !== null)
              {
                  $iauthority .= ':' . $this->port;
              }
              return $iauthority;
          }
          else
          {
              return null;
          }
      }

      /**
       * Get the complete authority
       *
       * @return string|null The iauthority converted with to_uri(), or null
       *                     when there is no authority
       */
      protected function get_authority()
      {
          $iauthority = $this->get_iauthority();
          if (is_string($iauthority))
          {
              return $this->to_uri($iauthority);
          }
          else
          {
              return $iauthority;
          }
      }
  }