Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 

618 rader
17 KiB

  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @version 1.3.1
  37. * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38. * @author Ryan Parman
  39. * @author Geoffrey Sneddon
  40. * @author Ryan McCue
  41. * @link http://simplepie.org/ SimplePie
  42. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43. */
  /**
   * Decode HTML Entities
   *
   * Scans a byte string for '&' and replaces numeric (`&#65;`, `&#x41;`) and
   * named (`&amp;`) character references, in place, with their UTF-8 bytes.
   * Anything that is not a recognised reference is left untouched.
   *
   * This implements HTML5 as of revision 967 (2007-06-28)
   *
   * @deprecated Use DOMDocument instead!
   * @package SimplePie
   */
  class SimplePie_Decode_HTML_Entities
  {
      /**
       * Data to be parsed
       *
       * Rewritten in place as references are decoded.
       *
       * @access private
       * @var string
       */
      public $data = '';

      /**
       * Currently consumed bytes
       *
       * Holds the bytes of the reference being examined, starting with its
       * '&'. Reset to '' by parse() after every reference.
       *
       * @access private
       * @var string
       */
      public $consumed = '';

      /**
       * Position of the current byte being parsed
       *
       * @access private
       * @var int
       */
      public $position = 0;

      /**
       * Create an instance of the class with the input data
       *
       * @access public
       * @param string $data Input data
       */
      public function __construct($data)
      {
          $this->data = $data;
      }

      /**
       * Parse the input data
       *
       * Jumps from one '&' to the next and lets entity() decide whether a
       * character reference starts there. When the loop ends, $position holds
       * the `false` returned by the final strpos() call.
       *
       * @access public
       * @return string Output data
       */
      public function parse()
      {
          while (($this->position = strpos($this->data, '&', $this->position)) !== false)
          {
              // Consume the '&' itself, then whatever follows it.
              $this->consume();
              $this->entity();
              $this->consumed = '';
          }
          return $this->data;
      }

      /**
       * Consume the next byte
       *
       * Appends the byte to $consumed and advances $position by one.
       *
       * @access private
       * @return mixed The next byte, or false, if there is no more data
       */
      public function consume()
      {
          if (isset($this->data[$this->position]))
          {
              $this->consumed .= $this->data[$this->position];
              return $this->data[$this->position++];
          }
          else
          {
              return false;
          }
      }

      /**
       * Consume a range of characters
       *
       * Consumes the longest run of bytes at the current position that are all
       * contained in $chars.
       *
       * @access private
       * @param string $chars Characters to consume
       * @return mixed A series of characters that match the range, or false
       */
      public function consume_range($chars)
      {
          if ($len = strspn($this->data, $chars, $this->position))
          {
              $data = substr($this->data, $this->position, $len);
              $this->consumed .= $data;
              $this->position += $len;
              return $data;
          }
          else
          {
              return false;
          }
      }

      /**
       * Unconsume one byte
       *
       * Must only be called after a consume() that actually returned a byte.
       *
       * @access private
       */
      public function unconsume()
      {
          $this->consumed = substr($this->consumed, 0, -1);
          $this->position--;
      }

      /**
       * Decode an entity
       *
       * Expects the '&' to have been consumed already. Looks at the next byte
       * and dispatches to the numeric or named decoder, or leaves the data
       * alone when the '&' cannot start a character reference.
       *
       * @access private
       */
      public function entity()
      {
          $char = $this->consume();

          if ($char === false)
          {
              // A lone '&' at the very end of the data.
              return;
          }

          if ($char === "\x23")
          {
              $this->decode_numeric_entity();
          }
          elseif (strpos("\x09\x0A\x0B\x0C\x20\x3C\x26", $char) !== false)
          {
              // Whitespace, '<' or '&' right after the '&': not a reference.
              // Give the byte back so that a '&' found here is examined by
              // parse() as the possible start of the next reference.
              $this->unconsume();
          }
          else
          {
              $this->decode_named_entity();
          }
      }

      /**
       * Decode a numeric character reference
       *
       * Called with "&#" consumed. Handles decimal (`&#8364;`) and hexadecimal
       * (`&#x20AC;`) forms; the trailing ';' is optional.
       *
       * @access private
       */
      private function decode_numeric_entity()
      {
          // Code points that do not map to themselves: NUL, CR, and the C1
          // range, which is interpreted as Windows-1252.
          static $windows_1252_specials = array(
              0x00 => "\xEF\xBF\xBD",
              0x0D => "\x0A",
              0x80 => "\xE2\x82\xAC",
              0x81 => "\xEF\xBF\xBD",
              0x82 => "\xE2\x80\x9A",
              0x83 => "\xC6\x92",
              0x84 => "\xE2\x80\x9E",
              0x85 => "\xE2\x80\xA6",
              0x86 => "\xE2\x80\xA0",
              0x87 => "\xE2\x80\xA1",
              0x88 => "\xCB\x86",
              0x89 => "\xE2\x80\xB0",
              0x8A => "\xC5\xA0",
              0x8B => "\xE2\x80\xB9",
              0x8C => "\xC5\x92",
              0x8D => "\xEF\xBF\xBD",
              0x8E => "\xC5\xBD",
              0x8F => "\xEF\xBF\xBD",
              0x90 => "\xEF\xBF\xBD",
              0x91 => "\xE2\x80\x98",
              0x92 => "\xE2\x80\x99",
              0x93 => "\xE2\x80\x9C",
              0x94 => "\xE2\x80\x9D",
              0x95 => "\xE2\x80\xA2",
              0x96 => "\xE2\x80\x93",
              0x97 => "\xE2\x80\x94",
              0x98 => "\xCB\x9C",
              0x99 => "\xE2\x84\xA2",
              0x9A => "\xC5\xA1",
              0x9B => "\xE2\x80\xBA",
              0x9C => "\xC5\x93",
              0x9D => "\xEF\xBF\xBD",
              0x9E => "\xC5\xBE",
              0x9F => "\xC5\xB8"
          );

          $marker = $this->consume();
          if ($marker === "\x78" || $marker === "\x58")
          {
              $range = '0123456789ABCDEFabcdef';
              $hex = true;
          }
          else
          {
              $range = '0123456789';
              $hex = false;
              // Only give the byte back if one was actually taken; at the end
              // of the data consume() returned false without advancing.
              if ($marker !== false)
              {
                  $this->unconsume();
              }
          }

          // Compare against false explicitly: the digit string "0" is falsy.
          $digits = $this->consume_range($range);
          if ($digits === false)
          {
              return;
          }

          $codepoint = $hex ? hexdec($digits) : intval($digits);

          if (!is_int($codepoint) || $codepoint > 0x10FFFF)
          {
              // hexdec() returns a float once the value no longer fits an
              // int; that and anything beyond the Unicode range becomes
              // U+FFFD REPLACEMENT CHARACTER.
              $replacement = "\xEF\xBF\xBD";
          }
          elseif (isset($windows_1252_specials[$codepoint]))
          {
              $replacement = $windows_1252_specials[$codepoint];
          }
          else
          {
              $replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
          }

          // Swallow the terminating ';' when present, nothing else.
          $terminator = $this->consume();
          if ($terminator !== ';' && $terminator !== false)
          {
              $this->unconsume();
          }

          $consumed_length = strlen($this->consumed);
          $this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
          // Continue scanning right after the replacement.
          $this->position += strlen($replacement) - $consumed_length;
      }

      /**
       * Decode a named character reference
       *
       * Called with '&' and the first byte of the name consumed. Picks the
       * longest known entity name that starts right after the '&'. Names
       * listed without a trailing ';' are matched without one, so "&notit;"
       * decodes "&not" and keeps "it;".
       *
       * @access private
       */
      private function decode_named_entity()
      {
          static $entities = array(
              'Aacute' => "\xC3\x81",
              'aacute' => "\xC3\xA1",
              'Aacute;' => "\xC3\x81",
              'aacute;' => "\xC3\xA1",
              'Acirc' => "\xC3\x82",
              'acirc' => "\xC3\xA2",
              'Acirc;' => "\xC3\x82",
              'acirc;' => "\xC3\xA2",
              'acute' => "\xC2\xB4",
              'acute;' => "\xC2\xB4",
              'AElig' => "\xC3\x86",
              'aelig' => "\xC3\xA6",
              'AElig;' => "\xC3\x86",
              'aelig;' => "\xC3\xA6",
              'Agrave' => "\xC3\x80",
              'agrave' => "\xC3\xA0",
              'Agrave;' => "\xC3\x80",
              'agrave;' => "\xC3\xA0",
              'alefsym;' => "\xE2\x84\xB5",
              'Alpha;' => "\xCE\x91",
              'alpha;' => "\xCE\xB1",
              'AMP' => "\x26",
              'amp' => "\x26",
              'AMP;' => "\x26",
              'amp;' => "\x26",
              'and;' => "\xE2\x88\xA7",
              'ang;' => "\xE2\x88\xA0",
              'apos;' => "\x27",
              'Aring' => "\xC3\x85",
              'aring' => "\xC3\xA5",
              'Aring;' => "\xC3\x85",
              'aring;' => "\xC3\xA5",
              'asymp;' => "\xE2\x89\x88",
              'Atilde' => "\xC3\x83",
              'atilde' => "\xC3\xA3",
              'Atilde;' => "\xC3\x83",
              'atilde;' => "\xC3\xA3",
              'Auml' => "\xC3\x84",
              'auml' => "\xC3\xA4",
              'Auml;' => "\xC3\x84",
              'auml;' => "\xC3\xA4",
              'bdquo;' => "\xE2\x80\x9E",
              'Beta;' => "\xCE\x92",
              'beta;' => "\xCE\xB2",
              'brvbar' => "\xC2\xA6",
              'brvbar;' => "\xC2\xA6",
              'bull;' => "\xE2\x80\xA2",
              'cap;' => "\xE2\x88\xA9",
              'Ccedil' => "\xC3\x87",
              'ccedil' => "\xC3\xA7",
              'Ccedil;' => "\xC3\x87",
              'ccedil;' => "\xC3\xA7",
              'cedil' => "\xC2\xB8",
              'cedil;' => "\xC2\xB8",
              'cent' => "\xC2\xA2",
              'cent;' => "\xC2\xA2",
              'Chi;' => "\xCE\xA7",
              'chi;' => "\xCF\x87",
              'circ;' => "\xCB\x86",
              'clubs;' => "\xE2\x99\xA3",
              'cong;' => "\xE2\x89\x85",
              'COPY' => "\xC2\xA9",
              'copy' => "\xC2\xA9",
              'COPY;' => "\xC2\xA9",
              'copy;' => "\xC2\xA9",
              'crarr;' => "\xE2\x86\xB5",
              'cup;' => "\xE2\x88\xAA",
              'curren' => "\xC2\xA4",
              'curren;' => "\xC2\xA4",
              'Dagger;' => "\xE2\x80\xA1",
              'dagger;' => "\xE2\x80\xA0",
              'dArr;' => "\xE2\x87\x93",
              'darr;' => "\xE2\x86\x93",
              'deg' => "\xC2\xB0",
              'deg;' => "\xC2\xB0",
              'Delta;' => "\xCE\x94",
              'delta;' => "\xCE\xB4",
              'diams;' => "\xE2\x99\xA6",
              'divide' => "\xC3\xB7",
              'divide;' => "\xC3\xB7",
              'Eacute' => "\xC3\x89",
              'eacute' => "\xC3\xA9",
              'Eacute;' => "\xC3\x89",
              'eacute;' => "\xC3\xA9",
              'Ecirc' => "\xC3\x8A",
              'ecirc' => "\xC3\xAA",
              'Ecirc;' => "\xC3\x8A",
              'ecirc;' => "\xC3\xAA",
              'Egrave' => "\xC3\x88",
              'egrave' => "\xC3\xA8",
              'Egrave;' => "\xC3\x88",
              'egrave;' => "\xC3\xA8",
              'empty;' => "\xE2\x88\x85",
              'emsp;' => "\xE2\x80\x83",
              'ensp;' => "\xE2\x80\x82",
              'Epsilon;' => "\xCE\x95",
              'epsilon;' => "\xCE\xB5",
              'equiv;' => "\xE2\x89\xA1",
              'Eta;' => "\xCE\x97",
              'eta;' => "\xCE\xB7",
              'ETH' => "\xC3\x90",
              'eth' => "\xC3\xB0",
              'ETH;' => "\xC3\x90",
              'eth;' => "\xC3\xB0",
              'Euml' => "\xC3\x8B",
              'euml' => "\xC3\xAB",
              'Euml;' => "\xC3\x8B",
              'euml;' => "\xC3\xAB",
              'euro;' => "\xE2\x82\xAC",
              'exist;' => "\xE2\x88\x83",
              'fnof;' => "\xC6\x92",
              'forall;' => "\xE2\x88\x80",
              'frac12' => "\xC2\xBD",
              'frac12;' => "\xC2\xBD",
              'frac14' => "\xC2\xBC",
              'frac14;' => "\xC2\xBC",
              'frac34' => "\xC2\xBE",
              'frac34;' => "\xC2\xBE",
              'frasl;' => "\xE2\x81\x84",
              'Gamma;' => "\xCE\x93",
              'gamma;' => "\xCE\xB3",
              'ge;' => "\xE2\x89\xA5",
              'GT' => "\x3E",
              'gt' => "\x3E",
              'GT;' => "\x3E",
              'gt;' => "\x3E",
              'hArr;' => "\xE2\x87\x94",
              'harr;' => "\xE2\x86\x94",
              'hearts;' => "\xE2\x99\xA5",
              'hellip;' => "\xE2\x80\xA6",
              'Iacute' => "\xC3\x8D",
              'iacute' => "\xC3\xAD",
              'Iacute;' => "\xC3\x8D",
              'iacute;' => "\xC3\xAD",
              'Icirc' => "\xC3\x8E",
              'icirc' => "\xC3\xAE",
              'Icirc;' => "\xC3\x8E",
              'icirc;' => "\xC3\xAE",
              'iexcl' => "\xC2\xA1",
              'iexcl;' => "\xC2\xA1",
              'Igrave' => "\xC3\x8C",
              'igrave' => "\xC3\xAC",
              'Igrave;' => "\xC3\x8C",
              'igrave;' => "\xC3\xAC",
              'image;' => "\xE2\x84\x91",
              'infin;' => "\xE2\x88\x9E",
              'int;' => "\xE2\x88\xAB",
              'Iota;' => "\xCE\x99",
              'iota;' => "\xCE\xB9",
              'iquest' => "\xC2\xBF",
              'iquest;' => "\xC2\xBF",
              'isin;' => "\xE2\x88\x88",
              'Iuml' => "\xC3\x8F",
              'iuml' => "\xC3\xAF",
              'Iuml;' => "\xC3\x8F",
              'iuml;' => "\xC3\xAF",
              'Kappa;' => "\xCE\x9A",
              'kappa;' => "\xCE\xBA",
              'Lambda;' => "\xCE\x9B",
              'lambda;' => "\xCE\xBB",
              'lang;' => "\xE3\x80\x88",
              'laquo' => "\xC2\xAB",
              'laquo;' => "\xC2\xAB",
              'lArr;' => "\xE2\x87\x90",
              'larr;' => "\xE2\x86\x90",
              'lceil;' => "\xE2\x8C\x88",
              'ldquo;' => "\xE2\x80\x9C",
              'le;' => "\xE2\x89\xA4",
              'lfloor;' => "\xE2\x8C\x8A",
              'lowast;' => "\xE2\x88\x97",
              'loz;' => "\xE2\x97\x8A",
              'lrm;' => "\xE2\x80\x8E",
              'lsaquo;' => "\xE2\x80\xB9",
              'lsquo;' => "\xE2\x80\x98",
              'LT' => "\x3C",
              'lt' => "\x3C",
              'LT;' => "\x3C",
              'lt;' => "\x3C",
              'macr' => "\xC2\xAF",
              'macr;' => "\xC2\xAF",
              'mdash;' => "\xE2\x80\x94",
              'micro' => "\xC2\xB5",
              'micro;' => "\xC2\xB5",
              'middot' => "\xC2\xB7",
              'middot;' => "\xC2\xB7",
              'minus;' => "\xE2\x88\x92",
              'Mu;' => "\xCE\x9C",
              'mu;' => "\xCE\xBC",
              'nabla;' => "\xE2\x88\x87",
              'nbsp' => "\xC2\xA0",
              'nbsp;' => "\xC2\xA0",
              'ndash;' => "\xE2\x80\x93",
              'ne;' => "\xE2\x89\xA0",
              'ni;' => "\xE2\x88\x8B",
              'not' => "\xC2\xAC",
              'not;' => "\xC2\xAC",
              'notin;' => "\xE2\x88\x89",
              'nsub;' => "\xE2\x8A\x84",
              'Ntilde' => "\xC3\x91",
              'ntilde' => "\xC3\xB1",
              'Ntilde;' => "\xC3\x91",
              'ntilde;' => "\xC3\xB1",
              'Nu;' => "\xCE\x9D",
              'nu;' => "\xCE\xBD",
              'Oacute' => "\xC3\x93",
              'oacute' => "\xC3\xB3",
              'Oacute;' => "\xC3\x93",
              'oacute;' => "\xC3\xB3",
              'Ocirc' => "\xC3\x94",
              'ocirc' => "\xC3\xB4",
              'Ocirc;' => "\xC3\x94",
              'ocirc;' => "\xC3\xB4",
              'OElig;' => "\xC5\x92",
              'oelig;' => "\xC5\x93",
              'Ograve' => "\xC3\x92",
              'ograve' => "\xC3\xB2",
              'Ograve;' => "\xC3\x92",
              'ograve;' => "\xC3\xB2",
              'oline;' => "\xE2\x80\xBE",
              'Omega;' => "\xCE\xA9",
              'omega;' => "\xCF\x89",
              'Omicron;' => "\xCE\x9F",
              'omicron;' => "\xCE\xBF",
              'oplus;' => "\xE2\x8A\x95",
              'or;' => "\xE2\x88\xA8",
              'ordf' => "\xC2\xAA",
              'ordf;' => "\xC2\xAA",
              'ordm' => "\xC2\xBA",
              'ordm;' => "\xC2\xBA",
              'Oslash' => "\xC3\x98",
              'oslash' => "\xC3\xB8",
              'Oslash;' => "\xC3\x98",
              'oslash;' => "\xC3\xB8",
              'Otilde' => "\xC3\x95",
              'otilde' => "\xC3\xB5",
              'Otilde;' => "\xC3\x95",
              'otilde;' => "\xC3\xB5",
              'otimes;' => "\xE2\x8A\x97",
              'Ouml' => "\xC3\x96",
              'ouml' => "\xC3\xB6",
              'Ouml;' => "\xC3\x96",
              'ouml;' => "\xC3\xB6",
              'para' => "\xC2\xB6",
              'para;' => "\xC2\xB6",
              'part;' => "\xE2\x88\x82",
              'permil;' => "\xE2\x80\xB0",
              'perp;' => "\xE2\x8A\xA5",
              'Phi;' => "\xCE\xA6",
              'phi;' => "\xCF\x86",
              'Pi;' => "\xCE\xA0",
              'pi;' => "\xCF\x80",
              'piv;' => "\xCF\x96",
              'plusmn' => "\xC2\xB1",
              'plusmn;' => "\xC2\xB1",
              'pound' => "\xC2\xA3",
              'pound;' => "\xC2\xA3",
              'Prime;' => "\xE2\x80\xB3",
              'prime;' => "\xE2\x80\xB2",
              'prod;' => "\xE2\x88\x8F",
              'prop;' => "\xE2\x88\x9D",
              'Psi;' => "\xCE\xA8",
              'psi;' => "\xCF\x88",
              'QUOT' => "\x22",
              'quot' => "\x22",
              'QUOT;' => "\x22",
              'quot;' => "\x22",
              'radic;' => "\xE2\x88\x9A",
              'rang;' => "\xE3\x80\x89",
              'raquo' => "\xC2\xBB",
              'raquo;' => "\xC2\xBB",
              'rArr;' => "\xE2\x87\x92",
              'rarr;' => "\xE2\x86\x92",
              'rceil;' => "\xE2\x8C\x89",
              'rdquo;' => "\xE2\x80\x9D",
              'real;' => "\xE2\x84\x9C",
              'REG' => "\xC2\xAE",
              'reg' => "\xC2\xAE",
              'REG;' => "\xC2\xAE",
              'reg;' => "\xC2\xAE",
              'rfloor;' => "\xE2\x8C\x8B",
              'Rho;' => "\xCE\xA1",
              'rho;' => "\xCF\x81",
              'rlm;' => "\xE2\x80\x8F",
              'rsaquo;' => "\xE2\x80\xBA",
              'rsquo;' => "\xE2\x80\x99",
              'sbquo;' => "\xE2\x80\x9A",
              'Scaron;' => "\xC5\xA0",
              'scaron;' => "\xC5\xA1",
              'sdot;' => "\xE2\x8B\x85",
              'sect' => "\xC2\xA7",
              'sect;' => "\xC2\xA7",
              'shy' => "\xC2\xAD",
              'shy;' => "\xC2\xAD",
              'Sigma;' => "\xCE\xA3",
              'sigma;' => "\xCF\x83",
              'sigmaf;' => "\xCF\x82",
              'sim;' => "\xE2\x88\xBC",
              'spades;' => "\xE2\x99\xA0",
              'sub;' => "\xE2\x8A\x82",
              'sube;' => "\xE2\x8A\x86",
              'sum;' => "\xE2\x88\x91",
              'sup;' => "\xE2\x8A\x83",
              'sup1' => "\xC2\xB9",
              'sup1;' => "\xC2\xB9",
              'sup2' => "\xC2\xB2",
              'sup2;' => "\xC2\xB2",
              'sup3' => "\xC2\xB3",
              'sup3;' => "\xC2\xB3",
              'supe;' => "\xE2\x8A\x87",
              'szlig' => "\xC3\x9F",
              'szlig;' => "\xC3\x9F",
              'Tau;' => "\xCE\xA4",
              'tau;' => "\xCF\x84",
              'there4;' => "\xE2\x88\xB4",
              'Theta;' => "\xCE\x98",
              'theta;' => "\xCE\xB8",
              'thetasym;' => "\xCF\x91",
              'thinsp;' => "\xE2\x80\x89",
              'THORN' => "\xC3\x9E",
              'thorn' => "\xC3\xBE",
              'THORN;' => "\xC3\x9E",
              'thorn;' => "\xC3\xBE",
              'tilde;' => "\xCB\x9C",
              'times' => "\xC3\x97",
              'times;' => "\xC3\x97",
              'TRADE;' => "\xE2\x84\xA2",
              'trade;' => "\xE2\x84\xA2",
              'Uacute' => "\xC3\x9A",
              'uacute' => "\xC3\xBA",
              'Uacute;' => "\xC3\x9A",
              'uacute;' => "\xC3\xBA",
              'uArr;' => "\xE2\x87\x91",
              'uarr;' => "\xE2\x86\x91",
              'Ucirc' => "\xC3\x9B",
              'ucirc' => "\xC3\xBB",
              'Ucirc;' => "\xC3\x9B",
              'ucirc;' => "\xC3\xBB",
              'Ugrave' => "\xC3\x99",
              'ugrave' => "\xC3\xB9",
              'Ugrave;' => "\xC3\x99",
              'ugrave;' => "\xC3\xB9",
              'uml' => "\xC2\xA8",
              'uml;' => "\xC2\xA8",
              'upsih;' => "\xCF\x92",
              'Upsilon;' => "\xCE\xA5",
              'upsilon;' => "\xCF\x85",
              'Uuml' => "\xC3\x9C",
              'uuml' => "\xC3\xBC",
              'Uuml;' => "\xC3\x9C",
              'uuml;' => "\xC3\xBC",
              'weierp;' => "\xE2\x84\x98",
              'Xi;' => "\xCE\x9E",
              'xi;' => "\xCE\xBE",
              'Yacute' => "\xC3\x9D",
              'yacute' => "\xC3\xBD",
              'Yacute;' => "\xC3\x9D",
              'yacute;' => "\xC3\xBD",
              'yen' => "\xC2\xA5",
              'yen;' => "\xC2\xA5",
              'yuml' => "\xC3\xBF",
              'Yuml;' => "\xC5\xB8",
              'yuml;' => "\xC3\xBF",
              'Zeta;' => "\xCE\x96",
              'zeta;' => "\xCE\xB6",
              'zwj;' => "\xE2\x80\x8D",
              'zwnj;' => "\xE2\x80\x8C"
          );

          // Index of the '&' that opened this reference; $consumed holds the
          // '&' plus every byte taken since.
          $ampersand = $this->position - strlen($this->consumed);

          // Look ahead without moving the cursor: the first name byte plus up
          // to nine more, i.e. candidate names of 2 to 10 bytes.
          $lookahead = substr($this->data, $ampersand + 1, 10);
          $available = strlen($lookahead);

          $match = null;
          for ($length = 2; $length <= $available; $length++)
          {
              $candidate = substr($lookahead, 0, $length);
              if (isset($entities[$candidate]))
              {
                  // Keep going: a longer name wins over a shorter one.
                  $match = $candidate;
              }
          }

          if ($match !== null)
          {
              $this->data = substr_replace($this->data, $entities[$match], $ampersand, strlen($match) + 1);
              // Continue scanning right after the replacement.
              $this->position = $ampersand + strlen($entities[$match]);
          }
          else
          {
              // Not an entity: resume directly after the '&' so that a
              // reference starting within the next few bytes is not skipped.
              $this->position = $ampersand + 1;
          }
      }
  }