Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 
 
 
 

501 Zeilen
11 KiB

  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @version 1.3.1
  37. * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38. * @author Ryan Parman
  39. * @author Geoffrey Sneddon
  40. * @author Ryan McCue
  41. * @link http://simplepie.org/ SimplePie
  42. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43. */
  44. /**
  45. * HTTP Response Parser
  46. *
  47. * @package SimplePie
  48. * @subpackage HTTP
  49. */
  50. class SimplePie_HTTP_Parser
  51. {
  52. /**
  53. * HTTP Version
  54. *
  55. * @var float
  56. */
  57. public $http_version = 0.0;
  58. /**
  59. * Status code
  60. *
  61. * @var int
  62. */
  63. public $status_code = 0;
  64. /**
  65. * Reason phrase
  66. *
  67. * @var string
  68. */
  69. public $reason = '';
  70. /**
  71. * Key/value pairs of the headers
  72. *
  73. * @var array
  74. */
  75. public $headers = array();
  76. /**
  77. * Body of the response
  78. *
  79. * @var string
  80. */
  81. public $body = '';
  82. /**
  83. * Current state of the state machine
  84. *
  85. * @var string
  86. */
  87. protected $state = 'http_version';
  88. /**
  89. * Input data
  90. *
  91. * @var string
  92. */
  93. protected $data = '';
  94. /**
  95. * Input data length (to avoid calling strlen() everytime this is needed)
  96. *
  97. * @var int
  98. */
  99. protected $data_length = 0;
  100. /**
  101. * Current position of the pointer
  102. *
  103. * @var int
  104. */
  105. protected $position = 0;
  106. /**
  107. * Name of the hedaer currently being parsed
  108. *
  109. * @var string
  110. */
  111. protected $name = '';
  112. /**
  113. * Value of the hedaer currently being parsed
  114. *
  115. * @var string
  116. */
  117. protected $value = '';
  118. /**
  119. * Create an instance of the class with the input data
  120. *
  121. * @param string $data Input data
  122. */
  123. public function __construct($data)
  124. {
  125. $this->data = $data;
  126. $this->data_length = strlen($this->data);
  127. }
  128. /**
  129. * Parse the input data
  130. *
  131. * @return bool true on success, false on failure
  132. */
  133. public function parse()
  134. {
  135. while ($this->state && $this->state !== 'emit' && $this->has_data())
  136. {
  137. $state = $this->state;
  138. $this->$state();
  139. }
  140. $this->data = '';
  141. if ($this->state === 'emit' || $this->state === 'body')
  142. {
  143. return true;
  144. }
  145. else
  146. {
  147. $this->http_version = '';
  148. $this->status_code = '';
  149. $this->reason = '';
  150. $this->headers = array();
  151. $this->body = '';
  152. return false;
  153. }
  154. }
  155. /**
  156. * Check whether there is data beyond the pointer
  157. *
  158. * @return bool true if there is further data, false if not
  159. */
  160. protected function has_data()
  161. {
  162. return (bool) ($this->position < $this->data_length);
  163. }
  164. /**
  165. * See if the next character is LWS
  166. *
  167. * @return bool true if the next character is LWS, false if not
  168. */
  169. protected function is_linear_whitespace()
  170. {
  171. return (bool) ($this->data[$this->position] === "\x09"
  172. || $this->data[$this->position] === "\x20"
  173. || ($this->data[$this->position] === "\x0A"
  174. && isset($this->data[$this->position + 1])
  175. && ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
  176. }
  177. /**
  178. * Parse the HTTP version
  179. */
  180. protected function http_version()
  181. {
  182. if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
  183. {
  184. $len = strspn($this->data, '0123456789.', 5);
  185. $this->http_version = substr($this->data, 5, $len);
  186. $this->position += 5 + $len;
  187. if (substr_count($this->http_version, '.') <= 1)
  188. {
  189. $this->http_version = (float) $this->http_version;
  190. $this->position += strspn($this->data, "\x09\x20", $this->position);
  191. $this->state = 'status';
  192. }
  193. else
  194. {
  195. $this->state = false;
  196. }
  197. }
  198. else
  199. {
  200. $this->state = false;
  201. }
  202. }
  203. /**
  204. * Parse the status code
  205. */
  206. protected function status()
  207. {
  208. if ($len = strspn($this->data, '0123456789', $this->position))
  209. {
  210. $this->status_code = (int) substr($this->data, $this->position, $len);
  211. $this->position += $len;
  212. $this->state = 'reason';
  213. }
  214. else
  215. {
  216. $this->state = false;
  217. }
  218. }
  219. /**
  220. * Parse the reason phrase
  221. */
  222. protected function reason()
  223. {
  224. $len = strcspn($this->data, "\x0A", $this->position);
  225. $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
  226. $this->position += $len + 1;
  227. $this->state = 'new_line';
  228. }
  229. /**
  230. * Deal with a new line, shifting data around as needed
  231. */
  232. protected function new_line()
  233. {
  234. $this->value = trim($this->value, "\x0D\x20");
  235. if ($this->name !== '' && $this->value !== '')
  236. {
  237. $this->name = strtolower($this->name);
  238. // We should only use the last Content-Type header. c.f. issue #1
  239. if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
  240. {
  241. $this->headers[$this->name] .= ', ' . $this->value;
  242. }
  243. else
  244. {
  245. $this->headers[$this->name] = $this->value;
  246. }
  247. }
  248. $this->name = '';
  249. $this->value = '';
  250. if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A")
  251. {
  252. $this->position += 2;
  253. $this->state = 'body';
  254. }
  255. elseif ($this->data[$this->position] === "\x0A")
  256. {
  257. $this->position++;
  258. $this->state = 'body';
  259. }
  260. else
  261. {
  262. $this->state = 'name';
  263. }
  264. }
  265. /**
  266. * Parse a header name
  267. */
  268. protected function name()
  269. {
  270. $len = strcspn($this->data, "\x0A:", $this->position);
  271. if (isset($this->data[$this->position + $len]))
  272. {
  273. if ($this->data[$this->position + $len] === "\x0A")
  274. {
  275. $this->position += $len;
  276. $this->state = 'new_line';
  277. }
  278. else
  279. {
  280. $this->name = substr($this->data, $this->position, $len);
  281. $this->position += $len + 1;
  282. $this->state = 'value';
  283. }
  284. }
  285. else
  286. {
  287. $this->state = false;
  288. }
  289. }
  290. /**
  291. * Parse LWS, replacing consecutive LWS characters with a single space
  292. */
  293. protected function linear_whitespace()
  294. {
  295. do
  296. {
  297. if (substr($this->data, $this->position, 2) === "\x0D\x0A")
  298. {
  299. $this->position += 2;
  300. }
  301. elseif ($this->data[$this->position] === "\x0A")
  302. {
  303. $this->position++;
  304. }
  305. $this->position += strspn($this->data, "\x09\x20", $this->position);
  306. } while ($this->has_data() && $this->is_linear_whitespace());
  307. $this->value .= "\x20";
  308. }
  309. /**
  310. * See what state to move to while within non-quoted header values
  311. */
  312. protected function value()
  313. {
  314. if ($this->is_linear_whitespace())
  315. {
  316. $this->linear_whitespace();
  317. }
  318. else
  319. {
  320. switch ($this->data[$this->position])
  321. {
  322. case '"':
  323. // Workaround for ETags: we have to include the quotes as
  324. // part of the tag.
  325. if (strtolower($this->name) === 'etag')
  326. {
  327. $this->value .= '"';
  328. $this->position++;
  329. $this->state = 'value_char';
  330. break;
  331. }
  332. $this->position++;
  333. $this->state = 'quote';
  334. break;
  335. case "\x0A":
  336. $this->position++;
  337. $this->state = 'new_line';
  338. break;
  339. default:
  340. $this->state = 'value_char';
  341. break;
  342. }
  343. }
  344. }
  345. /**
  346. * Parse a header value while outside quotes
  347. */
  348. protected function value_char()
  349. {
  350. $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
  351. $this->value .= substr($this->data, $this->position, $len);
  352. $this->position += $len;
  353. $this->state = 'value';
  354. }
  355. /**
  356. * See what state to move to while within quoted header values
  357. */
  358. protected function quote()
  359. {
  360. if ($this->is_linear_whitespace())
  361. {
  362. $this->linear_whitespace();
  363. }
  364. else
  365. {
  366. switch ($this->data[$this->position])
  367. {
  368. case '"':
  369. $this->position++;
  370. $this->state = 'value';
  371. break;
  372. case "\x0A":
  373. $this->position++;
  374. $this->state = 'new_line';
  375. break;
  376. case '\\':
  377. $this->position++;
  378. $this->state = 'quote_escaped';
  379. break;
  380. default:
  381. $this->state = 'quote_char';
  382. break;
  383. }
  384. }
  385. }
  386. /**
  387. * Parse a header value while within quotes
  388. */
  389. protected function quote_char()
  390. {
  391. $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
  392. $this->value .= substr($this->data, $this->position, $len);
  393. $this->position += $len;
  394. $this->state = 'value';
  395. }
  396. /**
  397. * Parse an escaped character within quotes
  398. */
  399. protected function quote_escaped()
  400. {
  401. $this->value .= $this->data[$this->position];
  402. $this->position++;
  403. $this->state = 'quote';
  404. }
  405. /**
  406. * Parse the body
  407. */
  408. protected function body()
  409. {
  410. $this->body = substr($this->data, $this->position);
  411. if (!empty($this->headers['transfer-encoding']))
  412. {
  413. unset($this->headers['transfer-encoding']);
  414. $this->state = 'chunked';
  415. }
  416. else
  417. {
  418. $this->state = 'emit';
  419. }
  420. }
  421. /**
  422. * Parsed a "Transfer-Encoding: chunked" body
  423. */
  424. protected function chunked()
  425. {
  426. if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
  427. {
  428. $this->state = 'emit';
  429. return;
  430. }
  431. $decoded = '';
  432. $encoded = $this->body;
  433. while (true)
  434. {
  435. $is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
  436. if (!$is_chunked)
  437. {
  438. // Looks like it's not chunked after all
  439. $this->state = 'emit';
  440. return;
  441. }
  442. $length = hexdec(trim($matches[1]));
  443. if ($length === 0)
  444. {
  445. // Ignore trailer headers
  446. $this->state = 'emit';
  447. $this->body = $decoded;
  448. return;
  449. }
  450. $chunk_length = strlen($matches[0]);
  451. $decoded .= $part = substr($encoded, $chunk_length, $length);
  452. $encoded = substr($encoded, $chunk_length + $length + 2);
  453. if (trim($encoded) === '0' || empty($encoded))
  454. {
  455. $this->state = 'emit';
  456. $this->body = $decoded;
  457. return;
  458. }
  459. }
  460. }
  461. }