25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

1260 lines
37 KiB

  1. <?php
  2. /**
  3. * Deprecated. Use WP_HTTP (http.php) instead.
  4. */
  5. _deprecated_file( basename( __FILE__ ), '3.0.0', WPINC . '/http.php' );
  6. if ( ! class_exists( 'Snoopy', false ) ) :
  7. /*************************************************
  8. Snoopy - the PHP net client
  9. Author: Monte Ohrt <monte@ispi.net>
  10. Copyright (c): 1999-2008 New Digital Group, all rights reserved
  11. Version: 1.2.4
  12. * This library is free software; you can redistribute it and/or
  13. * modify it under the terms of the GNU Lesser General Public
  14. * License as published by the Free Software Foundation; either
  15. * version 2.1 of the License, or (at your option) any later version.
  16. *
  17. * This library is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  20. * Lesser General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU Lesser General Public
  23. * License along with this library; if not, write to the Free Software
  24. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  25. You may contact the author of Snoopy by e-mail at:
  26. monte@ohrt.com
  27. The latest version of Snoopy can be obtained from:
  28. http://snoopy.sourceforge.net/
  29. *************************************************/
  30. class Snoopy
  31. {
  32. /**** Public variables ****/
  33. /* user definable vars */
  34. var $host = "www.php.net"; // host name we are connecting to
  35. var $port = 80; // port we are connecting to
  36. var $proxy_host = ""; // proxy host to use
  37. var $proxy_port = ""; // proxy port to use
  38. var $proxy_user = ""; // proxy user to use
  39. var $proxy_pass = ""; // proxy password to use
  40. var $agent = "Snoopy v1.2.4"; // agent we masquerade as
  41. var $referer = ""; // referer info to pass
  42. var $cookies = array(); // array of cookies to pass
  43. // $cookies["username"]="joe";
  44. var $rawheaders = array(); // array of raw headers to send
  45. // $rawheaders["Content-type"]="text/html";
  46. var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
  47. var $lastredirectaddr = ""; // contains address of last redirected address
  48. var $offsiteok = true; // allows redirection off-site
  49. var $maxframes = 0; // frame content depth maximum. 0 = disallow
  50. var $expandlinks = true; // expand links to fully qualified URLs.
  51. // this only applies to fetchlinks()
  52. // submitlinks(), and submittext()
  53. var $passcookies = true; // pass set cookies back through redirects
  54. // NOTE: this currently does not respect
  55. // dates, domains or paths.
  56. var $user = ""; // user for http authentication
  57. var $pass = ""; // password for http authentication
  58. // http accept types
  59. var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  60. var $results = ""; // where the content is put
  61. var $error = ""; // error messages sent here
  62. var $response_code = ""; // response code returned from server
  63. var $headers = array(); // headers returned from server sent here
  64. var $maxlength = 500000; // max return data length (body)
  65. var $read_timeout = 0; // timeout on read operations, in seconds
  66. // supported only since PHP 4 Beta 4
  67. // set to 0 to disallow timeouts
  68. var $timed_out = false; // if a read operation timed out
  69. var $status = 0; // http request status
  70. var $temp_dir = "/tmp"; // temporary directory that the webserver
  71. // has permission to write to.
  72. // under Windows, this should be C:\temp
  73. var $curl_path = "/usr/local/bin/curl";
  74. // Snoopy will use cURL for fetching
  75. // SSL content if a full system path to
  76. // the cURL binary is supplied here.
  77. // set to false if you do not have
  78. // cURL installed. See http://curl.haxx.se
  79. // for details on installing cURL.
  80. // Snoopy does *not* use the cURL
  81. // library functions built into php,
  82. // as these functions are not stable
  83. // as of this Snoopy release.
  84. /**** Private variables ****/
  85. var $_maxlinelen = 4096; // max line length (headers)
  86. var $_httpmethod = "GET"; // default http request method
  87. var $_httpversion = "HTTP/1.0"; // default http request version
  88. var $_submit_method = "POST"; // default submit method
  89. var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
  90. var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
  91. var $_redirectaddr = false; // will be set if page fetched is a redirect
  92. var $_redirectdepth = 0; // increments on an http redirect
  93. var $_frameurls = array(); // frame src urls
  94. var $_framedepth = 0; // increments on frame depth
  95. var $_isproxy = false; // set if using a proxy server
  96. var $_fp_timeout = 30; // timeout for socket connection
  97. /*======================================================================*\
  98. Function: fetch
  99. Purpose: fetch the contents of a web page
  100. (and possibly other protocols in the
  101. future like ftp, nntp, gopher, etc.)
  102. Input: $URI the location of the page to fetch
  103. Output: $this->results the output text from the fetch
  104. \*======================================================================*/
  /**
   * Fetch the contents of a web page over http or https.
   *
   * The response is stored by _httprequest()/_httpsrequest() (normally in
   * $this->results). A redirect reported in $this->_redirectaddr is followed
   * while $this->_redirectdepth is below $this->maxredirs, and frame URLs
   * collected in $this->_frameurls are fetched while $this->_framedepth is
   * below $this->maxframes.
   *
   * @param string $URI the location of the page to fetch
   * @return bool false when the scheme is neither http nor https, when
   *              $this->_connect() fails (http), or when no executable cURL
   *              binary is configured in $this->curl_path (https);
   *              true otherwise
   */
  function fetch($URI)
  {
      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URLs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();
      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';
      // a URI without scheme/host must reach the "invalid protocol" branch
      // (or a failed connect) instead of raising undefined-index errors
      if (!isset($URI_PARTS["scheme"]))
          $URI_PARTS["scheme"] = '';
      if (!isset($URI_PARTS["host"]))
          $URI_PARTS["host"] = '';

      switch(strtolower($URI_PARTS["scheme"]))
      {
          case "http":
              $this->host = $URI_PARTS["host"];
              if(!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if(!$this->_connect($fp))
                  return false;
              if($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
              }
              else
              {
                  // no proxy, send only the path
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
              }
              $this->_disconnect($fp);
              break;
          case "https":
              // https is delegated to an external cURL binary
              if(!$this->curl_path)
                  return false;
              if(function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;
              $this->host = $URI_PARTS["host"];
              if(!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httpsrequest($URI,$URI,$this->_httpmethod);
              }
              else
              {
                  // no proxy, send only the path
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  $this->_httpsrequest($path, $URI, $this->_httpmethod);
              }
              break;
          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
              return false;
      }

      // url was redirected: follow it unless the maximum depth has been hit
      if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // only follow redirect if it's on this site, or offsiteok is true
          if(preg_match("|^http://".preg_quote($this->host, "|")."|i",$this->_redirectaddr) || $this->offsiteok)
          {
              $this->_redirectdepth++;
              $this->lastredirectaddr=$this->_redirectaddr;
              $this->fetch($this->_redirectaddr);
          }
      }

      // fetch frame sources collected while reading the response
      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          // foreach instead of each(): each() no longer exists in PHP 8
          foreach ($frameurls as $frameurl)
          {
              if($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }
      return true;
  }
  234. /*======================================================================*\
  235. Function: submit
  236. Purpose: submit an http form
  237. Input: $URI the location to post the data
  238. $formvars the formvars to use.
  239. format: $formvars["var"] = "val";
  240. $formfiles an array of files to submit
  241. format: $formfiles["var"] = "/dir/filename.ext";
  242. Output: $this->results the text output from the post
  243. \*======================================================================*/
  /**
   * Submit an http form.
   *
   * The request body is built by $this->_prepare_post_body() and sent with
   * $this->_submit_method / $this->_submit_type. A redirect is followed while
   * $this->_redirectdepth is below $this->maxredirs: with fetch() when the
   * redirect address contains a "?" after its first character, otherwise by
   * re-submitting the same form data. Frame URLs collected in
   * $this->_frameurls are fetched while $this->_framedepth is below
   * $this->maxframes.
   *
   * @param string $URI       the location to post the data
   * @param mixed  $formvars  the form variables, format: $formvars["var"] = "val";
   * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
   * @return bool false when the scheme is neither http nor https, when
   *              $this->_connect() fails (http), or when no executable cURL
   *              binary is configured in $this->curl_path (https);
   *              true otherwise
   */
  function submit($URI, $formvars="", $formfiles="")
  {
      $postdata = $this->_prepare_post_body($formvars, $formfiles);
      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URLs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();
      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';
      // a URI without scheme/host must reach the "invalid protocol" branch
      // (or a failed connect) instead of raising undefined-index errors
      if (!isset($URI_PARTS["scheme"]))
          $URI_PARTS["scheme"] = '';
      if (!isset($URI_PARTS["host"]))
          $URI_PARTS["host"] = '';

      switch(strtolower($URI_PARTS["scheme"]))
      {
          case "http":
              $this->host = $URI_PARTS["host"];
              if(!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if(!$this->_connect($fp))
                  return false;
              if($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
              }
              else
              {
                  // no proxy, send only the path
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              }
              $this->_disconnect($fp);
              break;
          case "https":
              // https is delegated to an external cURL binary
              if(!$this->curl_path)
                  return false;
              if(function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;
              $this->host = $URI_PARTS["host"];
              if(!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              }
              else
              {
                  // no proxy, send only the path
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              }
              break;
          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
              return false;
      }

      // url was redirected: follow it unless the maximum depth has been hit
      if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // qualify a redirect address that does not start with the request scheme
          if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
          // only follow redirect if it's on this site, or offsiteok is true
          if(preg_match("|^http://".preg_quote($this->host, "|")."|i",$this->_redirectaddr) || $this->offsiteok)
          {
              $this->_redirectdepth++;
              $this->lastredirectaddr=$this->_redirectaddr;
              if( strpos( $this->_redirectaddr, "?" ) > 0 )
                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
              else
                  $this->submit($this->_redirectaddr,$formvars, $formfiles);
          }
      }

      // fetch frame sources collected while reading the response
      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          // foreach instead of each(): each() no longer exists in PHP 8
          foreach ($frameurls as $frameurl)
          {
              if($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }
      return true;
  }
  384. /*======================================================================*\
  385. Function: fetchlinks
  386. Purpose: fetch the links from a web page
  387. Input: $URI where you are fetching from
  388. Output: $this->results an array of the URLs
  389. \*======================================================================*/
  /**
   * Fetch a page and replace $this->results with the links found in it.
   *
   * When a redirect was followed, links are expanded relative to
   * $this->lastredirectaddr instead of the requested URI. When
   * $this->results is an array (one entry per fetched document), every entry
   * is stripped and expanded on its own; expanding the whole nested array in
   * one preg_replace() call would turn each inner array into the string
   * "Array".
   *
   * @param string $URI where you are fetching from
   * @return bool false when fetch() fails, true otherwise
   */
  function fetchlinks($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if(is_array($this->results))
      {
          for($x=0;$x<count($this->results);$x++)
          {
              $this->results[$x] = $this->_striplinks($this->results[$x]);
              if($this->expandlinks)
                  $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
          }
      }
      else
      {
          $this->results = $this->_striplinks($this->results);
          if($this->expandlinks)
              $this->results = $this->_expandlinks($this->results,$URI);
      }
      return true;
  }
  410. /*======================================================================*\
  411. Function: fetchform
  412. Purpose: fetch the form elements from a web page
  413. Input: $URI where you are fetching from
  414. Output: $this->results the resulting html form
  415. \*======================================================================*/
  /**
   * Fetch a page and reduce $this->results to its form markup.
   *
   * Each fetched document (one string, or every entry when $this->results
   * is an array) is passed through _stripform().
   *
   * @param string $URI where you are fetching from
   * @return bool false when fetch() fails, true otherwise
   */
  function fetchform($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          $this->results = $this->_stripform($this->results);
          return true;
      }

      $index = 0;
      while ($index < count($this->results))
      {
          $this->results[$index] = $this->_stripform($this->results[$index]);
          $index++;
      }
      return true;
  }
  432. /*======================================================================*\
  433. Function: fetchtext
  434. Purpose: fetch the text from a web page, stripping the links
  435. Input: $URI where you are fetching from
  436. Output: $this->results the text from the web page
  437. \*======================================================================*/
  /**
   * Fetch a page and reduce $this->results to plain text.
   *
   * Each fetched document (one string, or every entry when $this->results
   * is an array) is passed through _striptext().
   *
   * @param string $URI where you are fetching from
   * @return bool false when fetch() fails, true otherwise
   */
  function fetchtext($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          $this->results = $this->_striptext($this->results);
          return true;
      }

      $index = 0;
      while ($index < count($this->results))
      {
          $this->results[$index] = $this->_striptext($this->results[$index]);
          $index++;
      }
      return true;
  }
  454. /*======================================================================*\
  455. Function: submitlinks
  456. Purpose: grab links from a form submission
  457. Input: $URI where you are submitting from
  458. Output: $this->results an array of the links from the post
  459. \*======================================================================*/
  /**
   * Submit a form and replace $this->results with the links of the response.
   *
   * When a redirect was followed, links are expanded relative to
   * $this->lastredirectaddr instead of the submitted URI. Expansion only
   * happens when $this->expandlinks is set.
   *
   * @param string $URI       where you are submitting from
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles form files, passed through to submit()
   * @return bool false when submit() fails, true otherwise
   */
  function submitlinks($URI, $formvars="", $formfiles="")
  {
      if (!$this->submit($URI,$formvars, $formfiles))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (!is_array($this->results))
      {
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results,$URI);
          return true;
      }

      $index = 0;
      while ($index < count($this->results))
      {
          $this->results[$index] = $this->_striplinks($this->results[$index]);
          if ($this->expandlinks)
              $this->results[$index] = $this->_expandlinks($this->results[$index],$URI);
          $index++;
      }
      return true;
  }
  486. /*======================================================================*\
  487. Function: submittext
  488. Purpose: grab text from a form submission
  489. Input: $URI where you are submitting from
  490. Output: $this->results the text from the web page
  491. \*======================================================================*/
  /**
   * Submit a form and reduce $this->results to the plain text of the response.
   *
   * After stripping, the text is additionally run through _expandlinks()
   * when $this->expandlinks is set, using $this->lastredirectaddr as the
   * base if a redirect was followed.
   *
   * @param string $URI       where you are submitting from
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles form files, passed through to submit()
   * @return bool false when submit() fails, true otherwise
   */
  function submittext($URI, $formvars = "", $formfiles = "")
  {
      if (!$this->submit($URI,$formvars, $formfiles))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (!is_array($this->results))
      {
          $this->results = $this->_striptext($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results,$URI);
          return true;
      }

      $index = 0;
      while ($index < count($this->results))
      {
          $this->results[$index] = $this->_striptext($this->results[$index]);
          if ($this->expandlinks)
              $this->results[$index] = $this->_expandlinks($this->results[$index],$URI);
          $index++;
      }
      return true;
  }
  518. /*======================================================================*\
  519. Function: set_submit_multipart
  520. Purpose: Set the form submission content type to
  521. multipart/form-data
  522. \*======================================================================*/
  /**
   * Switch the form submission content type to multipart/form-data.
   */
  function set_submit_multipart()
  {
      $multipart_type = "multipart/form-data";
      $this->_submit_type = $multipart_type;
  }
  527. /*======================================================================*\
  528. Function: set_submit_normal
  529. Purpose: Set the form submission content type to
  530. application/x-www-form-urlencoded
  531. \*======================================================================*/
  /**
   * Switch the form submission content type back to
   * application/x-www-form-urlencoded.
   */
  function set_submit_normal()
  {
      $urlencoded_type = "application/x-www-form-urlencoded";
      $this->_submit_type = $urlencoded_type;
  }
  536. /*======================================================================*\
  537. Private functions
  538. \*======================================================================*/
  539. /*======================================================================*\
  540. Function: _striplinks
  541. Purpose: strip the hyperlinks from an html document
  542. Input: $document document to strip.
  543. Output: $match an array of the links
  544. \*======================================================================*/
  /**
   * Extract the href targets of all <a> tags in an html document.
   *
   * Quoted targets (capture group 2) are listed first, followed by unquoted
   * targets (capture group 3); empty captures are skipped.
   *
   * @param string $document document to strip
   * @return array the links found; an empty array when there are none
   */
  function _striplinks($document)
  {
      preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

      // always hand back an array, even when nothing matched
      $match = array();

      // catenate the non-empty matches from the conditional subpattern.
      // foreach instead of each(): each() no longer exists in PHP 8.
      // Compare against '' rather than empty() so that href="0" survives.
      foreach ($links[2] as $val)
      {
          if($val !== '')
              $match[] = $val;
      }
      foreach ($links[3] as $val)
      {
          if($val !== '')
              $match[] = $val;
      }

      // return the links
      return $match;
  }
  566. /*======================================================================*\
  567. Function: _stripform
  568. Purpose: strip the form elements from an html document
  569. Input: $document document to strip.
  570. Output: $match an array of the links
  571. \*======================================================================*/
  /**
   * Reduce an html document to its form-related tags.
   *
   * Collects every FORM, INPUT, SELECT, TEXTAREA and OPTION tag matched by
   * the pattern below and joins the matches with CRLF.
   *
   * @param string $document document to strip
   * @return string the matched form markup, one match per line
   */
  function _stripform($document)
  {
      $form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
      preg_match_all($form_pattern, $document, $found);

      // index 0 holds the full text of every match
      return implode("\r\n", $found[0]);
  }
  580. /*======================================================================*\
  581. Function: _striptext
  582. Purpose: strip the text from an html document
  583. Input: $document document to strip.
  584. Output: $text the resulting text
  585. \*======================================================================*/
  /**
   * Reduce an html document to plain text.
   *
   * Applies, in order: removal of <script> blocks, removal of all tags,
   * collapsing of whitespace that follows a line break, and replacement of a
   * fixed list of html entities by single-byte characters.
   *
   * @param string $document document to strip
   * @return string the resulting text
   */
  function _striptext($document)
  {
      // pattern => replacement, applied top to bottom
      $rules = array(
          "'<script[^>]*?>.*?</script>'si" => "", // strip out javascript
          "'<[\/\!]*?[^<>]*?>'si" => "", // strip out html tags
          "'([\r\n])[\s]+'" => "\\1", // strip out white space
          "'&(quot|#34|#034|#x22);'i" => "\"", // html entities, incl. hexadecimal forms
          "'&(amp|#38|#038|#x26);'i" => "&",
          "'&(lt|#60|#060|#x3c);'i" => "<",
          "'&(gt|#62|#062|#x3e);'i" => ">",
          "'&(nbsp|#160|#xa0);'i" => " ",
          "'&(iexcl|#161);'i" => chr(161),
          "'&(cent|#162);'i" => chr(162),
          "'&(pound|#163);'i" => chr(163),
          "'&(copy|#169);'i" => chr(169),
          "'&(reg|#174);'i" => chr(174),
          "'&(deg|#176);'i" => chr(176),
          "'&(#39|#039|#x27);'" => chr(39),
          "'&(euro|#8364);'i" => chr(128), // europe
          "'&a(uml|UML);'" => chr(0xE4), // german, ANSI &auml;
          "'&o(uml|UML);'" => chr(0xF6), // ANSI &ouml;
          "'&u(uml|UML);'" => chr(0xFC), // ANSI &uuml;
          "'&A(uml|UML);'" => chr(0xC4), // ANSI &Auml;
          "'&O(uml|UML);'" => chr(0xD6), // ANSI &Ouml;
          "'&U(uml|UML);'" => chr(0xDC), // ANSI &Uuml;
          "'&szlig;'i" => chr(0xDF), // ANSI &szlig;
      );

      return preg_replace(array_keys($rules), array_values($rules), $document);
  }
  642. /*======================================================================*\
  643. Function: _expandlinks
  644. Purpose: expand each link into a fully qualified URL
  645. Input: $links the links to qualify
  646. $URI the full URI to get the base from
  647. Output: $expandedLinks the expanded links
  648. \*======================================================================*/
  /**
   * Expand each link into a fully qualified URL.
   *
   * The base is derived from $URI by dropping the query string, a trailing
   * "name.ext" path segment and a trailing slash. Links starting with "/"
   * are resolved against scheme://host of the base; links that do not start
   * with http://, https:// or mailto: are resolved against the base itself.
   * "/./" and "/segment/../" sequences are then collapsed once.
   *
   * @param string|array $links the link(s) to qualify
   * @param string       $URI   the full URI to get the base from
   * @return string|array the expanded link(s), as returned by preg_replace()
   */
  function _expandlinks($links,$URI)
  {
      // everything before the query string
      $base = '';
      if (preg_match("/^[^\?]+/",$URI,$match))
          $base = $match[0];
      // drop a trailing "name.ext" segment. The lookbehind keeps the slash of
      // "://" from matching, so "http://example.com" is not cut to "http:/"
      $base = preg_replace("|(?<![:/])/[^\/\.]+\.[^\/\.]+$|","",$base);
      $base = preg_replace("|/$|","",$base);

      $base_part = parse_url($base);
      $base_scheme = (is_array($base_part) && isset($base_part["scheme"])) ? $base_part["scheme"] : '';
      $base_host = (is_array($base_part) && isset($base_part["host"])) ? $base_part["host"] : '';
      $base_root = $base_scheme."://".$base_host;

      $search = array( "|^http://".preg_quote($this->host, "|")."|i",
          "|^(\/)|i",
          // absolute https:// links must not get the base prepended either
          "|^(?!https?://)(?!mailto:)|i",
          "|/\./|",
          "|/[^\/]+/\.\./|"
      );
      $replace = array( "",
          $base_root."/",
          $base."/",
          "/",
          "/"
      );
      $expandedLinks = preg_replace($search,$replace,$links);
      return $expandedLinks;
  }
  672. /*======================================================================*\
  673. Function: _httprequest
  674. Purpose: go get the http data from the server
  675. Input: $url the url to fetch
  676. $fp the current open file pointer
  677. $URI the full URI
  678. $body body contents to send if any (POST)
  679. Output:
  680. \*======================================================================*/
  /**
   * Send an http request over an open connection and read the response.
   *
   * Response headers are collected in $this->headers, the status in
   * $this->status / $this->response_code, and the body in $this->results
   * (appended as a new entry when frames are being collected). A Location:
   * or URI: header, or a meta refresh tag in the body, sets
   * $this->_redirectaddr. Frame sources are queued in $this->_frameurls
   * while $this->_framedepth is below $this->maxframes.
   *
   * @param string   $url          the url to request (path, or full URI behind a proxy)
   * @param resource $fp           the current open file pointer
   * @param string   $URI          the full URI
   * @param string   $http_method  the request method to send
   * @param string   $content_type value for the Content-type header, if any
   * @param string   $body         body contents to send if any (POST)
   * @return bool false when a read timed out (status is set to -100),
   *              true otherwise
   */
  function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
  {
      if($this->passcookies && $this->_redirectaddr)
          $this->setcookies();
      $URI_PARTS = parse_url($URI);
      if(empty($url))
          $url = "/";

      // ---- build the request head ----
      $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
      if(!empty($this->agent))
          $headers .= "User-Agent: ".$this->agent."\r\n";
      if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
          $headers .= "Host: ".$this->host;
          if(!empty($this->port) && $this->port != 80)
              $headers .= ":".$this->port;
          $headers .= "\r\n";
      }
      if(!empty($this->accept))
          $headers .= "Accept: ".$this->accept."\r\n";
      if(!empty($this->referer))
          $headers .= "Referer: ".$this->referer."\r\n";
      if(!empty($this->cookies))
      {
          if(!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;
          if ( count($this->cookies) > 0 ) {
              $cookie_pairs = array();
              foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                  $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
              }
              $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
          }
      }
      if(!empty($this->rawheaders))
      {
          if(!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() no longer exists in PHP 8
          foreach ($this->rawheaders as $headerKey => $headerVal)
              $headers .= $headerKey.": ".$headerVal."\r\n";
      }
      if(!empty($content_type)) {
          $headers .= "Content-type: $content_type";
          if ($content_type == "multipart/form-data")
              $headers .= "; boundary=".$this->_mime_boundary;
          $headers .= "\r\n";
      }
      if(!empty($body))
          $headers .= "Content-length: ".strlen($body)."\r\n";
      if(!empty($this->user) || !empty($this->pass))
          $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
      //add proxy auth headers
      if(!empty($this->proxy_user))
          $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
      $headers .= "\r\n";

      // set the read timeout if needed
      if ($this->read_timeout > 0)
          socket_set_timeout($fp, $this->read_timeout);
      $this->timed_out = false;

      fwrite($fp,$headers.$body,strlen($headers.$body));

      // ---- read the response head ----
      $this->_redirectaddr = false;
      // reset to an empty array so the property stays defined even when the
      // server sends no header lines
      $this->headers = array();
      while($currentHeader = fgets($fp,$this->_maxlinelen))
      {
          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
          {
              $this->status=-100;
              return false;
          }
          // a bare CRLF terminates the header section
          if($currentHeader == "\r\n")
              break;
          // if a header begins with Location: or URI:, set the redirect.
          // Whitespace after the colon is optional, so "Location:/x" is
          // parsed too instead of leaving $matches[2] undefined.
          if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
          {
              // look for :// in the Location header to see if hostname is included
              if(!preg_match("|\:\/\/|",$matches[2]))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if(!preg_match("|^/|",$matches[2]))
                      $this->_redirectaddr .= "/".$matches[2];
                  else
                      $this->_redirectaddr .= $matches[2];
              }
              else
                  $this->_redirectaddr = $matches[2];
          }
          if(preg_match("|^HTTP/|",$currentHeader))
          {
              if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
              {
                  $this->status= $status[1];
              }
              $this->response_code = $currentHeader;
          }
          $this->headers[] = $currentHeader;
      }

      // ---- read the response body until fread() returns nothing ----
      $results = '';
      do {
          $_data = fread($fp, $this->maxlength);
          if (strlen($_data) == 0) {
              break;
          }
          $results .= $_data;
      } while(true);
      if ($this->read_timeout > 0 && $this->_check_timeout($fp))
      {
          $this->status=-100;
          return false;
      }

      // check if there is a redirect meta tag
      if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
      {
          $this->results[] = $results;
          for($x=0; $x<count($match[1]); $x++)
              $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
      }
      // have we already fetched framed content?
      elseif(is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;
      return true;
  }
  814. /*======================================================================*\
  815. Function: _httpsrequest
  816. Purpose: go get the https data from the server using curl
  817. Input: $url the url to fetch
  818. $URI the full URI
  819. $body body contents to send if any (POST)
  820. Output:
  821. \*======================================================================*/
  function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  {
      // Fetches $URI over HTTPS by running the curl binary at $this->curl_path,
      // then scans the dumped response headers and the body for redirects
      // and frames.
      //
      // $url          not read here; curl is always handed the full $URI
      // $URI          the full URI to fetch
      // $http_method  not read here; no request line is built for curl
      // $content_type value for the Content-type header, if any
      // $body         request body, passed to curl with -d when non-empty
      //
      // Returns true on success. Returns false and sets $this->error when the
      // temporary header file cannot be created or curl exits non-zero.
      // Writes $this->headers, $this->response_code, $this->_redirectaddr and
      // $this->results, and may append to $this->_frameurls.

      if($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $headers = array();
      $URI_PARTS = parse_url($URI);
      // parse_url() may fail or omit the scheme; this method only serves
      // https requests, so fall back to that.
      $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "https";

      // GET ... header not needed for curl
      if(!empty($this->agent))
          $headers[] = "User-Agent: ".$this->agent;
      if(!empty($this->host))
      {
          if(!empty($this->port))
              $headers[] = "Host: ".$this->host.":".$this->port;
          else
              $headers[] = "Host: ".$this->host;
      }
      if(!empty($this->accept))
          $headers[] = "Accept: ".$this->accept;
      if(!empty($this->referer))
          $headers[] = "Referer: ".$this->referer;
      if(!empty($this->cookies))
      {
          if(!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;

          if ( count($this->cookies) > 0 ) {
              $cookie_str = 'Cookie: ';
              foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                  $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
              }
              // drop the trailing "; "
              $headers[] = substr($cookie_str,0,-2);
          }
      }
      if(!empty($this->rawheaders))
      {
          if(!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() no longer exists in PHP 8 and
          // depended on the array's internal pointer having been reset.
          foreach ( $this->rawheaders as $headerKey => $headerVal )
              $headers[] = $headerKey.": ".$headerVal;
      }
      if(!empty($content_type)) {
          if ($content_type == "multipart/form-data")
              $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
          else
              $headers[] = "Content-type: $content_type";
      }
      if(!empty($body))
          $headers[] = "Content-length: ".strlen($body);
      if(!empty($this->user) || !empty($this->pass))
          $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

      // curl dumps the response headers into this file (-D).
      $headerfile = tempnam( $this->temp_dir, "sno" );
      if ( false === $headerfile )
      {
          $this->error = "Error: could not create a temporary file for the cURL response headers.";
          return false;
      }

      $cmdline_params = '-k -D ' . escapeshellarg( $headerfile );

      foreach ( $headers as $header ) {
          $cmdline_params .= ' -H ' . escapeshellarg( $header );
      }

      if ( ! empty( $body ) ) {
          $cmdline_params .= ' -d ' . escapeshellarg( $body );
      }

      if ( $this->read_timeout > 0 ) {
          $cmdline_params .= ' -m ' . escapeshellarg( $this->read_timeout );
      }

      exec( $this->curl_path . ' ' . $cmdline_params . ' ' . escapeshellarg( $URI ), $results, $return );

      if($return)
      {
          // do not leave the temporary header file behind on failure
          if ( file_exists( $headerfile ) )
              unlink( $headerfile );
          $this->error = "Error: cURL could not retrieve the document, error $return.";
          return false;
      }

      $results = implode("\r\n",$results);

      $result_headers = file( $headerfile );
      // the header dump has been read (or is unreadable); remove it now
      if ( file_exists( $headerfile ) )
          unlink( $headerfile );
      if ( false === $result_headers )
          $result_headers = array();

      $this->_redirectaddr = false;
      // reset to an empty array rather than unset(), so the property stays
      // countable even when curl produced no headers
      $this->headers = array();

      foreach ( $result_headers as $header_line )
      {
          // if a header begins with Location: or URI:, set the redirect.
          // One case-insensitive match both detects the header and captures
          // its value, whatever whitespace follows the colon.
          if(preg_match('/^(?:Location|URI):\s*(.+)/i',chop($header_line),$matches))
          {
              $location = $matches[1];
              // look for :// in the Location header to see if hostname is included
              if(!preg_match("|\:\/\/|",$location))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if(!preg_match("|^/|",$location))
                      $this->_redirectaddr .= "/".$location;
                  else
                      $this->_redirectaddr .= $location;
              }
              else
                  $this->_redirectaddr = $location;
          }

          if(preg_match("|^HTTP/|",$header_line))
              $this->response_code = $header_line;

          $this->headers[] = $header_line;
      }

      // check if there is a redirect meta tag
      if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
      {
          $this->results[] = $results;
          foreach ( $match[1] as $frame_src )
              $this->_frameurls[] = $this->_expandlinks($frame_src,$scheme."://".$this->host);
      }
      // have we already fetched framed content?
      elseif(is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  940. /*======================================================================*\
  941. Function: setcookies()
  942. Purpose: set cookies for a redirection
  943. \*======================================================================*/
  function setcookies()
  {
      // Copies each cookie found in the Set-Cookie lines of $this->headers
      // into $this->cookies (name => url-decoded value) so that it can be
      // sent again on a redirected request.

      // nothing to do when no response headers have been recorded
      if(empty($this->headers) || !is_array($this->headers))
          return;

      foreach($this->headers as $header_line)
      {
          // Header lines are stored with their line ending intact, so the
          // value capture has to stop at CR/LF as well as at ';'. Otherwise a
          // cookie without attributes would keep the trailing "\r\n".
          if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header_line, $match))
              $this->cookies[$match[1]] = urldecode($match[2]);
      }
  }
  952. /*======================================================================*\
  953. Function: _check_timeout
  954. Purpose: checks whether timeout has occurred
  955. Input: $fp file pointer
  956. \*======================================================================*/
  function _check_timeout($fp)
  {
      // Reports whether the stream $fp has timed out while reading.
      // Returns true (and sets $this->timed_out) only when a positive
      // $this->read_timeout is set and the stream status has its
      // "timed_out" flag raised; returns false in every other case.

      // no positive read timeout: the check is skipped
      if (!($this->read_timeout > 0))
          return false;

      $stream_state = socket_get_status($fp);
      if (!$stream_state["timed_out"])
          return false;

      $this->timed_out = true;
      return true;
  }
  968. /*======================================================================*\
  969. Function: _connect
  970. Purpose: make a socket connection
  971. Input: $fp file pointer
  972. \*======================================================================*/
  function _connect(&$fp)
  {
      // Opens a socket to the target host, or to the proxy when both
      // $this->proxy_host and $this->proxy_port are set.
      //
      // $fp  taken by reference; receives the result of fsockopen()
      //
      // Returns true when the socket was opened. On failure returns false,
      // stores the fsockopen() error number in $this->status and a message
      // in $this->error.

      if(!empty($this->proxy_host) && !empty($this->proxy_port))
      {
          $this->_isproxy = true;

          $host = $this->proxy_host;
          $port = $this->proxy_port;
      }
      else
      {
          $host = $this->host;
          $port = $this->port;
      }

      $this->status = 0;

      $fp = fsockopen(
          $host,
          $port,
          $errno,
          $errstr,
          $this->_fp_timeout
      );

      if($fp)
      {
          // socket connection succeeded
          return true;
      }

      // socket connection failed
      $this->status = $errno;
      // Each case ends with break so the specific message is not
      // overwritten by the cases that follow it.
      switch($errno)
      {
          case -3:
              $this->error="socket creation failed (-3)";
              break;
          case -4:
              $this->error="dns lookup failure (-4)";
              break;
          case -5:
              $this->error="connection refused or timed out (-5)";
              break;
          default:
              $this->error="connection failed (".$errno.")";
              break;
      }
      return false;
  }
  1016. /*======================================================================*\
  1017. Function: _disconnect
  1018. Purpose: disconnect a socket connection
  1019. Input: $fp file pointer
  1020. \*======================================================================*/
  function _disconnect($fp)
  {
      // Closes the stream $fp and hands back fclose()'s result
      // (true when the stream was closed, false otherwise).
      $closed = fclose($fp);

      return $closed;
  }
  1025. /*======================================================================*\
  1026. Function: _prepare_post_body
  1027. Purpose: Prepare post body according to encoding type
  1028. Input: $formvars - form variables
  1029. $formfiles - form upload files
  1030. Output: post body
  1031. \*======================================================================*/
  function _prepare_post_body($formvars, $formfiles)
  {
      // Builds the POST body for the encoding selected in $this->_submit_type.
      //
      // $formvars   form variables (name => value, or name => list of values);
      //             coerced to an array
      // $formfiles  upload files (field name => path or list of paths);
      //             coerced to an array and only used for multipart/form-data
      //
      // Returns the body as a string. Returns nothing (null) when both inputs
      // are empty, and an empty string for an unrecognised submit type.
      // For multipart/form-data a fresh boundary is generated and stored in
      // $this->_mime_boundary.

      settype($formvars, "array");
      settype($formfiles, "array");
      $postdata = '';

      if (count($formvars) == 0 && count($formfiles) == 0)
          return;

      switch ($this->_submit_type) {
          case "application/x-www-form-urlencoded":
              // foreach replaces the each() loops, which no longer exist in PHP 8
              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                      }
                  } else
                      $postdata .= urlencode($key)."=".urlencode($val)."&";
              }
              break;

          case "multipart/form-data":
              $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= "--".$this->_mime_boundary."\r\n";
                          // Concatenate the "[]" suffix: writing "\[\]" inside a
                          // double-quoted string would emit the backslashes too.
                          $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                          $postdata .= "$cur_val\r\n";
                      }
                  } else {
                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                      $postdata .= "$val\r\n";
                  }
              }

              foreach ($formfiles as $field_name => $file_names) {
                  settype($file_names, "array");
                  foreach ($file_names as $file_name) {
                      // skip anything that is not a readable regular file
                      if (!is_file($file_name) || !is_readable($file_name)) continue;

                      // file_get_contents() copes with zero-length files, where
                      // fread($fp, 0) raises an error in PHP 8.
                      $file_content = file_get_contents($file_name);
                      if ($file_content === false) continue;

                      $base_name = basename($file_name);

                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                      $postdata .= "$file_content\r\n";
                  }
              }
              // closing boundary
              $postdata .= "--".$this->_mime_boundary."--\r\n";
              break;
      }

      return $postdata;
  }
  1086. }
  1087. endif;
  1088. ?>