You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

353 lines
11 KiB

  1. <?php
  2. namespace Kuxin\Helper;
  /**
   * Class Collect
   *
   * Static helpers for content collection: fetching a page and converting it
   * to UTF-8, extracting values with regular expressions, applying replacement
   * rules, cutting text between two markers, resolving relative links and
   * transposing column-oriented match results into rows.
   *
   * @package Kuxin\Helper
   * @author Pakey <pakey@qq.com>
   */
  class Collect
  {
      /**
       * Fetch a page and return its body converted to UTF-8.
       *
       * $data is either a URL string or a rule array. Keys read by this method:
       *  - rule         URL to fetch; the placeholders "[timestamp]" and "[时间]" are substituted
       *  - usetimestamp when equal to 1 (and no placeholder is present) a "_ptcms" query
       *                 parameter is appended to the URL
       *  - method       "post" (case-insensitive) selects Http::post(), anything else Http::get()
       *  - charset      one of auto, utf-8, gbk, big5, utf-16; any other value is treated as auto
       *  - error        marker string; if the converted page contains it, '' is returned
       *  - replace      replacement rules passed to self::replace()
       *
       * @param string|array $data   URL or rule array (see above)
       * @param array        $header forwarded unchanged to Http::get()/Http::post()
       * @param array        $option forwarded unchanged to Http::get()/Http::post()
       * @return mixed the converted content, or '' when there is no URL, the request
       *               yields nothing, or the error marker is found
       */
      public static function getContent($data, $header = [], $option = [])
      {
          if (is_string($data)) {
              $data = ['rule' => $data, 'charset' => 'auto'];
          }
          if (empty($data['rule'])) {
              return '';
          }

          $url = $data['rule'];
          // Compare against false explicitly: a placeholder at offset 0 is still a match.
          if (strpos($url, '[timestamp]') !== false || strpos($url, '[时间]') !== false) {
              // The numeric offsets are kept exactly as in the original rules;
              // their purpose is not visible from this file.
              $url = str_replace(['[timestamp]', '[时间]'], [time() - 64566122, date('Y-m-d H:i:s')], $url);
          } elseif (isset($data['usetimestamp']) && $data['usetimestamp'] == 1) {
              $url .= (strpos($url, '?') !== false ? '&_ptcms=' : '?_ptcms=') . (time() - 13456867);
          }

          if (isset($data['method']) && strtolower($data['method']) == 'post') {
              $content = Http::post($url, [], $header, $option);
          } else {
              $content = Http::get($url, [], $header, $option);
          }
          if (!$content) {
              return '';
          }

          // Every charset the conversion switch below can handle is accepted here,
          // so an explicit "big5" or "utf-16" setting is honoured instead of being
          // silently replaced by auto-detection.
          $charset = isset($data['charset']) ? $data['charset'] : 'auto';
          if (empty($charset) || !in_array($charset, ['auto', 'utf-8', 'gbk', 'big5', 'utf-16'], true)) {
              $charset = 'auto';
          }
          if ($charset === 'auto') {
              $charset = self::detectCharset($content);
          }

          switch ($charset) {
              case 'gbk':
                  $content = mb_convert_encoding($content, 'UTF-8', 'GBK');
                  break;
              case 'big5':
                  $content = mb_convert_encoding($content, 'UTF-8', 'big-5');
                  // NOTE(review): big5::toutf8() is defined outside this file; it is
                  // called on content that is already UTF-8 at this point - confirm
                  // that this second pass is intended.
                  $content = big5::toutf8($content);
                  break;
              case 'utf-16':
                  $content = mb_convert_encoding($content, 'UTF-8', 'UTF-16');
                  break;
              default:
                  // utf-8: nothing to convert
                  break;
          }

          // Error marker: the page is treated as a failed fetch.
          if (!empty($data['error']) && strpos($content, $data['error']) !== false) {
              return '';
          }
          if (!empty($data['replace'])) {
              $content = self::replace($content, $data['replace']);
          }
          return $content;
      }

      /**
       * Guess the source charset of a fetched page.
       *
       * Looks for a declared big5 or gb* charset/encoding first and otherwise
       * falls back to gbk for anything that is not valid UTF-8.
       *
       * @param string $content raw page body
       * @return string one of 'big5', 'gbk', 'utf-8'
       */
      private static function detectCharset($content)
      {
          if (preg_match('/[;\s\'"]charset[=\'"\s]+?big/i', $content)) {
              return 'big5';
          }
          if (preg_match('/[;\s\'"]charset[=\'"\s]+?gb/i', $content)
              || preg_match('/[;\s\'"]encoding[=\'"\s]+?gb/i', $content)
          ) {
              return 'gbk';
          }
          // mb_check_encoding() validates the bytes instead of guessing, which is
          // what the "is this already UTF-8?" question needs.
          return mb_check_encoding($content, 'UTF-8') ? 'utf-8' : 'gbk';
      }

      /**
       * Collect every match of a rule from the source text.
       *
       * The rule is either a shorthand string (expanded by self::parseMatchRule())
       * or an array with the keys 'rule' (regex body), 'option' (regex modifiers)
       * and 'replace' (rules for self::replace()).
       *
       * Without $needposition the result is the list of first-group captures; when
       * the pattern has further groups, an empty first capture is filled from the
       * first non-empty later group of the same match. With $needposition each
       * entry is the [text, offset] pair of the last group of a match.
       *
       * @param mixed  $pregArr      rule string / rule array / number
       * @param string $code         source text
       * @param int    $needposition non-zero to capture offsets as well
       * @return array|bool|mixed a numeric $pregArr is returned unchanged; [] for an
       *                          empty rule or a rule without parentheses; false when
       *                          the pattern produced no first-group result
       */
      public static function getMatchAll($pregArr, $code, $needposition = 0)
      {
          if (is_numeric($pregArr)) {
              return $pregArr;
          }
          if (is_string($pregArr)) {
              $pregArr = ['rule' => self::parseMatchRule($pregArr)];
          } elseif (empty($pregArr['rule'])) {
              return [];
          }
          if (!self::isreg($pregArr['rule'])) {
              return [];
          }

          $pattern = '{' . $pregArr['rule'] . '}' . (empty($pregArr['option']) ? '' : $pregArr['option']);
          $rules   = empty($pregArr['replace']) ? [] : $pregArr['replace'];
          $match   = [];
          $found   = [];

          if ($needposition) {
              preg_match_all($pattern, $code, $match, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
              foreach ($match as $set) {
                  $found[] = is_array($set) ? $set[count($set) - 1] : $set;
              }
              if ($rules) {
                  foreach ($found as $k => $pair) {
                      if (is_array($pair)) {
                          // Only the captured text is rewritten; the offset must
                          // stay an untouched integer.
                          $found[$k][0] = self::replace($pair[0], $rules);
                      } else {
                          $found[$k] = self::replace($pair, $rules);
                      }
                  }
              }
              return $found;
          }

          preg_match_all($pattern, $code, $match);
          if (!isset($match[1])) {
              return false;
          }
          if (isset($match[2])) {
              $groups = count($match);
              foreach ($match[1] as $k => $first) {
                  if ($first != '') {
                      continue;
                  }
                  // Alternation: take the first later group that captured
                  // something ('0' counts as a capture).
                  for ($i = 2; $i < $groups; $i++) {
                      if (isset($match[$i][$k]) && $match[$i][$k] !== '') {
                          $match[1][$k] = $match[$i][$k];
                          break;
                      }
                  }
              }
          }
          $found = $match[1];
          if ($rules) {
              foreach ($found as $k => $text) {
                  $found[$k] = self::replace($text, $rules);
              }
          }
          return $found;
      }

      /**
       * Extract a single value with a rule.
       *
       * Accepts the same rule formats as getMatchAll(). A rule that contains no
       * parentheses is not treated as a pattern and is returned as the value
       * itself. When the pattern contains '|' and more than one group, the first
       * non-empty group wins; otherwise the first group is used.
       *
       * @param mixed  $pregArr rule string / rule array / number
       * @param string $code    source text
       * @return mixed a numeric $pregArr unchanged, the literal rule, the captured
       *               (and optionally replaced) text, or '' when nothing matched
       */
      public static function getMatch($pregArr, $code)
      {
          if (is_numeric($pregArr)) {
              return $pregArr;
          }
          if (empty($pregArr) || (isset($pregArr['rule']) && empty($pregArr['rule']))) {
              return '';
          }
          if (is_string($pregArr)) {
              $pregArr = ['rule' => self::parseMatchRule($pregArr), 'replace' => []];
          } elseif (!is_array($pregArr) || !isset($pregArr['rule'])) {
              // A rule array without a 'rule' entry cannot match anything.
              return '';
          }
          if (!self::isreg($pregArr['rule'])) {
              return $pregArr['rule'];
          }

          $pattern = '{' . $pregArr['rule'] . '}' . (empty($pregArr['option']) ? '' : $pregArr['option']);
          $match   = [];
          preg_match($pattern, $code, $match);

          $result = '';
          if (strpos($pattern, '|') !== false && isset($match[2])) {
              array_shift($match);
              foreach ($match as $candidate) {
                  if ($candidate !== '') {
                      $result = $candidate;
                      break;
                  }
              }
          } elseif (isset($match[1])) {
              $result = $match[1];
          }

          // Compare with '' rather than relying on truthiness so that a captured
          // "0" is returned instead of being dropped.
          if ($result === '') {
              return '';
          }
          return empty($pregArr['replace']) ? $result : self::replace($result, $pregArr['replace']);
      }

      /**
       * Apply a list of replacement rules to a text (plain or regex).
       *
       * Each rule is an array:
       *   [
       *       'rule'   => 'search♂replacement', // text after "♂" is the replacement ('' if absent)
       *       'option' => 'i',                  // regex modifiers; for plain rules "i" means case-insensitive
       *       'method' => 1,                    // 1 = regular expression, anything else / missing = plain text
       *   ]
       * Rules with an empty 'rule' are skipped. A regex rule that fails leaves the
       * text as it was before that rule.
       *
       * @param mixed $con text to process
       * @param array $arr list of rules as described above
       * @return mixed the processed text
       */
      public static function replace($con, array $arr)
      {
          foreach ($arr as $item) {
              if (empty($item['rule'])) {
                  continue;
              }
              $parts       = explode('♂', $item['rule']);
              $search      = $parts[0];
              $replacement = isset($parts[1]) ? $parts[1] : '';
              $option      = isset($item['option']) ? $item['option'] : '';
              $isRegex     = isset($item['method']) && $item['method'] == 1;

              if ($isRegex) {
                  $replaced = preg_replace('{' . $search . '}' . $option, $replacement, $con);
                  // preg_replace() returns null on error; keep the text in that case.
                  if ($replaced !== null) {
                      $con = $replaced;
                  }
              } elseif (strpos($option, 'i') === false) {
                  $con = str_replace($search, $replacement, $con);
              } else {
                  $con = str_ireplace($search, $replacement, $con);
              }
          }
          return $con;
      }

      /**
       * Turn a link found on a page into a full URL.
       *
       * Handles absolute URLs (returned unchanged), protocol-relative links
       * ("//host/x"), root-relative links ("/x"), query- or fragment-only links and
       * document-relative links including "./" and "../" segments.
       *
       * @param string $url  link as found in the page
       * @param string $path URL of the page the link was found on
       * @return string resolved URL, or '' when $url is empty
       */
      public static function parseUrl($url, $path)
      {
          if (!$url) {
              return '';
          }
          $url = (string) $url;

          // Already absolute: a scheme followed by "://" at the very start.
          if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
              return $url;
          }

          $base = parse_url((string) $path);
          if (!is_array($base)) {
              $base = [];
          }
          $schemePrefix = isset($base['scheme']) ? $base['scheme'] . ':' : '';

          // Protocol-relative link: only the scheme comes from the page URL.
          if (substr($url, 0, 2) === '//') {
              return $schemePrefix . $url;
          }

          $origin = '';
          if (isset($base['host'])) {
              $origin = $schemePrefix . '//' . $base['host'] . (isset($base['port']) ? ':' . $base['port'] : '');
          }

          // Root-relative link.
          if ($url[0] === '/') {
              return $origin . $url;
          }

          $basePath = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';

          // Split the link into its path part and the "?query#fragment" tail so
          // that dots inside the query are never treated as path segments.
          $split   = strcspn($url, '?#');
          $refPath = (string) substr($url, 0, $split);
          $suffix  = (string) substr($url, $split);

          if ($refPath === '') {
              // "?x=1" replaces the page's query, "#top" keeps it.
              if ($suffix[0] === '#' && isset($base['query'])) {
                  $suffix = '?' . $base['query'] . $suffix;
              }
              return $origin . $basePath . $suffix;
          }

          // Directory of the current page: everything up to the last "/".
          $slash = strrpos($basePath, '/');
          $dir   = $slash === false ? '' : substr($basePath, 0, $slash + 1);
          if ($origin !== '' && ($dir === '' || $dir[0] !== '/')) {
              $dir = '/' . $dir;
          }

          return $origin . self::removeDotSegments($dir . $refPath) . $suffix;
      }

      /**
       * Collapse "." and ".." segments of a URL path.
       *
       * @param string $path path that may contain dot segments
       * @return string path with the dot segments resolved
       */
      private static function removeDotSegments($path)
      {
          $segments = explode('/', $path);
          $last     = count($segments) - 1;
          // For an absolute path the leading empty segment (the root) is never popped.
          $floor    = ($path !== '' && $path[0] === '/') ? 1 : 0;
          $kept     = [];

          foreach ($segments as $i => $segment) {
              if ($segment === '.' || $segment === '..') {
                  if ($segment === '..' && count($kept) > $floor) {
                      array_pop($kept);
                  }
                  if ($i === $last) {
                      // A trailing dot segment refers to a directory: keep the final "/".
                      $kept[] = '';
                  }
                  continue;
              }
              $kept[] = $segment;
          }
          return implode('/', $kept);
      }

      /**
       * Cut out the text between a left and a right marker.
       *
       * A marker containing ".+?" is treated as a regular expression: it is
       * matched against the whole text and the matched string becomes the literal
       * marker. The result is the text after the first left marker (up to the next
       * left marker, if any), truncated at the first right marker. If the right
       * marker is empty or does not occur, the text is not truncated.
       *
       * @param string $strings text to cut
       * @param string $argl    left marker (regex mode if it contains ".+?")
       * @param string $argr    right marker (regex mode if it contains ".+?")
       * @param bool   $lt      prepend the left marker to the result
       * @param bool   $gt      append the right marker to the result
       * @return string the cut text, or '' when the text is empty, the left marker
       *                is empty or missing, or nothing lies between the markers
       */
      public static function cut($strings, $argl, $argr, $lt = false, $gt = false)
      {
          if (!$strings) {
              return '';
          }
          $left  = self::resolveMarker((string) $argl, $strings);
          $right = self::resolveMarker((string) $argr, $strings);
          if ($left === '') {
              // explode() cannot split on an empty delimiter.
              return '';
          }

          $pieces = explode($left, $strings);
          if (!isset($pieces[1])) {
              // Left marker not present.
              return '';
          }
          $chunk = $pieces[1];
          if ($right !== '') {
              $parts = explode($right, $chunk);
              $chunk = $parts[0];
          }
          if ($chunk === '') {
              return '';
          }
          return ($lt ? $left : '') . $chunk . ($gt ? $right : '');
      }

      /**
       * Resolve a cut() marker: regex markers become the text they match.
       *
       * @param string $marker  literal marker, or a pattern containing ".+?"
       * @param string $subject text the pattern is matched against
       * @return string the literal marker to split on
       */
      private static function resolveMarker($marker, $subject)
      {
          // "!== false" so that a pattern beginning with ".+?" is recognised too.
          if (strpos($marker, '.+?') !== false) {
              $pattern = '/' . strtr($marker, ['/' => '\/']) . '/';
              if (preg_match($pattern, $subject, $hit)) {
                  return $hit[0];
              }
          }
          return $marker;
      }

      /**
       * Expand the shorthand placeholders of a rule into regex syntax.
       *
       * Curly braces are escaped (they are used as pattern delimiters elsewhere in
       * this class) and the bracketed Chinese placeholders are replaced according
       * to the table below.
       *
       * @param string|array $rules rule string, or rule array whose 'rule' entry is expanded
       * @return array|string the expanded rule in the same shape as the input
       */
      public static function parseMatchRule($rules)
      {
          $replace_pairs = [
              '{'      => '\{',
              '}'      => '\}',
              '[内容]' => '(.*?)',
              '[数字]' => '\d*',
              '[空白]' => '\s*',
              '[任意]' => '.*?',
              '[参数]' => '[^\>\<]*?',
              '[属性]' => '[^\>\<\'"]*?',
          ];
          if (is_array($rules)) {
              if (isset($rules['rule'])) {
                  $rules['rule'] = strtr($rules['rule'], $replace_pairs);
              }
              return $rules;
          }
          return strtr($rules, $replace_pairs);
      }

      /**
       * Tell whether a rule should be treated as a regular expression.
       *
       * The test is purely textual: the rule counts as a pattern as soon as it
       * contains an opening or a closing parenthesis.
       *
       * @param string $str rule text
       * @return bool
       */
      public static function isreg($str)
      {
          return strpos($str, '(') !== false || strpos($str, ')') !== false;
      }

      /**
       * Transpose column-oriented data into a list of rows.
       *
       * $data maps a field name to the list of values collected for that field.
       * All non-empty columns must have the same number of values, otherwise []
       * is returned. Empty columns are filled with '' in every row.
       *
       * @param array $data field name => list of values
       * @return array list of rows, each mapping field name => value
       */
      public static function parseListData($data)
      {
          // First pass: determine the common column length and reject mismatches.
          $rows = 0;
          foreach ($data as $column) {
              if (!$column) {
                  continue;
              }
              $size = count($column);
              if ($rows && $rows != $size) {
                  return [];
              }
              $rows = $size;
          }

          // Second pass: transpose, padding empty columns.
          $list = [];
          foreach ($data as $field => $column) {
              if ($column) {
                  foreach ($column as $index => $value) {
                      $list[$index][$field] = $value;
                  }
                  continue;
              }
              for ($i = 0; $i < $rows; $i++) {
                  $list[$i][$field] = '';
              }
          }
          return $list;
      }
  }