*/ class Collect { /** * 获取内容 * * @param $data * @return bool|mixed|string */ public static function getContent($data, $header = [], $option = []) { if (is_string($data)) $data = ['rule' => $data, 'charset' => 'auto']; if (strpos($data['rule'], '[timestamp]') || strpos($data['rule'], '[时间]')) { $data['rule'] = str_replace(['[timestamp]', '[时间]'], [time() - 64566122, date('Y-m-d H:i:s')], $data['rule']); } elseif (isset($data['usetimestamp']) && $data['usetimestamp'] == 1) { $data['rule'] .= (strpos($data['rule'], '?') ? '&_ptcms=' : '?_ptcms=') . (time() - 13456867); } if (isset($data['method']) && strtolower($data['method']) == 'post') { $content = Http::post($data['rule'], [], $header, $option); } else { $content = Http::get($data['rule'], [], $header, $option); } if ($content) { // 处理编码 if (empty($data['charset']) || !in_array($data['charset'], ['auto', 'utf-8', 'gbk'])) { $data['charset'] = 'auto'; } // 检测编码 if ($data['charset'] == 'auto') { if (preg_match('/[;\s\'"]charset[=\'\s]+?big/i', $content)) { $data['charset'] = 'big5'; } elseif (preg_match('/[;\s\'"]charset[=\'"\s]+?gb/i', $content) || preg_match('/[;\s\'"]encoding[=\'"\s]+?gb/i', $content)) { $data['charset'] = 'gbk'; } elseif (mb_detect_encoding($content) != 'UTF-8') { $data['charset'] = 'gbk'; } } // 转换 switch ($data['charset']) { case 'gbk': $content = mb_convert_encoding($content, 'UTF-8', 'GBK'); break; case 'big5': $content = mb_convert_encoding($content, 'UTF-8', 'big-5'); $content = big5::toutf8($content); break; case 'utf-16': $content = mb_convert_encoding($content, 'UTF-8', 'UTF-16'); default: } //错误标识 if (!empty($data['error']) && strpos($content, $data['error']) !== false) { return ''; } if (!empty($data['replace'])) { $content = self::replace($content, $data['replace']); } return $content; } return ''; } /** * 根据正则批量获取 * * @param mixed $pregArr 正则 * @param string $code 源内容 * @param int $needposition 确定是否需要间距数字 * @return array|bool */ public static function getMatchAll($pregArr, $code, $needposition = 0) { if (is_numeric($pregArr)) { return $pregArr; } elseif (is_string($pregArr)) { $pregArr = ['rule' => self::parseMatchRule($pregArr)]; } elseif (empty($pregArr['rule'])) { return []; } if (!self::isreg($pregArr['rule'])) return []; $pregstr = '{' . $pregArr['rule'] . '}'; $pregstr .= empty($pregArr['option']) ? '' : $pregArr['option']; $matchvar = $match = []; if (!empty($pregstr)) { if ($needposition) { preg_match_all($pregstr, $code, $match, PREG_SET_ORDER + PREG_OFFSET_CAPTURE); } else { preg_match_all($pregstr, $code, $match); } } if (is_array($match)) { if ($needposition) { foreach ($match as $var) { if (is_array($var)) { $matchvar[] = $var[count($var) - 1]; } else { $matchvar[] = $var; } } } else { if (isset($match['2'])) { $count = count($match); foreach ($match['1'] as $k => $v) { if ($v == '') { for ($i = 2; $i < $count; $i++) { if (!empty($match[$i][$k])) { $match['1'][$k] = $match[$i][$k]; break; } } } } } if (isset($match['1'])) { $matchvar = $match['1']; } else { return false; } } if (!empty($pregArr['replace'])) { foreach ($matchvar as $k => $v) { $matchvar[$k] = self::replace($v, $pregArr['replace']); } } return $matchvar; } return []; } /** * 根据正则获取指定数据 单个 * * @param mixed $pregArr 正则 * @param string $code 源内容 * @return bool|string */ public static function getMatch($pregArr, $code) { if (is_numeric($pregArr)) { return $pregArr; } elseif (empty($pregArr) || (isset($pregArr['rule']) && empty($pregArr['rule']))) { return ''; } elseif (is_string($pregArr)) { $pregArr = ['rule' => self::parseMatchRule($pregArr), 'replace' => []]; } if (!self::isreg($pregArr['rule'])) return $pregArr['rule']; $pregstr = '{' . $pregArr['rule'] . '}'; $pregstr .= empty($pregArr['option']) ? '' : $pregArr['option']; preg_match($pregstr, $code, $match); $result = ''; if (strpos($pregstr, '|') && isset($match['2'])) { array_shift($match); foreach ($match as $result) { if ($result) { break; } } } elseif (isset($match['1'])) { $result = $match['1']; } if ($result) { if (empty($pregArr['replace'])) { return $result; } else { return self::replace($result, $pregArr['replace']); } } return ''; } /** * 内容替换 支持正则批量替换 * * @param string $con 代替换的内容 * @param array $arr 替换规则数组 单个元素如下 * array( * 'rule'=>'规则1',//♂后面表示要替换的 内容 * 'option'=>'参数', * 'method'=>1,//1 正则 0普通 * v ), * @return mixed */ public static function replace($con, array $arr) { foreach ($arr as $v) { if (!empty($v['rule'])) { $tmp = explode('♂', $v['rule']); $rule = $tmp['0']; $replace = isset($tmp['1']) ? $tmp['1'] : ''; $v['option'] = isset($v['option']) ? $v['option'] : ''; if ($v['method'] == 1) { //正则 $con = preg_replace("{" . $rule . "}{$v['option']}", $replace, $con); } else { if (strpos($v['option'], 'i') === false) { $con = str_replace($rule, $replace, $con); } else { $con = str_ireplace($rule, $replace, $con); } } } } return $con; } /** * 处理链接,根据当前页面地址得到完整的链接地址 * * @param string $url 当前链接 * @param string $path 当前页面地址 * @return string */ public static function parseUrl($url, $path) { if ($url) { if (strpos($url, '://') === false) { if (substr($url, 0, 1) == '/') { $tmp = parse_url($path); $url = $tmp['scheme'] . '://' . $tmp['host'] . $url; } elseif (substr($url, 0, 3) == '../') { $url = dirname($path) . substr($url, 2); } elseif (substr($path, -1) == '/') { $url = $path . $url; } else { $url = dirname($path) . '/' . $url; } } return $url; } else { return ''; } } /** * 内容切割方式 * * @param string $strings 要切割的内容 * @param string $argl 左侧标识 如果带有.+?则为正则模式 * @param string $argr 右侧标识 如果带有.+?则为正则模式 * @param bool $lt 是否包含左切割字符串 * @param bool $gt 是否包含右切割字符串 * @return string */ public static function cut($strings, $argl, $argr, $lt = false, $gt = false) { if (!$strings) return (""); if (strpos($argl, ".+?")) { $argl = strtr($argl, ["/" => "\/"]); if (preg_match("/" . $argl . "/", $strings, $match)) $argl = $match[0]; } if (strpos($argr, ".+?")) { $argr = strtr($argr, ["/" => "\/"]); if (preg_match("/" . $argr . "/", $strings, $match)) $argr = $match[0]; } $args = explode($argl, $strings); $args = explode($argr, $args[1]); $args = $args[0]; if ($args) { if ($lt) $args = $argl . $args; if ($gt) $args .= $argr; } else { $args = ""; } return ($args); } /** * 简写规则转化 * * @param $rules * @return array|string */ public static function parseMatchRule($rules) { $replace_pairs = [ '{' => '\{', '}' => '\}', '[内容]' => '(.*?)', '[数字]' => '\d*', '[空白]' => '\s*', '[任意]' => '.*?', '[参数]' => '[^\>\<]*?', '[属性]' => '[^\>\<\'"]*?', ]; if (is_array($rules)) { $rules['rule'] = strtr($rules['rule'], $replace_pairs); return $rules; } return strtr($rules, $replace_pairs); } /** * 是否正则 * * @param $str * @return bool */ public static function isreg($str) { return (strpos($str, ')') !== false || strpos($str, '(') !== false); } /** * @param $data * @return array */ public static function parseListData($data) { $list = []; $num = 0; foreach ($data as $v) { if ($v) { if ($num) { if ($num != count($v)) return []; } else { $num = count($v); } } } foreach ($data as $k => $v) { if ($v) { foreach ($v as $kk => $vv) { $list[$kk][$k] = $vv; } } else { for ($i = 0; $i < $num; $i++) { $list[$i][$k] = ''; } } } return $list; } }