代码语言:javascript复制
/**
 * Fetch a URL with cURL (POST by default) and return the raw response text.
 *
 * CURLOPT_HEADER is enabled, so the returned string includes the response
 * headers in front of the body. Redirects are followed, the Referer is fixed
 * to https://www.ixigua.com/ and the transfer times out after 10 seconds.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    POST payload. Arrays/objects are URL-encoded with
 *                              http_build_query(); a string is sent unchanged.
 *                              Ignored when $isPost is false.
 * @param bool         $isPost  Send a POST request when truthy.
 * @param int          $coding  1 = convert the response from GBK to UTF-8,
 *                              2 = convert it from UTF-8 to GBK,
 *                              anything else = return it untouched.
 * @param bool         $isForge When truthy, send a randomly chosen User-Agent and
 *                              random CLIENT-IP / X-FORWARDED-FOR headers.
 * @return bool|string The response text, or false when the cURL handle cannot be
 *                     created or the transfer reports an error.
 * Created by zhangzq at 2021/4/26 17:22
 */
function curlPost($url, $data = array(), $isPost = true, $coding = 0, $isForge = true)
{
    $ch = curl_init();
    if ($ch === false) {
        // Without a handle every curl_setopt() below would fail; report it the
        // same way a failed transfer is reported.
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    // NOTE(review): peer certificate verification is switched off, so HTTPS
    // responses are not authenticated. Kept as-is because callers may rely on it.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // Include the response headers in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Follow 302 redirects and keep fetching.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_NOBODY, false);
    curl_setopt($ch, CURLOPT_REFERER, 'https://www.ixigua.com/'); // simulated referer

    if ($isPost) {
        curl_setopt($ch, CURLOPT_POST, 1);
        // A string payload is assumed to be encoded already; passing it to
        // http_build_query() would raise a TypeError on PHP 8.
        $postFields = is_string($data) ? $data : http_build_query($data);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
    }

    if ($isForge) {
        // Random IPv4-looking address used for the spoofed client headers.
        $ip = mt_rand(11, 191) . "." . mt_rand(0, 240) . "." . mt_rand(1, 240) . "." . mt_rand(1, 240);
        $agentarry = [
            // Search-engine crawlers
            "Mozilla/5.0 (compatible; Baiduspider/2.0; http://www.baidu.com/search/spider.html)",
            "Mozilla/5.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
            "Mozilla/5.0 (compatible; JikeSpider; http://shoulu.jike.com/spider.html)",
            "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
            "Sogou web spider/4.0( http://www.sogou.com/docs/help/webmasters.htm#07)",
            "Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/;)",
            "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)",
            // Desktop browsers
            "safari 5.1 – MAC" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
            "safari 5.1 – Windows" => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
            "Firefox 38esr" => "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
            "IE 11" => "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
            "IE 9.0" => "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
            "IE 8.0" => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
            "IE 7.0" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
            "IE 6.0" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
            "Firefox 4.0.1 – MAC" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
            "Firefox 4.0.1 – Windows" => "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
            "Opera 11.11 – MAC" => "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
            "Opera 11.11 – Windows" => "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
            "Chrome 17.0 – MAC" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
            "傲游(Maxthon)" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
            "腾讯TT" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
            "世界之窗(The World) 2.x" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
            "世界之窗(The World) 3.x" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
            "360浏览器" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
            "搜狗浏览器 1.x" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
            "Avant" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
            "Green Browser" => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
            // Mobile user agents (currently disabled)
            // "safari iOS 4.33 – iPhone" => "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
            // "safari iOS 4.33 – iPod Touch" => "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
            // "safari iOS 4.33 – iPad" => "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
            // "Android N1" => "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
            // "Android QQ Browser for Android" => "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
            // "Android Opera Mobile" => "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
            // "Android Pad Moto Xoom" => "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
            // "BlackBerry" => "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1 (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1 ",
            // "WebOS HP Touchpad" => "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
            // "UC standard" => "NOKIA5700/ UCWEB7.0.2.37/28/999",
            // "UCOpenwave" => "Openwave/ UCWEB7.0.2.37/28/999",
            // "UC Opera" => "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
            // "WeChat built-in browser" => "Mozilla/5.0 (Linux; Android 6.0; 1503-M02 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/37.0.0.0 Mobile MQQBrowser/6.2 TBS/036558 Safari/537.36 MicroMessenger/6.3.25.861 NetType/WIFI Language/zh_CN",
        ];
        // array_rand() returns a key; the keys are labels only, the value is sent.
        $useragent = $agentarry[array_rand($agentarry, 1)];
        $header = array(
            'CLIENT-IP:' . $ip,
            'X-FORWARDED-FOR:' . $ip,
        );
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    }

    $output = curl_exec($ch);
    // Read the error code before the handle is released.
    $errorCode = curl_errno($ch);
    curl_close($ch);
    if (0 !== $errorCode) {
        return false;
    }

    if ($coding == 1) {
        $output = mb_convert_encoding($output, "UTF-8", "GBK");
    } elseif ($coding == 2) {
        $output = mb_convert_encoding($output, "GBK", "UTF-8");
    }
    return $output;
}
复制代码
http.php
代码语言:javascript复制<?php
namespace appluckycommon;
/**
 * Class Http
 *
 * Static facade over the global `Requests` class: every public method forwards
 * to the `Requests` static method of the same name through call().
 *
 * Global classes are written with a leading backslash because this file
 * declares a namespace and imports nothing; unqualified, `Exception` and the
 * `Requests_*` classes would be looked up inside the namespace.
 */
class Http
{
    /**
     * cURL info captured by the `curl.after_request` hook of the latest request.
     * Written by getHooks(); nothing in this class reads it.
     */
    private static $info = [];

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function get($url, array $headers = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $options]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function head($url, array $headers = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $options]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function delete($url, array $headers = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $options]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $options
     * @return \Requests_Response
     */
    public static function trace($url, array $headers = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $options]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param mixed $data    Request body; deliberately untyped, unlike put()/patch().
     * @param array $options
     * @return \Requests_Response
     */
    public static function post($url, array $headers = [], $data = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $data, $options]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $data
     * @param array $options
     * @return \Requests_Response
     */
    public static function put($url, array $headers = [], array $data = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $data, $options]);
    }

    /**
     * @param $url
     * @param array $headers
     * @param array $data
     * @param array $options
     * @return \Requests_Response
     */
    public static function options($url, array $headers = [], array $data = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $data, $options]);
    }

    /**
     * @param $url
     * @param array $headers Optional, like every other verb helper in this class.
     * @param array $data
     * @param array $options
     * @return \Requests_Response
     */
    public static function patch($url, array $headers = [], array $data = [], array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $data, $options]);
    }

    /**
     * @param string $url
     * @param array $headers
     * @param array $data
     * @param string $type   HTTP method, defaults to \Requests::GET.
     * @param array $options
     * @return \Requests_Response
     */
    public static function request($url, array $headers = [], array $data = [], $type = \Requests::GET, array $options = [])
    {
        $options = self::setOptions($options);
        return self::call(__FUNCTION__, [$url, $headers, $data, $type, $options]);
    }

    /**
     * Forward a batch of requests to \Requests::request_multiple().
     *
     * Unlike the single-request helpers, $options is passed through untouched:
     * setOptions() is not applied, so no hook is registered and `idn` is not forced.
     *
     * @param array $requests
     * @param array $options
     * @return mixed Whatever \Requests::request_multiple() returns (presumably an
     *               array of responses - confirm against the library), or an empty
     *               \Requests_Response when an exception was caught.
     */
    public static function request_multiple(array $requests, array $options = [])
    {
        return self::call(__FUNCTION__, [$requests, $options]);
    }

    /**
     * Build a hook set that stores the cURL info of a finished request in self::$info.
     *
     * @return \Requests_Hooks
     */
    private static function getHooks()
    {
        $hooks = new \Requests_Hooks();
        $hooks->register('curl.after_request', function ($headers, $info) {
            // Normalise to an array so self::$info always has the same type.
            self::$info = !is_array($info) ? [$info] : $info;
        });
        return $hooks;
    }

    /**
     * Add the default hooks (unless the caller supplied some) and force `idn` off.
     *
     * @param array $options
     * @return array
     */
    private static function setOptions($options)
    {
        if (!isset($options['hooks'])) {
            $options['hooks'] = self::getHooks();
        }
        $options['idn'] = false;
        return $options;
    }

    /**
     * Invoke \Requests::$name with $args and normalise the result.
     *
     * Any \Exception is logged and replaced by an empty response (body ''), so
     * callers never see an exception from this class.
     *
     * @param string $name Name of the static method on \Requests.
     * @param array  $args Positional arguments for that method; $args[0] is the URL
     *                     except for request_multiple.
     * @return mixed
     */
    private static function call($name, $args)
    {
        try {
            $type = $name;
            if ('request' === $name) {
                // For request() the HTTP verb is the fourth positional argument.
                $type = isset($args[3]) ? $args[3] : $name;
            }
            $type = strtolower($type);
            // Wrap the URL; request_multiple receives a list of requests instead.
            if ($type !== 'request_multiple') {
                // NOTE(review): `RequestsIRI` is unqualified and therefore resolved
                // inside this namespace - confirm that class exists there.
                $args[0] = new RequestsIRI($args[0]);
            }
            $response = call_user_func_array(['Requests', $name], $args);
            if ($response instanceof \Requests_Response && is_object($response->url)) {
                // Hand callers the plain URI value rather than the wrapper object.
                $response->url = $response->url->uri;
            }
            return $response;
        } catch (\Exception $e) {
            // NOTE(review): `Log` is resolved inside this namespace as well.
            Log::error($e->getMessage());
            $response = new \Requests_Response();
            $response->body = '';
            return $response;
        }
    }
}
复制代码
代码语言:javascript复制 //业务代码
// Request headers sent with calls to www.cwl.gov.cn
private $cwlHeader = [
'X-Requested-With'=> 'XMLHttpRequest',
"Referer" => "http://www.cwl.gov.cn/kjxx/"
];
protected function doGet($url, $urlParams = [], $headers = [], $options = [])
{
    // Issue a GET request through the Http facade and return only the response
    // body. $urlParams is passed in the facade's $data position.
    // The class is addressed by its fully-qualified name, built from the
    // namespace and class declared in http.php; the previous `appluckycommonHttp`
    // did not name any class.
    $response = \appluckycommon\Http::request($url, $headers, $urlParams, 'GET', $options);
    return $response->body;
}
/**
* @param $num
* @return array
* @description 按期号爬(福彩3D)
*/
public function fcsdCrawlerByNum($num)
{
    // Crawl the "3d" draw result for issue number $num from www.cwl.gov.cn.
    // Returns the formatted result of fcsdDataFormat() on success, otherwise
    // ["code" => _MSG_SYSTEM_ERROR, "msg" => <message from the site, or null>].

    // Encode the issue number so it cannot add or break query parameters.
    $url = "http://www.cwl.gov.cn/cwl_admin/kjxx/findKjxx/forIssue?name=3d&code=" . rawurlencode((string)$num);
    $data = json_decode($this->doGet($url, [], $this->cwlHeader), true);

    // An empty or non-JSON body decodes to null; treat anything that is not an
    // array carrying a "state" as a failed crawl instead of indexing into it.
    $succeeded = is_array($data) && isset($data["state"]) && !($data["state"] != _MSG_SUCCESS);
    if (!$succeeded) {
        // presumably marks this crawler as failing in the status hash - verify
        Redis::getInstance()->redisHSetTrue(_REDIS_CRAWLER_STATUS,'fcsdCrawlerByNum');
        Log::warning("现在已经爬不到 福彩3D 开奖结果了(中国福利彩票发行管理中心官方网站 www.cwl.gov.cn) fcsdCrawlerByNum()", []);
        $message = (is_array($data) && isset($data["message"])) ? $data["message"] : null;
        return ["code"=>_MSG_SYSTEM_ERROR, "msg"=>$message];
    }

    Redis::getInstance()->redisHSetFalse(_REDIS_CRAWLER_STATUS,'fcsdCrawlerByNum');
    return $this->fcsdDataFormat(isset($data["result"]) ? $data["result"] : null);
}
复制代码
另一种业务逻辑的 PHP 代码块
代码语言:javascript复制 /**
* @param $lotteryId
* @return array
* @description 地方彩往期爬取
*/
public function dfCrawlerM500List($lotteryId)
{
    // Crawl the list of past draws for one lottery from m.500.com.
    // $lotteryId is the local lottery id; it is mapped to the site's id via
    // $this->fu51ToWb. Returns ["code", "msg", "data"], where "data" is a list of
    // rows with lottery_id, lottery_name, lottery_no, lottery_date and lottery_res
    // (the list of drawn numbers).
    $id500 = $this->fu51ToWb[$lotteryId];
    $url = "https://m.500.com/info/kaijiang/moreexpect/" . $id500 . "/?from=";
    $result = $this->doGet($url, [], $this->m500Header);

    // Normalise the page to UTF-8. Detection can fail and return false, in which
    // case the text is left as it is rather than converted with a bogus encoding.
    $encode = mb_detect_encoding($result, array("ASCII", 'UTF-8', "GB2312", "GBK", 'BIG5'));
    if ($encode !== false) {
        $result = mb_convert_encoding($result, 'UTF-8', $encode);
    }

    // One match per draw: (1) issue number, (2) draw date, (3) inner HTML of the
    // <ul> holding the balls. "~" is the delimiter so the "/" of the closing tags
    // needs no escaping, and whitespace between tags is matched with \s*.
    $pattern = '~<div.*?>.*?([0-9]{5,10}).*?<em>([0-9-]{10}).*?</em>\s*</div>\s*<div.*?>\s*<ul.*?>(.*?)</ul>\s*</div>~ism';
    if (!preg_match_all($pattern, $result, $matches)) {
        // presumably marks this crawler as failing in the status hash - verify
        Redis::getInstance()->redisHSetTrue(_REDIS_CRAWLER_STATUS, 'dfCrawlerM500List');
        Log::warning("现在已经爬不到 地方彩 历史开奖结果了(500彩票网 m.500.com) dfCrawlerM500List()", []);
        return ["code" => _MSG_SYSTEM_ERROR, "msg" => "查询失败", "data" => []];
    }

    $data = [];
    foreach ($matches[1] as $key => $issueNo) {
        // Draws whose <ul> contains no <li> entries are skipped.
        if (!preg_match_all('~<li.*?>(.*?)</li>~ism', $matches[3][$key], $balls)) {
            continue;
        }
        $data[] = [
            "lottery_id" => $lotteryId,
            "lottery_name" => $this->dfName[$lotteryId],
            "lottery_no" => $issueNo,
            "lottery_date" => $matches[2][$key],
            "lottery_res" => $balls[1],
        ];
    }
    // Release the page and the match arrays before returning.
    unset($matches, $result);

    Redis::getInstance()->redisHSetFalse(_REDIS_CRAWLER_STATUS, 'dfCrawlerM500List');
    return ["code" => _MSG_SYSTEM_SUCCESS, "msg" => "查询成功", "data" => $data];
}
复制代码
// Matches one draw entry: (1) issue number, (2) draw date, (3) inner HTML of the <ul>.
// "~" is used as the delimiter so the "/" in the closing tags needs no escaping,
// and \s* absorbs the whitespace between tags.
$pattern = '~<div.*?>.*?([0-9]{5,10}).*?<em>([0-9-]{10}).*?</em>\s*</div>\s*<div.*?>\s*<ul.*?>(.*?)</ul>\s*</div>~ism';
// The HTML fragment this pattern is written against:
/*
<div class="kaij-tit ">2020009期<em>2020-01-20 星期一</em> </div>
<div class="kaij-jg ">
<ul>
<li class="red-ball" >3</li>
<li class="red-ball" >2</li>
<li class="red-ball" >2</li>
<li class="red-ball" >5</li>
<li class="red-ball" >8</li>
</ul>
</div>
*/
复制代码
POST方法处理
代码语言:javascript复制 /*
* 球队球员转入
*/
public function getPlayerShiftTo($teamId, $page_no, $page_count)
{
    // POST the team id and paging values to the 'sports/team/player_shiftTo'
    // endpoint under $this->url and return the decoded JSON response.
    // An empty or undecodable response, like any caught Exception, is logged and
    // reported as ['code' => -1, 'msg' => <exception message>].
    try {
        $endpoint = $this->url . 'sports/team/player_shiftTo';
        $payload = [
            'team_id' => $teamId,
            'page_no' => $page_no,
            'page_count' => $page_count,
        ];
        $response = Http::post($endpoint, [], $payload);
        $decoded = json_decode($response->body, true);
        if (!empty($decoded)) {
            return $decoded;
        }
        // Raised only so the empty case shares the logging path below.
        throw new Exception('服务接口出现错误 -1');
    } catch (Exception $e) {
        $reason = $e->getMessage();
        Log::error($reason);
        return ['code' => -1, 'msg' => $reason];
    }
}
复制代码