Home > Backend Development > PHP Tutorial > curl抓取数据返回码为200,但是没有内容
// Fetches a Taobao item detail page with cURL, retrying with different
// User-Agent strings, then dumps the transfer info followed by the body.
set_time_limit(0);

// GET test against a Taobao item detail page.
// (Item id 37530539791 was the other id tried; it was immediately overwritten
// by this one in the original, so only this assignment is kept.)
$url = "http://item.taobao.com/item.htm?id=14861616067";

// Per-request timeout in seconds. The original passed an undefined $timeout to
// CURLOPT_TIMEOUT, which raised a warning and left the transfer without a limit.
$timeout = 30;

// Number of User-Agent strings to try before giving up (the original loop ran
// for indexes 0..5, i.e. the first six entries of the list below).
$maxAttempts = 6;

// User-Agent strings used to imitate common browsers.
$useragent = array(
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1',
    'Opera/9.27 (Windows NT 5.2; U; zh-cn)',
    'Opera/8.0 (Macintosh; PPC Mac OS X; U; en)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13 ',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Version/3.1 Safari/525.13'
);

header("Content-type: text/html; charset=utf-8");

// Temporary file that holds the cookies exchanged with the server.
$cookiefile = realpath("./")."/Application/Runtime/Temp/cookie.txt";
if (!file_exists($cookiefile) && is_writable(dirname($cookiefile))) {
    // Best effort only: when the directory is missing or read-only the request
    // still runs, it just cannot persist cookies.
    file_put_contents($cookiefile, "");
}

$ch = curl_init();

// Basic transfer options, including the URL.
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_NOBODY, 0);
curl_setopt($ch, CURLOPT_MAXREDIRS, 300);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
curl_setopt($ch, CURLOPT_AUTOREFERER, true);    // set Referer automatically when following redirects
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // follow "Location:" headers, bounded by CURLOPT_MAXREDIRS

// Send a Referer pointing at the target host, as the curl_get() helper in this
// file does. NOTE(review): presumably the site answers 200 with an empty body
// when no Referer is present - confirm by commenting this line out.
$host = parse_url($url, PHP_URL_HOST);
if (!empty($host)) {
    curl_setopt($ch, CURLOPT_REFERER, "http://" . $host . "/");
}

// The forged client-IP headers and the proxy settings that were commented out
// in the original remain disabled and are not reproduced here.

// Cookie handling: start a fresh cookie session, read cookies from the file and
// write the cookies returned by the server back to it when the handle closes.
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiefile);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiefile);

// Execute the request, switching User-Agent until a non-empty body comes back.
$output = false;
foreach (array_slice($useragent, 0, $maxAttempts) as $agent) {
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    $output = curl_exec($ch);
    // Compare explicitly so a body of "0" is not mistaken for an empty response.
    if ($output !== false && $output !== '') {
        break;
    }
}

// Collect diagnostics before the handle is released.
$info = curl_getinfo($ch);
$error = curl_error($ch);
curl_close($ch);

// Single quotes: the original double-quoted literal contained unescaped double
// quotes and did not parse.
echo '<pre class="brush:php;toolbar:false">';
print_r($info);
if ($output === false) {
    // Surface the transport error instead of exiting silently.
    die('cURL error: ' . $error);
}
die($output);
// Example call: download one Taobao item page through curl_get() and print
// whatever the helper returns.
$url = 'http://item.taobao.com/item.htm?id=14861616067';
$response = curl_get($url);
echo $response;
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Sends a GET request by default; when $data is non-empty the request becomes
 * a POST with $data as the POST fields. Cookies are read from and written to
 * "cookie.txt" in the current working directory, redirects are followed, and
 * the transfer is limited to 5 seconds.
 *
 * @param string       $durl URL to request.
 * @param array|string $data Optional POST fields; any falsy value means GET.
 *
 * @return string|false Response body, or false when curl_exec() fails.
 */
function curl_get($durl, $data = array())
{
    // realpath() returns false while the file does not exist yet, which used to
    // leave cURL without a cookie jar; fall back to a path in the working
    // directory so the jar can be created on first use.
    $cookiejar = realpath('cookie.txt');
    if ($cookiejar === false) {
        $cwd = getcwd();
        $cookiejar = ($cwd !== false ? $cwd : '.') . DIRECTORY_SEPARATOR . 'cookie.txt';
    }

    // Forward the caller's browser User-Agent when there is one; under CLI (or
    // when the client sent none) use a generic browser string instead of
    // reading an undefined index.
    $agent = isset($_SERVER['HTTP_USER_AGENT']) && $_SERVER['HTTP_USER_AGENT'] !== ''
        ? $_SERVER['HTTP_USER_AGENT']
        : 'Mozilla/5.0 (compatible; PHP cURL)';

    $parts = parse_url($durl);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $durl);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    // NOTE(review): certificate verification is disabled, as in the original.
    // This permits man-in-the-middle attacks on https URLs; enable it once the
    // CA bundle on the host is known to be configured.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);

    // Pretend the request comes from the target site's own front page. Skipped
    // when the URL has no host component (parse_url() failed or the URL is
    // relative / host-less).
    if (is_array($parts) && !empty($parts['host'])) {
        curl_setopt($ch, CURLOPT_REFERER, "http://" . $parts['host'] . "/");
    }

    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    if ($data) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $body = curl_exec($ch);
    curl_close($ch);

    return $body;
}
谢谢,是可以用的。我的代码不行,是因为来源(Referer)模拟错了吗?
你可以在我的代码中逐个注释掉来判断少了什么
估计是少了 CURLOPT_REFERER。
好的,谢谢。