在 PHP 中,如果要抓取数据、模拟登录、提交 POST 数据,首先想到的肯定是 cURL 函数。这组函数方便实用,还可以通过 curl_multi_* 系列函数并发处理多个请求。下面整理了几个例子,有兴趣的朋友可以参考。
例子:使用 PHP cURL 获取网页数据的方法,代码如下:
<?php
// ---------------------------------------------------------------------
// Example 1: fetch a web page with cURL (plain GET).
// ---------------------------------------------------------------------
$ch = curl_init();
// Set the options, including the URL.
curl_setopt($ch, CURLOPT_URL, "http://www.phprm.com");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
curl_setopt($ch, CURLOPT_HEADER, 0);         // keep response headers out of the returned body
// Execute the request and capture the HTML document (curl_exec() gives false on failure).
$output = curl_exec($ch);
// Release the cURL handle.
curl_close($ch);

// ---------------------------------------------------------------------
// Example 2: submit data with an HTTP POST.
// ---------------------------------------------------------------------
$url = "http://www.phprm.com/curl_post.php";
$post_data = array(
    "nameuser" => "syxrrrr",
    "pw" => "123456",
);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
// An array passed to CURLOPT_POSTFIELDS is sent as multipart/form-data;
// pass a URL-encoded string instead for application/x-www-form-urlencoded.
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
$output = curl_exec($ch);
curl_close($ch);
echo $output;

// ---------------------------------------------------------------------
// Example 3: fetch data, simulate a login, POST data (cURL helper).
// ---------------------------------------------------------------------

/**
 * Fetch a URL with cURL, optionally sending request headers, POST data and cookies.
 *
 * The snippet this comes from declared it as `public function`, i.e. it was lifted
 * out of a class. It is written as a plain function here so the listing parses on
 * its own; put `public` back when pasting it into a class body.
 *
 * @param string|array       $urls    A URL string, or an array whose element 0 is the URL.
 *                                    The array form is the "https" mode of this helper and
 *                                    also turns off TLS peer and host verification.
 * @param array|false        $header  List of raw header lines for CURLOPT_HTTPHEADER,
 *                                    or a falsy value to send none.
 * @param array|string|false $post    Payload for CURLOPT_POSTFIELDS; a falsy value
 *                                    (including an empty array) leaves the request as GET.
 * @param int                $cookies 0 = no cookie handling, 1 = store received cookies in
 *                                    the file "cookiefile", 2 = send cookies read from
 *                                    the file "cookiefile".
 *
 * @return string|false The response body, or false when curl_exec() fails.
 */
function curl_allinfo($urls, $header = FALSE, $post = FALSE, $cookies = 0)
{
    $isArrayForm = is_array($urls);
    $url = $isArrayForm ? $urls[0] : $urls;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // Send custom request headers when supplied.
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }

    // Switch to POST when a payload is supplied.
    if (!empty($post)) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
    }

    // The path is relative, so the cookie file lives in the current working directory.
    if ($cookies == 1) {
        curl_setopt($ch, CURLOPT_COOKIEJAR, "cookiefile");
    } elseif ($cookies == 2) {
        curl_setopt($ch, CURLOPT_COOKIEFILE, "cookiefile");
    }

    if ($isArrayForm) {
        // SECURITY: certificate and host name checks are disabled here, exactly as in
        // the original listing, which leaves the connection open to man-in-the-middle
        // attacks. Do not use the array form against hosts you need to trust.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    }

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}

// ---------------------------------------------------------------------
// Example 4: fetch a page while identifying as a search-engine spider.
// ---------------------------------------------------------------------

/**
 * Download a page using a "spider" User-Agent and report transfer details.
 *
 * @param string $url Address of the page to fetch.
 *
 * @return array The curl_getinfo() array for the transfer, extended with three keys:
 *               'errno' (cURL error number, 0 on success), 'errmsg' (cURL error text)
 *               and 'content' (response body, or false on failure).
 */
function get_web_page($url)
{
    $options = array(
        CURLOPT_RETURNTRANSFER => true,     // return the page instead of printing it
        CURLOPT_HEADER         => false,    // do not include response headers
        CURLOPT_FOLLOWLOCATION => true,     // follow redirects
        CURLOPT_ENCODING       => "",       // accept every encoding cURL supports
        CURLOPT_USERAGENT      => "spider", // User-Agent sent with the request
        CURLOPT_AUTOREFERER    => true,     // set the Referer header on redirect
        CURLOPT_CONNECTTIMEOUT => 120,      // connection timeout, in seconds
        CURLOPT_TIMEOUT        => 120,      // whole-transfer timeout, in seconds
        CURLOPT_MAXREDIRS      => 10,       // stop after 10 redirects
    );

    $ch = curl_init($url);
    curl_setopt_array($ch, $options);

    $content = curl_exec($ch);
    $err = curl_errno($ch);
    $errmsg = curl_error($ch);
    $header = curl_getinfo($ch);
    curl_close($ch);

    // The original listing had doubled quotes (''errno'') here, which does not parse.
    $header['errno'] = $err;
    $header['errmsg'] = $errmsg;
    $header['content'] = $content;

    return $header;
}

// Close the PHP block so the article text that follows is not parsed as PHP.
?>
本文地址:
转载随意,但请附上文章地址:-)