看到总有人问 curl 的问题,这两天没事写了个采集论坛的小程序。里面包括了模拟登录、获取页面源代码、正则匹配结果等部分,希望对大家有用。
// Remove PHP's execution time limit so the script is not aborted mid-run.
set_time_limit(0);

// Path of the file used to store cookies.
// Built from the system temp directory instead of a hard-coded '/tmp', which
// does not exist on every platform; on a typical Linux host this still
// resolves to '/tmp/cookie.tmp'.
$cookie_jar = rtrim(sys_get_temp_dir(), '/\\') . DIRECTORY_SEPARATOR . 'cookie.tmp';
- /*函数------------------------------------------------------------------------------------------------------------*/
/**
 * Send an HTTP request with cURL and return the response body.
 *
 * The request is always issued as a POST (CURLOPT_POST is set unconditionally),
 * redirects are followed, and the same file is used both to load cookies
 * before the request and to save them afterwards.
 *
 * @param string       $url        Target URL; its scheme and port are used as given.
 * @param string|array $postfields Data handed to CURLOPT_POSTFIELDS.
 * @param string       $cookie_jar Path of the cookie file to read from and write to.
 * @param string       $referer    Value sent in the Referer header.
 *
 * @return string|false The response body, or false when the cURL handle could
 *                      not be created or the transfer failed.
 */
function request($url, $postfields, $cookie_jar, $referer) {
    $ch = curl_init();
    if ($ch === false) {
        // Without a handle there is nothing to configure or execute.
        return false;
    }

    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 0,
        CURLOPT_NOBODY         => 0,
        // CURLOPT_PORT is deliberately not set: forcing port 80 overrides the
        // port implied by the URL and breaks https:// URLs and URLs with an
        // explicit port.
        CURLOPT_POST           => 1,
        CURLOPT_POSTFIELDS     => $postfields,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_COOKIEJAR      => $cookie_jar,
        CURLOPT_COOKIEFILE     => $cookie_jar,
        CURLOPT_REFERER        => $referer
    );
    curl_setopt_array($ch, $options);

    // With CURLOPT_RETURNTRANSFER set, curl_exec() returns the body as a
    // string, or false on failure.
    $code = curl_exec($ch);
    curl_close($ch);

    return $code;
}
-
- //获取帖子列表
- function getThreadsList($code){
- preg_match_all('/
|