Home  >  Article  >  php教程  >  抓取文章列表

抓取文章列表

PHP中文网
PHP中文网 (Original)
2016-05-23 17:10:00 · 1003 views

跳至

<?php
/**
 * 
 * @authors HG (hg0728@qq.com)
 * @date    2015-05-22 17:00:48
 * @version 1.0
 */
// Send a Content-type header declaring the response as HTML with a UTF-8 charset.
header("Content-type:text/html;charset=utf-8");
/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when the cURL handle cannot
 *                      be created or the transfer fails.
 */
function getCurl($url) {
	$ch = curl_init();
	if ($ch === false) {
		// No handle means nothing can be configured or executed.
		return false;
	}

	curl_setopt($ch, CURLOPT_URL, $url);
	// Return the body from curl_exec() instead of printing it.
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	// NOTE(review): peer and host certificate checks are disabled, which
	// leaves HTTPS transfers open to interception. Kept as-is to preserve
	// existing behaviour; re-enable if HTTPS targets are ever fetched.
	curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
	curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
	// Bound connect and total transfer time so a stalled server cannot
	// block the script indefinitely (cURL's default is no time limit).
	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
	curl_setopt($ch, CURLOPT_TIMEOUT, 30);

	$result = curl_exec($ch);
	curl_close($ch);

	return $result;
}

/**
 * Extract article links from fetched page HTML.
 *
 * Echoes each matched URL and appends one "url title" line per match to
 * blogs.txt in the current working directory.
 *
 * @param string|false $str Page HTML; non-string or empty input is ignored.
 * @return void
 */
function preg_list($str){
	if (!is_string($str) || $str === '') {
		// A failed fetch or an empty page has nothing to extract.
		return;
	}

	// NOTE(review): the published pattern survived only as '/(.*?)/' (its HTML
	// tags were stripped), yet the loop needs two capture groups. This
	// reconstruction captures the href (group 1) and the link text (group 2)
	// of anchors carrying class "titlelnk" -- confirm against the target
	// site's current markup.
	$regex = '/<a\s+class="titlelnk"\s+href="([^"]*)"[^>]*>(.*?)<\/a>/s';
	$count = preg_match_all($regex, $str, $matches);
	if ($count === false || $count === 0) {
		// false signals a PCRE error; 0 means the page has no matching links.
		return;
	}

	// Collect every line first so the file is opened once per page.
	$lines = '';
	for ($i = 0; $i < $count; $i++) {
		echo $matches[1][$i];
		$lines .= $matches[1][$i] . ' ' . $matches[2][$i] . "\n";
	}
	file_put_contents('blogs.txt', $lines, FILE_APPEND);
}
// Crawl page by page: index 0 is the site home page, indexes 1..200 are the
// numbered list pages under /sitehome/p/.
for ($i = 0; $i < 201; $i++) {
	$url = ($i == 0)
		? 'http://www.cnblogs.com/'
		: 'http://www.cnblogs.com/sitehome/p/' . $i;

	$str = getCurl($url);
	if ($str === false) {
		// The transfer failed; skip this page instead of parsing a non-string.
		continue;
	}

	preg_list($str);
}

                   

Statement:
The content of this article was voluntarily contributed by users, and the copyright belongs to the original author. This site assumes no legal responsibility for it. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn