Home >php教程 >PHP源码 >curl 抓取yahoo boss 搜索结果

curl 抓取yahoo boss 搜索结果

PHP中文网
PHP中文网Original
2016-05-25 17:13:57 · 984 browse


1.编写curl类,进行网页内容抓取 
 
/**
 * Small wrapper around one reusable cURL handle for fetching page bodies.
 *
 * The handle is configured once in the constructor (response returned as a
 * string, fixed user agent, redirects followed, timeouts applied) and is then
 * reused by every getWebPageContent() call.
 */
class CurlUtil
{
    /** @var resource|object|null cURL handle shared by all requests of this instance. */
    private $curl;

    /** @var int Seconds allowed for establishing the connection. */
    private $timeout = 10;

    /** @var int Seconds allowed for the whole transfer; 0 means no limit. */
    private $transferTimeout = 30;

    /**
     * Create and configure the cURL handle.
     *
     * @param int|null $timeout         Connect timeout in seconds; null keeps the default of 10.
     * @param int|null $transferTimeout Total transfer timeout in seconds; null keeps the
     *                                  default of 30, 0 disables the limit.
     *
     * @throws \RuntimeException When the cURL session cannot be initialised.
     */
    public function __construct($timeout = null, $transferTimeout = null)
    {
        if ($timeout !== null) {
            $this->timeout = max(0, (int) $timeout);
        }
        if ($transferTimeout !== null) {
            $this->transferTimeout = max(0, (int) $transferTimeout);
        }

        $handle = curl_init();
        if ($handle === false) {
            throw new \RuntimeException('Unable to initialise a cURL session.');
        }
        $this->curl = $handle;

        // Hand the response back from curl_exec() instead of printing it.
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($this->curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
        // Keep response headers out of the returned string.
        curl_setopt($this->curl, CURLOPT_HEADER, false);
        // Request the body as well (NOBODY switched off).
        curl_setopt($this->curl, CURLOPT_NOBODY, false);
        curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, $this->timeout);
        // The connect timeout alone does not bound a server that accepts the
        // connection and then stalls, so the whole transfer is capped as well.
        curl_setopt($this->curl, CURLOPT_TIMEOUT, $this->transferTimeout);
        curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
    }

    /**
     * Release the cURL handle.
     */
    public function __destruct()
    {
        // Only resource-type handles (PHP < 8.0) need an explicit close; from
        // PHP 8.0 curl_close() has no effect and the handle object is freed
        // once it is no longer referenced. The guard also covers the case
        // where no handle was ever stored.
        if (is_resource($this->curl)) {
            curl_close($this->curl);
        }
        $this->curl = null;
    }

    /**
     * Fetch the given URL and return the response body.
     *
     * @param string $url Address to request.
     *
     * @return string|false Response body, or false when the transfer fails.
     */
    public function getWebPageContent($url)
    {
        curl_setopt($this->curl, CURLOPT_URL, $url);
        return curl_exec($this->curl);
    }
}



2.创建curl对象 
 
// Create the HTTP client instance that is later passed to getYahooSearch().
$CurlUtil = new CurlUtil();    


3.抓取yahoo搜索结果 
 
/**
 * Query the Yahoo BOSS web-search endpoint for a keyword and return simplified result rows.
 *
 * @param CurlUtil $curl Client used to fetch the JSON response.
 * @param string   $key  Search phrase, not yet URL-encoded.
 *
 * @return array List of rows shaped as
 *               array('url' => ..., 'date' => ..., 'title' => ..., 'description' => ...),
 *               where title and description have HTML tags stripped. An empty array is
 *               returned when the fetch fails, the body is not valid JSON, or the decoded
 *               data has no ysearchresponse.resultset_web list.
 */
function getYahooSearch(CurlUtil $curl, $key)
{
    // The phrase is embedded in the URL *path*, so it needs RFC 3986 encoding
    // (space => %20). urlencode() would emit "+", which only stands for a
    // space inside a query string.
    $encodedKey = rawurlencode($key);

    // NOTE(review): the query string carries `count` twice (20 and 10); which
    // value the service honours is not visible here - confirm and drop one.
    $searchUrl = "http://boss.yahooapis.com/ysearch/web/v1/" . $encodedKey . "?appid=你的雅虎appid&lang=tzh&region=hk&abstract=long&count=20&format=json&start=0&count=10";

    $jsonStr = $curl->getWebPageContent($searchUrl);
    if (!is_string($jsonStr) || $jsonStr === '') {
        // Transfer failed or produced nothing to decode.
        return array();
    }

    $searchDataInfo = json_decode($jsonStr, true);
    if (!isset($searchDataInfo['ysearchresponse']['resultset_web'])
        || !is_array($searchDataInfo['ysearchresponse']['resultset_web'])
    ) {
        // Invalid JSON or a response without a result list.
        return array();
    }

    $returnArray = array();
    foreach ($searchDataInfo['ysearchresponse']['resultset_web'] as $data) {
        if (!is_array($data)) {
            continue;
        }
        // Absent fields fall back to null / empty string instead of raising
        // undefined-index warnings.
        $returnArray[] = array(
            'url' => isset($data['url']) ? $data['url'] : null,
            'date' => isset($data['date']) ? $data['date'] : null,
            'title' => strip_tags(isset($data['title']) ? $data['title'] : ''),
            'description' => strip_tags(isset($data['abstract']) ? $data['abstract'] : ''),
        );
    }
    return $returnArray;
}


4.测试结果 
 
// Demo call: search for the keyword "百度" and dump the array returned by getYahooSearch().
var_dump(getYahooSearch($CurlUtil, "百度"));

                   

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Previous article: 良好的php代码 | Next article: 码农的hello world