Home >Backend Development >PHP Tutorial >A php general collection class based on phpQuery written by myself

A php general collection class based on phpQuery written by myself

WBOY · Original · 2016-07-25 08:50:15 · 1251 views

I'm still a beginner, and this is my first time sharing code. This is a PHP scraping (collection) class I wrote a while ago and have been using ever since. I find it simple yet powerful: as long as you know a little about CSS selectors, you can scrape any page. It also supports HTTPS pages, which is enough for simple scraping tasks.

/**
*General list collection class
*Version V1.3
*Author: JAE
*Blog: http://blog.jaekj.com
*/
require_once '../phpQuery/phpQuery/phpQuery.php';
/**
 * General-purpose list scraper built on phpQuery.
 *
 * Downloads one page with cURL when constructed and then extracts values from
 * it according to a set of selector rules. Results accumulate in the public
 * $jsonArr property as a list of rows, each row being an array keyed by the
 * rule names.
 */
class QueryList
{
    /** Address of the page that was downloaded. */
    private $pageURL;

    /** Extraction rules: array("name" => array("selector", "type"), ...). */
    private $regArr = array();

    /** Extracted rows; read directly by callers or via getJSON(). */
    public $jsonArr = array();

    /** Optional block selector; empty means "no block selection". */
    private $regRange;

    /** Raw response body as returned by curl_exec(). */
    private $html;

    /**
     * Download the page and, if rules are given, run the extraction at once.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after the class as a constructor.
     *
     * @param string $pageURL  Address of the page to collect.
     * @param array  $regArr   Selector array in the form
     *                         array("name" => array("selector", "type"), ...).
     *                         "type" is "text", "html", or the name of an
     *                         attribute to read (e.g. "href", "src").
     * @param string $regRange Block selector: first select several large blocks
     *                         with this selector, then apply every rule inside
     *                         each block separately.
     */
    public function __construct($pageURL, $regArr = array(), $regRange = '')
    {
        $this->pageURL = $pageURL;

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->pageURL);
        // Certificate and host verification are switched off so that https://
        // pages can be fetched without a CA bundle.
        // NOTE(review): this also disables protection against
        // man-in-the-middle attacks; re-enable it if a CA bundle is available.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $this->html = curl_exec($ch);
        curl_close($ch);

        if (!empty($regArr)) {
            $this->regArr = $regArr;
            $this->regRange = $regRange;
            $this->getList();
        }
    }

    /**
     * Run a new set of rules against the page that was already downloaded.
     *
     * Previously collected rows are discarded first.
     *
     * @param array  $regArr   Selector array, same format as in the constructor.
     * @param string $regRange Optional block selector.
     */
    public function setQuery($regArr, $regRange = '')
    {
        $this->jsonArr = array();
        $this->regArr = $regArr;
        $this->regRange = $regRange;
        $this->getList();
    }

    /**
     * Apply the current rules to the downloaded HTML and fill $jsonArr.
     *
     * With a block selector, each matched block produces one row and every
     * rule is evaluated inside that block. Without one, each rule is evaluated
     * against the whole document and its n-th match is written to row n.
     */
    private function getList()
    {
        $hobj = phpQuery::newDocumentHTML($this->html);

        if (!empty($this->regRange)) {
            $i = 0;
            foreach (pq($hobj)->find($this->regRange) as $item) {
                // foreach replaces the each()/reset() pair, which was removed
                // in PHP 8.0, and always starts from the first rule.
                foreach ($this->regArr as $key => $reg_value) {
                    $iobj = pq($item)->find($reg_value[0]);
                    $this->jsonArr[$i][$key] = $this->extractValue($iobj, $reg_value[1]);
                }
                $i++;
            }
            return;
        }

        foreach ($this->regArr as $key => $reg_value) {
            $i = 0;
            foreach (pq($hobj)->find($reg_value[0]) as $item) {
                $this->jsonArr[$i++][$key] = $this->extractValue($item, $reg_value[1]);
            }
        }
    }

    /**
     * Read one value from a matched node according to the rule type.
     *
     * @param mixed  $node Node (or selection) to wrap with pq().
     * @param string $type "text", "html", or an attribute name.
     * @return mixed Trimmed text/HTML, or the value returned by attr().
     */
    private function extractValue($node, $type)
    {
        switch ($type) {
            case 'text':
                return trim(pq($node)->text());
            case 'html':
                return trim(pq($node)->html());
            default:
                return pq($node)->attr($type);
        }
    }

    /**
     * Return the collected rows encoded as JSON.
     *
     * @return string
     */
    public function getJSON()
    {
        return json_encode($this->jsonArr);
    }
}

Copy code

require 'Query/QueryList.class.php';

// Collect the OSC code-sharing list: title, link and author of every entry.
$url = "http://www.oschina.net/code/list";
$reg = array(
    "title"  => array(".code_title a:eq(0)", "text"),
    "url"    => array(".code_title a:eq(0)", "href"),
    "author" => array("img", "title"),
);
$rang = ".code_list li";
$hj = new QueryList($url, $reg, $rang);
$arr = $hj->jsonArr;
print_r($arr);

// To also collect the TOP40 active-contributor avatars on the right side of
// the same page and get them as JSON, reuse the object with a new rule set.
$reg = array("portrait" => array(".hot_top img", "src"));
$hj->setQuery($reg);
$json = $hj->getJSON();
echo $json . "\n";

// Collect the title and body of an OSC content page.
$url = "http://www.oschina.net/code/snippet_186288_23816";
$reg = array(
    "title" => array(".QTitle h1", "text"),
    "con"   => array(".Content", "html"),
);
$hj = new QueryList($url, $reg);
$arr = $hj->jsonArr;
print_r($arr);
// These examples should be enough to show how convenient the class is for scraping.

Copy code

/**
*Baidu and Google search API written by myself
*Version V2.0
*Author: JAE
*Blog: http://blog.jaekj.com
**/
require_once 'QueryList_class.php';
/**
 * Small search API for Baidu and Google built on top of QueryList.
 *
 * Constructing an instance performs the search immediately; the result is
 * stored in the public $jsonArr property and is also available as JSON via
 * getJSON().
 */
class Searcher
{
    /** Search engine identifier: 'baidu' or 'google'. */
    private $searcher;

    /** Search keyword. */
    private $key;

    /** Number of results requested per page. */
    private $num;

    /** Zero-based page index (the constructor receives a one-based page). */
    private $page;

    /** Selector rules for title, summary ("tCon") and url of each result. */
    private $regArr;

    /** Block selector that matches one search result. */
    private $regRange;

    /** Selector rule for the element that holds the total result count. */
    private $regZnum;

    /** Final result: paging metadata plus the rows under 'data'. */
    public $jsonArr;

    /**
     * Configure the selectors for the chosen engine and run the search.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after the class as a constructor.
     *
     * @param string $searcher Search engine, 'baidu' or 'google'.
     * @param string $key      Search keyword.
     * @param int    $num      Number of results to return per page.
     * @param int    $page     One-based page number.
     * @throws InvalidArgumentException When the engine is not supported.
     */
    public function __construct($searcher, $key, $num, $page)
    {
        if ($searcher === 'baidu') {
            $this->regArr = array("title"=>array("h3.t a,#ting_singlesong_box a","text"),"tCon"=>array("div.c-abstract,font:slice(0,2),div#weibo,table tr:eq(0),div.c-abstract-size p:eq(0),div.vd_sitcom_new_tinfo","text"),"url"=>array("h3.t a,#ting_singlesong_box a","href"));
            $this->regRange = 'table.result,table.result-op';
            $this->regZnum = array("zNum"=>array("span.nums","text"));
        } elseif ($searcher === 'google') {
            $this->regArr = array("title"=>array("h3.r a","text"),"tCon"=>array("span.st","text"),"url"=>array("h3.r a","href"));
            $this->regRange = 'li.g';
            $this->regZnum = array("zNum"=>array("div#resultStats","text"));
        } else {
            // Without this guard the search URL and count pattern would be
            // undefined further down.
            throw new InvalidArgumentException('Unsupported search engine: ' . $searcher);
        }

        $this->searcher = $searcher;
        $this->key = $key;
        $this->num = $num;
        $this->page = $page - 1;
        $this->getList();
    }

    /**
     * Fetch the result page, resolve the result links and build $jsonArr.
     */
    private function getList()
    {
        $s = urlencode($this->key);
        $num = $this->num;
        $start = $this->num * $this->page;

        if ($this->searcher === 'baidu') {
            $url = "http://www.baidu.com/s?pn=$start&rn=$num&wd=$s";
            $reg_znum = '/[\d,]+/';
        } else {
            $url = "https://www.google.com.hk/search?filter=0&lr=&newwindow=1&safe=images&hl=en&as_qdr=all&num=$num&start=$start&q=$s";
            $reg_znum = '/([\d,]+) result(s)?/';
        }

        $searcherObj = new QueryList($url, $this->regArr, $this->regRange);

        // Replace each engine-specific link with the address it points to.
        foreach ($searcherObj->jsonArr as $i => $row) {
            if (empty($row['url'])) {
                continue;
            }
            if ($this->searcher === 'baidu') {
                $searcherObj->jsonArr[$i]['url'] = $this->getBaiduRealURL($row['url']);
            } else {
                $searcherObj->jsonArr[$i]['url'] = $this->getGoogleRealURL($row['url']);
            }
        }
        $rows = $searcherObj->jsonArr;

        // Get the total number of results from the same downloaded page.
        $searcherObj->setQuery($this->regZnum);
        $zText = isset($searcherObj->jsonArr[0]['zNum']) ? $searcherObj->jsonArr[0]['zNum'] : '';
        $zNum = $this->parseTotalCount($zText, $reg_znum);

        // Compute the total number of pages; guard against a page size of 0.
        $zPage = $this->num > 0 ? ceil($zNum / $this->num) : 0;

        $this->jsonArr = array(
            'num'   => $this->num,
            'page'  => ((int) $this->page + 1),
            'zNum'  => $zNum,
            'zPage' => $zPage,
            "s"     => (string) $this->key,
            'other' => array('author'=>'JAE','QQ'=>'734708094','blog'=>'http://blog.jaekj.com'),
            'data'  => $rows,
        );
    }

    /**
     * Extract the total result count from the engine's summary text.
     *
     * @param string $text    Text of the element that shows the result count.
     * @param string $pattern Regular expression matching the number; when it
     *                        has a capture group, group 1 is used.
     * @return int The count with thousands separators removed, 0 if no match.
     */
    private function parseTotalCount($text, $pattern)
    {
        if (!preg_match($pattern, (string) $text, $m)) {
            return 0;
        }
        $digits = isset($m[1]) ? $m[1] : $m[0];
        return (int) str_replace(',', '', $digits);
    }

    /**
     * Return the collected search result encoded as JSON.
     *
     * @return string
     */
    public function getJSON()
    {
        return json_encode($this->jsonArr);
    }

    /**
     * Get the real address behind a Baidu redirect link.
     *
     * Requests the headers of the link; when the first status line reports a
     * 301 or 302, the (first) Location header is returned, otherwise the link
     * itself.
     *
     * @param string $url Link taken from the Baidu result page.
     * @return string
     */
    private function getBaiduRealURL($url)
    {
        $header = get_headers($url, 1);
        // get_headers() returns false on failure; keep the original link then.
        if ($header === false || !isset($header[0])) {
            return $url;
        }

        $redirected = strpos($header[0], '301') !== false || strpos($header[0], '302') !== false;
        if (!$redirected || !isset($header['Location'])) {
            return $url;
        }

        // Several redirects yield an array of Location values; use the first.
        if (is_array($header['Location'])) {
            return $header['Location'][0];
        }
        return $header['Location'];
    }

    /**
     * Get the target address from a Google result link.
     *
     * Takes the shortest text between "q=" and the following "&" and
     * URL-decodes it; links without that part are returned unchanged.
     *
     * @param string $url Link taken from the Google result page.
     * @return string
     */
    private function getGoogleRealURL($url)
    {
        $reg_url = '/q=(.+)&/U';
        return preg_match($reg_url, $url, $arr) ? urldecode($arr[1]) : $url;
    }
}
// $hj = new Searcher('google','oschina',20,2);
// print_r( $hj->jsonArr);
//效果演示地址
//http://blog.jaekj.com//jae/demo/searcher/Searcher_class.php?searcher=baidu&s=jaekj&num=20&page=1

Copy code

Statement：

The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Previous article: The background sends emails every 5 minutes, and the email content is html. | Next article: The background sends emails every 5 minutes, and the email content is html.

See more

A php general collection class based on phpQuery written by myself

Related articles