Home >Backend Development >PHP Tutorial >A php general collection class based on phpQuery written by myself

A php general collection class based on phpQuery written by myself

WBOY
WBOYOriginal
2016-07-25 08:50:15 1191 views
I'm still a beginner, and this is my first time sharing code. This is a PHP scraping ("collection") class I wrote a while ago and have been using ever since. I find it simple yet powerful: as long as you know a little about CSS selectors, you can scrape any page. It also supports https pages, which is enough for simple scraping tasks.
  1. /**
  2. *General list collection class
  3. *Version V1.3
  4. *Author: JAE
  5. *Blog: http://blog.jaekj.com
  6. */
  7. require_once '../phpQuery/phpQuery/phpQuery.php';
  /**
   * General-purpose list scraper built on phpQuery.
   *
   * The constructor downloads one page with cURL; the page is then queried
   * with phpQuery selectors and the extracted values are stored in $jsonArr
   * as a list of rows, each row mapping a caller-chosen field name to the
   * value that was extracted for it.
   *
   * Selector array format:
   *     array("name" => array("selector", "type"), ...)
   * where "type" is "text", "html", or the name of an attribute to read
   * (for example "href" or "src").
   *
   * Block selector: when given, the page is first split into blocks matching
   * that selector and every field selector is evaluated inside each block, so
   * one block produces one row. Without it, each field selector is evaluated
   * against the whole document and its n-th match is written to row n.
   */
  class QueryList
  {
      /** @var string Address of the page that was downloaded. */
      private $pageURL;

      /** @var array Field rules: name => array(selector, type). */
      private $regArr = array();

      /** @var array Extracted rows; read directly by callers. */
      public $jsonArr = array();

      /** @var string Optional block selector ('' when unused). */
      private $regRange;

      /** @var string Raw HTML of the downloaded page ('' if the download failed). */
      private $html;

      /**
       * Download the page and, when rules are supplied, run them immediately.
       *
       * Declared as __construct(): a method named after the class is not
       * treated as a constructor on PHP 8, so the old spelling would silently
       * skip the download.
       *
       * @param string $pageURL  Address of the page to fetch.
       * @param array  $regArr   Field rules (see the class description); may be empty.
       * @param string $regRange Optional block selector.
       */
      public function __construct($pageURL, $regArr = array(), $regRange = '')
      {
          $this->pageURL = $pageURL;

          // Fetch through cURL so that https:// pages can be retrieved as well.
          $ch = curl_init();
          curl_setopt($ch, CURLOPT_URL, $this->pageURL);
          // NOTE(review): certificate and host verification are switched off, as in
          // the original code, so https pages load without a CA bundle. This leaves
          // the transfer open to interception; enable both checks if you can.
          curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
          curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
          $body = curl_exec($ch);
          curl_close($ch);

          // curl_exec() returns false when the transfer fails; keep $html a string.
          $this->html = is_string($body) ? $body : '';

          if (!empty($regArr)) {
              $this->regArr = $regArr;
              $this->regRange = $regRange;
              $this->getList();
          }
      }

      /**
       * Run a new set of rules against the page that is already downloaded.
       *
       * Previous results in $jsonArr are discarded first.
       *
       * @param array  $regArr   Field rules (see the class description).
       * @param string $regRange Optional block selector.
       */
      public function setQuery($regArr, $regRange = '')
      {
          $this->jsonArr = array();
          $this->regArr = $regArr;
          $this->regRange = $regRange;
          $this->getList();
      }

      /**
       * Evaluate the current rules and append the results to $jsonArr.
       */
      private function getList()
      {
          // Nothing to parse (failed download) or nothing to look for.
          if (!is_string($this->html) || $this->html === '' || empty($this->regArr)) {
              return;
          }

          $doc = phpQuery::newDocumentHTML($this->html);

          if (!empty($this->regRange)) {
              // Block mode: one row per block, every field looked up inside it.
              $row = 0;
              foreach (pq($doc)->find($this->regRange) as $block) {
                  foreach ($this->regArr as $key => $rule) {
                      $matched = pq($block)->find($rule[0]);
                      $this->jsonArr[$row][$key] = $this->extractValue($matched, $rule[1]);
                  }
                  $row++;
              }
              return;
          }

          // Document mode: the n-th match of each field selector goes to row n.
          foreach ($this->regArr as $key => $rule) {
              $row = 0;
              foreach (pq($doc)->find($rule[0]) as $node) {
                  $this->jsonArr[$row++][$key] = $this->extractValue($node, $rule[1]);
              }
          }
      }

      /**
       * Read one value from a matched node.
       *
       * @param mixed  $node Node or phpQuery selection to read from.
       * @param string $type "text", "html", or an attribute name.
       * @return mixed Trimmed text/HTML, or whatever attr() returns for the attribute.
       */
      private function extractValue($node, $type)
      {
          switch ($type) {
              case 'text':
                  return trim((string) pq($node)->text());
              case 'html':
                  return trim((string) pq($node)->html());
              default:
                  return pq($node)->attr($type);
          }
      }

      /**
       * @return string The extracted rows encoded as JSON.
       */
      public function getJSON()
      {
          return json_encode($this->jsonArr);
      }
  }
Copy code
  require 'Query/QueryList.class.php';

  // Scrape the OSC code-sharing list: title, link and author of every entry.
  $url = "http://www.oschina.net/code/list";
  $reg = array(
      "title"  => array(".code_title a:eq(0)", "text"),
      "url"    => array(".code_title a:eq(0)", "href"),
      "author" => array("img", "title"),
  );
  $rang = ".code_list li";
  $hj = new QueryList($url, $reg, $rang);
  $arr = $hj->jsonArr;
  print_r($arr);

  // To also scrape the TOP40 active-contributor avatars on the right side of
  // the same page and get them as JSON, reuse the object with a new query.
  $reg = array("portrait" => array(".hot_top img", "src"));
  $hj->setQuery($reg);
  $json = $hj->getJSON();
  echo $json . "\n";

  // Scrape an OSC content page: its title and body.
  $url = "http://www.oschina.net/code/snippet_186288_23816";
  $reg = array(
      "title" => array(".QTitle h1", "text"),
      "con"   => array(".Content", "html"),
  );
  $hj = new QueryList($url, $reg);
  $arr = $hj->jsonArr;
  print_r($arr);

  // These examples should be enough to show how convenient the class is for scraping.
Copy code
  1. /**
  2. *Baidu and Google search API written by myself
  3. *Version V2.0
  4. *Author: JAE
  5. *Blog: http://blog.jaekj.com
  6. **/
  7. require_once 'QueryList_class.php';
  /**
   * Scrapes a Baidu or Google result page through QueryList.
   *
   * After construction $jsonArr holds an envelope with the keys
   * 'num', 'page', 'zNum' (total result count), 'zPage' (total page count),
   * 's' (the keyword), 'other' (author information) and 'data' (the result
   * rows, each with 'title', 'tCon' and 'url').
   */
  class Searcher
  {
      /** @var string Engine name: 'baidu' or 'google'. */
      private $searcher;
      /** @var string Search keyword. */
      private $key;
      /** @var int Number of results per page. */
      private $num;
      /** @var int Zero-based page index. */
      private $page;
      /** @var array Field rules handed to QueryList for the result rows. */
      private $regArr;
      /** @var string Block selector matching one search result. */
      private $regRange;
      /** @var array Field rule that locates the "total results" text. */
      private $regZnum;
      /** @var array Result envelope; read directly by callers. */
      public $jsonArr;

      /**
       * Run the search straight away.
       *
       * Declared as __construct(): a method named after the class is not
       * treated as a constructor on PHP 8.
       *
       * @param string $searcher Search engine: 'baidu' or 'google'.
       * @param string $key      Search keyword.
       * @param int    $num      Number of results to return per page.
       * @param int    $page     Page number, starting at 1.
       * @throws InvalidArgumentException If $searcher names an unsupported engine.
       */
      public function __construct($searcher, $key, $num, $page)
      {
          if ($searcher === 'baidu') {
              $this->regArr = array(
                  "title" => array("h3.t a,#ting_singlesong_box a", "text"),
                  "tCon"  => array("div.c-abstract,font:slice(0,2),div#weibo,table tr:eq(0),div.c-abstract-size p:eq(0),div.vd_sitcom_new_tinfo", "text"),
                  "url"   => array("h3.t a,#ting_singlesong_box a", "href"),
              );
              $this->regRange = 'table.result,table.result-op';
              $this->regZnum = array("zNum" => array("span.nums", "text"));
          } elseif ($searcher === 'google') {
              $this->regArr = array(
                  "title" => array("h3.r a", "text"),
                  "tCon"  => array("span.st", "text"),
                  "url"   => array("h3.r a", "href"),
              );
              $this->regRange = 'li.g';
              $this->regZnum = array("zNum" => array("div#resultStats", "text"));
          } else {
              // Previously an unknown engine fell through with no URL and no rules.
              throw new InvalidArgumentException(
                  'Unsupported search engine: ' . var_export($searcher, true)
              );
          }

          $this->searcher = $searcher;
          $this->key = $key;
          $this->num = $num;
          $this->page = $page - 1;
          $this->getList();
      }

      /**
       * Fetch the result page, resolve the result links and build the envelope.
       */
      private function getList()
      {
          $query = urlencode($this->key);
          $num = $this->num;
          $start = $this->num * $this->page;

          if ($this->searcher === 'baidu') {
              $url = "http://www.baidu.com/s?pn=$start&rn=$num&wd=$query";
          } else {
              $url = "https://www.google.com.hk/search?filter=0&lr=&newwindow=1&safe=images&hl=en&as_qdr=all&num=$num&start=$start&q=$query";
          }

          $searcherObj = new QueryList($url, $this->regArr, $this->regRange);

          // Replace each engine redirect link with the address it points to.
          $rows = $searcherObj->jsonArr;
          foreach ($rows as $i => $row) {
              if (!isset($row['url'])) {
                  continue;
              }
              $rows[$i]['url'] = ($this->searcher === 'baidu')
                  ? $this->getBaiduRealURL($row['url'])
                  : $this->getGoogleRealURL($row['url']);
          }

          // Read the total number of results from the same page.
          $searcherObj->setQuery($this->regZnum);
          $countText = isset($searcherObj->jsonArr[0]['zNum'])
              ? $searcherObj->jsonArr[0]['zNum']
              : '';
          $zNum = $this->parseTotal($countText);

          // Total page count; guarded so a page size of 0 cannot divide by zero.
          $zPage = ($this->num > 0) ? (int) ceil($zNum / $this->num) : 0;

          $this->jsonArr = array(
              'num'   => $this->num,
              'page'  => ((int) $this->page + 1),
              'zNum'  => $zNum,
              'zPage' => $zPage,
              "s"     => "$this->key",
              'other' => array('author' => 'JAE', 'QQ' => '734708094', 'blog' => 'http://blog.jaekj.com'),
              'data'  => $rows,
          );
      }

      /**
       * Extract the total result count from the engine's "N results" text.
       *
       * @param mixed $text Text scraped from the result-statistics element.
       * @return int The count with thousands separators removed, or 0 if none is found.
       */
      private function parseTotal($text)
      {
          $pattern = ($this->searcher === 'baidu')
              ? '/([\d,]+)/'
              : '/([\d,]+) result(s)?/';

          if (!is_string($text) || !preg_match($pattern, $text, $m)) {
              return 0;
          }

          return (int) str_replace(',', '', $m[1]);
      }

      /**
       * @return string The result envelope encoded as JSON.
       */
      public function getJSON()
      {
          return json_encode($this->jsonArr);
      }

      /**
       * Resolve a Baidu redirect link to the address it points to.
       *
       * @param mixed $url Link taken from the result page.
       * @return mixed The first Location header when the link answers 301/302,
       *               otherwise $url unchanged.
       */
      private function getBaiduRealURL($url)
      {
          if (!is_string($url) || $url === '') {
              return $url;
          }

          $header = get_headers($url, 1);
          if (!is_array($header) || !isset($header[0])) {
              // The request failed; keep the link we already have.
              return $url;
          }

          $isRedirect = strpos($header[0], '301') !== false
              || strpos($header[0], '302') !== false;

          // Header names are case-insensitive, so look the key up in lower case.
          $header = array_change_key_case($header, CASE_LOWER);
          if (!$isRedirect || !isset($header['location'])) {
              return $url;
          }

          $location = $header['location'];
          if (is_array($location)) {
              // Several redirects were followed; the first hop is the one wanted.
              return isset($location[0]) ? $location[0] : $url;
          }

          return $location;
      }

      /**
       * Pull the target address out of a Google "/url?q=...&" redirect link.
       *
       * @param mixed $url Link taken from the result page.
       * @return mixed The URL-decoded q parameter, or $url unchanged if it has none.
       */
      private function getGoogleRealURL($url)
      {
          if (!is_string($url)) {
              return $url;
          }

          $reg_url = '/q=(.+)&/U';
          return preg_match($reg_url, $url, $arr) ? urldecode($arr[1]) : $url;
      }
  }
  107. // $hj = new Searcher('google','oschina',20,2);
  108. // print_r( $hj->jsonArr);
  109. //Demo URL
  110. //http://blog.jaekj.com//jae/demo/searcher/Searcher_class.php?searcher=baidu&s=jaekj&num=20&page=1
Copy code


Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn