Home  >  Article  >  Backend Development  >  A php text collection class

A php text collection class

WBOY
WBOYOriginal
2016-07-25 09:07:16 · 813 views
  1. $C = new Collection();
  2. $C->url = 'http://bbs.it-home.org/subject_search?cat=1001&search_text=%E5%B9%B4%E8%BD%BB%E4%BA%BA';
  3. $C->startFlag = '

    ';
  4. $C->endFlag = '
    ';
  5. $C->init();
  6. $C->regExp = "|

    (.*)

    |Uis";
  7. $C->parse();
  8. print_rr( $C->result);
  9. */
Copy code

php text collection class file:

  1. <?php

  2. /**
  3. Module name: php text collection class
  4. **/
  /**
   * Small text scraper.
   *
   * Workflow: set $url (or assign $content directly), optionally set
   * $startFlag / $endFlag / $codeTo, call init(), set $regExp, call parse(),
   * then read the matches from $result.
   *
   * Properties and methods are deliberately left without type declarations so
   * that existing subclasses and callers written for the untyped original keep
   * working.
   */
  class Collection
  {
      // ---- Input (set by the caller) ----

      /** URL (or local path) of the document to analyse. */
      public $url;

      /** Document text; filled by getFile(), may also be assigned directly. */
      public $content;

      /** PCRE pattern (delimiters and modifiers included) applied to $block by parse(). */
      public $regExp;

      /** Encoding of the source text; detected by convertEncoding() when left empty. */
      public $codeFrom;

      /** Encoding to convert to; no conversion is attempted while this is empty. */
      public $codeTo;

      /** Read timeout in seconds for HTTP fetches; PHP's default applies when empty. */
      public $timeout;

      /**
       * Marker where the collected block starts (the marker itself is kept).
       * When empty, or not present in the document, the block starts at offset 0.
       */
      public $startFlag;

      /**
       * Marker where the collected block ends (the marker itself is dropped).
       * When empty, or not present after the start marker, the block runs to
       * the end of the document.
       */
      public $endFlag;

      /** Text between $startFlag and $endFlag; written by getBlock(). */
      public $block;

      // ---- Output ----

      /** Matches produced by parse(), in PREG_SET_ORDER layout. */
      public $result;

      /**
       * Initialise the collector: fetch the document when a URL is configured,
       * then convert its encoding if $codeTo is set.
       *
       * The original tested an undefined local `$url` with an inverted
       * condition, so the fetch always ran; it now runs only when $this->url
       * is non-empty, which lets callers supply $content themselves.
       */
      public function init()
      {
          if (!empty($this->url)) {
              $this->getFile();
          }
          $this->convertEncoding();
      }

      /**
       * Cut out the block between the markers and run $regExp over it.
       *
       * Matches are stored in $result (PREG_SET_ORDER). With no pattern
       * configured, $result becomes an empty array instead of triggering a
       * PCRE "empty regular expression" warning.
       *
       * @return string The text block that was searched.
       */
      public function parse()
      {
          $this->getBlock();

          if (empty($this->regExp)) {
              $this->result = array();
              return $this->block;
          }

          preg_match_all($this->regExp, $this->block, $this->result, PREG_SET_ORDER);
          return $this->block;
      }

      /**
       * Report an error by echoing the message.
       *
       * @param string $msg Message to output.
       */
      public function error($msg)
      {
          echo $msg;
      }

      /**
       * Read the document at $url into $content, normalising every line ending
       * to "\n".
       *
       * @return string|false The normalised content, or false when the read failed
       *                      (the failure is also reported through error()).
       */
      public function getFile()
      {
          // An empty path makes file_get_contents() throw on PHP 8; fail the usual way instead.
          if (empty($this->url)) {
              $this->error("can't read the url:" . $this->url);
              return false;
          }

          // Honour $timeout for HTTP(S) sources; other wrappers ignore the 'http' options.
          $context = null;
          if (!empty($this->timeout)) {
              $context = stream_context_create(array(
                  'http' => array('timeout' => (float) $this->timeout),
              ));
          }

          // Warnings are suppressed on purpose: failures are reported through error().
          $data = @file_get_contents($this->url, false, $context);
          if ($data === false) {
              $this->error("can't read the url:" . $this->url);
              return false;
          }

          // "\r\n" must come first so a Windows line ending does not become two newlines.
          $this->content = str_replace(array("\r\n", "\r"), "\n", $data);
          return $this->content;
      }

      /**
       * Store in $block the part of $content delimited by $startFlag and $endFlag.
       *
       * The end marker is searched only after the start marker. A marker that
       * is empty or cannot be found leaves that side of the block at the
       * document boundary, so with no markers the block is the whole document.
       */
      public function getBlock()
      {
          $text = (string) $this->content;

          if (!empty($this->startFlag)) {
              $from = strpos($text, (string) $this->startFlag);
              if ($from !== false) {
                  $text = substr($text, $from);
              }
          }

          if (!empty($this->endFlag)) {
              $to = strpos($text, (string) $this->endFlag);
              // A missing end marker must not collapse the block to an empty string.
              if ($to !== false) {
                  $text = substr($text, 0, $to);
              }
          }

          $this->block = $text;
      }

      /**
       * Convert $content from $codeFrom to $codeTo.
       *
       * Nothing happens unless $codeTo is set. $codeFrom is auto-detected only
       * when the caller did not provide it. On failure the problem is reported
       * through error() and $content is left untouched.
       */
      public function convertEncoding()
      {
          if (empty($this->codeTo)) {
              return;
          }

          $text = (string) $this->content;

          if (empty($this->codeFrom)) {
              $detected = mb_detect_encoding($text);
              if ($detected === false) {
                  $this->error("can't convert Encoding");
                  return;
              }
              $this->codeFrom = $detected;
          }

          $converted = mb_convert_encoding($text, $this->codeTo, $this->codeFrom);
          // Compare against false explicitly: an empty document is not a failure.
          if ($converted === false) {
              $this->error("can't convert Encoding");
              return;
          }

          $this->content = $converted;
      }
  } // end of class
  62. ?>

Copy code


Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn