-
- $C = new Collection();
- $C->url = 'http://bbs.it-home.org/subject_search?cat=1001&search_text=%E5%B9%B4%E8%BD%BB%E4%BA%BA';
- $C->startFlag = '';
- $C->endFlag = '
';
- $C->init();
- $C->regExp = "|
(.*) |Uis";
- $C->parse();
- print_rr( $C->result);
- */
Copy code
PHP text collection class file:
-
-
<?php
/**
 * Module name: PHP text collection class.
 *
 * Reads a page (or works on text assigned directly to $content), optionally
 * converts its encoding, narrows it down to the text between $startFlag and
 * $endFlag, and extracts entries from that block with a regular expression.
 *
 * Typical use: set $url, the flags and $regExp, call init(), then parse(),
 * and read the matches from $result.
 */
class Collection
{
    // ---- Inputs (set by the caller) ----

    /** @var string URL address to be analyzed. */
    public $url;

    /** @var string Content that was read (may also be assigned directly). */
    public $content;

    /** @var string Regular expression (with delimiters) selecting the wanted parts. */
    public $regExp;

    /** @var string Encoding of the original text; detected when left empty. */
    public $codeFrom;

    /** @var string Encoding to convert to; no conversion happens when empty. */
    public $codeTo;

    /** @var int Time to wait for the collection (not used by this class itself). */
    public $timeout;

    /**
     * @var string Marker where the collected block starts. When empty, the
     *             block starts at the beginning of the content.
     */
    public $startFlag;

    /**
     * @var string Marker where the collected block ends (the marker itself is
     *             excluded). When empty, the block runs to the end of the content.
     */
    public $endFlag;

    /** @var string The text block between $startFlag and $endFlag. */
    public $block;

    // ---- Outputs ----

    /** @var array Matches produced by parse(), in PREG_SET_ORDER layout. */
    public $result;

    /**
     * Initialize the collector: fetch the page and convert its encoding.
     *
     * The page is fetched only when $url is set, so a caller may instead
     * assign $content directly before calling init().
     */
    public function init()
    {
        if (!empty($this->url)) {
            $this->getFile();
        }
        $this->convertEncoding();
    }

    /**
     * Collect the required content.
     *
     * Rebuilds $block from the flags, then stores every match of $regExp
     * inside that block in $result. $result is always an array afterwards,
     * even when the pattern is missing or invalid.
     *
     * @return string The text block that was searched.
     */
    public function parse()
    {
        $this->getBlock();
        $this->result = array();

        if (empty($this->regExp)) {
            $this->error("can't parse without a regExp");
            return $this->block;
        }

        $matched = preg_match_all($this->regExp, $this->block, $this->result, PREG_SET_ORDER);
        if ($matched === false) {
            // preg_match_all() may leave the output argument in an undefined state on failure.
            $this->result = array();
            $this->error("can't apply the regExp:" . $this->regExp);
        }

        return $this->block;
    }

    /**
     * Error handling: prints the message.
     *
     * @param string $msg Message to report.
     */
    public function error($msg)
    {
        echo $msg;
    }

    /**
     * Read the page at $url into $content with line endings normalised to "\n".
     *
     * @return string|false The content on success, false when the URL is
     *                      empty or the page cannot be read (or is empty).
     */
    public function getFile()
    {
        // An empty path makes the file functions throw on PHP 8, which "@"
        // cannot suppress, so reject it up front.
        if (empty($this->url)) {
            $this->error("can't read the url:" . $this->url);
            return false;
        }

        $importdata = @file_get_contents($this->url);
        if ($importdata === false || $importdata === '') {
            $this->error("can't read the url:" . $this->url);
            return false;
        }

        // "\r\n" must be listed first so a Windows line ending becomes one "\n", not two.
        $importdata = str_replace(array("\r\n", "\r"), "\n", $importdata);
        $this->content = $importdata;

        return $importdata;
    }

    /**
     * Get the required text block and store it in $block.
     *
     * The block starts at the first occurrence of $startFlag (inclusive) and
     * ends just before the first occurrence of $endFlag after that point.
     * A flag that is empty or not found leaves that side of the block open.
     */
    public function getBlock()
    {
        $block = (string) $this->content;

        if (!empty($this->startFlag)) {
            $start = strpos($block, $this->startFlag);
            if ($start !== false) {
                $block = substr($block, $start);
            }
        }

        if (!empty($this->endFlag)) {
            $end = strpos($block, $this->endFlag);
            // Without this check a missing end flag would cut the block to length 0.
            if ($end !== false) {
                $block = substr($block, 0, $end);
            }
        }

        $this->block = $block;
    }

    /**
     * Conversion of content encoding.
     *
     * Conversion is only performed when $codeTo is given. $codeFrom is used
     * as supplied by the caller and is detected only when it is empty. On
     * failure $content is left untouched and error() is called.
     */
    public function convertEncoding()
    {
        if (empty($this->codeTo) || !is_string($this->content) || $this->content === '') {
            return;
        }

        if (empty($this->codeFrom)) {
            $detected = mb_detect_encoding($this->content);
            if ($detected === false) {
                $this->error("can't convert Encoding");
                return;
            }
            $this->codeFrom = $detected;
        }

        $converted = mb_convert_encoding($this->content, $this->codeTo, $this->codeFrom);
        if ($converted === false) {
            $this->error("can't convert Encoding");
            return;
        }

        $this->content = $converted;
    }
} // end of class
- ?>
-
Copy code
|