首頁  >  文章  >  後端開發  >  php實作的一個很好用HTML解析器類別可用於擷取資料_PHP教程

php實作的一個很好用HTML解析器類別可用於擷取資料_PHP教程

WBOY
WBOY原創
2016-07-21 16:12:55 1198瀏覽

複製程式碼 程式碼如下:

 $oldSetting = libxml_use_internal_errors( true );   
libxml_clear_errors();  
/**
 *
 * -+-----------------------------------
 * |PHP5 Framework - 2011
 * |Web Site: www.iblue.cc
 * |E-mail: mejinke@gmail.com
 * |Date: 2012-10-12
 * -+-----------------------------------
 *
 * @desc HTML parser: loads an HTML document and exposes XPath-based lookups.
 *       Each instance wraps either the whole document (empty node path) or a
 *       single node identified by its absolute XPath.
 * @author jingke
 */
class XF_HtmlDom
{
    /** @var DOMXPath|null XPath evaluator of the loaded document; null when nothing is loaded. */
    private $_xpath = null;

    /** @var string Absolute XPath of the wrapped node; '' means the document root. */
    private $_nodePath = '';

    /**
     * @param DOMXPath|null $xpath    XPath evaluator shared with the parent wrapper.
     * @param string        $nodePath Absolute XPath of the node this wrapper represents.
     */
    public function __construct($xpath = null, $nodePath = '')
    {
        $this->_xpath = $xpath;
        $this->_nodePath = (string) $nodePath;
    }

    /**
     * Load an HTML document from a URL (http/https, fetched with cURL) or from
     * any path readable by file_get_contents().
     *
     * @param string $url Remote URL or local path.
     * @return bool True when a document was loaded, false when the source was
     *              unreachable or empty.
     */
    public function loadHtml($url)
    {
        $this->_xpath = null;

        $content = $this->_fetch($url);
        if ($content === false || trim($content) === '')
        {
            // DOMDocument::loadHTML() rejects empty input, so stop here.
            return false;
        }

        // Real-world HTML is rarely valid: collect libxml errors internally for
        // the duration of the parse, then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $html = new DOMDocument();
        $loaded = $html->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded === false)
        {
            return false;
        }

        $this->_xpath = new DOMXPath($html);
        return true;
    }

    /**
     * Find nodes matching an XPath query relative to this wrapper.
     *
     * On the document root the query is evaluated as "//query"; on a node
     * wrapper it is evaluated as "nodePath/query". The wrapper itself is not
     * modified, so find() can be called repeatedly on the same object.
     *
     * @param string   $query XPath fragment, e.g. "td[@id='baidu']/a".
     * @param int|null $index Null to get every match, or the zero-based
     *                        position of the single match to return.
     * @return XF_HtmlDom[]|XF_HtmlDom A list of wrappers when $index is null,
     *         otherwise one wrapper. If the requested match does not exist an
     *         empty wrapper is returned whose text()/getAttribute() yield false.
     */
    public function find($query, $index = null)
    {
        if ($this->_xpath === null)
        {
            return $index === null ? array() : new self();
        }

        $prefix = $this->_nodePath === '' ? '//' : $this->_nodePath . '/';
        // query() returns false when the expression is malformed.
        $nodes = $this->_xpath->query($prefix . $query);

        if ($index === null)
        {
            $tmp = array();
            if ($nodes !== false)
            {
                foreach ($nodes as $node)
                {
                    $tmp[] = new self($this->_xpath, $node->getNodePath());
                }
            }
            return $tmp;
        }

        $node = $nodes === false ? null : $nodes->item((int) $index);
        if ($node === null)
        {
            return new self();
        }

        return new self($this->_xpath, $node->getNodePath());
    }

    /**
     * Get the text content of the wrapped node.
     *
     * @return string|false Text content, or false when there is no node.
     */
    public function text()
    {
        $node = $this->_currentNode();
        if ($node === null)
        {
            return false;
        }

        return $node->textContent;
    }

    /**
     * Get an attribute value of the wrapped node.
     *
     * @param string $name Attribute name.
     * @return string|false Attribute value ('' when the attribute is absent),
     *                      or false when the wrapper holds no element.
     */
    public function getAttribute($name)
    {
        $node = $this->_currentNode();
        // Only element nodes carry attributes (text/comment nodes do not).
        if (!($node instanceof DOMElement))
        {
            return false;
        }

        return $node->getAttribute($name);
    }

    /**
     * Property-style access: "innertext" maps to text(), any other name is
     * looked up as an attribute of the wrapped node.
     *
     * @param string $name Property name.
     * @return string|false
     */
    public function __get($name)
    {
        if ($name == 'innertext')
        {
            return $this->text();
        }

        return $this->getAttribute($name);
    }

    /**
     * Fetch the raw HTML for loadHtml().
     *
     * @param string $url Remote URL or local path.
     * @return string|false Document source, or false on failure.
     */
    private function _fetch($url)
    {
        // Anchor the scheme test at the start so a local path that merely
        // contains "http" is not mistaken for a URL.
        if (!preg_match('#^https?://#i', $url))
        {
            return file_get_contents($url);
        }

        $userAgent = 'Mozilla/5.0 (Windows NT 5.1; rv:6.0) Gecko/20100101 Firefox/6.0';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_REFERER, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        $content = curl_exec($ch);
        curl_close($ch);

        return $content;
    }

    /**
     * Resolve the node this wrapper points at.
     *
     * @return DOMNode|null Null for the document root, an unloaded document,
     *                      or a path that no longer matches anything.
     */
    private function _currentNode()
    {
        if ($this->_xpath === null || $this->_nodePath === '')
        {
            return null;
        }

        $nodes = $this->_xpath->query($this->_nodePath);
        if ($nodes === false || $nodes->length === 0)
        {
            return null;
        }

        return $nodes->item(0);
    }
}
// Usage example: fetch a page and print the text of the first link inside
// the <td id="baidu"> cell.
$xp = new XF_HtmlDom();
$xp->loadHtml('http://www.aizhan.com/siteall/www.opendir.cn/');
// Passing index 0 returns a single wrapper; "innertext" reads its text content.
$rows = $xp->find("td[@id='baidu']/a", 0)->innertext;
print_r($rows);





http://www.bkjia.com/PHPjc/313589.html

www.bkjia.com

true

http://www.bkjia.com/PHPjc/313589.html

TechArticle
複製程式碼如下: <?php $oldSetting = libxml_use_internal_errors( true ); libxml_clear_errors(); /** * * - ----------------------------------- * |PHP5 框架 - 2011 * |W ...

陳述:
本文內容由網友自願投稿,版權歸原作者所有。本站不承擔相應的法律責任。如發現涉嫌抄襲或侵權的內容,請聯絡admin@php.cn