Home > Article > Backend Development > 怎么抓取网页实时内容
如何抓取网页实时内容
<?php
/* Created on [2013-5-1] Author[Newton] Filename[action.php] */

# URL: http://data.shishicai.cn/cqssc/haoma/
# Demo: fetch the page, normalise it to UTF-8 and print its third <table>.
#
# Result reported for the original version of this script:
#   ...garbled text (mojibake)...
#   pages-->>1 DOMDocument Object ( ) doc-->>1 table-->>
#   (the table was empty)
#
# Causes, all addressed below:
#   * The fetched HTML was never loaded into the DOMDocument, so no <table>
#     could be found and item(2) was null.
#   * print_r() without its second argument prints immediately and returns
#     true, so `"label" . print_r($x)` rendered as `<dump>label1`.
#   * The header used "charset:utf-8"; the parameter separator must be "=".
#   * nodeValue yields only the text content of a node, not its markup.

/**
 * Encoding conversion: return $str as UTF-8.
 *
 * Input that is already valid UTF-8 (including the empty string) is returned
 * unchanged. Anything else is assumed to be GBK, which is what the original
 * script assumed for the target site, and is converted from that encoding.
 *
 * @param string $str Raw bytes, either UTF-8 or GBK.
 *
 * @return string UTF-8 encoded text.
 */
function convToUtf8($str)
{
    // A strict validity check is used instead of mb_detect_encoding(), whose
    // guess is unreliable when a single-byte charset is among the candidates.
    if ($str === '' || mb_check_encoding($str, 'UTF-8')) {
        return $str;
    }

    // CP936 is the name mbstring uses for GBK.
    return mb_convert_encoding($str, 'UTF-8', 'CP936');
}

/**
 * Return the outer HTML of the $index-th (0-based) <table> found in $html.
 *
 * @param string $html  UTF-8 encoded HTML document or fragment.
 * @param int    $index Zero-based position of the wanted table in document order.
 *
 * @return string|null The table's markup, or null when $html is empty, cannot
 *                     be parsed, or contains no table at that position.
 */
function extractTableHtml($html, $index)
{
    if (!is_string($html) || trim($html) === '') {
        return null;
    }

    // Real-world pages are rarely well-formed; collect libxml's parse
    // complaints internally instead of emitting one warning per problem.
    $previous = libxml_use_internal_errors(true);

    $doc = new DOMDocument();
    // NOTE(review): the XML-declaration prefix is the usual way to make
    // loadHTML() read the input as UTF-8 - confirm it takes precedence over
    // any charset <meta> tag still present in the fetched page.
    $loaded = $doc->loadHTML('<?xml encoding="UTF-8">' . $html);

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return null;
    }

    $table = $doc->getElementsByTagName('table')->item($index);
    if ($table === null) {
        return null;
    }

    // saveHTML($node) serialises the node with its markup; nodeValue would
    // flatten it to plain text.
    return $doc->saveHTML($table);
}

header('Content-Type: text/html; charset=utf-8');
// Kept from the original script: only fatal errors are reported.
error_reporting(E_ERROR);

$url = 'http://data.shishicai.cn/cqssc/haoma/';
$tableIndex = 2; // third table on the page, as in the original item(2)

// Bound the request so an unreachable host cannot hang the script.
$context = stream_context_create(['http' => ['timeout' => 10]]);
$pages = file_get_contents($url, false, $context);

if ($pages === false) {
    echo "Failed to fetch {$url}" . PHP_EOL;
} else {
    //$pages = htmlspecialchars($pages);
    $pages = convToUtf8($pages);
    echo "pages-->>" . $pages . PHP_EOL;

    $table = extractTableHtml($pages, $tableIndex);
    if ($table === null) {
        echo "table-->>(no <table> at index {$tableIndex})" . PHP_EOL;
    } else {
        echo "table-->>{$table}" . PHP_EOL;
    }
}