Home > Article > Backend Development > 如何取出百度某排行榜50条并在每条加前后缀呢
怎么取出百度某排行榜50条并在每条加前后缀呢?
例如:http://top.baidu.com/buzz?b=26&c=1&fr=topcategory_c1
------解决思路----------------------
帶輸出的完整例子,你好好學習下。
中間你想改什麼,直接改就可以了。
<?php
/**
 * Fetches a top.baidu.com ranking page, extracts one record per ranking row
 * (keyword, detail URL, three related links, trend flag and count) and
 * renders the records as an HTML table.
 *
 * All values shown in the table come from a remote page, so every value is
 * HTML-escaped before it is printed.
 */

/**
 * Downloads a page with cURL and converts its body from GBK to UTF-8.
 *
 * Terminates the script with the cURL error message when the transfer fails.
 *
 * @param string $url Address of the page to download.
 * @return string The page body as UTF-8, or '' when the conversion fails.
 */
function getContent($url){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_TIMEOUT, 300);
    $response = curl_exec($ch);
    $error = curl_error($ch);
    // Release the handle on every path, including the failure path below.
    curl_close($ch);

    if($response === false || $error){
        die($error);
    }

    // iconv() returns false on failure; hand the parsers an empty string
    // instead so they simply find no rows.
    $content = iconv('GBK','UTF-8//IGNORE',$response);

    return $content === false ? '' : $content;
}

/**
 * Reads one captured value out of a preg_match_all() result.
 *
 * @param array $matches Result array filled by preg_match_all().
 * @param int   $group   Capture group number.
 * @param int   $index   Occurrence number within that group.
 * @return string The captured text, or '' when that capture does not exist.
 */
function matchAt($matches, $group, $index){
    return isset($matches[$group][$index]) ? $matches[$group][$index] : '';
}

/**
 * HTML-escapes a scraped value for use in element text or a quoted attribute.
 *
 * Existing entities are left alone (double_encode = false) so text that the
 * source page already encoded is not encoded a second time.
 *
 * @param mixed $value Value to print.
 * @return string Escaped text.
 */
function esc($value){
    return htmlspecialchars((string)$value, ENT_QUOTES, 'UTF-8', false);
}

/**
 * Extracts the keyword name and detail URL from every <td class="keyword"> cell.
 *
 * One entry is produced per cell, even when the cell holds no link, so the
 * result stays index-aligned with getTc() and getNum().
 *
 * @param string $content Page HTML.
 * @return array List of array('name' => string, 'url' => string).
 */
function getKeywords($content){
    preg_match_all('/<td class="keyword">(.*?)<\/td>/is', $content, $keywords);

    $result = array();

    foreach($keywords[1] as $val){
        preg_match_all('/<a.*?href="(.*?)".*?>(.*?)<\/a>/is', $val, $tmp);
        $href = matchAt($tmp, 1, 0);
        $result[] = array(
            'name' => matchAt($tmp, 2, 0),
            // The first character of the href is dropped before the host is
            // prepended (presumably a leading "." of a relative path).
            'url'  => $href === '' ? '' : 'http://top.baidu.com'.substr($href, 1),
        );
    }

    return $result;
}

/**
 * Extracts the first three link targets from every <td class="tc"> cell.
 *
 * The links are taken by position and labelled brief, news and tieba; a
 * missing link yields '' instead of an undefined-offset error.
 *
 * @param string $content Page HTML.
 * @return array List of array('brief' => string, 'news' => string, 'tieba' => string).
 */
function getTc($content){
    preg_match_all('/<td class="tc">(.*?)<\/td>/is', $content, $tc);

    $result = array();

    foreach($tc[1] as $val){
        preg_match_all('/<a.*?href="(.*?)".*?>(.*?)<\/a>/is', $val, $tmp);
        $result[] = array(
            'brief' => matchAt($tmp, 1, 0),
            'news'  => matchAt($tmp, 1, 1),
            'tieba' => matchAt($tmp, 1, 2),
        );
    }

    return $result;
}

/**
 * Extracts the trend flag and the number from every <td class="last"> cell.
 *
 * The flag is the class of the first <span> with the "icon-" prefix removed
 * (e.g. icon-rise -> rise, icon-fall -> fall).
 *
 * @param string $content Page HTML.
 * @return array List of array('flag' => string, 'num' => string).
 */
function getNum($content){
    preg_match_all('/<td class="last">(.*?)<\/td>/is', $content, $last);

    $result = array();

    foreach($last[1] as $val){
        preg_match_all('/<span.*?class="(.*?)">(.*?)<\/span>/is', $val, $tmp);
        $result[] = array(
            'flag' => str_replace('icon-','',matchAt($tmp, 1, 0)),
            'num'  => matchAt($tmp, 2, 0),
        );
    }

    return $result;
}

$url = 'http://top.baidu.com/buzz?b=340&c=1&fr=topbuzz_b339_c1';
$content = getContent($url);
$result = array();

$keywords = getKeywords($content);
$tc = getTc($content);
$num = getNum($content);

// The three lists are matched up by position. If the page yields fewer
// link cells or trend cells than keyword cells, fall back to blanks rather
// than reading an offset that does not exist.
$emptyTc  = array('brief'=>'', 'news'=>'', 'tieba'=>'');
$emptyNum = array('flag'=>'', 'num'=>'');

foreach($keywords as $i => $keyword){
    $links = isset($tc[$i]) ? $tc[$i] : $emptyTc;
    $trend = isset($num[$i]) ? $num[$i] : $emptyNum;

    $result[] = array(
        'name'  => $keyword['name'],
        'url'   => $keyword['url'],
        'brief' => $links['brief'],
        'news'  => $links['news'],
        'tieba' => $links['tieba'],
        'flag'  => $trend['flag'],
        'num'   => $trend['num'],
    );
}
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
 <head>
  <meta http-equiv="content-type" content="text/html;charset=utf-8">
  <title> baidu </title>
 </head>

 <body>
  <table>
  <?php
  // $i is the 1-based rank shown in the first column.
  $i = 1;
  foreach($result as $val){
  ?>
   <tr>
    <td><?=$i ?></td>
    <td><a href="<?=esc($val['url']) ?>"><?=esc($val['name']) ?></a></td>
    <td><a href="<?=esc($val['brief']) ?>">簡介</a></td>
    <td><a href="<?=esc($val['news']) ?>">新聞</a></td>
    <td><a href="<?=esc($val['tieba']) ?>">貼吧</a></td>
    <td><?php if($val['flag']==='fall'){ echo '↓'; }else{ echo '↑'; } ?><?=esc($val['num']) ?></td>
   </tr>
  <?php
   $i++;
  }
  ?>
  </table>
 </body>
</html>