玩了 cURL 之后发现 PHP 正则真的很重要,可惜我还很菜。我抓取了网页上的三个字段,写了个 demo。下面贴出的代码复制下来即可直接运行。这其实就是利用别人的数据自己做 API 的一种思路。
<?php
/**
 * Fetch a URL with cURL and return the response body, trimmed.
 *
 * Redirects are followed (capped, so a redirect loop cannot spin forever) and
 * both the connect phase and the whole transfer are time-limited, so a stalled
 * server cannot block the caller indefinitely.
 *
 * @param string $url Address to request.
 * @return string Trimmed response body, or an empty string when the handle
 *                cannot be created or the transfer fails.
 */
function getwebcontent($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $response = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on failure; report that as an empty body
    // explicitly rather than relying on trim() coercing false to ''.
    if ($response === false) {
        return '';
    }

    return trim($response);
}
/**
 * Remove every tab, carriage return and line feed from a string and trim
 * whitespace from both ends.
 *
 * Despite the name, no HTML tags are removed; only layout whitespace is
 * stripped so the text becomes a single line.
 *
 * @param string $str Text to flatten.
 * @return string The flattened, trimmed text.
 */
function DeleteHtml($str)
{
    // One pass over the list of layout characters; "\r\n" is kept in the list
    // to mirror the original replacement order.
    $layoutChars = array("\t", "\r\n", "\r", "\n");
    $flattened = str_replace($layoutChars, "", $str);

    return trim($flattened);
}
// Demo: download the jin10.com front page, pull three fields out of it with
// regular expressions, and pair them up by position into a list of news items.
set_time_limit(0); // lift PHP's execution time limit for the download

$datas = DeleteHtml(getwebcontent('http://www.jin10.com'));

/* Every HH:MM timestamp on the page. */
preg_match_all('/(\d{2}:\d{2})/', $datas, $time);
/* Inner text of every <td ... id="content_N"> cell. The cell's attributes are
   skipped with [^>] and the lazy capture is closed by the </td> tag; without
   that closing anchor the capture would stop after a single character. */
preg_match_all('/<td[^>]+?id\s*=\s*"content_\d+">(.+?)<\/td>/', $datas, $content);
/* Importance label of every item (the quoted label text as it appears in the page). */
preg_match_all('/"普通新闻"|"重要新闻"|"一般数据"|"重要数据"/', $datas, $importance);

// Fall back to empty lists if a pattern failed to run.
$contents = isset($content[1]) ? $content[1] : [];
$times = isset($time[1]) ? $time[1] : [];
$importances = isset($importance[0]) ? $importance[0] : [];

$news = [];
foreach ($contents as $key => $value) {
    // The three lists are matched purely by index and may differ in length,
    // so guard each lookup instead of triggering undefined-offset warnings.
    $news[] = [
        "content" => $value,
        "time" => isset($times[$key]) ? $times[$key] : null,
        "importance" => isset($importances[$key]) ? str_replace('"', '', $importances[$key]) : '',
    ];
}
var_dump($news);
<br>
?> <p class="attach">
<img src="/Public/Images/extension/common.gif" style="max-width:90%" border="0" alt="附件" align="absmiddle">
109news.rar
<span class="date">( 1.12 MB 下载:7 次 )</span>
</p> <p class="da_word"><think id="ad_196de4c105c1d8be"></think></p>
</td>