/*
 * php采集页面内容并自动转码
 * 用法 get_contents('http://www.yi210.com', 20);
 * 采集页面内容并自动转码为 UTF-8
 * get_contents()自定义函数
 * $url 需要采集的页面地址(需包含 http:// 等协议前缀)
 * $timeout 超时时间(秒),默认20
 */
/**
 * Fetch the body of a URL and return it converted to UTF-8.
 *
 * Uses cURL when the extension is available; otherwise falls back to
 * file_get_contents() with a stream context carrying the same timeout.
 *
 * @param string $url     Address of the page to fetch.
 * @param int    $timeout Timeout in seconds (default 20). With cURL it is used
 *                        for both the connect phase and the whole transfer.
 *
 * @return string|false The fetched body passed through my_encoding(), or
 *                      false when the request fails or the body is empty.
 */
function get_contents($url, $timeout = 20)
{
    if (function_exists('curl_init')) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        // Make curl_exec() return the body instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        $content = curl_exec($ch);
        curl_close($ch);
    } else {
        // No cURL: pass the timeout through an HTTP stream context instead.
        $context = stream_context_create(array(
            'http' => array(
                'timeout' => $timeout,
            ),
        ));
        // Warnings are suppressed on purpose; failure is reported as false.
        $content = @file_get_contents($url, false, $context);
    }

    // Compare explicitly instead of by truthiness: a body consisting of "0"
    // is a valid response and must not be reported as a failed fetch.
    if ($content === false || $content === '') {
        return false;
    }

    return my_encoding($content, 'utf-8');
}
<br>
/*
 * 检测页面内容的编码并自动转码
 * my_encoding()自定义函数
 * $data 为 curl_exec() 或 file_get_contents() 所获得的页面内容
 * $to 需要转成的编码
 */
/**
 * Detect the character encoding of fetched page content and convert it.
 *
 * @param string $data Page content, e.g. as returned by curl_exec() or
 *                     file_get_contents().
 * @param string $to   Name of the target encoding, e.g. 'utf-8'.
 *
 * @return string The result of mb_convert_encoding(), or $data unchanged when
 *                none of the candidate encodings matches.
 */
function my_encoding($data, $to)
{
    // Candidate source encodings handed to the detector.
    $encode_arr = array('UTF-8', 'ASCII', 'GBK', 'GB2312', 'BIG5', 'JIS', 'eucjp-win', 'sjis-win', 'EUC-JP');

    // Strict mode: only report an encoding in which $data is actually valid.
    // Without it the detector returns its closest guess, so the content can be
    // labelled with the wrong source encoding and converted into garbage.
    $encoded = mb_detect_encoding($data, $encode_arr, true);

    // Nothing matched: hand the content back untouched rather than passing
    // false to mb_convert_encoding() as the source encoding.
    if ($encoded === false) {
        return $data;
    }

    return mb_convert_encoding($data, $to, $encoded);
}
个站地址:http://www.yi210.com/?p=932
AD:真正免费,域名+虚机+企业邮箱=0元