Home  >  Article  >  Backend Development  >  php采摘程序中常用的函数

php采摘程序中常用的函数

WBOY
WBOYOriginal
2016-06-13 10:40:33795browse

php采集程序中常用的函数

php采集程序中常用的函数

?

//获得当前的脚本网址function get_php_url(){        if(!empty($_SERVER["REQUEST_URI"])){                $scriptName = $_SERVER["REQUEST_URI"];                $nowurl = $scriptName;        }else{                $scriptName = $_SERVER["PHP_SELF"];                if(empty($_SERVER["QUERY_STRING"])) $nowurl = $scriptName;                else $nowurl = $scriptName."?".$_SERVER["QUERY_STRING"];        }        return $nowurl;}//把全角数字转为半角数字function GetAlabNum($fnum){        $nums = array("0","1","2","3","4","5","6","7","8","9");        $fnums = "0123456789";        for($i=0;$i<=9;$i++) $fnum = str_replace($nums[$i],$fnums[$i],$fnum);        $fnum = ereg_replace("[^0-9\.]|^0{1,}","",$fnum);        if($fnum=="") $fnum=0;        return $fnum;}//去除HTML标记function Text2Html($txt){        $txt = str_replace("  "," ",$txt);        $txt = str_replace("<","<",$txt);        $txt = str_replace(">",">",$txt);        $txt = preg_replace("/[\r\n]{1,}/isU","<br/>\r\n",$txt);        return $txt;}//清除HTML标记function ClearHtml($str){        $str = str_replace('<','<',$str);        $str = str_replace('>','>',$str);        return $str;}//相对路径转化成绝对路径function relative_to_absolute($content, $feed_url) {    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);    $server_url = preg_replace("/\/.*/", "", $server_url);    if ($server_url == '') {        return $content;    }    if (isset($protocol[0])) {        $new_content = preg_replace('/href="\//', 'href="'.$protocol[0].$server_url.'/', $content);        $new_content = preg_replace('/src="\//', 'src="'.$protocol[0].$server_url.'/', $new_content);    } else {        $new_content = $content;    }    return $new_content;}//取得所有链接function get_all_url($code){        preg_match_all('/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i',$code,$arr);        return array('name'=>$arr[2],'url'=>$arr[1]);}//获取指定标记中的内容function get_tag_data($str, $start, $end){        if ( $start == '' || $end == '' ){               return;        }        $str = explode($start, $str);        $str = explode($end, $str[1]);        return $str[0];}//HTML表格的每行转为CSV格式数组function get_tr_array($table) {        $table = preg_replace("'<td[^>]*?>'si",'"',$table);        $table = str_replace("</td>",'",',$table);        $table = str_replace("</tr>","{tr}",$table);        //去掉 HTML 标记        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si","",$table);        //去掉空白字符         $table = preg_replace("'([\r\n])[\s]+'","",$table);        $table = str_replace(" ","",$table);        $table = str_replace(" ","",$table);        $table = explode(",{tr}",$table);        array_pop($table);        return $table;}//将HTML表格的每行每列转为数组,采集表格数据function get_td_array($table) {        $table = preg_replace("'<table[^>]*?>'si","",$table);        $table = preg_replace("'<tr[^>]*?>'si","",$table);        $table = preg_replace("'<td[^>]*?>'si","",$table);        $table = str_replace("</tr>","{tr}",$table);        $table = str_replace("</td>","{td}",$table);        //去掉 HTML 标记        $table = preg_replace("'<[\/\!]*?[^<>]*?>'si","",$table);        //去掉空白字符         $table = preg_replace("'([\r\n])[\s]+'","",$table);        $table = str_replace(" ","",$table);        $table = str_replace(" ","",$table);               $table = explode('{tr}', $table);        array_pop($table);        foreach ($table as $key=>$tr) {                $td = explode('{td}', $tr);                array_pop($td);            $td_array[] = $td;        }        return $td_array;}//返回字符串中的所有单词 $distinct=true 去除重复function split_en_str($str,$distinct=true) {        preg_match_all('/([a-zA-Z]+)/',$str,$match);        if ($distinct == true) {                $match[1] = array_unique($match[1]);        }        sort($match[1]);        return $match[1];}
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn