search
Home > php教程 > php手册 > PHP采集程序中常用的函数

PHP采集程序中常用的函数

Jun 13, 2016 am 10:59 AM
php, example, Keywords, function, Inquire, use, of, program, get, collection

函数描述及例子 PHP采集程序中常用的函数 查询关键字 PHP采集程序中常用的函数

/**
 * Returns the address (path plus query string) of the current request.
 *
 * $_SERVER["REQUEST_URI"] is used when it is non-empty. Otherwise the value
 * is rebuilt from $_SERVER["PHP_SELF"], with "?" and
 * $_SERVER["QUERY_STRING"] appended when the query string is non-empty.
 *
 * @return string
 */
function get_php_url(){
    // Preferred source: the request URI exactly as the server reports it.
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback: script path, plus the query string when there is one.
    $path = $_SERVER["PHP_SELF"];
    return empty($_SERVER["QUERY_STRING"]) ? $path : $path."?".$_SERVER["QUERY_STRING"];
}
/**
 * Converts full-width digits to ASCII digits and strips everything that is
 * not part of a plain decimal number.
 *
 * After the digit conversion, every character other than 0-9 and "." is
 * removed, as are zeros at the very start of the string.
 *
 * @param string $fnum Text that may contain full-width digits.
 * @return string|int The cleaned numeric string, or integer 0 when nothing is left.
 */
function GetAlabNum($fnum){
    // The full-width digit table had been flattened to ASCII, which turned
    // the conversion into a no-op. The literals below are UTF-8 encoded, so
    // the input is expected to be UTF-8 as well.
    $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
    $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // ereg_replace() no longer exists as of PHP 7; preg_replace() with the
    // same expression keeps the behaviour.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum == "") {
        $fnum = 0;
    }
    return $fnum;
}
/**
 * Converts plain text into markup that displays literally in an HTML page.
 *
 * Angle brackets are escaped as entities and line-break characters are
 * turned into <br/> tags.
 *
 * @param string $txt Plain text.
 * @return string Text that is safe to place inside HTML element content.
 */
function Text2Html($txt){
    // Collapse each pair of spaces into a single space.
    // NOTE(review): the replacement may originally have been a full-width
    // space that was flattened to ASCII - confirm against the upstream source.
    $txt = str_replace("  "," ",$txt);

    // The entity names had been decoded back to "<" and ">", which made both
    // replacements no-ops; restore the escaping.
    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // The U (ungreedy) modifier makes {1,} match one character at a time, so
    // every single CR or LF produces its own break.
    $txt = preg_replace("/[\r\n]{1,}/isU","<br/>\r\n",$txt);
    return $txt;
}
/**
 * Neutralises HTML markup by escaping angle brackets as entities.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with "<" and ">" replaced by "&lt;" and "&gt;".
 */
function ClearHtml($str){
    // The quotes had been turned into &#39; and the entity names decoded,
    // leaving code that neither parsed nor escaped anything.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
/**
 * Rewrites root-relative href="/..." and src="/..." attributes so that they
 * point at the host of the given URL.
 *
 * @param string $content  HTML whose links should be rewritten.
 * @param string $feed_url URL the content was fetched from; it must contain an
 *                         http, https or ftp scheme for any rewriting to happen.
 * @return string The rewritten HTML, or $content unchanged when no host or
 *                scheme can be extracted from $feed_url.
 */
function relative_to_absolute($content, $feed_url) {
    // Scheme prefix such as "http://"; $protocol stays empty when none matches.
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Host part: drop the scheme, then everything from the first slash on.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    $base = $protocol[0].$server_url.'/';

    // The (?!\/) lookahead leaves protocol-relative URLs (href="//host/...")
    // alone; prefixing them with the host would produce a broken address.
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="'.$base, $content);
}
/**
 * Extracts every link (<a href=...>text</a>) from a piece of HTML.
 *
 * Only anchors whose first attribute is href and whose content holds no
 * nested tag are matched.
 *
 * @param string $code HTML source.
 * @return array Two parallel lists: 'name' holds the link texts and 'url'
 *               the href values.
 */
function get_all_url($code){
    // The head of the expression (<a\s+href=["|\']?([^>) had been swallowed
    // as markup, leaving an unbalanced pattern; it is restored here.
    preg_match_all('/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i', $code, $arr);
    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Returns the text found between a start marker and an end marker.
 *
 * The text after the first occurrence of $start is taken (up to the next
 * occurrence of $start, if any) and cut at the first occurrence of $end.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text; '' when $start does not occur in
 *                     $str; null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return null;
    }

    $pieces = explode($start, $str);
    // Without this guard a missing start marker reads an undefined offset and
    // passes null on to explode().
    if (!isset($pieces[1])) {
        return '';
    }

    $pieces = explode($end, $pieces[1]);
    return $pieces[0];
}
/**
 * Converts each row of an HTML table into one CSV-style string.
 *
 * Every cell becomes a double-quoted value and the cells of a row are joined
 * with commas, e.g. "a","b". All space characters are removed, including
 * those inside cell text.
 *
 * @param string $table HTML of the table.
 * @return array One string per table row.
 */
function get_tr_array($table) {
    // Opening <td ...> becomes the opening quote of a value.
    $table = preg_replace('~<td[^>]*?>~si', '"', $table);
    // The closing-tag needles had been swallowed as markup (leaving empty
    // search strings); they are restored here.
    $table = str_replace('</td>', '",', $table);
    $table = str_replace('</tr>', '{tr}', $table);
    // Strip all remaining HTML tags.
    $table = preg_replace('~<[/!]*?[^<>]*?>~si', '', $table);
    // Strip a line break together with the whitespace that follows it.
    $table = preg_replace('~[\r\n]\s+~', '', $table);
    $table = str_replace(array(' ', '&nbsp;'), '', $table);

    // Each row ends with ',{tr}', so the final piece is an empty leftover.
    $rows = explode(',{tr}', $table);
    array_pop($rows);
    return $rows;
}
/**
 * Converts an HTML table into a two-dimensional array of cell texts.
 *
 * All space characters are removed, including those inside cell text.
 *
 * @param string $table HTML of the table.
 * @return array List of rows, each row being a list of cell strings; an
 *               empty array when the input contains no rows.
 */
function get_td_array($table) {
    // Drop the opening table, row and cell tags.
    $table = preg_replace('~<table[^>]*?>~si', '', $table);
    $table = preg_replace('~<tr[^>]*?>~si', '', $table);
    $table = preg_replace('~<td[^>]*?>~si', '', $table);
    // The closing-tag needles had been swallowed as markup (leaving empty
    // search strings); they are restored here.
    $table = str_replace('</tr>', '{tr}', $table);
    $table = str_replace('</td>', '{td}', $table);
    // Strip all remaining HTML tags.
    $table = preg_replace('~<[/!]*?[^<>]*?>~si', '', $table);
    // Strip a line break together with the whitespace that follows it.
    $table = preg_replace('~[\r\n]\s+~', '', $table);
    $table = str_replace(array(' ', '&nbsp;'), '', $table);

    // Each row ends with '{tr}', so the final piece is a leftover.
    $rows = explode('{tr}', $table);
    array_pop($rows);

    // Start from an empty list so a table without rows yields array()
    // instead of an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        // Each cell ends with '{td}', so the final piece is a leftover too.
        $td = explode('{td}', $tr);
        array_pop($td);
        $td_array[] = $td;
    }
    return $td_array;
}
/**
 * Returns all English words (runs of ASCII letters) found in a string,
 * sorted in ascending order.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When true (the default), duplicate words are removed.
 * @return array Sorted list of words.
 */
function split_en_str($str,$distinct=true) {
    // The quotes had been turned into &#39;, which made this a syntax error.
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];
    if ($distinct == true) {
        $words = array_unique($words);
    }
    // sort() also re-indexes the list after array_unique() left gaps.
    sort($words);
    return $words;
}
 
函数描述及例子
 
PHP采集程序中常用的函数

查询关键字
 
PHP采集程序中常用的函数
<?php
/**
 * Returns the address (path plus query string) of the current request.
 *
 * $_SERVER["REQUEST_URI"] is used when it is non-empty. Otherwise the value
 * is rebuilt from $_SERVER["PHP_SELF"], with "?" and
 * $_SERVER["QUERY_STRING"] appended when the query string is non-empty.
 *
 * @return string
 */
function get_php_url(){
    // Preferred source: the request URI exactly as the server reports it.
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback: script path, plus the query string when there is one.
    $path = $_SERVER["PHP_SELF"];
    return empty($_SERVER["QUERY_STRING"]) ? $path : $path."?".$_SERVER["QUERY_STRING"];
}
/**
 * Converts full-width digits to ASCII digits and strips everything that is
 * not part of a plain decimal number.
 *
 * After the digit conversion, every character other than 0-9 and "." is
 * removed, as are zeros at the very start of the string.
 *
 * @param string $fnum Text that may contain full-width digits.
 * @return string|int The cleaned numeric string, or integer 0 when nothing is left.
 */
function GetAlabNum($fnum){
    // The full-width digit table had been flattened to ASCII, which turned
    // the conversion into a no-op. The literals below are UTF-8 encoded, so
    // the input is expected to be UTF-8 as well.
    $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
    $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // ereg_replace() no longer exists as of PHP 7; preg_replace() with the
    // same expression keeps the behaviour.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum == "") {
        $fnum = 0;
    }
    return $fnum;
}
/**
 * Converts plain text into markup that displays literally in an HTML page.
 *
 * Angle brackets are escaped as entities and line-break characters are
 * turned into <br/> tags.
 *
 * @param string $txt Plain text.
 * @return string Text that is safe to place inside HTML element content.
 */
function Text2Html($txt){
    // Collapse each pair of spaces into a single space.
    // NOTE(review): the replacement may originally have been a full-width
    // space that was flattened to ASCII - confirm against the upstream source.
    $txt = str_replace("  "," ",$txt);

    // The entity names had been decoded or mangled (the ">" needle had even
    // become "-->"), so nothing was escaped; restore the escaping.
    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // The U (ungreedy) modifier makes {1,} match one character at a time, so
    // every single CR or LF produces its own break.
    $txt = preg_replace("/[\r\n]{1,}/isU","<br/>\r\n",$txt);
    return $txt;
}
/**
 * Neutralises HTML markup by escaping angle brackets as entities.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with "<" and ">" replaced by "&lt;" and "&gt;".
 */
function ClearHtml($str){
    // The quotes had been turned into &#39; and the entity names decoded,
    // leaving code that neither parsed nor escaped anything.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
/**
 * Rewrites root-relative href="/..." and src="/..." attributes so that they
 * point at the host of the given URL.
 *
 * @param string $content  HTML whose links should be rewritten.
 * @param string $feed_url URL the content was fetched from; it must contain an
 *                         http, https or ftp scheme for any rewriting to happen.
 * @return string The rewritten HTML, or $content unchanged when no host or
 *                scheme can be extracted from $feed_url.
 */
function relative_to_absolute($content, $feed_url) {
    // Scheme prefix such as "http://"; $protocol stays empty when none matches.
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Host part: drop the scheme, then everything from the first slash on.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    $base = $protocol[0].$server_url.'/';

    // The (?!\/) lookahead leaves protocol-relative URLs (href="//host/...")
    // alone; prefixing them with the host would produce a broken address.
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="'.$base, $content);
}
/**
 * Extracts every link (<a href=...>text</a>) from a piece of HTML.
 *
 * Only anchors whose first attribute is href and whose content holds no
 * nested tag are matched.
 *
 * @param string $code HTML source.
 * @return array Two parallel lists: 'name' holds the link texts and 'url'
 *               the href values.
 */
function get_all_url($code){
    // The head of the expression (<a\s+href=["|\']?([^>) had been swallowed
    // as markup, leaving an unbalanced pattern; it is restored here.
    preg_match_all('/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i', $code, $arr);
    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Returns the text found between a start marker and an end marker.
 *
 * The text after the first occurrence of $start is taken (up to the next
 * occurrence of $start, if any) and cut at the first occurrence of $end.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text; '' when $start does not occur in
 *                     $str; null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return null;
    }

    $pieces = explode($start, $str);
    // Without this guard a missing start marker reads an undefined offset and
    // passes null on to explode().
    if (!isset($pieces[1])) {
        return '';
    }

    $pieces = explode($end, $pieces[1]);
    return $pieces[0];
}
/**
 * Converts each row of an HTML table into one CSV-style string.
 *
 * Every cell becomes a double-quoted value and the cells of a row are joined
 * with commas, e.g. "a","b". All space characters are removed, including
 * those inside cell text.
 *
 * @param string $table HTML of the table.
 * @return array One string per table row.
 */
function get_tr_array($table) {
    // Opening <td ...> becomes the opening quote of a value.
    $table = preg_replace('~<td[^>]*?>~si', '"', $table);
    // The closing-tag needles had been swallowed as markup (leaving empty
    // search strings); they are restored here.
    $table = str_replace('</td>', '",', $table);
    $table = str_replace('</tr>', '{tr}', $table);
    // Strip all remaining HTML tags.
    $table = preg_replace('~<[/!]*?[^<>]*?>~si', '', $table);
    // Strip a line break together with the whitespace that follows it.
    $table = preg_replace('~[\r\n]\s+~', '', $table);
    $table = str_replace(array(' ', '&nbsp;'), '', $table);

    // Each row ends with ',{tr}', so the final piece is an empty leftover.
    $rows = explode(',{tr}', $table);
    array_pop($rows);
    return $rows;
}
/**
 * Converts an HTML table into a two-dimensional array of cell texts.
 *
 * All space characters are removed, including those inside cell text.
 *
 * @param string $table HTML of the table.
 * @return array List of rows, each row being a list of cell strings; an
 *               empty array when the input contains no rows.
 */
function get_td_array($table) {
    // Drop the opening table, row and cell tags.
    $table = preg_replace('~<table[^>]*?>~si', '', $table);
    $table = preg_replace('~<tr[^>]*?>~si', '', $table);
    $table = preg_replace('~<td[^>]*?>~si', '', $table);
    // The closing-tag needles had been swallowed as markup (leaving empty
    // search strings); they are restored here.
    $table = str_replace('</tr>', '{tr}', $table);
    $table = str_replace('</td>', '{td}', $table);
    // Strip all remaining HTML tags.
    $table = preg_replace('~<[/!]*?[^<>]*?>~si', '', $table);
    // Strip a line break together with the whitespace that follows it.
    $table = preg_replace('~[\r\n]\s+~', '', $table);
    $table = str_replace(array(' ', '&nbsp;'), '', $table);

    // Each row ends with '{tr}', so the final piece is a leftover.
    $rows = explode('{tr}', $table);
    array_pop($rows);

    // Start from an empty list so a table without rows yields array()
    // instead of an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        // Each cell ends with '{td}', so the final piece is a leftover too.
        $td = explode('{td}', $tr);
        array_pop($td);
        $td_array[] = $td;
    }
    return $td_array;
}
/**
 * Returns all English words (runs of ASCII letters) found in a string,
 * sorted in ascending order.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When true (the default), duplicate words are removed.
 * @return array Sorted list of words.
 */
function split_en_str($str,$distinct=true) {
    // The quotes had been turned into &#39;, which made this a syntax error.
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];
    if ($distinct == true) {
        $words = array_unique($words);
    }
    // sort() also re-indexes the list after array_unique() left gaps.
    sort($words);
    return $words;
}
 
</td[^></tr[^></table[^></td[^></a\s+href=["|\&#39;]?([^></td[^></tr[^></table[^></td[^></a\s+href=["|\&#39;]?([^>

Statement
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Hot AI Tools

Undresser.AI Undress

Undresser.AI Undress

AI-powered app for creating realistic nude photos

AI Clothes Remover

AI Clothes Remover

Online AI tool for removing clothes from photos.

Undress AI Tool

Undress AI Tool

Undress images for free

Clothoff.io

Clothoff.io

AI clothes remover

Video Face Swap

Video Face Swap

Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Tools

Notepad++7.3.1

Notepad++7.3.1

Easy-to-use and free code editor

MantisBT

MantisBT

Mantis is an easy-to-deploy web-based defect tracking tool designed to aid in product defect tracking. It requires PHP, MySQL and a web server. Check out our demo and hosting services.

VSCode Windows 64-bit Download

VSCode Windows 64-bit Download

A free and powerful IDE editor launched by Microsoft

EditPlus Chinese cracked version

EditPlus Chinese cracked version

Small size, syntax highlighting, does not support code prompt function

SublimeText3 Mac version

SublimeText3 Mac version

God-level code editing software (SublimeText3)