Heim  >  Artikel  >  Backend-Entwicklung  >  网页快照功能_PHP教程

网页快照功能_PHP教程

WBOY
WBOY · Original
2016-07-20 11:05:24 · 1126 Durchsuche

<?php
//====================================================
//        FileName:     snap.class.php
//        Summary:      Web page snapshot class (网页快照类)
//        Author:       millken (迷路林肯)
//        LastModified: 2007-06-29
//        Copyright (c) 2007 millken@gmail.com
//====================================================
/**
 * Fetches a web page, rewrites its relative links to absolute ones, strips
 * <script> elements and keeps the result in a file cache next to this file.
 *
 * Typical use: $s = new snap(); $s->fetch($url); echo $s->contents;
 * A plain-text trace of what happened is accumulated in $log.
 */
class snap{
    /** Directory (with trailing slash) in which cache files are stored. */
    public $dir;
    /** Newline-separated trace of the operations performed so far. */
    public $log;
    /** Page body produced by the last successful fetch. */
    public $contents;
    /** Full path of the cache file for the current URL. */
    public $filename;
    /** "scheme://host/" of the current URL. */
    public $host;
    /** md5() of the current URL; used as the cache file name. */
    public $name;
    /** Modification time of the cache file, or 0 when there is none. */
    public $data_ts;
    /** Maximum cache age in seconds. */
    public $ttl;
    /** URL passed to the last fetch() call. */
    public $url;
    /** Age of the cache file in seconds at the time of the last lookup. */
    public $ts;

    /**
     * Starts the log and points the cache directory at this file's directory.
     */
    public function __construct(){
        $this->log = "New snap() object instantiated.\n";
        $this->dir = dirname(__FILE__)."/";
    }

    /**
     * Legacy PHP 4-style constructor name, kept as a plain method so that
     * existing parent::snap() / ->snap() calls keep working. PHP 8 no longer
     * treats a method named after the class as a constructor.
     */
    public function snap(){
        $this->__construct();
    }

    /**
     * Loads $url into $this->contents, from cache when it is fresh enough.
     *
     * @param string $url Absolute http:// or https:// URL of the page.
     * @param int    $ttl Maximum age of a cached copy, in seconds.
     * @return bool true when $this->contents was filled, false otherwise
     *              (the reason is appended to $this->log).
     */
    public function fetch($url="",$ttl=10){
        $this->log .= "--------------------------------\nfetch() called\n";
        if (!is_string($url) || !$url) {
            $this->log .= "OOPS: You need to pass a URL!\n";
            return false;
        }
        $this->log .= "url: ".$url."\n";

        // The URL usually comes straight from the request, so only remote web
        // pages are accepted: without this check a local path or a file://
        // URL would be read from disk and echoed back to the visitor.
        // NOTE(review): internal hosts (localhost, private ranges) are still
        // reachable over http; restrict them here if this is exposed publicly.
        $parts    = parse_url($url);
        $scheme   = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
        $hostName = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';
        if (($scheme !== 'http' && $scheme !== 'https') || $hostName === '') {
            $this->log .= "OOPS: Only absolute http(s) URLs can be fetched!\n";
            return false;
        }

        $this->host = $scheme.'://'.$hostName.'/';
        $this->ttl = $ttl;
        $this->url = $url;
        $this->name = md5($this->url);
        $this->filename = $this->dir.$this->name;
        $this->log .= "Filename: ".$this->filename."\n";
        $this->getFile_ts();
        return $this->file_get_content();
    }

    /**
     * Fills $this->contents from the cache file when it is younger than
     * $this->ttl seconds, otherwise downloads $this->url and refreshes the
     * cache. Expects $url, $filename, $ttl and $data_ts to be set (fetch()
     * does that).
     *
     * @return bool false when the page could not be downloaded.
     */
    public function file_get_content(){
        $this->ts = time() - $this->data_ts;
        if ($this->data_ts > 0 && $this->ts < $this->ttl) {
            $cached = @file_get_contents($this->filename);
            if ($cached !== false) {
                $this->log .= "cache hasn't expired\n";
                $this->contents = $cached;
                return true;
            }
            // Fall through to a fresh download if the cache file vanished
            // or became unreadable between the stat and the read.
            $this->log .= "Could not read ".$this->filename."\n";
        } else {
            $this->log .= "cache has expired\n";
        }

        $fresh = @file_get_contents($this->url);
        if ($fresh === false) {
            // Do not overwrite the cache with an empty snapshot.
            $this->log .= "Could not fetch ".$this->url."\n";
            $this->contents = "";
            return false;
        }
        $this->contents = $fresh;
        $this->saveToCache();
        return true;
    }

    /**
     * Rewrites links in $this->contents to absolute URLs, removes <script>
     * elements and writes the result to the cache file. $this->contents is
     * left holding the rewritten page even when the write fails.
     *
     * @return bool false when the cache file could not be written.
     */
    public function saveToCache(){
        $this->log .= "saveToCache() called\n";
        // Resolve against the page URL itself, not just scheme://host/, so
        // that "pic.png" on /a/b/page.html becomes /a/b/pic.png.
        $base = $this->url ? $this->url : $this->host;
        $this->contents = $this->formaturl($this->contents,$base);
        $stripped = preg_replace("'<script[^>]*?>.*?</script>'si","",$this->contents);
        if ($stripped !== null) {
            // null means PCRE gave up (e.g. backtrack limit); keep the page.
            $this->contents = $stripped;
        }
        if (@file_put_contents($this->filename,$this->contents,LOCK_EX) === false) {
            $this->log .= "Could not write to ".$this->filename."\n";
            return false;
        }
        return true;
    }

    /**
     * Records the cache file's modification time in $this->data_ts
     * (0 when the file does not exist).
     *
     * @return bool true when the cache file exists.
     */
    public function getFile_ts(){
        $this->log .= "getFile_ts() called\n";
        if (!file_exists($this->filename)) {
            $this->data_ts = 0;
            $this->log .= $this->filename." does not exist\n";
            return false;
        }
        $this->data_ts = filemtime($this->filename);
        return true;
    }

    /**
     * Makes the src/href of every <img>, <a> and <link> tag in $l1 absolute.
     *
     * NOTE(review): the tag names were lost in the copy this was restored
     * from; img/a/link is a reconstruction from the surviving attribute
     * names - confirm against the original source if available.
     *
     * @param string $l1 HTML document.
     * @param string $l2 Absolute URL the document was loaded from.
     * @return string The document with rewritten tags.
     */
    public function formaturl($l1,$l2){
        $pattern = '/<(?:img\b[^>]*?(?<![\w-])src|(?:a|link)\b[^>]*?(?<![\w-])href)\s*=\s*(["\'])(?:(?!\1).)+\1[^>]*>/i';
        if (!preg_match_all($pattern,$l1,$regs)) {
            return $l1;
        }
        // A tag that occurs several times only needs one str_replace pass.
        foreach (array_unique($regs[0]) as $tag) {
            $rewritten = $this->lIIIIl($tag,$l2);
            if ($rewritten !== $tag) {
                $l1 = str_replace($tag,$rewritten,$l1);
            }
        }
        return $l1;
    }

    /**
     * Rewrites the first quoted href/src attribute of a single tag to an
     * absolute URL. Links that already carry a scheme (http:, mailto:,
     * javascript:, ...) and in-page anchors ("#top") are returned untouched,
     * as is the whole tag when the base URL has no host.
     *
     * @param string $l1 One HTML tag, e.g. '<img src="pic.png">'.
     * @param string $l2 Absolute URL of the page the tag came from.
     * @return string The tag with its link made absolute.
     */
    public function lIIIIl($l1,$l2){
        // The lookbehind keeps data-src / xlink:href style attributes out.
        if (!preg_match('/(?<![\w-])(href|src)(\s*=\s*)(["\'])(.*?)\3/i',$l1,$regs)) {
            return $l1;
        }
        $link = $regs[4];
        if ($link === '' || $link[0] === '#') {
            return $l1;
        }
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i',$link)) {
            return $l1;
        }

        $base = parse_url($l2);
        if (!is_array($base) || empty($base['host'])) {
            return $l1;
        }
        $scheme = isset($base['scheme']) ? $base['scheme'] : '';
        $origin = ($scheme !== '' ? $scheme.':' : '').'//'.$base['host'];
        if (isset($base['port'])) {
            $origin .= ':'.$base['port'];
        }

        if (strncmp($link,'//',2) === 0) {
            // Protocol-relative: only the scheme is missing.
            if ($scheme === '') {
                return $l1;
            }
            $resolved = $scheme.':'.$link;
        } elseif ($link[0] === '/') {
            // Root-relative.
            $resolved = $origin.$link;
        } else {
            // Document-relative: start from the directory of the base path
            // (always kept with a trailing slash) and consume leading
            // "./" and "../" segments.
            $path  = isset($base['path']) ? $base['path'] : '/';
            $slash = strrpos($path,'/');
            $dir   = ($slash === false) ? '/' : substr($path,0,$slash + 1);
            while (true) {
                if (strncmp($link,'./',2) === 0) {
                    $link = (string) substr($link,2);
                } elseif (strncmp($link,'../',3) === 0) {
                    $link    = (string) substr($link,3);
                    $trimmed = rtrim($dir,'/');
                    $cut     = strrpos($trimmed,'/');
                    // Never climb above the site root.
                    $dir     = ($cut === false) ? '/' : substr($trimmed,0,$cut + 1);
                } else {
                    break;
                }
            }
            $resolved = $origin.$dir.$link;
        }

        // Keep the attribute name, spacing and quote character as written.
        return str_replace($regs[0],$regs[1].$regs[2].$regs[3].$resolved.$regs[3],$l1);
    }
}
?>
 

用法（test.php）：

<?php
// Usage example (test.php): shows a cached snapshot of the page named by the
// "url" query parameter, e.g. test.php?url=http://example.com/
require_once(dirname(__FILE__).'/snap.class.php');

// The parameter may be absent or an array (?url[]=...); treat both as empty.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';

$h = new snap();
if ($h->fetch($url) === false) {
    header('HTTP/1.1 400 Bad Request');
    echo 'Could not create a snapshot for the requested URL.';
    exit;
}
//echo $h->log;
// NOTE(review): this emits third-party HTML under this site's origin; serve
// it from a sandboxed domain or behind a restrictive Content-Security-Policy.
echo $h->contents;
?>


www.bkjia.com | true | http://www.bkjia.com/PHPjc/445124.html | TechArticle: <?PHP //==================================================== // FileName: snap.class.php // Summary: 网页快照类 // Author: millken(迷路林肯) // LastModified: 2007-06-29 // co...
Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn