Home  >  Article  >  Backend Development  >  Web page snapshot function_PHP tutorial

Web page snapshot function_PHP tutorial

WBOY
WBOYOriginal
2016-07-20 11:05:24 · 1125 views

<?php
//====================================================
//        FileName:     snap.class.php
//        Summary:      Web page snapshot class
//        Author:       millken (迷路林肯)
//        LastModified: 2007-06-29
//        Copyright (c) 2007 millken@gmail.com
//====================================================
/**
 * Web page snapshot cache.
 *
 * Downloads a remote page, rewrites the relative links of its <img>, <a> and
 * <link> tags into absolute URLs, strips <script> elements, and stores the
 * result in a file named after the MD5 of the URL inside $dir. While the
 * stored copy is at most $ttl seconds old it is served instead of the
 * remote page.
 *
 * $log is an HTML fragment (lines end in "<br>"); values taken from the
 * caller are HTML-escaped before they are appended to it.
 */
class snap
{
    /** @var string Cache directory, with trailing slash. */
    public $dir;
    /** @var string Accumulated HTML log of what the object did. */
    public $log;
    /** @var string Page body produced by the last successful fetch(). */
    public $contents;
    /** @var string Full path of the cache file for the current URL. */
    public $filename;
    /** @var string "scheme://host/" of the current URL. */
    public $host;
    /** @var string MD5 of the current URL (cache file name). */
    public $name;
    /** @var int Modification time of the cache file, 0 when it does not exist. */
    public $data_ts;
    /** @var int Maximum cache age in seconds. */
    public $ttl;
    /** @var string URL passed to the last fetch(). */
    public $url;
    /** @var int Age of the cache file in seconds at the last lookup. */
    public $ts;

    /**
     * Initialises the log and points the cache directory at the directory
     * containing this file.
     */
    public function __construct()
    {
        $this->log = "New snap() object instantiated.<br>\n";
        $this->dir = dirname(__FILE__) . "/";
    }

    /**
     * Legacy PHP 4 style constructor name, kept so existing code that calls
     * it explicitly keeps working. PHP 8 no longer treats it as a
     * constructor, which is why __construct() exists.
     */
    public function snap()
    {
        $this->__construct();
    }

    /**
     * Fetches $url, using the cached copy when it is at most $ttl seconds old.
     *
     * Only http, https and ftp URLs with a host are accepted: the URL
     * typically comes straight from the request, and passing other schemes or
     * bare paths to the stream layer would expose local files.
     *
     * @param string $url Absolute URL of the page to snapshot.
     * @param int    $ttl Maximum age of the cached copy, in seconds.
     * @return bool True when $contents was populated, false otherwise.
     */
    public function fetch($url = "", $ttl = 10)
    {
        $this->logLine("--------------------------------<br>\nfetch() called");
        $this->logLine("url: " . htmlspecialchars(is_string($url) ? $url : '', ENT_QUOTES));

        // Validate before touching parse_url() results.
        if (!is_string($url) || $url === '') {
            $this->logLine("OOPS: You need to pass a URL!");
            return false;
        }

        $parts  = parse_url($url);
        $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
        if (!in_array($scheme, array('http', 'https', 'ftp'), true) || empty($parts['host'])) {
            $this->logLine("OOPS: Only absolute http, https or ftp URLs are supported!");
            return false;
        }

        $this->host     = $parts['scheme'] . '://' . $parts['host'] . '/';
        $this->ttl      = $ttl;
        $this->url      = $url;
        $this->name     = md5($this->url);
        $this->filename = $this->dir . $this->name;
        $this->logLine("Filename: " . htmlspecialchars($this->filename, ENT_QUOTES));

        $this->getFile_ts();
        return $this->file_get_content();
    }

    /**
     * Loads $contents from the cache file when it is fresh, otherwise from
     * the remote URL (and then refreshes the cache).
     *
     * When the remote fetch fails the existing cache file is left untouched
     * and, if one exists, its stale contents are served.
     *
     * @return bool True when $contents was populated, false otherwise.
     */
    public function file_get_content()
    {
        if (!$this->url || !$this->filename) {
            $this->logLine("file_get_content() called before fetch()");
            return false;
        }

        $this->ts = time() - $this->data_ts;
        $hasCache = ($this->data_ts != 0);

        if ($hasCache && $this->ts <= $this->ttl) {
            $cached = @file_get_contents($this->filename);
            if ($cached !== false) {
                $this->logLine("cache hasn't expired");
                $this->contents = $cached;
                return true;
            }
            $this->logLine("Could not read " . htmlspecialchars($this->filename, ENT_QUOTES));
        }

        $this->logLine("cache has expired");
        $fresh = @file_get_contents($this->url);
        if ($fresh === false) {
            $this->logLine("Could not fetch " . htmlspecialchars($this->url, ENT_QUOTES));
            if ($hasCache) {
                $stale = @file_get_contents($this->filename);
                if ($stale !== false) {
                    $this->logLine("serving stale cache");
                    $this->contents = $stale;
                    return true;
                }
            }
            return false;
        }

        $this->contents = $fresh;
        $this->saveToCache();
        return true;
    }

    /**
     * Post-processes $contents (absolute links, scripts removed) and writes
     * the result to the cache file.
     *
     * The processing happens before the file is touched, so $contents is in
     * the same form whether or not the write succeeds.
     *
     * @return bool True when the cache file was written.
     */
    public function saveToCache()
    {
        $this->logLine("saveToCache() called");

        // Resolve links against the page URL; fall back to the host root
        // when only $host is known.
        $base      = $this->url ? $this->url : $this->host;
        $processed = $this->formaturl((string) $this->contents, (string) $base);

        // preg_replace() returns null on PCRE failure (e.g. backtrack limit);
        // keep the unstripped text in that case rather than losing the page.
        $stripped = preg_replace('#<script[^>]*?>.*?</script>#si', '', $processed);
        if ($stripped !== null) {
            $processed = $stripped;
        }
        $this->contents = $processed;

        if (!$this->filename) {
            $this->logLine("Could not write cache: no file name set");
            return false;
        }
        // Compare with false explicitly: writing 0 bytes is not an error.
        if (@file_put_contents($this->filename, $this->contents, LOCK_EX) === false) {
            $this->logLine("Could not write to " . htmlspecialchars($this->filename, ENT_QUOTES));
            return false;
        }
        return true;
    }

    /**
     * Records the modification time of the cache file in $data_ts
     * (0 when the file does not exist).
     *
     * @return bool True when the cache file exists.
     */
    public function getFile_ts()
    {
        $this->logLine("getFile_ts() called");
        if (!$this->filename || !file_exists($this->filename)) {
            $this->data_ts = 0;
            $this->logLine(htmlspecialchars((string) $this->filename, ENT_QUOTES) . " does not exist");
            return false;
        }
        $this->data_ts = filemtime($this->filename);
        return true;
    }

    /**
     * Rewrites the href/src attribute of every <img>, <a> and <link> tag in
     * $l1 to an absolute URL, resolved against $l2.
     *
     * @param string $l1 HTML text.
     * @param string $l2 Absolute base URL.
     * @return string HTML with rewritten tags.
     */
    public function formaturl($l1, $l2)
    {
        $self   = $this;
        $result = preg_replace_callback(
            '/<(?:img|a|link)\b[^>]*>/i',
            function ($match) use ($self, $l2) {
                return $self->lIIIIl($match[0], $l2);
            },
            $l1
        );
        // null signals a PCRE error; return the input unchanged in that case.
        return $result === null ? $l1 : $result;
    }

    /**
     * Rewrites the first href/src attribute of a single tag to an absolute URL.
     *
     * The tag is returned unchanged when it has no href/src value, when the
     * base URL has no scheme or host, when the link already carries a scheme
     * (http:, mailto:, javascript:, ...) or when it is only a "#fragment".
     *
     * @param string $l1 One HTML tag, e.g. '<img src="a.png">'.
     * @param string $l2 Absolute base URL.
     * @return string The tag with its link made absolute.
     */
    public function lIIIIl($l1, $l2)
    {
        // The lookbehind keeps attributes such as data-src from matching.
        $pattern = '/(?<![\w-])(?:href|src)\s*=\s*("[^"]*"|\'[^\']*\'|[^\s"\'>]+?(?=\s|\/?>|$))/i';
        if (!preg_match($pattern, $l1, $regs, PREG_OFFSET_CAPTURE)) {
            return $l1;
        }
        $raw    = $regs[1][0];
        $offset = $regs[1][1];

        if ($raw[0] === '"' || $raw[0] === "'") {
            $quote = $raw[0];
            $link  = (string) substr($raw, 1, -1);
        } else {
            $quote = '"';
            $link  = $raw;
        }
        $link = trim($link);
        if ($link === '') {
            return $l1;
        }

        $absolute = $this->resolveUrl($link, $l2);
        if ($absolute === null) {
            return $l1;
        }
        // Replace exactly the matched value, not every occurrence of it.
        return substr_replace($l1, $quote . $absolute . $quote, $offset, strlen($raw));
    }

    /**
     * Resolves $link against $baseUrl.
     *
     * @param string $link    Link as written in the page.
     * @param string $baseUrl Absolute base URL.
     * @return string|null Absolute URL, or null when the link must be left alone.
     */
    private function resolveUrl($link, $baseUrl)
    {
        $base = parse_url($baseUrl);
        if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
            return null;
        }
        // In-page anchors and links that already have a scheme stay as they are.
        if ($link[0] === '#' || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)) {
            return null;
        }
        // Protocol-relative link: only the scheme is missing.
        if (substr($link, 0, 2) === '//') {
            return $base['scheme'] . ':' . $link;
        }

        $origin = $base['scheme'] . '://' . $base['host'];
        if (isset($base['port'])) {
            $origin .= ':' . $base['port'];
        }
        // Host-relative link.
        if ($link[0] === '/') {
            return $origin . $link;
        }

        $basePath  = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';
        $lastSlash = strrpos($basePath, '/');
        $baseDir   = ($lastSlash === false) ? '/' : substr($basePath, 0, $lastSlash + 1);

        // Keep "?query" / "#fragment" out of the dot-segment handling.
        $cut      = strcspn($link, '?#');
        $linkPath = (string) substr($link, 0, $cut);
        $suffix   = (string) substr($link, $cut);

        // A link that is only "?query" refers to the base document itself.
        $path = ($linkPath === '') ? $basePath : $this->normalizePath($baseDir . $linkPath);

        return $origin . $path . $suffix;
    }

    /**
     * Removes "." and ".." segments from an absolute path. ".." never climbs
     * above the root.
     *
     * @param string $path Path starting with "/".
     * @return string Normalised path.
     */
    private function normalizePath($path)
    {
        $segments  = explode('/', $path);
        $lastIndex = count($segments) - 1;
        $out       = array();
        foreach ($segments as $i => $segment) {
            if ($segment === '.' || $segment === '..') {
                // $out[0] is the empty segment before the leading slash.
                if ($segment === '..' && count($out) > 1) {
                    array_pop($out);
                }
                // A trailing dot segment denotes a directory: keep the slash.
                if ($i === $lastIndex) {
                    $out[] = '';
                }
                continue;
            }
            $out[] = $segment;
        }
        return implode('/', $out);
    }

    /**
     * Appends one line to the HTML log.
     *
     * @param string $message Already HTML-safe text.
     */
    private function logLine($message)
    {
        $this->log .= $message . "<br>\n";
    }
}
?>
 

Usage (test.php):

 require_once(dirname(__FILE__).'/snap.class.php');
$h = new snap();
$h->fetch($_GET['url']);
//echo $h->log;
echo $h->contents;
?>


www.bkjia.com | true | http://www.bkjia.com/PHPjc/445124.html | TechArticle | <?PHP //==================================================== // FileName: snap.class.php // Summary: 网页快照类 (web page snapshot class) // Author: millken(迷路林肯) // LastModifed:2007-06-29 // co...
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn