Home  >  Article  >  Backend Development  >  php读取大文件示例分享(文件操作类)_PHP

php读取大文件示例分享(文件操作类)_PHP

WBOY
WBOYOriginal
2016-06-01 11:54:43 · 895 views

Lib_File2.php
复制代码 代码如下:
 class Lib_File2
 {
  //文件目录
  private $root = '/data/wwwroot/kkpromo/data/';

  //文件后缀
  private $suffix = '.log';

  //文件句柄
  private $handle=null;

  //一次读取文件的最大记录数
  private $limit=40000;

  //每行读取的字节长度
  private $length=1024;

  //开始时间
  private  $startTime=0;

  //内存使用基准点
  private static $startMemory=0;

  //
  private $conn=null;

  //
  private static  $init=null;

  public static function instance()
  {
   self::$startMemory = memory_get_usage(true);

   if(self::$init && is_object(self::$init))
   {
    return self::$init;
   }

   self::$init = new self();

   return self::$init;
  }

  private function __construct(){}

  public  function setRoot($root)  
  {
   if(!is_dir($root)) die($root.' ROOT DOES NOT EXIST');
   $this->root = $root;
  }

  public function setSuffix($suffix)
  {
   $this->suffix = $suffix;
  }

  public function setLimit($limit)
  {
   if(!is_numeric($limit)) die($limit.' SHOULD BE NUMBERIC');
   if(intval($limit) > 1000000) die($limit.' SHOULD BE LOWER THAN 1000000');
   $this->limit = intval($limit);
  }

  public function _getFile($date , $appid , $op)
  {
   $filename = rtrim($this->root , '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;
   if(!file_exists($filename))
   {
    die($filename.' FILE DOES NOT EXISTS!');
   }

   if(!is_file($filename))
   {
    die($filename.' FILE DOES NOT EXIST!');
   }

   if(!is_readable($filename))
   {
    die($filename.'  FILE ACCESS DENY!');
   }

   return $filename;
  }

  
  public function closeFile($date=null , $appid=null , $op=null)
  {
   if($op && $date && $appid)
   {
    if(is_object($this->handle[$date.'_'.$appid.'_'.$op]) || $this->conn[$date.'_'.$appid.'_'.$op])
    {
     unset($this->handle[$date.'_'.$appid.'_'.$op]);
     $this->handle[$date.'_'.$appid.'_'.$op]=null;
    }

    $this->conn[$date.'_'.$appid.'_'.$op]=null;
    $this->handle[$date.'_'.$appid.'_'.$op]=null;
    unset($this->handle[$date.'_'.$appid.'_'.$op]);
   }
   else {
    if(is_array($this->handle) && $this->handle)
    {
     foreach ($this->handle as $key=>$val){
      unset($this->handle[$key]);
      $this->conn[$key]=null;
      $this->handle[$key]=null;
     }
    }
   } 

   return true;
  }

  
  private function _openFile($date , $appid , $op)
  {   
   $this->startTime = microtime(true);
   if(isset($this->conn[$date.'_'.$appid.'_'.$op])  && $this->conn[$date.'_'.$appid.'_'.$op])
   {
    return $this->handle[$date.'_'.$appid.'_'.$op];
   }

   $filename = self::_getFile($date , $appid , $op);
   if(($this->handle[$date.'_'.$appid.'_'.$op] = new SplFileObject($filename , 'r'))!=null)
   {
    $this->conn[$date.'_'.$appid.'_'.$op] = true;
    return $this->handle[$date.'_'.$appid.'_'.$op];
   }
   else {
    die('FILE OPEN FAILED!');
   }
  }

  
  /**
   * 功能:解析数据
   * 格式:  array('timestamp','mid','data');
   * @param string $data
   * @return boolean|array
   */
  private  function _parseData($data , $jsonFlag=true)
  {
   if(empty($data) || !is_string($data)) return false;
   $result = array(
     'timestamp'=>0,
     'mid'=>0,
     'data'=>array(),
   );

   $data = explode('|', $data);
   if(count($data)    $result['timestamp'] = $data[0];
   $result['mid'] = $data[1];
   if($jsonFlag)
   {
    $result['data'] = @json_decode($data[2] , true);
    unset($result['mid']);
   }
   if(empty($result['timestamp']) || empty($result['mid'])) return false;

   unset($data);
   return $result;
  }

  
  /**
   * TODO:读取单一文件
   * @param string $date: 如(20140327)
   * @param int  $appid: 如(1000,9000)
   * @param string $op:如(show,login , index)
   * @param number $startNum 默认从第一行开始
   * @param number $length 默认到$this->limit 读取的行数
   * @param array $condition:array('mid'=>arrray() , 'ip'=>array() , ...)  过滤条件
   * @param bool $jsonFlag:默认为true, 则保留jsondata字段;设为false,则去掉false字段
   * @return array(count , diffTime , memory , data)
   */
  public  function readFile($date , $appid , $op , $startNum=0 , $length=0 , $jsonFlag=false ,  $condition=array())
  {
   $data['data'] = "";
   $data['count'] = 0;
   $index = $startNum;
   $startNum = empty($startNum) ? 0 : $startNum;
   $length = empty($length) ? $this->limit : $length;

   $handle = self::_openFile($date , $appid , $op );
   $line_number=0;

   if($handle)
   { 
    $handle->seek($startNum);
    $handle->setMaxLineLen($this->length);
    while (intval($line_number) - intval($startNum)     {
     $tmp = $handle->current();
     if(empty($tmp)) continue;     
     $tmp = self::_parseData($tmp , $jsonFlag);
     $line_number = $handle->key();
     !$jsonFlag && $condition= array();        
     if(isset($condition) && $condition)
     {
      $key = array_keys($condition);
      if(in_array($tmp['data'][$key[0]], $condition[$key[0]]))
      {
       $data['count']++;
       $data['data'][$line_number] = $tmp;
      }
     }
     else
     {
      $data['data'][$line_number] = $tmp;
      $data['count']++;
     }

     if(intval($line_number) - intval($startNum) >= intval($length)-1) break;
     unset($tmp);
     $handle->next();
    }
    unset($tmp , $length , $line_number , $condition);
   }

   $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $data['memory'] = doubleval((doubleval(memory_get_usage(true)) -  doubleval(self::$startMemory))/1024/1024) . ' M';

   return $data;
  }
  

  /**
   * TODO:命令行下获取文件总记录数*
   * @param string $date
   * @param int $appid
   * @param string $op
   * @return array
   */
  public  function total_lineFile($date, $appid, $op)
  {
   $this->_openFile($date, $appid, $op);
   $file = escapeshellarg($this->_getFile($date, $appid, $op)); // 对命令行参数进行安全转义
   $line = `wc -l  $file`;
   if(preg_match("/(\d{1,})/", $line , $ret)){
    $data['count']=$ret[1];
   }else{
    $data['count']=0;
   }
   $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $data['memory'] = doubleval((doubleval(memory_get_usage(true)) -  doubleval(self::$startMemory))/1024/1024) . ' M';
   return $data;
  }

  
  /**
   * TODO:统计{$data}.{$op}.log记录数
   * @param string $date
   * @param int $appid
   * @param string $op
   * @param array $condition
   * @return array
   */
  public function countFile($date , $appid , $op ,$condition=array())
  {
   $data['count'] = 0;
   $handle = self::_openFile($date , $appid , $op );  
   if($handle)
   {
    $handle->setMaxLineLen($this->length);
    while (!$handle->eof())
    {
     $tmp = $handle->current();
     if(empty($tmp)) continue;
     $tmp = self::_parseData($tmp);
     if($condition && is_array($condition) )
     {      
      $key = array_keys($condition);
      if(isset($tmp['data'][$key[0]]) && $tmp['data'][$key[0]] && in_array($tmp['data'][$key[0]], $condition[$key[0]]))
      {
       $data['count']++;
      }
     }
     else
     {
      $data['count']++;      
     }

     unset($tmp);     
     $handle->next();
    }
   }
   unset($handle , $condition , $tmp , $key , $val);
   self::closeFile($date , $appid , $op );

   $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $data['memory'] = doubleval((doubleval(memory_get_usage(true)) -  doubleval(self::$startMemory))/1024/1024) . ' M';

   return $data;
  }  

  

  /**
   * TODO:统计用户数
   * @param string $date
   * @param int $appid
   * @param string $op
   * @param bool $midflag :默认为false 则 mid返回空数组;如设为true,则mid数组不为空
   * * @param bool $jsonFlag:默认为true, 则保留jsondata字段;设为false,则去掉jsondata字段
   * @param array $condition
   * @return : array:形如({"mid":[],"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"})
   */
  public function countFileMID($date , $appid , $op  ,  $midflag=false , $jsonFlag=false,  $condition=array())
  {
   //$count = self::total_lineFile($date , $appid , $op );
   $count = self::countFile($date , $appid , $op );
   $index = ceil($count['count'] / $this->limit);
   $result = array('mid'=>array() , 'count'=>0 , 'diffTime'=>0 , 'memory'=>0);

   for ($i=0 ; $i   {
    $startNum = $this->limit*$i;
    $endNum = $this->limit;
    $data = self::readFile($date , $appid , $op  ,  $startNum , $endNum , $jsonFlag);
    var_dump($data);exit();

    if($data['data'] && is_array($data['data']))
    {
     foreach ($data['data'] as $arr)
     {
      if($condition && is_array($condition))
      {
       $key = array_keys($condition);
       if(isset($arr['data'][$key[0]])  && (in_array($arr['data'][$key[0]] , $condition[$key]) || empty($condition[$key[0]])))
       {
        $result['mid'][$arr["mid"]] =1;
        $result['count']++;
       }
      }
      else
      {
        $result['mid'][$arr["mid"]] =1; 
        $result['count']++;
      }
      unset($data);
     }
    }
   }
   unset($index , $count , $condition , $data  , $arr);
   self::closeFile($date , $appid , $op);

   $result['mid'] = array_keys($result['mid']);  
   if(empty($midflag)) unset($result['mid']); 

   $result['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $result['memory'] = (memory_get_usage(true) - self::$startMemory)/1024/1024 . ' M';
   return $result;
  }

  
  /**
   * TODO:跨时间段 统计参加$op用户数据
   * @param string $date
   * @param int $appid
   * @param string $op
   * @param number $day
   * @param bool  $midflag :默认为false 则 mid返回空数组;如设为true,则mid数组不为空
   * @return array 形如("20140326":{"mid":[],"count":4571,"diffTime":0.0806441307068,"memory":"3.75 M"},
  *          "20140325":{"mid":[],"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"})
   */
  public function getReturnUser($date , $appid , $op , $day=1 , $midflag=false)
  {
   $date_i=0;
   for ($i =0; $i    $date_i = date('Ymd' , strtotime($date)-$i*86400);
    $result[$date_i] = self::countFileMID($date_i , $appid , $op , $midflag); 
   }
   unset($date , $date_i , $appid , $op  , $day);

   return $result;
  }
 }
?>

Lib_File1.php
复制代码 代码如下:
 class Lib_File1
 {
  //文件目录
  private $root = '/data/wwwroot/kkpromo/data/';

  //文件后缀
  private $suffix = '.log';

  //文件句柄
  private $hander=null;

  //一次读取文件的最大记录数
  private $limit=40000;

  //每行读取的字节长度
  private $length=1024;

  //开始时间
  private  $startTime=0;

  //内存使用基准点
  private static $startMemory=0;

  //
  private $conn=null;

  //
  private static  $init=null;

  public static function instance()
  {
   self::$startMemory = memory_get_usage(true);

   if(self::$init && is_object(self::$init))
   {
    return self::$init;
   }

   self::$init = new self();

   return self::$init;
  }

  private function __construct(){}

  public  function setRoot($root)  
  {
   if(!is_dir($root)) die($root.' ROOT DOES NOT EXIST');
   $this->root = $root;
  }

  public function setSuffix($suffix)
  {
   $this->suffix = $suffix;
  }

  public function setLimit($limit)
  {
   if(!is_numeric($limit)) die($limit.' SHOULD BE NUMBERIC');
   if(intval($limit) > 1000000) die($limit.' SHOULD BE LOWER THAN 1000000');
   $this->limit = intval($limit);
  }

  private function _getFile($date , $appid , $op)
  {
   $filename = rtrim($this->root , '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;
   if(!file_exists($filename))
   {
    die($filename.' FILE DOES NOT EXISTS!');
   }

   if(!is_file($filename))
   {
    die($filename.' FILE DOES NOT EXIST!');
   }

   if(!is_readable($filename))
   {
    die($filename.'  FILE ACCESS DENY!');
   }

   return $filename;
  }

  
  public function closeFile($date=null , $appid=null , $op=null)
  {
   if($op && $date && $appid)
   {
    if(is_object($this->hander[$date.'_'.$appid.'_'.$op]) || $this->conn[$date.'_'.$appid.'_'.$op])
    {
     fclose($this->hander[$date.'_'.$appid.'_'.$op]);
    }

    $this->conn[$date.'_'.$appid.'_'.$op]=null;
    $this->hander[$date.'_'.$appid.'_'.$op]=null;
   }
   else {
    if(is_array($this->hander) && $this->hander)
    {
     foreach ($this->hander as $key=>$val){
      fclose($this->hander[$key]);
      $this->conn[$key]=null;
      $this->hander[$key]=null;
     }
    }
   } 

   return true;
  }

  
  private function _openFile($date , $appid , $op)
  {   
   $this->startTime = microtime(true);
   if(isset($this->conn[$date.'_'.$appid.'_'.$op])  && $this->conn[$date.'_'.$appid.'_'.$op])
   {
    return $this->hander[$date.'_'.$appid.'_'.$op];
   }

   $filename = self::_getFile($date , $appid , $op);
   if(($this->hander[$date.'_'.$appid.'_'.$op] = fopen($filename, 'r'))!=null)
   {
    $this->conn[$date.'_'.$appid.'_'.$op] = true;
    return $this->hander[$date.'_'.$appid.'_'.$op];
   }
   else {
    die('FILE OPEN FAILED!');
   }
  }

  
  /**
   * 功能:解析数据
   * 格式:  array('timestamp','mid','data');
   * @param string $data
   * @return boolean|array
   */
  private  function _parseData($data)
  {
   if(empty($data) || !is_string($data)) return false;
   $result = array(
     'timestamp'=>0,
     'mid'=>0,
     'data'=>array(),
   );

   $data = explode('|', $data);
   if(count($data)    $result['timestamp'] = $data[0];
   $result['mid'] = $data[1];
   $result['data'] = @json_decode($data[2] , true);
   if(empty($result['timestamp']) || empty($result['mid'])) return false;

   unset($data);
   return $result;
  }

  
  /**
   * TODO:读取单一文件
   * @param string $date: 如(20140327)
   * @param int  $appid: 如(1000,9000)
   * @param string $op:如(show,login , index)
   * @param number $startNum 默认从第一行开始
   * @param number $endNum 默认到$this->limit结束
   * @param array $condition:array('mid'=>arrray() , 'ip'=>array() , ...)  过滤条件
   * @param bool $jsonFlag:默认为true, 则保留jsondata字段;设为false,则去掉false字段
   * @return array(count , diffTime , memory , data)
   */
  public  function readFile($date , $appid , $op ,$startNum=0 , $endNum=0 , $jsonFlag=false ,  $condition=array())
  {
   $data['data'] = "";
   $data['count'] = 0;
   $index = $startNum;
   $startNum = empty($startNum) ? 0 : $startNum;
   $endNum = empty($endNum) ? $this->limit : $endNum;

   $hander = self::_openFile($date , $appid , $op );
   $tmpindex=0;

   if($hander)
   {
    //!feof($hander)
    while ($tmpindex     {
     $tmp = fgets($hander , $this->length);
     if(empty($tmp)) continue;
     if($tmpindex =$startNum)
     {
      $tmp = self::_parseData($tmp);    
      if(empty($tmp)) continue;
      //去掉jsondata
       if(!$jsonFlag) { unset($tmp[2]);  $condition= array();  }
      //条件过滤
      if($condition && is_array($condition) )
      {
       foreach ($condition as $key=>$val){
        if(in_array($tmp['data'][$key], $condition[$key]))
         unset($key , $val); 
         $data['count']++;
         $data['data'][$index] = $tmp;
         $index++;
        }
      }
      else{    
       $data['data'][$index] = $tmp;
       $index++;
       $data['count']++;
      }
     }
     if($tmpindex >= $endNum) break;
     $tmpindex++;
     unset($tmp);
    }
    fseek($hander ,  SEEK_END);
   }

   $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $data['memory'] = doubleval((doubleval(memory_get_usage(true)) -  doubleval(self::$startMemory))/1024/1024) . ' M';

   return $data;
  }
  

  /**
   * TODO:命令行下获取文件总记录数*
   * @param string $date
   * @param int $appid
   * @param string $op
   * @return array
   */
  public  function total_lineFile($date, $appid, $op)
  {
   $this->_openFile($date, $appid, $op);
   $file = escapeshellarg($this->_getFile($date, $appid, $op)); // 对命令行参数进行安全转义
   $line = `wc -l  $file`;
   if(preg_match("/(\d{1,})/", $line , $ret)){
    $data['count']=$ret[1];
   }else{
    $data['count']=0;
   }
   $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $data['memory'] = doubleval((doubleval(memory_get_usage(true)) -  doubleval(self::$startMemory))/1024/1024) . ' M';
   return $data;
  }

  
  /**
   * TODO:统计{$data}.{$op}.log记录数
   * @param string $date
   * @param int $appid
   * @param string $op
   * @param array $condition
   * @return array
   */
  public function countFile($date , $appid , $op ,$condition=array())
  {
   $data['count'] = 0;
   $hander = self::_openFile($date , $appid , $op );  
   if($hander)
   {
    while (!feof($hander))
    {
     $tmp = fgets($hander , $this->length);
     $tmp = self::_parseData($tmp);
     if(empty($tmp)) continue;
     if($condition && is_array($condition) )
     {
      foreach ($condition as $key=>$val){
       if(isset($tmp['data'][$key]) && $tmp['data'][$key] && in_array($tmp['data'][$key], $condition[$key])){
        unset($key , $val); 
        $data['count']++;
       } 
      }
     }
     else
      $data['count']++;      
     unset($tmp);
    }
    fseek($hander , SEEK_END);
   }
   $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $data['memory'] = doubleval((doubleval(memory_get_usage(true)) -  doubleval(self::$startMemory))/1024/1024) . ' M';

   return $data;
  }  

  

  /**
   * TODO:统计用户数
   * @param string $date
   * @param int $appid
   * @param string $op
   * @param bool $midflag :默认为false 则 mid返回空数组;如设为true,则mid数组不为空
   * * @param bool $jsonFlag:默认为true, 则保留jsondata字段;设为false,则去掉jsondata字段
   * @param array $condition
   * @return : array:形如({"mid":[],"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"})
   */
  public function countFileMID($date , $appid , $op  ,  $midflag=false , $jsonFlag=false,  $condition=array())
  {
   $count = self::total_lineFile($date , $appid , $op );
   $index = ceil($count['count'] / $this->limit);
   $result = array('mid'=>array() , 'count'=>0 , 'diffTime'=>0 , 'memory'=>0);

   for ($i=0 ; $i   {
    $startNum = $this->limit*$i;
    $endNum = $this->limit*($i+1);
    $data = self::readFile($date , $appid , $op  ,  $startNum , $endNum , $jsonFlag);
    if($data['data'] && is_array($data['data']))
    {
     foreach ($data['data'] as $arr)
     {
      if($condition && is_array($condition)){
       foreach ($condition as $key=>$val){ 
        if(isset($arr['data'][$key])  && (in_array($arr['data'][$key] , $condition[$key]) || empty($condition[$key]))){
         if(!isset($result['mid'][$arr['mid']]))  { $result['mid'][$arr["mid"]] =1; $result['count']++; }
        }
       }
      }
      else {
       if(!isset($result['mid'][$arr['mid']]))  { $result['mid'][$arr["mid"]] =1; $result['count']++;   }
      }
     }
    }
    unset($data['data'] , $data);
   }
   unset($index , $count , $condition , $data  , $arr);
   self::closeFile($date , $appid , $op);

    $result['mid'] = array_keys($result['mid']);
    //$result['count'] = count($result['mid']);   
    if(empty($midflag)) unset($result['mid']); 

   $result['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
   $result['memory'] = (memory_get_usage(true) - self::$startMemory)/1024/1024 . ' M';
   return $result;
  }

  
  /**
   * TODO:跨时间段 统计参加$op用户数据
   * @param string $date
   * @param int $appid
   * @param string $op
   * @param number $day
   * @param bool  $midflag :默认为false 则 mid返回空数组;如设为true,则mid数组不为空
   * @return array 形如("20140326":{"mid":[],"count":4571,"diffTime":0.0806441307068,"memory":"3.75 M"},
  *          "20140325":{"mid":[],"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"})
   */
  public function getReturnUser($date , $appid , $op , $day=1 , $midflag=false)
  {
   $date_i=0;
   for ($i =0; $i    $date_i = date('Ymd' , strtotime($date)-$i*86400);
    $result[$date_i] = self::countFileMID($date_i , $appid , $op , $midflag); 
   }
   unset($date , $date_i , $appid , $op  , $day);

   return $result;
  }
 }
?>

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn