Home  >  Article  >  Backend Development  >  PHP reading large file example sharing (file operation class)_PHP tutorial

PHP reading large file example sharing (file operation class)_PHP tutorial

WBOY
WBOYOriginal
2016-07-13 10:32:51 · 730 views

Lib_File2.php

The code is as follows:

/**
 * Reader for large, line-oriented log files, built on SplFileObject.
 *
 * Files are resolved as {root}/{date}/{appid}.{op}{suffix}. Every line is
 * expected to have the form "timestamp|mid|json". Open handles are cached
 * per (date, appid, op) until closeFile() is called.
 *
 * The class is a singleton; obtain it through Lib_File2::instance().
 * Fatal conditions (missing file, bad arguments) terminate the script with die().
 */
class Lib_File2
{
    // Root directory that contains one sub-directory per date
    private $root = '/data/wwwroot/kkpromo/data/';

    // File suffix
    private $suffix = '.log';

    // Open SplFileObject handles, keyed by "{date}_{appid}_{op}"
    private $handle = array();

    // Default maximum number of lines read by a single readFile() call
    private $limit = 40000;

    // Maximum line length handed to SplFileObject::setMaxLineLen()
    private $length = 1024;

    // microtime(true) of the most recent _openFile() call; base of "diffTime"
    private $startTime = 0;

    // memory_get_usage(true) snapshot taken by instance(); base of "memory"
    private static $startMemory = 0;

    // Flags marking which keys currently have an open handle
    private $conn = array();

    // The singleton instance
    private static $init = null;

    /**
     * Return the shared instance, creating it on first use.
     * Each call also resets the memory baseline used by the "memory" statistic.
     *
     * @return Lib_File2
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);

        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }

        return self::$init;
    }

    private function __construct()
    {
    }

    /**
     * Set the root directory. Dies when the directory does not exist.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) {
            die($root.' ROOT DOES NOT EXIST');
        }
        $this->root = $root;
    }

    /**
     * Set the file suffix (including the leading dot).
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Set the default number of lines read per readFile() call.
     * Dies unless the value is numeric and within 1..1000000.
     *
     * @param int|string $limit
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) {
            die($limit.' SHOULD BE NUMBERIC');
        }
        if (intval($limit) > 1000000) {
            die($limit.' SHOULD BE LOWER THAN 1000000');
        }
        // A limit below 1 would make every read window empty.
        if (intval($limit) < 1) {
            die($limit.' SHOULD BE GREATER THAN 0');
        }
        $this->limit = intval($limit);
    }

    /**
     * Build the path of a log file and verify that it is a readable regular file.
     * Dies with a message when any check fails.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return string absolute file name
     */
    public function _getFile($date, $appid, $op)
    {
        $filename = rtrim($this->root, '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

        if (!file_exists($filename)) {
            die($filename.' FILE DOES NOT EXISTS!');
        }
        if (!is_file($filename)) {
            die($filename.' FILE DOES NOT EXISTS!');
        }
        if (!is_readable($filename)) {
            die($filename.' FILE ACCESS DENY!');
        }

        return $filename;
    }

    /**
     * Release one cached handle, or all of them when any argument is empty.
     * Dropping the last reference to an SplFileObject closes the file.
     *
     * @param string|null $date
     * @param int|null    $appid
     * @param string|null $op
     * @return bool always true
     */
    public function closeFile($date = null, $appid = null, $op = null)
    {
        if ($op && $date && $appid) {
            $key = $date.'_'.$appid.'_'.$op;
            unset($this->handle[$key], $this->conn[$key]);
        } else {
            $this->handle = array();
            $this->conn = array();
        }

        return true;
    }

    /**
     * Return the cached handle for a file, opening it first when necessary.
     * Also restarts the timer used for the "diffTime" statistic.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return SplFileObject
     */
    private function _openFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $key = $date.'_'.$appid.'_'.$op;

        if (!empty($this->conn[$key]) && isset($this->handle[$key])) {
            return $this->handle[$key];
        }

        $filename = $this->_getFile($date, $appid, $op);
        try {
            // SplFileObject signals failure by throwing, never by returning null.
            $this->handle[$key] = new SplFileObject($filename, 'r');
        } catch (Exception $e) {
            die('FILE OPEN FAILED!');
        }
        $this->conn[$key] = true;

        return $this->handle[$key];
    }

    /**
     * Parse one "timestamp|mid|json" line.
     *
     * @param string $data     raw line
     * @param bool   $jsonFlag decode the third field into 'data' when true;
     *                         otherwise 'data' stays an empty array
     * @return bool|array false when the line is malformed, else
     *                    array('timestamp' => ..., 'mid' => ..., 'data' => ...)
     */
    private function _parseData($data, $jsonFlag = true)
    {
        if (empty($data) || !is_string($data)) {
            return false;
        }

        // Limit of 3 keeps '|' characters inside the JSON payload intact.
        $parts = explode('|', $data, 3);
        if (count($parts) < 3) {
            return false;
        }

        $result = array(
            'timestamp' => $parts[0],
            'mid'       => $parts[1],
            'data'      => array(),
        );
        if (empty($result['timestamp']) || empty($result['mid'])) {
            return false;
        }
        if ($jsonFlag) {
            $result['data'] = json_decode($parts[2], true);
        }

        return $result;
    }

    /**
     * Decide whether a parsed record satisfies a filter.
     *
     * An empty (or non-array) filter accepts everything. Otherwise the record
     * is accepted as soon as one filter field is present in its decoded data
     * and holds one of the allowed values.
     *
     * @param array $record              result of _parseData()
     * @param array $condition           array(field => array(allowed values))
     * @param bool  $emptyListMatchesAny treat an empty value list as "any value"
     * @return bool
     */
    private function _matchCondition($record, $condition, $emptyListMatchesAny = false)
    {
        if (empty($condition) || !is_array($condition)) {
            return true;
        }

        foreach ($condition as $field => $allowed) {
            if (!isset($record['data'][$field])) {
                continue;
            }
            if ($emptyListMatchesAny && empty($allowed)) {
                return true;
            }
            if (in_array($record['data'][$field], (array) $allowed)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Append the elapsed-time and memory statistics to a result array.
     *
     * @param array $data
     * @return array $data with 'diffTime' (seconds) and 'memory' ("<n> M") set
     */
    private function _stamp($data)
    {
        $data['diffTime'] = microtime(true) - $this->startTime;
        $data['memory'] = ((memory_get_usage(true) - self::$startMemory) / 1024 / 1024).' M';

        return $data;
    }

    /**
     * Read a window of lines from a single file.
     *
     * @param string $date      e.g. 20140327
     * @param int    $appid     e.g. 1000, 9000
     * @param string $op        e.g. show, login, index
     * @param int    $startNum  zero-based line to start at (default: first line)
     * @param int    $length    number of lines to read (default: $this->limit)
     * @param bool   $jsonFlag  decode the JSON field; filtering only happens when true
     * @param array  $condition array(field => array(allowed values))
     * @return array array('data' => records keyed by line number, 'count', 'diffTime', 'memory');
     *               malformed lines are skipped
     */
    public function readFile($date, $appid, $op, $startNum = 0, $length = 0, $jsonFlag = false, $condition = array())
    {
        $data = array('data' => array(), 'count' => 0);
        $startNum = empty($startNum) ? 0 : intval($startNum);
        $length = empty($length) ? $this->limit : intval($length);
        if (!$jsonFlag) {
            // Without decoded JSON there is nothing to filter on.
            $condition = array();
        }

        $handle = $this->_openFile($date, $appid, $op);
        if ($handle) {
            $handle->setMaxLineLen($this->length);
            $handle->seek($startNum);

            $read = 0;
            while ($read < $length && !$handle->eof()) {
                $line = $handle->current();
                $lineNumber = $handle->key();
                // Always advance, even for lines that end up skipped.
                $handle->next();
                $read++;

                $record = $this->_parseData($line, $jsonFlag);
                if (empty($record) || !$this->_matchCondition($record, $condition)) {
                    continue;
                }

                $data['data'][$lineNumber] = $record;
                $data['count']++;
            }
        }

        return $this->_stamp($data);
    }

    /**
     * Get the number of lines in a file by running "wc -l".
     * wc counts newline characters, so an unterminated last line is not included;
     * the count is 0 when the command produces no numeric output.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return array array('count', 'diffTime', 'memory')
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->_openFile($date, $appid, $op);
        // Escape the path before handing it to the shell.
        $file = escapeshellarg($this->_getFile($date, $appid, $op));
        $output = (string) `wc -l  $file`;

        $data = array('count' => 0);
        if (preg_match('/^\s*(\d+)/', $output, $ret)) {
            $data['count'] = intval($ret[1]);
        }

        return $this->_stamp($data);
    }

    /**
     * Count the well-formed records of a file, optionally filtered.
     * The handle is released afterwards.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param array  $condition array(field => array(allowed values))
     * @return array array('count', 'diffTime', 'memory')
     */
    public function countFile($date, $appid, $op, $condition = array())
    {
        $data = array('count' => 0);

        $handle = $this->_openFile($date, $appid, $op);
        if ($handle) {
            $handle->setMaxLineLen($this->length);
            // A cached handle may be positioned anywhere; always start from the top.
            $handle->rewind();

            while (!$handle->eof()) {
                $line = $handle->current();
                $handle->next();

                $record = $this->_parseData($line);
                if (!empty($record) && $this->_matchCondition($record, $condition)) {
                    $data['count']++;
                }
            }
        }
        $this->closeFile($date, $appid, $op);

        return $this->_stamp($data);
    }

    /**
     * Count the distinct users (mid values) found in a file.
     * The file is streamed once; only the set of distinct mids is kept in memory.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param bool   $midflag   include the list of mids in the result when true
     * @param bool   $jsonFlag  decode the JSON field (it is always decoded when a filter is given)
     * @param array  $condition array(field => array(allowed values)); an empty value
     *                          list accepts any record that has the field
     * @return array e.g. {"mid":[...],"count":2181,"diffTime":0.0397,"memory":"3.75 M"};
     *               'mid' is omitted unless $midflag is true
     */
    public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
    {
        $result = array('mid' => array(), 'count' => 0, 'diffTime' => 0, 'memory' => 0);
        $filtering = !empty($condition) && is_array($condition);

        $handle = $this->_openFile($date, $appid, $op);
        if ($handle) {
            $handle->setMaxLineLen($this->length);
            $handle->rewind();

            while (!$handle->eof()) {
                $line = $handle->current();
                $handle->next();

                $record = $this->_parseData($line, $jsonFlag || $filtering);
                if (empty($record) || !$this->_matchCondition($record, $condition, true)) {
                    continue;
                }
                // Array keys give de-duplication for free.
                $result['mid'][$record['mid']] = 1;
            }
        }
        $this->closeFile($date, $appid, $op);

        $result['count'] = count($result['mid']);
        $result['mid'] = array_keys($result['mid']);
        if (empty($midflag)) {
            unset($result['mid']);
        }

        return $this->_stamp($result);
    }

    /**
     * Collect per-day user statistics for $op, going back $day days from $date.
     *
     * @param string $date    most recent day, in a format strtotime() understands
     * @param int    $appid
     * @param string $op
     * @param int    $day     number of days to cover, including $date
     * @param bool   $midflag include the list of mids for each day when true
     * @return array keyed by Ymd date, each value being a countFileMID() result,
     *               e.g. {"20140326":{"count":4571,...},"20140325":{"count":2181,...}}
     */
    public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
    {
        $result = array();

        $base = strtotime($date);
        if ($base === false) {
            die($date.' INVALID DATE');
        }

        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic stays correct across daylight-saving changes.
            $date_i = date('Ymd', strtotime("-{$i} day", $base));
            $result[$date_i] = $this->countFileMID($date_i, $appid, $op, $midflag);
        }

        return $result;
    }
}
?>

Lib_File1.php

The code is as follows:

/**
 * Reader for large, line-oriented log files, built on fopen()/fgets().
 *
 * Files are resolved as {root}/{date}/{appid}.{op}{suffix}. Every line is
 * expected to have the form "timestamp|mid|json". Open streams are cached
 * per (date, appid, op) until closeFile() is called.
 *
 * The class is a singleton; obtain it through Lib_File1::instance().
 * Fatal conditions (missing file, bad arguments) terminate the script with die().
 */
class Lib_File1
{
    // Root directory that contains one sub-directory per date
    private $root = '/data/wwwroot/kkpromo/data/';

    // File suffix
    private $suffix = '.log';

    // Open stream resources, keyed by "{date}_{appid}_{op}"
    private $hander = array();

    // Default end line of a readFile() window
    private $limit = 40000;

    // Length argument passed to fgets() for every line
    private $length = 1024;

    // microtime(true) of the most recent _openFile() call; base of "diffTime"
    private $startTime = 0;

    // memory_get_usage(true) snapshot taken by instance(); base of "memory"
    private static $startMemory = 0;

    // Flags marking which keys currently have an open stream
    private $conn = array();

    // The singleton instance
    private static $init = null;

    /**
     * Return the shared instance, creating it on first use.
     * Each call also resets the memory baseline used by the "memory" statistic.
     *
     * @return Lib_File1
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);

        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }

        return self::$init;
    }

    private function __construct()
    {
    }

    /**
     * Set the root directory. Dies when the directory does not exist.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) {
            die($root.' ROOT DOES NOT EXIST');
        }
        $this->root = $root;
    }

    /**
     * Set the file suffix (including the leading dot).
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Set the default end line of a readFile() window.
     * Dies unless the value is numeric and within 1..1000000.
     *
     * @param int|string $limit
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) {
            die($limit.' SHOULD BE NUMBERIC');
        }
        if (intval($limit) > 1000000) {
            die($limit.' SHOULD BE LOWER THAN 1000000');
        }
        // A limit below 1 would make every read window empty.
        if (intval($limit) < 1) {
            die($limit.' SHOULD BE GREATER THAN 0');
        }
        $this->limit = intval($limit);
    }

    /**
     * Build the path of a log file and verify that it is a readable regular file.
     * Dies with a message when any check fails.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return string absolute file name
     */
    private function _getFile($date, $appid, $op)
    {
        $filename = rtrim($this->root, '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

        if (!file_exists($filename)) {
            die($filename.' FILE DOES NOT EXISTS!');
        }
        if (!is_file($filename)) {
            die($filename.' FILE DOES NOT EXIST!');
        }
        if (!is_readable($filename)) {
            die($filename.'  FILE ACCESS DENY!');
        }

        return $filename;
    }

    /**
     * Close one cached stream, or all of them when any argument is empty.
     *
     * @param string|null $date
     * @param int|null    $appid
     * @param string|null $op
     * @return bool always true
     */
    public function closeFile($date = null, $appid = null, $op = null)
    {
        if ($op && $date && $appid) {
            $this->_release($date.'_'.$appid.'_'.$op);
        } else {
            foreach (array_keys($this->hander) as $key) {
                $this->_release($key);
            }
        }

        return true;
    }

    /**
     * Close and forget the stream stored under $key, if there is one.
     *
     * @param string $key
     */
    private function _release($key)
    {
        // Only real, still-open streams may be passed to fclose().
        if (isset($this->hander[$key]) && is_resource($this->hander[$key])) {
            fclose($this->hander[$key]);
        }
        unset($this->hander[$key], $this->conn[$key]);
    }

    /**
     * Return the cached stream for a file, opening it first when necessary.
     * Also restarts the timer used for the "diffTime" statistic.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return resource
     */
    private function _openFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $key = $date.'_'.$appid.'_'.$op;

        if (!empty($this->conn[$key]) && isset($this->hander[$key]) && is_resource($this->hander[$key])) {
            return $this->hander[$key];
        }

        $filename = $this->_getFile($date, $appid, $op);
        $stream = fopen($filename, 'r');
        // fopen() reports failure with false, not null.
        if ($stream === false) {
            die('FILE OPEN FAILED!');
        }

        $this->hander[$key] = $stream;
        $this->conn[$key] = true;

        return $stream;
    }

    /**
     * Parse one "timestamp|mid|json" line.
     *
     * @param string $data raw line
     * @return bool|array false when the line is malformed, else
     *                    array('timestamp' => ..., 'mid' => ..., 'data' => decoded JSON)
     */
    private function _parseData($data)
    {
        if (empty($data) || !is_string($data)) {
            return false;
        }

        // Limit of 3 keeps '|' characters inside the JSON payload intact.
        $parts = explode('|', $data, 3);
        if (count($parts) < 3) {
            return false;
        }
        if (empty($parts[0]) || empty($parts[1])) {
            return false;
        }

        return array(
            'timestamp' => $parts[0],
            'mid'       => $parts[1],
            'data'      => json_decode($parts[2], true),
        );
    }

    /**
     * Decide whether a parsed record satisfies a filter.
     *
     * An empty (or non-array) filter accepts everything. Otherwise the record
     * is accepted as soon as one filter field is present in its decoded data
     * and holds one of the allowed values.
     *
     * @param array $record              result of _parseData()
     * @param array $condition           array(field => array(allowed values))
     * @param bool  $emptyListMatchesAny treat an empty value list as "any value"
     * @return bool
     */
    private function _matchCondition($record, $condition, $emptyListMatchesAny = false)
    {
        if (empty($condition) || !is_array($condition)) {
            return true;
        }

        foreach ($condition as $field => $allowed) {
            if (!isset($record['data'][$field])) {
                continue;
            }
            if ($emptyListMatchesAny && empty($allowed)) {
                return true;
            }
            if (in_array($record['data'][$field], (array) $allowed)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Append the elapsed-time and memory statistics to a result array.
     *
     * @param array $data
     * @return array $data with 'diffTime' (seconds) and 'memory' ("<n> M") set
     */
    private function _stamp($data)
    {
        $data['diffTime'] = microtime(true) - $this->startTime;
        $data['memory'] = ((memory_get_usage(true) - self::$startMemory) / 1024 / 1024).' M';

        return $data;
    }

    /**
     * Read a window of lines from a single file.
     *
     * Lines are numbered from 0 in file order; the window is
     * [$startNum, $endNum). Malformed lines inside the window are skipped.
     *
     * @param string $date      e.g. 20140327
     * @param int    $appid     e.g. 1000, 9000
     * @param string $op        e.g. show, login, index
     * @param int    $startNum  first line of the window (default: first line)
     * @param int    $endNum    line after the last one read (default: $this->limit)
     * @param bool   $jsonFlag  filtering by $condition only happens when true;
     *                          the decoded 'data' field is returned either way
     * @param array  $condition array(field => array(allowed values))
     * @return array array('data' => records keyed consecutively from $startNum,
     *               'count', 'diffTime', 'memory')
     */
    public function readFile($date, $appid, $op, $startNum = 0, $endNum = 0, $jsonFlag = false, $condition = array())
    {
        $data = array('data' => array(), 'count' => 0);
        $startNum = empty($startNum) ? 0 : intval($startNum);
        $endNum = empty($endNum) ? $this->limit : intval($endNum);
        $index = $startNum;
        if (!$jsonFlag) {
            $condition = array();
        }

        $hander = $this->_openFile($date, $appid, $op);
        if ($hander) {
            // The stream is cached between calls, so line numbers are only
            // meaningful when every read starts from the top of the file.
            rewind($hander);

            $lineNumber = 0;
            // fgets() returning false means EOF (or a read error): stop.
            while ($lineNumber < $endNum && ($line = fgets($hander, $this->length)) !== false) {
                $current = $lineNumber++;
                if ($current < $startNum) {
                    continue;
                }

                $record = $this->_parseData($line);
                if (empty($record) || !$this->_matchCondition($record, $condition)) {
                    continue;
                }

                $data['data'][$index++] = $record;
                $data['count']++;
            }
        }

        return $this->_stamp($data);
    }

    /**
     * Get the number of lines in a file by running "wc -l".
     * wc counts newline characters, so an unterminated last line is not included;
     * the count is 0 when the command produces no numeric output.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return array array('count', 'diffTime', 'memory')
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->_openFile($date, $appid, $op);
        // Escape the path before handing it to the shell.
        $file = escapeshellarg($this->_getFile($date, $appid, $op));
        $output = (string) `wc -l  $file`;

        $data = array('count' => 0);
        if (preg_match('/^\s*(\d+)/', $output, $ret)) {
            $data['count'] = intval($ret[1]);
        }

        return $this->_stamp($data);
    }

    /**
     * Count the well-formed records of a file, optionally filtered.
     * A record is counted once, however many filter fields it matches.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param array  $condition array(field => array(allowed values))
     * @return array array('count', 'diffTime', 'memory')
     */
    public function countFile($date, $appid, $op, $condition = array())
    {
        $data = array('count' => 0);

        $hander = $this->_openFile($date, $appid, $op);
        if ($hander) {
            // A cached stream may be positioned anywhere; always start from the top.
            rewind($hander);

            while (($line = fgets($hander, $this->length)) !== false) {
                $record = $this->_parseData($line);
                if (!empty($record) && $this->_matchCondition($record, $condition)) {
                    $data['count']++;
                }
            }
        }

        return $this->_stamp($data);
    }

    /**
     * Count the distinct users (mid values) found in a file.
     * The file is streamed once; only the set of distinct mids is kept in memory.
     * The stream is closed afterwards.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param bool   $midflag   include the list of mids in the result when true
     * @param bool   $jsonFlag  accepted for compatibility; the JSON field is always decoded
     * @param array  $condition array(field => array(allowed values)); an empty value
     *                          list accepts any record that has the field
     * @return array e.g. {"mid":[...],"count":2181,"diffTime":0.0397,"memory":"3.75 M"};
     *               'mid' is omitted unless $midflag is true
     */
    public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
    {
        $result = array('mid' => array(), 'count' => 0, 'diffTime' => 0, 'memory' => 0);

        $hander = $this->_openFile($date, $appid, $op);
        if ($hander) {
            rewind($hander);

            while (($line = fgets($hander, $this->length)) !== false) {
                $record = $this->_parseData($line);
                if (empty($record) || !$this->_matchCondition($record, $condition, true)) {
                    continue;
                }
                // Array keys give de-duplication for free.
                $result['mid'][$record['mid']] = 1;
            }
        }
        $this->closeFile($date, $appid, $op);

        $result['count'] = count($result['mid']);
        $result['mid'] = array_keys($result['mid']);
        if (empty($midflag)) {
            unset($result['mid']);
        }

        return $this->_stamp($result);
    }

    /**
     * Collect per-day user statistics for $op, going back $day days from $date.
     *
     * @param string $date    most recent day, in a format strtotime() understands
     * @param int    $appid
     * @param string $op
     * @param int    $day     number of days to cover, including $date
     * @param bool   $midflag include the list of mids for each day when true
     * @return array keyed by Ymd date, each value being a countFileMID() result,
     *               e.g. {"20140326":{"count":4571,...},"20140325":{"count":2181,...}}
     */
    public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
    {
        $result = array();

        $base = strtotime($date);
        if ($base === false) {
            die($date.' INVALID DATE');
        }

        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic stays correct across daylight-saving changes.
            $date_i = date('Ymd', strtotime("-{$i} day", $base));
            $result[$date_i] = $this->countFileMID($date_i, $appid, $op, $midflag);
        }

        return $result;
    }
}
?>

Source: www.bkjia.com — http://www.bkjia.com/PHPjc/754035.html (TechArticle). Excerpt: Lib_File2.php 复制代码 代码如下: <?php class Lib_File2 { //文件目录 private $root = '/data/wwwroot/kkpromo/data/'; //文件后缀 private $suffix = '.log'; //文件句柄...
Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn