Rumah >pembangunan bahagian belakang >tutorial php > 运用php充当shell脚本(转载)
使用php充当shell脚本(转载)
任务:过滤出2010-08-18的apache访问日志,并放到本地数据库。
解决方案:写两个php文件解决这个问题
假定linux系统
假定全utf-8
假定php已经放在$PATH里
假如有这么一个日志/site/data/log/access_log_20100818,内容示例如下:
[120.42.16.230] [-] [-] [2010-08-17 08:36:41] [GET] [www.site.com] [/membercenter/ordinary/score] [] [HTTP/1.1] [200] [2585] [-] [Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; QQDownload 618; GTB6.5; 360SE)] [121.229.144.193] [-] [-] [2010-08-17 08:36:41] [GET] [www.site.com] [/bbs/jiehunzhenhao/wosikainv_49602.html] [] [HTTP/1.1] [200] [12631] [http://www.site.com/bbs/forum/jiehunzhenhao/filter/0/orderby/2/ascdesc/desc/page/4] [Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8] [121.229.144.193] [-] [-] [2010-08-17 08:36:41] [POST] [www.site.com] [/bbsmanage/moderatorsetajax] [] [HTTP/1.1] [200] [21] [http://www.site.com/bbsmanage/moderatorset?id=4650] [Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; CIBA; 360SE)] [60.190.125.3] [-] [-] [2010-08-17 08:36:41] [GET] [www.site.com] [/bbs/fangchanzatan/jiangjiatong_49458.html] [] [HTTP/1.1] [200] [10435] [http://www.site.com/membercenter/ordinary/bbssend?page=6] [Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X)] [118.120.207.138] [-] [-] [2010-08-17 08:36:41] [GET] [www.site.com] [/bbs/jingcaitietu/tianshangrenjian_51533.html] [] [HTTP/1.1] [200] [13418] [http://www.site.com/bbs/forum/jingcaitietu/] [Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 627; GTB6.5; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727)] [121.229.144.193] [-] [-] [2010-08-18 08:36:41] [GET] [www.site.com] [/bbsmanage/setmoderator] [] [HTTP/1.1] [200] [451] [http://www.site.com/mange/magframe] [Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; CIBA; 360SE)] [121.229.144.193] [-] [-] [2010-08-18 08:36:42] [POST] [www.site.com] [/bbsmanage/moderatorxml] [] [HTTP/1.1] [200] [3699] [http://www.site.com/bbsmanage/setmoderator] [Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; CIBA; 360SE)] [60.211.96.212] [-] [-] [2010-08-18 08:36:42] [GET] [www.site.com] [/member/index/id/7651] [] [HTTP/1.1] [200] [5308] [http://www.site.com/membercenter/ordinary/friend] [Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; SE 2.X; .NET CLR 2.0.50727; .NET CLR 4.0.20506)] [113.205.59.70] [-] [-] [2010-08-18 08:36:43] [POST] [www.site.com] [/register/checkcaptcha] [] [HTTP/1.1] [200] [21] [http://www.site.com/register/ordinary/member_id/8326] [Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648)] [123.4.197.242] [-] [-] [2010-08-18 08:36:43] [GET] [www.site.com] [/bbsoperate/tuijian] [?act=tuijian&id=33936] [HTTP/1.1] [200] [4448] [-] [Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; CNCDialer)] 。。。。。 。。。。。
#!/usr/local/php/bin/php <?php include 'CommandPublic.php'; //这是公共文件 $handle = fopen("php://stdin", "r"); //这里获得命令行参数 if (count($argv) > 1 ) $date = $argv[1]; else $date = '2010-01-01'; //迭代 $j =0; while (!feof($handle)) { $buffer = fgets($handle); process($buffer); } //关闭输入流,并结束 fclose($handle); //筛选处理 function process($str){ global $j; global $date; $str = strval($str); $str = trim($str); $str = preg_replace('#\n|\r\n#',"", $str); //首先要确保符合日志格式 if (preg_match('#\[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\] \[.*?\]#', $str)) { if (!preg_match('#::1#', $str)) { //这是无用的记录 if (preg_match('#'. $date .'#', $str)) { //关键点,匹配 $j++; echo $str . "\n"; //这里通过管道输出到下一个文件 } } } } ?>
#!/usr/local/php/bin/php <?php include 'CommandPublic.php'; $db = Sys::getdb2(); $handle = fopen("php://stdin", "r"); $i=0; while (!feof($handle)) { $i++; $buffer = fgets($handle); process($buffer,$i); } fclose($handle); //筛选处理 function process($str,$i=0){ global $db; //正则截取出各字段内容 $ip = preg_replace('#^\[(.*?)\] \[.*?\] \[.*?\] \[(.*?)\] \[(.*?)\] \[(.*?)\] \[(.*?)\] \[.*?\] \[(.*?)\] \[(.*?)\] \[(.*?)\] \[(.*?)\] \[(.*?)\]$#', '$1|$2|$3|$4|$5|$6|$7|$8|$9|$10', $str); $arr = explode('|', $ip); $engine_name=''; $result = array( 'ip'=> $arr[0], 'access_time' => $arr[1], 'get_post'=> $arr[2], 'httphost' => $arr[3], 'url'=> $arr[4], 'http_type' => $arr[5], 'code'=> $arr[6], 'length' => $arr[7], 'source'=> $arr[8], 'agent' => substr( $arr[9],0, 250), 'engine_name' => $engine_name, ); $db->insert('table1', $result); //这里只是输出到控制台给人看 echo $i .': ' .$arr[1].' '. $arr[0] . "\n"; } ?>