首页  >  文章  >  php教程  >  分享我写的PHP文法分析的源码

分享我写的PHP文法分析的源码

WBOY
WBOY原创
2016-06-21 08:51:171001浏览

最近尝试做了文法分析的东东,问题较多。
请提建议。代码放不下,分两页。

下载地址 http://download.csdn.net/detail/xuzuning/4529066


PHP code
		include 'ttrie.php';

class Rule extends TTrie {
  public $rule = array();
  public $savematch = 0;

  function __construct($s='') {
    $this->set( array(
        ' ' => 'Separated',
        "\r\n" => 'set_rule',
        "\n" => 'set_rule',
        "\t" => 'Separated',
        '->' => 'Separated',
        '→' => 'Separated',
        '' => 'set_parallel_rule',
        ));
    $this->match($s);
    if($this->rule[0][0] == $this->rule[0][1]) {
        if(count($this->rule[0]) == 2) $this->rule[0][0] .= "'";
        else array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));
    }else {
        $c = $this->rule[0][0];
        $n = 0;
        foreach($this->rule as $r) if($r[0] == $c) $n++;
        if($n > 1) array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));
    }
  }
  function Separated() {
  }
  function set_rule() {
    $this->rule[] = $this->buffer;
    $this->buffer = array();
  }
  function set_parallel_rule() {
    $t = $this->buffer[0];
    $this->set_rule();
    $this->buffer[] = $t;
  }
}


class Grammar {
  var $closure = array();
  var $first = array();
  var $follow = array();
  var $rule = array();
  var $identifier = array();
  var $leay = array();
  var $forecast = array();
  var $stack = array();
  var $ll = 'LL(0)';
  var $lr = 'LR(0)';

  function __construct($s='') {
    $p = new Rule($s);
    $this->rule = $p->rule;
    $this->set_grammar();
  }

  function set_grammar() {
    foreach($this->rule as $rule) {
        if(! in_array($rule[0], $this->identifier)) $this->identifier[] = $rule[0];
    }
    foreach($this->rule as $rule) {
        foreach($rule as $v)
            if(! in_array($v, $this->identifier) && ! in_array($v, $this->leay))
                $this->leay[] = $v;
    }
    $this->set_first();
    $this->set_follow();
    $this->set_closure();
    $this->set_select();
    $this->set_forecast();
  }
  function set_first() {
    foreach($this->rule as $rule) $this->first[$rule[0]] = array();
    //直接收取 形如U->a…的产生式(其中a是终结符),把a收入到First(U)中
    foreach($this->rule as $v) {
        if(in_array($v[1], $this->leay)) $this->first[$v[0]][] = $v[1];
    }
    //反复传递 形入U->P1P2P3…Pn的产生式(其中P是非终结符),应先把First(P1)中的全部内容传送到First(U)中,如果P1中有ε,把First(P2)中的内容传送到First(U)中,类推直到Pi中无ε
    do {
        $t = serialize($this->first);
        foreach($this->rule as $rule) {
            for($i=1; $i<count if>identifier)) {
                    $this->first[$rule[0]] = array_unique(array_merge($this->first[$rule[0]], $this->first[$v]));
                    if(! in_array('#', $this->first[$v])) break;
                }else break;
            }
        }
    }while($t != serialize($this->first));
  }
  function set_follow() {
    foreach($this->rule as $rule) $this->follow[$rule[0]] = array();
    //直接收取 形如 …Ua… 的,把a直接收入到Follow(U)中
    foreach($this->rule as $rule) {
        for($i=1; $i<count if>identifier) && in_array($rule[$i+1], $this->leay))
                $this->follow[$rule[$i]][] = $rule[$i+1];
        }
        if(in_array($rule[$i], $this->identifier)) $this->follow[$rule[$i]][] = '#';
    }
    foreach($this->follow as &$v) if(! $v) $v[] = '#';
    //直接收取 形如 …UP…(P是非终结符)的,把First(P)中非ε收入到Follow(U)中
    foreach($this->rule as $rule) {
        for($i=1; $i<count if>identifier) && in_array($rule[$i+1], $this->identifier)) {
                $this->follow[$rule[$i]] = array_unique(array_merge($this->follow[$rule[$i]], array_diff($this->first[$rule[$i+1]], array('#'))));
            }
        }
    }
    //反复传递 形如U->aP的(P是非终结符)或U->aPQ(P,Q为非终结符且Q中含ε),应把Follow(U)中的全部内容传送到Follow(P)中
    do {
        $t = serialize($this->follow);
        foreach($this->rule as $rule) {
            $s = $rule[0];
            $d = end($rule);
            if(in_array($d, $this->leay)) continue;
            $p = prev($rule);
            if(in_array($p, $this->leay)) $this->follow[$d] = array_unique(array_merge($this->follow[$d], $this->follow[$s]));
            elseif(in_array('#', $this->follow[$d])) $this->follow[$p] = array_unique(array_merge($this->follow[$p], $this->follow[$s]));
        }
    }while($t != serialize($this->follow));
  }
  function set_closure() {
    $shift = array();
    $this->closure[0][] = array('offs' => 1, 'rule' => 0);
    for($i=0 ; $i closure); $i++) {
        $cnt = count($this->closure);
        //构造闭包 closure
        $ex = array();
        $j = 0;
        $tmp = array();
        do {
            $size = count($this->closure[$i]);
            for($j=0; $j<count>closure[$i]); $j++) {
                $dfa = $this->closure[$i][$j];
                $rule = $this->rule[$dfa['rule']];
                if(isset($rule[$dfa['offs']])) {
                    $ch = $ex[] = $rule[$dfa['offs']];
                }
                foreach($this->rule as $r=>$rule) {
                    if(in_array($rule[0], $ex)) {
                        $t = array('offs' => 1, 'rule' => $r);
                        if(!isset($tmp[$r][1])) $this->closure[$i][] = $t;
                        $tmp[$r][1] = 1;
                    }
                }
            }
        }while(count($this->closure[$i]) != $size); //直到不再增大

        //判断状态转向 go
        $out = array();
        foreach($this->closure[$i] as $k=>$dfa) {
            $rule = $this->rule[$dfa['rule']];
            if(isset($rule[$dfa['offs']])) {
                $t = "$dfa[rule],$dfa[offs]";
                $ch = $rule[$dfa['offs']];
                $this->closure[$i][$k]['char'] = $ch;
                if(isset($out[$ch])) $shift[$t] = $out[$ch];
                if(isset($shift[$t])) {
                    $this->closure[$i][$k]['target'] = $shift[$t];
                    $dfa['offs']++;
                    if(!$this->in_closure($dfa, $this->closure[$shift[$t]])) $this->closure[$shift[$t]][] = $dfa;

                } else {
                    $cnt = count($this->closure);
                    $this->closure[$i][$k]['target'] = $cnt;
                    $shift[$t] = $cnt;
                    $dfa['offs']++;
                    $this->closure[count($this->closure)][] = $dfa;
                    $out[$ch] = $cnt;
                }
            }
        }

        //构造状态转换表
        foreach($this->closure[$i] as $k=>$dfa) {
            if(isset($dfa['target'])) {
                $v = $dfa['char'];
                if(in_array($v, $this->identifier)) $this->goto[$i][$v] = $dfa['target'];
                else {
                    $this->action[$i][$v][] = "S$dfa[target]";
                    $this->request[$i][$v] = $dfa['rule'];
                }
            } else {
                $ch = $this->rule[$dfa['rule']][0];
                foreach($this->follow[$ch] as $v) {
                    $this->action[$i][$v][] = "r$dfa[rule]";
                    $this->request[$i][$v] = $dfa['rule'];
                }
            }
        }
        foreach($this->action[$i] as $c=>$v) {
            $v = array_unique($v);
            if(count($v) > 1) $this->lr = 'SLR(1)';
            $this->action[$i][$c] = $v;
        }
    }
  }

  function in_closure($t, $s) {
    foreach($s as $r) if($t['offs'] == $r['offs'] && $t['rule'] == $r['rule']) return true;
    return false;
    return in_array(serialize($t), array_map('serialize', $s));
  }
  function set_select() {
    foreach($this->rule as $i=>$rule) {
        $y = array($rule[1]);
        if(in_array($y[0], $this->leay)) {
            if($y[0] != '#') {
                $this->select[$i] = $y;
                continue;
            }
        }else $y = $this->first[$rule[1]];
        $x = $this->follow[$rule[0]];
        //SELECT(X->Y)=(FIRST(Y)-{ε})并FOLLOW(X)
        $this->select[$i] = array_unique( array_merge(array_diff($y, array('#')), $x) );
    }
  }
  /**
   * 构造预测分析表
   **/
  function set_forecast() {
    foreach($this->select as $i=>$r) {
        $c = $this->rule[$i][0];
        $v = array_reverse(array_slice($this->rule[$i], 1));
        foreach($r as $k) {
            $this->forecast[$c][$k][] = $v;
        }
    }
    //检查冲突
    foreach($this->forecast as $c=>$r) {
        foreach($r as $k) {
            if(count($k) > 1) {
                $this->ll = 'LL(1)';
            }
        }
    }
  }
</count></count></count></count>

 

PHP code
		

			function ll_start($s) {
    $t = array();
    foreach($this->rule as $rule) if($rule[0] == $rule[1]) $t[] = $rule;
    if($t) {
        foreach($t as $rule) printf('<tr><td colspan="4">%s 存在左递归</td></tr>', preg_replace('/ /', ' → ', join(' ', $rule), 1));
        return;
    }
    $stack = array('#', key($this->forecast));
    $i = 0;
    $step = 1;
    $timeout = 10 * strlen($s);
    while($stack && $i forecast[$r][$s{$i}])) $msg = $r . ' → ' . join(' ', array_reverse($this->forecast[$r][$s{$i}][0]));
        else $msg = '错误';
        printf("<tr>
<td>%d</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>", $step++, substr(join('', $stack), -50), substr($s, $i), $msg);
        if($r == $s{$i}) {
            array_pop($stack);
            $i++;
        }elseif(isset($this->forecast[$r][$s{$i}])) {
            array_pop($stack);
            if(current($this->forecast[$r][$s{$i}][0]) != '#')
                $stack = array_merge($stack, $this->forecast[$r][$s{$i}][0]);
        }else break;
    }
  }
  function lr_start($s) {
    $State = array(0); //状态栈
    $Symbol = array('#'); //符号栈
    $i = 0;
    $step = 1;
    $timeout = 10 * strlen($s);
    while($i action[$sp][$ch]) && $this->action[$sp][$ch][0] == 'r0') $msg = 'acc';
        if(isset($this->request[$sp][$ch])) $request = preg_replace('/ /', ' → ', join(' ', $this->rule[$this->request[$sp][$ch]]), 1);
        else $request = 'error';
        printf("<tr>
<th>%d</th>
<td>%s</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>", $step++, substr(join('', $State), -50), join('', $Symbol), $msg, $request);

        if(isset($this->action[$sp][$ch])  isset($this->goto[$sp][$ch])) {
            $t = isset($this->action[$sp][$ch]) ? $this->action[$sp][$ch][0] : $this->goto[$sp][$ch];
            $n = substr($t, 1) + 0;
            if($t{0} == 'r') {
                for($j=0; $j<count>rule[$n])-1; $j++) {
                    array_pop($State);
                    array_pop($Symbol);
                }
                if($n == 0) break;
                $c = $Symbol[] = $this->rule[$n][0];
                $State[] = $this->goto[end($State)][$c];
            }elseif($t{0} == 'S') {
                $State[] = $n;
                $Symbol[] = $ch;
                $i++;
            }else ;
        }else break;
    }
  }
  function report($in='') {
    if($in) $in = trim($in, '#') . '#';
    echo '<style>table {font-size:10pt</style>';

    echo '<table>
<tr><td><b>文法</b></td></tr>';
    foreach($this->rule as $rule) {
        echo '<tr><td>';
        echo preg_replace('/ /', ' → ', join(' ', $rule), 1);
        echo '</td></tr>';
    }
    echo '</table>';

    $identifier = $this->identifier;
    echo '<table>
<tr>
<td><b>标识符</b></td>';
    echo '<td>' . join(' ', $identifier) . '</td>
</tr>';
    echo '</table>';

    $leay = array_diff($this->leay, array('#'));
    $leay[] = '#';
    echo '<table>
<tr>
<td><b>终结符</b></td>';
    echo '<td>' . join(' ', $leay) . '</td>
</tr>';
    echo '</table>';

    echo '<table width="60%" border="1">
<tr>
<th>标识符</th>
<th>推出空</th>
<th>FIRST集</th>
<th>FOLLOW集</th>
</tr>';
    foreach($identifier as $ch) {
        echo '<tr>
<td>' . $ch . '</td>';
        echo '<td>' . (in_array('#', $this->first[$ch]) ? 'True' : 'false') . '</td>';
        echo '<td>' . join(' ', $this->first[$ch]) . '</td>';
        echo '<td>' . join(' ', $this->follow[$ch]) . '</td>';
        echo '</tr>';
    }
    echo '</table>';

    echo '<table width="100%">
<tr><td> </td></tr>
<tr><th>' . $this->ll . '文法分析</th></tr>
</table>';

    echo '<table width="60%" border="1">
<tr>
<th>产生式</th>
<th>Select集</th>
</tr>';
    foreach($this->rule as $i=>$rule) {
        echo '<tr>';
        echo '<td>' . preg_replace('/ /', ' → ', join(' ', $rule), 1) . '</td>';
        echo '<td>' . join(' ', $this->select[$i]) . '</td>';
        echo '</tr>';
    }
    echo '</table>';

    $forecast = $this->forecast;
    echo '<table width="60%">
<tr><td><b>预测分析表</b></td></tr>';
    echo '<tr><td><table width="100%" border="1">';
    echo '<tr>
<th> </th>
<th>' . join('</th>
<th>', $leay) . '</th>
</tr>';
    foreach($identifier as $ch) {
        echo '<tr>
<td>' . $ch . '</td>';
        foreach($leay as $v) {
            $s = '';
            if(isset($forecast[$ch][$v])) {
                foreach($forecast[$ch][$v] as $t) {
                    if($s) $s .= '<br>';
                    $s .= $ch . ' → '. join(' ', array_reverse($t));
                }
                if(count($forecast[$ch][$v]) > 1) $s = "<font color="red">$s</font>";
            }else $s .= ' ';
            echo '<td>' . $s . '</td>';
        }
        echo '</tr>
<tr>';        
    }
    echo '</tr>
</table></td></tr>';
    echo '</table>';

    if($in) {
        echo '<table><tr>
<th>测试字符串</th>';
        echo '<td>' . trim($in, '#') . '</td>
</tr></table>';
        echo '<table width="100%" border="1">';
        echo '<tr>
<th>步骤</th>
<th>分析栈</th>
<th>剩余字符</th>
<th>生成式或匹配</th>
</tr>';
        $this->ll_start($in);
        echo '</table>';
    }

    echo '<table width="100%">
<tr><td> </td></tr>';
    echo '<tr>
<th>' . $this->lr . '文法分析</th>
<th></th>
</tr>
</table>';
    echo '<table><tr><th>状态转移表</th></tr></table>';
    echo '<table width="100%" border="1">';
    echo '<tr>
<th rowspan="2">状态</th>
<th colspan="'" . count>Action</th>
<th colspan="'" . count>Goto</th>
<th rowspan="2">DFA</th>
</tr>';
    echo '<tr>
<th>' . join('</th>
<th>', $leay) . '</th>
<th>'. join('</th>
<th>', $identifier) . '</th>
</tr>';
    foreach($this->action as $i=>$item) {
        echo "<tr>
<td>$i</td>";
        foreach($leay as $v) {
            $s = isset($item[$v]) ? join(',', $item[$v]) : ' ';
            if(strpos($s, ',')) $s = "<font color="red">$s</font>";
            echo "<td>$s</td>";
        }
        foreach($identifier as $v) {
            $s = isset($this->goto[$i][$v]) ? $this->goto[$i][$v] : ' ';
            echo "<td>$s</td>";
        }
        echo '<td>' . $this->showDFA($i) .'</td>';
        echo '</tr>';
    }            
    echo '</table>';

    if($in) {
        echo '<table><tr>
<th>测试字符串</th>';
        echo '<td>' . trim($in, '#') . '</td>
</tr></table>';
        echo '<table width="100%" border="1">';
        echo '<tr>
<th>步骤</th>
<th>状态栈</th>
<th>符号栈</th>
<th>剩余字符</th>
<th>生成式</th>
</tr>';
        $this->lr_start($in);
        echo '</table>';
    }
  }
  function showDFA($i) {
    $res = array();
    foreach($this->closure[$i] as $dfa) {
        $rule = $this->rule[$dfa['rule']];
        array_splice($rule, $dfa['offs'], 0, '·');
        array_splice($rule, 1, 0, '→');
        if(isset($dfa['target'])) $rule[] = " [$dfa[target]]";
        $res[] = join('', $rule);
    }
    return join('; ', $res);
  }
}


for($i=1; $i$i";
$n = current($_GET) or $n = 1;
$S = '';
include "$n.txt";

$p = new grammar($G);
$p->report($S);
</count>

ttrie.php

PHP code
		<?php class TTrie {
  protected $buffer = array();
  protected $dict = array( array() );
  protected $input = 0; //字符串当前偏移
  protected $backtracking = 0; //字符串回溯位置
  public $debug = 0;
  public $savematch = 1;

  function set($word, $action=&#39;&#39;) {
    if(is_array($word)) {
        foreach($word as $k=>$v) $this->set($k, $v);
        return;
    }
    $p = count($this->dict);
    $cur = 0; //当前节点号
    foreach(str_split($word) as $c) {
        if (isset($this->dict[$cur][$c])) { //已存在就下移
            $cur = $this->dict[$cur][$c];
            continue;
        }
        $this->dict[$p]= array(); //创建新节点
        $this->dict[$cur][$c] = $p; //在父节点记录子节点号
        $cur = $p; //把当前节点设为新插入的
        $p++;
    }
    $this->dict[$cur]['acc'] = $action; //一个词结束,标记叶子节点
  }

  function match($s) {
    $ret = array();
    $cur = 0; //当前节点,初始为根节点
    $i =& $this->input; //字符串当前偏移
    $p =& $this->backtracking; //字符串回溯位置
    $s .= "\0"; //附加结束符
    $len = strlen($s);
    $buf = '';
    while($i dict[$cur][$c])) { //如果存在
            $cur = $this->dict[$cur][$c]; //转到对应的位置
            if(isset($this->dict[$cur][$s[$i+1]])) {//检查下一个字符是否也能匹配,长度优先
                $i++;
                continue;
            }
            if(isset($this->dict[$cur]['acc'])) { //是叶子节点,单词匹配!
                if($buf) {
                    $this->buffer[] = $buf;
                    $buf = '';
                }
                if($this->savematch) $this->buffer[] = substr($s, $p, $i - $p + 1); //取出匹配位置和匹配的词

                $ar = explode(',', $this->dict[$cur]['acc']);
                call_user_func_array( array($this, array_shift($ar)), $ar );

                $p = $i + 1; //设置下一个回溯位置
                $cur = 0; //重置当前节点为根节点
            }
        } else { //不匹配
            $buf .= $s{$p}; //substr($s, $p, $i - $p + 1); //保存未匹配位置和未匹配的内容
            $cur = 0; //重置当前节点为根节点
            $i = $p; //把当前偏移设为回溯位置
            $p = $i + 1; //设置下一个回溯位置
        }
        $i++; //下一个字符
    }
    if(trim($buf, "\0")) $this->buffer[] = trim($buf, "\0");
  }

  function __call($method, $param) {
    if($this->debug) printf("偏移:%d 回溯:%d\n", $this->input, $this->backtracking);

  }
}



声明:
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn