Heim  >  Artikel  >  php教程  >  分享我写的PHP文法分析的源码

分享我写的PHP文法分析的源码

WBOY
WBOYOriginal
2016-06-21 08:51:171001Durchsuche

最近尝试做了文法分析的东东,问题较多。
请提建议。代码放不下,分两页。

下载地址 http://download.csdn.net/detail/xuzuning/4529066


PHP code
		include 'ttrie.php';

class Rule extends TTrie {
  public $rule = array();
  public $savematch = 0;

  function __construct($s='') {
    $this->set( array(
        ' ' => 'Separated',
        "\r\n" => 'set_rule',
        "\n" => 'set_rule',
        "\t" => 'Separated',
        '->' => 'Separated',
        '→' => 'Separated',
        '' => 'set_parallel_rule',
        ));
    $this->match($s);
    if($this->rule[0][0] == $this->rule[0][1]) {
        if(count($this->rule[0]) == 2) $this->rule[0][0] .= "'";
        else array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));
    }else {
        $c = $this->rule[0][0];
        $n = 0;
        foreach($this->rule as $r) if($r[0] == $c) $n++;
        if($n > 1) array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));
    }
  }
  function Separated() {
  }
  function set_rule() {
    $this->rule[] = $this->buffer;
    $this->buffer = array();
  }
  function set_parallel_rule() {
    $t = $this->buffer[0];
    $this->set_rule();
    $this->buffer[] = $t;
  }
}


class Grammar {
  var $closure = array();
  var $first = array();
  var $follow = array();
  var $rule = array();
  var $identifier = array();
  var $leay = array();
  var $forecast = array();
  var $stack = array();
  var $ll = 'LL(0)';
  var $lr = 'LR(0)';

  function __construct($s='') {
    $p = new Rule($s);
    $this->rule = $p->rule;
    $this->set_grammar();
  }

  function set_grammar() {
    foreach($this->rule as $rule) {
        if(! in_array($rule[0], $this->identifier)) $this->identifier[] = $rule[0];
    }
    foreach($this->rule as $rule) {
        foreach($rule as $v)
            if(! in_array($v, $this->identifier) && ! in_array($v, $this->leay))
                $this->leay[] = $v;
    }
    $this->set_first();
    $this->set_follow();
    $this->set_closure();
    $this->set_select();
    $this->set_forecast();
  }
  function set_first() {
    foreach($this->rule as $rule) $this->first[$rule[0]] = array();
    //直接收取 形如U->a…的产生式(其中a是终结符),把a收入到First(U)中
    foreach($this->rule as $v) {
        if(in_array($v[1], $this->leay)) $this->first[$v[0]][] = $v[1];
    }
    //反复传递 形入U->P1P2P3…Pn的产生式(其中P是非终结符),应先把First(P1)中的全部内容传送到First(U)中,如果P1中有ε,把First(P2)中的内容传送到First(U)中,类推直到Pi中无ε
    do {
        $t = serialize($this->first);
        foreach($this->rule as $rule) {
            for($i=1; $i<count if>identifier)) {
                    $this->first[$rule[0]] = array_unique(array_merge($this->first[$rule[0]], $this->first[$v]));
                    if(! in_array('#', $this->first[$v])) break;
                }else break;
            }
        }
    }while($t != serialize($this->first));
  }
  function set_follow() {
    foreach($this->rule as $rule) $this->follow[$rule[0]] = array();
    //直接收取 形如 …Ua… 的,把a直接收入到Follow(U)中
    foreach($this->rule as $rule) {
        for($i=1; $i<count if>identifier) && in_array($rule[$i+1], $this->leay))
                $this->follow[$rule[$i]][] = $rule[$i+1];
        }
        if(in_array($rule[$i], $this->identifier)) $this->follow[$rule[$i]][] = '#';
    }
    foreach($this->follow as &$v) if(! $v) $v[] = '#';
    //直接收取 形如 …UP…(P是非终结符)的,把First(P)中非ε收入到Follow(U)中
    foreach($this->rule as $rule) {
        for($i=1; $i<count if>identifier) && in_array($rule[$i+1], $this->identifier)) {
                $this->follow[$rule[$i]] = array_unique(array_merge($this->follow[$rule[$i]], array_diff($this->first[$rule[$i+1]], array('#'))));
            }
        }
    }
    //反复传递 形如U->aP的(P是非终结符)或U->aPQ(P,Q为非终结符且Q中含ε),应把Follow(U)中的全部内容传送到Follow(P)中
    do {
        $t = serialize($this->follow);
        foreach($this->rule as $rule) {
            $s = $rule[0];
            $d = end($rule);
            if(in_array($d, $this->leay)) continue;
            $p = prev($rule);
            if(in_array($p, $this->leay)) $this->follow[$d] = array_unique(array_merge($this->follow[$d], $this->follow[$s]));
            elseif(in_array('#', $this->follow[$d])) $this->follow[$p] = array_unique(array_merge($this->follow[$p], $this->follow[$s]));
        }
    }while($t != serialize($this->follow));
  }
  function set_closure() {
    $shift = array();
    $this->closure[0][] = array('offs' => 1, 'rule' => 0);
    for($i=0 ; $i closure); $i++) {
        $cnt = count($this->closure);
        //构造闭包 closure
        $ex = array();
        $j = 0;
        $tmp = array();
        do {
            $size = count($this->closure[$i]);
            for($j=0; $j<count>closure[$i]); $j++) {
                $dfa = $this->closure[$i][$j];
                $rule = $this->rule[$dfa['rule']];
                if(isset($rule[$dfa['offs']])) {
                    $ch = $ex[] = $rule[$dfa['offs']];
                }
                foreach($this->rule as $r=>$rule) {
                    if(in_array($rule[0], $ex)) {
                        $t = array('offs' => 1, 'rule' => $r);
                        if(!isset($tmp[$r][1])) $this->closure[$i][] = $t;
                        $tmp[$r][1] = 1;
                    }
                }
            }
        }while(count($this->closure[$i]) != $size); //直到不再增大

        //判断状态转向 go
        $out = array();
        foreach($this->closure[$i] as $k=>$dfa) {
            $rule = $this->rule[$dfa['rule']];
            if(isset($rule[$dfa['offs']])) {
                $t = "$dfa[rule],$dfa[offs]";
                $ch = $rule[$dfa['offs']];
                $this->closure[$i][$k]['char'] = $ch;
                if(isset($out[$ch])) $shift[$t] = $out[$ch];
                if(isset($shift[$t])) {
                    $this->closure[$i][$k]['target'] = $shift[$t];
                    $dfa['offs']++;
                    if(!$this->in_closure($dfa, $this->closure[$shift[$t]])) $this->closure[$shift[$t]][] = $dfa;

                } else {
                    $cnt = count($this->closure);
                    $this->closure[$i][$k]['target'] = $cnt;
                    $shift[$t] = $cnt;
                    $dfa['offs']++;
                    $this->closure[count($this->closure)][] = $dfa;
                    $out[$ch] = $cnt;
                }
            }
        }

        //构造状态转换表
        foreach($this->closure[$i] as $k=>$dfa) {
            if(isset($dfa['target'])) {
                $v = $dfa['char'];
                if(in_array($v, $this->identifier)) $this->goto[$i][$v] = $dfa['target'];
                else {
                    $this->action[$i][$v][] = "S$dfa[target]";
                    $this->request[$i][$v] = $dfa['rule'];
                }
            } else {
                $ch = $this->rule[$dfa['rule']][0];
                foreach($this->follow[$ch] as $v) {
                    $this->action[$i][$v][] = "r$dfa[rule]";
                    $this->request[$i][$v] = $dfa['rule'];
                }
            }
        }
        foreach($this->action[$i] as $c=>$v) {
            $v = array_unique($v);
            if(count($v) > 1) $this->lr = 'SLR(1)';
            $this->action[$i][$c] = $v;
        }
    }
  }

  function in_closure($t, $s) {
    foreach($s as $r) if($t['offs'] == $r['offs'] && $t['rule'] == $r['rule']) return true;
    return false;
    return in_array(serialize($t), array_map('serialize', $s));
  }
  function set_select() {
    foreach($this->rule as $i=>$rule) {
        $y = array($rule[1]);
        if(in_array($y[0], $this->leay)) {
            if($y[0] != '#') {
                $this->select[$i] = $y;
                continue;
            }
        }else $y = $this->first[$rule[1]];
        $x = $this->follow[$rule[0]];
        //SELECT(X->Y)=(FIRST(Y)-{ε})并FOLLOW(X)
        $this->select[$i] = array_unique( array_merge(array_diff($y, array('#')), $x) );
    }
  }
  /**
   * 构造预测分析表
   **/
  function set_forecast() {
    foreach($this->select as $i=>$r) {
        $c = $this->rule[$i][0];
        $v = array_reverse(array_slice($this->rule[$i], 1));
        foreach($r as $k) {
            $this->forecast[$c][$k][] = $v;
        }
    }
    //检查冲突
    foreach($this->forecast as $c=>$r) {
        foreach($r as $k) {
            if(count($k) > 1) {
                $this->ll = 'LL(1)';
            }
        }
    }
  }
</count></count></count></count>

 

PHP code
		

			function ll_start($s) {
    $t = array();
    foreach($this->rule as $rule) if($rule[0] == $rule[1]) $t[] = $rule;
    if($t) {
        foreach($t as $rule) printf('<tr><td colspan="4">%s 存在左递归</td></tr>', preg_replace('/ /', ' → ', join(' ', $rule), 1));
        return;
    }
    $stack = array('#', key($this->forecast));
    $i = 0;
    $step = 1;
    $timeout = 10 * strlen($s);
    while($stack && $i forecast[$r][$s{$i}])) $msg = $r . ' → ' . join(' ', array_reverse($this->forecast[$r][$s{$i}][0]));
        else $msg = '错误';
        printf("<tr>
<td>%d</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>", $step++, substr(join('', $stack), -50), substr($s, $i), $msg);
        if($r == $s{$i}) {
            array_pop($stack);
            $i++;
        }elseif(isset($this->forecast[$r][$s{$i}])) {
            array_pop($stack);
            if(current($this->forecast[$r][$s{$i}][0]) != '#')
                $stack = array_merge($stack, $this->forecast[$r][$s{$i}][0]);
        }else break;
    }
  }
  function lr_start($s) {
    $State = array(0); //状态栈
    $Symbol = array('#'); //符号栈
    $i = 0;
    $step = 1;
    $timeout = 10 * strlen($s);
    while($i action[$sp][$ch]) && $this->action[$sp][$ch][0] == 'r0') $msg = 'acc';
        if(isset($this->request[$sp][$ch])) $request = preg_replace('/ /', ' → ', join(' ', $this->rule[$this->request[$sp][$ch]]), 1);
        else $request = 'error';
        printf("<tr>
<th>%d</th>
<td>%s</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>", $step++, substr(join('', $State), -50), join('', $Symbol), $msg, $request);

        if(isset($this->action[$sp][$ch])  isset($this->goto[$sp][$ch])) {
            $t = isset($this->action[$sp][$ch]) ? $this->action[$sp][$ch][0] : $this->goto[$sp][$ch];
            $n = substr($t, 1) + 0;
            if($t{0} == 'r') {
                for($j=0; $j<count>rule[$n])-1; $j++) {
                    array_pop($State);
                    array_pop($Symbol);
                }
                if($n == 0) break;
                $c = $Symbol[] = $this->rule[$n][0];
                $State[] = $this->goto[end($State)][$c];
            }elseif($t{0} == 'S') {
                $State[] = $n;
                $Symbol[] = $ch;
                $i++;
            }else ;
        }else break;
    }
  }
  function report($in='') {
    if($in) $in = trim($in, '#') . '#';
    echo '<style>table {font-size:10pt</style>';

    echo '<table>
<tr><td><b>文法</b></td></tr>';
    foreach($this->rule as $rule) {
        echo '<tr><td>';
        echo preg_replace('/ /', ' → ', join(' ', $rule), 1);
        echo '</td></tr>';
    }
    echo '</table>';

    $identifier = $this->identifier;
    echo '<table>
<tr>
<td><b>标识符</b></td>';
    echo '<td>' . join(' ', $identifier) . '</td>
</tr>';
    echo '</table>';

    $leay = array_diff($this->leay, array('#'));
    $leay[] = '#';
    echo '<table>
<tr>
<td><b>终结符</b></td>';
    echo '<td>' . join(' ', $leay) . '</td>
</tr>';
    echo '</table>';

    echo '<table width="60%" border="1">
<tr>
<th>标识符</th>
<th>推出空</th>
<th>FIRST集</th>
<th>FOLLOW集</th>
</tr>';
    foreach($identifier as $ch) {
        echo '<tr>
<td>' . $ch . '</td>';
        echo '<td>' . (in_array('#', $this->first[$ch]) ? 'True' : 'false') . '</td>';
        echo '<td>' . join(' ', $this->first[$ch]) . '</td>';
        echo '<td>' . join(' ', $this->follow[$ch]) . '</td>';
        echo '</tr>';
    }
    echo '</table>';

    echo '<table width="100%">
<tr><td> </td></tr>
<tr><th>' . $this->ll . '文法分析</th></tr>
</table>';

    echo '<table width="60%" border="1">
<tr>
<th>产生式</th>
<th>Select集</th>
</tr>';
    foreach($this->rule as $i=>$rule) {
        echo '<tr>';
        echo '<td>' . preg_replace('/ /', ' → ', join(' ', $rule), 1) . '</td>';
        echo '<td>' . join(' ', $this->select[$i]) . '</td>';
        echo '</tr>';
    }
    echo '</table>';

    $forecast = $this->forecast;
    echo '<table width="60%">
<tr><td><b>预测分析表</b></td></tr>';
    echo '<tr><td><table width="100%" border="1">';
    echo '<tr>
<th> </th>
<th>' . join('</th>
<th>', $leay) . '</th>
</tr>';
    foreach($identifier as $ch) {
        echo '<tr>
<td>' . $ch . '</td>';
        foreach($leay as $v) {
            $s = '';
            if(isset($forecast[$ch][$v])) {
                foreach($forecast[$ch][$v] as $t) {
                    if($s) $s .= '<br>';
                    $s .= $ch . ' → '. join(' ', array_reverse($t));
                }
                if(count($forecast[$ch][$v]) > 1) $s = "<font color="red">$s</font>";
            }else $s .= ' ';
            echo '<td>' . $s . '</td>';
        }
        echo '</tr>
<tr>';        
    }
    echo '</tr>
</table></td></tr>';
    echo '</table>';

    if($in) {
        echo '<table><tr>
<th>测试字符串</th>';
        echo '<td>' . trim($in, '#') . '</td>
</tr></table>';
        echo '<table width="100%" border="1">';
        echo '<tr>
<th>步骤</th>
<th>分析栈</th>
<th>剩余字符</th>
<th>生成式或匹配</th>
</tr>';
        $this->ll_start($in);
        echo '</table>';
    }

    echo '<table width="100%">
<tr><td> </td></tr>';
    echo '<tr>
<th>' . $this->lr . '文法分析</th>
<th></th>
</tr>
</table>';
    echo '<table><tr><th>状态转移表</th></tr></table>';
    echo '<table width="100%" border="1">';
    echo '<tr>
<th rowspan="2">状态</th>
<th colspan="'" . count>Action</th>
<th colspan="'" . count>Goto</th>
<th rowspan="2">DFA</th>
</tr>';
    echo '<tr>
<th>' . join('</th>
<th>', $leay) . '</th>
<th>'. join('</th>
<th>', $identifier) . '</th>
</tr>';
    foreach($this->action as $i=>$item) {
        echo "<tr>
<td>$i</td>";
        foreach($leay as $v) {
            $s = isset($item[$v]) ? join(',', $item[$v]) : ' ';
            if(strpos($s, ',')) $s = "<font color="red">$s</font>";
            echo "<td>$s</td>";
        }
        foreach($identifier as $v) {
            $s = isset($this->goto[$i][$v]) ? $this->goto[$i][$v] : ' ';
            echo "<td>$s</td>";
        }
        echo '<td>' . $this->showDFA($i) .'</td>';
        echo '</tr>';
    }            
    echo '</table>';

    if($in) {
        echo '<table><tr>
<th>测试字符串</th>';
        echo '<td>' . trim($in, '#') . '</td>
</tr></table>';
        echo '<table width="100%" border="1">';
        echo '<tr>
<th>步骤</th>
<th>状态栈</th>
<th>符号栈</th>
<th>剩余字符</th>
<th>生成式</th>
</tr>';
        $this->lr_start($in);
        echo '</table>';
    }
  }
  function showDFA($i) {
    $res = array();
    foreach($this->closure[$i] as $dfa) {
        $rule = $this->rule[$dfa['rule']];
        array_splice($rule, $dfa['offs'], 0, '·');
        array_splice($rule, 1, 0, '→');
        if(isset($dfa['target'])) $rule[] = " [$dfa[target]]";
        $res[] = join('', $rule);
    }
    return join('; ', $res);
  }
}


for($i=1; $i$i";
$n = current($_GET) or $n = 1;
$S = '';
include "$n.txt";

$p = new grammar($G);
$p->report($S);
</count>

ttrie.php

PHP code
		<?php class TTrie {
  protected $buffer = array();
  protected $dict = array( array() );
  protected $input = 0; //字符串当前偏移
  protected $backtracking = 0; //字符串回溯位置
  public $debug = 0;
  public $savematch = 1;

  function set($word, $action=&#39;&#39;) {
    if(is_array($word)) {
        foreach($word as $k=>$v) $this->set($k, $v);
        return;
    }
    $p = count($this->dict);
    $cur = 0; //当前节点号
    foreach(str_split($word) as $c) {
        if (isset($this->dict[$cur][$c])) { //已存在就下移
            $cur = $this->dict[$cur][$c];
            continue;
        }
        $this->dict[$p]= array(); //创建新节点
        $this->dict[$cur][$c] = $p; //在父节点记录子节点号
        $cur = $p; //把当前节点设为新插入的
        $p++;
    }
    $this->dict[$cur]['acc'] = $action; //一个词结束,标记叶子节点
  }

  function match($s) {
    $ret = array();
    $cur = 0; //当前节点,初始为根节点
    $i =& $this->input; //字符串当前偏移
    $p =& $this->backtracking; //字符串回溯位置
    $s .= "\0"; //附加结束符
    $len = strlen($s);
    $buf = '';
    while($i dict[$cur][$c])) { //如果存在
            $cur = $this->dict[$cur][$c]; //转到对应的位置
            if(isset($this->dict[$cur][$s[$i+1]])) {//检查下一个字符是否也能匹配,长度优先
                $i++;
                continue;
            }
            if(isset($this->dict[$cur]['acc'])) { //是叶子节点,单词匹配!
                if($buf) {
                    $this->buffer[] = $buf;
                    $buf = '';
                }
                if($this->savematch) $this->buffer[] = substr($s, $p, $i - $p + 1); //取出匹配位置和匹配的词

                $ar = explode(',', $this->dict[$cur]['acc']);
                call_user_func_array( array($this, array_shift($ar)), $ar );

                $p = $i + 1; //设置下一个回溯位置
                $cur = 0; //重置当前节点为根节点
            }
        } else { //不匹配
            $buf .= $s{$p}; //substr($s, $p, $i - $p + 1); //保存未匹配位置和未匹配的内容
            $cur = 0; //重置当前节点为根节点
            $i = $p; //把当前偏移设为回溯位置
            $p = $i + 1; //设置下一个回溯位置
        }
        $i++; //下一个字符
    }
    if(trim($buf, "\0")) $this->buffer[] = trim($buf, "\0");
  }

  function __call($method, $param) {
    if($this->debug) printf("偏移:%d 回溯:%d\n", $this->input, $this->backtracking);

  }
}



Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn