最近尝试做了文法分析的东东,问题较多。
请提建议。代码放不下,分两页。
下载地址 http://download.csdn.net/detail/xuzuning/4529066
include 'ttrie.php'; class Rule extends TTrie { public $rule = array(); public $savematch = 0; function __construct($s='') { $this->set( array( ' ' => 'Separated', "\r\n" => 'set_rule', "\n" => 'set_rule', "\t" => 'Separated', '->' => 'Separated', '→' => 'Separated', '' => 'set_parallel_rule', )); $this->match($s); if($this->rule[0][0] == $this->rule[0][1]) { if(count($this->rule[0]) == 2) $this->rule[0][0] .= "'"; else array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0])); }else { $c = $this->rule[0][0]; $n = 0; foreach($this->rule as $r) if($r[0] == $c) $n++; if($n > 1) array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0])); } } function Separated() { } function set_rule() { $this->rule[] = $this->buffer; $this->buffer = array(); } function set_parallel_rule() { $t = $this->buffer[0]; $this->set_rule(); $this->buffer[] = $t; } } class Grammar { var $closure = array(); var $first = array(); var $follow = array(); var $rule = array(); var $identifier = array(); var $leay = array(); var $forecast = array(); var $stack = array(); var $ll = 'LL(0)'; var $lr = 'LR(0)'; function __construct($s='') { $p = new Rule($s); $this->rule = $p->rule; $this->set_grammar(); } function set_grammar() { foreach($this->rule as $rule) { if(! in_array($rule[0], $this->identifier)) $this->identifier[] = $rule[0]; } foreach($this->rule as $rule) { foreach($rule as $v) if(! in_array($v, $this->identifier) && ! in_array($v, $this->leay)) $this->leay[] = $v; } $this->set_first(); $this->set_follow(); $this->set_closure(); $this->set_select(); $this->set_forecast(); } function set_first() { foreach($this->rule as $rule) $this->first[$rule[0]] = array(); //直接收取 形如U->a…的产生式(其中a是终结符),把a收入到First(U)中 foreach($this->rule as $v) { if(in_array($v[1], $this->leay)) $this->first[$v[0]][] = $v[1]; } //反复传递 形入U->P1P2P3…Pn的产生式(其中P是非终结符),应先把First(P1)中的全部内容传送到First(U)中,如果P1中有ε,把First(P2)中的内容传送到First(U)中,类推直到Pi中无ε do { $t = serialize($this->first); foreach($this->rule as $rule) { for($i=1; $i<count if>identifier)) { $this->first[$rule[0]] = array_unique(array_merge($this->first[$rule[0]], $this->first[$v])); if(! in_array('#', $this->first[$v])) break; }else break; } } }while($t != serialize($this->first)); } function set_follow() { foreach($this->rule as $rule) $this->follow[$rule[0]] = array(); //直接收取 形如 …Ua… 的,把a直接收入到Follow(U)中 foreach($this->rule as $rule) { for($i=1; $i<count if>identifier) && in_array($rule[$i+1], $this->leay)) $this->follow[$rule[$i]][] = $rule[$i+1]; } if(in_array($rule[$i], $this->identifier)) $this->follow[$rule[$i]][] = '#'; } foreach($this->follow as &$v) if(! $v) $v[] = '#'; //直接收取 形如 …UP…(P是非终结符)的,把First(P)中非ε收入到Follow(U)中 foreach($this->rule as $rule) { for($i=1; $i<count if>identifier) && in_array($rule[$i+1], $this->identifier)) { $this->follow[$rule[$i]] = array_unique(array_merge($this->follow[$rule[$i]], array_diff($this->first[$rule[$i+1]], array('#')))); } } } //反复传递 形如U->aP的(P是非终结符)或U->aPQ(P,Q为非终结符且Q中含ε),应把Follow(U)中的全部内容传送到Follow(P)中 do { $t = serialize($this->follow); foreach($this->rule as $rule) { $s = $rule[0]; $d = end($rule); if(in_array($d, $this->leay)) continue; $p = prev($rule); if(in_array($p, $this->leay)) $this->follow[$d] = array_unique(array_merge($this->follow[$d], $this->follow[$s])); elseif(in_array('#', $this->follow[$d])) $this->follow[$p] = array_unique(array_merge($this->follow[$p], $this->follow[$s])); } }while($t != serialize($this->follow)); } function set_closure() { $shift = array(); $this->closure[0][] = array('offs' => 1, 'rule' => 0); for($i=0 ; $i closure); $i++) { $cnt = count($this->closure); //构造闭包 closure $ex = array(); $j = 0; $tmp = array(); do { $size = count($this->closure[$i]); for($j=0; $j<count>closure[$i]); $j++) { $dfa = $this->closure[$i][$j]; $rule = $this->rule[$dfa['rule']]; if(isset($rule[$dfa['offs']])) { $ch = $ex[] = $rule[$dfa['offs']]; } foreach($this->rule as $r=>$rule) { if(in_array($rule[0], $ex)) { $t = array('offs' => 1, 'rule' => $r); if(!isset($tmp[$r][1])) $this->closure[$i][] = $t; $tmp[$r][1] = 1; } } } }while(count($this->closure[$i]) != $size); //直到不再增大 //判断状态转向 go $out = array(); foreach($this->closure[$i] as $k=>$dfa) { $rule = $this->rule[$dfa['rule']]; if(isset($rule[$dfa['offs']])) { $t = "$dfa[rule],$dfa[offs]"; $ch = $rule[$dfa['offs']]; $this->closure[$i][$k]['char'] = $ch; if(isset($out[$ch])) $shift[$t] = $out[$ch]; if(isset($shift[$t])) { $this->closure[$i][$k]['target'] = $shift[$t]; $dfa['offs']++; if(!$this->in_closure($dfa, $this->closure[$shift[$t]])) $this->closure[$shift[$t]][] = $dfa; } else { $cnt = count($this->closure); $this->closure[$i][$k]['target'] = $cnt; $shift[$t] = $cnt; $dfa['offs']++; $this->closure[count($this->closure)][] = $dfa; $out[$ch] = $cnt; } } } //构造状态转换表 foreach($this->closure[$i] as $k=>$dfa) { if(isset($dfa['target'])) { $v = $dfa['char']; if(in_array($v, $this->identifier)) $this->goto[$i][$v] = $dfa['target']; else { $this->action[$i][$v][] = "S$dfa[target]"; $this->request[$i][$v] = $dfa['rule']; } } else { $ch = $this->rule[$dfa['rule']][0]; foreach($this->follow[$ch] as $v) { $this->action[$i][$v][] = "r$dfa[rule]"; $this->request[$i][$v] = $dfa['rule']; } } } foreach($this->action[$i] as $c=>$v) { $v = array_unique($v); if(count($v) > 1) $this->lr = 'SLR(1)'; $this->action[$i][$c] = $v; } } } function in_closure($t, $s) { foreach($s as $r) if($t['offs'] == $r['offs'] && $t['rule'] == $r['rule']) return true; return false; return in_array(serialize($t), array_map('serialize', $s)); } function set_select() { foreach($this->rule as $i=>$rule) { $y = array($rule[1]); if(in_array($y[0], $this->leay)) { if($y[0] != '#') { $this->select[$i] = $y; continue; } }else $y = $this->first[$rule[1]]; $x = $this->follow[$rule[0]]; //SELECT(X->Y)=(FIRST(Y)-{ε})并FOLLOW(X) $this->select[$i] = array_unique( array_merge(array_diff($y, array('#')), $x) ); } } /** * 构造预测分析表 **/ function set_forecast() { foreach($this->select as $i=>$r) { $c = $this->rule[$i][0]; $v = array_reverse(array_slice($this->rule[$i], 1)); foreach($r as $k) { $this->forecast[$c][$k][] = $v; } } //检查冲突 foreach($this->forecast as $c=>$r) { foreach($r as $k) { if(count($k) > 1) { $this->ll = 'LL(1)'; } } } } </count></count></count></count>
续
function ll_start($s) { $t = array(); foreach($this->rule as $rule) if($rule[0] == $rule[1]) $t[] = $rule; if($t) { foreach($t as $rule) printf('<tr><td colspan="4">%s 存在左递归</td></tr>', preg_replace('/ /', ' → ', join(' ', $rule), 1)); return; } $stack = array('#', key($this->forecast)); $i = 0; $step = 1; $timeout = 10 * strlen($s); while($stack && $i forecast[$r][$s{$i}])) $msg = $r . ' → ' . join(' ', array_reverse($this->forecast[$r][$s{$i}][0])); else $msg = '错误'; printf("<tr> <td>%d</td> <td>%s</td> <td>%s</td> <td>%s</td> </tr>", $step++, substr(join('', $stack), -50), substr($s, $i), $msg); if($r == $s{$i}) { array_pop($stack); $i++; }elseif(isset($this->forecast[$r][$s{$i}])) { array_pop($stack); if(current($this->forecast[$r][$s{$i}][0]) != '#') $stack = array_merge($stack, $this->forecast[$r][$s{$i}][0]); }else break; } } function lr_start($s) { $State = array(0); //状态栈 $Symbol = array('#'); //符号栈 $i = 0; $step = 1; $timeout = 10 * strlen($s); while($i action[$sp][$ch]) && $this->action[$sp][$ch][0] == 'r0') $msg = 'acc'; if(isset($this->request[$sp][$ch])) $request = preg_replace('/ /', ' → ', join(' ', $this->rule[$this->request[$sp][$ch]]), 1); else $request = 'error'; printf("<tr> <th>%d</th> <td>%s</td> <td>%s</td> <td>%s</td> <td>%s</td> </tr>", $step++, substr(join('', $State), -50), join('', $Symbol), $msg, $request); if(isset($this->action[$sp][$ch]) isset($this->goto[$sp][$ch])) { $t = isset($this->action[$sp][$ch]) ? $this->action[$sp][$ch][0] : $this->goto[$sp][$ch]; $n = substr($t, 1) + 0; if($t{0} == 'r') { for($j=0; $j<count>rule[$n])-1; $j++) { array_pop($State); array_pop($Symbol); } if($n == 0) break; $c = $Symbol[] = $this->rule[$n][0]; $State[] = $this->goto[end($State)][$c]; }elseif($t{0} == 'S') { $State[] = $n; $Symbol[] = $ch; $i++; }else ; }else break; } } function report($in='') { if($in) $in = trim($in, '#') . '#'; echo '<style>table {font-size:10pt</style>'; echo '<table> <tr><td><b>文法</b></td></tr>'; foreach($this->rule as $rule) { echo '<tr><td>'; echo preg_replace('/ /', ' → ', join(' ', $rule), 1); echo '</td></tr>'; } echo '</table>'; $identifier = $this->identifier; echo '<table> <tr> <td><b>标识符</b></td>'; echo '<td>' . join(' ', $identifier) . '</td> </tr>'; echo '</table>'; $leay = array_diff($this->leay, array('#')); $leay[] = '#'; echo '<table> <tr> <td><b>终结符</b></td>'; echo '<td>' . join(' ', $leay) . '</td> </tr>'; echo '</table>'; echo '<table width="60%" border="1"> <tr> <th>标识符</th> <th>推出空</th> <th>FIRST集</th> <th>FOLLOW集</th> </tr>'; foreach($identifier as $ch) { echo '<tr> <td>' . $ch . '</td>'; echo '<td>' . (in_array('#', $this->first[$ch]) ? 'True' : 'false') . '</td>'; echo '<td>' . join(' ', $this->first[$ch]) . '</td>'; echo '<td>' . join(' ', $this->follow[$ch]) . '</td>'; echo '</tr>'; } echo '</table>'; echo '<table width="100%"> <tr><td> </td></tr> <tr><th>' . $this->ll . '文法分析</th></tr> </table>'; echo '<table width="60%" border="1"> <tr> <th>产生式</th> <th>Select集</th> </tr>'; foreach($this->rule as $i=>$rule) { echo '<tr>'; echo '<td>' . preg_replace('/ /', ' → ', join(' ', $rule), 1) . '</td>'; echo '<td>' . join(' ', $this->select[$i]) . '</td>'; echo '</tr>'; } echo '</table>'; $forecast = $this->forecast; echo '<table width="60%"> <tr><td><b>预测分析表</b></td></tr>'; echo '<tr><td><table width="100%" border="1">'; echo '<tr> <th> </th> <th>' . join('</th> <th>', $leay) . '</th> </tr>'; foreach($identifier as $ch) { echo '<tr> <td>' . $ch . '</td>'; foreach($leay as $v) { $s = ''; if(isset($forecast[$ch][$v])) { foreach($forecast[$ch][$v] as $t) { if($s) $s .= '<br>'; $s .= $ch . ' → '. join(' ', array_reverse($t)); } if(count($forecast[$ch][$v]) > 1) $s = "<font color="red">$s</font>"; }else $s .= ' '; echo '<td>' . $s . '</td>'; } echo '</tr> <tr>'; } echo '</tr> </table></td></tr>'; echo '</table>'; if($in) { echo '<table><tr> <th>测试字符串</th>'; echo '<td>' . trim($in, '#') . '</td> </tr></table>'; echo '<table width="100%" border="1">'; echo '<tr> <th>步骤</th> <th>分析栈</th> <th>剩余字符</th> <th>生成式或匹配</th> </tr>'; $this->ll_start($in); echo '</table>'; } echo '<table width="100%"> <tr><td> </td></tr>'; echo '<tr> <th>' . $this->lr . '文法分析</th> <th></th> </tr> </table>'; echo '<table><tr><th>状态转移表</th></tr></table>'; echo '<table width="100%" border="1">'; echo '<tr> <th rowspan="2">状态</th> <th colspan="'" . count>Action</th> <th colspan="'" . count>Goto</th> <th rowspan="2">DFA</th> </tr>'; echo '<tr> <th>' . join('</th> <th>', $leay) . '</th> <th>'. join('</th> <th>', $identifier) . '</th> </tr>'; foreach($this->action as $i=>$item) { echo "<tr> <td>$i</td>"; foreach($leay as $v) { $s = isset($item[$v]) ? join(',', $item[$v]) : ' '; if(strpos($s, ',')) $s = "<font color="red">$s</font>"; echo "<td>$s</td>"; } foreach($identifier as $v) { $s = isset($this->goto[$i][$v]) ? $this->goto[$i][$v] : ' '; echo "<td>$s</td>"; } echo '<td>' . $this->showDFA($i) .'</td>'; echo '</tr>'; } echo '</table>'; if($in) { echo '<table><tr> <th>测试字符串</th>'; echo '<td>' . trim($in, '#') . '</td> </tr></table>'; echo '<table width="100%" border="1">'; echo '<tr> <th>步骤</th> <th>状态栈</th> <th>符号栈</th> <th>剩余字符</th> <th>生成式</th> </tr>'; $this->lr_start($in); echo '</table>'; } } function showDFA($i) { $res = array(); foreach($this->closure[$i] as $dfa) { $rule = $this->rule[$dfa['rule']]; array_splice($rule, $dfa['offs'], 0, '·'); array_splice($rule, 1, 0, '→'); if(isset($dfa['target'])) $rule[] = " [$dfa[target]]"; $res[] = join('', $rule); } return join('; ', $res); } } for($i=1; $i$i"; $n = current($_GET) or $n = 1; $S = ''; include "$n.txt"; $p = new grammar($G); $p->report($S); </count>
ttrie.php
<?php class TTrie { protected $buffer = array(); protected $dict = array( array() ); protected $input = 0; //字符串当前偏移 protected $backtracking = 0; //字符串回溯位置 public $debug = 0; public $savematch = 1; function set($word, $action='') { if(is_array($word)) { foreach($word as $k=>$v) $this->set($k, $v); return; } $p = count($this->dict); $cur = 0; //当前节点号 foreach(str_split($word) as $c) { if (isset($this->dict[$cur][$c])) { //已存在就下移 $cur = $this->dict[$cur][$c]; continue; } $this->dict[$p]= array(); //创建新节点 $this->dict[$cur][$c] = $p; //在父节点记录子节点号 $cur = $p; //把当前节点设为新插入的 $p++; } $this->dict[$cur]['acc'] = $action; //一个词结束,标记叶子节点 } function match($s) { $ret = array(); $cur = 0; //当前节点,初始为根节点 $i =& $this->input; //字符串当前偏移 $p =& $this->backtracking; //字符串回溯位置 $s .= "\0"; //附加结束符 $len = strlen($s); $buf = ''; while($i dict[$cur][$c])) { //如果存在 $cur = $this->dict[$cur][$c]; //转到对应的位置 if(isset($this->dict[$cur][$s[$i+1]])) {//检查下一个字符是否也能匹配,长度优先 $i++; continue; } if(isset($this->dict[$cur]['acc'])) { //是叶子节点,单词匹配! if($buf) { $this->buffer[] = $buf; $buf = ''; } if($this->savematch) $this->buffer[] = substr($s, $p, $i - $p + 1); //取出匹配位置和匹配的词 $ar = explode(',', $this->dict[$cur]['acc']); call_user_func_array( array($this, array_shift($ar)), $ar ); $p = $i + 1; //设置下一个回溯位置 $cur = 0; //重置当前节点为根节点 } } else { //不匹配 $buf .= $s{$p}; //substr($s, $p, $i - $p + 1); //保存未匹配位置和未匹配的内容 $cur = 0; //重置当前节点为根节点 $i = $p; //把当前偏移设为回溯位置 $p = $i + 1; //设置下一个回溯位置 } $i++; //下一个字符 } if(trim($buf, "\0")) $this->buffer[] = trim($buf, "\0"); } function __call($method, $param) { if($this->debug) printf("偏移:%d 回溯:%d\n", $this->input, $this->backtracking); } }