>  기사  >  백엔드 개발  >  对编译原理有兴趣的进

对编译原理有兴趣的进

WBOY
WBOY원래의
2016-06-23 14:38:08827검색

本帖最后由 xuzuning 于 2012-08-27 16:40:33 编辑

最近尝试做了文法分析的东东,问题较多。
请提建议。代码放不下,分两页。下载地址  http://download.csdn.net/detail/xuzuning/4529066
include 'ttrie.php';class Rule extends TTrie {  public $rule = array();  public $savematch = 0;  function __construct($s='') {	$this->set( array(		' ' => 'Separated',		"\r\n" => 'set_rule',		"\n" => 'set_rule',		"\t" => 'Separated',		'->' => 'Separated',		'→' => 'Separated',		'|' => 'set_parallel_rule',		));	$this->match($s);	if($this->rule[0][0] == $this->rule[0][1]) {		if(count($this->rule[0]) == 2) $this->rule[0][0] .= "'";		else array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));	}else {		$c = $this->rule[0][0];		$n = 0;		foreach($this->rule as $r) if($r[0] == $c) $n++;		if($n > 1) array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0]));	}  }  function Separated() {  }  function set_rule() {	$this->rule[] = $this->buffer;	$this->buffer = array();  }  function set_parallel_rule() {	$t = $this->buffer[0];	$this->set_rule();	$this->buffer[] = $t;  }}class Grammar {  var $closure = array();  var $first = array();  var $follow = array();  var $rule = array();  var $identifier = array();  var $leay = array();  var $forecast = array();  var $stack = array();  var $ll = 'LL(0)';  var $lr = 'LR(0)';  function __construct($s='') {	$p = new Rule($s);	$this->rule = $p->rule;	$this->set_grammar();  }  function set_grammar() {	foreach($this->rule as $rule) {		if(! in_array($rule[0], $this->identifier)) $this->identifier[] = $rule[0];	}	foreach($this->rule as $rule) {		foreach($rule as $v)			if(! in_array($v, $this->identifier) && ! in_array($v, $this->leay))				$this->leay[] = $v;	}	$this->set_first();	$this->set_follow();	$this->set_closure();	$this->set_select();	$this->set_forecast();  }  function set_first() {	foreach($this->rule as $rule) $this->first[$rule[0]] = array();	//直接收取 形如U->a…的产生式(其中a是终结符),把a收入到First(U)中	foreach($this->rule as $v) {		if(in_array($v[1], $this->leay)) $this->first[$v[0]][] = $v[1];	}	//反复传递 形入U->P1P2P3…Pn的产生式(其中P是非终结符),应先把First(P1)中的全部内容传送到First(U)中,如果P1中有ε,把First(P2)中的内容传送到First(U)中,类推直到Pi中无ε	do {		$t = serialize($this->first);		foreach($this->rule as $rule) {			for($i=1; $i<count($rule); $i++) {				$v = $rule[$i];				if(in_array($v, $this->identifier)) {					$this->first[$rule[0]] = array_unique(array_merge($this->first[$rule[0]], $this->first[$v]));					if(! in_array('#', $this->first[$v])) break;				}else break;			}		}	}while($t != serialize($this->first));  }  function set_follow() {	foreach($this->rule as $rule) $this->follow[$rule[0]] = array();	//直接收取 形如 …Ua… 的,把a直接收入到Follow(U)中	foreach($this->rule as $rule) {		for($i=1; $i<count($rule)-1; $i++) {			if(in_array($rule[$i], $this->identifier) && in_array($rule[$i+1], $this->leay))				$this->follow[$rule[$i]][] = $rule[$i+1];		}		if(in_array($rule[$i], $this->identifier)) $this->follow[$rule[$i]][] = '#';	}	foreach($this->follow as &$v) if(! $v) $v[] = '#';	//直接收取 形如 …UP…(P是非终结符)的,把First(P)中非ε收入到Follow(U)中	foreach($this->rule as $rule) {		for($i=1; $i<count($rule)-1; $i++) {			if(in_array($rule[$i], $this->identifier) && in_array($rule[$i+1], $this->identifier)) {				$this->follow[$rule[$i]] = array_unique(array_merge($this->follow[$rule[$i]], array_diff($this->first[$rule[$i+1]], array('#'))));			}		}	}	//反复传递 形如U->aP的(P是非终结符)或U->aPQ(P,Q为非终结符且Q中含ε),应把Follow(U)中的全部内容传送到Follow(P)中	do {		$t = serialize($this->follow);		foreach($this->rule as $rule) {			$s = $rule[0];			$d = end($rule);			if(in_array($d, $this->leay)) continue;			$p = prev($rule);			if(in_array($p, $this->leay)) $this->follow[$d] = array_unique(array_merge($this->follow[$d], $this->follow[$s]));			elseif(in_array('#', $this->follow[$d])) $this->follow[$p] = array_unique(array_merge($this->follow[$p], $this->follow[$s]));		}	}while($t != serialize($this->follow));  }  function set_closure() {	$shift = array();	$this->closure[0][] = array('offs' => 1, 'rule' => 0);	for($i=0 ; $i < count($this->closure); $i++) {		$cnt = count($this->closure);		//构造闭包 closure		$ex = array();		$j = 0;		$tmp = array();		do {			$size = count($this->closure[$i]);			for($j=0; $j<count($this->closure[$i]); $j++) {				$dfa = $this->closure[$i][$j];				$rule = $this->rule[$dfa['rule']];				if(isset($rule[$dfa['offs']])) {					$ch = $ex[] = $rule[$dfa['offs']];				}				foreach($this->rule as $r=>$rule) {					if(in_array($rule[0], $ex)) {						$t = array('offs' => 1, 'rule' => $r);						if(!isset($tmp[$r][1])) $this->closure[$i][] = $t;						$tmp[$r][1] = 1;					}				}			}		}while(count($this->closure[$i]) != $size); //直到不再增大		//判断状态转向 go		$out = array();		foreach($this->closure[$i] as $k=>$dfa) {			$rule = $this->rule[$dfa['rule']];			if(isset($rule[$dfa['offs']])) {				$t = "$dfa[rule],$dfa[offs]";				$ch = $rule[$dfa['offs']];				$this->closure[$i][$k]['char'] = $ch;				if(isset($out[$ch])) $shift[$t] = $out[$ch];				if(isset($shift[$t])) {					$this->closure[$i][$k]['target'] = $shift[$t];					$dfa['offs']++;					if(!$this->in_closure($dfa, $this->closure[$shift[$t]])) $this->closure[$shift[$t]][] = $dfa;				} else {					$cnt = count($this->closure);					$this->closure[$i][$k]['target'] = $cnt;					$shift[$t] = $cnt;					$dfa['offs']++;					$this->closure[count($this->closure)][] = $dfa;					$out[$ch] = $cnt;				}			}		}		//构造状态转换表		foreach($this->closure[$i] as $k=>$dfa) {			if(isset($dfa['target'])) {				$v = $dfa['char'];				if(in_array($v, $this->identifier)) $this->goto[$i][$v] = $dfa['target'];				else {					$this->action[$i][$v][] = "S$dfa[target]";					$this->request[$i][$v] = $dfa['rule'];				}			} else {				$ch = $this->rule[$dfa['rule']][0];				foreach($this->follow[$ch] as $v) {					$this->action[$i][$v][] = "r$dfa[rule]";					$this->request[$i][$v] = $dfa['rule'];				}			}		}		foreach($this->action[$i] as $c=>$v) {			$v = array_unique($v);			if(count($v) > 1) $this->lr = 'SLR(1)';			$this->action[$i][$c] = $v;		}	}  }  function in_closure($t, $s) {	foreach($s as $r) if($t['offs'] == $r['offs'] && $t['rule'] == $r['rule']) return true;	return false;	return in_array(serialize($t), array_map('serialize', $s));  }  function set_select() {	foreach($this->rule as $i=>$rule) {		$y = array($rule[1]);		if(in_array($y[0], $this->leay)) {			if($y[0] != '#') {				$this->select[$i] = $y;				continue;			}		}else $y = $this->first[$rule[1]];		$x = $this->follow[$rule[0]];		//SELECT(X->Y)=(FIRST(Y)-{ε})并FOLLOW(X)		$this->select[$i] = array_unique( array_merge(array_diff($y, array('#')), $x) );	}  }  /**   * 构造预测分析表   **/  function set_forecast() {	foreach($this->select as $i=>$r) {		$c = $this->rule[$i][0];		$v = array_reverse(array_slice($this->rule[$i], 1));		foreach($r as $k) {			$this->forecast[$c][$k][] = $v;		}	}	//检查冲突	foreach($this->forecast as $c=>$r) {		foreach($r as $k) {			if(count($k) > 1) {				$this->ll = 'LL(1)';			}		}	}  }

回复讨论(解决方案)

  function ll_start($s) {	$t = array();	foreach($this->rule as $rule) if($rule[0] == $rule[1]) $t[] = $rule;	if($t) {		foreach($t as $rule) printf('<tr><td colspan=4>%s 存在左递归</td></tr>', preg_replace('/ /', ' → ', join(' ', $rule), 1));		return;	}	$stack = array('#', key($this->forecast));	$i = 0;	$step = 1;	$timeout = 10 * strlen($s);	while($stack && $i < strlen($s) && $timeout--) {		$r = end($stack);		if($r == $s{$i}) {			$msg = $r == '#' ? '成功' : "$r 匹配";		}elseif(isset($this->forecast[$r][$s{$i}])) $msg = $r . ' → ' . join(' ', array_reverse($this->forecast[$r][$s{$i}][0]));		else $msg = '错误';		printf("<tr><td>%d</td><td>%s</td><td>%s</td><td>%s</td></tr>", $step++, substr(join('', $stack), -50), substr($s, $i), $msg);		if($r == $s{$i}) {			array_pop($stack);			$i++;		}elseif(isset($this->forecast[$r][$s{$i}])) {			array_pop($stack);			if(current($this->forecast[$r][$s{$i}][0]) != '#')				$stack = array_merge($stack, $this->forecast[$r][$s{$i}][0]);		}else break;	}  }  function lr_start($s) {	$State = array(0); //状态栈	$Symbol = array('#'); //符号栈	$i = 0;	$step = 1;	$timeout = 10 * strlen($s);	while($i < strlen($s) && $timeout--) {		$ch = $s{$i};		$sp = end($State);		$msg = substr($s, $i);		if(isset($this->action[$sp][$ch]) && $this->action[$sp][$ch][0] == 'r0') $msg = 'acc';		if(isset($this->request[$sp][$ch])) $request = preg_replace('/ /', ' → ', join(' ', $this->rule[$this->request[$sp][$ch]]), 1);		else $request = 'error';		printf("<tr><th>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>", $step++, substr(join('', $State), -50), join('', $Symbol), $msg, $request);		if(isset($this->action[$sp][$ch]) || isset($this->goto[$sp][$ch])) {			$t = isset($this->action[$sp][$ch]) ? $this->action[$sp][$ch][0] : $this->goto[$sp][$ch];			$n = substr($t, 1) + 0;			if($t{0} == 'r') {				for($j=0; $j<count($this->rule[$n])-1; $j++) {					array_pop($State);					array_pop($Symbol);				}				if($n == 0) break;				$c = $Symbol[] = $this->rule[$n][0];				$State[] = $this->goto[end($State)][$c];			}elseif($t{0} == 'S') {				$State[] = $n;				$Symbol[] = $ch;				$i++;			}else ;		}else break;	}  }  function report($in='') {	if($in) $in = trim($in, '#') . '#';	echo '<style>table {font-size:10pt</style>';	echo '<table ><tr><td><b>文法</b></td></tr>';	foreach($this->rule as $rule) {		echo '<tr><td>';		echo preg_replace('/ /', ' → ', join(' ', $rule), 1);		echo '</td></tr>';	}	echo '</table>';	$identifier = $this->identifier;	echo '<table><tr><td><b>标识符</b></td>';	echo '<td>' . join(' ', $identifier) . '</td></tr>';	echo '</table>';	$leay = array_diff($this->leay, array('#'));	$leay[] = '#';	echo '<table><tr><td><b>终结符</b></td>';	echo '<td>' . join(' ', $leay) . '</td></tr>';	echo '</table>';	echo '<table width=60% border=1><tr><th>标识符</th><th>推出空</th><th>FIRST集</th><th>FOLLOW集</th></tr>';	foreach($identifier as $ch) {		echo '<tr><td>' . $ch . '</td>';		echo '<td>' . (in_array('#', $this->first[$ch]) ? 'True' : 'false') . '</td>';		echo '<td>' . join(' ', $this->first[$ch]) . '</td>';		echo '<td>' . join(' ', $this->follow[$ch]) . '</td>';		echo '</tr>';	}	echo '</table>';	echo '<table width=100%><tr><td> </td></tr><tr><th>' . $this->ll . '文法分析</th></tr></table>';	echo '<table width=60% border=1><tr><th>产生式</th><th>Select集</th></tr>';	foreach($this->rule as $i=>$rule) {		echo '<tr>';		echo '<td>' . preg_replace('/ /', ' → ', join(' ', $rule), 1) . '</td>';		echo '<td>' . join(' ', $this->select[$i]) . '</td>';		echo '</tr>';	}	echo '</table>';	$forecast = $this->forecast;	echo '<table width=60%><tr><td><b>预测分析表</b></td></tr>';	echo '<tr><td><table width=100% border=1>';	echo '<tr><th> </th><th>' . join('</th><th>', $leay) . '</th></tr>';	foreach($identifier as $ch) {		echo '<tr><td>' . $ch . '</td>';		foreach($leay as $v) {			$s = '';			if(isset($forecast[$ch][$v])) {				foreach($forecast[$ch][$v] as $t) {					if($s) $s .= '<br />';					$s .= $ch . ' → '. join(' ', array_reverse($t));				}				if(count($forecast[$ch][$v]) > 1) $s = "<font color=red>$s</font>";			}else $s .= ' ';			echo '<td>' . $s . '</td>';		}		echo '<tr>';			}	echo '</table></td></tr>';	echo '</table>';	if($in) {		echo '<table><tr><th>测试字符串</th>';		echo '<td>' . trim($in, '#') . '</td></tr></table>';		echo '<table width=100% border=1>';		echo '<tr><th>步骤</th><th>分析栈</th><th>剩余字符</th><th>生成式或匹配</th></tr>';		$this->ll_start($in);		echo '</table>';	}	echo '<table width=100%><tr><td> </td></tr>';	echo '<tr><th>' . $this->lr . '文法分析<th></tr></table>';	echo '<table><tr><th>状态转移表</th></tr></table>';	echo '<table width=100% border=1>';	echo '<tr><th rowspan=2>状态</th><th colspan=' . count($leay) . '>Action</th><th colspan=' . count($identifier) . '>Goto</th><th rowspan=2>DFA</th></tr>';	echo '<tr><th>' . join('</th><th>', $leay) . '</th><th>'. join('</th><th>', $identifier) . '</th></tr>';	foreach($this->action as $i=>$item) {		echo "<tr><td>$i</td>";		foreach($leay as $v) {			$s = isset($item[$v]) ? join(',', $item[$v]) : ' ';			if(strpos($s, ',')) $s = "<font color=red>$s</font>";			echo "<td>$s</td>";		}		foreach($identifier as $v) {			$s = isset($this->goto[$i][$v]) ? $this->goto[$i][$v] : ' ';			echo "<td>$s</td>";		}		echo '<td>' . $this->showDFA($i) .'</td>';		echo '</tr>';	}				echo '</table>';	if($in) {		echo '<table><tr><th>测试字符串</th>';		echo '<td>' . trim($in, '#') . '</td></tr></table>';		echo '<table width=100% border=1>';		echo '<tr><th>步骤</th><th>状态栈</th><th>符号栈</th><th>剩余字符</th><th>生成式</th></tr>';		$this->lr_start($in);		echo '</table>';	}  }  function showDFA($i) {	$res = array();	foreach($this->closure[$i] as $dfa) {		$rule = $this->rule[$dfa['rule']];		array_splice($rule, $dfa['offs'], 0, '&middot;');		array_splice($rule, 1, 0, '→');		if(isset($dfa['target'])) $rule[] = " [$dfa[target]]";		$res[] = join('', $rule);	}	return join('; ', $res);  }}for($i=1; $i<=count(glob('*.txt')); $i++) echo " <a href=?id=$i>$i</a>";$n = current($_GET) or $n = 1;$S = '';include "$n.txt";$p = new grammar($G);$p->report($S);

ttrie.php
<?phpclass TTrie {  protected $buffer = array();  protected $dict = array( array() );  protected $input = 0; //字符串当前偏移  protected $backtracking = 0; //字符串回溯位置  public $debug = 0;  public $savematch = 1;  function set($word, $action='') {	if(is_array($word)) {		foreach($word as $k=>$v) $this->set($k, $v);		return;	}	$p = count($this->dict);	$cur = 0; //当前节点号	foreach(str_split($word) as $c) {		if (isset($this->dict[$cur][$c])) { //已存在就下移			$cur = $this->dict[$cur][$c];			continue;		}		$this->dict[$p]= array(); //创建新节点		$this->dict[$cur][$c] = $p; //在父节点记录子节点号		$cur = $p; //把当前节点设为新插入的		$p++;	}	$this->dict[$cur]['acc'] = $action; //一个词结束,标记叶子节点  }  function match($s) {	$ret = array();	$cur = 0; //当前节点,初始为根节点	$i =& $this->input; //字符串当前偏移	$p =& $this->backtracking; //字符串回溯位置	$s .= "\0"; //附加结束符	$len = strlen($s);	$buf = '';	while($i < $len) {		$c = $s{$i};		if(isset($this->dict[$cur][$c])) { //如果存在			$cur = $this->dict[$cur][$c]; //转到对应的位置			if(isset($this->dict[$cur][$s[$i+1]])) {//检查下一个字符是否也能匹配,长度优先				$i++;				continue;			}			if(isset($this->dict[$cur]['acc'])) { //是叶子节点,单词匹配!				if($buf) {					$this->buffer[] = $buf;					$buf = '';				}				if($this->savematch) $this->buffer[] = substr($s, $p, $i - $p + 1); //取出匹配位置和匹配的词				$ar = explode(',', $this->dict[$cur]['acc']);				call_user_func_array( array($this, array_shift($ar)), $ar );				$p = $i + 1; //设置下一个回溯位置				$cur = 0; //重置当前节点为根节点			}		} else { //不匹配			$buf .= $s{$p}; //substr($s, $p, $i - $p + 1); //保存未匹配位置和未匹配的内容			$cur = 0; //重置当前节点为根节点			$i = $p; //把当前偏移设为回溯位置			$p = $i + 1; //设置下一个回溯位置		}		$i++; //下一个字符	}	if(trim($buf, "\0")) $this->buffer[] = trim($buf, "\0");  }  function __call($method, $param) {	if($this->debug) printf("偏移:%d 回溯:%d\n", $this->input, $this->backtracking);  }}

相关数据文件
1.txt

<?php$G =<<< TXTS -> a HH -> a M dH -> dM -> A bM -> #A -> a MA ->eTXT;$S = 'aaaebbd#';
2.txt
<?php$G =<<< TXTE ->  EE ->  E + TE ->  TT ->  T * FT ->  FF ->  ( E )F ->  iTXT;$S = 'i*(i+i)#';
3.txt
<?php$G =<<< TXTE → ( L )E → aL → L , EL → ETXT;$S = '((a),a,(a,a))';
4.txt
<?php$G =<<< TXTS' → SS → A BS → b CA → #A → bB → #B → a DC → A DC → bD → a SD → cTXT;$zS = 'baac';
5.txt
<?php$G =<<< TXTS → i S e S | i S | aTXT;$S = 'iiaea#';
6.txt
<?php$G =<<< TXTS → a AS → dA → b A SA → #TXT;$S = 'abbb';
7.txt
<?php$G =<<< TXTS' → SS → a SS → b SS → aTXT;$S = 'aabba';
8.txt
<?php$G =<<< TXTS → A a | bA → A c | S d | #TXT;$G1 =<<< TXTS → A a | bA → b d AA → c A | a d A | # TXT;$S = 'bdadcca';

有兴趣,可惜编译原理完全忘记了。
弱弱地求教下:高级语言做编译器,效率会不会太低下了。。

写点文字,画点图就更好了。看代码,太费劲

写点文字,画点图就更好了。看代码,太费劲
你把代码复制下来运行一下

我打包上传了的,但一时看不见

偶都不知道编译原理是啥
有时间慢慢研读

唠叨是科学家
我们是一线生产工人
进来站队围观科学家研究

偶都不知道编译原理是啥
有时间慢慢研读

好东西,下午载后,好好看看

编译原理纯粹没学过!

回去翻翻  编译原理 ,清华大学出版社出版的,看看就知道了
词法分析,语法分析,语义分析....刚毕业还有点印象的飘过

yacc.lex工具很强大

最好有注释

最好有注释

你这个太学院派了。

貌似 你弄的复杂了

可以依这lua的代码来画瓢。他的那段代码很简洁的。只是层级有点多好绕!

这语言也太高级了, 不过性能上不用担心. 

呃,求注释。

牛,看不懂

先描述一下你的文法,要不怎么叫人看得懂?

关注下!

多谢提供
学习了

不?喔!

快要学了,会多多关注

站内大神果然很多,小弟初到贵宝地,望大家多多关照

不懂编译,所以关注

懂编译原理的都很强悍!!

不好意思,回帖赚积分。

当时编译的课程设计就是这个,你要是早两年放上来就好了,我就不用自己做了嘛~
20分直接算入总分呀有木有有木有~

牛,看不懂

对头。

先描述一下你的文法,要不怎么叫人看得懂?

支持的是这句。上一楼,引用错了。

要不要先描述下文法啊,外行表示直接从代码里猜鸭梨很大。。

朋友 不是中国人吧?

形式语言正学呢,刚接触文法,编译原理下学期才学呢

看了大半?小??於明白了,做??????注....

完全看不懂

下载次数:33
回复次数:45

没有人对输出的结果提出异议
没有人对程序的改进提出建议
没有人对算法上的问题提出质疑

使用的数据都是网上找的(当然也是抄自教科书)
程序中未对出现的冲突做任何处理(只是罗列)

可见这些文法例子是经过精心设计的,怎么弄都不会错

只看了LL(1)
1中冲突选择非#
2,3,8 为什么不消除左递归?
5,7 要处理提取因子.

LR晚上再看。

编译原理真心看不懂

只看了LL(1)
1中冲突选择非#
2,3,8 为什么不消除左递归?
5,7 要处理提取因子.

LR晚上再看。 谢谢
其实我也算是初学,以前看过,根本就没弄明白

这回算是明白了点,对消除左递归、提取因子的算法实现这在研究。所以程序你并没有

PHP哦,,,,,

想想大学那会这门课程还考的满分。现在都忘光了
去复习下

看了看,跟自己当年写的1+1=2完全不是那么回事啊!

挺复杂的算法,没想到做了3年业务系统还有一部分看不懂

得好好研究算法了

完全看不懂。研究中。。

我最近也在自己做一个很简易的两步编译器,楼主多交流多沟通啊


程序是直接运行Grammar.php文件吧,运行出来就是上图。我表示看不懂,压力很大。

我就看到了$

超强悍的词法分析工具:flex
超强悍的语法分析工具:bison,BYACC。
PHP是解释型的编程语言,本来语法就是仿真C语言,做语法分析太慢。
直接上C语言+flex+BISON吧

同用LR分析法写过语法分析器的路过,楼主你的程序对较大的文法支持的怎么样,速度怎么样?
我这有个自己写的SQL语言的部分文法,你要不要试一试?

算法很重要 在学习!

好东西,下午载后,好好看看

现在刚开始接触php,努力向上爬

好眼熟的东西 不过似乎早就还给老师了- - 

没看懂

拿php写,服了

貌似好高深的样子,php小白,就会做简单的东西,mark一个,学习下

哎,这东西在大学上了一个学期之后就还给老师了

完全眼花@@

完全眼花@@

太复杂拉,看不懂

哈哈哈,楼主整好了给个连接呗,在线编译。

没学过,暂时也用不到。以后再说吧,精力有限。

运行结果看着很熟悉,可就是看不懂。。。

仰视中……

写多点注释嘛,这样看起来很累的

没看明白。
你是基于什么算法实现的?

唠叨前辈你好

성명:
본 글의 내용은 네티즌들의 자발적인 기여로 작성되었으며, 저작권은 원저작자에게 있습니다. 본 사이트는 이에 상응하는 법적 책임을 지지 않습니다. 표절이나 침해가 의심되는 콘텐츠를 발견한 경우 admin@php.cn으로 문의하세요.