北京工业大学编译原理实验报告

更新时间：2024-03-16 12:23:01 阅读量：综合文库文档下载

说明：文章内容仅供预览，部分内容可能不全。下载后的文档，内容与下面显示的完全一致。下载之前请确认下面内容是否您想要的，是否完整无缺。

北京工业大学是211还是985推荐度：
相关推荐

实验报告

计算机学院

课程名称：编译原理实验人学号：110703xx 姓名：xxx 实验完成日期：2014年5月20日报告完成日期：2014年5月20日

实验一词法分析程序的设计与实现 ................................................................................... 3

词法的正规式描述： ............................................................................................................... 3 状态图： ................................................................................................................................... 4 词法分析程序数据结构与算法： ........................................................................................... 4 词法分析算法： ....................................................................................................................... 5 实验结果： ............................................................................................................................... 7 实验中遇到的问题及其解决： ............................................................................................... 8

1、保留字的检测问题： ................................................................................................. 8

2、关于0为首位的数字是int8、int10和int16的判断问题： .................................. 8 3、关于回退的问题： ..................................................................................................... 8

实验二自顶向下的语法分析—递归子程序法 .................................................................... 9

改写后的产生式集合： ........................................................................................................... 9 化简后的语法图： ................................................................................................................... 9 递归子程序算法 ..................................................................................................................... 10 实验结果： ............................................................................................................................. 13 实验中遇到的问题及其解决： ............................................................................................. 14

1、消除左递归，提取左因子之后的E、 T对应的子程序的编写问题： ............... 14

2、缩进的控制： ........................................................................................................... 14

实验三语法制导的三地址代码生成程序 ........................................................................... 15

语法制导定义： ..................................................................................................................... 15 三地址代码生成器的数据结构 ............................................................................................. 16 三地址生成器算法： ............................................................................................................. 17 实验结果： ............................................................................................................................. 21 实验中遇到的问题及其解决： ............................................................................................. 22

1、根据化简后的产生式修改语法制导定义： ........................................................... 22

2、使用真假出口法和继承属性来确定goto的标号： .............................................. 22

实验一词法分析程序的设计与实现

词法的正规式描述：

标识符 <字母>(<字母>|<数字字符>)*

十进制整数 0|(1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)* 八进制整数 0(0|1|2|3|4|5|6|7)(0|1|2|3|4|5|6|7)* 十六进制整数

0(x|X)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)*

运算符和分隔符 + - * / > < = ( ) ; 关键字 if then else while do .

状态图：

If/then/while/do/else保留字a-z/A-Z标识符或保留字其他标识符开始0Int8或int10或int16X/x1-7Int161-9十进制数+ - * / > < = ( ) ; 其他int8运算符和分隔符int10

词法分析程序数据结构与算法：

//单词类 class Token { public:

int type;//种别 string value;//属性值 string name;//单词具体内容 Token() {

type = DEFAULT;

value = NONE_OF_VALUE; }

Token(int type, string value, string name): type(type), value(value), name(name){}

~Token() {} };

词法分析算法：

Token* TokenScan(ifstream &from_file) { char ch;//用于保存从文件中读取的字符 //读第一个字符 int i = 0;

char value[30] = \;//用来存放token的属性值 ch = from_file.get();

while (ch == BLANK || ch == TAB || ch == NEWLINE) { ch = from_file.get(); }

//////////////////////////////////// if (isalpha(ch)) { value[i++] = ch; ch = from_file.get();

////判断后续的是否为IDN的成分（数字或字母） while (isalnum(ch)) { value[i++] = ch; ch = from_file.get(); }

//直到不是IDN成分，回退一字符 from_file.unget();

//TODO：这里加上保留字检测部分

//进行字符串的对比，即可比较出保留字，通过压栈的形式来获得完整的属性值 ////////////////////////////////////

以

下

为

保

留

字

的

检

测

//////////////////////////////////////////////////////////////////////////// if (strcmp(value, WORD_IF) == 0) return new Token(IF, NONE_OF_VALUE, WORD_IF);

if (strcmp(value, WORD_THEN) == 0) return new Token(THEN, NONE_OF_VALUE, WORD_THEN);

if (strcmp(value, WORD_ELSE) == 0) return new Token(ELSE, NONE_OF_VALUE, WORD_ELSE);

if (strcmp(value, WORD_WHILE) == 0) return new Token(WHILE, NONE_OF_VALUE, WORD_WHILE);

if (strcmp(value, WORD_DO) == 0) return new Token(DO, NONE_OF_VALUE, WORD_DO);

return new Token(IDN, value, value); }

////////////////////////////////////

以

下

为

数

字

的

检

测

以

下

为

标

识

符

的

检

测

////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////// if (isdigit(ch)) { value[i++] = ch;

//如果第一个数字是0，则有可能是INT10的0、INT8或INT16 if (ch == '0') {

ch = from_file.get();

if ((ch >= '0' && ch < '8') || ch == 'x' || ch == 'X') { //如果0后面紧跟着数字0-8，则为INT8 if (isdigit(ch)) {

while (ch >= '0' && ch < '8') { value[i++] = ch; ch = from_file.get(); }

from_file.unget();

return new Token(INT8, value, value); }

value[i++] = ch; //到这一步的都是INT16 ch = from_file.get();

while (isdigit(ch) || (ch >= 'a' && ch <= 'f')) { value[i++] = ch;

//TODO:这里没有解决0xrtr的问题 ch = from_file.get(); }

from_file.unget();

return new Token(INT16, value, value);

} else {//0后面的不为0-7的digit或x或X等 8或16进制特征字符，则为10进制的0，回退一个字符

from_file.unget();

return new Token(INT10, value, value); } }

//能到这一步的都是INT10,且不为0打头 ch = from_file.get(); while (isdigit(ch)) { value[i++] = ch; ch = from_file.get(); }

from_file.unget();

return new Token(INT10, value, value); }

//////////////////////////////////// value[i++] = ch;

以

下

为

运

算

符

的

检

测

////////////////////////////////////////////////////////////////////////////

switch (ch) {

case '+': return new Token(ADD, value, \); case '-': return new Token(MINUS, value, \); case '*': return new Token(MUL, value, \); case '/': return new Token(DIC, value, \); case '>': return new Token(MORE, value, \); case '<': return new Token(LESS, value, \); case '=': return new Token(EQU, value, \); case '(': return new Token(LBRAC,value, \); case ')': return new Token(RBRAC, value, \); case ';': return new Token(COMMA, value, \); default:ErrorHandle(from_file); break; }

return new Token(DEFAULT, NONE_OF_VALUE, NONE_OF_VALUE); }

实验结果：

实验中遇到的问题及其解决：

1、保留字的检测问题：

一开始的时候我的想法是遇到if、while、do、then等单词的首字母时即开始划分状态，后来发现这样子判断的分支会特别多，而且效率不是很高，对保留字集合的扩展支持的也不是很好。后来我发现保留字存在于标识符的子集，所以为什么不先判断标识符然后再判断是不是保留字呢？后来我就照着这个思路成功实现了功能。

2、关于0为首位的数字是int8、int10和int16的判断问题：

当读入的第一个字符为0时，可能为int8、可能是int10的0也可能是int16的开头，当下一个字符是0~7时，开始进行int8的匹配；当下一个字符是x或X时，开始进行int16的匹配；当下一个字符为其他字符时，说明这是一个十进制的0(此时还需进行一字节的回退)

3、关于回退的问题：

有些时候需要进行回退，否则不能正常的进行完整个分析过程，需要进行回退的场合为： (1) 匹配标识符(或保留字时)，向后逐字读取的时候当下一个不是字母或数字的时候需要

将读取的字符回退，才能继续向下进行； (2) 判断为int10的0的时候需要一步回退：

else {//0后面的不为0-7的digit或x或X等 8或16进制特征字符，则为10进制的0，回退一个字符

from_file.unget();

return new Token(INT10, value, value); }

(3) 判断数字的时候到最后一个不为数字的都需要回退。

实验二自顶向下的语法分析—递归子程

改写后的产生式集合：

S -> id = E; S -> if C then S; S -> while C do S; C -> E > E; C -> E <= E; E -> T (+ T) *; E -> T (- T) *; T -> F (* F) *; T -> F (/ F) *; F -> (E); F -> id; F -> int8; F -> int10; F -> int16;

化简后的语法图：

+ET*TF

序法

-ET /TF

递归子程序算法

int ProcedureS(ifstream& from_file) { Indent();

cout<<\<

indentation += 4;//子程序开始 Indent();

Token* token = TokenScan(from_file);

if (token->type == IF) {

cout<name<

token = TokenScan(from_file); Indent();

if (token->type == THEN) {

cout<name<

} else if (token->type == WHILE) { cout<name<

token = TokenScan(from_file); Indent();

if (token->type == DO) {

cout<name<

} else if (token->type == IDN) {

cout<<\<name<type == EQU) { Indent();

cout<name<

indentation -= 4;//子程序结束 return 0;

}

int ProcedureC(ifstream& from_file) { Indent();

cout<<\<

indentation += 4;//子程序开始 Token* token;

ProcedureE(from_file);

token = TokenScan(from_file); Indent();

if (token->type == MORE) {

cout<name<

} else if (token->type == LESS) { cout<name<

indentation -= 4;//子程序结束 return 0; }

int ProcedureE(ifstream& from_file) { Indent();

cout<<\<

indentation += 4;//子程序开始 Token* token;

ProcedureT(from_file); while (true) {

token = TokenScan(from_file); if (token->type == ADD) { Indent();

cout<name<

} else if (token->type == MINUS) { Indent();

cout<name<

for (int i = 0; i < (int)token->name.length(); i++) { from_file.unget();//回退 }

indentation -= 4;//子程序结束 return 0; }

}

indentation -= 4;//子程序结束 return 0; }

int ProcedureT(ifstream& from_file) { Indent();

cout<<\<

indentation += 4;//子程序开始 Token* token;

ProcedureF(from_file); while (true) {

token = TokenScan(from_file); if (token->type == MUL) { Indent();

cout<name<

} else if (token->type == DIC) { Indent();

cout<name<

for (int i = 0; i < (int)token->name.length(); i++) { from_file.unget();//回退 }

indentation -= 4;//子程序结束 return 0; } }

indentation -= 4;//子程序结束 return 0;

}int ProcedureF(ifstream& from_file) { Indent();

cout<<\<

indentation += 4;//子程序开始 Token* token;

token = TokenScan(from_file); if (token->type == LBRAC) {

cout<name<

token = TokenScan(from_file); cout<name<

Indent();

if (token->type == IDN) {

cout<<\<name<

if (token->type == INT8) {

cout<<\<value)<

if (token->type == INT10) {

cout<<\<value<

if (token->type == INT16) {

cout<<\<value)<

indentation -= 4;//子程序结束 return 0; }

实验结果：

实验中遇到的问题及其解决：

1、消除左递归，提取左因子之后的E、 T对应的子程序的编写问题：

经过多次测试，我发现在一个expression超过两个运算符的时候我的E、T子程序就只能成功的分析出第一段的式子，后来发现E->T（+T）*类似的产生式没有写循环调用控制，后来在(+T)*的最外层加了一个while(true)循环，然后在while(true)的首行加入了不是‘+’就return的判定，成功解决了问题。

2、缩进的控制：

这个实验中碰到的第二个问题就是语法树缩进的控制问题，最终通过一个全局变量indentation来控制缩进的字符数量，一个Indent()函数来输出缩进(其实就是空格)，控制缩进数量的关键点有两个：一为进入子程序的时候indentation+=4，二为结束子程序的时候indentation-=4。剩下的就是根据调试来选择在哪里输出缩进空格的问题了。

实验三语法制导的三地址代码生成程序

语法制导定义：

产生式 S –> id = E ; S –> if C then S C.true = newlabel; C.false = S.next; S1.next = S.next; S.code = C.code || gen(C.true ‘:’) || S1.code S –> while C do S S.begin = newlabel; C.true = newlabel; C.false = S.next; S1.next = S.begin; S.code = gen(S.begin ‘:’) || C.code || gen(C.true ‘:’) || S1.code || gen(‘goto’ S.begin); C –> E1 > E2 C.code = E1.code || E2.code || gen(‘if’E1.place ‘>’ E2.place ‘goto’ C.true) || gen(‘goto’C.false) C –> E1 < E2 C.code = E1.code || E2.code || gen(‘if’ E1.place ‘<’ E2.place ‘goto’ C.true) || gen(‘goto’ C.false) E –> T1 (+ T2)* E.place = newtemp; (E.code = T1.code||T2.code|| gen(E.place ‘:=’ T1.place ‘+’ T2.place); T1.place = E.place; T1.code = E.code;)+ 语义规则 S.code = E.code || gen(id.place ‘:=’ E.place) E –> T1 (- T2)* E.place = newtemp; (E.code = T1.code||T2.code|| gen(E.place ‘:=’ T1.place ‘-’ T2.place); T1.place = E.place; T1.code = E.code;)+ E.place = T.place; E.code = T.code T.place = F.place; T.code = F.code E –> T T –> F

T –> F1 (* F2)* T.place = newtemp; (T.code = F1.code || F2.code || gen(T.place ‘:=’ F1.place ‘*’ F2.place); F1.place = T.place; F1.code = T.code;)+ T.place = newtemp; (T.code = F1.code || F2.code || gen(T.place ‘:=’ F1.place ‘/’ F2.place); F1.place = T.place; F1.code = T.code;)+ F.place = E.place; F.code = E.code F.place = id.name; F.code = ‘’ F.place = int8.value; F.code = ‘’ F.place = int10.value; F.code = ‘’ F.place = int16.value; F.code = ‘’ T –> F1 (/ F2)* F –> ( E ) F –> id F –> int8 F –> int10 F –> int16 三地址代码生成器的数据结构

typedef struct { /*S的属性定义*/ char code[CODESIZE]; int begin; int next; }AttrS;

typedef struct { /*E的属性定义*/ char code[CODESIZE];//CodeSize = 500 char place[BUFSIZE];//BufSize = 200 }AttrE;

typedef struct { /*C的属性定义*/ char code[CODESIZE]; int c_false;//用来标记入口 int c_true;//用来标记入口 }AttrC;

typedef struct { /*T的属性定义*/ char code[CODESIZE];//CodeSize = 500 char place[BUFSIZE];//BufSize = 200 }AttrT;

typedef struct { /*F的属性定义*/ char code[CODESIZE];//CodeSize = 500 char place[BUFSIZE];//BufSize = 200 }AttrF;

typedef struct { /*IDN的属性定义*/ char idname[BUFSIZE]; int entry; }AttrIDN;

三地址生成器算法：

int ProcedureS(ifstream& from_file, AttrS &s) {

AttrC c;//C的属性 AttrS s1;//s1的属性 AttrE e;//e的属性

char temp_idn_name[50];//用来暂存当下一个是IDN时，s->id:=E 的 id的name

Token* token = TokenScan(from_file);

//////////////////////////////////////////s-> if C then S1///////////////////////////////////////////// if (token->type == IF) {

c.c_true = NewLabel();//c.c_true出口有了新标签 s1.begin = c.c_true;// C真则往 S1走

s1.next = c.c_false = s.next; // c假则走s的下一步为L0标签，在前面预置了

ProcedureC(from_file, c);

token = TokenScan(from_file); if (token->type == THEN) { ProcedureS(from_file, s1);

sprintf_s(s.code, \, c.code, c.c_true, s1.code);//将中间代码输出到s.code中 } else { exit(-1); }

//////////////////////////////////////////s-> while C do S1///////////////////////////////////////////// } else if (token->type == WHILE) { s1.next = s.begin = NewLabel();

c.c_true = s1.begin = NewLabel();//C真则往 S1走

c.c_false = s.next;// c假则走s的下一步为L0标签，在前面预置了

ProcedureC(from_file, c);

token = TokenScan(from_file); if (token->type == DO) {

ProcedureS(from_file, s1);

sprintf_s(s.code,\,s.begin,c.code,c.c_true,s1.code,s.begin); } else { exit(-1); }

//////////////////////////////////////////s-> id := E///////////////////////////////////////////// } else if (token->type == IDN) {

strcpy_s(temp_idn_name, token->name.c_str()); token = TokenScan(from_file); if (token->type == EQU) { ProcedureE(from_file, e);

sprintf_s(s.code,\,e.code, temp_idn_name, e.place);

} else { exit(-1); } }

return 0; }

int ProcedureC(ifstream& from_file, AttrC &c) {

AttrE e1;//e1的属性 AttrE e2;//e2的属性 Token* token;

ProcedureE(from_file, e1); token = TokenScan(from_file); if (token->type == MORE) { ProcedureE(from_file, e2);

sprintf_s(c.code, \,e1.code,e2.code,e1.place,e2.place,c.c_true,c.c_false); } else if (token->type == LESS) { ProcedureE(from_file, e2);

sprintf_s(c.code, \,e1.code,e2.code,e1.place,e2.place,c.c_true,c.c_false); } else { exit(-1); }

return 0; }

int ProcedureE(ifstream& from_file, AttrE &e) {

AttrT t1; AttrT t2;

Token* token;

ProcedureT(from_file, t1); while (true) {

token = TokenScan(from_file); if (token->type == ADD) { ProcedureT(from_file, t2); strcpy_s(e.place,NewTemp());

sprintf_s(e.code,\,t1.code,t2.code, e.place,t1.place,t2.place);

//这里是关键,用t1.code和t1.place临时记录了上一次while的e.code 和e.place，随着while的不断加深，t1的代码会不断长长 strcpy_s(t1.code,e.code); strcpy_s(t1.place,e.place); } else if (token->type == MINUS) { ProcedureT(from_file, t2); strcpy_s(e.place,NewTemp());

sprintf_s(e.code,\,t1.code,t2.code, e.place,t1.place,t2.place);

strcpy_s(t1.code,e.code); strcpy_s(t1.place,e.place); } else {

for (int i = 0; i < (int)token->name.length(); i++) { from_file.unget();//回退 }

//////////////////////////////////////////E->T///////////////////////////////////////////// strcpy_s(e.place,t1.place);

sprintf_s(e.code,\,t1.code); break; } }

return 0; }

int ProcedureT(ifstream& from_file, AttrT &t) { AttrF f1; AttrF f2;

Token* token;

ProcedureF(from_file ,f1); while (true) {

token = TokenScan(from_file); if (token->type == MUL) {

ProcedureF(from_file, f2); strcpy_s(t.place,NewTemp());

sprintf_s(t.code,\,f1.code,f2.code,t.place,f1.place,f2.place);

strcpy_s(f1.code,t.code);

strcpy_s(f1.place,t.place); } else if (token->type == DIC) { ProcedureF(from_file, f2); strcpy_s(t.place,NewTemp());

sprintf_s(t.code,\,f1.code,f2.code,t.place,f1.place,f2.place);

strcpy_s(f1.code,t.code);

strcpy_s(f1.place,t.place); } else {

for (int i = 0; i < (int)token->name.length(); i++) { from_file.unget();//回退 }

strcpy_s(t.place,f1.place);

sprintf_s(t.code,\,f1.code); break; } }

return 0; }

int ProcedureF(ifstream& from_file, AttrF &f) { AttrE e;

Token* token;

char temp_value[50];

token = TokenScan(from_file); if (token->type == LBRAC) { ProcedureE(from_file, e);

strcpy_s(f.place,e.place);//f.place = e.place sprintf_s(f.code,\,e.code);

token = TokenScan(from_file);//匹配右括号 }

if (token->type == IDN) {

strcpy_s(f.place,token->name.c_str()); sprintf_s(f.code,\); }

if (token->type == INT8) {

sprintf_s(temp_value, \, ValueOfINT8(token->value)); strcpy_s(f.place, temp_value); sprintf_s(f.code,\); }

if (token->type == INT10) {

//sprintf_s(temp_value, \ strcpy_s(f.place,token->value.c_str()); sprintf_s(f.code,\); }

if (token->type == INT16) {

sprintf_s(temp_value, \, ValueOfINT16(token->value)); strcpy_s(f.place, temp_value); sprintf_s(f.code,\); }

return 0; }

实验结果：

实验中遇到的问题及其解决：

1、根据化简后的产生式修改语法制导定义：

主要难点在于E –> T1 (+ T2)*此种产生式当循环调用的次数多于1的时候需要做一步

T1.place = E.place;T1.code = E.code 将上一次循环的E.code和E.place暂存到T1.code和T1.place中，这样在下一次循环中通过E.code = T1.code||T2.code||gen(E.place ‘:=’ T1.place ‘+’ T2.place)就可以正确的重新生成E的新的三地址代码。

2、使用真假出口法和继承属性来确定goto的标号：

在本程序中并没有使用拉链回填的方法实现goto，而是使用下面的实现方法：例如，S –> if C then S和 S –> while C do S这种的产生式，

(1)在S中，先创建所有用到的非终结符的属性结构体： AttrC c;//C的属性 AttrS s1;//s1的属性 AttrE e;//e的属性

(2)在match完if和while后，用以下语句构造真假出口 if (token->type == IF) {

c.c_true = NewLabel();//c.c_true出口有了新标签 s1.begin = c.c_true;// C真则往 S1走

s1.next = c.c_false = s.next; // c假则走s的下一步为L0标签，在前面预置了 …}

else if (token->type == WHILE) { s1.next = s.begin = NewLabel();

c.c_true = s1.begin = NewLabel();//C真则往 S1走

c.c_false = s.next;// c假则走s的下一步为L0标签，在前面预置了 ...}

(3)在构造完真假出口后进入c的子程序(将s中定义的c的属性传进去，也就实现了将标号传进去)

ProcedureC(from_file, c);

(4) 这样子在c子程序结束需要goto的时候定位到正确的标号：