TEST语言的词法规则
- 标识符:字母打头,后接任意字母或数字。
- 保留字:标识符的子集,包括if, else, for, while, int, write, read。
- 无符号整数:由数字组成,但最高位不能为0,允许一位的0。
- 分界符:(、)、;、{、}、,
- 运算符:+、-、*、/、=、<、>、>=、<=、!=、==
- 注释符:/* */
TEST语言的语法规则
1) <program> → {<declaration_list><statement_list>}
2) <declaration_list> → <declaration_list><declaration_stat> | ε
3) <declaration_stat> → int ID;
4) <statement_list> → <statement_list><statement>| ε
5) <statement> → <if_stat>|<while_stat>|<for_stat>|<read_stat> |<write_stat>|<compound_stat> |<assignment_stat>|;
6) <if_stat> → if (<bool_expression>) <statement>
| if (<bool_expression>) <statement> else <statement>
7) <while_stat> → while (<bool_expression>) <statement>
8) <for_stat> → for (<assignment_expression>; <bool_expression>;
<assignment_expression>) <statement>
9) <write_stat> → write <arithmetic_expression>;
10) <read_stat> → read ID;
11) <compound_stat> → {<statement_list>}
12) <assignment_expression> → ID=<arithmetic_expression>
13) <assignment_stat> →<assignment_expression>;
14) <bool_expression>→<arithmetic_expression> > <arithmetic_expression>
|<arithmetic_expression> < <arithmetic_expression>
|<arithmetic_expression> >= <arithmetic_expression>
|<arithmetic_expression> <= <arithmetic_expression>
|<arithmetic_expression> == <arithmetic_expression>
|<arithmetic_expression> != <arithmetic_expression>
15) <arithmetic_expression> → <arithmetic_expression>+<term>
|<arithmetic_expression>-<term>
|<term>
16) <term> → <term>*<factor>|<term>/<factor>|<factor>
17) <factor> → (<arithmetic_expression>)|ID|NUM
实现步骤
- 根据词法规则写出相应的正则表达式,构造NFA,再将其确定化并最小化为DFA
NFA
DFA
- 根据词法分析的结果进行语法分析:先消除文法中的左递归,再求出各非终结符的FIRST集和FOLLOW集
关键代码
- 文件读取
由于文件读取是异步操作,这里用一个Promise将其封装
<!-- File picker restricted to plain-text files; its change event is passed to the global openFile(event) handler. -->
<input type='file' accept='text/plain' onchange='openFile(event)'/>
// Reading a file is asynchronous, so the FileReader callbacks are adapted to a
// Promise. The executor only installs `openFile`, the handler invoked by the
// file input's change event; the promise settles once a file has been read.
// NOTE: a Promise settles only once, so only the first file read is analysed.
let readPromise = new Promise(function (resolve, reject) {
  openFile = function (event) {
    const input = event.target;
    const file = input.files[0];
    if (!file) {
      // The picker was cancelled or cleared: there is nothing to read.
      return;
    }
    const reader = new FileReader();
    reader.onload = function () {
      // An empty file yields "", which is still a valid (empty) source text,
      // so resolve unconditionally instead of leaving the promise pending.
      resolve(reader.result);
    };
    reader.onerror = function () {
      // Surface read failures instead of silently never settling.
      reject(reader.error);
    };
    reader.readAsText(file);
  };
});

readPromise
  .then(function (result) {
    readResultString = result;
    readResultArray = readResultString.split('');
    // Lexical analysis runs first.
    wordStatistic();
    // Then syntax analysis: print the message for the returned status code,
    // followed by the log of tokens consumed by the parser.
    console.log(TEXTARRAY[grammarStatistic()]);
    console.log(statisticLog);
  })
  .catch(function (error) {
    // Covers both file-read failures and exceptions thrown during analysis.
    console.error(error);
  });
逐行读取词法分析结果
// Helper functions used to read the lexer output and to append to it.

/**
 * Reads the next line of the lexical-analysis result and advances the cursor
 * (`resultLine.flag`). Every line read is also appended to `statisticLog`.
 *
 * A line has the form "<symbol> <value>"; a line without a space (such as the
 * empty line after the final "\r\n") yields `value === undefined`.
 *
 * @returns {{symbol: string, value: (string|undefined)}} the token on that line
 * @throws {RangeError} when every line has already been consumed
 */
const readLine = () => {
  const line = resultLine.Lines[resultLine.flag];
  if (line === undefined) {
    // Fail with a clear message instead of "cannot read properties of undefined".
    throw new RangeError("readLine: no more lexical-analysis lines to read");
  }
  const [symbol, value] = line.split(" ");
  resultLine.flag++;
  statisticLog += `${symbol} ${value}\r\n`;
  return { symbol, value };
};

/**
 * Appends one "<Keyword> <ch>" line, terminated by "\r\n", to the
 * lexical-analysis result `wordStatisticResult`.
 *
 * @param {string} Keyword - token category (e.g. "ID", "NUM", or the lexeme itself)
 * @param {string} ch - the lexeme text
 */
const outFunction = (Keyword, ch) => {
  wordStatisticResult += `${Keyword} ${ch}\r\n`;
};
- 词法分析
// ---- Lexical analysis: tables and shared state ----
const keyword = ["if", "else", "for", "while", "do", "read", "write", "int"]; // all reserved words
const singleWord = ['+', '-', '*', '(', ')', '{', '}', ',', ';', ':', '.']; // '/' is handled separately
const doubleWord = ['>', '<', '=', '!']; // characters that may be followed by '='
let openFile; // change-event handler, installed by the Promise executor below
let readResultString = ""; // file content as a string
let readResultArray = []; // file content as an array of characters
let wordStatisticResult = ""; // lexical-analysis result, one "<symbol> <value>" line per token
let statisticLog = ""; // log written during syntax analysis
let stack = []; // scratch buffer holding the characters of the token being scanned
let resultLine = {}; // lexer-output lines plus read cursor, initialised by the syntax analyser
let t = ''; // current token ({symbol, value}) during syntax analysis

// Adapts the callback-based FileReader to a Promise; `openFile` is the handler
// to call with the file input's change event.
let readPromise = new Promise(function (resolve, reject) {
  openFile = function (event) {
    const input = event.target;
    const file = input.files[0];
    if (!file) {
      // The picker was cancelled or cleared: there is nothing to read.
      return;
    }
    const reader = new FileReader();
    reader.onload = function () {
      // An empty file yields "", which is still a valid (empty) source text.
      resolve(reader.result);
    };
    reader.onerror = function () {
      // Surface read failures instead of leaving the promise pending forever.
      reject(reader.error);
    };
    reader.readAsText(file);
  };
});

/**
 * Lexical analysis: scans `readResultArray` and emits one line per token
 * through `outFunction(symbol, lexeme)`.
 *
 * Token categories: reserved words and operators/delimiters use the lexeme
 * itself as symbol, identifiers are "ID", digit runs are "NUM", anything
 * unrecognised (including a lone '!') is "ERROR". Block comments are skipped.
 */
const wordStatistic = () => {
  const isLetter = (c) => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  const isDigit = (c) => c >= '0' && c <= '9';
  // Tabs are treated as whitespace too, so tab-indented sources do not produce ERROR tokens.
  const isBlank = (c) => c === ' ' || c === '\t' || c === '\r' || c === '\n';

  const total = readResultArray.length;
  let i = 0;
  while (i < total) {
    while (isBlank(readResultArray[i])) {
      i++;
    }
    const ch = readResultArray[i];

    if (ch === undefined) {
      // The input ended in whitespace. The resulting "ERROR undefined" line is
      // kept on purpose: statement_list in the syntax analyser compares the
      // token value with "undefined" to recognise the end of the input.
      outFunction("ERROR", ch);
      break;
    }

    if (isLetter(ch)) {
      // Identifier or reserved word: a letter followed by letters/digits.
      stack = [ch];
      i++;
      while (isLetter(readResultArray[i]) || isDigit(readResultArray[i])) {
        stack.push(readResultArray[i]);
        i++;
      }
      const strToken = stack.join("");
      outFunction(keyword.indexOf(strToken) >= 0 ? strToken : "ID", strToken);
    } else if (isDigit(ch)) {
      // Number: a run of digits.
      stack = [ch];
      i++;
      while (isDigit(readResultArray[i])) {
        stack.push(readResultArray[i]);
        i++;
      }
      outFunction("NUM", stack.join(""));
    } else if (singleWord.indexOf(ch) >= 0) {
      // Single-character operator or delimiter.
      stack = [ch];
      i++;
      outFunction(ch, ch);
    } else if (doubleWord.indexOf(ch) >= 0) {
      // One of > < = !, optionally followed by '='.
      stack = [ch];
      i++;
      if (readResultArray[i] === '=') {
        stack.push('=');
        i++;
      }
      const strToken = stack.join("");
      // Only a lone '!' is invalid; "!=" is a regular operator.
      outFunction(strToken === "!" ? "ERROR" : strToken, strToken);
    } else if (ch === '/') {
      if (readResultArray[i + 1] === '*') {
        // Block comment: skip to just past the closing "*/" (or to the end of
        // the input when the comment is unterminated).
        i += 2;
        while (i < total && !(readResultArray[i] === '*' && readResultArray[i + 1] === '/')) {
          i++;
        }
        i += 2;
      } else {
        // Division operator: emit '/' itself; the following character is left
        // for the next iteration.
        outFunction('/', '/');
        i++;
      }
    } else {
      outFunction("ERROR", ch);
      i++;
    }
  }
};
- 语法分析
// ---- Syntax analysis (recursive descent) ----
// Every routine returns a status code; a value greater than CODE.NORMAL is an
// error and is propagated to the caller unchanged. The current token lives in
// the shared variable `t` and is advanced with readLine().

// Relational operators accepted by bool_expression (matched against t.value).
const RELATIONAL_OPERATORS = [">", "<", ">=", "<=", "!=", "=="];

// True when `token` marks the end of the lexer output: either the empty line
// after the last "\r\n" (no value part), or the "ERROR undefined" line the
// lexer can emit when the source text ends in whitespace.
const isEndOfInput = (token) =>
  token.value === undefined ||
  (token.symbol === "ERROR" && token.value === "undefined");

/**
 * Entry point: splits the lexer output into lines, resets the read cursor and
 * parses a whole program.
 * @returns the status code produced by program()
 */
const grammarStatistic = () => {
  resultLine = {
    Lines: wordStatisticResult.split('\r\n'),
    flag: 0,
  };
  return program();
};

// <program> → { <declaration_list> <statement_list> }
const program = () => {
  t = readLine();
  if (t.symbol !== "{") {
    return CODE["LOSS{"];
  }
  t = readLine();
  let code = declaration_list();
  if (code > CODE.NORMAL) {
    return code;
  }
  code = statement_list();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== "}") {
    return CODE["LOSS}"]; // missing closing brace
  }
  return code;
};

// <declaration_list> → { <declaration_stat> }
const declaration_list = () => {
  let code = CODE.NORMAL;
  while (t.symbol === "int") {
    code = declaration_stat();
    if (code > CODE.NORMAL) {
      return code;
    }
  }
  return code;
};

// <declaration_stat> → int ID ;
const declaration_stat = () => {
  t = readLine();
  if (t.symbol !== "ID") {
    return CODE["LOSSID"];
  }
  t = readLine();
  if (t.symbol !== ";") {
    return CODE["LOSS;"];
  }
  t = readLine();
  return CODE.NORMAL;
};

// <statement_list> → { <statement> }, terminated by "}" (not consumed here).
const statement_list = () => {
  let code = CODE.NORMAL;
  while (t.symbol !== "}") {
    if (isEndOfInput(t)) {
      // Ran out of tokens before the closing brace; stop instead of reading
      // past the last line.
      return CODE["LOSS}"];
    }
    code = statement();
    if (code > CODE.NORMAL) {
      return code;
    }
  }
  return code;
};

// <statement> → if | while | for | read | write | compound | assignment | ;
const statement = () => {
  if (isEndOfInput(t)) {
    // A statement was expected (e.g. the body of if/while/for) but the input ended.
    return CODE["LOSS}"];
  }
  switch (t.symbol) {
    case "if":
      return if_state();
    case "while":
      return while_stat();
    case "for":
      return for_stat();
    case "read":
      return read_stat();
    case "write":
      return write_stat();
    case "{":
      return compound_stat();
    case "ID":
      return assignment_stat();
    case "}":
      // A closing brace where a statement is required: even the empty
      // statement needs a ";". The brace is left for the enclosing block.
      return CODE["LOSS;"];
    case ";":
      // Empty statement.
      t = readLine();
      return CODE.NORMAL;
    default:
      // Any other token is skipped without reporting an error.
      t = readLine();
      return CODE.NORMAL;
  }
};

// <if_stat> → if ( <bool_expression> ) <statement> [ else <statement> ]
const if_state = () => {
  t = readLine();
  if (t.symbol !== "(") {
    return CODE["LOSS("]; // missing left parenthesis
  }
  t = readLine();
  let code = bool_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== ")") {
    return CODE["LOSS)"]; // missing right parenthesis
  }
  t = readLine();
  code = statement();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol === "else") {
    t = readLine();
    code = statement();
  }
  return code;
};

// <while_stat> → while ( <bool_expression> ) <statement>
const while_stat = () => {
  t = readLine();
  if (t.symbol !== "(") {
    return CODE["LOSS("]; // missing left parenthesis
  }
  t = readLine();
  const code = bool_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== ")") {
    return CODE["LOSS)"]; // missing right parenthesis
  }
  t = readLine();
  return statement();
};

// <for_stat> → for ( <assignment_expression> ; <bool_expression> ; <assignment_expression> ) <statement>
const for_stat = () => {
  t = readLine();
  if (t.symbol !== "(") {
    return CODE["LOSS("];
  }
  t = readLine();
  let code = assignment_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== ";") {
    return CODE["LOSS;"];
  }
  t = readLine();
  code = bool_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== ";") {
    return CODE["LOSS;"];
  }
  t = readLine();
  code = assignment_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== ")") {
    return CODE["LOSS)"];
  }
  t = readLine();
  return statement();
};

// <write_stat> → write <arithmetic_expression> ;
const write_stat = () => {
  t = readLine();
  const code = arithmetic_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== ";") {
    return CODE["LOSS;"];
  }
  t = readLine();
  return code;
};

// <read_stat> → read ID ;
const read_stat = () => {
  t = readLine();
  if (t.symbol !== "ID") {
    return CODE["LOSSID"];
  }
  t = readLine();
  if (t.symbol !== ";") {
    return CODE["LOSS;"];
  }
  t = readLine();
  return CODE.NORMAL;
};

// <compound_stat> → { <statement_list> }
const compound_stat = () => {
  t = readLine();
  const code = statement_list();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== "}") {
    return CODE["LOSS}"];
  }
  t = readLine();
  return code;
};

// <assignment_expression> → ID = <arithmetic_expression>
const assignment_expression = () => {
  if (t.symbol !== "ID") {
    return CODE["LOSSID"];
  }
  t = readLine();
  if (t.symbol !== "=") {
    return CODE["LOSS="];
  }
  t = readLine();
  return arithmetic_expression();
};

// <assignment_stat> → <assignment_expression> ;
const assignment_stat = () => {
  const code = assignment_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (t.symbol !== ";") {
    return CODE["LOSS;"];
  }
  // Consume the ";" like read_stat/write_stat do, so that a following "else"
  // is seen by the enclosing if statement.
  t = readLine();
  return code;
};

// <bool_expression> → <arithmetic_expression> [ relop <arithmetic_expression> ]
// The operator is matched on t.value; when no relational operator follows,
// the single arithmetic expression is accepted as is.
const bool_expression = () => {
  let code = arithmetic_expression();
  if (code > CODE.NORMAL) {
    return code;
  }
  if (RELATIONAL_OPERATORS.indexOf(t.value) >= 0) {
    t = readLine();
    code = arithmetic_expression();
  }
  return code;
};

// <arithmetic_expression> → <term> { (+|-) <term> }   (left recursion removed)
const arithmetic_expression = () => {
  let code = term();
  if (code > CODE.NORMAL) {
    return code;
  }
  while (t.symbol === "+" || t.symbol === "-") {
    t = readLine();
    code = term();
    if (code > CODE.NORMAL) {
      return code;
    }
  }
  return code;
};

// <term> → <factor> { (*|/) <factor> }   (left recursion removed)
const term = () => {
  let code = factor();
  if (code > CODE.NORMAL) {
    return code;
  }
  while (t.symbol === "*" || t.symbol === "/") {
    t = readLine();
    code = factor();
    if (code > CODE.NORMAL) {
      return code;
    }
  }
  return code;
};

// <factor> → ( <arithmetic_expression> ) | ID | NUM
const factor = () => {
  if (t.symbol === "(") {
    t = readLine();
    const code = arithmetic_expression();
    if (code > CODE.NORMAL) {
      return code;
    }
    if (t.symbol !== ")") {
      return CODE["LOSS)"];
    }
    t = readLine();
    return code;
  }
  if (t.symbol === "ID" || t.symbol === "NUM") {
    t = readLine();
    return CODE.NORMAL;
  }
  return CODE["LOSSfactor"];
};