TEST语言的词法规则
- 标识符:字母打头,后接任意字母或数字。
- 保留字:标识符的子集,包括if, else, for, while, int, write, read。
- 无符号整数:由数字组成,但最高位不能为0,允许一位的0。
- 分界符:(、)、;、{、}、,
- 运算符:+、-、*、/、=、<、>、>=、<=、!=、==
- 注释符:/* */
TEST语言的语法规则
1) <program> → {<declaration_list><statement_list>}
2) <declaration_list> → <declaration_list><declaration_stat> | ε
3) <declaration_stat> → int ID;
4) <statement_list> → <statement_list><statement>| ε
5) <statement> → <if_stat>|<while_stat>|<for_stat>|<read_stat> |<write_stat>|<compound_stat> |<assignment_stat>|;
6) <if_stat> → if (<bool_expression >) <statement >
| if (<bool_expression>) <statement >else < statement >
7) <while_stat> → while (<bool_expression>) < statement >
8) <for_stat> → for (<assignment_expression>; <bool_expression>;
<assignment_expression>) <statement>
9) <write_stat> → write < arithmetic_expression >;
10) <read_stat> → read ID;
11) <compound_stat> → {<statement_list>}
12) <assignment_expression> → ID=<arithmetic_expression>
13) <assignment_stat> →<assignment_expression>;
14) <bool_expression>→<arithmetic_expression> > <arithmetic_expression>
|<arithmetic_expression> < <arithmetic_expression>
|<arithmetic_expression> >= <arithmetic_expression>
|<arithmetic_expression> <= <arithmetic_expression>
|<arithmetic_expression> == <arithmetic_expression>
|<arithmetic_expression> != <arithmetic_expression>
15) <arithmetic_expression> → <arithmetic_expression>+<term>
|< arithmetic_expression>-<term>
|< term >
16) < term > → < term >*<factor>|< term >/<factor>|< factor >
17) < factor > → (<arithmetic_expression>)|ID|NUM
实现步骤
- 根据词法规则写出相应的正则表达式,构造NFA,再将NFA确定化并化简为最小DFA
NFA
![]()
DFA
![]()
- 根据词法分析的结果进行语法分析:先消除文法中的左递归,再求出各非终结符的FIRST集和FOLLOW集
关键代码
- 文件读取
由于文件读取是一个异步操作,这里用一个promise将其包裹
<input type='file' accept='text/plain' onchange='openFile(event)'/>
// Wraps the callback-based FileReader API in a promise.
// The executor installs openFile, the handler referenced by the file
// <input>'s onchange attribute. The promise is fulfilled with the text of
// the chosen file and rejected with reader.error if the read fails.
// A promise settles only once, so only the first file that is read is
// delivered to the consumers of readPromise.
let readPromise = new Promise(function (resolve, reject) {
    openFile = function (event) {
        const input = event.target;
        const file = input.files && input.files[0];
        if (!file) {
            // The dialog was closed without choosing a file: nothing to read.
            return;
        }
        const reader = new FileReader();
        reader.onload = function () {
            // Resolve for an empty file too ("" is falsy); otherwise the
            // promise would stay pending forever and nothing would be reported.
            resolve(reader.result);
        };
        reader.onerror = function () {
            reject(reader.error);
        };
        reader.readAsText(file);
    };
});
// Once the file text is available: keep it as a string and as an array of
// characters, run the lexer, then run the parser and log its verdict
// together with the trace of tokens the parser consumed.
readPromise
    .then(function (result) {
        readResultString = result;
        readResultArray = readResultString.split('');
        // Lexical analysis must run first: grammarStatistic reads wordStatisticResult.
        wordStatistic();
        // console.log(wordStatisticResult);
        // console.log(" ");
        console.log(TEXTARRAY[grammarStatistic()]);
        console.log(statisticLog);
    })
    .catch(function (error) {
        // Report a rejected read or an exception thrown by the analysers
        // instead of leaving an unhandled rejection.
        console.error(error);
    });
// - 逐行读取词法分析结果 (section heading: reading the lexer output line by line)
// Helper functions used for reading and writing token records.

// Returns the token stored on the current line of resultLine.Lines as
// { symbol, value } (the first two space-separated fields), moves the
// cursor resultLine.flag to the next line and appends the token to
// statisticLog.
const readLine = () => {
    const [symbol, value] = resultLine.Lines[resultLine.flag].split(" ");
    resultLine.flag += 1;
    statisticLog += `${symbol} ${value}\r\n`;
    return { symbol, value };
};
// Appends one token record, "<Keyword> <ch>" terminated by CRLF, to
// wordStatisticResult.
// Keyword: token class or the token text itself; ch: the lexeme.
const outFunction = (Keyword, ch) => {
    wordStatisticResult += `${Keyword} ${ch}\r\n`;
};
// - 词法分析 (section heading: lexical analysis)
// Reserved words recognised by the lexer.
// NOTE(review): "do" is not in the reserved-word list given in the lexical
// rules of this document - confirm that it is intended.
const keyword = ["if", "else", "for", "while", "do", "read", "write", "int"];
// Single-character tokens; '/' is handled separately because it may open a comment.
// NOTE(review): ':' and '.' are not in the documented delimiter list - confirm.
const singleWord = ['+', '-', '*', '(', ')', '{', '}', ',', ';', ':', '.'];
// Characters that may be followed by '=' to form a two-character operator.
const doubleWord = ['>', '<', '=', '!'];
let openFile; // onchange handler of the file input; installed by readPromise below
let readResultString = ""; // text of the file that was read
let readResultArray = []; // the same text split into single characters
let wordStatisticResult = ""; // lexer output, one "symbol value" record per line
let statisticLog = ""; // trace of the tokens consumed by the parser
let stack = []; // scratch buffer holding the characters of the token being scanned
let resultLine = {}; // parser cursor; grammarStatistic sets it to { Lines, flag }
let t = ''; // current token; the parser assigns it the { symbol, value } returned by readLine
// Wraps the callback-based FileReader API in a promise.
// The promise is fulfilled with the text of the chosen file and rejected
// with reader.error if the read fails. A promise settles only once, so only
// the first file that is read is delivered to the consumers of readPromise.
let readPromise = new Promise(function (resolve, reject) {
    openFile = function (event) {
        const input = event.target;
        const file = input.files && input.files[0];
        if (!file) {
            // The dialog was closed without choosing a file: nothing to read.
            return;
        }
        const reader = new FileReader();
        reader.onload = function () {
            // Resolve for an empty file too ("" is falsy); otherwise the
            // promise would stay pending forever and nothing would be reported.
            resolve(reader.result);
        };
        reader.onerror = function () {
            reject(reader.error);
        };
        reader.readAsText(file);
    };
});
// Lexical analysis.
// Walks the characters of readResultArray and reports every token through
// outFunction(symbol, lexeme):
//   - letter followed by letters/digits -> the word itself when it is listed
//     in keyword, otherwise "ID"
//   - run of digits                     -> "NUM"
//   - characters listed in singleWord   -> the character itself
//   - characters listed in doubleWord, optionally followed by '='
//                                       -> the operator text; a lone '!' -> "ERROR"
//   - '/' not followed by '*'           -> "/"
//   - anything else                     -> "ERROR"
// Blanks (space, tab, CR, LF) and /* ... */ comments produce no token; an
// unterminated comment swallows the rest of the input.
// The characters of the token being scanned are kept in the shared `stack`.
// NOTE: when the input ends with blanks, the last iteration still reports
// ("ERROR", undefined). statement_list recognises the resulting "undefined"
// value as its end-of-input marker, so that behaviour is kept on purpose.
const wordStatistic = () => {
    const isLetter = (c) => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    const isDigit = (c) => c >= '0' && c <= '9';
    const isBlank = (c) => c === ' ' || c === '\t' || c === '\r' || c === '\n';
    const total = readResultArray.length;

    let i = 0;
    while (i < total) {
        let ch = readResultArray[i];
        while (isBlank(ch)) {
            ch = readResultArray[++i];
        }

        if (isLetter(ch)) {
            // Identifier or reserved word.
            stack = [ch];
            ch = readResultArray[++i];
            while (isLetter(ch) || isDigit(ch)) {
                stack.push(ch);
                ch = readResultArray[++i];
            }
            const lexeme = stack.join("");
            outFunction(keyword.includes(lexeme) ? lexeme : "ID", lexeme);
        } else if (isDigit(ch)) {
            // Unsigned integer.
            stack = [ch];
            ch = readResultArray[++i];
            while (isDigit(ch)) {
                stack.push(ch);
                ch = readResultArray[++i];
            }
            outFunction("NUM", stack.join(""));
        } else if (singleWord.includes(ch)) {
            stack = [ch];
            i++;
            outFunction(ch, ch);
        } else if (doubleWord.includes(ch)) {
            stack = [ch];
            if (readResultArray[++i] === '=') {
                stack.push('=');
                i++;
            }
            const lexeme = stack.join("");
            // "!=" is an operator of the language; only a lone '!' is an error.
            outFunction(lexeme === "!" ? "ERROR" : lexeme, lexeme);
        } else if (ch === '/') {
            if (readResultArray[i + 1] === '*') {
                // Comment: skip up to and including the closing "*/".
                let end = i + 2;
                while (end < total && !(readResultArray[end] === '*' && readResultArray[end + 1] === '/')) {
                    end++;
                }
                i = end + 2;
            } else {
                // Division operator: report '/' itself, not the character after it.
                outFunction('/', '/');
                i++;
            }
        } else {
            outFunction("ERROR", ch);
            i++;
        }
    }
};
// - 语法分析 (section heading: syntax analysis)
// Syntax analysis entry point.
// Splits the lexer output into lines, rewinds the token cursor to the
// first line and returns the status code produced by program().
const grammarStatistic = () => {
    const tokenLines = wordStatisticResult.split('\r\n');
    resultLine = { Lines: tokenLines, flag: 0 };
    return program();
};
// <program> -> { <declaration_list> <statement_list> }
// Reads the opening '{', parses the declarations and the statements and
// checks that the current token is the closing '}'.
// Returns the first status above CODE.NORMAL that is met, CODE["LOSS{"] or
// CODE["LOSS}"] for a missing brace, otherwise the status of statement_list.
const program = () => {
    t = readLine();
    if (t.symbol !== "{") {
        return CODE["LOSS{"];
    }

    t = readLine();
    const declarations = declaration_list();
    if (declarations > CODE.NORMAL) {
        return declarations;
    }

    const statements = statement_list();
    if (statements > CODE.NORMAL) {
        return statements;
    }

    // The closing brace is checked but not consumed.
    return t.symbol === "}" ? statements : CODE["LOSS}"];
};
// <declaration_list> -> <declaration_list> <declaration_stat> | ε
// Parses declarations for as long as the current token is 'int'.
// Returns the status of the last declaration parsed (CODE.NORMAL when
// there is none); stops at the first status above CODE.NORMAL.
const declaration_list = () => {
    let status = CODE.NORMAL;
    while (t.symbol === "int") {
        status = declaration_stat();
        if (status > CODE.NORMAL) {
            break;
        }
    }
    return status;
};
// <declaration_stat> -> int ID ;
// Called with 'int' as the current token. Expects an identifier and then a
// ';', and leaves the token after the ';' in t.
// Returns CODE["LOSSID"] / CODE["LOSS;"] when the expected token is
// missing, CODE.NORMAL otherwise.
const declaration_stat = () => {
    // Tokens that must follow 'int', each with the status reported when absent.
    const expected = [
        ["ID", "LOSSID"],
        [";", "LOSS;"],
    ];
    for (const [wanted, missingKey] of expected) {
        t = readLine();
        if (t.symbol !== wanted) {
            return CODE[missingKey];
        }
    }
    t = readLine();
    return CODE.NORMAL;
};
// <statement_list> -> <statement_list> <statement> | ε
// Parses statements until the current token is '}'.
// Returns the first status above CODE.NORMAL reported by statement(),
// CODE["LOSS}"] when the token stream ends before a '}' is found, and
// otherwise the status of the last statement (CODE.NORMAL when the list
// is empty).
const statement_list = () => {
    // End of input shows up in two ways:
    //  - readLine yields a token whose value is undefined once it reaches
    //    the empty line that follows the last lexer record;
    //  - the lexer reports ("ERROR", undefined) for input ending in blanks,
    //    which arrives here as the text "undefined".
    // Requiring the ERROR symbol keeps an identifier that is literally named
    // "undefined" from being mistaken for the end of the input.
    const atEndOfInput = () =>
        t.value === undefined || (t.symbol === "ERROR" && t.value === "undefined");

    let code = CODE.NORMAL;
    while (t.symbol !== "}") {
        // Checked before parsing so that readLine is never driven past the
        // last line, which would throw.
        if (atEndOfInput()) {
            return CODE["LOSS}"];
        }
        code = statement();
        if (code > CODE.NORMAL) {
            return code;
        }
    }
    return code;
};
// <statement> -> <if_stat> | <while_stat> | <for_stat> | <read_stat>
//              | <write_stat> | <compound_stat> | <assignment_stat> | ;
// Dispatches on the current token and returns the status of the chosen
// parser. An empty statement ';' and any token that starts no statement
// are both consumed and reported as CODE.NORMAL.
const statement = () => {
    switch (t.symbol) {
        case "if":
            return if_state();
        case "while":
            return while_stat();
        case "for":
            return for_stat();
        case "read":
            return read_stat();
        case "write":
            return write_stat();
        case "{":
            return compound_stat();
        case "ID":
            return assignment_stat();
        default:
            t = readLine();
            return CODE.NORMAL;
    }
};
// <if_stat> -> if ( <bool_expression> ) <statement>
//            | if ( <bool_expression> ) <statement> else <statement>
// Called with 'if' as the current token.
// Returns CODE["LOSS("] / CODE["LOSS)"] for a missing parenthesis, the
// first status above CODE.NORMAL reported by a sub-parser, otherwise the
// status of the last statement parsed.
const if_state = () => {
    t = readLine();
    if (t.value !== "(") {
        return CODE["LOSS("]; // missing left parenthesis
    }

    t = readLine();
    const condition = bool_expression();
    if (condition > CODE.NORMAL) {
        return condition;
    }
    if (t.symbol !== ")") {
        return CODE["LOSS)"]; // missing right parenthesis
    }

    t = readLine();
    let status = statement();
    // The else branch is only looked for after a successful then-branch.
    if (!(status > CODE.NORMAL) && t.symbol === "else") {
        t = readLine();
        status = statement();
    }
    return status;
};
// <while_stat> -> while ( <bool_expression> ) <statement>
// Called with 'while' as the current token.
// Returns CODE["LOSS("] / CODE["LOSS)"] for a missing parenthesis, a
// status above CODE.NORMAL reported by the condition, otherwise the status
// of the loop body.
const while_stat = () => {
    t = readLine();
    if (t.value !== "(") {
        return CODE["LOSS("]; // missing left parenthesis
    }

    t = readLine();
    const condition = bool_expression();
    if (condition > CODE.NORMAL) {
        return condition;
    }
    if (t.symbol !== ")") {
        return CODE["LOSS)"]; // missing right parenthesis
    }

    t = readLine();
    return statement();
};
// <for_stat> -> for ( <assignment_expression> ; <bool_expression> ;
//                     <assignment_expression> ) <statement>
// Called with 'for' as the current token.
// Returns CODE["LOSS("], CODE["LOSS;"] or CODE["LOSS)"] for a missing
// delimiter, the first status above CODE.NORMAL reported by a header
// clause, otherwise the status of the loop body.
const for_stat = () => {
    t = readLine();
    if (t.symbol !== "(") {
        return CODE["LOSS("];
    }

    // The three header clauses: parser, token that must follow it, and the
    // status reported when that token is absent.
    const headerClauses = [
        [assignment_expression, ";", "LOSS;"],
        [bool_expression, ";", "LOSS;"],
        [assignment_expression, ")", "LOSS)"],
    ];
    for (const [parseClause, terminator, missingKey] of headerClauses) {
        t = readLine();
        const status = parseClause();
        if (status > CODE.NORMAL) {
            return status;
        }
        if (t.symbol !== terminator) {
            return CODE[missingKey];
        }
    }

    t = readLine();
    return statement();
};
// <write_stat> -> write <arithmetic_expression> ;
// Called with 'write' as the current token; on success the token after the
// ';' is left in t.
// Returns a status above CODE.NORMAL reported by the expression,
// CODE["LOSS;"] when the ';' is missing, otherwise the expression's status.
const write_stat = () => {
    t = readLine();
    let status = arithmetic_expression();
    if (!(status > CODE.NORMAL)) {
        if (t.symbol === ";") {
            t = readLine();
        } else {
            status = CODE["LOSS;"];
        }
    }
    return status;
};
// <read_stat> -> read ID ;
// Called with 'read' as the current token; on success the token after the
// ';' is left in t.
// Returns CODE["LOSSID"] / CODE["LOSS;"] when the expected token is
// missing, CODE.NORMAL otherwise.
const read_stat = () => {
    // Moves to the next token and tells whether it carries the wanted symbol.
    const nextIs = (wanted) => {
        t = readLine();
        return t.symbol === wanted;
    };

    if (!nextIs("ID")) {
        return CODE["LOSSID"];
    }
    if (!nextIs(";")) {
        return CODE["LOSS;"];
    }
    t = readLine();
    return CODE.NORMAL;
};
// <compound_stat> -> { <statement_list> }
// Called with '{' as the current token; on success the token after the
// closing '}' is left in t.
// Returns a status above CODE.NORMAL reported by statement_list,
// CODE["LOSS}"] when the closing brace is missing, otherwise the status of
// statement_list.
const compound_stat = () => {
    t = readLine();
    const body = statement_list();
    if (body > CODE.NORMAL) {
        return body;
    }
    if (t.symbol === "}") {
        t = readLine();
        return body;
    }
    return CODE["LOSS}"];
};
// <assignment_expression> -> ID = <arithmetic_expression>
// Called with the candidate identifier as the current token.
// Returns CODE["LOSSID"] when the current token is not an identifier,
// CODE["LOSS="] when '=' does not follow it, otherwise the status of the
// right-hand side expression.
const assignment_expression = () => {
    if (t.symbol !== "ID") {
        return CODE["LOSSID"];
    }

    t = readLine();
    if (t.symbol !== "=") {
        return CODE["LOSS="];
    }

    t = readLine();
    return arithmetic_expression();
};
// <assignment_stat> -> <assignment_expression> ;
// Called with the assigned identifier as the current token; on success the
// token after the ';' is left in t.
// Returns a status above CODE.NORMAL reported by assignment_expression,
// CODE["LOSS;"] when the ';' is missing, otherwise the status of
// assignment_expression.
const assignment_stat = () => {
    const code = assignment_expression();
    if (code > CODE.NORMAL) {
        return code;
    }
    if (t.symbol !== ";") {
        return CODE["LOSS;"];
    }
    // Consume the terminating ';' as read_stat, write_stat and
    // declaration_stat do. Without this, if_state still sees ';' after an
    // assignment in the then-branch and never matches the following 'else'.
    t = readLine();
    return code;
};
// <bool_expression> -> <arithmetic_expression> (> | < | >= | <= | == | !=)
//                      <arithmetic_expression>
// Parses the left operand; when the current token's value is a relational
// operator, consumes it and parses the right operand as well. As written,
// a lone arithmetic expression without an operator is accepted too.
// Returns the status of the last operand parsed.
const bool_expression = () => {
    const relationalOperators = [">", "<", ">=", "<=", "!=", "=="];

    const left = arithmetic_expression();
    // The operator is recognised by the token's value, not by its symbol.
    if (left > CODE.NORMAL || !relationalOperators.includes(t.value)) {
        return left;
    }

    t = readLine();
    return arithmetic_expression();
};
// <arithmetic_expression> -> <term> { (+ | -) <term> }
// (the left-recursive rule rewritten as a loop)
// Returns the first status above CODE.NORMAL reported by term(),
// otherwise the status of the last term parsed.
const arithmetic_expression = () => {
    let status = term();
    while (!(status > CODE.NORMAL) && (t.symbol === "+" || t.symbol === "-")) {
        t = readLine();
        status = term();
    }
    return status;
};
// <term> -> <factor> { (* | /) <factor> }
// (the left-recursive rule rewritten as a loop)
// Returns the first status above CODE.NORMAL reported by factor(),
// otherwise the status of the last factor parsed.
const term = () => {
    const atMultiplicativeOperator = () => ["*", "/"].includes(t.symbol);

    for (let status = factor(); ; status = factor()) {
        if (status > CODE.NORMAL || !atMultiplicativeOperator()) {
            return status;
        }
        // Step over the operator before parsing the next factor.
        t = readLine();
    }
};
// <factor> -> ( <arithmetic_expression> ) | ID | NUM
// On success the token after the factor is left in t.
// Returns CODE["LOSSfactor"] when the current token starts no factor,
// CODE["LOSS)"] when a parenthesised expression is not closed, a status
// above CODE.NORMAL reported by the inner expression, otherwise the inner
// expression's status (CODE.NORMAL for ID and NUM).
const factor = () => {
    if (t.symbol === "ID" || t.symbol === "NUM") {
        t = readLine();
        return CODE.NORMAL;
    }
    if (t.symbol !== "(") {
        return CODE["LOSSfactor"];
    }

    t = readLine();
    const inner = arithmetic_expression();
    if (inner > CODE.NORMAL) {
        return inner;
    }
    if (t.symbol !== ")") {
        return CODE["LOSS)"];
    }
    t = readLine();
    return inner;
};