词法分析器

一个用 C++ 实现的词法分析器，用于对 PL/0 源程序进行词法分析。

Code.txt 文件（待分析的 PL/0 源程序）

const       a<=10;
var         b,c;
procedure   p;
begin
end.

代码

#include <fstream>
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "iostream"
using namespace std;

// Global lookup tables used by the lexer.

// Reserved words. SearchReserveWord() maps entry i to the token code
// (constsym + i), so the order here must follow the enum's keyword order.
static char reserveWord[13][20] = {
    "const",
    "var",
    "procedure",
    "begin",
    "end",
    "odd",
    "if",
    "then",
    "call",
    "while",
    "do",
    "read",
    "write"
};

// Operators; extend as needed. SearchoperatorCode() maps entry i to the
// token code (Plus + i), so the order here must follow the enum's operator
// order.
static char operatorCode[11][5] = {
    "+",
    "-",
    "*",
    "/",
    "<",
    "<=",
    ">",
    ">=",
    "#",
    "=",
    ":="
};

// Delimiters. Scanner() maps entry i to the token code (lparen + i).
static char Delimiter[5] = { '(', ')', ',', ';', '.' };

// Identifier table: each distinct identifier seen by main() is stored once.
static char ID[1000][50] = {};

// Table for user-defined numbers (declared here; not referenced by the
// visible code).
static char NUM[1000][50] = {};

// Token codes produced by the scanner. The numeric values are written to
// result.txt, and the operator / delimiter / keyword groups are indexed by
// offset from their first member, so the order must not change.
enum symbol{
    nul,        // no token (also returned by the search helpers on a miss)
    ident,      // identifier
    number,     // unsigned integer literal

    // operators, in the same order as operatorCode
    Plus,       // +
    minus,      // -
    times,      // *
    slash,      // /
    lss,        // <
    leq,        // <=
    gtr,        // >
    geq,        // >=
    neq,        // #
    eql,        // =
    becomes,    // :=

    // delimiters, in the same order as Delimiter
    lparen,     // (
    rparen,     // )
    comma,      // ,
    semicolon,  // ;
    period,     // .

    // reserved words, in the same order as reserveWord
    constsym,
    varsym,
    procsym,
    beginsym,
    endsym,
    oddsym,
    ifsym,
    thensym,
    callsym,
    whilesym,
    dosym,
    readsym,
    writesym,

    unknown,    // unrecognised character
};

// Looks `str` up in the reserved-word table `reserWord` (13 entries).
// Returns constsym + index of the matching entry, or nul when `str` is not
// a reserved word.
int SearchReserveWord(char reserWord[][20],char str[]){
    int index = 0;
    while (index < 13 && strcmp(reserWord[index], str) != 0) {
        ++index;
    }
    return index < 13 ? index + constsym : nul;
}

// Looks `str` up in the operator table `operatorcode` (11 entries).
// Returns Plus + index of the matching entry, or nul when `str` is not a
// known operator.
int SearchoperatorCode(char operatorcode[][5],char str[]){
    int index = 0;
    while (index < 11 && strcmp(operatorcode[index], str) != 0) {
        ++index;
    }
    return index < 11 ? index + Plus : nul;
}

// Returns true when `letter` is an ASCII letter (a-z or A-Z).
bool IsLetter(char letter){
    const bool isLower = letter >= 'a' && letter <= 'z';
    const bool isUpper = letter >= 'A' && letter <= 'Z';
    return isLower || isUpper;
}

// Returns true when `num` is an ASCII decimal digit (0-9).
bool IsNum(char num){
    return !(num < '0' || num > '9');
}

// Scans the next token from the source buffer.
//
// Parameters:
//   syn   - out: token code of the recognised token (a value of enum
//           symbol); nul at end of input, unknown for an unrecognised
//           character.
//   r     - NUL-terminated source text.
//   token - out: lexeme of the token; the caller must provide at least 20
//           chars. Lexemes longer than 19 characters are truncated to 19,
//           but the whole lexeme is still consumed from the input.
//   p     - in/out: index into `r` where scanning starts; on return it
//           points just past the consumed token.
void Scanner(int &syn,char r[],char token[],int &p){
    const int kTokenCapacity = 20;               // size of the caller's buffer
    const int kMaxLexemeLen = kTokenCapacity - 1; // leave room for the NUL
    int count = 0;

    for (int i = 0; i < kTokenCapacity; i++)
        token[i] = '\0';

    // Skip leading whitespace.
    while (r[p]==' '||r[p]=='\n'||r[p]=='\t'||r[p]=='\v'||r[p]=='\r'){
        p++;
    }

    if (IsLetter(r[p])) {
        // Starts with a letter: reserved word or identifier.
        while (IsLetter(r[p])||IsNum(r[p])) {
            // Store only what fits; keep consuming so the rest of an
            // over-long name is not rescanned as a separate token.
            if (count < kMaxLexemeLen) {
                token[count++] = r[p];
            }
            p++;
        }
        token[count] = '\0';
        syn = SearchReserveWord(reserveWord, token);
        if (syn==nul) {
            syn = ident;
        }
        return;
    }
    else if (IsNum(r[p])){
        // Unsigned integer literal.
        while (IsNum(r[p])) {
            if (count < kMaxLexemeLen) {
                token[count++] = r[p];
            }
            p++;
        }
        token[count] = '\0';
        syn = number;
        return;
    }
    else if (r[p]=='('||r[p]==')'||r[p]==','||r[p]==';'||r[p]=='.'){
        // Delimiter: its code is lparen plus its index in Delimiter.
        token[0] = r[p];
        token[1] = '\0';
        for (int i = 0; i<5; i++) {
            if (Delimiter[i]==r[p]) {
                syn = i+lparen;
                break;
            }
        }
        p++;
        return;
    }
    else if(r[p]=='+'||r[p]=='-'||r[p]=='*'||r[p]=='/'||r[p]=='>'||
            r[p]=='<'||r[p]=='#'||r[p]=='='||r[p]==':'){
        // Operator. '>', '<' and ':' may be followed by '=' to form a
        // two-character operator.
        const char first = r[p++];
        token[0] = first;
        if ((first=='>'||first=='<'||first==':') && r[p]=='='){
            token[1] = r[p++];
            token[2] = '\0';
        } else{
            token[1] = '\0';
        }
        syn = SearchoperatorCode(operatorCode,token);
        if (syn==nul){
            // A lone ':' is not in the operator table. Report it as an
            // unknown character instead of nul, which the caller would
            // mistake for end of input.
            syn = unknown;
        }
        return;
    }
    else if (r[p]==EOF||r[p]=='\0'){
        // End of input.
        syn = nul;
        return;
    }
    else{
        // Unrecognised character: hand it back in `token` and consume it so
        // that repeated calls always make progress.
        token[0] = r[p++];
        token[1] = '\0';
        syn = unknown;
        return;
    }
}

// Reads the PL/0 source from Code.txt, echoes it to stdout, then scans it
// token by token. Each token is written to result.txt as "(code,lexeme)"
// and printed to stdout with its category; distinct identifiers are also
// recorded in the ID table. Returns 0 on success, 1 if Code.txt cannot be
// opened.
int main(int argc, const char * argv[]) {
    char resourceCode[10000];
    char token[20] = {0,};
    int syn = -1,length = 0; // length is the number of entries used in ID
    int p = 0;
    FILE *fp;
    ofstream outfile("result.txt");

    if ((fp = fopen("Code.txt", "r")) == NULL)
    {
        // The source file could not be opened.
        cout << "can't open this file";
        return 1;
    }

    // Read the whole file. fgetc() returns an int so that EOF can be told
    // apart from every valid byte value; input beyond the buffer capacity
    // is ignored rather than written past the end of resourceCode.
    const int maxChars = static_cast<int>(sizeof(resourceCode)) - 1;
    int ch;
    while (p < maxChars && (ch = fgetc(fp)) != EOF){
        resourceCode[p++] = static_cast<char>(ch);
    }
    resourceCode[p] = '\0';

    fclose(fp);

    cout<<resourceCode<<endl;

    p = 0;

    const int idCapacity = static_cast<int>(sizeof(ID) / sizeof(ID[0]));

    while(syn!=nul){
        Scanner(syn,resourceCode,token,p);
        outfile << "(" << syn << "," << token << ")" << endl;
        if (syn==ident){
            // Identifier: add it to the ID table unless already present.
            bool IsExit = false;
            for (int i = 0; i < length; ++i)
                if (strcmp(ID[i],token)==0){
                    IsExit = true;
                    break;
                }
            if (IsExit== false && length < idCapacity){
                strcpy(ID[length],token);
                length++;
            }
            cout<<token<<"标识符"<<endl;
        }else if (syn>=constsym&&syn<=writesym){
            // Reserved word.
            cout<<token<<"保留字"<<endl;
        }else if (syn>=lparen&&syn<=period){
            // Delimiter.
            cout<<token<<"界符"<<endl;
        }else if (syn>=Plus&&syn<=becomes){
            // Operator.
            cout<<token<<"运算符"<<endl;
        }else if (syn==number){
            // Integer literal.
            cout<<token<<"整数"<<endl;
        } else if (syn==unknown){
            // Unknown character.
            cout<<token<<"未知字符"<<endl;
            // If the scanner reported the character without consuming it
            // (token left empty), step over it here so the loop cannot
            // spin forever on the same position.
            if (token[0]=='\0' && resourceCode[p]!='\0'){
                p++;
            }
        }
    }

    outfile.close();

    return 0;
}

输出

/Users/ailuoku6/CLionProjects/Cifafenxi1/cmake-build-debug/Cifafenxi1
const       a<=10;
var         b,c;
procedure   p;
begin
end.

const保留字
a标识符
<=运算符
10整数
;界符
var保留字
b标识符
,界符
c标识符
;界符
procedure保留字
p标识符
;界符
begin保留字
end保留字
.界符

Process finished with exit code 0

result.txt

(19,const)
(1,a)
(8,<=)
(2,10)
(17,;)
(20,var)
(1,b)
(16,,)
(1,c)
(17,;)
(21,procedure)
(1,p)
(17,;)
(22,begin)
(23,end)
(18,.)
(0,)

词法分析器
http://blog.ailuoku6.top/2019/04/21/ci-fa-fen-xi-qi/
作者
ailuoku6
发布于
2019年4月21日
许可协议