Lexical Analyzer Implementation in C/C++

/* Shared scanner state. All of it is file-level because scanner() and main()
 * communicate exclusively through these globals. */

/* Raw program text to be tokenized; input is terminated by a '#' character. */
char prog[200];
/* Text of the most recently scanned token (NUL-terminated). */
char token[20];
/* Character currently being examined by the scanner. */
char ch;
/* syn : category code of the most recently scanned token
 *         1 keyword, 2 identifier, 3 number, 4 operator, 5 delimiter,
 *         0 end of input ('#'), -1 lexical error, -2 newline.
 * p   : index of the next unread character in prog.
 * m   : write index into token while a token is being assembled.
 * n   : scratch loop counter.
 * row : current input line number, used when reporting errors.
 * sum : numeric value of the most recently scanned number token. */
int syn, p, m = 0, n, row, sum = 0;
/* Reserved words; an alphabetic token equal to one of these is a keyword.
 * The last entry was previously misspelled "namesapce", so "namespace" was
 * never recognised as a keyword. */
const char* kwList[10] = {"if","int","for","while","do","return","break","continue","using","namespace"};
/* Identifier names known to the program. */
const char* idList[8] = {"main","a","b","c","d","e","f","g"};

/* Returns the character at prog[p] and advances p.
 * Once p runs outside prog, the end-of-input marker '#' is returned instead of
 * reading past the array, so a missing terminator cannot cause an overrun.
 * p is advanced unconditionally so that the callers' "p--" push-back stays
 * balanced. */
static char scannerNextChar() {
    char c = (p >= 0 && p < (int)sizeof prog) ? prog[p] : '#';
    p++;
    return c;
}

/* Scans one token from prog starting at index p.
 *
 * Results are delivered through globals:
 *   syn   - token category: 1 keyword, 2 identifier, 3 number, 4 operator,
 *           5 delimiter, 0 end of input ('#'), -1 error, -2 newline
 *   token - token text for categories 1, 2, 4, 5 and 0
 *   sum   - numeric value for category 3
 *   p     - left pointing at the first character after the token
 */
void scanner() {
    /* Clear the whole buffer so that one- and two-character tokens, which are
     * stored without an explicit terminator below, are always NUL-terminated. */
    for (n = 0; n < (int)sizeof token; n++) {
        token[n] = '\0';
    }

    /* Skip horizontal whitespace. '\n' is not skipped: it is reported as its
     * own token (-2) so the caller can count rows. */
    do {
        ch = scannerNextChar();
    } while (ch == ' ' || ch == '\t' || ch == '\r');

    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
        /* Identifier or keyword: a letter followed by letters/digits. */
        m = 0;
        while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
            /* Over-long names are consumed in full but stored truncated, so
             * the token buffer cannot overflow. */
            if (m < (int)sizeof token - 1) {
                token[m++] = ch;
            }
            ch = scannerNextChar();
        }
        token[m++] = '\0';
        p--; /* push back the character that ended the word */

        /* Anything that is not a keyword is an identifier. No lookup in the
         * identifier table is needed: it could only ever confirm syn == 2. */
        syn = 2;
        for (n = 0; n < (int)(sizeof kwList / sizeof kwList[0]); n++) {
            if (strcmp(token, kwList[n]) == 0) {
                syn = 1;
                break;
            }
        }
    }
    else if (ch >= '0' && ch <= '9') {
        /* Unsigned decimal literal; values above 32767 are rejected. */
        int tooLarge = 0;
        sum = 0;
        while (ch >= '0' && ch <= '9') {
            /* Stop accumulating once the limit is exceeded so that a long
             * digit run cannot overflow the int; keep consuming the digits. */
            if (!tooLarge) {
                sum = sum * 10 + (ch - '0');
                if (sum > 32767) {
                    tooLarge = 1;
                }
            }
            ch = scannerNextChar();
        }
        p--; /* push back the character that ended the number */
        syn = tooLarge ? -1 : 3;
    }
    else switch (ch) {
        case '<': /* "<", "<>", "<=", "<<" */
            m = 0;
            token[m++] = ch;
            ch = scannerNextChar();
            if (ch == '>' || ch == '=' || ch == '<') {
                token[m++] = ch;
            }
            else {
                p--;
            }
            syn = 4;
            break;
        case '>': /* ">", ">=" */
        case ':': /* ":", ":=" */
            m = 0;
            token[m++] = ch;
            ch = scannerNextChar();
            if (ch == '=') {
                token[m++] = ch;
            }
            else {
                p--;
            }
            syn = 4;
            break;
        case '/': /* "/" is an operator, "//" is reported as a delimiter */
            m = 0;
            token[m++] = ch;
            ch = scannerNextChar();
            if (ch == '/') {
                syn = 5;
                token[m++] = ch;
            }
            else {
                syn = 4;
                p--;
            }
            break;
        case '*':
        case '+':
        case '-':
        case '=':
            /* single-character operators */
            syn = 4;
            token[0] = ch;
            break;
        case ';':
        case ',':
        case '(':
        case ')':
        case '{':
        case '}':
        case '"':
            /* single-character delimiters */
            syn = 5;
            token[0] = ch;
            break;
        case '#':
            /* end-of-input marker */
            syn = 0;
            token[0] = ch;
            break;
        case '\n':
            syn = -2;
            break;
        default:
            syn = -1;
            break;
    }
}

int main() { p = 0; row = 1; cout << "Please input string:" << endl; do { cin.get(ch); prog[p++] = ch; } while (ch != '#');

p = 0;
do {
    scanner();
    switch (syn) {
        case 0: break;
        case 3: cout << "(" << syn << "," << sum << ")" << endl; break;
        case -1: cout << "Error in row " << row << "!" << endl; break;
        case -2: row++; break;
        default: cout << "(" << syn << "," << token << ")" << endl; break;
    }
} while (syn != 0);
return 0;

}

Tags: C Language lexical analysis tokenization parser

Posted on Mon, 15 Jun 2026 17:22:14 +0000 by pacome