# 스캐너 직접 구현하기

# 상태도

TIP

어떤 프로세스가 결론을 내리기 위해서 반복적인 조건 검사가 필요하거나

현재의 상태가 과거의 상태에 의해서 영향받을 때 상태도가 유용하다.

# 논리 계층

/**
 * A single lexical token: a category name plus the text it was built from.
 */
class Token {
    /**
     * @param {string} type - Token category, e.g. 'INTEGER' or 'PLUS'.
     * @param {string|null} value - The scanned text carried by the token.
     */
    constructor(type, value) {
        // Shorthand properties keep the same own-property order: type, then value.
        Object.assign(this, { type, value });
    }
}

/**
 * Logical layer of the scanner: a state machine that turns input text into
 * Token objects, one token per call to next().
 *
 * This listing is an excerpt; the per-state helpers and _getChar are not
 * shown here.
 */
class Scanner {
    /**
     * @param {string} text - The text to scan.
     */
    constructor(text) {
        // Scanning starts in the START state at the first character.
        this._state = State.START;
        this._index = 0;
        this._text = text;
        // Set by the number states when they see a character that ends a literal.
        this._preLoadedChar = null;
        // Text of the token currently being accumulated.
        this._tokenValue = "";
    }

    /**
     * @returns {boolean} true while _index has not reached the end of the text.
     */
    // NOTE(review): only _index is consulted here; a character held in
    // _preLoadedChar is not taken into account - confirm this is intended.
    hasNext() {
        return this._index < this._text.length;
    }

    /**
     * Feeds characters to the handler for the current state until one of
     * them returns a token.
     *
     * @returns {Token} The next token, or an 'ERROR' token for an unhandled state.
     */
    next() {
        while (true) {
            const ch = this._getChar();

            // Dispatch on the current state; a handler returns a falsy value
            // when it needs more characters, which keeps the loop going.
            switch (this._state) {
                case State.START: {
                    const token = this._scanStartState(ch);
                    if (token) return token;
                    break;
                }

                case State.INTEGER: {
                    const token = this._scanIntegerState(ch);
                    if (token) return token;
                    break;
                }

                case State.FLOAT: {
                    const token = this._scanFloatState(ch);
                    if (token) return token;
                    break;
                }

                // (placeholder for further states elided from this excerpt)
                ...

                // Unhandled state: move the index to the end of the text and
                // report an error token.
                default:
                    this._index = this._text.length;
                    return new Token('ERROR', ch);
            }
        }
    }
}

// Demo driver: tokenize a sample arithmetic expression and print every token.
const scanner = new Scanner("3 * (1 + 4) / 0.1592654");

// Keep pulling tokens for as long as the scanner reports remaining input.
while (scanner.hasNext()) {
  console.log(scanner.next());
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

# 구현 계층

# _scanIntegerState

_scanIntegerState(ch) {
    switch (ch) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            this._tokenValue = this._tokenValue + ch;
            break;

        case '.':
            this._state = State.FLOAT;
            this._tokenValue = this._tokenValue + ch;
            break;

        default:
            this._state = State.START;
            this._preLoadedChar = ch;
            const token = new Token('INTEGER', this._tokenValue);
            this._tokenValue = "";
            return token;
    }

    return null;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

# _scanFloatState

_scanFloatState(ch) {
    switch (ch) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            this._tokenValue = this._tokenValue + ch;
            break;

        default:
            this._state = State.START;
            this._preLoadedChar = ch;
            const token = new Token('FLOAT', this._tokenValue);
            this._tokenValue = "";
            return token;
    }

    return null;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

# _scanStartState

_scanStartState(ch) {
    switch (ch) {
        case ' ':
        case '\t': return new Token('WHITE_SPACE', ch);

        case '+': return new Token('PLUS', ch);
        case '-': return new Token('MINUS', ch);
        case '*': return new Token('MUL', ch);
        case '/': return new Token('DIV', ch);
        case '(': return new Token('LEFT_PAREN', ch);
        case ')': return new Token('RIGHT_PAREN', ch);

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            this._state = State.INTEGER;
            this._tokenValue = this._tokenValue + ch;
            break;

        default:
            this._index = this._text.length;
            return new Token('ERROR', ch);
    }

    return null;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# 전체 코드

# 실행 결과

# 코드

/**
 * States of the scanner's state machine.
 *
 * The visible Scanner code only dispatches on START, INTEGER and FLOAT; the
 * remaining members are declared but not referenced in this file.
 *
 * Frozen so the shared constant cannot be modified by accident.
 */
const State = Object.freeze({
    START:          0,
    ERROR:          1,
    WHITE_SPACE:    2,
    OPERATOR:       3,
    INTEGER:        4,
    FLOAT:          5,
    PAREN:          6,
});

/**
 * A single lexical token: a category name plus the text it was built from.
 */
class Token {
    /**
     * @param {string} type - Token category, e.g. 'INTEGER' or 'PLUS'.
     * @param {string|null} value - The scanned text carried by the token.
     */
    constructor(type, value) {
        // Shorthand properties keep the same own-property order: type, then value.
        Object.assign(this, { type, value });
    }
}

/**
 * Hand-written scanner (lexer) for simple arithmetic expressions.
 *
 * The scanner is a small state machine (START / INTEGER / FLOAT). Callers
 * alternate hasNext() and next() to pull Token objects out of the text.
 */
class Scanner {
    /**
     * @param {string} text - The text to scan.
     */
    constructor(text) {
        this._state = State.START;
        // Position of the next unread character in _text.
        this._index = 0;
        this._text = text;
        // One-character pushback: the character that terminated a number
        // literal and still has to be scanned as part of the next token.
        this._preLoadedChar = null;
        // Text of the number literal currently being accumulated.
        this._tokenValue = "";
    }

    /**
     * Reports whether there is still input to scan.
     *
     * A pushed-back character counts as remaining input. Without that check,
     * a character directly following a number at the very end of the text
     * (e.g. the ')' in "(1+4)") would never be turned into a token.
     *
     * @returns {boolean} true if next() has more input to work on.
     */
    hasNext() {
        return this._preLoadedChar !== null || this._index < this._text.length;
    }

    /**
     * Scans and returns the next token.
     *
     * Characters are fed to the handler of the current state until one of
     * them returns a token. At end of input the handlers receive null.
     *
     * @returns {Token} The next token; an 'ERROR' token on unrecognized
     *     input or an unhandled state.
     */
    next() {
        while (true) {
            const ch = this._getChar();

            switch (this._state) {
                case State.START: {
                    const token = this._scanStartState(ch);
                    if (token) return token;
                    break;
                }

                case State.INTEGER: {
                    const token = this._scanIntegerState(ch);
                    if (token) return token;
                    break;
                }

                case State.FLOAT: {
                    const token = this._scanFloatState(ch);
                    if (token) return token;
                    break;
                }

                // Unhandled state: skip the rest of the text and report an error.
                default:
                    this._index = this._text.length;
                    return new Token('ERROR', ch);
            }
        }
    }

    /**
     * Returns the next character to scan.
     *
     * @returns {string|null} The pushed-back character if there is one,
     *     otherwise the next character of the text, or null at end of input.
     */
    _getChar() {
        // The pushback slot holds either a character or null.
        if (this._preLoadedChar !== null) {
            const ch = this._preLoadedChar;
            this._preLoadedChar = null;
            return ch;
        }

        if (this._index >= this._text.length) {
            return null;
        }

        const ch = this._text[this._index];
        this._index++;
        return ch;
    }

    /**
     * Handles one character in the START state (no token in progress).
     *
     * @param {string|null} ch - The character to examine.
     * @returns {Token|null} A single-character token, an 'ERROR' token for
     *     unrecognized input, or null when a digit has started a number.
     */
    _scanStartState(ch) {
        switch (ch) {
            case ' ':
            case '\t': return new Token('WHITE_SPACE', ch);

            case '+': return new Token('PLUS', ch);
            case '-': return new Token('MINUS', ch);
            case '*': return new Token('MUL', ch);
            case '/': return new Token('DIV', ch);
            case '(': return new Token('LEFT_PAREN', ch);
            case ')': return new Token('RIGHT_PAREN', ch);

            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                // A digit opens a number literal.
                this._state = State.INTEGER;
                this._tokenValue += ch;
                break;

            default:
                // Unrecognized input: skip the rest of the text so scanning stops.
                this._index = this._text.length;
                return new Token('ERROR', ch);
        }

        return null;
    }

    /**
     * Handles one character while an integer literal is being accumulated.
     *
     * @param {string|null} ch - The character to examine.
     * @returns {Token|null} An 'INTEGER' token once the literal has ended;
     *     otherwise null.
     */
    _scanIntegerState(ch) {
        switch (ch) {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                this._tokenValue += ch;
                break;

            case '.':
                // The decimal point turns the literal into a float.
                this._state = State.FLOAT;
                this._tokenValue += ch;
                break;

            default: {
                // The literal has ended; push the terminating character back
                // so it is scanned as part of the next token.
                this._state = State.START;
                this._preLoadedChar = ch;
                const token = new Token('INTEGER', this._tokenValue);
                this._tokenValue = "";
                return token;
            }
        }

        return null;
    }

    /**
     * Handles one character while the fractional part of a float literal is
     * being accumulated.
     *
     * @param {string|null} ch - The character to examine.
     * @returns {Token|null} A 'FLOAT' token once the literal has ended;
     *     otherwise null.
     */
    _scanFloatState(ch) {
        switch (ch) {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                this._tokenValue += ch;
                break;

            default: {
                // The literal has ended; push the terminating character back
                // so it is scanned as part of the next token.
                this._state = State.START;
                this._preLoadedChar = ch;
                const token = new Token('FLOAT', this._tokenValue);
                this._tokenValue = "";
                return token;
            }
        }

        return null;
    }
}

// Demo driver: tokenize a sample arithmetic expression and print every token.
const scanner = new Scanner("3 * (1 + 4) / 0.1592654");

// Keep pulling tokens for as long as the scanner reports remaining input.
while (scanner.hasNext()) {
  console.log(scanner.next());
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150