Interpreter/Parser.py at master · verae98/Interpreter · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
from typing import List, Tuple, Union, Callable
from Lexer import Token
from Enums import Error, Errornr, State, TokenValues
import copy

class Node():

    def __init__(self, linenr : int = 0, value : str = None, type : TokenValues = None):
        self.value = value
        self.type = type
        self.linenr = linenr

    def __str__(self) -> str:
        return ("(value: " + (" None " if self.value is None else self.value) + " Type: " + str(self.type) + ")\n")

    def __repr__(self) -> str:
        return self.__str__()

class operator_node(Node):

    def __init__(self, linenr : int, value : str = None , lhs : Node = None , operator : TokenValues = None, rhs : Node = None):
        Node.__init__(self, linenr, value, operator )
        self.value = value
        self.lhs = lhs
        self.operator = operator
        self.rhs = rhs
        self.linenr = linenr

    def __str__(self) -> str:
        return ("(value: " + (" None " if self.value is None else self.value) + " lhs: " + (" None " if self.lhs is None else self.lhs.__repr__())
                + " operator: " + str(self.operator.name) + " rhs: " + (" None " if self.rhs is None else self.rhs.__repr__()) + ")\n")

    def __repr__(self) -> str:
        return self.__str__()

class value_node(Node):
    def __init__(self, linenr : int, value : str = None, type : TokenValues = None):
        Node.__init__(self, linenr, value, type)
        self.value = value
        self.type = type
        self.linenr = linenr

    def __str__(self):
        return ("(value: " + (" None " if self.value is None else self.value) + " Type: " + str(self.type.name) +  ")\n" )

    def __repr__(self):
        return self.__str__()

class ProgramValues():
    def __init__(self, state : State = State.Idle, unprocessed : List[Token] = list(), error_list : List[Error] = list()):
        self.state = state
        self.unprocessedTokens = unprocessed
        self.error_list = error_list

    def __str__(self):
        return "ProgramValues ( State: "  + str(self.state) + ", Unprocessed " + str(self.unprocessedTokens) + ", Errorlist " + str(self.error_list) + " )"

    def __repr__(self):
        return self.__str__()

# findNode :: Node -> Node | None
def findNode(node : Node) -> Union[Node,None]:
    """ Find the node with empty space and returns the node with an empty lhs or rhs
    If Node is not an operator, return nonde (happens only when root is a value) """
    if(isinstance(node, operator_node)):
        if (node.lhs == None or node.rhs == None):
            return node
        if (isinstance(node.rhs, value_node)):
            return node
        return findNode(node.rhs)
    return None

# processMath :: Token -> Node -> Tuple[Node, Error]
def processMath(currentToken1: Token, current_node1 : Node) -> (Node, [Error]):
    """Retrieve node and append the token in this node return this Node and the Errorlist. Errorlist is empty if no errors occur.
    Append Node according to the math rules"""
    error = []
    currentToken = copy.copy(currentToken1)
    current_node = copy.copy(current_node1)
    if (currentToken.instance == TokenValues.PLUS or currentToken.instance == TokenValues.MIN):
        new_node = operator_node(currentToken.linenr, currentToken.type, current_node, currentToken.instance)
        return new_node, error
    elif (currentToken.instance == TokenValues.MULTIPLY or currentToken.instance == TokenValues.DIVIDED_BY):
        # find the node that is not filled yet
        node_ = findNode(current_node)
        if(node_ != None):
            lhs = node_.rhs
            new_node = operator_node(currentToken.linenr, currentToken.type, lhs, currentToken.instance)
            node_.rhs = new_node
        else:
            # if there are only values in the nodetree
            new_node = operator_node(currentToken.linenr, currentToken.type, current_node, currentToken.instance)
            current_node = new_node

    else:
        new_node = value_node(currentToken.linenr, currentToken.type, currentToken.instance)
        # check if list is empty
        if(current_node.value == None):
            current_node = new_node
        else:
            # get empty operator_node
            node_ = findNode(current_node)
            if(node_ != None):
                if(node_.lhs == None):
                    node_.lhs = new_node
                elif (node_.rhs == None):
                    node_.rhs = new_node
                else:
                    error.append(Error(Errornr.MATH_ERROR, "On line: " + str(currentToken.linenr) + ", too many numbers versus operators"))
            else:
                error.append(Error(Errornr.MATH_ERROR, "On line: " + str(currentToken.linenr) + ", cannot calculate " + currentToken.type))
                pass
    return current_node, error

# processComparison :: List[Token] -> Tuple[Node, List[Error]]
def processComparison(tokens1: List[Token]) -> (Node, List[Error]):
    """Retrieve tokens and return a node according to the comparison rules"""
    error = []
    tokens = copy.copy(tokens1)
    # get first element that meets the condition
    func = lambda currentToken: currentToken.instance == TokenValues.EQUAL or currentToken.instance == TokenValues.NOTEQUAL or currentToken.instance == TokenValues.GE \
        or currentToken.instance == TokenValues.SE or currentToken.instance == TokenValues.GREATER or currentToken.instance == TokenValues.SMALLER
    index = _getFirst(list(map(func, tokens)))

    lhs, pv_lhs = processTokens(tokens[:index])# process the tokens till the comparison token
    if(len(pv_lhs.error_list) > 0):
        error += (pv_lhs.error_list)
    if (pv_lhs.state == State.ERROR or len(lhs) > 1 ):
        error += (Error(Errornr.SYNTAX_ERROR, "On line " + str(tokens[0].linenr) + " ^" + str(tokens[0].type) + " invalid syntax: too many arguments for comparison"))

    rhs, pv_rhs = processTokens(tokens[index + 1:]) # process the tokens after the comparison token
    if(len(pv_rhs.error_list) > 0):
        error += (pv_rhs.error_list)
    if ((pv_rhs.state == State.ERROR or len(rhs) > 1) and len(error) == 0):
        error.append(Error(Errornr.SYNTAX_ERROR, "On line " + str(tokens[0].linenr) + " ^" + str(tokens[index + 1].type) + " invalid syntax: too many arguments for comparison"))
    current_node = operator_node(tokens[index].linenr, tokens[index].type, lhs[0], tokens[index].instance, rhs[0])
    return current_node, error

# processIf_While :: Listp[Tokens] -> Tuple[Node, ProgramValues] | Tuple[None, List[Error]]
def processIf_While(tokens1: List[Token]) -> Union[Tuple[Node, List[Error]], Tuple[None, List[Error]] ]:
    """Retrieve tokens and return a node according to the if or while layout. Since if and while have the same rules,
    these commands can both be processed in this funcion"""
    error = []
    tokens = copy.copy(tokens1)
    # get index of all important tokens
    index_parL = getIndexToken(lambda x: x.instance == TokenValues.LPAREN, tokens)
    index_parR = getIndexToken(lambda x: x.instance == TokenValues.RPAREN, tokens)
    index_bracL = getIndexToken(lambda x: x.instance == TokenValues.LBRACE, tokens)

    # check for syntax errors
    if(index_parL == -1 or index_parR == -1):
        error.append(Error(Errornr.SYNTAX_ERROR, "Syntax Error on line " + str(tokens[0].linenr)))
        return None, error

    if((index_bracL - index_parR) != 1):
        error.append(Error(Errornr.SYNTAX_ERROR, "Syntax Error on line " + str(tokens[index_parR].linenr)))
        return None, error
    elif(index_bracL == -1):
        error.append(Error(Errornr.SYNTAX_ERROR, "Syntax Error on line " + str(tokens[index_parR].linenr)))
        return None, error

    new_node = operator_node(tokens[0].linenr, tokens[0].type, None, tokens[0].instance)
    compare_node, error = processComparison(tokens[index_parL+1:index_parR])
    node_list_rhs, pv1 = processTokens(tokens[index_bracL+1:-1])
    pv = copy.copy(pv1)
    pv.unprocessedTokens = []
    if(len(error) > 0):
        error += pv.error_list
    new_node.lhs = compare_node
    new_node.rhs = node_list_rhs
    return new_node, error

# processAssign : List[Token] -> Tuple[Node, ProgramValues]
def processAssign(tokens1: List[Token]) -> (Node, List[Error]):
    """Retrieve tokens and return a node according to the assign layout. """
    error = []
    tokens = copy.copy(tokens1)
    index = getIndexToken(lambda x: x.instance == TokenValues.ASSIGN, tokens) # get index of assignment
    if(index != 1):
        error.append(Error(Errornr.SYNTAX_ERROR, "too many arguments for assignment"))
    if(tokens[0].instance != TokenValues.VAR):
        error.append(Error(Errornr.SYNTAX_ERROR, "on line " + str(tokens[0].linenr) + " -> \"" + tokens[0].type + "\" cannot be an variable"))
    lhs = value_node(tokens[0].linenr, tokens[0].type, TokenValues.VAR_ASSIGN)
    # check if after assignment there are enough parameters (value and terminator)
    if(index + 1 >= len(tokens) -1):
        error.append(Error(Errornr.SYNTAX_ERROR, "On line " + str(tokens[index].linenr) + ", invalid syntax"))
        return None, error
    rhs, pv1 = processTokens(tokens[index+1:])
    pv = copy.copy(pv1)
    pv.unprocessedTokens = []
    if (pv.state == State.ERROR):
        error += pv.error_list
    if(len(rhs) > 1):
        error += pv.error_list
    current_node = operator_node(tokens[index].linenr, tokens[index].type, lhs, tokens[index].instance, rhs[0])
    return current_node, error

# _getFirst :: List[Bool] -> int
def _getFirst(list_to_check : List[bool]) -> int:
    """retrieve list of bools, return index of first True element"""
    if(len(list_to_check) == 0):
        return -1
    index = _getFirst(list_to_check[:-1])
    # check if the element in the list is True and index is not overwritten yet
    if(list_to_check[-1] and index == -1):
        return len(list_to_check) -1
    return index

# getIndexToken :: (Token -> bool) -> List[Token] -> int
def getIndexToken(f : Callable[[Token], bool], tokens : List[Token]) -> int :
    # map the tokens to a list of bools. Element in list is True if they met the condition in f
    # get first element that has the match, return index
    return _getFirst(list(map(f, tokens)))


#amountOpenBraces :: List[Token] -> int
def amountOpenBraces(tokens: List[Token]) -> int:
    # count the bracets: open +1 and close means -1 return result of counting
    if(len(tokens) <= 0):
        return 0
    braces = amountOpenBraces(tokens[:-1])
    currentToken = tokens[-1]
    if(currentToken.instance == TokenValues.LBRACE):
        braces = braces + 1
    elif(currentToken.instance == TokenValues.RBRACE):
        braces = braces - 1
    return braces


# _processTokens :: List[Tokens] -> Tuple[List[Node], ProgramValues]
def processTokens(tokens1: List[Token]) -> (List[Node], ProgramValues):
    """Retrieve the tokens that needs to be processed. Check in the token the instance and act accordingly
    It returns the nodes of the processed tokens and the ProgramValues"""
    tokens = copy.copy(tokens1)
    if(len(tokens) == 0):
        return [], ProgramValues(unprocessed = []) # default values

    nodes, pv_old = processTokens(tokens[0:-1])
    pv = copy.copy(pv_old)
    currentToken = tokens[-1]
    if(pv.state == State.ERROR):
        return nodes, pv
    if(pv.state == State.PRINT):
        if(currentToken.instance == TokenValues.PRINT_END):
            new_node = operator_node(pv.unprocessedTokens[0].linenr, pv.unprocessedTokens[0].type, None, pv.unprocessedTokens[0].instance)
            rhs, pv = copy.copy(processTokens(pv.unprocessedTokens[1:]))
            pv.state = State.Idle
            if(len(rhs) > 0):
                new_node.lhs = rhs[0]
                new_node.rhs = rhs
            nodes.append(new_node)
            return nodes, pv
        pv.unprocessedTokens.append(currentToken)
        return nodes, pv

    if (pv.state == State.Math):
        # program rules, assignment before math
        if (currentToken.instance == TokenValues.ASSIGN):
            pv.state = State.ASSIGN
            # remove incorrect math equation
            nodes = nodes[:-1]
            pv.unprocessedTokens.append(currentToken)
        elif (currentToken.instance != TokenValues.NUMBER and currentToken.instance != TokenValues.VAR and currentToken.instance != TokenValues.PLUS and currentToken.instance != TokenValues.MIN and
                    currentToken.instance != TokenValues.MULTIPLY and currentToken.instance != TokenValues.DIVIDED_BY):
            # math state is over, remove processed tokens
            pv.unprocessedTokens = []
            pv.state = State.Idle
        else:
            pv.unprocessedTokens.append(currentToken)
            nodes[-1], error = processMath(currentToken, nodes[-1])
            if(len(error) > 0):
                pv.state = State.ERROR
                pv.error_list += (error)
        return nodes, pv

    elif (pv.state == State.ASSIGN):
        if(currentToken.instance == TokenValues.SEMICOLON):
            pv.unprocessedTokens.append(currentToken)
            new_node, error_list = processAssign(pv.unprocessedTokens)
            if (len(error_list) > 0):
                pv.state = State.ERROR
                pv.error_list += error_list
                pv.unprocessedTokens = []
                return nodes, pv
            pv.unprocessedTokens = []
            nodes.append(new_node)
            pv.state = State.Idle
        else:
            pv.unprocessedTokens.append(currentToken)
        return nodes, pv

    elif(pv.state == State.IF_WHILE):
        pv.unprocessedTokens.append(currentToken)
        if(currentToken.instance == TokenValues.RBRACE):
            amountBraces = amountOpenBraces(pv.unprocessedTokens)
            if (amountBraces == 0):
                new_node, error_list = processIf_While(pv.unprocessedTokens)
                if(new_node == None):
                    pv.state = State.ERROR
                    pv.error_list += error_list
                    return nodes, pv
                nodes.append(new_node)
                pv.unprocessedTokens = []
                pv.state = State.Idle
                nodes.append(Node())
            elif (amountBraces < 0):
                pv.error_list.append(Error(Errornr.SYNTAX_ERROR, "on line " + str(pv.unprocessedTokens[0].linenr) + ", missing opening brace"))

        return nodes, pv


    elif(currentToken.instance == TokenValues.PLUS or currentToken.instance == TokenValues.MIN or
                 currentToken.instance == TokenValues.MULTIPLY or currentToken.instance == TokenValues.DIVIDED_BY or currentToken.instance == TokenValues.NUMBER or currentToken.instance == TokenValues.VAR):
        if(pv.state == State.Idle):
            pv.state = State.Math
            nodes.append(Node())

            nodes[-1], error = processMath(currentToken, nodes[-1])
            if (len(error) > 0):
                pv.state = State.ERROR
                pv.error_list += (error)
                return nodes, pv


    elif (currentToken.instance == TokenValues.ASSIGN):
        if (pv.state == State.Idle):
            pv.state = State.ASSIGN


    elif(currentToken.instance == TokenValues.IF):
        pv.state = State.IF_WHILE

    elif (currentToken.instance == TokenValues.WHILE):
        pv.state = State.IF_WHILE

    elif (currentToken.instance == TokenValues.PRINT):
        pv.state = State.PRINT

    pv.unprocessedTokens.append(currentToken)

    return nodes, pv


# parse :: List[Token] -> Tuple[List[Node], ProgramValues]
def parse(tokens: List[Token]) -> (List[Node], ProgramValues):
    # Calls processTokens, which processes the tokens recursively
    return processTokens(tokens)