From 16c75f926f41cb9aa89a7cef4e859c10dd08b6e5 Mon Sep 17 00:00:00 2001
From: mryouse
Date: Sat, 21 May 2022 04:44:29 +0000
Subject: lex and parse types

---
Review notes (free-form area; not part of the commit message or the diff):

  lexer.py
    * TokenType gains INT_TYPE, FLOAT_TYPE, STRING_TYPE and ANY_TYPE.
    * New module-level table `types` maps the spellings ":int", ":float",
      ":string" and ":any" to those token types.
    * lex() routes a ":" character to get_type(), passing a slice that
      still includes the colon so the lookup key matches the table.
    * get_type() accumulates characters until one is in SEPARATORS or the
      data ends, raises LexError("unrecognized type ...") when the text is
      not a key of `types`, and returns (Token, counter - 1) -- the same
      return shape get_symbol() uses.

  parser.py
    * New node Expr.Type stores `name` and prints it via __str__.
    * parseType() returns (Expr.Type(token.text), 1).
    * parseExpression() and parse() send the four *_TYPE tokens to
      parseType() ahead of the parseSymbol() fallback.

  NOTE(review): this copy was rebuilt from a whitespace-collapsed export.
  Line breaks and hunk sizes were checked against every @@ header and the
  diffstat below, but leading indentation is inferred, so context lines
  may not match the real files byte-for-byte. Confirm against the
  repository, or apply with `git apply --ignore-whitespace`.

 lexer.py  | 28 ++++++++++++++++++++++++++++
 parser.py | 15 +++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/lexer.py b/lexer.py
index aa2d107..3b93e41 100644
--- a/lexer.py
+++ b/lexer.py
@@ -55,6 +55,18 @@ class TokenType(Enum):
     # symbols
     SYMBOL = auto()
 
+    # types
+    INT_TYPE = auto()
+    FLOAT_TYPE = auto()
+    STRING_TYPE = auto()
+    ANY_TYPE = auto()
+
+types = {
+    ":int": TokenType.INT_TYPE,
+    ":float": TokenType.FLOAT_TYPE,
+    ":string": TokenType.STRING_TYPE,
+    ":any": TokenType.ANY_TYPE }
+
 keywords = {
     "print": TokenType.PRINT,
     "+": TokenType.PLUS,
@@ -123,6 +135,11 @@ def lex(data):
             tok, length = get_bool(data[current+1:], line)
             tokens.append(tok)
             current += length
+        #types
+        elif char == ":":
+            tok, length = get_type(data[current:], line) # include :
+            tokens.append(tok)
+            current += length
         # symbols
         else:
             tok, length = get_symbol(data[current:], line)
@@ -188,6 +205,17 @@ def get_symbol(data, line):
             break
     return Token(TokenType.SYMBOL, value, None, line), counter - 1
 
+def get_type(data, line):
+    counter = 0
+    value = ""
+    while data[counter] not in SEPARATORS:
+        value += data[counter]
+        counter += 1
+        if counter >= len(data):
+            break
+    if value not in types:
+        raise LexError(f"unrecognized type {value}", line)
+    return Token(types[value], value, None, line), counter - 1
 
 def main(data):
     try:
diff --git a/parser.py b/parser.py
index f331cfe..6acda5d 100644
--- a/parser.py
+++ b/parser.py
@@ -13,6 +13,12 @@ class Expr:
         def __str__(self):
             return f"{self.value}"
 
+    class Type:
+        def __init__(self, name):
+            self.name = name
+        def __str__(self):
+            return self.name
+
     class Symbol:
         def __init__(self, name):
             self.name = name
@@ -76,6 +82,9 @@ def parseExpression(token, prev, tokens):
         elif token.type_ in (TokenType.STRING, TokenType.TRUE, TokenType.FALSE, TokenType.INT, TokenType.FLOAT):
             expr, inc = parseLiteral(token, prev, tokens[idx+1:])
             args.append(expr)
+        elif token.type_ in (TokenType.INT_TYPE, TokenType.FLOAT_TYPE, TokenType.STRING_TYPE, TokenType.ANY_TYPE):
+            expr, inc = parseType(token, prev, tokens[idx+1:])
+            args.append(expr)
         else:
             expr, inc = parseSymbol(token, prev, tokens[idx+1:])
             args.append(expr)
@@ -90,6 +99,9 @@ def parseSymbol(token, prev, tokens):
 def parseLiteral(token, prev, tokens):
     return Expr.Literal(token.value), 1
 
+def parseType(token, prev, tokens):
+    return Expr.Type(token.text), 1
+
 def parse(tokens):
     idx = 0
     prev = None
@@ -103,6 +115,9 @@ def parse(tokens):
         elif token.type_ in (TokenType.FALSE, TokenType.TRUE, TokenType.STRING, TokenType.INT, TokenType.FLOAT):
             lit, counter = parseLiteral(token, prev, tokens[idx+1:])
             exprs.append(lit)
+        elif token.type_ in (TokenType.INT_TYPE, TokenType.FLOAT_TYPE, TokenType.STRING_TYPE, TokenType.ANY_TYPE):
+            typ, counter = parseType(token, prev, tokens[idx+1:])
+            exprs.append(typ)
         else:
             sym, counter = parseSymbol(token, prev, tokens[idx+1:])
             exprs.append(sym)
-- 
cgit v1.2.3