diff options
| author | mryouse | 2022-05-13 02:35:25 +0000 |
|---|---|---|
| committer | mryouse | 2022-05-13 02:35:25 +0000 |
| commit | 3d23b45a0ab381f34a2dae327e22cfa862af46ea (patch) | |
| tree | 104bd1949d60947058b1208169b0372092b0f14c /lexer.py | |
| parent | e18bdec21683adfb2658359568a54a2f3f21d703 (diff) | |
lists? not sure if they fully work, but somewhat
Diffstat (limited to 'lexer.py')
| -rw-r--r-- | lexer.py | 28 |
1 file changed, 20 insertions, 8 deletions
```diff
@@ -5,6 +5,8 @@ DOUBLE_QUOTE = '"'
 BACKSLASH = "\\"
 OPEN_PAREN = "("
 CLOSE_PAREN = ")"
+OPEN_BRACE = "["
+CLOSE_BRACE = "]"
 DIGITS = "0123456789"
 LETTERS = "abcdefghijklmnopqrstuvwxyz"
 PUNCTUATION = "-_!*$@%^&=+/?<>~"
@@ -48,7 +50,7 @@ def lex_bool(inp):
     else:
         raise Exception("invalid boolean")
 
-    if peek(inp[len(str(token)):]) not in (None, " ", CLOSE_PAREN):
+    if peek(inp[len(str(token)):]) not in (None, " ", CLOSE_PAREN, CLOSE_BRACE):
         raise Exception("invalid boolean")
 
     #return token, inp[len(str(token)):]
@@ -58,7 +60,7 @@ def lex_bool(inp):
 def lex_number(inp):
     token = ""
     for idx, c in enumerate(inp):
-        if c in (" ", CLOSE_PAREN):
+        if c in (" ", CLOSE_PAREN, CLOSE_BRACE):
             if "." in token:
                 #return float(token), inp[idx:]
                 return NebFloat(float(token)), inp[idx:]
@@ -96,14 +98,13 @@ def lex_number(inp):
 def lex_symbol(inp):
     token = ""
     for idx, c in enumerate(inp):
-        if c in (CLOSE_PAREN, " "):
+        if c in (CLOSE_PAREN, CLOSE_BRACE, " "):
             return NebSymbol(token), inp[idx:]
         elif c in SYMBOL_VALS:
             token += c
         else:
             raise Exception("improper symbol")
     return NebSymbol(token), ""
 
-
 def peek(inp):
     if len(inp) == 0:
@@ -123,6 +124,13 @@ def lex(inp, tokens):
     elif nxt == CLOSE_PAREN:
         tokens.append(NebClose())
         return lex(inp[1:], tokens)
+    # braces
+    elif nxt == OPEN_BRACE:
+        tokens.append(NebListStart())
+        return lex(inp[1:], tokens)
+    elif nxt == CLOSE_BRACE:
+        tokens.append(NebListEnd())
+        return lex(inp[1:], tokens)
     # numbers
     elif nxt in list(DIGITS) or nxt in ("+", "-", "."):
         # + and - are symbols, too
@@ -130,7 +138,8 @@ def lex(inp, tokens):
             after = peek(inp[1:])
             if after not in DIGITS: # parse a symbol
                 token, remainder = lex_symbol(inp)
-                if peek(remainder) not in (None, CLOSE_PAREN, " "):
+                if peek(remainder) not in (None, CLOSE_PAREN, CLOSE_BRACE, " "):
+                    print(f"{peek(remainder)}")
                     raise Exception("spaces required between tokens")
                 tokens.append(token)
                 return lex(remainder, tokens)
@@ -141,21 +150,24 @@ def lex(inp, tokens):
     elif nxt == DOUBLE_QUOTE:
         token, remainder = lex_string(inp[1:])
         #print(f"received [{token}] [{remainder}]")
-        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+        if peek(remainder) not in (None, CLOSE_PAREN, " ", CLOSE_BRACE):
+            print(f"{peek(remainder)}")
             raise Exception("spaces required between tokens")
         tokens.append(token)
         return lex(remainder, tokens)
     # bool
     elif nxt == "#":
         token, remainder = lex_bool(inp[1:])
-        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+        if peek(remainder) not in (None, CLOSE_PAREN, " ", CLOSE_BRACE):
+            print(f"{peek(remainder)}")
             raise Exception("spaces required between tokens")
         tokens.append(token)
         return lex(remainder, tokens)
     # symbols
     elif nxt in SYMBOL_VALS:
         token, remainder = lex_symbol(inp)
-        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+        if peek(remainder) not in (None, CLOSE_PAREN, " ", CLOSE_BRACE):
+            print(f"{peek(remainder)}")
             raise Exception("spaces required between tokens")
         tokens.append(token)
         return lex(remainder, tokens)
```
