| author | mryouse | 2022-06-18 02:45:04 +0000 |
|---|---|---|
| committer | mryouse | 2022-06-18 02:45:04 +0000 |
| commit | d1a1c1592e610526c4a0432f93bd9ea6ae96d6e8 | |
| tree | df4c78f3ce5dfb1369d5fc6c155ca43e8bfc729f | |
| parent | 065d138ca3013a4d1ef1aa3d7c48982d8bee5de2 | |
refactor: make neb a module
Diffstat (limited to 'lexer.py')
| -rw-r--r-- | lexer.py | 168 |
1 file changed, 0 insertions, 168 deletions
diff --git a/lexer.py b/lexer.py
deleted file mode 100644
index fa3db90..0000000
--- a/lexer.py
+++ /dev/null
@@ -1,168 +0,0 @@
-from structs import TokenType, Token
-from exceptions import LexError
-import sys
-
-
-types = {
-    ":int": TokenType.INT_TYPE,
-    ":float": TokenType.FLOAT_TYPE,
-    ":number": TokenType.NUMBER_TYPE,
-    ":string": TokenType.STRING_TYPE,
-    ":list": TokenType.LIST_TYPE,
-    ":any": TokenType.ANY_TYPE,
-    ":literal": TokenType.LITERAL_TYPE,
-    ":bool": TokenType.BOOL_TYPE }
-
-keywords = {
-    "if": TokenType.IF,
-    "for-count": TokenType.FOR_COUNT,
-    "def": TokenType.DEF,
-    "lambda": TokenType.LAMBDA,
-    "&": TokenType.MANY,
-    "func": TokenType.FUNC }
-
-
-WHITESPACE = [" ", "\n", "\t"]
-SEPARATORS = WHITESPACE + [")"]
-DIGITS = list("0123456789")
-
-def lex(data):
-    start = 0
-    current = 0
-    line = 1
-    end = len(data)
-
-    tokens = []
-    while current < end:
-        char = data[current]
-        if char == ";":
-            while char != "\n" and current < end:
-                current += 1
-                char = data[current]
-            continue
-        if char == "\n":
-            line += 1
-        if char in WHITESPACE:
-            current += 1
-            continue
-        elif char == "(":
-            tokens.append(Token(TokenType.OPEN_PAREN, "(", None, line))
-        elif char == ")":
-            tokens.append(Token(TokenType.CLOSE_PAREN, ")", None, line))
-        # numbers
-        elif char in DIGITS or char == ".":
-            tok, length = get_number(data[current:], line)
-            tokens.append(tok)
-            current += length
-        # strings
-        elif char == '"':
-            tok, length, offset = get_string(data[current+1:], line)
-            tokens.append(tok)
-            current += length
-            line += offset
-        # bools
-        elif char == "#":
-            tok, length = get_bool(data[current+1:], line)
-            tokens.append(tok)
-            current += length
-        # types
-        elif char == ":":
-            tok, length = get_type(data[current:], line) # include :
-            tokens.append(tok)
-            current += length
-        # symbols
-        else:
-            tok, length = get_symbol(data[current:], line)
-            if tok.text in keywords:
-                tok.type_ = keywords[tok.text]
-            tokens.append(tok)
-            current += length
-
-        current += 1
-    tokens.append(Token(TokenType.EOF, "", None, line))
-    return tokens
-
-def get_number(data, line):
-    counter = 0
-    value = ""
-    is_float = False
-    char = data[counter]
-    while char not in SEPARATORS:
-        if char in DIGITS:
-            value += char
-        elif char == ".":
-            if is_float:
-                raise LexError("too many '.' in number", line)
-            is_float = True
-            value += char
-        else:
-            raise Exception(f"invalid number: {value}")
-        counter += 1
-        if counter >= len(data):
-            break
-        char = data[counter]
-    if is_float:
-        return Token(TokenType.FLOAT, value, float(value), line), counter - 1
-    else:
-        return Token(TokenType.INT, value, int(value), line), counter - 1
-
-
-def get_string(data, line):
-    offset = 0
-    counter = 0
-    string = ""
-    while data[counter] != '"':
-        if data[counter] == "\n":
-            offset += 1
-
-        # look ahead to see if it's a double quote
-        if data[counter] == "\\" and \
-           len(data) > counter and \
-           data[counter+1] == '"':
-            string += '"'
-            counter += 1
-        else:
-            string += data[counter]
-        counter += 1
-        if counter >= len(data):
-            raise Exception("couldn't parse string")
-    string = string.encode().decode("unicode_escape")
-    return Token(TokenType.STRING, str(string), str(string), line), counter + 1, offset
-
-def get_bool(data, line):
-    counter = 0
-    value = ""
-    while data[counter] not in SEPARATORS:
-        value += data[counter]
-        counter += 1
-        if counter >= len(data):
-            break
-    if value == "true":
-        return Token(TokenType.TRUE, "#true", True, line), 4
-    elif value == "false":
-        return Token(TokenType.FALSE, "#false", False, line), 5
-    else:
-        raise LexError("couldn't parse boolean", line)
-
-def get_symbol(data, line):
-    counter = 0
-    value = ""
-    while data[counter] not in SEPARATORS:
-        value += data[counter]
-        counter += 1
-        if counter >= len(data):
-            break
-    return Token(TokenType.SYMBOL, value, None, line), counter - 1
-
-def get_type(data, line):
-    counter = 0
-    value = ""
-    while data[counter] not in SEPARATORS:
-        value += data[counter]
-        counter += 1
-        if counter >= len(data):
-            break
-    if value not in types:
-        raise LexError(f"unrecognized type {value}", line)
-    return Token(types[value], value, None, line), counter - 1
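For reference, the deleted lexer is a single character-scanning loop: it dispatches on the first character of each lexeme (`(` and `)` become paren tokens, digits start numbers, `"` starts strings, `#` starts booleans, `:` starts type annotations, `;` starts comments) and otherwise accumulates a symbol until it hits a separator, re-tagging it via the `keywords` table afterwards. Below is a minimal, self-contained sketch of that dispatch loop; the `Token` and `TokenType` definitions here are stand-ins, since the real ones live in the project's `structs` module and are not part of this diff.

```python
from dataclasses import dataclass
from enum import Enum, auto


class TokenType(Enum):
    # Stand-in for the project's structs.TokenType; only the variants
    # this sketch emits are included.
    OPEN_PAREN = auto()
    CLOSE_PAREN = auto()
    INT = auto()
    SYMBOL = auto()
    EOF = auto()


@dataclass
class Token:
    # Stand-in for structs.Token, mirroring the four arguments the
    # deleted code passes: token type, source text, parsed value, line.
    type_: TokenType
    text: str
    value: object
    line: int


WHITESPACE = {" ", "\n", "\t"}
SEPARATORS = WHITESPACE | {")"}


def lex(data: str) -> list[Token]:
    """Scan `data` left to right, emitting one Token per lexeme."""
    tokens: list[Token] = []
    current, line = 0, 1
    while current < len(data):
        char = data[current]
        if char in WHITESPACE:
            if char == "\n":
                line += 1  # track line numbers for error reporting
            current += 1
        elif char == "(":
            tokens.append(Token(TokenType.OPEN_PAREN, "(", None, line))
            current += 1
        elif char == ")":
            tokens.append(Token(TokenType.CLOSE_PAREN, ")", None, line))
            current += 1
        else:
            # Accumulate characters until a separator, then classify
            # the whole lexeme at once.
            start = current
            while current < len(data) and data[current] not in SEPARATORS:
                current += 1
            text = data[start:current]
            if text.isdigit():
                tokens.append(Token(TokenType.INT, text, int(text), line))
            else:
                tokens.append(Token(TokenType.SYMBOL, text, None, line))
    tokens.append(Token(TokenType.EOF, "", None, line))
    return tokens


if __name__ == "__main__":
    for tok in lex("(def x 42)"):
        print(tok)
```

The deleted implementation layers helper functions (`get_number`, `get_string`, `get_bool`, `get_type`, `get_symbol`) on this same idea: each one returns the finished token plus how far the main loop should advance the cursor.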
