Diffstat (limited to 'lexer.py')
| -rw-r--r-- | lexer.py | 154 |
1 file changed, 154 insertions, 0 deletions
diff --git a/lexer.py b/lexer.py
new file mode 100644
index 0000000..913a9aa
--- /dev/null
+++ b/lexer.py
@@ -0,0 +1,154 @@
+from tokens import *
+
+# consts
+DOUBLE_QUOTE = '"'
+BACKSLASH = "\\"
+OPEN_PAREN = "("
+CLOSE_PAREN = ")"
+DIGITS = "0123456789"
+LETTERS = "abcdefghijklmnopqrstuvwxyz"
+PUNCTUATION = "-_!*$@%^&=+/"
+SYMBOL_VALS = list(LETTERS + LETTERS.upper() + DIGITS + PUNCTUATION)
+
+
+def lex_string(inp):
+    token = ""
+    esc = False
+    for idx, c in enumerate(inp):
+        # if we're escaping a quote, don't add the \
+        if esc:
+            if c == DOUBLE_QUOTE:
+                token += DOUBLE_QUOTE
+            elif c == BACKSLASH:
+                token += BACKSLASH
+            else:
+                token += f"{BACKSLASH}{c}"
+
+        # if it's an escape char, set esc and continue
+        elif c == BACKSLASH:
+            esc = True
+            continue
+
+        elif c == DOUBLE_QUOTE:
+            #return token, inp[idx + 1:]
+            return NebLiteral(NebType.STRING, token), inp[idx + 1:]
+
+        else:
+            token += c
+
+        esc = False
+
+    raise Exception("improperly ended string!")
+
+def lex_bool(inp):
+    if inp[0:4] == "true":
+        token = True
+    elif inp[0:5] == "false":
+        token = False
+    else:
+        raise Exception("invalid boolean")
+
+    if peek(inp[len(str(token)):]) not in (None, " ", CLOSE_PAREN):
+        raise Exception("invalid boolean")
+
+    #return token, inp[len(str(token)):]
+    return NebLiteral(NebType.BOOL, token), inp[len(str(token)):]
+
+
+def lex_number(inp):
+    token = ""
+    for idx, c in enumerate(inp):
+        if c in (" ", CLOSE_PAREN):
+            if "." in token:
+                #return float(token), inp[idx:]
+                return NebLiteral(NebType.FLOAT, float(token)), inp[idx:]
+            else:
+                #return int(token), inp[idx:]
+                return NebLiteral(NebType.INT, int(token)), inp[idx:]
+
+        if c in list(DIGITS): # or c in ("-", "."):
+            token += c
+        elif c == "+":
+            if idx == 0:
+                continue
+            else:
+                raise Exception("improper sign placement!")
+        elif c == "-":
+            if idx == 0:
+                token += c
+            else:
+                raise Exception("improper sign placement!")
+        elif c == ".":
+            if c not in token:
+                token += c
+            else:
+                raise Exception("too many decimal points")
+        else:
+            raise Exception("improper numeric!")
+
+    if "." in token:
+        #return float(token), ""
+        return NebLiteral(NebType.FLOAT, float(token)), ""
+    else:
+        #return int(token), ""
+        return NebLiteral(NebType.INT, int(token)), ""
+
+def lex_symbol(inp):
+    token = ""
+    for idx, c in enumerate(inp):
+        if c in (CLOSE_PAREN, " "):
+            return NebSymbol(token), inp[idx:]
+        elif c in SYMBOL_VALS:
+            token += c
+        else:
+            raise Exception("improper symbol")
+    return NebSymbol(token), ""
+
+
+def peek(inp):
+    if len(inp) == 0:
+        return None
+    return inp[0]
+
+def lex(inp, tokens):
+    inp = inp.strip() # white space doesn't matter at this point
+    nxt = peek(inp)
+    if nxt is None:
+        #print(f"returning [{tokens}]")
+        return tokens
+    # parens
+    if nxt == OPEN_PAREN:
+        tokens.append(NebOpen())
+        return lex(inp[1:], tokens)
+    elif nxt == CLOSE_PAREN:
+        tokens.append(NebClose())
+        return lex(inp[1:], tokens)
+    # numbers
+    elif nxt in list(DIGITS) or nxt in ("+", "-", "."):
+        token, remainder = lex_number(inp)
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # strings
+    elif nxt == DOUBLE_QUOTE:
+        token, remainder = lex_string(inp[1:])
+        #print(f"received [{token}] [{remainder}]")
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # bool
+    elif nxt == "#":
+        token, remainder = lex_bool(inp[1:])
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # symbols
+    elif nxt in SYMBOL_VALS:
+        token, remainder = lex_symbol(inp)
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    else:
+        raise Exception("unable to lex")
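
A minimal usage sketch, not part of the commit above: it assumes tokens.py (not shown here) exports NebOpen, NebClose, NebSymbol, NebLiteral, and NebType as referenced by lexer.py. The entry point is lex(), which takes the raw source string plus an accumulator list and returns the flat token stream:

    from lexer import lex

    # expected: NebOpen, NebSymbol("add"), NebLiteral(INT, 1),
    # NebLiteral(FLOAT, 2.5), NebLiteral(STRING, "hi"),
    # NebLiteral(BOOL, True), NebClose
    tokens = lex('(add 1 2.5 "hi" #true)', [])
    print(tokens)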
