aboutsummaryrefslogtreecommitdiff
path: root/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.py')
-rw-r--r--lexer.py154
1 files changed, 154 insertions, 0 deletions
diff --git a/lexer.py b/lexer.py
new file mode 100644
index 0000000..913a9aa
--- /dev/null
+++ b/lexer.py
@@ -0,0 +1,154 @@
+from tokens import *
+
# Character constants shared by the lexing functions in this module.
DOUBLE_QUOTE = '"'  # opens and closes a string literal
BACKSLASH = "\\"  # escape character inside a string literal
OPEN_PAREN = "("
CLOSE_PAREN = ")"
DIGITS = "0123456789"
LETTERS = "abcdefghijklmnopqrstuvwxyz"
PUNCTUATION = "-_!*$@%^&=+/"
# Every character that may appear in a symbol: lower- and upper-case letters,
# digits, then punctuation, one single-character string per list entry.
SYMBOL_VALS = list(LETTERS + LETTERS.upper() + DIGITS + PUNCTUATION)
+
+
def lex_string(inp):
    """Lex a string literal whose opening quote has already been consumed.

    Scans ``inp`` up to the first unescaped double quote.

    Escape handling:
      * backslash + double quote -> a literal double quote
      * backslash + backslash    -> a single backslash
      * backslash + anything else is kept verbatim (backslash included)

    Returns a ``(token, remainder)`` tuple: ``token`` is
    ``NebLiteral(NebType.STRING, text)`` and ``remainder`` is everything
    after the closing quote.

    Raises ``Exception("improperly ended string!")`` when no closing quote
    is found (including input that ends right after a backslash).
    """
    pieces = []  # collected fragments; joined once at the closing quote
    escaping = False
    for idx, c in enumerate(inp):
        if escaping:
            # The previous character was a backslash: resolve the escape.
            if c in (DOUBLE_QUOTE, BACKSLASH):
                pieces.append(c)
            else:
                # Unknown escape sequence: keep the backslash as written.
                pieces.append(f"{BACKSLASH}{c}")
            escaping = False
        elif c == BACKSLASH:
            # Escape character: remember it and look at the next character.
            escaping = True
        elif c == DOUBLE_QUOTE:
            return NebLiteral(NebType.STRING, "".join(pieces)), inp[idx + 1:]
        else:
            pieces.append(c)

    raise Exception("improperly ended string!")
+
def lex_bool(inp):
    """Lex a boolean literal; ``inp`` starts right after the ``#`` marker.

    Accepts exactly ``true`` or ``false``, which must be followed by a
    space, a closing paren, or the end of input.

    Returns ``(NebLiteral(NebType.BOOL, value), remainder)`` where
    ``remainder`` is the input after the literal.

    Raises ``Exception("invalid boolean")`` for any other spelling or when
    the literal runs straight into another character.
    """
    for literal, value in (("true", True), ("false", False)):
        if inp.startswith(literal):
            # Consume exactly the matched text rather than relying on the
            # length of str(True) / str(False) happening to match.
            remainder = inp[len(literal):]
            break
    else:
        raise Exception("invalid boolean")

    if peek(remainder) not in (None, " ", CLOSE_PAREN):
        raise Exception("invalid boolean")

    return NebLiteral(NebType.BOOL, value), remainder
+
+
def lex_number(inp):
    """Lex an integer or float literal from the start of ``inp``.

    The literal runs until the first space, closing paren, or end of input.
    A single leading ``+`` or ``-`` is allowed (``+`` is dropped), as is at
    most one decimal point; a decimal point makes the literal a float.

    Returns ``(token, remainder)`` where ``token`` is
    ``NebLiteral(NebType.FLOAT, float)`` or ``NebLiteral(NebType.INT, int)``
    and ``remainder`` starts at the delimiter that ended the literal (or is
    ``""`` at end of input).

    Raises ``Exception`` with one of:
      * "improper sign placement!" - a sign anywhere but the first position
      * "too many decimal points"  - a second decimal point
      * "improper numeric!"        - any other character, or no digits at all
    """
    chars = []
    seen_point = False
    end = len(inp)  # index where the literal stops; defaults to end of input
    for idx, c in enumerate(inp):
        if c in (" ", CLOSE_PAREN):
            end = idx
            break

        if c in DIGITS:
            chars.append(c)
        elif c in ("+", "-"):
            if idx != 0:
                raise Exception("improper sign placement!")
            # A leading "+" is redundant; only "-" is kept for conversion.
            if c == "-":
                chars.append(c)
        elif c == ".":
            if seen_point:
                raise Exception("too many decimal points")
            seen_point = True
            chars.append(c)
        else:
            raise Exception("improper numeric!")

    token = "".join(chars)
    # Reject "", "+", "-", "." and "-." here so callers get the lexer's own
    # error rather than a ValueError leaking out of int()/float().
    if not any(ch in DIGITS for ch in token):
        raise Exception("improper numeric!")

    if seen_point:
        return NebLiteral(NebType.FLOAT, float(token)), inp[end:]
    return NebLiteral(NebType.INT, int(token)), inp[end:]
+
def lex_symbol(inp):
    """Lex a symbol from the start of ``inp``.

    The symbol runs until the first space, closing paren, or end of input,
    and every character in it must be a member of ``SYMBOL_VALS``.

    Returns ``(NebSymbol(name), remainder)`` where ``remainder`` starts at
    the delimiter that ended the symbol (or is ``""`` at end of input).

    Raises ``Exception("improper symbol")`` on any character that is
    neither a delimiter nor in ``SYMBOL_VALS``.
    """
    end = len(inp)  # index where the symbol stops; defaults to end of input
    for idx, c in enumerate(inp):
        if c in (CLOSE_PAREN, " "):
            end = idx
            break
        if c not in SYMBOL_VALS:
            raise Exception("improper symbol")

    # Every character before `end` was validated above, so the slice is the
    # symbol's name.
    return NebSymbol(inp[:end]), inp[end:]
+
+
def peek(inp):
    """Return the first element of ``inp``, or ``None`` when it is empty."""
    return inp[0] if inp else None
+
def lex(inp, tokens=None):
    """Split source text into a flat list of tokens.

    ``inp`` is the text to lex. ``tokens`` is the list that tokens are
    appended to; it is mutated in place and also returned. When omitted, a
    new list is created.

    Dispatch is on the first character of the remaining input, after
    surrounding whitespace has been stripped:
      * ``(`` / ``)``  -> ``NebOpen()`` / ``NebClose()``
      * a digit, ``.``, or a sign followed by a digit or ``.`` -> ``lex_number``
      * ``"``          -> ``lex_string`` (opening quote consumed here)
      * ``#``          -> ``lex_bool`` (marker consumed here)
      * any other character in ``SYMBOL_VALS`` -> ``lex_symbol``

    Raises ``Exception("spaces required between tokens")`` when a token is
    followed directly by something other than a space, a closing paren, or
    the end of input; ``Exception("unable to lex")`` for an unrecognised
    leading character; and whatever the ``lex_*`` helpers raise.
    """
    if tokens is None:
        tokens = []

    # Iterative rather than one recursive call per token, so long inputs
    # cannot exhaust the interpreter's recursion limit.
    while True:
        inp = inp.strip()  # white space doesn't matter at this point
        nxt = peek(inp)
        if nxt is None:
            return tokens

        # parens
        if nxt == OPEN_PAREN:
            tokens.append(NebOpen())
            inp = inp[1:]
            continue
        if nxt == CLOSE_PAREN:
            tokens.append(NebClose())
            inp = inp[1:]
            continue

        # A sign only starts a number when a digit or decimal point follows;
        # otherwise "+" and "-" are ordinary symbols, e.g. in "(+ 1 2)".
        follower = peek(inp[1:])
        signed_number = (
            nxt in ("+", "-")
            and follower is not None
            and (follower in DIGITS or follower == ".")
        )

        # numbers
        if nxt in DIGITS or nxt == "." or signed_number:
            token, inp = lex_number(inp)
        # strings
        elif nxt == DOUBLE_QUOTE:
            token, inp = lex_string(inp[1:])
        # bool
        elif nxt == "#":
            token, inp = lex_bool(inp[1:])
        # symbols
        elif nxt in SYMBOL_VALS:
            token, inp = lex_symbol(inp)
        else:
            raise Exception("unable to lex")

        # lex_number only ever stops on a delimiter, so this check can only
        # fire for strings, bools and symbols.
        if peek(inp) not in (None, CLOSE_PAREN, " "):
            raise Exception("spaces required between tokens")
        tokens.append(token)