author    mryouse  2022-05-10 02:07:40 +0000
committer mryouse  2022-05-10 02:07:40 +0000
commit    7bed8de9b493ca2a2b13d6293db6bd81b73325ce (patch)
tree      7efa56cddb8a0cec4b4c03495874474b510c6a09
initial commit
-rw-r--r--  README.md   13
-rw-r--r--  lexer.py   154
-rw-r--r--  parser.py   43
-rw-r--r--  repl.py     24
-rw-r--r--  runner.py   43
-rw-r--r--  tokens.py   63
6 files changed, 340 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8330050
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# neb
+### an attempt at a language
+
+## ideas
+ - **Lisp-y**: I hope you like parentheses!
+ - **Strongly typed**: types are Good, and could enable future compilation
+ - **We <3 Linux**: strong support for pipelines and shell-ing out
+ - **Immutable variables**: mutability is scary and makes for strange bugs
+ - **Pure functions**: side effects are also scary
+
+## things that work
+ - `(print [out :string]) => :bool`
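+ - for example, `(print "hello, world")` should print the string and evaluate to a bool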
+ - pretty much nothing else
diff --git a/lexer.py b/lexer.py
new file mode 100644
index 0000000..913a9aa
--- /dev/null
+++ b/lexer.py
@@ -0,0 +1,154 @@
+from tokens import *
+
+# consts
+DOUBLE_QUOTE = '"'
+BACKSLASH = "\\"
+OPEN_PAREN = "("
+CLOSE_PAREN = ")"
+DIGITS = "0123456789"
+LETTERS = "abcdefghijklmnopqrstuvwxyz"
+PUNCTUATION = "-_!*$@%^&=+/"
+SYMBOL_VALS = list(LETTERS + LETTERS.upper() + DIGITS + PUNCTUATION)
+
+
+def lex_string(inp):
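+    # the opening quote has already been consumed by lex(); scan up to the
+    # closing quote, honoring \" and \\ escapes, and return the string
+    # literal plus the unconsumed remainder of the input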
+    token = ""
+    esc = False
+    for idx, c in enumerate(inp):
+        # if we're escaping a quote, don't add the \
+        if esc:
+            if c == DOUBLE_QUOTE:
+                token += DOUBLE_QUOTE
+            elif c == BACKSLASH:
+                token += BACKSLASH
+            else:
+                token += f"{BACKSLASH}{c}"
+
+        # if it's an escape char, set esc and continue
+        elif c == BACKSLASH:
+            esc = True
+            continue
+
+        elif c == DOUBLE_QUOTE:
+            #return token, inp[idx + 1:]
+            return NebLiteral(NebType.STRING, token), inp[idx + 1:]
+
+        else:
+            token += c
+
+        esc = False
+
+    raise Exception("improperly ended string!")
+
+def lex_bool(inp):
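+    # lex() has already consumed the leading '#'; only bare "true"/"false"
+    # are accepted. len(str(True)) == len("true") and len(str(False)) ==
+    # len("false"), so slicing by len(str(token)) drops exactly the literal.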
+    if inp[0:4] == "true":
+        token = True
+    elif inp[0:5] == "false":
+        token = False
+    else:
+        raise Exception("invalid boolean")
+
+    if peek(inp[len(str(token)):]) not in (None, " ", CLOSE_PAREN):
+        raise Exception("invalid boolean")
+
+    #return token, inp[len(str(token)):]
+    return NebLiteral(NebType.BOOL, token), inp[len(str(token)):]
+
+
+def lex_number(inp):
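+    # accumulate characters until whitespace or ')'; a '.' anywhere makes the
+    # literal a FLOAT, otherwise an INT; a single leading +/- sign is allowed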
+    token = ""
+    for idx, c in enumerate(inp):
+        if c in (" ", CLOSE_PAREN):
+            if "." in token:
+                #return float(token), inp[idx:]
+                return NebLiteral(NebType.FLOAT, float(token)), inp[idx:]
+            else:
+                #return int(token), inp[idx:]
+                return NebLiteral(NebType.INT, int(token)), inp[idx:]
+
+        if c in DIGITS:
+            token += c
+        elif c == "+":
+            if idx == 0:
+                continue
+            else:
+                raise Exception("improper sign placement!")
+        elif c == "-":
+            if idx == 0:
+                token += c
+            else:
+                raise Exception("improper sign placement!")
+        elif c == ".":
+            if c not in token:
+                token += c
+            else:
+                raise Exception("too many decimal points")
+        else:
+            raise Exception("improper numeric!")
+
+    if "." in token:
+        #return float(token), ""
+        return NebLiteral(NebType.FLOAT, float(token)), ""
+    else:
+        #return int(token), ""
+        return NebLiteral(NebType.INT, int(token)), ""
+
+def lex_symbol(inp):
+    token = ""
+    for idx, c in enumerate(inp):
+        if c in (CLOSE_PAREN, " "):
+            return NebSymbol(token), inp[idx:]
+        elif c in SYMBOL_VALS:
+            token += c
+        else:
+            raise Exception("improper symbol")
+    return NebSymbol(token), ""
+
+
+def peek(inp):
+    if len(inp) == 0:
+        return None
+    return inp[0]
+
+def lex(inp, tokens):
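+    # recursive dispatch on the first character: parens, numerics, strings,
+    # '#' booleans, then bare symbols; each branch lexes one token from the
+    # front of the input and recurses on the remainder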
+    inp = inp.strip()  # white space doesn't matter at this point
+    nxt = peek(inp)
+    if nxt is None:
+        #print(f"returning [{tokens}]")
+        return tokens
+    # parens
+    if nxt == OPEN_PAREN:
+        tokens.append(NebOpen())
+        return lex(inp[1:], tokens)
+    elif nxt == CLOSE_PAREN:
+        tokens.append(NebClose())
+        return lex(inp[1:], tokens)
+    # numbers
+    elif nxt in DIGITS or nxt in ("+", "-", "."):
+        token, remainder = lex_number(inp)
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # strings
+    elif nxt == DOUBLE_QUOTE:
+        token, remainder = lex_string(inp[1:])
+        #print(f"received [{token}] [{remainder}]")
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # bool
+    elif nxt == "#":
+        token, remainder = lex_bool(inp[1:])
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # symbols
+    elif nxt in SYMBOL_VALS:
+        token, remainder = lex_symbol(inp)
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    else:
+        raise Exception("unable to lex")
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..6360b62
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,43 @@
+from tokens import *
+
+
+def peek(inp):
+    if len(inp) == 0:
+        return None
+    return inp[0]
+
+def parse_expression(tkns):
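+    # consume tokens up to the next NebClose and return (NebExpression,
+    # remaining tokens); nested expressions are not handled yet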
+    # expressions MUST start with a symbol (for now?)
+    symbol = None
+    args = []
+    for idx, t in enumerate(tkns):
+        #if isinstance(t, NebOpen):
+        if idx == 0:
+            if not isinstance(t, NebSymbol):
+                raise Exception("expressions must start with a symbol")
+            else:
+                symbol = t
+        elif isinstance(t, NebClose):
+            return NebExpression(symbol, args), tkns[idx + 1:]
+        else:  # TODO nested expressions
+            args.append(t)
+
+    raise Exception("couldn't parse expression!")
+
+def parse(tkns, parsed):
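+    # walk the token stream: NebOpen starts an expression, bare literals and
+    # symbols pass through as-is; returns the accumulated list of parsed items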
+    nxt = peek(tkns)
+    if nxt is None:
+        return parsed
+    if isinstance(nxt, NebOpen):
+        expr, remainder = parse_expression(tkns[1:])
+        parsed.append(expr)
+        return parse(remainder, parsed)
+    elif isinstance(nxt, (NebLiteral, NebSymbol)):
+        parsed.append(nxt)
+        return parse(tkns[1:], parsed)
+    else:
+        raise Exception("expecting an expression, a literal, or a symbol")
+
diff --git a/repl.py b/repl.py
new file mode 100644
index 0000000..af84c82
--- /dev/null
+++ b/repl.py
@@ -0,0 +1,24 @@
+from lexer import lex
+from parser import parse
+from runner import evaluate
+
+def main():
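+    # read-eval-print loop: lex -> parse -> evaluate, echoing each stage;
+    # errors abort the current line but keep the REPL alive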
+    idx = 1
+    while True:
+        inp = input(f"#{idx}> ")
+        if len(inp.strip()) == 0:
+            continue
+        try:
+            lexed = lex(inp, [])
+            print(f" - LEX: {lexed}")
+            parsed = parse(lexed, [])
+            print(f" - PARSE: {parsed}")
+            ev = evaluate(parsed, [])
+            print(f"=> {ev}")
+            idx += 1
+        except Exception as e:
+            print(f"panic! {e}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/runner.py b/runner.py
new file mode 100644
index 0000000..a2c7e99
--- /dev/null
+++ b/runner.py
@@ -0,0 +1,43 @@
+from tokens import *
+
+def std_print(arg):
+    print(arg.value)
+    #return [] # TODO this should return empty list
+    return NebLiteral(NebType.BOOL, True)
+
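+# the built-in "standard library": maps a symbol name to its declared
+# NebFunction signature plus the Python callable that implements it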
+std = {
+ "print": {
+ "func": NebFunction("print", [NebType.STRING], NebType.BOOL),
+ "impl": std_print }
+ }
+
+def peek(inp):
+    if len(inp) == 0:
+        return None
+    return inp[0]
+
+def evaluate(items, pop):
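+    # 'pop' holds the result of the most recently evaluated item; expression
+    # arguments are type-checked against the std signature before calling impl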
+    nxt = peek(items)
+    if nxt is None:
+        return pop
+    elif isinstance(nxt, NebLiteral):
+        pop = nxt.value
+        return evaluate(items[1:], pop)
+    elif isinstance(nxt, NebSymbol):
+        if nxt.name not in std:
+            raise Exception(f"no such symbol: '{nxt.name}'")
+        this_func = std[nxt.name]
+        return evaluate(items[1:], this_func["impl"])
+    elif isinstance(nxt, NebExpression):
+        if nxt.symbol.name not in std:
+            raise Exception(f"no such symbol: {nxt.symbol.name}")
+        this_func = std[nxt.symbol.name]
+        #expected_sig = " ".join(x.type_.name for x in nxt.args)
+        #if this_func["func"].in_sig() != expected_sig:
+        if this_func["func"].in_sig() != nxt.maybe_sig():
+            raise Exception(f"{nxt.symbol.name} expects '{this_func['func'].in_sig()}', got '{nxt.maybe_sig()}'")
+        ret = this_func["impl"](*(nxt.args))
+        return evaluate(items[1:], ret)
+    else:
+        raise Exception("expected a literal or an expression")
+
diff --git a/tokens.py b/tokens.py
new file mode 100644
index 0000000..e7f137b
--- /dev/null
+++ b/tokens.py
@@ -0,0 +1,63 @@
+from dataclasses import dataclass
+from enum import Enum, auto
+from typing import TypeVar, List
+
+T = TypeVar("T", int, float, str, bool)
+
+# classes
+class NebType(Enum):
+    INT = auto()
+    FLOAT = auto()
+    STRING = auto()
+    BOOL = auto()
+
+@dataclass
+class NebToken:
+    pass
+
+@dataclass
+class NebLiteral(NebToken):
+    type_: NebType
+    value: T
+
+class NebSeparator(NebToken):
+    pass
+
+class NebOpen(NebSeparator):
+    pass
+
+class NebClose(NebSeparator):
+    pass
+
+@dataclass
+class NebSymbol(NebToken):
+    name: str
+
+@dataclass
+class NebExpression(NebToken):
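+    # a symbol applied to a flat list of argument tokens (no nesting yet)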
+    symbol: NebSymbol
+    args: List[NebToken]
+
+    def maybe_sig(self):
+        out = []
+        for arg in self.args:
+            if isinstance(arg, NebLiteral):
+                out.append(":" + arg.type_.name.lower())
+            else:
+                raise Exception("expressions must have a list of literals")  # TODO not true
+        return " ".join(out)
+
+@dataclass
+class NebFunction(NebToken):
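+    # a callable's declared signature; in_sig() renders the argument types as
+    # e.g. ':string', which runner.evaluate compares with NebExpression.maybe_sig()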
+    name: str
+    args: List[NebType]
+    returns: NebType
+
+    def in_sig(self):
+        return " ".join(":" + x.name.lower() for x in self.args)
+
+    def out_sig(self):
+        return ":" + self.returns.name.lower()
+
+    def sig(self):
+        return (self.in_sig() + " > " + self.out_sig()).strip()