From 7bed8de9b493ca2a2b13d6293db6bd81b73325ce Mon Sep 17 00:00:00 2001
From: mryouse
Date: Tue, 10 May 2022 02:07:40 +0000
Subject: initial commit

---
 README.md |  13 ++++++
 lexer.py  | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 parser.py |  43 ++++++++++++++++++
 repl.py   |  24 ++++++++++
 runner.py |  43 ++++++++++++++++++
 tokens.py |  63 +++++++++++++++++++++++++
 6 files changed, 340 insertions(+)
 create mode 100644 README.md
 create mode 100644 lexer.py
 create mode 100644 parser.py
 create mode 100644 repl.py
 create mode 100644 runner.py
 create mode 100644 tokens.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8330050
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# neb
+### an attempt at a language
+
+## ideas
+ - **Lisp-y**: I hope you like parentheses!
+ - **Strongly typed**: types are Good, and could enable future compilation
+ - **We <3 Linux**: strong support for pipelines and shell-ing out
+ - **Immutable variables**: mutability is scary and makes for strange bugs
+ - **Pure functions**: side effects are also scary
+
+## things that work
+ - `(print [out :string]) => :bool`
+ - pretty much nothing else
diff --git a/lexer.py b/lexer.py
new file mode 100644
index 0000000..913a9aa
--- /dev/null
+++ b/lexer.py
@@ -0,0 +1,154 @@
+from tokens import *
+
+# consts
+DOUBLE_QUOTE = '"'
+BACKSLASH = "\\"
+OPEN_PAREN = "("
+CLOSE_PAREN = ")"
+DIGITS = "0123456789"
+LETTERS = "abcdefghijklmnopqrstuvwxyz"
+PUNCTUATION = "-_!*$@%^&=+/"
+SYMBOL_VALS = list(LETTERS + LETTERS.upper() + DIGITS + PUNCTUATION)
+
+
+def lex_string(inp):
+    token = ""
+    esc = False
+    for idx, c in enumerate(inp):
+        # if we're escaping a quote, don't add the \
+        if esc:
+            if c == DOUBLE_QUOTE:
+                token += DOUBLE_QUOTE
+            elif c == BACKSLASH:
+                token += BACKSLASH
+            else:
+                token += f"{BACKSLASH}{c}"
+
+        # if it's an escape char, set esc and continue
+        elif c == BACKSLASH:
+            esc = True
+            continue
+
+        elif c == DOUBLE_QUOTE:
+            #return token, inp[idx + 1:]
+            return NebLiteral(NebType.STRING, token), inp[idx + 1:]
+
+        else:
+            token += c
+
+        esc = False
+
+    raise Exception("improperly ended string!")
+
+def lex_bool(inp):
+    if inp[0:4] == "true":
+        token = True
+    elif inp[0:5] == "false":
+        token = False
+    else:
+        raise Exception("invalid boolean")
+
+    if peek(inp[len(str(token)):]) not in (None, " ", CLOSE_PAREN):
+        raise Exception("invalid boolean")
+
+    #return token, inp[len(str(token)):]
+    return NebLiteral(NebType.BOOL, token), inp[len(str(token)):]
+
+
+def lex_number(inp):
+    token = ""
+    for idx, c in enumerate(inp):
+        if c in (" ", CLOSE_PAREN):
+            if "." in token:
+                #return float(token), inp[idx:]
+                return NebLiteral(NebType.FLOAT, float(token)), inp[idx:]
+            else:
+                #return int(token), inp[idx:]
+                return NebLiteral(NebType.INT, int(token)), inp[idx:]
+
+        if c in list(DIGITS): # or c in ("-", "."):
+            token += c
+        elif c == "+":
+            if idx == 0:
+                continue
+            else:
+                raise Exception("improper sign placement!")
+        elif c == "-":
+            if idx == 0:
+                token += c
+            else:
+                raise Exception("improper sign placement!")
+        elif c == ".":
+            if c not in token:
+                token += c
+            else:
+                raise Exception("too many decimal points")
+        else:
+            raise Exception("improper numeric!")
+
+    if "." in token:
+        #return float(token), ""
+        return NebLiteral(NebType.FLOAT, float(token)), ""
+    else:
+        #return int(token), ""
+        return NebLiteral(NebType.INT, int(token)), ""
+
+def lex_symbol(inp):
+    token = ""
+    for idx, c in enumerate(inp):
+        if c in (CLOSE_PAREN, " "):
+            return NebSymbol(token), inp[idx:]
+        elif c in SYMBOL_VALS:
+            token += c
+        else:
+            raise Exception("improper symbol")
+    return NebSymbol(token), ""
+
+
+def peek(inp):
+    if len(inp) == 0:
+        return None
+    return inp[0]
+
+def lex(inp, tokens):
+    inp = inp.strip() # white space doesn't matter at this point
+    nxt = peek(inp)
+    if nxt is None:
+        #print(f"returning [{tokens}]")
+        return tokens
+    # parens
+    if nxt == OPEN_PAREN:
+        tokens.append(NebOpen())
+        return lex(inp[1:], tokens)
+    elif nxt == CLOSE_PAREN:
+        tokens.append(NebClose())
+        return lex(inp[1:], tokens)
+    # numbers
+    elif nxt in list(DIGITS) or nxt in ("+", "-", "."):
+        token, remainder = lex_number(inp)
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # strings
+    elif nxt == DOUBLE_QUOTE:
+        token, remainder = lex_string(inp[1:])
+        #print(f"received [{token}] [{remainder}]")
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # bool
+    elif nxt == "#":
+        token, remainder = lex_bool(inp[1:])
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    # symbols
+    elif nxt in SYMBOL_VALS:
+        token, remainder = lex_symbol(inp)
+        if peek(remainder) not in (None, CLOSE_PAREN, " "):
+            raise Exception("spaces required between tokens")
+        tokens.append(token)
+        return lex(remainder, tokens)
+    else:
+        raise Exception("unable to lex")
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..6360b62
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,43 @@
+from tokens import *
+
+
+def peek(inp):
+    if len(inp) == 0:
+        return None
+    return inp[0]
+
+def parse_expression(tkns):
+    # expressions MUST start with a symbol (for now?)
+    symbol = None
+    args = []
+    for idx, t in enumerate(tkns):
+        #if isinstance(t, NebOpen):
+        if idx == 0:
+            if not isinstance(t, NebSymbol):
+                raise Exception("expressions must start with a symbol")
+            else:
+                symbol = t
+        elif isinstance(t, NebClose):
+            return NebExpression(symbol, args), tkns[idx + 1:]
+        else: # TODO nested expressions
+            args.append(t)
+
+    raise Exception("couldn't parse expression!")
+
+def parse(tkns, parsed):
+    nxt = peek(tkns)
+    if nxt is None:
+        return parsed
+    if isinstance(nxt, NebOpen):
+        expr, remainder = parse_expression(tkns[1:])
+        parsed.append(expr)
+        return parse(remainder, parsed)
+    elif isinstance(nxt, NebLiteral):
+        parsed.append(nxt)
+        return parse(tkns[1:], parsed)
+    elif isinstance(nxt, NebSymbol):
+        parsed.append(nxt)
+        return parse(tkns[1:], parsed)
+    else:
+        raise Exception("expecting an expression or a literal")
+
diff --git a/repl.py b/repl.py
new file mode 100644
index 0000000..af84c82
--- /dev/null
+++ b/repl.py
@@ -0,0 +1,24 @@
+from lexer import lex
+from parser import parse
+from runner import evaluate
+
+def main():
+    idx = 1
+    while True:
+        inp = input(f"#{idx}> ")
+        if len(inp.strip()) == 0:
+            continue
+        try:
+            lexed = lex(inp, [])
+            print(f" - LEX: {lexed}")
+            parsed = parse(lexed, [])
+            print(f" - PARSE: {parsed}")
+            ev = evaluate(parsed, [])
+            print(f"=> {ev}")
+            idx += 1
+        except Exception as e:
+            print(f"panic! {e}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/runner.py b/runner.py
new file mode 100644
index 0000000..a2c7e99
--- /dev/null
+++ b/runner.py
@@ -0,0 +1,43 @@
+from tokens import *
+
+def std_print(arg):
+    print(arg.value)
+    #return [] # TODO this should return empty list
+    return NebLiteral(NebType.BOOL, True)
+
+std = {
+    "print": {
+        "func": NebFunction("print", [NebType.STRING], NebType.BOOL),
+        "impl": std_print }
+    }
+
+def peek(inp):
+    if len(inp) == 0:
+        return None
+    return inp[0]
+
+def evaluate(items, pop):
+    nxt = peek(items)
+    if nxt is None:
+        return pop
+    elif isinstance(nxt, NebLiteral):
+        pop = nxt.value
+        return evaluate(items[1:], pop)
+    elif isinstance(nxt, NebSymbol):
+        if not nxt.name in std:
+            raise Exception(f"no such symbol: '{nxt.name}'")
+        this_func = std[nxt.name]
+        return evaluate(items[1:], this_func["impl"])
+    elif isinstance(nxt, NebExpression):
+        if not nxt.symbol.name in std:
+            raise Exception(f"no such symbol: {nxt.symbol.name}")
+        this_func = std[nxt.symbol.name]
+        #expected_sig = " ".join(x.type_.name for x in nxt.args)
+        #if this_func["func"].in_sig() != expected_sig:
+        if this_func["func"].in_sig() != nxt.maybe_sig():
+            raise Exception(f"{nxt.symbol.name} expects '{this_func['func'].in_sig()}', got '{nxt.maybe_sig()}'")
+        ret = this_func["impl"](*(nxt.args))
+        return evaluate(items[1:], ret)
+    else:
+        raise Exception("expected a literal or an expression")
+
diff --git a/tokens.py b/tokens.py
new file mode 100644
index 0000000..e7f137b
--- /dev/null
+++ b/tokens.py
@@ -0,0 +1,63 @@
+from dataclasses import dataclass
+from enum import Enum, auto
+from typing import TypeVar, List
+
+T = TypeVar("T", int, float, str, bool)
+
+# classes
+class NebType(Enum):
+    INT = auto()
+    FLOAT = auto()
+    STRING = auto()
+    BOOL = auto()
+
+@dataclass
+class NebToken:
+    pass
+
+@dataclass
+class NebLiteral(NebToken):
+    type_: NebType
+    value: T
+
+class NebSeparator(NebToken):
+    pass
+
+class NebOpen(NebSeparator):
+    pass
+
+class NebClose(NebSeparator):
+    pass
+
+@dataclass
+class NebSymbol(NebToken):
+    name: str
+
+@dataclass
+class NebExpression(NebToken):
+    symbol: NebSymbol
+    args: List[NebToken]
+
+    def maybe_sig(self):
+        out = []
+        for arg in self.args:
+            if isinstance(arg, NebLiteral):
+                out.append(":" + arg.type_.name.lower())
+            else:
+                raise Exception("expressions must have a list of literals") #TODO not true
+        return " ".join(out)
+
+@dataclass
+class NebFunction(NebToken):
+    name: str
+    args: List[NebType]
+    returns: NebType
+
+    def in_sig(self):
+        return " ".join(":" + x.name.lower() for x in self.args)
+
+    def out_sig(self):
+        return ":" + self.returns.name.lower()
+
+    def sig(self):
+        return (self.in_sig() + " > " + self.out_sig()).strip()
-- 
cgit v1.2.3