diff options
Diffstat (limited to 'neb')
| -rw-r--r-- | neb/__init__.py | 6 | ||||
| -rw-r--r-- | neb/exceptions.py | 13 | ||||
| -rw-r--r-- | neb/interpreter.py | 876 | ||||
| -rw-r--r-- | neb/lexer.py | 168 | ||||
| -rw-r--r-- | neb/parser.py | 67 | ||||
| -rw-r--r-- | neb/structs.py | 108 | ||||
| -rw-r--r-- | neb/typeclass.py | 34 |
7 files changed, 1272 insertions, 0 deletions
diff --git a/neb/__init__.py b/neb/__init__.py new file mode 100644 index 0000000..f5afe60 --- /dev/null +++ b/neb/__init__.py @@ -0,0 +1,6 @@ +from .structs import * +from .lexer import * +from .parser import * +from .interpreter import * +from .exceptions import * +from .typeclass import * diff --git a/neb/exceptions.py b/neb/exceptions.py new file mode 100644 index 0000000..8bbe000 --- /dev/null +++ b/neb/exceptions.py @@ -0,0 +1,13 @@ +class NebPanic(BaseException): + pass + +class InterpretPanic(NebPanic): + def __init__(self, sym, msg, arg=None): + big_message = f"[{sym.line}] '{sym.name}': {msg}" + if arg is not None: + big_message += f" (got {arg})" + super().__init__(big_message) + +class LexError(NebPanic): + def __init__(self, message, line): + super().__init__(f"line {line}: {message}") diff --git a/neb/interpreter.py b/neb/interpreter.py new file mode 100644 index 0000000..760b3a6 --- /dev/null +++ b/neb/interpreter.py @@ -0,0 +1,876 @@ +from .structs import * +from .exceptions import * +from .lexer import lex +from .parser import parse +from .typeclass import TypeEnum, is_subtype_of +from pathlib import Path +from glob import glob +import subprocess +import shlex +import random +import sys +import math + + +class Arg: + + def __init__(self, name, type_, *, optional=False, lazy=False): + self.name = name + self.type_ = type_ + self.optional = optional + self.lazy = lazy + + def __str__(self): + opt = "?" 
if self.optional else "" + lazy = "~" if self.lazy else "" + return f"{lazy}{opt}{self.name} {self.type_}" + + +class Function: + + def __init__(self, name, params, body, args=None, many=None): + self.name = name + self.params = params + self.body = body + self.args = args + self.many = many + self.type_ = TypeEnum.ANY # TODO no it's not + + def describe(self, name=None): + if name is None: + name = self.name + out = [f"({name}"] + if self.args is not None: + for arg in self.args: + out.append(f"{arg}") + if self.many is not None: + out.append(f"{self.many}") + return " ".join(out) + ")" + + def arity_check(self, symbol, params): + min_arity = len([a for a in self.args if not a.optional]) + max_arity = -1 if self.many is not None else len(self.args) + + if len(params) < min_arity or (max_arity >= 0 and len(params) > max_arity): + if max_arity < 0: + fmt = f"{min_arity}+" + elif min_arity != max_arity: + fmt = f"{min_arity}-{max_arity}" + else: + fmt = f"{min_arity}" + raise InterpretPanic(symbol, f"expected [{fmt}] arguments, received {len(params)}") + return True + + def evaluate_args(self, symbol, params, env, ns): + self.arity_check(symbol, params) + ret = [] + + for idx, param in enumerate(params): + if idx < len(self.args): + arg = self.args[idx] + else: + arg = self.many + if arg.lazy: + ret.append(param) + continue + ev = evaluate(param, env, ns) + if not is_subtype_of(ev.type_, arg.type_): + exp = f"{arg.type_}" + rec = f"{ev.type_}" + raise InterpretPanic(symbol, f"received {rec}, expected {exp}", ev) + ret.append(ev) + return ret + + def call(self, expr, env): + pass + +class Builtin(Function): + + def __init__(self, callable_, args=None, many=None): + super().__init__("<builtin>", None, callable_, args, many) + + def call(self, expr, env, ns): + self.arity_check(expr.args[0], expr.args[1:]) + evaluated_args = self.evaluate_args(expr.args[0], expr.args[1:], env, ns) + return self.body(expr.args[0], evaluated_args, env, ns) + + +class 
UserFunction(Function): + + def __init__(self, name, params, body): + newparams, args, many = self.process_params(name, params) + super().__init__(name, newparams, body, args, many) + + def process_params(self, name, params): + newparams = [] + args = [] + many = None + prev_type = False + first = True + for param in params: + if isinstance(param, Symbol): + if many is not None: + raise NebPanic("& must be last argument") + if param.name == "&": + many = Arg(param.name, TypeEnum.ANY) + else: + newparams.append(param) + args.append(Arg(param.name, TypeEnum.ANY)) + prev_type = False + elif isinstance(param, Type) and not prev_type and not first: + typ = TypeEnum.__getattr__(param.name[1:].upper()) + if many is None: + args[-1].type_ = typ + else: + many.type_ = typ + prev_type = True + else: + raise NebPanic("invalid :func signature", param) + first = False + return newparams, args, many + + def call(self, expr, env, ns): + self.arity_check(expr.args[0], expr.args[1:]) + evaluated_args = self.evaluate_args(expr.args[0], expr.args[1:], env, ns) + this_env = Environment(env) + for idx, param in enumerate(self.params): + this_env.register(param.name, evaluated_args[idx]) + + # if we got "many", wrap the rest in a list + if self.many: + this_env.register(self.many.name, List(evaluated_args[len(self.params):], True)) + + return interpret(self.body, env=this_env, ns=ns) + +class Environment: + + def __init__(self, parent=None): + self.parent = parent + self.environment = {} + + def register(self, key, value): + self.environment[key] = value + + def reregister(self, key, value): + if not self.contains(key): + raise NebPanic(f"undefined symbol: '{key}") + if key in self.environment: + self.register(key, value) + else: + self.parent.reregister(key, value) + + def contains(self, key): + if key in self.environment: + return True + elif self.parent is not None: + return self.parent.contains(key) + else: + return False + + def get(self, key): + if not self.contains(key): + raise 
NebPanic(f"undefined symbol: '{key}") + if key in self.environment: + return self.environment[key] + else: + return self.parent.get(key) + + def __str__(self): + out = "" + for k, v in self.environment.items(): + out += f"{k}: {v}, " + return out + +GLOBALS = Environment() + +def interpret(exprs, *, env=GLOBALS, ns=None): + ret = None + for expr in exprs: + ret = evaluate(expr, env, ns) + return ret + +def evaluate(expr, env, ns=None): + if isinstance(expr, Literal) or isinstance(expr, Function) or isinstance(expr, Type): + return expr + elif isinstance(expr, Symbol): + if env.contains(expr.name): + return evaluate(env.get(expr.name), env, ns) + elif ns is not None and env.contains(f"{ns}/{expr.name}"): + return evaluate(env.get(f"{ns}/{expr.name}"), env, ns) + else: + raise NebPanic(f"no such symbol: {expr}") + + # if it's a literal list, return it + if expr.data: + return expr + # if it's an empty list, return it + elif len(expr.args) == 0: + return expr + + if not isinstance(expr.args[0], Symbol): + raise NebPanic("can't evaluate without a symbol") + name = expr.args[0].name + if env.contains(name): + return env.get(name).call(expr, env, ns) + elif ns is not None and env.contains(f"{ns}/{name}"): + return env.get(f"{ns}/{name}").call(expr, env, ns) + else: + raise InterpretPanic(expr.args[0], "unable to evaluate") + +def interpretOr(symbol, args, env, ns): + # or returns true for the first expression that returns true + for arg in args: + ev = evaluate(arg, env, ns) + if not isinstance(ev, Bool): + raise InterpretPanic(symbol, "requires :bool arguments") + if ev.value == True: + return ev + return Bool(False) + +or_arg = Arg("arg", TypeEnum.BOOL, lazy=True) +GLOBALS.register("or", Builtin(interpretOr, [or_arg, or_arg], or_arg)) + +def interpretAnd(symbol, args, env, ns): + # and returns false for the first expression that returns false + for arg in args: + ev = evaluate(arg, env, ns) + if not isinstance(ev, Bool): + raise InterpretPanic(symbol, "requires :bool 
arguments") + if ev.value == False: + return ev + return Bool(True) + +GLOBALS.register("and", Builtin(interpretAnd, [or_arg, or_arg], or_arg)) + +def interpretEq(symbol, args, env, ns): + # NOTE this currently only works for literals + # compare types because 0 != #false in neb + if type(args[0]) == type(args[1]) and args[0].value == args[1].value: + return Bool(True) + else: + return Bool(False) + +eq_arg = Arg("value", TypeEnum.LITERAL) +GLOBALS.register("eq?", Builtin(interpretEq, [eq_arg, eq_arg])) + +def interpretGreaterThan(symbol, args, env, ns): + return Bool(args[0].value > args[1].value) + +compare_arg = Arg("num", TypeEnum.NUMBER) +GLOBALS.register(">", Builtin(interpretGreaterThan, [compare_arg, compare_arg])) + +def interpretGreaterThanEqual(symbol, args, env, ns): + return Bool(args[0].value >= args[1].value) + +GLOBALS.register(">=", Builtin(interpretGreaterThanEqual, [compare_arg, compare_arg])) + +def interpretLessThan(symbol, args, env, ns): + return Bool(args[0].value < args[1].value) + +GLOBALS.register("<", Builtin(interpretLessThan, [compare_arg, compare_arg])) + +def interpretLessThanEqual(symbol, args, env, ns): + return Bool(args[0].value <= args[1].value) + +GLOBALS.register("<=", Builtin(interpretLessThanEqual, [compare_arg, compare_arg])) + +def interpretAddition(symbol, args, env, ns): + res = 0 + for arg in args: + res += arg.value + if isinstance(res, float): + return Float(res) + else: + return Int(res) + +term_arg = Arg("term", TypeEnum.NUMBER) +GLOBALS.register("+", Builtin(interpretAddition, [term_arg], term_arg)) + +def interpretSubtraction(symbol, args, env, ns): + if len(args) == 1: + res = -args[0].value + else: + res = args[0].value + for arg in args[1:]: + res -= arg.value + if isinstance(res, float): + return Float(res) + else: + return Int(res) + +GLOBALS.register("-", Builtin(interpretSubtraction, [term_arg], term_arg)) + +def interpretMultiplication(symbol, args, env, ns): + res = args[0].value + for arg in args[1:]: + 
res = res * arg.value + if isinstance(res, float): + return Float(res) + else: + return Int(res) + +factor_arg = Arg("factor", TypeEnum.NUMBER) +GLOBALS.register("*", Builtin(interpretMultiplication, [factor_arg, factor_arg], factor_arg)) + +def interpretDivision(symbol, args, env, ns): + ret = args[0].value / args[1].value + if int(ret) == ret: + return Int(int(ret)) + else: + return Float(ret) + +GLOBALS.register("/", Builtin(interpretDivision, [factor_arg, factor_arg])) + +def interpretNot(symbol, args, env, ns): + return Bool(not args[0].value) + +not_arg = Arg("not", TypeEnum.BOOL) +GLOBALS.register("not", Builtin(interpretNot, [not_arg])) + +def interpretIf(symbol, args, env, ns): + if args[0].value: + return evaluate(args[1], env, ns) + elif len(args) == 3: + return evaluate(args[2], env, ns) + return List([]) + +cond = Arg("cond", TypeEnum.BOOL) +t_branch = Arg("t-branch", TypeEnum.ANY, lazy=True) +f_branch = Arg("f-branch", TypeEnum.ANY, optional=True, lazy=True) +GLOBALS.register("if", Builtin(interpretIf, [cond, t_branch, f_branch])) + +def interpretPrint(symbol, args, env, ns): + print(args[0].value) + return List([]) # print returns nothing + +GLOBALS.register("print", Builtin(interpretPrint, [Arg("arg", TypeEnum.STRING)])) + +def interpretDef(symbol, args, env, ns): + + if not isinstance(args[0], Symbol): + raise InterpretPanic(symbol, "requires a :string name", args[0]) + name = args[0].name # NOTE: we are not evaluating the name!! + if not isinstance(name, str): + raise InterpretPanic(symbol, "requires a :string name") + + env.register(name, args[1]) # TODO since this isn't lazily evaluated, side effects are allowed (bad!) 
+ + return List([]) + +def_name_arg = Arg("name", TypeEnum.ANY, lazy=True) +def_val_arg = Arg("value", TypeEnum.ANY) +GLOBALS.register("def", Builtin(interpretDef, [def_name_arg, def_val_arg])) + +def interpretRedef(symbol, args, env, ns): + if not isinstance(args[0], Symbol): + raise InterpretPanic(symbol, "requires a :string name", args[0]) + name = args[0].name # NOTE: we are not evaluating the name!! + if not env.contains(name): + raise InterpretPanic(symbol, "not previously defined", args[0]) + + env.reregister(name, args[1]) + return List([]) + +GLOBALS.register("redef", Builtin(interpretRedef, [def_name_arg, def_val_arg])) + +def interpretLambda(symbol, args, env, ns): + if len(args[0].args) != 0: + func = UserFunction("<lambda>", args[0].args, args[1:]) + else: + func = UserFunction("<lambda>", [], args[1:]) + return func + +lambda_args_arg = Arg("args", TypeEnum.ANY, lazy=True) +lambda_body_arg = Arg("body", TypeEnum.ANY, lazy=True) +GLOBALS.register("lambda", Builtin(interpretLambda, [lambda_args_arg, lambda_body_arg], lambda_body_arg)) + +def interpretToString(symbol, args, env, ns): + item = args[0] + if isinstance(item, String): + return item + elif isinstance(item, Literal): + return String(str(item)) + else: + return String(f"{item}") + +GLOBALS.register("->string", Builtin(interpretToString, [Arg("arg", TypeEnum.ANY)])) + +def interpretConcat(symbol, args, env, ns): + out = "" + for arg in args: + out += arg.value + return String(out) + +string_arg = Arg("arg", TypeEnum.STRING) +GLOBALS.register("concat", Builtin(interpretConcat, [string_arg, string_arg], string_arg)) + +def interpretForCount(symbol, args, env, ns): + new_env = Environment(env) + ret = None + for idx in range(0, args[0].value): + new_env.register("idx", Int(idx + 1)) + for arg in args[1:]: + ret = evaluate(arg, new_env, ns) + if ret is None: + return List([]) + return ret + +for_count_arg = Arg("count", TypeEnum.INT) +for_body_arg = Arg("body", TypeEnum.ANY, lazy=True) 
GLOBALS.register("for-count", Builtin(interpretForCount, [for_count_arg, for_body_arg], for_body_arg))


def interpretForEach(symbol, args, env, ns):
    """Evaluate the body expressions once for every element of a list.

    args[0] is the (already evaluated) list; args[1:] are the unevaluated
    body expressions.  Each element is evaluated in the caller's environment
    and bound to "_item_" in a child environment, in which the body runs.

    Returns the value of the last body expression evaluated, or an empty
    List when the body never ran (empty input list).
    """
    new_env = Environment(env)
    ret = None
    for item in args[0].args:
        new_env.register("_item_", evaluate(item, env, ns))
        for arg in args[1:]:
            ret = evaluate(arg, new_env, ns)
    if ret is None:
        return List([])
    return ret


for_each_arg = Arg("list", TypeEnum.LIST)
GLOBALS.register("for-each", Builtin(interpretForEach, [for_each_arg, for_body_arg], for_body_arg))


def interpretPipe(symbol, args, env, ns):
    """Evaluate expressions in order, exposing each result to the next one.

    The result of the previous expression is bound to "items" in a child
    environment before the next expression is evaluated there.

    Returns the value of the last expression, or an empty List if nothing
    produced a value.
    """
    new_env = Environment(env)
    pipe = None
    for arg in args:
        if pipe is not None:
            new_env.register("items", pipe)
        pipe = evaluate(arg, new_env, ns)
    if pipe is None:
        return List([])
    return pipe


# The stages must be lazy: interpretPipe evaluates them itself, inside the
# child environment where "items" is bound.  (Previously the bare int 2 was
# passed as the argument list, so arity_check raised TypeError on every call.)
pipe_arg = Arg("expr", TypeEnum.ANY, lazy=True)
GLOBALS.register("|", Builtin(interpretPipe, [pipe_arg, pipe_arg], pipe_arg))


def interpretBranch(symbol, args, env, ns):
    """Evaluate the first branch whose condition is true.

    Every argument must be a two-element list: (condition expression).
    Conditions are evaluated in order and must produce a Bool.

    Returns the value of the expression paired with the first true
    condition, or an empty List when no condition is true.

    Raises InterpretPanic if a branch is not a two-element list or a
    condition does not evaluate to a Bool.
    """
    for arg in args:
        # guard against non-list branches, which have no .args to inspect
        if not isinstance(arg, List) or len(arg.args) != 2:
            raise InterpretPanic(symbol, "each branch requires two expressions")
        cond = evaluate(arg.args[0], env, ns)  # this is the condition
        if not isinstance(cond, Bool):
            raise InterpretPanic(symbol, "branch condition must be :bool", cond)
        if cond.value:
            return evaluate(arg.args[1], env, ns)
    return List([])


GLOBALS.register("branch", Builtin(interpretBranch, [for_body_arg], for_body_arg))


def interpretFunc(symbol, args, env, ns):
    """Define a named function in the current environment.

    args[0] is the unevaluated name symbol; the remaining arguments (the
    parameter list and body) are handed to interpretLambda.  When a
    namespace is active the function is registered as "<ns>/<name>".

    Returns an empty List.  Raises InterpretPanic if the name is not a Symbol.
    """
    if not isinstance(args[0], Symbol):
        raise InterpretPanic(symbol, "requires a :string name")
    name = args[0].name  # NOTE: we are not evaluating the name!!

    if ns is not None:
        name = f"{ns}/{name}"

    # compose a lambda
    func = interpretLambda(symbol, args[1:], env, ns)

    env.register(name, func)
    return List([])


GLOBALS.register("func", Builtin(interpretFunc, [def_name_arg, lambda_args_arg, lambda_body_arg], lambda_body_arg))


def interpretReadLines(symbol, args, env, ns):
    """Read a file and return its lines as a literal List of Strings.

    Lines are returned exactly as readlines() yields them (line endings are
    not stripped).  Raises InterpretPanic if the file does not exist.
    """
    target_file_name = args[0].value
    target_file = Path(target_file_name).resolve()
    if not target_file.exists():
        raise InterpretPanic(symbol, "no such file", target_file)
    with open(target_file, "r") as fil:
        data = fil.readlines()
    return List([String(d) for d in data], True)  # all lines are strings


GLOBALS.register("read-lines", Builtin(interpretReadLines, [Arg("filename", TypeEnum.STRING)]))


def interpretStrip(symbol, args, env, ns):
    """Return the argument string with leading/trailing whitespace removed."""
    return String(args[0].value.strip())


# the argument is an arbitrary string, not a filename
GLOBALS.register("strip", Builtin(interpretStrip, [Arg("string", TypeEnum.STRING)]))


# - string->int and string->float
def interpretStringToInt(symbol, args, env, ns):
    """Convert a String to an Int.

    Raises InterpretPanic when the text is not a valid integer literal.
    """
    try:
        val = int(args[0].value)
    except ValueError:
        # only a failed conversion is translated; anything else propagates
        raise InterpretPanic(symbol, "can't convert to an :int", args[0]) from None
    return Int(val)


GLOBALS.register("string->int", Builtin(interpretStringToInt, [Arg("arg", TypeEnum.STRING)]))


def interpretSplit(symbol, args, env, ns):
    """Split a String into a literal List of Strings.

    With one argument the string is split into its individual characters;
    with a second argument it is split on that separator.

    Raises InterpretPanic if the separator is the empty string.
    """
    target = args[0]
    if len(args) == 1:
        return List([String(char) for char in target.value], True)
    splitter = args[1]
    if splitter.value == "":
        # str.split("") raises ValueError; report it as a neb error instead
        raise InterpretPanic(symbol, "splitter must not be empty", splitter)
    ret = target.value.split(splitter.value)
    return List([String(r) for r in ret], True)


# the splitter is a single optional argument, not a variadic tail
GLOBALS.register("split", Builtin(interpretSplit, [Arg("target", TypeEnum.STRING), Arg("splitter", TypeEnum.STRING, optional=True)]))


def interpretListLength(symbol, args, env, ns):
    """Return the number of elements in a list as an Int."""
    return Int(len(args[0].args))


GLOBALS.register("list-length", Builtin(interpretListLength, [Arg("arg", TypeEnum.LIST)]))


def interpretFirst(symbol, args, env, ns):
    """Return the evaluated first element of a list.

    Raises InterpretPanic if the list is empty.
    """
    if len(args[0].args) == 0:
        raise InterpretPanic(symbol, "list is empty")
    return evaluate(args[0].args[0], env, ns)

+GLOBALS.register("first", Builtin(interpretFirst, [Arg("arg", TypeEnum.LIST, )])) + +def interpretRest(symbol, args, env, ns): + # TODO do we know it's not evaluated? + return List(args[0].args[1:], True) # we don't evaluate the remainder of the list + +GLOBALS.register("rest", Builtin(interpretRest, [Arg("arg", TypeEnum.LIST)])) + +def interpretMap(symbol, args, env, ns): + func = args[0] + if not isinstance(func, Function): + raise InterpretPanic(symbol, "requires a :func as its first argument", func) + lst = evaluate(args[1], env, ns) + if not isinstance(lst, List): + raise InterpretPanic(symbol, "requires a :list as its second argument", lst) + out = [] + for arg in lst.args: + ev = func.call(List([func, arg]), env, ns) + out.append(ev) + return List(out, True) + +GLOBALS.register("map", Builtin(interpretMap, [Arg("func", TypeEnum.ANY), Arg("list", TypeEnum.LIST)])) + +def interpretZip(symbol, args, env, ns): + z1 = args[0] + z2 = args[1] + if len(z1.args) != len(z2.args): + raise InterpretPanic(symbol, "requires two :lists of the same size") + out = [] + for idx in range(len(z1.args)): + f = z1.args[idx] + s = z2.args[idx] + out.append(List([f, s], True)) + return List(out, True) + +zip_arg = Arg("list", TypeEnum.LIST) +GLOBALS.register("zip", Builtin(interpretZip, [zip_arg, zip_arg])) + +def interpretList(symbol, args, env, ns): + return List(args, True) + +GLOBALS.register("list", Builtin(interpretList, [], Arg("item", TypeEnum.ANY))) + +def interpretListReverse(symbol, args, env, ns): + new_args = args[0].args[:] # make a copy of the args + new_args.reverse() + return List(new_args, True) + +GLOBALS.register("list-reverse", Builtin(interpretListReverse, [Arg("list", TypeEnum.LIST)])) + +def interpretApply(symbol, args, env, ns): + # TODO: to support lambdas, we can't assume the func is defined + func = args[0] + if not isinstance(func, Symbol): + raise InterpretPanic(symbol, "requires a symbol as its first argument", func) + new_lst = List([func] + 
args[1].args) + return evaluate(new_lst, env, ns) + +GLOBALS.register("apply", Builtin(interpretApply, [Arg("func", TypeEnum.ANY, lazy=True), Arg("list", TypeEnum.LIST)])) + +def interpretGlob(symbol, args, env, ns): + items = glob(args[0].value) + return List([String(item) for item in items], True) + +GLOBALS.register("glob", Builtin(interpretGlob, [Arg("regex", TypeEnum.STRING)])) + +def interpretShell(symbol, args, env, ns): + ret = subprocess.run(shlex.split(args[0].value), capture_output=True) + return List([String(r) for r in ret.stdout.decode("utf-8").split("\n")], True) + +GLOBALS.register("$", Builtin(interpretShell, [Arg("command", TypeEnum.STRING)])) + +def interpretEmpty(symbol, args, env, ns): + return Bool(len(args[0].args) == 0) + +GLOBALS.register("empty?", Builtin(interpretEmpty, [Arg("list", TypeEnum.LIST)])) + +def interpretShuf(symbol, args, env, ns): + items = args[0].args[:] + random.shuffle(items) + return List(items, True) + +GLOBALS.register("shuf", Builtin(interpretShuf, [Arg("list", TypeEnum.LIST)])) + +def interpretIsList(symbol, args, env, ns): + return Bool(isinstance(args[0], List)) + +GLOBALS.register("list?", Builtin(interpretIsList, [Arg("arg", TypeEnum.ANY)])) + +def interpretBlock(symbol, args, env, ns): + ret = List([]) + for arg in args: + ret = evaluate(arg, env, ns) + return ret + +block_arg = Arg("expr", TypeEnum.ANY, lazy=True) +GLOBALS.register("block", Builtin(interpretBlock, [block_arg], block_arg)) + +def interpretExit(symbol, args, env, ns): + status = 0 if len(args) == 0 else args[0].value + sys.exit(status) + return List([]) + +exit_arg = Arg("status", TypeEnum.INT, optional=True) +GLOBALS.register("exit", Builtin(interpretExit, [exit_arg])) + +def interpretUnlink(symbol, args, env, ns): + target_path = Path(args[0].value).resolve() + if not target_path.exists(): + raise InterpretPanic(symbol, "target file does not exist", target_path) + target_path.unlink() + return List([]) + +GLOBALS.register("unlink", 
Builtin(interpretUnlink, [Arg("filename", TypeEnum.STRING)])) + +def interpretArgv(symbol, args, env, ns): + out = [] + for arg in sys.argv[1:]: + out.append(String(arg)) + return List(out, True) + +GLOBALS.register("argv", Builtin(interpretArgv, [])) + +def interpretIn(symbol, args, env, ns): + target = args[0] + lst = args[1] + for arg in lst.args: + if type(arg) == type(target) and arg.value == target.value: + return Bool(True) + return Bool(False) + +in_target_arg = Arg("target", TypeEnum.LITERAL) +in_list_arg = Arg("list", TypeEnum.LIST) +GLOBALS.register("in?", Builtin(interpretIn, [in_target_arg, in_list_arg])) + +def interpretLast(symbol, args, env, ns): + if len(args[0].args) == 0: + raise InterpretPanic("List is empty") + return evaluate(args[0].args[-1], env, ns) + +GLOBALS.register("last", Builtin(interpretLast, [Arg("list", TypeEnum.LIST)])) + +def interpretJoin(symbol, args, env, ns): + lst = args[0] + target = args[1] + return String(target.value.join([a.value for a in lst.args])) + +join_list_arg = Arg("list", TypeEnum.LIST) +join_string_arg = Arg("joiner", TypeEnum.STRING) +GLOBALS.register("join", Builtin(interpretJoin, [join_list_arg, join_string_arg])) + +def interpretWithWrite(symbol, args, env, ns): + target_file = args[0] + new_env = Environment(env) + target_path = Path(target_file.value).resolve() + ret = Literal([]) + with open(str(target_path), "w") as fil: + new_env.register("_file_", List([fil], True)) # TODO wrong! + for arg in args[1:]: + ret = evaluate(arg, new_env, ns) + return ret + +GLOBALS.register("with-write", Builtin(interpretWithWrite, [Arg("filename", TypeEnum.STRING)], Arg("exprs", TypeEnum.ANY, lazy=True))) + +def interpretWrite(symbol, args, env, ns): + # write :string :filehandle + line = args[0] + handle = args[1] + handle.args[0].write(line.value) # TODO wrong! how do we evaluate a handle? 
+ return Literal([]) + +GLOBALS.register("write", Builtin(interpretWrite, [Arg("string", TypeEnum.STRING), Arg("filename", TypeEnum.LIST)])) + +def interpretNewline(symbol, args, env, ns): + return String("\n") + +GLOBALS.register("newline", Builtin(interpretNewline, [])) + +def interpretExists(symbol, args, env, ns): + return Bool(Path(args[0].value).resolve().exists()) + +GLOBALS.register("exists?", Builtin(interpretExists, [Arg("filename", TypeEnum.STRING)])) + +def interpretFirstChar(symbol, args, env, ns): + if len(args[0].value) == 0: + raise InterpretPanic(symbol, ":string is empty", ev) + return String(args[0].value[0]) + +GLOBALS.register("first-char", Builtin(interpretFirstChar, [Arg("string", TypeEnum.STRING)])) + +def interpretRestChar(symbol, args, env, ns): + return String(args[0].value[1:]) + +GLOBALS.register("rest-char", Builtin(interpretRestChar, [Arg("string", TypeEnum.STRING)])) + +def interpretSlice(symbol, args, env, ns): + lst = args[0] + idx = args[1] + if len(args) == 2: + return List(lst.args[idx.value - 1:]) + length = args[2] + diff = idx.value - 1 + length.value + return List(lst.args[idx.value - 1:diff]) + +slice_list_arg = Arg("list", TypeEnum.LIST) +slice_idx_arg = Arg("idx", TypeEnum.INT) +slice_length_arg = Arg("length", TypeEnum.INT, optional=True) +GLOBALS.register("slice", Builtin(interpretSlice, [slice_list_arg, slice_idx_arg, slice_length_arg])) + +def interpretClear(symbol, args, env, ns): + subprocess.run(["clear"]) + return List([]) + +GLOBALS.register("clear", Builtin(interpretClear, [])) + +def interpretReadLine(symbol, args, env, ns): + ret = input(args[0].value) + return String(ret) + +GLOBALS.register("read-line", Builtin(interpretReadLine, [Arg("prompt", TypeEnum.STRING)])) + +def interpretReadChar(symbol, args, env, ns): + import termios, tty + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + try: + tty.setraw(fd) + ch = sys.stdin.buffer.read1(4) # some keys are >1 bytes + except Exception: + raise + finally: 
+ termios.tcsetattr(fd, termios.TCSADRAIN, old) + return String(ch.decode("utf-8")) + +GLOBALS.register("read-char", Builtin(interpretReadChar, [])) + +def interpretAppend(symbol, args, env, ns): + lst = args[0] + val = args[1] + items = lst.args[:] + return List(items + [val], True) + +GLOBALS.register("append", Builtin(interpretAppend, [Arg("list", TypeEnum.LIST), Arg("item", TypeEnum.ANY)])) + +# TODO: this is actually for records/structs/whatever they're called +def interpretRemove(symbol, args, env, ns): + lst = args[0] + key = args[1] + out = [] + for arg in lst.args: + if arg.args[0].value != key.value: + out.append(arg) + return List(out, True) + +GLOBALS.register("remove", Builtin(interpretRemove, [Arg("list", TypeEnum.LIST), Arg("key", TypeEnum.ANY)])) + +def interpretWhile(symbol, args, env, ns): + cond = args[0] + ret = List([]) + while True: + ev = evaluate(cond, env, ns) + if not isinstance(ev, Bool): + raise InterpretPanic(symbol, "expects a :bool condition", ev) + if not ev.value: + break + for arg in args[1:]: + ret = evaluate(arg, env, ns) + return ret + +GLOBALS.register("while", Builtin(interpretWhile, [Arg("cond", TypeEnum.BOOL, lazy=True)], Arg("expr", TypeEnum.ANY, lazy=True))) + +def interpretUse(symbol, args, env, ns): + target_file_name = args[0].value + target_file = Path(target_file_name).resolve() + if not target_file.exists(): + raise InterpretPanic(symbol, "no such file", target_file) + with open(target_file, "r") as fil: + data = fil.read() + interpret(parse(lex(data))) + return List([]) + +GLOBALS.register("use", Builtin(interpretUse, [Arg("filename", TypeEnum.STRING)])) + +def interpretAssert(symbol, args, env, ns): + if args[0].value != True: + raise InterpretPanic(symbol, "assertion failed") + return List([]) + +GLOBALS.register("assert", Builtin(interpretAssert, [Arg("cond", TypeEnum.BOOL)])) + +def interpretHowTo(symbol, args, env, ns): + if not isinstance(args[0], Symbol): + raise InterpretPanic(symbol, "expects a symbol", 
args[0]) + sym = env.get(args[0].name) + print(sym.describe(args[0].name)) + return List([]) + +GLOBALS.register("howto", Builtin(interpretHowTo, [Arg("symbol", TypeEnum.ANY, lazy=True)])) + +def interpretSymbols(symbol, args, env, ns): + keys = list(env.environment.keys()) + keys.sort() + out = "" + for idx, key in enumerate(keys): + if idx % 6 == 0: + print(out) + out = f"{key}" + else: + out = f"{out} {key}" + print(out) + return List([]) + +GLOBALS.register("symbols", Builtin(interpretSymbols, [])) + +def interpretUseAs(symbol, args, env, ns): + target_file_name = args[0].value + target_file = Path(target_file_name).resolve() + if not target_file.exists(): + raise InterpretPanic(symbol, "no such file", target_file) + with open(target_file, "r") as fil: + data = fil.read() + interpret(parse(lex(data)), ns=args[1].name) + return List([]) + +GLOBALS.register("use-as", Builtin(interpretUseAs, [Arg("filename", TypeEnum.STRING), Arg("namespace", TypeEnum.ANY, lazy=True)])) + +def interpretFloor(symbol, args, env, ns): + return Int(math.floor(args[0].value)) + +GLOBALS.register("floor", Builtin(interpretFloor, [Arg("floor", TypeEnum.NUMBER)])) + +def interpretFilter(symbol, args, env, ns): + func = args[0] + if not isinstance(func, Function): + raise InterpretPanic(symbol, "requires a :func as its first argument", func) + lst = args[1] + out = [] + for arg in lst.args: + ev = func.call(List([func, arg]), env, ns) + if not isinstance(ev, Bool): + raise InterpretPanic(symbol, "function must return :bool", ev) + if ev.value: + out.append(arg) + return List(out, True) + +GLOBALS.register("filter", Builtin(interpretFilter, [Arg("func", TypeEnum.ANY), Arg("list", TypeEnum.LIST)])) + +def interpretTypeOf(symbol, args, env, ns): + return Type(f"{args[0].type_}") + +GLOBALS.register("typeof", Builtin(interpretTypeOf, [Arg("candidate", TypeEnum.ANY)])) diff --git a/neb/lexer.py b/neb/lexer.py new file mode 100644 index 0000000..b522460 --- /dev/null +++ b/neb/lexer.py @@ -0,0 
+1,168 @@ +from .structs import TokenType, Token +from .exceptions import LexError +import sys + + +types = { + ":int": TokenType.INT_TYPE, + ":float": TokenType.FLOAT_TYPE, + ":number": TokenType.NUMBER_TYPE, + ":string": TokenType.STRING_TYPE, + ":list": TokenType.LIST_TYPE, + ":any": TokenType.ANY_TYPE, + ":literal": TokenType.LITERAL_TYPE, + ":bool": TokenType.BOOL_TYPE } + +keywords = { + "if": TokenType.IF, + "for-count": TokenType.FOR_COUNT, + "def": TokenType.DEF, + "lambda": TokenType.LAMBDA, + "&": TokenType.MANY, + "func": TokenType.FUNC } + + +WHITESPACE = [" ", "\n", "\t"] +SEPARATORS = WHITESPACE + [")"] +DIGITS = list("0123456789") + +def lex(data): + start = 0 + current = 0 + line = 1 + end = len(data) + + tokens = [] + while current < end: + char = data[current] + if char == ";": + while char != "\n" and current < end: + current += 1 + char = data[current] + continue + if char == "\n": + line += 1 + if char in WHITESPACE: + current += 1 + continue + elif char == "(": + tokens.append(Token(TokenType.OPEN_PAREN, "(", None, line)) + elif char == ")": + tokens.append(Token(TokenType.CLOSE_PAREN, ")", None, line)) + # numbers + elif char in DIGITS or char == ".": + tok, length = get_number(data[current:], line) + tokens.append(tok) + current += length + # strings + elif char == '"': + tok, length, offset = get_string(data[current+1:], line) + tokens.append(tok) + current += length + line += offset + # bools + elif char == "#": + tok, length = get_bool(data[current+1:], line) + tokens.append(tok) + current += length + #types + elif char == ":": + tok, length = get_type(data[current:], line) # include : + tokens.append(tok) + current += length + # symbols + else: + tok, length = get_symbol(data[current:], line) + if tok.text in keywords: + tok.type_ = keywords[tok.text] + tokens.append(tok) + current += length + + current += 1 + tokens.append(Token(TokenType.EOF, "", None, line)) + return tokens + +def get_number(data, line): + counter = 0 + value = "" + 
is_float = False + char = data[counter] + while char not in SEPARATORS: + if char in DIGITS: + value += char + elif char == ".": + if is_float: + raise LexError("too many '.' in number", line) + is_float = True + value += char + else: + raise Exception(f"invalid number: {value}") + counter += 1 + if counter >= len(data): + break + char = data[counter] + if is_float: + return Token(TokenType.FLOAT, value, float(value), line), counter - 1 + else: + return Token(TokenType.INT, value, int(value), line), counter - 1 + + +def get_string(data, line): + offset = 0 + counter = 0 + string = "" + while data[counter] != '"': + if data[counter] == "\n": + offset += 1 + + # look ahead to see if it's a double quote + if data[counter] == "\\" and \ + len(data) > counter and \ + data[counter+1] == '"': + string += '"' + counter += 1 + else: + string += data[counter] + counter += 1 + if counter >= len(data): + raise Exception("couldn't parse string") + string = string.encode().decode("unicode_escape") + return Token(TokenType.STRING, str(string), str(string), line), counter + 1, offset + +def get_bool(data, line): + counter = 0 + value = "" + while data[counter] not in SEPARATORS: + value += data[counter] + counter += 1 + if counter >= len(data): + break + if value == "true": + return Token(TokenType.TRUE, "#true", True, line), 4 + elif value == "false": + return Token(TokenType.FALSE, "#false", False, line), 5 + else: + raise LexError("couldn't parse boolean", line) + +def get_symbol(data, line): + counter = 0 + value = "" + while data[counter] not in SEPARATORS: + value += data[counter] + counter += 1 + if counter >= len(data): + break + return Token(TokenType.SYMBOL, value, None, line), counter - 1 + +def get_type(data, line): + counter = 0 + value = "" + while data[counter] not in SEPARATORS: + value += data[counter] + counter += 1 + if counter >= len(data): + break + if value not in types: + raise LexError(f"unrecognized type {value}", line) + return Token(types[value], value, 
None, line), counter - 1 + diff --git a/neb/parser.py b/neb/parser.py new file mode 100644 index 0000000..ea875fb --- /dev/null +++ b/neb/parser.py @@ -0,0 +1,67 @@ +from .structs import * + +def parseExpression(token, prev, tokens): + idx = 0 + args = [] + prev = token + while tokens[idx].type_ != TokenType.CLOSE_PAREN: + token = tokens[idx] + inc = 1 + if token.type_ == TokenType.OPEN_PAREN: + expr, inc = parseExpression(token, prev, tokens[idx+1:]) + args.append(expr) + elif token.type_ in (TokenType.STRING, TokenType.TRUE, TokenType.FALSE, TokenType.INT, TokenType.FLOAT): + expr, inc = parseLiteral(token, prev, tokens[idx+1:]) + args.append(expr) + elif token.type_ in (TokenType.INT_TYPE, TokenType.FLOAT_TYPE, TokenType.STRING_TYPE, TokenType.ANY_TYPE, TokenType.LIST_TYPE, TokenType.NUMBER_TYPE, TokenType.BOOL_TYPE, TokenType.LITERAL_TYPE): + expr, inc = parseType(token, prev, tokens[idx+1:]) + args.append(expr) + else: + expr, inc = parseSymbol(token, prev, tokens[idx+1:]) + args.append(expr) + idx += inc + prev = token + + return List(args), idx + 2 # parens + +def parseSymbol(token, prev, tokens): + return Symbol(token.text, token.line), 1 + +def parseLiteral(token, prev, tokens): + if token.type_ == TokenType.STRING: + return String(token.value), 1 + elif token.type_ == TokenType.INT: + return Int(token.value), 1 + elif token.type_ == TokenType.FLOAT: + return Float(token.value), 1 + elif token.type_ in (TokenType.TRUE, TokenType.FALSE): + return Bool(token.value), 1 + else: + return Literal(token.value), 1 + +def parseType(token, prev, tokens): + return Type(token.text), 1 + +def parse(tokens): + idx = 0 + prev = None + exprs = [] + while tokens[idx].type_ != TokenType.EOF: + token = tokens[idx] + counter = 1 + if token.type_ == TokenType.OPEN_PAREN: + expr, counter = parseExpression(token, prev, tokens[idx+1:]) + exprs.append(expr) + elif token.type_ in (TokenType.FALSE, TokenType.TRUE, TokenType.STRING, TokenType.INT, TokenType.FLOAT): + lit, counter = 
parseLiteral(token, prev, tokens[idx+1:]) + exprs.append(lit) + elif token.type_ in (TokenType.INT_TYPE, TokenType.FLOAT_TYPE, TokenType.STRING_TYPE, TokenType.ANY_TYPE, TokenType.LIST_TYPE, TokenType.NUMBER_TYPE, TokenType.BOOL_TYPE, TokenType.LITERAL_TYPE): + typ, counter = parseType(token, prev, tokens[idx+1:]) + exprs.append(typ) + else: + sym, counter = parseSymbol(token, prev, tokens[idx+1:]) + exprs.append(sym) + + idx += counter + prev = token + return exprs diff --git a/neb/structs.py b/neb/structs.py new file mode 100644 index 0000000..09807db --- /dev/null +++ b/neb/structs.py @@ -0,0 +1,108 @@ +from dataclasses import dataclass +from enum import Enum, auto +from typing import Any +from .typeclass import TypeEnum + +# tokens and types +# NOTE: this can probably be simplified +class TokenType(Enum): + + OPEN_PAREN = auto() + CLOSE_PAREN = auto() + + EOF = auto() + + # literals + INT = auto() + FLOAT = auto() + STRING = auto() + TRUE = auto() + FALSE = auto() + + # keywords + IF = auto() + FOR_COUNT = auto() + DEF = auto() + LAMBDA = auto() + FUNC = auto() + + # symbols + SYMBOL = auto() + + # types + INT_TYPE = auto() + FLOAT_TYPE = auto() + NUMBER_TYPE = auto() + STRING_TYPE = auto() + ANY_TYPE = auto() + LIST_TYPE = auto() + LITERAL_TYPE = auto() + BOOL_TYPE = auto() + + MANY = auto() + +@dataclass +class Token: + type_: TokenType + text: str + value: Any + line: int + + def __str__(self): + return f"{self.type_.name} {self.text} {self.line}" + +class Literal: + def __init__(self, value, type_=None): + self.value = value + if type_ is None: + self.type_ = TypeEnum.ANY + else: + self.type_ = type_ + def __str__(self): + return f"{self.value}:literal" + +class Int(Literal): + def __init__(self, value): + super().__init__(value, TypeEnum.INT) + def __str__(self): + return f"{self.value}" + +class Float(Literal): + def __init__(self, value): + super().__init__(value, TypeEnum.FLOAT) + def __str__(self): + return f"{self.value}" + +class Bool(Literal): + def 
__init__(self, value): + super().__init__(value, TypeEnum.BOOL) + def __str__(self): + return f"#{str(self.value).lower()}" + +class String(Literal): + def __init__(self, value): + super().__init__(value, TypeEnum.STRING) + def __str__(self): + return f'"{repr(self.value)[1:-1]}"' + +class Type: + def __init__(self, name): + self.name = name + def __str__(self): + return self.name + +class Symbol: + def __init__(self, name, line): + self.name = name + self.line = line + def __str__(self): + return f"'{self.name}" + +class List: + def __init__(self, args, data=False): + self.args = args + self.data = data + self.type_ = TypeEnum.LIST + def __str__(self): + return "(" + " ".join(f"{arg}" for arg in self.args) + ")" + diff --git a/neb/typeclass.py b/neb/typeclass.py new file mode 100644 index 0000000..eae412c --- /dev/null +++ b/neb/typeclass.py @@ -0,0 +1,34 @@ +from enum import Enum, auto + +class TypeEnum(Enum): + ANY = auto() + STRING = auto() + INT = auto() + FLOAT = auto() + NUMBER = auto() + LIST = auto() + LITERAL = auto() + BOOL = auto() + + def __str__(self): + return f":{self.name.lower()}" + +HIERARCHY = { TypeEnum.ANY: None, + TypeEnum.LITERAL: TypeEnum.ANY, + TypeEnum.LIST: TypeEnum.ANY, + TypeEnum.STRING: TypeEnum.LITERAL, + TypeEnum.BOOL: TypeEnum.LITERAL, + TypeEnum.NUMBER: TypeEnum.LITERAL, + TypeEnum.INT: TypeEnum.NUMBER, + TypeEnum.FLOAT: TypeEnum.NUMBER } + +def is_subtype_of(candidate, expected): + if candidate == expected: + return True + parent = HIERARCHY[candidate] + while parent is not None: + if parent == expected: + return True + parent = HIERARCHY[parent] + return False + |
