aboutsummaryrefslogtreecommitdiff
path: root/parser.d
diff options
context:
space:
mode:
authorBen Winston2023-05-19 17:55:36 -0400
committerBen Winston2023-05-19 17:55:36 -0400
commit10c75f60c1f5fa27268ea9a850b63b777b087cbc (patch)
treeebeb3a040ac24076e68915652c210d93f124c805 /parser.d
initial commit
Diffstat (limited to 'parser.d')
-rw-r--r--parser.d398
1 files changed, 398 insertions, 0 deletions
diff --git a/parser.d b/parser.d
new file mode 100644
index 0000000..d471f4d
--- /dev/null
+++ b/parser.d
@@ -0,0 +1,398 @@
+import std.stdio;
+import std.string;
+import std.algorithm : canFind;
+import std.conv : to;
+
+import chunk;
+
+// Tag stored in Form.type identifying which kind of form a parsed node is.
+enum FormType {
+ ATOM, // literal value; set by the Atom constructors
+ CONS, // list with at least one element; set by Cons.append
+ NIL, // empty list; the type a freshly constructed Cons starts with
+ SYMBOL, // set by the Symbol constructor
+ FUNC, // `func` definition; set by the Func constructor
+
+ // Non-syntactic results handed back by the parser as forms.
+ EOF, // set by the Eof constructor
+ PARSE_ERROR // set by the ParseError constructor
+}
+
+/*
+struct Form {
+ FormType type;
+ bool evaluate;
+}
+*/
+
+// Common base class of every node produced by the parser.
+abstract class Form {
+ // Discriminator telling which concrete subclass this node is.
+ FormType type;
+ // Symbol sets this to `!quoted`; Atom and Cons set it to false.
+ // NOTE(review): presumably consumed by a later evaluation/compile stage - confirm.
+ bool evaluate;
+ // Line number recorded by the subclass constructor that created the node.
+ int line;
+}
+
+// A named symbol form.
+class Symbol : Form {
+
+    // Text of the symbol exactly as it was read.
+    string name;
+
+    // Convenience constructor: an unquoted symbol (evaluate == true).
+    this(string name, int line) {
+        this(name, false, line);
+    }
+
+    // Full constructor. `quoted` is stored inverted in `evaluate`.
+    this(string name, bool quoted, int line) {
+        this.type = FormType.SYMBOL;
+        this.evaluate = !quoted;
+        this.line = line;
+        this.name = name;
+    }
+}
+
+// Marker form returned when the parser has no more input to read.
+class Eof : Form {
+    // Records the line at which the end of input was reached.
+    this(int line) {
+        this.type = FormType.EOF;
+        this.line = line;
+    }
+}
+
+// Form describing a parse failure; it is returned like any other form.
+class ParseError : Form {
+
+    // Human-readable description of what went wrong.
+    string message;
+
+    // Stores the description together with the line the parser was on.
+    this(string message, int line) {
+        this.type = FormType.PARSE_ERROR;
+        this.line = line;
+        this.message = message;
+    }
+}
+
+// A list form. It starts out as NIL (empty) and becomes CONS on the first append.
+class Cons : Form {
+
+    Form head;     // first element, unset while the list is empty
+    Form[] tail;   // every element after the first, in order
+    int argCount;  // total number of elements appended so far
+
+    // Creates an empty list located at `line`.
+    this(int line) {
+        this.type = FormType.NIL;
+        this.evaluate = false;
+        this.argCount = 0;
+        this.line = line;
+    }
+
+    // Adds `form` to the end of the list: the first element becomes `head`,
+    // every later one is pushed onto `tail`.
+    void append(Form form) {
+        const bool wasEmpty = (argCount == 0);
+        argCount += 1;
+
+        if (!wasEmpty) {
+            tail ~= form;
+            return;
+        }
+
+        // First element: the list is no longer NIL.
+        head = form;
+        type = FormType.CONS;
+    }
+}
+
+// A `func` definition: a name, an argument list and a sequence of body forms.
+class Func : Form {
+
+    Symbol name;      // name of the function being defined
+    Cons args;        // argument list
+    Form[] funcBody;  // body forms in source order
+
+    // Creates an empty definition located at `line`; the fields are filled in by the caller.
+    this(int line) {
+        this.type = FormType.FUNC;
+        this.line = line;
+    }
+
+    // Appends one more form to the end of the function body.
+    void addToBody(Form f) {
+        funcBody ~= f;
+    }
+}
+
+// A literal value (string, number or boolean) wrapped as a form.
+class Atom : Form {
+    // The literal payload.
+    Value value;
+
+    // String literal.
+    this(string value, int line) {
+        initAtom(makeStringValue(value), line);
+    }
+
+    // Numeric literal.
+    this(double value, int line) {
+        initAtom(makeNumberValue(value), line);
+    }
+
+    // Boolean literal.
+    this(bool value, int line) {
+        initAtom(makeBooleanValue(value), line);
+    }
+
+    // Shared tail of all three constructors: store the payload and mark the
+    // node as a non-evaluated ATOM at the given line.
+    private void initAtom(Value payload, int line) {
+        this.value = payload;
+        this.line = line;
+        this.type = FormType.ATOM;
+        this.evaluate = false;
+    }
+
+}
+
+// Tag stored in Value.type telling which member of the `As` union is in use.
+enum ValueType {
+ STRING, // payload is As.str
+ NUMBER, // payload is As.number
+ BOOLEAN, // payload is As.boolean
+ OBJ, // payload is As.obj
+}
+
+// Overlapping storage for the payload of a Value; only the member selected by
+// Value.type should be read.
+union As {
+ bool boolean;
+ double number;
+ string str;
+ // NOTE(review): `Obj` is not declared in this file - presumably it comes from the `chunk` import; confirm.
+ Obj obj;
+}
+
+
+// Tagged value: `type` says which member of `as` carries the payload.
+struct Value {
+ ValueType type;
+ As as;
+}
+
+// Wraps `str` in a Value tagged as STRING.
+Value makeStringValue(string str) {
+    Value result;
+    result.type = ValueType.STRING;
+    result.as.str = str;
+    return result;
+}
+
+// Wraps `number` in a Value tagged as NUMBER.
+Value makeNumberValue(double number) {
+    Value result;
+    result.type = ValueType.NUMBER;
+    result.as.number = number;
+    return result;
+}
+
+// Wraps `boolean` in a Value tagged as BOOLEAN.
+//
+// The payload union must be passed to the Value initializer explicitly;
+// leaving it out default-initializes `as`, which would make every boolean
+// value read back as false.
+Value makeBooleanValue(bool boolean) {
+    As as = { boolean: boolean };
+    Value val = { ValueType.BOOLEAN, as };
+    return val;
+}
+
+// Wraps `obj` in a Value tagged as OBJ.
+//
+// The payload union must be passed to the Value initializer explicitly;
+// leaving it out default-initializes `as`, so the object reference would
+// be lost.
+Value makeObjValue(Obj obj) {
+    As as = { obj: obj };
+    Value val = { ValueType.OBJ, as };
+    return val;
+}
+
+// Renders the literal held by `a` as text: strings verbatim, numbers with
+// "%g", booleans as "true"/"false". Any other value type yields a
+// placeholder message.
+string atomAsString(Atom a) {
+    Value held = a.value;
+
+    if (held.type == ValueType.STRING) {
+        return held.as.str;
+    }
+    if (held.type == ValueType.NUMBER) {
+        return format("%g", held.as.number);
+    }
+    if (held.type == ValueType.BOOLEAN) {
+        return held.as.boolean ? "true" : "false";
+    }
+    return "! unknown value type !";
+}
+
+// Hand-written reader that turns source text into Form objects.
+//
+// Call parseForm() repeatedly; each call returns the next top-level form,
+// an Eof form once the input is exhausted, or a ParseError form when the
+// input is malformed. Errors are returned as forms, never thrown.
+class Parser {
+
+    string source; // complete input text
+    int pos;       // index of the next unread character in `source`
+    int line;      // current line number, starting at 1
+
+    // True while there is at least one unread character.
+    bool peekable() {
+        return (pos < source.length);
+    }
+
+    // Returns the next character without consuming it, or '\0' at end of input.
+    char peek() {
+        if (pos < source.length) {
+            return source[pos];
+        } else {
+            return '\0';
+        }
+    }
+
+    // Consumes and returns the next character; returns '\0' (and consumes
+    // nothing) at end of input.
+    char advance() {
+        char ret = peek();
+        if (ret != '\0') {
+            pos++;
+        }
+        return ret;
+    }
+
+    // Skips spaces, tabs, carriage returns and newlines, counting newlines in `line`.
+    void skipWhitespace() {
+        while (canFind([' ', '\t', '\r', '\n'], peek())) {
+            if (peek() == '\n') {
+                line++;
+            }
+            pos++;
+        }
+    }
+
+    // Reads characters up to (not including) the next boundary character
+    // and returns them as a Symbol.
+    Form parseSymbol() {
+        char[] acc;
+        char next;
+        while (peekable()) {
+            next = peek();
+            if (isBoundary(next)) {
+                break;
+            }
+            acc ~= next;
+            advance();
+        }
+        return new Symbol(to!string(acc), line);
+    }
+
+    // Reads a string literal body. The opening quote has already been
+    // consumed by the caller; the closing quote is consumed here.
+    // Returns a ParseError if the input ends before the closing quote.
+    Form parseString() {
+        char[] acc;
+        char next;
+        while (peekable()) {
+            next = peek();
+            if (next == '"') {
+                break;
+            } else if (next == '\n') {
+                line++;
+            }
+            acc ~= advance();
+        }
+        // The loop only ends with input remaining when it stopped on the
+        // closing quote, so running out of input means the string never closed.
+        if (!peekable()) {
+            return new ParseError("unterminated string!", line);
+        }
+        advance(); // go past last quote
+        return new Atom(to!string(acc), line);
+    }
+
+    // True for characters that end a symbol or number token.
+    bool isBoundary(char ch) {
+        return canFind([' ', '\r', '\t', '\n', ')'], ch);
+    }
+
+    // True for ASCII '0'..'9'.
+    bool isDigit(char ch) {
+        return '0' <= ch && '9' >= ch;
+    }
+
+    // Reads a run of digits up to the next boundary and returns it as a
+    // numeric Atom. Any non-digit, non-boundary character yields a
+    // ParseError; that character is left unconsumed.
+    Form parseNumber() {
+        char[] acc;
+        char next;
+        while (peekable()) {
+            next = peek();
+            if (isBoundary(next)) {
+                break;
+            } else if (isDigit(next)) {
+                acc ~= next;
+            } else {
+                return new ParseError("unparseable number", line);
+            }
+            advance();
+        }
+        return new Atom(to!double(to!string(acc)), line);
+    }
+
+    // Parses the remainder of a `(func name (args...) body...)` form.
+    // The opening paren and the `func` symbol have already been consumed.
+    Form parseFunc() {
+        // we've parsed `func`, but not the symbol yet
+        Form sym = parseForm();
+        if (sym.type != FormType.SYMBOL) {
+            return new ParseError("func definitions expect a symbol name", line);
+        }
+
+        // args should be a cons (possibly the empty list)
+        Form args = parseForm();
+        if (args.type != FormType.CONS && args.type != FormType.NIL) {
+            return new ParseError("func definitions expect a list of arguments", line);
+        }
+
+        Func func = new Func(line);
+        func.name = cast(Symbol)sym;
+        func.args = cast(Cons)args;
+
+        while (true) {
+            // Skip whitespace before looking for ')' so that trailing blanks
+            // before the closing paren do not produce an empty symbol.
+            skipWhitespace();
+            if (!peekable() || peek() == ')') {
+                break;
+            }
+            func.addToBody(parseForm());
+        }
+
+        if (!peekable()) {
+            return new ParseError("unterminated cons", line);
+        }
+
+        advance(); // consume closing paren
+
+        return func;
+    }
+
+    // Parses the remainder of a list after its opening paren has been
+    // consumed. Returns an empty (NIL) Cons for `()`, hands over to
+    // parseFunc() when the first element is the symbol `func`, and
+    // returns a ParseError if the closing paren is missing.
+    Form parseCons() {
+        skipWhitespace();
+        Cons cons = new Cons(line);
+
+        if (peekable() && peek() == ')') {
+            advance();
+            return cons;
+        }
+
+        Form f = parseForm();
+        if (f.type == FormType.SYMBOL) {
+            Symbol s = cast(Symbol)f;
+            switch (s.name) {
+                case "func":
+                    return parseFunc();
+                default:
+                    break;
+            }
+        }
+
+        // if it's not a special symbol, add it to the cons and keep parsing
+        cons.append(f);
+        while (true) {
+            // Skip whitespace before looking for ')' so that `(a b )` does
+            // not gain a spurious empty symbol as its last element.
+            skipWhitespace();
+            if (!peekable() || peek() == ')') {
+                break;
+            }
+            cons.append(parseForm());
+        }
+
+        if (!peekable()) {
+            return new ParseError("unterminated cons", line);
+        }
+
+        advance(); // go past closing paren
+
+        return cons;
+    }
+
+    // Parses and returns the next form: a number, string, list/func, or
+    // symbol. Returns Eof at end of input and a ParseError for malformed
+    // input, including a ')' that does not close any list.
+    Form parseForm() {
+        skipWhitespace();
+
+        if (!peekable()) {
+            return new Eof(line);
+        }
+
+        char next = peek();
+
+        if (isDigit(next)) {
+            return parseNumber();
+        }
+
+        switch (next) {
+            case '"':
+                advance(); // go past first quote
+                return parseString();
+            case '(':
+                advance(); // go past the open paren
+                return parseCons();
+            case ')':
+                // A ')' here closes nothing. Consume it so callers that loop
+                // until Eof always make progress instead of receiving an
+                // endless stream of empty symbols.
+                advance();
+                return new ParseError("unexpected ')'", line);
+            default:
+                return parseSymbol();
+        }
+    }
+
+    // Creates a parser positioned at the start of `source`, on line 1.
+    this(string source) {
+        this.source = source;
+        this.pos = 0;
+        this.line = 1;
+    }
+}