from tokens import *

# Lexer for a small Lisp-like language ("Neb").  The token constructors
# (NebLiteral, NebType, NebSymbol, NebOpen, NebClose) come from `tokens`.

# consts
DOUBLE_QUOTE = '"'
BACKSLASH = "\\"
OPEN_PAREN = "("
CLOSE_PAREN = ")"
DIGITS = "0123456789"
LETTERS = "abcdefghijklmnopqrstuvwxyz"
PUNCTUATION = "-_!*$@%^&=+/"
SYMBOL_VALS = list(LETTERS + LETTERS.upper() + DIGITS + PUNCTUATION)


def lex_string(inp):
    """Lex a string literal.

    `inp` starts just AFTER the opening double quote.  Returns a
    ``(NebLiteral(STRING), remainder)`` pair, where ``remainder`` is the
    input after the closing quote.  Supports ``\\"`` and ``\\\\`` escapes;
    any other backslash sequence is kept verbatim.  Raises if the string
    never closes.
    """
    token = ""
    esc = False
    for idx, c in enumerate(inp):
        # if we're escaping a quote, don't add the \
        if esc:
            if c == DOUBLE_QUOTE:
                token += DOUBLE_QUOTE
            elif c == BACKSLASH:
                token += BACKSLASH
            else:
                # unknown escape: keep the backslash and the char verbatim
                token += f"{BACKSLASH}{c}"
        # if it's an escape char, set esc and continue (skip the reset below)
        elif c == BACKSLASH:
            esc = True
            continue
        elif c == DOUBLE_QUOTE:
            return NebLiteral(NebType.STRING, token), inp[idx + 1:]
        else:
            token += c
        esc = False
    raise Exception("improperly ended string!")


def lex_bool(inp):
    """Lex a boolean literal.  `inp` starts just after the ``#`` marker.

    Returns ``(NebLiteral(BOOL), remainder)``.  Raises unless the input
    starts with ``true`` or ``false`` followed by end-of-input, a space,
    or a close paren.
    """
    # BUG FIX: the original consumed len(str(token)) characters, which only
    # works because len("True") == len("true") by coincidence; use an
    # explicit length instead.
    if inp[0:4] == "true":
        token = True
        length = 4
    elif inp[0:5] == "false":
        token = False
        length = 5
    else:
        raise Exception("invalid boolean")
    # the literal must be properly delimited, e.g. reject "#truex"
    if peek(inp[length:]) not in (None, " ", CLOSE_PAREN):
        raise Exception("invalid boolean")
    return NebLiteral(NebType.BOOL, token), inp[length:]


def _number_literal(token):
    """Turn an accumulated digit string into an INT or FLOAT literal."""
    # BUG FIX: a bare sign or lone '.' (inputs like "+", "-", ".") used to
    # crash int()/float() with an unrelated ValueError; raise the lexer's
    # own error instead.
    if token in ("", "-", ".", "-."):
        raise Exception("improper numeric!")
    if "." in token:
        return NebLiteral(NebType.FLOAT, float(token))
    return NebLiteral(NebType.INT, int(token))


def lex_number(inp):
    """Lex an int or float literal with an optional leading ``+``/``-``.

    Returns ``(NebLiteral(INT|FLOAT), remainder)``; stops at a space or a
    close paren.  A leading '+' is accepted but not stored (int()/float()
    already understand '-').  Raises on misplaced signs, repeated decimal
    points, or any other character.
    """
    token = ""
    for idx, c in enumerate(inp):
        if c in (" ", CLOSE_PAREN):
            return _number_literal(token), inp[idx:]
        if c in DIGITS:
            token += c
        elif c == "+":
            if idx != 0:
                raise Exception("improper sign placement!")
            # leading '+' is implicit; don't add it to the token
        elif c == "-":
            if idx != 0:
                raise Exception("improper sign placement!")
            token += c
        elif c == ".":
            if "." in token:
                raise Exception("too many decimal points")
            token += c
        else:
            raise Exception("improper numeric!")
    # ran off the end of input: the whole tail was the number
    return _number_literal(token), ""


def lex_symbol(inp):
    """Lex a symbol: letters, digits, and the chars in PUNCTUATION.

    Returns ``(NebSymbol, remainder)``; stops at a space or close paren,
    raises on any other character.
    """
    token = ""
    for idx, c in enumerate(inp):
        if c in (CLOSE_PAREN, " "):
            return NebSymbol(token), inp[idx:]
        if c in SYMBOL_VALS:
            token += c
        else:
            raise Exception("improper symbol")
    return NebSymbol(token), ""


def peek(inp):
    """Return the first character of `inp`, or None if it is empty."""
    if len(inp) == 0:
        return None
    return inp[0]


def _require_delimiter(remainder):
    """Raise unless the next char (if any) properly terminates a token."""
    if peek(remainder) not in (None, CLOSE_PAREN, " "):
        raise Exception("spaces required between tokens")


def lex(inp, tokens):
    """Tokenize `inp`, appending every token to (and returning) `tokens`.

    BUG FIX: the original recursed once per token, so inputs with more
    than ~1000 tokens hit Python's recursion limit; this loop is the
    direct iterative equivalent (same tokens, same exceptions).

    NOTE(review): '+', '-' and '.' dispatch to lex_number before the
    symbol branch, so a bare '+' or '-' symbol can never lex even though
    PUNCTUATION allows those chars — confirm that is intended.
    """
    while True:
        inp = inp.strip()  # surrounding whitespace doesn't matter at this point
        nxt = peek(inp)
        if nxt is None:
            return tokens
        # parens
        if nxt == OPEN_PAREN:
            tokens.append(NebOpen())
            inp = inp[1:]
        elif nxt == CLOSE_PAREN:
            tokens.append(NebClose())
            inp = inp[1:]
        # numbers (lex_number enforces its own terminator)
        elif nxt in DIGITS or nxt in ("+", "-", "."):
            token, inp = lex_number(inp)
            tokens.append(token)
        # strings
        elif nxt == DOUBLE_QUOTE:
            token, inp = lex_string(inp[1:])
            _require_delimiter(inp)
            tokens.append(token)
        # bool
        elif nxt == "#":
            token, inp = lex_bool(inp[1:])
            _require_delimiter(inp)
            tokens.append(token)
        # symbols
        elif nxt in SYMBOL_VALS:
            token, inp = lex_symbol(inp)
            _require_delimiter(inp)
            tokens.append(token)
        else:
            raise Exception("unable to lex")