#include "ms.h"

#include <iostream>
#include <limits>
#include <stack>
#include <string>

namespace ms {

/// One lexical token produced by Parser::read_token().
struct token {
    enum token_type {
        LPAREN,
        RPAREN,
        DOT,
        QUOTE,
        BOOLEAN,
        NUMBER,
        IDENT,
        STRING,
        // Named END_OF_INPUT rather than EOF: <cstdio> defines EOF as a macro,
        // which would otherwise rewrite the enumerator and break compilation.
        END_OF_INPUT,
        LEX_ERROR,
    };

    // Every field has a default so that copying a token never reads an
    // indeterminate value (only the fields relevant to `type` are filled in).
    token_type type = LEX_ERROR;
    size_t start = 0;   ///< Offset in the source of the token's first character.
    std::string dstr;   ///< Payload for IDENT and STRING tokens.
    int dint = 0;       ///< Payload for NUMBER tokens.
    bool dbool = false; ///< Payload for BOOLEAN tokens.
};

/// Recursive-descent reader turning source text into Value objects.
///
/// The parser keeps a reference to the source string; the caller must keep
/// that string alive for as long as the parser is used.
/// Value, Memory::Shield, get_memory() and unshield() are not declared in this
/// file (presumably they come from "ms.h").
struct Parser {
    explicit Parser(const std::string &src) : src(src), lpos(0), bs() {
    }

    /// Parses exactly one S-expression starting at the next token.
    /// @return the parsed value, or nullptr after reporting via parse_error().
    Value *parse() {
        token t = next_token();
        switch (t.type) {
        // Const
        case token::BOOLEAN:
            return unshield().new_bool(t.dbool);
        case token::NUMBER:
            return unshield().new_number(t.dint);
        case token::STRING:
            return unshield().new_string(t.dstr);

        // Id
        case token::IDENT:
            return unshield().new_symbol(t.dstr);

        // 'S-Exp  ==>  (quote S-Exp)
        case token::QUOTE: {
            Memory::Shield shield(get_memory());
            Value *quoted = parse();
            if (!quoted)
                return nullptr;
            shield.add(quoted);
            return shield.new_pair(shield.new_symbol("quote"),
                                   shield.new_pair(quoted, shield.new_nil()));
        }

        // (S-Exp* [S-Exp . S-Exp])
        case token::LPAREN:
            return parse_list();

        default:
            parse_error("expected token");
            return nullptr;
        }
    }

    /// Parses every expression up to the end of input.
    /// @return a list of the parsed expressions (nil-terminated), or nullptr
    ///         as soon as one expression fails to parse.
    Value *parse_toplevel() {
        Memory::Shield shield(get_memory());
        // Dummy head cell so that appending never needs a special first case;
        // the real list starts at head->p2.
        Value *head = shield.new_pair(shield.new_nil(), nullptr);
        Value *last = head;
        while (peek_token().type != token::END_OF_INPUT) {
            Value *item = parse();
            if (!item)
                return nullptr;
            shield.add(item);
            Value *cell = shield.new_pair(item, nullptr);
            last->p2 = cell;
            last = cell;
        }
        last->p2 = shield.new_nil();
        // C-style cast kept from the original: the declared type of p2 is not
        // visible here, so a narrower named cast cannot be chosen safely.
        return (Value *)head->p2;
    }

    /// Writes a diagnostic line to std::cerr.
    void parse_error(const std::string &msg) {
        std::cerr << "sexp_parse: " << msg << "\n";
    }

    /// Pushes a token back so that the next next_token()/peek_token() sees it.
    token unput_token(token t) {
        bs.push(t);
        return t;
    }

    /// Consumes and returns the next token (pushed-back tokens first).
    token next_token() {
        if (bs.empty())
            return read_token();
        token t = bs.top();
        bs.pop();
        return t;
    }

    /// Returns the next token without consuming it.
    token peek_token() {
        if (!bs.empty())
            return bs.top();
        return unput_token(read_token());
    }

    /// Lexes one token directly from the source, ignoring the push-back stack.
    /// On LEX_ERROR the read position is left wherever lexing stopped.
    token read_token() {
        token t;
        skip_blanks_and_comments();
        t.start = lpos;
        if (lpos == src.size()) {
            t.type = token::END_OF_INPUT;
            return t;
        }

        switch (src.at(lpos)) {
        case '(':
            t.type = token::LPAREN;
            lpos++;
            break;
        case ')':
            t.type = token::RPAREN;
            lpos++;
            break;
        case '\'':
            t.type = token::QUOTE;
            lpos++;
            break;
        case '.':
            t.type = token::DOT;
            lpos++;
            break;
        case '#': {
            // Only "#t" and "#f" are accepted. A '#' that is the last
            // character of the input is a lex error (the previous bounds test
            // could never fire and let src.at() throw std::out_of_range).
            const char flag = (lpos + 1 < src.size()) ? src.at(lpos + 1) : '\0';
            if (flag != 't' && flag != 'f') {
                t.type = token::LEX_ERROR;
                break;
            }
            t.type = token::BOOLEAN;
            t.dbool = (flag == 't');
            lpos += 2;
            break;
        }
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            lex_number(t);
            break;
        case '"':
            lex_string(t);
            break;
        default:
            lex_identifier(t);
            break;
        }
        return t;
    }

    /// Parses the remainder of a list whose '(' has already been consumed.
    Value *parse_list() {
        if (peek_token().type == token::RPAREN) {
            next_token();
            return unshield().new_nil();
        }

        Memory::Shield shield(get_memory());
        Value *first = parse();
        if (!first)
            return nullptr;
        shield.add(first);
        Value *head = shield.new_pair(first, nullptr);
        Value *last = head;

        while (true) {
            const token::token_type upcoming = peek_token().type;

            if (upcoming == token::RPAREN) {
                next_token();
                last->p2 = shield.new_nil();
                return head;
            }

            if (upcoming == token::DOT) {
                next_token();
                Value *tail = parse();
                // A failed tail must abort the whole list; storing nullptr
                // into p2 would hand a malformed pair back to the caller.
                if (!tail)
                    return nullptr;
                last->p2 = tail;
                if (next_token().type != token::RPAREN) {
                    parse_error("expected rparen");
                    return nullptr;
                }
                return head;
            }

            Value *item = parse();
            if (!item)
                return nullptr;
            shield.add(item);
            Value *cell = shield.new_pair(item, nullptr);
            last->p2 = cell;
            last = cell;
        }
    }

    /// Advances past spaces, tabs, newlines and ';' comments (to end of line).
    void skip_blanks_and_comments() {
        while (lpos < src.size()) {
            const char c = src.at(lpos);
            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                lpos++;
            } else if (c == ';') {
                // Stop on the newline itself; the next iteration skips it.
                while (++lpos < src.size() && src.at(lpos) != '\n') {
                }
            } else {
                break;
            }
        }
    }

    /// Lexes a run of decimal digits into t.dint.
    /// A literal that does not fit in int becomes LEX_ERROR instead of
    /// overflowing (signed overflow is undefined behaviour).
    void lex_number(token &t) {
        const int limit = std::numeric_limits<int>::max();
        bool overflow = false;
        t.type = token::NUMBER;
        t.dint = 0;
        while (lpos < src.size() && src.at(lpos) >= '0' && src.at(lpos) <= '9') {
            const int digit = src.at(lpos++) - '0';
            if (overflow || t.dint > (limit - digit) / 10) {
                overflow = true; // keep consuming the rest of the digits
                continue;
            }
            t.dint = t.dint * 10 + digit;
        }
        if (overflow)
            t.type = token::LEX_ERROR;
    }

    /// Lexes a double-quoted string starting at the opening quote.
    /// Unterminated strings and unknown escapes yield LEX_ERROR.
    void lex_string(token &t) {
        t.type = token::STRING;
        t.dstr.clear();
        bool escaped = false;
        while (true) {
            if (++lpos == src.size()) {
                t.type = token::LEX_ERROR;
                return;
            }
            const char c = src.at(lpos);
            if (escaped) {
                // The only recognised escape produces a literal '"'.
                if (c != '"') {
                    t.type = token::LEX_ERROR;
                    return;
                }
                t.dstr.push_back('"');
                escaped = false;
            } else if (c == '\'') {
                // NOTE(review): the escape character is a single quote, not a
                // backslash, so a string containing an apostrophe is a lex
                // error. Behaviour kept as-is; confirm whether '\\' was meant.
                escaped = true;
            } else if (c == '"') {
                lpos++;
                return;
            } else {
                t.dstr.push_back(c);
            }
        }
    }

    /// Lexes an identifier; an empty match (unsupported character) is a
    /// LEX_ERROR and leaves the read position unchanged.
    void lex_identifier(token &t) {
        static const std::string idchars = "!$%&*+-./<=>?@^_:";
        t.type = token::IDENT;
        t.dstr.clear();
        while (lpos < src.size()) {
            const char c = src.at(lpos);
            const bool is_digit = ('0' <= c && c <= '9');
            const bool is_lower = ('a' <= c && c <= 'z');
            const bool is_upper = ('A' <= c && c <= 'Z');
            const bool is_punct = (idchars.find(c) != std::string::npos);
            if (!(is_digit || is_lower || is_upper || is_punct))
                break;
            t.dstr.push_back(c);
            lpos++;
        }
        if (t.dstr.empty())
            t.type = token::LEX_ERROR;
    }

    const std::string &src; ///< Text being parsed (not owned).
    size_t lpos;            ///< Offset of the next unread character in src.
    std::stack<token> bs;   ///< Pushed-back tokens, most recent on top.
};

/// Parses all top-level expressions in `str`.
/// @return the list built by Parser::parse_toplevel(), or nullptr on error.
Value *sexp_parse_toplevel(const std::string &str) {
    Parser parser(str);
    return parser.parse_toplevel();
}

}