diff options
author | Kazuki Yamaguchi <k@rhe.jp> | 2018-06-08 16:57:17 +0900 |
---|---|---|
committer | Kazuki Yamaguchi <k@rhe.jp> | 2018-06-08 16:57:17 +0900 |
commit | 6a1fa7db286c9cda11a1847affd688b9e5f80c08 (patch) | |
tree | 9f7f6671ece671cac2f77be4fac14dcdca279d17 | |
download | mini-scheme-6a1fa7db286c9cda11a1847affd688b9e5f80c08.tar.gz |
initial commit
-rw-r--r-- | .gitignore | 7 | ||||
-rw-r--r-- | .mbed | 3 | ||||
-rw-r--r-- | C12832.lib | 1 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | builtin_mbed.cpp | 88 | ||||
-rw-r--r-- | examples/count_up.scm | 6 | ||||
-rw-r--r-- | examples/display.scm | 1 | ||||
-rw-r--r-- | examples/fact.scm | 22 | ||||
-rw-r--r-- | examples/fib.scm | 5 | ||||
-rw-r--r-- | examples/joystick.scm | 14 | ||||
-rw-r--r-- | examples/map.scm | 10 | ||||
-rw-r--r-- | examples/test.scm | 14 | ||||
-rw-r--r-- | main.cpp | 89 | ||||
-rw-r--r-- | mbed-os.lib | 1 | ||||
-rw-r--r-- | mbed_settings.py | 45 | ||||
-rw-r--r-- | prelude.mbed.scm | 13 | ||||
-rw-r--r-- | ss/.gitignore | 5 | ||||
-rw-r--r-- | ss/Makefile | 32 | ||||
-rw-r--r-- | ss/builtin.cpp | 349 | ||||
-rw-r--r-- | ss/compile.cpp | 750 | ||||
-rw-r--r-- | ss/eval.cpp | 344 | ||||
-rw-r--r-- | ss/main.cpp | 81 | ||||
-rw-r--r-- | ss/memory.cpp | 204 | ||||
-rw-r--r-- | ss/mini-scheme.txt | 278 | ||||
-rw-r--r-- | ss/ms.h | 466 | ||||
-rw-r--r-- | ss/prelude.scm | 54 | ||||
-rw-r--r-- | ss/sexp_parse.cpp | 262 |
27 files changed, 3162 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..77b3e00 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +*.pyc +/BUILD +*.o +*.d +ms +vgcore.* +prelude.*inc @@ -0,0 +1,3 @@ +TARGET=K64F +TOOLCHAIN=GCC_ARM +ROOT=. diff --git a/C12832.lib b/C12832.lib new file mode 100644 index 0000000..b11b409 --- /dev/null +++ b/C12832.lib @@ -0,0 +1 @@ +https://os.mbed.com/users/chris/code/C12832/#7de323fa46fe diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3527d90 --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +export PATH := /opt/python/2.7/bin:$(PATH) +CXX = g++ +CXXFLAGS = -std=c++03 \ + -O0 -ggdb3 -Wall -Wextra -Wno-parentheses -Wno-unused-parameter \ + -MMD \ + -DMS_DEBUG \ + # -DMS_GC_DEBUG \ + +all: prelude.mbed.inc + rm -f ss/*.o + make -C ss prelude.inc + mbed compile + +prelude.mbed.inc: prelude.mbed.scm + xxd -i $< > $@ + +run: all + mbed compile -f diff --git a/builtin_mbed.cpp b/builtin_mbed.cpp new file mode 100644 index 0000000..61dc8d4 --- /dev/null +++ b/builtin_mbed.cpp @@ -0,0 +1,88 @@ +#include "ss/ms.h" +#include <map> +#include "C12832.h" + +// LCD +C12832 lcd(D11, D13, D12, D7, D10); +// Joystick: up, down, left, right, press +DigitalIn joystick[] = {A2, A3, A4, A5, D4}; + +namespace ms { + +#define CHECK_INT(v) __extension__({ \ + if ((v)->type != Value::INTEGER) \ + return unshield().new_error("expected integer"); \ + reinterpret_cast<intptr_t>(v->p1); \ +}) + +#define CHECK_PAIR(v) __extension__({ \ + if ((v)->type != Value::PAIR) \ + return unshield().new_error("expected pair"); \ + v; \ +}) + +#define CHECK_STR(v) __extension__({ \ + if ((v)->type != Value::STRING) \ + return unshield().new_error("expected string"); \ + *static_cast<const std::string *>(v->p1); \ +}) + + +static Value *mbed_wait_ms(int argc, Value **argv) { + wait(CHECK_INT(argv[0]) / 1000.0); + return unshield().new_nil(); +} + +static Value *c12832_cls(int argc, Value **argv) { + lcd.cls(); + return unshield().new_nil(); +} + +static Value *c12832_pixel(int argc, Value **argv) { + intptr_t a = CHECK_INT(argv[0]), + b = CHECK_INT(argv[1]), + c = CHECK_INT(argv[2]); + lcd.pixel((int)a, (int)b, (int)c); + return unshield().new_nil(); +} + +static Value *c12832_print(int argc, Value **argv) { + const Value *pair = CHECK_PAIR(argv[0]); + intptr_t x = CHECK_INT(car(pair)), y = CHECK_INT(cdr(pair)); + const std::string &str = CHECK_STR(argv[1]); + + lcd.locate(x, y); + lcd.printf(str.c_str()); + return unshield().new_nil(); +} + +static Value *c12832_fillcircle(int argc, Value **argv) { + intptr_t x = CHECK_INT(argv[0]), + y = CHECK_INT(argv[1]), + r = CHECK_INT(argv[2]), + c = CHECK_INT(argv[3]); + lcd.fillcircle((int)x, (int)y, (int)r, (int)c); + return unshield().new_nil(); +} + +static Value *joystick_active_p(int argc, Value **argv) { + intptr_t n = CHECK_INT(argv[0]); + if (n < 0 || n > (intptr_t)(sizeof(joystick)/sizeof(joystick[0]))) + return unshield().new_error("invalid joystick number"); + return unshield().new_bool(joystick[n]); +} + +void builtin_add_mbed(std::map<const std::string, Value *> &map) { + static Memory::Shield shield(get_memory()); // FIXME + + map["mbed-wait-ms"] = shield.new_cfunc(mbed_wait_ms, 1); + + map["c12832-cls"] = shield.new_cfunc(c12832_cls, 0); + map["c12832-pixel"] = shield.new_cfunc(c12832_pixel, 3); + map["c12832-print"] = shield.new_cfunc(c12832_print, 2); + map["c12832-fillcircle"] = shield.new_cfunc(c12832_fillcircle, 4); + + map["joystick-active?"] = shield.new_cfunc(joystick_active_p, 1); +} + +} diff --git a/examples/count_up.scm b/examples/count_up.scm new file mode 100644 index 0000000..6eb5fa0 --- /dev/null +++ b/examples/count_up.scm @@ -0,0 +1,6 @@ +(define (main n) + (c12832-cls) + (c12832-print '(30 . 12) (number->string n)) + (mbed-wait-ms 500) + (main (+ n 1))) +(main 0) diff --git a/examples/display.scm b/examples/display.scm new file mode 100644 index 0000000..b933fd4 --- /dev/null +++ b/examples/display.scm @@ -0,0 +1 @@ +(display "a") diff --git a/examples/fact.scm b/examples/fact.scm new file mode 100644 index 0000000..af56eb7 --- /dev/null +++ b/examples/fact.scm @@ -0,0 +1,22 @@ +(define (fact n) + (if (= n 0) + 1 + (* n (fact (- n 1))))) + +(define (fact/cps n cont) + (if (= n 0) + (cont 1) + (fact/cps (- n 1) (lambda (a) (cont (* n a)))))) + +(define num 11) + +(display (string-append "fact: " (number->string (fact num)))) +(display (string-append "fact/cps: " (number->string (fact/cps num (lambda (x) x))))) + + (define return #f) + +(display (+ 1 (call/cc + (lambda (cont) + (set! return cont) + 1)))) +(display (return 23)) diff --git a/examples/fib.scm b/examples/fib.scm new file mode 100644 index 0000000..2105072 --- /dev/null +++ b/examples/fib.scm @@ -0,0 +1,5 @@ +(define (fib n) + (if (<= n 2) + 1 + (+ (fib (- n 1)) (fib (- n 2))))) +(display (fib 20)) diff --git a/examples/joystick.scm b/examples/joystick.scm new file mode 100644 index 0000000..6a618de --- /dev/null +++ b/examples/joystick.scm @@ -0,0 +1,14 @@ +(define x-max 128) +(define y-max 32) + +(define (clamp x a b) + (cond ((> x b) b) ((< x a) a) (else x))) + +(let loop ((x (/ x-max 2)) (y (/ y-max 2)) (r 3)) + (c12832-cls) + (c12832-fillcircle x y r 1) + (mbed-wait-ms 50) + (loop + (clamp (cond ((joystick-active? 2) (- x 1)) ((joystick-active? 3) (+ x 1)) (else x)) 0 x-max) + (clamp (cond ((joystick-active? 0) (- y 1)) ((joystick-active? 1) (+ y 1)) (else y)) 0 y-max) + (if (joystick-active? 4) 10 3))) diff --git a/examples/map.scm b/examples/map.scm new file mode 100644 index 0000000..0789aca --- /dev/null +++ b/examples/map.scm @@ -0,0 +1,10 @@ +(define (map_a func lst) + (let next ((rest lst)) + (if (null? rest) + '() + (cons (func (car rest)) (next (cdr rest)))))) + +(define x (list 1 2 3)) +(display + (map_a (lambda (t) (display t) (+ t 1)) + x)) diff --git a/examples/test.scm b/examples/test.scm new file mode 100644 index 0000000..05967f3 --- /dev/null +++ b/examples/test.scm @@ -0,0 +1,14 @@ +;(display (quote (1 2 3))) + +;; (define (assert x msg) +;; (if (not x) (display (string-append "NG" msg)))) +;; +;; +;; (define (x y . z) (display (cons y z))) +;; (assert (equal? )) + +(define (sum n) (if (= n 0) n (+ n (sum (- n 1))))) +(define (sumi n acc) (if (= n 0) acc (sumi (- n 1) (+ n acc)))) + +(display (sumi 1000000 0)) +(display (sum 1000000)) diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..889f1fc --- /dev/null +++ b/main.cpp @@ -0,0 +1,89 @@ +#include "mbed.h" +#include "ss/ms.h" +#include <map> +#include <cstring> +#include "C12832.h" + +DigitalOut led1(LED1); +Serial pc(USBTX, USBRX); +extern C12832 lcd; + +static void report_error(const std::string &message) { + lcd.locate(0, 0); + lcd.printf("fatal: %s:", message.c_str()); +} + +static void report_progress(const std::string &message) { + lcd.locate(0, 18); + lcd.printf("%s", message.c_str()); +} + +static void eval_string(const std::string &source) { + // Parse S-expressions + report_progress("parsing"); + ms::Value *parsed = ms::sexp_parse_toplevel(source); + if (!parsed) { + report_error("parse error"); + return; + } + + // Compile into bytecode + report_progress("compiling"); + ms::Assembly *assm = ms::compile(parsed); + if (!assm) { + report_error("compile error"); + return; + } + + // Evaluate + report_progress("evaluating"); + ms::Value *rpair = ms::eval(assm, nullptr), + *ret = car(rpair), *rerr = cdr(rpair); + + lcd.cls(); + lcd.locate(0, 9); + if (rerr->type == ms::Value::NIL) + lcd.printf("ret: %s", dump_s(ret).c_str()); + else + lcd.printf("err: %s", + (*static_cast<const std::string *>(rerr->p1)).c_str()); +} + +int main() { +#include "ss/prelude.inc" + std::string prelude(reinterpret_cast<const char *>(prelude_scm), prelude_scm_len); +#include "prelude.mbed.inc" + prelude.append(reinterpret_cast<const char *>(prelude_mbed_scm), prelude_mbed_scm_len); + + pc.baud(9600); + + while (true) { + report_progress("waiting for input"); + + std::string source; + int found_eof = 0; + while (true) { + char c = pc.getc(); + if (c == 'E') + found_eof = 1; + else if (found_eof == 1 && c == 'O') + found_eof++; + else if (found_eof == 2 && c == 'F') { + source.resize(source.size() - 2); + break; + } else + found_eof = 0; + source.push_back(c); + } + std::string concat(prelude); + concat.append(source); + + eval_string(concat); + } + + report_error("unreachable"); + while (true) { + led1 = !led1; + wait(0.5); + } +} diff --git a/mbed-os.lib b/mbed-os.lib new file mode 100644 index 0000000..ef2aac4 --- /dev/null +++ b/mbed-os.lib @@ -0,0 +1 @@ +https://github.com/ARMmbed/mbed-os/#f9ee4e849f8cbd64f1ec5fdd4ad256585a208360 diff --git a/mbed_settings.py b/mbed_settings.py new file mode 100644 index 0000000..29bd6b8 --- /dev/null +++ b/mbed_settings.py @@ -0,0 +1,45 @@ +""" +mbed SDK +Copyright (c) 2016 ARM Limited + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from os.path import join, abspath, dirname + +#ROOT = abspath(join(dirname(__file__), ".")) + +############################################################################## +# Build System Settings +############################################################################## +#BUILD_DIR = abspath(join(ROOT, "build")) + +# ARM +#ARM_PATH = "C:/Program Files/ARM" + +# GCC ARM +GCC_ARM_PATH = "/opt/gcc-arm-none-eabi-7-2017-q4-major/bin" + +# GCC CodeRed +#GCC_CR_PATH = "C:/code_red/RedSuite_4.2.0_349/redsuite/Tools/bin" + +# IAR +#IAR_PATH = "C:/Program Files (x86)/IAR Systems/Embedded Workbench 7.0/arm" + +# Goanna static analyser. Please overload it in private_settings.py +#GOANNA_PATH = "c:/Program Files (x86)/RedLizards/Goanna Central 3.2.3/bin" + +#BUILD_OPTIONS = [] + +# mbed.org username +#MBED_ORG_USER = "" diff --git a/prelude.mbed.scm b/prelude.mbed.scm new file mode 100644 index 0000000..2661d61 --- /dev/null +++ b/prelude.mbed.scm @@ -0,0 +1,13 @@ +(define-cfunc mbed-wait-ms) + +(define-cfunc c12832-cls) +(define-cfunc c12832-print) +(define-cfunc c12832-pixel) +(define-cfunc c12832-fillcircle) + +(define-cfunc joystick-active?) + +(set! display + (lambda (x) + (c12832-print '(0 . 0) (dump-s x)) + x)) diff --git a/ss/.gitignore b/ss/.gitignore new file mode 100644 index 0000000..3cdccba --- /dev/null +++ b/ss/.gitignore @@ -0,0 +1,5 @@ +*.o +*.d +ms +vgcore.* +prelude.inc diff --git a/ss/Makefile b/ss/Makefile new file mode 100644 index 0000000..f369ae1 --- /dev/null +++ b/ss/Makefile @@ -0,0 +1,32 @@ +CXX = g++ +CXXFLAGS = -std=c++03 \ + -O3 -ggdb3 -Wall -Wextra -Wno-parentheses -Wno-unused-parameter \ + -MMD \ + -DMS_DEBUG \ + #-DMS_GC_DEBUG \ + #-DMS_GC_DEBUG_STRESS \ + +TARGET = ms +SRCS = $(wildcard *.cpp) +OBJS = $(filter-out prelude.o, $(SRCS:%.cpp=%.o)) + +%.o: %.cpp + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -MMD -c -o $@ $< + +.PHONY: all clean + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(CXX) $(CXXFLAGS) -o $(TARGET) $(OBJS) + +prelude.inc: prelude.scm + xxd -i $< > $@ +main.o: prelude.inc + +clean: + rm -f ms + rm -f *.o + rm -f *.d + +-include $(SRCS:%.cpp=%.d) diff --git a/ss/builtin.cpp b/ss/builtin.cpp new file mode 100644 index 0000000..410f8b2 --- /dev/null +++ b/ss/builtin.cpp @@ -0,0 +1,349 @@ +#include "ms.h" +#include <list> +#include <map> +#include <algorithm> + +namespace ms { + +static inline bool number_p(Value *v) { + return v->type == Value::INTEGER; +} + +static inline intptr_t number_to_int(Value *v) { + assert(number_p(v)); + return reinterpret_cast<intptr_t>(v->p1); +} + +static Value *builtin_number_p(int argc, Value **argv) { + return unshield().new_bool(number_p(argv[0])); +} + +#define DEFINE_ARITH(name, op) \ + static Value *builtin_##name(int argc, Value **argv) { \ + if (!number_p(argv[0]) || !number_p(argv[1])) \ + return unshield().new_error(#op ": wrong argument type"); \ + return unshield().new_number(number_to_int(argv[0]) op number_to_int(argv[1])); \ + } +#define DEFINE_COMPARE(name, op) \ + static Value *builtin_##name(int argc, Value **argv) { \ + if (!number_p(argv[0]) || !number_p(argv[1])) \ + return unshield().new_error(#op ": wrong argument type"); \ + return unshield().new_bool(number_to_int(argv[0]) op number_to_int(argv[1])); \ + } +DEFINE_ARITH(add, +) +DEFINE_ARITH(sub, -) +DEFINE_ARITH(mul, *) +DEFINE_ARITH(div, /) +DEFINE_COMPARE(eq, ==) +DEFINE_COMPARE(lt, <) +DEFINE_COMPARE(leq, <=) +DEFINE_COMPARE(gt, >) +DEFINE_COMPARE(geq, >=) + +// Lists +static inline bool null_p(Value *v) { + return v->type == Value::NIL; +} + +static inline bool pair_p(Value *v) { + return v->type == Value::PAIR; +} + +static Value *builtin_null_p(int argc, Value **argv) { + return unshield().new_bool(null_p(argv[0])); +} + +static Value *builtin_pair_p(int argc, Value **argv) { + return unshield().new_bool(pair_p(argv[0])); +} + +static Value *builtin_list_p(int argc, Value **argv) { + Value *v = argv[0]; + while (true) { + if (v->type == Value::NIL) + return unshield().new_bool(true); + if (v->type != Value::PAIR) + return unshield().new_bool(false); + v = (Value *)v->p2; + } +} + +static inline bool symbol_p(const Value *v) { + return v->type == Value::SYMBOL; +} + +static Value *builtin_symbol_p(int argc, Value **argv) { + return unshield().new_bool(argv[0]->type == Value::SYMBOL); +} + +static Value *builtin_car(int argc, Value **argv) { + if (!pair_p(argv[0])) + return unshield().new_error("car: pair expected"); + return (Value *)argv[0]->p1; +} + +static Value *builtin_cdr(int argc, Value **argv) { + if (!pair_p(argv[0])) + return unshield().new_error("cdr: pair expected"); + return (Value *)argv[0]->p2; +} + +static Value *builtin_cons(int argc, Value **argv) { + return unshield().new_pair(argv[0], argv[1]); +} + +static Value *builtin_list(int argc, Value **argv) { + return unshield().new_list(argc, argv); +} + +static Value *builtin_length(int argc, Value **argv) { + size_t size = 0; + Value *v = argv[0]; + while (true) { + if (v->type == Value::NIL) + return unshield().new_number(size); + if (v->type != Value::PAIR) + return unshield().new_number(size + 1); // FIXME: error? + size++; + v = (Value *)v->p2; + } +} + +static Value *builtin_eq_p(int argc, Value **argv); + +static Value *builtin_memq(int argc, Value **argv) { + Value *v = argv[1]; + while (true) { + if (v->type == Value::NIL) + return v; + if (v->type != Value::PAIR) + return unshield().new_nil(); // FIXME: error? + + Value *eq_args[2]; + eq_args[0] = argv[0]; + eq_args[1] = (Value *)v->p1; + if (builtin_eq_p(2, eq_args)->test()) + return v; + v = (Value *)v->p2; + } +} + +static Value *builtin_last(int argc, Value **argv) { + Value *v = argv[1]; + if (!pair_p(argv[0])) + return unshield().new_error("last: pair expected"); + while (true) { + if (((Value *)v->p2)->type != Value::PAIR) + return (Value *)v->p1; + v = (Value *)v->p2; + } +} + +static Value *builtin_append(int argc, Value **argv) { + assert(0); +} + +static Value *builtin_set_car(int argc, Value **argv) { + if (!pair_p(argv[0])) + return unshield().new_error("set-car!: pair expected"); + // check, constantnedd + argv[0]->p1 = argv[1]; + return unshield().new_nil(); +} + +static Value *builtin_set_cdr(int argc, Value **argv) { + if (!pair_p(argv[0])) + return unshield().new_error("set-cdr!: pair expected"); + // check, constantnedd + argv[0]->p2 = argv[1]; + return unshield().new_nil(); +} + +// Boolean +static Value *builtin_boolean_p(int argc, Value **argv) { + return unshield().new_bool(argv[0]->type == Value::BOOLEAN); +} + +// String +static inline bool string_p(const Value *v) { + return v->type == Value::STRING; +} + +static Value *builtin_string_p(int argc, Value **argv) { + return unshield().new_bool(argv[0]->type == Value::STRING); +} + +static Value *builtin_string_append(int argc, Value **argv) { + std::string *ret = new std::string(); + for (int i = 0; i < argc; i++) { + const Value *s = argv[i]; + if (!string_p(s)) { + delete ret; + return unshield().new_error("string-append: string expected"); + } + ret->append(*static_cast<const std::string *>(s->p1)); + } + return unshield().new_string(ret); +} + +static Value *builtin_symbol_to_string(int argc, Value **argv) { + const Value *sym = argv[0]; + if (!symbol_p(sym)) + return unshield().new_error("symbol->string: symbol expected"); + return unshield().new_string(*static_cast<const std::string *>(sym->p1)); +} + +static Value *builtin_string_to_symbol(int argc, Value **argv) { + const Value *str = argv[0]; + if (!string_p(str)) + return unshield().new_error("string->symbol: string expected"); + return unshield().new_symbol(*static_cast<const std::string *>(str->p1)); +} + +static Value *builtin_string_to_number(int argc, Value **argv) { + const Value *str = argv[0]; + const std::string &s = *static_cast<const std::string *>(str->p1); + intptr_t i = 0; + for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) { + if (*it >= '0' && *it <= '9') + i = i * 10 + (*it - '0'); + else + return unshield().new_error("string->number: invalid character"); + } + return unshield().new_number(i); +} + +static Value *builtin_number_to_string(int argc, Value **argv) { + const Value *num = argv[0]; + intptr_t i = reinterpret_cast<intptr_t>(num->p1); + if (i == 0) + return unshield().new_string("0"); + std::string *ret = new std::string(); + while (i) { + intptr_t p = i / 10, q = i % 10; + ret->push_back('0' + (char)q); + i = p; + } + std::reverse(ret->begin(), ret->end()); + return unshield().new_string(ret); +} + + +// Function +static Value *builtin_procedure_p(int argc, Value **argv) { + return unshield().new_bool(argv[0]->type == Value::LAMBDA || + argv[0]->type == Value::CFUNC || + argv[0]->type == Value::CALLCC || + argv[0]->type == Value::CONT); +} + + +// Comparison +static Value *builtin_eq_p(int argc, Value **argv) { + if (argv[0] == argv[1]) + return unshield().new_bool(true); + if (argv[0]->type != argv[1]->type) + return unshield().new_bool(false); + + switch (argv[0]->type) { + case Value::BOOLEAN: + return unshield().new_bool(argv[0]->p1 == argv[1]->p1); + case Value::SYMBOL: { + const std::string &s0 = *static_cast<const std::string *>(argv[0]->p1), + &s1 = *static_cast<const std::string *>(argv[1]->p1); + return unshield().new_bool(s0 == s1); + } + case Value::NIL: + return unshield().new_bool(true); + default: + return unshield().new_bool(false); + } +} + +static Value *builtin_equal_p(int argc, Value **argv) { + switch (argv[0]->type) { + case Value::BOOLEAN: + case Value::SYMBOL: + case Value::NIL: + return builtin_eq_p(argc, argv); + default: + // FIXME + return unshield().new_bool(argv[0]->type == argv[1]->type && + argv[0]->p1 == argv[1]->p1 && + argv[0]->p2 == argv[1]->p2); + } +} + + +// Debugging +static Value *builtin_dump(int argc, Value **argv) { + dump(argv[0]); + return argv[0]; +} + +static Value *builtin_dump_s(int argc, Value **argv) { + return unshield().new_string(dump_s(argv[0])); +} + + + +Value *get_cfunc(const std::string &id) { + static std::map<const std::string, Value *> variables; + static Memory::Shield shield(get_memory()); // FIXME + if (variables.empty()) { +#define ADD(name, fname, argc) variables[name] = shield.new_cfunc(fname, argc) + ADD("number?", builtin_number_p, 1); + ADD("+", builtin_add, 2); + ADD("-", builtin_sub, 2); + ADD("*", builtin_mul, 2); + ADD("/", builtin_div, 2); + ADD("=", builtin_eq, 2); + ADD("<", builtin_lt, 2); + ADD("<=", builtin_leq, 2); + ADD(">", builtin_gt, 2); + ADD(">=", builtin_geq, 2); + + ADD("null?", builtin_null_p, 1); + ADD("pair?", builtin_pair_p, 1); + ADD("list?", builtin_list_p, 1); + ADD("symbol?", builtin_symbol_p, 1); + ADD("car", builtin_car, 1); + ADD("cdr", builtin_cdr, 1); + ADD("cons", builtin_cons, 2); + ADD("list", builtin_list, -1); + ADD("length", builtin_length, 1); + ADD("memq", builtin_memq, 2); + ADD("last", builtin_last, 1); + ADD("append", builtin_append, -1); + ADD("set-car!", builtin_set_car, 2); + ADD("set-cdr!", builtin_set_cdr, 2); + + ADD("boolean?", builtin_boolean_p, 1); + // not + + ADD("string?", builtin_string_p, 1); + ADD("string-append", builtin_string_append, -1); + ADD("symbol->string", builtin_symbol_to_string, 1); + ADD("string->symbol", builtin_string_to_symbol, 1); + ADD("string->number", builtin_string_to_number, 1); + ADD("number->string", builtin_number_to_string, 1); + + ADD("procedure?", builtin_procedure_p, 1); + + ADD("eq?", builtin_eq_p, 2); + // neq + ADD("equal?", builtin_equal_p, 2); + + ADD("dump-s", builtin_dump_s, 1); + ADD("display", builtin_dump, 1); +#undef ADD + +#ifdef __MBED_CONFIG_DATA__ + extern void builtin_add_mbed(std::map<const std::string, Value *> &); + builtin_add_mbed(variables); +#endif + } + return variables.at(id); +} + +} diff --git a/ss/compile.cpp b/ss/compile.cpp new file mode 100644 index 0000000..d73a5f8 --- /dev/null +++ b/ss/compile.cpp @@ -0,0 +1,750 @@ +#include "ms.h" +#include <algorithm> + +namespace ms { + +static inline const std::string &unwrap_id(const Value *id) { + return *static_cast<const std::string *>(id->p1); +} + +// Helpers for list; node is either a PAIR or NIL +#define IS_END(node) \ + (node->is_a(Value::NIL)) +#define CHECK_LIST(node) { \ + if (!node->is_a(Value::PAIR)) {\ + compile_error("expected pair"); \ + return -1; \ + } \ +} +#define CHECK_END(node) { \ + if (!IS_END(node)) { \ + compile_error("expected end of token"); \ + return -1; \ + } \ +} +#define CHECK_NOTEND(node) { \ + CHECK_LIST(node); \ + if (IS_END(node)) { \ + compile_error("unexpected end of token"); \ + return -1; \ + } \ +} +#define CHECK(node, t) { \ + CHECK_NOTEND(node); \ + if (!car(node)->is_a(t)) { \ + compile_error("unexpected token"); \ + return -1; \ + } \ +} +#define BEGINS_WITH_KEYWORD(node, s) \ + node->is_a(Value::PAIR) && \ + (car(node)->is_a(Value::SYMBOL) && \ + *static_cast<const std::string *>(car(node)->p1) == s) + +#define CONSUME_KEYWORD(node, s) { \ + CHECK(node, Value::SYMBOL); \ + if (*static_cast<const std::string *>(car(node)->p1) != s) { \ + compile_error("unexpected id: expected " s); \ + return -1; \ + } \ + node = cdr(node); \ +} +#define CONSUME(node) __extension__({ \ + CHECK_LIST(node); \ + const Value *ret = car(node); \ + node = cdr(node); \ + ret; \ +}) +#define CONSUME_ID(node) __extension__({ \ + CHECK(node, Value::SYMBOL); \ + CONSUME(node); \ +}) + +class Compiler { + +public: + Compiler(const Compiler *parent) : + parent(parent), variables(), seq(nullptr), label_next(0), num_args(0), + seq_shield(get_memory()) { + seq = static_cast<Assembly *>(seq_shield.new_object(Value::ASM)); + seq->p1 = new std::vector<Instruction>(); + seq->p2 = nullptr; + } + + int feed_toplevel_sequence(const Value *node) { + while (!IS_END(node)) { + if (feed_toplevel(CONSUME(node)) < 0) + return -1; + if (!IS_END(node)) + emit(Instruction::POP); + } + return 0; + } + + Assembly *result() { + if (finish() < 0) + return nullptr; + return seq; + } + +private: + + int feed_toplevel(const Value *node) { + if (BEGINS_WITH_KEYWORD(node, "define")) + return feed_define(node); + else if (BEGINS_WITH_KEYWORD(node, "define-cfunc")) + return feed_define_cfunc(node); + else if (BEGINS_WITH_KEYWORD(node, "load")) + return -1; // Not supported + else + return feed_exp(node); + } + + int feed_define(const Value *node) { + CONSUME_KEYWORD(node, "define"); + + const Value *id; + if (car(node)->is_a(Value::PAIR)) { + // (define (Id Id* [. Id]) Body) + const Value *id_and_args = CONSUME(node); + id = CONSUME_ID(id_and_args); + emit_lambda(id_and_args, node); + } + else { + // (define Id Exp) + id = CONSUME_ID(node); + if (feed_exp(CONSUME(node)) < 0) + return -1; + CHECK_END(node); + } + int n = bind_variable(id); + // At toplevel, define behaves like set! if id is bound + if (n < 0 && parent) + return -1; + emit(Instruction::SETVAR, id); + emit(Instruction::PUSH, id); + return 0; + } + + int feed_define_cfunc(const Value *node) { + // (define-cfunc Id) + CONSUME_KEYWORD(node, "define-cfunc"); + const Value *id = CONSUME_ID(node); + CHECK_END(node); + + int n = bind_variable(id); + if (n < 0 && parent) + return -1; + emit(Instruction::PUSH, get_cfunc(unwrap_id(id))); + emit(Instruction::SETVAR, id); + emit(Instruction::PUSH, id); + return 0; + } + + int feed_exp(const Value *node) { + switch (node->type) { + case Value::INTEGER: + case Value::BOOLEAN: + case Value::STRING: + case Value::NIL: + emit(Instruction::PUSH, node); + return 0; + case Value::SYMBOL: + // TODO: better way + if (unwrap_id(node) == ":callcc") { + emit(Instruction::PUSH, unshield().new_object(Value::CALLCC)); + return 0; + } + if (check_variable(node) < 0) + return -1; + emit(Instruction::GETVAR, node); + return 0; + default: + if (node->type != Value::PAIR) { + compile_error("expected list"); + return -1; + } + if (BEGINS_WITH_KEYWORD(node, "lambda")) + return feed_lambda(node); + else if (BEGINS_WITH_KEYWORD(node, "quote")) + return feed_quote(node); + else if (BEGINS_WITH_KEYWORD(node, "set!")) + return feed_set(node); + else if (BEGINS_WITH_KEYWORD(node, "let")) + return feed_let(node); + else if (BEGINS_WITH_KEYWORD(node, "let*")) + return feed_letstar(node); + else if (BEGINS_WITH_KEYWORD(node, "letrec")) + return feed_letrec(node); + else if (BEGINS_WITH_KEYWORD(node, "if")) + return feed_if(node); + else if (BEGINS_WITH_KEYWORD(node, "cond")) + return feed_cond(node); + else if (BEGINS_WITH_KEYWORD(node, "and")) + return feed_and(node); + else if (BEGINS_WITH_KEYWORD(node, "or")) + return feed_or(node); + else if (BEGINS_WITH_KEYWORD(node, "begin")) + return feed_begin(node); + else if (BEGINS_WITH_KEYWORD(node, "do")) + return feed_do(node); + else + return feed_apply(node); + } + } + + void emit_lambda(const Value *args, const Value *body) { + emit(Instruction::LAMBDA, unshield().new_pair(args, body)); + } + + int feed_lambda(const Value *node) { + Memory::Shield shield(get_memory()); + + CONSUME_KEYWORD(node, "lambda"); + const Value *args; + CHECK_LIST(node); + if (car(node)->is_a(Value::SYMBOL)) { + // (lambda Id Body) + args = shield.new_pair((Value *)CONSUME_ID(node), shield.new_nil()); + } else { + // (lambda (Id* [Id . Id]) Body) + // check car(node) is a pair or nil + args = CONSUME(node); + } + emit_lambda(args, node); + return 0; + } + + int feed_body(const Value *node) { + if (IS_END(node)) { + emit(Instruction::PUSH, unshield().new_nil()); + return 0; + } + bool found_exp = false; + while (!IS_END(node)) { + const Value *item = CONSUME(node); + if (BEGINS_WITH_KEYWORD(item, "define")) { + if (found_exp) { + compile_error("Define after Exp in Body"); + return -1; + } + if (feed_define(item) < 0) + return -1; + } else { + found_exp = true; + if (feed_exp(item) < 0) + return -1; + } + if (!IS_END(node)) + emit(Instruction::POP); + } + return 0; + } + + int feed_apply(const Value *node) { + if (feed_exp(CONSUME(node)) < 0) + return -1; + + uintptr_t args = 0; + while (!node->is_a(Value::NIL)) { + if (!node->is_a(Value::PAIR)) { + compile_error("unexpected DOT"); + return -1; + } + args++; + if (feed_exp(CONSUME(node)) < 0) + return -1; + } + CHECK_END(node); + emit(Instruction::APPLY, args); + return 0; + } + + int feed_quote(const Value *node) { + CONSUME_KEYWORD(node, "quote"); + const Value *ret = CONSUME(node); + CHECK_END(node); + emit(Instruction::PUSH, ret); + return 0; + } + + int feed_set(const Value *node) { + CONSUME_KEYWORD(node, "set!"); + const Value *id = CONSUME_ID(node); + if (feed_exp(CONSUME(node)) < 0) + return -1; + CHECK_END(node); + if (check_variable(id) < 0) + return -1; + emit(Instruction::DUP); + emit(Instruction::SETVAR, id); + return 0; + } + + int parse_bindings(Memory::Shield &shield, const Value *bindings, Value **pargs, Value **pexprs) { + Value *pair = const_cast<Value *>(CONSUME(bindings)); + Value *bid = const_cast<Value *>(CONSUME(pair)), + *expr = const_cast<Value *>(CONSUME(pair)); + CHECK_END(pair); + + Value *nil = shield.new_nil(); + Value *args = shield.new_pair(bid, nil), + *exprs = shield.new_pair(expr, nil), + *args_prev = args, *expr_prev = exprs; + while (!IS_END(bindings)) { + if (!bindings->is_a(Value::PAIR)) { + compile_error("unexpected DOT in Bindings"); + return -1; + } + pair = const_cast<Value *>(CONSUME(bindings)); + bid = const_cast<Value *>(CONSUME(pair)); + expr = const_cast<Value *>(CONSUME(pair)); + CHECK_END(pair); + args_prev = static_cast<Value *>(args_prev->p2 = shield.new_pair(bid, nil)); + expr_prev = static_cast<Value *>(expr_prev->p2 = shield.new_pair(expr, nil)); + } + *pargs = args; + *pexprs = exprs; + return 0; + } + + int feed_let(const Value *node) { + Memory::Shield shield(get_memory()); + + CONSUME_KEYWORD(node, "let"); + // (let ((a b) (c d)) body) + // => ((lambda (a c) body) b d) + const Value *id = CONSUME(node), + *bindings; + if (id->is_a(Value::SYMBOL)) { + if (bind_variable(id) < 0) + return -1; + bindings = CONSUME(node); + } else { + bindings = id; + id = nullptr; + } + + if (IS_END(bindings)) { + emit_lambda(bindings, node); + if (id) { + emit(Instruction::DUP); + emit(Instruction::SETVAR, id); + } + emit(Instruction::APPLY, (uintptr_t)0); + return 0; + } + + Value *args, *exprs; + if (parse_bindings(shield, bindings, &args, &exprs) < 0) + return -1; + emit_lambda(args, node); + if (id) { + emit(Instruction::DUP); + emit(Instruction::SETVAR, id); + } + uintptr_t argc = 0; + do { + argc++; + if (feed_exp((Value *)exprs->p1) < 0) + return -1; + } while ((exprs = (Value *)exprs->p2)->is_a(Value::PAIR)); + emit(Instruction::APPLY, argc); + return 0; + } + + int feed_letstar(const Value *node) { + Memory::Shield shield(get_memory()); + + CONSUME_KEYWORD(node, "let*"); + // (let* ((a b) (c d)) body) + // => ((lambda (a) ((lambda (c) body) d)) b) + const Value *bindings = CONSUME(node), + *body = node; + + Value *nil = shield.new_nil(); + const Value *args = nil, *exprs = nil; + while (!IS_END(bindings)) { + if (!bindings->is_a(Value::PAIR)) { + compile_error("unexpected DOT in Bindings"); + return -1; + } + const Value *pair = CONSUME(bindings); + args = shield.new_pair(CONSUME(pair), args); + exprs = shield.new_pair(CONSUME(pair), exprs); + CHECK_END(pair); + } + + const Value *lambda = shield.new_symbol("lambda"); + while (!IS_END(args)) { + const Value *la = shield.new_pair(CONSUME(args), nil); + const Value *lm = shield.new_pair(lambda, shield.new_pair(la, body)); + body = shield.new_pair(shield.new_pair(lm, shield.new_pair(CONSUME(exprs), nil)), nil); + } + + CHECK_LIST(body); + return feed_apply(car(body)); + } + + int feed_letrec(const Value *node) { + Memory::Shield shield(get_memory()); + + CONSUME_KEYWORD(node, "letrec"); + // (letrec ((a b) (c d)) body) + // => ((lambda () (define a b) (define c d) body)) + const Value *bindings = CONSUME(node), + *body = node; + + Value *nil = shield.new_nil(); + const Value *args = nil, *exprs = nil; + while (!IS_END(bindings)) { + if (!bindings->is_a(Value::PAIR)) { + compile_error("unexpected DOT in Bindings"); + return -1; + } + const Value *pair = CONSUME(bindings); + args = shield.new_pair(CONSUME(pair), args); + exprs = shield.new_pair(CONSUME(pair), exprs); + CHECK_END(pair); + } + + const Value *define = shield.new_symbol("define"); + while (!IS_END(args)) { + const Value *def = shield.new_pair(define, + shield.new_pair(CONSUME(args), + shield.new_pair(CONSUME(exprs), nil))); + body = shield.new_pair(def, body); + } + + emit_lambda(nil, body); + emit(Instruction::APPLY); + return 0; + } + + int feed_if(const Value *node) { + CONSUME_KEYWORD(node, "if"); + const Value *ccond = CONSUME(node); + const Value *cthen = CONSUME(node); + + if (feed_exp(ccond) < 0) + return -1; + int label_unless = new_label(), + label_end = new_label(); + emit(Instruction::JUMPUNLESS, (uintptr_t)label_unless); + if (feed_exp(cthen) < 0) + return -1; + emit(Instruction::JUMP, (uintptr_t)label_end); + emit(Instruction::NOP, (uintptr_t)label_unless); + if (!IS_END(node)) { + const Value *celse = CONSUME(node); + if (feed_exp(celse) < 0) + return -1; + CHECK_END(node); + } + emit(Instruction::NOP, (uintptr_t)label_end); + return 0; + } + + int feed_cond(const Value *node) { + CONSUME_KEYWORD(node, "cond"); + CHECK_NOTEND(node); + int label_next = new_label(), + label_end = new_label(); + while (!IS_END(node)) { + emit(Instruction::NOP, (uintptr_t)label_next); + + const Value *cl = CONSUME(node); + if (BEGINS_WITH_KEYWORD(cl, "else")) { + CONSUME_KEYWORD(cl, "else"); + if (feed_sequence(cl) < 0) + return -1; + } + else { + if (feed_exp(CONSUME(cl)) < 0) + return -1; + label_next = new_label(); + emit(Instruction::JUMPUNLESS, (uintptr_t)label_next); + if (feed_sequence(cl) < 0) + return -1; + emit(Instruction::JUMP, (uintptr_t)label_end); + } + } + emit(Instruction::NOP, (uintptr_t)label_end); + return 0; + } + + int feed_and(const Value *node) { + CONSUME_KEYWORD(node, "and"); + int label = new_label(); + while (!IS_END(node)) { + if (feed_exp(CONSUME(node)) < 0) + return -1; + if (!IS_END(node)) + emit(Instruction::JUMPUNLESS, (uintptr_t)label); + } + emit(Instruction::NOP, (uintptr_t)label); + return 0; + } + + int feed_or(const Value *node) { + CONSUME_KEYWORD(node, "or"); + int label = new_label(); + while (!IS_END(node)) { + if (feed_exp(CONSUME(node)) < 0) + return -1; + if (!IS_END(node)) + emit(Instruction::JUMPIF, (uintptr_t)label); + } + emit(Instruction::NOP, (uintptr_t)label); + return 0; + } + + int feed_begin(const Value *node) { + CONSUME_KEYWORD(node, "begin"); + return feed_sequence(node); + } + + int feed_do(const Value *node) { + Memory::Shield shield(get_memory()); + + CONSUME_KEYWORD(node, "do"); + // (do + // ((Id1 Def1 Step1) (Id2 Def2 Step2)) + // (Test Exp*) + // Body) + // => + // (x=(lambda (:$ Id1 Id2) + // (if Test (begin Exp*) (begin Body (:$ Step1 Step2)))) + // x Def1 Def2) + const Value *vars = CONSUME(node), + *test_and_exps = CONSUME(node), + *test = CONSUME(test_and_exps), + *exps = test_and_exps, + *body = node; + + std::vector<const Value *> defs; + const Value *nil = shield.new_nil(), + *dollar = shield.new_symbol(":$"), + *lambda_params = nil, *lambda_args = nil; + while (!IS_END(vars)) { + const Value *var = CONSUME(vars), + *id = CONSUME(var), + *def = CONSUME(var), + *step = id; + if (!IS_END(var)) + step = CONSUME(var); + CHECK_END(var); + + lambda_params = shield.new_pair(id, lambda_params); + lambda_args = shield.new_pair(step, lambda_args); + defs.push_back(def); + } + const Value *if_then = + shield.new_pair(shield.new_symbol("begin"), exps); + lambda_args = shield.new_pair(dollar, lambda_args); + const Value *if_else = + shield.new_pair(shield.new_symbol("begin"), + shield.new_pair(shield.new_pair(shield.new_symbol("begin"), body), + shield.new_pair(shield.new_pair(dollar, lambda_args), nil))); + const Value *lambda_body = + shield.new_pair(shield.new_symbol("if"), + shield.new_pair(test, + shield.new_pair(if_then, + shield.new_pair(if_else, nil)))); + + lambda_params = shield.new_pair(dollar, lambda_params); + // ((lambda ...) DUP ...defs) + emit_lambda(lambda_params, shield.new_pair(lambda_body, nil)); + dump(lambda_body); + emit(Instruction::DUP); + for (size_t i = 0; i < defs.size(); i++) + if (feed_exp(defs[i]) < 0) + return -1; + emit(Instruction::APPLY, defs.size() + 1); + + return 0; + } + + int feed_sequence(const Value *node) { + Memory::Shield shield(get_memory()); + + if (IS_END(node)) { + emit(Instruction::PUSH, shield.new_nil()); + return 0; + } + while (!IS_END(node)) { + const Value *q = CONSUME(node); + if (feed_exp(q) < 0) + return -1; + if (!IS_END(node)) + emit(Instruction::POP); + } + return 0; + } + + int feed_arguments(const Value *node) { + while (!IS_END(node)) { + if (node->is_a(Value::SYMBOL)) { + // (arg . varargs) + if (bind_variable(node) < 0) + return -1; + num_args = -num_args - 1; + break; + } + CHECK(node, Value::SYMBOL); + if (bind_variable(CONSUME_ID(node)) < 0) + return -1; + num_args++; + } + return 0; + } + + void compile_error(const std::string &msg) const { + std::cerr << "compile: " << msg << "\n"; + } + + void compile_error(const std::string &msg, const std::string &msg2) const { + std::cerr << "compile: " << msg << msg2 << "\n"; + } + + int bind_variable(const Value *id) { + const std::string &str = unwrap_id(id); + const std::vector<std::string>::const_reverse_iterator it + = std::find(variables.rbegin(), variables.rend(), str); + if (it == variables.rend()) { + int n = variables.size(); + variables.push_back(str); + return n; + } + return -1; + } + + int check_variable(const Value *id) const { + const std::string &str = unwrap_id(id); + const std::vector<std::string>::const_reverse_iterator it + = std::find(variables.rbegin(), variables.rend(), str); + if (it == variables.rend()) { + if (!parent) { + compile_error("undefined variable reference: ", str); + return -1; + } + return parent->check_variable(id); + } + return 0; + } + + void emit(Instruction::Type op, uintptr_t arg) { + Instruction i(op, arg); + seq->seq()->push_back(i); + } + + void emit(Instruction::Type op, const Value *arg) { + emit(op, (uintptr_t)arg); + if (arg) // Conservative + seq_shield.add((Value *)arg); + } + + void emit(Instruction::Type op) { + emit(op, (uintptr_t)0); + } + + int new_label() { + return label_next++; + } + + int finish() { + std::vector<size_t> labels(label_next); + bool *tailcallable = (bool *)std::malloc(seq->seq()->size() * sizeof(bool)); + if (!tailcallable) + return -1; + std::fill(tailcallable, tailcallable + sizeof(bool) * seq->seq()->size(), false); + int ret = -1; + + for (size_t pos = 0; pos < seq->seq()->size(); pos++) { + Instruction *i = &seq->seq()->at(seq->seq()->size() - pos - 1); + switch (i->type) { + case Instruction::LAMBDA: { + const Value *memo = (const Value *)i->arg; + Compiler sub(this); + if (sub.feed_arguments((const Value *)memo->p1) < 0) + goto out; + if (sub.feed_body((const Value *)memo->p2) < 0) + goto out; + Assembly *assm = sub.result(); + if (!assm) + goto out; + i->arg = (uintptr_t)assm; + break; + } + case Instruction::NOP: { + tailcallable[pos] = (pos == 0 || tailcallable[pos - 1]); + + labels[i->arg] = pos; + break; + } + case Instruction::JUMP: { + tailcallable[pos] = (pos == 0 || tailcallable[pos - labels[i->arg]]); + } /* fall through */ + case Instruction::JUMPIF: + case Instruction::JUMPUNLESS: { + // NOP always comes after JUMP* + i->arg = pos - labels[i->arg]; + break; + } + case Instruction::GETVAR: + case Instruction::SETVAR: { + const std::string &str = unwrap_id(reinterpret_cast<const Value *>(i->arg)); + uintptr_t offset = 0; + const Compiler *cmpl = this; + while (cmpl) { + const std::vector<std::string>::const_iterator it + = std::find(cmpl->variables.begin(), cmpl->variables.end(), str); + if (it == cmpl->variables.end()) { + offset += cmpl->variables.size(); + cmpl = cmpl->parent; + continue; + } + i->arg = offset + std::distance(cmpl->variables.begin(), it); + break; + } + break; + } + case Instruction::RET: { + tailcallable[pos] = true; + break; + } + case Instruction::APPLY: { + if (pos == 0 || tailcallable[pos - 1]) + i->type = Instruction::APPLY_TAILCALL; + break; + } + default: + break; + } + } + + emit(Instruction::RET, (uintptr_t)0); + seq->set_num_variables(variables.size(), num_args); + ret = 0; +out: + std::free(tailcallable); + return ret; + } + + const Value *toplevel; + const Compiler *parent; + std::vector<std::string> variables; + Assembly *seq; + int label_next; + int num_args; + Memory::Shield seq_shield; +}; + +Assembly *compile(const Value *node) { + Compiler compiler(nullptr); + if (compiler.feed_toplevel_sequence(node) < 0) + return nullptr; + return compiler.result(); +} + +} diff --git a/ss/eval.cpp b/ss/eval.cpp new file mode 100644 index 0000000..85ec044 --- /dev/null +++ b/ss/eval.cpp @@ -0,0 +1,344 @@ +#include "ms.h" +#include <list> +#include <map> +#include <cstring> +#include <sstream> +#include <cstdlib> + +/* + * Stack frame layout: + * 0 | oldpc + * 1 | oldasm + * 2 | oldenv + * 3 | oldbp // for debug + * 4 V ... + */ + +namespace ms { + +static Value *new_lambda(Assembly *assm, Value *parent_env) { + Value *v = unshield().new_object(Value::LAMBDA); + v->p1 = assm; + v->p2 = parent_env; + return v; +} + +static Value *new_environment(Value *parent, size_t vsize) { + std::vector<Value *> *vec = new std::vector<Value *>(vsize); + std::fill(vec->begin(), vec->end(), (Value *)0); + + Value *v = get_memory()->alloc(Value::ENVIRON); + v->p1 = parent; + v->p2 = vec; + return v; +} + +void environ_mark(Memory &mem, const Value *v) { + if (v->p1) + mem.mark_object(static_cast<Value *>(v->p1)); + const std::vector<Value *> *vec = static_cast<const std::vector<Value *> *>(v->p2); + for (std::vector<Value *>::const_iterator i = vec->begin(); i != vec->end(); i++) { + if (*i) + mem.mark_object(*i); + } +} + +void environ_free(Value *v) { + delete static_cast<std::vector<Value *> *>(v->p2); +} + +void asm_mark(Memory &mem, const Assembly *v) { + for (std::vector<Instruction>::const_iterator i = v->seq()->begin(); + i != v->seq()->end(); i++) { + switch (i->type) { + case Instruction::PUSH: + mem.mark_object(reinterpret_cast<const Value *>(i->arg)); + break; + case Instruction::LAMBDA: + mem.mark_object(reinterpret_cast<const Assembly *>(i->arg)); + break; + default: + break; + } + } +} + +void asm_free(Assembly *v) { + delete v->seq(); +} + +void Continuation::mark(Memory &mem) const { + size_t stop = sp(); + while (stop--) + mem.mark_object(copied_stack()[stop]); +} + +void Continuation::free() { + std::free(p1); // copied_stack +} + +static Continuation *new_continuation(Value **copied_stack, size_t sp) { + Value *v = unshield().new_object(Value::CONT); + v->p1 = copied_stack; + v->p2 = reinterpret_cast<void *>(sp); + return static_cast<Continuation *>(v); +} + +static bool check_argc(int expected, int given) { + return expected == given || expected < 0 && given >= -expected - 1; +} + +static Value *argument_error(int expected, int given) { + std::ostringstream sstream; + sstream << "wrong number of arguments: expected " + << expected << ", given " << given; + return unshield().new_error(sstream.str()); +} + + +class VM { +private: +#define stack_push(v) do { \ + Value *vv = (v); \ + if (sp == stack_depth) \ + return unshield().new_pair(unshield().new_nil(), unshield().new_error("stack overflow")); \ + stack[sp++] = vv; \ +} while (0) +#define stack_pop() \ + stack[--sp] +#define stack_popn(n) \ + (sp -= n) + + static inline std::vector<Value *> *unwrap_env(Value *v) { + return static_cast<std::vector<Value *> *>(v->p2); + } + + static inline Value **resolve_var(Value *env, size_t idx) { + while (idx >= unwrap_env(env)->size()) { + idx -= unwrap_env(env)->size(); + env = static_cast<Value *>(env->p1); + } + return &unwrap_env(env)->at(idx); + } + +public: + VM(const Assembly *assm, Value *parent_env, int stack_depth) : + stack_depth(stack_depth), + toplevel_assm(assm), env(parent_env), sp(0) { + stack = (Value **)std::malloc(sizeof(Value *) * stack_depth); + assert(stack); + } + + ~VM() { + std::free(stack); + } + + Value *execute() { + size_t pc = 0, bp = 0; + const Assembly *assm = toplevel_assm; + env = new_environment(env, assm->num_variables()); + +#ifdef THREADED_CODE +# error FIXME +#else +# define CASE(n) case Instruction::n: +# define NEXT break +#endif + + // FIXME: threaded + while (pc < assm->seq()->size()) { + const Instruction *i = &assm->seq()->at(pc++); + //std::cout << "ins:" << pc-1 << ",itype:" << i->type << "\n"; + switch (i->type) { + CASE(NOP) + NEXT; + CASE(PUSH) + stack_push((Value *)i->arg); + NEXT; + CASE(POP) + (void)stack_pop(); + NEXT; + CASE(DUP) { + Value *v = stack_pop(); + stack_push(v); + stack_push(v); + NEXT; + } + CASE(GETVAR) + stack_push(*resolve_var(env, i->arg)); + NEXT; + CASE(SETVAR) + *resolve_var(env, i->arg) = stack_pop(); + NEXT; + CASE(LAMBDA) + stack_push(new_lambda(reinterpret_cast<Assembly *>(i->arg), env)); + NEXT; + CASE(APPLY) + CASE(APPLY_TAILCALL) +apply_again: { + Memory::Shield shield(get_memory()); + int argc = i->arg; + Value **args = &stack[sp - argc]; + Value *callable = stack[sp - argc - 1]; + // std::cout << "CALLING:"; + // dump(callable); + // std::cout << "\nARGS:" << i->arg << "\n"; + // for (int ik = 0; ik < i->arg; ik++) + // dump(args[ik]); + // std::cout << "\nEND\n"; + // std::cout << "bp="<<bp<<",sp="<<sp<<"\n"; + if (callable->type == Value::LAMBDA) { + shield.add(callable); + + const Assembly *casm = static_cast<const Assembly *>(callable->p1); + if (!check_argc(casm->num_args(), argc)) + return shield.new_pair(shield.new_nil(), + argument_error(casm->num_args(), argc)); + + if (casm->num_args() < 0) { + args[-casm->num_args() - 1] = + shield.new_list(argc - (-casm->num_args() - 1), + &args[-casm->num_args() - 1]); + argc = -casm->num_args(); + } + Value *new_env = shield.add(new_environment(static_cast<Value *>(callable->p2), + casm->num_variables())); + if (argc > 0) + std::memcpy(&unwrap_env(new_env)->at(0), + args, argc * sizeof(Value *)); + stack_popn(i->arg + 1); + + if (bp == 0 || i->type != Instruction::APPLY_TAILCALL) { + // Non-tail call && non-toplevel call + stack_push(shield.new_number(pc)); + stack_push(const_cast<Assembly *>(assm)); + stack_push(env); + stack_push(shield.new_number(bp)); + } + pc = 0; + assm = casm; + env = new_env; + bp = sp; + } else if (callable->type == Value::CFUNC) { + int expected_argc = (intptr_t)callable->p2; + if (!check_argc(expected_argc, argc)) + return shield.new_pair(shield.new_nil(), argument_error(expected_argc, argc)); + + Value *(*func)(int, Value **) = + reinterpret_cast<Value *(*)(int, Value **)>(callable->p1); + Value *ret = func(i->arg, args); + shield.add(ret); + if (ret->is_a(Value::ERROR)) + return shield.new_pair(shield.new_nil(), ret); + stack_popn(argc + 1); + stack_push(ret); + } else if (callable->type == Value::CALLCC) { + if (!check_argc(1, argc)) + return shield.new_pair(shield.new_nil(), argument_error(1, argc)); + + Value *called = shield.add(stack_pop()); // arg for call/cc + (void)shield.add(stack_pop()); // call/cc + + Value **copied_stack = (Value **)std::malloc(sizeof(Value *) * stack_depth); + assert(copied_stack); + std::memcpy(copied_stack, stack, stack_depth * sizeof(Value *)); + copied_stack[sp + 0] = shield.new_number(pc); + copied_stack[sp + 1] = const_cast<Assembly *>(assm); + copied_stack[sp + 2] = env; + copied_stack[sp + 3] = shield.new_number(bp); + // std::cout << "cont saved! bp=" << bp << ",sp=" << sp << ": \n"; + Value *cont = shield.add(new_continuation(copied_stack, sp + 4)); + + stack_push(called); + stack_push(cont); + goto apply_again; + } else if (callable->type == Value::CONT) { + if (!check_argc(1, argc)) + return shield.new_pair(shield.new_nil(), argument_error(1, argc)); + // std::cout << "cont! bp=" << bp << ",sp=" << sp << ": \n"; + Value *arg = shield.add(stack_pop()); // arg for cont + + const Continuation *cont = static_cast<Continuation *>(callable); + std::memcpy(stack, cont->copied_stack(), stack_depth * sizeof(Value *)); + sp = cont->sp(); + + bp = reinterpret_cast<size_t>(stack_pop()->p1); + env = stack_pop(); + assm = static_cast<const Assembly *>(stack_pop()); + pc = reinterpret_cast<size_t>(stack_pop()->p1); + + // std::cout << "cont restored! bp=" << bp << ",sp=" << sp << ": \n"; + stack_push(arg); + } else + return shield.new_pair(shield.new_nil(), shield.new_error("invalid application")); + NEXT; + } + CASE(RET) { + // std::cout << "ret! bp=" << bp << ",sp=" << sp << ": "; + Value *ret = stack_pop(); + // dump(ret); + // std::cout << "\n"; + assert(bp == sp); + if (sp != 0) { + bp = reinterpret_cast<size_t>(stack_pop()->p1); + env = stack_pop(); + assm = static_cast<const Assembly *>(stack_pop()); + pc = reinterpret_cast<size_t>(stack_pop()->p1); + } + + stack_push(ret); + NEXT; + } + CASE(JUMP) + pc += i->arg; + NEXT; + CASE(JUMPUNLESS) + if (!stack_pop()->test()) + pc += i->arg; + NEXT; + CASE(JUMPIF) + if (stack_pop()->test()) + pc += i->arg; + NEXT; + } + } + // Toplevel + assert(bp == 0 && sp == 1); + { + Memory::Shield shield(get_memory()); + return shield.new_pair(shield.add(stack_pop()), shield.new_nil()); + } + } + + size_t stack_depth; + Value **stack; + const Assembly *toplevel_assm; + Value *env; + size_t sp; +}; + +void vm_mark_objects(Memory &mem, const VM *vm) { + if (!vm) + return; + size_t stop = vm->sp; + while (stop--) + mem.mark_object(vm->stack[stop]); + if (vm->env) + mem.mark_object(vm->env); + mem.mark_object(vm->toplevel_assm); +} + +Value *eval(const Assembly *assm, Value *env) { + Memory::Shield shield(get_memory()); + Value *wrapper = shield.new_object(Value::VM); + wrapper->p1 = wrapper->p2 = nullptr; + + VM vm(assm, env, 1024); + wrapper->p1 = &vm; + + Value *ret = vm.execute(); + wrapper->p1 = nullptr; + return ret; +} + +} diff --git a/ss/main.cpp b/ss/main.cpp new file mode 100644 index 0000000..5bb7967 --- /dev/null +++ b/ss/main.cpp @@ -0,0 +1,81 @@ +#include "ms.h" +#include <sstream> +#include <fstream> + +#ifdef __linux__ +static void report_error(const std::string &message) { + std::cerr << "fatal: " << message << "\n"; +} + +static void eval_string(const std::string &source) { + ms::Memory::Shield shield(ms::get_memory()); + + // Parse S-expressions + ms::Value *parsed = ms::sexp_parse_toplevel(source); + if (!parsed) { + report_error("parse error"); + return; + } + shield.add(parsed); + std::cout << "Parsed:\n"; + dump(parsed); + + // Compile into bytecode + ms::Assembly *assm = ms::compile(parsed); + if (!assm) { + report_error("compile error"); + return; + } + shield.add(assm); + std::cout << "Compiled:\n"; + dump(assm); + + // Evaluate + std::cout << "Evaluating:\n"; + ms::Value *rpair = ms::eval(assm, nullptr), + *ret = car(rpair), *rerr = cdr(rpair); + shield.add(rpair); + + if (rerr->type == ms::Value::NIL) + std::cout << "Return: " << dump_s(ret) << "\n"; + else + std::cout << "Runtime error: " << dump_s(rerr) << "\n"; +} + +int main(int argc, char **argv) +{ +#include "prelude.inc" + std::string prelude(reinterpret_cast<const char *>(prelude_scm), prelude_scm_len); + + if (argc == 2) { + std::ifstream t(argv[1]); + std::stringstream buffer; + buffer << prelude; + buffer << t.rdbuf(); + eval_string(buffer.str()); + return 0; + } + + while (true) { + std::string source; + int found_eof = 0; + while (true) { + char c = std::cin.get(); + if (c == 'E') + found_eof = 1; + else if (found_eof == 1 && c == 'O') + found_eof++; + else if (found_eof == 2 && c == 'F') { + source.resize(source.size() - 2); + break; + } else + found_eof = 0; + source.push_back(c); + } + std::string concat(prelude); + concat.append(source); + + eval_string(concat); + } +} +#endif diff --git a/ss/memory.cpp b/ss/memory.cpp new file mode 100644 index 0000000..9972f0a --- /dev/null +++ b/ss/memory.cpp @@ -0,0 +1,204 @@ +#include "ms.h" + +namespace ms { + +Memory *get_memory() { + static Memory memory(1024); + return &memory; +} + +Memory::Block::Block(size_t size) { + slots = new Value[size]; + bitmap = new bool[size]; + for (size_t i = 0; i < size; i++) { + slots[i].type = Value::FREE; + slots[i].p1 = slots + i + 1; + } + slots[size - 1].p1 = nullptr; +} + +Memory::Block::~Block() { + delete bitmap; + delete slots; +} + +Memory::Memory(size_t block_size) : block_size(block_size), blocks(), roots() { + Memory::Block *block = new Memory::Block(block_size); + blocks.push_back(block); + freelist = &block->slots[0]; +} + +Value *Memory::alloc(Value::Type type) { +#if MS_GC_DEBUG_STRESS + run_gc(); +#endif + if (!freelist) { +#if MS_GC_DEBUG + std::cerr << "Memory::alloc(): no free object, triggering GC\n"; +#endif + run_gc(); + if (!freelist) { +#if MS_GC_DEBUG + std::cerr << "Memory::alloc(): still no free object, " + "allocating a new block\n"; +#endif + Memory::Block *block = new Memory::Block(block_size); + blocks.push_back(block); + freelist = &block->slots[0]; + } + } + Value *obj = freelist; + freelist = static_cast<Value *>(freelist->p1); + + obj->type = type; + return obj; +} + +void Memory::mark_object(const Value *v) { + Memory::Block *block = nullptr; + for (std::vector<Memory::Block *>::iterator i = blocks.begin(); + i != blocks.end(); i++) { + if (v >= (*i)->slots && v < (*i)->slots + block_size) { + block = *i; + break; + } + } + assert(block); // Invalid pointer + + size_t pos = v - block->slots; + if (block->bitmap[pos]) + return; + block->bitmap[pos] = true; + + switch (v->type) { + case Value::BOOLEAN: + break; + case Value::NIL: + break; + case Value::PAIR: + mark_object(static_cast<Value *>(v->p1)); + if (v->p2) + mark_object(static_cast<Value *>(v->p2)); + break; + case Value::INTEGER: + break; + case Value::STRING: + case Value::SYMBOL: + case Value::ERROR: + break; + case Value::LAMBDA: + mark_object(static_cast<Assembly *>(v->p1)); + mark_object(static_cast<Value *>(v->p2)); + break; + case Value::CFUNC: + break; + case Value::CALLCC: + break; + case Value::CONT: + static_cast<const Continuation *>(v)->mark(*this); + break; + case Value::ENVIRON: + environ_mark(*this, v); + break; + case Value::VM: + vm_mark_objects(*this, static_cast<VM *>(v->p1)); + break; + case Value::ASM: + asm_mark(*this, static_cast<const Assembly *>(v)); + break; + case Value::SHIELD: { + const std::vector<Value *> *vec = + static_cast<const std::vector<Value *> *>(v->p1); + if (vec) { + for (size_t i = 0; i < vec->size(); i++) + mark_object(vec->at(i)); + } + break; + } + case Value::FREE: + assert(0); + break; + } +} + +void Memory::free_object(Value *v) { + switch (v->type) { + case Value::BOOLEAN: + break; + case Value::NIL: + break; + case Value::PAIR: + break; + case Value::STRING: + case Value::SYMBOL: + case Value::ERROR: + delete static_cast<std::string *>(v->p1); + break; + case Value::INTEGER: + break; + case Value::LAMBDA: + break; + case Value::CFUNC: + break; + case Value::CALLCC: + break; + case Value::CONT: + static_cast<Continuation *>(v)->free(); + break; + case Value::ENVIRON: + environ_free(v); + break; + case Value::VM: + // Is a wrapper object + break; + case Value::ASM: + asm_free(static_cast<Assembly *>(v)); + break; + case Value::SHIELD: + assert(!v->p1); + break; + case Value::FREE: + assert(0); + break; + } +} + +void Memory::run_gc() { +#if MS_GC_DEBUG + std::cerr << "Memory::run_gc(): " << blocks.size() << " blocks, " + << roots.size() << " root objects\n"; + size_t live = 0, freed = 0; +#endif + for (std::vector<Memory::Block *>::iterator i = blocks.begin(); + i != blocks.end(); i++) + std::fill((*i)->bitmap, (*i)->bitmap + sizeof(bool) * block_size, false); + + for (std::list<Value *>::iterator i = roots.begin(); + i != roots.end(); i++) + mark_object(*i); + + for (std::vector<Memory::Block *>::iterator it = blocks.begin(); + it != blocks.end(); it++) { + for (size_t i = 0; i < block_size; i++) { + if (!(*it)->bitmap[i]) { + Value *obj = &(*it)->slots[i]; + if (obj->type == Value::FREE) + continue; + free_object(obj); + obj->type = Value::FREE; + obj->p1 = freelist; + freelist = obj; +#ifdef MS_GC_DEBUG + freed++; + } else { + live++; +#endif + } + } + } +#if MS_GC_DEBUG + std::cerr << "Memory::run_gc(): " << live << " reachable, " << freed << " freed\n"; +#endif +} + +} diff --git a/ss/mini-scheme.txt b/ss/mini-scheme.txt new file mode 100644 index 0000000..659ed59 --- /dev/null +++ b/ss/mini-scheme.txt @@ -0,0 +1,278 @@ +Return-Path: <takuo@c.titech.ac.jp> +X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on walnut.rhe.jp +X-Spam-Level: +X-Spam-Status: No, score=-4.2 required=5.0 tests=BAYES_00,RCVD_IN_DNSWL_MED, + RP_MATCHES_RCVD,URIBL_BLOCKED autolearn=ham autolearn_force=no version=3.4.1 +Delivered-To: k@rhe.jp +Received: from walnut.rhe.jp by walnut.rhe.jp with LMTP id + oPojBVyLzFoBKQAAAEchFg for <k@rhe.jp>; Tue, 10 Apr 2018 10:01:00 +0000 +Received: from mail001.nap.gsic.titech.ac.jp (mail001.nap.gsic.titech.ac.jp + [131.112.13.101]) by walnut.rhe.jp (Postfix) with SMTP id EEF12D4ED3 for + <k@rhe.jp>; Tue, 10 Apr 2018 10:00:59 +0000 (UTC) +Received: By OpenMail Mailer;Tue, 10 Apr 2018 19:00:59 +0900 (JST) +X-CM-AUTO-FORWARD: Yes +X-M2K-MDT-ROLE: yamaguchi.k.aw@m.titech.ac.jp +X-M2K-MDT-ROLE-GRP: 0 +Received: from 172.22.40.199 by mail001.nap.gsic.titech.ac.jp with Mail2000 + ESMTP Server V7.00(24593:0:AUTH_RELAY) (envelope-from + <takuo@c.titech.ac.jp>); Tue, 10 Apr 2018 19:00:59 +0900 (JST) +Received: from [131.112.13.104] (mail004.nap.gsic.titech.ac.jp) by + drweb02.nap.gsic.titech.ac.jp (Dr.Web MailD 6.0.2.8) with SMTP id 009E7906 + for yamaguchi.k.aw@m.titech.ac.jp; Tue, 10 Apr 2018 19:00:59 +Received: from 131.112.13.92 by mail004.nap.gsic.titech.ac.jp with Mail2000 + ESMTP Server V7.00(32578:0:AUTH_RELAY) (envelope-from + <takuo@c.titech.ac.jp>); Tue, 10 Apr 2018 19:00:57 +0900 (JST) +Received: from localhost (localhost [127.0.0.1]) by + filter002.nap.gsic.titech.ac.jp (deepsmtpd.s) with ESMTP id + <5B45593B-6EB8-4A7E-A3B1-0602FC701CE9@c.titech.ac.jp> for + <yamaguchi.k.aw@m.titech.ac.jp>; Tue, 10 Apr 2018 19:00:56 +0900 +Received: from mail004.nap.gsic.titech.ac.jp (131.112.13.104 + [131.112.13.104]) by filter002.nap.gsic.titech.ac.jp (deepsmtpd.s) with + ESMTP id <5B45593B-6EB8-4A7E-A3B1-0602FC701CE9@c.titech.ac.jp> for + <announce@psg.c.titech.ac.jp>; Tue, 10 Apr 2018 19:00:47 +0900 +Received: from 172.22.40.201 by mail004.nap.gsic.titech.ac.jp with Mail2000 + ESMTP Server V7.00(32585:0:AUTH_RELAY) (envelope-from + <takuo@c.titech.ac.jp>); Tue, 10 Apr 2018 19:00:47 +0900 (JST) +Received: from [131.112.13.104] (mail004.nap.gsic.titech.ac.jp) by + drweb04.nap.gsic.titech.ac.jp (Dr.Web MailD 6.0.2.8) with SMTP id 009E83F9 + for announce@psg.c.titech.ac.jp; Tue, 10 Apr 2018 19:00:47 +Received: from 131.112.16.43 by mail004.nap.gsic.titech.ac.jp with Mail2000 + ESMTPA Server V7.00(32578:0:AUTH_LOGIN) (envelope-from + <takuo@c.titech.ac.jp>); Tue, 10 Apr 2018 19:00:47 +0900 (JST) +From: Takuo Watanabe <takuo@c.titech.ac.jp> +Subject: Re: Programming Project for Newcomers (2018) +Date: Tue, 10 Apr 2018 19:00:47 +0900 +References: <C880F20A-F75F-4FE7-9EBB-08B9627D80A2@c.titech.ac.jp> +To: announce@psg.c.titech.ac.jp +In-Reply-To: <C880F20A-F75F-4FE7-9EBB-08B9627D80A2@c.titech.ac.jp> +Message-Id: <5B45593B-6EB8-4A7E-A3B1-0602FC701CE9@c.titech.ac.jp> +X-Mailer: Apple Mail (2.3445.6.18) +X-FROM-DOMAIN: c.titech.ac.jp +X-FROM-EMAIL: takuo@c.titech.ac.jp +X-titech-rate: 0.00% +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +Content-Type: text/plain; charset=utf-8 + +おおよそ1ヶ月程度で作成したインタプリタのデモを行ってもらいます. +その際,以下の質問に答えられるようにしてください. + +* 作成した処理系は要求をみたしているか +* 作成した処理系の特徴 + - どのようなことができるか + - 他の一般的なScheme処理系との違いは何か +* 技術的に解決すべきことは何で,その課題をどのようにして解決したか +* その他実装上工夫した点は何か +* 実装に要した時間はどのくらいか +* 実装上参考にした文献,webサイト,ソフトウェア,言語処理系等は何か + + +> On Apr 10, 2018, at 18:51, Takuo Watanabe <takuo@c.titech.ac.jp> wrote: +> +> 例年新4年生むけに毎年行っているプログラミング課題の案内です. +> 言語処理系・関数プログラミング・記号処理・システムプログラミングに慣れ親しむのが目的です. +> M1以上の人が参加してもかまいません. +> +> コースA: Schemeで実装する +> (a) Schemeサブセット(Mini-Scheme)の機能全部を実装すること. +> (b) 実装したMini-Scheme自身で実装した処理系を動作させることができるようにすること +> (c) [オプション] インタプリタ上のプログラムを ctrl-C で停止できるようにする.その際インタプリタ自身は停止しないようにすること. +> (c) [オプション] DのCommon-Lisp風マクロの代わりに衛生的マクロ機構を実装する +> (d) [オプション] call/cc, 限定継続, unwind-protect, catch-throw のいずれかを実装する +> (ヒント:call/ccのある言語でcal/ccを実装するのは比較的簡単) +> +> コースB: Scheme以外の言語で実装する +> (a) 使用する言語は静的型検査を行う言語であること. +> (b) Schemeサブセット(Mini-Scheme)の機能全部を実装すること.ただしTCOはオプションとする. +> (c) ホスト言語(実装するための言語)の関数やオブジェクトをMini-Scheme内から利用できるようにすること +> (d) [オプション] ホスト言語側のプログラムからMini-Schemeの関数を呼び出せるようにすること +> (e) [オプション] インタプリタ上のプログラムを ctrl-C で停止できるようにする.その際インタプリタ自身は停止しないようにすること. +> (f) [オプション] その他面白そうな言語機構 +> 例:アクターモデル風の並行計算機構,リアクティブプログラミング,文脈志向プログラミング,etc. +> +> コースC: マイコンボード(mbed FRDM-K64F)で動作させる. +> (a) Mini-Schemeの仕様からTCOとマクロは省いてよい +> (b) GCを実装すること +> (c) mbedアプリケーションシールド,あるいは他のセンサなどのペリフェラル(一部でよい)を使えるようにすること +> * 必要な部品や工具は用意する +> * ホスト側である程度前処理して(例えばC/C++や仮想機械語にコンパイルして), +> その出力をmbedで動作させてもよい.その場合ホスト側はScheme等で実装してもかまわない. +> * 使用するボードの情報 +> - FRDM-K64F: https://os.mbed.com/platforms/FRDM-K64F/ +> - mbedアプリケーションシールド: https://os.mbed.com/components/mbed-Application-Shield/ +> * (参考)Cによる小規模な実装の例 +> - https://github.com/wtakuo/tscheme +> +> いずれのコースとも,以下の要件をみたすこと. +> ・構文に合致しない入力はエラーとすること +> ・実行時エラーによって処理系自体が停止してしまわないようにすること(できる範囲で). +> +> 以上です.期間約1ヶ月程度とします.完成したものはゼミの時間にデモをしてもらいます. +> わからないことは遠慮なく私や先輩にきいてください. +> +> +> 実装するSchemeサブセット (Mini-Scheme) +> +> A. 構文 +> +> 非終端記号: +> 大文字[A-Z]で始まる名前 +> +> 終端記号: +> 開き括弧 ( +> 閉じ括弧 ) +> ピリオド . +> 小文字[a-z]で構成される名前 +> シングルクオート ' で囲まれた文字列 +> +> X* 要素Xの0個以上のくりかえし +> X+ 要素Xの1個以上のくりかえし +> [X] 要素Xは高々1個 +> +> Toplevel ::= Exp +> | Define +> | (load String) ファイルの内容を読み込む +> +> Define ::= (define Id Exp) +> | (define (Id Id* [. Id]) Body) +> +> Exp ::= Const 定数 +> | Id 変数 +> | (lambda Arg Body) λ抽象 +> | (Exp Exp*) 関数適用 +> | (quote S-Exp) クオート (注1) +> | (set! Id Exp) 代入 +> | (let [Id] Bindings Body) let +> | (let* Bindings Body) let* (注4) +> | (letrec Bindings Body) letrec +> | (if Exp Exp [Exp]) 条件式(if) +> | (cond (Exp Exp+)* [(else Exp+)]) 条件式(cond) (注5) +> | (and Exp*) 条件式(and) +> | (or Exp*) 条件式(or) +> | (begin Exp*) 逐次実行 +> | (do ((Id Exp Exp)*) (Exp Exp*) Body) 繰り返し +> +> Body ::= Define* Exp+ +> +> Arg ::= Id +> | (Id* [Id . Id]) +> +> Bindings ::= ((Id Exp)*) +> +> S-Exp ::= Const +> | Id +> | (S-Exp* [S-Exp . S-Exp]) +> +> Const ::= Num +> | Bool +> | String +> | () +> +> Num: 最低限10進整数はサポートすること +> +> Bool ::= '#t' +> | '#f' +> +> String: ダブルクオートで囲まれた文字列 (注1) +> +> Id ::= [0-9A-Za-z!$%&*+-./<=>?@^_]+ (注3) +> +> (注1) (quote X) だけでなく 'X という記法もサポートすること. +> (注2) バックスラッシュ \ によるエスケープをサポートすること. +> (注3) 数値となるものをのぞくこと. +> (注4) let* は let の0個以上の繰り返しではなく let* という名前である. +> (注5) 節は(else節を含めて)最低1個はあること.else節は高々1個とすること. +> +> B. 関数(最低限) +> +> 整数 +> number?, +, -, *, /, =, <, <=, >, >= +> +> リスト +> null?, pair?, list?, symbol?, +> car, cdr, cons, list, length, memq, last, append, +> set-car!, set-cdr! +> +> ブール値 +> boolean?, not +> +> 文字列 +> string?, string-append, +> symbol->string, string->symbol, string->number, number->string +> +> 関数 +> procedure? +> +> 比較 +> eq?, neq?, equal? +> +> その他 +> load +> +> C. 末尾呼び出しの最適化(TCO) +> 末尾呼び出しの最適化(Tail Call Optimization, TCO)とは,関数本体の +> 末尾の位置(tail position)に関数呼び出しがある場合,コールスタックを +> 消費せずにその呼び出された関数に制御を移すことです.例えば +> +> (define (even? x) +> (if (= x 0) #t (odd? (- x 1)))) +> (define (odd? x) +> (if (= x 1) #t (even? (- x 1)))) +> +> のような関数 even?, odd? の場合,0以上の引数に対しスタックオーバー +> フローを起こさずに計算できなくてはなりません. +> +> D. Common-Lisp風マクロ +> マクロは例えば以下のように定義します. +> +> (define-macro (positive x) +> (list '> x 0)) +> +> プログラム中に (positive e) という式が出てきた場合,これが +> (list '> e 0) の実行結果で置き換えられ(これを「マクロが展開される」 +> といいます),その後で式の評価が行われます.例えば +> (if (positive (+ (g x) 1)) (foo) (bar)) +> という式の場合,まずマクロが展開されて +> (if (> (+ (g x) 1) 0) (foo) (bar)) +> という形の式が得られて,それからこの式が評価されます. +> +> 次に,以下のような関数定義との違いについて説明しておきます. +> +> (define (positive x) +> (> x 0)) +> +> 関数定義の場合は,(positive e) という式を評価する場合は,まず +> eを評価して,その結果の値が関数 positive の引数になります.一方 +> もしこれがマクロなら,e は評価されずに (> e 0) という形の式に +> 展開され,それから展開後の式が評価されます. +> +> 引数を評価しなくてすむので,関数定義と違って以下のような定義も +> 可能です. +> +> (define (let*-expander vars body) +> (if (null? vars) +> (cons 'begin body) +> (list 'let (list (car vars)) (let*-expander (cdr vars) body)))) +> +> (define-macro (let* vars . body) +> (let*-expander vars body)) +> +> これは let を使って let* を定義した例です.このようにマクロを +> 使うことで,さまざまな構文をユーザが自分で定義することができます. +> +> Schemeには define-syntax (let-syntax) というマクロ定義機構がありますが, +> これは展開の前後で変数名がぶつからないようにする衛生的(hygienic)マクロと呼ばれる +> もので,ちょっと実装は難しいと思います.今回実装してもらうのは Common-Lisp +> 風のマクロです.ちなみにCommon-Lispだと上の例は以下のようになります. +> +> (defun let*-expander (vars body) +> (if (null vars) +> (cons 'begin body) +> (list 'let (list (car vars)) (let*-expander (cdr vars) body)))) +> +> (defmacro let* (vars &rest body) +> (let*-expander vars body)) +> +> 以上. +> + @@ -0,0 +1,466 @@ +#ifndef MS_H +#define MS_H + +#include <string> +#include <vector> +#include <list> +#include <stdint.h> // yucks +#include <iostream> +#include <cassert> +#include <sstream> + +#define nullptr 0 + +namespace ms { +struct Value { + enum Type { + BOOLEAN, // {bool} + NIL, // {} + PAIR, // {Value, Value} + SYMBOL, // {std::string} + INTEGER, // {intptr_t} + STRING, // {std::string} + + LAMBDA, // {ASM, ENVIRON} + CFUNC, // {ptr, int(args)} + CALLCC, // {} + CONT, // {ptr(stack)} + + ASM, // {vec, int(vars)} internal + ENVIRON, // {ENVIRON(parent), vec<Value>} internal + VM, // {} internal + ERROR, // {std::string} internal + + SHIELD, // {vec<Value>} internal + FREE, // {ptr(next)} internal + } type; + void *p1, *p2; + + inline bool is_a(Value::Type type) const { + return this->type == type; + } + + inline bool test() const { + return !(type == Value::BOOLEAN && !p1); + } +}; + +struct Instruction { + enum Type { + NOP, + PUSH, + POP, + DUP, + GETVAR, + SETVAR, + LAMBDA, + APPLY, + APPLY_TAILCALL, + RET, + JUMP, + JUMPUNLESS, + JUMPIF, + } type; + + uintptr_t arg; + + Instruction(Instruction::Type op, uintptr_t arg) : + type(op), arg(arg) { + } +}; + +// memory.cc +class Memory { +public: + Memory(size_t block_size); + Value *alloc(Value::Type type); + void run_gc(); + void mark_object(const Value *v); + + struct ObjectFactory { + ObjectFactory(Memory *mem) : mem(mem) { + } + + virtual Value *new_object(Value::Type type) { + Value *ret = mem->alloc(type); + ret->p1 = ret->p2 = nullptr; + return ret; + } + + Value *new_nil() { + return new_object(Value::NIL); + } + + Value *new_bool(bool b) { + Value *ret = new_object(Value::BOOLEAN); + ret->p1 = b ? (void *)-1 : nullptr; // XXX + return ret; + } + + Value *new_number(intptr_t i) { + Value *ret = new_object(Value::INTEGER); + ret->p1 = reinterpret_cast<void *>(i); + return ret; + } + + Value *new_pair(Value *a, Value *b) { + Value *ret = new_object(Value::PAIR); + // assert(a && a->type != Value::FREE); + // assert(!b || b->type != Value::FREE); + ret->p1 = a; + ret->p2 = b; + return ret; + } + + // TODO: REMOVE + const Value *new_pair(const Value *a, const Value *b) { + Value *ret = new_object(Value::PAIR); + ret->p1 = (void *)a; + ret->p2 = (void *)b; + return ret; + } + + Value *new_list(size_t num, Value **vs) { + if (num == 0) + return new_nil(); + Value *ret = new_pair(vs[0], nullptr), + *prev = ret; + for (size_t i = 1; i < num; i++) { + prev->p2 = new_pair(vs[i], nullptr); + prev = static_cast<Value *>(prev->p2); + } + prev->p2 = new_nil(); + return ret; + } + + Value *new_string(const std::string &s) { + Value *ret = new_object(Value::STRING); + ret->p1 = new std::string(s); + return ret; + } + + Value *new_string(std::string *s) { + Value *ret = new_object(Value::STRING); + ret->p1 = s; + return ret; + } + + Value *new_symbol(const std::string &s) { + Value *ret = new_string(s); + ret->type = Value::SYMBOL; + return ret; + } + + Value *new_error(const std::string &s) { + Value *ret = new_string(s); + ret->type = Value::ERROR; + return ret; + } + + Value *new_cfunc(Value *(*ptr)(int, Value **), int argc) { + Value *v = new_object(Value::CFUNC); + v->p1 = reinterpret_cast<void *>(ptr); + v->p2 = reinterpret_cast<void *>(argc); + return v; + } + + protected: + Memory *mem; + }; + + struct Shield : ObjectFactory { + Shield(Memory *mem) : ObjectFactory(mem), vec() { + shield = mem->alloc(Value::SHIELD); + shield->p1 = &vec; + shield->p2 = nullptr; + + mem->roots.push_back(shield); + }; + + ~Shield() { + mem->roots.remove(shield); + shield->p1 = nullptr; + } + + Value *new_object(Value::Type type) { + Value *ret = ObjectFactory::new_object(type); + return add(ret); + } + + Value *add(Value *obj) { + vec.push_back(obj); + return obj; + } + + private: + Value *shield; + std::vector<Value *> vec; + }; + +private: + struct Block { + Block(size_t size); + ~Block(); + + Value *slots; + bool *bitmap; + }; + void free_object(Value *v); + + size_t block_size; + std::vector<Block *> blocks; + Value *freelist; +public: + std::list<Value *> roots; + bool allow_gc; +}; +Memory *get_memory(); + +static inline Memory::ObjectFactory &unshield() { + static Memory::ObjectFactory oc(get_memory()); + return oc; +} + + +struct Assembly : Value { + inline std::vector<Instruction> *seq() const { + return static_cast<std::vector<Instruction> *>(p1); + } + + inline void set_num_variables(int num, int args) { + assert(!p2); + p2 = new std::pair<int, int>(num, args); + } + + inline size_t num_variables() const { + return static_cast<std::pair<int, int> *>(p2)->first; + } + + inline int num_args() const { + return static_cast<std::pair<int, int> *>(p2)->second; + } +}; +struct Continuation : Value { + inline const Value **copied_stack() const { + return static_cast<const Value **>(p1); + } + + inline size_t sp() const { + return reinterpret_cast<size_t>(p2); + } + + void mark(Memory &) const; + void free(); +}; +// static asser: sizeof Value == sizeof Assembly + + + +// sexp_parse.cc +Value *sexp_parse_toplevel(const std::string &); + +// compile.cc +Assembly *compile(const Value *); + +// builtin.cc +Value *get_cfunc(const std::string &id); + +// eval.cc +class VM; +void vm_mark_objects(Memory &, const VM *); +void environ_mark(Memory &, const Value *); +void environ_free(Value *); +void asm_mark(Memory &, const Assembly *); +void asm_free(Assembly *); +Value *eval(const Assembly *assm, Value *env); + + +// utils +static inline const Value *car(const Value *p) { + assert(p->type == Value::PAIR); + return static_cast<const Value *>(p->p1); +} + +static inline const Value *cdr(const Value *p) { + assert(p->type == Value::PAIR); + return static_cast<const Value *>(p->p2); +} + +static inline Value *car(Value *p) { + assert(p->type == Value::PAIR); + return static_cast<Value *>(p->p1); +} + +static inline Value *cdr(Value *p) { + assert(p->type == Value::PAIR); + return static_cast<Value *>(p->p2); +} + +static inline void dump_s(const Assembly *, std::ostream &, size_t); +static inline void dump_s(const Value *v, std::ostream &os, size_t indent) { + if (!v) { + os << "<nullptr>"; + return; + } + switch (v->type) { + case Value::BOOLEAN: + os << (v->p1 ? "true" : "false"); + break; + case Value::NIL: + os << "<nil>"; + break; + case Value::PAIR: { + assert(v->p1); + assert(v->p2); + + os << "("; + dump_s(static_cast<const Value *>(v->p1), os, indent + 1); + const Value *next = static_cast<const Value *>(v->p2); + while (true) { + if (next->is_a(Value::NIL)) { + os << ")"; + break; + } else if (next->is_a(Value::PAIR)) { + os << " "; + dump_s(static_cast<const Value *>(next->p1), os, indent + 1); + next = static_cast<const Value *>(next->p2); + } else { + os << " . "; + dump_s(next, os, indent + 1); + os << ")"; + break; + } + } + break; + } + case Value::SYMBOL: + assert(!v->p2); + os << *static_cast<const std::string *>(v->p1); + break; + case Value::INTEGER: + assert(!v->p2); + os << reinterpret_cast<intptr_t>(v->p1); + break; + case Value::STRING: + assert(!v->p2); + os << "<string " << *static_cast<const std::string *>(v->p1) << ">"; + break; + case Value::LAMBDA: + os << "<lambda "; + dump_s(static_cast<const Value *>(v->p1), os, indent + 1); + os << " "; + dump_s(static_cast<const Value *>(v->p2), os, indent + 1); + os << ">"; + break; + case Value::CFUNC: + os << "<cfunc " << v->p1 << ">"; + break; + case Value::CALLCC: + os << "<callcc>"; + break; + case Value::CONT: + os << "<continuation>"; + break; + case Value::ENVIRON: + os << "<environ>"; + break; + case Value::VM: + os << "<vm>"; + break; + case Value::SHIELD: + os << "<shield>"; + break; + case Value::ASM: + os << "<asm "; + dump_s((const Assembly *)v, os, indent + 1); + os << ">"; + break; + case Value::ERROR: + os << "<error: " << *static_cast<const std::string *>(v->p1) << ">"; + break; + case Value::FREE: + os << "<free>"; + break; + } +} + +static inline std::string dump_s(const Value *a) { + std::ostringstream os; + dump_s(a, os, 0); + os << "\n"; + return os.str(); +} + +static inline void dump(const Value *a) { + dump_s(a, std::cerr, 0); + std::cerr << "\n"; +} + +static inline void dump_s(const Assembly *a, std::ostream &os, size_t indent) { + os << "dump-asm: num_variables=" << a->num_variables() << ":num_args=" << a->num_args() << "\n"; + for (std::vector<Instruction>::const_iterator i = a->seq()->begin(); + i != a->seq()->end(); i++) { + os << std::string(indent, '\t'); + //os << "ins:" << i->type <<", arg:" << i->arg << std::endl; + switch (i->type) { + case Instruction::NOP: + os << "NOP\n"; + break; + case Instruction::PUSH: + os << "PUSH\t\t"; + dump_s((const Value *)i->arg, os, indent + 1); + os << "\n"; + break; + case Instruction::POP: + os << "POP\n"; + break; + case Instruction::DUP: + os << "DUP\n"; + break; + case Instruction::GETVAR: + os << "GETVAR\t\t" << i->arg << "\n"; + break; + case Instruction::SETVAR: + os << "SETVAR\t\t" << i->arg << "\n"; + break; + case Instruction::LAMBDA: + os << "LAMBDA\t\tasm="; + dump_s((const Assembly *)i->arg, os, indent + 1); + break; + case Instruction::APPLY_TAILCALL: + os << "APPLY_TAILCALL\targs=" << i->arg << "\n"; + break; + case Instruction::APPLY: + os << "APPLY\t\targs=" << i->arg << "\n"; + break; + case Instruction::RET: + os << "RET\n"; + break; + case Instruction::JUMP: + os << "JUMP\t\t+" << i->arg << "\n"; + break; + case Instruction::JUMPUNLESS: + os << "JUMPUNLESS\t+" << i->arg << "\n"; + break; + case Instruction::JUMPIF: + os << "JUMPIF\t\t+" << i->arg << "\n"; + break; + } + } +} + +static inline std::string dump_s(const Assembly *a) { + std::ostringstream os; + dump_s(a, os, 0); + os << "\n"; + return os.str(); +} + +static inline void dump(const Assembly *a) { + dump_s(a, std::cerr, 0); + std::cerr << "\n"; +} + +} + +#endif diff --git a/ss/prelude.scm b/ss/prelude.scm new file mode 100644 index 0000000..4ce1f65 --- /dev/null +++ b/ss/prelude.scm @@ -0,0 +1,54 @@ +(define-cfunc number?) +(define-cfunc +) +(define-cfunc -) +(define-cfunc *) +(define-cfunc /) +(define-cfunc =) +(define-cfunc <) +(define-cfunc <=) +(define-cfunc >) +(define-cfunc >=) + +(define-cfunc null?) +(define-cfunc pair?) +(define-cfunc list?) +(define-cfunc symbol?) +(define-cfunc car) +(define-cfunc cdr) +(define-cfunc cons) +(define-cfunc list) +(define-cfunc length) +(define-cfunc memq) +(define-cfunc last) +(define-cfunc append) +(define-cfunc set-car!) +(define-cfunc set-cdr!) + +(define-cfunc boolean?) +(define (not a) (if a #f #t)) + +(define-cfunc string?) +(define-cfunc string-append) +(define-cfunc symbol->string) +(define-cfunc string->symbol) +(define-cfunc string->number) +(define-cfunc number->string) + +(define-cfunc procedure?) + +(define-cfunc eq?) +(define (neq? a b) (not (eq? a b))) +(define-cfunc equal?) + + +(define-cfunc dump-s) +(define-cfunc display) +(define call-with-current-continuation :callcc) +(define call/cc call-with-current-continuation) + + +(define (map func lst) + (let recur ((rest lst)) + (if (null? rest) + '() + (cons (func (car rest)) (recur (cdr rest)))))) diff --git a/ss/sexp_parse.cpp b/ss/sexp_parse.cpp new file mode 100644 index 0000000..eda2e08 --- /dev/null +++ b/ss/sexp_parse.cpp @@ -0,0 +1,262 @@ +#include "ms.h" +#include <stack> +#include <iostream> + +namespace ms { + +struct token { + enum token_type { + LPAREN, + RPAREN, + DOT, + QUOTE, + BOOLEAN, + NUMBER, + IDENT, + STRING, + + EOF, + LEX_ERROR, + } type; + size_t start; + + std::string dstr; + int dint; + bool dbool; +}; + +struct Parser { + Parser(const std::string &src) : + src(src), lpos(0), bs() { + } + + Value *parse() { + token t = next_token(); + switch (t.type) { + // Const + case token::BOOLEAN: + return unshield().new_bool(t.dbool); + case token::NUMBER: + return unshield().new_number(t.dint); + case token::STRING: + return unshield().new_string(t.dstr); + // Id + case token::IDENT: + return unshield().new_symbol(t.dstr); + // 'S-Exp + case token::QUOTE: { + Memory::Shield shield(get_memory()); + Value *i = parse(); + if (!i) + return nullptr; + shield.add(i); + return shield.new_pair(shield.new_symbol("quote"), + shield.new_pair(i, shield.new_nil())); + } + // (S-Exp* [S-Exp . S-Exp]) + case token::LPAREN: { + if (peek_token().type == token::RPAREN) { + next_token(); + return unshield().new_nil(); + } + Memory::Shield shield(get_memory()); + Value *first = parse(); + if (!first) + return nullptr; + shield.add(first); + Value *ret = shield.new_pair(first, nullptr), + *prev = ret; + while (true) { + token tc = peek_token(); + if (tc.type == token::RPAREN) { + next_token(); + prev->p2 = shield.new_nil(); + break; + } + if (tc.type == token::DOT) { + next_token(); + prev->p2 = parse(); + token rp = next_token(); + if (rp.type != token::RPAREN) { + parse_error("expected rparen"); + return nullptr; + } + break; + } + Value *item = parse(); + if (!item) + return nullptr; + shield.add(item); + Value *nv = shield.new_pair(item, nullptr); + prev->p2 = nv; + prev = nv; + } + return ret; + } + default: + parse_error("expected token"); + return nullptr; + } + } + + Value *parse_toplevel() { + Memory::Shield shield(get_memory()); + + Value *ret = shield.new_pair(shield.new_nil(), nullptr), + *prev = ret; + while (peek_token().type != token::EOF) { + Value *item = parse(); + if (!item) + return nullptr; + shield.add(item); + Value *nv = shield.new_pair(item, nullptr); + prev->p2 = nv; + prev = nv; + } + prev->p2 = shield.new_nil(); + return (Value *)ret->p2; + } + + void parse_error(const std::string &msg) { + std::cerr << "sexp_parse: " << msg << "\n"; + } + + token unput_token(token t) { + bs.push(t); + return t; + } + + token next_token() { + if (bs.empty()) + return read_token(); + token t = bs.top(); + bs.pop(); + return t; + } + + token peek_token() { + if (!bs.empty()) + return bs.top(); + return unput_token(read_token()); + } + + token read_token() { + token t; + +again: + if (lpos == src.size()) { + t.type = token::EOF; + t.start = lpos; + return t; + } + switch (src.at(lpos)) { + case ' ': case '\t': case '\n': case '\r': + lpos++; + goto again; + case ';': + while (src.size() != ++lpos) + if (src.at(lpos) == '\n') + break; + goto again; + case '(': + t.type = token::LPAREN; t.start = lpos++; + break; + case ')': + t.type = token::RPAREN; t.start = lpos++; + break; + case '\'': + t.type = token::QUOTE; t.start = lpos++; + break; + case '.': + t.type = token::DOT; t.start = lpos++; + break; + case '#': + if (src.size() == lpos || + (src.at(lpos + 1) != 't' && + src.at(lpos + 1) != 'f')) { + t.type = token::LEX_ERROR; + t.start = lpos; + break; + } + t.type = token::BOOLEAN; t.start = lpos; + t.dbool = src.at(lpos + 1) == 't'; + lpos += 2; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + t.type = token::NUMBER; + t.start = lpos; + t.dint = 0; + while (src.size() != lpos && + src.at(lpos) >= '0' && + src.at(lpos) <= '9') + t.dint = t.dint * 10 + src.at(lpos++) - '0'; + break; + case '"': { + t.type = token::STRING; + t.start = lpos; + t.dstr = ""; + bool quoted = false; + while (true) { + if (src.size() == ++lpos) { + t.type = token::LEX_ERROR; + break; + } + if (quoted) { + if (src.at(lpos) == '"') + t.dstr.push_back('"'); + else { + t.type = token::LEX_ERROR; + break; + } + quoted = false; + continue; + } + if (src.at(lpos) == '\'') { + quoted = true; + continue; + } + if (src.at(lpos) == '"') { + lpos++; + break; + } + t.dstr.push_back(src.at(lpos)); + } + break; + } + default: + t.type = token::IDENT; + t.start = lpos; + t.dstr = ""; + const std::string idchars = "!$%&*+-./<=>?@^_:"; + while (src.size() != lpos) { + char c = src.at(lpos); + if ('0' <= c && c <= '9' || + 'a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z' || + idchars.find(c) != std::string::npos) { + t.dstr.push_back(c); + lpos++; + } + else + break; + } + if (t.dstr.empty()) { + t.type = token::LEX_ERROR; + } + break; + } + return t; + } + + const std::string &src; + size_t lpos; + std::stack<token> bs; +}; + +Value *sexp_parse_toplevel(const std::string &str) { + Parser parser(str); + return parser.parse_toplevel(); +} + +} |