;; ---- 1996 Scheme Workshop -- Compiling Scheme ;; -- scanparse.ss ;;; This file contains a scanner and a parser for R4RS Scheme (with ;;; integer being the only numeric type). (define scan (let ([digit->value (let ((zero (char->integer #\0))) (lambda (c) (- (char->integer c) zero)))] [scan-error (lambda (c) (error 'scan "Unexpected character ~s" c))] [whitespace? (lambda (ch) (memv ch '(#\space #\newline #\tab)))] [digit? (lambda (ch) (<= (char->integer #\0) (char->integer ch) (char->integer #\9)))] [alpha? (lambda (ch) (let ([n (char->integer ch)]) (or (<= (char->integer #\a) n (char->integer #\z)) (<= (char->integer #\A) n (char->integer #\Z)))))] [spec-initial? (lambda (ch) (memv ch '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\~ #\_ #\^)))] [end-comment? (lambda (ch) (or (eof-object? ch) (eqv? #\newline ch)))] [delimeter? (lambda (ch) (or (eof-object? ch) (memv ch '(#\space #\newline #\tab #\( #\[ #\) #\] #\" #\;))))]) (letrec ([s0 (lambda (ip) (let ([c (read-char ip)]) (cond ((whitespace? c) (s0 ip)) ((eof-object? c) '(eof)) ((eqv? #\( c) '(lparen)) ((eqv? #\) c) '(rparen)) ((eqv? #\[ c) '(lbrack)) ((eqv? #\] c) '(rbrack)) ((eqv? #\' c) '(quote)) ((eqv? #\` c) '(grave)) ((eqv? #\. c) (s-dot 1 ip)) ((eqv? #\; c) (s-comment ip)) ((eqv? #\, c) (s-comma ip)) ((eqv? #\" c) (s-string '() ip)) ((eqv? #\# c) (s-hash ip)) ((eqv? #\+ c) (s-sign #t ip)) ((eqv? #\- c) (s-sign #f ip)) ((digit? c) (s-number #t (digit->value c) ip)) ((or (alpha? c) (spec-initial? c)) (s-identifier (list c) ip)) (else (scan-error c)))))] [s-comment (lambda (ip) (let ([c (read-char ip)]) (cond ((end-comment? c) (s0 ip)) (else (s-comment ip)))))] [s-dot (lambda (n ip) (let ([c (read-char ip)]) (cond ((whitespace? c) (case n ((1) '(dot)) ((3) '(ellipsis)) (else (error 'scan "Bad token ~a" (make-string n #\.))))) ((eqv? #\. c) (s-dot (add1 n) ip)) (else (scan-error c)))))] [s-comma (lambda (ip) (let ([c (peek-char ip)]) (cond ((eqv? #\@ c) (read-char ip) '(comma-at)) (else '(comma)))))] [s-string (lambda (acc ip) (let ([c (read-char ip)]) (cond ((eqv? #\" c) `(datum ,(list->string (reverse acc)))) ((eqv? #\\ c) (s-string (cons (read-char ip) acc) ip)) (else (s-string (cons c acc) ip)))))] [s-hash (lambda (ip) (let ([c (read-char ip)]) (cond ((eqv? #\t c) (s-need-delimiter '(boolean #t) ip)) ((eqv? #\f c) (s-need-delimiter '(boolean #f) ip)) ((eqv? #\( c) '(hash-lparen)) ((eqv? #\\ c) (s-char '() ip)) (else (scan-error c)))))] [s-need-delimiter (lambda (acc ip) (let ([c (peek-char ip)]) (cond ((delimeter? c) acc) (else (scan-error c)))))] [s-char (lambda (acc ip) (let ([c (peek-char ip)]) (cond ((delimeter? c) (cond ((null? acc) (read-char ip) `(datum ,c)) ((null? (cdr acc)) `(datum ,(car acc))) (else (let ((name (list->string (reverse acc)))) (cond ((string=? name "space") '(datum #\space)) ((string=? name "tab") '(datum #\tab)) ((string=? name "newline") '(datum #\newline)) (else (error 'scan "Bad character name ~a" name))))))) (else (read-char ip) (s-char (cons c acc) ip)))))] [s-sign (lambda (plus? ip) (let ([c (peek-char ip)]) (cond ((delimeter? c) `(identifier ,(if plus? '+ '-))) ((digit? c) (read-char ip) (s-number plus? (digit->value c) ip)) (else (parse-error c)))))] [s-number (lambda (pos? acc ip) (let ([c (peek-char ip)]) (cond ((delimeter? c) `(datum ,(if pos? acc (- acc)))) ((digit? c) (read-char ip) (s-number pos? (+ (* acc 10) (digit->value c)) ip)) (else (parse-error c)))))] [s-identifier (lambda (acc ip) (let ([c (peek-char ip)]) (cond ((delimeter? c) `(identifier ,(string->symbol (list->string (reverse acc))))) ((or (alpha? c) (spec-initial? c) (digit? c) (memv c '(#\. #\+ #\-))) (read-char ip) (s-identifier (cons c acc) ip)) (else (parse-error c)))))]) (lambda (ip) (s0 ip))))) (define parse (let ((parse-error (lambda (tok) (error 'parse "unexpected token ~s" tok)))) (letrec ((reverse* ; needed for improper lists (lambda (acc ls) (if (null? ls) acc (reverse* (cons (car ls) acc) (cdr ls)))))) (letrec ((p-list0 (lambda (ip) (let ([tok (scan ip)]) (record-case tok (rparen () '()) (else (let* ((head (p tok ip))) (p-list (list head) ip))))))) (p-list (lambda (acc ip) (let ([tok (scan ip)]) (record-case tok (rparen () (reverse acc)) (dot () (let* ((tail (p (scan ip) ip)) (tok (scan ip))) (record-case tok (rparen () (reverse* tail acc)) (else (parse-error tok))))) (else (let* ((head (p tok ip))) (p-list (cons head acc) ip))))))) (p-vector (lambda (acc ip) (let ([tok (scan ip)]) (record-case tok (rparen () (list->vector (reverse acc))) (else (let ((head (p tok ip))) (p-vector (cons head acc) ip))))))) (p (lambda (tok ip) (record-case tok (datum (d) d) (identifier (i) i) (lparen () (p-list0 ip)) (hash-lparen () (p-vector '() ip)) (quote () (list 'quote (p (scan ip) ip))) (grave () (list 'quasiquote (p (scan ip) ip))) (comma () (list 'unquote (p (scan ip) ip))) (comma-at () (list 'unquote-splicing (p (scan ip) ip))) (rparen () (parse-error tok)) (dot () (parse-error tok)) (eof () (parse-error tok)) (else (error 'parse "sanity-check: Bad token ~s" tok)))))) (lambda (ip) (p (scan ip) ip))))))