;; ---- 1996 Scheme Workshop -- Compiling Scheme ;; -- back.ss ;;; the code generator ;; ---------- Run-time and architecture considerations ;; ---- Registers ; All registers are referred to symbolically in this pass. Here are ; the symbols. ;; fp -- frame pointer (points to the _base_ of the frame) ;; cp -- closure pointer ;; ap -- allocation pointer (points to the first free word in the heap) ;; ac -- accumulator (a general purpose register. All procedure ;; returns leave their values in the accumulator) ;; t1, t2, t3 -- three general-purpose registers ;; ---- Memory layout ;; a closure is heap allocated and looks much like a vector: ;; ----------------------------------------------------------- ;; | header | code pointer | free value0 | free value1 | ... | ;; ----------------------------------------------------------- ;; ^ ;; \--- cp ;; the header contains nothing right now, and would contain the ;; closure size if we were garbage collecting. ;; A frame is stack allocated, and looks like: ;; |-----------------------| ;; | ... | ;; |-----------------------| ;; | inline argument | ;; |-----------------------| ;; | | ;; | ... | ;; |-----------------------| ;; | bound value1 | ;; |-----------------------| ;; | bound value0 | ;; |-----------------------| ;; | return code pointer | <---- fp ;; |-----------------------| ;; | saved closure pointer | ;; |-----------------------| ;; The saved closure pointer is only placed when a non-tail call is ;; about to be made, so it won't be there on the top frame (so really ;; you could say that the saved closure pointer for the continuation ;; is one word lower than the fp at all times). ;; ---- Data formats ;; Numbers: ;; -------------------------------------- ;; | 29-bit 2's complement integer 000 | ;; -------------------------------------- ;; Booleans: ;; ------------------ ------------------ ;; #t: | ... 1 00000001 | #f: | ... 0 00000001 | ;; ------------------ ------------------ ;; Empty lists: ;; ---------------- ;; | ... 00001001 | ;; ---------------- ;; Characters: ;; -------------------------------------- ;; | ... 8-bit character data 00010001 | ;; -------------------------------------- ;; Pairs: ;; --------------- ------------- ;; | address 010 | --> | car | cdr | ;; -----\--------- / ------------- ;; ----------- ;; Strings: ;; --------------- ------------------------------------------------- ;; | address 011 | --> | length | string data (may span many words)... | ;; -----\--------- / ------------------------------------------------- ;; ----------- ;; Symbols: ;; --------------- -------------------------- ;; | address 100 | --> | symbol name (a string) | ;; -----\--------- / -------------------------- ;; ----------- ;; Vectors: ;; --------------- ;; | address 101 | ;; -----|--------- ;; v ;; ----------------------------------------------------------- ;; | length | (v-ref 0) | (v-ref 1) | ... | (v-ref length-1) | ;; ----------------------------------------------------------- ;; Closures: ;; --------------- ;; | address 110 | ;; -----|--------- ;; v ;; ----------------------------------------------------------------------- ;; | length | code pointer | (free 0) | (free 1) | ... | (free length-1) | ;; ----------------------------------------------------------------------- ;; ---- Tagging ; All values are tagged in their lower three bits. (define number-tag #b000) (define immed-tag #b001) (define pair-tag #b010) (define string-tag #b011) (define symbol-tag #b100) (define vector-tag #b101) (define closure-tag #b110) (define mask #b111) (define tag-len 3) ; Numbers are represented in two's complement form. Since three bits ; are used by the tag, our range is -2^28 to 2^28-1 ; ``immediates'' have eight bits for tag information. The uppper bits ; are used for the actual data. For a character that means the ascii ; representation. #t sets bit nine to 1, #f sets bit nine to 0. (define bool-tag #b00000001) (define null-tag #b00001001) (define char-tag #b00010001) (define imm-mask #b11111111) (define imm-tag-len 8) ;; In order not to scatter `4's around the code, the symbolic constant ;; ws stands for ``word size'' (define ws 4) ;; ---- literal encoding (define encode (let ([numtop (expt 2 29)]) (lambda (obj) (cond [(number? obj) (cond [(and (<= 0 obj) (< obj numtop)) (* obj (+ mask 1))] [(and (<= (- numtop) obj) (< obj 0)) (* (+ numtop obj) (+ mask 1))] [else (error 'encode "~s is out of range" obj)])] [(boolean? obj) (+ (* (if obj 1 0) (+ imm-mask 1)) bool-tag)] [(null? obj) null-tag] [(char? obj) (let ([val (char->integer obj)]) (+ (* val (+ imm-mask 1)) char-tag))] [else (error 'encode "~s not encodable" obj)])))) ;; ---- Output format ;; The output of the code generator is a list whose car is the symbol ;; `instructions'. The procedure instructions, below, handles the ;; flattening of the list so nobody ever has to write the word ;; ``append'' in their code. (define instructions (lambda args (cons 'instructions (let loop ([ls args]) (if (null? ls) '() (if (eq? (caar ls) 'instructions) (append (cdar ls) (loop (cdr ls))) (cons (car ls) (loop (cdr ls))))))))) ;; -------------------- The Code Generator proper ;; Two parameters are specially handled by the code generator, dd and ;; cd. ;; dd is the ``data destination'', the place where the data from ;; the currently compiled expression should go. It should be one of: ;; * the symbol `effect', signifying that the data isn't really needed, ;; * a register (symbol), or ;; * a list (register offset), where offset is an integer. ;; cd is the ``control destination'', the place where control will ;; flow after the currently compiled expression is done. It should be ;; one of: ;; * the symbol `return', signifying that a procedure return will be ;; done next, ;; * A label (currently represented as a symbol), or ;; * a list of two labels: (labelA labelB), signifying that if the ;; current expression turns out to be true, control should transfer ;; to label A, otherwise it should transfer to labelB. ;; cd ;; 'return label (labA labB) ;; -------------------------- ;; 'effect | -- BEGIN IF ;; dd reg | OK OK -- ;; (reg off) | -- OK -- ;; ---- The top ; The todo list keeps track of pending lambda bodies who need their ; code generated. (define todo '()) ; ((label code) ...) (define cg-top (lambda (exp) (set! todo (cons (list '_scheme_entry `(lambda () ,exp)) todo)) (cg-code))) (define cg-code (lambda () (if (null? todo) (instructions) (let ([first (car todo)] [rest (cdr todo)]) (set! todo rest) (let ([label (car first)]) (record-case (cadr first) [lambda (formals body) (instructions `(label ,label) (cg body (* (+ (length formals) 1) ws) 'ac 'return 'ignored) (cg-code))])))))) ;; ---- cg ;; fs is the size of the current frame ;; nextlab is the label which will be emitted after this instruction (define varref->address (lambda (exp) (record-case exp [bound (n name) `(fp ,(* (+ n 1) ws))] [free (n name) `(cp ,(* (+ n 2) ws))]))) (define cg (lambda (exp fs dd cd nextlab) (record-case exp [bound (n name) (cg-load-branch `(fp ,(* (+ n 1) ws)) dd cd nextlab)] [free (n name) (cg-load-branch `(cp ,(* (+ n 2) ws)) dd cd nextlab)] [quote (obj) (cg-set-branch obj dd cd nextlab)] [begin (a b) (let ([midlab (gen-label "begin")]) (instructions (cg a fs 'effect midlab midlab) `(label ,midlab) (cg b fs dd cd nextlab)))] [if (t c a) (let ([truelab (gen-label "iftrue")] [falselab (gen-label "iffalse")]) (instructions (cg t fs 'effect (join-labels truelab falselab) truelab) `(label ,truelab) (cg c fs dd cd falselab) `(label ,falselab) (cg a fs dd cd nextlab)))] [build-closure (code . fvars) (if (eq? dd 'effect) (cg-jump (cd->true cd) nextlab) (let ([codelab (gen-label "code")]) (set! todo (cons (list codelab code) todo)) (instructions `(comment "build-closure") (cg-allocate (+ (length fvars) 2) 'ac) `(set ,(length fvars) t1) `(st t1 (ac 0)) `(set ,codelab t1) `(st t1 (ac ,(* 1 ws))) (let f ([ls fvars] [pos 2]) (if (null? ls) (instructions) (instructions `(ld ,(varref->address (car ls)) t3) `(st t3 (ac ,(* pos ws))) (f (cdr ls) (+ pos 1))))) (cg-type-tag closure-tag 'ac) (cg-store 'ac dd) `(comment "end build-closure") (cg-jump cd nextlab))))] [else (let ([rator (car exp)] [rands (cdr exp)] [ratorlab (gen-label "endrator")]) (cond [(symbol? rator) (cg-inline exp rator rands fs dd cd nextlab)] [(eq? cd 'return) (instructions (cg-rands rands fs) (cg rator (+ fs (* (length rands) ws)) 'ac ratorlab ratorlab) `(label ,ratorlab) (cg-shuffle fs (length rands)) `(andn ac ,mask cp) `(ld (cp ,(* 1 ws)) ac) `(jmp ac) `(nop))] [else (let ([retlab (gen-label "return")]) (instructions `(st cp (fp ,fs)) `(set ,retlab ac) `(st ac (fp ,(+ fs (* 1 ws)))) (cg-rands rands (+ fs (* 2 ws))) (cg rator (+ fs (* (+ (length rands) 2) ws)) 'ac ratorlab ratorlab) `(label ,ratorlab) `(andn ac ,mask cp) (cg-pushstack (+ fs (* 1 ws))) `(ld (cp ,(* 1 ws)) ac) `(jmp ac) `(nop) `(label ,retlab) (cg-pushstack (- (+ fs (* 1 ws)))) `(ld (fp ,fs) cp) (cg-store 'ac dd) (cond [(pair? cd) (let ([truelab (car cd)] [falselab (cadr cd)]) (instructions `(cmp ac ,(encode #f)) (cg-branch truelab falselab nextlab 'bne 'be)))] [else (cg-jump cd nextlab)])))]))]))) (define cg-shuffle (lambda (fs num) (let loop ([top fs] [bot ws] [num num]) (if (zero? num) (instructions) (instructions `(ld (fp ,top) t1) `(st t1 (fp ,bot)) (loop (+ top ws) (+ bot ws) (sub1 num))))))) ;; ---- general cg procedures. (define cg-jump (lambda (lab nextlab) (if (eq? lab 'return) (instructions `(ld (fp 0) t1) `(jmp t1) `(nop)) (if (eq? lab nextlab) (instructions) (instructions `(ba ,lab) `(nop)))))) (define cg-branch (lambda (truelab falselab nextlab jump-if-true jump-if-false) (instructions (cond [(eq? truelab nextlab) `(,jump-if-false ,falselab)] [(eq? falselab nextlab) `(,jump-if-true ,truelab)] [else (instructions `(,jump-if-true ,truelab) `(nop) `(ba ,falselab))]) `(nop)))) ; that darned delay slot (define cg-store (lambda (src dest) (cond [(eq? dest 'effect) (instructions)] [(pair? dest) `(st ,src ,dest)] [else (if (eq? src dest) (instructions) `(mov ,src ,dest))]))) ;; ---- More specialized cg procedures (define cg-load-branch (lambda (loc dd cd nextlab) (cond [(eq? dd 'effect) (cond [(pair? cd) (let ([truelab (car cd)] [falselab (cadr cd)]) (instructions `(ld ,loc t1) `(cmp t1 ,(encode #f)) (cg-branch truelab falselab nextlab 'bne 'be)))] [else (cg-jump cd nextlab)])] [(pair? dd) (let ([register (car dd)] [offset (cadr dd)]) (instructions `(ld ,loc t1) `(st t1 (,register ,offset)) (cg-jump cd nextlab)))] [else (instructions `(ld ,loc ,dd) (cg-jump cd nextlab))]))) (define cg-set-branch (lambda (obj dd cd nextlab) (cond [(eq? dd 'effect) (if (pair? cd) (let ([truelab (car cd)] [falselab (cadr cd)]) (cg-jump (if obj truelab falselab) nextlab)) (cg-jump cd nextlab))] [(pair? dd) (instructions `(set ,(encode obj) t1 ,(format "~s" obj)) `(st t1 ,dd) (cg-jump cd nextlab))] [else (instructions `(set ,(encode obj) ,dd ,(format "~s" obj)) (cg-jump cd nextlab))]))) ;; ---- Code generation for operands ;; * cg-rands generates code that drops the evaluated operands onto ;; the stack. ;; * the various cg-?-rands procedures ensure that the code gets put ;; into the temporary registers for use by primitives. ;; * cg-effect-rands doesn't place the operands anywhere at all. (define cg-rands (lambda (rands fs) (if (null? rands) (instructions) (let ([randlab (gen-label "rand")]) (instructions (cg (car rands) fs `(fp ,fs) randlab randlab) `(label ,randlab) (cg-rands (cdr rands) (+ fs ws))))))) (define cg-effect-rands (lambda (ls fs) (if (null? ls) (instructions) (let ([randlab (gen-label "rand")]) (instructions (cg (car ls) fs 'effect randlab randlab) `(label ,randlab) (cg-effect-rands (cdr ls) fs)))))) (define cg-unary-rand (lambda (rands fs) (let ([rand (car rands)]) (let ([endlab (gen-label "unaryrand")]) (instructions (cg (car rands) fs 't1 endlab endlab) `(label ,endlab)))))) (define cg-binary-rands (lambda (rands fs) (let ([r0 (car rands)] [r1 (cadr rands)]) (let ([r0lab (gen-label "binary0")] [r1lab (gen-label "binary1")]) (instructions (cg r0 fs `(fp ,fs) r0lab r0lab) `(label ,r0lab) (cg r1 (+ fs (* 1 ws)) 'ac r1lab r1lab) `(label ,r1lab) `(mov ac t2) `(ld (fp ,fs) t1)))))) (define cg-ternary-rands (lambda (rands fs) (let ([r0 (car rands)] [r1 (cadr rands)] [r2 (caddr rands)]) (let ([r0lab (gen-label "ternary0")] [r1lab (gen-label "ternary1")] [r2lab (gen-label "ternary2")]) (instructions (cg r0 fs `(fp ,fs) r0lab r0lab) `(label ,r0lab) (cg r1 (+ fs (* 1 ws)) `(fp ,(+ fs (* 1 ws))) r1lab r1lab) `(label ,r1lab) (cg r2 (+ fs (* 2 ws)) 'ac r2lab r2lab) `(label ,r2lab) `(mov ac t3) `(ld (fp ,(+ fs (* 1 ws))) t2) `(ld (fp ,fs) t1)))))) ;; ---- generation for inlines (define cg-inline (lambda (exp name rands fs dd cd nextlab) (case name [(+) (cg-true-inline cg-binary-rands rands fs dd cd nextlab `(add t1 t2 ac))] [(-) (cg-true-inline cg-binary-rands rands fs dd cd nextlab `(sub t1 t2 ac))] [(*) (cg-true-inline cg-binary-rands rands fs dd cd nextlab (instructions `(sra t2 ,tag-len t2) `(smul t1 t2 ac)))] [(/) (cg-true-inline cg-binary-rands rands fs dd cd nextlab (instructions `(sdiv t1 t2 ac) `(sll ac ,tag-len ac)))] [(= eq?) (cg-binary-pred-inline exp rands fs dd cd nextlab 'be 'bne `(cmp t1 t2))] [(<) (cg-binary-pred-inline exp rands fs dd cd nextlab 'bl 'bge `(cmp t1 t2))] [(boolean?) (cg-type-test exp bool-tag imm-mask rands fs dd cd nextlab)] [(car) (cg-ref-inline cg-unary-rand rands fs dd cd nextlab `(ld (t1 ,(- pair-tag)) ac))] [(cdr) (cg-ref-inline cg-unary-rand rands fs dd cd nextlab `(ld (t1 ,(- ws pair-tag)) ac))] [(char?) (cg-type-test exp char-tag imm-mask rands fs dd cd nextlab)] [(char->integer) (cg-true-inline cg-unary-rand rands fs dd cd nextlab (instructions `(srl t1 8 ac) `(sll ac ,tag-len ac)))] [(cons) (cg-true-inline cg-binary-rands rands fs dd cd nextlab (instructions (cg-allocate 2 'ac) `(st t1 (ac 0)) `(st t2 (ac ,(* 1 ws))) (cg-type-tag pair-tag 'ac)))] [(integer?) (cg-type-test exp number-tag mask rands fs dd cd nextlab)] [(string->uninterned-symbol) (cg-true-inline cg-unary-rand rands fs dd cd nextlab (instructions (cg-allocate 1 'ac) `(st t1 (ac 0)) (cg-type-tag symbol-tag 'ac)))] [(not) (if (eq? dd 'effect) (if (pair? cd) (let ([truelab (car cd)] [falselab (cadr cd)]) (cg (car rands) fs 'effect (join-labels falselab truelab) nextlab)) (instructions (cg-effect-rands rands fs) (cg-jump cd nextlab))) (cg `(if ,(car rands) '#f '#t) fs dd cd nextlab))] [(null?) (cg-type-test exp null-tag imm-mask rands fs dd cd nextlab)] [(pair?) (cg-type-test exp pair-tag mask rands fs dd cd nextlab)] [(procedure?) (cg-type-test exp closure-tag mask rands fs dd cd nextlab)] [(string) (cg-true-inline cg-rands rands fs dd cd nextlab (instructions `(comment "string") (cg-allocate (+ (quotient (+ (length rands) (- ws 1)) ws) 1) 'ac) `(set ,(length rands) t1) `(st t1 (ac 0)) (let loop ([fpos fs] [spos ws] [num (length rands)]) (if (zero? num) (instructions) (instructions `(ld (fp ,fpos) t1) `(srl t1 8 t1) ; 8 bits for the string tag `(stb t1 (ac ,spos)) (loop (+ fpos ws) (+ spos 1) (- num 1))))) (cg-type-tag string-tag 'ac) `(comment "end string")))] [(string?) (cg-type-test exp string-tag mask rands fs dd cd nextlab)] [(string-length) (cg-true-inline cg-unary-rand rands fs dd cd nextlab (instructions `(ld (t1 ,(- string-tag)) ac) `(sll ac ,tag-len ac)))] [(string-ref) (cg-true-inline cg-binary-rands rands fs dd cd nextlab (instructions `(sra t2 ,tag-len t2) `(add t1 t2 t1) `(ldb (t1 ,(- ws string-tag)) ac) `(sll ac 8 ac) (cg-type-tag char-tag 'ac)))] [(vector) (cg-true-inline cg-rands rands fs dd cd nextlab (instructions `(comment "vector") (cg-allocate (+ (length rands) 1) 'ac) `(set ,(length rands) t1) `(st t1 (ac 0)) (let loop ([fpos fs] [vpos 1] [num (length rands)]) (if (zero? num) (instructions) (instructions `(ld (fp ,fpos) t1) `(st t1 (ac ,(* vpos ws))) (loop (+ fpos ws) (+ vpos 1) (- num 1))))) (cg-type-tag vector-tag 'ac) `(comment "end vector")))] [(vector?) (cg-type-test exp vector-tag mask rands fs dd cd nextlab)] [(vector-length) (cg-true-inline cg-unary-rand rands fs dd cd nextlab (instructions `(ld (t1 ,(- vector-tag)) ac) `(sll ac ,tag-len ac)))] [(vector-ref) (cg-ref-inline cg-binary-rands rands fs dd cd nextlab (instructions `(sra t2 1 t2) `(add t1 t2 t1) `(ld (t1 ,(- ws vector-tag)) ac)))] [(vector-set!) (instructions (cg-ternary-rands rands fs) `(comment "vector-set") `(sra t2 1 t2) `(add t1 t2 t1) `(st t3 (t1 ,(- ws vector-tag))) `(comment "end vector-set") (if (eq? dd 'effect) (cg-jump (cd->true cd) nextlab) (instructions (cg-store 't3 dd) ; why not? (cg-jump cd nextlab))))] [(symbol?) (cg-type-test exp symbol-tag mask rands fs dd cd nextlab)] [(symbol->string) (cg-true-inline cg-unary-rand rands fs dd cd nextlab `(ld (t1 ,(- symbol-tag)) ac))] [else (error 'cg-prim "sanity-check: bad primitive ~s" name)]))) ;; ---- inline helpers ;; cg-true-inline is used for primitives which always return true and ;; have no side-effects (define cg-true-inline (lambda (rander rands fs dd cd nextlab code) (if (eq? dd 'effect) (instructions (cg-effect-rands rands fs) (cg-jump (cd->true cd) nextlab)) (instructions (rander rands fs) code (cg-store 'ac dd) (cg-jump cd nextlab))))) ;; cg-ref-inline is used for non-side-effecting primitives which could ;; return a true or a false value. (define cg-ref-inline (lambda (rander rands fs dd cd nextlab code) (if (eq? dd 'effect) (if (pair? cd) (let ([truelab (car cd)] [falselab (cadr cd)]) (instructions (rander rands fs) code `(cmp ac ,(encode #f)) (cg-branch truelab falselab nextlab 'bne 'be))) (instructions (cg-effect-rands rands fs) (cg-jump cd nextlab))) (instructions (rander rands fs) code (cg-store 'ac dd) (cg-jump cd nextlab))))) ;; cg-binary-pred-inline is used for binary predicates. (define cg-binary-pred-inline (lambda (exp rands fs dd cd nextlab trueinst falseinst code) (if (eq? dd 'effect) (if (pair? cd) (let ([truelab (car cd)] [falselab (cadr cd)]) (instructions (cg-binary-rands rands fs) code (cg-branch truelab falselab nextlab trueinst falseinst))) (instructions (cg-effect-rands rands fs) (cg-jump cd nextlab))) (cg `(if ,exp '#t '#f) fs dd cd nextlab)))) (define cg-type-test (lambda (exp tag mask rands fs dd cd nextlab) (if (eq? dd 'effect) (if (pair? cd) (let ([truelab (car cd)] [falselab (cadr cd)]) (instructions (cg-unary-rand rands fs) `(and t1 ,mask t1) `(cmp t1 ,tag) (cg-branch truelab falselab nextlab 'be 'bne))) (instructions (cg-effect-rands rands fs) (cg-jump cd nextlab))) (cg `(if ,exp '#t '#f) fs dd cd nextlab)))) ;; ---- Some aliases for common assembly sequences (define cg-type-tag (lambda (tag reg) `(or ,reg ,tag ,reg))) (define cg-pushstack (lambda (n) `(add fp ,n fp))) (define cg-allocate (lambda (n target) (let ([n (if (even? n) n (+ n 1))]) (instructions `(mov ap ,target) `(add ap ,(* n ws) ap))))) ;; ---- label handling procedures (define join-labels (lambda (a b) (cond [(pair? a) (join-labels (car a) b)] [(pair? b) (list a (cadr b))] [else (list a b)]))) (define cd->true (lambda (cd) (if (pair? cd) (car cd) cd))) (define gen-label (let ([n 0]) (lambda (str) (set! n (add1 n)) (string->uninterned-symbol (string-append str (number->string n))))))