From 773595f0db0f51a541dc4aad3bfee4ef2ad78eb0 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 8 May 2015 11:04:36 +0200 Subject: [PATCH] Add compiler from tree-il to cps2 * module/language/tree-il/compile-cps2.scm: New file. * module/Makefile.am: Add the file to the build. --- module/Makefile.am | 1 + module/language/tree-il/compile-cps2.scm | 892 +++++++++++++++++++++++ 2 files changed, 893 insertions(+) create mode 100644 module/language/tree-il/compile-cps2.scm diff --git a/module/Makefile.am b/module/Makefile.am index 2a7b9e893..e4785aef8 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -116,6 +116,7 @@ TREE_IL_LANG_SOURCES = \ language/tree-il/canonicalize.scm \ language/tree-il/analyze.scm \ language/tree-il/inline.scm \ + language/tree-il/compile-cps2.scm \ language/tree-il/compile-cps.scm \ language/tree-il/debug.scm \ language/tree-il/spec.scm diff --git a/module/language/tree-il/compile-cps2.scm b/module/language/tree-il/compile-cps2.scm new file mode 100644 index 000000000..f8710ba6b --- /dev/null +++ b/module/language/tree-il/compile-cps2.scm @@ -0,0 +1,892 @@ +;;; Continuation-passing style (CPS) intermediate language (IL) + +;; Copyright (C) 2013, 2014, 2015 Free Software Foundation, Inc. + +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +;;; Commentary: +;;; +;;; This pass converts Tree-IL to the continuation-passing style (CPS) +;;; language. +;;; +;;; CPS is a lower-level representation than Tree-IL. Converting to +;;; CPS, beyond adding names for all control points and all values, +;;; simplifies expressions in the following ways, among others: +;;; +;;; * Fixing the order of evaluation. +;;; +;;; * Converting assigned variables to boxed variables. +;;; +;;; * Requiring that Scheme's has already been lowered to +;;; . +;;; +;;; * Inlining default-value initializers into lambda-case +;;; expressions. +;;; +;;; * Inlining prompt bodies. +;;; +;;; * Turning toplevel and module references into primcalls. This +;;; involves explicitly modelling the "scope" of toplevel lookups +;;; (indicating the module with respect to which toplevel bindings +;;; are resolved). +;;; +;;; The utility of CPS is that it gives a name to everything: every +;;; intermediate value, and every control point (continuation). As such +;;; it is more verbose than Tree-IL, but at the same time more simple as +;;; the number of concepts is reduced. +;;; +;;; Code: + +(define-module (language tree-il compile-cps2) + #:use-module (ice-9 match) + #:use-module ((srfi srfi-1) #:select (fold filter-map)) + #:use-module (srfi srfi-26) + #:use-module ((system foreign) #:select (make-pointer pointer->scm)) + #:use-module (language cps2) + #:use-module (language cps2 utils) + #:use-module (language cps primitives) + #:use-module (language tree-il analyze) + #:use-module (language tree-il optimize) + #:use-module (language tree-il) + #:use-module (language cps intmap) + #:export (compile-cps)) + +;;; Guile's semantics are that a toplevel lambda captures a reference on +;;; the current module, and that all contained lambdas use that module +;;; to resolve toplevel variables. This parameter tracks whether or not +;;; we are in a toplevel lambda. If we are in a lambda, the parameter +;;; is bound to a fresh name identifying the module that was current +;;; when the toplevel lambda is defined. +;;; +;;; This is more complicated than it need be. Ideally we should resolve +;;; all toplevel bindings to bindings from specific modules, unless the +;;; binding is unbound. This is always valid if the compilation unit +;;; sets the module explicitly, as when compiling a module, but it +;;; doesn't work for files auto-compiled for use with `load'. +;;; +(define current-topbox-scope (make-parameter #f)) +(define scope-counter (make-parameter #f)) + +(define (fresh-scope-id) + (let ((scope-id (scope-counter))) + (scope-counter (1+ scope-id)) + scope-id)) + +;;; We will traverse the nested Tree-IL expression to build the +;;; label->cont mapping for the result. When visiting any particular +;;; expression, we usually already know the label and the $kargs wrapper +;;; for the cont, and just need to know the body of that cont. However +;;; when building the body of that possibly nested Tree-IL expression we +;;; will also need to add conts to the result, so really it's a process +;;; that takes an incoming program, adds conts to that program, and +;;; returns the result program and the result term. +;;; +;;; It's a bit treacherous to do in a functional style as once you start +;;; adding to a program, you shouldn't add to previous versions of that +;;; program. Getting that right in the context of this program seed +;;; that is threaded through the conversion requires the use of a +;;; pattern, with-cps. +;;; +;;; with-cps goes like this: +;;; +;;; (with-cps cps clause ... tail-clause) +;;; +;;; Valid clause kinds are: +;;; +;;; (letk LABEL CONT) +;;; (letv VAR ...) +;;; (let$ X (PROC ARG ...)) +;;; +;;; letk and letv create fresh CPS labels and variable names, +;;; respectively. Labels and vars bound by letk and letv are in scope +;;; from their point of definition onward. letv just creates fresh +;;; variable names for use in other parts of with-cps, while letk binds +;;; fresh labels to values and adds them to the resulting program. The +;;; right-hand-side of letk, CONT, is passed to build-cont, so it should +;;; be a valid production of that language. +;;; +;;; let$ delegates processing to a sub-computation. The form (PROC ARG +;;; ...) is syntactically altered to be (PROC CPS ARG ...), where CPS is +;;; the value of the program being built, at that point in the +;;; left-to-right with-cps execution. That form is is expected to +;;; evaluate to two values: the new CPS term, and the value to bind to +;;; X. X is in scope for the following with-cps clauses. The name was +;;; chosen because the $ is reminiscent of the $ in CPS data types. +;;; +;;; The result of the with-cps form is determined by the tail clause, +;;; which may be of these two kinds: +;;; +;;; ($ (PROC ARG ...)) +;;; EXP +;;; +;;; $ is like let$, but in tail position. Otherwise EXP is any kind of +;;; expression, which should not add to the resulting program. Ending +;;; the with-cps with EXP is equivalant to returning (values CPS EXP). +;;; +;;; It's a bit of a monad, innit? Don't tell anyone though! +;;; +(define-syntax with-cps + (syntax-rules (letk letv let$ $) + ((_ (exp ...) clause ...) + (let ((cps (exp ...))) + (with-cps cps clause ...))) + ((_ cps (letk label cont) clause ...) + (let-fresh (label) () + (with-cps (intmap-add cps label (build-cont cont)) + clause ...))) + ((_ cps (letv v ...) clause ...) + (let-fresh () (v ...) + (with-cps cps clause ...))) + ((_ cps (let$ var (proc arg ...)) clause ...) + (call-with-values (lambda () (proc cps arg ...)) + (lambda (cps var) + (with-cps cps clause ...)))) + ((_ cps ($ (proc arg ...))) + (proc cps arg ...)) + ((_ cps exp) + (values cps exp)))) + +;;; Sometimes you need to just bind some constants to CPS values. +;;; with-cps-constants is there for you. For example: +;;; +;;; (with-cps-constants cps ((foo 34)) +;;; (build-term ($values (foo)))) +;;; +;;; The body of with-cps-constants is a with-cps clause, or a sequence +;;; of such clauses. But usually you will want with-cps-constants +;;; inside a with-cps, so it usually looks like this: +;;; +;;; (with-cps cps +;;; ... +;;; ($ (with-cps-constants ((foo 34)) +;;; (build-term ($values (foo)))))) +;;; +;;; which is to say that the $ or the let$ adds the CPS argument for us. +;;; +(define-syntax with-cps-constants + (syntax-rules () + ((_ cps () clause ...) + (with-cps cps clause ...)) + ((_ cps ((var val) (var* val*) ...) clause ...) + (let ((x val)) + (with-cps cps + (letv var) + (let$ body (with-cps-constants ((var* val*) ...) + clause ...)) + (letk label ($kargs ('var) (var) ,body)) + (build-term ($continue label #f ($const x)))))))) + +(define (toplevel-box cps src name bound? val-proc) + (define (lookup cps name bound? k) + (match (current-topbox-scope) + (#f + (with-cps cps + (build-term ($continue k src + ($primcall 'resolve (name bound?)))))) + (scope-id + (with-cps cps + ($ (with-cps-constants ((scope scope-id)) + (build-term + ($continue k src + ($primcall 'cached-toplevel-box (scope name bound?)))))))))) + (with-cps cps + (letv box) + (let$ body (val-proc box)) + (letk kbox ($kargs ('box) (box) ,body)) + ($ (with-cps-constants ((name name) + (bound? bound?)) + ($ (lookup name bound? kbox)))))) + +(define (module-box cps src module name public? bound? val-proc) + (with-cps cps + (letv box) + (let$ body (val-proc box)) + (letk kbox ($kargs ('box) (box) ,body)) + ($ (with-cps-constants ((module module) + (name name) + (public? public?) + (bound? bound?)) + (build-term ($continue kbox src + ($primcall 'cached-module-box + (module name public? bound?)))))))) + +(define (capture-toplevel-scope cps src scope-id k) + (with-cps cps + (letv module) + (let$ body (with-cps-constants ((scope scope-id)) + (build-term + ($continue k src + ($primcall 'cache-current-module! (module scope)))))) + (letk kmodule ($kargs ('module) (module) ,body)) + (build-term ($continue kmodule src + ($primcall 'current-module ()))))) + +(define (fold-formals proc seed arity gensyms inits) + (match arity + (($ $arity req opt rest kw allow-other-keys?) + (let () + (define (fold-req names gensyms seed) + (match names + (() (fold-opt opt gensyms inits seed)) + ((name . names) + (proc name (car gensyms) #f + (fold-req names (cdr gensyms) seed))))) + (define (fold-opt names gensyms inits seed) + (match names + (() (fold-rest rest gensyms inits seed)) + ((name . names) + (proc name (car gensyms) (car inits) + (fold-opt names (cdr gensyms) (cdr inits) seed))))) + (define (fold-rest rest gensyms inits seed) + (match rest + (#f (fold-kw kw gensyms inits seed)) + (name (proc name (car gensyms) #f + (fold-kw kw (cdr gensyms) inits seed))))) + (define (fold-kw kw gensyms inits seed) + (match kw + (() + (unless (null? gensyms) + (error "too many gensyms")) + (unless (null? inits) + (error "too many inits")) + seed) + (((key name var) . kw) + ;; Could be that var is not a gensym any more. + (when (symbol? var) + (unless (eq? var (car gensyms)) + (error "unexpected keyword arg order"))) + (proc name (car gensyms) (car inits) + (fold-kw kw (cdr gensyms) (cdr inits) seed))))) + (fold-req req gensyms seed))))) + +(define (unbound? cps src var kt kf) + (define tc8-iflag 4) + (define unbound-val 9) + (define unbound-bits (logior (ash unbound-val 8) tc8-iflag)) + (with-cps cps + ($ (with-cps-constants ((unbound (pointer->scm + (make-pointer unbound-bits)))) + (build-term ($continue kf src + ($branch kt ($primcall 'eq? (var unbound))))))))) + +(define (init-default-value cps name sym subst init body) + (match (hashq-ref subst sym) + ((orig-var subst-var box?) + (let ((src (tree-il-src init))) + (define (maybe-box cps k make-body) + (if box? + (with-cps cps + (letv phi) + (letk kbox ($kargs (name) (phi) + ($continue k src ($primcall 'box (phi))))) + ($ (make-body kbox))) + (make-body cps k))) + (with-cps cps + (letk knext ($kargs (name) (subst-var) ,body)) + ($ (maybe-box + knext + (lambda (cps k) + (with-cps cps + (letk kbound ($kargs () () ($continue k src + ($values (orig-var))))) + (letv val rest) + (letk krest ($kargs (name 'rest) (val rest) + ($continue k src ($values (val))))) + (letk kreceive ($kreceive (list name) 'rest krest)) + (let$ init (convert init kreceive subst)) + (letk kunbound ($kargs () () ,init)) + ($ (unbound? src orig-var kunbound kbound))))))))))) + +;; cps exp k-name alist -> cps term +(define (convert cps exp k subst) + ;; exp (v-name -> term) -> term + (define (convert-arg cps exp k) + (match exp + (($ src name sym) + (match (hashq-ref subst sym) + ((orig-var box #t) + (with-cps cps + (letv unboxed) + (let$ body (k unboxed)) + (letk kunboxed ($kargs ('unboxed) (unboxed) ,body)) + (build-term ($continue kunboxed src ($primcall 'box-ref (box)))))) + ((orig-var subst-var #f) (k cps subst-var)) + (var (k cps var)))) + (else + (with-cps cps + (letv arg rest) + (let$ body (k arg)) + (letk karg ($kargs ('arg 'rest) (arg rest) ,body)) + (letk kreceive ($kreceive '(arg) 'rest karg)) + ($ (convert exp kreceive subst)))))) + ;; (exp ...) ((v-name ...) -> term) -> term + (define (convert-args cps exps k) + (match exps + (() (k cps '())) + ((exp . exps) + (convert-arg cps exp + (lambda (cps name) + (convert-args cps exps + (lambda (cps names) + (k cps (cons name names))))))))) + (define (box-bound-var cps name sym body) + (match (hashq-ref subst sym) + ((orig-var subst-var #t) + (with-cps cps + (letk k ($kargs (name) (subst-var) ,body)) + (build-term ($continue k #f ($primcall 'box (orig-var)))))) + (else + (with-cps cps body)))) + (define (box-bound-vars cps names syms body) + (match (vector names syms) + (#((name . names) (sym . syms)) + (with-cps cps + (let$ body (box-bound-var name sym body)) + ($ (box-bound-vars names syms body)))) + (#(() ()) (with-cps cps body)))) + (define (bound-var sym) + (match (hashq-ref subst sym) + ((var . _) var) + ((? exact-integer? var) var))) + + (match exp + (($ src name sym) + (with-cps cps + (rewrite-term (hashq-ref subst sym) + ((orig-var box #t) ($continue k src ($primcall 'box-ref (box)))) + ((orig-var subst-var #f) ($continue k src ($values (subst-var)))) + (var ($continue k src ($values (var))))))) + + (($ src) + (with-cps cps + (build-term ($continue k src ($const *unspecified*))))) + + (($ src exp) + (with-cps cps + (build-term ($continue k src ($const exp))))) + + (($ src name) + (with-cps cps + (build-term ($continue k src ($prim name))))) + + (($ fun-src meta body) + (let () + (define (convert-clauses cps body ktail) + (match body + (#f (values cps #f)) + (($ src req opt rest kw inits gensyms body alternate) + (let* ((arity (make-$arity req (or opt '()) rest + (map (match-lambda + ((kw name sym) + (list kw name (bound-var sym)))) + (if kw (cdr kw) '())) + (and kw (car kw)))) + (names (fold-formals (lambda (name sym init names) + (cons name names)) + '() + arity gensyms inits))) + (define (fold-formals* cps f seed arity gensyms inits) + (match (fold-formals + (lambda (name sym init cps+seed) + (match cps+seed + ((cps . seed) + (call-with-values (lambda () + (f cps name sym init seed)) + (lambda (cps seed) (cons cps seed)))))) + (cons cps seed) arity gensyms inits) + ((cps . seed) (values cps seed)))) + (with-cps cps + (let$ kalt (convert-clauses alternate ktail)) + (let$ body (convert body ktail subst)) + (let$ body + (fold-formals* + (lambda (cps name sym init body) + (if init + (init-default-value cps name sym subst init body) + (box-bound-var cps name sym body))) + body arity gensyms inits)) + (letk kargs ($kargs names (map bound-var gensyms) ,body)) + (letk kclause ($kclause ,arity kargs kalt)) + kclause))))) + (if (current-topbox-scope) + (with-cps cps + (letv self) + (letk ktail ($ktail)) + (let$ kclause (convert-clauses body ktail)) + (letk kfun ($kfun fun-src meta self ktail kclause)) + (build-term ($continue k fun-src ($fun kfun)))) + (let ((scope-id (fresh-scope-id))) + (with-cps cps + (let$ body ((lambda (cps) + (parameterize ((current-topbox-scope scope-id)) + (convert cps exp k subst))))) + (letk kscope ($kargs () () ,body)) + ($ (capture-toplevel-scope fun-src scope-id kscope))))))) + + (($ src mod name public?) + (module-box + cps src mod name public? #t + (lambda (cps box) + (with-cps cps + (build-term ($continue k src ($primcall 'box-ref (box)))))))) + + (($ src mod name public? exp) + (convert-arg cps exp + (lambda (val) + (module-box + cps src mod name public? #t + (lambda (cps box) + (with-cps cps + (build-term + ($continue k src ($primcall 'box-set! (box val)))))))))) + + (($ src name) + (toplevel-box + cps src name #t + (lambda (cps box) + (with-cps cps + (build-term ($continue k src ($primcall 'box-ref (box)))))))) + + (($ src name exp) + (convert-arg cps exp + (lambda (cps val) + (toplevel-box + cps src name #f + (lambda (cps box) + (with-cps cps + (build-term + ($continue k src ($primcall 'box-set! (box val)))))))))) + + (($ src name exp) + (convert-arg cps exp + (lambda (cps val) + (with-cps cps + ($ (with-cps-constants ((name name)) + (build-term + ($continue k src ($primcall 'define! (name val)))))))))) + + (($ src proc args) + (convert-args cps (cons proc args) + (match-lambda* + ((cps (proc . args)) + (with-cps cps + (build-term ($continue k src ($call proc args)))))))) + + (($ src name args) + (cond + ((branching-primitive? name) + (convert-args cps args + (lambda (cps args) + (with-cps cps + (letk kt ($kargs () () ($continue k src ($const #t)))) + (letk kf ($kargs () () ($continue k src ($const #f)))) + (build-term ($continue kf src + ($branch kt ($primcall name args)))))))) + ((and (eq? name 'not) (match args ((_) #t) (_ #f))) + (convert-args cps args + (lambda (cps args) + (with-cps cps + (letk kt ($kargs () () ($continue k src ($const #f)))) + (letk kf ($kargs () () ($continue k src ($const #f)))) + (build-term ($continue kf src + ($branch kt ($values args)))))))) + ((and (eq? name 'list) + (and-map (match-lambda + ((or ($ ) + ($ ) + ($ ) + ($ )) #t) + (_ #f)) + args)) + ;; See note below in `canonicalize' about `vector'. The same + ;; thing applies to `list'. + (let lp ((cps cps) (args args) (k k)) + (match args + (() + (with-cps cps + (build-term ($continue k src ($const '()))))) + ((arg . args) + (with-cps cps + (letv tail) + (let$ body (convert-arg arg + (lambda (cps head) + (with-cps cps + (build-term ($continue k src + ($primcall 'cons (head tail)))))))) + (letk ktail ($kargs ('tail) (tail) ,body)) + ($ (lp args ktail))))))) + (else + (convert-args cps args + (lambda (cps args) + (with-cps cps + (build-term ($continue k src ($primcall name args))))))))) + + ;; Prompts with inline handlers. + (($ src escape-only? tag body + ($ hsrc hmeta + ($ _ hreq #f hrest #f () hsyms hbody #f))) + ;; Handler: + ;; khargs: check args returned to handler, -> khbody + ;; khbody: the handler, -> k + ;; + ;; Post-body: + ;; krest: collect return vals from body to list, -> kpop + ;; kpop: pop the prompt, -> kprim + ;; kprim: load the values primitive, -> kret + ;; kret: (apply values rvals), -> k + ;; + ;; Escape prompts evaluate the body with the continuation of krest. + ;; Otherwise we do a no-inline call to body, continuing to krest. + (convert-arg cps tag + (lambda (cps tag) + (let ((hnames (append hreq (if hrest (list hrest) '()))) + (bound-vars (map bound-var hsyms))) + (define (convert-body cps khargs krest) + (if escape-only? + (with-cps cps + (let$ body (convert body krest subst)) + (letk kbody ($kargs () () ,body)) + (build-term ($continue kbody src ($prompt #t tag khargs)))) + (convert-arg cps body + (lambda (cps thunk) + (with-cps cps + (letk kbody ($kargs () () + ($continue krest (tree-il-src body) + ($primcall 'call-thunk/no-inline + (thunk))))) + (build-term ($continue kbody (tree-il-src body) + ($prompt #f tag khargs)))))))) + (with-cps cps + (letv prim vals) + (let$ hbody (convert hbody k subst)) + (let$ hbody (box-bound-vars hnames hsyms hbody)) + (letk khbody ($kargs hnames bound-vars ,hbody)) + (letk khargs ($kreceive hreq hrest khbody)) + (letk kprim ($kargs ('prim) (prim) + ($continue k src ($primcall 'apply (prim vals))))) + (letk kret ($kargs () () + ($continue kprim src ($prim 'values)))) + (letk kpop ($kargs ('rest) (vals) + ($continue kret src ($primcall 'unwind ())))) + ;; FIXME: Attach hsrc to $kreceive. + (letk krest ($kreceive '() 'rest kpop)) + ($ (convert-body khargs krest))))))) + + (($ src tag args ($ _ ())) + (convert-args cps (cons tag args) + (lambda (cps args*) + (with-cps cps + (build-term + ($continue k src ($primcall 'abort-to-prompt args*))))))) + + (($ src tag args tail) + (convert-args cps + (append (list (make-primitive-ref #f 'abort-to-prompt) tag) + args + (list tail)) + (lambda (cps args*) + (with-cps cps + (build-term ($continue k src ($primcall 'apply args*))))))) + + (($ src test consequent alternate) + (define (convert-test cps kt kf) + (match test + (($ src (? branching-primitive? name) args) + (convert-args cps args + (lambda (cps args) + (with-cps cps + (build-term ($continue kf src + ($branch kt ($primcall name args)))))))) + (_ (convert-arg cps test + (lambda (cps test) + (with-cps cps + (build-term ($continue kf src + ($branch kt ($values (test))))))))))) + (with-cps cps + (let$ t (convert consequent k subst)) + (let$ f (convert alternate k subst)) + (letk kt ($kargs () () ,t)) + (letk kf ($kargs () () ,f)) + ($ (convert-test kt kf)))) + + (($ src name gensym exp) + (convert-arg cps exp + (lambda (cps exp) + (match (hashq-ref subst gensym) + ((orig-var box #t) + (with-cps cps + (build-term + ($continue k src ($primcall 'box-set! (box exp)))))))))) + + (($ src head tail) + (with-cps cps + (let$ tail (convert tail k subst)) + (letv vals) + (letk kseq ($kargs ('vals) (vals) ,tail)) + (letk kreceive ($kreceive '() 'vals kseq)) + ($ (convert head kreceive subst)))) + + (($ src names syms vals body) + (let lp ((cps cps) (names names) (syms syms) (vals vals)) + (match (list names syms vals) + ((() () ()) (convert cps body k subst)) + (((name . names) (sym . syms) (val . vals)) + (with-cps cps + (let$ body (lp names syms vals)) + (let$ body (box-bound-var name sym body)) + (letv rest) + (letk klet ($kargs (name 'rest) ((bound-var sym) rest) ,body)) + (letk kreceive ($kreceive (list name) 'rest klet)) + ($ (convert val kreceive subst))))))) + + (($ src names gensyms funs body) + ;; Some letrecs can be contified; that happens later. + (define (convert-funs cps funs) + (match funs + (() + (with-cps cps '())) + ((fun . funs) + (with-cps cps + (let$ fun (convert fun k subst)) + (let$ funs (convert-funs funs)) + (cons (match fun + (($ $continue _ _ (and fun ($ $fun))) + fun)) + funs))))) + (if (current-topbox-scope) + (let ((vars (map bound-var gensyms))) + (with-cps cps + (let$ body (convert body k subst)) + (letk krec ($kargs names vars ,body)) + (let$ funs (convert-funs funs)) + (build-term ($continue krec src ($rec names vars funs))))) + (let ((scope-id (fresh-scope-id))) + (with-cps cps + (let$ body ((lambda (cps) + (parameterize ((current-topbox-scope scope-id)) + (convert cps exp k subst))))) + (letk kscope ($kargs () () ,body)) + ($ (capture-toplevel-scope src scope-id kscope)))))) + + (($ src exp + ($ lsrc req #f rest #f () syms body #f)) + (let ((names (append req (if rest (list rest) '()))) + (bound-vars (map bound-var syms))) + (with-cps cps + (let$ body (convert body k subst)) + (let$ body (box-bound-vars names syms body)) + (letk kargs ($kargs names bound-vars ,body)) + (letk kreceive ($kreceive req rest kargs)) + ($ (convert exp kreceive subst))))))) + +(define (build-subst exp) + "Compute a mapping from lexical gensyms to CPS variable indexes. CPS +uses small integers to identify variables, instead of gensyms. + +This subst table serves an additional purpose of mapping variables to +replacements. The usual reason to replace one variable by another is +assignment conversion. Default argument values is the other reason. + +The result is a hash table mapping symbols to substitutions (in the case +that a variable is substituted) or to indexes. A substitution is a list +of the form: + + (ORIG-INDEX SUBST-INDEX BOXED?) + +A true value for BOXED? indicates that the replacement variable is in a +box. If a variable is not substituted, the mapped value is a small +integer." + (let ((table (make-hash-table))) + (define (down exp) + (match exp + (($ src name sym exp) + (match (hashq-ref table sym) + ((orig subst #t) #t) + ((orig subst #f) (hashq-set! table sym (list orig subst #t))) + ((? number? idx) (hashq-set! table sym (list idx (fresh-var) #t))))) + (($ src req opt rest kw inits gensyms body alternate) + (fold-formals (lambda (name sym init seed) + (hashq-set! table sym + (if init + (list (fresh-var) (fresh-var) #f) + (fresh-var)))) + #f + (make-$arity req (or opt '()) rest + (if kw (cdr kw) '()) (and kw (car kw))) + gensyms + inits)) + (($ src names gensyms vals body) + (for-each (lambda (sym) + (hashq-set! table sym (fresh-var))) + gensyms)) + (($ src names gensyms vals body) + (for-each (lambda (sym) + (hashq-set! table sym (fresh-var))) + gensyms)) + (_ #t)) + (values)) + (define (up exp) (values)) + ((make-tree-il-folder) exp down up) + table)) + +(define (cps-convert/thunk exp) + (parameterize ((label-counter 0) + (var-counter 0) + (scope-counter 0)) + (with-cps empty-intmap + (letv init) + ;; Allocate kinit first so that we know that the entry point's + ;; label is zero. This simplifies data flow in the compiler if we + ;; can just pass around the program as a map of continuations and + ;; know that the entry point is label 0. + (letk kinit ,#f) + (letk ktail ($ktail)) + (let$ body (convert exp ktail (build-subst exp))) + (letk kbody ($kargs () () ,body)) + (letk kclause ($kclause ('() '() #f '() #f) kbody #f)) + ($ ((lambda (cps) + (let ((init (build-cont + ($kfun (tree-il-src exp) '() init ktail kclause)))) + (with-cps (intmap-add cps kinit init) + kinit)))))))) + +(define *comp-module* (make-fluid)) + +(define %warning-passes + `((unused-variable . ,unused-variable-analysis) + (unused-toplevel . ,unused-toplevel-analysis) + (unbound-variable . ,unbound-variable-analysis) + (arity-mismatch . ,arity-analysis) + (format . ,format-analysis))) + +(define (optimize-tree-il x e opts) + (define warnings + (or (and=> (memq #:warnings opts) cadr) + '())) + + ;; Go through the warning passes. + (let ((analyses (filter-map (lambda (kind) + (assoc-ref %warning-passes kind)) + warnings))) + (analyze-tree analyses x e)) + + (optimize x e opts)) + +(define (canonicalize exp) + (post-order + (lambda (exp) + (match exp + (($ src 'vector + (and args + ((or ($ ) ($ ) ($ ) ($ )) + ...))) + ;; Some macros generate calls to "vector" with like 300 + ;; arguments. Since we eventually compile to make-vector and + ;; vector-set!, it reduces live variable pressure to allocate the + ;; vector first, then set values as they are produced, if we can + ;; prove that no value can capture the continuation. (More on + ;; that caveat here: + ;; http://wingolog.org/archives/2013/11/02/scheme-quiz-time). + ;; + ;; Normally we would do this transformation in the compiler, but + ;; it's quite tricky there and quite easy here, so hold your nose + ;; while we drop some smelly code. + (let ((len (length args)) + (v (gensym "v "))) + (make-let src + (list 'v) + (list v) + (list (make-primcall src 'make-vector + (list (make-const #f len) + (make-const #f #f)))) + (fold (lambda (arg n tail) + (make-seq + src + (make-primcall + src 'vector-set! + (list (make-lexical-ref src 'v v) + (make-const #f n) + arg)) + tail)) + (make-lexical-ref src 'v v) + (reverse args) (reverse (iota len)))))) + + (($ src 'struct-set! (struct index value)) + ;; Unhappily, and undocumentedly, struct-set! returns the value + ;; that was set. There is code that relies on this. Hackety + ;; hack... + (let ((v (gensym "v "))) + (make-let src + (list 'v) + (list v) + (list value) + (make-seq src + (make-primcall src 'struct-set! + (list struct + index + (make-lexical-ref src 'v v))) + (make-lexical-ref src 'v v))))) + + (($ src escape-only? tag body + ($ hsrc hmeta + ($ _ hreq #f hrest #f () hsyms hbody #f))) + exp) + + ;; Eta-convert prompts without inline handlers. + (($ src escape-only? tag body handler) + (let ((h (gensym "h ")) + (args (gensym "args "))) + (make-let + src (list 'h) (list h) (list handler) + (make-seq + src + (make-conditional + src + (make-primcall src 'procedure? (list (make-lexical-ref #f 'h h))) + (make-void src) + (make-primcall + src 'scm-error + (list + (make-const #f 'wrong-type-arg) + (make-const #f "call-with-prompt") + (make-const #f "Wrong type (expecting procedure): ~S") + (make-primcall #f 'list (list (make-lexical-ref #f 'h h))) + (make-primcall #f 'list (list (make-lexical-ref #f 'h h)))))) + (make-prompt + src escape-only? tag body + (make-lambda + src '() + (make-lambda-case + src '() #f 'args #f '() (list args) + (make-primcall + src 'apply + (list (make-lexical-ref #f 'h h) + (make-lexical-ref #f 'args args))) + #f))))))) + (_ exp))) + exp)) + +(define (compile-cps exp env opts) + (values (cps-convert/thunk + (canonicalize (optimize-tree-il exp env opts))) + env + env)) + +;;; Local Variables: +;;; eval: (put 'with-cps 'scheme-indent-function 2) +;;; eval: (put 'with-cps-constants 'scheme-indent-function 1) +;;; eval: (put 'convert-arg 'scheme-indent-function 2) +;;; eval: (put 'convert-args 'scheme-indent-function 2) +;;; End: