@@ -637,6 +637,7 @@ The compiler will need to generate one such function for each
637637extracting all the @racket[λ ]-expressions and another for compiling
638638each of them:
639639
640+
640641@#reader scribble/comment-reader
641642(racketblock
642643;; LExpr -> (Listof LExpr)
@@ -708,20 +709,21 @@ Here's the function for emitting closure construction code:
708709(racketblock
709710;; (Listof Variable) Label Expr CEnv -> Asm
710711(define (compile-λ xs f e0 c)
711- (let ((fvs (fvs `(λ ,xs ',f ,e0))))
712- `(;; Save label address
713- (lea rax (offset ,f 0 ))
714- (mov (offset rdi 0 ) rax)
715-
716- ;; Save the environment
717- (mov rax ,(length fvs))
718- (mov (offset rdi 1 ) rax)
719- ,@(copy-env-to-heap fvs c 2 )
712+ `(;; Save label address
713+ (lea rax (offset ,f 0 ))
714+ (mov (offset rdi 0 ) rax)
715+
716+ ;; Save the environment
717+ (mov r8 ,(length ys))
718+ (mov (offset rdi 1 ) r8)
719+ (mov r9 rdi)
720+ (add r9 16 )
721+ ,@(copy-env-to-heap ys c 0 )
720722
721- ;; Return a pointer to the closure
722- (mov rax rdi)
723- (or rax ,type-proc)
724- (add rdi ,(* 8 (+ 2 (length fvs) ))))))
723+ ;; Return a pointer to the closure
724+ (mov rax rdi)
725+ (or rax ,type-proc)
726+ (add rdi ,(* 8 (+ 2 (length ys ))))))
725727)
726728
727729Compared the previous code we say for function pointer references, the
@@ -737,12 +739,13 @@ location where the closure is stored:
737739@#reader scribble/comment-reader
738740(racketblock
739741;; (Listof Variable) CEnv Natural -> Asm
742+ ;; Pointer to beginning of environment in r9
740743(define (copy-env-to-heap fvs c i)
741744 (match fvs
742745 ['() '() ]
743746 [(cons x fvs)
744- `((mov rax (offset rsp ,(- (add1 (lookup x c)))))
745- (mov (offset rdi ,i) rax )
747+ `((mov r8 (offset rsp ,(- (add1 (lookup x c)))))
748+ (mov (offset r9 ,i) r8 )
746749 ,@(copy-env-to-heap fvs c (add1 i)))]))
747750)
748751
@@ -822,6 +825,199 @@ Let's try it out:
822825(asm-interp (compile '((λ (f) (f (f 0 ))) (λ (x) (add1 x)))))
823826]
824827
825- And here's the complete compiler, including tail calls:
828+ @section[#:tag-prefix "loot " ]{Recursive Functions}
829+
830+ Writing recursive programs with the Y-combinator is a bit
831+ inconvenient. Let us now add a recursive function binding construct:
832+ @racket[letrec ].
833+
834+ A @racket[letrec ]-expression has a shape like a
835+ @racket[let ]-expression, but variables are bound in both the body
836+ @emph{and } the right-hand-side of the @racket[letrec ]. To keep
837+ matters simple, we will assume the right-hand-sides of a
838+ @racket[letrec ] are all @racket[λ ]-expressions. (Racket eases this
839+ restriction, but it significantly complicates compilation.)
840+
841+ So for example, writing the @racket[even?] and @racket[odd?] functions
842+ using @racket[letrec ] looks like:
843+
844+ @ex[
845+ (letrec ((even?
846+ (λ (x)
847+ (if (zero? x)
848+ #t
849+ (odd? (sub1 x)))))
850+ (odd?
851+ (λ (x)
852+ (if (zero? x)
853+ #f
854+ (even? (sub1 x))))))
855+ (even? 10 ))
856+ ]
857+
858+
859+ To compile a @racket[letrec ]-expression, we can compile the
860+ @racket[λ ]-expression as functions just as before. Notice that the
861+ recursive (or mutually recursive) occurrence will be considered a free
862+ variable within the @racket[λ ]-expression, so just like any other free
863+ variable, the closure creation should capture the value of this
864+ binding.
865+
866+ We need to extend the syntax functions for computing free variables,
867+ extracting @racket[λ ]-expressions, and so on. All of this is
868+ straightforward.
869+
870+ The key complication to compiling a @racket[letrec ]-expression is that
871+ the name of a function should be bound---to itself---within the body
872+ of the function. The key insight into achieving this is to first
873+ allocate closures, but to delay the actual population of the closures'
874+ environments.
875+
876+ The way that compiling a @racket[letrec ]-expression works is roughly:
877+
878+ @itemlist[
879+
880+ @item{allocate a closure for each of the right-hand-side
881+ @racket[λ ]-expressions, but do not copy the (relevant parts of the)
882+ environment in to closures (yet),}
883+
884+ @item{push each of these closures on to the stack (effectively binding
885+ the left-hand-sides to the unitialized closures),}
886+
887+ @item{now that the names are bound, we can populate the closures, and
888+ references to any of the @racket[letrec ]-bound variables will be
889+ captured correctly,}
890+
891+ @item{then compile the body in an environment that includes all of the
892+ @racket[letrec ]-bound variables.}
893+
894+ ]
895+
896+ The @racket[compile-letrec] function takes a list of variables to
897+ bind, the right-hand-side @racket[λ ]-expressions, body, and
898+ compile-time environment. It relies on three helper functions to
899+ handle the tasks listed above:
900+
901+ @#reader scribble/comment-reader
902+ (racketblock
903+ ;; (Listof Variable) (Listof Lambda) Expr CEnv -> Asm
904+ (define (compile-letrec fs ls e c)
905+ (let ((c0 (compile-letrec-λs ls c))
906+ (c1 (compile-letrec-init fs ls (append fs c)))
907+ (c2 (compile-e e (append fs c))))
908+ `(,@c0
909+ ,@c1
910+ ,@c2)))
911+ )
912+
913+ The first two tasks are taken care of by @racket[compile-letrec-λs],
914+ which allocates unitialized closures and pushes each on the stack.
915+
916+ @#reader scribble/comment-reader
917+ (racketblock
918+ ;; (Listof Lambda) CEnv -> Asm
919+ ;; Create a bunch of uninitialized closures and push them on the stack
920+ (define (compile-letrec-λs ls c)
921+ (match ls
922+ ['() '() ]
923+ [(cons l ls)
924+ (let ((cs (compile-letrec-λs ls (cons #f c)))
925+ (ys (fvs l)))
926+ `((lea rax (offset ,(second (third l)) 0 ))
927+ (mov (offset rdi 0 ) rax)
928+ (mov rax ,(length ys))
929+ (mov (offset rdi 1 ) rax)
930+ (mov rax rdi)
931+ (or rax ,type-proc)
932+ (add rdi ,(* 8 (+ 2 (length ys))))
933+ (mov (offset rsp ,(- (add1 (length c)))) rax)
934+ ,@cs))]))
935+ )
936+
937+ The @racket[compile-letrec-init] goes through each function and
938+ initializes its closure now that all of the function pointers are
939+ available. Finally the body is compiled in an extended environment.
940+
941+ @#reader scribble/comment-reader
942+ (racketblock
943+ ;; (Listof Variable) (Listof Lambda) CEnv -> Asm
944+ ;; Initialize closures bound to each variable in fs
945+ (define (compile-letrec-init fs ls c)
946+ (match fs
947+ ['() '() ]
948+ [(cons f fs)
949+ (let ((ys (fvs (first ls)))
950+ (cs (compile-letrec-init fs (rest ls) c)))
951+ `((mov r9 (offset rsp ,(- (add1 (lookup f c)))))
952+ (xor r9 ,type-proc)
953+ (add r9 16 ) ; move past label and length
954+ ,@(copy-env-to-heap ys c 0 )
955+ ,@cs))]))
956+ )
957+
958+ We can give a spin:
959+
960+ @ex[
961+ (asm-interp (compile '(letrec ((even?
962+ (λ (x)
963+ (if (zero? x)
964+ #t
965+ (odd? (sub1 x)))))
966+ (odd?
967+ (λ (x)
968+ (if (zero? x)
969+ #f
970+ (even? (sub1 x))))))
971+ (even? 10 ))))
972+ ]
973+
974+
975+ @section[#:tag-prefix "loot " ]{Syntactic sugar for function definitions}
976+
977+ The @racket[letrec ] form is a generlization of the
978+ @racket[(begin (define (_f _x ... ) _e) ... _e0)] form we started with
979+ when we first started looking at adding functions to the language. To
980+ fully subsume the language of @seclink["Iniquity " ]{Iniquity}, we can
981+ add this form back in to the language as syntactic sugar for
982+ @racket[letrec ], i.e. we can eliminate this form from programs by
983+ rewriting them.
984+
985+ Let @tt{Expr+} refer to programs containing @racket[(begin (define (_f
986+ _x ... ) _e) ... _e0)]. The @racket[desugar] function writes
987+ @tt{Expr+}s into @tt{Expr}s.
988+
989+ @#reader scribble/comment-reader
990+ (racketblock
991+ ;; Expr+ -> Expr
992+ (define (desugar e+)
993+ (match e+
994+ [`(begin ,@(list `(define (,fs . ,xss) ,es) ... ) ,e)
995+ `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es)
996+ ,(desugar e))]
997+ [(? symbol? x) x]
998+ [(? imm? i) i]
999+ [`(box ,e0) `(box ,(desugar e0))]
1000+ [`(unbox ,e0) `(unbox ,(desugar e0))]
1001+ [`(cons ,e0 ,e1) `(cons ,(desugar e0) ,(desugar e1))]
1002+ [`(car ,e0) `(car ,(desugar e0))]
1003+ [`(cdr ,e0) `(cdr ,(desugar e0))]
1004+ [`(add1 ,e0) `(add1 ,(desugar e0))]
1005+ [`(sub1 ,e0) `(sub1 ,(desugar e0))]
1006+ [`(zero? ,e0) `(zero? ,(desugar e0))]
1007+ [`(empty? ,e0) `(empty? ,(desugar e0))]
1008+ [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))]
1009+ [`(+ ,e0 ,e1) `(+ ,(desugar e0) ,(desugar e1))]
1010+ [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))]
1011+ [`(letrec ,bs ,e0)
1012+ `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs)
1013+ ,(desugar e0))]
1014+ [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))]
1015+ [`(,e . ,es) `(,(desugar e) ,@(map desugar es))]))
1016+ )
1017+
1018+ The compiler now just desugars before labeling and compiling expressions.
1019+
1020+ And here's the complete compiler, including tail calls, @racket[letrec ], etc.:
8261021
8271022@codeblock-include["loot/compile.rkt " ]
1023+
0 commit comments