|
12 | 12 | @(define codeblock-include (make-codeblock-include #'h)) |
13 | 13 |
|
14 | 14 | @(for-each (λ (f) (ev `(require (file ,(path->string (build-path notes "loot" f)))))) |
15 | | - '("interp.rkt" "compile.rkt" #;"asm/interp.rkt" #;"asm/printer.rkt")) |
| 15 | + '("interp.rkt" "compile.rkt" "syntax.rkt" "asm/interp.rkt" "asm/printer.rkt")) |
16 | 16 |
|
17 | 17 | @title[#:tag "Loot"]{Loot: lambda the ultimate} |
18 | 18 |
|
@@ -478,3 +478,350 @@ And we get the same results: |
478 | 478 |
|
479 | 479 | @section[#:tag-prefix "loot"]{Compiling Loot} |
480 | 480 |
|
| 481 | +Compiling a @racket[λ]-expression will involve generating two |
| 482 | +different chunks of instructions: |
| 483 | + |
| 484 | +@itemlist[ |
| 485 | + |
| 486 | +@item{one to implement the function, i.e. the code to be executed when |
| 487 | +the function created by the @racket[λ]-expression is called, and} |
| 488 | + |
| 489 | +@item{one to create a closure, i.e. to capture the environment at the |
| 490 | +point the @racket[λ]-expression is evaluated.} |
| 491 | + |
| 492 | +] |
| 493 | + |
| 494 | +@section[#:tag-prefix "loot"]{Compiling Function Definitions} |
| 495 | + |
| 496 | +The first part closely follows the approach of compiling a function |
| 497 | +definition @racket[(define (_f _x ...) _e)] from our previous compilers. |
| 498 | + |
| 499 | +There are two important differences from the past though: |
| 500 | + |
| 501 | +@itemlist[ |
| 502 | + |
| 503 | +@item{@racket[λ]-expressions don't have a name, and} |
| 504 | + |
| 505 | +@item{the body of the @racket[λ]-expression may reference variables |
| 506 | +bound outside of the @racket[λ]-expression.} |
| 507 | + |
| 508 | +] |
| 509 | + |
| 510 | +To deal with the first issue, we first make a pass over the program |
| 511 | +inserting computed names for each @racket[λ]-expression. |
| 512 | + |
| 513 | +To accommodate this, we will introduce the following data type for |
| 514 | +``labelled'' expressions: |
| 515 | + |
| 516 | +@#reader scribble/comment-reader |
| 517 | +(racketblock |
| 518 | +;; type LExpr = |
| 519 | +;; .... |
| 520 | +;; | `(λ ,Formals ',Symbol ,Expr) |
| 521 | +) |
| 522 | + |
| 523 | +An @tt{LExpr} is just like an @tt{Expr} except that |
| 524 | +@racket[λ]-expressions have a form like @racket[(λ (x) 'fred (+ x |
| 525 | +x))]. The symbol @racket['fred] here is used to give a name to the |
| 526 | +@racket[λ]-expression. The use of @racket[quote] is so that |
| 527 | +@tt{LExprs} are still a valid subset of Racket expressions. |
| 528 | + |
| 529 | +The first step of the compiler will be to label every |
| 530 | +@racket[λ]-expression using the following function: |
| 531 | + |
| 532 | +@#reader scribble/comment-reader |
| 533 | +(racketblock |
| 534 | +;; Expr -> LExpr |
| 535 | +(define (label-λ e) |
| 536 | + (match e |
| 537 | + [(? symbol? x) x] |
| 538 | + [(? imm? i) i] |
| 539 | + [`(box ,e0) `(box ,(label-λ e0))] |
| 540 | + [`(unbox ,e0) `(unbox ,(label-λ e0))] |
| 541 | + [`(cons ,e0 ,e1) `(cons ,(label-λ e0) ,(label-λ e1))] |
| 542 | + [`(car ,e0) `(car ,(label-λ e0))] |
| 543 | + [`(cdr ,e0) `(cdr ,(label-λ e0))] |
| 544 | + [`(add1 ,e0) `(add1 ,(label-λ e0))] |
| 545 | + [`(sub1 ,e0) `(sub1 ,(label-λ e0))] |
| 546 | + [`(zero? ,e0) `(zero? ,(label-λ e0))] |
| 547 | + [`(empty? ,e0) `(empty? ,(label-λ e0))] |
| 548 | + [`(if ,e0 ,e1 ,e2) `(if ,(label-λ e0) ,(label-λ e1) ,(label-λ e2))] |
| 549 | + [`(+ ,e0 ,e1) `(+ ,(label-λ e0) ,(label-λ e1))] |
| 550 | + [`(let ((,x ,e0)) ,e1) `(let ((,x ,(label-λ e0))) ,(label-λ e1))] |
| 551 | + [`(λ ,xs ,e0) `(λ ,xs ',(gensym) ,(label-λ e0))] |
| 552 | + [`(,e . ,es) `(,(label-λ e) ,@(map label-λ es))])) |
| 553 | +) |
| 554 | + |
| 555 | +Here it is at work: |
| 556 | + |
| 557 | +@ex[ |
| 558 | +(label-λ |
| 559 | + '(λ (t) |
| 560 | + ((λ (f) (t (λ (z) ((f f) z)))) |
| 561 | + (λ (f) (t (λ (z) ((f f) z))))))) |
| 562 | +] |
| 563 | + |
| 564 | +Now turning to the second issue---@racket[λ]-expressions may reference |
| 565 | +variables bound outside of the expression---let's consider how to |
| 566 | +compile something like @racket[(λ (x) z)]. |
| 567 | + |
| 568 | +There are many possible solutions, but perhaps the simplest is to |
| 569 | +compile this as a function that takes @emph{two} arguments, |
| 570 | +i.e. compile it as if it were: @racket[(λ (x z) z)]. The idea is that |
| 571 | +a @racket[λ]-expression defines a function of both explicit arguments |
| 572 | +(the parameters) and implicit arguments (the free variables of the |
| 573 | +@racket[λ]-expression). |
| 574 | + |
| 575 | +This will have to work in concert with closure creation and function |
| 576 | +calls. When the @racket[λ]-expression is evaluated, a closure will be |
| 577 | +created storing the value of @racket[z]. When the function is |
| 578 | +applied, the caller will need to retrieve that value and place it as |
| 579 | +the second argument on the stack before calling the function's code. |
| 580 | + |
| 581 | +To implement this, we will need to compute the free variables, which |
| 582 | +we do with the following function: |
| 583 | + |
| 584 | +@#reader scribble/comment-reader |
| 585 | +(racketblock |
| 586 | +;; LExpr -> (Listof Variable) |
| 587 | +(define (fvs e) |
| 588 | + (define (fvs e) |
| 589 | + (match e |
| 590 | + [(? symbol? x) (list x)] |
| 591 | + [(? imm? i) '()] |
| 592 | + [`(box ,e0) (fvs e0)] |
| 593 | + [`(unbox ,e0) (fvs e0)] |
| 594 | + [`(cons ,e0 ,e1) (append (fvs e0) (fvs e1))] |
| 595 | + [`(car ,e0) (fvs e0)] |
| 596 | + [`(cdr ,e0) (fvs e0)] |
| 597 | + [`(add1 ,e0) (fvs e0)] |
| 598 | + [`(sub1 ,e0) (fvs e0)] |
| 599 | + [`(zero? ,e0) (fvs e0)] |
| 600 | + [`(empty? ,e0) (fvs e0)] |
| 601 | + [`(if ,e0 ,e1 ,e2) (append (fvs e0) (fvs e1) (fvs e2))] |
| 602 | + [`(+ ,e0 ,e1) (append (fvs e0) (fvs e1))] |
| 603 | + [`(let ((,x ,e0)) ,e1) (append (fvs e0) (remq* (list x) (fvs e1)))] |
| 604 | + [`(λ ,xs ,l ,e0) (remq* xs (fvs e0))] |
| 605 | + [`(,e . ,es) (append (fvs e) (apply append (map fvs es)))])) |
| 606 | + (remove-duplicates (fvs e))) |
| 607 | +) |
| 608 | + |
| 609 | +We can now write the function that compiles a labelled |
| 610 | +@racket[λ]-expression into a function in assembly: |
| 611 | + |
| 612 | +@#reader scribble/comment-reader |
| 613 | +(racketblock |
| 614 | +;; Lambda -> Asm |
| 615 | +(define (compile-λ-definition l) |
| 616 | + (match l |
| 617 | + [`(λ ,xs ',f ,e0) |
| 618 | + (let ((c0 (compile-tail-e e0 (reverse (append xs (fvs l)))))) |
| 619 | + `(,f |
| 620 | + ,@c0 |
| 621 | + ret))])) |
| 622 | +) |
| 623 | + |
| 624 | +Here's what's emitted for a @racket[λ]-expression with a free variable: |
| 625 | +@ex[ |
| 626 | +(compile-λ-definition '(λ (x) 'f z)) |
| 627 | +] |
| 628 | + |
| 629 | +Notice that it's identical to a @racket[λ]-expression with an added |
| 630 | +parameter and no free variables: |
| 631 | +@ex[ |
| 632 | +(compile-λ-definition '(λ (x z) 'f z)) |
| 633 | +] |
| 634 | + |
| 635 | +The compiler will need to generate one such function for each |
| 636 | +@racket[λ]-expression in the program. So we use a helper function for |
| 637 | +extracting all the @racket[λ]-expressions and another for compiling |
| 638 | +each of them: |
| 639 | + |
| 640 | +@#reader scribble/comment-reader |
| 641 | +(racketblock |
| 642 | +;; LExpr -> (Listof LExpr) |
| 643 | +;; Extract all the lambda expressions |
| 644 | +(define (λs e) |
| 645 | + (match e |
| 646 | + [(? symbol? x) '()] |
| 647 | + [(? imm? i) '()] |
| 648 | + [`(box ,e0) (λs e0)] |
| 649 | + [`(unbox ,e0) (λs e0)] |
| 650 | + [`(cons ,e0 ,e1) (append (λs e0) (λs e1))] |
| 651 | + [`(car ,e0) (λs e0)] |
| 652 | + [`(cdr ,e0) (λs e0)] |
| 653 | + [`(add1 ,e0) (λs e0)] |
| 654 | + [`(sub1 ,e0) (λs e0)] |
| 655 | + [`(zero? ,e0) (λs e0)] |
| 656 | + [`(empty? ,e0) (λs e0)] |
| 657 | + [`(if ,e0 ,e1 ,e2) (append (λs e0) (λs e1) (λs e2))] |
| 658 | + [`(+ ,e0 ,e1) (append (λs e0) (λs e1))] |
| 659 | + [`(let ((,x ,e0)) ,e1) (append (λs e0) (λs e1))] |
| 660 | + [`(λ ,xs ,l ,e0) (cons e (λs e0))] |
| 661 | + [`(,e . ,es) (append (λs e) (apply append (map λs es)))])) |
| 662 | + |
| 663 | +;; (Listof Lambda) -> Asm |
| 664 | +(define (compile-λ-definitions ls) |
| 665 | + (apply append (map compile-λ-definition ls))) |
| 666 | +) |
| 667 | + |
| 668 | + |
| 669 | +The top-level @racket[compile] function now inserts labels and |
| 670 | +compiles all the @racket[λ]-expressions to functions: |
| 671 | + |
| 672 | +@#reader scribble/comment-reader |
| 673 | +(racketblock |
| 674 | +;; Expr -> Asm |
| 675 | +(define (compile e) |
| 676 | + (let ((le (label-λ e))) |
| 677 | + `(entry |
| 678 | + ,@(compile-tail-e le '()) |
| 679 | + ret |
| 680 | + ,@(compile-λ-definitions (λs le)) |
| 681 | + err |
| 682 | + (push rbp) |
| 683 | + (call error) |
| 684 | + ret))) |
| 685 | +) |
| 686 | + |
| 687 | +What remains is the issue of compiling @racket[λ]-expressions to code |
| 688 | +to create a closure. |
| 689 | + |
| 690 | +@section[#:tag-prefix "loot"]{Save the Environment: Create a Closure!} |
| 691 | + |
| 692 | +We've already seen how to create a reference to a function pointer, |
| 693 | +enabling functions to be first-class values that can be passed around, |
| 694 | +returned from other functions, stored in data structures, etc. The |
| 695 | +basic idea was to allocate a location in memory and save the address |
| 696 | +of a function label there. |
| 697 | + |
| 698 | +A closure is just this, plus the environment that needs to be restored |
| 699 | +when the function is called. So representing a closure is fairly |
| 700 | +straightforward: we will allocate a location in memory and save the |
| 701 | +function label, plus each value that is needed from the environment. |
| 702 | +In order to keep track of how many values there are, we'll also store |
| 703 | +the length of the environment. |
| 704 | + |
| 705 | +Here's the function for emitting closure construction code: |
| 706 | + |
| 707 | +@#reader scribble/comment-reader |
| 708 | +(racketblock |
| 709 | +;; (Listof Variable) Label Expr CEnv -> Asm |
| 710 | +(define (compile-λ xs f e0 c) |
| 711 | + (let ((fvs (fvs `(λ ,xs ',f ,e0)))) |
| 712 | + `(;; Save label address |
| 713 | + (lea rax (offset ,f 0)) |
| 714 | + (mov (offset rdi 0) rax) |
| 715 | + |
| 716 | + ;; Save the environment |
| 717 | + (mov rax ,(length fvs)) |
| 718 | + (mov (offset rdi 1) rax) |
| 719 | + ,@(copy-env-to-heap fvs c 2) |
| 720 | + |
| 721 | + ;; Return a pointer to the closure |
| 722 | + (mov rax rdi) |
| 723 | + (or rax ,type-proc) |
| 724 | + (add rdi ,(* 8 (+ 2 (length fvs))))))) |
| 725 | +) |
| 726 | + |
| 727 | +Compared to the previous code we saw for function pointer references, the |
| 728 | +only difference is the code to store the length and value of the free |
| 729 | +variables of the @racket[λ]-expression. Also: the amount of memory |
| 730 | +allocated is no longer just a single cell, but depends on the number |
| 731 | +of free variables being closed over. |
| 732 | + |
| 733 | +The @racket[copy-env-to-heap] function generates instructions for |
| 734 | +dereferencing variables and copying them to the appropriate memory |
| 735 | +location where the closure is stored: |
| 736 | + |
| 737 | +@#reader scribble/comment-reader |
| 738 | +(racketblock |
| 739 | +;; (Listof Variable) CEnv Natural -> Asm |
| 740 | +(define (copy-env-to-heap fvs c i) |
| 741 | + (match fvs |
| 742 | + ['() '()] |
| 743 | + [(cons x fvs) |
| 744 | + `((mov rax (offset rsp ,(- (add1 (lookup x c))))) |
| 745 | + (mov (offset rdi ,i) rax) |
| 746 | + ,@(copy-env-to-heap fvs c (add1 i)))])) |
| 747 | +) |
| 748 | + |
| 749 | +That's all there is to closure construction! |
| 750 | + |
| 751 | +@section[#:tag-prefix "loot"]{Calling Functions} |
| 752 | + |
| 753 | +The final piece of the puzzle is making function calls and |
| 754 | +closures work together. Remember that a @racket[λ]-expression is |
| 755 | +compiled into a function that expects two sets of arguments on the |
| 756 | +stack: the first are the explicit arguments that are given at the call |
| 757 | +site; the other arguments are the implicit arguments corresponding to |
| 758 | +free variables of the @racket[λ]-expression being called. The values of |
| 759 | +these arguments are given by the environment saved in the closure of |
| 760 | +the @racket[λ]-expression. |
| 761 | + |
| 762 | +So the code generated for a function call needs to manage running each |
| 763 | +subexpression, the first of which should evaluate to a function (a |
| 764 | +pointer to a closure). The arguments are saved on the stack, and then |
| 765 | +the values stored in the environment part of the closure need to be |
| 766 | +copied from the heap to the stack: |
| 767 | + |
| 768 | +@#reader scribble/comment-reader |
| 769 | +(racketblock |
| 770 | +;; LExpr (Listof LExpr) CEnv -> Asm |
| 771 | +(define (compile-call e0 es c) |
| 772 | + (let ((cs (compile-es es (cons #f c))) |
| 773 | + (c0 (compile-e e0 c)) |
| 774 | + (i (- (add1 (length c)))) |
| 775 | + (stack-size (* 8 (length c)))) |
| 776 | + `(,@c0 |
| 777 | + (mov (offset rsp ,i) rax) |
| 778 | + ,@cs |
| 779 | + (mov rax (offset rsp ,i)) |
| 780 | + ,@assert-proc |
| 781 | + (xor rax ,type-proc) |
| 782 | + (sub rsp ,stack-size) |
| 783 | + ,@(copy-closure-env-to-stack (add1 (length es))) |
| 784 | + (call (offset rax 0)) |
| 785 | + (add rsp ,stack-size)))) |
| 786 | +) |
| 787 | + |
| 788 | +The only new bit is the use of @racket[copy-closure-env-to-stack]. |
| 789 | +Unlike the closure construction code, in which we statically know what |
| 790 | +and how many variables to save in a closure, we must dynamically |
| 791 | +loop over the environment to move values to the stack: |
| 792 | + |
| 793 | +@#reader scribble/comment-reader |
| 794 | +(racketblock |
| 795 | +;; Natural -> Asm |
| 796 | +;; Copy closure's (in rax) env to stack skipping n spots |
| 797 | +(define (copy-closure-env-to-stack n) |
| 798 | + (let ((copy-loop (gensym 'copy_closure)) |
| 799 | + (copy-done (gensym 'copy_done))) |
| 800 | + `((mov r8 (offset rax 1)) ; length |
| 801 | + (mov r9 rax) |
| 802 | + (add r9 16) ; start of env |
| 803 | + (mov rcx rsp) ; start of stack |
| 804 | + (add rcx ,(- (* 8 (add1 n)))) |
| 805 | + ,copy-loop |
| 806 | + (cmp r8 0) |
| 807 | + (je ,copy-done) |
| 808 | + (mov rbx (offset r9 0)) |
| 809 | + (mov (offset rcx 0) rbx) |
| 810 | + (sub r8 1) |
| 811 | + (add r9 8) |
| 812 | + (sub rcx 8) |
| 813 | + (jmp ,copy-loop) |
| 814 | + ,copy-done))) |
| 815 | +) |
| 816 | + |
| 817 | +Let's try it out: |
| 818 | + |
| 819 | +@ex[ |
| 820 | +(asm-interp (compile '((let ((x 8)) (λ (y) x)) 2))) |
| 821 | +(asm-interp (compile '(((λ (x) (λ (y) x)) 8) 2))) |
| 822 | +(asm-interp (compile '((λ (f) (f (f 0))) (λ (x) (add1 x))))) |
| 823 | +] |
| 824 | + |
| 825 | +And here's the complete compiler, including tail calls: |
| 826 | + |
| 827 | +@codeblock-include["loot/compile.rkt"] |
0 commit comments