Skip to content

Commit 09ccb90

Browse files
committed
Checkpoint self-hosting
Bunch of different steps toward self-hosting: + Rework source code to be within language supported by compiler. + Lots of work toward implementing `read`. + Simplified version of a86 library, which is compilable. + Expanded standard library. + Add libunistring for some Unicode support.
1 parent 65c786f commit 09ccb90

29 files changed

Lines changed: 1323 additions & 1055 deletions

langs/outlaw/Makefile

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# NOTES:
2+
# - You will need a static version of libunistring to link against; on Mac
3+
# ld will always choose .dylib over .a to link, so either rename or remove
4+
# the .dylib versions.
5+
16
UNAME := $(shell uname)
27
.PHONY: test
38

@@ -20,7 +25,7 @@ objs = \
2025
default: runtime.o
2126

2227
runtime.o: $(objs)
23-
ld -r $(objs) -o runtime.o
28+
ld -r $(objs) -lunistring -o runtime.o
2429

2530
%.run: %.o runtime.o
2631
gcc runtime.o $< -o $@
@@ -32,10 +37,10 @@ runtime.o: $(objs)
3237
nasm -g -f $(format) -o $@ $<
3338

3439
stdlib.s: stdlib.rkt
35-
racket -t compile-library.rkt -m stdlib.rkt > stdlib.s
40+
cat stdlib.rkt | racket -t compile-library.rkt > stdlib.s
3641

3742
%.s: %.rkt
38-
racket -t compile-file.rkt -m $< > $@
43+
cat $< | racket -t compile-stdin.rkt > $@
3944

4045
clean:
4146
rm *.o *.s *.run

langs/outlaw/a86/ast.rkt

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#lang racket
2+
(provide (all-defined-out))
3+
4+
(struct Text ())
5+
(struct Data ())
6+
7+
(struct Global (x))
8+
(struct Label (x))
9+
(struct Call (x))
10+
(struct Ret ())
11+
(struct Mov (dst src))
12+
(struct Add (dst src))
13+
(struct Sub (dst src))
14+
(struct Cmp (a1 a2))
15+
(struct Jmp (x))
16+
(struct Je (x))
17+
(struct Jne (x))
18+
(struct Jl (x))
19+
(struct Jle (x))
20+
(struct Jg (x))
21+
(struct Jge (x))
22+
(struct And (dst src))
23+
(struct Or (dst src))
24+
(struct Xor (dst src))
25+
(struct Sal (dst i))
26+
(struct Sar (dst i))
27+
(struct Push (a1))
28+
(struct Pop (a1))
29+
(struct Lea (dst x))
30+
(struct Div (den))
31+
32+
(struct Offset (r i))
33+
(struct Extern (x))
34+
35+
(struct Equ (x v))
36+
(struct Const (x))
37+
(struct Dd (x))
38+
(struct Dq (x))
39+
(struct Plus (e1 e2))
40+
41+
;; (U Instruction Asm) ... -> Asm
42+
;; Convenient for sequencing instructions or groups of instructions
43+
(define (seq . xs)
44+
(foldr (λ (x is)
45+
(if (list? x)
46+
(append x is)
47+
(cons x is)))
48+
'()
49+
xs))
50+
51+
(define (register? x)
52+
(and (memq x '(cl eax rax rbx rcx rdx rbp rsp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15))
53+
#t))
54+
55+
(define (exp? x)
56+
(or (Offset? x)
57+
(and (Plus? x)
58+
(exp? (Plus-e1 x))
59+
(exp? (Plus-e2 x)))
60+
(symbol? x)
61+
(integer? x)))
62+
63+
(define offset? Offset?)
64+
65+
(define (label? x)
66+
(and (symbol? x)
67+
(not (register? x))))
68+
69+
(define (instruction? x)
70+
(or (Text? x)
71+
(Data? x)
72+
(Global? x)
73+
(Label? x)
74+
(Extern? x)
75+
(Call? x)
76+
(Ret? x)
77+
(Mov? x)
78+
(Add? x)
79+
(Sub? x)
80+
(Cmp? x)
81+
(Jmp? x)
82+
(Je? x)
83+
(Jne? x)
84+
(Jl? x)
85+
(Jle? x)
86+
(Jg? x)
87+
(Jge? x)
88+
(And? x)
89+
(Or? x)
90+
(Xor? x)
91+
(Sal? x)
92+
(Sar? x)
93+
(Push? x)
94+
(Pop? x)
95+
(Lea? x)
96+
(Div? x)
97+
;(Comment? x)
98+
(Equ? x)
99+
(Dd? x)
100+
(Dq? x)))

langs/outlaw/a86/callback.rkt

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#lang racket
2+
;; based on racket/draw/unsafe/callback
3+
(provide guard-foreign-escape)
4+
(require ffi/unsafe
5+
ffi/unsafe/atomic)
6+
7+
(define callback-atomic? (eq? 'chez-scheme (system-type 'vm)))
8+
9+
(define-syntax-rule (guard-foreign-escape e0 e ...)
10+
(call-guarding-foreign-escape (lambda () e0 e ...)))
11+
12+
(define (call-guarding-foreign-escape thunk)
13+
(if callback-atomic?
14+
((call-with-c-return
15+
(lambda ()
16+
(with-handlers ([(lambda (x) #t)
17+
(lambda (x)
18+
;; Deliver an exception re-raise after returning back
19+
;; from `call-with-c-return`:
20+
(lambda ()
21+
(when (in-atomic-mode?)
22+
(end-atomic)) ; error happened during atomic mode
23+
;(enable-interrupts) ; ... with interrupts disabled
24+
(void/reference-sink call-with-c-return-box)
25+
(raise x)))])
26+
(let ([vs (call-with-values thunk list)])
27+
;; Deliver successful values after returning back from
28+
;; `call-with-c-return`:
29+
(lambda ()
30+
(void/reference-sink call-with-c-return-box)
31+
(apply values vs)))))))
32+
(thunk)))
33+
34+
(define call-with-c-return-box (box #f))
35+
36+
;; `call-with-c-return` looks like a foreign function, due to a cast
37+
;; to and from a callback, so returning from `call-with-c-return` will
38+
;; pop and C frame stacks (via longjmp internally) that were pushed
39+
;; since `call-with-c-return` was called.
40+
(define call-with-c-return
41+
(and callback-atomic?
42+
(cast (lambda (thunk) (thunk))
43+
(_fun #:atomic? #t
44+
#:keep call-with-c-return-box
45+
_racket -> _racket)
46+
(_fun _racket -> _racket))))

langs/outlaw/a86/interp.rkt

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#lang racket
2+
(provide/contract
3+
[current-objs (parameter/c (listof path-string?))]
4+
[asm-interp (-> (listof instruction?) any/c)]
5+
[asm-interp/io (-> (listof instruction?) string? any/c)])
6+
7+
(require "printer.rkt" "ast.rkt" "callback.rkt"
8+
(rename-in ffi/unsafe [-> _->]))
9+
10+
11+
;; Assembly code is linked with object files in this parameter
12+
(define current-objs
13+
(make-parameter '()))
14+
15+
;; Asm -> Value
16+
;; Interpret (by assemblying, linking, and loading) x86-64 code
17+
;; Assume: entry point is "entry"
18+
(define (asm-interp a)
19+
(asm-interp/io a #f))
20+
21+
(define fopen
22+
(get-ffi-obj "fopen" (ffi-lib #f) (_fun _path _string/utf-8 _-> _pointer)))
23+
24+
(define fflush
25+
(get-ffi-obj "fflush" (ffi-lib #f) (_fun _pointer _-> _void)))
26+
27+
(define fclose
28+
(get-ffi-obj "fclose" (ffi-lib #f) (_fun _pointer _-> _void)))
29+
30+
(define fmt (if (eq? (system-type 'os) 'macosx) 'macho64 'elf64))
31+
32+
;; Asm String -> (cons Value String)
33+
;; Like asm-interp, but uses given string for input and returns
34+
;; result with string output
35+
(define (asm-interp/io a input)
36+
(define t.s (make-temporary-file "nasm~a.s"))
37+
(define t.o (path-replace-extension t.s #".o"))
38+
(define t.so (path-replace-extension t.s #".so"))
39+
(define t.in (path-replace-extension t.s #".in"))
40+
(define t.out (path-replace-extension t.s #".out"))
41+
42+
(with-output-to-file t.s
43+
#:exists 'truncate
44+
(λ ()
45+
; (parameterize ((current-shared? #t))
46+
(displayln (asm-string a))))
47+
48+
(nasm t.s t.o)
49+
(ld t.o t.so)
50+
51+
(define libt.so (ffi-lib t.so))
52+
53+
(define init-label
54+
(match (findf Label? a)
55+
[(Label l) l]
56+
[_ (error "no initial label found")]))
57+
58+
(define entry
59+
(get-ffi-obj init-label libt.so (_fun _pointer _-> _int64)))
60+
61+
;; install our own `error_handler` procedure to prevent `exit` calls
62+
;; from interpreted code bringing down the parent process. All of
63+
;; these hooks into the runtime need a better API and documentation,
64+
;; but this is a rough hack to make Extort work for now.
65+
(when (ffi-obj-ref "error_handler" libt.so (thunk #f))
66+
(set-ffi-obj! "error_handler" libt.so _pointer
67+
(function-ptr (λ () (raise 'err)) (_fun _-> _void))))
68+
69+
70+
(define current-heap #f)
71+
72+
;; allocate a heap
73+
(when (ffi-obj-ref "heap" libt.so (thunk #f))
74+
(set! current-heap (make-c-parameter "heap" libt.so _pointer))
75+
76+
(if (ffi-obj-ref "from" libt.so (thunk #f))
77+
(begin
78+
(current-heap
79+
; IMPROVE ME: hard-coded heap size
80+
(malloc _int64 20000 'raw))
81+
(set-ffi-obj! "from" libt.so _pointer (current-heap))
82+
(set-ffi-obj! "to" libt.so _pointer (ptr-add (current-heap) 10000 _int64))
83+
(set-ffi-obj! "types" libt.so _pointer (malloc _int32 10000)))
84+
(current-heap
85+
; IMPROVE ME: hard-coded heap size
86+
(malloc _int64 10000 'raw))))
87+
88+
(delete-file t.s)
89+
(delete-file t.o)
90+
(delete-file t.so)
91+
(if input
92+
(let ()
93+
(unless (and (ffi-obj-ref "in" libt.so (thunk #f))
94+
(ffi-obj-ref "out" libt.so (thunk #f)))
95+
(error "asm-interp/io: running in IO mode without IO linkage"))
96+
97+
(with-output-to-file t.in #:exists 'truncate
98+
(thunk (display input)))
99+
100+
(define current-in
101+
(make-c-parameter "in" libt.so _pointer))
102+
(define current-out
103+
(make-c-parameter "out" libt.so _pointer))
104+
105+
(current-in (fopen t.in "r"))
106+
(current-out (fopen t.out "w"))
107+
108+
(define result
109+
(begin0
110+
(with-handlers ((symbol? identity))
111+
(guard-foreign-escape
112+
(if current-heap
113+
(cons (current-heap) (entry (current-heap)))
114+
(entry #f))))
115+
#;
116+
(when current-heap
117+
(free (current-heap)))))
118+
119+
(fflush (current-out))
120+
(fclose (current-in))
121+
(fclose (current-out))
122+
123+
(define output (file->string t.out))
124+
(delete-file t.in)
125+
(delete-file t.out)
126+
(cons result output))
127+
128+
(begin0
129+
(with-handlers ((symbol? identity))
130+
(guard-foreign-escape
131+
(if current-heap
132+
(cons (current-heap) (entry (current-heap)))
133+
(entry #f))))
134+
#;
135+
(when current-heap
136+
(free (current-heap))))))
137+
138+
139+
(define (string-splice xs)
140+
(apply string-append
141+
(add-between (map (lambda (s) (string-append "\"" s "\"")) xs)
142+
" ")))
143+
144+
;;; Utilities for calling nasm and linker with informative error messages
145+
146+
(struct exn:nasm exn:fail:user ())
147+
(define nasm-msg
148+
(string-append
149+
"assembly error: make sure to use `prog` to construct an assembly program\n"
150+
"if you did and still get this error; please share with course staff."))
151+
152+
(define (nasm:error msg)
153+
(raise (exn:nasm (format "~a\n\n~a" nasm-msg msg)
154+
(current-continuation-marks))))
155+
156+
;; run nasm on t.s to create t.o
157+
(define (nasm t.s t.o)
158+
(define err-port (open-output-string))
159+
(unless (parameterize ((current-error-port err-port))
160+
(system (format "nasm -f ~a ~a -o ~a" fmt t.s t.o)))
161+
(nasm:error (get-output-string err-port))))
162+
163+
(struct exn:ld exn:fail:user ())
164+
(define (ld:error msg)
165+
(raise (exn:ld (format "link error: ~a" msg)
166+
(current-continuation-marks))))
167+
168+
(define (ld:undef-symbol s)
169+
(ld:error
170+
(string-append
171+
(format "symbol ~a not defined in linked objects: ~a\n" s (current-objs))
172+
"use `current-objs` to link in object containing symbol definition.")))
173+
174+
;; link together t.o with current-objs to create shared t.so
175+
(define (ld t.o t.so)
176+
(define err-port (open-output-string))
177+
(define objs (string-splice (current-objs)))
178+
(define -z-defs-maybe
179+
(if (eq? (system-type 'os) 'macosx)
180+
""
181+
"-z defs "))
182+
(unless (parameterize ((current-error-port err-port))
183+
(system (format "gcc ~a-v -shared ~a ~a -o ~a"
184+
-z-defs-maybe
185+
t.o objs t.so)))
186+
(define err-msg
187+
(get-output-string err-port))
188+
(match (or (regexp-match #rx"Undefined.*\"(.*)\"" err-msg) ; mac
189+
(regexp-match #rx"undefined reference to `(.*)'" err-msg)) ; linux
190+
[(list _ symbol) (ld:undef-symbol symbol)]
191+
[_ (ld:error (format "unknown link error.\n\n~a" err-msg))])))

0 commit comments

Comments
 (0)