From 5a717ee961c9ef3fc17be80ca76956185b2ca080 Mon Sep 17 00:00:00 2001 From: "Michael D. Adams" Date: Wed, 19 Nov 2014 00:24:59 -0700 Subject: [PATCH] Initial check-in --- .hgignore | 2 + LICENSE | 26 +++++ README | 104 +++++++++++++++++ atoms.scm | 28 +++++ expand-primitives.scm | 13 +++ expand.scm | 243 ++++++++++++++++++++++++++++++++++++++++ gensym.scm | 24 ++++ idents.scm | 24 ++++ k-syntax.scm | 185 +++++++++++++++++++++++++++++++ record-match.scm | 89 +++++++++++++++ s-exprs.scm | 71 ++++++++++++ syntax-case.scm | 43 ++++++++ tests.scm | 251 ++++++++++++++++++++++++++++++++++++++++++ typed-records.scm | 45 ++++++++ u-syntax.scm | 29 +++++ 15 files changed, 1177 insertions(+) create mode 100644 .hgignore create mode 100644 LICENSE create mode 100644 README create mode 100644 atoms.scm create mode 100644 expand-primitives.scm create mode 100644 expand.scm create mode 100644 gensym.scm create mode 100644 idents.scm create mode 100644 k-syntax.scm create mode 100644 record-match.scm create mode 100644 s-exprs.scm create mode 100644 syntax-case.scm create mode 100644 tests.scm create mode 100644 typed-records.scm create mode 100644 u-syntax.scm diff --git a/.hgignore b/.hgignore new file mode 100644 index 0000000..4106f43 --- /dev/null +++ b/.hgignore @@ -0,0 +1,2 @@ +syntax: glob +*~ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a90ae27 --- /dev/null +++ b/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2014, Michael D. Adams +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README b/README new file mode 100644 index 0000000..4cec150 --- /dev/null +++ b/README @@ -0,0 +1,104 @@ +==== Overview ==== + +This library implements the hygienic macro expansion algorithm in: + + Michael D. Adams. Towards the essence of hygiene. In Proceedings of + the 42nd ACM SIGPLAN-SIGACT Symposium on Principles of Programming + Languages, POPL '15. ACM, New York, NY, USA, January 2015. ISBN + 978-1-4503-3300-9. doi: 10.1145/2676726.2677013. + + http://michaeldadams.org/papers/hygiene/ + +==== Running the tests ==== + +To test this code, just load "tests.scm". + +The output should be: + + Running 'or-test0' + Running 'or-test1' + Running 'or-test2' + Running 'or-test4' + Running 'inc-test1' + +If one of the tests fails, then "!!! FAILED !!!" will be printed under +the appropriate test. + +This code has been tested under Chez Scheme 8.3. + +==== Running the expander ==== + +To directly run the expander, import the (expand) library and call +call (expand-s-expr #f env0 s-expr) where s-expr is the expression to +expand. + +You can replace #f with a number for the maximum number of expansion +steps to take. + +If you have a different set of core forms, you can replace env0 +with a new default environment. + +NOTE: + +The expander internally operates over k-syntax objects. The +expand-s-expr function automatically injects into and projects out of +these k-syntax objects. The s-expr notation is easier to read but +looses some information. Specifically, gensyms are reformatted to be +human readable and identifiers are translated into an easier to read +vector notation. + +The expand-k-syntax function operates the same as expand-s-expr except +that it uses these k-syntax objects. See the implementation of +expand-s-expr for how to inject into and project out of k-syntax. + +==== Files ==== + +Top-level files: + - expand.scm: The main expander + - expand-primitives.scm: Primitives available to macro transformers + - tests.scm: Tests of the main expander + +Data type definitions: + - atoms.scm: Defines the types and operators for ref-atom and bind-atom + - idents.scm: Defines the type and operators for ident + - u-syntax.scm: Defines the type and operators for u-syntax + - k-syntax.scm: Defines the type and operators for k-syntax + - s-exprs.scm: Defines the type and operators for s-expr including functions for + injecting and projecting s-exprs to and from k-syntax and u-syntax + +Utility libraries: + - gensym.scm: Unique symbol generation + - record-match.scm: Defines a 'match' pattern matching form for matching records + - typed-records.scm: Defines record definitions that check the types of arguments to the constructor + +Misc: + - syntax-case.scm: Scrap code and notes if we ever want to implement the syntax-case pattern matcher + +==== Limitations ==== + +Main aim of this code is an implementation of the algorithm described +in "Towards the Essence of Hygiene". For the sake of clarity we make +several simplifications. + + - As in "Towards the Essence of Hygiene", we do not implement + datum->syntax, syntax->datum, define, or define-syntax. + + - As in "Towards the Essence of Hygiene", the algorithm we implement + is quadratic in the size of the input though a full syntax-case + implementation can be linear. + + - We directly implement the algorithm in the first have of "Towards + the Essence of Hygiene" (i.e., the one using 'gensym') instead of + the nominal based implementation described in the latter half of + "Towards the Essence of Hygiene". + + - 'syntax-case' is both a hygienic algorithm and a pattern matching + form used with that algorithm. This code just implements the + hygienic expansion algorithm and does not implement pattern + matching or the forms that go with it. This includes the + 'syntax-case', 'quasi-syntax', and 'unquote' syntax forms. + (Though, these can actually be implemented as macros within the + system implemented by this code.) + + - We do not implement advanced macro features like libraries or + define-meta. diff --git a/atoms.scm b/atoms.scm new file mode 100644 index 0000000..aeced3d --- /dev/null +++ b/atoms.scm @@ -0,0 +1,28 @@ +(library (atoms) + (export + ref-atom ref-atom? make-ref-atom ref-atom-name + bind-atom bind-atom? make-bind-atom bind-atom-name + ref-atom-equal? bind-atom-equal? + gensym-ref-atom gensym-bind-atom + ) + (import (rnrs) (typed-records) (gensym)) + +;; This library defines types and operations for ref-atoms and bind-atoms + +;; There are two types of atoms: ref-atom and bind-atom. Each unique +;; atom is represented by a symbolic name. +(define-record-type atom) +(define-typed-subrecord atom ref-atom () (name symbol?)) +(define-typed-subrecord atom bind-atom () (name symbol?)) + +;; ref-atom-equal? and bind-atom-equal? respectively test whether two +;; ref-atoms or bind-atoms have the name symbolic name. +(define (ref-atom-equal? r1 r2) (equal? (ref-atom-name r1) (ref-atom-name r2))) +(define (bind-atom-equal? b1 b2) (equal? (bind-atom-name b1) (bind-atom-name b2))) + +;; gensym-ref-atom and gensym-bind-atom generate unique, fresh +;; versions of ref-atoms or bind-atoms, respectively +(define (gensym-ref-atom a) (make-ref-atom (gensym (symbol->string (ref-atom-name a))))) +(define (gensym-bind-atom a) (make-bind-atom (gensym (symbol->string (bind-atom-name a))))) + +) diff --git a/expand-primitives.scm b/expand-primitives.scm new file mode 100644 index 0000000..3ff6574 --- /dev/null +++ b/expand-primitives.scm @@ -0,0 +1,13 @@ +(library (expand-primitives) + (export (rename (ident-ref=? free-identifier=?) (ident-bind=? bound-identifier=?))) + (import (idents)) + +;; This library defines the primitives that are accessible to macro +;; transformers when they are expanding. + +;; Some features that should be implemented in the future but are not currently implemented are: +;; - syntax-case +;; - (syntax ...) +;; - (quasi-syntax ...) +;; - (unquote-syntax ...) +) diff --git a/expand.scm b/expand.scm new file mode 100644 index 0000000..e1b4890 --- /dev/null +++ b/expand.scm @@ -0,0 +1,243 @@ +(library (expand) + (export + expand-s-expr expand-k-syntax* expand-k-syntax expand-u-syntax make-macro-transformer + + env0 + lambda-transformer + if-transformer + let-transformer + letrec-transformer + let-syntax-transformer + letrec-syntax-transformer + syntax-transformer + + hyg subst subst* + ) + (import (rnrs) (rnrs eval) + (record-match) (typed-records) (gensym) + (atoms) (idents) (u-syntax) (k-syntax) (s-exprs) + ) + +;; This library implements the main expander + +;;;;;;;;;;;;;;;; +;; Wrappers/drivers + +;; This function acts as a wrapper for the expander that goes from +;; s-expr to s-expr. +;; +;; The 'n' parameter is the maximum number of expansion steps to take +;; or #f for no limit. +;; +;; You should usually pass env0 as the env parameter. + +(define (expand-s-expr n env s-expr) + (s-expr-prettify-gensyms + (s-expr-prettify-idents + (k-syntax->s-expr + (expand-k-syntax* n env (s-expr->k-syntax s-expr)))))) + +;; This function acts as a wrapper for the expander that goes from +;; k-syntax to k-syntax. It is called by expand-s-expr and its 'n' +;; and env parameters are the same as for expand-s-expr. + +(define (expand-k-syntax* n env k-syntax) + (if (and n (= n 0)) + k-syntax + (let ([k-syntax^ (expand-k-syntax env k-syntax)]) + (if (not k-syntax^) + k-syntax + (expand-k-syntax* (and n (- n 1)) env k-syntax^))))) + +;;;;;;;;;;;;;;;; +;; Expanding K-Syntax + +;; This function does one step of expansion for k-syntax. +;; If there is not expansion to be done, it returns #f. +;; +;; This function is basically just a traversal to find a u-syntax in +;; the k-syntax to expand. However, it does extend the environment +;; whenever it finds a binding form. During this process +;; make-macro-transformer is used to create the function that +;; implements macro transformers. + +(define (expand-k-syntax env k-syntax) + (define (extend-env form on-rhs lhs rhs env) + (case form + ['let (if on-rhs env (cons (cons lhs #f) env))] + ['letrec (cons (cons lhs #f) env)] + ['let-syntax (if on-rhs env (cons (cons lhs (make-macro-transformer rhs)) env))] + ;; Note that the spec doesn't say what happens when expanding + ;; the rhs of a letrec-syntax requires calling a macro also + ;; defined in that letrec-syntax. We disallow this. + ['letrec-syntax (cons (cons lhs (if on-rhs #f (make-macro-transformer rhs))) env)])) + ((traverse-k-syntax expand-u-syntax extend-env) env k-syntax)) + +;; This function creates a function implementing the macro transformer +;; for a particular bit of k-syntax. +;; +;; In order to implement this, we project the k-syntax to an s-expr and use Scheme's 'eval'. +;; +;; We also use 'hyg' to ensure that the resulting function is hygienic. + +(define (make-macro-transformer k-syntax) + (define f (eval (k-syntax->s-expr (remove-let-syntax k-syntax)) + (environment '(except (rnrs) free-identifier=? bound-identifier=?) '(expand-primitives)))) + (lambda (macro-call) (make-k-u-syntax ((hyg f) macro-call)))) + +;; This function removes let-syntax and letrec-syntax from a k-syntax. +;; This is used in make-macro-transformer when projecting a k-syntax +;; to an s-expr as we do not want to assume the underlying system +;; implements let-syntax or letrec-syntax. (After all, that is the +;; job of the expander implemented in this file.) Fortunately, since +;; the k-syntax in make-macro-transformer is already fully expanded, +;; we can just replace any let-syntax or letrec-syntax with a 'let' +;; with no binders. + +(define remove-let-syntax + (map-k-syntax (lambda (k-syntax) + (match k-syntax () + [#(k-let-syntax _ body) (make-k-let '() body)] + [#(k-letrec-syntax _ body) (make-k-let '() body)] + [else k-syntax])))) + +;;;;;;;;;;;;;;;; +;; Expanding U-Syntax + +;; This function implements one step of expansion for u-syntax, which +;; is the main job of the expander. Expansion of constants, +;; variables, and function applications is done directly, but forms +;; that look like macro calls are all dispatched to the environment. +;; These forms include not just macro calls but also things like +;; lambda, if, let, letrec, and syntax. Structuring the code this way +;; means that locally defined macros can shadow these forms. + +(define (expand-u-syntax env u-syntax) + (match u-syntax (lambda if let letrec let-syntax letrec-syntax syntax syntax-case) + [c (constant? c) (make-k-const c)] + [#(ident r b) + (let ([m (assq r env)]) + (cond + [(and m (cdr m)) (error 'expand-u-syntax "out of phase or context" r m)] + [else (make-k-var r)]))] + [(#(ident #(ref-atom r) b) . args) + (assq r env) + (let ([m (assq r env)]) + (cond + [(not m) (error 'expand-u-syntax "unbound variable" r)] + [(not (cdr m)) (error 'expand-u-syntax "out of phase or context" r m)] + [else ((cdr m) u-syntax)]))] + [(fun . args) (make-k-app (make-k-u-syntax fun) (map make-k-u-syntax args))])) + +;;;;;;;;;;;;;;;; +;; Helpers for core-form transformers + +;; This macro defines a short-hand for defining the functions that +;; implement the core forms. +(define-syntax define-transformer + (syntax-rules () + [(_ (name pat) body) + (define (name tmp) + (match tmp () + [pat body] + [else (error 'name "invalid syntax" tmp)]))])) + +;; This macro defines a short-hand for defining transformers +;; for let-style core forms. +(define-syntax define-let-style-transformer + (syntax-rules () + [(_ name make #f) (define-let-style-transformer name make (lambda (b r^ rhs) rhs))] + [(_ name make #t) (define-let-style-transformer name make subst*)] + [(_ name make rhs-fun) + (define-transformer (name (_ bindings . body)) + (let-values ([(r^ b) (freshen-idents (map car bindings))]) + (let ([rhs (map cadr bindings)]) + (make (map (lambda (lhs rhs) (list lhs (make-k-u-syntax (rhs-fun b r^ rhs)))) r^ rhs) + (map (lambda (body) (make-k-u-syntax (subst* b r^ body))) body)))))])) + +;; This function is a helper for defining the core forms that splits a +;; list of 'ident' into a binder and reference part. It then gensyms +;; the reference part. The resulting bind-atoms and ref-atoms are +;; intended to be passed to 'subst*'. +(define (freshen-idents args) + (values (map (lambda (arg) (gensym-ref-atom (ident-ref arg))) args) + (map (lambda (arg) (ident-bind arg)) args))) + +;;;;;;;;;;;;;;;; +;; Transformers for core forms + +(define-transformer (lambda-transformer (_ args . bodyX)) + (let-values ([(r^ b) (freshen-idents args)]) + (make-k-lam r^ (map (lambda (body) (make-k-u-syntax (subst* b r^ body))) bodyX)))) + +(define-transformer (if-transformer (_ test true false)) + (make-k-if (make-k-u-syntax test) (make-k-u-syntax true) (make-k-u-syntax false))) + +(define-transformer (syntax-transformer (_ body)) + (make-k-syn body)) + +;; Note that the syntax core forms have the same structure as their +;; non-syntax counter parts. The difference shows up in what things +;; are bound to in the traversal in expand-k-syntax. +(define-let-style-transformer let-transformer make-k-let #f) +(define-let-style-transformer letrec-transformer make-k-letrec #t) +(define-let-style-transformer let-syntax-transformer make-k-let-syntax #f) +(define-let-style-transformer letrec-syntax-transformer make-k-letrec-syntax #t) + +;; The default environment with bindings for each core form. +(define env0 + `((lambda . ,lambda-transformer) + (if . ,if-transformer) + (syntax . ,syntax-transformer) + (let . ,let-transformer) + (letrec . ,letrec-transformer) + (let-syntax . ,let-syntax-transformer) + (letrec-syntax . ,letrec-syntax-transformer) + )) + +;;;;;;;;;;;;;;;; +;; Enforcing hygiene + +;; This function takes a function 'f' and returns a hygienic +;; version of it. + +(define (hyg f) + (lambda (arg) + (define perm '()) + (define (make-perm ident) + (let* ([b1 (ident-bind ident)] + [b2 (gensym-bind-atom b1)]) + (set! perm (cons (cons (bind-atom-name b1) (bind-atom-name b2)) + (cons (cons (bind-atom-name b2) (bind-atom-name b1)) perm))))) + (define (apply-permutation perm u-syntax) + (define (f ident) + (let ([r (assq (bind-atom-name (ident-bind ident)) perm)]) + (if r + (make-ident (ident-ref ident) (make-bind-atom (cdr r))) + ident))) + (u-syntax-map-idents f u-syntax)) + (u-syntax-map-idents make-perm arg) ;; generate the permutation + (apply-permutation perm (f (apply-permutation perm arg))))) ;; takes advantage of the fact 'perm' is a self inverse + + +;;;;;;;;;;;;;;;; +;; Subst + +;; This function traverses a u-syntax, body, and for any identifiers +;; with a binder part equal to b and replaces their reference part +;; with r. +(define (subst b r body) + (define (f ident) + (if (bind-atom-equal? (ident-bind ident) b) + (make-ident r b) + ident)) + (u-syntax-map-idents f body)) + +;; This function is the same as subst but it takes a list of binder +;; parts and reference parts; +(define (subst* bs rs body) + (cond + [(null? bs) body] + [else (subst* (cdr bs) (cdr rs) (subst (car bs) (car rs) body))])) + +) diff --git a/gensym.scm b/gensym.scm new file mode 100644 index 0000000..0808abf --- /dev/null +++ b/gensym.scm @@ -0,0 +1,24 @@ +(library (gensym) + (export gensym gensym? prettify-gensym) + (import (scheme)) + +;; We need a way to generate fresh symbols. Since gensym is not +;; standardized, this library exports gensym and should be modified as +;; needed to implement gensym. + +;; Imports from (scheme) that are re-exported +;; +;; gensym : string? -> symbol? +;; gensym? : object -> boolean? + +;; Takes a gensym and returns a symbol with an easier to read name +(define (prettify-gensym s) + (let ([str (gensym->unique-string s)]) + (define (suffix i) + (cond + [(= i (string-length str)) str] + [(eq? '#\- (string-ref str i)) (substring str (+ i 1) (string-length str))] + [else (suffix (+ i 1))])) + (string->symbol (string-append (symbol->string s) ":" (suffix 0))))) + +) diff --git a/idents.scm b/idents.scm new file mode 100644 index 0000000..4587dae --- /dev/null +++ b/idents.scm @@ -0,0 +1,24 @@ +(library (idents) + (export + ident ident? make-ident ident-ref ident-bind + ident-ref=? ident-bind=? + prettify-ident + ) + (import (rnrs) (typed-records) (atoms)) + +;; This library defines types and operations for identifiers + +;; An identifier is a diatomic pair of a ref-atom and a bind-atom. +(define-typed-record ident () (ref ref-atom?) (bind bind-atom?)) + +;; ident-ref=? and ident-bind=? respectively test whether the +;; ref-atoms or bind-atoms in two identifiers are equal. +(define (ident-ref=? i1 i2) (ref-atom-equal? (ident-ref i1) (ident-ref i2))) +(define (ident-bind=? i1 i2) (bind-atom-equal? (ident-bind i1) (ident-bind i2))) + +;; Convert an ident to a vector notation that pretty prints better. +(define (prettify-ident s) + (vector (ref-atom-name (ident-ref s)) + (bind-atom-name (ident-bind s)))) + +) diff --git a/k-syntax.scm b/k-syntax.scm new file mode 100644 index 0000000..fb8a6bc --- /dev/null +++ b/k-syntax.scm @@ -0,0 +1,185 @@ +(library (k-syntax) + (export k-syntax k-syntax? + k-const make-k-const k-const? + k-var make-k-var k-var? + k-lam make-k-lam k-lam? + k-app make-k-app k-app? + k-if make-k-if k-if? + k-let make-k-let k-let? + k-letrec make-k-letrec k-letrec? + k-let-syntax make-k-let-syntax k-let-syntax? + k-letrec-syntax make-k-letrec-syntax k-letrec-syntax? + k-syn make-k-syn k-syn? + k-u-syntax make-k-u-syntax k-u-syntax? + + traverse-k-syntax map-k-syntax + ) + (import (rnrs) (record-match) (typed-records) + (atoms) (u-syntax)) + +;; This library defines types and operations for k-syntax, which +;; represents code that is already fully expanded and thus has a known +;; binding structure. + +;;;;;;;;;;;;;;;; +;; K-syntax data type + +(define-record-type k-syntax) +(define-typed-subrecord k-syntax k-const () (value constant?)) +(define-typed-subrecord k-syntax k-var () (ident ref-atom?)) +(define-typed-subrecord k-syntax k-lam () (args (list-of? ref-atom?)) (body (list-of? k-syntax?))) +(define-typed-subrecord k-syntax k-app () (fun k-syntax?) (args (list-of? k-syntax?))) +(define-typed-subrecord k-syntax k-if () (test k-syntax?) (true k-syntax?) (false k-syntax?)) +(define-typed-subrecord k-syntax k-let () (bindings bindings?) (body (list-of? k-syntax?))) +(define-typed-subrecord k-syntax k-letrec () (bindings bindings?) (body (list-of? k-syntax?))) +(define-typed-subrecord k-syntax k-let-syntax () (bindings bindings?) (body (list-of? k-syntax?))) +(define-typed-subrecord k-syntax k-letrec-syntax () (bindings bindings?) (body (list-of? k-syntax?))) +(define-typed-subrecord k-syntax k-syn () (value u-syntax?)) +(define-typed-subrecord k-syntax k-u-syntax () (value u-syntax?)) + +(define (binding? x) (and (list? x) (eq? 2 (length x)) (ref-atom? (car x)) (k-syntax? (cadr x)))) +(define bindings? (list-of? binding?)) + +;;;;;;;;;;;;;;;; +;; K-syntax traversal + +;; Applies k-syntax-fun to every object in a k-syntax in a bottom-up traversal +(define (map-k-syntax k-syntax-fun) + (define (rec-rhs bindings) + (map (lambda (binding) (list (car binding) (rec (cadr binding)))) bindings)) + (define (rec k-syntax) + (k-syntax-fun + (match k-syntax () + [#(k-syn syn) k-syntax] + [#(k-u-syntax _) k-syntax] + [#(k-const _) k-syntax] + [#(k-var _) k-syntax] + [#(k-lam args body) (make-k-lam args (map rec body))] + [#(k-app fun args) (make-k-app (rec fun) (map rec args))] + [#(k-if test true false) (make-k-if (rec test) (rec true) (rec false))] + [#(k-let bindings body) (make-k-let (rec-rhs bindings) (map rec body))] + [#(k-letrec bindings body) (make-k-letrec (rec-rhs bindings) (map rec body))] + [#(k-let-syntax bindings body) (make-k-let-syntax (rec-rhs bindings) (map rec body))] + [#(k-letrec-syntax bindings body) (make-k-letrec-syntax (rec-rhs bindings) (map rec body))]))) + rec) + +;; Finds the left-most k-u-syntax and apply u-syntax-fun to it. +;; If there is none to update, returns #f. +;; Maintains an environment and uses env-fun to update it. +;; +;; u-syntax-fun : u-syntax? -> k-syntax? +;; +;; (env-fun form on-rhs lhs rhs env) -> env +;; Must add one binding to env based on the given parameters +;; +;; - 'form': Specifies which type of binding form triggered this +;; call. One of 'let 'letrec 'let-syntax or 'letrec-syntax +;; +;; - 'on-rhs': #t if we are extending the environment for the rhs of +;; the bindings in a binding form. #f if we are extending the +;; environment for the body of a binding form. +;; +;; - 'lhs': The name of a ref-atom for a binding to add. +;; +;; - 'rhs': The k-syntax for a binding to add. +;; +;; - 'env': An association list mapping names of ref-atoms to either +;; #f (for out of phase or context variables) or a k-syntax. +;; +;; Note that we use 'ap' and 'ap-map' to help us manage the job of +;; continuing the traversal when recursive calls return #f but +;; stopping when they return a true value. +(define (traverse-k-syntax u-syntax-fun env-fun) + (define (rec-env env k-syntax) + + (define (extend-env form on-rhs bindings) + (define (extend binding env) + (env-fun form on-rhs (ref-atom-name (car binding)) (cadr binding) env)) + (fold-right extend env bindings)) + (define (rec-bindings form bindings) + (define env^ (extend-env form #t bindings)) + (ap-map (lambda (binding) (ap (list (car binding)) (rec-env env^ (cadr binding)))) bindings)) + (define (rec-body form bindings body) + (define env^ (extend-env form #f bindings)) + (ap-map (lambda (k-syntax) (rec-env env^ k-syntax)) body)) + + (define (rec k-syntax) (rec-env env k-syntax)) + (match k-syntax () + + ;; Apply "u-syntax-fun" to the u-syntax + [#(k-u-syntax value) (u-syntax-fun env value)] + + ;; Boilerplate recursions + [#(k-const _) #f] + [#(k-var _) #f] + [#(k-lam args body) (ap (make-k-lam args) (ap-map rec body))] + [#(k-app fun args) (ap (make-k-app) (rec fun) (ap-map rec args))] + [#(k-if test true false) (ap (make-k-if) (rec test) (rec true) (rec false))] + + ;; Binding forms + [#(k-let bindings body) + (ap (make-k-let) + (rec-bindings 'let bindings) + (rec-body 'let bindings body))] + [#(k-letrec bindings body) + (ap (make-k-letrec) + (rec-bindings 'letrec bindings) + (rec-body 'letrec bindings body))] + [#(k-let-syntax bindings body) + (ap (make-k-let-syntax) + (rec-bindings 'let-syntax bindings) + (rec-body 'let-syntax bindings body))] + [#(k-letrec-syntax bindings body) + (ap (make-k-letrec-syntax) + (rec-bindings 'letrec-syntax bindings) + (rec-body 'letrec-syntax bindings body))] + + ;; Primitives + [#(k-syn syn) #f])) + + rec-env) + +;;;;;;;;;;;;;;;; +;; Helpers for 'traverse-k-syntax'. + +;; This macro helps find where to stop a traversal by +;; checking when a recursive call returns #f which means +;; that there are no changes to be made. +;; +;; For example suppose we have the call: +;; (ap (fun a b) (f i j) (g x y)) +;; This is morally equivalent to: +;; (fun a b (f i j) (g x y)) +;; Except that we check whether the calls to f and g return #f. +;; +;; If (f i j) is not #f, then we return: +;; (fun a b (f i j) y) +;; Note that we do not recur on y and thus leave it unchanged. +;; +;; If (f i j) is #f but (g x y) is not, we return: +;; (fun a b j (g x y)) +;; +;; Finally if both f and g return #f, we return #f. +(define-syntax ap + (syntax-rules () + [(_ (fun ...)) #f] + [(_ (fun ...) (m0 ... d0) (m ... d) ...) + (let ([a (m0 ... d0)]) + (if a + (fun ... a d ...) + (ap (fun ... d0) (m ... d) ...)))])) + +;; This is a list version of 'ap'. It applied 'f' to each element of +;; 'ls' until we find an element that does not return #f. The list is +;; then returned with that element replaced by the return value of 'f'. +(define (ap-map f ls) + (define (rec head tail) + (cond + [(null? tail) #f] + [else (let ([a (f (car tail))]) + (if a + (append head (cons a (cdr tail))) + (rec (append head (list (car tail))) (cdr tail))))])) + (rec '() ls)) + +) diff --git a/record-match.scm b/record-match.scm new file mode 100644 index 0000000..de9df79 --- /dev/null +++ b/record-match.scm @@ -0,0 +1,89 @@ +(library (record-match) + (export match) + (import (rnrs)) + +;; This library implements a 'match' pattern matching form that makes +;; it easy to pattern match records. +;; +;; Usage: (match scr (lits ...) [pat guard body] ...) +;; - Scr is an expression that is pattern matched +;; - Lits is a lit of literals that are treated as literal *symbols* in the following patterns +;; - Pat is a pattern. Notably we use vector notation to pattern match records (e.g., #(rec-name field1 field2)). +;; - Guard is a predicate that is tested to see whether to run the body. This part is optional and treated as #f if omitted. +;; - Body is an expression to run if the pattern matches and the guard passes. + +(define-syntax match + (lambda (stx) + (syntax-case stx () + [(_ scr lits . clauses) + (let () + (define count 0) + (define (gensym) + (set! count (+ 1 count)) + (string->symbol (string-append "tmp" (number->string count)))) + (define (is-lit? l) + (define (go lits) + (syntax-case lits () + [() #f] + [(lit . lits) (or (free-identifier=? l #'lit) (go #'lits))])) + (go #'lits)) + (define (mk-fields scr name number fields succ) + (syntax-case fields () + [() succ] + [(field . fields) + (mk-pred #`((record-accessor (record-type-descriptor #,name) #,number) #,scr) #'field + (mk-fields scr name (+ number 1) #'fields succ))])) + (define (mk-pred scr pat succ) + ;; identifier to examine * pattern * syntax on success -> syntax to implement the pattern match + ;; assumes the identifier 'fail is bound to a failure continuation + (with-syntax ([tmp (datum->syntax #'tmp (gensym))]) + #`(let ([tmp #,scr]) + #,(syntax-case pat () + [id (identifier? #'id) + (cond + [(free-identifier=? #'id #'_) succ] + [(is-lit? #'id) #`(if (equal? tmp 'id) #,succ (fail))] + [else #`(let ([id tmp]) #,succ)])] + [(pat-car . pat-cdr) + #`(if (pair? tmp) + #,(mk-pred #'(car tmp) #'pat-car + (mk-pred #'(cdr tmp) #'pat-cdr + succ)) + (fail))] + [#(name fields ...) + #`(if (and (record? tmp) + (equal? (record-rtd tmp) (record-type-descriptor name)) + (equal? (vector-length (record-type-field-names (record-type-descriptor name))) + #,(length (syntax->datum #'(fields ...))))) + #,(mk-fields scr #'name 0 #'(fields ...) succ) + (fail))] + [atom (let ([x (syntax->datum #'atom)]) + (or (boolean? x) (number? x) (char? x) + (string? x) (null? x))) + #`(if (equal? tmp 'atom) #,succ (fail))] + [_ (syntax-violation 'match "unknown pattern form" pat)])))) + (define (mk-clauses clauses) + (syntax-case clauses () + [() #'(error 'match "unmatched value" tmp)] + [([pat body] . rest) (mk-clauses #'([pat #t body] . rest))] + [([pat guard body] . rest) + #`(let ([fail (lambda () #,(mk-clauses #'rest))]) + #,(mk-pred #'tmp #'pat #'(if guard body (fail))))])) + #`(let ([tmp scr]) #,(mk-clauses #'clauses)))]))) + +#;(define (tests) +(print-gensym 'pretty) +(expand '(match x () [a b])) +(expand '(match x () [a b c])) +(expand '(match x () [a b c] [i j k])) +(expand '(match x () [(1 . 2) b])) +(expand '(match x () [() y])) +(expand '(match x () [(l y) y])) +(expand '(match x (l m) [(l y) y])) +(expand '(match x (l m) [(l y) y][(m z) z])) +(define-record-type pair (fields fst snd)) +(expand '(match x () [#(pair y z) 3])) +(match (make-pair 1 2) () [#(pair x y) (list x y)]) +) + +) diff --git a/s-exprs.scm b/s-exprs.scm new file mode 100644 index 0000000..115f967 --- /dev/null +++ b/s-exprs.scm @@ -0,0 +1,71 @@ +(library (s-exprs) + (export + s-expr->u-syntax s-expr->k-syntax k-syntax->s-expr + s-expr-map-type s-expr-prettify-idents s-expr-prettify-gensyms) + (import (rnrs) (record-match) (gensym) + (atoms) (idents) (k-syntax)) + +;; This library implements injection and projections for s-exprs to +;; and from k-syntax and u-syntax as well as functions for mapping +;; across s-exprs. + +;;;;;;;;;;;;;;;; +;; From s-expr + +;; Converts from s-expr to u-syntax by converting symbols to ident objects +(define (s-expr->u-syntax s-expr) + (s-expr-map-type symbol? (lambda (s) (make-ident (make-ref-atom s) (make-bind-atom s))) s-expr)) + +;; Converts from s-expr to k-syntax by wrapping a k-u-syntax around the u-syntax for the s-expr +(define (s-expr->k-syntax s-expr) (make-k-u-syntax (s-expr->u-syntax s-expr))) + +;;;;;;;;;;;;;;;; +;; To s-expr + +;; Converts a k-syntax to an s-expr. Leaves the contents of any +;; contained u-syntax unchanged. +(define (k-syntax->s-expr k-syntax) + (define (rec k-syntax) + (match k-syntax () + + ;; Boilerplate recursions + [#(k-u-syntax value) `(u-syntax ,value)] + [#(k-syn syn) `(quote ,syn)] + [#(k-const c) `(quote ,c)] + [#(k-var #(ref-atom r)) r] + [#(k-lam args body) `(lambda ,(map ref-atom-name args) . ,(map rec body))] + [#(k-app fun args) `(,(rec fun) . ,(map rec args))] + [#(k-if test true false) `(if ,(rec test) ,(rec true) ,(rec false))] + + ;; Recursions with bindings + [#(k-let bindings body) + `(let ,(map (lambda (b) `(,(ref-atom-name (car b)) ,(rec (cadr b)))) bindings) . ,(map rec body))] + [#(k-letrec bindings body) + `(letrec ,(map (lambda (b) `(,(ref-atom-name (car b)) ,(rec (cadr b)))) bindings) . ,(map rec body))] + [#(k-let-syntax bindings body) + `(let-syntax ,(map (lambda (b) `(,(ref-atom-name (car b)) ,(rec (cadr b)))) bindings) . ,(map rec body))] + [#(k-letrec-syntax bindings body) + `(letrec-syntax ,(map (lambda (b) `(,(ref-atom-name (car b)) ,(rec (cadr b)))) bindings) . ,(map rec body))] + )) + + (rec k-syntax)) + +;;;;;;;;;;;;;;;; +;; Mapping across s-exprs + +;; Applies 'f' to any objects satisfying 'type?' in the s-expr 's'. +(define (s-expr-map-type type? f s) + (define (rec s) (s-expr-map-type type? f s)) + (cond + [(type? s) (f s)] + [(pair? s) (cons (rec (car s)) (rec (cdr s)))] + [(vector? s) (vector-map rec s)] + [else s])) + +;; Applies prettify-ident to all ident in an s-expr +(define (s-expr-prettify-idents s) (s-expr-map-type ident? prettify-ident s)) + +;; Applies prettify-gensym to all gensyms in an s-expr +(define (s-expr-prettify-gensyms s) (s-expr-map-type gensym? prettify-gensym s)) + +) diff --git a/syntax-case.scm b/syntax-case.scm new file mode 100644 index 0000000..d58e873 --- /dev/null +++ b/syntax-case.scm @@ -0,0 +1,43 @@ +;(define-typed-subrecord k-syntax k-syntax-case () (scr k-syntax?) (lits null?) (clauses (clauses? pattern? k-syntax?))) + + +;; Patterns + +#| +(define-record-type pattern) +(define-typed-subrecord pattern p-wild ()) +(define-typed-subrecord pattern p-nil ()) +(define-typed-subrecord pattern p-var () (ident ref-atom?)) +(define-typed-subrecord pattern p-pair () (fst pattern?) (snd pattern?)) + +#;(define (map-pattern-ident f pattern) + (match pattern () + [#(ident r b) (f pattern)] ;; (why do we need #(...) instead of #[...] ???) + [() pattern] + [(p1 . p2) + (let ([p1^ (map-pattern-ident p1)] + [p2^ (map-pattern-ident p2)]) + (make-p-pair p1^ p2^))])) +|# + + +#;(define (expand-clause clause) + (match clause + [(pattern body) + (let-values ([(b r p) (freshen-pattern pattern)]) + (list p (subst* b r body)))])) + +#;(define (expand-pattern u-syntax) + (match u-syntax (_) + [#(ident #(ref-atom _) b0) (values '() (make-p-wild))] + [#(ident r b) + (let ([r^ (gensym-ref-atom)]) + (values (list b) (list r^) (make-p-var r^)))] + [() (values '() (make-p-nil))] + [(p1 . p2) + (let-values ([(b1 r1 p1^) (expand-pattern p1)] + [(b2 r2 p2^) (expand-pattern p2)]) + (values (append b1 b2) (append r1 r2) + (make-p-pair p1^ p2^)))])) + + diff --git a/tests.scm b/tests.scm new file mode 100644 index 0000000..31475ba --- /dev/null +++ b/tests.scm @@ -0,0 +1,251 @@ +(import (expand)) + +;; This file contains tests for the macro expander + +;;;;;;;;;;;;;;;; +;; Testing framework + +;; Returns true if two s-expr are alpha equivalent +(define (alpha-eq? s1 s2) + (define perm '()) + (define (rec s1 s2) + (or + (eq? s1 s2) + (and (pair? s1) (pair? s2) + (rec (car s1) (car s2)) + (rec (cdr s1) (cdr s2))) + (and (vector? s1) (vector? s2) + (= (vector-length s1) (vector-length s2)) + (for-all rec (vector->list s1) (vector->list s2))) + (and (symbol? s1) (symbol? s2) + (cond + [(assq s1 perm) (eq? (cdr (assq s1 perm)) s2)] + [(and (exists (lambda (x) (eq? x '#\:)) (string->list (symbol->string s1))) + (exists (lambda (x) (eq? x '#\:)) (string->list (symbol->string s2)))) + (set! perm (cons (cons s1 s2) perm)) #t] + [else #f] + )))) + (rec s1 s2)) + +;; A record storing the information needed to run a test +(define-record-type test (fields name input output)) + +;; A list of tests that are defined +(define tests '()) + +;; Defines a test +(define-syntax define-test + (syntax-rules () + [(_ name input output) + (begin + (define name (make-test 'name input output)) + (set! tests (cons name tests)))])) + +;; Runs all tests that have been defined +(define (run-tests) + (for-each (lambda (test) + (display "Running '") (display (test-name test)) (display "'") (newline) + (if (not (alpha-eq? (test-output test) + (expand-s-expr #f env0 (test-input test)))) + (begin (display "!!! FAILED !!!") (newline)))) + (reverse tests))) + +;; Runs a single test with extra information about input and output +(define (run-test test) + (display "Testing: ")(display (test-name test))(newline) + (display "Running: ")(pretty-print `(expand-s-expr #f env0 (test-input ,(test-name test)))) + (display "Input:")(newline) + (pretty-print (test-input test)) + (display "Expected output:")(newline) + (pretty-print (test-output test)) + (let ([actual (expand-s-expr #f env0 (test-input test))]) + (display "Actual output:")(newline) + (pretty-print (expand-s-expr #f env0 (test-input test))) + (display "Output comparison: ")(write (alpha-eq? (test-output test) actual))(newline))) + +;;;;;;;;;;;;;;;; +;; Test helpers + +;; Helper for building tests involving the 'or' macro +(define (or-macro body) + `(letrec-syntax ([or (lambda (stx) + (if (null? (cdr stx)) + #'#t + (if (null? (cddr stx)) + (cadr stx) + (list #'let (list (list #'tmp (cadr stx))) + (list #'if #'tmp #'tmp (cons #'or (cddr stx)))))))]) + ,body)) + +;; Helper for building tests involving the 'let-inc' macro +(define (let-inc-macro body) + `(let-syntax ([let-inc (lambda (stx) + (let ([u (cadr stx)] + [v (caddr stx)]) + (list #'let (list (list u (list #'+ #'1 u))) v)))]) + ,body)) + +;; Helper for building tests involving the 'm' macro +(define (m-macro body) + `(let-syntax ([m (lambda (stx) + (let ([y (cadr stx)]) + (list #'let-inc #'x (list #'* #'x #'y))))]) + ,body)) + +;;;;;;;;;;;;;;;; +;; Tests of the 'or' macro + +(define-test or-test0 (or-macro `(or 1 2)) + '(letrec-syntax ([or:7 (lambda (stx:8) + (if (null? (cdr stx:8)) + '#t + (if (null? (cddr stx:8)) + (cadr stx:8) + (list + '#(let let) + (list (list '#(tmp tmp) (cadr stx:8))) + (list + '#(if if) + '#(tmp tmp) + '#(tmp tmp) + (cons '#(or:7 or) (cddr stx:8)))))))]) + (let ([tmp:9 '1]) (if tmp:9 tmp:9 '2)))) +(define-test or-test1 `(let ([tmp 1]) ,(or-macro `(or 2 tmp))) + '(let ([tmp:7 '1]) + (letrec-syntax ([or:8 (lambda (stx:9) + (if (null? (cdr stx:9)) + '#t + (if (null? (cddr stx:9)) + (cadr stx:9) + (list + '#(let let) + (list + (list '#(tmp:7 tmp) (cadr stx:9))) + (list + '#(if if) + '#(tmp:7 tmp) + '#(tmp:7 tmp) + (cons + '#(or:8 or) + (cddr stx:9)))))))]) + (let ([tmp:10 '2]) (if tmp:10 tmp:10 tmp:7))))) +(define-test or-test2 (or-macro `(let ([tmp 1]) (or 2 tmp))) + '(letrec-syntax ([or:11 (lambda (stx:12) + (if (null? (cdr stx:12)) + '#t + (if (null? (cddr stx:12)) + (cadr stx:12) + (list + '#(let let) + (list (list '#(tmp tmp) (cadr stx:12))) + (list + '#(if if) + '#(tmp tmp) + '#(tmp tmp) + (cons + '#(or:11 or) + (cddr stx:12)))))))]) + (let ([tmp:13 '1]) + (let ([tmp:14 '2]) (if tmp:14 tmp:14 tmp:13))))) + +(define or-test3 `(let ([if 1]) ,(or-macro `(or 2 if)))) ;; TODO: automate the check that this thows an error + +(define-test or-test4 (or-macro `(let ([if 1]) (or 2 if))) + '(letrec-syntax ([or:15 (lambda (stx:16) + (if (null? (cdr stx:16)) + '#t + (if (null? (cddr stx:16)) + (cadr stx:16) + (list + '#(let let) + (list (list '#(tmp tmp) (cadr stx:16))) + (list + '#(if if) + '#(tmp tmp) + '#(tmp tmp) + (cons + '#(or:15 or) + (cddr stx:16)))))))]) + (let ([if:17 '1]) + (let ([tmp:18 '2]) (if tmp:18 tmp:18 if:17))))) + +;;;;;;;;;;;;;;;; +;; Tests of the 'inc' macro + +(define-test inc-test1 `(let ([x 3]) ,(let-inc-macro (m-macro `(m x)))) + '(let ([x:19 '3]) + (let-syntax ([let-inc:20 (lambda (stx:21) + (let ([u:22 (cadr stx:21)] + [v:23 (caddr stx:21)]) + (list + '#(let let) + (list (list u:22 (list '#(+ +) '1 u:22))) + v:23)))]) + (let-syntax ([m:24 (lambda (stx:25) + (let ([y:26 (cadr stx:25)]) + (list + '#(let-inc:20 let-inc) + '#(x:19 x) + (list '#(* *) '#(x:19 x) '#(y:26 y)))))]) + (let ([x:27 (+ '1 x:19)]) (* x:27 y:26)))))) + +(run-tests) + + +;; TODO: +;; - Environment handling in expand-k-syntax +;; + The macro case in expand-u-syntax +;; - Simplify 'match' notation +;; + Library implementing syntax-case, free-identifier=? +;; - Check for duplicate identifers in binding lists +;; - Pretty print k-syntax + +;; all that is left is the environment handling in expand-k-syntax and environment lookup in expand-u-syntax + + +#| + +(letrec-syntax ([m1 (lambda (x) x)] + [m2 (m1 (lambda (x) x))]) +3) + +(let-syntax ([m1 (lambda (x) x)] + [m2 (m1 (lambda (x) x))]) +3) + +(let ([x 1]) + (let-syntax ([m (lambda (y) (if x 2 3))]) + (m))) + +|# + + +; (match (where ([i ident?] [(r b) (and (ref-atom? r) (bind-atom? b) (ident? this))] +; [c ... +; [ .. +; [( ... +;; short hand for "match ... [(#(ident #(ref-atom foo) _) ...) ..] + +#| + +> (s-expr->k-syntax '(lambda (x) x)) + +> (k-syntax->s-expr (expand-k-syntax '() (expand-k-syntax '() (s-expr->k-syntax '(lambda (x) x))))) +(lambda (#{x stw3ewjeozscrb1a8f9bs-3}) + #{x stw3ewjeozscrb1a8f9bs-3}) + +> (k-syntax->s-expr (expand-k-syntax '() (expand-k-syntax '() (expand-k-syntax '() (expand-k-syntax '() (expand-k-syntax '() (s-expr->k-syntax '(let-syntax ([m (lambda (stx) (syntax z))]) (m))))))))) +(begin z) + +> (expand-s-expr #f '() '(let-syntax ([m (lambda (stx) (syntax z))]) (m))) +(begin z) + + + +;; macros: +;; or +;; let-inc / m + + + +|# diff --git a/typed-records.scm b/typed-records.scm new file mode 100644 index 0000000..6082639 --- /dev/null +++ b/typed-records.scm @@ -0,0 +1,45 @@ +(library (typed-records) + (export define-typed-record define-typed-subrecord list-of?) + (import (rnrs)) + +;; This library defines forms for defining record types that check the +;; type of the objects they contain. + +;; Defines a record type that checks the type of the objects it contains. +;; Usage: (define-typed-record name (clauses ...) (field pred) ...) +;; - Name is the name of the record type +;; - Clauses are define-record-type clauses. (Usually this will be empty.) +;; - Field and pred are the field names and the predicate that that field must satisfy. +(define-syntax define-typed-record + (syntax-rules () + [(_ self (clauses ...) (field pred) ...) + (define-record-type self + clauses ... + (fields field ...) + (protocol + (lambda (make) + (lambda (field ...) + (if (not (pred field)) (assertion-violation 'self "invalid argument type" 'pred 'field field)) ... + (make field ...)))))])) + +;; Behaves the same as define-typed-record except that it has a parent +;; record type as the first argument to the form. +(define-syntax define-typed-subrecord + (syntax-rules () + [(_ p self (clauses ...) (field pred) ...) + (define-record-type self + (parent p) + clauses ... + (fields field ...) + (protocol + (lambda (make) + (lambda (field ...) + (if (not (pred field)) (assertion-violation 'self "invalid argument type" 'pred 'field field)) ... + ((make field ...))))))])) + +;; A helper function for defining list types +;; Usage: ((list-of? f) x) +;; Returns true iff x is a list and f returns true on all elements in that list. +(define (list-of? f) (lambda (x) (and (list? x) (for-all f x)))) + +) diff --git a/u-syntax.scm b/u-syntax.scm new file mode 100644 index 0000000..5f520b1 --- /dev/null +++ b/u-syntax.scm @@ -0,0 +1,29 @@ +(library (u-syntax) + (export constant? u-syntax? u-syntax-map-idents) + (import (rnrs) (typed-records) (idents)) + +;; This library defines types and operations for u-syntax, which +;; represents code that is not yet fully expanded and thus has an +;; unknown binding structure. + +;; The constant forms allowed in a u-syntax. Note symbols are not +;; allowed. Identifiers should be used instead. +(define (constant? x) + (or (boolean? x) (number? x) (char? x) (string? x) (null? x))) + +;; A predicate defining a u-syntax as an ident, constant, or pair of +;; u-syntax. +(define (u-syntax? x) + (or (ident? x) + (constant? x) + (and (pair? x) (u-syntax? (car x)) (u-syntax? (cdr x))))) + +;; A function that applies 'f' to all ident in a u-syntax. +(define (u-syntax-map-idents f s) + (cond + [(ident? s) (f s)] + [(constant? s) s] + [(pair? s) (cons (u-syntax-map-idents f (car s)) + (u-syntax-map-idents f (cdr s)))])) + +)