;; NOTES ;; define a macro `make-parser' that generates a rec-desc parser for ;; the given LL(1) grammar. E.g. ;; (make-parser ;; ((expr -> factor (star (alt + -) factor)) ;; [[+|-] factor]* ;; (factor -> term (star (alt * /) term)) ;; (term -> (alt atom (lparen expr rparen))) ;; (atom -> (alt number identifier)))) ;; ;; (make-parser ;; ((expr -> factor (* (alt + -) factor)) ;; (factor -> term (+ (alt * /) term)))) ;; ;; adding action to the grammar: ;; (rule -> var:nonterm (action (var) FORMS) + nonterm) ;; Action can simply be mapped to lambda. ;; ;; example implementation: ;; recursive descent parser for simple expressions ;; expr -> factor ((+ | -) factor)* ;; factor -> term ((* | /) term)* ;; term -> atom | ( expr ) ;; atom -> number | identifier ;; ;; (make-lexer ;; (whitespace (alt tab space newline) (type skip)) ;; (comment "#" (* (not newline)) (type skip)) ;; (lparen "(") ;; (rparen ")") ;; (integer (+ digit)) ;; (ident (+ alpha)) ;; (digit (range "0" "9")) ;; (alpha (range "a" "z"))) (define (make-lexer sequence) (let ((tokens sequence) (current #f)) (define (match token) (if (eq? token current) (advance) (begin (print "lexer: expected '" token "', got '" current "'") (error "match")))) (define (advance) (if (null? tokens) (set! current 'eos) (begin (set! current (car tokens)) (set! tokens (cdr tokens))))) (advance) (lambda args (apply (case (car args) ((match) match) ((current) (lambda () current)) (else (error "lexer: unknown method" (car args)))) (cdr args))))) ;; usage example: ;; (define lexer (make-lexer '(list of tokens))) ;; (lexer 'match 'list) -> t ;; (lexer 'match 'of) -> t ;; (lexer 'current) -> 'tokens ;; (lexer 'match 'tokens) ;; (lexer 'match 'eos) ;; (lexer 'match 'eos) ;; grammar specification (define rules ()) (define (grammar-1) (set! rules ()) (defrule 'S '(sequence A B) 'eos) (defrule 'A 'a) (defrule 'B 'b)) (define (grammar-2) (set! rules ()) (defrule 'S '(sequence A B C)) (defrule 'A '(sequence B C D)) (defrule 'B 'b) (defrule 'C 'c) (defrule 'D 'd)) ;; new style using macros, sequence is the default (duh) (define (grammar-3) (set! rules ()) (defrule S -> A B eos) (defrule A -> a b c) (defrule B -> b a)) ;; produces fn L-parser (define-macro (define-language name . grammar) (list)) (define-language L (S -> A b) (A -> a) (B -> (^ b c)) ; or (define as aliases) (C -> (+ a b c)) ; 1+ (D -> (* a b c)) ; 0+ ;; action, rule elements are available in the closure: terminals are ;; bound to their actual value, non-terminals are bound to the ;; return value of the entire rule (E -> a b (! print (list a b))) (F -> c d (!! (+ c d)))) ; !! rule return value ;; ---------------------------------------------------------------------- ;; representation with data structure and code (define (defrule name expansion) (set! rules (append rules (list (list name expansion))))) (define (run-grammar start) (let* ((rule (assoc start rules)) (name (car rule)) (expansion (cadr rule))) (print "eval: " name " -> " expansion) (cond ((symbol? expansion) (print "match: " expansion)) ((list? expansion) (case (car expansion) ((sequence) (print "sequence") (dolist (rule (rest expansion)) (run-grammar rule))) ((alternative) (print "alternative") (or (map run-grammar (cdr expansion)))) (else (print "unknown"))))))) ;;(grammar-1) ;;(print rules) ;;(run-grammar 'S) ;; ---------------------------------------------------------------------- ;; representation with closures (define (defrule name expansion) (set! rules (nconc rules (list (list name (cond ((symbolp expansion) (lambda () (printf "match %s\n" expansion))) ((listp expansion) (case (first expansion) ((sequence) (lambda () (print "sequence") (dolist (rule (rest expansion)) (funcall (second (assoc rule rules)))))))))))))) ;;(grammar-1) ;;(print rules) ;;(funcall (second (assoc 'S rules))) ;; ---------------------------------------------------------------------- ;; compiled network (define (defrule name alternatives) (set! rules (nconc rules (list (list name alternatives))))) (define (compile-net root) (let ((rule (assoc root rules))) (if (null? rule) (print "compiler: null rule (error?)")) (let ((name (first rule)) (expansion (second rule))) (printf "compiling rule %s\n" name) (cond ((symbolp expansion) (printf "compiling match %s\n" expansion) (lambda () (printf "runtime: match: %s\n" expansion) (lexer 'match expansion))) ((listp expansion) (case (first expansion) ((sequence) (print "compiling sequence") (let ((exps (map compile-net (rest expansion)))) (lambda () (printf "runtime: %s -> %s\n" name expansion) (dolist (exp exps) (printf "runtime: calling %s\n" exp) (funcall exp))))))))))) (define-macro (defrule name . alternatives) `(set! rules (append rules (list (list ,name ,@alternatives))))) ;; BUG handle unknown tokens (define (compile-net root) (cond ((member root tokens) (print "compiling match " root) (lambda () (print "runtime: match: " root) (lexer 'match root))) ((list? root) (print "compiling meta " root) (case (car root) ((^) (print "alternative")) ((*) (print "zero-or-more")) ((+) (print "one-or-more")) ((\?) (print "optional")) ((eps) (print "epsilon"))) (lambda () (print "nyi"))) (else (let* ((rule (assoc root rules)) (name (car rule)) (alternatives (cdr rule))) (print "compiling rule " name " -> " alternatives) (let ((alts (map compile-net alternatives))) (lambda () (print "runtime: " name " -> " alternatives) (dolist (alt alts) ;;(print "runtime: funcall " alt) (alt)))))))) (define tokens '(a b c eos)) (define (grammar) (set! rules ()) (defrule 'S 'A 'B 'A 'eos) (defrule 'A 'a) ;;(defrule 'A '(* a b)) (defrule 'B 'b 'c)) (grammar) (print rules) (define net (compile-net 'S)) ; or (compile-net rules start-symbol) (print "compilation ready.") (set! rules ()) (define lexer (make-lexer '(a b c a))) ;;(define lexer (make-lexer '(a b c a X))) ; provoke unknown token 'X' (net)