(require 'stdlib) ;; NOTES ;; define a macro `make-parser' that generates a rec-desc parser for ;; the given LL(1) grammar. E.g. ;; (make-parser ;; ((expr -> factor (star (alt + -) factor)) ;; [[+|-] factor]* ;; (factor -> term (star (alt * /) term)) ;; (term -> (alt atom (lparen expr rparen))) ;; (atom -> (alt number identifier)))) ;; ;; (make-parser ;; ((expr -> factor (* (alt + -) factor)) ;; (factor -> term (+ (alt * /) term)))) ;; ;; adding action to the grammar: ;; (rule -> var:nonterm (action (var) FORMS) + nonterm) ;; Action can simply be mapped to lambda. ;; ;; example implementation: ;; recursive descent parser for simple expressions ;; expr -> factor ((+ | -) factor)* ;; factor -> term ((* | /) term)* ;; term -> atom | ( expr ) ;; atom -> number | identifier ;; ;; (make-lexer ;; (whitespace (alt tab space newline) (type skip)) ;; (comment "#" (* (not newline)) (type skip)) ;; (lparen "(") ;; (rparen ")") ;; (integer (+ digit)) ;; (ident (+ alpha)) ;; (digit (range "0" "9")) ;; (alpha (range "a" "z"))) (define (make-lexer sequence) (let ((tokens sequence) (current nil)) (define (match token) (if (eq token current) (advance) (error "lexer: expected '%s', got '%s'" token current))) (define (advance) (setq current (or (first tokens) 'eos)) (setq tokens (rest tokens))) (advance) (lambda args (apply (case (first args) ((match) match) ((current) (lambda () current)) (t (error "lexer: unknown method '%s'" (first args)))) (rest args))))) ;; usage example: ;; (define lexer (make-lexer '(list of tokens))) ;; (lexer 'match 'list) -> t ;; (lexer 'match 'of) -> t ;; (lexer 'current) -> 'tokens ;; (lexer 'match 'tokens) ;; (lexer 'match 'eos) ;; (lexer 'match 'eos) ;; grammar specification (define rules ()) (define (grammar-1) (setq rules ()) (defrule 'S '(sequence A B) 'eos) (defrule 'A 'a) (defrule 'B 'b)) (define (grammar-2) (setq rules ()) (defrule 'S '(sequence A B C)) (defrule 'A '(sequence B C D)) (defrule 'B 'b) (defrule 'C 'c) (defrule 'D 'd)) ;; new style using macros, sequence is the default (duh) (define (grammar-3) (setq rules ()) (defrule S -> A B eos) (defrule A -> a b c) (defrule B -> b a)) ;; produces fn L-parser (defmacro define-language (name #!rest grammar) ) (define-language L (S -> A b) (A -> a) (B -> (^ b c)) ; or (define as aliases) (C -> (+ a b c)) ; 1+ (D -> (* a b c)) ; 0+ ;; action, rule elements are available in the closure: terminals are ;; bound to their actual value, non-terminals are bound to the ;; return value of the entire rule (E -> a b (! puts (list a b))) (F -> c d (!! (+ c d)))) ; !! rule return value ;; ---------------------------------------------------------------------- ;; representation with data structure and code (define (defrule name expansion) (setq rules (nconc rules (list (list name expansion))))) (define (run-grammar start) (let* ((rule (assoc start rules)) (name (first rule)) (expansion (second rule))) (printf "eval: %s -> %s\n" name expansion) (cond ((symbolp expansion) (printf "match: %s\n" expansion)) ((listp expansion) (case (first expansion) ((sequence) (puts "sequence") (dolist (rule (rest expansion)) (run-grammar rule))) ((alternative) (puts "alternative") (or (map run-grammar (rest expansion)))) (t (puts "unknown"))))))) ;;(grammar-1) ;;(puts rules) ;;(run-grammar 'S) ;; ---------------------------------------------------------------------- ;; representation with closures (define (defrule name expansion) (setq rules (nconc rules (list (list name (cond ((symbolp expansion) (lambda () (printf "match %s\n" expansion))) ((listp expansion) (case (first expansion) ((sequence) (lambda () (puts "sequence") (dolist (rule (rest expansion)) (funcall (second (assoc rule rules)))))))))))))) ;;(grammar-1) ;;(puts rules) ;;(funcall (second (assoc 'S rules))) ;; ---------------------------------------------------------------------- ;; compiled network (define (defrule name alternatives) (setq rules (nconc rules (list (list name alternatives))))) (define (compile-net root) (let ((rule (assoc root rules))) (if (null rule) (puts "compiler: null rule (error?)") nil (let ((name (first rule)) (expansion (second rule))) (printf "compiling rule %s\n" name) (cond ((symbolp expansion) (printf "compiling match %s\n" expansion) (lambda () (printf "runtime: match: %s\n" expansion) (lexer 'match expansion))) ((listp expansion) (case (first expansion) ((sequence) (puts "compiling sequence") (let ((exps (map compile-net (rest expansion)))) (lambda () (printf "runtime: %s -> %s\n" name expansion) (dolist (exp exps) (printf "runtime: calling %s\n" exp) (funcall exp)))))))))))) (defmacro defrule (name #!rest alternatives) `(setq rules (nconc rules (list (list ,name ,@alternatives))))) ;; BUG handle unknown tokens (define (compile-net root) (cond ((memq root tokens) (printf "compiling match %s\n" root) (lambda () (printf "runtime: match: %s\n" root) (lexer 'match root))) ((listp root) (printf "compiling meta %s\n" root) (case (first root) ((^) (puts "alternative")) ((*) (puts "zero-or-more")) ((+) (puts "one-or-more")) ((\?) (puts "optional")) ((eps) (puts "epsilon"))) (lambda () (puts "nyi"))) (t (let* ((rule (assoc root rules)) (name (first rule)) (alternatives (rest rule))) (printf "compiling rule %s -> %s\n" name alternatives) (let ((alts (map compile-net alternatives))) (lambda () (printf "runtime: %s -> %s\n" name alternatives) (dolist (alt alts) (printf "runtime: funcall %s\n" alt) (funcall alt)))))))) (define tokens '(a b c eos)) (define (grammar) (setq rules ()) (defrule 'S 'A 'B 'A 'eos) (defrule 'A 'a) ;;(defrule 'A '(* a b)) (defrule 'B 'b 'c)) (grammar) (puts rules) (define net (compile-net 'S)) ; or (compile-net rules start-symbol) (puts "compilation ready.") (setq rules nil) (define lexer (make-lexer '(a b c a))) (net) ; or (funcall net)