1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-30 06:50:31 +02:00

Move define-nonterm

* module/ice-9/peg/string-peg.scm: remove define-nonterm and make a simpler
   macro called `define-sexp-parser' to make the PEG grammar
* module/ice-9/peg.scm: move define-nonterm macro to this file
* module/ice-9/peg/codegen.scm: move code to wrap a parser result nicely to
   this file, under name `wrap-parser-for-users'
This commit is contained in:
Noah Lavine 2011-03-28 15:13:35 -04:00 committed by Andy Wingo
parent 97c846947d
commit 0afaf59982
3 changed files with 89 additions and 80 deletions

View file

@ -19,7 +19,7 @@
(define-module (ice-9 peg)
#:export (peg-parse
; define-nonterm
define-nonterm
; define-nonterm-f
peg-match)
; #:export-syntax (define-nonterm)
@ -30,7 +30,7 @@
#:re-export (peg-sexp-compile
define-grammar
define-grammar-f
define-nonterm
; define-nonterm
keyword-flatten
context-flatten
peg:start
@ -67,6 +67,35 @@ execute the STMTs and try again."
#f
(make-prec 0 (car res) string (string-collapse (cadr res))))))
;; The results of parsing using a nonterminal are cached. Think of it like a
;; hash with no conflict resolution. Process for deciding on the cache size
;; wasn't very scientific; just ran the benchmarks and stopped a little after
;; the point of diminishing returns on my box.
(define *cache-size* 512)
;; Defines a new nonterminal symbol accumulating with ACCUM.
(define-syntax define-nonterm
(lambda (x)
(syntax-case x ()
((_ sym accum pat)
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
(accumsym (syntax->datum #'accum))
(c (datum->syntax x (gensym))));; the cache
;; CODE is the code to parse the string if the result isn't cached.
(let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
#`(begin
(define #,c (make-vector *cache-size* #f));; the cache
(define (sym str strlen at)
(let* ((vref (vector-ref #,c (modulo at *cache-size*))))
;; Check to see whether the value is cached.
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
(caddr vref);; If it is return it.
(let ((fres ;; Else calculate it and cache it.
(#,syn str strlen at)))
(vector-set! #,c (modulo at *cache-size*)
(list str at fres))
fres)))))))))))
;; Searches through STRING for something that parses to PEG-MATCHER. Think
;; regexp search.
(define-syntax peg-match

View file

@ -18,7 +18,7 @@
;;;;
(define-module (ice-9 peg codegen)
#:export (peg-sexp-compile)
#:export (peg-sexp-compile wrap-parser-for-users)
#:use-module (ice-9 peg)
#:use-module (ice-9 peg string-peg)
#:use-module (ice-9 pretty-print)
@ -244,3 +244,30 @@ return EXP."
(lit
#`(and success
#,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
;; Packages the results of a parser
(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
#`(lambda (str strlen at)
(let ((res (#,parser str strlen at)))
;; Try to match the nonterminal.
(if res
;; If we matched, do some post-processing to figure out
;; what data to propagate upward.
(let ((at (car res))
(body (cadr res)))
#,(cond
((eq? accumsym 'name)
#`(list at '#,s-syn))
((eq? accumsym 'all)
#`(list (car res)
(cond
((not (list? body))
(list '#,s-syn body))
((null? body) '#,s-syn)
((symbol? (car body))
(list '#,s-syn body))
(else (cons '#,s-syn body)))))
((eq? accumsym 'none) #`(list (car res) '()))
(else #`(begin res))))
;; If we didn't match, just return false.
#f))))

View file

@ -22,16 +22,11 @@
peg-as-peg
define-grammar
define-grammar-f
define-nonterm
peg-grammar)
#:use-module (ice-9 peg)
#:use-module (ice-9 peg codegen))
;; The results of parsing using a nonterminal are cached. Think of it like a
;; hash with no conflict resolution. Process for deciding on the cache size
;; wasn't very scientific; just ran the benchmarks and stopped a little after
;; the point of diminishing returns on my box.
(define *cache-size* 512)
#:use-module (ice-9 peg codegen)
#:use-module (ice-9 peg match-record)
#:use-module (ice-9 peg simplify-tree))
;; Gets the left-hand depth of a list.
(define (depth lst)
@ -39,58 +34,6 @@
0
(+ 1 (depth (car lst)))))
(eval-when (compile load eval)
(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
; (let ((matchf-syn (datum->syntax for-syntax matchf)))
#`(lambda (str strlen at)
(let ((res (#,matchf-syn str strlen at)))
;; Try to match the nonterminal.
(if res
;; If we matched, do some post-processing to figure out
;; what data to propagate upward.
(let ((at (car res))
(body (cadr res)))
#,(cond
((eq? accumsym 'name)
#`(list at '#,s-syn))
((eq? accumsym 'all)
#`(list (car res)
(cond
((not (list? body))
(list '#,s-syn body))
((null? body) '#,s-syn)
((symbol? (car body))
(list '#,s-syn body))
(else (cons '#,s-syn body)))))
((eq? accumsym 'none) #`(list (car res) '()))
(else #`(begin res))))
;; If we didn't match, just return false.
#f))))
)
;; Defines a new nonterminal symbol accumulating with ACCUM.
(define-syntax define-nonterm
(lambda (x)
(syntax-case x ()
((_ sym accum pat)
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
(accumsym (syntax->datum #'accum))
(c (datum->syntax x (gensym))));; the cache
;; CODE is the code to parse the string if the result isn't cached.
(let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
#`(begin
(define #,c (make-vector *cache-size* #f));; the cache
(define (sym str strlen at)
(let* ((vref (vector-ref #,c (modulo at *cache-size*))))
;; Check to see whether the value is cached.
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
(caddr vref);; If it is return it.
(let ((fres ;; Else calculate it and cache it.
(#,syn str strlen at)))
(vector-set! #,c (modulo at *cache-size*)
(list str at fres))
fres)))))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; Parse string PEGs using sexp PEGs.
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
@ -114,34 +57,43 @@ LB < '['
RB < ']'
")
(define-nonterm peg-grammar all
(define-syntax define-sexp-parser
(lambda (x)
(syntax-case x ()
((_ sym accum pat)
(let* ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
(accumsym (syntax->datum #'accum))
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
#`(define sym #,syn))))))
(define-sexp-parser peg-grammar all
(body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
(define-nonterm peg-pattern all
(define-sexp-parser peg-pattern all
(and peg-alternative
(body lit (and (ignore "/") peg-sp peg-alternative) *)))
(define-nonterm peg-alternative all
(define-sexp-parser peg-alternative all
(body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
(define-nonterm peg-suffix all
(define-sexp-parser peg-suffix all
(and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
(define-nonterm peg-primary all
(define-sexp-parser peg-primary all
(or (and "(" peg-sp peg-pattern ")" peg-sp)
(and "." peg-sp)
peg-literal
peg-charclass
(and peg-nonterminal (body ! "<" 1))))
(define-nonterm peg-literal all
(define-sexp-parser peg-literal all
(and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
(define-nonterm peg-charclass all
(define-sexp-parser peg-charclass all
(and (ignore "[")
(body lit (and (body ! "]" 1)
(or charclass-range charclass-single)) *)
(ignore "]")
peg-sp))
(define-nonterm charclass-range all (and peg-any "-" peg-any))
(define-nonterm charclass-single all peg-any)
(define-nonterm peg-nonterminal all
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
(define-sexp-parser charclass-single all peg-any)
(define-sexp-parser peg-nonterminal all
(and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
(define-nonterm peg-sp none
(define-sexp-parser peg-sp none
(body lit (or " " "\t" "\n") *))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -297,9 +249,10 @@ RB < ']'
;; Builds a lambda-expressions for the pattern STR using accum.
(define (peg-string-compile str-stx accum)
(let ((string (syntax->datum str-stx)))
(peg-sexp-compile
(compressor
(peg-pattern->defn
(peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
(peg:tree (peg-parse peg-pattern string)) str-stx)
str-stx)
accum))
accum)))