mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-30 06:50:31 +02:00
Move define-nonterm
* module/ice-9/peg/string-peg.scm: remove define-nonterm and make a simpler macro called `define-sexp-parser' to make the PEG grammar * module/ice-9/peg.scm: move define-nonterm macro to this file * module/ice-9/peg/codegen.scm: move code to wrap a parser result nicely to this file, under name `wrap-parser-for-users'
This commit is contained in:
parent
97c846947d
commit
0afaf59982
3 changed files with 89 additions and 80 deletions
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
(define-module (ice-9 peg)
|
(define-module (ice-9 peg)
|
||||||
#:export (peg-parse
|
#:export (peg-parse
|
||||||
; define-nonterm
|
define-nonterm
|
||||||
; define-nonterm-f
|
; define-nonterm-f
|
||||||
peg-match)
|
peg-match)
|
||||||
; #:export-syntax (define-nonterm)
|
; #:export-syntax (define-nonterm)
|
||||||
|
@ -30,7 +30,7 @@
|
||||||
#:re-export (peg-sexp-compile
|
#:re-export (peg-sexp-compile
|
||||||
define-grammar
|
define-grammar
|
||||||
define-grammar-f
|
define-grammar-f
|
||||||
define-nonterm
|
; define-nonterm
|
||||||
keyword-flatten
|
keyword-flatten
|
||||||
context-flatten
|
context-flatten
|
||||||
peg:start
|
peg:start
|
||||||
|
@ -67,6 +67,35 @@ execute the STMTs and try again."
|
||||||
#f
|
#f
|
||||||
(make-prec 0 (car res) string (string-collapse (cadr res))))))
|
(make-prec 0 (car res) string (string-collapse (cadr res))))))
|
||||||
|
|
||||||
|
;; The results of parsing using a nonterminal are cached. Think of it like a
|
||||||
|
;; hash with no conflict resolution. Process for deciding on the cache size
|
||||||
|
;; wasn't very scientific; just ran the benchmarks and stopped a little after
|
||||||
|
;; the point of diminishing returns on my box.
|
||||||
|
(define *cache-size* 512)
|
||||||
|
|
||||||
|
;; Defines a new nonterminal symbol accumulating with ACCUM.
|
||||||
|
(define-syntax define-nonterm
|
||||||
|
(lambda (x)
|
||||||
|
(syntax-case x ()
|
||||||
|
((_ sym accum pat)
|
||||||
|
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
|
||||||
|
(accumsym (syntax->datum #'accum))
|
||||||
|
(c (datum->syntax x (gensym))));; the cache
|
||||||
|
;; CODE is the code to parse the string if the result isn't cached.
|
||||||
|
(let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||||
|
#`(begin
|
||||||
|
(define #,c (make-vector *cache-size* #f));; the cache
|
||||||
|
(define (sym str strlen at)
|
||||||
|
(let* ((vref (vector-ref #,c (modulo at *cache-size*))))
|
||||||
|
;; Check to see whether the value is cached.
|
||||||
|
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
|
||||||
|
(caddr vref);; If it is return it.
|
||||||
|
(let ((fres ;; Else calculate it and cache it.
|
||||||
|
(#,syn str strlen at)))
|
||||||
|
(vector-set! #,c (modulo at *cache-size*)
|
||||||
|
(list str at fres))
|
||||||
|
fres)))))))))))
|
||||||
|
|
||||||
;; Searches through STRING for something that parses to PEG-MATCHER. Think
|
;; Searches through STRING for something that parses to PEG-MATCHER. Think
|
||||||
;; regexp search.
|
;; regexp search.
|
||||||
(define-syntax peg-match
|
(define-syntax peg-match
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
;;;;
|
;;;;
|
||||||
|
|
||||||
(define-module (ice-9 peg codegen)
|
(define-module (ice-9 peg codegen)
|
||||||
#:export (peg-sexp-compile)
|
#:export (peg-sexp-compile wrap-parser-for-users)
|
||||||
#:use-module (ice-9 peg)
|
#:use-module (ice-9 peg)
|
||||||
#:use-module (ice-9 peg string-peg)
|
#:use-module (ice-9 peg string-peg)
|
||||||
#:use-module (ice-9 pretty-print)
|
#:use-module (ice-9 pretty-print)
|
||||||
|
@ -244,3 +244,30 @@ return EXP."
|
||||||
(lit
|
(lit
|
||||||
#`(and success
|
#`(and success
|
||||||
#,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
|
#,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
|
||||||
|
|
||||||
|
;; Packages the results of a parser
|
||||||
|
(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
|
||||||
|
#`(lambda (str strlen at)
|
||||||
|
(let ((res (#,parser str strlen at)))
|
||||||
|
;; Try to match the nonterminal.
|
||||||
|
(if res
|
||||||
|
;; If we matched, do some post-processing to figure out
|
||||||
|
;; what data to propagate upward.
|
||||||
|
(let ((at (car res))
|
||||||
|
(body (cadr res)))
|
||||||
|
#,(cond
|
||||||
|
((eq? accumsym 'name)
|
||||||
|
#`(list at '#,s-syn))
|
||||||
|
((eq? accumsym 'all)
|
||||||
|
#`(list (car res)
|
||||||
|
(cond
|
||||||
|
((not (list? body))
|
||||||
|
(list '#,s-syn body))
|
||||||
|
((null? body) '#,s-syn)
|
||||||
|
((symbol? (car body))
|
||||||
|
(list '#,s-syn body))
|
||||||
|
(else (cons '#,s-syn body)))))
|
||||||
|
((eq? accumsym 'none) #`(list (car res) '()))
|
||||||
|
(else #`(begin res))))
|
||||||
|
;; If we didn't match, just return false.
|
||||||
|
#f))))
|
||||||
|
|
|
@ -22,16 +22,11 @@
|
||||||
peg-as-peg
|
peg-as-peg
|
||||||
define-grammar
|
define-grammar
|
||||||
define-grammar-f
|
define-grammar-f
|
||||||
define-nonterm
|
|
||||||
peg-grammar)
|
peg-grammar)
|
||||||
#:use-module (ice-9 peg)
|
#:use-module (ice-9 peg)
|
||||||
#:use-module (ice-9 peg codegen))
|
#:use-module (ice-9 peg codegen)
|
||||||
|
#:use-module (ice-9 peg match-record)
|
||||||
;; The results of parsing using a nonterminal are cached. Think of it like a
|
#:use-module (ice-9 peg simplify-tree))
|
||||||
;; hash with no conflict resolution. Process for deciding on the cache size
|
|
||||||
;; wasn't very scientific; just ran the benchmarks and stopped a little after
|
|
||||||
;; the point of diminishing returns on my box.
|
|
||||||
(define *cache-size* 512)
|
|
||||||
|
|
||||||
;; Gets the left-hand depth of a list.
|
;; Gets the left-hand depth of a list.
|
||||||
(define (depth lst)
|
(define (depth lst)
|
||||||
|
@ -39,58 +34,6 @@
|
||||||
0
|
0
|
||||||
(+ 1 (depth (car lst)))))
|
(+ 1 (depth (car lst)))))
|
||||||
|
|
||||||
(eval-when (compile load eval)
|
|
||||||
(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
|
|
||||||
; (let ((matchf-syn (datum->syntax for-syntax matchf)))
|
|
||||||
#`(lambda (str strlen at)
|
|
||||||
(let ((res (#,matchf-syn str strlen at)))
|
|
||||||
;; Try to match the nonterminal.
|
|
||||||
(if res
|
|
||||||
;; If we matched, do some post-processing to figure out
|
|
||||||
;; what data to propagate upward.
|
|
||||||
(let ((at (car res))
|
|
||||||
(body (cadr res)))
|
|
||||||
#,(cond
|
|
||||||
((eq? accumsym 'name)
|
|
||||||
#`(list at '#,s-syn))
|
|
||||||
((eq? accumsym 'all)
|
|
||||||
#`(list (car res)
|
|
||||||
(cond
|
|
||||||
((not (list? body))
|
|
||||||
(list '#,s-syn body))
|
|
||||||
((null? body) '#,s-syn)
|
|
||||||
((symbol? (car body))
|
|
||||||
(list '#,s-syn body))
|
|
||||||
(else (cons '#,s-syn body)))))
|
|
||||||
((eq? accumsym 'none) #`(list (car res) '()))
|
|
||||||
(else #`(begin res))))
|
|
||||||
;; If we didn't match, just return false.
|
|
||||||
#f))))
|
|
||||||
)
|
|
||||||
|
|
||||||
;; Defines a new nonterminal symbol accumulating with ACCUM.
|
|
||||||
(define-syntax define-nonterm
|
|
||||||
(lambda (x)
|
|
||||||
(syntax-case x ()
|
|
||||||
((_ sym accum pat)
|
|
||||||
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
|
|
||||||
(accumsym (syntax->datum #'accum))
|
|
||||||
(c (datum->syntax x (gensym))));; the cache
|
|
||||||
;; CODE is the code to parse the string if the result isn't cached.
|
|
||||||
(let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
|
|
||||||
#`(begin
|
|
||||||
(define #,c (make-vector *cache-size* #f));; the cache
|
|
||||||
(define (sym str strlen at)
|
|
||||||
(let* ((vref (vector-ref #,c (modulo at *cache-size*))))
|
|
||||||
;; Check to see whether the value is cached.
|
|
||||||
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
|
|
||||||
(caddr vref);; If it is return it.
|
|
||||||
(let ((fres ;; Else calculate it and cache it.
|
|
||||||
(#,syn str strlen at)))
|
|
||||||
(vector-set! #,c (modulo at *cache-size*)
|
|
||||||
(list str at fres))
|
|
||||||
fres)))))))))))
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;;;; Parse string PEGs using sexp PEGs.
|
;;;;; Parse string PEGs using sexp PEGs.
|
||||||
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
||||||
|
@ -114,34 +57,43 @@ LB < '['
|
||||||
RB < ']'
|
RB < ']'
|
||||||
")
|
")
|
||||||
|
|
||||||
(define-nonterm peg-grammar all
|
(define-syntax define-sexp-parser
|
||||||
|
(lambda (x)
|
||||||
|
(syntax-case x ()
|
||||||
|
((_ sym accum pat)
|
||||||
|
(let* ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
|
||||||
|
(accumsym (syntax->datum #'accum))
|
||||||
|
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||||
|
#`(define sym #,syn))))))
|
||||||
|
|
||||||
|
(define-sexp-parser peg-grammar all
|
||||||
(body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
|
(body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
|
||||||
(define-nonterm peg-pattern all
|
(define-sexp-parser peg-pattern all
|
||||||
(and peg-alternative
|
(and peg-alternative
|
||||||
(body lit (and (ignore "/") peg-sp peg-alternative) *)))
|
(body lit (and (ignore "/") peg-sp peg-alternative) *)))
|
||||||
(define-nonterm peg-alternative all
|
(define-sexp-parser peg-alternative all
|
||||||
(body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
|
(body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
|
||||||
(define-nonterm peg-suffix all
|
(define-sexp-parser peg-suffix all
|
||||||
(and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
|
(and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
|
||||||
(define-nonterm peg-primary all
|
(define-sexp-parser peg-primary all
|
||||||
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
||||||
(and "." peg-sp)
|
(and "." peg-sp)
|
||||||
peg-literal
|
peg-literal
|
||||||
peg-charclass
|
peg-charclass
|
||||||
(and peg-nonterminal (body ! "<" 1))))
|
(and peg-nonterminal (body ! "<" 1))))
|
||||||
(define-nonterm peg-literal all
|
(define-sexp-parser peg-literal all
|
||||||
(and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
|
(and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
|
||||||
(define-nonterm peg-charclass all
|
(define-sexp-parser peg-charclass all
|
||||||
(and (ignore "[")
|
(and (ignore "[")
|
||||||
(body lit (and (body ! "]" 1)
|
(body lit (and (body ! "]" 1)
|
||||||
(or charclass-range charclass-single)) *)
|
(or charclass-range charclass-single)) *)
|
||||||
(ignore "]")
|
(ignore "]")
|
||||||
peg-sp))
|
peg-sp))
|
||||||
(define-nonterm charclass-range all (and peg-any "-" peg-any))
|
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
|
||||||
(define-nonterm charclass-single all peg-any)
|
(define-sexp-parser charclass-single all peg-any)
|
||||||
(define-nonterm peg-nonterminal all
|
(define-sexp-parser peg-nonterminal all
|
||||||
(and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
|
(and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
|
||||||
(define-nonterm peg-sp none
|
(define-sexp-parser peg-sp none
|
||||||
(body lit (or " " "\t" "\n") *))
|
(body lit (or " " "\t" "\n") *))
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
@ -297,9 +249,10 @@ RB < ']'
|
||||||
|
|
||||||
;; Builds a lambda-expressions for the pattern STR using accum.
|
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||||
(define (peg-string-compile str-stx accum)
|
(define (peg-string-compile str-stx accum)
|
||||||
(peg-sexp-compile
|
(let ((string (syntax->datum str-stx)))
|
||||||
(compressor
|
(peg-sexp-compile
|
||||||
(peg-pattern->defn
|
(compressor
|
||||||
(peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
|
(peg-pattern->defn
|
||||||
str-stx)
|
(peg:tree (peg-parse peg-pattern string)) str-stx)
|
||||||
accum))
|
str-stx)
|
||||||
|
accum)))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue