mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-09 21:40:33 +02:00
peg: refactor peg-sexp-compile to operate on syntax directly
* module/ice-9/peg.scm (cg-generic-ret): Remove unused for-syntax argument. (peg-sexp-compile): Take the pattern as syntax directly, and use syntax-case to destructure it and dispatch to the code generators. (cg-and, cg-and-int, cg-or, cg-or-int): Refactor to operate on syntax instead of on s-expressions. (cg-body): Likewise; though this was a larger refactor. (define-nonterm, peg-match): Adapt to peg-sexp-compile calling convention change. (peg-string-compile): Likewise, and just take the grammar as a syntax object.
This commit is contained in:
parent
5041b82067
commit
3be2799eaf
1 changed files with 100 additions and 132 deletions
|
@ -91,7 +91,7 @@ return EXP."
|
|||
|
||||
;; Code we generate will have a certain return structure depending on how we're
|
||||
;; accumulating (the ACCUM variable).
|
||||
(define (cg-generic-ret for-syntax accum name body-uneval at)
|
||||
(define (cg-generic-ret accum name body-uneval at)
|
||||
;; name, body-uneval and at are syntax
|
||||
#`(let ((body #,body-uneval))
|
||||
#,(cond
|
||||
|
@ -183,135 +183,102 @@ return EXP."
|
|||
|
||||
;; Takes an arbitrary expressions and accumulation variable, then parses it.
|
||||
;; E.g.: (peg-sexp-compile syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
|
||||
(define (peg-sexp-compile for-syntax pat accum)
|
||||
(cond
|
||||
((string? pat) (cg-string pat (baf accum)))
|
||||
((symbol? pat) ;; either peg-any or a nonterminal
|
||||
(cond
|
||||
((eq? pat 'peg-any) (cg-peg-any (baf accum)))
|
||||
;; if pat is any other symbol it's a nonterminal, so just return it
|
||||
(else (datum->syntax for-syntax pat))))
|
||||
((or (not (list? pat)) (null? pat))
|
||||
;; anything besides a string, symbol, or list is an error
|
||||
(datum->syntax for-syntax
|
||||
(error-val `(peg-sexp-compile-error-1 ,pat ,accum))))
|
||||
((eq? (car pat) 'range) ;; range of characters (e.g. [a-z])
|
||||
(cg-range (cadr pat) (caddr pat) (baf accum)))
|
||||
((eq? (car pat) 'ignore) ;; match but don't parse
|
||||
(peg-sexp-compile for-syntax (cadr pat) 'none))
|
||||
((eq? (car pat) 'capture) ;; parse
|
||||
(peg-sexp-compile for-syntax (cadr pat) 'body))
|
||||
((eq? (car pat) 'peg) ;; embedded PEG string
|
||||
(peg-string-compile for-syntax (cadr pat) (baf accum)))
|
||||
((eq? (car pat) 'and)
|
||||
(cg-and for-syntax (cdr pat) (baf accum)))
|
||||
((eq? (car pat) 'or)
|
||||
(cg-or for-syntax (cdr pat) (baf accum)))
|
||||
((eq? (car pat) 'body)
|
||||
(if (not (= (length pat) 4))
|
||||
(datum->syntax for-syntax
|
||||
(error-val `(peg-sexp-compile-error-2 ,pat ,accum)))
|
||||
(datum->syntax for-syntax
|
||||
(apply cg-body for-syntax (cons (baf accum) (cdr pat))))))
|
||||
(else (datum->syntax for-syntax
|
||||
(error-val `(peg-sexp-compile-error-3 ,pat ,accum))))))
|
||||
(define (peg-sexp-compile pat accum)
|
||||
(syntax-case pat (peg-any range ignore capture peg and or body)
|
||||
(peg-any
|
||||
(cg-peg-any (baf accum)))
|
||||
(sym (identifier? #'sym) ;; nonterminal
|
||||
#'sym)
|
||||
(str (string? (syntax->datum #'str)) ;; literal string
|
||||
(cg-string (syntax->datum #'str) (baf accum)))
|
||||
((range start end) ;; range of characters (e.g. [a-z])
|
||||
(and (char? (syntax->datum #'start)) (char? (syntax->datum #'end)))
|
||||
(cg-range (syntax->datum #'start) (syntax->datum #'end) (baf accum)))
|
||||
((ignore pat) ;; match but don't parse
|
||||
(peg-sexp-compile #'pat 'none))
|
||||
((capture pat) ;; parse
|
||||
(peg-sexp-compile #'pat 'body))
|
||||
((peg pat) ;; embedded PEG string
|
||||
(string? (syntax->datum #'pat))
|
||||
(peg-string-compile #'pat (baf accum)))
|
||||
((and pat ...)
|
||||
(cg-and #'(pat ...) (baf accum)))
|
||||
((or pat ...)
|
||||
(cg-or #'(pat ...) (baf accum)))
|
||||
((body type pat num)
|
||||
(cg-body (baf accum) #'type #'pat #'num))))
|
||||
|
||||
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
|
||||
(define (cg-and for-syntax arglst accum)
|
||||
#`(lambda (str strlen at)
|
||||
(define (cg-and clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
(let ((body '()))
|
||||
#,(cg-and-int for-syntax arglst accum #'str #'strlen #'at #'body))))
|
||||
#,(cg-and-int clauses accum #'str #'len #'pos #'body))))
|
||||
|
||||
;; Internal function builder for AND (calls itself).
|
||||
(define (cg-and-int for-syntax arglst accum str strlen at body)
|
||||
(if (null? arglst)
|
||||
(cggr for-syntax accum 'cg-and #`(reverse #,body) at) ;; base case
|
||||
(let ((mf (peg-sexp-compile for-syntax (car arglst) accum))) ;; match function
|
||||
#`(let ((res (#,mf #,str #,strlen #,at)))
|
||||
(if (not res)
|
||||
#f ;; if the match failed, the and failed
|
||||
;; otherwise update AT and BODY then recurse
|
||||
(let ((newat (car res))
|
||||
(newbody (cadr res)))
|
||||
(set! #,at newat)
|
||||
(push-not-null! #,body (single-filter newbody))
|
||||
#,(cg-and-int for-syntax (cdr arglst) accum str strlen at body)))))))
|
||||
(define (cg-and-int clauses accum str strlen at body)
|
||||
(syntax-case clauses ()
|
||||
(()
|
||||
(cggr accum 'cg-and #`(reverse #,body) at))
|
||||
((first rest ...)
|
||||
#`(let ((res (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)))
|
||||
(and res
|
||||
;; update AT and BODY then recurse
|
||||
(let ((newat (car res))
|
||||
(newbody (cadr res)))
|
||||
(set! #,at newat)
|
||||
(push-not-null! #,body (single-filter newbody))
|
||||
#,(cg-and-int #'(rest ...) accum str strlen at body)))))))
|
||||
|
||||
;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
|
||||
(define (cg-or for-syntax arglst accum)
|
||||
#`(lambda (str strlen at)
|
||||
#,(cg-or-int for-syntax arglst accum #'str #'strlen #'at #'body)))
|
||||
(define (cg-or clauses accum)
|
||||
#`(lambda (str len pos)
|
||||
#,(cg-or-int clauses accum #'str #'len #'pos)))
|
||||
|
||||
;; Internal function builder for OR (calls itself).
|
||||
(define (cg-or-int for-syntax arglst accum str strlen at body)
|
||||
(if (null? arglst)
|
||||
#f ;; base case
|
||||
(let ((mf (peg-sexp-compile for-syntax (car arglst) accum)))
|
||||
#`(let ((res (#,mf #,str #,strlen #,at)))
|
||||
(if res ;; if the match succeeds, we're done
|
||||
#,(cggr for-syntax accum 'cg-or #`(cadr res) #`(car res))
|
||||
#,(cg-or-int for-syntax (cdr arglst) accum str strlen at body))))))
|
||||
|
||||
;; Returns a block of code that tries to match PAT, and on success updates AT
|
||||
;; and BODY, return #f on failure and #t on success.
|
||||
(define (cg-body-test for-syntax pat accum str strlen at body)
|
||||
(let ((mf (peg-sexp-compile for-syntax pat accum)))
|
||||
#`(let ((at2-body2 (#,mf #,str #,strlen #,at)))
|
||||
(if (or (not at2-body2) (= #,at (car at2-body2)))
|
||||
#f
|
||||
(let ((at2 (car at2-body2))
|
||||
(body2 (cadr at2-body2)))
|
||||
(set! #,at at2)
|
||||
(push-not-null!
|
||||
#,body
|
||||
(single-filter body2))
|
||||
#t)))))
|
||||
|
||||
;; Returns a block of code that sees whether NUM wants us to try and match more
|
||||
;; given that we've already matched COUNT.
|
||||
(define (cg-body-more for-syntax num count)
|
||||
(cond ((number? num) #`(< #,count #,(datum->syntax for-syntax num)))
|
||||
((eq? num '+) #t)
|
||||
((eq? num '*) #t)
|
||||
((eq? num '?) #`(< #,count 1))
|
||||
(else (error-val `(cg-body-more-error ,num ,count)))))
|
||||
|
||||
;; Returns a function that takes a paramter indicating whether or not the match
|
||||
;; was succesful and returns what the body expression should return.
|
||||
(define (cg-body-ret for-syntax accum type name body at at2)
|
||||
#`(lambda (success)
|
||||
#,(cond ((eq? type '!)
|
||||
#`(if success #f #,(cggr for-syntax accum name ''() at)))
|
||||
((eq? type '&)
|
||||
#`(if success #,(cggr for-syntax accum name ''() at) #f))
|
||||
((eq? type 'lit)
|
||||
#`(if success
|
||||
#,(cggr for-syntax accum name #`(reverse #,body) at2) #f))
|
||||
(else (error-val
|
||||
`(cg-body-ret-error ,type ,accum ,name ,body ,at ,at2))))))
|
||||
|
||||
;; Returns a block of code that sees whether COUNT satisfies the constraints of
|
||||
;; NUM.
|
||||
(define (cg-body-success for-syntax num count)
|
||||
(cond ((number? num) #`(= #,count #,num))
|
||||
((eq? num '+) #`(>= #,count 1))
|
||||
((eq? num '*) #t)
|
||||
((eq? num '?) #`(<= #,count 1))
|
||||
(else `(cg-body-success-error ,num))))
|
||||
(define (cg-or-int clauses accum str strlen at)
|
||||
(syntax-case clauses ()
|
||||
(()
|
||||
#f)
|
||||
((first rest ...)
|
||||
#`(or (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)
|
||||
#,(cg-or-int #'(rest ...) accum str strlen at)))))
|
||||
|
||||
;; Returns a function that parses a BODY element.
|
||||
(define (cg-body for-syntax accum type pat num)
|
||||
(let (; this doesn't work with regular syntax, and I'd really
|
||||
; like to know why.
|
||||
(at2 (datum->syntax for-syntax (gensym))))
|
||||
#`(lambda (str strlen at)
|
||||
(let ((#,at2 at) (count 0) (body '()))
|
||||
(while (and #,(cg-body-test for-syntax pat accum
|
||||
#'str #'strlen at2 #'body)
|
||||
(set! count (+ count 1))
|
||||
#,(cg-body-more for-syntax num #'count)))
|
||||
(#,(cg-body-ret for-syntax accum type 'cg-body #'body #'at at2)
|
||||
#,(cg-body-success for-syntax num #'count))))))
|
||||
(define (cg-body accum type pat num)
|
||||
#`(lambda (str strlen at)
|
||||
(let ((body '()))
|
||||
(let lp ((end at) (count 0))
|
||||
(let* ((match (#,(peg-sexp-compile pat accum) str strlen end))
|
||||
(new-end (if match (car match) end))
|
||||
(count (if (> new-end end) (1+ count) count)))
|
||||
(if (> new-end end)
|
||||
(push-not-null! body (single-filter (cadr match))))
|
||||
(if (and (> new-end end)
|
||||
#,(syntax-case num (+ * ?)
|
||||
(n (number? (syntax->datum #'n))
|
||||
#'(< count n))
|
||||
(+ #t)
|
||||
(* #t)
|
||||
(? #'(< count 1))))
|
||||
(lp new-end count)
|
||||
(let ((success #,(syntax-case num (+ * ?)
|
||||
(n (number? (syntax->datum #'n))
|
||||
#'(= count n))
|
||||
(+ #'(>= count 1))
|
||||
(* #t)
|
||||
(? #t))))
|
||||
#,(syntax-case type (! & lit)
|
||||
(!
|
||||
#`(if success
|
||||
#f
|
||||
#,(cggr accum 'cg-body #''() #'at)))
|
||||
(&
|
||||
#`(and success
|
||||
#,(cggr accum 'cg-body #''() #'at)))
|
||||
(lit
|
||||
#`(and success
|
||||
#,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; FOR DEFINING AND USING NONTERMINALS
|
||||
|
@ -355,8 +322,7 @@ return EXP."
|
|||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ sym accum pat)
|
||||
(let ((matchf (peg-sexp-compile x (syntax->datum #'pat)
|
||||
(syntax->datum #'accum)))
|
||||
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
|
||||
(accumsym (syntax->datum #'accum))
|
||||
(c (datum->syntax x (gensym))));; the cache
|
||||
;; CODE is the code to parse the string if the result isn't cached.
|
||||
|
@ -389,12 +355,11 @@ return EXP."
|
|||
(define-syntax peg-match
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ peg-matcher string-uncopied)
|
||||
(let ((pmsym (syntax->datum #'peg-matcher)))
|
||||
(let ((peg-sexp-compile
|
||||
(if (string? pmsym)
|
||||
(peg-string-compile x pmsym 'body)
|
||||
(peg-sexp-compile x pmsym 'body))))
|
||||
((_ pattern string-uncopied)
|
||||
(let ((pmsym (syntax->datum #'pattern)))
|
||||
(let ((matcher (if (string? (syntax->datum #'pattern))
|
||||
(peg-string-compile #'pattern 'body)
|
||||
(peg-sexp-compile #'pattern 'body))))
|
||||
;; We copy the string before using it because it might have been
|
||||
;; modified in-place since the last time it was parsed, which would
|
||||
;; invalidate the cache. Guile uses copy-on-write for strings, so
|
||||
|
@ -403,8 +368,7 @@ return EXP."
|
|||
(strlen (string-length string-uncopied))
|
||||
(at 0))
|
||||
(let ((ret (until (or (>= at strlen)
|
||||
(#,peg-sexp-compile
|
||||
string strlen at))
|
||||
(#,matcher string strlen at))
|
||||
(set! at (+ at 1)))))
|
||||
(if (eq? ret #t) ;; (>= at strlen) succeeded
|
||||
#f
|
||||
|
@ -674,9 +638,13 @@ RB < ']'
|
|||
(else (map compressor lst)))))
|
||||
|
||||
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||
(define (peg-string-compile for-syntax str accum)
|
||||
(peg-sexp-compile for-syntax
|
||||
(compressor (peg-parse-pattern (peg:tree (peg-parse peg-pattern str))))
|
||||
(define (peg-string-compile str-stx accum)
|
||||
(peg-sexp-compile
|
||||
(datum->syntax
|
||||
str-stx
|
||||
(compressor
|
||||
(peg-parse-pattern
|
||||
(peg:tree (peg-parse peg-pattern (syntax->datum str-stx))))))
|
||||
accum))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue