mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-09 21:40:33 +02:00
peg: refactor peg-sexp-compile to operate on syntax directly
* module/ice-9/peg.scm (cg-generic-ret): Remove unused for-syntax argument. (peg-sexp-compile): Take the pattern as syntax directly, and use syntax-case to destructure it and dispatch to the code generators. (cg-and, cg-and-int, cg-or, cg-or-int): Refactor to operate on syntax instead of on s-expressions. (cg-body): Likewise; though this was a larger refactor. (define-nonterm, peg-match): Adapt to peg-sexp-compile calling convention change. (peg-string-compile): Likewise, and just take the grammar as a syntax object.
This commit is contained in:
parent
5041b82067
commit
3be2799eaf
1 changed files with 100 additions and 132 deletions
|
@ -91,7 +91,7 @@ return EXP."
|
||||||
|
|
||||||
;; Code we generate will have a certain return structure depending on how we're
|
;; Code we generate will have a certain return structure depending on how we're
|
||||||
;; accumulating (the ACCUM variable).
|
;; accumulating (the ACCUM variable).
|
||||||
(define (cg-generic-ret for-syntax accum name body-uneval at)
|
(define (cg-generic-ret accum name body-uneval at)
|
||||||
;; name, body-uneval and at are syntax
|
;; name, body-uneval and at are syntax
|
||||||
#`(let ((body #,body-uneval))
|
#`(let ((body #,body-uneval))
|
||||||
#,(cond
|
#,(cond
|
||||||
|
@ -183,135 +183,102 @@ return EXP."
|
||||||
|
|
||||||
;; Takes an arbitrary expressions and accumulation variable, then parses it.
|
;; Takes an arbitrary expressions and accumulation variable, then parses it.
|
||||||
;; E.g.: (peg-sexp-compile syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
|
;; E.g.: (peg-sexp-compile syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
|
||||||
(define (peg-sexp-compile for-syntax pat accum)
|
(define (peg-sexp-compile pat accum)
|
||||||
(cond
|
(syntax-case pat (peg-any range ignore capture peg and or body)
|
||||||
((string? pat) (cg-string pat (baf accum)))
|
(peg-any
|
||||||
((symbol? pat) ;; either peg-any or a nonterminal
|
(cg-peg-any (baf accum)))
|
||||||
(cond
|
(sym (identifier? #'sym) ;; nonterminal
|
||||||
((eq? pat 'peg-any) (cg-peg-any (baf accum)))
|
#'sym)
|
||||||
;; if pat is any other symbol it's a nonterminal, so just return it
|
(str (string? (syntax->datum #'str)) ;; literal string
|
||||||
(else (datum->syntax for-syntax pat))))
|
(cg-string (syntax->datum #'str) (baf accum)))
|
||||||
((or (not (list? pat)) (null? pat))
|
((range start end) ;; range of characters (e.g. [a-z])
|
||||||
;; anything besides a string, symbol, or list is an error
|
(and (char? (syntax->datum #'start)) (char? (syntax->datum #'end)))
|
||||||
(datum->syntax for-syntax
|
(cg-range (syntax->datum #'start) (syntax->datum #'end) (baf accum)))
|
||||||
(error-val `(peg-sexp-compile-error-1 ,pat ,accum))))
|
((ignore pat) ;; match but don't parse
|
||||||
((eq? (car pat) 'range) ;; range of characters (e.g. [a-z])
|
(peg-sexp-compile #'pat 'none))
|
||||||
(cg-range (cadr pat) (caddr pat) (baf accum)))
|
((capture pat) ;; parse
|
||||||
((eq? (car pat) 'ignore) ;; match but don't parse
|
(peg-sexp-compile #'pat 'body))
|
||||||
(peg-sexp-compile for-syntax (cadr pat) 'none))
|
((peg pat) ;; embedded PEG string
|
||||||
((eq? (car pat) 'capture) ;; parse
|
(string? (syntax->datum #'pat))
|
||||||
(peg-sexp-compile for-syntax (cadr pat) 'body))
|
(peg-string-compile #'pat (baf accum)))
|
||||||
((eq? (car pat) 'peg) ;; embedded PEG string
|
((and pat ...)
|
||||||
(peg-string-compile for-syntax (cadr pat) (baf accum)))
|
(cg-and #'(pat ...) (baf accum)))
|
||||||
((eq? (car pat) 'and)
|
((or pat ...)
|
||||||
(cg-and for-syntax (cdr pat) (baf accum)))
|
(cg-or #'(pat ...) (baf accum)))
|
||||||
((eq? (car pat) 'or)
|
((body type pat num)
|
||||||
(cg-or for-syntax (cdr pat) (baf accum)))
|
(cg-body (baf accum) #'type #'pat #'num))))
|
||||||
((eq? (car pat) 'body)
|
|
||||||
(if (not (= (length pat) 4))
|
|
||||||
(datum->syntax for-syntax
|
|
||||||
(error-val `(peg-sexp-compile-error-2 ,pat ,accum)))
|
|
||||||
(datum->syntax for-syntax
|
|
||||||
(apply cg-body for-syntax (cons (baf accum) (cdr pat))))))
|
|
||||||
(else (datum->syntax for-syntax
|
|
||||||
(error-val `(peg-sexp-compile-error-3 ,pat ,accum))))))
|
|
||||||
|
|
||||||
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
|
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
|
||||||
(define (cg-and for-syntax arglst accum)
|
(define (cg-and clauses accum)
|
||||||
#`(lambda (str strlen at)
|
#`(lambda (str len pos)
|
||||||
(let ((body '()))
|
(let ((body '()))
|
||||||
#,(cg-and-int for-syntax arglst accum #'str #'strlen #'at #'body))))
|
#,(cg-and-int clauses accum #'str #'len #'pos #'body))))
|
||||||
|
|
||||||
;; Internal function builder for AND (calls itself).
|
;; Internal function builder for AND (calls itself).
|
||||||
(define (cg-and-int for-syntax arglst accum str strlen at body)
|
(define (cg-and-int clauses accum str strlen at body)
|
||||||
(if (null? arglst)
|
(syntax-case clauses ()
|
||||||
(cggr for-syntax accum 'cg-and #`(reverse #,body) at) ;; base case
|
(()
|
||||||
(let ((mf (peg-sexp-compile for-syntax (car arglst) accum))) ;; match function
|
(cggr accum 'cg-and #`(reverse #,body) at))
|
||||||
#`(let ((res (#,mf #,str #,strlen #,at)))
|
((first rest ...)
|
||||||
(if (not res)
|
#`(let ((res (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)))
|
||||||
#f ;; if the match failed, the and failed
|
(and res
|
||||||
;; otherwise update AT and BODY then recurse
|
;; update AT and BODY then recurse
|
||||||
(let ((newat (car res))
|
(let ((newat (car res))
|
||||||
(newbody (cadr res)))
|
(newbody (cadr res)))
|
||||||
(set! #,at newat)
|
(set! #,at newat)
|
||||||
(push-not-null! #,body (single-filter newbody))
|
(push-not-null! #,body (single-filter newbody))
|
||||||
#,(cg-and-int for-syntax (cdr arglst) accum str strlen at body)))))))
|
#,(cg-and-int #'(rest ...) accum str strlen at body)))))))
|
||||||
|
|
||||||
;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
|
;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
|
||||||
(define (cg-or for-syntax arglst accum)
|
(define (cg-or clauses accum)
|
||||||
#`(lambda (str strlen at)
|
#`(lambda (str len pos)
|
||||||
#,(cg-or-int for-syntax arglst accum #'str #'strlen #'at #'body)))
|
#,(cg-or-int clauses accum #'str #'len #'pos)))
|
||||||
|
|
||||||
;; Internal function builder for OR (calls itself).
|
;; Internal function builder for OR (calls itself).
|
||||||
(define (cg-or-int for-syntax arglst accum str strlen at body)
|
(define (cg-or-int clauses accum str strlen at)
|
||||||
(if (null? arglst)
|
(syntax-case clauses ()
|
||||||
#f ;; base case
|
(()
|
||||||
(let ((mf (peg-sexp-compile for-syntax (car arglst) accum)))
|
#f)
|
||||||
#`(let ((res (#,mf #,str #,strlen #,at)))
|
((first rest ...)
|
||||||
(if res ;; if the match succeeds, we're done
|
#`(or (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)
|
||||||
#,(cggr for-syntax accum 'cg-or #`(cadr res) #`(car res))
|
#,(cg-or-int #'(rest ...) accum str strlen at)))))
|
||||||
#,(cg-or-int for-syntax (cdr arglst) accum str strlen at body))))))
|
|
||||||
|
|
||||||
;; Returns a block of code that tries to match PAT, and on success updates AT
|
|
||||||
;; and BODY, return #f on failure and #t on success.
|
|
||||||
(define (cg-body-test for-syntax pat accum str strlen at body)
|
|
||||||
(let ((mf (peg-sexp-compile for-syntax pat accum)))
|
|
||||||
#`(let ((at2-body2 (#,mf #,str #,strlen #,at)))
|
|
||||||
(if (or (not at2-body2) (= #,at (car at2-body2)))
|
|
||||||
#f
|
|
||||||
(let ((at2 (car at2-body2))
|
|
||||||
(body2 (cadr at2-body2)))
|
|
||||||
(set! #,at at2)
|
|
||||||
(push-not-null!
|
|
||||||
#,body
|
|
||||||
(single-filter body2))
|
|
||||||
#t)))))
|
|
||||||
|
|
||||||
;; Returns a block of code that sees whether NUM wants us to try and match more
|
|
||||||
;; given that we've already matched COUNT.
|
|
||||||
(define (cg-body-more for-syntax num count)
|
|
||||||
(cond ((number? num) #`(< #,count #,(datum->syntax for-syntax num)))
|
|
||||||
((eq? num '+) #t)
|
|
||||||
((eq? num '*) #t)
|
|
||||||
((eq? num '?) #`(< #,count 1))
|
|
||||||
(else (error-val `(cg-body-more-error ,num ,count)))))
|
|
||||||
|
|
||||||
;; Returns a function that takes a paramter indicating whether or not the match
|
|
||||||
;; was succesful and returns what the body expression should return.
|
|
||||||
(define (cg-body-ret for-syntax accum type name body at at2)
|
|
||||||
#`(lambda (success)
|
|
||||||
#,(cond ((eq? type '!)
|
|
||||||
#`(if success #f #,(cggr for-syntax accum name ''() at)))
|
|
||||||
((eq? type '&)
|
|
||||||
#`(if success #,(cggr for-syntax accum name ''() at) #f))
|
|
||||||
((eq? type 'lit)
|
|
||||||
#`(if success
|
|
||||||
#,(cggr for-syntax accum name #`(reverse #,body) at2) #f))
|
|
||||||
(else (error-val
|
|
||||||
`(cg-body-ret-error ,type ,accum ,name ,body ,at ,at2))))))
|
|
||||||
|
|
||||||
;; Returns a block of code that sees whether COUNT satisfies the constraints of
|
|
||||||
;; NUM.
|
|
||||||
(define (cg-body-success for-syntax num count)
|
|
||||||
(cond ((number? num) #`(= #,count #,num))
|
|
||||||
((eq? num '+) #`(>= #,count 1))
|
|
||||||
((eq? num '*) #t)
|
|
||||||
((eq? num '?) #`(<= #,count 1))
|
|
||||||
(else `(cg-body-success-error ,num))))
|
|
||||||
|
|
||||||
;; Returns a function that parses a BODY element.
|
;; Returns a function that parses a BODY element.
|
||||||
(define (cg-body for-syntax accum type pat num)
|
(define (cg-body accum type pat num)
|
||||||
(let (; this doesn't work with regular syntax, and I'd really
|
#`(lambda (str strlen at)
|
||||||
; like to know why.
|
(let ((body '()))
|
||||||
(at2 (datum->syntax for-syntax (gensym))))
|
(let lp ((end at) (count 0))
|
||||||
#`(lambda (str strlen at)
|
(let* ((match (#,(peg-sexp-compile pat accum) str strlen end))
|
||||||
(let ((#,at2 at) (count 0) (body '()))
|
(new-end (if match (car match) end))
|
||||||
(while (and #,(cg-body-test for-syntax pat accum
|
(count (if (> new-end end) (1+ count) count)))
|
||||||
#'str #'strlen at2 #'body)
|
(if (> new-end end)
|
||||||
(set! count (+ count 1))
|
(push-not-null! body (single-filter (cadr match))))
|
||||||
#,(cg-body-more for-syntax num #'count)))
|
(if (and (> new-end end)
|
||||||
(#,(cg-body-ret for-syntax accum type 'cg-body #'body #'at at2)
|
#,(syntax-case num (+ * ?)
|
||||||
#,(cg-body-success for-syntax num #'count))))))
|
(n (number? (syntax->datum #'n))
|
||||||
|
#'(< count n))
|
||||||
|
(+ #t)
|
||||||
|
(* #t)
|
||||||
|
(? #'(< count 1))))
|
||||||
|
(lp new-end count)
|
||||||
|
(let ((success #,(syntax-case num (+ * ?)
|
||||||
|
(n (number? (syntax->datum #'n))
|
||||||
|
#'(= count n))
|
||||||
|
(+ #'(>= count 1))
|
||||||
|
(* #t)
|
||||||
|
(? #t))))
|
||||||
|
#,(syntax-case type (! & lit)
|
||||||
|
(!
|
||||||
|
#`(if success
|
||||||
|
#f
|
||||||
|
#,(cggr accum 'cg-body #''() #'at)))
|
||||||
|
(&
|
||||||
|
#`(and success
|
||||||
|
#,(cggr accum 'cg-body #''() #'at)))
|
||||||
|
(lit
|
||||||
|
#`(and success
|
||||||
|
#,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;;;; FOR DEFINING AND USING NONTERMINALS
|
;;;;; FOR DEFINING AND USING NONTERMINALS
|
||||||
|
@ -355,8 +322,7 @@ return EXP."
|
||||||
(lambda (x)
|
(lambda (x)
|
||||||
(syntax-case x ()
|
(syntax-case x ()
|
||||||
((_ sym accum pat)
|
((_ sym accum pat)
|
||||||
(let ((matchf (peg-sexp-compile x (syntax->datum #'pat)
|
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
|
||||||
(syntax->datum #'accum)))
|
|
||||||
(accumsym (syntax->datum #'accum))
|
(accumsym (syntax->datum #'accum))
|
||||||
(c (datum->syntax x (gensym))));; the cache
|
(c (datum->syntax x (gensym))));; the cache
|
||||||
;; CODE is the code to parse the string if the result isn't cached.
|
;; CODE is the code to parse the string if the result isn't cached.
|
||||||
|
@ -389,12 +355,11 @@ return EXP."
|
||||||
(define-syntax peg-match
|
(define-syntax peg-match
|
||||||
(lambda (x)
|
(lambda (x)
|
||||||
(syntax-case x ()
|
(syntax-case x ()
|
||||||
((_ peg-matcher string-uncopied)
|
((_ pattern string-uncopied)
|
||||||
(let ((pmsym (syntax->datum #'peg-matcher)))
|
(let ((pmsym (syntax->datum #'pattern)))
|
||||||
(let ((peg-sexp-compile
|
(let ((matcher (if (string? (syntax->datum #'pattern))
|
||||||
(if (string? pmsym)
|
(peg-string-compile #'pattern 'body)
|
||||||
(peg-string-compile x pmsym 'body)
|
(peg-sexp-compile #'pattern 'body))))
|
||||||
(peg-sexp-compile x pmsym 'body))))
|
|
||||||
;; We copy the string before using it because it might have been
|
;; We copy the string before using it because it might have been
|
||||||
;; modified in-place since the last time it was parsed, which would
|
;; modified in-place since the last time it was parsed, which would
|
||||||
;; invalidate the cache. Guile uses copy-on-write for strings, so
|
;; invalidate the cache. Guile uses copy-on-write for strings, so
|
||||||
|
@ -403,8 +368,7 @@ return EXP."
|
||||||
(strlen (string-length string-uncopied))
|
(strlen (string-length string-uncopied))
|
||||||
(at 0))
|
(at 0))
|
||||||
(let ((ret (until (or (>= at strlen)
|
(let ((ret (until (or (>= at strlen)
|
||||||
(#,peg-sexp-compile
|
(#,matcher string strlen at))
|
||||||
string strlen at))
|
|
||||||
(set! at (+ at 1)))))
|
(set! at (+ at 1)))))
|
||||||
(if (eq? ret #t) ;; (>= at strlen) succeeded
|
(if (eq? ret #t) ;; (>= at strlen) succeeded
|
||||||
#f
|
#f
|
||||||
|
@ -674,9 +638,13 @@ RB < ']'
|
||||||
(else (map compressor lst)))))
|
(else (map compressor lst)))))
|
||||||
|
|
||||||
;; Builds a lambda-expressions for the pattern STR using accum.
|
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||||
(define (peg-string-compile for-syntax str accum)
|
(define (peg-string-compile str-stx accum)
|
||||||
(peg-sexp-compile for-syntax
|
(peg-sexp-compile
|
||||||
(compressor (peg-parse-pattern (peg:tree (peg-parse peg-pattern str))))
|
(datum->syntax
|
||||||
|
str-stx
|
||||||
|
(compressor
|
||||||
|
(peg-parse-pattern
|
||||||
|
(peg:tree (peg-parse peg-pattern (syntax->datum str-stx))))))
|
||||||
accum))
|
accum))
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue