mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 04:10:18 +02:00
Separate PEG Strings
* module/ice-9/peg.scm: remove functions dealing with PEGs as strings * module/ice-9/peg/string-peg.scm: and put them here
This commit is contained in:
parent
00923497d2
commit
5e16c41703
3 changed files with 315 additions and 282 deletions
|
@ -18,14 +18,11 @@
|
|||
;;;;
|
||||
|
||||
(define-module (ice-9 peg)
|
||||
#:export (peg-string-compile
|
||||
context-flatten
|
||||
#:export (context-flatten
|
||||
peg-parse
|
||||
define-nonterm
|
||||
define-nonterm-f
|
||||
; define-nonterm
|
||||
; define-nonterm-f
|
||||
peg-match
|
||||
define-grammar
|
||||
define-grammar-f
|
||||
peg:start
|
||||
peg:end
|
||||
peg:string
|
||||
|
@ -33,8 +30,13 @@
|
|||
peg:substring
|
||||
peg-record?
|
||||
keyword-flatten)
|
||||
; #:export-syntax (define-nonterm)
|
||||
#:use-module (ice-9 peg codegen)
|
||||
#:re-export (peg-sexp-compile)
|
||||
#:use-module (ice-9 peg string-peg)
|
||||
#:re-export (peg-sexp-compile
|
||||
define-grammar
|
||||
define-grammar-f
|
||||
define-nonterm)
|
||||
#:use-module (system base pmatch))
|
||||
|
||||
;;;
|
||||
|
@ -64,62 +66,6 @@ execute the STMTs and try again."
|
|||
;;;;; FOR DEFINING AND USING NONTERMINALS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; The results of parsing using a nonterminal are cached. Think of it like a
|
||||
;; hash with no conflict resolution. Process for deciding on the cache size
|
||||
;; wasn't very scientific; just ran the benchmarks and stopped a little after
|
||||
;; the point of diminishing returns on my box.
|
||||
(define *cache-size* 512)
|
||||
|
||||
(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
|
||||
; (let ((matchf-syn (datum->syntax for-syntax matchf)))
|
||||
#`(lambda (str strlen at)
|
||||
(let ((res (#,matchf-syn str strlen at)))
|
||||
;; Try to match the nonterminal.
|
||||
(if res
|
||||
;; If we matched, do some post-processing to figure out
|
||||
;; what data to propagate upward.
|
||||
(let ((at (car res))
|
||||
(body (cadr res)))
|
||||
#,(cond
|
||||
((eq? accumsym 'name)
|
||||
#`(list at '#,s-syn))
|
||||
((eq? accumsym 'all)
|
||||
#`(list (car res)
|
||||
(cond
|
||||
((not (list? body))
|
||||
(list '#,s-syn body))
|
||||
((null? body) '#,s-syn)
|
||||
((symbol? (car body))
|
||||
(list '#,s-syn body))
|
||||
(else (cons '#,s-syn body)))))
|
||||
((eq? accumsym 'none) #`(list (car res) '()))
|
||||
(else #`(begin res))))
|
||||
;; If we didn't match, just return false.
|
||||
#f))))
|
||||
|
||||
;; Defines a new nonterminal symbol accumulating with ACCUM.
|
||||
(define-syntax define-nonterm
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ sym accum pat)
|
||||
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
|
||||
(accumsym (syntax->datum #'accum))
|
||||
(c (datum->syntax x (gensym))));; the cache
|
||||
;; CODE is the code to parse the string if the result isn't cached.
|
||||
(let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
|
||||
#`(begin
|
||||
(define #,c (make-vector *cache-size* #f));; the cache
|
||||
(define (sym str strlen at)
|
||||
(let* ((vref (vector-ref #,c (modulo at *cache-size*))))
|
||||
;; Check to see whether the value is cached.
|
||||
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
|
||||
(caddr vref);; If it is return it.
|
||||
(let ((fres ;; Else calculate it and cache it.
|
||||
(#,syn str strlen at)))
|
||||
(vector-set! #,c (modulo at *cache-size*)
|
||||
(list str at fres))
|
||||
fres)))))))))))
|
||||
|
||||
;; Parses STRING using NONTERM
|
||||
(define (peg-parse nonterm string)
|
||||
;; We copy the string before using it because it might have been modified
|
||||
|
@ -225,225 +171,6 @@ execute the STMTs and try again."
|
|||
(member (car x) keyword-lst)))
|
||||
lst))
|
||||
|
||||
;; Gets the left-hand depth of a list.
|
||||
(define (depth lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
0
|
||||
(+ 1 (depth (car lst)))))
|
||||
|
||||
;; Trims characters off the front and end of STR.
|
||||
;; (trim-1chars "'ab'") -> "ab"
|
||||
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; Parse string PEGs using sexp PEGs.
|
||||
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Grammar for PEGs in PEG grammar.
|
||||
(define peg-as-peg
|
||||
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
|
||||
pattern <-- alternative (SLASH sp alternative)*
|
||||
alternative <-- ([!&]? sp suffix)+
|
||||
suffix <-- primary ([*+?] sp)*
|
||||
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
|
||||
literal <-- ['] (!['] .)* ['] sp
|
||||
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
|
||||
CCrange <-- . '-' .
|
||||
CCsingle <-- .
|
||||
nonterminal <-- [a-zA-Z0-9-]+ sp
|
||||
sp < [ \t\n]*
|
||||
SLASH < '/'
|
||||
LB < '['
|
||||
RB < ']'
|
||||
")
|
||||
|
||||
(define-nonterm peg-grammar all
|
||||
(body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
|
||||
(define-nonterm peg-pattern all
|
||||
(and peg-alternative
|
||||
(body lit (and (ignore "/") peg-sp peg-alternative) *)))
|
||||
(define-nonterm peg-alternative all
|
||||
(body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
|
||||
(define-nonterm peg-suffix all
|
||||
(and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
|
||||
(define-nonterm peg-primary all
|
||||
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
||||
(and "." peg-sp)
|
||||
peg-literal
|
||||
peg-charclass
|
||||
(and peg-nonterminal (body ! "<" 1))))
|
||||
(define-nonterm peg-literal all
|
||||
(and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
|
||||
(define-nonterm peg-charclass all
|
||||
(and (ignore "[")
|
||||
(body lit (and (body ! "]" 1)
|
||||
(or charclass-range charclass-single)) *)
|
||||
(ignore "]")
|
||||
peg-sp))
|
||||
(define-nonterm charclass-range all (and peg-any "-" peg-any))
|
||||
(define-nonterm charclass-single all peg-any)
|
||||
(define-nonterm peg-nonterminal all
|
||||
(and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
|
||||
(define-nonterm peg-sp none
|
||||
(body lit (or " " "\t" "\n") *))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; PARSE STRING PEGS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Pakes a string representing a PEG grammar and defines all the nonterminals in
|
||||
;; it as the associated PEGs.
|
||||
(define (peg-parser str for-syntax)
|
||||
(let ((parsed (peg-parse peg-grammar str)))
|
||||
(if (not parsed)
|
||||
(begin
|
||||
;; (display "Invalid PEG grammar!\n")
|
||||
#f)
|
||||
(let ((lst (peg:tree parsed)))
|
||||
(cond
|
||||
((or (not (list? lst)) (null? lst))
|
||||
lst)
|
||||
((eq? (car lst) 'peg-grammar)
|
||||
#`(begin
|
||||
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
|
||||
(context-flatten (lambda (lst) (<= (depth lst) 2))
|
||||
(cdr lst))))))))))
|
||||
|
||||
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
||||
;; defines all the appropriate nonterminals.
|
||||
(define-syntax define-grammar
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ str)
|
||||
(peg-parser (syntax->datum #'str) x)))))
|
||||
(define define-grammar-f peg-parser)
|
||||
|
||||
;; Parse a nonterminal and pattern listed in LST.
|
||||
(define (peg-nonterm->defn lst for-syntax)
|
||||
(let* ((nonterm (car lst))
|
||||
(grabber (cadr lst))
|
||||
(pattern (caddr lst))
|
||||
(nonterm-name (datum->syntax for-syntax
|
||||
(string->symbol (cadr nonterm)))))
|
||||
#`(define-nonterm #,nonterm-name
|
||||
#,(cond
|
||||
((string=? grabber "<--") (datum->syntax for-syntax 'all))
|
||||
((string=? grabber "<-") (datum->syntax for-syntax 'body))
|
||||
(else (datum->syntax for-syntax 'none)))
|
||||
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
|
||||
|
||||
;; Parse a pattern.
|
||||
(define (peg-pattern->defn lst for-syntax)
|
||||
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Parse an alternative.
|
||||
(define (peg-alternative->defn lst for-syntax)
|
||||
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (or (string? (car x))
|
||||
(eq? (car x) 'peg-suffix)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Parse a body.
|
||||
(define (peg-body->defn lst for-syntax)
|
||||
(let ((suffix '())
|
||||
(front (datum->syntax for-syntax 'lit)))
|
||||
(cond
|
||||
((eq? (car lst) 'peg-suffix)
|
||||
(set! suffix lst))
|
||||
((string? (car lst))
|
||||
(begin (set! front (datum->syntax for-syntax
|
||||
(string->symbol (car lst))))
|
||||
(set! suffix (cadr lst))))
|
||||
(else `(peg-parse-body-fail ,lst)))
|
||||
#`(body #,front #,@(peg-suffix->defn
|
||||
suffix
|
||||
for-syntax))))
|
||||
|
||||
;; Parse a suffix.
|
||||
(define (peg-suffix->defn lst for-syntax)
|
||||
#`(#,(peg-primary->defn (cadr lst) for-syntax)
|
||||
#,(if (null? (cddr lst))
|
||||
1
|
||||
(datum->syntax for-syntax (string->symbol (caddr lst))))))
|
||||
|
||||
;; Parse a primary.
|
||||
(define (peg-primary->defn lst for-syntax)
|
||||
(let ((el (cadr lst)))
|
||||
(cond
|
||||
((list? el)
|
||||
(cond
|
||||
((eq? (car el) 'peg-literal)
|
||||
(peg-literal->defn el for-syntax))
|
||||
((eq? (car el) 'peg-charclass)
|
||||
(peg-charclass->defn el for-syntax))
|
||||
((eq? (car el) 'peg-nonterminal)
|
||||
(datum->syntax for-syntax (string->symbol (cadr el))))))
|
||||
((string? el)
|
||||
(cond
|
||||
((equal? el "(")
|
||||
(peg-pattern->defn (caddr lst) for-syntax))
|
||||
((equal? el ".")
|
||||
(datum->syntax for-syntax 'peg-any))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-string ,lst)))))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-el ,lst))))))
|
||||
|
||||
;; Parses a literal.
|
||||
(define (peg-literal->defn lst for-syntax)
|
||||
(datum->syntax for-syntax (trim-1chars (cadr lst))))
|
||||
|
||||
;; Parses a charclass.
|
||||
(define (peg-charclass->defn lst for-syntax)
|
||||
#`(or
|
||||
#,@(map
|
||||
(lambda (cc)
|
||||
(cond
|
||||
((eq? (car cc) 'charclass-range)
|
||||
#`(range #,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 0))
|
||||
#,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 2))))
|
||||
((eq? (car cc) 'charclass-single)
|
||||
(datum->syntax for-syntax (cadr cc)))))
|
||||
(context-flatten
|
||||
(lambda (x) (or (eq? (car x) 'charclass-range)
|
||||
(eq? (car x) 'charclass-single)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Compresses a list to save the optimizer work.
|
||||
;; e.g. (or (and a)) -> a
|
||||
(define (compressor-core lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
lst
|
||||
(cond
|
||||
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
|
||||
(null? (cddr lst)))
|
||||
(compressor-core (cadr lst)))
|
||||
((and (eq? (car lst) 'body)
|
||||
(eq? (cadr lst) 'lit)
|
||||
(eq? (cadddr lst) 1))
|
||||
(compressor-core (caddr lst)))
|
||||
(else (map compressor-core lst)))))
|
||||
|
||||
(define (compressor syn for-syntax)
|
||||
(datum->syntax for-syntax
|
||||
(compressor-core (syntax->datum syn))))
|
||||
|
||||
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||
(define (peg-string-compile str-stx accum)
|
||||
(peg-sexp-compile
|
||||
(compressor
|
||||
(peg-pattern->defn
|
||||
(peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
|
||||
str-stx)
|
||||
accum))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; PMATCH STRUCTURE MUNGING
|
||||
;; Pretty self-explanatory.
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
(define-module (ice-9 peg codegen)
|
||||
#:export (peg-sexp-compile)
|
||||
#:use-module (ice-9 peg)
|
||||
#:use-module (ice-9 peg string-peg)
|
||||
#:use-module (ice-9 pretty-print)
|
||||
#:use-module (system base pmatch))
|
||||
|
||||
|
|
305
module/ice-9/peg/string-peg.scm
Normal file
305
module/ice-9/peg/string-peg.scm
Normal file
|
@ -0,0 +1,305 @@
|
|||
;;;; string-peg.scm --- representing PEG grammars as strings
|
||||
;;;;
|
||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;;;;
|
||||
|
||||
(define-module (ice-9 peg string-peg)
|
||||
#:export (peg-string-compile
|
||||
peg-as-peg
|
||||
define-grammar
|
||||
define-grammar-f
|
||||
define-nonterm
|
||||
peg-grammar)
|
||||
#:use-module (ice-9 peg)
|
||||
#:use-module (ice-9 peg codegen))
|
||||
|
||||
;; The results of parsing using a nonterminal are cached. Think of it like a
|
||||
;; hash with no conflict resolution. Process for deciding on the cache size
|
||||
;; wasn't very scientific; just ran the benchmarks and stopped a little after
|
||||
;; the point of diminishing returns on my box.
|
||||
(define *cache-size* 512)
|
||||
|
||||
;; Gets the left-hand depth of a list.
|
||||
(define (depth lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
0
|
||||
(+ 1 (depth (car lst)))))
|
||||
|
||||
(eval-when (compile load eval)
|
||||
(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
|
||||
; (let ((matchf-syn (datum->syntax for-syntax matchf)))
|
||||
#`(lambda (str strlen at)
|
||||
(let ((res (#,matchf-syn str strlen at)))
|
||||
;; Try to match the nonterminal.
|
||||
(if res
|
||||
;; If we matched, do some post-processing to figure out
|
||||
;; what data to propagate upward.
|
||||
(let ((at (car res))
|
||||
(body (cadr res)))
|
||||
#,(cond
|
||||
((eq? accumsym 'name)
|
||||
#`(list at '#,s-syn))
|
||||
((eq? accumsym 'all)
|
||||
#`(list (car res)
|
||||
(cond
|
||||
((not (list? body))
|
||||
(list '#,s-syn body))
|
||||
((null? body) '#,s-syn)
|
||||
((symbol? (car body))
|
||||
(list '#,s-syn body))
|
||||
(else (cons '#,s-syn body)))))
|
||||
((eq? accumsym 'none) #`(list (car res) '()))
|
||||
(else #`(begin res))))
|
||||
;; If we didn't match, just return false.
|
||||
#f))))
|
||||
)
|
||||
|
||||
;; Defines a new nonterminal symbol accumulating with ACCUM.
|
||||
(define-syntax define-nonterm
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ sym accum pat)
|
||||
(let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
|
||||
(accumsym (syntax->datum #'accum))
|
||||
(c (datum->syntax x (gensym))));; the cache
|
||||
;; CODE is the code to parse the string if the result isn't cached.
|
||||
(let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
|
||||
#`(begin
|
||||
(define #,c (make-vector *cache-size* #f));; the cache
|
||||
(define (sym str strlen at)
|
||||
(let* ((vref (vector-ref #,c (modulo at *cache-size*))))
|
||||
;; Check to see whether the value is cached.
|
||||
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
|
||||
(caddr vref);; If it is return it.
|
||||
(let ((fres ;; Else calculate it and cache it.
|
||||
(#,syn str strlen at)))
|
||||
(vector-set! #,c (modulo at *cache-size*)
|
||||
(list str at fres))
|
||||
fres)))))))))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; Parse string PEGs using sexp PEGs.
|
||||
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Grammar for PEGs in PEG grammar.
|
||||
(define peg-as-peg
|
||||
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
|
||||
pattern <-- alternative (SLASH sp alternative)*
|
||||
alternative <-- ([!&]? sp suffix)+
|
||||
suffix <-- primary ([*+?] sp)*
|
||||
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
|
||||
literal <-- ['] (!['] .)* ['] sp
|
||||
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
|
||||
CCrange <-- . '-' .
|
||||
CCsingle <-- .
|
||||
nonterminal <-- [a-zA-Z0-9-]+ sp
|
||||
sp < [ \t\n]*
|
||||
SLASH < '/'
|
||||
LB < '['
|
||||
RB < ']'
|
||||
")
|
||||
|
||||
(define-nonterm peg-grammar all
|
||||
(body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
|
||||
(define-nonterm peg-pattern all
|
||||
(and peg-alternative
|
||||
(body lit (and (ignore "/") peg-sp peg-alternative) *)))
|
||||
(define-nonterm peg-alternative all
|
||||
(body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
|
||||
(define-nonterm peg-suffix all
|
||||
(and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
|
||||
(define-nonterm peg-primary all
|
||||
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
||||
(and "." peg-sp)
|
||||
peg-literal
|
||||
peg-charclass
|
||||
(and peg-nonterminal (body ! "<" 1))))
|
||||
(define-nonterm peg-literal all
|
||||
(and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
|
||||
(define-nonterm peg-charclass all
|
||||
(and (ignore "[")
|
||||
(body lit (and (body ! "]" 1)
|
||||
(or charclass-range charclass-single)) *)
|
||||
(ignore "]")
|
||||
peg-sp))
|
||||
(define-nonterm charclass-range all (and peg-any "-" peg-any))
|
||||
(define-nonterm charclass-single all peg-any)
|
||||
(define-nonterm peg-nonterminal all
|
||||
(and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
|
||||
(define-nonterm peg-sp none
|
||||
(body lit (or " " "\t" "\n") *))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;; PARSE STRING PEGS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Pakes a string representing a PEG grammar and defines all the nonterminals in
|
||||
;; it as the associated PEGs.
|
||||
(define (peg-parser str for-syntax)
|
||||
(let ((parsed (peg-parse peg-grammar str)))
|
||||
(if (not parsed)
|
||||
(begin
|
||||
;; (display "Invalid PEG grammar!\n")
|
||||
#f)
|
||||
(let ((lst (peg:tree parsed)))
|
||||
(cond
|
||||
((or (not (list? lst)) (null? lst))
|
||||
lst)
|
||||
((eq? (car lst) 'peg-grammar)
|
||||
#`(begin
|
||||
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
|
||||
(context-flatten (lambda (lst) (<= (depth lst) 2))
|
||||
(cdr lst))))))))))
|
||||
|
||||
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
||||
;; defines all the appropriate nonterminals.
|
||||
(define-syntax define-grammar
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((_ str)
|
||||
(peg-parser (syntax->datum #'str) x)))))
|
||||
(define define-grammar-f peg-parser)
|
||||
|
||||
;; Parse a nonterminal and pattern listed in LST.
|
||||
(define (peg-nonterm->defn lst for-syntax)
|
||||
(let* ((nonterm (car lst))
|
||||
(grabber (cadr lst))
|
||||
(pattern (caddr lst))
|
||||
(nonterm-name (datum->syntax for-syntax
|
||||
(string->symbol (cadr nonterm)))))
|
||||
#`(define-nonterm #,nonterm-name
|
||||
#,(cond
|
||||
((string=? grabber "<--") (datum->syntax for-syntax 'all))
|
||||
((string=? grabber "<-") (datum->syntax for-syntax 'body))
|
||||
(else (datum->syntax for-syntax 'none)))
|
||||
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
|
||||
|
||||
;; Parse a pattern.
|
||||
(define (peg-pattern->defn lst for-syntax)
|
||||
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Parse an alternative.
|
||||
(define (peg-alternative->defn lst for-syntax)
|
||||
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
|
||||
(context-flatten (lambda (x) (or (string? (car x))
|
||||
(eq? (car x) 'peg-suffix)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Parse a body.
|
||||
(define (peg-body->defn lst for-syntax)
|
||||
(let ((suffix '())
|
||||
(front (datum->syntax for-syntax 'lit)))
|
||||
(cond
|
||||
((eq? (car lst) 'peg-suffix)
|
||||
(set! suffix lst))
|
||||
((string? (car lst))
|
||||
(begin (set! front (datum->syntax for-syntax
|
||||
(string->symbol (car lst))))
|
||||
(set! suffix (cadr lst))))
|
||||
(else `(peg-parse-body-fail ,lst)))
|
||||
#`(body #,front #,@(peg-suffix->defn
|
||||
suffix
|
||||
for-syntax))))
|
||||
|
||||
;; Parse a suffix.
|
||||
(define (peg-suffix->defn lst for-syntax)
|
||||
#`(#,(peg-primary->defn (cadr lst) for-syntax)
|
||||
#,(if (null? (cddr lst))
|
||||
1
|
||||
(datum->syntax for-syntax (string->symbol (caddr lst))))))
|
||||
|
||||
;; Parse a primary.
|
||||
(define (peg-primary->defn lst for-syntax)
|
||||
(let ((el (cadr lst)))
|
||||
(cond
|
||||
((list? el)
|
||||
(cond
|
||||
((eq? (car el) 'peg-literal)
|
||||
(peg-literal->defn el for-syntax))
|
||||
((eq? (car el) 'peg-charclass)
|
||||
(peg-charclass->defn el for-syntax))
|
||||
((eq? (car el) 'peg-nonterminal)
|
||||
(datum->syntax for-syntax (string->symbol (cadr el))))))
|
||||
((string? el)
|
||||
(cond
|
||||
((equal? el "(")
|
||||
(peg-pattern->defn (caddr lst) for-syntax))
|
||||
((equal? el ".")
|
||||
(datum->syntax for-syntax 'peg-any))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-string ,lst)))))
|
||||
(else (datum->syntax for-syntax
|
||||
`(peg-parse-any unknown-el ,lst))))))
|
||||
|
||||
;; Trims characters off the front and end of STR.
|
||||
;; (trim-1chars "'ab'") -> "ab"
|
||||
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
|
||||
|
||||
;; Parses a literal.
|
||||
(define (peg-literal->defn lst for-syntax)
|
||||
(datum->syntax for-syntax (trim-1chars (cadr lst))))
|
||||
|
||||
;; Parses a charclass.
|
||||
(define (peg-charclass->defn lst for-syntax)
|
||||
#`(or
|
||||
#,@(map
|
||||
(lambda (cc)
|
||||
(cond
|
||||
((eq? (car cc) 'charclass-range)
|
||||
#`(range #,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 0))
|
||||
#,(datum->syntax
|
||||
for-syntax
|
||||
(string-ref (cadr cc) 2))))
|
||||
((eq? (car cc) 'charclass-single)
|
||||
(datum->syntax for-syntax (cadr cc)))))
|
||||
(context-flatten
|
||||
(lambda (x) (or (eq? (car x) 'charclass-range)
|
||||
(eq? (car x) 'charclass-single)))
|
||||
(cdr lst)))))
|
||||
|
||||
;; Compresses a list to save the optimizer work.
|
||||
;; e.g. (or (and a)) -> a
|
||||
(define (compressor-core lst)
|
||||
(if (or (not (list? lst)) (null? lst))
|
||||
lst
|
||||
(cond
|
||||
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
|
||||
(null? (cddr lst)))
|
||||
(compressor-core (cadr lst)))
|
||||
((and (eq? (car lst) 'body)
|
||||
(eq? (cadr lst) 'lit)
|
||||
(eq? (cadddr lst) 1))
|
||||
(compressor-core (caddr lst)))
|
||||
(else (map compressor-core lst)))))
|
||||
|
||||
(define (compressor syn for-syntax)
|
||||
(datum->syntax for-syntax
|
||||
(compressor-core (syntax->datum syn))))
|
||||
|
||||
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||
(define (peg-string-compile str-stx accum)
|
||||
(peg-sexp-compile
|
||||
(compressor
|
||||
(peg-pattern->defn
|
||||
(peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
|
||||
str-stx)
|
||||
accum))
|
Loading…
Add table
Add a link
Reference in a new issue