mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 11:50:28 +02:00
* module/ice-9/peg.scm: don't re-export 'define-grammar-f' * module/ice-9/peg/string-peg.scm: don't export 'define-grammar-f'
273 lines
10 KiB
Scheme
273 lines
10 KiB
Scheme
;;;; string-peg.scm --- representing PEG grammars as strings
|
|
;;;;
|
|
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
|
;;;;
|
|
;;;; This library is free software; you can redistribute it and/or
|
|
;;;; modify it under the terms of the GNU Lesser General Public
|
|
;;;; License as published by the Free Software Foundation; either
|
|
;;;; version 3 of the License, or (at your option) any later version.
|
|
;;;;
|
|
;;;; This library is distributed in the hope that it will be useful,
|
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;;;; Lesser General Public License for more details.
|
|
;;;;
|
|
;;;; You should have received a copy of the GNU Lesser General Public
|
|
;;;; License along with this library; if not, write to the Free Software
|
|
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
;;;;
|
|
|
|
(define-module (ice-9 peg string-peg)
|
|
#:export (peg-as-peg
|
|
define-peg-string-patterns
|
|
peg-grammar)
|
|
#:use-module (ice-9 peg using-parsers)
|
|
#:use-module (ice-9 peg codegen)
|
|
#:use-module (ice-9 peg simplify-tree))
|
|
|
|
;; Gets the left-hand depth of a list.
|
|
(define (depth lst)
|
|
(if (or (not (list? lst)) (null? lst))
|
|
0
|
|
(+ 1 (depth (car lst)))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;; Parse string PEGs using sexp PEGs.
|
|
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Grammar for PEGs in PEG grammar.
|
|
(define peg-as-peg
|
|
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
|
|
pattern <-- alternative (SLASH sp alternative)*
|
|
alternative <-- ([!&]? sp suffix)+
|
|
suffix <-- primary ([*+?] sp)*
|
|
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
|
|
literal <-- ['] (!['] .)* ['] sp
|
|
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
|
|
CCrange <-- . '-' .
|
|
CCsingle <-- .
|
|
nonterminal <-- [a-zA-Z0-9-]+ sp
|
|
sp < [ \t\n]*
|
|
SLASH < '/'
|
|
LB < '['
|
|
RB < ']'
|
|
")
|
|
|
|
(define-syntax define-sexp-parser
|
|
(lambda (x)
|
|
(syntax-case x ()
|
|
((_ sym accum pat)
|
|
(let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
|
|
(accumsym (syntax->datum #'accum))
|
|
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
|
#`(define sym #,syn))))))
|
|
|
|
(define-sexp-parser peg-grammar all
|
|
(+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
|
|
(define-sexp-parser peg-pattern all
|
|
(and peg-alternative
|
|
(* (and (ignore "/") peg-sp peg-alternative))))
|
|
(define-sexp-parser peg-alternative all
|
|
(+ (and (? (or "!" "&")) peg-sp peg-suffix)))
|
|
(define-sexp-parser peg-suffix all
|
|
(and peg-primary (* (and (or "*" "+" "?") peg-sp))))
|
|
(define-sexp-parser peg-primary all
|
|
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
|
(and "." peg-sp)
|
|
peg-literal
|
|
peg-charclass
|
|
(and peg-nonterminal (not-followed-by "<"))))
|
|
(define-sexp-parser peg-literal all
|
|
(and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
|
|
(define-sexp-parser peg-charclass all
|
|
(and (ignore "[")
|
|
(* (and (not-followed-by "]")
|
|
(or charclass-range charclass-single)))
|
|
(ignore "]")
|
|
peg-sp))
|
|
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
|
|
(define-sexp-parser charclass-single all peg-any)
|
|
(define-sexp-parser peg-nonterminal all
|
|
(and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
|
|
(define-sexp-parser peg-sp none
|
|
(* (or " " "\t" "\n")))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;;; PARSE STRING PEGS
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; Takes a string representing a PEG grammar and returns syntax that
|
|
;; will define all of the nonterminals in the grammar with equivalent
|
|
;; PEG s-expressions.
|
|
(define (peg-parser str for-syntax)
|
|
(let ((parsed (match-pattern peg-grammar str)))
|
|
(if (not parsed)
|
|
(begin
|
|
;; (display "Invalid PEG grammar!\n")
|
|
#f)
|
|
(let ((lst (peg:tree parsed)))
|
|
(cond
|
|
((or (not (list? lst)) (null? lst))
|
|
lst)
|
|
((eq? (car lst) 'peg-grammar)
|
|
#`(begin
|
|
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
|
|
(context-flatten (lambda (lst) (<= (depth lst) 2))
|
|
(cdr lst))))))))))
|
|
|
|
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
|
;; defines all the appropriate nonterminals.
|
|
(define-syntax define-peg-string-patterns
|
|
(lambda (x)
|
|
(syntax-case x ()
|
|
((_ str)
|
|
(peg-parser (syntax->datum #'str) x)))))
|
|
|
|
;; lst has format (nonterm grabber pattern), where
|
|
;; nonterm is a symbol (the name of the nonterminal),
|
|
;; grabber is a string (either "<", "<-" or "<--"), and
|
|
;; pattern is the parse of a PEG pattern expressed as as string.
|
|
(define (peg-nonterm->defn lst for-syntax)
|
|
(let* ((nonterm (car lst))
|
|
(grabber (cadr lst))
|
|
(pattern (caddr lst))
|
|
(nonterm-name (datum->syntax for-syntax
|
|
(string->symbol (cadr nonterm)))))
|
|
#`(define-peg-pattern #,nonterm-name
|
|
#,(cond
|
|
((string=? grabber "<--") (datum->syntax for-syntax 'all))
|
|
((string=? grabber "<-") (datum->syntax for-syntax 'body))
|
|
(else (datum->syntax for-syntax 'none)))
|
|
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
|
|
|
|
;; lst has format ('peg-pattern ...).
|
|
;; After the context-flatten, (cdr lst) has format
|
|
;; (('peg-alternative ...) ...), where the outer list is a collection
|
|
;; of elements from a '/' alternative.
|
|
(define (peg-pattern->defn lst for-syntax)
|
|
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
|
|
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
|
|
(cdr lst)))))
|
|
|
|
;; lst has format ('peg-alternative ...).
|
|
;; After the context-flatten, (cdr lst) has the format
|
|
;; (item ...), where each item has format either ("!" ...), ("&" ...),
|
|
;; or ('peg-suffix ...).
|
|
(define (peg-alternative->defn lst for-syntax)
|
|
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
|
|
(context-flatten (lambda (x) (or (string? (car x))
|
|
(eq? (car x) 'peg-suffix)))
|
|
(cdr lst)))))
|
|
|
|
;; lst has the format either
|
|
;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
|
|
;; ('peg-suffix ...).
|
|
(define (peg-body->defn lst for-syntax)
|
|
(cond
|
|
((equal? (car lst) "&")
|
|
#`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
|
((equal? (car lst) "!")
|
|
#`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
|
((eq? (car lst) 'peg-suffix)
|
|
(peg-suffix->defn lst for-syntax))
|
|
(else `(peg-parse-body-fail ,lst))))
|
|
|
|
;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
|
|
(define (peg-suffix->defn lst for-syntax)
|
|
(let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
|
|
(cond
|
|
((null? (cddr lst))
|
|
inner-defn)
|
|
((equal? (caddr lst) "*")
|
|
#`(* #,inner-defn))
|
|
((equal? (caddr lst) "?")
|
|
#`(? #,inner-defn))
|
|
((equal? (caddr lst) "+")
|
|
#`(+ #,inner-defn)))))
|
|
|
|
;; Parse a primary.
|
|
(define (peg-primary->defn lst for-syntax)
|
|
(let ((el (cadr lst)))
|
|
(cond
|
|
((list? el)
|
|
(cond
|
|
((eq? (car el) 'peg-literal)
|
|
(peg-literal->defn el for-syntax))
|
|
((eq? (car el) 'peg-charclass)
|
|
(peg-charclass->defn el for-syntax))
|
|
((eq? (car el) 'peg-nonterminal)
|
|
(datum->syntax for-syntax (string->symbol (cadr el))))))
|
|
((string? el)
|
|
(cond
|
|
((equal? el "(")
|
|
(peg-pattern->defn (caddr lst) for-syntax))
|
|
((equal? el ".")
|
|
(datum->syntax for-syntax 'peg-any))
|
|
(else (datum->syntax for-syntax
|
|
`(peg-parse-any unknown-string ,lst)))))
|
|
(else (datum->syntax for-syntax
|
|
`(peg-parse-any unknown-el ,lst))))))
|
|
|
|
;; Trims characters off the front and end of STR.
|
|
;; (trim-1chars "'ab'") -> "ab"
|
|
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
|
|
|
|
;; Parses a literal.
|
|
(define (peg-literal->defn lst for-syntax)
|
|
(datum->syntax for-syntax (trim-1chars (cadr lst))))
|
|
|
|
;; Parses a charclass.
|
|
(define (peg-charclass->defn lst for-syntax)
|
|
#`(or
|
|
#,@(map
|
|
(lambda (cc)
|
|
(cond
|
|
((eq? (car cc) 'charclass-range)
|
|
#`(range #,(datum->syntax
|
|
for-syntax
|
|
(string-ref (cadr cc) 0))
|
|
#,(datum->syntax
|
|
for-syntax
|
|
(string-ref (cadr cc) 2))))
|
|
((eq? (car cc) 'charclass-single)
|
|
(datum->syntax for-syntax (cadr cc)))))
|
|
(context-flatten
|
|
(lambda (x) (or (eq? (car x) 'charclass-range)
|
|
(eq? (car x) 'charclass-single)))
|
|
(cdr lst)))))
|
|
|
|
;; Compresses a list to save the optimizer work.
|
|
;; e.g. (or (and a)) -> a
|
|
(define (compressor-core lst)
|
|
(if (or (not (list? lst)) (null? lst))
|
|
lst
|
|
(cond
|
|
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
|
|
(null? (cddr lst)))
|
|
(compressor-core (cadr lst)))
|
|
((and (eq? (car lst) 'body)
|
|
(eq? (cadr lst) 'lit)
|
|
(eq? (cadddr lst) 1))
|
|
(compressor-core (caddr lst)))
|
|
(else (map compressor-core lst)))))
|
|
|
|
(define (compressor syn for-syntax)
|
|
(datum->syntax for-syntax
|
|
(compressor-core (syntax->datum syn))))
|
|
|
|
;; Builds a lambda-expressions for the pattern STR using accum.
|
|
(define (peg-string-compile args accum)
|
|
(syntax-case args ()
|
|
((str-stx) (string? (syntax->datum #'str-stx))
|
|
(let ((string (syntax->datum #'str-stx)))
|
|
(compile-peg-pattern
|
|
(compressor
|
|
(peg-pattern->defn
|
|
(peg:tree (match-pattern peg-pattern string)) #'str-stx)
|
|
#'str-stx)
|
|
(if (eq? accum 'all) 'body accum))))
|
|
(else (error "Bad embedded PEG string" args))))
|
|
|
|
(add-peg-compiler! 'peg peg-string-compile)
|
|
|