1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-07-02 23:50:47 +02:00
guile/module/ice-9/peg/string-peg.scm
Noah Lavine 0afaf59982 Move define-nonterm
* module/ice-9/peg/string-peg.scm: remove define-nonterm and make a simpler
   macro called `define-sexp-parser' to make the PEG grammar
* module/ice-9/peg.scm: move define-nonterm macro to this file
* module/ice-9/peg/codegen.scm: move code to wrap a parser result nicely to
   this file, under name `wrap-parser-for-users'
2013-01-16 10:11:44 +01:00

258 lines
9.2 KiB
Scheme

;;;; string-peg.scm --- representing PEG grammars as strings
;;;;
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;
(define-module (ice-9 peg string-peg)
#:export (peg-string-compile
peg-as-peg
define-grammar
define-grammar-f
peg-grammar)
#:use-module (ice-9 peg)
#:use-module (ice-9 peg codegen)
#:use-module (ice-9 peg match-record)
#:use-module (ice-9 peg simplify-tree))
;; Gets the left-hand depth of a list.
(define (depth lst)
(if (or (not (list? lst)) (null? lst))
0
(+ 1 (depth (car lst)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; Parse string PEGs using sexp PEGs.
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Grammar for PEGs in PEG grammar.
(define peg-as-peg
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
pattern <-- alternative (SLASH sp alternative)*
alternative <-- ([!&]? sp suffix)+
suffix <-- primary ([*+?] sp)*
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
literal <-- ['] (!['] .)* ['] sp
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
CCrange <-- . '-' .
CCsingle <-- .
nonterminal <-- [a-zA-Z0-9-]+ sp
sp < [ \t\n]*
SLASH < '/'
LB < '['
RB < ']'
")
(define-syntax define-sexp-parser
(lambda (x)
(syntax-case x ()
((_ sym accum pat)
(let* ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
(accumsym (syntax->datum #'accum))
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
#`(define sym #,syn))))))
(define-sexp-parser peg-grammar all
(body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
(define-sexp-parser peg-pattern all
(and peg-alternative
(body lit (and (ignore "/") peg-sp peg-alternative) *)))
(define-sexp-parser peg-alternative all
(body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
(define-sexp-parser peg-suffix all
(and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
(define-sexp-parser peg-primary all
(or (and "(" peg-sp peg-pattern ")" peg-sp)
(and "." peg-sp)
peg-literal
peg-charclass
(and peg-nonterminal (body ! "<" 1))))
(define-sexp-parser peg-literal all
(and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
(define-sexp-parser peg-charclass all
(and (ignore "[")
(body lit (and (body ! "]" 1)
(or charclass-range charclass-single)) *)
(ignore "]")
peg-sp))
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
(define-sexp-parser charclass-single all peg-any)
(define-sexp-parser peg-nonterminal all
(and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
(define-sexp-parser peg-sp none
(body lit (or " " "\t" "\n") *))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; PARSE STRING PEGS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pakes a string representing a PEG grammar and defines all the nonterminals in
;; it as the associated PEGs.
(define (peg-parser str for-syntax)
(let ((parsed (peg-parse peg-grammar str)))
(if (not parsed)
(begin
;; (display "Invalid PEG grammar!\n")
#f)
(let ((lst (peg:tree parsed)))
(cond
((or (not (list? lst)) (null? lst))
lst)
((eq? (car lst) 'peg-grammar)
#`(begin
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
(context-flatten (lambda (lst) (<= (depth lst) 2))
(cdr lst))))))))))
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
;; defines all the appropriate nonterminals.
(define-syntax define-grammar
(lambda (x)
(syntax-case x ()
((_ str)
(peg-parser (syntax->datum #'str) x)))))
(define define-grammar-f peg-parser)
;; Parse a nonterminal and pattern listed in LST.
(define (peg-nonterm->defn lst for-syntax)
(let* ((nonterm (car lst))
(grabber (cadr lst))
(pattern (caddr lst))
(nonterm-name (datum->syntax for-syntax
(string->symbol (cadr nonterm)))))
#`(define-nonterm #,nonterm-name
#,(cond
((string=? grabber "<--") (datum->syntax for-syntax 'all))
((string=? grabber "<-") (datum->syntax for-syntax 'body))
(else (datum->syntax for-syntax 'none)))
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
;; Parse a pattern.
(define (peg-pattern->defn lst for-syntax)
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
(cdr lst)))))
;; Parse an alternative.
(define (peg-alternative->defn lst for-syntax)
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
(context-flatten (lambda (x) (or (string? (car x))
(eq? (car x) 'peg-suffix)))
(cdr lst)))))
;; Parse a body.
(define (peg-body->defn lst for-syntax)
(let ((suffix '())
(front (datum->syntax for-syntax 'lit)))
(cond
((eq? (car lst) 'peg-suffix)
(set! suffix lst))
((string? (car lst))
(begin (set! front (datum->syntax for-syntax
(string->symbol (car lst))))
(set! suffix (cadr lst))))
(else `(peg-parse-body-fail ,lst)))
#`(body #,front #,@(peg-suffix->defn
suffix
for-syntax))))
;; Parse a suffix.
(define (peg-suffix->defn lst for-syntax)
#`(#,(peg-primary->defn (cadr lst) for-syntax)
#,(if (null? (cddr lst))
1
(datum->syntax for-syntax (string->symbol (caddr lst))))))
;; Parse a primary.
(define (peg-primary->defn lst for-syntax)
(let ((el (cadr lst)))
(cond
((list? el)
(cond
((eq? (car el) 'peg-literal)
(peg-literal->defn el for-syntax))
((eq? (car el) 'peg-charclass)
(peg-charclass->defn el for-syntax))
((eq? (car el) 'peg-nonterminal)
(datum->syntax for-syntax (string->symbol (cadr el))))))
((string? el)
(cond
((equal? el "(")
(peg-pattern->defn (caddr lst) for-syntax))
((equal? el ".")
(datum->syntax for-syntax 'peg-any))
(else (datum->syntax for-syntax
`(peg-parse-any unknown-string ,lst)))))
(else (datum->syntax for-syntax
`(peg-parse-any unknown-el ,lst))))))
;; Trims characters off the front and end of STR.
;; (trim-1chars "'ab'") -> "ab"
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
;; Parses a literal.
(define (peg-literal->defn lst for-syntax)
(datum->syntax for-syntax (trim-1chars (cadr lst))))
;; Parses a charclass.
(define (peg-charclass->defn lst for-syntax)
#`(or
#,@(map
(lambda (cc)
(cond
((eq? (car cc) 'charclass-range)
#`(range #,(datum->syntax
for-syntax
(string-ref (cadr cc) 0))
#,(datum->syntax
for-syntax
(string-ref (cadr cc) 2))))
((eq? (car cc) 'charclass-single)
(datum->syntax for-syntax (cadr cc)))))
(context-flatten
(lambda (x) (or (eq? (car x) 'charclass-range)
(eq? (car x) 'charclass-single)))
(cdr lst)))))
;; Compresses a list to save the optimizer work.
;; e.g. (or (and a)) -> a
(define (compressor-core lst)
(if (or (not (list? lst)) (null? lst))
lst
(cond
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
(null? (cddr lst)))
(compressor-core (cadr lst)))
((and (eq? (car lst) 'body)
(eq? (cadr lst) 'lit)
(eq? (cadddr lst) 1))
(compressor-core (caddr lst)))
(else (map compressor-core lst)))))
(define (compressor syn for-syntax)
(datum->syntax for-syntax
(compressor-core (syntax->datum syn))))
;; Builds a lambda-expressions for the pattern STR using accum.
(define (peg-string-compile str-stx accum)
(let ((string (syntax->datum str-stx)))
(peg-sexp-compile
(compressor
(peg-pattern->defn
(peg:tree (peg-parse peg-pattern string)) str-stx)
str-stx)
accum)))