mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
PEG: Add full support for PEG + some extensions
This commit adds support for PEG as described in: <https://bford.info/pub/lang/peg.pdf> It adds support for the missing features (comments, underscores in identifiers and escaping) while keeping the extensions (dashes in identifiers, < and <--). The naming system tries to be as close as possible to the one proposed in the paper. * module/ice-9/peg/string-peg.scm: Rewrite PEG parser. * test-suite/tests/peg.test: Fix import Signed-off-by: Ludovic Courtès <ludo@gnu.org>
This commit is contained in:
parent
47807c9b11
commit
ff11753df1
4 changed files with 313 additions and 180 deletions
7
NEWS
7
NEWS
|
@ -20,6 +20,13 @@ downright unusable (e.g., <https://bugs.gnu.org/72378>), non-conforming
|
||||||
(e.g., <https://bugs.gnu.org/72383>), or buggy (e.g.,
|
(e.g., <https://bugs.gnu.org/72383>), or buggy (e.g.,
|
||||||
<https://bugs.gnu.org/72372>).
|
<https://bugs.gnu.org/72372>).
|
||||||
|
|
||||||
|
* New interfaces and functionality
|
||||||
|
|
||||||
|
** PEG parser
|
||||||
|
|
||||||
|
PEG grammar parser in (ice-9 peg string-peg) has been rewritten to cover
|
||||||
|
all the functionality defined in <https://bford.info/pub/lang/peg.pdf>.
|
||||||
|
|
||||||
** GOOPS: Introduce new forms method* and define-method*
|
** GOOPS: Introduce new forms method* and define-method*
|
||||||
|
|
||||||
The module (oop goops) now exports method* and define-method* which are
|
The module (oop goops) now exports method* and define-method* which are
|
||||||
|
|
|
@ -17,6 +17,10 @@ Wikipedia has a clear and concise introduction to PEGs if you want to
|
||||||
familiarize yourself with the syntax:
|
familiarize yourself with the syntax:
|
||||||
@url{http://en.wikipedia.org/wiki/Parsing_expression_grammar}.
|
@url{http://en.wikipedia.org/wiki/Parsing_expression_grammar}.
|
||||||
|
|
||||||
|
The paper that introduced PEG contains a more detailed description of how PEG
|
||||||
|
works, and describes its syntax in detail:
|
||||||
|
@url{https://bford.info/pub/lang/peg.pdf}
|
||||||
|
|
||||||
The @code{(ice-9 peg)} module works by compiling PEGs down to lambda
|
The @code{(ice-9 peg)} module works by compiling PEGs down to lambda
|
||||||
expressions. These can either be stored in variables at compile-time by
|
expressions. These can either be stored in variables at compile-time by
|
||||||
the define macros (@code{define-peg-pattern} and
|
the define macros (@code{define-peg-pattern} and
|
||||||
|
@ -216,8 +220,8 @@ should propagate up the parse tree. The normal @code{<-} propagates the
|
||||||
matched text up the parse tree, @code{<--} propagates the matched text
|
matched text up the parse tree, @code{<--} propagates the matched text
|
||||||
up the parse tree tagged with the name of the nonterminal, and @code{<}
|
up the parse tree tagged with the name of the nonterminal, and @code{<}
|
||||||
discards that matched text and propagates nothing up the parse tree.
|
discards that matched text and propagates nothing up the parse tree.
|
||||||
Also, nonterminals may consist of any alphanumeric character or a ``-''
|
Also, nonterminals may include ``-'' character, while in normal PEG it is not
|
||||||
character (in normal PEGs nonterminals can only be alphabetic).
|
allowed.
|
||||||
|
|
||||||
For example, if we:
|
For example, if we:
|
||||||
@lisp
|
@lisp
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
;;;; string-peg.scm --- representing PEG grammars as strings
|
;;;; string-peg.scm --- representing PEG grammars as strings
|
||||||
;;;;
|
;;;;
|
||||||
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
;;;; Copyright (C) 2010, 2011, Free Software Foundation, Inc.
|
||||||
|
;;;; Copyright (C) 2024 Ekaitz Zarraga <ekaitz@elenq.tech>
|
||||||
;;;;
|
;;;;
|
||||||
;;;; This library is free software; you can redistribute it and/or
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
;;;; modify it under the terms of the GNU Lesser General Public
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
@ -21,10 +22,15 @@
|
||||||
#:export (peg-as-peg
|
#:export (peg-as-peg
|
||||||
define-peg-string-patterns
|
define-peg-string-patterns
|
||||||
peg-grammar)
|
peg-grammar)
|
||||||
|
#:use-module (ice-9 match)
|
||||||
#:use-module (ice-9 peg using-parsers)
|
#:use-module (ice-9 peg using-parsers)
|
||||||
|
#:use-module (srfi srfi-1)
|
||||||
#:use-module (ice-9 peg codegen)
|
#:use-module (ice-9 peg codegen)
|
||||||
#:use-module (ice-9 peg simplify-tree))
|
#:use-module (ice-9 peg simplify-tree))
|
||||||
|
|
||||||
|
;; This module provides support for PEG as described in:
|
||||||
|
;; <https://bford.info/pub/lang/peg.pdf>
|
||||||
|
|
||||||
;; Gets the left-hand depth of a list.
|
;; Gets the left-hand depth of a list.
|
||||||
(define (depth lst)
|
(define (depth lst)
|
||||||
(if (or (not (list? lst)) (null? lst))
|
(if (or (not (list? lst)) (null? lst))
|
||||||
|
@ -38,22 +44,58 @@
|
||||||
|
|
||||||
;; Grammar for PEGs in PEG grammar.
|
;; Grammar for PEGs in PEG grammar.
|
||||||
(define peg-as-peg
|
(define peg-as-peg
|
||||||
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
|
"# Hierarchical syntax
|
||||||
pattern <-- alternative (SLASH sp alternative)*
|
Grammar <-- Spacing Definition+ EndOfFile
|
||||||
alternative <-- ([!&]? sp suffix)+
|
Definition <-- Identifier LEFTARROW Expression
|
||||||
suffix <-- primary ([*+?] sp)*
|
|
||||||
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
|
Expression <-- Sequence (SLASH Sequence)*
|
||||||
literal <-- ['] (!['] .)* ['] sp
|
Sequence <-- Prefix*
|
||||||
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
|
Prefix <-- (AND / NOT)? Suffix
|
||||||
CCrange <-- . '-' .
|
Suffix <-- Primary (QUESTION / STAR / PLUS)?
|
||||||
CCsingle <-- .
|
Primary <-- Identifier !LEFTARROW
|
||||||
nonterminal <-- [a-zA-Z0-9-]+ sp
|
/ OPEN Expression CLOSE
|
||||||
sp < [ \t\n]*
|
/ Literal / Class / DOT
|
||||||
SLASH < '/'
|
|
||||||
LB < '['
|
# Lexical syntax
|
||||||
RB < ']'
|
Identifier <-- IdentStart IdentCont* Spacing
|
||||||
|
# NOTE: `-` is an extension
|
||||||
|
IdentStart <- [a-zA-Z_] / '-'
|
||||||
|
IdentCont <- IdentStart / [0-9]
|
||||||
|
|
||||||
|
Literal <-- SQUOTE (!SQUOTE Char)* SQUOTE Spacing
|
||||||
|
/ DQUOTE (!DQUOTE Char)* DQUOTE Spacing
|
||||||
|
Class <-- OPENBRACKET !NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
|
||||||
|
Range <-- Char DASH Char / Char
|
||||||
|
Char <-- '\\\\' [nrt'\"\\[\\]\\\\]
|
||||||
|
/ '\\\\' [0-7][0-7][0-7]
|
||||||
|
/ '\\\\' [0-7][0-7]?
|
||||||
|
/ !'\\\\' .
|
||||||
|
|
||||||
|
# NOTE: `<--` and `<` are extensions
|
||||||
|
LEFTARROW <- ('<--' / '<-' / '<') Spacing
|
||||||
|
SQUOTE < [']
|
||||||
|
DQUOTE < [\"]
|
||||||
|
DASH < '-'
|
||||||
|
OPENBRACKET < '['
|
||||||
|
CLOSEBRACKET < ']'
|
||||||
|
SLASH < '/' Spacing
|
||||||
|
AND <-- '&' Spacing
|
||||||
|
NOT <-- '!' Spacing
|
||||||
|
QUESTION <-- '?' Spacing
|
||||||
|
STAR <-- '*' Spacing
|
||||||
|
PLUS <-- '+' Spacing
|
||||||
|
OPEN < '(' Spacing
|
||||||
|
CLOSE < ')' Spacing
|
||||||
|
DOT <-- '.' Spacing
|
||||||
|
|
||||||
|
Spacing < (Space / Comment)*
|
||||||
|
Comment < '#' (!EndOfLine .)* EndOfLine
|
||||||
|
Space < ' ' / '\t' / EndOfLine
|
||||||
|
EndOfLine < '\\r\\n' / '\\n' / '\\r'
|
||||||
|
EndOfFile < !.
|
||||||
")
|
")
|
||||||
|
|
||||||
|
|
||||||
(define-syntax define-sexp-parser
|
(define-syntax define-sexp-parser
|
||||||
(lambda (x)
|
(lambda (x)
|
||||||
(syntax-case x ()
|
(syntax-case x ()
|
||||||
|
@ -63,35 +105,78 @@ RB < ']'
|
||||||
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||||
#`(define sym #,syn))))))
|
#`(define sym #,syn))))))
|
||||||
|
|
||||||
(define-sexp-parser peg-grammar all
|
(define-sexp-parser Grammar all
|
||||||
(+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
|
(and Spacing (+ Definition) EndOfFile))
|
||||||
(define-sexp-parser peg-pattern all
|
(define-sexp-parser Definition all
|
||||||
(and peg-alternative
|
(and Identifier LEFTARROW Expression))
|
||||||
(* (and (ignore "/") peg-sp peg-alternative))))
|
(define-sexp-parser Expression all
|
||||||
(define-sexp-parser peg-alternative all
|
(and Sequence (* (and SLASH Sequence))))
|
||||||
(+ (and (? (or "!" "&")) peg-sp peg-suffix)))
|
(define-sexp-parser Sequence all
|
||||||
(define-sexp-parser peg-suffix all
|
(* Prefix))
|
||||||
(and peg-primary (* (and (or "*" "+" "?") peg-sp))))
|
(define-sexp-parser Prefix all
|
||||||
(define-sexp-parser peg-primary all
|
(and (? (or AND NOT)) Suffix))
|
||||||
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
(define-sexp-parser Suffix all
|
||||||
(and "." peg-sp)
|
(and Primary (? (or QUESTION STAR PLUS))))
|
||||||
peg-literal
|
(define-sexp-parser Primary all
|
||||||
peg-charclass
|
(or (and Identifier (not-followed-by LEFTARROW))
|
||||||
(and peg-nonterminal (not-followed-by "<"))))
|
(and OPEN Expression CLOSE)
|
||||||
(define-sexp-parser peg-literal all
|
Literal
|
||||||
(and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
|
Class
|
||||||
(define-sexp-parser peg-charclass all
|
DOT))
|
||||||
(and (ignore "[")
|
(define-sexp-parser Identifier all
|
||||||
(* (and (not-followed-by "]")
|
(and IdentStart (* IdentCont) Spacing))
|
||||||
(or charclass-range charclass-single)))
|
(define-sexp-parser IdentStart body
|
||||||
(ignore "]")
|
(or (or (range #\a #\z) (range #\A #\Z) "_") "-")) ; NOTE: - is an extension
|
||||||
peg-sp))
|
(define-sexp-parser IdentCont body
|
||||||
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
|
(or IdentStart (range #\0 #\9)))
|
||||||
(define-sexp-parser charclass-single all peg-any)
|
(define-sexp-parser Literal all
|
||||||
(define-sexp-parser peg-nonterminal all
|
(or (and SQUOTE (* (and (not-followed-by SQUOTE) Char)) SQUOTE Spacing)
|
||||||
(and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
|
(and DQUOTE (* (and (not-followed-by DQUOTE) Char)) DQUOTE Spacing)))
|
||||||
(define-sexp-parser peg-sp none
|
(define-sexp-parser Class all
|
||||||
(* (or " " "\t" "\n")))
|
(and OPENBRACKET (* (and (not-followed-by CLOSEBRACKET) Range)) CLOSEBRACKET Spacing))
|
||||||
|
(define-sexp-parser Range all
|
||||||
|
(or (and Char DASH Char) Char))
|
||||||
|
(define-sexp-parser Char all
|
||||||
|
(or (and "\\" (or "n" "r" "t" "'" "\"" "[" "]" "\\"))
|
||||||
|
(and "\\" (range #\0 #\7) (range #\0 #\7) (range #\0 #\7))
|
||||||
|
(and "\\" (range #\0 #\7) (? (range #\0 #\7)))
|
||||||
|
(and (not-followed-by "\\") peg-any)))
|
||||||
|
(define-sexp-parser LEFTARROW body
|
||||||
|
(and (or "<--" "<-" "<") Spacing)) ; NOTE: <-- and < are extensions
|
||||||
|
(define-sexp-parser SLASH none
|
||||||
|
(and "/" Spacing))
|
||||||
|
(define-sexp-parser AND all
|
||||||
|
(and "&" Spacing))
|
||||||
|
(define-sexp-parser NOT all
|
||||||
|
(and "!" Spacing))
|
||||||
|
(define-sexp-parser QUESTION all
|
||||||
|
(and "?" Spacing))
|
||||||
|
(define-sexp-parser STAR all
|
||||||
|
(and "*" Spacing))
|
||||||
|
(define-sexp-parser PLUS all
|
||||||
|
(and "+" Spacing))
|
||||||
|
(define-sexp-parser OPEN none
|
||||||
|
(and "(" Spacing))
|
||||||
|
(define-sexp-parser CLOSE none
|
||||||
|
(and ")" Spacing))
|
||||||
|
(define-sexp-parser DOT all
|
||||||
|
(and "." Spacing))
|
||||||
|
(define-sexp-parser SQUOTE none "'")
|
||||||
|
(define-sexp-parser DQUOTE none "\"")
|
||||||
|
(define-sexp-parser OPENBRACKET none "[")
|
||||||
|
(define-sexp-parser CLOSEBRACKET none "]")
|
||||||
|
(define-sexp-parser DASH none "-")
|
||||||
|
(define-sexp-parser Spacing none
|
||||||
|
(* (or Space Comment)))
|
||||||
|
(define-sexp-parser Comment none
|
||||||
|
(and "#" (* (and (not-followed-by EndOfLine) peg-any)) EndOfLine))
|
||||||
|
(define-sexp-parser Space none
|
||||||
|
(or " " "\t" EndOfLine))
|
||||||
|
(define-sexp-parser EndOfLine none
|
||||||
|
(or "\r\n" "\n" "\r"))
|
||||||
|
(define-sexp-parser EndOfFile none
|
||||||
|
(not-followed-by peg-any))
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;;;; PARSE STRING PEGS
|
;;;;; PARSE STRING PEGS
|
||||||
|
@ -101,7 +186,7 @@ RB < ']'
|
||||||
;; will define all of the nonterminals in the grammar with equivalent
|
;; will define all of the nonterminals in the grammar with equivalent
|
||||||
;; PEG s-expressions.
|
;; PEG s-expressions.
|
||||||
(define (peg-parser str for-syntax)
|
(define (peg-parser str for-syntax)
|
||||||
(let ((parsed (match-pattern peg-grammar str)))
|
(let ((parsed (match-pattern Grammar str)))
|
||||||
(if (not parsed)
|
(if (not parsed)
|
||||||
(begin
|
(begin
|
||||||
;; (display "Invalid PEG grammar!\n")
|
;; (display "Invalid PEG grammar!\n")
|
||||||
|
@ -110,11 +195,154 @@ RB < ']'
|
||||||
(cond
|
(cond
|
||||||
((or (not (list? lst)) (null? lst))
|
((or (not (list? lst)) (null? lst))
|
||||||
lst)
|
lst)
|
||||||
((eq? (car lst) 'peg-grammar)
|
((eq? (car lst) 'Grammar)
|
||||||
#`(begin
|
(Grammar->defn lst for-syntax)))))))
|
||||||
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
|
|
||||||
(context-flatten (lambda (lst) (<= (depth lst) 2))
|
;; (Grammar (Definition ...) (Definition ...))
|
||||||
(cdr lst))))))))))
|
(define (Grammar->defn lst for-syntax)
|
||||||
|
#`(begin
|
||||||
|
#,@(map (lambda (x) (Definition->defn x for-syntax))
|
||||||
|
(context-flatten (lambda (lst) (<= (depth lst) 1))
|
||||||
|
(cdr lst)))))
|
||||||
|
|
||||||
|
;; (Definition (Identifier "Something") "<-" (Expression ...))
|
||||||
|
;; `-> (define-peg-pattern Something 'all ...)
|
||||||
|
(define (Definition->defn lst for-syntax)
|
||||||
|
(match lst
|
||||||
|
(('Definition ('Identifier identifier) grabber expression)
|
||||||
|
#`(define-peg-pattern
|
||||||
|
#,(datum->syntax for-syntax (string->symbol identifier))
|
||||||
|
#,(match grabber
|
||||||
|
("<--" (datum->syntax for-syntax 'all))
|
||||||
|
("<-" (datum->syntax for-syntax 'body))
|
||||||
|
("<" (datum->syntax for-syntax 'none)))
|
||||||
|
#,(compressor
|
||||||
|
(Expression->defn expression for-syntax)
|
||||||
|
for-syntax)))))
|
||||||
|
|
||||||
|
;; (Expression X)
|
||||||
|
;; `-> (or X)
|
||||||
|
;; (Expression X Y)
|
||||||
|
;; `-> (or X Y)
|
||||||
|
;; (Expression X (Y Z ...))
|
||||||
|
;; `-> (or X Y Z ...)
|
||||||
|
(define (Expression->defn lst for-syntax)
|
||||||
|
(match lst
|
||||||
|
(('Expression seq ...)
|
||||||
|
#`(or #,@(map (lambda (x) (Sequence->defn x for-syntax))
|
||||||
|
(keyword-flatten '(Sequence) seq))))))
|
||||||
|
|
||||||
|
;; (Sequence X)
|
||||||
|
;; `-> (and X)
|
||||||
|
;; (Sequence X Y)
|
||||||
|
;; `-> (and X Y)
|
||||||
|
;; (Sequence X (Y Z ...))
|
||||||
|
;; `-> (and X Y Z ...)
|
||||||
|
(define (Sequence->defn lst for-syntax)
|
||||||
|
(match lst
|
||||||
|
(('Sequence pre ...)
|
||||||
|
#`(and #,@(map (lambda (x) (Prefix->defn x for-syntax))
|
||||||
|
(keyword-flatten '(Prefix) pre))))))
|
||||||
|
|
||||||
|
;; (Prefix (Suffix ...))
|
||||||
|
;; `-> (...)
|
||||||
|
;; (Prefix (NOT "!") (Suffix ...))
|
||||||
|
;; `-> (not-followed-by ...)
|
||||||
|
;; (Prefix (AND "&") (Suffix ...))
|
||||||
|
;; `-> (followed-by ...)
|
||||||
|
(define (Prefix->defn lst for-syntax)
|
||||||
|
(match lst
|
||||||
|
(('Prefix ('AND _) su) #`(followed-by #,(Suffix->defn su for-syntax)))
|
||||||
|
(('Prefix ('NOT _) su) #`(not-followed-by #,(Suffix->defn su for-syntax)))
|
||||||
|
(('Prefix suffix) (Suffix->defn suffix for-syntax))))
|
||||||
|
|
||||||
|
;; (Suffix (Primary ...))
|
||||||
|
;; `-> (...)
|
||||||
|
;; (Suffix (Primary ...) (STAR "*"))
|
||||||
|
;; `-> (* ...)
|
||||||
|
;; (Suffix (Primary ...) (QUESTION "?"))
|
||||||
|
;; `-> (? ...)
|
||||||
|
;; (Suffix (Primary ...) (PLUS "+"))
|
||||||
|
;; `-> (+ ...)
|
||||||
|
(define (Suffix->defn lst for-syntax)
|
||||||
|
(match lst
|
||||||
|
(('Suffix prim) (Primary->defn prim for-syntax))
|
||||||
|
(('Suffix prim ('STAR _)) #`(* #,(Primary->defn prim for-syntax)))
|
||||||
|
(('Suffix prim ('QUESTION _)) #`(? #,(Primary->defn prim for-syntax)))
|
||||||
|
(('Suffix prim ('PLUS _)) #`(+ #,(Primary->defn prim for-syntax)))))
|
||||||
|
|
||||||
|
|
||||||
|
(define (Primary->defn lst for-syntax)
|
||||||
|
(let ((value (second lst)))
|
||||||
|
(match (car value)
|
||||||
|
('DOT #'peg-any)
|
||||||
|
('Identifier (Identifier->defn value for-syntax))
|
||||||
|
('Expression (Expression->defn value for-syntax))
|
||||||
|
('Literal (Literal->defn value for-syntax))
|
||||||
|
('Class (Class->defn value for-syntax)))))
|
||||||
|
|
||||||
|
;; (Identifier "hello")
|
||||||
|
;; `-> hello
|
||||||
|
(define (Identifier->defn lst for-syntax)
|
||||||
|
(datum->syntax for-syntax (string->symbol (second lst))))
|
||||||
|
|
||||||
|
;; (Literal (Char "a") (Char "b") (Char "c"))
|
||||||
|
;; `-> "abc"
|
||||||
|
(define (Literal->defn lst for-syntax)
|
||||||
|
(apply string (map (lambda (x) (Char->defn x for-syntax)) (cdr lst))))
|
||||||
|
|
||||||
|
;; TODO: empty Class can happen: `[]`, but what does it represent?
|
||||||
|
;; (Class ...)
|
||||||
|
;; `-> (or ...)
|
||||||
|
(define (Class->defn lst for-syntax)
|
||||||
|
#`(or #,@(map (lambda (x) (Range->defn x for-syntax))
|
||||||
|
(cdr lst))))
|
||||||
|
|
||||||
|
;; For one character:
|
||||||
|
;; (Range (Char "a"))
|
||||||
|
;; `-> "a"
|
||||||
|
;; Or for a range:
|
||||||
|
;; (Range (Char "a") (Char "b"))
|
||||||
|
;; `-> (range #\a #\b)
|
||||||
|
(define (Range->defn lst for-syntax)
|
||||||
|
(match lst
|
||||||
|
(('Range ch)
|
||||||
|
(string (Char->defn ch for-syntax)))
|
||||||
|
(('Range range-beginning range-end)
|
||||||
|
#`(range
|
||||||
|
#,(Char->defn range-beginning for-syntax)
|
||||||
|
#,(Char->defn range-end for-syntax)))))
|
||||||
|
|
||||||
|
;; (Char "a")
|
||||||
|
;; `-> #\a
|
||||||
|
;; (Char "\\n")
|
||||||
|
;; `-> #\newline
|
||||||
|
;; (Char "\\135")
|
||||||
|
;; `-> #\]
|
||||||
|
(define (Char->defn lst for-syntax)
|
||||||
|
(let* ((charstr (second lst))
|
||||||
|
(first (string-ref charstr 0)))
|
||||||
|
(cond
|
||||||
|
((= 1 (string-length charstr)) first)
|
||||||
|
((char-numeric? (string-ref charstr 1))
|
||||||
|
(integer->char
|
||||||
|
(reduce + 0
|
||||||
|
(map
|
||||||
|
(lambda (x y)
|
||||||
|
(* (- (char->integer x) (char->integer #\0)) y))
|
||||||
|
(reverse (string->list charstr 1))
|
||||||
|
'(1 8 64)))))
|
||||||
|
(else
|
||||||
|
(case (string-ref charstr 1)
|
||||||
|
((#\n) #\newline)
|
||||||
|
((#\r) #\return)
|
||||||
|
((#\t) #\tab)
|
||||||
|
((#\') #\')
|
||||||
|
((#\]) #\])
|
||||||
|
((#\\) #\\)
|
||||||
|
((#\[) #\[))))))
|
||||||
|
|
||||||
|
(define peg-grammar Grammar)
|
||||||
|
|
||||||
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
||||||
;; defines all the appropriate nonterminals.
|
;; defines all the appropriate nonterminals.
|
||||||
|
@ -124,119 +352,6 @@ RB < ']'
|
||||||
((_ str)
|
((_ str)
|
||||||
(peg-parser (syntax->datum #'str) x)))))
|
(peg-parser (syntax->datum #'str) x)))))
|
||||||
|
|
||||||
;; lst has format (nonterm grabber pattern), where
|
|
||||||
;; nonterm is a symbol (the name of the nonterminal),
|
|
||||||
;; grabber is a string (either "<", "<-" or "<--"), and
|
|
||||||
;; pattern is the parse of a PEG pattern expressed as as string.
|
|
||||||
(define (peg-nonterm->defn lst for-syntax)
|
|
||||||
(let* ((nonterm (car lst))
|
|
||||||
(grabber (cadr lst))
|
|
||||||
(pattern (caddr lst))
|
|
||||||
(nonterm-name (datum->syntax for-syntax
|
|
||||||
(string->symbol (cadr nonterm)))))
|
|
||||||
#`(define-peg-pattern #,nonterm-name
|
|
||||||
#,(cond
|
|
||||||
((string=? grabber "<--") (datum->syntax for-syntax 'all))
|
|
||||||
((string=? grabber "<-") (datum->syntax for-syntax 'body))
|
|
||||||
(else (datum->syntax for-syntax 'none)))
|
|
||||||
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
|
|
||||||
|
|
||||||
;; lst has format ('peg-pattern ...).
|
|
||||||
;; After the context-flatten, (cdr lst) has format
|
|
||||||
;; (('peg-alternative ...) ...), where the outer list is a collection
|
|
||||||
;; of elements from a '/' alternative.
|
|
||||||
(define (peg-pattern->defn lst for-syntax)
|
|
||||||
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
|
|
||||||
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
|
|
||||||
(cdr lst)))))
|
|
||||||
|
|
||||||
;; lst has format ('peg-alternative ...).
|
|
||||||
;; After the context-flatten, (cdr lst) has the format
|
|
||||||
;; (item ...), where each item has format either ("!" ...), ("&" ...),
|
|
||||||
;; or ('peg-suffix ...).
|
|
||||||
(define (peg-alternative->defn lst for-syntax)
|
|
||||||
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
|
|
||||||
(context-flatten (lambda (x) (or (string? (car x))
|
|
||||||
(eq? (car x) 'peg-suffix)))
|
|
||||||
(cdr lst)))))
|
|
||||||
|
|
||||||
;; lst has the format either
|
|
||||||
;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
|
|
||||||
;; ('peg-suffix ...).
|
|
||||||
(define (peg-body->defn lst for-syntax)
|
|
||||||
(cond
|
|
||||||
((equal? (car lst) "&")
|
|
||||||
#`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
|
||||||
((equal? (car lst) "!")
|
|
||||||
#`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
|
||||||
((eq? (car lst) 'peg-suffix)
|
|
||||||
(peg-suffix->defn lst for-syntax))
|
|
||||||
(else `(peg-parse-body-fail ,lst))))
|
|
||||||
|
|
||||||
;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
|
|
||||||
(define (peg-suffix->defn lst for-syntax)
|
|
||||||
(let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
|
|
||||||
(cond
|
|
||||||
((null? (cddr lst))
|
|
||||||
inner-defn)
|
|
||||||
((equal? (caddr lst) "*")
|
|
||||||
#`(* #,inner-defn))
|
|
||||||
((equal? (caddr lst) "?")
|
|
||||||
#`(? #,inner-defn))
|
|
||||||
((equal? (caddr lst) "+")
|
|
||||||
#`(+ #,inner-defn)))))
|
|
||||||
|
|
||||||
;; Parse a primary.
|
|
||||||
(define (peg-primary->defn lst for-syntax)
|
|
||||||
(let ((el (cadr lst)))
|
|
||||||
(cond
|
|
||||||
((list? el)
|
|
||||||
(cond
|
|
||||||
((eq? (car el) 'peg-literal)
|
|
||||||
(peg-literal->defn el for-syntax))
|
|
||||||
((eq? (car el) 'peg-charclass)
|
|
||||||
(peg-charclass->defn el for-syntax))
|
|
||||||
((eq? (car el) 'peg-nonterminal)
|
|
||||||
(datum->syntax for-syntax (string->symbol (cadr el))))))
|
|
||||||
((string? el)
|
|
||||||
(cond
|
|
||||||
((equal? el "(")
|
|
||||||
(peg-pattern->defn (caddr lst) for-syntax))
|
|
||||||
((equal? el ".")
|
|
||||||
(datum->syntax for-syntax 'peg-any))
|
|
||||||
(else (datum->syntax for-syntax
|
|
||||||
`(peg-parse-any unknown-string ,lst)))))
|
|
||||||
(else (datum->syntax for-syntax
|
|
||||||
`(peg-parse-any unknown-el ,lst))))))
|
|
||||||
|
|
||||||
;; Trims characters off the front and end of STR.
|
|
||||||
;; (trim-1chars "'ab'") -> "ab"
|
|
||||||
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
|
|
||||||
|
|
||||||
;; Parses a literal.
|
|
||||||
(define (peg-literal->defn lst for-syntax)
|
|
||||||
(datum->syntax for-syntax (trim-1chars (cadr lst))))
|
|
||||||
|
|
||||||
;; Parses a charclass.
|
|
||||||
(define (peg-charclass->defn lst for-syntax)
|
|
||||||
#`(or
|
|
||||||
#,@(map
|
|
||||||
(lambda (cc)
|
|
||||||
(cond
|
|
||||||
((eq? (car cc) 'charclass-range)
|
|
||||||
#`(range #,(datum->syntax
|
|
||||||
for-syntax
|
|
||||||
(string-ref (cadr cc) 0))
|
|
||||||
#,(datum->syntax
|
|
||||||
for-syntax
|
|
||||||
(string-ref (cadr cc) 2))))
|
|
||||||
((eq? (car cc) 'charclass-single)
|
|
||||||
(datum->syntax for-syntax (cadr cc)))))
|
|
||||||
(context-flatten
|
|
||||||
(lambda (x) (or (eq? (car x) 'charclass-range)
|
|
||||||
(eq? (car x) 'charclass-single)))
|
|
||||||
(cdr lst)))))
|
|
||||||
|
|
||||||
;; Compresses a list to save the optimizer work.
|
;; Compresses a list to save the optimizer work.
|
||||||
;; e.g. (or (and a)) -> a
|
;; e.g. (or (and a)) -> a
|
||||||
(define (compressor-core lst)
|
(define (compressor-core lst)
|
||||||
|
@ -263,11 +378,10 @@ RB < ']'
|
||||||
(let ((string (syntax->datum #'str-stx)))
|
(let ((string (syntax->datum #'str-stx)))
|
||||||
(compile-peg-pattern
|
(compile-peg-pattern
|
||||||
(compressor
|
(compressor
|
||||||
(peg-pattern->defn
|
(Expression->defn
|
||||||
(peg:tree (match-pattern peg-pattern string)) #'str-stx)
|
(peg:tree (match-pattern Expression string)) #'str-stx)
|
||||||
#'str-stx)
|
#'str-stx)
|
||||||
(if (eq? accum 'all) 'body accum))))
|
(if (eq? accum 'all) 'body accum))))
|
||||||
(else (error "Bad embedded PEG string" args))))
|
(else (error "Bad embedded PEG string" args))))
|
||||||
|
|
||||||
(add-peg-compiler! 'peg peg-string-compile)
|
(add-peg-compiler! 'peg peg-string-compile)
|
||||||
|
|
||||||
|
|
|
@ -28,17 +28,25 @@
|
||||||
;; the nonterminals defined in the PEG parser written with
|
;; the nonterminals defined in the PEG parser written with
|
||||||
;; S-expressions.
|
;; S-expressions.
|
||||||
(define grammar-mapping
|
(define grammar-mapping
|
||||||
'((grammar peg-grammar)
|
'((Grammar Grammar)
|
||||||
(pattern peg-pattern)
|
(Definition Definition)
|
||||||
(alternative peg-alternative)
|
(Expression Expression)
|
||||||
(suffix peg-suffix)
|
(Sequence Sequence)
|
||||||
(primary peg-primary)
|
(Prefix Prefix)
|
||||||
(literal peg-literal)
|
(Suffix Suffix)
|
||||||
(charclass peg-charclass)
|
(Primary Primary)
|
||||||
(CCrange charclass-range)
|
(Identifier Identifier)
|
||||||
(CCsingle charclass-single)
|
(Literal Literal)
|
||||||
(nonterminal peg-nonterminal)
|
(Class Class)
|
||||||
(sp peg-sp)))
|
(Range Range)
|
||||||
|
(Char Char)
|
||||||
|
(LEFTARROW LEFTARROW)
|
||||||
|
(AND AND)
|
||||||
|
(NOT NOT)
|
||||||
|
(QUESTION QUESTION)
|
||||||
|
(STAR STAR)
|
||||||
|
(PLUS PLUS)
|
||||||
|
(DOT DOT)))
|
||||||
|
|
||||||
;; Transforms the nonterminals defined in the PEG parser written as a PEG to the nonterminals defined in the PEG parser written with S-expressions.
|
;; Transforms the nonterminals defined in the PEG parser written as a PEG to the nonterminals defined in the PEG parser written with S-expressions.
|
||||||
(define (grammar-transform x)
|
(define (grammar-transform x)
|
||||||
|
@ -69,7 +77,7 @@
|
||||||
(peg:tree (match-pattern (@@ (ice-9 peg) peg-grammar) (@@ (ice-9 peg) peg-as-peg)))
|
(peg:tree (match-pattern (@@ (ice-9 peg) peg-grammar) (@@ (ice-9 peg) peg-as-peg)))
|
||||||
(tree-map
|
(tree-map
|
||||||
grammar-transform
|
grammar-transform
|
||||||
(peg:tree (match-pattern grammar (@@ (ice-9 peg) peg-as-peg)))))))
|
(peg:tree (match-pattern Grammar (@@ (ice-9 peg) peg-as-peg)))))))
|
||||||
|
|
||||||
;; A grammar for pascal-style comments from Wikipedia.
|
;; A grammar for pascal-style comments from Wikipedia.
|
||||||
(define comment-grammar
|
(define comment-grammar
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue