mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-30 06:50:31 +02:00
PEG: Add support for not-in-range
and [^...]
Modern PEG supports inversed class like `[^a-z]` that would get any character not in the `a-z` range. This commit adds support for that and also for a new `not-in-range` PEG pattern for scheme. * module/ice-9/peg/codegen.scm (cg-not-in-range): New function. * module/ice-9/peg/string-peg.scm: Add support for `[^...]` * test-suite/tests/peg.test: Test it. * doc/ref/api-peg.texi: Document accordingly. Signed-off-by: Ludovic Courtès <ludo@gnu.org>
This commit is contained in:
parent
ff11753df1
commit
25504ba216
5 changed files with 73 additions and 4 deletions
3
NEWS
3
NEWS
|
@ -27,6 +27,9 @@ downright unusable (e.g., <https://bugs.gnu.org/72378>), non-conforming
|
||||||
PEG grammar parser in (ice-9 peg string-peg) has been rewritten to cover
|
PEG grammar parser in (ice-9 peg string-peg) has been rewritten to cover
|
||||||
all the functionality defined in <https://bford.info/pub/lang/peg.pdf>.
|
all the functionality defined in <https://bford.info/pub/lang/peg.pdf>.
|
||||||
|
|
||||||
|
The 'not-in-range' pattern was also added to (ice-9 peg); it is
|
||||||
|
available from PEG strings via '[^...]'.
|
||||||
|
|
||||||
** GOOPS: Introduce new forms method* and define-method*
|
** GOOPS: Introduce new forms method* and define-method*
|
||||||
|
|
||||||
The module (oop goops) now exports method* and define-method* which are
|
The module (oop goops) now exports method* and define-method* which are
|
||||||
|
|
|
@ -147,6 +147,14 @@ Parses any character falling between @var{a} and @var{z}.
|
||||||
@code{(range #\a #\z)}
|
@code{(range #\a #\z)}
|
||||||
@end deftp
|
@end deftp
|
||||||
|
|
||||||
|
@deftp {PEG Pattern} {inverse range of characters} a z
|
||||||
|
Parses any character not falling between @var{a} and @var{z}.
|
||||||
|
|
||||||
|
@code{"[^a-z]"}
|
||||||
|
|
||||||
|
@code{(not-in-range #\a #\z)}
|
||||||
|
@end deftp
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
|
|
|
@ -140,6 +140,27 @@ return EXP."
|
||||||
((none) #`(list (1+ pos) '()))
|
((none) #`(list (1+ pos) '()))
|
||||||
(else (error "bad accum" accum))))))))))
|
(else (error "bad accum" accum))))))))))
|
||||||
|
|
||||||
|
;; Generates code for matching a range of characters not between start and end.
|
||||||
|
;; E.g.: (cg-not-in-range syntax #\a #\z 'body)
|
||||||
|
(define (cg-not-in-range pat accum)
|
||||||
|
(syntax-case pat ()
|
||||||
|
((start end)
|
||||||
|
(if (not (and (char? (syntax->datum #'start))
|
||||||
|
(char? (syntax->datum #'end))))
|
||||||
|
(error "range PEG should have characters after it; instead got"
|
||||||
|
#'start #'end))
|
||||||
|
#`(lambda (str len pos)
|
||||||
|
(and (< pos len)
|
||||||
|
(let ((c (string-ref str pos)))
|
||||||
|
(and (or (char<? c start) (char>? c end))
|
||||||
|
#,(case accum
|
||||||
|
((all) #`(list (1+ pos)
|
||||||
|
(list 'cg-not-in-range (string c))))
|
||||||
|
((name) #`(list (1+ pos) 'cg-not-in-range))
|
||||||
|
((body) #`(list (1+ pos) (string c)))
|
||||||
|
((none) #`(list (1+ pos) '()))
|
||||||
|
(else (error "bad accum" accum))))))))))
|
||||||
|
|
||||||
;; Generate code to match a pattern and do nothing with the result
|
;; Generate code to match a pattern and do nothing with the result
|
||||||
(define (cg-ignore pat accum)
|
(define (cg-ignore pat accum)
|
||||||
(syntax-case pat ()
|
(syntax-case pat ()
|
||||||
|
@ -304,6 +325,7 @@ return EXP."
|
||||||
(assq-set! peg-compiler-alist symbol function)))
|
(assq-set! peg-compiler-alist symbol function)))
|
||||||
|
|
||||||
(add-peg-compiler! 'range cg-range)
|
(add-peg-compiler! 'range cg-range)
|
||||||
|
(add-peg-compiler! 'not-in-range cg-not-in-range)
|
||||||
(add-peg-compiler! 'ignore cg-ignore)
|
(add-peg-compiler! 'ignore cg-ignore)
|
||||||
(add-peg-compiler! 'capture cg-capture)
|
(add-peg-compiler! 'capture cg-capture)
|
||||||
(add-peg-compiler! 'and cg-and)
|
(add-peg-compiler! 'and cg-and)
|
||||||
|
|
|
@ -54,7 +54,7 @@ Prefix <-- (AND / NOT)? Suffix
|
||||||
Suffix <-- Primary (QUESTION / STAR / PLUS)?
|
Suffix <-- Primary (QUESTION / STAR / PLUS)?
|
||||||
Primary <-- Identifier !LEFTARROW
|
Primary <-- Identifier !LEFTARROW
|
||||||
/ OPEN Expression CLOSE
|
/ OPEN Expression CLOSE
|
||||||
/ Literal / Class / DOT
|
/ Literal / Class / NotInClass / DOT
|
||||||
|
|
||||||
# Lexical syntax
|
# Lexical syntax
|
||||||
Identifier <-- IdentStart IdentCont* Spacing
|
Identifier <-- IdentStart IdentCont* Spacing
|
||||||
|
@ -64,6 +64,7 @@ IdentCont <- IdentStart / [0-9]
|
||||||
|
|
||||||
Literal <-- SQUOTE (!SQUOTE Char)* SQUOTE Spacing
|
Literal <-- SQUOTE (!SQUOTE Char)* SQUOTE Spacing
|
||||||
/ DQUOTE (!DQUOTE Char)* DQUOTE Spacing
|
/ DQUOTE (!DQUOTE Char)* DQUOTE Spacing
|
||||||
|
NotInClass <-- OPENBRACKET NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
|
||||||
Class <-- OPENBRACKET !NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
|
Class <-- OPENBRACKET !NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
|
||||||
Range <-- Char DASH Char / Char
|
Range <-- Char DASH Char / Char
|
||||||
Char <-- '\\\\' [nrt'\"\\[\\]\\\\]
|
Char <-- '\\\\' [nrt'\"\\[\\]\\\\]
|
||||||
|
@ -78,6 +79,7 @@ DQUOTE < [\"]
|
||||||
DASH < '-'
|
DASH < '-'
|
||||||
OPENBRACKET < '['
|
OPENBRACKET < '['
|
||||||
CLOSEBRACKET < ']'
|
CLOSEBRACKET < ']'
|
||||||
|
NOTIN < '^'
|
||||||
SLASH < '/' Spacing
|
SLASH < '/' Spacing
|
||||||
AND <-- '&' Spacing
|
AND <-- '&' Spacing
|
||||||
NOT <-- '!' Spacing
|
NOT <-- '!' Spacing
|
||||||
|
@ -122,6 +124,7 @@ EndOfFile < !.
|
||||||
(and OPEN Expression CLOSE)
|
(and OPEN Expression CLOSE)
|
||||||
Literal
|
Literal
|
||||||
Class
|
Class
|
||||||
|
NotInClass
|
||||||
DOT))
|
DOT))
|
||||||
(define-sexp-parser Identifier all
|
(define-sexp-parser Identifier all
|
||||||
(and IdentStart (* IdentCont) Spacing))
|
(and IdentStart (* IdentCont) Spacing))
|
||||||
|
@ -133,7 +136,11 @@ EndOfFile < !.
|
||||||
(or (and SQUOTE (* (and (not-followed-by SQUOTE) Char)) SQUOTE Spacing)
|
(or (and SQUOTE (* (and (not-followed-by SQUOTE) Char)) SQUOTE Spacing)
|
||||||
(and DQUOTE (* (and (not-followed-by DQUOTE) Char)) DQUOTE Spacing)))
|
(and DQUOTE (* (and (not-followed-by DQUOTE) Char)) DQUOTE Spacing)))
|
||||||
(define-sexp-parser Class all
|
(define-sexp-parser Class all
|
||||||
(and OPENBRACKET (* (and (not-followed-by CLOSEBRACKET) Range)) CLOSEBRACKET Spacing))
|
(and OPENBRACKET (not-followed-by NOTIN)
|
||||||
|
(* (and (not-followed-by CLOSEBRACKET) Range)) CLOSEBRACKET Spacing))
|
||||||
|
(define-sexp-parser NotInClass all
|
||||||
|
(and OPENBRACKET NOTIN
|
||||||
|
(* (and (not-followed-by CLOSEBRACKET) Range)) CLOSEBRACKET Spacing))
|
||||||
(define-sexp-parser Range all
|
(define-sexp-parser Range all
|
||||||
(or (and Char DASH Char) Char))
|
(or (and Char DASH Char) Char))
|
||||||
(define-sexp-parser Char all
|
(define-sexp-parser Char all
|
||||||
|
@ -143,6 +150,8 @@ EndOfFile < !.
|
||||||
(and (not-followed-by "\\") peg-any)))
|
(and (not-followed-by "\\") peg-any)))
|
||||||
(define-sexp-parser LEFTARROW body
|
(define-sexp-parser LEFTARROW body
|
||||||
(and (or "<--" "<-" "<") Spacing)) ; NOTE: <-- and < are extensions
|
(and (or "<--" "<-" "<") Spacing)) ; NOTE: <-- and < are extensions
|
||||||
|
(define-sexp-parser NOTIN none
|
||||||
|
(and "^"))
|
||||||
(define-sexp-parser SLASH none
|
(define-sexp-parser SLASH none
|
||||||
(and "/" Spacing))
|
(and "/" Spacing))
|
||||||
(define-sexp-parser AND all
|
(define-sexp-parser AND all
|
||||||
|
@ -279,6 +288,7 @@ EndOfFile < !.
|
||||||
('Identifier (Identifier->defn value for-syntax))
|
('Identifier (Identifier->defn value for-syntax))
|
||||||
('Expression (Expression->defn value for-syntax))
|
('Expression (Expression->defn value for-syntax))
|
||||||
('Literal (Literal->defn value for-syntax))
|
('Literal (Literal->defn value for-syntax))
|
||||||
|
('NotInClass (NotInClass->defn value for-syntax))
|
||||||
('Class (Class->defn value for-syntax)))))
|
('Class (Class->defn value for-syntax)))))
|
||||||
|
|
||||||
;; (Identifier "hello")
|
;; (Identifier "hello")
|
||||||
|
@ -291,13 +301,35 @@ EndOfFile < !.
|
||||||
(define (Literal->defn lst for-syntax)
|
(define (Literal->defn lst for-syntax)
|
||||||
(apply string (map (lambda (x) (Char->defn x for-syntax)) (cdr lst))))
|
(apply string (map (lambda (x) (Char->defn x for-syntax)) (cdr lst))))
|
||||||
|
|
||||||
;; TODO: empty Class can happen: `[]`, but what does it represent?
|
;; (NotInClass ...)
|
||||||
|
;; `-> (and ...)
|
||||||
|
(define (NotInClass->defn lst for-syntax)
|
||||||
|
#`(and #,@(map (lambda (x) (NotInRange->defn x for-syntax))
|
||||||
|
(cdr lst))))
|
||||||
|
|
||||||
;; (Class ...)
|
;; (Class ...)
|
||||||
;; `-> (or ...)
|
;; `-> (or ...)
|
||||||
(define (Class->defn lst for-syntax)
|
(define (Class->defn lst for-syntax)
|
||||||
#`(or #,@(map (lambda (x) (Range->defn x for-syntax))
|
#`(or #,@(map (lambda (x) (Range->defn x for-syntax))
|
||||||
(cdr lst))))
|
(cdr lst))))
|
||||||
|
|
||||||
|
;; NOTE: It's coming from NotInClass.
|
||||||
|
;; For one character:
|
||||||
|
;; (Range (Char "a"))
|
||||||
|
;; `-> (not-in-range #\a #\a)
|
||||||
|
;; Or for a range:
|
||||||
|
;; (Range (Char "a") (Char "b"))
|
||||||
|
;; `-> (not-in-range #\a #\b)
|
||||||
|
(define (NotInRange->defn lst for-syntax)
|
||||||
|
(match lst
|
||||||
|
(('Range c)
|
||||||
|
(let ((ch (Char->defn c for-syntax)))
|
||||||
|
#`(not-in-range #,ch #,ch)))
|
||||||
|
(('Range range-beginning range-end)
|
||||||
|
#`(not-in-range
|
||||||
|
#,(Char->defn range-beginning for-syntax)
|
||||||
|
#,(Char->defn range-end for-syntax)))))
|
||||||
|
|
||||||
;; For one character:
|
;; For one character:
|
||||||
;; (Range (Char "a"))
|
;; (Range (Char "a"))
|
||||||
;; `-> "a"
|
;; `-> "a"
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
(Identifier Identifier)
|
(Identifier Identifier)
|
||||||
(Literal Literal)
|
(Literal Literal)
|
||||||
(Class Class)
|
(Class Class)
|
||||||
|
(NotInClass NotInClass)
|
||||||
(Range Range)
|
(Range Range)
|
||||||
(Char Char)
|
(Char Char)
|
||||||
(LEFTARROW LEFTARROW)
|
(LEFTARROW LEFTARROW)
|
||||||
|
@ -85,7 +86,7 @@
|
||||||
End <-- '*)'
|
End <-- '*)'
|
||||||
C <- Begin N* End
|
C <- Begin N* End
|
||||||
N <- C / (!Begin !End Z)
|
N <- C / (!Begin !End Z)
|
||||||
Z <- .")
|
Z <- [^X-Z]") ;; Forbid some characters to test not-in-range
|
||||||
|
|
||||||
;; A short /etc/passwd file.
|
;; A short /etc/passwd file.
|
||||||
(define *etc-passwd*
|
(define *etc-passwd*
|
||||||
|
@ -125,6 +126,9 @@ SLASH < '/'")
|
||||||
(match-pattern C "(*blah*)")
|
(match-pattern C "(*blah*)")
|
||||||
(make-prec 0 8 "(*blah*)"
|
(make-prec 0 8 "(*blah*)"
|
||||||
'((Begin "(*") "blah" (End "*)")))))
|
'((Begin "(*") "blah" (End "*)")))))
|
||||||
|
(pass-if
|
||||||
|
"simple comment with forbidden char"
|
||||||
|
(not (match-pattern C "(*blYh*)")))
|
||||||
(pass-if
|
(pass-if
|
||||||
"simple comment padded"
|
"simple comment padded"
|
||||||
(equal?
|
(equal?
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue