1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00

PEG: string-peg: Better support for escaping.

* module/ice-9/peg/string-peg.scm (peg-as-peg): Augment with rules for
hexadecimal digits, “\uXXX” for characters, “\t” for tabs, etc.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>
This commit is contained in:
Ekaitz Zarraga 2024-12-22 21:01:07 +01:00 committed by Ludovic Courtès
parent c86a48a92f
commit 38ad264979
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -67,9 +67,10 @@ Literal <-- SQUOTE (!SQUOTE Char)* SQUOTE Spacing
NotInClass <-- OPENBRACKET NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing NotInClass <-- OPENBRACKET NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
Class <-- OPENBRACKET !NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing Class <-- OPENBRACKET !NOTIN (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
Range <-- Char DASH Char / Char Range <-- Char DASH Char / Char
Char <-- '\\\\' [nrt'\"\\[\\]\\\\] Char <-- '\\\\' [nrtf'\"\\[\\]\\\\]
/ '\\\\' [0-7][0-7][0-7] / '\\\\' [0-7][0-7][0-7]
/ '\\\\' [0-7][0-7]? / '\\\\' [0-7][0-7]?
/ '\\\\' 'u' HEX HEX HEX HEX
/ !'\\\\' . / !'\\\\' .
# NOTE: `<--` and `<` are extensions # NOTE: `<--` and `<` are extensions
@ -79,6 +80,7 @@ DQUOTE < [\"]
DASH < '-' DASH < '-'
OPENBRACKET < '[' OPENBRACKET < '['
CLOSEBRACKET < ']' CLOSEBRACKET < ']'
HEX <- [0-9a-fA-F]
NOTIN < '^' NOTIN < '^'
SLASH < '/' Spacing SLASH < '/' Spacing
AND <-- '&' Spacing AND <-- '&' Spacing
@ -92,7 +94,7 @@ DOT <-- '.' Spacing
Spacing < (Space / Comment)* Spacing < (Space / Comment)*
Comment < '#' (!EndOfLine .)* EndOfLine Comment < '#' (!EndOfLine .)* EndOfLine
Space < ' ' / '\t' / EndOfLine Space < ' ' / '\\t' / EndOfLine
EndOfLine < '\\r\\n' / '\\n' / '\\r' EndOfLine < '\\r\\n' / '\\n' / '\\r'
EndOfFile < !. EndOfFile < !.
") ")
@ -144,12 +146,15 @@ EndOfFile < !.
(define-sexp-parser Range all (define-sexp-parser Range all
(or (and Char DASH Char) Char)) (or (and Char DASH Char) Char))
(define-sexp-parser Char all (define-sexp-parser Char all
(or (and "\\" (or "n" "r" "t" "'" "\"" "[" "]" "\\")) (or (and "\\" (or "n" "r" "t" "f" "'" "\"" "[" "]" "\\"))
(and "\\" (range #\0 #\7) (range #\0 #\7) (range #\0 #\7)) (and "\\" (range #\0 #\7) (range #\0 #\7) (range #\0 #\7))
(and "\\" (range #\0 #\7) (? (range #\0 #\7))) (and "\\" (range #\0 #\7) (? (range #\0 #\7)))
(and "\\" "u" HEX HEX HEX HEX)
(and (not-followed-by "\\") peg-any))) (and (not-followed-by "\\") peg-any)))
(define-sexp-parser LEFTARROW body (define-sexp-parser LEFTARROW body
(and (or "<--" "<-" "<") Spacing)) ; NOTE: <-- and < are extensions (and (or "<--" "<-" "<") Spacing)) ; NOTE: <-- and < are extensions
(define-sexp-parser HEX body
(or (range #\0 #\9) (range #\a #\f) (range #\A #\F)))
(define-sexp-parser NOTIN none (define-sexp-parser NOTIN none
(and "^")) (and "^"))
(define-sexp-parser SLASH none (define-sexp-parser SLASH none
@ -372,12 +377,27 @@ EndOfFile < !.
(* (- (char->integer x) (char->integer #\0)) y)) (* (- (char->integer x) (char->integer #\0)) y))
(reverse (string->list charstr 1)) (reverse (string->list charstr 1))
'(1 8 64))))) '(1 8 64)))))
((char=? #\u (string-ref charstr 1))
(integer->char
(reduce + 0
(map
(lambda (x y)
(* (cond
((char-numeric? x)
(- (char->integer x) (char->integer #\0)))
((char-alphabetic? x)
(+ 10 (- (char->integer x) (char->integer #\a)))))
y))
(reverse (string->list (string-downcase charstr) 2))
'(1 16 256 4096)))))
(else (else
(case (string-ref charstr 1) (case (string-ref charstr 1)
((#\n) #\newline) ((#\n) #\newline)
((#\r) #\return) ((#\r) #\return)
((#\t) #\tab) ((#\t) #\tab)
((#\f) #\page)
((#\') #\') ((#\') #\')
((#\") #\")
((#\]) #\]) ((#\]) #\])
((#\\) #\\) ((#\\) #\\)
((#\[) #\[)))))) ((#\[) #\[))))))