mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-29 19:30:36 +02:00
This is a regression since Guile 3.0.2 and breaks compilation of a Guile library. * module/ice-9/read.scm (%read)[read-parenthesized]: When SAW-BRACE? is #t but CH isn't #\#, don't eat CH. * test-suite/tests/reader.test ("#{}#): Add four test cases.
896 lines
33 KiB
Scheme
896 lines
33 KiB
Scheme
;;; Scheme reader
|
|
;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021
|
|
;;; Free Software Foundation, Inc.
|
|
;;;
|
|
;;; This library is free software: you can redistribute it and/or modify
|
|
;;; it under the terms of the GNU Lesser General Public License as
|
|
;;; published by the Free Software Foundation, either version 3 of the
|
|
;;; License, or (at your option) any later version.
|
|
;;;
|
|
;;; This library is distributed in the hope that it will be useful, but
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;;; Lesser General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU Lesser General Public
|
|
;;; License along with this program. If not, see
|
|
;;; <http://www.gnu.org/licenses/>.
|
|
|
|
;;; Commentary:
|
|
;;;
|
|
;;; Implementation of Scheme's "read".
|
|
;;;
|
|
;;; Code:
|
|
|
|
|
|
;; While porting read.c to Scheme, I found these expressions that result
|
|
;; in undesirable behavior in the C reader. Most all of them are also
|
|
;; present in the Scheme reader. Probably I should fix all of them, but
|
|
;; I would first like to prove that the Scheme reader is good enough.
|
|
;;
|
|
;; (call-with-input-string "," read)
|
|
;; (read-disable 'square-brackets), then (call-with-input-string "]" read)
|
|
;; (call-with-input-string "(#tru1)" read) => '(#t ru1)
|
|
;; (call-with-input-string "(#true1)" read) => '(#t 1)
|
|
;; (call-with-input-string "(#fAlse)" read) => '(#f Alse)
|
|
;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array
|
|
;; #: foo
|
|
;; #:#|what|#foo
|
|
;; #@-(1 2 3) => #(1 2 3)
|
|
;; (#*10101010102) => (#*1010101010 2)
|
|
|
|
(define-syntax let*-values
|
|
(syntax-rules ()
|
|
((_ () . body) (let () . body))
|
|
((_ ((vars expr) . binds) . body)
|
|
(call-with-values (lambda () expr)
|
|
(lambda vars (let*-values binds . body))))))
|
|
|
|
(define bitfield:record-positions? 0)
|
|
(define bitfield:case-insensitive? 2)
|
|
(define bitfield:keyword-style 4)
|
|
(define bitfield:r6rs-escapes? 6)
|
|
(define bitfield:square-brackets? 8)
|
|
(define bitfield:hungry-eol-escapes? 10)
|
|
(define bitfield:curly-infix? 12)
|
|
(define bitfield:r7rs-symbols? 14)
|
|
(define read-option-bits 16)
|
|
|
|
(define read-option-mask #b11)
|
|
(define read-option-inherit #b11)
|
|
(define read-options-inherit-all (1- (ash 1 read-option-bits)))
|
|
|
|
(define keyword-style-hash-prefix 0)
|
|
(define keyword-style-prefix 1)
|
|
(define keyword-style-postfix 2)
|
|
|
|
(define (compute-reader-options port)
|
|
(let ((options (read-options))
|
|
(port-options (or (%port-property port 'port-read-options)
|
|
read-options-inherit-all)))
|
|
(define-syntax-rule (option field exp)
|
|
(let ((port-option (logand port-options (ash read-option-mask field))))
|
|
(if (= port-option (ash read-option-inherit field))
|
|
exp
|
|
port-option)))
|
|
(define (bool key field)
|
|
(option field
|
|
(if (memq key options) (ash 1 field) 0)))
|
|
(define (enum key values field)
|
|
(option field
|
|
(ash (assq-ref values (and=> (memq key options) cadr)) field)))
|
|
(logior (bool 'positions bitfield:record-positions?)
|
|
(bool 'case-insensitive bitfield:case-insensitive?)
|
|
(enum 'keywords '((#f . 0) (prefix . 1) (postfix . 2))
|
|
bitfield:keyword-style)
|
|
(bool 'r6rs-hex-escapes bitfield:r6rs-escapes?)
|
|
(bool 'square-brackets bitfield:square-brackets?)
|
|
(bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?)
|
|
(bool 'curly-infix bitfield:curly-infix?)
|
|
(bool 'r7rs-symbols bitfield:r7rs-symbols?))))
|
|
|
|
(define (set-option options field new)
|
|
(logior (ash new field) (logand options (lognot (ash #b11 field)))))
|
|
|
|
(define (set-port-read-option! port field value)
|
|
(%set-port-property! port 'port-read-options
|
|
(set-option (or (%port-property port 'port-read-options)
|
|
read-options-inherit-all)
|
|
field value)))
|
|
|
|
(define (%read port annotate strip-annotation)
|
|
;; init read options
|
|
(define opts (compute-reader-options port))
|
|
(define (enabled? field)
|
|
(not (zero? (logand (ash 1 field) opts))))
|
|
(define (set-reader-option! field value)
|
|
(set! opts (set-option opts field value))
|
|
(set-port-read-option! port field value))
|
|
(define (case-insensitive?) (enabled? bitfield:case-insensitive?))
|
|
(define (keyword-style) (logand read-option-mask
|
|
(ash opts (- bitfield:keyword-style))))
|
|
(define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?))
|
|
(define (square-brackets?) (enabled? bitfield:square-brackets?))
|
|
(define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?))
|
|
(define (curly-infix?) (enabled? bitfield:curly-infix?))
|
|
(define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?))
|
|
(define neoteric 0)
|
|
(define (next) (read-char port))
|
|
(define (peek) (peek-char port))
|
|
(define filename (port-filename port))
|
|
(define (get-pos) (cons (port-line port) (port-column port)))
|
|
;; We are only ever interested in whether an object is a char or not.
|
|
(define (eof-object? x) (not (char? x)))
|
|
|
|
(define (input-error msg args)
|
|
(scm-error 'read-error #f
|
|
(format #f "~A:~S:~S: ~A"
|
|
(or filename "#<unknown port>")
|
|
(1+ (port-line port))
|
|
(1+ (port-column port))
|
|
msg)
|
|
args #f))
|
|
|
|
(define-syntax-rule (error msg arg ...)
|
|
(let ((args (list arg ...)))
|
|
(input-error msg args)))
|
|
|
|
(define (read-semicolon-comment)
|
|
(let ((ch (next)))
|
|
(cond
|
|
((eof-object? ch) ch)
|
|
((eqv? ch #\newline) (next))
|
|
(else (read-semicolon-comment)))))
|
|
|
|
(define-syntax-rule (take-until first pred)
|
|
(let lp ((out (list first)))
|
|
(let ((ch (peek)))
|
|
(if (or (eof-object? ch) (pred ch))
|
|
(reverse-list->string out)
|
|
(begin
|
|
(next)
|
|
(lp (cons ch out)))))))
|
|
(define-syntax-rule (take-while first pred)
|
|
(take-until first (lambda (ch) (not (pred ch)))))
|
|
|
|
(define (delimiter? ch)
|
|
(case ch
|
|
((#\( #\) #\; #\" #\space #\return #\ff #\newline #\tab) #t)
|
|
((#\[ #\]) (or (square-brackets?) (curly-infix?)))
|
|
((#\{ #\}) (curly-infix?))
|
|
(else #f)))
|
|
|
|
(define (read-token ch)
|
|
(take-until ch delimiter?))
|
|
|
|
(define (read-mixed-case-symbol ch)
|
|
(let* ((str (read-token ch))
|
|
(len (string-length str)))
|
|
(cond
|
|
((and (eq? (keyword-style) keyword-style-postfix)
|
|
(> len 1) (eqv? #\: (string-ref str (1- len))))
|
|
(let ((str (substring str 0 (1- len))))
|
|
(symbol->keyword
|
|
(string->symbol
|
|
(if (case-insensitive?)
|
|
(string-downcase str)
|
|
str)))))
|
|
(else
|
|
(string->symbol
|
|
(if (case-insensitive?)
|
|
(string-downcase str)
|
|
str))))))
|
|
|
|
(define (read-parenthesized rdelim)
|
|
(define (finish-curly-infix ret)
|
|
;; Perform syntactic transformations on {...} lists.
|
|
(define (extract-infix-list ls)
|
|
(and (pair? ls)
|
|
(let ((x (car ls))
|
|
(ls (cdr ls)))
|
|
(and (pair? ls)
|
|
(let ((op (car ls))
|
|
(ls (cdr ls)))
|
|
(if (and (pair? ls) (null? (cdr ls)))
|
|
(cons* op x ls)
|
|
(let ((tail (extract-infix-list ls)))
|
|
(and tail
|
|
(equal? (strip-annotation op)
|
|
(strip-annotation (car tail)))
|
|
(cons* op x (cdr tail))))))))))
|
|
(cond
|
|
((not (eqv? rdelim #\})) ret) ; Only on {...} lists.
|
|
((not (pair? ret)) ret) ; {} => (); {.x} => x
|
|
((null? (cdr ret)) (car ret)); {x} => x
|
|
((and (pair? (cdr ret)) (null? (cddr ret))) ret) ; {x y} => (x y)
|
|
((extract-infix-list ret)) ; {x + y + ... + z} => (+ x y ... z)
|
|
(else (cons '$nfx$ ret)))) ; {x y . z} => ($nfx$ x y . z)
|
|
(define curly? (eqv? rdelim #\}))
|
|
(finish-curly-infix
|
|
(let lp ((ch (next-non-whitespace)))
|
|
(when (eof-object? ch)
|
|
(error "unexpected end of input while searching for: ~A"
|
|
rdelim))
|
|
(cond
|
|
((eqv? ch rdelim) '())
|
|
((or (eqv? ch #\))
|
|
(and (eqv? ch #\]) (or (square-brackets?) (curly-infix?)))
|
|
(and (eqv? ch #\}) (curly-infix?)))
|
|
(error "mismatched close paren: ~A" ch))
|
|
(else
|
|
(let ((expr (read-expr ch)))
|
|
;; Note that it is possible for scm_read_expression to
|
|
;; return `.', but not as part of a dotted pair: as in
|
|
;; #{.}#. Indeed an example is here!
|
|
(if (and (eqv? ch #\.) (eq? (strip-annotation expr) '#{.}#))
|
|
(let* ((tail (read-subexpression "tail of improper list"))
|
|
(close (next-non-whitespace)))
|
|
(unless (eqv? close rdelim)
|
|
(error "missing close paren: ~A" close))
|
|
tail)
|
|
(cons expr (lp (next-non-whitespace))))))))))
|
|
|
|
(define (hex-digit ch)
|
|
(case ch
|
|
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
|
|
(- (char->integer ch) (char->integer #\0)))
|
|
((#\a #\b #\c #\d #\e #\f)
|
|
(+ 10 (- (char->integer ch) (char->integer #\a))))
|
|
((#\A #\B #\C #\D #\E #\F)
|
|
(+ 10 (- (char->integer ch) (char->integer #\A))))
|
|
(else #f)))
|
|
|
|
(define (read-r6rs-hex-escape)
|
|
(let ((ch (next)))
|
|
(cond
|
|
((hex-digit ch) =>
|
|
(lambda (res)
|
|
(let lp ((res res))
|
|
(let ((ch (next)))
|
|
(cond
|
|
((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
|
|
((eqv? ch #\;) (integer->char res))
|
|
((eof-object? ch)
|
|
(error "unexpected end of input in character escape sequence"))
|
|
(else
|
|
(error "invalid character in escape sequence: ~S" ch)))))))
|
|
((eof-object? ch)
|
|
(error "unexpected end of input in character escape sequence"))
|
|
(else
|
|
(error "invalid character in escape sequence: ~S" ch)))))
|
|
|
|
(define (read-fixed-hex-escape len)
|
|
(let lp ((len len) (res 0))
|
|
(if (zero? len)
|
|
(integer->char res)
|
|
(let ((ch (next)))
|
|
(cond
|
|
((hex-digit ch) =>
|
|
(lambda (digit)
|
|
(lp (1- len) (+ (* res 16) digit))))
|
|
((eof-object? ch)
|
|
(error "unexpected end of input in character escape sequence"))
|
|
(else
|
|
(error "invalid character in escape sequence: ~S" ch)))))))
|
|
|
|
(define (read-string rdelim)
|
|
(let lp ((out '()))
|
|
(let ((ch (next)))
|
|
(cond
|
|
((eof-object? ch)
|
|
(error "unexpected end of input while reading string"))
|
|
((eqv? ch rdelim)
|
|
(reverse-list->string out))
|
|
((eqv? ch #\\)
|
|
(let ((ch (next)))
|
|
(when (eof-object? ch)
|
|
(error "unexpected end of input while reading string"))
|
|
(cond
|
|
((eqv? ch #\newline)
|
|
(when (hungry-eol-escapes?)
|
|
;; Skip intraline whitespace before continuing.
|
|
(let skip ()
|
|
(let ((ch (peek)))
|
|
(when (and (not (eof-object? ch))
|
|
(or (eqv? ch #\tab)
|
|
(eq? (char-general-category ch) 'Zs)))
|
|
(next)
|
|
(skip)))))
|
|
(lp out))
|
|
((eqv? ch rdelim)
|
|
(lp (cons rdelim out)))
|
|
(else
|
|
(lp
|
|
(cons
|
|
(case ch
|
|
;; Accept "\(" for use at the beginning of
|
|
;; lines in multiline strings to avoid
|
|
;; confusing emacs lisp modes.
|
|
((#\| #\\ #\() ch)
|
|
((#\0) #\nul)
|
|
((#\f) #\ff)
|
|
((#\n) #\newline)
|
|
((#\r) #\return)
|
|
((#\t) #\tab)
|
|
((#\a) #\alarm)
|
|
((#\v) #\vtab)
|
|
((#\b) #\backspace)
|
|
((#\x)
|
|
(if (or (r6rs-escapes?) (eqv? rdelim #\|))
|
|
(read-r6rs-hex-escape)
|
|
(read-fixed-hex-escape 2)))
|
|
((#\u)
|
|
(read-fixed-hex-escape 4))
|
|
((#\U)
|
|
(read-fixed-hex-escape 6))
|
|
(else
|
|
(error "invalid character in escape sequence: ~S" ch)))
|
|
out))))))
|
|
(else
|
|
(lp (cons ch out)))))))
|
|
|
|
(define (read-character)
|
|
(let ((ch (next)))
|
|
(cond
|
|
((eof-object? ch)
|
|
(error "unexpected end of input after #\\"))
|
|
((delimiter? ch)
|
|
ch)
|
|
(else
|
|
(let* ((tok (read-token ch))
|
|
(len (string-length tok)))
|
|
(define dotted-circle #\x25cc)
|
|
(define r5rs-charnames
|
|
'(("space" . #\x20) ("newline" . #\x0a)))
|
|
(define r6rs-charnames
|
|
'(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
|
|
("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
|
|
("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
|
|
("delete" . #\x7f)))
|
|
(define r7rs-charnames
|
|
'(("escape" . #\x1b)))
|
|
(define C0-control-charnames
|
|
'(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
|
|
("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
|
|
("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
|
|
("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
|
|
("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
|
|
("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
|
|
("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
|
|
("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
|
|
("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
|
|
("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
|
|
("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
|
|
("del" . #\x7f)))
|
|
(define alt-charnames
|
|
'(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
|
|
;; Although R6RS and R7RS charnames specified as being
|
|
;; case-sensitive, Guile matches them case-insensitively, like
|
|
;; other char names.
|
|
(define (named-char tok alist)
|
|
(let lp ((alist alist))
|
|
(and (pair? alist)
|
|
(if (string-ci=? tok (caar alist))
|
|
(cdar alist)
|
|
(lp (cdr alist))))))
|
|
(cond
|
|
((= len 1) ch)
|
|
((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
|
|
;; Ignore dotted circles, which may be used to keep
|
|
;; combining characters from combining with the backslash in
|
|
;; #\charname.
|
|
ch)
|
|
((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
|
|
(string->number tok 8))
|
|
;; Specifying a codepoint as an octal value.
|
|
=> integer->char)
|
|
((and (eqv? ch #\x) (> len 1)
|
|
(string->number (substring tok 1) 16))
|
|
;; Specifying a codepoint as an hexadecimal value. Skip
|
|
;; initial "x".
|
|
=> integer->char)
|
|
((named-char tok r5rs-charnames))
|
|
((named-char tok r6rs-charnames))
|
|
((named-char tok r7rs-charnames))
|
|
((named-char tok C0-control-charnames))
|
|
((named-char tok alt-charnames))
|
|
(else
|
|
(error "unknown character name ~a" tok))))))))
|
|
|
|
(define (read-vector)
|
|
(list->vector (map strip-annotation (read-parenthesized #\)))))
|
|
|
|
(define (read-srfi-4-vector ch)
|
|
(read-array ch))
|
|
|
|
(define (maybe-read-boolean-tail tail)
|
|
(let ((len (string-length tail)))
|
|
(let lp ((i 0))
|
|
(or (= i len)
|
|
(let ((ch (peek)))
|
|
(and (not (eof-object? ch))
|
|
(eqv? (char-downcase ch) (string-ref tail i))
|
|
(or (begin
|
|
(next)
|
|
(lp (1+ i)))
|
|
(begin
|
|
(unread-char ch port)
|
|
#f))))))))
|
|
|
|
(define (read-false-or-srfi-4-vector)
|
|
(let ((ch (peek)))
|
|
(if (or (eqv? ch #\3)
|
|
(eqv? ch #\6))
|
|
(read-srfi-4-vector #\f)
|
|
(begin
|
|
(maybe-read-boolean-tail "alse")
|
|
#f))))
|
|
|
|
(define (read-bytevector)
|
|
(define (expect ch)
|
|
(unless (eqv? (next) ch)
|
|
(error "invalid bytevector prefix" ch)))
|
|
(expect #\u)
|
|
(expect #\8)
|
|
(expect #\()
|
|
(list->typed-array 'vu8 1
|
|
(map strip-annotation (read-parenthesized #\)))))
|
|
|
|
;; FIXME: We should require a terminating delimiter.
|
|
(define (read-bitvector)
|
|
(list->bitvector
|
|
(let lp ()
|
|
(let ((ch (peek)))
|
|
(case ch
|
|
((#\0) (next) (cons #f (lp)))
|
|
((#\1) (next) (cons #t (lp)))
|
|
(else '()))))))
|
|
|
|
(define (read-boolean ch)
|
|
;; Historically, Guile hasn't required a delimiter after #f / #t.
|
|
;; When the longer #false / #true forms were added, we kept this
|
|
;; behavior. It is terrible and we should change it!!
|
|
(case ch
|
|
((#\t #\T)
|
|
(maybe-read-boolean-tail "rue")
|
|
#t)
|
|
(else
|
|
(maybe-read-boolean-tail "alse")
|
|
#f)))
|
|
|
|
(define (read-keyword)
|
|
(let ((expr (strip-annotation (read-subexpression "keyword"))))
|
|
(unless (symbol? expr)
|
|
(error "keyword prefix #: not followed by a symbol: ~a" expr))
|
|
(symbol->keyword expr)))
|
|
|
|
(define (read-array ch)
|
|
(define (read-decimal-integer ch alt)
|
|
;; This parser has problems but it's what Guile's read.c does. Any
|
|
;; fix should come later and to both of them.
|
|
(define (decimal-digit ch)
|
|
(and (not (eof-object? ch))
|
|
(let ((digit (- (char->integer ch) (char->integer #\0))))
|
|
(and (<= 0 digit 9) digit))))
|
|
(let*-values (((sign ch) (if (eqv? ch #\-)
|
|
(values -1 (next))
|
|
(values 1 ch))))
|
|
(let lp ((ch ch) (res #f))
|
|
(cond
|
|
((decimal-digit ch)
|
|
=> (lambda (digit)
|
|
(lp (next) (if res (+ (* 10 res) digit) digit))))
|
|
(else
|
|
(values ch (if res (* res sign) alt)))))))
|
|
(define (read-rank ch)
|
|
(let*-values (((ch rank) (read-decimal-integer ch 1)))
|
|
(when (< rank 0)
|
|
(error "array rank must be non-negative"))
|
|
(when (eof-object? ch)
|
|
(error "unexpected end of input while reading array"))
|
|
(values ch rank)))
|
|
(define (read-tag ch)
|
|
(let lp ((ch ch) (chars '()))
|
|
(when (eof-object? ch)
|
|
(error "unexpected end of input while reading array"))
|
|
(if (memv ch '(#\( #\@ #\:))
|
|
(values ch
|
|
(if (null? chars)
|
|
#t
|
|
(string->symbol (list->string (reverse chars)))))
|
|
(lp (next) (cons ch chars)))))
|
|
(define (read-dimension ch)
|
|
(let*-values (((ch lbnd) (if (eqv? ch #\@)
|
|
(read-decimal-integer (next) 0)
|
|
(values ch 0)))
|
|
((ch len) (if (eqv? ch #\:)
|
|
(read-decimal-integer (next) 0)
|
|
(values ch #f))))
|
|
(when (and len (< len 0))
|
|
(error "array length must be non-negative"))
|
|
(when (eof-object? ch)
|
|
(error "unexpected end of input while reading array"))
|
|
(values ch
|
|
(if len
|
|
(list lbnd (+ lbnd (1- len)))
|
|
lbnd))))
|
|
(define (read-shape ch alt)
|
|
(if (memv ch '(#\@ #\:))
|
|
(let*-values (((ch head) (read-dimension ch))
|
|
((ch tail) (read-shape ch '())))
|
|
(values ch (cons head tail)))
|
|
(values ch alt)))
|
|
(define (read-elements ch rank)
|
|
(unless (eqv? ch #\()
|
|
(error "missing '(' in vector or array literal"))
|
|
(let ((elts (map strip-annotation (read-parenthesized #\)))))
|
|
(if (zero? rank)
|
|
(begin
|
|
;; Handle special print syntax of rank zero arrays; see
|
|
;; scm_i_print_array for a rationale.
|
|
(when (null? elts)
|
|
(error "too few elements in array literal, need 1"))
|
|
(unless (null? (cdr elts))
|
|
(error "too many elements in array literal, need 1"))
|
|
(car elts))
|
|
elts)))
|
|
(let*-values (((ch rank) (read-rank ch))
|
|
((ch tag) (read-tag ch))
|
|
((ch shape) (read-shape ch rank))
|
|
((elts) (read-elements ch rank)))
|
|
(when (and (pair? shape) (not (eqv? (length shape) rank)))
|
|
(error "the number of shape specifications must match the array rank"))
|
|
(list->typed-array tag shape elts)))
|
|
|
|
(define (read-number-and-radix ch)
|
|
(let ((tok (string-append "#" (read-token ch))))
|
|
(or (string->number tok)
|
|
(error "unknown # object: ~S" tok))))
|
|
|
|
(define (read-extended-symbol)
|
|
(define (next-not-eof)
|
|
(let ((ch (next)))
|
|
(when (eof-object? ch)
|
|
(error "end of input while reading symbol"))
|
|
ch))
|
|
(string->symbol
|
|
(list->string
|
|
(let lp ((saw-brace? #f))
|
|
(let lp/inner ((ch (next-not-eof))
|
|
(saw-brace? saw-brace?))
|
|
(cond
|
|
(saw-brace?
|
|
(if (eqv? ch #\#)
|
|
'()
|
|
;; Don't eat CH, see
|
|
;; <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=49623>.
|
|
(cons #\} (lp/inner ch #f))))
|
|
((eqv? ch #\})
|
|
(lp #t))
|
|
((eqv? ch #\\)
|
|
;; It used to be that print.c would print extended-read-syntax
|
|
;; symbols with backslashes before "non-standard" chars, but
|
|
;; this routine wouldn't do anything with those escapes.
|
|
;; Bummer. What we've done is to change print.c to output
|
|
;; R6RS hex escapes for those characters, relying on the fact
|
|
;; that the extended read syntax would never put a `\' before
|
|
;; an `x'. For now, we just ignore other instances of
|
|
;; backslash in the string.
|
|
(let* ((ch (next-not-eof))
|
|
(ch (if (eqv? ch #\x)
|
|
(read-r6rs-hex-escape)
|
|
ch)))
|
|
(cons ch (lp #f))))
|
|
(else
|
|
(cons ch (lp #f)))))))))
|
|
|
|
(define (read-nil)
|
|
;; Have already read "#\n" -- now read "il".
|
|
(let ((id (read-mixed-case-symbol #\n)))
|
|
(unless (eq? id 'nil)
|
|
(error "unexpected input while reading #nil: ~a" id))
|
|
#nil))
|
|
|
|
(define (read-sharp)
|
|
(let* ((ch (next)))
|
|
(cond
|
|
((eof-object? ch)
|
|
(error "unexpected end of input after #"))
|
|
((read-hash-procedure ch)
|
|
=> (lambda (proc) (proc ch port)))
|
|
(else
|
|
(case ch
|
|
((#\\) (read-character))
|
|
((#\() (read-vector))
|
|
((#\s #\u #\c) (read-srfi-4-vector ch))
|
|
((#\f) (read-false-or-srfi-4-vector))
|
|
((#\v) (read-bytevector))
|
|
((#\*) (read-bitvector))
|
|
((#\t #\T #\F) (read-boolean ch))
|
|
((#\:) (read-keyword))
|
|
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@)
|
|
(read-array ch))
|
|
((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
|
|
(read-number-and-radix ch))
|
|
((#\{) (read-extended-symbol))
|
|
((#\') (list 'syntax (read-subexpression "syntax expression")))
|
|
((#\`) (list 'quasisyntax
|
|
(read-subexpression "quasisyntax expression")))
|
|
((#\,)
|
|
(if (eqv? #\@ (peek))
|
|
(begin
|
|
(next)
|
|
(list 'unsyntax-splicing
|
|
(read-subexpression "unsyntax-splicing expression")))
|
|
(list 'unsyntax (read-subexpression "unsyntax expression"))))
|
|
((#\n) (read-nil))
|
|
(else
|
|
(error "Unknown # object: ~S" (string #\# ch))))))))
|
|
|
|
(define (read-number ch)
|
|
(let* ((str (read-token ch)))
|
|
(or (string->number str)
|
|
(string->symbol (if (case-insensitive?)
|
|
(string-downcase str)
|
|
str)))))
|
|
|
|
(define (read-expr* ch)
|
|
(case ch
|
|
((#\{)
|
|
(cond
|
|
((curly-infix?)
|
|
(set! neoteric (1+ neoteric))
|
|
(let ((expr (read-parenthesized #\})))
|
|
(set! neoteric (1- neoteric))
|
|
expr))
|
|
(else
|
|
(read-mixed-case-symbol ch))))
|
|
((#\[)
|
|
(cond
|
|
((square-brackets?)
|
|
(read-parenthesized #\]))
|
|
((curly-infix?)
|
|
;; The syntax of neoteric expressions requires that '[' be a
|
|
;; delimiter when curly-infix is enabled, so it cannot be part
|
|
;; of an unescaped symbol. We might as well do something
|
|
;; useful with it, so we adopt Kawa's convention: [...] =>
|
|
;; ($bracket-list$ ...)
|
|
;; FIXME: source locations for this cons
|
|
(cons '$bracket-list$ (read-parenthesized #\])))
|
|
(else
|
|
(read-mixed-case-symbol ch))))
|
|
((#\()
|
|
(read-parenthesized #\)))
|
|
((#\")
|
|
(read-string ch))
|
|
((#\|)
|
|
(if (r7rs-symbols?)
|
|
(string->symbol (read-string ch))
|
|
(read-mixed-case-symbol ch)))
|
|
((#\')
|
|
(list 'quote (read-subexpression "quoted expression")))
|
|
((#\`)
|
|
(list 'quasiquote (read-subexpression "quasiquoted expression")))
|
|
((#\,)
|
|
(cond
|
|
((eqv? #\@ (peek))
|
|
(next)
|
|
(list 'unquote-splicing (read-subexpression "subexpression of ,@")))
|
|
(else
|
|
(list 'unquote (read-subexpression "unquoted expression")))))
|
|
((#\#)
|
|
;; FIXME: read-sharp should recur if we read a comment
|
|
(read-sharp))
|
|
((#\))
|
|
(error "unexpected \")\""))
|
|
((#\})
|
|
(if (curly-infix?)
|
|
(error "unexpected \"}\"")
|
|
(read-mixed-case-symbol ch)))
|
|
((#\])
|
|
(if (square-brackets?)
|
|
(error "unexpected \"]\"")
|
|
(read-mixed-case-symbol ch)))
|
|
((#\:)
|
|
(if (eq? (keyword-style) keyword-style-prefix)
|
|
;; FIXME: Don't skip whitespace here.
|
|
(let ((sym (read-subexpression ":keyword")))
|
|
(symbol->keyword (strip-annotation sym)))
|
|
(read-mixed-case-symbol ch)))
|
|
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.)
|
|
(read-number ch))
|
|
(else
|
|
(read-mixed-case-symbol ch))))
|
|
|
|
(define (read-neoteric ch)
|
|
(let lp ((expr (read-expr* ch)))
|
|
;; 'expr' is the first component of the neoteric expression. If
|
|
;; the next character is '(', '[', or '{', (without any
|
|
;; intervening whitespace), we use it to construct a new
|
|
;; expression, and loop. For example:
|
|
;; f{n - 1}(x) => ((f (- n 1)) x).
|
|
(case (peek)
|
|
((#\() ;; e(...) => (e ...)
|
|
(next)
|
|
(lp (cons expr (read-parenthesized #\)))))
|
|
((#\[) ;; e[...] => ($bracket-apply$ e ...)
|
|
(next)
|
|
(lp (cons* '$bracket-apply$ expr (read-parenthesized #\]))))
|
|
((#\{) ;; e{} => (e); e{...} => (e {...})
|
|
(next)
|
|
(let ((args (read-parenthesized #\})))
|
|
(lp (if (null? args)
|
|
(list expr)
|
|
(list expr args)))))
|
|
(else
|
|
expr))))
|
|
|
|
(define (read-expr ch)
|
|
(let ((line (port-line port))
|
|
(column (port-column port)))
|
|
(annotate line
|
|
column
|
|
(if (zero? neoteric)
|
|
(read-expr* ch)
|
|
(read-neoteric ch)))))
|
|
|
|
(define (read-directive)
|
|
(define (directive-char? ch)
|
|
(and (char? ch)
|
|
(or (eqv? ch #\-)
|
|
(char-alphabetic? ch)
|
|
(char-numeric? ch))))
|
|
(let ((ch (peek)))
|
|
(cond
|
|
((directive-char? ch)
|
|
(next)
|
|
(string->symbol (take-while ch directive-char?)))
|
|
(else
|
|
#f))))
|
|
|
|
(define (skip-scsh-comment)
|
|
(let lp ((ch (next)))
|
|
(cond
|
|
((eof-object? ch)
|
|
(error "unterminated `#! ... !#' comment"))
|
|
((eqv? ch #\!)
|
|
(let ((ch (next)))
|
|
(if (eqv? ch #\#)
|
|
(next)
|
|
(lp ch))))
|
|
(else
|
|
(lp (next))))))
|
|
|
|
(define (process-shebang)
|
|
;; After having read #!, we complete either with #!r6rs,
|
|
;; #!fold-case, #!no-fold-case, #!curly-infix,
|
|
;; #!curly-infix-and-bracket-lists, or a SCSH block comment
|
|
;; terminated by !#.
|
|
(let ((sym (read-directive)))
|
|
(cond
|
|
((eq? sym 'r6rs)
|
|
(set-reader-option! bitfield:case-insensitive? 0)
|
|
(set-reader-option! bitfield:r6rs-escapes? 1)
|
|
(set-reader-option! bitfield:square-brackets? 1)
|
|
(set-reader-option! bitfield:keyword-style keyword-style-hash-prefix)
|
|
(set-reader-option! bitfield:hungry-eol-escapes? 1)
|
|
(next))
|
|
((eq? sym 'fold-case)
|
|
(set-reader-option! bitfield:case-insensitive? 1)
|
|
(next))
|
|
((eq? sym 'no-fold-case)
|
|
(set-reader-option! bitfield:case-insensitive? 0)
|
|
(next))
|
|
((eq? sym 'curly-infix)
|
|
(set-reader-option! bitfield:curly-infix? 1)
|
|
(next))
|
|
((eq? sym 'curly-infix-and-bracket-lists)
|
|
(set-reader-option! bitfield:curly-infix? 1)
|
|
(set-reader-option! bitfield:square-brackets? 0)
|
|
(next))
|
|
(else
|
|
(skip-scsh-comment)))))
|
|
|
|
(define (skip-eol-comment)
|
|
(let ((ch (next)))
|
|
(cond
|
|
((eof-object? ch) ch)
|
|
((eq? ch #\newline) (next))
|
|
(else (skip-eol-comment)))))
|
|
|
|
;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
|
|
;; nested.
|
|
(define (skip-r6rs-block-comment)
|
|
;; We have read #|, now looking for |#.
|
|
(let ((ch (next)))
|
|
(when (eof-object? ch)
|
|
(error "unterminated `#| ... |#' comment"))
|
|
(cond
|
|
((and (eqv? ch #\|) (eqv? (peek) #\#))
|
|
;; Done.
|
|
(next)
|
|
(values))
|
|
((and (eqv? ch #\#) (eqv? (peek) #\|))
|
|
;; A nested comment.
|
|
(next)
|
|
(skip-r6rs-block-comment)
|
|
(skip-r6rs-block-comment))
|
|
(else
|
|
(skip-r6rs-block-comment)))))
|
|
|
|
(define (read-subexpression what)
|
|
(let ((ch (next-non-whitespace)))
|
|
(when (eof-object? ch)
|
|
(error (string-append "unexpected end of input while reading " what)))
|
|
(read-expr ch)))
|
|
|
|
(define (next-non-whitespace)
|
|
(let lp ((ch (next)))
|
|
(case ch
|
|
((#\;)
|
|
(lp (skip-eol-comment)))
|
|
((#\#)
|
|
(case (peek)
|
|
((#\!)
|
|
(next)
|
|
(lp (process-shebang)))
|
|
((#\;)
|
|
(next)
|
|
(read-subexpression "#; comment")
|
|
(next-non-whitespace))
|
|
((#\|)
|
|
(if (read-hash-procedure #\|)
|
|
ch
|
|
(begin
|
|
(next)
|
|
(skip-r6rs-block-comment)
|
|
(next-non-whitespace))))
|
|
(else ch)))
|
|
((#\space #\return #\ff #\newline #\tab)
|
|
(next-non-whitespace))
|
|
(else ch))))
|
|
|
|
(let ((ch (next-non-whitespace)))
|
|
(if (eof-object? ch)
|
|
ch
|
|
(read-expr ch))))
|
|
|
|
(define* (read #:optional (port (current-input-port)))
|
|
(define filename (port-filename port))
|
|
(define annotate
|
|
(if (memq 'positions (read-options))
|
|
(lambda (line column datum)
|
|
(when (and (supports-source-properties? datum)
|
|
;; Line or column can be invalid via
|
|
;; set-port-column! or ungetting chars beyond start
|
|
;; of line.
|
|
(<= 0 line)
|
|
(<= 1 column))
|
|
;; We always capture the column after one char of lookahead;
|
|
;; subtract off that lookahead value.
|
|
(set-source-properties! datum
|
|
`((filename . ,filename)
|
|
(line . ,line)
|
|
(column . ,(1- column)))))
|
|
datum)
|
|
(lambda (line column datum)
|
|
datum)))
|
|
(%read port annotate identity))
|
|
|
|
(define* (read-syntax #:optional (port (current-input-port)))
|
|
(define filename (port-filename port))
|
|
(define (annotate line column datum)
|
|
;; Usually when reading compound expressions consisting of multiple
|
|
;; syntax objects, like lists, the "leaves" of the expression are
|
|
;; annotated but the "root" isn't. Like in (A . B), A and B will be
|
|
;; annotated but the pair won't. Therefore the usually correct
|
|
;; thing to do is to just annotate the result. However in the case
|
|
;; of reading ( . C), the result is the already annotated C, which
|
|
;; we don't want to re-annotate. Therefore we avoid re-annotating
|
|
;; already annotated objects.
|
|
(if (syntax? datum)
|
|
datum
|
|
(datum->syntax #f ; No lexical context.
|
|
datum
|
|
#:source (vector filename line (1- column)))))
|
|
(%read port annotate syntax->datum))
|