1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00
guile/module/ice-9/read.scm
Maxime Devos c78c130b1d ice-9/read: Parse #{}}# properly.
This is a regression since Guile 3.0.2 and breaks compilation
of a Guile library.

* module/ice-9/read.scm
  (%read)[read-parenthesized]: When SAW-BRACE? is #t but CH isn't
  #\#, don't eat CH.
* test-suite/tests/reader.test
  ("#{}#): Add four test cases.
2021-08-02 12:15:59 +02:00

896 lines
33 KiB
Scheme

;;; Scheme reader
;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021
;;; Free Software Foundation, Inc.
;;;
;;; This library is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU Lesser General Public License as
;;; published by the Free Software Foundation, either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this program. If not, see
;;; <http://www.gnu.org/licenses/>.
;;; Commentary:
;;;
;;; Implementation of Scheme's "read".
;;;
;;; Code:
;; While porting read.c to Scheme, I found these expressions that result
;; in undesirable behavior in the C reader. Most all of them are also
;; present in the Scheme reader. Probably I should fix all of them, but
;; I would first like to prove that the Scheme reader is good enough.
;;
;; (call-with-input-string "," read)
;; (read-disable 'square-brackets), then (call-with-input-string "]" read)
;; (call-with-input-string "(#tru1)" read) => '(#t ru1)
;; (call-with-input-string "(#true1)" read) => '(#t 1)
;; (call-with-input-string "(#fAlse)" read) => '(#f Alse)
;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array
;; #: foo
;; #:#|what|#foo
;; #@-(1 2 3) => #(1 2 3)
;; (#*10101010102) => (#*1010101010 2)
(define-syntax let*-values
(syntax-rules ()
((_ () . body) (let () . body))
((_ ((vars expr) . binds) . body)
(call-with-values (lambda () expr)
(lambda vars (let*-values binds . body))))))
(define bitfield:record-positions? 0)
(define bitfield:case-insensitive? 2)
(define bitfield:keyword-style 4)
(define bitfield:r6rs-escapes? 6)
(define bitfield:square-brackets? 8)
(define bitfield:hungry-eol-escapes? 10)
(define bitfield:curly-infix? 12)
(define bitfield:r7rs-symbols? 14)
(define read-option-bits 16)
(define read-option-mask #b11)
(define read-option-inherit #b11)
(define read-options-inherit-all (1- (ash 1 read-option-bits)))
(define keyword-style-hash-prefix 0)
(define keyword-style-prefix 1)
(define keyword-style-postfix 2)
(define (compute-reader-options port)
(let ((options (read-options))
(port-options (or (%port-property port 'port-read-options)
read-options-inherit-all)))
(define-syntax-rule (option field exp)
(let ((port-option (logand port-options (ash read-option-mask field))))
(if (= port-option (ash read-option-inherit field))
exp
port-option)))
(define (bool key field)
(option field
(if (memq key options) (ash 1 field) 0)))
(define (enum key values field)
(option field
(ash (assq-ref values (and=> (memq key options) cadr)) field)))
(logior (bool 'positions bitfield:record-positions?)
(bool 'case-insensitive bitfield:case-insensitive?)
(enum 'keywords '((#f . 0) (prefix . 1) (postfix . 2))
bitfield:keyword-style)
(bool 'r6rs-hex-escapes bitfield:r6rs-escapes?)
(bool 'square-brackets bitfield:square-brackets?)
(bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?)
(bool 'curly-infix bitfield:curly-infix?)
(bool 'r7rs-symbols bitfield:r7rs-symbols?))))
(define (set-option options field new)
(logior (ash new field) (logand options (lognot (ash #b11 field)))))
(define (set-port-read-option! port field value)
(%set-port-property! port 'port-read-options
(set-option (or (%port-property port 'port-read-options)
read-options-inherit-all)
field value)))
(define (%read port annotate strip-annotation)
;; init read options
(define opts (compute-reader-options port))
(define (enabled? field)
(not (zero? (logand (ash 1 field) opts))))
(define (set-reader-option! field value)
(set! opts (set-option opts field value))
(set-port-read-option! port field value))
(define (case-insensitive?) (enabled? bitfield:case-insensitive?))
(define (keyword-style) (logand read-option-mask
(ash opts (- bitfield:keyword-style))))
(define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?))
(define (square-brackets?) (enabled? bitfield:square-brackets?))
(define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?))
(define (curly-infix?) (enabled? bitfield:curly-infix?))
(define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?))
(define neoteric 0)
(define (next) (read-char port))
(define (peek) (peek-char port))
(define filename (port-filename port))
(define (get-pos) (cons (port-line port) (port-column port)))
;; We are only ever interested in whether an object is a char or not.
(define (eof-object? x) (not (char? x)))
(define (input-error msg args)
(scm-error 'read-error #f
(format #f "~A:~S:~S: ~A"
(or filename "#<unknown port>")
(1+ (port-line port))
(1+ (port-column port))
msg)
args #f))
(define-syntax-rule (error msg arg ...)
(let ((args (list arg ...)))
(input-error msg args)))
(define (read-semicolon-comment)
(let ((ch (next)))
(cond
((eof-object? ch) ch)
((eqv? ch #\newline) (next))
(else (read-semicolon-comment)))))
(define-syntax-rule (take-until first pred)
(let lp ((out (list first)))
(let ((ch (peek)))
(if (or (eof-object? ch) (pred ch))
(reverse-list->string out)
(begin
(next)
(lp (cons ch out)))))))
(define-syntax-rule (take-while first pred)
(take-until first (lambda (ch) (not (pred ch)))))
(define (delimiter? ch)
(case ch
((#\( #\) #\; #\" #\space #\return #\ff #\newline #\tab) #t)
((#\[ #\]) (or (square-brackets?) (curly-infix?)))
((#\{ #\}) (curly-infix?))
(else #f)))
(define (read-token ch)
(take-until ch delimiter?))
(define (read-mixed-case-symbol ch)
(let* ((str (read-token ch))
(len (string-length str)))
(cond
((and (eq? (keyword-style) keyword-style-postfix)
(> len 1) (eqv? #\: (string-ref str (1- len))))
(let ((str (substring str 0 (1- len))))
(symbol->keyword
(string->symbol
(if (case-insensitive?)
(string-downcase str)
str)))))
(else
(string->symbol
(if (case-insensitive?)
(string-downcase str)
str))))))
(define (read-parenthesized rdelim)
(define (finish-curly-infix ret)
;; Perform syntactic transformations on {...} lists.
(define (extract-infix-list ls)
(and (pair? ls)
(let ((x (car ls))
(ls (cdr ls)))
(and (pair? ls)
(let ((op (car ls))
(ls (cdr ls)))
(if (and (pair? ls) (null? (cdr ls)))
(cons* op x ls)
(let ((tail (extract-infix-list ls)))
(and tail
(equal? (strip-annotation op)
(strip-annotation (car tail)))
(cons* op x (cdr tail))))))))))
(cond
((not (eqv? rdelim #\})) ret) ; Only on {...} lists.
((not (pair? ret)) ret) ; {} => (); {.x} => x
((null? (cdr ret)) (car ret)); {x} => x
((and (pair? (cdr ret)) (null? (cddr ret))) ret) ; {x y} => (x y)
((extract-infix-list ret)) ; {x + y + ... + z} => (+ x y ... z)
(else (cons '$nfx$ ret)))) ; {x y . z} => ($nfx$ x y . z)
(define curly? (eqv? rdelim #\}))
(finish-curly-infix
(let lp ((ch (next-non-whitespace)))
(when (eof-object? ch)
(error "unexpected end of input while searching for: ~A"
rdelim))
(cond
((eqv? ch rdelim) '())
((or (eqv? ch #\))
(and (eqv? ch #\]) (or (square-brackets?) (curly-infix?)))
(and (eqv? ch #\}) (curly-infix?)))
(error "mismatched close paren: ~A" ch))
(else
(let ((expr (read-expr ch)))
;; Note that it is possible for scm_read_expression to
;; return `.', but not as part of a dotted pair: as in
;; #{.}#. Indeed an example is here!
(if (and (eqv? ch #\.) (eq? (strip-annotation expr) '#{.}#))
(let* ((tail (read-subexpression "tail of improper list"))
(close (next-non-whitespace)))
(unless (eqv? close rdelim)
(error "missing close paren: ~A" close))
tail)
(cons expr (lp (next-non-whitespace))))))))))
(define (hex-digit ch)
(case ch
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
(- (char->integer ch) (char->integer #\0)))
((#\a #\b #\c #\d #\e #\f)
(+ 10 (- (char->integer ch) (char->integer #\a))))
((#\A #\B #\C #\D #\E #\F)
(+ 10 (- (char->integer ch) (char->integer #\A))))
(else #f)))
(define (read-r6rs-hex-escape)
(let ((ch (next)))
(cond
((hex-digit ch) =>
(lambda (res)
(let lp ((res res))
(let ((ch (next)))
(cond
((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
((eqv? ch #\;) (integer->char res))
((eof-object? ch)
(error "unexpected end of input in character escape sequence"))
(else
(error "invalid character in escape sequence: ~S" ch)))))))
((eof-object? ch)
(error "unexpected end of input in character escape sequence"))
(else
(error "invalid character in escape sequence: ~S" ch)))))
(define (read-fixed-hex-escape len)
(let lp ((len len) (res 0))
(if (zero? len)
(integer->char res)
(let ((ch (next)))
(cond
((hex-digit ch) =>
(lambda (digit)
(lp (1- len) (+ (* res 16) digit))))
((eof-object? ch)
(error "unexpected end of input in character escape sequence"))
(else
(error "invalid character in escape sequence: ~S" ch)))))))
(define (read-string rdelim)
(let lp ((out '()))
(let ((ch (next)))
(cond
((eof-object? ch)
(error "unexpected end of input while reading string"))
((eqv? ch rdelim)
(reverse-list->string out))
((eqv? ch #\\)
(let ((ch (next)))
(when (eof-object? ch)
(error "unexpected end of input while reading string"))
(cond
((eqv? ch #\newline)
(when (hungry-eol-escapes?)
;; Skip intraline whitespace before continuing.
(let skip ()
(let ((ch (peek)))
(when (and (not (eof-object? ch))
(or (eqv? ch #\tab)
(eq? (char-general-category ch) 'Zs)))
(next)
(skip)))))
(lp out))
((eqv? ch rdelim)
(lp (cons rdelim out)))
(else
(lp
(cons
(case ch
;; Accept "\(" for use at the beginning of
;; lines in multiline strings to avoid
;; confusing emacs lisp modes.
((#\| #\\ #\() ch)
((#\0) #\nul)
((#\f) #\ff)
((#\n) #\newline)
((#\r) #\return)
((#\t) #\tab)
((#\a) #\alarm)
((#\v) #\vtab)
((#\b) #\backspace)
((#\x)
(if (or (r6rs-escapes?) (eqv? rdelim #\|))
(read-r6rs-hex-escape)
(read-fixed-hex-escape 2)))
((#\u)
(read-fixed-hex-escape 4))
((#\U)
(read-fixed-hex-escape 6))
(else
(error "invalid character in escape sequence: ~S" ch)))
out))))))
(else
(lp (cons ch out)))))))
(define (read-character)
(let ((ch (next)))
(cond
((eof-object? ch)
(error "unexpected end of input after #\\"))
((delimiter? ch)
ch)
(else
(let* ((tok (read-token ch))
(len (string-length tok)))
(define dotted-circle #\x25cc)
(define r5rs-charnames
'(("space" . #\x20) ("newline" . #\x0a)))
(define r6rs-charnames
'(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
("delete" . #\x7f)))
(define r7rs-charnames
'(("escape" . #\x1b)))
(define C0-control-charnames
'(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
("del" . #\x7f)))
(define alt-charnames
'(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
;; Although R6RS and R7RS charnames specified as being
;; case-sensitive, Guile matches them case-insensitively, like
;; other char names.
(define (named-char tok alist)
(let lp ((alist alist))
(and (pair? alist)
(if (string-ci=? tok (caar alist))
(cdar alist)
(lp (cdr alist))))))
(cond
((= len 1) ch)
((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
;; Ignore dotted circles, which may be used to keep
;; combining characters from combining with the backslash in
;; #\charname.
ch)
((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
(string->number tok 8))
;; Specifying a codepoint as an octal value.
=> integer->char)
((and (eqv? ch #\x) (> len 1)
(string->number (substring tok 1) 16))
;; Specifying a codepoint as an hexadecimal value. Skip
;; initial "x".
=> integer->char)
((named-char tok r5rs-charnames))
((named-char tok r6rs-charnames))
((named-char tok r7rs-charnames))
((named-char tok C0-control-charnames))
((named-char tok alt-charnames))
(else
(error "unknown character name ~a" tok))))))))
(define (read-vector)
(list->vector (map strip-annotation (read-parenthesized #\)))))
(define (read-srfi-4-vector ch)
(read-array ch))
(define (maybe-read-boolean-tail tail)
(let ((len (string-length tail)))
(let lp ((i 0))
(or (= i len)
(let ((ch (peek)))
(and (not (eof-object? ch))
(eqv? (char-downcase ch) (string-ref tail i))
(or (begin
(next)
(lp (1+ i)))
(begin
(unread-char ch port)
#f))))))))
(define (read-false-or-srfi-4-vector)
(let ((ch (peek)))
(if (or (eqv? ch #\3)
(eqv? ch #\6))
(read-srfi-4-vector #\f)
(begin
(maybe-read-boolean-tail "alse")
#f))))
(define (read-bytevector)
(define (expect ch)
(unless (eqv? (next) ch)
(error "invalid bytevector prefix" ch)))
(expect #\u)
(expect #\8)
(expect #\()
(list->typed-array 'vu8 1
(map strip-annotation (read-parenthesized #\)))))
;; FIXME: We should require a terminating delimiter.
(define (read-bitvector)
(list->bitvector
(let lp ()
(let ((ch (peek)))
(case ch
((#\0) (next) (cons #f (lp)))
((#\1) (next) (cons #t (lp)))
(else '()))))))
(define (read-boolean ch)
;; Historically, Guile hasn't required a delimiter after #f / #t.
;; When the longer #false / #true forms were added, we kept this
;; behavior. It is terrible and we should change it!!
(case ch
((#\t #\T)
(maybe-read-boolean-tail "rue")
#t)
(else
(maybe-read-boolean-tail "alse")
#f)))
(define (read-keyword)
(let ((expr (strip-annotation (read-subexpression "keyword"))))
(unless (symbol? expr)
(error "keyword prefix #: not followed by a symbol: ~a" expr))
(symbol->keyword expr)))
(define (read-array ch)
(define (read-decimal-integer ch alt)
;; This parser has problems but it's what Guile's read.c does. Any
;; fix should come later and to both of them.
(define (decimal-digit ch)
(and (not (eof-object? ch))
(let ((digit (- (char->integer ch) (char->integer #\0))))
(and (<= 0 digit 9) digit))))
(let*-values (((sign ch) (if (eqv? ch #\-)
(values -1 (next))
(values 1 ch))))
(let lp ((ch ch) (res #f))
(cond
((decimal-digit ch)
=> (lambda (digit)
(lp (next) (if res (+ (* 10 res) digit) digit))))
(else
(values ch (if res (* res sign) alt)))))))
(define (read-rank ch)
(let*-values (((ch rank) (read-decimal-integer ch 1)))
(when (< rank 0)
(error "array rank must be non-negative"))
(when (eof-object? ch)
(error "unexpected end of input while reading array"))
(values ch rank)))
(define (read-tag ch)
(let lp ((ch ch) (chars '()))
(when (eof-object? ch)
(error "unexpected end of input while reading array"))
(if (memv ch '(#\( #\@ #\:))
(values ch
(if (null? chars)
#t
(string->symbol (list->string (reverse chars)))))
(lp (next) (cons ch chars)))))
(define (read-dimension ch)
(let*-values (((ch lbnd) (if (eqv? ch #\@)
(read-decimal-integer (next) 0)
(values ch 0)))
((ch len) (if (eqv? ch #\:)
(read-decimal-integer (next) 0)
(values ch #f))))
(when (and len (< len 0))
(error "array length must be non-negative"))
(when (eof-object? ch)
(error "unexpected end of input while reading array"))
(values ch
(if len
(list lbnd (+ lbnd (1- len)))
lbnd))))
(define (read-shape ch alt)
(if (memv ch '(#\@ #\:))
(let*-values (((ch head) (read-dimension ch))
((ch tail) (read-shape ch '())))
(values ch (cons head tail)))
(values ch alt)))
(define (read-elements ch rank)
(unless (eqv? ch #\()
(error "missing '(' in vector or array literal"))
(let ((elts (map strip-annotation (read-parenthesized #\)))))
(if (zero? rank)
(begin
;; Handle special print syntax of rank zero arrays; see
;; scm_i_print_array for a rationale.
(when (null? elts)
(error "too few elements in array literal, need 1"))
(unless (null? (cdr elts))
(error "too many elements in array literal, need 1"))
(car elts))
elts)))
(let*-values (((ch rank) (read-rank ch))
((ch tag) (read-tag ch))
((ch shape) (read-shape ch rank))
((elts) (read-elements ch rank)))
(when (and (pair? shape) (not (eqv? (length shape) rank)))
(error "the number of shape specifications must match the array rank"))
(list->typed-array tag shape elts)))
(define (read-number-and-radix ch)
(let ((tok (string-append "#" (read-token ch))))
(or (string->number tok)
(error "unknown # object: ~S" tok))))
(define (read-extended-symbol)
(define (next-not-eof)
(let ((ch (next)))
(when (eof-object? ch)
(error "end of input while reading symbol"))
ch))
(string->symbol
(list->string
(let lp ((saw-brace? #f))
(let lp/inner ((ch (next-not-eof))
(saw-brace? saw-brace?))
(cond
(saw-brace?
(if (eqv? ch #\#)
'()
;; Don't eat CH, see
;; <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=49623>.
(cons #\} (lp/inner ch #f))))
((eqv? ch #\})
(lp #t))
((eqv? ch #\\)
;; It used to be that print.c would print extended-read-syntax
;; symbols with backslashes before "non-standard" chars, but
;; this routine wouldn't do anything with those escapes.
;; Bummer. What we've done is to change print.c to output
;; R6RS hex escapes for those characters, relying on the fact
;; that the extended read syntax would never put a `\' before
;; an `x'. For now, we just ignore other instances of
;; backslash in the string.
(let* ((ch (next-not-eof))
(ch (if (eqv? ch #\x)
(read-r6rs-hex-escape)
ch)))
(cons ch (lp #f))))
(else
(cons ch (lp #f)))))))))
(define (read-nil)
;; Have already read "#\n" -- now read "il".
(let ((id (read-mixed-case-symbol #\n)))
(unless (eq? id 'nil)
(error "unexpected input while reading #nil: ~a" id))
#nil))
(define (read-sharp)
(let* ((ch (next)))
(cond
((eof-object? ch)
(error "unexpected end of input after #"))
((read-hash-procedure ch)
=> (lambda (proc) (proc ch port)))
(else
(case ch
((#\\) (read-character))
((#\() (read-vector))
((#\s #\u #\c) (read-srfi-4-vector ch))
((#\f) (read-false-or-srfi-4-vector))
((#\v) (read-bytevector))
((#\*) (read-bitvector))
((#\t #\T #\F) (read-boolean ch))
((#\:) (read-keyword))
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@)
(read-array ch))
((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
(read-number-and-radix ch))
((#\{) (read-extended-symbol))
((#\') (list 'syntax (read-subexpression "syntax expression")))
((#\`) (list 'quasisyntax
(read-subexpression "quasisyntax expression")))
((#\,)
(if (eqv? #\@ (peek))
(begin
(next)
(list 'unsyntax-splicing
(read-subexpression "unsyntax-splicing expression")))
(list 'unsyntax (read-subexpression "unsyntax expression"))))
((#\n) (read-nil))
(else
(error "Unknown # object: ~S" (string #\# ch))))))))
(define (read-number ch)
(let* ((str (read-token ch)))
(or (string->number str)
(string->symbol (if (case-insensitive?)
(string-downcase str)
str)))))
(define (read-expr* ch)
(case ch
((#\{)
(cond
((curly-infix?)
(set! neoteric (1+ neoteric))
(let ((expr (read-parenthesized #\})))
(set! neoteric (1- neoteric))
expr))
(else
(read-mixed-case-symbol ch))))
((#\[)
(cond
((square-brackets?)
(read-parenthesized #\]))
((curly-infix?)
;; The syntax of neoteric expressions requires that '[' be a
;; delimiter when curly-infix is enabled, so it cannot be part
;; of an unescaped symbol. We might as well do something
;; useful with it, so we adopt Kawa's convention: [...] =>
;; ($bracket-list$ ...)
;; FIXME: source locations for this cons
(cons '$bracket-list$ (read-parenthesized #\])))
(else
(read-mixed-case-symbol ch))))
((#\()
(read-parenthesized #\)))
((#\")
(read-string ch))
((#\|)
(if (r7rs-symbols?)
(string->symbol (read-string ch))
(read-mixed-case-symbol ch)))
((#\')
(list 'quote (read-subexpression "quoted expression")))
((#\`)
(list 'quasiquote (read-subexpression "quasiquoted expression")))
((#\,)
(cond
((eqv? #\@ (peek))
(next)
(list 'unquote-splicing (read-subexpression "subexpression of ,@")))
(else
(list 'unquote (read-subexpression "unquoted expression")))))
((#\#)
;; FIXME: read-sharp should recur if we read a comment
(read-sharp))
((#\))
(error "unexpected \")\""))
((#\})
(if (curly-infix?)
(error "unexpected \"}\"")
(read-mixed-case-symbol ch)))
((#\])
(if (square-brackets?)
(error "unexpected \"]\"")
(read-mixed-case-symbol ch)))
((#\:)
(if (eq? (keyword-style) keyword-style-prefix)
;; FIXME: Don't skip whitespace here.
(let ((sym (read-subexpression ":keyword")))
(symbol->keyword (strip-annotation sym)))
(read-mixed-case-symbol ch)))
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.)
(read-number ch))
(else
(read-mixed-case-symbol ch))))
(define (read-neoteric ch)
(let lp ((expr (read-expr* ch)))
;; 'expr' is the first component of the neoteric expression. If
;; the next character is '(', '[', or '{', (without any
;; intervening whitespace), we use it to construct a new
;; expression, and loop. For example:
;; f{n - 1}(x) => ((f (- n 1)) x).
(case (peek)
((#\() ;; e(...) => (e ...)
(next)
(lp (cons expr (read-parenthesized #\)))))
((#\[) ;; e[...] => ($bracket-apply$ e ...)
(next)
(lp (cons* '$bracket-apply$ expr (read-parenthesized #\]))))
((#\{) ;; e{} => (e); e{...} => (e {...})
(next)
(let ((args (read-parenthesized #\})))
(lp (if (null? args)
(list expr)
(list expr args)))))
(else
expr))))
(define (read-expr ch)
(let ((line (port-line port))
(column (port-column port)))
(annotate line
column
(if (zero? neoteric)
(read-expr* ch)
(read-neoteric ch)))))
(define (read-directive)
(define (directive-char? ch)
(and (char? ch)
(or (eqv? ch #\-)
(char-alphabetic? ch)
(char-numeric? ch))))
(let ((ch (peek)))
(cond
((directive-char? ch)
(next)
(string->symbol (take-while ch directive-char?)))
(else
#f))))
(define (skip-scsh-comment)
(let lp ((ch (next)))
(cond
((eof-object? ch)
(error "unterminated `#! ... !#' comment"))
((eqv? ch #\!)
(let ((ch (next)))
(if (eqv? ch #\#)
(next)
(lp ch))))
(else
(lp (next))))))
(define (process-shebang)
;; After having read #!, we complete either with #!r6rs,
;; #!fold-case, #!no-fold-case, #!curly-infix,
;; #!curly-infix-and-bracket-lists, or a SCSH block comment
;; terminated by !#.
(let ((sym (read-directive)))
(cond
((eq? sym 'r6rs)
(set-reader-option! bitfield:case-insensitive? 0)
(set-reader-option! bitfield:r6rs-escapes? 1)
(set-reader-option! bitfield:square-brackets? 1)
(set-reader-option! bitfield:keyword-style keyword-style-hash-prefix)
(set-reader-option! bitfield:hungry-eol-escapes? 1)
(next))
((eq? sym 'fold-case)
(set-reader-option! bitfield:case-insensitive? 1)
(next))
((eq? sym 'no-fold-case)
(set-reader-option! bitfield:case-insensitive? 0)
(next))
((eq? sym 'curly-infix)
(set-reader-option! bitfield:curly-infix? 1)
(next))
((eq? sym 'curly-infix-and-bracket-lists)
(set-reader-option! bitfield:curly-infix? 1)
(set-reader-option! bitfield:square-brackets? 0)
(next))
(else
(skip-scsh-comment)))))
(define (skip-eol-comment)
(let ((ch (next)))
(cond
((eof-object? ch) ch)
((eq? ch #\newline) (next))
(else (skip-eol-comment)))))
;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
;; nested.
(define (skip-r6rs-block-comment)
;; We have read #|, now looking for |#.
(let ((ch (next)))
(when (eof-object? ch)
(error "unterminated `#| ... |#' comment"))
(cond
((and (eqv? ch #\|) (eqv? (peek) #\#))
;; Done.
(next)
(values))
((and (eqv? ch #\#) (eqv? (peek) #\|))
;; A nested comment.
(next)
(skip-r6rs-block-comment)
(skip-r6rs-block-comment))
(else
(skip-r6rs-block-comment)))))
(define (read-subexpression what)
(let ((ch (next-non-whitespace)))
(when (eof-object? ch)
(error (string-append "unexpected end of input while reading " what)))
(read-expr ch)))
(define (next-non-whitespace)
(let lp ((ch (next)))
(case ch
((#\;)
(lp (skip-eol-comment)))
((#\#)
(case (peek)
((#\!)
(next)
(lp (process-shebang)))
((#\;)
(next)
(read-subexpression "#; comment")
(next-non-whitespace))
((#\|)
(if (read-hash-procedure #\|)
ch
(begin
(next)
(skip-r6rs-block-comment)
(next-non-whitespace))))
(else ch)))
((#\space #\return #\ff #\newline #\tab)
(next-non-whitespace))
(else ch))))
(let ((ch (next-non-whitespace)))
(if (eof-object? ch)
ch
(read-expr ch))))
(define* (read #:optional (port (current-input-port)))
(define filename (port-filename port))
(define annotate
(if (memq 'positions (read-options))
(lambda (line column datum)
(when (and (supports-source-properties? datum)
;; Line or column can be invalid via
;; set-port-column! or ungetting chars beyond start
;; of line.
(<= 0 line)
(<= 1 column))
;; We always capture the column after one char of lookahead;
;; subtract off that lookahead value.
(set-source-properties! datum
`((filename . ,filename)
(line . ,line)
(column . ,(1- column)))))
datum)
(lambda (line column datum)
datum)))
(%read port annotate identity))
(define* (read-syntax #:optional (port (current-input-port)))
(define filename (port-filename port))
(define (annotate line column datum)
;; Usually when reading compound expressions consisting of multiple
;; syntax objects, like lists, the "leaves" of the expression are
;; annotated but the "root" isn't. Like in (A . B), A and B will be
;; annotated but the pair won't. Therefore the usually correct
;; thing to do is to just annotate the result. However in the case
;; of reading ( . C), the result is the already annotated C, which
;; we don't want to re-annotate. Therefore we avoid re-annotating
;; already annotated objects.
(if (syntax? datum)
datum
(datum->syntax #f ; No lexical context.
datum
#:source (vector filename line (1- column)))))
(%read port annotate syntax->datum))