1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Add "read" implementation in Scheme

* module/Makefile.am (SOURCES): Add ice-9/read.
* module/ice-9/read.scm: New file.  The idea is to move the compiler to
  use this "read", after proving that it does the same as C.  Then we
  can switch to read-syntax that returns syntax objects with source
  locations, allowing us to annotate any datum.
This commit is contained in:
Andy Wingo 2021-02-13 22:22:33 +01:00
parent 7f8149b4de
commit 40e4e3b2a4
2 changed files with 867 additions and 0 deletions

View file

@ -146,6 +146,7 @@ SOURCES = \
ice-9/q.scm \
ice-9/r5rs.scm \
ice-9/rdelim.scm \
ice-9/read.scm \
ice-9/receive.scm \
ice-9/regex.scm \
ice-9/runq.scm \

866
module/ice-9/read.scm Normal file
View file

@ -0,0 +1,866 @@
;;; Scheme reader
;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021
;;; Free Software Foundation, Inc.
;;;
;;; This library is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU Lesser General Public License as
;;; published by the Free Software Foundation, either version 3 of the
;;; License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this program. If not, see
;;; <http://www.gnu.org/licenses/>.
;;; Commentary:
;;;
;;; Implementation of Scheme's "read".
;;;
;;; Code:
;; While porting read.c to Scheme, I found these expressions that result
;; in undesirable behavior in the C reader. Most all of them are also
;; present in the Scheme reader. Probably I should fix all of them, but
;; I would first like to prove that the Scheme reader is good enough.
;;
;; (call-with-input-string "," read)
;; (read-disable 'square-brackets), then (call-with-input-string "]" read)
;; (call-with-input-string "(#tru1)" read) => '(#t ru1)
;; (call-with-input-string "(#true1)" read) => '(#t 1)
;; (call-with-input-string "(#fAlse)" read) => '(#f Alse)
;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array
;; #: foo
;; #:#|what|#foo
;; #@-(1 2 3) => #(1 2 3)
;; (#*10101010102) => (#*1010101010 2)
(define-module (ice-9 read)
#:use-module (srfi srfi-11)
#:use-module (ice-9 textual-ports)
#:use-module (rnrs bytevectors)
#:replace (read))
(define read-hash-procedures
(fluid->parameter %read-hash-procedures))
(define (read-hash-procedure ch)
(assq-ref (read-hash-procedures) ch))
(define (read-hash-extend ch proc)
(let ((alist (read-hash-procedures)))
(read-hash-procedures
(if proc
(assq-set! alist ch proc)
(assq-remove! alist ch)))))
(define bitfield:record-positions? 0)
(define bitfield:case-insensitive? 2)
(define bitfield:keyword-style 4)
(define bitfield:r6rs-escapes? 6)
(define bitfield:square-brackets? 8)
(define bitfield:hungry-eol-escapes? 10)
(define bitfield:curly-infix? 12)
(define bitfield:r7rs-symbols? 14)
(define read-option-bits 16)
(define read-option-mask #b11)
(define read-option-inherit #b11)
(define read-options-inherit-all (1- (ash 1 read-option-bits)))
(define keyword-style-hash-prefix 0)
(define keyword-style-prefix 1)
(define keyword-style-postfix 2)
(define (compute-reader-options port)
(let ((options (read-options))
(port-options (or (%port-property port 'port-read-options)
read-options-inherit-all)))
(define-syntax-rule (option field exp)
(let ((port-option (logand port-options (ash read-option-mask field))))
(if (= port-option (ash read-option-inherit field))
exp
port-option)))
(define (bool key field)
(option field
(if (memq key options) (ash 1 field) 0)))
(define (enum key values field)
(option field
(ash (assq-ref values (and=> (memq key options) cadr)) field)))
(logior (bool 'positions bitfield:record-positions?)
(bool 'case-insensitive bitfield:case-insensitive?)
(enum 'keyword-style '((#f . 0) (prefix . 1) (postfix . 2))
bitfield:keyword-style)
(bool 'r6rs-hex-escapes bitfield:r6rs-escapes?)
(bool 'square-brackets bitfield:square-brackets?)
(bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?)
(bool 'curly-infix bitfield:curly-infix?)
(bool 'r7rs-symbols bitfield:r7rs-symbols?))))
(define (set-option options field new)
(logior new (logand options (lognot (ash #b11 field)))))
(define (set-port-read-option! port field value)
(let ((options (or (%port-property port 'port-read-options)
read-options-inherit-all))
(new (ash value field)))
(%set-port-property! port 'port-read-options
(set-option options field new)
)))
(define* (read #:optional (port (current-input-port)))
;; init read options
(define opts (compute-reader-options port))
(define (enabled? field)
(not (zero? (logand (ash 1 field) opts))))
(define (set-reader-option! field value)
(set! opts (set-option opts field value))
(set-port-read-option! port field value))
(define (record-positions?) (enabled? bitfield:record-positions?))
(define (case-insensitive?) (enabled? bitfield:case-insensitive?))
(define (keyword-style) (logand read-option-mask
(ash opts (- bitfield:keyword-style))))
(define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?))
(define (square-brackets?) (enabled? bitfield:square-brackets?))
(define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?))
(define (curly-infix?) (enabled? bitfield:curly-infix?))
(define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?))
(define neoteric 0)
(define (next) (get-char port))
(define (peek) (lookahead-char port))
(define filename (port-filename port))
(define (get-pos) (cons (port-line port) (port-column port)))
(define (annotate line column datum)
;; FIXME: Return a syntax object instead, so we can avoid the
;; srcprops side table.
(when (and (supports-source-properties? datum)
;; Line or column can be invalid via set-port-column! or
;; ungetting chars beyond start of line.
(<= 0 line)
(<= 1 column))
;; We always capture the column after one char of lookahead;
;; subtract off that lookahead value.
(set-source-properties! datum `((filename . ,filename)
(line . ,line)
(column . ,(1- column)))))
datum)
(define (input-error msg . args)
(apply error msg args))
(define (read-semicolon-comment)
(let ((ch (next)))
(cond
((eof-object? ch) ch)
((eqv? ch #\newline) (next))
(else (read-semicolon-comment)))))
(define-syntax-rule (take-until first pred)
(let ((acc (open-output-string)))
(put-char acc first)
(let lp ()
(let ((ch (peek)))
(cond
((or (eof-object? ch)
(pred ch))
(get-output-string acc))
(else
(put-char acc ch)
(next)
(lp)))))))
(define-syntax-rule (take-while first pred)
(take-until first (lambda (ch) (not (pred ch)))))
(define (delimiter? ch)
(or (memv ch '(#\( #\) #\; #\"
#\space #\return #\ff #\newline #\tab))
(and (memv ch '(#\[ #\])) (or (square-brackets?) (curly-infix?)))
(and (memv ch '(#\{ #\})) (curly-infix?))))
(define (read-token ch)
(take-until ch delimiter?))
(define (read-mixed-case-symbol ch)
(let* ((str (read-token ch))
(len (string-length str)))
(cond
((and (eq? (keyword-style) keyword-style-postfix)
(> len 0) (eqv? #\: (string-ref str (1- len))))
(let ((str (substring str 0 (1- len))))
(symbol->keyword
(string->symbol
(if (case-insensitive?)
(string-downcase str)
str)))))
(else
(string->symbol
(if (case-insensitive?)
(string-downcase str)
str))))))
(define (read-parenthesized rdelim)
(define (finish-curly-infix ret)
;; Perform syntactic transformations on {...} lists.
(define (extract-infix-list ls)
(and (pair? ls)
(let ((x (car ls))
(ls (cdr ls)))
(and (pair? ls)
(let ((op (car ls))
(ls (cdr ls)))
(if (null? ls)
(list op x)
(let ((tail (extract-infix-list ls)))
(and tail
(equal? op (car tail))
(cons* op x (cdr tail))))))))))
(cond
((or (not (eqv? rdelim #\}))) ret) ; Only on {...} lists.
((null? ret) ret) ; {} => ()
((null? (cdr ret)) (car ret)) ; {x} => x
((null? (cddr ret)) ret) ; {x y} => (x y)
((extract-infix-list ret)) ; {x + y + ... + z} => (+ x y ... z)
(else (cons '$nfx$ ret)))) ; {x y . z} => ($nfx$ x y . z)
(define curly? (eqv? rdelim #\}))
(finish-curly-infix
(let lp ((ch (next-non-whitespace)))
(when (eof-object? ch)
(input-error "unexpected end of input while searching for " rdelim))
(cond
((eqv? ch rdelim) '())
((or (eqv? ch #\))
(and (eqv? ch #\]) (or (square-brackets?) (curly-infix?)))
(and (eqv? ch #\}) (curly-infix?)))
(input-error "mismatched close paren" ch))
(else
(let ((expr (read-expr ch)))
;; Note that it is possible for scm_read_expression to
;; return `.', but not as part of a dotted pair: as in
;; #{.}#. Indeed an example is here!
(if (and (eqv? ch #\.) (eq? expr '#{.}#))
(let* ((tail (read-expr (next-non-whitespace)))
(close (next-non-whitespace)))
(unless (eqv? close rdelim)
(input-error "missing close paren" rdelim))
tail)
(cons expr (lp (next-non-whitespace))))))))))
(define (hex-digit ch)
(case ch
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
(- (char->integer ch) (char->integer #\0)))
((#\a #\b #\c #\d #\e #\f)
(+ 10 (- (char->integer ch) (char->integer #\a))))
((#\A #\B #\C #\D #\E #\F)
(+ 10 (- (char->integer ch) (char->integer #\A))))
(else #f)))
(define (read-r6rs-hex-escape)
(let ((ch (next)))
(cond
((hex-digit ch) =>
(lambda (res)
(let lp ((res res))
(let ((ch (next)))
(cond
((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
((eqv? ch #\;) (integer->char res))
(else
(input-error "invalid character in escape sequence: ~S" ch)))))))
(else
(input-error "invalid character in escape sequence: ~S" ch)))))
(define (read-fixed-hex-escape len)
(let lp ((len len) (res 0))
(if (zero? len)
(integer->char res)
(let ((ch (next)))
(cond
((hex-digit ch) =>
(lambda (digit)
(lp (1- len) (+ (* res 16) digit))))
(else
(input-error "invalid character in escape sequence: ~S" ch)))))))
(define (read-string rdelim)
(let ((acc (open-output-string)))
(let lp ()
(let ((ch (next)))
(cond
((eof-object? ch)
(input-error "unexpected end of input while reading string"))
((eqv? ch rdelim)
(get-output-string acc))
((eqv? ch #\\)
(let ((ch (next)))
(when (eof-object? ch)
(input-error "unexpected end of input while reading string"))
(case ch
((#\newline)
(when (hungry-eol-escapes?)
;; Skip intraline whitespace before continuing.
(let lp ()
(let ((ch (peek)))
(unless (or (eof-object? ch)
(eqv? ch #\tab)
(eq? (char-general-category ch) 'Zs))
(next)
(lp))))))
;; Accept "\(" for use at the beginning of
;; lines in multiline strings to avoid
;; confusing emacs lisp modes.
((#\| #\\ #\() (put-char acc ch))
((#\0) (put-char acc #\nul))
((#\f) (put-char acc #\ff))
((#\n) (put-char acc #\newline))
((#\r) (put-char acc #\return))
((#\t) (put-char acc #\tab))
((#\a) (put-char acc #\alarm))
((#\v) (put-char acc #\vtab))
((#\b) (put-char acc #\backspace))
((#\x)
(let ((ch (if (or (r6rs-escapes?) (eqv? rdelim #\|))
(read-r6rs-hex-escape)
(read-fixed-hex-escape 2))))
(put-char acc ch)))
((#\u)
(put-char acc (read-fixed-hex-escape 4)))
((#\U)
(put-char acc (read-fixed-hex-escape 8)))
(else
(unless (eqv? ch rdelim)
(input-error "invalid character in escape sequence: ~S" ch))
(put-char acc ch)))
(lp)))
(else
(put-char acc ch)
(lp)))))))
(define (read-character)
(let ((ch (next)))
(cond
((eof-object? ch)
(input-error "unexpected end of input after #\\"))
(else
(let* ((tok (read-token ch))
(len (string-length tok)))
(define dotted-circle #\x25cc)
(define r5rs-charnames
'(("space" . #\x20) ("newline" . #\x0a)))
(define r6rs-charnames
'(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
("delete" . #\x7f)))
(define r7rs-charnames
'(("escape" . #\x1b)))
(define C0-control-charnames
'(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
("del" . #\x7f)))
(define alt-charnames
'(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
;; Although R6RS and R7RS charnames specified as being
;; case-sensitive, Guile matches them case-insensitively, like
;; other char names.
(define (named-char tok alist)
(let lp ((alist alist))
(and (pair? alist)
(if (string-ci=? tok (caar alist))
(cdar alist)
(lp (cdr alist))))))
(cond
((= len 1) ch)
((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
;; Ignore dotted circles, which may be used to keep
;; combining characters from combining with the backslash in
;; #\charname.
ch)
((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
(string->number tok 8))
;; Specifying a codepoint as an octal value.
=> integer->char)
((and (eqv? ch #\x) (> len 1)
(string->number (substring tok 1) 16))
;; Specifying a codepoint as an hexadecimal value. Skip
;; initial "x".
=> integer->char)
((named-char tok r5rs-charnames))
((named-char tok r6rs-charnames))
((named-char tok r7rs-charnames))
((named-char tok C0-control-charnames))
((named-char tok alt-charnames))
(else
(input-error "unknown character name ~a" tok))))))))
(define (read-vector)
(list->vector (read-parenthesized #\))))
(define (read-srfi-4-vector ch)
(read-array ch))
(define (maybe-read-boolean-tail tail)
(let ((len (string-length tail)))
(let lp ((i 0))
(or (= i len)
(let ((ch (peek)))
(and (not (eof-object? ch))
(eqv? (char-downcase ch) (string-ref tail i))
(or (begin
(next)
(lp (1+ i)))
(begin
(unget-char port ch)
#f))))))))
(define (read-false-or-srfi-4-vector)
(let ((ch (peek)))
(if (or (eqv? ch #\3)
(eqv? ch #\6))
(read-srfi-4-vector #\f)
(begin
(maybe-read-boolean-tail "alse")
#f))))
(define (read-bytevector)
(define (expect ch)
(unless (eqv? (next) ch)
(input-error "invalid bytevector prefix" ch)))
(expect #\u)
(expect #\8)
(expect #\()
(u8-list->bytevector (read-parenthesized #\))))
;; FIXME: We should require a terminating delimiter.
(define (read-bitvector)
(list->bitvector
(let lp ()
(let ((ch (peek)))
(case ch
((#\0) (next) (cons #f (lp)))
((#\1) (next) (cons #t (lp)))
(else '()))))))
(define (read-boolean ch)
;; Historically, Guile hasn't required a delimiter after #f / #t.
;; When the longer #false / #true forms were added, we kept this
;; behavior. It is terrible and we should change it!!
(case ch
((#\t #\T)
(maybe-read-boolean-tail "rue")
#t)
(else
(maybe-read-boolean-tail "alse")
#f)))
(define (read-keyword)
(let ((ch (next-non-whitespace)))
(when (eof-object? ch)
(input-error "end of input while reading keyword"))
(let ((expr (read-expr ch)))
(unless (symbol? expr)
(input-error "keyword prefix #: not followed by a symbol: ~a"
expr))
(symbol->keyword expr))))
(define (read-array ch)
(define (read-decimal-integer ch alt)
;; This parser has problems but it's what Guile's read.c does. Any
;; fix should come later and to both of them.
(define (decimal-digit ch)
(and (not (eof-object? ch))
(let ((digit (- (char->integer ch) (char->integer #\0))))
(and (<= 0 digit 9) digit))))
(let-values (((sign ch) (if (eqv? ch #\-)
(values -1 (next))
(values 1 ch))))
(let lp ((ch ch) (res #f))
(cond
((decimal-digit ch)
=> (lambda (digit)
(lp (next) (if res (+ (* 10 res) digit) digit))))
(else
(values ch (if res (* res sign) alt)))))))
(define (read-rank ch)
(let-values (((ch rank) (read-decimal-integer ch 1)))
(when (< rank 0)
(input-error "array rank must be non-negative"))
(when (eof-object? ch)
(input-error "unexpected end of input while reading array"))
(values ch rank)))
(define (read-tag ch)
(let lp ((ch ch) (chars '()))
(when (eof-object? ch)
(input-error "unexpected end of input while reading array"))
(if (memv ch '(#\( #\@ @\:))
(values ch
(if (null? chars)
#t
(string->symbol (list->string (reverse chars)))))
(lp (next) (cons ch chars)))))
(define (read-dimension ch)
(let*-values (((ch lbnd) (if (eqv? ch #\@)
(read-decimal-integer (next) 0)
(values ch 0)))
((ch len) (if (eqv? ch #\:)
(read-decimal-integer (next) 0)
(values ch #f))))
(when (and len (< len 0))
(input-error "array length must be non-negative"))
(when (eof-object? ch)
(input-error "unexpected end of input while reading array"))
(values ch
(if len
(if (zero? lbnd)
len
(list lbnd (+ lbnd (1- len))))
lbnd))))
(define (read-shape ch alt)
(if (memv ch '(#\@ @\:))
(let*-values (((ch head) (read-dimension ch))
((ch tail) (read-shape ch '())))
(values ch (cons head tail)))
(values ch alt)))
(define (read-elements ch rank)
(unless (eqv? ch #\()
(input-error "missing '(' in vector or array literal"))
(let ((elts (read-parenthesized #\))))
(if (zero? rank)
(begin
;; Handle special print syntax of rank zero arrays; see
;; scm_i_print_array for a rationale.
(when (null? elts)
(input-error "too few elements in array literal, need 1"))
(unless (null? (cdr elts))
(input-error "too many elements in array literal, need 1"))
(car elts))
elts)))
(let*-values (((ch rank) (read-rank ch))
((ch tag) (read-tag ch))
((ch shape) (read-shape ch rank))
((elts) (read-elements ch rank)))
(when (and (pair? shape) (not (eqv? (length shape) rank)))
(input-error
"the number of shape specifications must match the array rank"))
(list->typed-array tag shape elts)))
(define (read-number-and-radix ch)
(let ((tok (string-append "#" (read-token ch))))
(or (string->number tok)
(input-error "unknown # object"))))
(define (read-extended-symbol)
(define (next-not-eof)
(let ((ch (next)))
(when (eof-object? ch)
(input-error "end of input while reading symbol"))
ch))
(string->symbol
(list->string
(let lp ((saw-brace? #f))
(let ((ch (next-not-eof)))
(cond
(saw-brace?
(if (eqv? ch #\#)
'()
(cons #\} (lp #f))))
((eqv? ch #\})
(lp #t))
((eqv? ch #\\)
;; It used to be that print.c would print extended-read-syntax
;; symbols with backslashes before "non-standard" chars, but
;; this routine wouldn't do anything with those escapes.
;; Bummer. What we've done is to change print.c to output
;; R6RS hex escapes for those characters, relying on the fact
;; that the extended read syntax would never put a `\' before
;; an `x'. For now, we just ignore other instances of
;; backslash in the string.
(let* ((ch (next-not-eof))
(ch (if (eqv? ch #\x)
(read-r6rs-hex-escape)
ch)))
(cons ch (lp #f))))
(else
(cons ch (lp #f)))))))))
(define (read-nil)
;; Have already read "#\n" -- now read "il".
(let ((id (read-mixed-case-symbol #\n)))
(unless (eq? id 'nil)
(input-error "unexpected input while reading #nil: ~a" id))
#nil))
(define (read-sharp)
(let* ((ch (next)))
(cond
((eof-object? ch)
(input-error "unexpected end of input after #"))
((read-hash-procedure ch)
=> (lambda (proc) (proc ch)))
(else
(case ch
((#\\) (read-character))
((#\() (read-vector))
((#\s #\u \c) (read-srfi-4-vector ch))
((#\f) (read-false-or-srfi-4-vector))
((#\v) (read-bytevector))
((#\*) (read-bitvector))
((#\t #\T #\F) (read-boolean ch))
((#\:) (read-keyword))
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@)
(read-array ch))
((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
(read-number-and-radix ch))
((#\{) (read-extended-symbol))
((#\') (list 'syntax (read-expr (next-non-whitespace))))
((#\`) (list 'quasisyntax (read-expr (next-non-whitespace))))
((#\,)
(if (eqv? #\@ (peek))
(begin
(next)
(list 'unsyntax-splicing (read-expr (next-non-whitespace))))
(list 'unsyntax (read-expr (next-non-whitespace)))))
((#\n) (read-nil))
(else
(input-error "Unknown # object: ~S" ch)))))))
(define (read-number ch)
(let* ((str (read-token ch)))
(or (string->number str)
(string->symbol (if (case-insensitive?)
(string-downcase str)
str)))))
(define (read-expr* ch)
(case ch
((#\{)
(cond
((curly-infix?)
(set! neoteric (1+ neoteric))
(let ((expr (read-parenthesized #\})))
(set! neoteric (1- neoteric))
expr))
(else
(read-mixed-case-symbol ch))))
((#\[)
(cond
((square-brackets?)
(read-parenthesized #\]))
((curly-infix?)
;; The syntax of neoteric expressions requires that '[' be a
;; delimiter when curly-infix is enabled, so it cannot be part
;; of an unescaped symbol. We might as well do something
;; useful with it, so we adopt Kawa's convention: [...] =>
;; ($bracket-list$ ...)
;; FIXME: source locations for this cons
(cons '$bracket-list$ (read-parenthesized #\])))
(else
(read-mixed-case-symbol ch))))
((#\()
(read-parenthesized #\)))
((#\")
(read-string ch))
((#\|)
(if (r7rs-symbols?)
(string->symbol (read-string ch))
(read-mixed-case-symbol ch)))
((#\')
(list 'quote (read-expr (next-non-whitespace))))
((#\`)
(list 'quasiquote (read-expr (next-non-whitespace))))
((#\,)
(cond
((eqv? #\@ (peek))
(next)
(list 'unquote-splicing (read-expr (next-non-whitespace))))
(else
(list 'unquote (read-expr (next-non-whitespace))))))
((#\#)
;; FIXME: read-sharp should recur if we read a comment
(read-sharp))
((#\))
(input-error "unexpected \")\""))
((#\})
(if (curly-infix?)
(input-error "unexpected \"}\"")
(read-mixed-case-symbol ch)))
((#\])
(if (square-brackets?)
(input-error "unexpected \"]\"")
(read-mixed-case-symbol ch)))
((#f)
;; EOF.
the-eof-object)
((#\:)
(if (eq? (keyword-style) keyword-style-prefix)
;; FIXME: Don't skip whitespace here.
(let ((ch (next-non-whitespace)))
(when (eof-object? ch)
(input-error "unexpected end of input while reading :keyword"))
(symbol->keyword (read-expr ch)))
(read-mixed-case-symbol ch)))
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.)
(read-number ch))
(else
(read-mixed-case-symbol ch))))
(define (read-neoteric ch)
(let lp ((expr (read-expr ch)))
;; 'expr' is the first component of the neoteric expression. If
;; the next character is '(', '[', or '{', (without any
;; intervening whitespace), we use it to construct a new
;; expression, and loop. For example:
;; f{n - 1}(x) => ((f (- n 1)) x).
(case (peek)
((#\() ;; e(...) => (e ...)
(next)
(lp (cons expr (read-parenthesized #\)))))
((#\[) ;; e[...] => ($bracket-apply$ e ...)
(next)
(lp (cons* '$bracket-apply$ expr (read-parenthesized #\]))))
((#\{) ;; e{} => (e); e{...} => (e {...})
(next)
(let ((args (read-parenthesized #\})))
(lp (if (null? args)
(list expr)
(list expr args)))))
(else
expr))))
(define (read-expr ch)
(let ((line (port-line port))
(column (port-column port)))
(annotate
line
column
(if (zero? neoteric)
(read-expr* ch)
(read-neoteric ch)))))
(define (read-directive)
(let ((ch (next)))
(cond
((eof-object? ch)
(input-error "unexpected end of input after #!"))
(else
(string->symbol
(take-while ch (lambda (ch)
(or (eqv? ch #\-) (char-alphabetic? ch)))))))))
(define (skip-scsh-comment)
(let lp ((ch (next)))
(cond
((eof-object? ch)
(input-error "unexpected end of input while looking for !#"))
((eqv? ch #\!)
(let ((ch (next)))
(if (eqv? ch #\#)
(next)
(lp ch))))
(else
(lp (next))))))
(define (process-shebang)
;; After having read #!, we complete either with #!r6rs,
;; #!fold-case, #!no-fold-case, #!curly-infix,
;; #!curly-infix-and-bracket-lists, or a SCSH block comment
;; terminated by !#.
(let ((sym (read-directive)))
(cond
((eq? sym 'r6rs)
(set-reader-option! bitfield:case-insensitive? 0)
(set-reader-option! bitfield:r6rs-escapes? 1)
(set-reader-option! bitfield:square-brackets? 1)
(set-reader-option! bitfield:keyword-style keyword-style-hash-prefix)
(set-reader-option! bitfield:hungry-eol-escapes? 1)
(next))
((eq? sym 'fold-case)
(set-reader-option! bitfield:case-insensitive? 1)
(next))
((eq? sym 'no-fold-case)
(set-reader-option! bitfield:case-insensitive? 0)
(next))
((eq? sym 'curly-infix)
(set-reader-option! bitfield:curly-infix? 1)
(next))
((eq? sym 'curly-infix-and-bracket-lists)
(set-reader-option! bitfield:curly-infix? 1)
(set-reader-option! bitfield:square-brackets? 0)
(next))
(else
(skip-scsh-comment)))))
(define (skip-eol-comment)
(let ((ch (next)))
(cond
((eof-object? ch) ch)
((eq? ch #\newline) (next))
(else (skip-eol-comment)))))
;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
;; nested.
(define (skip-r6rs-block-comment)
;; We have read #|, now looking for |#.
(let ((ch (next)))
(when (eof-object? ch)
(input-error "unterminated `#| ... |#' comment"))
(cond
((and (eqv? ch #\|) (eqv? (peek) #\#))
;; Done.
(next)
(values))
((and (eqv? ch #\#) (eqv? (peek) #\|))
;; A nested comment.
(next)
(skip-r6rs-block-comment)
(skip-r6rs-block-comment))
(else
(skip-r6rs-block-comment)))))
(define (next-non-whitespace)
(let lp ((ch (next)))
(case ch
((#\;)
(lp (skip-eol-comment)))
((#\#)
(case (peek)
((#\!)
(next)
(lp (process-shebang)))
((#\;)
(next)
(let ((ch (next-non-whitespace)))
(when (eof-object? ch)
(input-error "no expression after #; comment"))
(read-expr ch))
(next-non-whitespace))
((#\|)
(if (read-hash-procedure #\|)
ch
(begin
(next)
(skip-r6rs-block-comment)
(next-non-whitespace))))
(else ch)))
((#\space #\return #\ff #\newline #\tab)
(next-non-whitespace))
(else ch))))
(let ((ch (next-non-whitespace)))
(if (eof-object? ch)
ch
(read-expr ch))))