1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-12 23:00:22 +02:00

Do not scan for coding declarations in open-file.

* libguile/fports.c (scm_open_file): Do not scan for coding
  declarations.  Replace 'use_encoding' local variable with
  'binary'.  Update documentation string.

* module/ice-9/psyntax.scm (include): Add the same file-encoding
  logic that's used in compile-file and scm_primitive_load.

* module/ice-9/psyntax-pp.scm: Regenerate.

* doc/ref/api-io.texi (File Ports): Update docs.

* test-suite/tests/ports.test: Change "open-file HONORS file coding
  declarations" test to "open-file IGNORES file coding declaration".

* test-suite/tests/coding.test (scan-coding): Use 'file-encoding' to
  scan for the encoding, since 'open-input-file' no longer does so.
This commit is contained in:
Mark H Weaver 2013-01-30 14:45:28 -05:00
parent bc3901092d
commit 9a334eb3ab
6 changed files with 40 additions and 44 deletions

View file

@ -885,8 +885,8 @@ Use binary mode, ensuring that each byte in the file will be read as one
Scheme character. Scheme character.
To provide this property, the file will be opened with the 8-bit To provide this property, the file will be opened with the 8-bit
character encoding "ISO-8859-1", ignoring any coding declaration or port character encoding "ISO-8859-1", ignoring the default port encoding.
encoding. @xref{Ports}, for more information on port encodings. @xref{Ports}, for more information on port encodings.
Note that while it is possible to read and write binary data as Note that while it is possible to read and write binary data as
characters or strings, it is usually better to treat bytes as octets, characters or strings, it is usually better to treat bytes as octets,
@ -903,12 +903,20 @@ because of its port encoding ramifications.
If a file cannot be opened with the access If a file cannot be opened with the access
requested, @code{open-file} throws an exception. requested, @code{open-file} throws an exception.
When the file is opened, this procedure will scan for a coding When the file is opened, its encoding is set to the current
declaration (@pxref{Character Encoding of Source Files}). If a coding @code{%default-port-encoding}, unless the @code{b} flag was supplied.
declaration is found, it will be used to interpret the file. Otherwise, Sometimes it is desirable to honor Emacs-style coding declarations in
the port's encoding will be used. To suppress this behavior, open the files@footnote{Guile 2.0.0 to 2.0.7 would do this by default. This
file in binary mode and then set the port encoding explicitly using behavior was deemed inappropriate and disabled starting from Guile
@code{set-port-encoding!}. 2.0.8.}. When that is the case, the @code{file-encoding} procedure can
be used as follows (@pxref{Character Encoding of Source Files,
@code{file-encoding}}):
@example
(let* ((port (open-input-file file))
(encoding (file-encoding port)))
(set-port-encoding! port (or encoding (port-encoding port))))
@end example
In theory we could create read/write ports which were buffered In theory we could create read/write ports which were buffered
in one direction only. However this isn't included in the in one direction only. However this isn't included in the

View file

@ -349,8 +349,7 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
"@item b\n" "@item b\n"
"Open the underlying file in binary mode, if supported by the system.\n" "Open the underlying file in binary mode, if supported by the system.\n"
"Also, open the file using the binary-compatible character encoding\n" "Also, open the file using the binary-compatible character encoding\n"
"\"ISO-8859-1\", ignoring the port's encoding and the coding declaration\n" "\"ISO-8859-1\", ignoring the default port encoding.\n"
"at the top of the input file, if any.\n"
"@item +\n" "@item +\n"
"Open the port for both input and output. E.g., @code{r+}: open\n" "Open the port for both input and output. E.g., @code{r+}: open\n"
"an existing file for both input and output.\n" "an existing file for both input and output.\n"
@ -365,11 +364,6 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
"Add line-buffering to the port. The port output buffer will be\n" "Add line-buffering to the port. The port output buffer will be\n"
"automatically flushed whenever a newline character is written.\n" "automatically flushed whenever a newline character is written.\n"
"@end table\n" "@end table\n"
"When the file is opened, this procedure will scan for a coding\n"
"declaration@pxref{Character Encoding of Source Files}. If present\n"
"will use that encoding for interpreting the file. Otherwise, the\n"
"port's encoding will be used.\n"
"\n"
"In theory we could create read/write ports which were buffered\n" "In theory we could create read/write ports which were buffered\n"
"in one direction only. However this isn't included in the\n" "in one direction only. However this isn't included in the\n"
"current interfaces. If a file cannot be opened with the access\n" "current interfaces. If a file cannot be opened with the access\n"
@ -377,7 +371,7 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
#define FUNC_NAME s_scm_open_file #define FUNC_NAME s_scm_open_file
{ {
SCM port; SCM port;
int fdes, flags = 0, use_encoding = 1; int fdes, flags = 0, binary = 0;
unsigned int retries; unsigned int retries;
char *file, *md, *ptr; char *file, *md, *ptr;
@ -412,7 +406,7 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR; flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR;
break; break;
case 'b': case 'b':
use_encoding = 0; binary = 1;
#if defined (O_BINARY) #if defined (O_BINARY)
flags |= O_BINARY; flags |= O_BINARY;
#endif #endif
@ -451,20 +445,8 @@ SCM_DEFINE (scm_open_file, "open-file", 2, 0, 0,
port = scm_i_fdes_to_port (fdes, scm_i_mode_bits (mode), port = scm_i_fdes_to_port (fdes, scm_i_mode_bits (mode),
fport_canonicalize_filename (filename)); fport_canonicalize_filename (filename));
if (use_encoding) if (binary)
{ /* Use the binary-friendly ISO-8859-1 encoding. */
/* If this file has a coding declaration, use that as the port
encoding. */
if (SCM_INPUT_PORT_P (port))
{
char *enc = scm_i_scan_for_encoding (port);
if (enc != NULL)
scm_i_set_port_encoding_x (port, enc);
}
}
else
/* If this is a binary file, use the binary-friendly ISO-8859-1
encoding. */
scm_i_set_port_encoding_x (port, NULL); scm_i_set_port_encoding_x (port, NULL);
scm_dynwind_end (); scm_dynwind_end ();

View file

@ -2975,10 +2975,12 @@
(lambda (fn dir k) (lambda (fn dir k)
(let ((p (open-input-file (let ((p (open-input-file
(if (absolute-file-name? fn) fn (in-vicinity dir fn))))) (if (absolute-file-name? fn) fn (in-vicinity dir fn)))))
(let f ((x (read p)) (result '())) (let ((enc (file-encoding p)))
(if (eof-object? x) (set-port-encoding! p (let ((t enc)) (if t t "UTF-8")))
(begin (close-input-port p) (reverse result)) (let f ((x (read p)) (result '()))
(f (read p) (cons (datum->syntax k x) result)))))))) (if (eof-object? x)
(begin (close-input-port p) (reverse result))
(f (read p) (cons (datum->syntax k x) result)))))))))
(let ((src (syntax-source x))) (let ((src (syntax-source x)))
(let ((file (if src (assq-ref src 'filename) #f))) (let ((file (if src (assq-ref src 'filename) #f)))
(let ((dir (if (string? file) (dirname file) #f))) (let ((dir (if (string? file) (dirname file) #f)))

View file

@ -2945,10 +2945,15 @@
(lambda (x) (lambda (x)
(define read-file (define read-file
(lambda (fn dir k) (lambda (fn dir k)
(let ((p (open-input-file (let* ((p (open-input-file
(if (absolute-file-name? fn) (if (absolute-file-name? fn)
fn fn
(in-vicinity dir fn))))) (in-vicinity dir fn))))
(enc (file-encoding p)))
;; Choose the input encoding deterministically.
(set-port-encoding! p (or enc "UTF-8"))
(let f ((x (read p)) (let f ((x (read p))
(result '())) (result '()))
(if (eof-object? x) (if (eof-object? x)

View file

@ -1,6 +1,6 @@
;;;; coding.test --- test suite for coding declarations. -*- mode: scheme -*- ;;;; coding.test --- test suite for coding declarations. -*- mode: scheme -*-
;;;; ;;;;
;;;; Copyright (C) 2011 Free Software Foundation, Inc. ;;;; Copyright (C) 2011, 2013 Free Software Foundation, Inc.
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -40,7 +40,7 @@
;; relies on the opportunistic filling of the input buffer, which ;; relies on the opportunistic filling of the input buffer, which
;; doesn't happen after a seek. ;; doesn't happen after a seek.
(let* ((port (open-input-file name)) (let* ((port (open-input-file name))
(res (port-encoding port))) (res (file-encoding port)))
(close-port port) (close-port port)
res)))) res))))

View file

@ -270,13 +270,12 @@
(delete-file filename) (delete-file filename)
(string=? line2 binary-test-string))))) (string=? line2 binary-test-string)))))
;; open-file honors file coding declarations ;; open-file ignores file coding declaration
(pass-if "file: open-file honors coding declarations" (pass-if "file: open-file ignores coding declarations"
(with-fluids ((%default-port-encoding "UTF-8")) (with-fluids ((%default-port-encoding "UTF-8"))
(let* ((filename (test-file)) (let* ((filename (test-file))
(port (open-output-file filename)) (port (open-output-file filename))
(test-string "€100")) (test-string "€100"))
(set-port-encoding! port "ISO-8859-15")
(write-line ";; coding: iso-8859-15" port) (write-line ";; coding: iso-8859-15" port)
(write-line test-string port) (write-line test-string port)
(close-port port) (close-port port)