mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-06 07:30:28 +02:00
Ports are given two additional properties: a character encoding and a conversion failure strategy. These properties have getters and setters. The new properties are used to convert any locale text to/from the internal representation of strings. If unspecified, ports use a default value. The default value of these properties is held in a fluid. The default character encoding can be modified by calling setlocale. ISO-8859-1 is treated specially. Since it is a native encoding of strings, it can be processed more quickly. Source code is assumed to be ISO-8859-1 unless otherwise specified. The encoding of a source code file can be given as 'coding: XXXXX' in a magic comment at the top of a file. The C functions that deal with encoding often use a null pointer as shorthand for the native Latin-1 encoding, for efficiency's sake. * test-suite/tests/encoding-iso88591.test: new tests * test-suite/tests/encoding-iso88597.test: new tests * test-suite/tests/encoding-utf8.test: new tests * test-suite/tests/encoding-escapes.test: new tests * test-suite/tests/numbers.test: declare 'binary' encoding * test-suite/tests/ports.test: declare 'binary' encoding * test-suite/tests/r6rs-ports.test: declare 'binary' encoding * module/system/base/compile.scm (compile-file): use source-code file's self-declared encoding when compiling files * libguile/strports.c: store string ports in locale encoding (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector) (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector): new functions * libguile/strings.h: new declaration for scm_i_string_contains_char * libguile/strings.c (scm_i_string_contains_char): new function (scm_from_stringn, scm_to_stringn): use NULL for Latin-1 (scm_from_locale_stringn, scm_to_locale_stringn): respect character encoding of input and output ports * libguile/read.h: declaration for scm_scan_for_encoding * libguile/read.c: (read_token): now takes scheme string instead of C string/length (read_complete_token): new function (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol) (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment) (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector) (scm_read_scsh_block_comment, scm_read_commented_expression) (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart) (scm_read_expression): use scm_t_wchar for char type, use read_complete_token (scm_scan_for_encoding): new function to find a file's character encoding (scm_file_encoding): new function to find a port's character encoding * libguile/rdelim.c: don't unpack strings * libguile/print.h: declaration for modified function scm_i_charprint * libguile/print.c: use locale when printing characters and strings (scm_i_charprint): input parameter is now scm_t_wchar (scm_simple_format): don't unpack strings * libguile/posix.h: new declaration for scm_setbinary. * libguile/posix.c (scm_setlocale): set default and stdio port encodings based on the locale's character encoding (scm_setbinary): new function * libguile/ports.h (scm_t_port): add encoding and failed conversion handler to port type. Declarations for new or modified functions scm_getc, scm_unget_byte, scm_ungetc, scm_i_get_port_encoding, scm_i_set_port_encoding_x, scm_port_encoding, scm_set_port_encoding_x, scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x, scm_port_conversion_strategy, scm_set_port_conversion_strategy_x. * libguile/ports.c: assign the current ports to zero on startup so we can see if they've been set. (scm_current_input_port, scm_current_output_port, scm_current_error_port): return #f if the port is not yet initialized (scm_new_port_table_entry): set up a new port's encoding and illegal sequence handler based on the thread's current defaults (scm_i_remove_port): free port encoding name when port is removed (scm_i_mode_bits_n): now takes a scheme string instead of a c string and length. All callers changed. (SCM_MBCHAR_BUF_SIZE): new const (scm_getc): new function, since the scm_getc in inline.h is now scm_get_byte_or_eof. This pulls one codepoint from a port. (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding (scm_unget_byte): new function, incorportaing the low-level functionality of scm_ungetc (scm_ungetc): uses scm_unget_byte * libguile/numbers.h (scm_t_wchar): compilation order problem with scm_t_wchar being use in functions in multiple headers. Forward declare scm_t_wchar. * libguile/load.c (scm_primitive_load): scan for file encoding at top of file and use it to set the load port's encoding * libguile/inline.h (scm_get_byte_or_eof): new function incorporating most of the functionality of scm_getc. * libguile/fports.c (fport_fill_input): now returns scm_t_wchar * libguile/chars.h (scm_t_wchar): avoid compilation order problem with declaration of scm_t_wchar
258 lines
8.7 KiB
Scheme
258 lines
8.7 KiB
Scheme
;;; High-level compiler interface
|
||
|
||
;; Copyright (C) 2001, 2009 Free Software Foundation, Inc.
|
||
|
||
;;; This library is free software; you can redistribute it and/or
|
||
;;; modify it under the terms of the GNU Lesser General Public
|
||
;;; License as published by the Free Software Foundation; either
|
||
;;; version 3 of the License, or (at your option) any later version.
|
||
;;;
|
||
;;; This library is distributed in the hope that it will be useful,
|
||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
;;; Lesser General Public License for more details.
|
||
;;;
|
||
;;; You should have received a copy of the GNU Lesser General Public
|
||
;;; License along with this library; if not, write to the Free Software
|
||
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||
|
||
;;; Code:
|
||
|
||
(define-module (system base compile)
|
||
#:use-module (system base syntax)
|
||
#:use-module (system base language)
|
||
#:use-module (system base message)
|
||
#:use-module (system vm vm) ;; FIXME: there's a reason for this, can't remember why tho
|
||
#:use-module (ice-9 regex)
|
||
#:use-module (ice-9 optargs)
|
||
#:use-module (ice-9 receive)
|
||
#:export (syntax-error
|
||
*current-language*
|
||
compiled-file-name compile-file compile-and-load
|
||
compile
|
||
decompile)
|
||
#:export-syntax (call-with-compile-error-catch))
|
||
|
||
;;;
|
||
;;; Compiler environment
|
||
;;;
|
||
|
||
(define (syntax-error loc msg exp)
|
||
(throw 'syntax-error-compile-time loc msg exp))
|
||
|
||
(define-macro (call-with-compile-error-catch thunk)
|
||
`(catch 'syntax-error-compile-time
|
||
,thunk
|
||
(lambda (key loc msg exp)
|
||
(if (pair? loc)
|
||
(let ((file (or (assq-ref loc 'filename) "unknown file"))
|
||
(line (assq-ref loc 'line))
|
||
(col (assq-ref loc 'column)))
|
||
(format (current-error-port)
|
||
"~A:~A:~A: ~A: ~A~%" file line col msg exp))
|
||
(format (current-error-port)
|
||
"unknown location: ~A: ~S~%" msg exp)))))
|
||
|
||
|
||
;;;
|
||
;;; Compiler
|
||
;;;
|
||
|
||
(define *current-language* (make-fluid))
|
||
(fluid-set! *current-language* 'scheme)
|
||
(define (current-language)
|
||
(fluid-ref *current-language*))
|
||
|
||
(define (call-once thunk)
|
||
(let ((entered #f))
|
||
(dynamic-wind
|
||
(lambda ()
|
||
(if entered
|
||
(error "thunk may only be entered once: ~a" thunk))
|
||
(set! entered #t))
|
||
thunk
|
||
(lambda () #t))))
|
||
|
||
(define* (call-with-output-file/atomic filename proc #:optional reference)
|
||
(let* ((template (string-append filename ".XXXXXX"))
|
||
(tmp (mkstemp! template)))
|
||
(call-once
|
||
(lambda ()
|
||
(with-throw-handler #t
|
||
(lambda ()
|
||
(proc tmp)
|
||
(chmod tmp (logand #o0666 (lognot (umask))))
|
||
(close-port tmp)
|
||
(if reference
|
||
(let ((st (stat reference)))
|
||
(utime template (stat:atime st) (stat:mtime st))))
|
||
(rename-file template filename))
|
||
(lambda args
|
||
(delete-file template)))))))
|
||
|
||
(define (ensure-language x)
|
||
(if (language? x)
|
||
x
|
||
(lookup-language x)))
|
||
|
||
;; Throws an exception if `dir' is not writable. The double-stat is OK,
|
||
;; as this is only used during compilation.
|
||
(define (ensure-writable-dir dir)
|
||
(if (file-exists? dir)
|
||
(if (access? dir W_OK)
|
||
#t
|
||
(error "directory not writable" dir))
|
||
(begin
|
||
(ensure-writable-dir (dirname dir))
|
||
(mkdir dir))))
|
||
|
||
(define (dsu-sort list key less)
|
||
(map cdr
|
||
(stable-sort (map (lambda (x) (cons (key x) x)) list)
|
||
(lambda (x y) (less (car x) (car y))))))
|
||
|
||
;;; This function is among the trickiest I've ever written. I tried many
|
||
;;; variants. In the end, simple is best, of course.
|
||
;;;
|
||
;;; After turning this around a number of times, it seems that the the
|
||
;;; desired behavior is that .go files should exist in a path, for
|
||
;;; searching. That is orthogonal to this function. For writing .go
|
||
;;; files, either you know where they should go, in which case you tell
|
||
;;; compile-file explicitly, as in the srcdir != builddir case; or you
|
||
;;; don't know, in which case this function is called, and we just put
|
||
;;; them in your own ccache dir in ~/.guile-ccache.
|
||
(define (compiled-file-name file)
|
||
(define (compiled-extension)
|
||
(cond ((or (null? %load-compiled-extensions)
|
||
(string-null? (car %load-compiled-extensions)))
|
||
(warn "invalid %load-compiled-extensions"
|
||
%load-compiled-extensions)
|
||
".go")
|
||
(else (car %load-compiled-extensions))))
|
||
(and %compile-fallback-path
|
||
(let ((f (string-append
|
||
%compile-fallback-path
|
||
;; no need for '/' separator here, canonicalize-path
|
||
;; will give us an absolute path
|
||
(canonicalize-path file)
|
||
(compiled-extension))))
|
||
(and (false-if-exception (ensure-writable-dir (dirname f)))
|
||
f))))
|
||
|
||
(define* (compile-file file #:key
|
||
(output-file #f)
|
||
(env #f)
|
||
(from (current-language))
|
||
(to 'objcode)
|
||
(opts '()))
|
||
(let* ((comp (or output-file (compiled-file-name file)))
|
||
(in (open-input-file file))
|
||
(enc (file-encoding in)))
|
||
(if enc
|
||
(set-port-encoding! in enc))
|
||
(ensure-writable-dir (dirname comp))
|
||
(call-with-output-file/atomic comp
|
||
(lambda (port)
|
||
((language-printer (ensure-language to))
|
||
(read-and-compile in #:env env #:from from #:to to #:opts opts)
|
||
port))
|
||
file)
|
||
comp))
|
||
|
||
(define* (compile-and-load file #:key (from 'scheme) (to 'value) (opts '()))
|
||
(read-and-compile (open-input-file file)
|
||
#:from from #:to to #:opts opts))
|
||
|
||
|
||
;;;
|
||
;;; Compiler interface
|
||
;;;
|
||
|
||
(define (compile-passes from to opts)
|
||
(map cdr
|
||
(or (lookup-compilation-order from to)
|
||
(error "no way to compile" from "to" to))))
|
||
|
||
(define (compile-fold passes exp env opts)
|
||
(let lp ((passes passes) (x exp) (e env) (cenv env) (first? #t))
|
||
(if (null? passes)
|
||
(values x e cenv)
|
||
(receive (x e new-cenv) ((car passes) x e opts)
|
||
(lp (cdr passes) x e (if first? new-cenv cenv) #f)))))
|
||
|
||
(define (find-language-joint from to)
|
||
(let lp ((in (reverse (or (lookup-compilation-order from to)
|
||
(error "no way to compile" from "to" to))))
|
||
(lang to))
|
||
(cond ((null? in)
|
||
(error "don't know how to join expressions" from to))
|
||
((language-joiner lang) lang)
|
||
(else
|
||
(lp (cdr in) (caar in))))))
|
||
|
||
(define* (read-and-compile port #:key
|
||
(env #f)
|
||
(from (current-language))
|
||
(to 'objcode)
|
||
(opts '()))
|
||
(let ((from (ensure-language from))
|
||
(to (ensure-language to)))
|
||
(let ((joint (find-language-joint from to)))
|
||
(with-fluids ((*current-language* from))
|
||
(let lp ((exps '()) (env #f) (cenv env))
|
||
(let ((x ((language-reader (current-language)) port)))
|
||
(cond
|
||
((eof-object? x)
|
||
(compile ((language-joiner joint) (reverse exps) env)
|
||
#:from joint #:to to #:env env #:opts opts))
|
||
(else
|
||
;; compile-fold instead of compile so we get the env too
|
||
(receive (jexp jenv jcenv)
|
||
(compile-fold (compile-passes (current-language) joint opts)
|
||
x cenv opts)
|
||
(lp (cons jexp exps) jenv jcenv))))))))))
|
||
|
||
(define* (compile x #:key
|
||
(env #f)
|
||
(from (current-language))
|
||
(to 'value)
|
||
(opts '()))
|
||
|
||
(let ((warnings (memq #:warnings opts)))
|
||
(if (pair? warnings)
|
||
(let ((warnings (cadr warnings)))
|
||
;; Sanity-check the requested warnings.
|
||
(for-each (lambda (w)
|
||
(or (lookup-warning-type w)
|
||
(warning 'unsupported-warning #f w)))
|
||
warnings))))
|
||
|
||
(receive (exp env cenv)
|
||
(compile-fold (compile-passes from to opts) x env opts)
|
||
exp))
|
||
|
||
|
||
;;;
|
||
;;; Decompiler interface
|
||
;;;
|
||
|
||
(define (decompile-passes from to opts)
|
||
(map cdr
|
||
(or (lookup-decompilation-order from to)
|
||
(error "no way to decompile" from "to" to))))
|
||
|
||
(define (decompile-fold passes exp env opts)
|
||
(if (null? passes)
|
||
(values exp env)
|
||
(receive (exp env) ((car passes) exp env opts)
|
||
(decompile-fold (cdr passes) exp env opts))))
|
||
|
||
(define* (decompile x #:key
|
||
(env #f)
|
||
(from 'value)
|
||
(to 'assembly)
|
||
(opts '()))
|
||
(decompile-fold (decompile-passes from to opts)
|
||
x
|
||
env
|
||
opts))
|