1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-06 07:30:28 +02:00
guile/module/system/base/compile.scm
Michael Gran 889975e51a Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00

258 lines
8.7 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;; High-level compiler interface
;; Copyright (C) 2001, 2009 Free Software Foundation, Inc.
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 3 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Code:
(define-module (system base compile)
#:use-module (system base syntax)
#:use-module (system base language)
#:use-module (system base message)
#:use-module (system vm vm) ;; FIXME: there's a reason for this, can't remember why tho
#:use-module (ice-9 regex)
#:use-module (ice-9 optargs)
#:use-module (ice-9 receive)
#:export (syntax-error
*current-language*
compiled-file-name compile-file compile-and-load
compile
decompile)
#:export-syntax (call-with-compile-error-catch))
;;;
;;; Compiler environment
;;;
(define (syntax-error loc msg exp)
(throw 'syntax-error-compile-time loc msg exp))
(define-macro (call-with-compile-error-catch thunk)
`(catch 'syntax-error-compile-time
,thunk
(lambda (key loc msg exp)
(if (pair? loc)
(let ((file (or (assq-ref loc 'filename) "unknown file"))
(line (assq-ref loc 'line))
(col (assq-ref loc 'column)))
(format (current-error-port)
"~A:~A:~A: ~A: ~A~%" file line col msg exp))
(format (current-error-port)
"unknown location: ~A: ~S~%" msg exp)))))
;;;
;;; Compiler
;;;
(define *current-language* (make-fluid))
(fluid-set! *current-language* 'scheme)
(define (current-language)
(fluid-ref *current-language*))
(define (call-once thunk)
(let ((entered #f))
(dynamic-wind
(lambda ()
(if entered
(error "thunk may only be entered once: ~a" thunk))
(set! entered #t))
thunk
(lambda () #t))))
(define* (call-with-output-file/atomic filename proc #:optional reference)
(let* ((template (string-append filename ".XXXXXX"))
(tmp (mkstemp! template)))
(call-once
(lambda ()
(with-throw-handler #t
(lambda ()
(proc tmp)
(chmod tmp (logand #o0666 (lognot (umask))))
(close-port tmp)
(if reference
(let ((st (stat reference)))
(utime template (stat:atime st) (stat:mtime st))))
(rename-file template filename))
(lambda args
(delete-file template)))))))
(define (ensure-language x)
(if (language? x)
x
(lookup-language x)))
;; Throws an exception if `dir' is not writable. The double-stat is OK,
;; as this is only used during compilation.
(define (ensure-writable-dir dir)
(if (file-exists? dir)
(if (access? dir W_OK)
#t
(error "directory not writable" dir))
(begin
(ensure-writable-dir (dirname dir))
(mkdir dir))))
(define (dsu-sort list key less)
(map cdr
(stable-sort (map (lambda (x) (cons (key x) x)) list)
(lambda (x y) (less (car x) (car y))))))
;;; This function is among the trickiest I've ever written. I tried many
;;; variants. In the end, simple is best, of course.
;;;
;;; After turning this around a number of times, it seems that the the
;;; desired behavior is that .go files should exist in a path, for
;;; searching. That is orthogonal to this function. For writing .go
;;; files, either you know where they should go, in which case you tell
;;; compile-file explicitly, as in the srcdir != builddir case; or you
;;; don't know, in which case this function is called, and we just put
;;; them in your own ccache dir in ~/.guile-ccache.
(define (compiled-file-name file)
(define (compiled-extension)
(cond ((or (null? %load-compiled-extensions)
(string-null? (car %load-compiled-extensions)))
(warn "invalid %load-compiled-extensions"
%load-compiled-extensions)
".go")
(else (car %load-compiled-extensions))))
(and %compile-fallback-path
(let ((f (string-append
%compile-fallback-path
;; no need for '/' separator here, canonicalize-path
;; will give us an absolute path
(canonicalize-path file)
(compiled-extension))))
(and (false-if-exception (ensure-writable-dir (dirname f)))
f))))
(define* (compile-file file #:key
(output-file #f)
(env #f)
(from (current-language))
(to 'objcode)
(opts '()))
(let* ((comp (or output-file (compiled-file-name file)))
(in (open-input-file file))
(enc (file-encoding in)))
(if enc
(set-port-encoding! in enc))
(ensure-writable-dir (dirname comp))
(call-with-output-file/atomic comp
(lambda (port)
((language-printer (ensure-language to))
(read-and-compile in #:env env #:from from #:to to #:opts opts)
port))
file)
comp))
(define* (compile-and-load file #:key (from 'scheme) (to 'value) (opts '()))
(read-and-compile (open-input-file file)
#:from from #:to to #:opts opts))
;;;
;;; Compiler interface
;;;
(define (compile-passes from to opts)
(map cdr
(or (lookup-compilation-order from to)
(error "no way to compile" from "to" to))))
(define (compile-fold passes exp env opts)
(let lp ((passes passes) (x exp) (e env) (cenv env) (first? #t))
(if (null? passes)
(values x e cenv)
(receive (x e new-cenv) ((car passes) x e opts)
(lp (cdr passes) x e (if first? new-cenv cenv) #f)))))
(define (find-language-joint from to)
(let lp ((in (reverse (or (lookup-compilation-order from to)
(error "no way to compile" from "to" to))))
(lang to))
(cond ((null? in)
(error "don't know how to join expressions" from to))
((language-joiner lang) lang)
(else
(lp (cdr in) (caar in))))))
(define* (read-and-compile port #:key
(env #f)
(from (current-language))
(to 'objcode)
(opts '()))
(let ((from (ensure-language from))
(to (ensure-language to)))
(let ((joint (find-language-joint from to)))
(with-fluids ((*current-language* from))
(let lp ((exps '()) (env #f) (cenv env))
(let ((x ((language-reader (current-language)) port)))
(cond
((eof-object? x)
(compile ((language-joiner joint) (reverse exps) env)
#:from joint #:to to #:env env #:opts opts))
(else
;; compile-fold instead of compile so we get the env too
(receive (jexp jenv jcenv)
(compile-fold (compile-passes (current-language) joint opts)
x cenv opts)
(lp (cons jexp exps) jenv jcenv))))))))))
(define* (compile x #:key
(env #f)
(from (current-language))
(to 'value)
(opts '()))
(let ((warnings (memq #:warnings opts)))
(if (pair? warnings)
(let ((warnings (cadr warnings)))
;; Sanity-check the requested warnings.
(for-each (lambda (w)
(or (lookup-warning-type w)
(warning 'unsupported-warning #f w)))
warnings))))
(receive (exp env cenv)
(compile-fold (compile-passes from to opts) x env opts)
exp))
;;;
;;; Decompiler interface
;;;
(define (decompile-passes from to opts)
(map cdr
(or (lookup-decompilation-order from to)
(error "no way to decompile" from "to" to))))
(define (decompile-fold passes exp env opts)
(if (null? passes)
(values exp env)
(receive (exp env) ((car passes) exp env opts)
(decompile-fold (cdr passes) exp env opts))))
(define* (decompile x #:key
(env #f)
(from 'value)
(to 'assembly)
(opts '()))
(decompile-fold (decompile-passes from to opts)
x
env
opts))