1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-18 01:30:27 +02:00

Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX, \uXXXX and
\UXXXXXX, for unprintable characters whose values need 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests
This commit is contained in:
Michael Gran 2009-08-08 02:35:00 -07:00
parent a876e7dcea
commit 9c44cd4559
15 changed files with 1046 additions and 306 deletions

View file

@ -34,6 +34,10 @@
;; Size, in bytes, of the length field of a loader instruction: lengths
;; are encoded in 3 bytes.
(define *len-len* 3)
;; Size, in bytes, of the width field of a string-like loader instruction:
;; the number of bytes per string character is encoded in 1 byte.
(define *width-len* 1)
(define (byte-length assembly)
(pmatch assembly
(,label (guard (not (pair? label)))
@ -45,15 +49,15 @@
((load-number ,str)
(+ 1 *len-len* (string-length str)))
((load-string ,str)
(+ 1 *len-len* (string-length str)))
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-symbol ,str)
(+ 1 *len-len* (string-length str)))
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-keyword ,str)
(+ 1 *len-len* (string-length str)))
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-array ,bv)
(+ 1 *len-len* (bytevector-length bv)))
((define ,str)
(+ 1 *len-len* (string-length str)))
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code)
(+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0)))
((,inst . _) (guard (>= (instruction-length inst) 0))

View file

@ -65,6 +65,12 @@
(write-byte (logand (ash x -8) 255))
(write-byte (logand (ash x -16) 255))
(write-byte (logand (ash x -24) 255)))
;; Emit X through the 32-bit writer that matches `byte-order':
;; 1234 selects write-uint32-le, 4321 selects write-uint32-be.
;; Any other `byte-order' value signals an error.
(define (write-uint32 x)
  (cond ((eqv? byte-order 1234) (write-uint32-le x))
        ((eqv? byte-order 4321) (write-uint32-be x))
        (else (error "unknown endianness" byte-order))))
;; Write every character of S, in order, as the integer value of the
;; character passed to write-uint32.
(define (write-wide-string s)
  (define (emit-char ch)
    (write-uint32 (char->integer ch)))
  (string-for-each emit-char s))
(define (write-loader-len len)
(write-byte (ash len -16))
(write-byte (logand (ash len -8) 255))
@ -72,6 +78,14 @@
;; Emit STR prefixed by its length: the character count goes out through
;; write-loader-len, followed by the string itself via write-string.
(define (write-loader str)
  (let ((len (string-length str)))
    (write-loader-len len)
    (write-string str)))
;; Emit STR in the variable-width loader layout:
;;   1. the character count, via write-loader-len;
;;   2. one byte holding (string-width str);
;;   3. the characters, via write-wide-string when the width is 4 and
;;      via write-string otherwise.
(define (write-sized-loader str)
  (let ((width (string-width str)))
    (write-loader-len (string-length str))
    (write-byte width)
    ;; Pick the payload writer from the width, then apply it to STR.
    ((if (= width 4) write-wide-string write-string) str)))
(define (write-bytevector bv)
(write-loader-len (bytevector-length bv))
;; Ew!
@ -89,10 +103,6 @@
(write-uint16 (case byte-order
((1234) write-uint16-le)
((4321) write-uint16-be)
(else (error "unknown endianness" byte-order))))
(write-uint32 (case byte-order
((1234) write-uint32-le)
((4321) write-uint32-be)
(else (error "unknown endianness" byte-order)))))
(let ((opcode (instruction->opcode inst))
(len (instruction-length inst)))
@ -126,11 +136,11 @@
((load-unsigned-integer ,str) (write-loader str))
((load-integer ,str) (write-loader str))
((load-number ,str) (write-loader str))
((load-string ,str) (write-loader str))
((load-symbol ,str) (write-loader str))
((load-keyword ,str) (write-loader str))
((load-string ,str) (write-sized-loader str))
((load-symbol ,str) (write-sized-loader str))
((load-keyword ,str) (write-sized-loader str))
((load-array ,bv) (write-bytevector bv))
((define ,str) (write-loader str))
((define ,str) (write-sized-loader str))
((br ,l) (write-break l))
((br-if ,l) (write-break l))
((br-if-not ,l) (write-break l))