diff --git a/libguile/intrinsics.c b/libguile/intrinsics.c index 837464709..99c044cbd 100644 --- a/libguile/intrinsics.c +++ b/libguile/intrinsics.c @@ -26,6 +26,7 @@ #include "alist.h" #include "atomics-internal.h" #include "boolean.h" +#include "bytevectors.h" #include "cache-internal.h" #include "extensions.h" #include "fluids.h" @@ -36,6 +37,7 @@ #include "keywords.h" #include "modules.h" #include "numbers.h" +#include "strings.h" #include "struct.h" #include "symbols.h" #include "threads.h" @@ -560,6 +562,26 @@ struct_set_x_immediate (SCM x, uint8_t idx, SCM z) scm_struct_set_x (x, scm_from_uint8 (idx), z); } +static uint64_t +string_utf8_length (SCM str) +{ + return scm_c_string_utf8_length (str); +} + +#if INDIRECT_INT64_INTRINSICS +static void +indirect_string_utf8_length (uint64_t *dst, SCM str) +{ + *dst = string_utf8_length (str); +} +#endif + +#if INDIRECT_INT64_INTRINSICS +#define INT64_INTRINSIC(name) indirect_##name +#else +#define INT64_INTRINSIC(name) name +#endif + void scm_bootstrap_intrinsics (void) { @@ -581,19 +603,11 @@ scm_bootstrap_intrinsics (void) scm_vm_intrinsics.symbol_to_keyword = scm_symbol_to_keyword; scm_vm_intrinsics.class_of = scm_class_of; scm_vm_intrinsics.scm_to_f64 = scm_to_double; -#if INDIRECT_INT64_INTRINSICS - scm_vm_intrinsics.scm_to_u64 = indirect_scm_to_uint64; - scm_vm_intrinsics.scm_to_u64_truncate = indirect_scm_to_uint64_truncate; - scm_vm_intrinsics.scm_to_s64 = indirect_scm_to_int64; - scm_vm_intrinsics.u64_to_scm = indirect_scm_from_uint64; - scm_vm_intrinsics.s64_to_scm = indirect_scm_from_int64; -#else - scm_vm_intrinsics.scm_to_u64 = scm_to_uint64; - scm_vm_intrinsics.scm_to_u64_truncate = scm_to_uint64_truncate; - scm_vm_intrinsics.scm_to_s64 = scm_to_int64; - scm_vm_intrinsics.u64_to_scm = scm_from_uint64; - scm_vm_intrinsics.s64_to_scm = scm_from_int64; -#endif + scm_vm_intrinsics.scm_to_u64 = INT64_INTRINSIC (scm_to_uint64); + scm_vm_intrinsics.scm_to_u64_truncate = INT64_INTRINSIC (scm_to_uint64_truncate); + scm_vm_intrinsics.scm_to_s64 = INT64_INTRINSIC (scm_to_int64); + scm_vm_intrinsics.u64_to_scm = INT64_INTRINSIC (scm_from_uint64); + scm_vm_intrinsics.s64_to_scm = INT64_INTRINSIC (scm_from_int64); scm_vm_intrinsics.logsub = logsub; scm_vm_intrinsics.wind = wind; scm_vm_intrinsics.unwind = unwind; @@ -603,13 +617,8 @@ scm_bootstrap_intrinsics (void) scm_vm_intrinsics.fluid_set_x = fluid_set_x; scm_vm_intrinsics.push_dynamic_state = push_dynamic_state; scm_vm_intrinsics.pop_dynamic_state = pop_dynamic_state; -#if INDIRECT_INT64_INTRINSICS - scm_vm_intrinsics.lsh = indirect_lsh; - scm_vm_intrinsics.rsh = indirect_rsh; -#else - scm_vm_intrinsics.lsh = lsh; - scm_vm_intrinsics.rsh = rsh; -#endif + scm_vm_intrinsics.lsh = INT64_INTRINSIC (lsh); + scm_vm_intrinsics.rsh = INT64_INTRINSIC (rsh); scm_vm_intrinsics.lsh_immediate = lsh_immediate; scm_vm_intrinsics.rsh_immediate = rsh_immediate; scm_vm_intrinsics.heap_numbers_equal_p = scm_i_heap_numbers_equal_p; @@ -659,6 +668,9 @@ scm_bootstrap_intrinsics (void) scm_vm_intrinsics.allocate_pointerless_words_with_freelist = allocate_pointerless_words_with_freelist; scm_vm_intrinsics.inexact = scm_exact_to_inexact; + scm_vm_intrinsics.string_to_utf8 = scm_string_to_utf8; + scm_vm_intrinsics.string_utf8_length = INT64_INTRINSIC (string_utf8_length); + scm_vm_intrinsics.utf8_to_string = scm_utf8_to_string; /* Intrinsics for the baseline compiler. */ scm_vm_intrinsics.car = scm_car; @@ -679,7 +691,7 @@ scm_bootstrap_intrinsics (void) scm_vm_intrinsics.struct_ref_immediate = struct_ref_immediate; scm_vm_intrinsics.struct_set_x_immediate = struct_set_x_immediate; scm_vm_intrinsics.symbol_to_string = scm_symbol_to_string; - + scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION, "scm_init_intrinsics", (scm_t_extension_init_func)scm_init_intrinsics, diff --git a/libguile/intrinsics.h b/libguile/intrinsics.h index 87fcd0e5e..d2ffc847e 100644 --- a/libguile/intrinsics.h +++ b/libguile/intrinsics.h @@ -218,6 +218,9 @@ typedef void (*scm_t_scm_uimm_scm_intrinsic) (SCM, uint8_t, SCM); M(scm_from_scmn_scmn, lookup_bound_public, "lookup-bound-public", LOOKUP_BOUND_PUBLIC) \ M(scm_from_scmn_scmn, lookup_bound_private, "lookup-bound-private", LOOKUP_BOUND_PRIVATE) \ M(scm_from_scm, symbol_to_string, "symbol->string", SYMBOL_TO_STRING) \ + M(scm_from_scm, string_to_utf8, "string->utf8", STRING_TO_UTF8) \ + M(u64_from_scm, string_utf8_length, "string-utf8-length", STRING_UTF8_LENGTH) \ + M(scm_from_scm, utf8_to_string, "utf8->string", UTF8_TO_STRING) \ /* Add new intrinsics here; also update scm_bootstrap_intrinsics. */ /* Intrinsics prefixed with $ are meant to reduce bytecode size, diff --git a/module/language/cps/effects-analysis.scm b/module/language/cps/effects-analysis.scm index 69f0a51de..7b1e1d0ea 100644 --- a/module/language/cps/effects-analysis.scm +++ b/module/language/cps/effects-analysis.scm @@ -411,7 +411,10 @@ the LABELS that are clobbered by the effects of LABEL." ((symbol->string x)) ;; CPS lowering includes symbol? type check. ((symbol->keyword)) ;; Same. ((keyword->symbol)) ;; Same, for keyword?. - ((string->symbol) (&read-object &string) &type-check)) + ((string->symbol) (&read-object &string) &type-check) + ((string->utf8) (&read-object &string)) + ((utf8->string) (&read-object &bytevector) &type-check) + ((string-utf8-length) (&read-object &string))) ;; Threads. Calls cause &all-effects, which reflects the fact that any ;; call can capture a partial continuation and reinstate it on another diff --git a/module/language/cps/guile-vm/reify-primitives.scm b/module/language/cps/guile-vm/reify-primitives.scm index 871d12524..035a3266b 100644 --- a/module/language/cps/guile-vm/reify-primitives.scm +++ b/module/language/cps/guile-vm/reify-primitives.scm @@ -26,6 +26,8 @@ (define-module (language cps guile-vm reify-primitives) #:use-module (ice-9 match) + #:use-module ((language tree-il primitives) + #:select ((primitive-module . tree-il:primitive-module))) #:use-module (language cps) #:use-module (language cps utils) #:use-module (language cps with-cps) @@ -36,47 +38,7 @@ #:export (reify-primitives)) (define (primitive-module name) - (case name - ((bytevector? - bytevector-length - - bytevector-u8-ref bytevector-u8-set! - bytevector-s8-ref bytevector-s8-set! - - bytevector-u16-ref bytevector-u16-set! - bytevector-u16-native-ref bytevector-u16-native-set! - bytevector-s16-ref bytevector-s16-set! - bytevector-s16-native-ref bytevector-s16-native-set! - - bytevector-u32-ref bytevector-u32-set! - bytevector-u32-native-ref bytevector-u32-native-set! - bytevector-s32-ref bytevector-s32-set! - bytevector-s32-native-ref bytevector-s32-native-set! - - bytevector-u64-ref bytevector-u64-set! - bytevector-u64-native-ref bytevector-u64-native-set! - bytevector-s64-ref bytevector-s64-set! - bytevector-s64-native-ref bytevector-s64-native-set! - - bytevector-ieee-single-ref bytevector-ieee-single-set! - bytevector-ieee-single-native-ref bytevector-ieee-single-native-set! - bytevector-ieee-double-ref bytevector-ieee-double-set! - bytevector-ieee-double-native-ref bytevector-ieee-double-native-set!) - '(rnrs bytevectors)) - ((atomic-box? - make-atomic-box atomic-box-ref atomic-box-set! - atomic-box-swap! atomic-box-compare-and-swap!) - '(ice-9 atomic)) - ((current-thread) '(ice-9 threads)) - ((class-of) '(oop goops)) - ((u8vector-ref - u8vector-set! s8vector-ref s8vector-set! - u16vector-ref u16vector-set! s16vector-ref s16vector-set! - u32vector-ref u32vector-set! s32vector-ref s32vector-set! - u64vector-ref u64vector-set! s64vector-ref s64vector-set! - f32vector-ref f32vector-set! f64vector-ref f64vector-set!) - '(srfi srfi-4)) - (else '(guile)))) + (tree-il:primitive-module name)) (define (primitive-ref cps name k src) (with-cps cps @@ -338,6 +300,7 @@ string->symbol symbol->keyword symbol->string + string-utf8-length string->utf8 utf8->string class-of scm->f64 s64->u64 s64->scm scm->s64 diff --git a/module/language/cps/types.scm b/module/language/cps/types.scm index 858f08b2e..9816078d4 100644 --- a/module/language/cps/types.scm +++ b/module/language/cps/types.scm @@ -937,13 +937,15 @@ minimum, and maximum." ((symbol->keyword &symbol) &keyword) ((keyword->symbol &keyword) &symbol) ((symbol->string &symbol) &string) - ((string->symbol &string) &symbol)) + ((string->symbol &string) &symbol) + ((string-utf8-length &string) &u64) + ((utf8->string &bytevector) &string)) ;;; -;;; Threads. We don't currently track threads as an object type. +;;; We don't currently track threads as an object type. ;;; (define-simple-types diff --git a/module/language/tree-il/compile-bytecode.scm b/module/language/tree-il/compile-bytecode.scm index 2be2b1397..d98c40fe9 100644 --- a/module/language/tree-il/compile-bytecode.scm +++ b/module/language/tree-il/compile-bytecode.scm @@ -29,6 +29,7 @@ #:use-module (ice-9 match) #:use-module (language bytecode) #:use-module (language tree-il) + #:use-module ((language tree-il primitives) #:select (primitive-module)) #:use-module ((srfi srfi-1) #:select (filter-map fold lset-adjoin lset-union lset-difference)) @@ -348,49 +349,6 @@ (visit-immediate-tags define-immediate-type-predicate) (visit-heap-tags define-heap-type-predicate) -(define (primitive-module name) - (case name - ((bytevector? - bytevector-length - - bytevector-u8-ref bytevector-u8-set! - bytevector-s8-ref bytevector-s8-set! - - bytevector-u16-ref bytevector-u16-set! - bytevector-u16-native-ref bytevector-u16-native-set! - bytevector-s16-ref bytevector-s16-set! - bytevector-s16-native-ref bytevector-s16-native-set! - - bytevector-u32-ref bytevector-u32-set! - bytevector-u32-native-ref bytevector-u32-native-set! - bytevector-s32-ref bytevector-s32-set! - bytevector-s32-native-ref bytevector-s32-native-set! - - bytevector-u64-ref bytevector-u64-set! - bytevector-u64-native-ref bytevector-u64-native-set! - bytevector-s64-ref bytevector-s64-set! - bytevector-s64-native-ref bytevector-s64-native-set! - - bytevector-ieee-single-ref bytevector-ieee-single-set! - bytevector-ieee-single-native-ref bytevector-ieee-single-native-set! - bytevector-ieee-double-ref bytevector-ieee-double-set! - bytevector-ieee-double-native-ref bytevector-ieee-double-native-set!) - '(rnrs bytevectors)) - ((atomic-box? - make-atomic-box atomic-box-ref atomic-box-set! - atomic-box-swap! atomic-box-compare-and-swap!) - '(ice-9 atomic)) - ((current-thread) '(ice-9 threads)) - ((class-of) '(oop goops)) - ((u8vector-ref - u8vector-set! s8vector-ref s8vector-set! - u16vector-ref u16vector-set! s16vector-ref s16vector-set! - u32vector-ref u32vector-set! s32vector-ref s32vector-set! - u64vector-ref u64vector-set! s64vector-ref s64vector-set! - f32vector-ref f32vector-set! f64vector-ref f64vector-set!) - '(srfi srfi-4)) - (else '(guile)))) - (define (canonicalize exp) (define (reify-primref src name) ;; some are builtin-ref diff --git a/module/language/tree-il/compile-cps.scm b/module/language/tree-il/compile-cps.scm index 5c0fac579..052c9ec6f 100644 --- a/module/language/tree-il/compile-cps.scm +++ b/module/language/tree-il/compile-cps.scm @@ -408,6 +408,64 @@ (build-term ($branch knot-keyword kheap-object src 'heap-object? #f (kw)))))) +(define-primcall-converter string->utf8 + (lambda (cps k src op param str) + (define not-string + #(wrong-type-arg + "string->utf8" + "Wrong type argument in position 1 (expecting string): ~S")) + (with-cps cps + (letk knot-string + ($kargs () () ($throw src 'throw/value+data not-string (str)))) + (letk kstr + ($kargs () () + ($continue k src ($primcall 'string->utf8 #f (str))))) + (letk kheap-object + ($kargs () () + ($branch knot-string kstr src 'string? #f (str)))) + (build-term + ($branch knot-string kheap-object src 'heap-object? #f (str)))))) + +(define-primcall-converter string-utf8-length + (lambda (cps k src op param str) + (define not-string + #(wrong-type-arg + "string-utf8-length" + "Wrong type argument in position 1 (expecting string): ~S")) + (with-cps cps + (letv len) + (letk knot-string + ($kargs () () ($throw src 'throw/value+data not-string (str)))) + (letk ktag + ($kargs ('len) (len) + ($continue k src ($primcall 'u64->scm #f (len))))) + (letk kstr + ($kargs () () + ($continue ktag src ($primcall 'string-utf8-length #f (str))))) + (letk kheap-object + ($kargs () () + ($branch knot-string kstr src 'string? #f (str)))) + (build-term + ($branch knot-string kheap-object src 'heap-object? #f (str)))))) + +(define-primcall-converter utf8->string + (lambda (cps k src op param bv) + (define not-bv + #(wrong-type-arg + "utf8->string" + "Wrong type argument in position 1 (expecting bytevector): ~S")) + (with-cps cps + (letk knot-bv + ($kargs () () ($throw src 'throw/value+data not-bv (bv)))) + (letk kbv + ($kargs () () + ($continue k src ($primcall 'utf8->string #f (bv))))) + (letk kheap-object + ($kargs () () + ($branch knot-bv kbv src 'bytevector? #f (bv)))) + (build-term + ($branch knot-bv kheap-object src 'heap-object? #f (bv)))))) + (define (ensure-pair cps src op pred x is-pair) (define msg (match pred diff --git a/module/language/tree-il/cps-primitives.scm b/module/language/tree-il/cps-primitives.scm index f755d9474..5acb41857 100644 --- a/module/language/tree-il/cps-primitives.scm +++ b/module/language/tree-il/cps-primitives.scm @@ -131,6 +131,9 @@ (define-cps-primitive class-of 1 1) +(define-cps-primitive string-utf8-length 1 1) +(define-cps-primitive utf8->string 1 1) +(define-cps-primitive string->utf8 1 1) (define-cps-primitive (bytevector-length bv-length) 1 1) (define-cps-primitive (bytevector-u8-ref bv-u8-ref) 2 1) (define-cps-primitive (bytevector-u16-native-ref bv-u16-ref) 2 1) diff --git a/module/language/tree-il/effects.scm b/module/language/tree-il/effects.scm index f69f84165..a37a6d522 100644 --- a/module/language/tree-il/effects.scm +++ b/module/language/tree-il/effects.scm @@ -1,6 +1,6 @@ ;;; Effects analysis on Tree-IL -;; Copyright (C) 2011, 2012, 2013, 2021 Free Software Foundation, Inc. +;; Copyright (C) 2011, 2012, 2013, 2021, 2023 Free Software Foundation, Inc. ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -465,6 +465,21 @@ of an expression." (cause &type-check) (cause &string))) + (($ _ 'string->utf8 (s)) + (logior (compute-effects s) + (cause &type-check) + (cause &allocation) + &string)) + (($ _ 'string-utf8-length (s)) + (logior (compute-effects s) + (cause &type-check) + &string)) + (($ _ 'utf8->string (bv)) + (logior (compute-effects bv) + (cause &type-check) + (cause &allocation) + &bytevector)) + (($ _ (or 'bytevector-u8-ref 'bytevector-s8-ref 'bytevector-u16-ref 'bytevector-u16-native-ref diff --git a/module/language/tree-il/peval.scm b/module/language/tree-il/peval.scm index baf4f5847..05a2d7f05 100644 --- a/module/language/tree-il/peval.scm +++ b/module/language/tree-il/peval.scm @@ -581,9 +581,10 @@ top-level bindings from ENV and return the resulting expression." ;; todo: further optimize commutative primitives (catch #t (lambda () + (define mod (resolve-interface (primitive-module name))) (call-with-values (lambda () - (apply (module-ref the-scm-module name) args)) + (apply (module-ref mod name) args)) (lambda results (values #t results)))) (lambda _ diff --git a/module/language/tree-il/primitives.scm b/module/language/tree-il/primitives.scm index 3921f81d2..22a89063d 100644 --- a/module/language/tree-il/primitives.scm +++ b/module/language/tree-il/primitives.scm @@ -33,7 +33,8 @@ constructor-primitive? singly-valued-primitive? equality-primitive? bailout-primitive? - negate-primitive)) + negate-primitive + primitive-module)) ;; When adding to this, be sure to update *multiply-valued-primitives* ;; if appropriate. @@ -100,6 +101,8 @@ string-length string-ref string-set! + string->utf8 string-utf8-length utf8->string + make-struct/simple struct-vtable struct-ref struct-set! bytevector-length @@ -160,6 +163,7 @@ memq memv struct-ref string-ref + string->utf8 string-utf8-length utf8->string bytevector-u8-ref bytevector-s8-ref bytevector-u16-ref bytevector-u16-native-ref bytevector-s16-ref bytevector-s16-native-ref @@ -325,6 +329,53 @@ +(define (primitive-module name) + (case name + ((bytevector? + bytevector-length + + bytevector-u8-ref bytevector-u8-set! + bytevector-s8-ref bytevector-s8-set! + + bytevector-u16-ref bytevector-u16-set! + bytevector-u16-native-ref bytevector-u16-native-set! + bytevector-s16-ref bytevector-s16-set! + bytevector-s16-native-ref bytevector-s16-native-set! + + bytevector-u32-ref bytevector-u32-set! + bytevector-u32-native-ref bytevector-u32-native-set! + bytevector-s32-ref bytevector-s32-set! + bytevector-s32-native-ref bytevector-s32-native-set! + + bytevector-u64-ref bytevector-u64-set! + bytevector-u64-native-ref bytevector-u64-native-set! + bytevector-s64-ref bytevector-s64-set! + bytevector-s64-native-ref bytevector-s64-native-set! + + bytevector-ieee-single-ref bytevector-ieee-single-set! + bytevector-ieee-single-native-ref bytevector-ieee-single-native-set! + bytevector-ieee-double-ref bytevector-ieee-double-set! + bytevector-ieee-double-native-ref bytevector-ieee-double-native-set! + + string->utf8 utf8->string) + '(rnrs bytevectors)) + ((atomic-box? + make-atomic-box atomic-box-ref atomic-box-set! + atomic-box-swap! atomic-box-compare-and-swap!) + '(ice-9 atomic)) + ((current-thread) '(ice-9 threads)) + ((class-of) '(oop goops)) + ((u8vector-ref + u8vector-set! s8vector-ref s8vector-set! + u16vector-ref u16vector-set! s16vector-ref s16vector-set! + u32vector-ref u32vector-set! s32vector-ref s32vector-set! + u64vector-ref u64vector-set! s64vector-ref s64vector-set! + f32vector-ref f32vector-set! f64vector-ref f64vector-set!) + '(srfi srfi-4)) + (else '(guile)))) + + + (define *primitive-expand-table* (make-hash-table)) (define (expand-primcall x) diff --git a/module/system/vm/assembler.scm b/module/system/vm/assembler.scm index aa1d324a2..0ffc0c6e3 100644 --- a/module/system/vm/assembler.scm +++ b/module/system/vm/assembler.scm @@ -259,6 +259,9 @@ emit-define! emit-current-module emit-symbol->string + emit-string-utf8-length + emit-string->utf8 + emit-utf8->string ;; Intrinsics for use by the baseline compiler. emit-$car @@ -1574,6 +1577,9 @@ returned instead." (define-scm<-scm-scm-intrinsic define!) (define-scm<-thread-intrinsic current-module) (define-scm<-scm-intrinsic symbol->string) +(define-scm<-scm-intrinsic string->utf8) +(define-scm<-scm-intrinsic utf8->string) +(define-u64<-scm-intrinsic string-utf8-length) (define-scm<-scm-intrinsic $car) (define-scm<-scm-intrinsic $cdr)