1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00

Allow string->utf8 to constant-fold

* module/language/tree-il/primitives.scm (*interesting-primitive-names*):
(*primitive-accessors*): Add string->utf8, utf8->string, and
string-utf8-length.
(primitive-module): New public function, moved here from (language
tree-il compile-bytecode).

* module/language/tree-il/compile-bytecode.scm: Use primitive-module
from (language tree-il primitives).

* module/language/tree-il/peval.scm (peval): A bugfix: load primitives
from their proper module.  Allows bytevector primitives to fold.

* module/language/cps/guile-vm/reify-primitives.scm:
* module/language/cps/effects-analysis.scm:
* module/language/cps/types.scm
* module/language/tree-il/primitives.scm:
* module/language/tree-il/cps-primitives.scm:
* module/language/tree-il/effects.scm (make-effects-analyzer):
Add string->utf8, utf8->string, and string-utf8-length.

* module/language/tree-il/compile-cps.scm (string->utf8)
(string-utf8-length, utf8->string): New custom lowerers, including type
checks and an unboxed result for string-utf8-length.

* module/system/vm/assembler.scm:
* libguile/intrinsics.h:
* libguile/intrinsics.c: Because string-utf8-length returns an unboxed
value, we need an intrinsic for it; go ahead and add an intrinsic for
string->utf8 and utf8->string too, as we will likely be able to use
these in the future.
This commit is contained in:
Andy Wingo 2023-11-15 10:18:37 +01:00
parent 437e5ac43d
commit d08cc4f6e2
12 changed files with 186 additions and 111 deletions

View file

@ -26,6 +26,7 @@
#include "alist.h"
#include "atomics-internal.h"
#include "boolean.h"
#include "bytevectors.h"
#include "cache-internal.h"
#include "extensions.h"
#include "fluids.h"
@ -36,6 +37,7 @@
#include "keywords.h"
#include "modules.h"
#include "numbers.h"
#include "strings.h"
#include "struct.h"
#include "symbols.h"
#include "threads.h"
@ -560,6 +562,26 @@ struct_set_x_immediate (SCM x, uint8_t idx, SCM z)
scm_struct_set_x (x, scm_from_uint8 (idx), z);
}
static uint64_t
string_utf8_length (SCM str)
{
return scm_c_string_utf8_length (str);
}
#if INDIRECT_INT64_INTRINSICS
static void
indirect_string_utf8_length (uint64_t *dst, SCM str)
{
*dst = string_utf8_length (str);
}
#endif
#if INDIRECT_INT64_INTRINSICS
#define INT64_INTRINSIC(name) indirect_##name
#else
#define INT64_INTRINSIC(name) name
#endif
void
scm_bootstrap_intrinsics (void)
{
@ -581,19 +603,11 @@ scm_bootstrap_intrinsics (void)
scm_vm_intrinsics.symbol_to_keyword = scm_symbol_to_keyword;
scm_vm_intrinsics.class_of = scm_class_of;
scm_vm_intrinsics.scm_to_f64 = scm_to_double;
#if INDIRECT_INT64_INTRINSICS
scm_vm_intrinsics.scm_to_u64 = indirect_scm_to_uint64;
scm_vm_intrinsics.scm_to_u64_truncate = indirect_scm_to_uint64_truncate;
scm_vm_intrinsics.scm_to_s64 = indirect_scm_to_int64;
scm_vm_intrinsics.u64_to_scm = indirect_scm_from_uint64;
scm_vm_intrinsics.s64_to_scm = indirect_scm_from_int64;
#else
scm_vm_intrinsics.scm_to_u64 = scm_to_uint64;
scm_vm_intrinsics.scm_to_u64_truncate = scm_to_uint64_truncate;
scm_vm_intrinsics.scm_to_s64 = scm_to_int64;
scm_vm_intrinsics.u64_to_scm = scm_from_uint64;
scm_vm_intrinsics.s64_to_scm = scm_from_int64;
#endif
scm_vm_intrinsics.scm_to_u64 = INT64_INTRINSIC (scm_to_uint64);
scm_vm_intrinsics.scm_to_u64_truncate = INT64_INTRINSIC (scm_to_uint64_truncate);
scm_vm_intrinsics.scm_to_s64 = INT64_INTRINSIC (scm_to_int64);
scm_vm_intrinsics.u64_to_scm = INT64_INTRINSIC (scm_from_uint64);
scm_vm_intrinsics.s64_to_scm = INT64_INTRINSIC (scm_from_int64);
scm_vm_intrinsics.logsub = logsub;
scm_vm_intrinsics.wind = wind;
scm_vm_intrinsics.unwind = unwind;
@ -603,13 +617,8 @@ scm_bootstrap_intrinsics (void)
scm_vm_intrinsics.fluid_set_x = fluid_set_x;
scm_vm_intrinsics.push_dynamic_state = push_dynamic_state;
scm_vm_intrinsics.pop_dynamic_state = pop_dynamic_state;
#if INDIRECT_INT64_INTRINSICS
scm_vm_intrinsics.lsh = indirect_lsh;
scm_vm_intrinsics.rsh = indirect_rsh;
#else
scm_vm_intrinsics.lsh = lsh;
scm_vm_intrinsics.rsh = rsh;
#endif
scm_vm_intrinsics.lsh = INT64_INTRINSIC (lsh);
scm_vm_intrinsics.rsh = INT64_INTRINSIC (rsh);
scm_vm_intrinsics.lsh_immediate = lsh_immediate;
scm_vm_intrinsics.rsh_immediate = rsh_immediate;
scm_vm_intrinsics.heap_numbers_equal_p = scm_i_heap_numbers_equal_p;
@ -659,6 +668,9 @@ scm_bootstrap_intrinsics (void)
scm_vm_intrinsics.allocate_pointerless_words_with_freelist =
allocate_pointerless_words_with_freelist;
scm_vm_intrinsics.inexact = scm_exact_to_inexact;
scm_vm_intrinsics.string_to_utf8 = scm_string_to_utf8;
scm_vm_intrinsics.string_utf8_length = INT64_INTRINSIC (string_utf8_length);
scm_vm_intrinsics.utf8_to_string = scm_utf8_to_string;
/* Intrinsics for the baseline compiler. */
scm_vm_intrinsics.car = scm_car;

View file

@ -218,6 +218,9 @@ typedef void (*scm_t_scm_uimm_scm_intrinsic) (SCM, uint8_t, SCM);
M(scm_from_scmn_scmn, lookup_bound_public, "lookup-bound-public", LOOKUP_BOUND_PUBLIC) \
M(scm_from_scmn_scmn, lookup_bound_private, "lookup-bound-private", LOOKUP_BOUND_PRIVATE) \
M(scm_from_scm, symbol_to_string, "symbol->string", SYMBOL_TO_STRING) \
M(scm_from_scm, string_to_utf8, "string->utf8", STRING_TO_UTF8) \
M(u64_from_scm, string_utf8_length, "string-utf8-length", STRING_UTF8_LENGTH) \
M(scm_from_scm, utf8_to_string, "utf8->string", UTF8_TO_STRING) \
/* Add new intrinsics here; also update scm_bootstrap_intrinsics. */
/* Intrinsics prefixed with $ are meant to reduce bytecode size,

View file

@ -411,7 +411,10 @@ the LABELS that are clobbered by the effects of LABEL."
((symbol->string x)) ;; CPS lowering includes symbol? type check.
((symbol->keyword)) ;; Same.
((keyword->symbol)) ;; Same, for keyword?.
((string->symbol) (&read-object &string) &type-check))
((string->symbol) (&read-object &string) &type-check)
((string->utf8) (&read-object &string))
((utf8->string) (&read-object &bytevector) &type-check)
((string-utf8-length) (&read-object &string)))
;; Threads. Calls cause &all-effects, which reflects the fact that any
;; call can capture a partial continuation and reinstate it on another

View file

@ -26,6 +26,8 @@
(define-module (language cps guile-vm reify-primitives)
#:use-module (ice-9 match)
#:use-module ((language tree-il primitives)
#:select ((primitive-module . tree-il:primitive-module)))
#:use-module (language cps)
#:use-module (language cps utils)
#:use-module (language cps with-cps)
@ -36,47 +38,7 @@
#:export (reify-primitives))
(define (primitive-module name)
(case name
((bytevector?
bytevector-length
bytevector-u8-ref bytevector-u8-set!
bytevector-s8-ref bytevector-s8-set!
bytevector-u16-ref bytevector-u16-set!
bytevector-u16-native-ref bytevector-u16-native-set!
bytevector-s16-ref bytevector-s16-set!
bytevector-s16-native-ref bytevector-s16-native-set!
bytevector-u32-ref bytevector-u32-set!
bytevector-u32-native-ref bytevector-u32-native-set!
bytevector-s32-ref bytevector-s32-set!
bytevector-s32-native-ref bytevector-s32-native-set!
bytevector-u64-ref bytevector-u64-set!
bytevector-u64-native-ref bytevector-u64-native-set!
bytevector-s64-ref bytevector-s64-set!
bytevector-s64-native-ref bytevector-s64-native-set!
bytevector-ieee-single-ref bytevector-ieee-single-set!
bytevector-ieee-single-native-ref bytevector-ieee-single-native-set!
bytevector-ieee-double-ref bytevector-ieee-double-set!
bytevector-ieee-double-native-ref bytevector-ieee-double-native-set!)
'(rnrs bytevectors))
((atomic-box?
make-atomic-box atomic-box-ref atomic-box-set!
atomic-box-swap! atomic-box-compare-and-swap!)
'(ice-9 atomic))
((current-thread) '(ice-9 threads))
((class-of) '(oop goops))
((u8vector-ref
u8vector-set! s8vector-ref s8vector-set!
u16vector-ref u16vector-set! s16vector-ref s16vector-set!
u32vector-ref u32vector-set! s32vector-ref s32vector-set!
u64vector-ref u64vector-set! s64vector-ref s64vector-set!
f32vector-ref f32vector-set! f64vector-ref f64vector-set!)
'(srfi srfi-4))
(else '(guile))))
(tree-il:primitive-module name))
(define (primitive-ref cps name k src)
(with-cps cps
@ -338,6 +300,7 @@
string->symbol
symbol->keyword
symbol->string
string-utf8-length string->utf8 utf8->string
class-of
scm->f64
s64->u64 s64->scm scm->s64

View file

@ -937,13 +937,15 @@ minimum, and maximum."
((symbol->keyword &symbol) &keyword)
((keyword->symbol &keyword) &symbol)
((symbol->string &symbol) &string)
((string->symbol &string) &symbol))
((string->symbol &string) &symbol)
((string-utf8-length &string) &u64)
((utf8->string &bytevector) &string))
;;;
;;; Threads. We don't currently track threads as an object type.
;;; We don't currently track threads as an object type.
;;;
(define-simple-types

View file

@ -29,6 +29,7 @@
#:use-module (ice-9 match)
#:use-module (language bytecode)
#:use-module (language tree-il)
#:use-module ((language tree-il primitives) #:select (primitive-module))
#:use-module ((srfi srfi-1) #:select (filter-map
fold
lset-adjoin lset-union lset-difference))
@ -348,49 +349,6 @@
(visit-immediate-tags define-immediate-type-predicate)
(visit-heap-tags define-heap-type-predicate)
(define (primitive-module name)
(case name
((bytevector?
bytevector-length
bytevector-u8-ref bytevector-u8-set!
bytevector-s8-ref bytevector-s8-set!
bytevector-u16-ref bytevector-u16-set!
bytevector-u16-native-ref bytevector-u16-native-set!
bytevector-s16-ref bytevector-s16-set!
bytevector-s16-native-ref bytevector-s16-native-set!
bytevector-u32-ref bytevector-u32-set!
bytevector-u32-native-ref bytevector-u32-native-set!
bytevector-s32-ref bytevector-s32-set!
bytevector-s32-native-ref bytevector-s32-native-set!
bytevector-u64-ref bytevector-u64-set!
bytevector-u64-native-ref bytevector-u64-native-set!
bytevector-s64-ref bytevector-s64-set!
bytevector-s64-native-ref bytevector-s64-native-set!
bytevector-ieee-single-ref bytevector-ieee-single-set!
bytevector-ieee-single-native-ref bytevector-ieee-single-native-set!
bytevector-ieee-double-ref bytevector-ieee-double-set!
bytevector-ieee-double-native-ref bytevector-ieee-double-native-set!)
'(rnrs bytevectors))
((atomic-box?
make-atomic-box atomic-box-ref atomic-box-set!
atomic-box-swap! atomic-box-compare-and-swap!)
'(ice-9 atomic))
((current-thread) '(ice-9 threads))
((class-of) '(oop goops))
((u8vector-ref
u8vector-set! s8vector-ref s8vector-set!
u16vector-ref u16vector-set! s16vector-ref s16vector-set!
u32vector-ref u32vector-set! s32vector-ref s32vector-set!
u64vector-ref u64vector-set! s64vector-ref s64vector-set!
f32vector-ref f32vector-set! f64vector-ref f64vector-set!)
'(srfi srfi-4))
(else '(guile))))
(define (canonicalize exp)
(define (reify-primref src name)
;; some are builtin-ref

View file

@ -408,6 +408,64 @@
(build-term
($branch knot-keyword kheap-object src 'heap-object? #f (kw))))))
(define-primcall-converter string->utf8
(lambda (cps k src op param str)
(define not-string
#(wrong-type-arg
"string->utf8"
"Wrong type argument in position 1 (expecting string): ~S"))
(with-cps cps
(letk knot-string
($kargs () () ($throw src 'throw/value+data not-string (str))))
(letk kstr
($kargs () ()
($continue k src ($primcall 'string->utf8 #f (str)))))
(letk kheap-object
($kargs () ()
($branch knot-string kstr src 'string? #f (str))))
(build-term
($branch knot-string kheap-object src 'heap-object? #f (str))))))
(define-primcall-converter string-utf8-length
(lambda (cps k src op param str)
(define not-string
#(wrong-type-arg
"string-utf8-length"
"Wrong type argument in position 1 (expecting string): ~S"))
(with-cps cps
(letv len)
(letk knot-string
($kargs () () ($throw src 'throw/value+data not-string (str))))
(letk ktag
($kargs ('len) (len)
($continue k src ($primcall 'u64->scm #f (len)))))
(letk kstr
($kargs () ()
($continue ktag src ($primcall 'string-utf8-length #f (str)))))
(letk kheap-object
($kargs () ()
($branch knot-string kstr src 'string? #f (str))))
(build-term
($branch knot-string kheap-object src 'heap-object? #f (str))))))
(define-primcall-converter utf8->string
(lambda (cps k src op param bv)
(define not-bv
#(wrong-type-arg
"utf8->string"
"Wrong type argument in position 1 (expecting bytevector): ~S"))
(with-cps cps
(letk knot-bv
($kargs () () ($throw src 'throw/value+data not-bv (bv))))
(letk kbv
($kargs () ()
($continue k src ($primcall 'utf8->string #f (bv)))))
(letk kheap-object
($kargs () ()
($branch knot-bv kbv src 'bytevector? #f (bv))))
(build-term
($branch knot-bv kheap-object src 'heap-object? #f (bv))))))
(define (ensure-pair cps src op pred x is-pair)
(define msg
(match pred

View file

@ -131,6 +131,9 @@
(define-cps-primitive class-of 1 1)
(define-cps-primitive string-utf8-length 1 1)
(define-cps-primitive utf8->string 1 1)
(define-cps-primitive string->utf8 1 1)
(define-cps-primitive (bytevector-length bv-length) 1 1)
(define-cps-primitive (bytevector-u8-ref bv-u8-ref) 2 1)
(define-cps-primitive (bytevector-u16-native-ref bv-u16-ref) 2 1)

View file

@ -1,6 +1,6 @@
;;; Effects analysis on Tree-IL
;; Copyright (C) 2011, 2012, 2013, 2021 Free Software Foundation, Inc.
;; Copyright (C) 2011, 2012, 2013, 2021, 2023 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@ -465,6 +465,21 @@ of an expression."
(cause &type-check)
(cause &string)))
(($ <primcall> _ 'string->utf8 (s))
(logior (compute-effects s)
(cause &type-check)
(cause &allocation)
&string))
(($ <primcall> _ 'string-utf8-length (s))
(logior (compute-effects s)
(cause &type-check)
&string))
(($ <primcall> _ 'utf8->string (bv))
(logior (compute-effects bv)
(cause &type-check)
(cause &allocation)
&bytevector))
(($ <primcall> _
(or 'bytevector-u8-ref 'bytevector-s8-ref
'bytevector-u16-ref 'bytevector-u16-native-ref

View file

@ -581,9 +581,10 @@ top-level bindings from ENV and return the resulting expression."
;; todo: further optimize commutative primitives
(catch #t
(lambda ()
(define mod (resolve-interface (primitive-module name)))
(call-with-values
(lambda ()
(apply (module-ref the-scm-module name) args))
(apply (module-ref mod name) args))
(lambda results
(values #t results))))
(lambda _

View file

@ -33,7 +33,8 @@
constructor-primitive?
singly-valued-primitive? equality-primitive?
bailout-primitive?
negate-primitive))
negate-primitive
primitive-module))
;; When adding to this, be sure to update *multiply-valued-primitives*
;; if appropriate.
@ -100,6 +101,8 @@
string-length string-ref string-set!
string->utf8 string-utf8-length utf8->string
make-struct/simple struct-vtable struct-ref struct-set!
bytevector-length
@ -160,6 +163,7 @@
memq memv
struct-ref
string-ref
string->utf8 string-utf8-length utf8->string
bytevector-u8-ref bytevector-s8-ref
bytevector-u16-ref bytevector-u16-native-ref
bytevector-s16-ref bytevector-s16-native-ref
@ -325,6 +329,53 @@
(define (primitive-module name)
(case name
((bytevector?
bytevector-length
bytevector-u8-ref bytevector-u8-set!
bytevector-s8-ref bytevector-s8-set!
bytevector-u16-ref bytevector-u16-set!
bytevector-u16-native-ref bytevector-u16-native-set!
bytevector-s16-ref bytevector-s16-set!
bytevector-s16-native-ref bytevector-s16-native-set!
bytevector-u32-ref bytevector-u32-set!
bytevector-u32-native-ref bytevector-u32-native-set!
bytevector-s32-ref bytevector-s32-set!
bytevector-s32-native-ref bytevector-s32-native-set!
bytevector-u64-ref bytevector-u64-set!
bytevector-u64-native-ref bytevector-u64-native-set!
bytevector-s64-ref bytevector-s64-set!
bytevector-s64-native-ref bytevector-s64-native-set!
bytevector-ieee-single-ref bytevector-ieee-single-set!
bytevector-ieee-single-native-ref bytevector-ieee-single-native-set!
bytevector-ieee-double-ref bytevector-ieee-double-set!
bytevector-ieee-double-native-ref bytevector-ieee-double-native-set!
string->utf8 utf8->string)
'(rnrs bytevectors))
((atomic-box?
make-atomic-box atomic-box-ref atomic-box-set!
atomic-box-swap! atomic-box-compare-and-swap!)
'(ice-9 atomic))
((current-thread) '(ice-9 threads))
((class-of) '(oop goops))
((u8vector-ref
u8vector-set! s8vector-ref s8vector-set!
u16vector-ref u16vector-set! s16vector-ref s16vector-set!
u32vector-ref u32vector-set! s32vector-ref s32vector-set!
u64vector-ref u64vector-set! s64vector-ref s64vector-set!
f32vector-ref f32vector-set! f64vector-ref f64vector-set!)
'(srfi srfi-4))
(else '(guile))))
(define *primitive-expand-table* (make-hash-table))
(define (expand-primcall x)

View file

@ -259,6 +259,9 @@
emit-define!
emit-current-module
emit-symbol->string
emit-string-utf8-length
emit-string->utf8
emit-utf8->string
;; Intrinsics for use by the baseline compiler.
emit-$car
@ -1574,6 +1577,9 @@ returned instead."
(define-scm<-scm-scm-intrinsic define!)
(define-scm<-thread-intrinsic current-module)
(define-scm<-scm-intrinsic symbol->string)
(define-scm<-scm-intrinsic string->utf8)
(define-scm<-scm-intrinsic utf8->string)
(define-u64<-scm-intrinsic string-utf8-length)
(define-scm<-scm-intrinsic $car)
(define-scm<-scm-intrinsic $cdr)