1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00
guile/libguile/srfi-14.h
Michael Gran f49dbcadf3 Unicode-capable srfi-14 charsets
* libguile/Makefile.am: distribute new files srfi-14.i.c and
  unidata_to_charset.pl

* libguile/chars.c (scm_c_upcase, scm_c_downcase): use Unicode-enabled
  toupper and tolower

* libguile/srfi-14.h (scm_t_char_range, scm_t_char_set): new structures
  to describe char-sets
  (scm_t_char_set_cursor): new structure to describe char-set-cursors
  (SCM_BITS_PER_LONG): removed
  (SCM_CHARSET_GET): calls function
  New declarations for scm_i_charset_get, scm_i_charset_set,
  scm_i_charset_unset, and scm_debug_char_set.

* test-suite/tests/srfi-14.test: new tests

* libguile/srfi-14.c (SCM_CHARSET_DATA): new macro
  (SCM_CHARSET_SET, SCM_CHARSET_UNSET): call function
  (BYTES_PER_CHARSET, LONGS_PER_CHARSET): removed
  (scm_i_charset_get, scm_i_charset_set, scm_i_charset_unset)
  (charsets_equal, charsets_leq, charsets_union)
  (charsets_intersection, charsets_complement, charsets_xor): new
  functions that are low-level charset operators
  (charset_print, charset_free): modified for new charset struct
  (charset_cursor_print, charset_cursor_free): new function
  (make_char_set, scm_char_set_p, scm_char_set_eq, scm_char_set_leq)
  (scm_char_set_hash, scm_char_set_cursor, scm_char_set_ref)
  (scm_char_set_cursor_next, scm_end_of_char_set_p, scm_char_set_fold)
  (scm_char_set_unfold, scm_char_set_unfold_x, scm_char_set_for_each)
  (scm_char_set_map, scm_char_set_copy, scm_char_set, scm_list_to_char_set)
  (scm_list_to_char_set_x, scm_string_to_char_set, scm_string_to_char_set_x)
  (scm_char_set_filter, scm_char_set_filter_x, scm_ucs_range_to_char_set)
  (scm_ucs_range_to_char_set_x, scm_to_char_set, scm_char_set_size)
  (scm_char_set_count, scm_char_set_to_list, scm_char_set_to_string)
  (scm_char_set_contains_p, scm_char_set_every, scm_char_set_any)
  (scm_char_set_adjoin, scm_char_set_delete, scm_char_set_adjoin_x)
  (scm_char_set_delete_x, scm_char_set_complement, scm_char_set_union)
  (scm_char_set_intersection, scm_char_set_difference, scm_char_set_xor)
  (scm_char_set_diff_plus_intersection, scm_char_set_complement_x)
  (scm_char_set_union_x, scm_char_set_intersection_x, scm_char_set_difference_x)
  (scm_char_set_xor_x, scm_char_set_diff_plus_intersection_x): modified
  to use new charset and charset-cursor data structures
  (CSET_BLANK_PRED, CSET_SYMBOL_PRED, CSET_PUNCT_PRED, CSET_LOWER_PRED)
  (CSET_UPPER_PRED, CSET_LETTER_PRED, CSET_DIGIT_PRED, CSET_WHITESPACE_PRED)
  (CSET_CONTROL_PRED, CSET_HEX_DIGIT_PRED, CSET_ASCII_PRED, CSET_LETTER_PRED)
  (CSET_LETTER_AND_DIGIT_PRED, CSET_PRINTING_PRED, CSET_TRUE_PRED)
  (CSET_FALSE_PRED): removed
  (scm_srfi_14_compute_char_sets): removed - too slow to iterate
  over all of unicode at startup
  (scm_debug_char_set) [SCM_CHARSET_DEBUG]: new function
2009-08-27 07:43:33 -07:00

127 lines
4.9 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#ifndef SCM_SRFI_14_H
#define SCM_SRFI_14_H
/* srfi-14.h --- SRFI-14 procedures for Guile
*
* Copyright (C) 2001, 2004, 2006, 2008 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#include "libguile/__scm.h"
/* A range of characters described by its two end points, `lo' and `hi'.
   NOTE(review): the bounds are presumably inclusive with lo <= hi --
   confirm against the range handling in srfi-14.c.  */
typedef struct
{
  scm_t_wchar lo;               /* lower end point of the range */
  scm_t_wchar hi;               /* upper end point of the range */
} scm_t_char_range;
/* Payload of a character-set smob: a counted array of character ranges.
   SCM_CHARSET_GET casts a smob's data word to a pointer to this type.
   NOTE(review): `len' is presumably the number of entries in `ranges',
   and the ranges are presumably kept sorted and non-overlapping --
   confirm in srfi-14.c.  */
typedef struct
{
  size_t len;                   /* entry count for `ranges' (see note) */
  scm_t_char_range *ranges;     /* the ranges making up the set */
} scm_t_char_set;
/* State of a char-set cursor, as handled by scm_char_set_cursor,
   scm_char_set_ref, scm_char_set_cursor_next and scm_end_of_char_set_p.
   NOTE(review): `range' looks like an index into scm_t_char_set.ranges
   and `n' like the current character within that range -- confirm in
   srfi-14.c.  */
typedef struct
{
  size_t range;                 /* which range the cursor is in (see note) */
  scm_t_wchar n;                /* character the cursor refers to (see note) */
} scm_t_char_set_cursor;
/* Look up IDX in the character-set smob CS: the smob data of CS is cast
   to `scm_t_char_set *' and handed, together with IDX, to
   scm_i_charset_get, whose int result is the value of the macro.
   CS is not type-checked here; test it with SCM_CHARSETP first when its
   type is not already known.  */
#define SCM_CHARSET_GET(cs, idx)                                        \
  (scm_i_charset_get ((scm_t_char_set *) SCM_SMOB_DATA (cs), (idx)))
/* Nonzero iff X is not an immediate (SCM_IMP) and its SCM_TYP16 tag
   equals scm_tc16_charset.  The SCM_IMP test is deliberately first, so
   SCM_TYP16 is only reached for non-immediates.  X appears twice in the
   expansion, so avoid arguments with side effects.  */
#define SCM_CHARSETP(x)                                                 \
  (!SCM_IMP (x) && SCM_TYP16 (x) == scm_tc16_charset)
/* Smob type code for character sets.  SCM_CHARSETP compares an object's
   SCM_TYP16 tag against this value.
   NOTE(review): presumably assigned when the smob type is registered
   during scm_init_srfi_14 -- confirm in srfi-14.c.  */
SCM_API int scm_tc16_charset;
/* Low-level operations on a raw scm_t_char_set, taking the character N
   as a scm_t_wchar rather than as a SCM value.  scm_i_charset_get is the
   function that SCM_CHARSET_GET expands to.
   NOTE(review): going by their names these test, add and remove a single
   character respectively; they are declared SCM_INTERNAL, so presumably
   not meant for use outside libguile -- confirm.  */
SCM_INTERNAL int scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n);
SCM_INTERNAL void scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n);
SCM_INTERNAL void scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n);
/* General procedures (the names match SRFI-14's char-set?, char-set=,
   char-set<= and char-set-hash).
   NOTE(review): scm_char_set_eq and scm_char_set_leq take a single
   argument, `char_sets', presumably a Scheme list of the sets to
   compare -- confirm in srfi-14.c.  */
SCM_API SCM scm_char_set_p (SCM obj);
SCM_API SCM scm_char_set_eq (SCM char_sets);
SCM_API SCM scm_char_set_leq (SCM char_sets);
SCM_API SCM scm_char_set_hash (SCM cs, SCM bound);
/* Iterating over character sets: first the cursor interface, in which
   cursors are passed around as SCM values, then the fold / unfold /
   for-each / map family.
   NOTE(review): the `_x' suffix conventionally marks the C binding of
   the Scheme `!' procedure, here char-set-unfold! -- confirm.  */
SCM_API SCM scm_char_set_cursor (SCM cs);
SCM_API SCM scm_char_set_ref (SCM cs, SCM cursor);
SCM_API SCM scm_char_set_cursor_next (SCM cs, SCM cursor);
SCM_API SCM scm_end_of_char_set_p (SCM cursor);
SCM_API SCM scm_char_set_fold (SCM kons, SCM knil, SCM cs);
SCM_API SCM scm_char_set_unfold (SCM p, SCM f, SCM g, SCM seed, SCM base_cs);
SCM_API SCM scm_char_set_unfold_x (SCM p, SCM f, SCM g, SCM seed, SCM base_cs);
SCM_API SCM scm_char_set_for_each (SCM proc, SCM cs);
SCM_API SCM scm_char_set_map (SCM proc, SCM cs);
/* Creating character sets.  In this group every `_x' function has the
   same parameter list as its non-`_x' counterpart.
   NOTE(review): `base_cs' is presumably the optional base set of the
   corresponding Scheme procedure, and `rest' a Scheme list holding the
   remaining arguments -- confirm in srfi-14.c.  */
SCM_API SCM scm_char_set_copy (SCM cs);
SCM_API SCM scm_char_set (SCM rest);
SCM_API SCM scm_list_to_char_set (SCM list, SCM base_cs);
SCM_API SCM scm_list_to_char_set_x (SCM list, SCM base_cs);
SCM_API SCM scm_string_to_char_set (SCM str, SCM base_cs);
SCM_API SCM scm_string_to_char_set_x (SCM str, SCM base_cs);
SCM_API SCM scm_char_set_filter (SCM pred, SCM cs, SCM base_cs);
SCM_API SCM scm_char_set_filter_x (SCM pred, SCM cs, SCM base_cs);
SCM_API SCM scm_ucs_range_to_char_set (SCM lower, SCM upper, SCM error, SCM base_cs);
SCM_API SCM scm_ucs_range_to_char_set_x (SCM lower, SCM upper, SCM error, SCM base_cs);
SCM_API SCM scm_to_char_set (SCM x);
/* Querying character sets (the names match SRFI-14's char-set-size,
   char-set-count, char-set->list, char-set->string, char-set-contains?,
   char-set-every and char-set-any).  All arguments and results are SCM
   values; predicates are passed as `pred'.  */
SCM_API SCM scm_char_set_size (SCM cs);
SCM_API SCM scm_char_set_count (SCM pred, SCM cs);
SCM_API SCM scm_char_set_to_list (SCM cs);
SCM_API SCM scm_char_set_to_string (SCM cs);
SCM_API SCM scm_char_set_contains_p (SCM cs, SCM ch);
SCM_API SCM scm_char_set_every (SCM pred, SCM cs);
SCM_API SCM scm_char_set_any (SCM pred, SCM cs);
/* Character-set algebra: adjoin/delete, complement and the set
   operations, each with an `_x' variant.

   The `_x' signatures are not all identical to their counterparts:
   scm_char_set_union_x, scm_char_set_intersection_x and
   scm_char_set_xor_x take a leading `cs1' that the non-`_x' functions
   do not, and scm_char_set_diff_plus_intersection_x takes two leading
   sets (`cs1', `cs2') where scm_char_set_diff_plus_intersection takes
   only `cs1'.
   NOTE(review): `rest' is presumably a Scheme list of the remaining
   arguments (characters for adjoin/delete, char sets elsewhere) --
   confirm in srfi-14.c.  */
SCM_API SCM scm_char_set_adjoin (SCM cs, SCM rest);
SCM_API SCM scm_char_set_delete (SCM cs, SCM rest);
SCM_API SCM scm_char_set_adjoin_x (SCM cs, SCM rest);
SCM_API SCM scm_char_set_delete_x (SCM cs, SCM rest);
SCM_API SCM scm_char_set_complement (SCM cs);
SCM_API SCM scm_char_set_union (SCM rest);
SCM_API SCM scm_char_set_intersection (SCM rest);
SCM_API SCM scm_char_set_difference (SCM cs1, SCM rest);
SCM_API SCM scm_char_set_xor (SCM rest);
SCM_API SCM scm_char_set_diff_plus_intersection (SCM cs1, SCM rest);
SCM_API SCM scm_char_set_complement_x (SCM cs);
SCM_API SCM scm_char_set_union_x (SCM cs1, SCM rest);
SCM_API SCM scm_char_set_intersection_x (SCM cs1, SCM rest);
SCM_API SCM scm_char_set_difference_x (SCM cs1, SCM rest);
SCM_API SCM scm_char_set_xor_x (SCM cs1, SCM rest);
SCM_API SCM scm_char_set_diff_plus_intersection_x (SCM cs1, SCM cs2, SCM rest);
/* Debugging aid, declared only when SCM_CHARSET_DEBUG evaluates to a
   nonzero value at preprocessing time (an undefined SCM_CHARSET_DEBUG
   counts as 0 in `#if', so the declaration is then omitted).  */
#if SCM_CHARSET_DEBUG
SCM_API SCM scm_debug_char_set (SCM cs);
#endif
/* Standard character sets, exported as SCM variables (the names match
   SRFI-14's char-set:lower-case ... char-set:full).
   NOTE(review): presumably initialized by scm_init_srfi_14 and not
   valid before it has run -- confirm in srfi-14.c.  */
SCM_API SCM scm_char_set_lower_case;
SCM_API SCM scm_char_set_upper_case;
SCM_API SCM scm_char_set_title_case;
SCM_API SCM scm_char_set_letter;
SCM_API SCM scm_char_set_digit;
SCM_API SCM scm_char_set_letter_and_digit;
SCM_API SCM scm_char_set_graphic;
SCM_API SCM scm_char_set_printing;
SCM_API SCM scm_char_set_whitespace;
SCM_API SCM scm_char_set_iso_control;
SCM_API SCM scm_char_set_punctuation;
SCM_API SCM scm_char_set_symbol;
SCM_API SCM scm_char_set_hex_digit;
SCM_API SCM scm_char_set_blank;
SCM_API SCM scm_char_set_ascii;
SCM_API SCM scm_char_set_empty;
SCM_API SCM scm_char_set_full;
/* Initialization entry point for the SRFI-14 module; takes no arguments
   and returns nothing.
   NOTE(review): presumably registers the char-set smob type, the Scheme
   procedures and the standard char-set variables declared in this
   header -- confirm in srfi-14.c.  */
SCM_INTERNAL void scm_init_srfi_14 (void);
#endif /* SCM_SRFI_14_H */