diff --git a/libguile/guile-snarf.c b/libguile/guile-snarf.c index 5feafd7c6..e69de29bb 100644 --- a/libguile/guile-snarf.c +++ b/libguile/guile-snarf.c @@ -1,986 +0,0 @@ -/* guile-snarf.c --- extract declarations from Guile source code - Jim Blandy --- September 1999 - - Copyright (C) 1999 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this software; see the file COPYING. If not, write to - the Free Software Foundation, Inc., 59 Temple Place, Suite 330, - Boston, MA 02111-1307 USA - - As a special exception, the Free Software Foundation gives permission - for additional uses of the text contained in its release of GUILE. - - The exception is that, if you link the GUILE library with other files - to produce an executable, this does not by itself cause the - resulting executable to be covered by the GNU General Public License. - Your use of that executable is in no way restricted on account of - linking the GUILE library code into it. - - This exception does not however invalidate any other reasons why - the executable file might be covered by the GNU General Public License. - - This exception applies only to the code released by the - Free Software Foundation under the name GUILE. If you copy - code from other Free Software Foundation releases into a copy of - GUILE, as the General Public License permits, the exception does - not apply to the code that you add in this way. To avoid misleading - anyone as to the status of such modified files, you must delete - this exception notice from them. - - If you write modifications of your own for GUILE, it is your choice - whether to permit this exception to apply to your modifications. - If you do not wish that, delete this exception notice. */ - - -/* The problem: - - It's really nice to be able to create Scheme procedures by simply - adding a declaration above the C function's definition. And you - can use similar facilities for variables, symbols, etc. - - Using the C preprocessor to extract declarations from the C code is - a clever idea, but it's not very robust. Finding the C - preprocessor, and getting it to run without errors, is too prone to - interference from outside influences. We end up having to pass all - the -I flags and compiler flags to guile-snarf, whose job doesn't - have anything to do with them. - - Thus, we redefine snarfing as a process which operates on a single - text file, and ignores #inclusion. - - The general strategy: - - Scan the source file in just enough detail to correctly recognize C - identifiers or reserved words. This means watching for comments, - string and character literals, etc. - - When we see an identifier from our selected list, parse an argument - list after it according to the C preprocessor's rules, and then - pass it to an appropriate function to handle. - - Still to do: - - - Test by comparing output with old shell script. - - Add a test for this to Makefile.am. - - Change build process to actually use this. - - Make sure that errors cause .x file to be deleted. `make' will do - this if the process exits with a non-zero status, won't it? If not, - delete it ourselves. - - Could we generate both the declarations and the initializations? - - Can we simplify or improve the argument lists for some of the - snarfing keywords, now that we can parse them any way we like? - - When a file declares a *lot* of something, would it save space to - emit a table and a loop, instead of a zillion function calls? */ - - -#include -#include -#include -#include -#include - -#include "versiondat.h" - - -/* Utility functions. */ - -char *program_name; - -static void * -check_ptr (void *p) -{ - if (! p) - { - fprintf (stderr, "%s: out of memory\n", program_name); - exit (2); - } - return p; -} - -static void * -xmalloc (size_t size) -{ - return check_ptr (malloc (size)); -} - -static void * -xrealloc (void *ptr, size_t size) -{ - return check_ptr (realloc (ptr, size)); -} - -static void -system_error (char *message, char *filename) -{ - fprintf (stderr, "%s: ", program_name); - if (filename) fprintf (stderr, "%s: ", filename); - perror (message); - - exit (1); -} - - -/* Character table. */ - -struct chartable { - - enum action { - - /* Just skip this character. */ - act_normal, - - /* This character is whitespace. */ - act_space, - - /* This character begins and ends a string/char literal. */ - act_literal, - - /* This character is '/'. If followed by a '*', begin a block - comment. If followed by another '/', begin a line comment. */ - act_comment_start, - - /* This character increments the nesting level, and must be - matched by the character in `follow'. */ - act_open, - - /* This character decrements the nesting level, if it matches properly. */ - act_close, - - /* This character could appear anywhere in an identifier. */ - act_id_start, - - /* This character could appear anywhere in an identifier, except - as the first character. */ - act_id_follow, - - /* This character separates arguments in a keyword argument list, - if it occurs at the outermost nesting level. */ - act_arg_separator - - } action; - - int follow; -}; - -struct chartable chartable[256]; -#define CHARTABLE(i) (chartable[(unsigned) (i)]) - - -static void -match (char open, char close) -{ - CHARTABLE (open).action = act_open; - CHARTABLE (open).follow = close; - CHARTABLE (close).action = act_close; -} - -static void -init_chartable () -{ - int i; - char *c; - - for (i = 0; i < 256; i++) - CHARTABLE (i).action = act_normal; - - for (c = " \t\n\f"; *c; c++) - CHARTABLE (*c).action = act_space; - - CHARTABLE ('"' ).action = act_literal; - CHARTABLE ('\'').action = act_literal; - - CHARTABLE ('/').action = act_comment_start; - - match ('(', ')'); - match ('[', ']'); - match ('{', '}'); - - CHARTABLE (',').action = act_arg_separator; - - for (i = 'a'; i <= 'z'; i++) - CHARTABLE (i).action = act_id_start; - for (i = 'A'; i <= 'Z'; i++) - CHARTABLE (i).action = act_id_start; - CHARTABLE ('_').action = act_id_start; - - for (i = '0'; i <= '9'; i++) - CHARTABLE (i).action = act_id_follow; -} - - - -/* Reading text while tracking the current line number. */ - -/* The input file and the line number are global, because it's too - much of a pain to pass them around everywhere as arguments. Sorry. */ - -FILE *in; /* The current input file. */ -char *in_name; /* Its filename. */ -int in_line; /* The current line number in that file. */ - -FILE *out; /* The current output file. */ - -static int -source_getc () -{ - int c = getc (in); - if (c == '\n') - in_line++; - return c; -} - -static void -source_ungetc (int c) -{ - if (c == '\n') - in_line--; - ungetc (c, in); -} - -static void -syntax_error (char *message, ...) -{ - va_list args; - va_start (args, message); - - fprintf (stderr, "%s:%d: ", in_name, in_line); - vfprintf (stderr, message, args); - va_end (args); - exit (1); -} - - -/* Skipping comments. */ - -/* If we've read the first character of a comment start (that is, the - character '/'), then check the second character. If it's '*', - we're in a block comment; read to the end of it. If it's '/', - we're starting a line comment; skip to the end of the line. - Otherwise, put the second character back on the input stream. - - Return non-zero if this was a comment, zero otherwise. */ -static int -maybe_skip_comment () -{ - int d = source_getc (); - - if (d == '*') - { - int prev; - int start_line = in_line; - - /* Make sure the asterisk of the comment start doesn't - get mistaken for the asterisk of the comment end. */ - d = 0; - - do - { - prev = d; - d = source_getc (); - } - while (d != EOF && ! (prev == '*' && d == '/')); - - if (d == EOF) - { - in_line = start_line; - syntax_error ("unterminated comment"); - } - - return 1; - } - else if (d == '/') - { - do - d = source_getc (); - while (d != '\n' && d != EOF); - - return 1; - } - else - { - source_ungetc (d); - return 0; - } -} - - -/* Parsing argument lists for keywords. */ - -static int -arglist_getc (char *keyword) -{ - int c = source_getc (); - if (c == EOF) - syntax_error ("file ends in midst of arguments to keyword `%s'\n", - keyword); - return c; -} - -struct buffer { - int size, len; - char *text; -}; - -static void -init_buffer (struct buffer *b) -{ - b->size = 1; - b->text = (char *) xmalloc (b->size); - b->len = 0; -} - -static void -add_char (struct buffer *b, int c) -{ - if (b->len >= b->size) - { - b->size *= 2; - b->text = (char *) xrealloc (b->text, b->size); - } - - b->text[b->len++] = c; -} - -/* Parse a string or character literal, appending its text to b. */ -static void -read_literal (char *keyword, struct buffer *b, int start) -{ - add_char (b, start); - - for (;;) - { - int c = arglist_getc (keyword); - add_char (b, c); - if (c == '\\') - add_char (b, arglist_getc (keyword)); - else if (c == start) - break; - } -} - - -/* Parse the argument list following a keyword, just as the - C preprocessor would parse the arguments to a macro invocation. - Return the arguments as an array of strings, terminated by a null - pointer. - - If there's an error parsing the arguments, print an error message - using KEYWORD as the keyword name, and exit. - - Caller must free the array and each string; see the free_args - function. */ -static char ** -parse_args (char *keyword) -{ - /* Our read-ahead character. */ - int c = arglist_getc (keyword); - - /* String of closing parens we are expecting, outermost first. */ - struct buffer stack; - - /* Current list of arguments. */ - int arglist_size = 1; - char **arglist = (char **) xmalloc (arglist_size * sizeof (*arglist)); - int arglist_len = 0; - - init_buffer (&stack); - - /* Skip any initial whitespace. */ - while (CHARTABLE (c).action == act_space) - c = arglist_getc (keyword); - - /* Require an opening paren. */ - if (c != '(') - syntax_error ("arguments to snarfing keyword `%s' missing", keyword); - add_char (&stack, ')'); - c = arglist_getc (keyword); - - /* Read arguments, separated by commas outside of any (), {}, or [] - pairs. */ - while (stack.len > 0) - { - /* start a new argument. */ - struct buffer arg; - int arg_incomplete = 1; - - init_buffer (&arg); - - /* Skip whitespace. */ - while (CHARTABLE (c).action == act_space) - c = arglist_getc (keyword); - - do - { - switch (CHARTABLE (c).action) - { - case act_normal: - case act_space: - case act_id_start: - case act_id_follow: - add_char (&arg, c); - break; - - case act_literal: - read_literal (keyword, &arg, c); - break; - - case act_comment_start: - if (maybe_skip_comment ()) - add_char (&arg, ' '); - else - add_char (&arg, c); - break; - - case act_open: - add_char (&stack, CHARTABLE (c).follow); - add_char (&arg, c); - break; - - case act_close: - if (stack.len == 0) - /* We should have finished argument list parsing - when the stack became empty. */ - abort (); - if (c != stack.text[stack.len - 1]) - syntax_error ("mismatched parenthesis: '%c' and '%c'", - stack.text[stack.len - 1], c); - stack.len--; - /* Closing parens are part of the argument, except for the - outermost closing paren. */ - if (stack.len > 0) - add_char (&arg, c); - else - arg_incomplete = 0; - break; - - case act_arg_separator: - /* Commas are part of the argument, unless they occur at - the top level within the argument list. */ - if (stack.len == 1) - arg_incomplete = 0; - else - add_char (&arg, c); - break; - - default: - abort (); - } - - c = arglist_getc (keyword); - } - while (arg_incomplete); - - /* Add this argument to the list. */ - add_char (&arg, '\0'); - if (arglist_len >= arglist_size) - { - arglist_size *= 2; - arglist = (char **) xrealloc (arglist, - arglist_size * sizeof (*arglist)); - } - arglist[arglist_len++] = arg.text; - } - - /* Null-terminate the argument list. */ - arglist = (char **) xrealloc (arglist, - (arglist_len + 1) * sizeof (*arglist)); - arglist[arglist_len] = 0; - return arglist; -} - -static int -count_args (char **args) -{ - int i; - - for (i = 0; args[i]; i++) - ; - - return i; -} - -static void -free_args (char **args) -{ - int i; - - for (i = 0; args[i]; i++) - free (args[i]); - free (args); -} - - -/* Individual routines for processing keywords. */ - -/* Flags these routines might use to select details of their behavior. */ -enum keyword_flags { - kw_gsubr = 0x01, /* create a gsubr */ - kw_generic = 0x02, /* create a generic function */ - kw_global = 0x04, /* global declaration, not static */ - kw_init = 0x08, /* initialized value */ - kw_long = 0x10, /* initialize it with a long */ - kw_keyword = 0x20 /* make a keyword, not a symbol */ -}; - -static void -check_arg_count (char *keyword, char **args, int expected) -{ - int actual = count_args (args); - if (actual != expected) - syntax_error ("keyword `%s' expects %d args, but got %d", - keyword, expected, actual); -} - -static void -proc_keyword (char *keyword, char **args, int flags) -{ - int expected_args; - - if (flags & kw_gsubr) - expected_args = 6; - else - expected_args = 4; - - if (flags & kw_generic) - expected_args++; - - check_arg_count (keyword, args, expected_args); - - /* Print some nice indentation. */ - fputs (" ", out); - - /* Print out the function name. */ - fprintf (out, "scm_make_%s%s", - (flags & kw_gsubr) ? "gsubr" : "subr", - (flags & kw_generic) ? "_with_generic" : ""); - - /* And the arguments. */ - fputs (" (", out); - if (flags & kw_gsubr) - fprintf (out, "%s, %s, %s, %s, (SCM (*) (...)) %s", - args[0], args[2], args[3], args[4], args[5]); - else - fprintf (out, "%s, %s, (SCM (*) (...)) %s", - args[0], args[2], args[3]); - - /* The generic versions have an extra argument at the end, - which is a pointer to a generic function variable. */ - if (flags & kw_generic) - fprintf (out, ", &%s", (flags & kw_gsubr) ? args[6] : args[4]); - - fputs (");\n", out); -} - -static void -syntax_keyword (char *keyword, char **args, int flags) -{ - check_arg_count (keyword, args, 4); - - fprintf (out, " scm_make_synt (%s, %s, %s);\n", - args[0], args[2], args[3]); -} - -static void -symbol_keyword (char *keyword, char **args, int flags) -{ - check_arg_count (keyword, args, 2); - - fprintf (out, " %s = scm_permanent_object (", args[0]); - if (flags & kw_keyword) - fprintf (out, "scm_c_make_keyword (%s)", args[1]); - else - fprintf (out, "SCM_CAR (scm_intern0 (%s))", args[1]); - fputs (");\n", out); -} - -static void -vcell_keyword (char *keyword, char **args, int flags) -{ - check_arg_count (keyword, args, (flags & kw_init) ? 3 : 2); - - fprintf (out, " %s = scm_permanent_object (scm_intern0 (%s));\n", - args[0], args[1]); - fprintf (out, " SCM_SETCDR (%s, ", args[0]); - - if (flags & kw_long) - fprintf (out, "scm_long2num (%s)", args[2]); - else if (flags & kw_init) - fputs (args[2], out); - else - fputs ("SCM_BOOL_F", out); - - fputs (");\n", out); -} - - -/* The keyword table. */ - -struct keyword { - char *name; - void (*func) (char *keyword, char **args, int flags); - int flags; -}; - -/* The maximum length of any keyword, in bytes. */ -#define MAX_KEYWORD_LEN (20) - -struct keyword keywords[] = -{ - { "SCM_PROC", proc_keyword, kw_gsubr }, - { "SCM_GPROC", proc_keyword, kw_gsubr | kw_generic }, - { "SCM_PROC1", proc_keyword, 0 }, - { "SCM_GPROC1", proc_keyword, kw_generic }, - - { "SCM_SYNTAX", syntax_keyword, 0 }, - - { "SCM_SYMBOL", symbol_keyword, 0 }, - { "SCM_GLOBAL_SYMBOL", symbol_keyword, kw_global }, - { "SCM_KEYWORD", symbol_keyword, kw_keyword }, - { "SCM_GLOBAL_KEYWORD", symbol_keyword, kw_keyword | kw_global }, - - { "SCM_VCELL", vcell_keyword, 0 }, - { "SCM_GLOBAL_VCELL", vcell_keyword, kw_global }, - { "SCM_VCELL_INIT", vcell_keyword, kw_init }, - { "SCM_GLOBAL_VCELL_INIT", vcell_keyword, kw_init | kw_global }, - { "SCM_CONST_LONG", vcell_keyword, kw_init | kw_long }, - - { 0, 0, 0 } - -}; - -/* The bigger this is, the less likely a random identifier is - to clash with the user's program. */ -#define KEYWORD_HASH_SIZE (1009) -struct keyword *keyword_hash[KEYWORD_HASH_SIZE]; - -static unsigned long -hash (char *text) -{ - long h = 0; - - while (*text) - { - unsigned char c = (unsigned char) *text++; - h = (h << 4) + c + (c << 9) + (h >> 24) + 32; - } - - return h; -} - -static struct keyword * -is_keyword (char *name) -{ - unsigned long h = hash (name) % KEYWORD_HASH_SIZE; - if (keyword_hash[h]) - { - if (strcmp (name, keyword_hash[h]->name)) - { - /* Just for testing. */ - fprintf (stderr, "%s: keyword/user id hash collision: %s and %s\n", - program_name, keyword_hash[h]->name, name); - return 0; - } - - return keyword_hash[h]; - } - - return 0; -} - -static void -process_keyword (struct keyword *k) -{ - char **args = parse_args (k->name); - k->func (k->name, args, k->flags); - free_args (args); -} - -static void -init_keyword_hash_table () -{ - int i; - - for (i = 0; keywords[i].func; i++) - { - unsigned long h = hash (keywords[i].name) % KEYWORD_HASH_SIZE; - if (keyword_hash[h]) - { - fprintf (stderr, "%s: keyword hash collision: %s and %s\n", - program_name, keyword_hash[h]->name, keywords[i].name); - exit (2); - } - keyword_hash[h] = &keywords[i]; - } -} - - -/* Scanning a file of C code. */ - -/* Skip a string or character literal that started with the character - FOLLOW. */ -static void -skip_literal (int follow) -{ - int start_line = in_line; - - for (;;) - { - int c = source_getc (); - - if (c == EOF) - { - in_line = start_line; - syntax_error ("unterminated character or string literal"); - } - else if (c == '\\') - source_getc (); - else if (c == follow) - break; - } -} - -static void -read_id (char *buf, size_t size) -{ - int i = 0; - - for (;;) - { - int c = source_getc (); - int action; - - if (c == EOF) - break; - - action = CHARTABLE (c).action; - if (action == act_id_start || action == act_id_follow) - { - if (i < size) - buf[i] = c; - i++; - } - else - { - source_ungetc (c); - break; - } - } - - /* It doesn't matter that we truncate the keyword to SIZE-1 - characters, since the buffer is large enough to hold any valid - keyword. */ - buf[(i >= size) ? size - 1 : i] = '\0'; -} - -static void -process_stream () -{ - int c; - - for (;;) - { - c = source_getc (); - if (c == EOF) - break; - - switch (CHARTABLE (c).action) - { - case act_normal: - case act_space: - case act_open: - case act_close: - case act_id_follow: - case act_arg_separator: - break; - - case act_literal: - skip_literal (c); - break; - - case act_comment_start: - maybe_skip_comment (); - break; - - case act_id_start: - { - char buf[MAX_KEYWORD_LEN + 1]; - struct keyword *k; - - source_ungetc (c); - read_id (buf, sizeof (buf)); - k = is_keyword (buf); - if (k) - process_keyword (k); - } - break; - - default: - abort (); - } - } -} - -static void -process_file (char *inname, char *outname) -{ - in = fopen (inname, "r"); - if (! in) - system_error ("error opening input file", inname); - in_line = 1; - in_name = inname; - - out = fopen (outname, "w"); - if (! out) - system_error ("error opening output file", outname); - - process_stream (in, out); - - /* I'm told the AIX C preprocessor doesn't like to #include empty files. */ - putc ('\n', out); - - if (ferror (in)) - system_error ("error reading input file", inname); - if (ferror (out)) - system_error ("error writing output file", outname); - - if (fclose (in) == EOF) - system_error ("error closing input file", inname); - if (fclose (out) == EOF) - system_error ("error closing output file", outname); -} - - -/* The main function. */ - -static void -version () -{ - fprintf (stderr, "guile-snarf (Guile) %s\n", GUILE_VERSION); - exit (0); -} - -static void -usage () -{ - fprintf (stderr, "Usage: %s [-o OUTFILE] INFILE\n", program_name); -} - -static void -help () -{ - fprintf (stderr, "guile-snarf -- extract declarations from Guile C code.\n"); - usage (); - fprintf (stderr, "\ -This program makes it easier to write C code which defines Scheme\n\ -functions and variables and uses Scheme symbols in Guile. Instead of\n\ -writing code yourself to build the Scheme objects when your code is\n\ -initialized, you can write brief declarations above your C functions\n\ -indicating how they should be called from Scheme. Then, if you run\n\ -this program on your source file, it will write code for you that\n\ -defines functions, builds Scheme objects, and so on. You can then\n\ -#include this program's output into your initialization function.\n\ -\n\ -`INFILE' is the file from which we should extract declarations and\n\ -generate initialization code.\n\ -\n\ -`-o OUTFILE' specifies the file where we should place the output. If\n\ -omitted, it defaults to `INFILE', with its extension changed to `.x'.\n"); - exit (0); -} - -static void -bad_argument () -{ - usage (); - fprintf (stderr, "Type `%s --help' for more information.\n", program_name); - exit (1); -} - -int -main (int argc, char **argv) -{ - char *infile, *outfile; - - program_name = strrchr (argv[0], '/'); - if (! program_name) - program_name = argv[0]; - else - program_name++; - - infile = 0; - outfile = 0; - argc--, argv++; - while (argc > 0) - { - if (! strcmp (argv[0], "--version")) - version (); - else if (! strcmp (argv[0], "--help")) - help (); - else if (! strcmp (argv[0], "-o")) - outfile = *++argv, argc--; - else if (argv[0][0] == '-') - { - fprintf (stderr, "%s: unrecognized switch: `%s'\n", - program_name, argv[0]); - bad_argument (); - } - else if (infile) - { - fprintf (stderr, "%s: more than input file given: `%s' and `%s'\n", - program_name, infile, argv[0]); - bad_argument (); - } - else - infile = argv[0]; - - argc--, argv++; - } - - if (! infile) - { - fprintf (stderr, "%s: no input file given\n", program_name); - bad_argument (); - } - - if (! outfile) - { - /* Choose a default output file name. This should be the input - file name, with the extension changed to `.x'. */ - char *ext = strrchr (infile, '.'); - - /* Make sure we haven't mistaken a dot in an earlier path - component for the start of the extension. */ - if (ext && strchr (ext, '/')) - ext = 0; - - if (ext) - { - outfile = (char *) xmalloc (ext - infile + 3); - strcpy (outfile, infile); - strcpy (outfile + (ext - infile), ".x"); - } - else - { - outfile = (char *) xmalloc (strlen (infile) + 3); - strcpy (outfile, infile); - strcat (outfile, ".x"); - } - } - - init_chartable (); - init_keyword_hash_table (); - process_file (infile, outfile); - - return 0; -}