diff --git a/libguile/guile-snarf.c b/libguile/guile-snarf.c new file mode 100644 index 000000000..c06254c25 --- /dev/null +++ b/libguile/guile-snarf.c @@ -0,0 +1,981 @@ +/* guile-snarf.c --- extract declarations from Guile source code + Jim Blandy --- September 1999 + + Copyright (C) 1999 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this software; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + As a special exception, the Free Software Foundation gives permission + for additional uses of the text contained in its release of GUILE. + + The exception is that, if you link the GUILE library with other files + to produce an executable, this does not by itself cause the + resulting executable to be covered by the GNU General Public License. + Your use of that executable is in no way restricted on account of + linking the GUILE library code into it. + + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. + + This exception applies only to the code released by the + Free Software Foundation under the name GUILE. If you copy + code from other Free Software Foundation releases into a copy of + GUILE, as the General Public License permits, the exception does + not apply to the code that you add in this way. To avoid misleading + anyone as to the status of such modified files, you must delete + this exception notice from them. + + If you write modifications of your own for GUILE, it is your choice + whether to permit this exception to apply to your modifications. + If you do not wish that, delete this exception notice. */ + + +/* The problem: + + It's really nice to be able to create Scheme procedures by simply + adding a declaration above the C function's definition. And you + can use similar facilities for variables, symbols, etc. + + Using the C preprocessor to extract declarations from the C code is + a clever idea, but it's not very robust. Finding the C + preprocessor, and getting it to run without errors, is too prone to + interference from outside influences. We end up having to pass all + the -I flags and compiler flags to guile-snarf, whose job doesn't + have anything to do with them. + + Thus, we redefine snarfing as a process which operates on a single + text file, and ignores #inclusion. + + The general strategy: + + Scan the source file in just enough detail to correctly recognize C + identifiers or reserved words. This means watching for comments, + string and character literals, etc. + + When we see an identifier from our selected list, parse an argument + list after it according to the C preprocessor's rules, and then + pass it to an appropriate function to handle. + + Still to do: + + - Test by comparing output with old shell script. + - Change build process to actually use this. + - Make sure that errors cause .x file to be deleted. `make' will do + this if the process exits with a non-zero status, won't it? If not, + delete it ourselves. + - Can we simplify or improve the argument lists for some of the + snarfing keywords, now that we can parse them any way we like? + - When a file declares a *lot* of something, would it save space to + emit a table and a loop, instead of a zillion function calls? + + +#include +#include +#include +#include +#include + +#include "versiondat.h" + + +/* Utility functions. */ + +char *program_name; + +static void * +check_ptr (void *p) +{ + if (! p) + { + fprintf (stderr, "%s: out of memory\n", program_name); + exit (2); + } + return p; +} + +static void * +xmalloc (size_t size) +{ + return check_ptr (malloc (size)); +} + +static void * +xrealloc (void *ptr, size_t size) +{ + return check_ptr (realloc (ptr, size)); +} + +static void +system_error (char *message, char *filename) +{ + fprintf (stderr, "%s: ", program_name); + if (filename) fprintf (stderr, "%s: ", filename); + perror (message); + + exit (1); +} + + +/* Character table. */ + +struct chartable { + + enum action { + + /* Just skip this character. */ + act_normal, + + /* This character is whitespace. */ + act_space, + + /* This character begins and ends a string/char literal. */ + act_literal, + + /* This character is '/'. If followed by a '*', begin a block + comment. If followed by another '/', begin a line comment. */ + act_comment_start, + + /* This character increments the nesting level, and must be + matched by the character in `follow'. */ + act_open, + + /* This character decrements the nesting level, if it matches properly. */ + act_close, + + /* This character could appear anywhere in an identifier. */ + act_id_start, + + /* This character could appear anywhere in an identifier, except + as the first character. */ + act_id_follow, + + /* This character separates arguments in a keyword argument list, + if it occurs at the outermost nesting level. */ + act_arg_separator + + } action; + + int follow; +}; + +struct chartable chartable[256]; +#define CHARTABLE(i) (chartable[(unsigned) (i)]) + + +static void +match (char open, char close) +{ + CHARTABLE (open).action = act_open; + CHARTABLE (open).follow = close; + CHARTABLE (close).action = act_close; +} + +static void +init_chartable () +{ + int i; + char *c; + + for (i = 0; i < 256; i++) + CHARTABLE (i).action = act_normal; + + for (c = " \t\n\f"; *c; c++) + CHARTABLE (*c).action = act_space; + + CHARTABLE ('"' ).action = act_literal; + CHARTABLE ('\'').action = act_literal; + + CHARTABLE ('/').action = act_comment_start; + + match ('(', ')'); + match ('[', ']'); + match ('{', '}'); + + CHARTABLE (',').action = act_arg_separator; + + for (i = 'a'; i <= 'z'; i++) + CHARTABLE (i).action = act_id_start; + for (i = 'A'; i <= 'Z'; i++) + CHARTABLE (i).action = act_id_start; + CHARTABLE ('_').action = act_id_start; + + for (i = '0'; i <= '9'; i++) + CHARTABLE (i).action = act_id_follow; +} + + + +/* Reading text while tracking the current line number. */ + +/* The input file and the line number are global, because it's too + much of a pain to pass them around everywhere as arguments. Sorry. */ + +FILE *in; /* The current input file. */ +char *in_name; /* Its filename. */ +int in_line; /* The current line number in that file. */ + +FILE *out; /* The current output file. */ + +static int +source_getc () +{ + int c = getc (in); + if (c == '\n') + in_line++; + return c; +} + +static void +source_ungetc (int c) +{ + if (c == '\n') + in_line--; + ungetc (c, in); +} + +static void +syntax_error (char *message, ...) +{ + va_list args; + va_start (args, message); + + fprintf (stderr, "%s:%d: ", in_name, in_line); + vfprintf (stderr, message, args); + va_end (args); + exit (1); +} + + +/* Skipping comments. */ + +/* If we've read the first character of a comment start (that is, the + character '/'), then check the second character. If it's '*', + we're in a block comment; read to the end of it. If it's '/', + we're starting a line comment; skip to the end of the line. + Otherwise, put the second character back on the input stream. + + Return non-zero if this was a comment, zero otherwise. */ +static int +maybe_skip_comment () +{ + int d = source_getc (); + + if (d == '*') + { + int prev; + int start_line = in_line; + + /* Make sure the asterisk of the comment start doesn't + get mistaken for the asterisk of the comment end. */ + d = 0; + + do + { + prev = d; + d = source_getc (); + } + while (d != EOF && ! (prev == '*' && d == '/')); + + if (d == EOF) + { + in_line = start_line; + syntax_error ("unterminated comment"); + } + + return 1; + } + else if (d == '/') + { + do + d = source_getc (); + while (d != '\n' && d != EOF); + + return 1; + } + else + { + source_ungetc (d); + return 0; + } +} + + +/* Parsing argument lists for keywords. */ + +static int +arglist_getc (char *keyword) +{ + int c = source_getc (); + if (c == EOF) + syntax_error ("file ends in midst of arguments to keyword `%s'\n", + keyword); + return c; +} + +struct buffer { + int size, len; + char *text; +}; + +static void +init_buffer (struct buffer *b) +{ + b->size = 1; + b->text = (char *) xmalloc (b->size); + b->len = 0; +} + +static void +add_char (struct buffer *b, int c) +{ + if (b->len >= b->size) + { + b->size *= 2; + b->text = (char *) xrealloc (b->text, b->size); + } + + b->text[b->len++] = c; +} + +/* Parse a string or character literal, appending its text to b. */ +static void +read_literal (char *keyword, struct buffer *b, int start) +{ + add_char (b, start); + + for (;;) + { + int c = arglist_getc (keyword); + add_char (b, c); + if (c == '\\') + add_char (b, arglist_getc (keyword)); + else if (c == start) + break; + } +} + + +/* Parse the argument list following a keyword, just as the + C preprocessor would parse the arguments to a macro invocation. + Return the arguments as an array of strings, terminated by a null + pointer. + + If there's an error parsing the arguments, print an error message + using KEYWORD as the keyword name, and exit. + + Caller must free the array and each string; see the free_args + function. */ +static char ** +parse_args (char *keyword) +{ + /* Our read-ahead character. */ + int c = arglist_getc (keyword); + + /* String of closing parens we are expecting, outermost first. */ + struct buffer stack; + + /* Current list of arguments. */ + int arglist_size = 1; + char **arglist = (char **) xmalloc (arglist_size * sizeof (*arglist)); + int arglist_len = 0; + + init_buffer (&stack); + + /* Skip any initial whitespace. */ + while (CHARTABLE (c).action == act_space) + c = arglist_getc (keyword); + + /* Require an opening paren. */ + if (c != '(') + syntax_error ("arguments to snarfing keyword `%s' missing", keyword); + add_char (&stack, ')'); + c = arglist_getc (keyword); + + /* Read arguments, separated by commas outside of any (), {}, or [] + pairs. */ + while (stack.len > 0) + { + /* start a new argument. */ + struct buffer arg; + int arg_incomplete = 1; + + init_buffer (&arg); + + /* Skip whitespace. */ + while (CHARTABLE (c).action == act_space) + c = arglist_getc (keyword); + + do + { + switch (CHARTABLE (c).action) + { + case act_normal: + case act_space: + case act_id_start: + case act_id_follow: + add_char (&arg, c); + break; + + case act_literal: + read_literal (keyword, &arg, c); + break; + + case act_comment_start: + if (maybe_skip_comment ()) + add_char (&arg, ' '); + else + add_char (&arg, c); + break; + + case act_open: + add_char (&stack, CHARTABLE (c).follow); + add_char (&arg, c); + break; + + case act_close: + if (stack.len == 0) + /* We should have finished argument list parsing + when the stack became empty. */ + abort (); + if (c != stack.text[stack.len - 1]) + syntax_error ("mismatched parenthesis: '%c' and '%c'", + stack.text[stack.len - 1], c); + stack.len--; + /* Closing parens are part of the argument, except for the + outermost closing paren. */ + if (stack.len > 0) + add_char (&arg, c); + else + arg_incomplete = 0; + break; + + case act_arg_separator: + /* Commas are part of the argument, unless they occur at + the top level within the argument list. */ + if (stack.len == 1) + arg_incomplete = 0; + else + add_char (&arg, c); + break; + } + + c = arglist_getc (keyword); + } + while (arg_incomplete); + + /* Add this argument to the list. */ + add_char (&arg, '\0'); + if (arglist_len >= arglist_size) + { + arglist_size *= 2; + arglist = (char **) xrealloc (arglist, + arglist_size * sizeof (*arglist)); + } + arglist[arglist_len++] = arg.text; + } + + /* Null-terminate the argument list. */ + arglist = (char **) xrealloc (arglist, + (arglist_len + 1) * sizeof (*arglist)); + arglist[arglist_len] = 0; + return arglist; +} + +static int +count_args (char **args) +{ + int i; + + for (i = 0; args[i]; i++) + ; + + return i; +} + +static void +free_args (char **args) +{ + int i; + + for (i = 0; args[i]; i++) + free (args[i]); + free (args); +} + + +/* Individual routines for processing keywords. */ + +/* Flags these routines might use to select details of their behavior. */ +enum keyword_flags { + kw_gsubr = 0x01, /* create a gsubr */ + kw_generic = 0x02, /* create a generic function */ + kw_global = 0x04, /* global declaration, not static */ + kw_init = 0x08, /* initialized value */ + kw_long = 0x10, /* initialize it with a long */ + kw_keyword = 0x20 /* make a keyword, not a symbol */ +}; + +static void +check_arg_count (char *keyword, char **args, int expected) +{ + int actual = count_args (args); + if (actual != expected) + syntax_error ("keyword `%s' expects %d args, but got %d", + keyword, expected, actual); +} + +static void +proc_keyword (char *keyword, char **args, int flags) +{ + int expected_args; + + if (flags & kw_gsubr) + expected_args = 6; + else + expected_args = 4; + + if (flags & kw_generic) + expected_args++; + + check_arg_count (keyword, args, expected_args); + + /* Print some nice indentation. */ + fputs (" ", out); + + /* Print out the function name. */ + fprintf (out, "scm_make_%s%s", + (flags & kw_gsubr) ? "gsubr" : "subr", + (flags & kw_generic) ? "_with_generic" : ""); + + /* And the arguments. */ + fputs (" (", out); + if (flags & kw_gsubr) + fprintf (out, "%s, %s, %s, %s, (SCM (*) (...)) %s", + args[0], args[2], args[3], args[4], args[5]); + else + fprintf (out, "%s, %s, (SCM (*) (...)) %s", + args[0], args[2], args[3]); + + /* The generic versions have an extra argument at the end, + which is a pointer to a generic function variable. */ + if (flags & kw_generic) + fprintf (out, ", &%s", (flags & kw_gsubr) ? args[6] : args[4]); + + fputs (");\n", out); +} + +static void +syntax_keyword (char *keyword, char **args, int flags) +{ + check_arg_count (keyword, args, 4); + + fprintf (out, " scm_make_synt (%s, %s, %s);\n", + args[0], args[2], args[3]); +} + +static void +symbol_keyword (char *keyword, char **args, int flags) +{ + check_arg_count (keyword, args, 2); + + fprintf (out, " %s = scm_permanent_object (", args[0]); + if (flags & kw_keyword) + fprintf (out, "scm_c_make_keyword (%s)", args[1]); + else + fprintf (out, "SCM_CAR (scm_intern0 (%s))", args[1]); + fputs (");\n", out); +} + +static void +vcell_keyword (char *keyword, char **args, int flags) +{ + check_arg_count (keyword, args, (flags & kw_init) ? 3 : 2); + + fprintf (out, " %s = scm_permanent_object (scm_intern0 (%s));\n", + args[0], args[1]); + fprintf (out, " SCM_SETCDR (%s, ", args[0]); + + if (flags & kw_long) + fprintf (out, "scm_long2num (%s)", args[2]); + else if (flags & kw_init) + fputs (args[2], out); + else + fputs ("SCM_BOOL_F", out); + + fputs (");\n", out); +} + + +/* The keyword table. */ + +struct keyword { + char *name; + void (*func) (char *keyword, char **args, int flags); + int flags; +}; + +/* The maximum length of any keyword, in bytes. */ +#define MAX_KEYWORD_LEN (20) + +struct keyword keywords[] = +{ + { "SCM_PROC", proc_keyword, kw_gsubr }, + { "SCM_GPROC", proc_keyword, kw_gsubr | kw_generic }, + { "SCM_PROC1", proc_keyword, 0 }, + { "SCM_GPROC1", proc_keyword, kw_generic }, + + { "SCM_SYNTAX", syntax_keyword, 0 }, + + { "SCM_SYMBOL", symbol_keyword, 0 }, + { "SCM_GLOBAL_SYMBOL", symbol_keyword, kw_global }, + { "SCM_KEYWORD", symbol_keyword, kw_keyword }, + { "SCM_GLOBAL_KEYWORD", symbol_keyword, kw_keyword | kw_global }, + + { "SCM_VCELL", vcell_keyword, 0 }, + { "SCM_GLOBAL_VCELL", vcell_keyword, kw_global }, + { "SCM_VCELL_INIT", vcell_keyword, kw_init }, + { "SCM_GLOBAL_VCELL_INIT", vcell_keyword, kw_init | kw_global }, + { "SCM_CONST_LONG", vcell_keyword, kw_init | kw_long }, + + { 0, 0, 0 } + +}; + +/* The bigger this is, the less likely a random identifier is + to clash with the user's program. */ +#define KEYWORD_HASH_SIZE (1009) +struct keyword *keyword_hash[KEYWORD_HASH_SIZE]; + +static unsigned long +hash (char *text) +{ + long h = 0; + + while (*text) + { + unsigned char c = (unsigned char) *text++; + h = (h << 4) + c + (c << 9) + (h >> 24) + 32; + } + + return h; +} + +static struct keyword * +is_keyword (char *name) +{ + unsigned long h = hash (name) % KEYWORD_HASH_SIZE; + if (keyword_hash[h]) + { + if (strcmp (name, keyword_hash[h]->name)) + { + /* Just for testing. */ + fprintf (stderr, "%s: keyword/user id hash collision: %s and %s\n", + program_name, keyword_hash[h]->name, name); + return 0; + } + + return keyword_hash[h]; + } + + return 0; +} + +static void +process_keyword (struct keyword *k) +{ + char **args = parse_args (k->name); + k->func (k->name, args, k->flags); + free_args (args); +} + +static void +init_keyword_hash_table () +{ + int i; + + for (i = 0; keywords[i].func; i++) + { + unsigned long h = hash (keywords[i].name) % KEYWORD_HASH_SIZE; + if (keyword_hash[h]) + { + fprintf (stderr, "%s: keyword hash collision: %s and %s\n", + program_name, keyword_hash[h]->name, keywords[i].name); + exit (2); + } + keyword_hash[h] = &keywords[i]; + } +} + + +/* Scanning a file of C code. */ + +/* Skip a string or character literal that started with the character + FOLLOW. */ +static void +skip_literal (int follow) +{ + int start_line = in_line; + + for (;;) + { + int c = source_getc (); + + if (c == EOF) + { + in_line = start_line; + syntax_error ("unterminated character or string literal"); + } + else if (c == '\\') + source_getc (); + else if (c == follow) + break; + } +} + +static void +read_id (char *buf, size_t size) +{ + int i = 0; + + for (;;) + { + int c = source_getc (); + int action; + + if (c == EOF) + break; + + action = CHARTABLE (c).action; + if (action == act_id_start || action == act_id_follow) + { + if (i < size) + buf[i] = c; + i++; + } + else + { + source_ungetc (c); + break; + } + } + + /* It doesn't matter that we truncate the keyword to SIZE-1 + characters, since the buffer is large enough to hold any valid + keyword. */ + buf[(i >= size) ? size - 1 : i] = '\0'; +} + +static void +process_stream () +{ + int c; + + for (;;) + { + c = source_getc (); + if (c == EOF) + break; + + switch (CHARTABLE (c).action) + { + case act_normal: + case act_space: + case act_open: + case act_close: + case act_id_follow: + case act_arg_separator: + break; + + case act_literal: + skip_literal (c); + break; + + case act_comment_start: + maybe_skip_comment (); + break; + + case act_id_start: + { + char buf[MAX_KEYWORD_LEN + 1]; + struct keyword *k; + + source_ungetc (c); + read_id (buf, sizeof (buf)); + k = is_keyword (buf); + if (k) + process_keyword (k); + } + break; + + default: + abort (); + } + } +} + +static void +process_file (char *inname, char *outname) +{ + in = fopen (inname, "r"); + if (! in) + system_error ("error opening input file", inname); + in_line = 1; + in_name = inname; + + out = fopen (outname, "w"); + if (! out) + system_error ("error opening output file", outname); + + process_stream (in, out); + + /* I'm told the AIX C preprocessor doesn't like to #include empty files. */ + putc ('\n', out); + + if (ferror (in)) + system_error ("error reading input file", inname); + if (ferror (out)) + system_error ("error writing output file", outname); + + if (fclose (in) == EOF) + system_error ("error closing input file", inname); + if (fclose (out) == EOF) + system_error ("error closing output file", outname); +} + + +/* The main function. */ + +static void +version () +{ + fprintf (stderr, "guile-snarf (Guile) %s\n", GUILE_VERSION); + exit (0); +} + +static void +usage () +{ + fprintf (stderr, "Usage: %s [-o OUTFILE] INFILE\n", program_name); +} + +static void +help () +{ + fprintf (stderr, "guile-snarf -- extract declarations from Guile C code.\n"); + usage (); + fprintf (stderr, "\ +This program makes it easier to write C code which defines Scheme\n\ +functions and variables and uses Scheme symbols in Guile. Instead of\n\ +writing code yourself to build the Scheme objects when your code is\n\ +initialized, you can write brief declarations above your C functions\n\ +indicating how they should be called from Scheme. Then, if you run\n\ +this program on your source file, it will write code for you that\n\ +defines functions, builds Scheme objects, and so on. You can then\n\ +#include this program's output into your initialization function.\n\ +\n\ +`INFILE' is the file from which we should extract declarations and\n\ +generate initialization code.\n\ +\n\ +`-o OUTFILE' specifies the file where we should place the output. If\n\ +omitted, it defaults to `INFILE', with its extension changed to `.x'.\n"); + exit (0); +} + +static void +bad_argument () +{ + usage (); + fprintf (stderr, "Type `%s --help' for more information.\n", program_name); + exit (1); +} + +int +main (int argc, char **argv) +{ + char *infile, *outfile; + + program_name = strrchr (argv[0], '/'); + if (! program_name) + program_name = argv[0]; + else + program_name++; + + infile = 0; + outfile = 0; + argc--, argv++; + while (argc > 0) + { + if (! strcmp (argv[0], "--version")) + version (); + else if (! strcmp (argv[0], "--help")) + help (); + else if (! strcmp (argv[0], "-o")) + outfile = *++argv, argc--; + else if (argv[0][0] == '-') + { + fprintf (stderr, "%s: unrecognized switch: `%s'\n", + program_name, argv[0]); + bad_argument (); + } + else if (infile) + { + fprintf (stderr, "%s: more than input file given: `%s' and `%s'\n", + program_name, infile, argv[0]); + bad_argument (); + } + else + infile = argv[0]; + + argc--, argv++; + } + + if (! infile) + { + fprintf (stderr, "%s: no input file given\n", program_name); + bad_argument (); + } + + if (! outfile) + { + /* Choose a default output file name. This should be the input + file name, with the extension changed to `.x'. */ + char *ext = strrchr (infile, '.'); + + /* Make sure we haven't mistaken a dot in an earlier path + component for the start of the extension. */ + if (ext && strchr (ext, '/')) + ext = 0; + + if (ext) + { + outfile = (char *) xmalloc (ext - infile + 3); + strcpy (outfile, infile); + strcpy (outfile + (ext - infile), ".x"); + } + else + { + outfile = (char *) xmalloc (strlen (infile) + 3); + strcpy (outfile, infile); + strcat (outfile, ".x"); + } + } + + init_chartable (); + init_keyword_hash_table (); + process_file (infile, outfile); + + return 0; +}