1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-04 22:40:25 +02:00

Random bug fixes

This commit is contained in:
Jim Blandy 1999-09-06 01:53:52 +00:00
parent edb3b2ca56
commit 37f2e24212

View file

@ -1,986 +0,0 @@
/* guile-snarf.c --- extract declarations from Guile source code
Jim Blandy <jimb@red-bean.com> --- September 1999
Copyright (C) 1999 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software; see the file COPYING. If not, write to
the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
As a special exception, the Free Software Foundation gives permission
for additional uses of the text contained in its release of GUILE.
The exception is that, if you link the GUILE library with other files
to produce an executable, this does not by itself cause the
resulting executable to be covered by the GNU General Public License.
Your use of that executable is in no way restricted on account of
linking the GUILE library code into it.
This exception does not however invalidate any other reasons why
the executable file might be covered by the GNU General Public License.
This exception applies only to the code released by the
Free Software Foundation under the name GUILE. If you copy
code from other Free Software Foundation releases into a copy of
GUILE, as the General Public License permits, the exception does
not apply to the code that you add in this way. To avoid misleading
anyone as to the status of such modified files, you must delete
this exception notice from them.
If you write modifications of your own for GUILE, it is your choice
whether to permit this exception to apply to your modifications.
If you do not wish that, delete this exception notice. */
/* The problem:
It's really nice to be able to create Scheme procedures by simply
adding a declaration above the C function's definition. And you
can use similar facilities for variables, symbols, etc.
Using the C preprocessor to extract declarations from the C code is
a clever idea, but it's not very robust. Finding the C
preprocessor, and getting it to run without errors, is too prone to
interference from outside influences. We end up having to pass all
the -I flags and compiler flags to guile-snarf, whose job doesn't
have anything to do with them.
Thus, we redefine snarfing as a process which operates on a single
text file, and ignores #inclusion.
The general strategy:
Scan the source file in just enough detail to correctly recognize C
identifiers or reserved words. This means watching for comments,
string and character literals, etc.
When we see an identifier from our selected list, parse an argument
list after it according to the C preprocessor's rules, and then
pass it to an appropriate function to handle.
Still to do:
- Test by comparing output with old shell script.
- Add a test for this to Makefile.am.
- Change build process to actually use this.
- Make sure that errors cause .x file to be deleted. `make' will do
this if the process exits with a non-zero status, won't it? If not,
delete it ourselves.
- Could we generate both the declarations and the initializations?
- Can we simplify or improve the argument lists for some of the
snarfing keywords, now that we can parse them any way we like?
- When a file declares a *lot* of something, would it save space to
emit a table and a loop, instead of a zillion function calls? */
#include <stddef.h>
#include <stdlib.h>
#include <strings.h>
#include <stdio.h>
#include <stdarg.h>
#include "versiondat.h"
/* Utility functions. */
char *program_name;
static void *
check_ptr (void *p)
{
if (! p)
{
fprintf (stderr, "%s: out of memory\n", program_name);
exit (2);
}
return p;
}
static void *
xmalloc (size_t size)
{
return check_ptr (malloc (size));
}
static void *
xrealloc (void *ptr, size_t size)
{
return check_ptr (realloc (ptr, size));
}
static void
system_error (char *message, char *filename)
{
fprintf (stderr, "%s: ", program_name);
if (filename) fprintf (stderr, "%s: ", filename);
perror (message);
exit (1);
}
/* Character table. */
struct chartable {
enum action {
/* Just skip this character. */
act_normal,
/* This character is whitespace. */
act_space,
/* This character begins and ends a string/char literal. */
act_literal,
/* This character is '/'. If followed by a '*', begin a block
comment. If followed by another '/', begin a line comment. */
act_comment_start,
/* This character increments the nesting level, and must be
matched by the character in `follow'. */
act_open,
/* This character decrements the nesting level, if it matches properly. */
act_close,
/* This character could appear anywhere in an identifier. */
act_id_start,
/* This character could appear anywhere in an identifier, except
as the first character. */
act_id_follow,
/* This character separates arguments in a keyword argument list,
if it occurs at the outermost nesting level. */
act_arg_separator
} action;
int follow;
};
struct chartable chartable[256];
#define CHARTABLE(i) (chartable[(unsigned) (i)])
static void
match (char open, char close)
{
CHARTABLE (open).action = act_open;
CHARTABLE (open).follow = close;
CHARTABLE (close).action = act_close;
}
static void
init_chartable ()
{
int i;
char *c;
for (i = 0; i < 256; i++)
CHARTABLE (i).action = act_normal;
for (c = " \t\n\f"; *c; c++)
CHARTABLE (*c).action = act_space;
CHARTABLE ('"' ).action = act_literal;
CHARTABLE ('\'').action = act_literal;
CHARTABLE ('/').action = act_comment_start;
match ('(', ')');
match ('[', ']');
match ('{', '}');
CHARTABLE (',').action = act_arg_separator;
for (i = 'a'; i <= 'z'; i++)
CHARTABLE (i).action = act_id_start;
for (i = 'A'; i <= 'Z'; i++)
CHARTABLE (i).action = act_id_start;
CHARTABLE ('_').action = act_id_start;
for (i = '0'; i <= '9'; i++)
CHARTABLE (i).action = act_id_follow;
}
/* Reading text while tracking the current line number. */
/* The input file and the line number are global, because it's too
much of a pain to pass them around everywhere as arguments. Sorry. */
FILE *in; /* The current input file. */
char *in_name; /* Its filename. */
int in_line; /* The current line number in that file. */
FILE *out; /* The current output file. */
static int
source_getc ()
{
int c = getc (in);
if (c == '\n')
in_line++;
return c;
}
static void
source_ungetc (int c)
{
if (c == '\n')
in_line--;
ungetc (c, in);
}
static void
syntax_error (char *message, ...)
{
va_list args;
va_start (args, message);
fprintf (stderr, "%s:%d: ", in_name, in_line);
vfprintf (stderr, message, args);
va_end (args);
exit (1);
}
/* Skipping comments. */
/* If we've read the first character of a comment start (that is, the
character '/'), then check the second character. If it's '*',
we're in a block comment; read to the end of it. If it's '/',
we're starting a line comment; skip to the end of the line.
Otherwise, put the second character back on the input stream.
Return non-zero if this was a comment, zero otherwise. */
static int
maybe_skip_comment ()
{
int d = source_getc ();
if (d == '*')
{
int prev;
int start_line = in_line;
/* Make sure the asterisk of the comment start doesn't
get mistaken for the asterisk of the comment end. */
d = 0;
do
{
prev = d;
d = source_getc ();
}
while (d != EOF && ! (prev == '*' && d == '/'));
if (d == EOF)
{
in_line = start_line;
syntax_error ("unterminated comment");
}
return 1;
}
else if (d == '/')
{
do
d = source_getc ();
while (d != '\n' && d != EOF);
return 1;
}
else
{
source_ungetc (d);
return 0;
}
}
/* Parsing argument lists for keywords. */
static int
arglist_getc (char *keyword)
{
int c = source_getc ();
if (c == EOF)
syntax_error ("file ends in midst of arguments to keyword `%s'\n",
keyword);
return c;
}
struct buffer {
int size, len;
char *text;
};
static void
init_buffer (struct buffer *b)
{
b->size = 1;
b->text = (char *) xmalloc (b->size);
b->len = 0;
}
static void
add_char (struct buffer *b, int c)
{
if (b->len >= b->size)
{
b->size *= 2;
b->text = (char *) xrealloc (b->text, b->size);
}
b->text[b->len++] = c;
}
/* Parse a string or character literal, appending its text to b. */
static void
read_literal (char *keyword, struct buffer *b, int start)
{
add_char (b, start);
for (;;)
{
int c = arglist_getc (keyword);
add_char (b, c);
if (c == '\\')
add_char (b, arglist_getc (keyword));
else if (c == start)
break;
}
}
/* Parse the argument list following a keyword, just as the
C preprocessor would parse the arguments to a macro invocation.
Return the arguments as an array of strings, terminated by a null
pointer.
If there's an error parsing the arguments, print an error message
using KEYWORD as the keyword name, and exit.
Caller must free the array and each string; see the free_args
function. */
static char **
parse_args (char *keyword)
{
/* Our read-ahead character. */
int c = arglist_getc (keyword);
/* String of closing parens we are expecting, outermost first. */
struct buffer stack;
/* Current list of arguments. */
int arglist_size = 1;
char **arglist = (char **) xmalloc (arglist_size * sizeof (*arglist));
int arglist_len = 0;
init_buffer (&stack);
/* Skip any initial whitespace. */
while (CHARTABLE (c).action == act_space)
c = arglist_getc (keyword);
/* Require an opening paren. */
if (c != '(')
syntax_error ("arguments to snarfing keyword `%s' missing", keyword);
add_char (&stack, ')');
c = arglist_getc (keyword);
/* Read arguments, separated by commas outside of any (), {}, or []
pairs. */
while (stack.len > 0)
{
/* start a new argument. */
struct buffer arg;
int arg_incomplete = 1;
init_buffer (&arg);
/* Skip whitespace. */
while (CHARTABLE (c).action == act_space)
c = arglist_getc (keyword);
do
{
switch (CHARTABLE (c).action)
{
case act_normal:
case act_space:
case act_id_start:
case act_id_follow:
add_char (&arg, c);
break;
case act_literal:
read_literal (keyword, &arg, c);
break;
case act_comment_start:
if (maybe_skip_comment ())
add_char (&arg, ' ');
else
add_char (&arg, c);
break;
case act_open:
add_char (&stack, CHARTABLE (c).follow);
add_char (&arg, c);
break;
case act_close:
if (stack.len == 0)
/* We should have finished argument list parsing
when the stack became empty. */
abort ();
if (c != stack.text[stack.len - 1])
syntax_error ("mismatched parenthesis: '%c' and '%c'",
stack.text[stack.len - 1], c);
stack.len--;
/* Closing parens are part of the argument, except for the
outermost closing paren. */
if (stack.len > 0)
add_char (&arg, c);
else
arg_incomplete = 0;
break;
case act_arg_separator:
/* Commas are part of the argument, unless they occur at
the top level within the argument list. */
if (stack.len == 1)
arg_incomplete = 0;
else
add_char (&arg, c);
break;
default:
abort ();
}
c = arglist_getc (keyword);
}
while (arg_incomplete);
/* Add this argument to the list. */
add_char (&arg, '\0');
if (arglist_len >= arglist_size)
{
arglist_size *= 2;
arglist = (char **) xrealloc (arglist,
arglist_size * sizeof (*arglist));
}
arglist[arglist_len++] = arg.text;
}
/* Null-terminate the argument list. */
arglist = (char **) xrealloc (arglist,
(arglist_len + 1) * sizeof (*arglist));
arglist[arglist_len] = 0;
return arglist;
}
static int
count_args (char **args)
{
int i;
for (i = 0; args[i]; i++)
;
return i;
}
static void
free_args (char **args)
{
int i;
for (i = 0; args[i]; i++)
free (args[i]);
free (args);
}
/* Individual routines for processing keywords. */
/* Flags these routines might use to select details of their behavior. */
enum keyword_flags {
kw_gsubr = 0x01, /* create a gsubr */
kw_generic = 0x02, /* create a generic function */
kw_global = 0x04, /* global declaration, not static */
kw_init = 0x08, /* initialized value */
kw_long = 0x10, /* initialize it with a long */
kw_keyword = 0x20 /* make a keyword, not a symbol */
};
static void
check_arg_count (char *keyword, char **args, int expected)
{
int actual = count_args (args);
if (actual != expected)
syntax_error ("keyword `%s' expects %d args, but got %d",
keyword, expected, actual);
}
static void
proc_keyword (char *keyword, char **args, int flags)
{
int expected_args;
if (flags & kw_gsubr)
expected_args = 6;
else
expected_args = 4;
if (flags & kw_generic)
expected_args++;
check_arg_count (keyword, args, expected_args);
/* Print some nice indentation. */
fputs (" ", out);
/* Print out the function name. */
fprintf (out, "scm_make_%s%s",
(flags & kw_gsubr) ? "gsubr" : "subr",
(flags & kw_generic) ? "_with_generic" : "");
/* And the arguments. */
fputs (" (", out);
if (flags & kw_gsubr)
fprintf (out, "%s, %s, %s, %s, (SCM (*) (...)) %s",
args[0], args[2], args[3], args[4], args[5]);
else
fprintf (out, "%s, %s, (SCM (*) (...)) %s",
args[0], args[2], args[3]);
/* The generic versions have an extra argument at the end,
which is a pointer to a generic function variable. */
if (flags & kw_generic)
fprintf (out, ", &%s", (flags & kw_gsubr) ? args[6] : args[4]);
fputs (");\n", out);
}
static void
syntax_keyword (char *keyword, char **args, int flags)
{
check_arg_count (keyword, args, 4);
fprintf (out, " scm_make_synt (%s, %s, %s);\n",
args[0], args[2], args[3]);
}
static void
symbol_keyword (char *keyword, char **args, int flags)
{
check_arg_count (keyword, args, 2);
fprintf (out, " %s = scm_permanent_object (", args[0]);
if (flags & kw_keyword)
fprintf (out, "scm_c_make_keyword (%s)", args[1]);
else
fprintf (out, "SCM_CAR (scm_intern0 (%s))", args[1]);
fputs (");\n", out);
}
static void
vcell_keyword (char *keyword, char **args, int flags)
{
check_arg_count (keyword, args, (flags & kw_init) ? 3 : 2);
fprintf (out, " %s = scm_permanent_object (scm_intern0 (%s));\n",
args[0], args[1]);
fprintf (out, " SCM_SETCDR (%s, ", args[0]);
if (flags & kw_long)
fprintf (out, "scm_long2num (%s)", args[2]);
else if (flags & kw_init)
fputs (args[2], out);
else
fputs ("SCM_BOOL_F", out);
fputs (");\n", out);
}
/* The keyword table. */
struct keyword {
char *name;
void (*func) (char *keyword, char **args, int flags);
int flags;
};
/* The maximum length of any keyword, in bytes. */
#define MAX_KEYWORD_LEN (20)
struct keyword keywords[] =
{
{ "SCM_PROC", proc_keyword, kw_gsubr },
{ "SCM_GPROC", proc_keyword, kw_gsubr | kw_generic },
{ "SCM_PROC1", proc_keyword, 0 },
{ "SCM_GPROC1", proc_keyword, kw_generic },
{ "SCM_SYNTAX", syntax_keyword, 0 },
{ "SCM_SYMBOL", symbol_keyword, 0 },
{ "SCM_GLOBAL_SYMBOL", symbol_keyword, kw_global },
{ "SCM_KEYWORD", symbol_keyword, kw_keyword },
{ "SCM_GLOBAL_KEYWORD", symbol_keyword, kw_keyword | kw_global },
{ "SCM_VCELL", vcell_keyword, 0 },
{ "SCM_GLOBAL_VCELL", vcell_keyword, kw_global },
{ "SCM_VCELL_INIT", vcell_keyword, kw_init },
{ "SCM_GLOBAL_VCELL_INIT", vcell_keyword, kw_init | kw_global },
{ "SCM_CONST_LONG", vcell_keyword, kw_init | kw_long },
{ 0, 0, 0 }
};
/* The bigger this is, the less likely a random identifier is
to clash with the user's program. */
#define KEYWORD_HASH_SIZE (1009)
struct keyword *keyword_hash[KEYWORD_HASH_SIZE];
static unsigned long
hash (char *text)
{
long h = 0;
while (*text)
{
unsigned char c = (unsigned char) *text++;
h = (h << 4) + c + (c << 9) + (h >> 24) + 32;
}
return h;
}
static struct keyword *
is_keyword (char *name)
{
unsigned long h = hash (name) % KEYWORD_HASH_SIZE;
if (keyword_hash[h])
{
if (strcmp (name, keyword_hash[h]->name))
{
/* Just for testing. */
fprintf (stderr, "%s: keyword/user id hash collision: %s and %s\n",
program_name, keyword_hash[h]->name, name);
return 0;
}
return keyword_hash[h];
}
return 0;
}
static void
process_keyword (struct keyword *k)
{
char **args = parse_args (k->name);
k->func (k->name, args, k->flags);
free_args (args);
}
static void
init_keyword_hash_table ()
{
int i;
for (i = 0; keywords[i].func; i++)
{
unsigned long h = hash (keywords[i].name) % KEYWORD_HASH_SIZE;
if (keyword_hash[h])
{
fprintf (stderr, "%s: keyword hash collision: %s and %s\n",
program_name, keyword_hash[h]->name, keywords[i].name);
exit (2);
}
keyword_hash[h] = &keywords[i];
}
}
/* Scanning a file of C code. */
/* Skip a string or character literal that started with the character
FOLLOW. */
static void
skip_literal (int follow)
{
int start_line = in_line;
for (;;)
{
int c = source_getc ();
if (c == EOF)
{
in_line = start_line;
syntax_error ("unterminated character or string literal");
}
else if (c == '\\')
source_getc ();
else if (c == follow)
break;
}
}
static void
read_id (char *buf, size_t size)
{
int i = 0;
for (;;)
{
int c = source_getc ();
int action;
if (c == EOF)
break;
action = CHARTABLE (c).action;
if (action == act_id_start || action == act_id_follow)
{
if (i < size)
buf[i] = c;
i++;
}
else
{
source_ungetc (c);
break;
}
}
/* It doesn't matter that we truncate the keyword to SIZE-1
characters, since the buffer is large enough to hold any valid
keyword. */
buf[(i >= size) ? size - 1 : i] = '\0';
}
static void
process_stream ()
{
int c;
for (;;)
{
c = source_getc ();
if (c == EOF)
break;
switch (CHARTABLE (c).action)
{
case act_normal:
case act_space:
case act_open:
case act_close:
case act_id_follow:
case act_arg_separator:
break;
case act_literal:
skip_literal (c);
break;
case act_comment_start:
maybe_skip_comment ();
break;
case act_id_start:
{
char buf[MAX_KEYWORD_LEN + 1];
struct keyword *k;
source_ungetc (c);
read_id (buf, sizeof (buf));
k = is_keyword (buf);
if (k)
process_keyword (k);
}
break;
default:
abort ();
}
}
}
static void
process_file (char *inname, char *outname)
{
in = fopen (inname, "r");
if (! in)
system_error ("error opening input file", inname);
in_line = 1;
in_name = inname;
out = fopen (outname, "w");
if (! out)
system_error ("error opening output file", outname);
process_stream (in, out);
/* I'm told the AIX C preprocessor doesn't like to #include empty files. */
putc ('\n', out);
if (ferror (in))
system_error ("error reading input file", inname);
if (ferror (out))
system_error ("error writing output file", outname);
if (fclose (in) == EOF)
system_error ("error closing input file", inname);
if (fclose (out) == EOF)
system_error ("error closing output file", outname);
}
/* The main function. */
static void
version ()
{
fprintf (stderr, "guile-snarf (Guile) %s\n", GUILE_VERSION);
exit (0);
}
static void
usage ()
{
fprintf (stderr, "Usage: %s [-o OUTFILE] INFILE\n", program_name);
}
static void
help ()
{
fprintf (stderr, "guile-snarf -- extract declarations from Guile C code.\n");
usage ();
fprintf (stderr, "\
This program makes it easier to write C code which defines Scheme\n\
functions and variables and uses Scheme symbols in Guile. Instead of\n\
writing code yourself to build the Scheme objects when your code is\n\
initialized, you can write brief declarations above your C functions\n\
indicating how they should be called from Scheme. Then, if you run\n\
this program on your source file, it will write code for you that\n\
defines functions, builds Scheme objects, and so on. You can then\n\
#include this program's output into your initialization function.\n\
\n\
`INFILE' is the file from which we should extract declarations and\n\
generate initialization code.\n\
\n\
`-o OUTFILE' specifies the file where we should place the output. If\n\
omitted, it defaults to `INFILE', with its extension changed to `.x'.\n");
exit (0);
}
static void
bad_argument ()
{
usage ();
fprintf (stderr, "Type `%s --help' for more information.\n", program_name);
exit (1);
}
int
main (int argc, char **argv)
{
char *infile, *outfile;
program_name = strrchr (argv[0], '/');
if (! program_name)
program_name = argv[0];
else
program_name++;
infile = 0;
outfile = 0;
argc--, argv++;
while (argc > 0)
{
if (! strcmp (argv[0], "--version"))
version ();
else if (! strcmp (argv[0], "--help"))
help ();
else if (! strcmp (argv[0], "-o"))
outfile = *++argv, argc--;
else if (argv[0][0] == '-')
{
fprintf (stderr, "%s: unrecognized switch: `%s'\n",
program_name, argv[0]);
bad_argument ();
}
else if (infile)
{
fprintf (stderr, "%s: more than input file given: `%s' and `%s'\n",
program_name, infile, argv[0]);
bad_argument ();
}
else
infile = argv[0];
argc--, argv++;
}
if (! infile)
{
fprintf (stderr, "%s: no input file given\n", program_name);
bad_argument ();
}
if (! outfile)
{
/* Choose a default output file name. This should be the input
file name, with the extension changed to `.x'. */
char *ext = strrchr (infile, '.');
/* Make sure we haven't mistaken a dot in an earlier path
component for the start of the extension. */
if (ext && strchr (ext, '/'))
ext = 0;
if (ext)
{
outfile = (char *) xmalloc (ext - infile + 3);
strcpy (outfile, infile);
strcpy (outfile + (ext - infile), ".x");
}
else
{
outfile = (char *) xmalloc (strlen (infile) + 3);
strcpy (outfile, infile);
strcat (outfile, ".x");
}
}
init_chartable ();
init_keyword_hash_table ();
process_file (infile, outfile);
return 0;
}